kernel: Make SMP support default (and non-optional).
[dragonfly.git] / sys / net / if.c
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *      @(#)if.c        8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  */
36
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_polling.h"
41 #include "opt_ifpoll.h"
42
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/priv.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/socketops.h>
53 #include <sys/protosw.h>
54 #include <sys/kernel.h>
55 #include <sys/ktr.h>
56 #include <sys/mutex.h>
57 #include <sys/sockio.h>
58 #include <sys/syslog.h>
59 #include <sys/sysctl.h>
60 #include <sys/domain.h>
61 #include <sys/thread.h>
62 #include <sys/serialize.h>
63 #include <sys/bus.h>
64
65 #include <sys/thread2.h>
66 #include <sys/msgport2.h>
67 #include <sys/mutex2.h>
68
69 #include <net/if.h>
70 #include <net/if_arp.h>
71 #include <net/if_dl.h>
72 #include <net/if_types.h>
73 #include <net/if_var.h>
74 #include <net/ifq_var.h>
75 #include <net/radix.h>
76 #include <net/route.h>
77 #include <net/if_clone.h>
78 #include <net/netisr.h>
79 #include <net/netmsg2.h>
80
81 #include <machine/atomic.h>
82 #include <machine/stdarg.h>
83 #include <machine/smp.h>
84
85 #if defined(INET) || defined(INET6)
86 /*XXX*/
87 #include <netinet/in.h>
88 #include <netinet/in_var.h>
89 #include <netinet/if_ether.h>
90 #ifdef INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/in6_ifattach.h>
93 #endif
94 #endif
95
96 #if defined(COMPAT_43)
97 #include <emulation/43bsd/43bsd_socket.h>
98 #endif /* COMPAT_43 */
99
/*
 * Message that pairs an interface address with an interface.  It embeds
 * a struct netmsg_base as its first member.  The code that builds and
 * handles this message is not in this part of the file.
 */
100 struct netmsg_ifaddr {
101         struct netmsg_base base;
102         struct ifaddr   *ifa;
103         struct ifnet    *ifp;
104         int             tail;   /* NOTE(review): presumably "insert at tail" -- confirm against users */
105 };
106
107 /*
108  * System initialization
 *
 * Forward declarations of file-local (static) routines that are
 * referenced before their definitions.
109  */
110 static void     if_attachdomain(void *);
111 static void     if_attachdomain1(struct ifnet *);
112 static int      ifconf(u_long, caddr_t, struct ucred *);
113 static void     ifinit(void *);
114 static void     ifnetinit(void *);
115 static void     if_slowtimo(void *);
116 static void     link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
117 static int      if_rtdel(struct radix_node *, void *);
118
119 #ifdef INET6
120 /*
121  * XXX: declared here to avoid including many inet6 related files..
122  * should be more generalized?
123  */
124 extern void     nd6_setmtu(struct ifnet *);
125 #endif
126
/* sysctl nodes: "link" under _net and "generic" under _net_link. */
127 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
128 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
129
/* ifinit() is registered at SI_SUB_PROTO_IF / SI_ORDER_FIRST. */
130 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
131 /* Must be after netisr_init */
132 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
133
/*
 * Allocation/free hook tables with 256 slots each.
 * NOTE(review): presumably indexed by interface type; the users are
 * not in this part of the file -- confirm.
 */
134 static  if_com_alloc_t *if_com_alloc[256];
135 static  if_com_free_t *if_com_free[256];
136
137 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
138 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
139 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
140
/* Fallback send queue length; ifinit() applies it when a driver left ifq_maxlen 0. */
141 int                     ifqmaxlen = IFQ_MAXLEN;
/* List of all attached interfaces, linked through if_link. */
142 struct ifnethead        ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
143
/* Callout initialized by ifinit(). */
144 struct callout          if_slowtimo_timer;
145
/*
 * if_index is the highest interface index handed out: if_attach()
 * pre-increments it and if_detach() lowers it past trailing empty slots.
 * ifindex2ifnet[] maps an index to its ifnet and is grown by if_attach().
 */
146 int                     if_index = 0;
147 struct ifnet            **ifindex2ifnet = NULL;
/* NOTE(review): presumably one thread per CPU; the users are not in this part of the file. */
148 static struct thread    ifnet_threads[MAXCPU];
149
/*
 * KTR trace points for the interface send queue ("ifq" master: enqueue,
 * dequeue).  logifq(name, arg) expands to KTR_LOG(ifq_<name>, arg).
 * KTR_IFQ defaults to KTR_ALL unless it is already defined.
 */
150 #define IFQ_KTR_STRING          "ifq=%p"
151 #define IFQ_KTR_ARGS    struct ifaltq *ifq
152 #ifndef KTR_IFQ
153 #define KTR_IFQ                 KTR_ALL
154 #endif
155 KTR_INFO_MASTER(ifq);
156 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
157 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
158 #define logifq(name, arg)       KTR_LOG(ifq_ ## name, arg)
159
/*
 * KTR trace points for the if_start path ("if_start" master: run, sched,
 * avoid, contend_sched, chase_sched).  logifstart(name, arg) expands to
 * KTR_LOG(if_start_<name>, arg).
 */
160 #define IF_START_KTR_STRING     "ifp=%p"
161 #define IF_START_KTR_ARGS       struct ifnet *ifp
162 #ifndef KTR_IF_START
163 #define KTR_IF_START            KTR_ALL
164 #endif
165 KTR_INFO_MASTER(if_start);
166 KTR_INFO(KTR_IF_START, if_start, run, 0,
167          IF_START_KTR_STRING, IF_START_KTR_ARGS);
168 KTR_INFO(KTR_IF_START, if_start, sched, 1,
169          IF_START_KTR_STRING, IF_START_KTR_ARGS);
170 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
171          IF_START_KTR_STRING, IF_START_KTR_ARGS);
172 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
173          IF_START_KTR_STRING, IF_START_KTR_ARGS);
174 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
175          IF_START_KTR_STRING, IF_START_KTR_ARGS);
176 #define logifstart(name, arg)   KTR_LOG(if_start_ ## name, arg)
177
/* List of all interface groups; if_creategroup() appends to it. */
178 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
179
180 /*
181  * Network interface utility routines.
182  *
183  * Routines with ifa_ifwith* names take sockaddr *'s as
184  * parameters.
185  */
186 /* ARGSUSED*/
187 void
188 ifinit(void *dummy)
189 {
190         struct ifnet *ifp;
191
192         callout_init(&if_slowtimo_timer);
193
194         crit_enter();
195         TAILQ_FOREACH(ifp, &ifnet, if_link) {
196                 if (ifp->if_snd.ifq_maxlen == 0) {
197                         if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
198                         ifp->if_snd.ifq_maxlen = ifqmaxlen;
199                 }
200         }
201         crit_exit();
202
203         if_slowtimo(0);
204 }
205
206 static int
207 if_start_cpuid(struct ifnet *ifp)
208 {
209         return ifp->if_cpuid;
210 }
211
#ifdef DEVICE_POLLING
/*
 * ifnet.if_start_cpuid method installed by if_attach() for interfaces
 * that provide if_poll: use if_poll_cpuid when it is non-negative,
 * otherwise fall back to if_cpuid.
 */
static int
if_start_cpuid_poll(struct ifnet *ifp)
{
	const int cpuid = ifp->if_poll_cpuid;

	return (cpuid < 0) ? ifp->if_cpuid : cpuid;
}
#endif
224
#ifdef IFPOLL_ENABLE
/*
 * ifnet.if_start_cpuid method installed by if_attach() for interfaces
 * that provide if_npoll: use if_npoll_cpuid when it is non-negative,
 * otherwise fall back to if_cpuid.
 */
static int
if_start_cpuid_npoll(struct ifnet *ifp)
{
	const int cpuid = ifp->if_npoll_cpuid;

	return (cpuid < 0) ? ifp->if_cpuid : cpuid;
}
#endif
237
238 static void
239 if_start_ipifunc(void *arg)
240 {
241         struct ifnet *ifp = arg;
242         struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
243
244         crit_enter();
245         if (lmsg->ms_flags & MSGF_DONE)
246                 lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
247         crit_exit();
248 }
249
250 /*
251  * Schedule ifnet.if_start on ifnet's CPU
252  */
253 static void
254 if_start_schedule(struct ifnet *ifp)
255 {
256         int cpu;
257
258         cpu = ifp->if_start_cpuid(ifp);
259         if (cpu != mycpuid)
260                 lwkt_send_ipiq(globaldata_find(cpu), if_start_ipifunc, ifp);
261         else
262         if_start_ipifunc(ifp);
263 }
264
265 /*
266  * NOTE:
267  * This function will release ifnet.if_start interlock,
268  * if ifnet.if_start does not need to be scheduled
269  */
270 static __inline int
271 if_start_need_schedule(struct ifaltq *ifq, int running)
272 {
273         if (!running || ifq_is_empty(ifq)
274 #ifdef ALTQ
275             || ifq->altq_tbr != NULL
276 #endif
277         ) {
278                 ALTQ_LOCK(ifq);
279                 /*
280                  * ifnet.if_start interlock is released, if:
281                  * 1) Hardware can not take any packets, due to
282                  *    o  interface is marked down
283                  *    o  hardware queue is full (IFF_OACTIVE)
284                  *    Under the second situation, hardware interrupt
285                  *    or polling(4) will call/schedule ifnet.if_start
286                  *    when hardware queue is ready
287                  * 2) There is not packet in the ifnet.if_snd.
288                  *    Further ifq_dispatch or ifq_handoff will call/
289                  *    schedule ifnet.if_start
290                  * 3) TBR is used and it does not allow further
291                  *    dequeueing.
292                  *    TBR callout will call ifnet.if_start
293                  */
294                 if (!running || !ifq_data_ready(ifq)) {
295                         ifq->altq_started = 0;
296                         ALTQ_UNLOCK(ifq);
297                         return 0;
298                 }
299                 ALTQ_UNLOCK(ifq);
300         }
301         return 1;
302 }
303
304 static void
305 if_start_dispatch(netmsg_t msg)
306 {
307         struct lwkt_msg *lmsg = &msg->base.lmsg;
308         struct ifnet *ifp = lmsg->u.ms_resultp;
309         struct ifaltq *ifq = &ifp->if_snd;
310         int running = 0;
311
312         crit_enter();
313         lwkt_replymsg(lmsg, 0); /* reply ASAP */
314         crit_exit();
315
316         if (mycpuid != ifp->if_start_cpuid(ifp)) {
317                 /*
318                  * If the ifnet is still up, we need to
319                  * chase its CPU change.
320                  */
321                 if (ifp->if_flags & IFF_UP) {
322                         logifstart(chase_sched, ifp);
323                         if_start_schedule(ifp);
324                         return;
325                 } else {
326                         goto check;
327                 }
328         }
329
330         if (ifp->if_flags & IFF_UP) {
331                 ifnet_serialize_tx(ifp); /* XXX try? */
332                 if ((ifp->if_flags & IFF_OACTIVE) == 0) {
333                         logifstart(run, ifp);
334                         ifp->if_start(ifp);
335                         if ((ifp->if_flags &
336                         (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
337                                 running = 1;
338                 }
339                 ifnet_deserialize_tx(ifp);
340         }
341 check:
342         if (if_start_need_schedule(ifq, running)) {
343                 crit_enter();
344                 if (lmsg->ms_flags & MSGF_DONE) { /* XXX necessary? */
345                         logifstart(sched, ifp);
346                         lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
347                 }
348                 crit_exit();
349         }
350 }
351
352 /* Device driver ifnet.if_start helper function */
353 void
354 if_devstart(struct ifnet *ifp)
355 {
356         struct ifaltq *ifq = &ifp->if_snd;
357         int running = 0;
358
359         ASSERT_IFNET_SERIALIZED_TX(ifp);
360
361         ALTQ_LOCK(ifq);
362         if (ifq->altq_started || !ifq_data_ready(ifq)) {
363                 logifstart(avoid, ifp);
364                 ALTQ_UNLOCK(ifq);
365                 return;
366         }
367         ifq->altq_started = 1;
368         ALTQ_UNLOCK(ifq);
369
370         logifstart(run, ifp);
371         ifp->if_start(ifp);
372
373         if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
374                 running = 1;
375
376         if (if_start_need_schedule(ifq, running)) {
377                 /*
378                  * More data need to be transmitted, ifnet.if_start is
379                  * scheduled on ifnet's CPU, and we keep going.
380                  * NOTE: ifnet.if_start interlock is not released.
381                  */
382                 logifstart(sched, ifp);
383                 if_start_schedule(ifp);
384         }
385 }
386
387 static void
388 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
389 {
390         lwkt_serialize_enter(ifp->if_serializer);
391 }
392
393 static void
394 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
395 {
396         lwkt_serialize_exit(ifp->if_serializer);
397 }
398
399 static int
400 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
401 {
402         return lwkt_serialize_try(ifp->if_serializer);
403 }
404
#ifdef INVARIANTS
/*
 * Default ifnet.if_serialize_assert method (INVARIANTS kernels only):
 * assert that the interface's single serializer is held when
 * 'serialized' is true, or not held when it is false.  The slz argument
 * is ignored.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (!serialized) {
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
		return;
	}
	ASSERT_SERIALIZED(ifp->if_serializer);
}
#endif
417
418 /*
419  * Attach an interface to the list of "active" interfaces.
420  *
421  * The serializer is optional.  If non-NULL access to the interface
422  * may be MPSAFE.
 *
 * A driver either supplies its own serialize methods (if_serialize,
 * if_deserialize, if_tryserialize, if_serialize_assert) and passes a
 * NULL serializer, or supplies none of them, in which case the default
 * methods are installed and operate on 'serializer' (or on the embedded
 * if_default_serializer when 'serializer' is NULL).
 *
 * The function also sets up the per-CPU if_start messages, assigns the
 * interface index, grows ifindex2ifnet[] on demand, creates the AF_LINK
 * address/netmask pair, initializes the send queue and announces the
 * interface.  if_ioctl_mtx is held from its initialization until the
 * end of the function.
423  */
424 void
425 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
426 {
427         unsigned socksize, ifasize;
428         int namelen, masklen;
429         struct sockaddr_dl *sdl;
430         struct ifaddr *ifa;
431         struct ifaltq *ifq;
432         int i;
433
        /* Current capacity of ifindex2ifnet[]; doubled before each (re)allocation */
434         static int if_indexlim = 8;
435
436         if (ifp->if_serialize != NULL) {
437                 KASSERT(ifp->if_deserialize != NULL &&
438                         ifp->if_tryserialize != NULL &&
439                         ifp->if_serialize_assert != NULL,
440                         ("serialize functions are partially setup"));
441
442                 /*
443                  * If the device supplies serialize functions,
444                  * then clear if_serializer to catch any invalid
445                  * usage of this field.
446                  */
447                 KASSERT(serializer == NULL,
448                         ("both serialize functions and default serializer "
449                          "are supplied"));
450                 ifp->if_serializer = NULL;
451         } else {
452                 KASSERT(ifp->if_deserialize == NULL &&
453                         ifp->if_tryserialize == NULL &&
454                         ifp->if_serialize_assert == NULL,
455                         ("serialize functions are partially setup"));
456                 ifp->if_serialize = if_default_serialize;
457                 ifp->if_deserialize = if_default_deserialize;
458                 ifp->if_tryserialize = if_default_tryserialize;
                /* Without INVARIANTS if_serialize_assert is left NULL */
459 #ifdef INVARIANTS
460                 ifp->if_serialize_assert = if_default_serialize_assert;
461 #endif
462
463                 /*
464                  * The serializer can be passed in from the device,
465                  * allowing the same serializer to be used for both
466                  * the interrupt interlock and the device queue.
467                  * If not specified, the netif structure will use an
468                  * embedded serializer.
469                  */
470                 if (serializer == NULL) {
471                         serializer = &ifp->if_default_serializer;
472                         lwkt_serialize_init(serializer);
473                 }
474                 ifp->if_serializer = serializer;
475         }
476
        /*
         * Default: if_start runs on if_cpuid (CPU 0).  The polling
         * variants below override the method when the driver provides
         * the matching poll hook.
         */
477         ifp->if_start_cpuid = if_start_cpuid;
478         ifp->if_cpuid = 0;
479
480 #ifdef DEVICE_POLLING
481         /* Device is not in polling mode by default */
482         ifp->if_poll_cpuid = -1;
483         if (ifp->if_poll != NULL)
484                 ifp->if_start_cpuid = if_start_cpuid_poll;
485 #endif
486 #ifdef IFPOLL_ENABLE
487         /* Device is not in polling mode by default */
488         ifp->if_npoll_cpuid = -1;
489         if (ifp->if_npoll != NULL)
490                 ifp->if_start_cpuid = if_start_cpuid_npoll;
491 #endif
492
        /*
         * One if_start message per CPU, handled by if_start_dispatch();
         * ms_resultp carries the ifnet pointer back to the handler.
         */
493         ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
494                                      M_LWKTMSG, M_WAITOK);
495         for (i = 0; i < ncpus; ++i) {
496                 netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
497                             0, if_start_dispatch);
498                 ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
499         }
500
501         mtx_init(&ifp->if_ioctl_mtx);
502         mtx_lock(&ifp->if_ioctl_mtx);
503
504         TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
505         ifp->if_index = ++if_index;
506
507         /*
508          * XXX -
509          * The old code would work if the interface passed a pre-existing
510          * chain of ifaddrs to this code.  We don't trust our callers to
511          * properly initialize the tailq, however, so we no longer allow
512          * this unlikely case.
513          */
514         ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
515                                     M_IFADDR, M_WAITOK | M_ZERO);
516         for (i = 0; i < ncpus; ++i)
517                 TAILQ_INIT(&ifp->if_addrheads[i]);
518
519         TAILQ_INIT(&ifp->if_prefixhead);
520         TAILQ_INIT(&ifp->if_multiaddrs);
521         TAILQ_INIT(&ifp->if_groups);
522         getmicrotime(&ifp->if_lastchange);
523         if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
524                 unsigned int n;
525                 struct ifnet **q;
526
527                 if_indexlim <<= 1;
528
529                 /* grow ifindex2ifnet */
530                 n = if_indexlim * sizeof(*q);
531                 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
532                 if (ifindex2ifnet) {
                        /* The old table is half the size of the new one */
533                         bcopy(ifindex2ifnet, q, n/2);
534                         kfree(ifindex2ifnet, M_IFADDR);
535                 }
536                 ifindex2ifnet = q;
537         }
538
539         ifindex2ifnet[if_index] = ifp;
540
541         /*
542          * create a Link Level name for this device
         *
         * The ifaddr is followed by two sockaddr_dl areas of socksize
         * bytes each: the address (holding the interface name) and the
         * netmask (0xff over the name bytes).
543          */
544         namelen = strlen(ifp->if_xname);
545         masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
546         socksize = masklen + ifp->if_addrlen;
        /* Round up to a multiple of sizeof(long) */
547 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
548         if (socksize < sizeof(*sdl))
549                 socksize = sizeof(*sdl);
550         socksize = ROUNDUP(socksize);
551 #undef ROUNDUP
552         ifasize = sizeof(struct ifaddr) + 2 * socksize;
553         ifa = ifa_create(ifasize, M_WAITOK);
554         sdl = (struct sockaddr_dl *)(ifa + 1);
555         sdl->sdl_len = socksize;
556         sdl->sdl_family = AF_LINK;
557         bcopy(ifp->if_xname, sdl->sdl_data, namelen);
558         sdl->sdl_nlen = namelen;
559         sdl->sdl_index = ifp->if_index;
560         sdl->sdl_type = ifp->if_type;
561         ifp->if_lladdr = ifa;
562         ifa->ifa_ifp = ifp;
563         ifa->ifa_rtrequest = link_rtrequest;
564         ifa->ifa_addr = (struct sockaddr *)sdl;
        /* The netmask sockaddr_dl sits socksize bytes after the address */
565         sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
566         ifa->ifa_netmask = (struct sockaddr *)sdl;
567         sdl->sdl_len = masklen;
568         while (namelen != 0)
569                 sdl->sdl_data[--namelen] = 0xff;
570         ifa_iflink(ifa, ifp, 0 /* Insert head */);
571
572         EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
573         devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
574
        /* Reset the send queue; only the ALTQF_CANTCHANGE flag bits are kept */
575         ifq = &ifp->if_snd;
576         ifq->altq_type = 0;
577         ifq->altq_disc = NULL;
578         ifq->altq_flags &= ALTQF_CANTCHANGE;
579         ifq->altq_tbr = NULL;
580         ifq->altq_ifp = ifp;
581         ifq->altq_started = 0;
582         ifq->altq_prepended = NULL;
583         ALTQ_LOCK_INIT(ifq);
584         ifq_set_classic(ifq);
585
        /*
         * If domains are already registered, attach them now; otherwise
         * if_attachdomain() does it for every interface on the list.
         */
586         if (!SLIST_EMPTY(&domains))
587                 if_attachdomain1(ifp);
588
589         /* Announce the interface. */
590         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
591
592         mtx_unlock(&ifp->if_ioctl_mtx);
593 }
594
595 static void
596 if_attachdomain(void *dummy)
597 {
598         struct ifnet *ifp;
599
600         crit_enter();
601         TAILQ_FOREACH(ifp, &ifnet, if_list)
602                 if_attachdomain1(ifp);
603         crit_exit();
604 }
605 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
606         if_attachdomain, NULL);
607
608 static void
609 if_attachdomain1(struct ifnet *ifp)
610 {
611         struct domain *dp;
612
613         crit_enter();
614
615         /* address family dependent data region */
616         bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
617         SLIST_FOREACH(dp, &domains, dom_next)
618                 if (dp->dom_ifattach)
619                         ifp->if_afdata[dp->dom_family] =
620                                 (*dp->dom_ifattach)(ifp);
621         crit_exit();
622 }
623
624 /*
625  * Purge all addresses whose type is _not_ AF_LINK
626  */
627 void
628 if_purgeaddrs_nolink(struct ifnet *ifp)
629 {
630         struct ifaddr_container *ifac, *next;
631
632         TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
633                               ifa_link, next) {
634                 struct ifaddr *ifa = ifac->ifa;
635
636                 /* Leave link ifaddr as it is */
637                 if (ifa->ifa_addr->sa_family == AF_LINK)
638                         continue;
639 #ifdef INET
640                 /* XXX: Ugly!! ad hoc just for INET */
641                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
642                         struct ifaliasreq ifr;
643 #ifdef IFADDR_DEBUG_VERBOSE
644                         int i;
645
646                         kprintf("purge in4 addr %p: ", ifa);
647                         for (i = 0; i < ncpus; ++i)
648                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
649                         kprintf("\n");
650 #endif
651
652                         bzero(&ifr, sizeof ifr);
653                         ifr.ifra_addr = *ifa->ifa_addr;
654                         if (ifa->ifa_dstaddr)
655                                 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
656                         if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
657                                        NULL) == 0)
658                                 continue;
659                 }
660 #endif /* INET */
661 #ifdef INET6
662                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
663 #ifdef IFADDR_DEBUG_VERBOSE
664                         int i;
665
666                         kprintf("purge in6 addr %p: ", ifa);
667                         for (i = 0; i < ncpus; ++i)
668                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
669                         kprintf("\n");
670 #endif
671
672                         in6_purgeaddr(ifa);
673                         /* ifp_addrhead is already updated */
674                         continue;
675                 }
676 #endif /* INET6 */
677                 ifa_ifunlink(ifa, ifp);
678                 ifa_destroy(ifa);
679         }
680 }
681
682 /*
683  * Detach an interface, removing it from the
684  * list of "active" interfaces.
 *
 * In order: the detach event handlers are invoked, polling is
 * deregistered, the interface is brought down, ALTQ is disabled and
 * detached, all addresses are purged, the INET/INET6 state is torn
 * down, routes referencing the interface are deleted from every CPU's
 * routing tables, the departure is announced, the per-domain detach
 * hooks are run, the index slot is released and the per-CPU arrays
 * allocated by if_attach() are freed.
685  */
686 void
687 if_detach(struct ifnet *ifp)
688 {
689         struct radix_node_head  *rnh;
690         int i;
691         int cpu, origcpu;
692         struct domain *dp;
693
694         EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
695
696         /*
697          * Remove routes and flush queues.
698          */
699         crit_enter();
700 #ifdef DEVICE_POLLING
701         if (ifp->if_flags & IFF_POLLING)
702                 ether_poll_deregister(ifp);
703 #endif
704 #ifdef IFPOLL_ENABLE
705         if (ifp->if_flags & IFF_NPOLLING)
706                 ifpoll_deregister(ifp);
707 #endif
708         if_down(ifp);
709
710 #ifdef ALTQ
711         if (ifq_is_enabled(&ifp->if_snd))
712                 altq_disable(&ifp->if_snd);
713         if (ifq_is_attached(&ifp->if_snd))
714                 altq_detach(&ifp->if_snd);
715 #endif
716
717         /*
718          * Clean up all addresses.
719          */
720         ifp->if_lladdr = NULL;
721
722         if_purgeaddrs_nolink(ifp);
        /* Only the AF_LINK ifaddr created by if_attach() may remain here */
723         if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
724                 struct ifaddr *ifa;
725
726                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
727                 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
728                         ("non-link ifaddr is left on if_addrheads"));
729
730                 ifa_ifunlink(ifa, ifp);
731                 ifa_destroy(ifa);
732                 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
733                         ("there are still ifaddrs left on if_addrheads"));
734         }
735
736 #ifdef INET
737         /*
738          * Remove all IPv4 kernel structures related to ifp.
739          */
740         in_ifdetach(ifp);
741 #endif
742
743 #ifdef INET6
744         /*
745          * Remove all IPv6 kernel structs related to ifp.  This should be done
746          * before removing routing entries below, since IPv6 interface direct
747          * routes are expected to be removed by the IPv6-specific kernel API.
748          * Otherwise, the kernel will detect some inconsistency and bark it.
749          */
750         in6_ifdetach(ifp);
751 #endif
752
753         /*
754          * Delete all remaining routes using this interface
755          * Unfortunately the only way to do this is to slog through
756          * the entire routing table looking for routes which point
757          * to this interface...oh well...
         *
         * The thread migrates to each CPU in turn to walk that CPU's
         * rt_tables[] and returns to the original CPU afterwards.
758          */
759         origcpu = mycpuid;
760         for (cpu = 0; cpu < ncpus; cpu++) {
761                 lwkt_migratecpu(cpu);
762                 for (i = 1; i <= AF_MAX; i++) {
763                         if ((rnh = rt_tables[cpu][i]) == NULL)
764                                 continue;
765                         rnh->rnh_walktree(rnh, if_rtdel, ifp);
766                 }
767         }
768         lwkt_migratecpu(origcpu);
769
770         /* Announce that the interface is gone. */
771         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
772         devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
773
        /* Run the domain detach hooks for the slots filled at attach time */
774         SLIST_FOREACH(dp, &domains, dom_next)
775                 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
776                         (*dp->dom_ifdetach)(ifp,
777                                 ifp->if_afdata[dp->dom_family]);
778
779         /*
780          * Remove interface from ifindex2ifnet[] and maybe decrement if_index.
781          */
782         ifindex2ifnet[ifp->if_index] = NULL;
783         while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
784                 if_index--;
785
786         TAILQ_REMOVE(&ifnet, ifp, if_link);
        /* Release the per-CPU arrays allocated by if_attach() */
787         kfree(ifp->if_addrheads, M_IFADDR);
788         kfree(ifp->if_start_nmsg, M_LWKTMSG);
789         crit_exit();
790 }
791
792 /*
793  * Create interface group without members
794  */
795 struct ifg_group *
796 if_creategroup(const char *groupname)
797 {
798         struct ifg_group        *ifg = NULL;
799
800         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
801             M_TEMP, M_NOWAIT)) == NULL)
802                 return (NULL);
803
804         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
805         ifg->ifg_refcnt = 0;
806         ifg->ifg_carp_demoted = 0;
807         TAILQ_INIT(&ifg->ifg_members);
808 #if NPF > 0
809         pfi_attach_ifgroup(ifg);
810 #endif
811         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
812
813         return (ifg);
814 }
815
816 /*
817  * Add a group to an interface
818  */
819 int
820 if_addgroup(struct ifnet *ifp, const char *groupname)
821 {
822         struct ifg_list         *ifgl;
823         struct ifg_group        *ifg = NULL;
824         struct ifg_member       *ifgm;
825
826         if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
827             groupname[strlen(groupname) - 1] <= '9')
828                 return (EINVAL);
829
830         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
831                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
832                         return (EEXIST);
833
834         if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
835                 return (ENOMEM);
836
837         if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
838                 kfree(ifgl, M_TEMP);
839                 return (ENOMEM);
840         }
841
842         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
843                 if (!strcmp(ifg->ifg_group, groupname))
844                         break;
845
846         if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
847                 kfree(ifgl, M_TEMP);
848                 kfree(ifgm, M_TEMP);
849                 return (ENOMEM);
850         }
851
852         ifg->ifg_refcnt++;
853         ifgl->ifgl_group = ifg;
854         ifgm->ifgm_ifp = ifp;
855
856         TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
857         TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
858
859 #if NPF > 0
860         pfi_group_change(groupname);
861 #endif
862
863         return (0);
864 }
865
866 /*
867  * Remove a group from an interface
868  */
869 int
870 if_delgroup(struct ifnet *ifp, const char *groupname)
871 {
872         struct ifg_list         *ifgl;
873         struct ifg_member       *ifgm;
874
875         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
876                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
877                         break;
878         if (ifgl == NULL)
879                 return (ENOENT);
880
881         TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
882
883         TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
884                 if (ifgm->ifgm_ifp == ifp)
885                         break;
886
887         if (ifgm != NULL) {
888                 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
889                 kfree(ifgm, M_TEMP);
890         }
891
892         if (--ifgl->ifgl_group->ifg_refcnt == 0) {
893                 TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
894 #if NPF > 0
895                 pfi_detach_ifgroup(ifgl->ifgl_group);
896 #endif
897                 kfree(ifgl->ifgl_group, M_TEMP);
898         }
899
900         kfree(ifgl, M_TEMP);
901
902 #if NPF > 0
903         pfi_group_change(groupname);
904 #endif
905
906         return (0);
907 }
908
909 /*
910  * Stores all groups from an interface in memory pointed
911  * to by data
912  */
913 int
914 if_getgroup(caddr_t data, struct ifnet *ifp)
915 {
916         int                      len, error;
917         struct ifg_list         *ifgl;
918         struct ifg_req           ifgrq, *ifgp;
919         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
920
921         if (ifgr->ifgr_len == 0) {
922                 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
923                         ifgr->ifgr_len += sizeof(struct ifg_req);
924                 return (0);
925         }
926
927         len = ifgr->ifgr_len;
928         ifgp = ifgr->ifgr_groups;
929         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
930                 if (len < sizeof(ifgrq))
931                         return (EINVAL);
932                 bzero(&ifgrq, sizeof ifgrq);
933                 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
934                     sizeof(ifgrq.ifgrq_group));
935                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
936                     sizeof(struct ifg_req))))
937                         return (error);
938                 len -= sizeof(ifgrq);
939                 ifgp++;
940         }
941
942         return (0);
943 }
944
945 /*
946  * Stores all members of a group in memory pointed to by data
947  */
948 int
949 if_getgroupmembers(caddr_t data)
950 {
951         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
952         struct ifg_group        *ifg;
953         struct ifg_member       *ifgm;
954         struct ifg_req           ifgrq, *ifgp;
955         int                      len, error;
956
957         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
958                 if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
959                         break;
960         if (ifg == NULL)
961                 return (ENOENT);
962
963         if (ifgr->ifgr_len == 0) {
964                 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
965                         ifgr->ifgr_len += sizeof(ifgrq);
966                 return (0);
967         }
968
969         len = ifgr->ifgr_len;
970         ifgp = ifgr->ifgr_groups;
971         TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
972                 if (len < sizeof(ifgrq))
973                         return (EINVAL);
974                 bzero(&ifgrq, sizeof ifgrq);
975                 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
976                     sizeof(ifgrq.ifgrq_member));
977                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
978                     sizeof(struct ifg_req))))
979                         return (error);
980                 len -= sizeof(ifgrq);
981                 ifgp++;
982         }
983
984         return (0);
985 }
986
987 /*
988  * Delete Routes for a Network Interface
989  *
990  * Called for each routing entry via the rnh->rnh_walktree() call above
991  * to delete all route entries referencing a detaching network interface.
992  *
993  * Arguments:
994  *      rn      pointer to node in the routing table
995  *      arg     argument passed to rnh->rnh_walktree() - detaching interface
996  *
997  * Returns:
998  *      0       successful
999  *      errno   failed - reason indicated
1000  *
1001  */
1002 static int
1003 if_rtdel(struct radix_node *rn, void *arg)
1004 {
1005         struct rtentry  *rt = (struct rtentry *)rn;
1006         struct ifnet    *ifp = arg;
1007         int             err;
1008
1009         if (rt->rt_ifp == ifp) {
1010
1011                 /*
1012                  * Protect (sorta) against walktree recursion problems
1013                  * with cloned routes
1014                  */
1015                 if (!(rt->rt_flags & RTF_UP))
1016                         return (0);
1017
1018                 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1019                                 rt_mask(rt), rt->rt_flags,
1020                                 NULL);
1021                 if (err) {
1022                         log(LOG_WARNING, "if_rtdel: error %d\n", err);
1023                 }
1024         }
1025
1026         return (0);
1027 }
1028
1029 /*
1030  * Locate an interface based on a complete address.
1031  */
1032 struct ifaddr *
1033 ifa_ifwithaddr(struct sockaddr *addr)
1034 {
1035         struct ifnet *ifp;
1036
1037         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1038                 struct ifaddr_container *ifac;
1039
1040                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1041                         struct ifaddr *ifa = ifac->ifa;
1042
1043                         if (ifa->ifa_addr->sa_family != addr->sa_family)
1044                                 continue;
1045                         if (sa_equal(addr, ifa->ifa_addr))
1046                                 return (ifa);
1047                         if ((ifp->if_flags & IFF_BROADCAST) &&
1048                             ifa->ifa_broadaddr &&
1049                             /* IPv6 doesn't have broadcast */
1050                             ifa->ifa_broadaddr->sa_len != 0 &&
1051                             sa_equal(ifa->ifa_broadaddr, addr))
1052                                 return (ifa);
1053                 }
1054         }
1055         return (NULL);
1056 }
1057 /*
1058  * Locate the point to point interface with a given destination address.
1059  */
1060 struct ifaddr *
1061 ifa_ifwithdstaddr(struct sockaddr *addr)
1062 {
1063         struct ifnet *ifp;
1064
1065         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1066                 struct ifaddr_container *ifac;
1067
1068                 if (!(ifp->if_flags & IFF_POINTOPOINT))
1069                         continue;
1070
1071                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1072                         struct ifaddr *ifa = ifac->ifa;
1073
1074                         if (ifa->ifa_addr->sa_family != addr->sa_family)
1075                                 continue;
1076                         if (ifa->ifa_dstaddr &&
1077                             sa_equal(addr, ifa->ifa_dstaddr))
1078                                 return (ifa);
1079                 }
1080         }
1081         return (NULL);
1082 }
1083
1084 /*
1085  * Find an interface on a specific network.  If many, choice
1086  * is most specific found.
1087  */
1088 struct ifaddr *
1089 ifa_ifwithnet(struct sockaddr *addr)
1090 {
1091         struct ifnet *ifp;
1092         struct ifaddr *ifa_maybe = NULL;
1093         u_int af = addr->sa_family;
1094         char *addr_data = addr->sa_data, *cplim;
1095
1096         /*
1097          * AF_LINK addresses can be looked up directly by their index number,
1098          * so do that if we can.
1099          */
1100         if (af == AF_LINK) {
1101                 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1102
1103                 if (sdl->sdl_index && sdl->sdl_index <= if_index)
1104                         return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1105         }
1106
1107         /*
1108          * Scan though each interface, looking for ones that have
1109          * addresses in this address family.
1110          */
1111         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1112                 struct ifaddr_container *ifac;
1113
1114                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1115                         struct ifaddr *ifa = ifac->ifa;
1116                         char *cp, *cp2, *cp3;
1117
1118                         if (ifa->ifa_addr->sa_family != af)
1119 next:                           continue;
1120                         if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1121                                 /*
1122                                  * This is a bit broken as it doesn't
1123                                  * take into account that the remote end may
1124                                  * be a single node in the network we are
1125                                  * looking for.
1126                                  * The trouble is that we don't know the
1127                                  * netmask for the remote end.
1128                                  */
1129                                 if (ifa->ifa_dstaddr != NULL &&
1130                                     sa_equal(addr, ifa->ifa_dstaddr))
1131                                         return (ifa);
1132                         } else {
1133                                 /*
1134                                  * if we have a special address handler,
1135                                  * then use it instead of the generic one.
1136                                  */
1137                                 if (ifa->ifa_claim_addr) {
1138                                         if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1139                                                 return (ifa);
1140                                         } else {
1141                                                 continue;
1142                                         }
1143                                 }
1144
1145                                 /*
1146                                  * Scan all the bits in the ifa's address.
1147                                  * If a bit dissagrees with what we are
1148                                  * looking for, mask it with the netmask
1149                                  * to see if it really matters.
1150                                  * (A byte at a time)
1151                                  */
1152                                 if (ifa->ifa_netmask == 0)
1153                                         continue;
1154                                 cp = addr_data;
1155                                 cp2 = ifa->ifa_addr->sa_data;
1156                                 cp3 = ifa->ifa_netmask->sa_data;
1157                                 cplim = ifa->ifa_netmask->sa_len +
1158                                         (char *)ifa->ifa_netmask;
1159                                 while (cp3 < cplim)
1160                                         if ((*cp++ ^ *cp2++) & *cp3++)
1161                                                 goto next; /* next address! */
1162                                 /*
1163                                  * If the netmask of what we just found
1164                                  * is more specific than what we had before
1165                                  * (if we had one) then remember the new one
1166                                  * before continuing to search
1167                                  * for an even better one.
1168                                  */
1169                                 if (ifa_maybe == NULL ||
1170                                     rn_refines((char *)ifa->ifa_netmask,
1171                                                (char *)ifa_maybe->ifa_netmask))
1172                                         ifa_maybe = ifa;
1173                         }
1174                 }
1175         }
1176         return (ifa_maybe);
1177 }
1178
1179 /*
1180  * Find an interface address specific to an interface best matching
1181  * a given address.
1182  */
1183 struct ifaddr *
1184 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1185 {
1186         struct ifaddr_container *ifac;
1187         char *cp, *cp2, *cp3;
1188         char *cplim;
1189         struct ifaddr *ifa_maybe = NULL;
1190         u_int af = addr->sa_family;
1191
1192         if (af >= AF_MAX)
1193                 return (0);
1194         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1195                 struct ifaddr *ifa = ifac->ifa;
1196
1197                 if (ifa->ifa_addr->sa_family != af)
1198                         continue;
1199                 if (ifa_maybe == NULL)
1200                         ifa_maybe = ifa;
1201                 if (ifa->ifa_netmask == NULL) {
1202                         if (sa_equal(addr, ifa->ifa_addr) ||
1203                             (ifa->ifa_dstaddr != NULL &&
1204                              sa_equal(addr, ifa->ifa_dstaddr)))
1205                                 return (ifa);
1206                         continue;
1207                 }
1208                 if (ifp->if_flags & IFF_POINTOPOINT) {
1209                         if (sa_equal(addr, ifa->ifa_dstaddr))
1210                                 return (ifa);
1211                 } else {
1212                         cp = addr->sa_data;
1213                         cp2 = ifa->ifa_addr->sa_data;
1214                         cp3 = ifa->ifa_netmask->sa_data;
1215                         cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1216                         for (; cp3 < cplim; cp3++)
1217                                 if ((*cp++ ^ *cp2++) & *cp3)
1218                                         break;
1219                         if (cp3 == cplim)
1220                                 return (ifa);
1221                 }
1222         }
1223         return (ifa_maybe);
1224 }
1225
1226 /*
1227  * Default action when installing a route with a Link Level gateway.
1228  * Lookup an appropriate real ifa to point to.
1229  * This should be moved to /sys/net/link.c eventually.
1230  */
1231 static void
1232 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1233 {
1234         struct ifaddr *ifa;
1235         struct sockaddr *dst;
1236         struct ifnet *ifp;
1237
1238         if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1239             (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1240                 return;
1241         ifa = ifaof_ifpforaddr(dst, ifp);
1242         if (ifa != NULL) {
1243                 IFAFREE(rt->rt_ifa);
1244                 IFAREF(ifa);
1245                 rt->rt_ifa = ifa;
1246                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1247                         ifa->ifa_rtrequest(cmd, rt, info);
1248         }
1249 }
1250
1251 /*
1252  * Mark an interface down and notify protocols of
1253  * the transition.
1254  * NOTE: must be called at splnet or eqivalent.
1255  */
1256 void
1257 if_unroute(struct ifnet *ifp, int flag, int fam)
1258 {
1259         struct ifaddr_container *ifac;
1260
1261         ifp->if_flags &= ~flag;
1262         getmicrotime(&ifp->if_lastchange);
1263         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1264                 struct ifaddr *ifa = ifac->ifa;
1265
1266                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1267                         kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1268         }
1269         ifq_purge(&ifp->if_snd);
1270         rt_ifmsg(ifp);
1271 }
1272
1273 /*
1274  * Mark an interface up and notify protocols of
1275  * the transition.
1276  * NOTE: must be called at splnet or eqivalent.
1277  */
1278 void
1279 if_route(struct ifnet *ifp, int flag, int fam)
1280 {
1281         struct ifaddr_container *ifac;
1282
1283         ifq_purge(&ifp->if_snd);
1284         ifp->if_flags |= flag;
1285         getmicrotime(&ifp->if_lastchange);
1286         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1287                 struct ifaddr *ifa = ifac->ifa;
1288
1289                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1290                         kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1291         }
1292         rt_ifmsg(ifp);
1293 #ifdef INET6
1294         in6_if_up(ifp);
1295 #endif
1296 }
1297
1298 /*
1299  * Mark an interface down and notify protocols of the transition.  An
1300  * interface going down is also considered to be a synchronizing event.
1301  * We must ensure that all packet processing related to the interface
1302  * has completed before we return so e.g. the caller can free the ifnet
1303  * structure that the mbufs may be referencing.
1304  *
1305  * NOTE: must be called at splnet or eqivalent.
1306  */
1307 void
1308 if_down(struct ifnet *ifp)
1309 {
1310         if_unroute(ifp, IFF_UP, AF_UNSPEC);
1311         netmsg_service_sync();
1312 }
1313
1314 /*
1315  * Mark an interface up and notify protocols of
1316  * the transition.
1317  * NOTE: must be called at splnet or eqivalent.
1318  */
1319 void
1320 if_up(struct ifnet *ifp)
1321 {
1322         if_route(ifp, IFF_UP, AF_UNSPEC);
1323 }
1324
1325 /*
1326  * Process a link state change.
1327  * NOTE: must be called at splsoftnet or equivalent.
1328  */
1329 void
1330 if_link_state_change(struct ifnet *ifp)
1331 {
1332         int link_state = ifp->if_link_state;
1333
1334         rt_ifmsg(ifp);
1335         devctl_notify("IFNET", ifp->if_xname,
1336             (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1337 }
1338
1339 /*
1340  * Handle interface watchdog timer routines.  Called
1341  * from softclock, we decrement timers (if set) and
1342  * call the appropriate interface routine on expiration.
1343  */
1344 static void
1345 if_slowtimo(void *arg)
1346 {
1347         struct ifnet *ifp;
1348
1349         crit_enter();
1350
1351         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1352                 if (ifp->if_timer == 0 || --ifp->if_timer)
1353                         continue;
1354                 if (ifp->if_watchdog) {
1355                         if (ifnet_tryserialize_all(ifp)) {
1356                                 (*ifp->if_watchdog)(ifp);
1357                                 ifnet_deserialize_all(ifp);
1358                         } else {
1359                                 /* try again next timeout */
1360                                 ++ifp->if_timer;
1361                         }
1362                 }
1363         }
1364
1365         crit_exit();
1366
1367         callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1368 }
1369
1370 /*
1371  * Map interface name to
1372  * interface structure pointer.
1373  */
1374 struct ifnet *
1375 ifunit(const char *name)
1376 {
1377         struct ifnet *ifp;
1378
1379         /*
1380          * Search all the interfaces for this name/number
1381          */
1382
1383         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1384                 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1385                         break;
1386         }
1387         return (ifp);
1388 }
1389
1390
1391 /*
1392  * Map interface name in a sockaddr_dl to
1393  * interface structure pointer.
1394  */
1395 struct ifnet *
1396 if_withname(struct sockaddr *sa)
1397 {
1398         char ifname[IFNAMSIZ+1];
1399         struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1400
1401         if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1402              (sdl->sdl_nlen > IFNAMSIZ) )
1403                 return NULL;
1404
1405         /*
1406          * ifunit wants a null-terminated name.  It may not be null-terminated
1407          * in the sockaddr.  We don't want to change the caller's sockaddr,
1408          * and there might not be room to put the trailing null anyway, so we
1409          * make a local copy that we know we can null terminate safely.
1410          */
1411
1412         bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1413         ifname[sdl->sdl_nlen] = '\0';
1414         return ifunit(ifname);
1415 }
1416
1417
1418 /*
1419  * Interface ioctls.
1420  */
1421 int
1422 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1423 {
1424         struct ifnet *ifp;
1425         struct ifreq *ifr;
1426         struct ifstat *ifs;
1427         int error;
1428         short oif_flags;
1429         int new_flags;
1430 #ifdef COMPAT_43
1431         int ocmd;
1432 #endif
1433         size_t namelen, onamelen;
1434         char new_name[IFNAMSIZ];
1435         struct ifaddr *ifa;
1436         struct sockaddr_dl *sdl;
1437
1438         switch (cmd) {
1439         case SIOCGIFCONF:
1440         case OSIOCGIFCONF:
1441                 return (ifconf(cmd, data, cred));
1442         default:
1443                 break;
1444         }
1445
1446         ifr = (struct ifreq *)data;
1447
1448         switch (cmd) {
1449         case SIOCIFCREATE:
1450         case SIOCIFCREATE2:
1451                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1452                         return (error);
1453                 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1454                         cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1455         case SIOCIFDESTROY:
1456                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1457                         return (error);
1458                 return (if_clone_destroy(ifr->ifr_name));
1459         case SIOCIFGCLONERS:
1460                 return (if_clone_list((struct if_clonereq *)data));
1461         default:
1462                 break;
1463         }
1464
1465         /*
1466          * Nominal ioctl through interface, lookup the ifp and obtain a
1467          * lock to serialize the ifconfig ioctl operation.
1468          */
1469         ifp = ifunit(ifr->ifr_name);
1470         if (ifp == NULL)
1471                 return (ENXIO);
1472         error = 0;
1473         mtx_lock(&ifp->if_ioctl_mtx);
1474
1475         switch (cmd) {
1476         case SIOCGIFINDEX:
1477                 ifr->ifr_index = ifp->if_index;
1478                 break;
1479
1480         case SIOCGIFFLAGS:
1481                 ifr->ifr_flags = ifp->if_flags;
1482                 ifr->ifr_flagshigh = ifp->if_flags >> 16;
1483                 break;
1484
1485         case SIOCGIFCAP:
1486                 ifr->ifr_reqcap = ifp->if_capabilities;
1487                 ifr->ifr_curcap = ifp->if_capenable;
1488                 break;
1489
1490         case SIOCGIFMETRIC:
1491                 ifr->ifr_metric = ifp->if_metric;
1492                 break;
1493
1494         case SIOCGIFMTU:
1495                 ifr->ifr_mtu = ifp->if_mtu;
1496                 break;
1497
1498         case SIOCGIFDATA:
1499                 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1500                                 sizeof(ifp->if_data));
1501                 break;
1502
1503         case SIOCGIFPHYS:
1504                 ifr->ifr_phys = ifp->if_physical;
1505                 break;
1506
1507         case SIOCGIFPOLLCPU:
1508 #ifdef DEVICE_POLLING
1509                 ifr->ifr_pollcpu = ifp->if_poll_cpuid;
1510 #else
1511                 ifr->ifr_pollcpu = -1;
1512 #endif
1513                 break;
1514
1515         case SIOCSIFPOLLCPU:
1516 #ifdef DEVICE_POLLING
1517                 if ((ifp->if_flags & IFF_POLLING) == 0)
1518                         ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
1519 #endif
1520                 break;
1521
1522         case SIOCSIFFLAGS:
1523                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1524                 if (error)
1525                         break;
1526                 new_flags = (ifr->ifr_flags & 0xffff) |
1527                     (ifr->ifr_flagshigh << 16);
1528                 if (ifp->if_flags & IFF_SMART) {
1529                         /* Smart drivers twiddle their own routes */
1530                 } else if (ifp->if_flags & IFF_UP &&
1531                     (new_flags & IFF_UP) == 0) {
1532                         crit_enter();
1533                         if_down(ifp);
1534                         crit_exit();
1535                 } else if (new_flags & IFF_UP &&
1536                     (ifp->if_flags & IFF_UP) == 0) {
1537                         crit_enter();
1538                         if_up(ifp);
1539                         crit_exit();
1540                 }
1541
1542 #ifdef DEVICE_POLLING
1543                 if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
1544                         if (new_flags & IFF_POLLING) {
1545                                 ether_poll_register(ifp);
1546                         } else {
1547                                 ether_poll_deregister(ifp);
1548                         }
1549                 }
1550 #endif
1551 #ifdef IFPOLL_ENABLE
1552                 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1553                         if (new_flags & IFF_NPOLLING)
1554                                 ifpoll_register(ifp);
1555                         else
1556                                 ifpoll_deregister(ifp);
1557                 }
1558 #endif
1559
1560                 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1561                         (new_flags &~ IFF_CANTCHANGE);
1562                 if (new_flags & IFF_PPROMISC) {
1563                         /* Permanently promiscuous mode requested */
1564                         ifp->if_flags |= IFF_PROMISC;
1565                 } else if (ifp->if_pcount == 0) {
1566                         ifp->if_flags &= ~IFF_PROMISC;
1567                 }
1568                 if (ifp->if_ioctl) {
1569                         ifnet_serialize_all(ifp);
1570                         ifp->if_ioctl(ifp, cmd, data, cred);
1571                         ifnet_deserialize_all(ifp);
1572                 }
1573                 getmicrotime(&ifp->if_lastchange);
1574                 break;
1575
1576         case SIOCSIFCAP:
1577                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1578                 if (error)
1579                         break;
1580                 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1581                         error = EINVAL;
1582                         break;
1583                 }
1584                 ifnet_serialize_all(ifp);
1585                 ifp->if_ioctl(ifp, cmd, data, cred);
1586                 ifnet_deserialize_all(ifp);
1587                 break;
1588
1589         case SIOCSIFNAME:
1590                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1591                 if (error)
1592                         break;
1593                 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1594                 if (error)
1595                         break;
1596                 if (new_name[0] == '\0') {
1597                         error = EINVAL;
1598                         break;
1599                 }
1600                 if (ifunit(new_name) != NULL) {
1601                         error = EEXIST;
1602                         break;
1603                 }
1604
1605                 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1606
1607                 /* Announce the departure of the interface. */
1608                 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1609
1610                 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1611                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1612                 /* XXX IFA_LOCK(ifa); */
1613                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1614                 namelen = strlen(new_name);
1615                 onamelen = sdl->sdl_nlen;
1616                 /*
1617                  * Move the address if needed.  This is safe because we
1618                  * allocate space for a name of length IFNAMSIZ when we
1619                  * create this in if_attach().
1620                  */
1621                 if (namelen != onamelen) {
1622                         bcopy(sdl->sdl_data + onamelen,
1623                             sdl->sdl_data + namelen, sdl->sdl_alen);
1624                 }
1625                 bcopy(new_name, sdl->sdl_data, namelen);
1626                 sdl->sdl_nlen = namelen;
1627                 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1628                 bzero(sdl->sdl_data, onamelen);
1629                 while (namelen != 0)
1630                         sdl->sdl_data[--namelen] = 0xff;
1631                 /* XXX IFA_UNLOCK(ifa) */
1632
1633                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1634
1635                 /* Announce the return of the interface. */
1636                 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1637                 break;
1638
1639         case SIOCSIFMETRIC:
1640                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1641                 if (error)
1642                         break;
1643                 ifp->if_metric = ifr->ifr_metric;
1644                 getmicrotime(&ifp->if_lastchange);
1645                 break;
1646
1647         case SIOCSIFPHYS:
1648                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1649                 if (error)
1650                         break;
1651                 if (ifp->if_ioctl == NULL) {
1652                         error = EOPNOTSUPP;
1653                         break;
1654                 }
1655                 ifnet_serialize_all(ifp);
1656                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1657                 ifnet_deserialize_all(ifp);
1658                 if (error == 0)
1659                         getmicrotime(&ifp->if_lastchange);
1660                 break;
1661
1662         case SIOCSIFMTU:
1663         {
1664                 u_long oldmtu = ifp->if_mtu;
1665
1666                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1667                 if (error)
1668                         break;
1669                 if (ifp->if_ioctl == NULL) {
1670                         error = EOPNOTSUPP;
1671                         break;
1672                 }
1673                 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1674                         error = EINVAL;
1675                         break;
1676                 }
1677                 ifnet_serialize_all(ifp);
1678                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1679                 ifnet_deserialize_all(ifp);
1680                 if (error == 0) {
1681                         getmicrotime(&ifp->if_lastchange);
1682                         rt_ifmsg(ifp);
1683                 }
1684                 /*
1685                  * If the link MTU changed, do network layer specific procedure.
1686                  */
1687                 if (ifp->if_mtu != oldmtu) {
1688 #ifdef INET6
1689                         nd6_setmtu(ifp);
1690 #endif
1691                 }
1692                 break;
1693         }
1694
1695         case SIOCADDMULTI:
1696         case SIOCDELMULTI:
1697                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1698                 if (error)
1699                         break;
1700
1701                 /* Don't allow group membership on non-multicast interfaces. */
1702                 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1703                         error = EOPNOTSUPP;
1704                         break;
1705                 }
1706
1707                 /* Don't let users screw up protocols' entries. */
1708                 if (ifr->ifr_addr.sa_family != AF_LINK) {
1709                         error = EINVAL;
1710                         break;
1711                 }
1712
1713                 if (cmd == SIOCADDMULTI) {
1714                         struct ifmultiaddr *ifma;
1715                         error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1716                 } else {
1717                         error = if_delmulti(ifp, &ifr->ifr_addr);
1718                 }
1719                 if (error == 0)
1720                         getmicrotime(&ifp->if_lastchange);
1721                 break;
1722
1723         case SIOCSIFPHYADDR:
1724         case SIOCDIFPHYADDR:
1725 #ifdef INET6
1726         case SIOCSIFPHYADDR_IN6:
1727 #endif
1728         case SIOCSLIFPHYADDR:
1729         case SIOCSIFMEDIA:
1730         case SIOCSIFGENERIC:
1731                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1732                 if (error)
1733                         break;
1734                 if (ifp->if_ioctl == 0) {
1735                         error = EOPNOTSUPP;
1736                         break;
1737                 }
1738                 ifnet_serialize_all(ifp);
1739                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1740                 ifnet_deserialize_all(ifp);
1741                 if (error == 0)
1742                         getmicrotime(&ifp->if_lastchange);
1743                 break;
1744
1745         case SIOCGIFSTATUS:
1746                 ifs = (struct ifstat *)data;
1747                 ifs->ascii[0] = '\0';
1748                 /* fall through */
1749         case SIOCGIFPSRCADDR:
1750         case SIOCGIFPDSTADDR:
1751         case SIOCGLIFPHYADDR:
1752         case SIOCGIFMEDIA:
1753         case SIOCGIFGENERIC:
1754                 if (ifp->if_ioctl == NULL) {
1755                         error = EOPNOTSUPP;
1756                         break;
1757                 }
1758                 ifnet_serialize_all(ifp);
1759                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1760                 ifnet_deserialize_all(ifp);
1761                 break;
1762
1763         case SIOCSIFLLADDR:
1764                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1765                 if (error)
1766                         break;
1767                 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1768                                      ifr->ifr_addr.sa_len);
1769                 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1770                 break;
1771
1772         default:
1773                 oif_flags = ifp->if_flags;
1774                 if (so->so_proto == 0) {
1775                         error = EOPNOTSUPP;
1776                         break;
1777                 }
1778 #ifndef COMPAT_43
1779                 error = so_pru_control_direct(so, cmd, data, ifp);
1780 #else
1781                 ocmd = cmd;
1782
1783                 switch (cmd) {
1784                 case SIOCSIFDSTADDR:
1785                 case SIOCSIFADDR:
1786                 case SIOCSIFBRDADDR:
1787                 case SIOCSIFNETMASK:
1788 #if BYTE_ORDER != BIG_ENDIAN
1789                         if (ifr->ifr_addr.sa_family == 0 &&
1790                             ifr->ifr_addr.sa_len < 16) {
1791                                 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1792                                 ifr->ifr_addr.sa_len = 16;
1793                         }
1794 #else
1795                         if (ifr->ifr_addr.sa_len == 0)
1796                                 ifr->ifr_addr.sa_len = 16;
1797 #endif
1798                         break;
1799                 case OSIOCGIFADDR:
1800                         cmd = SIOCGIFADDR;
1801                         break;
1802                 case OSIOCGIFDSTADDR:
1803                         cmd = SIOCGIFDSTADDR;
1804                         break;
1805                 case OSIOCGIFBRDADDR:
1806                         cmd = SIOCGIFBRDADDR;
1807                         break;
1808                 case OSIOCGIFNETMASK:
1809                         cmd = SIOCGIFNETMASK;
1810                         break;
1811                 default:
1812                         break;
1813                 }
1814
1815                 error = so_pru_control_direct(so, cmd, data, ifp);
1816
1817                 switch (ocmd) {
1818                 case OSIOCGIFADDR:
1819                 case OSIOCGIFDSTADDR:
1820                 case OSIOCGIFBRDADDR:
1821                 case OSIOCGIFNETMASK:
1822                         *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1823                         break;
1824                 }
1825 #endif /* COMPAT_43 */
1826
1827                 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1828 #ifdef INET6
1829                         DELAY(100);/* XXX: temporary workaround for fxp issue*/
1830                         if (ifp->if_flags & IFF_UP) {
1831                                 crit_enter();
1832                                 in6_if_up(ifp);
1833                                 crit_exit();
1834                         }
1835 #endif
1836                 }
1837                 break;
1838         }
1839
1840         mtx_unlock(&ifp->if_ioctl_mtx);
1841         return (error);
1842 }
1843
1844 /*
1845  * Set/clear promiscuous mode on interface ifp based on the truth value
1846  * of pswitch.  The calls are reference counted so that only the first
1847  * "on" request actually has an effect, as does the final "off" request.
1848  * Results are undefined if the "off" and "on" requests are not matched.
1849  */
1850 int
1851 ifpromisc(struct ifnet *ifp, int pswitch)
1852 {
1853         struct ifreq ifr;
1854         int error;
1855         int oldflags;
1856
1857         oldflags = ifp->if_flags;
1858         if (ifp->if_flags & IFF_PPROMISC) {
1859                 /* Do nothing if device is in permanently promiscuous mode */
1860                 ifp->if_pcount += pswitch ? 1 : -1;
1861                 return (0);
1862         }
1863         if (pswitch) {
1864                 /*
1865                  * If the device is not configured up, we cannot put it in
1866                  * promiscuous mode.
1867                  */
1868                 if ((ifp->if_flags & IFF_UP) == 0)
1869                         return (ENETDOWN);
1870                 if (ifp->if_pcount++ != 0)
1871                         return (0);
1872                 ifp->if_flags |= IFF_PROMISC;
1873                 log(LOG_INFO, "%s: promiscuous mode enabled\n",
1874                     ifp->if_xname);
1875         } else {
1876                 if (--ifp->if_pcount > 0)
1877                         return (0);
1878                 ifp->if_flags &= ~IFF_PROMISC;
1879                 log(LOG_INFO, "%s: promiscuous mode disabled\n",
1880                     ifp->if_xname);
1881         }
1882         ifr.ifr_flags = ifp->if_flags;
1883         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1884         ifnet_serialize_all(ifp);
1885         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1886         ifnet_deserialize_all(ifp);
1887         if (error == 0)
1888                 rt_ifmsg(ifp);
1889         else
1890                 ifp->if_flags = oldflags;
1891         return error;
1892 }
1893
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * Walks the global ifnet list and copies one struct ifreq per address
 * out to the user buffer described by ifc->ifc_req / ifc->ifc_len.  An
 * interface for which no address was emitted gets a single entry with a
 * zeroed address.  On return ifc->ifc_len has been reduced by the space
 * that was left over.
 *
 * Returns 0, ENAMETOOLONG if an interface name does not fit in ifr_name,
 * or the error from copyout().
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
        struct ifconf *ifc = (struct ifconf *)data;
        struct ifnet *ifp;
        struct sockaddr *sa;
        struct ifreq ifr, *ifrp;
        int space = ifc->ifc_len, error = 0;

        ifrp = ifc->ifc_req;
        TAILQ_FOREACH(ifp, &ifnet, if_link) {
                struct ifaddr_container *ifac;
                int addrs;

                /*
                 * Stop unless more than one ifreq worth of space remains.
                 * NOTE(review): space is an int compared against a size_t,
                 * so a negative ifc_len is compared as a large unsigned
                 * value -- confirm ifc_len is validated by the caller.
                 */
                if (space <= sizeof ifr)
                        break;

                /*
                 * Zero the stack declared structure first to prevent
                 * memory disclosure.
                 */
                bzero(&ifr, sizeof(ifr));
                if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
                    >= sizeof(ifr.ifr_name)) {
                        error = ENAMETOOLONG;
                        break;
                }

                /* Count of addresses emitted for this interface. */
                addrs = 0;
                TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
                        struct ifaddr *ifa = ifac->ifa;

                        if (space <= sizeof ifr)
                                break;
                        sa = ifa->ifa_addr;
                        /*
                         * Skip the address when the credential is jailed
                         * and prison_if() returns non-zero for it.
                         */
                        if (cred->cr_prison &&
                            prison_if(cred, sa))
                                continue;
                        addrs++;
#ifdef COMPAT_43
                        if (cmd == OSIOCGIFCONF) {
                                /*
                                 * Old-style request: copy the address and
                                 * store the family through the struct
                                 * osockaddr view of ifr_addr.
                                 */
                                struct osockaddr *osa =
                                         (struct osockaddr *)&ifr.ifr_addr;
                                ifr.ifr_addr = *sa;
                                osa->sa_family = sa->sa_family;
                                error = copyout(&ifr, ifrp, sizeof ifr);
                                ifrp++;
                        } else
#endif
                        if (sa->sa_len <= sizeof(*sa)) {
                                /*
                                 * Address fits in a plain ifreq.
                                 * NOTE(review): the structure assignment
                                 * copies sizeof(struct sockaddr) bytes even
                                 * when sa_len is smaller -- confirm the
                                 * storage behind sa is at least that large.
                                 */
                                ifr.ifr_addr = *sa;
                                error = copyout(&ifr, ifrp, sizeof ifr);
                                ifrp++;
                        } else {
                                /*
                                 * Address is larger than struct sockaddr:
                                 * copy the name, then the whole address, and
                                 * advance ifrp past the variable-length
                                 * address.  The extra bytes are charged to
                                 * space here, the base ifreq size below.
                                 */
                                if (space < (sizeof ifr) + sa->sa_len -
                                            sizeof(*sa))
                                        break;
                                space -= sa->sa_len - sizeof(*sa);
                                error = copyout(&ifr, ifrp,
                                                sizeof ifr.ifr_name);
                                if (error == 0)
                                        error = copyout(sa, &ifrp->ifr_addr,
                                                        sa->sa_len);
                                ifrp = (struct ifreq *)
                                        (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
                        }
                        if (error)
                                break;
                        space -= sizeof ifr;
                }
                if (error)
                        break;
                if (!addrs) {
                        /* No address was emitted: report the name alone. */
                        bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
                        error = copyout(&ifr, ifrp, sizeof ifr);
                        if (error)
                                break;
                        space -= sizeof ifr;
                        ifrp++;
                }
        }
        /* Convert "space left" back into "space used" for the caller. */
        ifc->ifc_len -= space;
        return (error);
}
1984
1985 /*
1986  * Just like if_promisc(), but for all-multicast-reception mode.
1987  */
1988 int
1989 if_allmulti(struct ifnet *ifp, int onswitch)
1990 {
1991         int error = 0;
1992         struct ifreq ifr;
1993
1994         crit_enter();
1995
1996         if (onswitch) {
1997                 if (ifp->if_amcount++ == 0) {
1998                         ifp->if_flags |= IFF_ALLMULTI;
1999                         ifr.ifr_flags = ifp->if_flags;
2000                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
2001                         ifnet_serialize_all(ifp);
2002                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2003                                               NULL);
2004                         ifnet_deserialize_all(ifp);
2005                 }
2006         } else {
2007                 if (ifp->if_amcount > 1) {
2008                         ifp->if_amcount--;
2009                 } else {
2010                         ifp->if_amcount = 0;
2011                         ifp->if_flags &= ~IFF_ALLMULTI;
2012                         ifr.ifr_flags = ifp->if_flags;
2013                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
2014                         ifnet_serialize_all(ifp);
2015                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2016                                               NULL);
2017                         ifnet_deserialize_all(ifp);
2018                 }
2019         }
2020
2021         crit_exit();
2022
2023         if (error == 0)
2024                 rt_ifmsg(ifp);
2025         return error;
2026 }
2027
2028 /*
2029  * Add a multicast listenership to the interface in question.
2030  * The link layer provides a routine which converts
2031  */
2032 int
2033 if_addmulti(
2034         struct ifnet *ifp,      /* interface to manipulate */
2035         struct sockaddr *sa,    /* address to add */
2036         struct ifmultiaddr **retifma)
2037 {
2038         struct sockaddr *llsa, *dupsa;
2039         int error;
2040         struct ifmultiaddr *ifma;
2041
2042         /*
2043          * If the matching multicast address already exists
2044          * then don't add a new one, just add a reference
2045          */
2046         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2047                 if (sa_equal(sa, ifma->ifma_addr)) {
2048                         ifma->ifma_refcount++;
2049                         if (retifma)
2050                                 *retifma = ifma;
2051                         return 0;
2052                 }
2053         }
2054
2055         /*
2056          * Give the link layer a chance to accept/reject it, and also
2057          * find out which AF_LINK address this maps to, if it isn't one
2058          * already.
2059          */
2060         if (ifp->if_resolvemulti) {
2061                 ifnet_serialize_all(ifp);
2062                 error = ifp->if_resolvemulti(ifp, &llsa, sa);
2063                 ifnet_deserialize_all(ifp);
2064                 if (error) 
2065                         return error;
2066         } else {
2067                 llsa = NULL;
2068         }
2069
2070         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2071         dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2072         bcopy(sa, dupsa, sa->sa_len);
2073
2074         ifma->ifma_addr = dupsa;
2075         ifma->ifma_lladdr = llsa;
2076         ifma->ifma_ifp = ifp;
2077         ifma->ifma_refcount = 1;
2078         ifma->ifma_protospec = 0;
2079         rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2080
2081         /*
2082          * Some network interfaces can scan the address list at
2083          * interrupt time; lock them out.
2084          */
2085         crit_enter();
2086         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2087         crit_exit();
2088         if (retifma)
2089                 *retifma = ifma;
2090
2091         if (llsa != NULL) {
2092                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2093                         if (sa_equal(ifma->ifma_addr, llsa))
2094                                 break;
2095                 }
2096                 if (ifma) {
2097                         ifma->ifma_refcount++;
2098                 } else {
2099                         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2100                         dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2101                         bcopy(llsa, dupsa, llsa->sa_len);
2102                         ifma->ifma_addr = dupsa;
2103                         ifma->ifma_ifp = ifp;
2104                         ifma->ifma_refcount = 1;
2105                         crit_enter();
2106                         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2107                         crit_exit();
2108                 }
2109         }
2110         /*
2111          * We are certain we have added something, so call down to the
2112          * interface to let them know about it.
2113          */
2114         crit_enter();
2115         ifnet_serialize_all(ifp);
2116         if (ifp->if_ioctl)
2117                 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2118         ifnet_deserialize_all(ifp);
2119         crit_exit();
2120
2121         return 0;
2122 }
2123
2124 /*
2125  * Remove a reference to a multicast address on this interface.  Yell
2126  * if the request does not match an existing membership.
2127  */
2128 int
2129 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2130 {
2131         struct ifmultiaddr *ifma;
2132
2133         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2134                 if (sa_equal(sa, ifma->ifma_addr))
2135                         break;
2136         if (ifma == NULL)
2137                 return ENOENT;
2138
2139         if (ifma->ifma_refcount > 1) {
2140                 ifma->ifma_refcount--;
2141                 return 0;
2142         }
2143
2144         rt_newmaddrmsg(RTM_DELMADDR, ifma);
2145         sa = ifma->ifma_lladdr;
2146         crit_enter();
2147         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2148         /*
2149          * Make sure the interface driver is notified
2150          * in the case of a link layer mcast group being left.
2151          */
2152         if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2153                 ifnet_serialize_all(ifp);
2154                 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2155                 ifnet_deserialize_all(ifp);
2156         }
2157         crit_exit();
2158         kfree(ifma->ifma_addr, M_IFMADDR);
2159         kfree(ifma, M_IFMADDR);
2160         if (sa == NULL)
2161                 return 0;
2162
2163         /*
2164          * Now look for the link-layer address which corresponds to
2165          * this network address.  It had been squirreled away in
2166          * ifma->ifma_lladdr for this purpose (so we don't have
2167          * to call ifp->if_resolvemulti() again), and we saved that
2168          * value in sa above.  If some nasty deleted the
2169          * link-layer address out from underneath us, we can deal because
2170          * the address we stored was is not the same as the one which was
2171          * in the record for the link-layer address.  (So we don't complain
2172          * in that case.)
2173          */
2174         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2175                 if (sa_equal(sa, ifma->ifma_addr))
2176                         break;
2177         if (ifma == NULL)
2178                 return 0;
2179
2180         if (ifma->ifma_refcount > 1) {
2181                 ifma->ifma_refcount--;
2182                 return 0;
2183         }
2184
2185         crit_enter();
2186         ifnet_serialize_all(ifp);
2187         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2188         ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2189         ifnet_deserialize_all(ifp);
2190         crit_exit();
2191         kfree(ifma->ifma_addr, M_IFMADDR);
2192         kfree(sa, M_IFMADDR);
2193         kfree(ifma, M_IFMADDR);
2194
2195         return 0;
2196 }
2197
2198 /*
2199  * Delete all multicast group membership for an interface.
2200  * Should be used to quickly flush all multicast filters.
2201  */
2202 void
2203 if_delallmulti(struct ifnet *ifp)
2204 {
2205         struct ifmultiaddr *ifma;
2206         struct ifmultiaddr *next;
2207
2208         TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2209                 if_delmulti(ifp, ifma->ifma_addr);
2210 }
2211
2212
2213 /*
2214  * Set the link layer address on an interface.
2215  *
2216  * At this time we only support certain types of interfaces,
2217  * and we don't allow the length of the address to change.
2218  */
2219 int
2220 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2221 {
2222         struct sockaddr_dl *sdl;
2223         struct ifreq ifr;
2224
2225         sdl = IF_LLSOCKADDR(ifp);
2226         if (sdl == NULL)
2227                 return (EINVAL);
2228         if (len != sdl->sdl_alen)       /* don't allow length to change */
2229                 return (EINVAL);
2230         switch (ifp->if_type) {
2231         case IFT_ETHER:                 /* these types use struct arpcom */
2232         case IFT_XETHER:
2233         case IFT_L2VLAN:
2234                 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2235                 bcopy(lladdr, LLADDR(sdl), len);
2236                 break;
2237         default:
2238                 return (ENODEV);
2239         }
2240         /*
2241          * If the interface is already up, we need
2242          * to re-init it in order to reprogram its
2243          * address filter.
2244          */
2245         ifnet_serialize_all(ifp);
2246         if ((ifp->if_flags & IFF_UP) != 0) {
2247 #ifdef INET
2248                 struct ifaddr_container *ifac;
2249 #endif
2250
2251                 ifp->if_flags &= ~IFF_UP;
2252                 ifr.ifr_flags = ifp->if_flags;
2253                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2254                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2255                               NULL);
2256                 ifp->if_flags |= IFF_UP;
2257                 ifr.ifr_flags = ifp->if_flags;
2258                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2259                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2260                                  NULL);
2261 #ifdef INET
2262                 /*
2263                  * Also send gratuitous ARPs to notify other nodes about
2264                  * the address change.
2265                  */
2266                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2267                         struct ifaddr *ifa = ifac->ifa;
2268
2269                         if (ifa->ifa_addr != NULL &&
2270                             ifa->ifa_addr->sa_family == AF_INET)
2271                                 arp_gratuitous(ifp, ifa);
2272                 }
2273 #endif
2274         }
2275         ifnet_deserialize_all(ifp);
2276         return (0);
2277 }
2278
2279 struct ifmultiaddr *
2280 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2281 {
2282         struct ifmultiaddr *ifma;
2283
2284         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2285                 if (sa_equal(ifma->ifma_addr, sa))
2286                         break;
2287
2288         return ifma;
2289 }
2290
2291 /*
2292  * This function locates the first real ethernet MAC from a network
2293  * card and loads it into node, returning 0 on success or ENOENT if
2294  * no suitable interfaces were found.  It is used by the uuid code to
2295  * generate a unique 6-byte number.
2296  */
2297 int
2298 if_getanyethermac(uint16_t *node, int minlen)
2299 {
2300         struct ifnet *ifp;
2301         struct sockaddr_dl *sdl;
2302
2303         TAILQ_FOREACH(ifp, &ifnet, if_link) {
2304                 if (ifp->if_type != IFT_ETHER)
2305                         continue;
2306                 sdl = IF_LLSOCKADDR(ifp);
2307                 if (sdl->sdl_alen < minlen)
2308                         continue;
2309                 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2310                       minlen);
2311                 return(0);
2312         }
2313         return (ENOENT);
2314 }
2315
2316 /*
2317  * The name argument must be a pointer to storage which will last as
2318  * long as the interface does.  For physical devices, the result of
2319  * device_get_name(dev) is a good choice and for pseudo-devices a
2320  * static string works well.
2321  */
2322 void
2323 if_initname(struct ifnet *ifp, const char *name, int unit)
2324 {
2325         ifp->if_dname = name;
2326         ifp->if_dunit = unit;
2327         if (unit != IF_DUNIT_NONE)
2328                 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2329         else
2330                 strlcpy(ifp->if_xname, name, IFNAMSIZ);
2331 }
2332
2333 int
2334 if_printf(struct ifnet *ifp, const char *fmt, ...)
2335 {
2336         __va_list ap;
2337         int retval;
2338
2339         retval = kprintf("%s: ", ifp->if_xname);
2340         __va_start(ap, fmt);
2341         retval += kvprintf(fmt, ap);
2342         __va_end(ap);
2343         return (retval);
2344 }
2345
2346 struct ifnet *
2347 if_alloc(uint8_t type)
2348 {
2349         struct ifnet *ifp;
2350         size_t size;
2351
2352         /*
2353          * XXX temporary hack until arpcom is setup in if_l2com
2354          */
2355         if (type == IFT_ETHER)
2356                 size = sizeof(struct arpcom);
2357         else
2358                 size = sizeof(struct ifnet);
2359
2360         ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2361
2362         ifp->if_type = type;
2363
2364         if (if_com_alloc[type] != NULL) {
2365                 ifp->if_l2com = if_com_alloc[type](type, ifp);
2366                 if (ifp->if_l2com == NULL) {
2367                         kfree(ifp, M_IFNET);
2368                         return (NULL);
2369                 }
2370         }
2371         return (ifp);
2372 }
2373
/*
 * Release an ifnet by handing it to kfree() with the M_IFNET type.
 *
 * NOTE(review): if_alloc() may attach an if_l2com object; nothing here
 * releases it -- confirm whether callers are expected to do so.
 */
void
if_free(struct ifnet *ifp)
{
        kfree(ifp, M_IFNET);
}
2379
/*
 * Point ifq's enqueue, dequeue and request hooks at the
 * ifq_classic_*() implementations.
 */
void
ifq_set_classic(struct ifaltq *ifq)
{
        ifq->altq_enqueue = ifq_classic_enqueue;
        ifq->altq_dequeue = ifq_classic_dequeue;
        ifq->altq_request = ifq_classic_request;
}
2387
2388 int
2389 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2390                     struct altq_pktattr *pa __unused)
2391 {
2392         logifq(enqueue, ifq);
2393         if (IF_QFULL(ifq)) {
2394                 m_freem(m);
2395                 return(ENOBUFS);
2396         } else {
2397                 IF_ENQUEUE(ifq, m);
2398                 return(0);
2399         }       
2400 }
2401
2402 struct mbuf *
2403 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2404 {
2405         struct mbuf *m;
2406
2407         switch (op) {
2408         case ALTDQ_POLL:
2409                 IF_POLL(ifq, m);
2410                 break;
2411         case ALTDQ_REMOVE:
2412                 logifq(dequeue, ifq);
2413                 IF_DEQUEUE(ifq, m);
2414                 break;
2415         default:
2416                 panic("unsupported ALTQ dequeue op: %d", op);
2417         }
2418         KKASSERT(mpolled == NULL || mpolled == m);
2419         return(m);
2420 }
2421
2422 int
2423 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2424 {
2425         switch (req) {
2426         case ALTRQ_PURGE:
2427                 IF_DRAIN(ifq);
2428                 break;
2429         default:
2430                 panic("unsupported ALTQ request: %d", req);
2431         }
2432         return(0);
2433 }
2434
2435 int
2436 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2437 {
2438         struct ifaltq *ifq = &ifp->if_snd;
2439         int running = 0, error, start = 0;
2440
2441         ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2442
2443         ALTQ_LOCK(ifq);
2444         error = ifq_enqueue_locked(ifq, m, pa);
2445         if (error) {
2446                 ALTQ_UNLOCK(ifq);
2447                 return error;
2448         }
2449         if (!ifq->altq_started) {
2450                 /*
2451                  * Hold the interlock of ifnet.if_start
2452                  */
2453                 ifq->altq_started = 1;
2454                 start = 1;
2455         }
2456         ALTQ_UNLOCK(ifq);
2457
2458         ifp->if_obytes += m->m_pkthdr.len;
2459         if (m->m_flags & M_MCAST)
2460                 ifp->if_omcasts++;
2461
2462         if (!start) {
2463                 logifstart(avoid, ifp);
2464                 return 0;
2465         }
2466
2467         /*
2468          * Try to do direct ifnet.if_start first, if there is
2469          * contention on ifnet's serializer, ifnet.if_start will
2470          * be scheduled on ifnet's CPU.
2471          */
2472         if (!ifnet_tryserialize_tx(ifp)) {
2473                 /*
2474                  * ifnet serializer contention happened,
2475                  * ifnet.if_start is scheduled on ifnet's
2476                  * CPU, and we keep going.
2477                  */
2478                 logifstart(contend_sched, ifp);
2479                 if_start_schedule(ifp);
2480                 return 0;
2481         }
2482
2483         if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2484                 logifstart(run, ifp);
2485                 ifp->if_start(ifp);
2486                 if ((ifp->if_flags &
2487                      (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2488                         running = 1;
2489         }
2490
2491         ifnet_deserialize_tx(ifp);
2492
2493         if (if_start_need_schedule(ifq, running)) {
2494                 /*
2495                  * More data need to be transmitted, ifnet.if_start is
2496                  * scheduled on ifnet's CPU, and we keep going.
2497                  * NOTE: ifnet.if_start interlock is not released.
2498                  */
2499                 logifstart(sched, ifp);
2500                 if_start_schedule(ifp);
2501         }
2502         return 0;
2503 }
2504
2505 void *
2506 ifa_create(int size, int flags)
2507 {
2508         struct ifaddr *ifa;
2509         int i;
2510
2511         KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2512
2513         ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2514         if (ifa == NULL)
2515                 return NULL;
2516
2517         ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2518                                       M_IFADDR, M_WAITOK | M_ZERO);
2519         ifa->ifa_ncnt = ncpus;
2520         for (i = 0; i < ncpus; ++i) {
2521                 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2522
2523                 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2524                 ifac->ifa = ifa;
2525                 ifac->ifa_refcnt = 1;
2526         }
2527 #ifdef IFADDR_DEBUG
2528         kprintf("alloc ifa %p %d\n", ifa, size);
2529 #endif
2530         return ifa;
2531 }
2532
2533 void
2534 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2535 {
2536         struct ifaddr *ifa = ifac->ifa;
2537
2538         KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2539         KKASSERT(ifac->ifa_refcnt == 0);
2540         KASSERT(ifac->ifa_listmask == 0,
2541                 ("ifa is still on %#x lists", ifac->ifa_listmask));
2542
2543         ifac->ifa_magic = IFA_CONTAINER_DEAD;
2544
2545 #ifdef IFADDR_DEBUG_VERBOSE
2546         kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2547 #endif
2548
2549         KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2550                 ("invalid # of ifac, %d", ifa->ifa_ncnt));
2551         if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2552 #ifdef IFADDR_DEBUG
2553                 kprintf("free ifa %p\n", ifa);
2554 #endif
2555                 kfree(ifa->ifa_containers, M_IFADDR);
2556                 kfree(ifa, M_IFADDR);
2557         }
2558 }
2559
2560 static void
2561 ifa_iflink_dispatch(netmsg_t nmsg)
2562 {
2563         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2564         struct ifaddr *ifa = msg->ifa;
2565         struct ifnet *ifp = msg->ifp;
2566         int cpu = mycpuid;
2567         struct ifaddr_container *ifac;
2568
2569         crit_enter();
2570
2571         ifac = &ifa->ifa_containers[cpu];
2572         ASSERT_IFAC_VALID(ifac);
2573         KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2574                 ("ifaddr is on if_addrheads"));
2575
2576         ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2577         if (msg->tail)
2578                 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2579         else
2580                 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2581
2582         crit_exit();
2583
2584         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2585 }
2586
2587 void
2588 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2589 {
2590         struct netmsg_ifaddr msg;
2591
2592         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2593                     0, ifa_iflink_dispatch);
2594         msg.ifa = ifa;
2595         msg.ifp = ifp;
2596         msg.tail = tail;
2597
2598         ifa_domsg(&msg.base.lmsg, 0);
2599 }
2600
2601 static void
2602 ifa_ifunlink_dispatch(netmsg_t nmsg)
2603 {
2604         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2605         struct ifaddr *ifa = msg->ifa;
2606         struct ifnet *ifp = msg->ifp;
2607         int cpu = mycpuid;
2608         struct ifaddr_container *ifac;
2609
2610         crit_enter();
2611
2612         ifac = &ifa->ifa_containers[cpu];
2613         ASSERT_IFAC_VALID(ifac);
2614         KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2615                 ("ifaddr is not on if_addrhead"));
2616
2617         TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2618         ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2619
2620         crit_exit();
2621
2622         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2623 }
2624
2625 void
2626 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2627 {
2628         struct netmsg_ifaddr msg;
2629
2630         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2631                     0, ifa_ifunlink_dispatch);
2632         msg.ifa = ifa;
2633         msg.ifp = ifp;
2634
2635         ifa_domsg(&msg.base.lmsg, 0);
2636 }
2637
2638 static void
2639 ifa_destroy_dispatch(netmsg_t nmsg)
2640 {
2641         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2642
2643         IFAFREE(msg->ifa);
2644         ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2645 }
2646
2647 void
2648 ifa_destroy(struct ifaddr *ifa)
2649 {
2650         struct netmsg_ifaddr msg;
2651
2652         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2653                     0, ifa_destroy_dispatch);
2654         msg.ifa = ifa;
2655
2656         ifa_domsg(&msg.base.lmsg, 0);
2657 }
2658
2659 struct lwkt_port *
2660 ifnet_portfn(int cpu)
2661 {
2662         return &ifnet_threads[cpu].td_msgport;
2663 }
2664
2665 void
2666 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2667 {
2668         KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2669
2670         if (next_cpu < ncpus)
2671                 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2672         else
2673                 lwkt_replymsg(lmsg, 0);
2674 }
2675
2676 int
2677 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2678 {
2679         KKASSERT(cpu < ncpus);
2680         return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2681 }
2682
2683 void
2684 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2685 {
2686         KKASSERT(cpu < ncpus);
2687         lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2688 }
2689
2690 /*
2691  * Generic netmsg service loop.  Some protocols may roll their own but all
2692  * must do the basic command dispatch function call done here.
2693  */
2694 static void
2695 ifnet_service_loop(void *arg __unused)
2696 {
2697         netmsg_t msg;
2698
2699         while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2700                 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2701                 msg->base.nm_dispatch(msg);
2702         }
2703 }
2704
2705 static void
2706 ifnetinit(void *dummy __unused)
2707 {
2708         int i;
2709
2710         for (i = 0; i < ncpus; ++i) {
2711                 struct thread *thr = &ifnet_threads[i];
2712
2713                 lwkt_create(ifnet_service_loop, NULL, NULL,
2714                             thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2715                             i, "ifnet %d", i);
2716                 netmsg_service_port_init(&thr->td_msgport);
2717                 lwkt_schedule(thr);
2718         }
2719 }
2720
2721 struct ifnet *
2722 ifnet_byindex(unsigned short idx)
2723 {
2724         if (idx > if_index)
2725                 return NULL;
2726         return ifindex2ifnet[idx];
2727 }
2728
2729 struct ifaddr *
2730 ifaddr_byindex(unsigned short idx)
2731 {
2732         struct ifnet *ifp;
2733
2734         ifp = ifnet_byindex(idx);
2735         if (!ifp)
2736                 return NULL;
2737         return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2738 }
2739
2740 void
2741 if_register_com_alloc(u_char type,
2742     if_com_alloc_t *a, if_com_free_t *f)
2743 {
2744
2745         KASSERT(if_com_alloc[type] == NULL,
2746             ("if_register_com_alloc: %d already registered", type));
2747         KASSERT(if_com_free[type] == NULL,
2748             ("if_register_com_alloc: %d free already registered", type));
2749
2750         if_com_alloc[type] = a;
2751         if_com_free[type] = f;
2752 }
2753
2754 void
2755 if_deregister_com_alloc(u_char type)
2756 {
2757
2758         KASSERT(if_com_alloc[type] != NULL,
2759             ("if_deregister_com_alloc: %d not registered", type));
2760         KASSERT(if_com_free[type] != NULL,
2761             ("if_deregister_com_alloc: %d free not registered", type));
2762         if_com_alloc[type] = NULL;
2763         if_com_free[type] = NULL;
2764 }
2765
2766 int
2767 if_ring_count2(int cnt, int cnt_max)
2768 {
2769         int shift = 0;
2770
2771         KASSERT(cnt_max >= 1 && powerof2(cnt_max),
2772             ("invalid ring count max %d", cnt_max));
2773
2774         if (cnt <= 0)
2775                 cnt = cnt_max;
2776         if (cnt > ncpus2)
2777                 cnt = ncpus2;
2778         if (cnt > cnt_max)
2779                 cnt = cnt_max;
2780
2781         while ((1 << (shift + 1)) <= cnt)
2782                 ++shift;
2783         cnt = 1 << shift;
2784
2785         KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
2786             ("calculate cnt %d, ncpus2 %d, cnt max %d",
2787              cnt, ncpus2, cnt_max));
2788         return cnt;
2789 }