Merge branch 'vendor/OPENSSL'
[dragonfly.git] / sys / net / if.c
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *      @(#)if.c        8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  */
36
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_polling.h"
41 #include "opt_ifpoll.h"
42
43 #include <sys/param.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/priv.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/socketops.h>
53 #include <sys/protosw.h>
54 #include <sys/kernel.h>
55 #include <sys/ktr.h>
56 #include <sys/mutex.h>
57 #include <sys/sockio.h>
58 #include <sys/syslog.h>
59 #include <sys/sysctl.h>
60 #include <sys/domain.h>
61 #include <sys/thread.h>
62 #include <sys/serialize.h>
63 #include <sys/bus.h>
64
65 #include <sys/thread2.h>
66 #include <sys/msgport2.h>
67 #include <sys/mutex2.h>
68
69 #include <net/if.h>
70 #include <net/if_arp.h>
71 #include <net/if_dl.h>
72 #include <net/if_types.h>
73 #include <net/if_var.h>
74 #include <net/ifq_var.h>
75 #include <net/radix.h>
76 #include <net/route.h>
77 #include <net/if_clone.h>
78 #include <net/netisr.h>
79 #include <net/netmsg2.h>
80
81 #include <machine/atomic.h>
82 #include <machine/stdarg.h>
83 #include <machine/smp.h>
84
85 #if defined(INET) || defined(INET6)
86 /*XXX*/
87 #include <netinet/in.h>
88 #include <netinet/in_var.h>
89 #include <netinet/if_ether.h>
90 #ifdef INET6
91 #include <netinet6/in6_var.h>
92 #include <netinet6/in6_ifattach.h>
93 #endif
94 #endif
95
96 #if defined(COMPAT_43)
97 #include <emulation/43bsd/43bsd_socket.h>
98 #endif /* COMPAT_43 */
99
100 struct netmsg_ifaddr {
101         struct netmsg_base base;
102         struct ifaddr   *ifa;
103         struct ifnet    *ifp;
104         int             tail;
105 };
106
107 /*
108  * System initialization
109  */
110 static void     if_attachdomain(void *);
111 static void     if_attachdomain1(struct ifnet *);
112 static int      ifconf(u_long, caddr_t, struct ucred *);
113 static void     ifinit(void *);
114 static void     ifnetinit(void *);
115 static void     if_slowtimo(void *);
116 static void     link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
117 static int      if_rtdel(struct radix_node *, void *);
118
119 #ifdef INET6
120 /*
121  * XXX: declare here to avoid to include many inet6 related files..
122  * should be more generalized?
123  */
124 extern void     nd6_setmtu(struct ifnet *);
125 #endif
126
127 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
128 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
129
130 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
131 /* Must be after netisr_init */
132 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
133
134 static  if_com_alloc_t *if_com_alloc[256];
135 static  if_com_free_t *if_com_free[256];
136
137 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
138 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
139 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
140
141 int                     ifqmaxlen = IFQ_MAXLEN;
142 struct ifnethead        ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
143
144 /* In ifq_dispatch(), try to do direct ifnet.if_start first */
145 static int              ifq_dispatch_schedonly = 0;
146 SYSCTL_INT(_net_link_generic, OID_AUTO, ifq_dispatch_schedonly, CTLFLAG_RW,
147            &ifq_dispatch_schedonly, 0, "");
148
149 /* In ifq_dispatch(), schedule ifnet.if_start without checking ifnet.if_snd */
150 static int              ifq_dispatch_schednochk = 0;
151 SYSCTL_INT(_net_link_generic, OID_AUTO, ifq_dispatch_schednochk, CTLFLAG_RW,
152            &ifq_dispatch_schednochk, 0, "");
153
154 /* In if_devstart(), try to do direct ifnet.if_start first */
155 static int              if_devstart_schedonly = 0;
156 SYSCTL_INT(_net_link_generic, OID_AUTO, if_devstart_schedonly, CTLFLAG_RW,
157            &if_devstart_schedonly, 0, "");
158
159 /* In if_devstart(), schedule ifnet.if_start without checking ifnet.if_snd */
160 static int              if_devstart_schednochk = 0;
161 SYSCTL_INT(_net_link_generic, OID_AUTO, if_devstart_schednochk, CTLFLAG_RW,
162            &if_devstart_schednochk, 0, "");
163
164 #ifdef SMP
165 /* Schedule ifnet.if_start on the current CPU */
166 static int              if_start_oncpu_sched = 0;
167 SYSCTL_INT(_net_link_generic, OID_AUTO, if_start_oncpu_sched, CTLFLAG_RW,
168            &if_start_oncpu_sched, 0, "");
169 #endif
170
171 struct callout          if_slowtimo_timer;
172
173 int                     if_index = 0;
174 struct ifnet            **ifindex2ifnet = NULL;
175 static struct thread    ifnet_threads[MAXCPU];
176
177 #define IFQ_KTR_STRING          "ifq=%p"
178 #define IFQ_KTR_ARGS    struct ifaltq *ifq
179 #ifndef KTR_IFQ
180 #define KTR_IFQ                 KTR_ALL
181 #endif
182 KTR_INFO_MASTER(ifq);
183 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
184 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
185 #define logifq(name, arg)       KTR_LOG(ifq_ ## name, arg)
186
187 #define IF_START_KTR_STRING     "ifp=%p"
188 #define IF_START_KTR_ARGS       struct ifnet *ifp
189 #ifndef KTR_IF_START
190 #define KTR_IF_START            KTR_ALL
191 #endif
192 KTR_INFO_MASTER(if_start);
193 KTR_INFO(KTR_IF_START, if_start, run, 0,
194          IF_START_KTR_STRING, IF_START_KTR_ARGS);
195 KTR_INFO(KTR_IF_START, if_start, sched, 1,
196          IF_START_KTR_STRING, IF_START_KTR_ARGS);
197 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
198          IF_START_KTR_STRING, IF_START_KTR_ARGS);
199 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
200          IF_START_KTR_STRING, IF_START_KTR_ARGS);
201 #ifdef SMP
202 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
203          IF_START_KTR_STRING, IF_START_KTR_ARGS);
204 #endif
205 #define logifstart(name, arg)   KTR_LOG(if_start_ ## name, arg)
206
207 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
208
209 /*
210  * Network interface utility routines.
211  *
212  * Routines with ifa_ifwith* names take sockaddr *'s as
213  * parameters.
214  */
215 /* ARGSUSED*/
216 void
217 ifinit(void *dummy)
218 {
219         struct ifnet *ifp;
220
221         callout_init(&if_slowtimo_timer);
222
223         crit_enter();
224         TAILQ_FOREACH(ifp, &ifnet, if_link) {
225                 if (ifp->if_snd.ifq_maxlen == 0) {
226                         if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
227                         ifp->if_snd.ifq_maxlen = ifqmaxlen;
228                 }
229         }
230         crit_exit();
231
232         if_slowtimo(0);
233 }
234
235 static int
236 if_start_cpuid(struct ifnet *ifp)
237 {
238         return ifp->if_cpuid;
239 }
240
#ifdef DEVICE_POLLING
/*
 * ifnet.if_start_cpuid method for interfaces that provide if_poll:
 * report if_poll_cpuid when it is non-negative, otherwise fall back
 * to if_cpuid.  if_attach() initializes if_poll_cpuid to -1.
 */
static int
if_start_cpuid_poll(struct ifnet *ifp)
{
	/* Sample the field once so the test and the result agree */
	int cpuid = ifp->if_poll_cpuid;

	return (cpuid >= 0 ? cpuid : ifp->if_cpuid);
}
#endif
253
254 static void
255 if_start_ipifunc(void *arg)
256 {
257         struct ifnet *ifp = arg;
258         struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
259
260         crit_enter();
261         if (lmsg->ms_flags & MSGF_DONE)
262                 lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
263         crit_exit();
264 }
265
/*
 * Schedule ifnet.if_start on ifnet's CPU
 *
 * On SMP kernels the target CPU is the current one when the
 * if_start_oncpu_sched tunable is set, otherwise whatever
 * ifnet.if_start_cpuid reports.  A remote target is reached with an
 * IPI running if_start_ipifunc(); in every other case (including
 * non-SMP kernels) if_start_ipifunc() is called directly.
 */
static void
if_start_schedule(struct ifnet *ifp)
{
#ifdef SMP
	int target;

	target = if_start_oncpu_sched ? mycpuid : ifp->if_start_cpuid(ifp);
	if (target != mycpuid) {
		lwkt_send_ipiq(globaldata_find(target), if_start_ipifunc, ifp);
		return;
	}
#endif
	if_start_ipifunc(ifp);
}
286
287 /*
288  * NOTE:
289  * This function will release ifnet.if_start interlock,
290  * if ifnet.if_start does not need to be scheduled
291  */
292 static __inline int
293 if_start_need_schedule(struct ifaltq *ifq, int running)
294 {
295         if (!running || ifq_is_empty(ifq)
296 #ifdef ALTQ
297             || ifq->altq_tbr != NULL
298 #endif
299         ) {
300                 ALTQ_LOCK(ifq);
301                 /*
302                  * ifnet.if_start interlock is released, if:
303                  * 1) Hardware can not take any packets, due to
304                  *    o  interface is marked down
305                  *    o  hardware queue is full (IFF_OACTIVE)
306                  *    Under the second situation, hardware interrupt
307                  *    or polling(4) will call/schedule ifnet.if_start
308                  *    when hardware queue is ready
309                  * 2) There is not packet in the ifnet.if_snd.
310                  *    Further ifq_dispatch or ifq_handoff will call/
311                  *    schedule ifnet.if_start
312                  * 3) TBR is used and it does not allow further
313                  *    dequeueing.
314                  *    TBR callout will call ifnet.if_start
315                  */
316                 if (!running || !ifq_data_ready(ifq)) {
317                         ifq->altq_started = 0;
318                         ALTQ_UNLOCK(ifq);
319                         return 0;
320                 }
321                 ALTQ_UNLOCK(ifq);
322         }
323         return 1;
324 }
325
326 static void
327 if_start_dispatch(netmsg_t msg)
328 {
329         struct lwkt_msg *lmsg = &msg->base.lmsg;
330         struct ifnet *ifp = lmsg->u.ms_resultp;
331         struct ifaltq *ifq = &ifp->if_snd;
332         int running = 0;
333
334         crit_enter();
335         lwkt_replymsg(lmsg, 0); /* reply ASAP */
336         crit_exit();
337
338 #ifdef SMP
339         if (!if_start_oncpu_sched && mycpuid != ifp->if_start_cpuid(ifp)) {
340                 /*
341                  * If the ifnet is still up, we need to
342                  * chase its CPU change.
343                  */
344                 if (ifp->if_flags & IFF_UP) {
345                         logifstart(chase_sched, ifp);
346                         if_start_schedule(ifp);
347                         return;
348                 } else {
349                         goto check;
350                 }
351         }
352 #endif
353
354         if (ifp->if_flags & IFF_UP) {
355                 ifnet_serialize_tx(ifp); /* XXX try? */
356                 if ((ifp->if_flags & IFF_OACTIVE) == 0) {
357                         logifstart(run, ifp);
358                         ifp->if_start(ifp);
359                         if ((ifp->if_flags &
360                         (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
361                                 running = 1;
362                 }
363                 ifnet_deserialize_tx(ifp);
364         }
365 #ifdef SMP
366 check:
367 #endif
368         if (if_start_need_schedule(ifq, running)) {
369                 crit_enter();
370                 if (lmsg->ms_flags & MSGF_DONE) { /* XXX necessary? */
371                         logifstart(sched, ifp);
372                         lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
373                 }
374                 crit_exit();
375         }
376 }
377
378 /* Device driver ifnet.if_start helper function */
379 void
380 if_devstart(struct ifnet *ifp)
381 {
382         struct ifaltq *ifq = &ifp->if_snd;
383         int running = 0;
384
385         ASSERT_IFNET_SERIALIZED_TX(ifp);
386
387         ALTQ_LOCK(ifq);
388         if (ifq->altq_started || !ifq_data_ready(ifq)) {
389                 logifstart(avoid, ifp);
390                 ALTQ_UNLOCK(ifq);
391                 return;
392         }
393         ifq->altq_started = 1;
394         ALTQ_UNLOCK(ifq);
395
396         if (if_devstart_schedonly) {
397                 /*
398                  * Always schedule ifnet.if_start on ifnet's CPU,
399                  * short circuit the rest of this function.
400                  */
401                 logifstart(sched, ifp);
402                 if_start_schedule(ifp);
403                 return;
404         }
405
406         logifstart(run, ifp);
407         ifp->if_start(ifp);
408
409         if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
410                 running = 1;
411
412         if (if_devstart_schednochk || if_start_need_schedule(ifq, running)) {
413                 /*
414                  * More data need to be transmitted, ifnet.if_start is
415                  * scheduled on ifnet's CPU, and we keep going.
416                  * NOTE: ifnet.if_start interlock is not released.
417                  */
418                 logifstart(sched, ifp);
419                 if_start_schedule(ifp);
420         }
421 }
422
423 static void
424 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
425 {
426         lwkt_serialize_enter(ifp->if_serializer);
427 }
428
429 static void
430 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
431 {
432         lwkt_serialize_exit(ifp->if_serializer);
433 }
434
435 static int
436 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
437 {
438         return lwkt_serialize_try(ifp->if_serializer);
439 }
440
#ifdef INVARIANTS
/*
 * Default ifnet.if_serialize_assert method: assert that the interface's
 * single serializer is held ('serialized' TRUE) or is not held
 * ('serialized' FALSE).  The serialize class argument is ignored.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (!serialized) {
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
		return;
	}
	ASSERT_SERIALIZED(ifp->if_serializer);
}
#endif
453
454 /*
455  * Attach an interface to the list of "active" interfaces.
456  *
457  * The serializer is optional.  If non-NULL access to the interface
458  * may be MPSAFE.
459  */
460 void
461 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
462 {
463         unsigned socksize, ifasize;
464         int namelen, masklen;
465         struct sockaddr_dl *sdl;
466         struct ifaddr *ifa;
467         struct ifaltq *ifq;
468         int i;
469
470         static int if_indexlim = 8;
471
472         if (ifp->if_serialize != NULL) {
473                 KASSERT(ifp->if_deserialize != NULL &&
474                         ifp->if_tryserialize != NULL &&
475                         ifp->if_serialize_assert != NULL,
476                         ("serialize functions are partially setup\n"));
477
478                 /*
479                  * If the device supplies serialize functions,
480                  * then clear if_serializer to catch any invalid
481                  * usage of this field.
482                  */
483                 KASSERT(serializer == NULL,
484                         ("both serialize functions and default serializer "
485                          "are supplied\n"));
486                 ifp->if_serializer = NULL;
487         } else {
488                 KASSERT(ifp->if_deserialize == NULL &&
489                         ifp->if_tryserialize == NULL &&
490                         ifp->if_serialize_assert == NULL,
491                         ("serialize functions are partially setup\n"));
492                 ifp->if_serialize = if_default_serialize;
493                 ifp->if_deserialize = if_default_deserialize;
494                 ifp->if_tryserialize = if_default_tryserialize;
495 #ifdef INVARIANTS
496                 ifp->if_serialize_assert = if_default_serialize_assert;
497 #endif
498
499                 /*
500                  * The serializer can be passed in from the device,
501                  * allowing the same serializer to be used for both
502                  * the interrupt interlock and the device queue.
503                  * If not specified, the netif structure will use an
504                  * embedded serializer.
505                  */
506                 if (serializer == NULL) {
507                         serializer = &ifp->if_default_serializer;
508                         lwkt_serialize_init(serializer);
509                 }
510                 ifp->if_serializer = serializer;
511         }
512
513         ifp->if_start_cpuid = if_start_cpuid;
514         ifp->if_cpuid = 0;
515
516 #ifdef DEVICE_POLLING
517         /* Device is not in polling mode by default */
518         ifp->if_poll_cpuid = -1;
519         if (ifp->if_poll != NULL)
520                 ifp->if_start_cpuid = if_start_cpuid_poll;
521 #endif
522
523         ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
524                                      M_LWKTMSG, M_WAITOK);
525         for (i = 0; i < ncpus; ++i) {
526                 netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
527                             0, if_start_dispatch);
528                 ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
529         }
530
531         mtx_init(&ifp->if_ioctl_mtx);
532         mtx_lock(&ifp->if_ioctl_mtx);
533
534         TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
535         ifp->if_index = ++if_index;
536
537         /*
538          * XXX -
539          * The old code would work if the interface passed a pre-existing
540          * chain of ifaddrs to this code.  We don't trust our callers to
541          * properly initialize the tailq, however, so we no longer allow
542          * this unlikely case.
543          */
544         ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
545                                     M_IFADDR, M_WAITOK | M_ZERO);
546         for (i = 0; i < ncpus; ++i)
547                 TAILQ_INIT(&ifp->if_addrheads[i]);
548
549         TAILQ_INIT(&ifp->if_prefixhead);
550         TAILQ_INIT(&ifp->if_multiaddrs);
551         TAILQ_INIT(&ifp->if_groups);
552         getmicrotime(&ifp->if_lastchange);
553         if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
554                 unsigned int n;
555                 struct ifnet **q;
556
557                 if_indexlim <<= 1;
558
559                 /* grow ifindex2ifnet */
560                 n = if_indexlim * sizeof(*q);
561                 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
562                 if (ifindex2ifnet) {
563                         bcopy(ifindex2ifnet, q, n/2);
564                         kfree(ifindex2ifnet, M_IFADDR);
565                 }
566                 ifindex2ifnet = q;
567         }
568
569         ifindex2ifnet[if_index] = ifp;
570
571         /*
572          * create a Link Level name for this device
573          */
574         namelen = strlen(ifp->if_xname);
575         masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
576         socksize = masklen + ifp->if_addrlen;
577 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
578         if (socksize < sizeof(*sdl))
579                 socksize = sizeof(*sdl);
580         socksize = ROUNDUP(socksize);
581 #undef ROUNDUP
582         ifasize = sizeof(struct ifaddr) + 2 * socksize;
583         ifa = ifa_create(ifasize, M_WAITOK);
584         sdl = (struct sockaddr_dl *)(ifa + 1);
585         sdl->sdl_len = socksize;
586         sdl->sdl_family = AF_LINK;
587         bcopy(ifp->if_xname, sdl->sdl_data, namelen);
588         sdl->sdl_nlen = namelen;
589         sdl->sdl_index = ifp->if_index;
590         sdl->sdl_type = ifp->if_type;
591         ifp->if_lladdr = ifa;
592         ifa->ifa_ifp = ifp;
593         ifa->ifa_rtrequest = link_rtrequest;
594         ifa->ifa_addr = (struct sockaddr *)sdl;
595         sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
596         ifa->ifa_netmask = (struct sockaddr *)sdl;
597         sdl->sdl_len = masklen;
598         while (namelen != 0)
599                 sdl->sdl_data[--namelen] = 0xff;
600         ifa_iflink(ifa, ifp, 0 /* Insert head */);
601
602         EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
603         devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
604
605         ifq = &ifp->if_snd;
606         ifq->altq_type = 0;
607         ifq->altq_disc = NULL;
608         ifq->altq_flags &= ALTQF_CANTCHANGE;
609         ifq->altq_tbr = NULL;
610         ifq->altq_ifp = ifp;
611         ifq->altq_started = 0;
612         ifq->altq_prepended = NULL;
613         ALTQ_LOCK_INIT(ifq);
614         ifq_set_classic(ifq);
615
616         if (!SLIST_EMPTY(&domains))
617                 if_attachdomain1(ifp);
618
619         /* Announce the interface. */
620         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
621
622         mtx_unlock(&ifp->if_ioctl_mtx);
623 }
624
625 static void
626 if_attachdomain(void *dummy)
627 {
628         struct ifnet *ifp;
629
630         crit_enter();
631         TAILQ_FOREACH(ifp, &ifnet, if_list)
632                 if_attachdomain1(ifp);
633         crit_exit();
634 }
635 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
636         if_attachdomain, NULL);
637
638 static void
639 if_attachdomain1(struct ifnet *ifp)
640 {
641         struct domain *dp;
642
643         crit_enter();
644
645         /* address family dependent data region */
646         bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
647         SLIST_FOREACH(dp, &domains, dom_next)
648                 if (dp->dom_ifattach)
649                         ifp->if_afdata[dp->dom_family] =
650                                 (*dp->dom_ifattach)(ifp);
651         crit_exit();
652 }
653
654 /*
655  * Purge all addresses whose type is _not_ AF_LINK
656  */
657 void
658 if_purgeaddrs_nolink(struct ifnet *ifp)
659 {
660         struct ifaddr_container *ifac, *next;
661
662         TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
663                               ifa_link, next) {
664                 struct ifaddr *ifa = ifac->ifa;
665
666                 /* Leave link ifaddr as it is */
667                 if (ifa->ifa_addr->sa_family == AF_LINK)
668                         continue;
669 #ifdef INET
670                 /* XXX: Ugly!! ad hoc just for INET */
671                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
672                         struct ifaliasreq ifr;
673 #ifdef IFADDR_DEBUG_VERBOSE
674                         int i;
675
676                         kprintf("purge in4 addr %p: ", ifa);
677                         for (i = 0; i < ncpus; ++i)
678                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
679                         kprintf("\n");
680 #endif
681
682                         bzero(&ifr, sizeof ifr);
683                         ifr.ifra_addr = *ifa->ifa_addr;
684                         if (ifa->ifa_dstaddr)
685                                 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
686                         if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
687                                        NULL) == 0)
688                                 continue;
689                 }
690 #endif /* INET */
691 #ifdef INET6
692                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
693 #ifdef IFADDR_DEBUG_VERBOSE
694                         int i;
695
696                         kprintf("purge in6 addr %p: ", ifa);
697                         for (i = 0; i < ncpus; ++i)
698                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
699                         kprintf("\n");
700 #endif
701
702                         in6_purgeaddr(ifa);
703                         /* ifp_addrhead is already updated */
704                         continue;
705                 }
706 #endif /* INET6 */
707                 ifa_ifunlink(ifa, ifp);
708                 ifa_destroy(ifa);
709         }
710 }
711
712 /*
713  * Detach an interface, removing it from the
714  * list of "active" interfaces.
715  */
716 void
717 if_detach(struct ifnet *ifp)
718 {
719         struct radix_node_head  *rnh;
720         int i;
721         int cpu, origcpu;
722         struct domain *dp;
723
724         EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
725
726         /*
727          * Remove routes and flush queues.
728          */
729         crit_enter();
730 #ifdef DEVICE_POLLING
731         if (ifp->if_flags & IFF_POLLING)
732                 ether_poll_deregister(ifp);
733 #endif
734 #ifdef IFPOLL_ENABLE
735         if (ifp->if_flags & IFF_NPOLLING)
736                 ifpoll_deregister(ifp);
737 #endif
738         if_down(ifp);
739
740 #ifdef ALTQ
741         if (ifq_is_enabled(&ifp->if_snd))
742                 altq_disable(&ifp->if_snd);
743         if (ifq_is_attached(&ifp->if_snd))
744                 altq_detach(&ifp->if_snd);
745 #endif
746
747         /*
748          * Clean up all addresses.
749          */
750         ifp->if_lladdr = NULL;
751
752         if_purgeaddrs_nolink(ifp);
753         if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
754                 struct ifaddr *ifa;
755
756                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
757                 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
758                         ("non-link ifaddr is left on if_addrheads"));
759
760                 ifa_ifunlink(ifa, ifp);
761                 ifa_destroy(ifa);
762                 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
763                         ("there are still ifaddrs left on if_addrheads"));
764         }
765
766 #ifdef INET
767         /*
768          * Remove all IPv4 kernel structures related to ifp.
769          */
770         in_ifdetach(ifp);
771 #endif
772
773 #ifdef INET6
774         /*
775          * Remove all IPv6 kernel structs related to ifp.  This should be done
776          * before removing routing entries below, since IPv6 interface direct
777          * routes are expected to be removed by the IPv6-specific kernel API.
778          * Otherwise, the kernel will detect some inconsistency and bark it.
779          */
780         in6_ifdetach(ifp);
781 #endif
782
783         /*
784          * Delete all remaining routes using this interface
785          * Unfortuneatly the only way to do this is to slog through
786          * the entire routing table looking for routes which point
787          * to this interface...oh well...
788          */
789         origcpu = mycpuid;
790         for (cpu = 0; cpu < ncpus2; cpu++) {
791                 lwkt_migratecpu(cpu);
792                 for (i = 1; i <= AF_MAX; i++) {
793                         if ((rnh = rt_tables[cpu][i]) == NULL)
794                                 continue;
795                         rnh->rnh_walktree(rnh, if_rtdel, ifp);
796                 }
797         }
798         lwkt_migratecpu(origcpu);
799
800         /* Announce that the interface is gone. */
801         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
802         devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
803
804         SLIST_FOREACH(dp, &domains, dom_next)
805                 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
806                         (*dp->dom_ifdetach)(ifp,
807                                 ifp->if_afdata[dp->dom_family]);
808
809         /*
810          * Remove interface from ifindex2ifp[] and maybe decrement if_index.
811          */
812         ifindex2ifnet[ifp->if_index] = NULL;
813         while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
814                 if_index--;
815
816         TAILQ_REMOVE(&ifnet, ifp, if_link);
817         kfree(ifp->if_addrheads, M_IFADDR);
818         kfree(ifp->if_start_nmsg, M_LWKTMSG);
819         crit_exit();
820 }
821
822 /*
823  * Create interface group without members
824  */
825 struct ifg_group *
826 if_creategroup(const char *groupname)
827 {
828         struct ifg_group        *ifg = NULL;
829
830         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
831             M_TEMP, M_NOWAIT)) == NULL)
832                 return (NULL);
833
834         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
835         ifg->ifg_refcnt = 0;
836         ifg->ifg_carp_demoted = 0;
837         TAILQ_INIT(&ifg->ifg_members);
838 #if NPF > 0
839         pfi_attach_ifgroup(ifg);
840 #endif
841         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
842
843         return (ifg);
844 }
845
846 /*
847  * Add a group to an interface
848  */
849 int
850 if_addgroup(struct ifnet *ifp, const char *groupname)
851 {
852         struct ifg_list         *ifgl;
853         struct ifg_group        *ifg = NULL;
854         struct ifg_member       *ifgm;
855
856         if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
857             groupname[strlen(groupname) - 1] <= '9')
858                 return (EINVAL);
859
860         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
861                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
862                         return (EEXIST);
863
864         if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
865                 return (ENOMEM);
866
867         if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
868                 kfree(ifgl, M_TEMP);
869                 return (ENOMEM);
870         }
871
872         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
873                 if (!strcmp(ifg->ifg_group, groupname))
874                         break;
875
876         if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
877                 kfree(ifgl, M_TEMP);
878                 kfree(ifgm, M_TEMP);
879                 return (ENOMEM);
880         }
881
882         ifg->ifg_refcnt++;
883         ifgl->ifgl_group = ifg;
884         ifgm->ifgm_ifp = ifp;
885
886         TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
887         TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
888
889 #if NPF > 0
890         pfi_group_change(groupname);
891 #endif
892
893         return (0);
894 }
895
896 /*
897  * Remove a group from an interface
898  */
899 int
900 if_delgroup(struct ifnet *ifp, const char *groupname)
901 {
902         struct ifg_list         *ifgl;
903         struct ifg_member       *ifgm;
904
905         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
906                 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
907                         break;
908         if (ifgl == NULL)
909                 return (ENOENT);
910
911         TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
912
913         TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
914                 if (ifgm->ifgm_ifp == ifp)
915                         break;
916
917         if (ifgm != NULL) {
918                 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
919                 kfree(ifgm, M_TEMP);
920         }
921
922         if (--ifgl->ifgl_group->ifg_refcnt == 0) {
923                 TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
924 #if NPF > 0
925                 pfi_detach_ifgroup(ifgl->ifgl_group);
926 #endif
927                 kfree(ifgl->ifgl_group, M_TEMP);
928         }
929
930         kfree(ifgl, M_TEMP);
931
932 #if NPF > 0
933         pfi_group_change(groupname);
934 #endif
935
936         return (0);
937 }
938
939 /*
940  * Stores all groups from an interface in memory pointed
941  * to by data
942  */
943 int
944 if_getgroup(caddr_t data, struct ifnet *ifp)
945 {
946         int                      len, error;
947         struct ifg_list         *ifgl;
948         struct ifg_req           ifgrq, *ifgp;
949         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
950
951         if (ifgr->ifgr_len == 0) {
952                 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
953                         ifgr->ifgr_len += sizeof(struct ifg_req);
954                 return (0);
955         }
956
957         len = ifgr->ifgr_len;
958         ifgp = ifgr->ifgr_groups;
959         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
960                 if (len < sizeof(ifgrq))
961                         return (EINVAL);
962                 bzero(&ifgrq, sizeof ifgrq);
963                 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
964                     sizeof(ifgrq.ifgrq_group));
965                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
966                     sizeof(struct ifg_req))))
967                         return (error);
968                 len -= sizeof(ifgrq);
969                 ifgp++;
970         }
971
972         return (0);
973 }
974
975 /*
976  * Stores all members of a group in memory pointed to by data
977  */
978 int
979 if_getgroupmembers(caddr_t data)
980 {
981         struct ifgroupreq       *ifgr = (struct ifgroupreq *)data;
982         struct ifg_group        *ifg;
983         struct ifg_member       *ifgm;
984         struct ifg_req           ifgrq, *ifgp;
985         int                      len, error;
986
987         TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
988                 if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
989                         break;
990         if (ifg == NULL)
991                 return (ENOENT);
992
993         if (ifgr->ifgr_len == 0) {
994                 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
995                         ifgr->ifgr_len += sizeof(ifgrq);
996                 return (0);
997         }
998
999         len = ifgr->ifgr_len;
1000         ifgp = ifgr->ifgr_groups;
1001         TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1002                 if (len < sizeof(ifgrq))
1003                         return (EINVAL);
1004                 bzero(&ifgrq, sizeof ifgrq);
1005                 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1006                     sizeof(ifgrq.ifgrq_member));
1007                 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1008                     sizeof(struct ifg_req))))
1009                         return (error);
1010                 len -= sizeof(ifgrq);
1011                 ifgp++;
1012         }
1013
1014         return (0);
1015 }
1016
1017 /*
1018  * Delete Routes for a Network Interface
1019  *
1020  * Called for each routing entry via the rnh->rnh_walktree() call above
1021  * to delete all route entries referencing a detaching network interface.
1022  *
1023  * Arguments:
1024  *      rn      pointer to node in the routing table
1025  *      arg     argument passed to rnh->rnh_walktree() - detaching interface
1026  *
1027  * Returns:
1028  *      0       successful
1029  *      errno   failed - reason indicated
1030  *
1031  */
1032 static int
1033 if_rtdel(struct radix_node *rn, void *arg)
1034 {
1035         struct rtentry  *rt = (struct rtentry *)rn;
1036         struct ifnet    *ifp = arg;
1037         int             err;
1038
1039         if (rt->rt_ifp == ifp) {
1040
1041                 /*
1042                  * Protect (sorta) against walktree recursion problems
1043                  * with cloned routes
1044                  */
1045                 if (!(rt->rt_flags & RTF_UP))
1046                         return (0);
1047
1048                 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1049                                 rt_mask(rt), rt->rt_flags,
1050                                 NULL);
1051                 if (err) {
1052                         log(LOG_WARNING, "if_rtdel: error %d\n", err);
1053                 }
1054         }
1055
1056         return (0);
1057 }
1058
1059 /*
1060  * Locate an interface based on a complete address.
1061  */
1062 struct ifaddr *
1063 ifa_ifwithaddr(struct sockaddr *addr)
1064 {
1065         struct ifnet *ifp;
1066
1067         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1068                 struct ifaddr_container *ifac;
1069
1070                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1071                         struct ifaddr *ifa = ifac->ifa;
1072
1073                         if (ifa->ifa_addr->sa_family != addr->sa_family)
1074                                 continue;
1075                         if (sa_equal(addr, ifa->ifa_addr))
1076                                 return (ifa);
1077                         if ((ifp->if_flags & IFF_BROADCAST) &&
1078                             ifa->ifa_broadaddr &&
1079                             /* IPv6 doesn't have broadcast */
1080                             ifa->ifa_broadaddr->sa_len != 0 &&
1081                             sa_equal(ifa->ifa_broadaddr, addr))
1082                                 return (ifa);
1083                 }
1084         }
1085         return (NULL);
1086 }
1087 /*
1088  * Locate the point to point interface with a given destination address.
1089  */
1090 struct ifaddr *
1091 ifa_ifwithdstaddr(struct sockaddr *addr)
1092 {
1093         struct ifnet *ifp;
1094
1095         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1096                 struct ifaddr_container *ifac;
1097
1098                 if (!(ifp->if_flags & IFF_POINTOPOINT))
1099                         continue;
1100
1101                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1102                         struct ifaddr *ifa = ifac->ifa;
1103
1104                         if (ifa->ifa_addr->sa_family != addr->sa_family)
1105                                 continue;
1106                         if (ifa->ifa_dstaddr &&
1107                             sa_equal(addr, ifa->ifa_dstaddr))
1108                                 return (ifa);
1109                 }
1110         }
1111         return (NULL);
1112 }
1113
1114 /*
1115  * Find an interface on a specific network.  If many, choice
1116  * is most specific found.
1117  */
1118 struct ifaddr *
1119 ifa_ifwithnet(struct sockaddr *addr)
1120 {
1121         struct ifnet *ifp;
1122         struct ifaddr *ifa_maybe = NULL;
1123         u_int af = addr->sa_family;
1124         char *addr_data = addr->sa_data, *cplim;
1125
1126         /*
1127          * AF_LINK addresses can be looked up directly by their index number,
1128          * so do that if we can.
1129          */
1130         if (af == AF_LINK) {
1131                 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1132
1133                 if (sdl->sdl_index && sdl->sdl_index <= if_index)
1134                         return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1135         }
1136
1137         /*
1138          * Scan though each interface, looking for ones that have
1139          * addresses in this address family.
1140          */
1141         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1142                 struct ifaddr_container *ifac;
1143
1144                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1145                         struct ifaddr *ifa = ifac->ifa;
1146                         char *cp, *cp2, *cp3;
1147
1148                         if (ifa->ifa_addr->sa_family != af)
1149 next:                           continue;
1150                         if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1151                                 /*
1152                                  * This is a bit broken as it doesn't
1153                                  * take into account that the remote end may
1154                                  * be a single node in the network we are
1155                                  * looking for.
1156                                  * The trouble is that we don't know the
1157                                  * netmask for the remote end.
1158                                  */
1159                                 if (ifa->ifa_dstaddr != NULL &&
1160                                     sa_equal(addr, ifa->ifa_dstaddr))
1161                                         return (ifa);
1162                         } else {
1163                                 /*
1164                                  * if we have a special address handler,
1165                                  * then use it instead of the generic one.
1166                                  */
1167                                 if (ifa->ifa_claim_addr) {
1168                                         if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1169                                                 return (ifa);
1170                                         } else {
1171                                                 continue;
1172                                         }
1173                                 }
1174
1175                                 /*
1176                                  * Scan all the bits in the ifa's address.
1177                                  * If a bit dissagrees with what we are
1178                                  * looking for, mask it with the netmask
1179                                  * to see if it really matters.
1180                                  * (A byte at a time)
1181                                  */
1182                                 if (ifa->ifa_netmask == 0)
1183                                         continue;
1184                                 cp = addr_data;
1185                                 cp2 = ifa->ifa_addr->sa_data;
1186                                 cp3 = ifa->ifa_netmask->sa_data;
1187                                 cplim = ifa->ifa_netmask->sa_len +
1188                                         (char *)ifa->ifa_netmask;
1189                                 while (cp3 < cplim)
1190                                         if ((*cp++ ^ *cp2++) & *cp3++)
1191                                                 goto next; /* next address! */
1192                                 /*
1193                                  * If the netmask of what we just found
1194                                  * is more specific than what we had before
1195                                  * (if we had one) then remember the new one
1196                                  * before continuing to search
1197                                  * for an even better one.
1198                                  */
1199                                 if (ifa_maybe == NULL ||
1200                                     rn_refines((char *)ifa->ifa_netmask,
1201                                                (char *)ifa_maybe->ifa_netmask))
1202                                         ifa_maybe = ifa;
1203                         }
1204                 }
1205         }
1206         return (ifa_maybe);
1207 }
1208
1209 /*
1210  * Find an interface address specific to an interface best matching
1211  * a given address.
1212  */
1213 struct ifaddr *
1214 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1215 {
1216         struct ifaddr_container *ifac;
1217         char *cp, *cp2, *cp3;
1218         char *cplim;
1219         struct ifaddr *ifa_maybe = NULL;
1220         u_int af = addr->sa_family;
1221
1222         if (af >= AF_MAX)
1223                 return (0);
1224         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1225                 struct ifaddr *ifa = ifac->ifa;
1226
1227                 if (ifa->ifa_addr->sa_family != af)
1228                         continue;
1229                 if (ifa_maybe == NULL)
1230                         ifa_maybe = ifa;
1231                 if (ifa->ifa_netmask == NULL) {
1232                         if (sa_equal(addr, ifa->ifa_addr) ||
1233                             (ifa->ifa_dstaddr != NULL &&
1234                              sa_equal(addr, ifa->ifa_dstaddr)))
1235                                 return (ifa);
1236                         continue;
1237                 }
1238                 if (ifp->if_flags & IFF_POINTOPOINT) {
1239                         if (sa_equal(addr, ifa->ifa_dstaddr))
1240                                 return (ifa);
1241                 } else {
1242                         cp = addr->sa_data;
1243                         cp2 = ifa->ifa_addr->sa_data;
1244                         cp3 = ifa->ifa_netmask->sa_data;
1245                         cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1246                         for (; cp3 < cplim; cp3++)
1247                                 if ((*cp++ ^ *cp2++) & *cp3)
1248                                         break;
1249                         if (cp3 == cplim)
1250                                 return (ifa);
1251                 }
1252         }
1253         return (ifa_maybe);
1254 }
1255
1256 /*
1257  * Default action when installing a route with a Link Level gateway.
1258  * Lookup an appropriate real ifa to point to.
1259  * This should be moved to /sys/net/link.c eventually.
1260  */
1261 static void
1262 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1263 {
1264         struct ifaddr *ifa;
1265         struct sockaddr *dst;
1266         struct ifnet *ifp;
1267
1268         if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1269             (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1270                 return;
1271         ifa = ifaof_ifpforaddr(dst, ifp);
1272         if (ifa != NULL) {
1273                 IFAFREE(rt->rt_ifa);
1274                 IFAREF(ifa);
1275                 rt->rt_ifa = ifa;
1276                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1277                         ifa->ifa_rtrequest(cmd, rt, info);
1278         }
1279 }
1280
1281 /*
1282  * Mark an interface down and notify protocols of
1283  * the transition.
1284  * NOTE: must be called at splnet or eqivalent.
1285  */
1286 void
1287 if_unroute(struct ifnet *ifp, int flag, int fam)
1288 {
1289         struct ifaddr_container *ifac;
1290
1291         ifp->if_flags &= ~flag;
1292         getmicrotime(&ifp->if_lastchange);
1293         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1294                 struct ifaddr *ifa = ifac->ifa;
1295
1296                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1297                         kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1298         }
1299         ifq_purge(&ifp->if_snd);
1300         rt_ifmsg(ifp);
1301 }
1302
1303 /*
1304  * Mark an interface up and notify protocols of
1305  * the transition.
1306  * NOTE: must be called at splnet or eqivalent.
1307  */
1308 void
1309 if_route(struct ifnet *ifp, int flag, int fam)
1310 {
1311         struct ifaddr_container *ifac;
1312
1313         ifq_purge(&ifp->if_snd);
1314         ifp->if_flags |= flag;
1315         getmicrotime(&ifp->if_lastchange);
1316         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1317                 struct ifaddr *ifa = ifac->ifa;
1318
1319                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1320                         kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1321         }
1322         rt_ifmsg(ifp);
1323 #ifdef INET6
1324         in6_if_up(ifp);
1325 #endif
1326 }
1327
1328 /*
1329  * Mark an interface down and notify protocols of the transition.  An
1330  * interface going down is also considered to be a synchronizing event.
1331  * We must ensure that all packet processing related to the interface
1332  * has completed before we return so e.g. the caller can free the ifnet
1333  * structure that the mbufs may be referencing.
1334  *
1335  * NOTE: must be called at splnet or eqivalent.
1336  */
1337 void
1338 if_down(struct ifnet *ifp)
1339 {
1340         if_unroute(ifp, IFF_UP, AF_UNSPEC);
1341         netmsg_service_sync();
1342 }
1343
1344 /*
1345  * Mark an interface up and notify protocols of
1346  * the transition.
1347  * NOTE: must be called at splnet or eqivalent.
1348  */
1349 void
1350 if_up(struct ifnet *ifp)
1351 {
1352         if_route(ifp, IFF_UP, AF_UNSPEC);
1353 }
1354
1355 /*
1356  * Process a link state change.
1357  * NOTE: must be called at splsoftnet or equivalent.
1358  */
1359 void
1360 if_link_state_change(struct ifnet *ifp)
1361 {
1362         int link_state = ifp->if_link_state;
1363
1364         rt_ifmsg(ifp);
1365         devctl_notify("IFNET", ifp->if_xname,
1366             (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1367 }
1368
1369 /*
1370  * Handle interface watchdog timer routines.  Called
1371  * from softclock, we decrement timers (if set) and
1372  * call the appropriate interface routine on expiration.
1373  */
1374 static void
1375 if_slowtimo(void *arg)
1376 {
1377         struct ifnet *ifp;
1378
1379         crit_enter();
1380
1381         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1382                 if (ifp->if_timer == 0 || --ifp->if_timer)
1383                         continue;
1384                 if (ifp->if_watchdog) {
1385                         if (ifnet_tryserialize_all(ifp)) {
1386                                 (*ifp->if_watchdog)(ifp);
1387                                 ifnet_deserialize_all(ifp);
1388                         } else {
1389                                 /* try again next timeout */
1390                                 ++ifp->if_timer;
1391                         }
1392                 }
1393         }
1394
1395         crit_exit();
1396
1397         callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1398 }
1399
1400 /*
1401  * Map interface name to
1402  * interface structure pointer.
1403  */
1404 struct ifnet *
1405 ifunit(const char *name)
1406 {
1407         struct ifnet *ifp;
1408
1409         /*
1410          * Search all the interfaces for this name/number
1411          */
1412
1413         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1414                 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1415                         break;
1416         }
1417         return (ifp);
1418 }
1419
1420
1421 /*
1422  * Map interface name in a sockaddr_dl to
1423  * interface structure pointer.
1424  */
1425 struct ifnet *
1426 if_withname(struct sockaddr *sa)
1427 {
1428         char ifname[IFNAMSIZ+1];
1429         struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1430
1431         if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1432              (sdl->sdl_nlen > IFNAMSIZ) )
1433                 return NULL;
1434
1435         /*
1436          * ifunit wants a null-terminated name.  It may not be null-terminated
1437          * in the sockaddr.  We don't want to change the caller's sockaddr,
1438          * and there might not be room to put the trailing null anyway, so we
1439          * make a local copy that we know we can null terminate safely.
1440          */
1441
1442         bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1443         ifname[sdl->sdl_nlen] = '\0';
1444         return ifunit(ifname);
1445 }
1446
1447
1448 /*
1449  * Interface ioctls.
1450  */
1451 int
1452 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1453 {
1454         struct ifnet *ifp;
1455         struct ifreq *ifr;
1456         struct ifstat *ifs;
1457         int error;
1458         short oif_flags;
1459         int new_flags;
1460 #ifdef COMPAT_43
1461         int ocmd;
1462 #endif
1463         size_t namelen, onamelen;
1464         char new_name[IFNAMSIZ];
1465         struct ifaddr *ifa;
1466         struct sockaddr_dl *sdl;
1467
1468         switch (cmd) {
1469         case SIOCGIFCONF:
1470         case OSIOCGIFCONF:
1471                 return (ifconf(cmd, data, cred));
1472         default:
1473                 break;
1474         }
1475
1476         ifr = (struct ifreq *)data;
1477
1478         switch (cmd) {
1479         case SIOCIFCREATE:
1480         case SIOCIFCREATE2:
1481                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1482                         return (error);
1483                 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1484                         cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1485         case SIOCIFDESTROY:
1486                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1487                         return (error);
1488                 return (if_clone_destroy(ifr->ifr_name));
1489         case SIOCIFGCLONERS:
1490                 return (if_clone_list((struct if_clonereq *)data));
1491         default:
1492                 break;
1493         }
1494
1495         /*
1496          * Nominal ioctl through interface, lookup the ifp and obtain a
1497          * lock to serialize the ifconfig ioctl operation.
1498          */
1499         ifp = ifunit(ifr->ifr_name);
1500         if (ifp == NULL)
1501                 return (ENXIO);
1502         error = 0;
1503         mtx_lock(&ifp->if_ioctl_mtx);
1504
1505         switch (cmd) {
1506         case SIOCGIFINDEX:
1507                 ifr->ifr_index = ifp->if_index;
1508                 break;
1509
1510         case SIOCGIFFLAGS:
1511                 ifr->ifr_flags = ifp->if_flags;
1512                 ifr->ifr_flagshigh = ifp->if_flags >> 16;
1513                 break;
1514
1515         case SIOCGIFCAP:
1516                 ifr->ifr_reqcap = ifp->if_capabilities;
1517                 ifr->ifr_curcap = ifp->if_capenable;
1518                 break;
1519
1520         case SIOCGIFMETRIC:
1521                 ifr->ifr_metric = ifp->if_metric;
1522                 break;
1523
1524         case SIOCGIFMTU:
1525                 ifr->ifr_mtu = ifp->if_mtu;
1526                 break;
1527
1528         case SIOCGIFDATA:
1529                 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1530                                 sizeof(ifp->if_data));
1531                 break;
1532
1533         case SIOCGIFPHYS:
1534                 ifr->ifr_phys = ifp->if_physical;
1535                 break;
1536
1537         case SIOCGIFPOLLCPU:
1538 #ifdef DEVICE_POLLING
1539                 ifr->ifr_pollcpu = ifp->if_poll_cpuid;
1540 #else
1541                 ifr->ifr_pollcpu = -1;
1542 #endif
1543                 break;
1544
1545         case SIOCSIFPOLLCPU:
1546 #ifdef DEVICE_POLLING
1547                 if ((ifp->if_flags & IFF_POLLING) == 0)
1548                         ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
1549 #endif
1550                 break;
1551
1552         case SIOCSIFFLAGS:
1553                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1554                 if (error)
1555                         break;
1556                 new_flags = (ifr->ifr_flags & 0xffff) |
1557                     (ifr->ifr_flagshigh << 16);
1558                 if (ifp->if_flags & IFF_SMART) {
1559                         /* Smart drivers twiddle their own routes */
1560                 } else if (ifp->if_flags & IFF_UP &&
1561                     (new_flags & IFF_UP) == 0) {
1562                         crit_enter();
1563                         if_down(ifp);
1564                         crit_exit();
1565                 } else if (new_flags & IFF_UP &&
1566                     (ifp->if_flags & IFF_UP) == 0) {
1567                         crit_enter();
1568                         if_up(ifp);
1569                         crit_exit();
1570                 }
1571
1572 #ifdef DEVICE_POLLING
1573                 if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
1574                         if (new_flags & IFF_POLLING) {
1575                                 ether_poll_register(ifp);
1576                         } else {
1577                                 ether_poll_deregister(ifp);
1578                         }
1579                 }
1580 #endif
1581 #ifdef IFPOLL_ENABLE
1582                 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1583                         if (new_flags & IFF_NPOLLING)
1584                                 ifpoll_register(ifp);
1585                         else
1586                                 ifpoll_deregister(ifp);
1587                 }
1588 #endif
1589
1590                 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1591                         (new_flags &~ IFF_CANTCHANGE);
1592                 if (new_flags & IFF_PPROMISC) {
1593                         /* Permanently promiscuous mode requested */
1594                         ifp->if_flags |= IFF_PROMISC;
1595                 } else if (ifp->if_pcount == 0) {
1596                         ifp->if_flags &= ~IFF_PROMISC;
1597                 }
1598                 if (ifp->if_ioctl) {
1599                         ifnet_serialize_all(ifp);
1600                         ifp->if_ioctl(ifp, cmd, data, cred);
1601                         ifnet_deserialize_all(ifp);
1602                 }
1603                 getmicrotime(&ifp->if_lastchange);
1604                 break;
1605
1606         case SIOCSIFCAP:
1607                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1608                 if (error)
1609                         break;
1610                 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1611                         error = EINVAL;
1612                         break;
1613                 }
1614                 ifnet_serialize_all(ifp);
1615                 ifp->if_ioctl(ifp, cmd, data, cred);
1616                 ifnet_deserialize_all(ifp);
1617                 break;
1618
1619         case SIOCSIFNAME:
1620                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1621                 if (error)
1622                         break;
1623                 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1624                 if (error)
1625                         break;
1626                 if (new_name[0] == '\0') {
1627                         error = EINVAL;
1628                         break;
1629                 }
1630                 if (ifunit(new_name) != NULL) {
1631                         error = EEXIST;
1632                         break;
1633                 }
1634
1635                 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1636
1637                 /* Announce the departure of the interface. */
1638                 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1639
1640                 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1641                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1642                 /* XXX IFA_LOCK(ifa); */
1643                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1644                 namelen = strlen(new_name);
1645                 onamelen = sdl->sdl_nlen;
1646                 /*
1647                  * Move the address if needed.  This is safe because we
1648                  * allocate space for a name of length IFNAMSIZ when we
1649                  * create this in if_attach().
1650                  */
1651                 if (namelen != onamelen) {
1652                         bcopy(sdl->sdl_data + onamelen,
1653                             sdl->sdl_data + namelen, sdl->sdl_alen);
1654                 }
1655                 bcopy(new_name, sdl->sdl_data, namelen);
1656                 sdl->sdl_nlen = namelen;
1657                 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1658                 bzero(sdl->sdl_data, onamelen);
1659                 while (namelen != 0)
1660                         sdl->sdl_data[--namelen] = 0xff;
1661                 /* XXX IFA_UNLOCK(ifa) */
1662
1663                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1664
1665                 /* Announce the return of the interface. */
1666                 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1667                 break;
1668
1669         case SIOCSIFMETRIC:
1670                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1671                 if (error)
1672                         break;
1673                 ifp->if_metric = ifr->ifr_metric;
1674                 getmicrotime(&ifp->if_lastchange);
1675                 break;
1676
1677         case SIOCSIFPHYS:
1678                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1679                 if (error)
1680                         break;
1681                 if (ifp->if_ioctl == NULL) {
1682                         error = EOPNOTSUPP;
1683                         break;
1684                 }
1685                 ifnet_serialize_all(ifp);
1686                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1687                 ifnet_deserialize_all(ifp);
1688                 if (error == 0)
1689                         getmicrotime(&ifp->if_lastchange);
1690                 break;
1691
1692         case SIOCSIFMTU:
1693         {
1694                 u_long oldmtu = ifp->if_mtu;
1695
1696                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1697                 if (error)
1698                         break;
1699                 if (ifp->if_ioctl == NULL) {
1700                         error = EOPNOTSUPP;
1701                         break;
1702                 }
1703                 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1704                         error = EINVAL;
1705                         break;
1706                 }
1707                 ifnet_serialize_all(ifp);
1708                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1709                 ifnet_deserialize_all(ifp);
1710                 if (error == 0) {
1711                         getmicrotime(&ifp->if_lastchange);
1712                         rt_ifmsg(ifp);
1713                 }
1714                 /*
1715                  * If the link MTU changed, do network layer specific procedure.
1716                  */
1717                 if (ifp->if_mtu != oldmtu) {
1718 #ifdef INET6
1719                         nd6_setmtu(ifp);
1720 #endif
1721                 }
1722                 break;
1723         }
1724
1725         case SIOCADDMULTI:
1726         case SIOCDELMULTI:
1727                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1728                 if (error)
1729                         break;
1730
1731                 /* Don't allow group membership on non-multicast interfaces. */
1732                 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1733                         error = EOPNOTSUPP;
1734                         break;
1735                 }
1736
1737                 /* Don't let users screw up protocols' entries. */
1738                 if (ifr->ifr_addr.sa_family != AF_LINK) {
1739                         error = EINVAL;
1740                         break;
1741                 }
1742
1743                 if (cmd == SIOCADDMULTI) {
1744                         struct ifmultiaddr *ifma;
1745                         error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1746                 } else {
1747                         error = if_delmulti(ifp, &ifr->ifr_addr);
1748                 }
1749                 if (error == 0)
1750                         getmicrotime(&ifp->if_lastchange);
1751                 break;
1752
1753         case SIOCSIFPHYADDR:
1754         case SIOCDIFPHYADDR:
1755 #ifdef INET6
1756         case SIOCSIFPHYADDR_IN6:
1757 #endif
1758         case SIOCSLIFPHYADDR:
1759         case SIOCSIFMEDIA:
1760         case SIOCSIFGENERIC:
1761                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1762                 if (error)
1763                         break;
1764                 if (ifp->if_ioctl == 0) {
1765                         error = EOPNOTSUPP;
1766                         break;
1767                 }
1768                 ifnet_serialize_all(ifp);
1769                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1770                 ifnet_deserialize_all(ifp);
1771                 if (error == 0)
1772                         getmicrotime(&ifp->if_lastchange);
1773                 break;
1774
1775         case SIOCGIFSTATUS:
1776                 ifs = (struct ifstat *)data;
1777                 ifs->ascii[0] = '\0';
1778                 /* fall through */
1779         case SIOCGIFPSRCADDR:
1780         case SIOCGIFPDSTADDR:
1781         case SIOCGLIFPHYADDR:
1782         case SIOCGIFMEDIA:
1783         case SIOCGIFGENERIC:
1784                 if (ifp->if_ioctl == NULL) {
1785                         error = EOPNOTSUPP;
1786                         break;
1787                 }
1788                 ifnet_serialize_all(ifp);
1789                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1790                 ifnet_deserialize_all(ifp);
1791                 break;
1792
1793         case SIOCSIFLLADDR:
1794                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1795                 if (error)
1796                         break;
1797                 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1798                                      ifr->ifr_addr.sa_len);
1799                 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1800                 break;
1801
1802         default:
1803                 oif_flags = ifp->if_flags;
1804                 if (so->so_proto == 0) {
1805                         error = EOPNOTSUPP;
1806                         break;
1807                 }
1808 #ifndef COMPAT_43
1809                 error = so_pru_control_direct(so, cmd, data, ifp);
1810 #else
1811                 ocmd = cmd;
1812
1813                 switch (cmd) {
1814                 case SIOCSIFDSTADDR:
1815                 case SIOCSIFADDR:
1816                 case SIOCSIFBRDADDR:
1817                 case SIOCSIFNETMASK:
1818 #if BYTE_ORDER != BIG_ENDIAN
1819                         if (ifr->ifr_addr.sa_family == 0 &&
1820                             ifr->ifr_addr.sa_len < 16) {
1821                                 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1822                                 ifr->ifr_addr.sa_len = 16;
1823                         }
1824 #else
1825                         if (ifr->ifr_addr.sa_len == 0)
1826                                 ifr->ifr_addr.sa_len = 16;
1827 #endif
1828                         break;
1829                 case OSIOCGIFADDR:
1830                         cmd = SIOCGIFADDR;
1831                         break;
1832                 case OSIOCGIFDSTADDR:
1833                         cmd = SIOCGIFDSTADDR;
1834                         break;
1835                 case OSIOCGIFBRDADDR:
1836                         cmd = SIOCGIFBRDADDR;
1837                         break;
1838                 case OSIOCGIFNETMASK:
1839                         cmd = SIOCGIFNETMASK;
1840                         break;
1841                 default:
1842                         break;
1843                 }
1844
1845                 error = so_pru_control_direct(so, cmd, data, ifp);
1846
1847                 switch (ocmd) {
1848                 case OSIOCGIFADDR:
1849                 case OSIOCGIFDSTADDR:
1850                 case OSIOCGIFBRDADDR:
1851                 case OSIOCGIFNETMASK:
1852                         *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1853                         break;
1854                 }
1855 #endif /* COMPAT_43 */
1856
1857                 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1858 #ifdef INET6
1859                         DELAY(100);/* XXX: temporary workaround for fxp issue*/
1860                         if (ifp->if_flags & IFF_UP) {
1861                                 crit_enter();
1862                                 in6_if_up(ifp);
1863                                 crit_exit();
1864                         }
1865 #endif
1866                 }
1867                 break;
1868         }
1869
1870         mtx_unlock(&ifp->if_ioctl_mtx);
1871         return (error);
1872 }
1873
1874 /*
1875  * Set/clear promiscuous mode on interface ifp based on the truth value
1876  * of pswitch.  The calls are reference counted so that only the first
1877  * "on" request actually has an effect, as does the final "off" request.
1878  * Results are undefined if the "off" and "on" requests are not matched.
1879  */
1880 int
1881 ifpromisc(struct ifnet *ifp, int pswitch)
1882 {
1883         struct ifreq ifr;
1884         int error;
1885         int oldflags;
1886
1887         oldflags = ifp->if_flags;
1888         if (ifp->if_flags & IFF_PPROMISC) {
1889                 /* Do nothing if device is in permanently promiscuous mode */
1890                 ifp->if_pcount += pswitch ? 1 : -1;
1891                 return (0);
1892         }
1893         if (pswitch) {
1894                 /*
1895                  * If the device is not configured up, we cannot put it in
1896                  * promiscuous mode.
1897                  */
1898                 if ((ifp->if_flags & IFF_UP) == 0)
1899                         return (ENETDOWN);
1900                 if (ifp->if_pcount++ != 0)
1901                         return (0);
1902                 ifp->if_flags |= IFF_PROMISC;
1903                 log(LOG_INFO, "%s: promiscuous mode enabled\n",
1904                     ifp->if_xname);
1905         } else {
1906                 if (--ifp->if_pcount > 0)
1907                         return (0);
1908                 ifp->if_flags &= ~IFF_PROMISC;
1909                 log(LOG_INFO, "%s: promiscuous mode disabled\n",
1910                     ifp->if_xname);
1911         }
1912         ifr.ifr_flags = ifp->if_flags;
1913         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1914         ifnet_serialize_all(ifp);
1915         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1916         ifnet_deserialize_all(ifp);
1917         if (error == 0)
1918                 rt_ifmsg(ifp);
1919         else
1920                 ifp->if_flags = oldflags;
1921         return error;
1922 }
1923
1924 /*
1925  * Return interface configuration
1926  * of system.  List may be used
1927  * in later ioctl's (above) to get
1928  * other information.
1929  */
1930 static int
1931 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1932 {
1933         struct ifconf *ifc = (struct ifconf *)data;
1934         struct ifnet *ifp;
1935         struct sockaddr *sa;
1936         struct ifreq ifr, *ifrp;
1937         int space = ifc->ifc_len, error = 0;
1938
1939         ifrp = ifc->ifc_req;
1940         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1941                 struct ifaddr_container *ifac;
1942                 int addrs;
1943
1944                 if (space <= sizeof ifr)
1945                         break;
1946
1947                 /*
1948                  * Zero the stack declared structure first to prevent
1949                  * memory disclosure.
1950                  */
1951                 bzero(&ifr, sizeof(ifr));
1952                 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1953                     >= sizeof(ifr.ifr_name)) {
1954                         error = ENAMETOOLONG;
1955                         break;
1956                 }
1957
1958                 addrs = 0;
1959                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1960                         struct ifaddr *ifa = ifac->ifa;
1961
1962                         if (space <= sizeof ifr)
1963                                 break;
1964                         sa = ifa->ifa_addr;
1965                         if (cred->cr_prison &&
1966                             prison_if(cred, sa))
1967                                 continue;
1968                         addrs++;
1969 #ifdef COMPAT_43
1970                         if (cmd == OSIOCGIFCONF) {
1971                                 struct osockaddr *osa =
1972                                          (struct osockaddr *)&ifr.ifr_addr;
1973                                 ifr.ifr_addr = *sa;
1974                                 osa->sa_family = sa->sa_family;
1975                                 error = copyout(&ifr, ifrp, sizeof ifr);
1976                                 ifrp++;
1977                         } else
1978 #endif
1979                         if (sa->sa_len <= sizeof(*sa)) {
1980                                 ifr.ifr_addr = *sa;
1981                                 error = copyout(&ifr, ifrp, sizeof ifr);
1982                                 ifrp++;
1983                         } else {
1984                                 if (space < (sizeof ifr) + sa->sa_len -
1985                                             sizeof(*sa))
1986                                         break;
1987                                 space -= sa->sa_len - sizeof(*sa);
1988                                 error = copyout(&ifr, ifrp,
1989                                                 sizeof ifr.ifr_name);
1990                                 if (error == 0)
1991                                         error = copyout(sa, &ifrp->ifr_addr,
1992                                                         sa->sa_len);
1993                                 ifrp = (struct ifreq *)
1994                                         (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1995                         }
1996                         if (error)
1997                                 break;
1998                         space -= sizeof ifr;
1999                 }
2000                 if (error)
2001                         break;
2002                 if (!addrs) {
2003                         bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
2004                         error = copyout(&ifr, ifrp, sizeof ifr);
2005                         if (error)
2006                                 break;
2007                         space -= sizeof ifr;
2008                         ifrp++;
2009                 }
2010         }
2011         ifc->ifc_len -= space;
2012         return (error);
2013 }
2014
2015 /*
2016  * Just like if_promisc(), but for all-multicast-reception mode.
2017  */
2018 int
2019 if_allmulti(struct ifnet *ifp, int onswitch)
2020 {
2021         int error = 0;
2022         struct ifreq ifr;
2023
2024         crit_enter();
2025
2026         if (onswitch) {
2027                 if (ifp->if_amcount++ == 0) {
2028                         ifp->if_flags |= IFF_ALLMULTI;
2029                         ifr.ifr_flags = ifp->if_flags;
2030                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
2031                         ifnet_serialize_all(ifp);
2032                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2033                                               NULL);
2034                         ifnet_deserialize_all(ifp);
2035                 }
2036         } else {
2037                 if (ifp->if_amcount > 1) {
2038                         ifp->if_amcount--;
2039                 } else {
2040                         ifp->if_amcount = 0;
2041                         ifp->if_flags &= ~IFF_ALLMULTI;
2042                         ifr.ifr_flags = ifp->if_flags;
2043                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
2044                         ifnet_serialize_all(ifp);
2045                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2046                                               NULL);
2047                         ifnet_deserialize_all(ifp);
2048                 }
2049         }
2050
2051         crit_exit();
2052
2053         if (error == 0)
2054                 rt_ifmsg(ifp);
2055         return error;
2056 }
2057
2058 /*
2059  * Add a multicast listenership to the interface in question.
2060  * The link layer provides a routine which converts
2061  */
2062 int
2063 if_addmulti(
2064         struct ifnet *ifp,      /* interface to manipulate */
2065         struct sockaddr *sa,    /* address to add */
2066         struct ifmultiaddr **retifma)
2067 {
2068         struct sockaddr *llsa, *dupsa;
2069         int error;
2070         struct ifmultiaddr *ifma;
2071
2072         /*
2073          * If the matching multicast address already exists
2074          * then don't add a new one, just add a reference
2075          */
2076         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2077                 if (sa_equal(sa, ifma->ifma_addr)) {
2078                         ifma->ifma_refcount++;
2079                         if (retifma)
2080                                 *retifma = ifma;
2081                         return 0;
2082                 }
2083         }
2084
2085         /*
2086          * Give the link layer a chance to accept/reject it, and also
2087          * find out which AF_LINK address this maps to, if it isn't one
2088          * already.
2089          */
2090         if (ifp->if_resolvemulti) {
2091                 ifnet_serialize_all(ifp);
2092                 error = ifp->if_resolvemulti(ifp, &llsa, sa);
2093                 ifnet_deserialize_all(ifp);
2094                 if (error) 
2095                         return error;
2096         } else {
2097                 llsa = NULL;
2098         }
2099
2100         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2101         dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2102         bcopy(sa, dupsa, sa->sa_len);
2103
2104         ifma->ifma_addr = dupsa;
2105         ifma->ifma_lladdr = llsa;
2106         ifma->ifma_ifp = ifp;
2107         ifma->ifma_refcount = 1;
2108         ifma->ifma_protospec = 0;
2109         rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2110
2111         /*
2112          * Some network interfaces can scan the address list at
2113          * interrupt time; lock them out.
2114          */
2115         crit_enter();
2116         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2117         crit_exit();
2118         if (retifma)
2119                 *retifma = ifma;
2120
2121         if (llsa != NULL) {
2122                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2123                         if (sa_equal(ifma->ifma_addr, llsa))
2124                                 break;
2125                 }
2126                 if (ifma) {
2127                         ifma->ifma_refcount++;
2128                 } else {
2129                         ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2130                         dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2131                         bcopy(llsa, dupsa, llsa->sa_len);
2132                         ifma->ifma_addr = dupsa;
2133                         ifma->ifma_ifp = ifp;
2134                         ifma->ifma_refcount = 1;
2135                         crit_enter();
2136                         TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2137                         crit_exit();
2138                 }
2139         }
2140         /*
2141          * We are certain we have added something, so call down to the
2142          * interface to let them know about it.
2143          */
2144         crit_enter();
2145         ifnet_serialize_all(ifp);
2146         if (ifp->if_ioctl)
2147                 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2148         ifnet_deserialize_all(ifp);
2149         crit_exit();
2150
2151         return 0;
2152 }
2153
2154 /*
2155  * Remove a reference to a multicast address on this interface.  Yell
2156  * if the request does not match an existing membership.
2157  */
2158 int
2159 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2160 {
2161         struct ifmultiaddr *ifma;
2162
2163         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2164                 if (sa_equal(sa, ifma->ifma_addr))
2165                         break;
2166         if (ifma == NULL)
2167                 return ENOENT;
2168
2169         if (ifma->ifma_refcount > 1) {
2170                 ifma->ifma_refcount--;
2171                 return 0;
2172         }
2173
2174         rt_newmaddrmsg(RTM_DELMADDR, ifma);
2175         sa = ifma->ifma_lladdr;
2176         crit_enter();
2177         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2178         /*
2179          * Make sure the interface driver is notified
2180          * in the case of a link layer mcast group being left.
2181          */
2182         if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2183                 ifnet_serialize_all(ifp);
2184                 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2185                 ifnet_deserialize_all(ifp);
2186         }
2187         crit_exit();
2188         kfree(ifma->ifma_addr, M_IFMADDR);
2189         kfree(ifma, M_IFMADDR);
2190         if (sa == NULL)
2191                 return 0;
2192
2193         /*
2194          * Now look for the link-layer address which corresponds to
2195          * this network address.  It had been squirreled away in
2196          * ifma->ifma_lladdr for this purpose (so we don't have
2197          * to call ifp->if_resolvemulti() again), and we saved that
2198          * value in sa above.  If some nasty deleted the
2199          * link-layer address out from underneath us, we can deal because
2200          * the address we stored was is not the same as the one which was
2201          * in the record for the link-layer address.  (So we don't complain
2202          * in that case.)
2203          */
2204         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2205                 if (sa_equal(sa, ifma->ifma_addr))
2206                         break;
2207         if (ifma == NULL)
2208                 return 0;
2209
2210         if (ifma->ifma_refcount > 1) {
2211                 ifma->ifma_refcount--;
2212                 return 0;
2213         }
2214
2215         crit_enter();
2216         ifnet_serialize_all(ifp);
2217         TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2218         ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2219         ifnet_deserialize_all(ifp);
2220         crit_exit();
2221         kfree(ifma->ifma_addr, M_IFMADDR);
2222         kfree(sa, M_IFMADDR);
2223         kfree(ifma, M_IFMADDR);
2224
2225         return 0;
2226 }
2227
2228 /*
2229  * Delete all multicast group membership for an interface.
2230  * Should be used to quickly flush all multicast filters.
2231  */
2232 void
2233 if_delallmulti(struct ifnet *ifp)
2234 {
2235         struct ifmultiaddr *ifma;
2236         struct ifmultiaddr *next;
2237
2238         TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2239                 if_delmulti(ifp, ifma->ifma_addr);
2240 }
2241
2242
2243 /*
2244  * Set the link layer address on an interface.
2245  *
2246  * At this time we only support certain types of interfaces,
2247  * and we don't allow the length of the address to change.
2248  */
2249 int
2250 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2251 {
2252         struct sockaddr_dl *sdl;
2253         struct ifreq ifr;
2254
2255         sdl = IF_LLSOCKADDR(ifp);
2256         if (sdl == NULL)
2257                 return (EINVAL);
2258         if (len != sdl->sdl_alen)       /* don't allow length to change */
2259                 return (EINVAL);
2260         switch (ifp->if_type) {
2261         case IFT_ETHER:                 /* these types use struct arpcom */
2262         case IFT_XETHER:
2263         case IFT_L2VLAN:
2264                 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2265                 bcopy(lladdr, LLADDR(sdl), len);
2266                 break;
2267         default:
2268                 return (ENODEV);
2269         }
2270         /*
2271          * If the interface is already up, we need
2272          * to re-init it in order to reprogram its
2273          * address filter.
2274          */
2275         ifnet_serialize_all(ifp);
2276         if ((ifp->if_flags & IFF_UP) != 0) {
2277 #ifdef INET
2278                 struct ifaddr_container *ifac;
2279 #endif
2280
2281                 ifp->if_flags &= ~IFF_UP;
2282                 ifr.ifr_flags = ifp->if_flags;
2283                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2284                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2285                               NULL);
2286                 ifp->if_flags |= IFF_UP;
2287                 ifr.ifr_flags = ifp->if_flags;
2288                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2289                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2290                                  NULL);
2291 #ifdef INET
2292                 /*
2293                  * Also send gratuitous ARPs to notify other nodes about
2294                  * the address change.
2295                  */
2296                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2297                         struct ifaddr *ifa = ifac->ifa;
2298
2299                         if (ifa->ifa_addr != NULL &&
2300                             ifa->ifa_addr->sa_family == AF_INET)
2301                                 arp_ifinit(ifp, ifa);
2302                 }
2303 #endif
2304         }
2305         ifnet_deserialize_all(ifp);
2306         return (0);
2307 }
2308
2309 struct ifmultiaddr *
2310 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2311 {
2312         struct ifmultiaddr *ifma;
2313
2314         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2315                 if (sa_equal(ifma->ifma_addr, sa))
2316                         break;
2317
2318         return ifma;
2319 }
2320
2321 /*
2322  * This function locates the first real ethernet MAC from a network
2323  * card and loads it into node, returning 0 on success or ENOENT if
2324  * no suitable interfaces were found.  It is used by the uuid code to
2325  * generate a unique 6-byte number.
2326  */
2327 int
2328 if_getanyethermac(uint16_t *node, int minlen)
2329 {
2330         struct ifnet *ifp;
2331         struct sockaddr_dl *sdl;
2332
2333         TAILQ_FOREACH(ifp, &ifnet, if_link) {
2334                 if (ifp->if_type != IFT_ETHER)
2335                         continue;
2336                 sdl = IF_LLSOCKADDR(ifp);
2337                 if (sdl->sdl_alen < minlen)
2338                         continue;
2339                 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2340                       minlen);
2341                 return(0);
2342         }
2343         return (ENOENT);
2344 }
2345
2346 /*
2347  * The name argument must be a pointer to storage which will last as
2348  * long as the interface does.  For physical devices, the result of
2349  * device_get_name(dev) is a good choice and for pseudo-devices a
2350  * static string works well.
2351  */
2352 void
2353 if_initname(struct ifnet *ifp, const char *name, int unit)
2354 {
2355         ifp->if_dname = name;
2356         ifp->if_dunit = unit;
2357         if (unit != IF_DUNIT_NONE)
2358                 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2359         else
2360                 strlcpy(ifp->if_xname, name, IFNAMSIZ);
2361 }
2362
2363 int
2364 if_printf(struct ifnet *ifp, const char *fmt, ...)
2365 {
2366         __va_list ap;
2367         int retval;
2368
2369         retval = kprintf("%s: ", ifp->if_xname);
2370         __va_start(ap, fmt);
2371         retval += kvprintf(fmt, ap);
2372         __va_end(ap);
2373         return (retval);
2374 }
2375
2376 struct ifnet *
2377 if_alloc(uint8_t type)
2378 {
2379         struct ifnet *ifp;
2380         size_t size;
2381
2382         /*
2383          * XXX temporary hack until arpcom is setup in if_l2com
2384          */
2385         if (type == IFT_ETHER)
2386                 size = sizeof(struct arpcom);
2387         else
2388                 size = sizeof(struct ifnet);
2389
2390         ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2391
2392         ifp->if_type = type;
2393
2394         if (if_com_alloc[type] != NULL) {
2395                 ifp->if_l2com = if_com_alloc[type](type, ifp);
2396                 if (ifp->if_l2com == NULL) {
2397                         kfree(ifp, M_IFNET);
2398                         return (NULL);
2399                 }
2400         }
2401         return (ifp);
2402 }
2403
2404 void
2405 if_free(struct ifnet *ifp)
2406 {
2407         kfree(ifp, M_IFNET);
2408 }
2409
2410 void
2411 ifq_set_classic(struct ifaltq *ifq)
2412 {
2413         ifq->altq_enqueue = ifq_classic_enqueue;
2414         ifq->altq_dequeue = ifq_classic_dequeue;
2415         ifq->altq_request = ifq_classic_request;
2416 }
2417
2418 int
2419 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2420                     struct altq_pktattr *pa __unused)
2421 {
2422         logifq(enqueue, ifq);
2423         if (IF_QFULL(ifq)) {
2424                 m_freem(m);
2425                 return(ENOBUFS);
2426         } else {
2427                 IF_ENQUEUE(ifq, m);
2428                 return(0);
2429         }       
2430 }
2431
2432 struct mbuf *
2433 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2434 {
2435         struct mbuf *m;
2436
2437         switch (op) {
2438         case ALTDQ_POLL:
2439                 IF_POLL(ifq, m);
2440                 break;
2441         case ALTDQ_REMOVE:
2442                 logifq(dequeue, ifq);
2443                 IF_DEQUEUE(ifq, m);
2444                 break;
2445         default:
2446                 panic("unsupported ALTQ dequeue op: %d", op);
2447         }
2448         KKASSERT(mpolled == NULL || mpolled == m);
2449         return(m);
2450 }
2451
2452 int
2453 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2454 {
2455         switch (req) {
2456         case ALTRQ_PURGE:
2457                 IF_DRAIN(ifq);
2458                 break;
2459         default:
2460                 panic("unsupported ALTQ request: %d", req);
2461         }
2462         return(0);
2463 }
2464
2465 int
2466 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2467 {
2468         struct ifaltq *ifq = &ifp->if_snd;
2469         int running = 0, error, start = 0;
2470
2471         ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2472
2473         ALTQ_LOCK(ifq);
2474         error = ifq_enqueue_locked(ifq, m, pa);
2475         if (error) {
2476                 ALTQ_UNLOCK(ifq);
2477                 return error;
2478         }
2479         if (!ifq->altq_started) {
2480                 /*
2481                  * Hold the interlock of ifnet.if_start
2482                  */
2483                 ifq->altq_started = 1;
2484                 start = 1;
2485         }
2486         ALTQ_UNLOCK(ifq);
2487
2488         ifp->if_obytes += m->m_pkthdr.len;
2489         if (m->m_flags & M_MCAST)
2490                 ifp->if_omcasts++;
2491
2492         if (!start) {
2493                 logifstart(avoid, ifp);
2494                 return 0;
2495         }
2496
2497         if (ifq_dispatch_schedonly) {
2498                 /*
2499                  * Always schedule ifnet.if_start on ifnet's CPU,
2500                  * short circuit the rest of this function.
2501                  */
2502                 logifstart(sched, ifp);
2503                 if_start_schedule(ifp);
2504                 return 0;
2505         }
2506
2507         /*
2508          * Try to do direct ifnet.if_start first, if there is
2509          * contention on ifnet's serializer, ifnet.if_start will
2510          * be scheduled on ifnet's CPU.
2511          */
2512         if (!ifnet_tryserialize_tx(ifp)) {
2513                 /*
2514                  * ifnet serializer contention happened,
2515                  * ifnet.if_start is scheduled on ifnet's
2516                  * CPU, and we keep going.
2517                  */
2518                 logifstart(contend_sched, ifp);
2519                 if_start_schedule(ifp);
2520                 return 0;
2521         }
2522
2523         if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2524                 logifstart(run, ifp);
2525                 ifp->if_start(ifp);
2526                 if ((ifp->if_flags &
2527                      (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2528                         running = 1;
2529         }
2530
2531         ifnet_deserialize_tx(ifp);
2532
2533         if (ifq_dispatch_schednochk || if_start_need_schedule(ifq, running)) {
2534                 /*
2535                  * More data need to be transmitted, ifnet.if_start is
2536                  * scheduled on ifnet's CPU, and we keep going.
2537                  * NOTE: ifnet.if_start interlock is not released.
2538                  */
2539                 logifstart(sched, ifp);
2540                 if_start_schedule(ifp);
2541         }
2542         return 0;
2543 }
2544
2545 void *
2546 ifa_create(int size, int flags)
2547 {
2548         struct ifaddr *ifa;
2549         int i;
2550
2551         KASSERT(size >= sizeof(*ifa), ("ifaddr size too small\n"));
2552
2553         ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2554         if (ifa == NULL)
2555                 return NULL;
2556
2557         ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2558                                       M_IFADDR, M_WAITOK | M_ZERO);
2559         ifa->ifa_ncnt = ncpus;
2560         for (i = 0; i < ncpus; ++i) {
2561                 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2562
2563                 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2564                 ifac->ifa = ifa;
2565                 ifac->ifa_refcnt = 1;
2566         }
2567 #ifdef IFADDR_DEBUG
2568         kprintf("alloc ifa %p %d\n", ifa, size);
2569 #endif
2570         return ifa;
2571 }
2572
2573 void
2574 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2575 {
2576         struct ifaddr *ifa = ifac->ifa;
2577
2578         KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2579         KKASSERT(ifac->ifa_refcnt == 0);
2580         KASSERT(ifac->ifa_listmask == 0,
2581                 ("ifa is still on %#x lists\n", ifac->ifa_listmask));
2582
2583         ifac->ifa_magic = IFA_CONTAINER_DEAD;
2584
2585 #ifdef IFADDR_DEBUG_VERBOSE
2586         kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2587 #endif
2588
2589         KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2590                 ("invalid # of ifac, %d\n", ifa->ifa_ncnt));
2591         if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2592 #ifdef IFADDR_DEBUG
2593                 kprintf("free ifa %p\n", ifa);
2594 #endif
2595                 kfree(ifa->ifa_containers, M_IFADDR);
2596                 kfree(ifa, M_IFADDR);
2597         }
2598 }
2599
2600 static void
2601 ifa_iflink_dispatch(netmsg_t nmsg)
2602 {
2603         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2604         struct ifaddr *ifa = msg->ifa;
2605         struct ifnet *ifp = msg->ifp;
2606         int cpu = mycpuid;
2607         struct ifaddr_container *ifac;
2608
2609         crit_enter();
2610
2611         ifac = &ifa->ifa_containers[cpu];
2612         ASSERT_IFAC_VALID(ifac);
2613         KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2614                 ("ifaddr is on if_addrheads\n"));
2615
2616         ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2617         if (msg->tail)
2618                 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2619         else
2620                 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2621
2622         crit_exit();
2623
2624         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2625 }
2626
2627 void
2628 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2629 {
2630         struct netmsg_ifaddr msg;
2631
2632         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2633                     0, ifa_iflink_dispatch);
2634         msg.ifa = ifa;
2635         msg.ifp = ifp;
2636         msg.tail = tail;
2637
2638         ifa_domsg(&msg.base.lmsg, 0);
2639 }
2640
2641 static void
2642 ifa_ifunlink_dispatch(netmsg_t nmsg)
2643 {
2644         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2645         struct ifaddr *ifa = msg->ifa;
2646         struct ifnet *ifp = msg->ifp;
2647         int cpu = mycpuid;
2648         struct ifaddr_container *ifac;
2649
2650         crit_enter();
2651
2652         ifac = &ifa->ifa_containers[cpu];
2653         ASSERT_IFAC_VALID(ifac);
2654         KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2655                 ("ifaddr is not on if_addrhead\n"));
2656
2657         TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2658         ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2659
2660         crit_exit();
2661
2662         ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2663 }
2664
2665 void
2666 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2667 {
2668         struct netmsg_ifaddr msg;
2669
2670         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2671                     0, ifa_ifunlink_dispatch);
2672         msg.ifa = ifa;
2673         msg.ifp = ifp;
2674
2675         ifa_domsg(&msg.base.lmsg, 0);
2676 }
2677
2678 static void
2679 ifa_destroy_dispatch(netmsg_t nmsg)
2680 {
2681         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2682
2683         IFAFREE(msg->ifa);
2684         ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2685 }
2686
2687 void
2688 ifa_destroy(struct ifaddr *ifa)
2689 {
2690         struct netmsg_ifaddr msg;
2691
2692         netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2693                     0, ifa_destroy_dispatch);
2694         msg.ifa = ifa;
2695
2696         ifa_domsg(&msg.base.lmsg, 0);
2697 }
2698
2699 struct lwkt_port *
2700 ifnet_portfn(int cpu)
2701 {
2702         return &ifnet_threads[cpu].td_msgport;
2703 }
2704
2705 void
2706 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2707 {
2708         KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2709
2710         if (next_cpu < ncpus)
2711                 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2712         else
2713                 lwkt_replymsg(lmsg, 0);
2714 }
2715
2716 int
2717 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2718 {
2719         KKASSERT(cpu < ncpus);
2720         return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2721 }
2722
2723 void
2724 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2725 {
2726         KKASSERT(cpu < ncpus);
2727         lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2728 }
2729
2730 /*
2731  * Generic netmsg service loop.  Some protocols may roll their own but all
2732  * must do the basic command dispatch function call done here.
2733  */
2734 static void
2735 ifnet_service_loop(void *arg __unused)
2736 {
2737         netmsg_t msg;
2738
2739         while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2740                 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2741                 msg->base.nm_dispatch(msg);
2742         }
2743 }
2744
2745 static void
2746 ifnetinit(void *dummy __unused)
2747 {
2748         int i;
2749
2750         for (i = 0; i < ncpus; ++i) {
2751                 struct thread *thr = &ifnet_threads[i];
2752
2753                 lwkt_create(ifnet_service_loop, NULL, NULL,
2754                             thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2755                             i, "ifnet %d", i);
2756                 netmsg_service_port_init(&thr->td_msgport);
2757                 lwkt_schedule(thr);
2758         }
2759 }
2760
2761 struct ifnet *
2762 ifnet_byindex(unsigned short idx)
2763 {
2764         if (idx > if_index)
2765                 return NULL;
2766         return ifindex2ifnet[idx];
2767 }
2768
2769 struct ifaddr *
2770 ifaddr_byindex(unsigned short idx)
2771 {
2772         struct ifnet *ifp;
2773
2774         ifp = ifnet_byindex(idx);
2775         if (!ifp)
2776                 return NULL;
2777         return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2778 }
2779
2780 void
2781 if_register_com_alloc(u_char type,
2782     if_com_alloc_t *a, if_com_free_t *f)
2783 {
2784
2785         KASSERT(if_com_alloc[type] == NULL,
2786             ("if_register_com_alloc: %d already registered", type));
2787         KASSERT(if_com_free[type] == NULL,
2788             ("if_register_com_alloc: %d free already registered", type));
2789
2790         if_com_alloc[type] = a;
2791         if_com_free[type] = f;
2792 }
2793
2794 void
2795 if_deregister_com_alloc(u_char type)
2796 {
2797
2798         KASSERT(if_com_alloc[type] != NULL,
2799             ("if_deregister_com_alloc: %d not registered", type));
2800         KASSERT(if_com_free[type] != NULL,
2801             ("if_deregister_com_alloc: %d free not registered", type));
2802         if_com_alloc[type] = NULL;
2803         if_com_free[type] = NULL;
2804 }