Fix ifaddr_byindex().
[dragonfly.git] / sys / net / if.c
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *      @(#)if.c        8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  * $DragonFly: src/sys/net/if.c,v 1.84 2008/11/15 11:58:16 sephe Exp $
36  */
37
38 #include "opt_compat.h"
39 #include "opt_inet6.h"
40 #include "opt_inet.h"
41 #include "opt_polling.h"
42 #include "opt_ifpoll.h"
43
44 #include <sys/param.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/priv.h>
50 #include <sys/protosw.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/socketops.h>
54 #include <sys/protosw.h>
55 #include <sys/kernel.h>
56 #include <sys/ktr.h>
57 #include <sys/sockio.h>
58 #include <sys/syslog.h>
59 #include <sys/sysctl.h>
60 #include <sys/domain.h>
61 #include <sys/thread.h>
62 #include <sys/thread2.h>
63 #include <sys/serialize.h>
64 #include <sys/msgport2.h>
65 #include <sys/bus.h>
66
67 #include <net/if.h>
68 #include <net/if_arp.h>
69 #include <net/if_dl.h>
70 #include <net/if_types.h>
71 #include <net/if_var.h>
72 #include <net/ifq_var.h>
73 #include <net/radix.h>
74 #include <net/route.h>
75 #include <net/if_clone.h>
76 #include <net/netisr.h>
77 #include <net/netmsg2.h>
78
79 #include <machine/atomic.h>
80 #include <machine/stdarg.h>
81 #include <machine/smp.h>
82
83 #if defined(INET) || defined(INET6)
84 /*XXX*/
85 #include <netinet/in.h>
86 #include <netinet/in_var.h>
87 #include <netinet/if_ether.h>
88 #ifdef INET6
89 #include <netinet6/in6_var.h>
90 #include <netinet6/in6_ifattach.h>
91 #endif
92 #endif
93
94 #if defined(COMPAT_43)
95 #include <emulation/43bsd/43bsd_socket.h>
96 #endif /* COMPAT_43 */
97
98 struct netmsg_ifaddr {
99         struct netmsg   netmsg;
100         struct ifaddr   *ifa;
101         struct ifnet    *ifp;
102         int             tail;
103 };
104
105 /*
106  * System initialization
107  */
108 static void     if_attachdomain(void *);
109 static void     if_attachdomain1(struct ifnet *);
110 static int      ifconf(u_long, caddr_t, struct ucred *);
111 static void     ifinit(void *);
112 static void     ifnetinit(void *);
113 static void     if_slowtimo(void *);
114 static void     link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
115 static int      if_rtdel(struct radix_node *, void *);
116
117 #ifdef INET6
118 /*
119  * XXX: declare here to avoid to include many inet6 related files..
120  * should be more generalized?
121  */
122 extern void     nd6_setmtu(struct ifnet *);
123 #endif
124
125 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
126 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
127
128 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
129 /* Must be after netisr_init */
130 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
131
132 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
133 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
134 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
135
136 int                     ifqmaxlen = IFQ_MAXLEN;
137 struct ifnethead        ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
138
139 /* In ifq_dispatch(), try to do direct ifnet.if_start first */
140 static int              ifq_dispatch_schedonly = 0;
141 SYSCTL_INT(_net_link_generic, OID_AUTO, ifq_dispatch_schedonly, CTLFLAG_RW,
142            &ifq_dispatch_schedonly, 0, "");
143
144 /* In ifq_dispatch(), schedule ifnet.if_start without checking ifnet.if_snd */
145 static int              ifq_dispatch_schednochk = 0;
146 SYSCTL_INT(_net_link_generic, OID_AUTO, ifq_dispatch_schednochk, CTLFLAG_RW,
147            &ifq_dispatch_schednochk, 0, "");
148
149 /* In if_devstart(), try to do direct ifnet.if_start first */
150 static int              if_devstart_schedonly = 0;
151 SYSCTL_INT(_net_link_generic, OID_AUTO, if_devstart_schedonly, CTLFLAG_RW,
152            &if_devstart_schedonly, 0, "");
153
154 /* In if_devstart(), schedule ifnet.if_start without checking ifnet.if_snd */
155 static int              if_devstart_schednochk = 0;
156 SYSCTL_INT(_net_link_generic, OID_AUTO, if_devstart_schednochk, CTLFLAG_RW,
157            &if_devstart_schednochk, 0, "");
158
159 #ifdef SMP
160 /* Schedule ifnet.if_start on the current CPU */
161 static int              if_start_oncpu_sched = 0;
162 SYSCTL_INT(_net_link_generic, OID_AUTO, if_start_oncpu_sched, CTLFLAG_RW,
163            &if_start_oncpu_sched, 0, "");
164 #endif
165
166 struct callout          if_slowtimo_timer;
167
168 int                     if_index = 0;
169 struct ifnet            **ifindex2ifnet = NULL;
170 static struct thread    ifnet_threads[MAXCPU];
171 static int              ifnet_mpsafe_thread = NETMSG_SERVICE_MPSAFE;
172
173 #define IFQ_KTR_STRING          "ifq=%p"
174 #define IFQ_KTR_ARG_SIZE        (sizeof(void *))
175 #ifndef KTR_IFQ
176 #define KTR_IFQ                 KTR_ALL
177 #endif
178 KTR_INFO_MASTER(ifq);
179 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARG_SIZE);
180 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARG_SIZE);
181 #define logifq(name, arg)       KTR_LOG(ifq_ ## name, arg)
182
183 #define IF_START_KTR_STRING     "ifp=%p"
184 #define IF_START_KTR_ARG_SIZE   (sizeof(void *))
185 #ifndef KTR_IF_START
186 #define KTR_IF_START            KTR_ALL
187 #endif
188 KTR_INFO_MASTER(if_start);
189 KTR_INFO(KTR_IF_START, if_start, run, 0,
190          IF_START_KTR_STRING, IF_START_KTR_ARG_SIZE);
191 KTR_INFO(KTR_IF_START, if_start, sched, 1,
192          IF_START_KTR_STRING, IF_START_KTR_ARG_SIZE);
193 KTR_INFO(KTR_IF_START, if_start, avoid, 2,
194          IF_START_KTR_STRING, IF_START_KTR_ARG_SIZE);
195 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
196          IF_START_KTR_STRING, IF_START_KTR_ARG_SIZE);
197 #ifdef SMP
198 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
199          IF_START_KTR_STRING, IF_START_KTR_ARG_SIZE);
200 #endif
201 #define logifstart(name, arg)   KTR_LOG(if_start_ ## name, arg)
202
203 /*
204  * Network interface utility routines.
205  *
206  * Routines with ifa_ifwith* names take sockaddr *'s as
207  * parameters.
208  */
209 /* ARGSUSED*/
210 void
211 ifinit(void *dummy)
212 {
213         struct ifnet *ifp;
214
215         callout_init(&if_slowtimo_timer);
216
217         crit_enter();
218         TAILQ_FOREACH(ifp, &ifnet, if_link) {
219                 if (ifp->if_snd.ifq_maxlen == 0) {
220                         if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
221                         ifp->if_snd.ifq_maxlen = ifqmaxlen;
222                 }
223         }
224         crit_exit();
225
226         if_slowtimo(0);
227 }
228
229 static int
230 if_start_cpuid(struct ifnet *ifp)
231 {
232         return ifp->if_cpuid;
233 }
234
235 #ifdef DEVICE_POLLING
236 static int
237 if_start_cpuid_poll(struct ifnet *ifp)
238 {
239         int poll_cpuid = ifp->if_poll_cpuid;
240
241         if (poll_cpuid >= 0)
242                 return poll_cpuid;
243         else
244                 return ifp->if_cpuid;
245 }
246 #endif
247
248 static void
249 if_start_ipifunc(void *arg)
250 {
251         struct ifnet *ifp = arg;
252         struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].nm_lmsg;
253
254         crit_enter();
255         if (lmsg->ms_flags & MSGF_DONE)
256                 lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
257         crit_exit();
258 }
259
260 /*
261  * Schedule ifnet.if_start on ifnet's CPU
262  */
263 static void
264 if_start_schedule(struct ifnet *ifp)
265 {
266 #ifdef SMP
267         int cpu;
268
269         if (if_start_oncpu_sched)
270                 cpu = mycpuid;
271         else
272                 cpu = ifp->if_start_cpuid(ifp);
273
274         if (cpu != mycpuid)
275                 lwkt_send_ipiq(globaldata_find(cpu), if_start_ipifunc, ifp);
276         else
277 #endif
278         if_start_ipifunc(ifp);
279 }
280
281 /*
282  * NOTE:
283  * This function will release ifnet.if_start interlock,
284  * if ifnet.if_start does not need to be scheduled
285  */
286 static __inline int
287 if_start_need_schedule(struct ifaltq *ifq, int running)
288 {
289         if (!running || ifq_is_empty(ifq)
290 #ifdef ALTQ
291             || ifq->altq_tbr != NULL
292 #endif
293         ) {
294                 ALTQ_LOCK(ifq);
295                 /*
296                  * ifnet.if_start interlock is released, if:
297                  * 1) Hardware can not take any packets, due to
298                  *    o  interface is marked down
299                  *    o  hardware queue is full (IFF_OACTIVE)
300                  *    Under the second situation, hardware interrupt
301                  *    or polling(4) will call/schedule ifnet.if_start
302                  *    when hardware queue is ready
303                  * 2) There is not packet in the ifnet.if_snd.
304                  *    Further ifq_dispatch or ifq_handoff will call/
305                  *    schedule ifnet.if_start
306                  * 3) TBR is used and it does not allow further
307                  *    dequeueing.
308                  *    TBR callout will call ifnet.if_start
309                  */
310                 if (!running || !ifq_data_ready(ifq)) {
311                         ifq->altq_started = 0;
312                         ALTQ_UNLOCK(ifq);
313                         return 0;
314                 }
315                 ALTQ_UNLOCK(ifq);
316         }
317         return 1;
318 }
319
320 static void
321 if_start_dispatch(struct netmsg *nmsg)
322 {
323         struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
324         struct ifnet *ifp = lmsg->u.ms_resultp;
325         struct ifaltq *ifq = &ifp->if_snd;
326         int running = 0;
327
328         crit_enter();
329         lwkt_replymsg(lmsg, 0); /* reply ASAP */
330         crit_exit();
331
332 #ifdef SMP
333         if (!if_start_oncpu_sched && mycpuid != ifp->if_start_cpuid(ifp)) {
334                 /*
335                  * If the ifnet is still up, we need to
336                  * chase its CPU change.
337                  */
338                 if (ifp->if_flags & IFF_UP) {
339                         logifstart(chase_sched, ifp);
340                         if_start_schedule(ifp);
341                         return;
342                 } else {
343                         goto check;
344                 }
345         }
346 #endif
347
348         if (ifp->if_flags & IFF_UP) {
349                 ifnet_serialize_tx(ifp); /* XXX try? */
350                 if ((ifp->if_flags & IFF_OACTIVE) == 0) {
351                         logifstart(run, ifp);
352                         ifp->if_start(ifp);
353                         if ((ifp->if_flags &
354                         (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
355                                 running = 1;
356                 }
357                 ifnet_deserialize_tx(ifp);
358         }
359 #ifdef SMP
360 check:
361 #endif
362         if (if_start_need_schedule(ifq, running)) {
363                 crit_enter();
364                 if (lmsg->ms_flags & MSGF_DONE) { /* XXX necessary? */
365                         logifstart(sched, ifp);
366                         lwkt_sendmsg(ifnet_portfn(mycpuid), lmsg);
367                 }
368                 crit_exit();
369         }
370 }
371
372 /* Device driver ifnet.if_start helper function */
373 void
374 if_devstart(struct ifnet *ifp)
375 {
376         struct ifaltq *ifq = &ifp->if_snd;
377         int running = 0;
378
379         ASSERT_IFNET_SERIALIZED_TX(ifp);
380
381         ALTQ_LOCK(ifq);
382         if (ifq->altq_started || !ifq_data_ready(ifq)) {
383                 logifstart(avoid, ifp);
384                 ALTQ_UNLOCK(ifq);
385                 return;
386         }
387         ifq->altq_started = 1;
388         ALTQ_UNLOCK(ifq);
389
390         if (if_devstart_schedonly) {
391                 /*
392                  * Always schedule ifnet.if_start on ifnet's CPU,
393                  * short circuit the rest of this function.
394                  */
395                 logifstart(sched, ifp);
396                 if_start_schedule(ifp);
397                 return;
398         }
399
400         logifstart(run, ifp);
401         ifp->if_start(ifp);
402
403         if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
404                 running = 1;
405
406         if (if_devstart_schednochk || if_start_need_schedule(ifq, running)) {
407                 /*
408                  * More data need to be transmitted, ifnet.if_start is
409                  * scheduled on ifnet's CPU, and we keep going.
410                  * NOTE: ifnet.if_start interlock is not released.
411                  */
412                 logifstart(sched, ifp);
413                 if_start_schedule(ifp);
414         }
415 }
416
417 static void
418 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
419 {
420         lwkt_serialize_enter(ifp->if_serializer);
421 }
422
423 static void
424 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
425 {
426         lwkt_serialize_exit(ifp->if_serializer);
427 }
428
429 static int
430 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
431 {
432         return lwkt_serialize_try(ifp->if_serializer);
433 }
434
435 #ifdef INVARIANTS
436 static void
437 if_default_serialize_assert(struct ifnet *ifp,
438                             enum ifnet_serialize slz __unused,
439                             boolean_t serialized)
440 {
441         if (serialized)
442                 ASSERT_SERIALIZED(ifp->if_serializer);
443         else
444                 ASSERT_NOT_SERIALIZED(ifp->if_serializer);
445 }
446 #endif
447
448 /*
449  * Attach an interface to the list of "active" interfaces.
450  *
451  * The serializer is optional.  If non-NULL access to the interface
452  * may be MPSAFE.
453  */
454 void
455 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
456 {
457         unsigned socksize, ifasize;
458         int namelen, masklen;
459         struct sockaddr_dl *sdl;
460         struct ifaddr *ifa;
461         struct ifaltq *ifq;
462         int i;
463
464         static int if_indexlim = 8;
465
466         if (ifp->if_serialize != NULL) {
467                 KASSERT(ifp->if_deserialize != NULL &&
468                         ifp->if_tryserialize != NULL &&
469                         ifp->if_serialize_assert != NULL,
470                         ("serialize functions are partially setup\n"));
471
472                 /*
473                  * If the device supplies serialize functions,
474                  * then clear if_serializer to catch any invalid
475                  * usage of this field.
476                  */
477                 KASSERT(serializer == NULL,
478                         ("both serialize functions and default serializer "
479                          "are supplied\n"));
480                 ifp->if_serializer = NULL;
481         } else {
482                 KASSERT(ifp->if_deserialize == NULL &&
483                         ifp->if_tryserialize == NULL &&
484                         ifp->if_serialize_assert == NULL,
485                         ("serialize functions are partially setup\n"));
486                 ifp->if_serialize = if_default_serialize;
487                 ifp->if_deserialize = if_default_deserialize;
488                 ifp->if_tryserialize = if_default_tryserialize;
489 #ifdef INVARIANTS
490                 ifp->if_serialize_assert = if_default_serialize_assert;
491 #endif
492
493                 /*
494                  * The serializer can be passed in from the device,
495                  * allowing the same serializer to be used for both
496                  * the interrupt interlock and the device queue.
497                  * If not specified, the netif structure will use an
498                  * embedded serializer.
499                  */
500                 if (serializer == NULL) {
501                         serializer = &ifp->if_default_serializer;
502                         lwkt_serialize_init(serializer);
503                 }
504                 ifp->if_serializer = serializer;
505         }
506
507         ifp->if_start_cpuid = if_start_cpuid;
508         ifp->if_cpuid = 0;
509
510 #ifdef DEVICE_POLLING
511         /* Device is not in polling mode by default */
512         ifp->if_poll_cpuid = -1;
513         if (ifp->if_poll != NULL)
514                 ifp->if_start_cpuid = if_start_cpuid_poll;
515 #endif
516
517         ifp->if_start_nmsg = kmalloc(ncpus * sizeof(struct netmsg),
518                                      M_LWKTMSG, M_WAITOK);
519         for (i = 0; i < ncpus; ++i) {
520                 netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
521                             0, if_start_dispatch);
522                 ifp->if_start_nmsg[i].nm_lmsg.u.ms_resultp = ifp;
523         }
524
525         TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
526         ifp->if_index = ++if_index;
527
528         /*
529          * XXX -
530          * The old code would work if the interface passed a pre-existing
531          * chain of ifaddrs to this code.  We don't trust our callers to
532          * properly initialize the tailq, however, so we no longer allow
533          * this unlikely case.
534          */
535         ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
536                                     M_IFADDR, M_WAITOK | M_ZERO);
537         for (i = 0; i < ncpus; ++i)
538                 TAILQ_INIT(&ifp->if_addrheads[i]);
539
540         TAILQ_INIT(&ifp->if_prefixhead);
541         LIST_INIT(&ifp->if_multiaddrs);
542         getmicrotime(&ifp->if_lastchange);
543         if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
544                 unsigned int n;
545                 struct ifnet **q;
546
547                 if_indexlim <<= 1;
548
549                 /* grow ifindex2ifnet */
550                 n = if_indexlim * sizeof(*q);
551                 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
552                 if (ifindex2ifnet) {
553                         bcopy(ifindex2ifnet, q, n/2);
554                         kfree(ifindex2ifnet, M_IFADDR);
555                 }
556                 ifindex2ifnet = q;
557         }
558
559         ifindex2ifnet[if_index] = ifp;
560
561         /*
562          * create a Link Level name for this device
563          */
564         namelen = strlen(ifp->if_xname);
565 #define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
566         masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
567         socksize = masklen + ifp->if_addrlen;
568 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
569         if (socksize < sizeof(*sdl))
570                 socksize = sizeof(*sdl);
571         socksize = ROUNDUP(socksize);
572         ifasize = sizeof(struct ifaddr) + 2 * socksize;
573         ifa = ifa_create(ifasize, M_WAITOK);
574         sdl = (struct sockaddr_dl *)(ifa + 1);
575         sdl->sdl_len = socksize;
576         sdl->sdl_family = AF_LINK;
577         bcopy(ifp->if_xname, sdl->sdl_data, namelen);
578         sdl->sdl_nlen = namelen;
579         sdl->sdl_index = ifp->if_index;
580         sdl->sdl_type = ifp->if_type;
581         ifp->if_lladdr = ifa;
582         ifa->ifa_ifp = ifp;
583         ifa->ifa_rtrequest = link_rtrequest;
584         ifa->ifa_addr = (struct sockaddr *)sdl;
585         sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
586         ifa->ifa_netmask = (struct sockaddr *)sdl;
587         sdl->sdl_len = masklen;
588         while (namelen != 0)
589                 sdl->sdl_data[--namelen] = 0xff;
590         ifa_iflink(ifa, ifp, 0 /* Insert head */);
591
592         EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
593         devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
594
595         ifq = &ifp->if_snd;
596         ifq->altq_type = 0;
597         ifq->altq_disc = NULL;
598         ifq->altq_flags &= ALTQF_CANTCHANGE;
599         ifq->altq_tbr = NULL;
600         ifq->altq_ifp = ifp;
601         ifq->altq_started = 0;
602         ifq->altq_prepended = NULL;
603         ALTQ_LOCK_INIT(ifq);
604         ifq_set_classic(ifq);
605
606         if (!SLIST_EMPTY(&domains))
607                 if_attachdomain1(ifp);
608
609         /* Announce the interface. */
610         rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
611 }
612
613 static void
614 if_attachdomain(void *dummy)
615 {
616         struct ifnet *ifp;
617
618         crit_enter();
619         TAILQ_FOREACH(ifp, &ifnet, if_list)
620                 if_attachdomain1(ifp);
621         crit_exit();
622 }
623 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
624         if_attachdomain, NULL);
625
626 static void
627 if_attachdomain1(struct ifnet *ifp)
628 {
629         struct domain *dp;
630
631         crit_enter();
632
633         /* address family dependent data region */
634         bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
635         SLIST_FOREACH(dp, &domains, dom_next)
636                 if (dp->dom_ifattach)
637                         ifp->if_afdata[dp->dom_family] =
638                                 (*dp->dom_ifattach)(ifp);
639         crit_exit();
640 }
641
642 /*
643  * Purge all addresses whose type is _not_ AF_LINK
644  */
645 void
646 if_purgeaddrs_nolink(struct ifnet *ifp)
647 {
648         struct ifaddr_container *ifac, *next;
649
650         TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
651                               ifa_link, next) {
652                 struct ifaddr *ifa = ifac->ifa;
653
654                 /* Leave link ifaddr as it is */
655                 if (ifa->ifa_addr->sa_family == AF_LINK)
656                         continue;
657 #ifdef INET
658                 /* XXX: Ugly!! ad hoc just for INET */
659                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
660                         struct ifaliasreq ifr;
661 #ifdef IFADDR_DEBUG_VERBOSE
662                         int i;
663
664                         kprintf("purge in4 addr %p: ", ifa);
665                         for (i = 0; i < ncpus; ++i)
666                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
667                         kprintf("\n");
668 #endif
669
670                         bzero(&ifr, sizeof ifr);
671                         ifr.ifra_addr = *ifa->ifa_addr;
672                         if (ifa->ifa_dstaddr)
673                                 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
674                         if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
675                                        NULL) == 0)
676                                 continue;
677                 }
678 #endif /* INET */
679 #ifdef INET6
680                 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
681 #ifdef IFADDR_DEBUG_VERBOSE
682                         int i;
683
684                         kprintf("purge in6 addr %p: ", ifa);
685                         for (i = 0; i < ncpus; ++i)
686                                 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
687                         kprintf("\n");
688 #endif
689
690                         in6_purgeaddr(ifa);
691                         /* ifp_addrhead is already updated */
692                         continue;
693                 }
694 #endif /* INET6 */
695                 ifa_ifunlink(ifa, ifp);
696                 ifa_destroy(ifa);
697         }
698 }
699
700 /*
701  * Detach an interface, removing it from the
702  * list of "active" interfaces.
703  */
704 void
705 if_detach(struct ifnet *ifp)
706 {
707         struct radix_node_head  *rnh;
708         int i;
709         int cpu, origcpu;
710         struct domain *dp;
711
712         EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
713
714         /*
715          * Remove routes and flush queues.
716          */
717         crit_enter();
718 #ifdef DEVICE_POLLING
719         if (ifp->if_flags & IFF_POLLING)
720                 ether_poll_deregister(ifp);
721 #endif
722 #ifdef IFPOLL_ENABLE
723         if (ifp->if_flags & IFF_NPOLLING)
724                 ifpoll_deregister(ifp);
725 #endif
726         if_down(ifp);
727
728         if (ifq_is_enabled(&ifp->if_snd))
729                 altq_disable(&ifp->if_snd);
730         if (ifq_is_attached(&ifp->if_snd))
731                 altq_detach(&ifp->if_snd);
732
733         /*
734          * Clean up all addresses.
735          */
736         ifp->if_lladdr = NULL;
737
738         if_purgeaddrs_nolink(ifp);
739         if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
740                 struct ifaddr *ifa;
741
742                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
743                 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
744                         ("non-link ifaddr is left on if_addrheads"));
745
746                 ifa_ifunlink(ifa, ifp);
747                 ifa_destroy(ifa);
748                 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
749                         ("there are still ifaddrs left on if_addrheads"));
750         }
751
752 #ifdef INET
753         /*
754          * Remove all IPv4 kernel structures related to ifp.
755          */
756         in_ifdetach(ifp);
757 #endif
758
759 #ifdef INET6
760         /*
761          * Remove all IPv6 kernel structs related to ifp.  This should be done
762          * before removing routing entries below, since IPv6 interface direct
763          * routes are expected to be removed by the IPv6-specific kernel API.
764          * Otherwise, the kernel will detect some inconsistency and bark it.
765          */
766         in6_ifdetach(ifp);
767 #endif
768
769         /*
770          * Delete all remaining routes using this interface
771          * Unfortuneatly the only way to do this is to slog through
772          * the entire routing table looking for routes which point
773          * to this interface...oh well...
774          */
775         origcpu = mycpuid;
776         for (cpu = 0; cpu < ncpus2; cpu++) {
777                 lwkt_migratecpu(cpu);
778                 for (i = 1; i <= AF_MAX; i++) {
779                         if ((rnh = rt_tables[cpu][i]) == NULL)
780                                 continue;
781                         rnh->rnh_walktree(rnh, if_rtdel, ifp);
782                 }
783         }
784         lwkt_migratecpu(origcpu);
785
786         /* Announce that the interface is gone. */
787         rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
788         devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
789
790         SLIST_FOREACH(dp, &domains, dom_next)
791                 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
792                         (*dp->dom_ifdetach)(ifp,
793                                 ifp->if_afdata[dp->dom_family]);
794
795         /*
796          * Remove interface from ifindex2ifp[] and maybe decrement if_index.
797          */
798         ifindex2ifnet[ifp->if_index] = NULL;
799         while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
800                 if_index--;
801
802         TAILQ_REMOVE(&ifnet, ifp, if_link);
803         kfree(ifp->if_addrheads, M_IFADDR);
804         kfree(ifp->if_start_nmsg, M_LWKTMSG);
805         crit_exit();
806 }
807
808 /*
809  * Delete Routes for a Network Interface
810  *
811  * Called for each routing entry via the rnh->rnh_walktree() call above
812  * to delete all route entries referencing a detaching network interface.
813  *
814  * Arguments:
815  *      rn      pointer to node in the routing table
816  *      arg     argument passed to rnh->rnh_walktree() - detaching interface
817  *
818  * Returns:
819  *      0       successful
820  *      errno   failed - reason indicated
821  *
822  */
823 static int
824 if_rtdel(struct radix_node *rn, void *arg)
825 {
826         struct rtentry  *rt = (struct rtentry *)rn;
827         struct ifnet    *ifp = arg;
828         int             err;
829
830         if (rt->rt_ifp == ifp) {
831
832                 /*
833                  * Protect (sorta) against walktree recursion problems
834                  * with cloned routes
835                  */
836                 if (!(rt->rt_flags & RTF_UP))
837                         return (0);
838
839                 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
840                                 rt_mask(rt), rt->rt_flags,
841                                 NULL);
842                 if (err) {
843                         log(LOG_WARNING, "if_rtdel: error %d\n", err);
844                 }
845         }
846
847         return (0);
848 }
849
850 /*
851  * Locate an interface based on a complete address.
852  */
853 struct ifaddr *
854 ifa_ifwithaddr(struct sockaddr *addr)
855 {
856         struct ifnet *ifp;
857
858         TAILQ_FOREACH(ifp, &ifnet, if_link) {
859                 struct ifaddr_container *ifac;
860
861                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
862                         struct ifaddr *ifa = ifac->ifa;
863
864                         if (ifa->ifa_addr->sa_family != addr->sa_family)
865                                 continue;
866                         if (sa_equal(addr, ifa->ifa_addr))
867                                 return (ifa);
868                         if ((ifp->if_flags & IFF_BROADCAST) &&
869                             ifa->ifa_broadaddr &&
870                             /* IPv6 doesn't have broadcast */
871                             ifa->ifa_broadaddr->sa_len != 0 &&
872                             sa_equal(ifa->ifa_broadaddr, addr))
873                                 return (ifa);
874                 }
875         }
876         return (NULL);
877 }
878 /*
879  * Locate the point to point interface with a given destination address.
880  */
881 struct ifaddr *
882 ifa_ifwithdstaddr(struct sockaddr *addr)
883 {
884         struct ifnet *ifp;
885
886         TAILQ_FOREACH(ifp, &ifnet, if_link) {
887                 struct ifaddr_container *ifac;
888
889                 if (!(ifp->if_flags & IFF_POINTOPOINT))
890                         continue;
891
892                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
893                         struct ifaddr *ifa = ifac->ifa;
894
895                         if (ifa->ifa_addr->sa_family != addr->sa_family)
896                                 continue;
897                         if (ifa->ifa_dstaddr &&
898                             sa_equal(addr, ifa->ifa_dstaddr))
899                                 return (ifa);
900                 }
901         }
902         return (NULL);
903 }
904
905 /*
906  * Find an interface on a specific network.  If many, choice
907  * is most specific found.
908  */
909 struct ifaddr *
910 ifa_ifwithnet(struct sockaddr *addr)
911 {
912         struct ifnet *ifp;
913         struct ifaddr *ifa_maybe = NULL;
914         u_int af = addr->sa_family;
915         char *addr_data = addr->sa_data, *cplim;
916
917         /*
918          * AF_LINK addresses can be looked up directly by their index number,
919          * so do that if we can.
920          */
921         if (af == AF_LINK) {
922                 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
923
924                 if (sdl->sdl_index && sdl->sdl_index <= if_index)
925                         return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
926         }
927
928         /*
929          * Scan though each interface, looking for ones that have
930          * addresses in this address family.
931          */
932         TAILQ_FOREACH(ifp, &ifnet, if_link) {
933                 struct ifaddr_container *ifac;
934
935                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
936                         struct ifaddr *ifa = ifac->ifa;
937                         char *cp, *cp2, *cp3;
938
939                         if (ifa->ifa_addr->sa_family != af)
940 next:                           continue;
941                         if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
942                                 /*
943                                  * This is a bit broken as it doesn't
944                                  * take into account that the remote end may
945                                  * be a single node in the network we are
946                                  * looking for.
947                                  * The trouble is that we don't know the
948                                  * netmask for the remote end.
949                                  */
950                                 if (ifa->ifa_dstaddr != NULL &&
951                                     sa_equal(addr, ifa->ifa_dstaddr))
952                                         return (ifa);
953                         } else {
954                                 /*
955                                  * if we have a special address handler,
956                                  * then use it instead of the generic one.
957                                  */
958                                 if (ifa->ifa_claim_addr) {
959                                         if ((*ifa->ifa_claim_addr)(ifa, addr)) {
960                                                 return (ifa);
961                                         } else {
962                                                 continue;
963                                         }
964                                 }
965
966                                 /*
967                                  * Scan all the bits in the ifa's address.
968                                  * If a bit dissagrees with what we are
969                                  * looking for, mask it with the netmask
970                                  * to see if it really matters.
971                                  * (A byte at a time)
972                                  */
973                                 if (ifa->ifa_netmask == 0)
974                                         continue;
975                                 cp = addr_data;
976                                 cp2 = ifa->ifa_addr->sa_data;
977                                 cp3 = ifa->ifa_netmask->sa_data;
978                                 cplim = ifa->ifa_netmask->sa_len +
979                                         (char *)ifa->ifa_netmask;
980                                 while (cp3 < cplim)
981                                         if ((*cp++ ^ *cp2++) & *cp3++)
982                                                 goto next; /* next address! */
983                                 /*
984                                  * If the netmask of what we just found
985                                  * is more specific than what we had before
986                                  * (if we had one) then remember the new one
987                                  * before continuing to search
988                                  * for an even better one.
989                                  */
990                                 if (ifa_maybe == 0 ||
991                                     rn_refines((char *)ifa->ifa_netmask,
992                                                (char *)ifa_maybe->ifa_netmask))
993                                         ifa_maybe = ifa;
994                         }
995                 }
996         }
997         return (ifa_maybe);
998 }
999
1000 /*
1001  * Find an interface address specific to an interface best matching
1002  * a given address.
1003  */
1004 struct ifaddr *
1005 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1006 {
1007         struct ifaddr_container *ifac;
1008         char *cp, *cp2, *cp3;
1009         char *cplim;
1010         struct ifaddr *ifa_maybe = 0;
1011         u_int af = addr->sa_family;
1012
1013         if (af >= AF_MAX)
1014                 return (0);
1015         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1016                 struct ifaddr *ifa = ifac->ifa;
1017
1018                 if (ifa->ifa_addr->sa_family != af)
1019                         continue;
1020                 if (ifa_maybe == 0)
1021                         ifa_maybe = ifa;
1022                 if (ifa->ifa_netmask == NULL) {
1023                         if (sa_equal(addr, ifa->ifa_addr) ||
1024                             (ifa->ifa_dstaddr != NULL &&
1025                              sa_equal(addr, ifa->ifa_dstaddr)))
1026                                 return (ifa);
1027                         continue;
1028                 }
1029                 if (ifp->if_flags & IFF_POINTOPOINT) {
1030                         if (sa_equal(addr, ifa->ifa_dstaddr))
1031                                 return (ifa);
1032                 } else {
1033                         cp = addr->sa_data;
1034                         cp2 = ifa->ifa_addr->sa_data;
1035                         cp3 = ifa->ifa_netmask->sa_data;
1036                         cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1037                         for (; cp3 < cplim; cp3++)
1038                                 if ((*cp++ ^ *cp2++) & *cp3)
1039                                         break;
1040                         if (cp3 == cplim)
1041                                 return (ifa);
1042                 }
1043         }
1044         return (ifa_maybe);
1045 }
1046
1047 /*
1048  * Default action when installing a route with a Link Level gateway.
1049  * Lookup an appropriate real ifa to point to.
1050  * This should be moved to /sys/net/link.c eventually.
1051  */
1052 static void
1053 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1054 {
1055         struct ifaddr *ifa;
1056         struct sockaddr *dst;
1057         struct ifnet *ifp;
1058
1059         if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1060             (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1061                 return;
1062         ifa = ifaof_ifpforaddr(dst, ifp);
1063         if (ifa != NULL) {
1064                 IFAFREE(rt->rt_ifa);
1065                 IFAREF(ifa);
1066                 rt->rt_ifa = ifa;
1067                 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1068                         ifa->ifa_rtrequest(cmd, rt, info);
1069         }
1070 }
1071
1072 /*
1073  * Mark an interface down and notify protocols of
1074  * the transition.
1075  * NOTE: must be called at splnet or eqivalent.
1076  */
1077 void
1078 if_unroute(struct ifnet *ifp, int flag, int fam)
1079 {
1080         struct ifaddr_container *ifac;
1081
1082         ifp->if_flags &= ~flag;
1083         getmicrotime(&ifp->if_lastchange);
1084         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1085                 struct ifaddr *ifa = ifac->ifa;
1086
1087                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1088                         kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1089         }
1090         ifq_purge(&ifp->if_snd);
1091         rt_ifmsg(ifp);
1092 }
1093
1094 /*
1095  * Mark an interface up and notify protocols of
1096  * the transition.
1097  * NOTE: must be called at splnet or eqivalent.
1098  */
1099 void
1100 if_route(struct ifnet *ifp, int flag, int fam)
1101 {
1102         struct ifaddr_container *ifac;
1103
1104         ifq_purge(&ifp->if_snd);
1105         ifp->if_flags |= flag;
1106         getmicrotime(&ifp->if_lastchange);
1107         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1108                 struct ifaddr *ifa = ifac->ifa;
1109
1110                 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1111                         kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1112         }
1113         rt_ifmsg(ifp);
1114 #ifdef INET6
1115         in6_if_up(ifp);
1116 #endif
1117 }
1118
1119 /*
1120  * Mark an interface down and notify protocols of the transition.  An
1121  * interface going down is also considered to be a synchronizing event.
1122  * We must ensure that all packet processing related to the interface
1123  * has completed before we return so e.g. the caller can free the ifnet
1124  * structure that the mbufs may be referencing.
1125  *
1126  * NOTE: must be called at splnet or eqivalent.
1127  */
1128 void
1129 if_down(struct ifnet *ifp)
1130 {
1131         if_unroute(ifp, IFF_UP, AF_UNSPEC);
1132         netmsg_service_sync();
1133 }
1134
1135 /*
1136  * Mark an interface up and notify protocols of
1137  * the transition.
1138  * NOTE: must be called at splnet or eqivalent.
1139  */
1140 void
1141 if_up(struct ifnet *ifp)
1142 {
1143         if_route(ifp, IFF_UP, AF_UNSPEC);
1144 }
1145
1146 /*
1147  * Process a link state change.
1148  * NOTE: must be called at splsoftnet or equivalent.
1149  */
1150 void
1151 if_link_state_change(struct ifnet *ifp)
1152 {
1153         int link_state = ifp->if_link_state;
1154
1155         rt_ifmsg(ifp);
1156         devctl_notify("IFNET", ifp->if_xname,
1157             (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1158 }
1159
1160 /*
1161  * Handle interface watchdog timer routines.  Called
1162  * from softclock, we decrement timers (if set) and
1163  * call the appropriate interface routine on expiration.
1164  */
1165 static void
1166 if_slowtimo(void *arg)
1167 {
1168         struct ifnet *ifp;
1169
1170         crit_enter();
1171
1172         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1173                 if (ifp->if_timer == 0 || --ifp->if_timer)
1174                         continue;
1175                 if (ifp->if_watchdog) {
1176                         if (ifnet_tryserialize_all(ifp)) {
1177                                 (*ifp->if_watchdog)(ifp);
1178                                 ifnet_deserialize_all(ifp);
1179                         } else {
1180                                 /* try again next timeout */
1181                                 ++ifp->if_timer;
1182                         }
1183                 }
1184         }
1185
1186         crit_exit();
1187
1188         callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1189 }
1190
1191 /*
1192  * Map interface name to
1193  * interface structure pointer.
1194  */
1195 struct ifnet *
1196 ifunit(const char *name)
1197 {
1198         struct ifnet *ifp;
1199
1200         /*
1201          * Search all the interfaces for this name/number
1202          */
1203
1204         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1205                 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1206                         break;
1207         }
1208         return (ifp);
1209 }
1210
1211
1212 /*
1213  * Map interface name in a sockaddr_dl to
1214  * interface structure pointer.
1215  */
1216 struct ifnet *
1217 if_withname(struct sockaddr *sa)
1218 {
1219         char ifname[IFNAMSIZ+1];
1220         struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1221
1222         if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1223              (sdl->sdl_nlen > IFNAMSIZ) )
1224                 return NULL;
1225
1226         /*
1227          * ifunit wants a null-terminated name.  It may not be null-terminated
1228          * in the sockaddr.  We don't want to change the caller's sockaddr,
1229          * and there might not be room to put the trailing null anyway, so we
1230          * make a local copy that we know we can null terminate safely.
1231          */
1232
1233         bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1234         ifname[sdl->sdl_nlen] = '\0';
1235         return ifunit(ifname);
1236 }
1237
1238
1239 /*
1240  * Interface ioctls.
1241  */
1242 int
1243 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1244 {
1245         struct ifnet *ifp;
1246         struct ifreq *ifr;
1247         struct ifstat *ifs;
1248         int error;
1249         short oif_flags;
1250         int new_flags;
1251         size_t namelen, onamelen;
1252         char new_name[IFNAMSIZ];
1253         struct ifaddr *ifa;
1254         struct sockaddr_dl *sdl;
1255
1256         switch (cmd) {
1257
1258         case SIOCGIFCONF:
1259         case OSIOCGIFCONF:
1260                 return (ifconf(cmd, data, cred));
1261         }
1262         ifr = (struct ifreq *)data;
1263
1264         switch (cmd) {
1265         case SIOCIFCREATE:
1266         case SIOCIFDESTROY:
1267                 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1268                         return (error);
1269                 return ((cmd == SIOCIFCREATE) ?
1270                         if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1271                         if_clone_destroy(ifr->ifr_name));
1272
1273         case SIOCIFGCLONERS:
1274                 return (if_clone_list((struct if_clonereq *)data));
1275         }
1276
1277         ifp = ifunit(ifr->ifr_name);
1278         if (ifp == 0)
1279                 return (ENXIO);
1280         switch (cmd) {
1281
1282         case SIOCGIFINDEX:
1283                 ifr->ifr_index = ifp->if_index;
1284                 break;
1285
1286         case SIOCGIFFLAGS:
1287                 ifr->ifr_flags = ifp->if_flags;
1288                 ifr->ifr_flagshigh = ifp->if_flags >> 16;
1289                 break;
1290
1291         case SIOCGIFCAP:
1292                 ifr->ifr_reqcap = ifp->if_capabilities;
1293                 ifr->ifr_curcap = ifp->if_capenable;
1294                 break;
1295
1296         case SIOCGIFMETRIC:
1297                 ifr->ifr_metric = ifp->if_metric;
1298                 break;
1299
1300         case SIOCGIFMTU:
1301                 ifr->ifr_mtu = ifp->if_mtu;
1302                 break;
1303
1304         case SIOCGIFPHYS:
1305                 ifr->ifr_phys = ifp->if_physical;
1306                 break;
1307
1308         case SIOCGIFPOLLCPU:
1309 #ifdef DEVICE_POLLING
1310                 ifr->ifr_pollcpu = ifp->if_poll_cpuid;
1311 #else
1312                 ifr->ifr_pollcpu = -1;
1313 #endif
1314                 break;
1315
1316         case SIOCSIFPOLLCPU:
1317 #ifdef DEVICE_POLLING
1318                 if ((ifp->if_flags & IFF_POLLING) == 0)
1319                         ether_pollcpu_register(ifp, ifr->ifr_pollcpu);
1320 #endif
1321                 break;
1322
1323         case SIOCSIFFLAGS:
1324                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1325                 if (error)
1326                         return (error);
1327                 new_flags = (ifr->ifr_flags & 0xffff) |
1328                     (ifr->ifr_flagshigh << 16);
1329                 if (ifp->if_flags & IFF_SMART) {
1330                         /* Smart drivers twiddle their own routes */
1331                 } else if (ifp->if_flags & IFF_UP &&
1332                     (new_flags & IFF_UP) == 0) {
1333                         crit_enter();
1334                         if_down(ifp);
1335                         crit_exit();
1336                 } else if (new_flags & IFF_UP &&
1337                     (ifp->if_flags & IFF_UP) == 0) {
1338                         crit_enter();
1339                         if_up(ifp);
1340                         crit_exit();
1341                 }
1342
1343 #ifdef DEVICE_POLLING
1344                 if ((new_flags ^ ifp->if_flags) & IFF_POLLING) {
1345                         if (new_flags & IFF_POLLING) {
1346                                 ether_poll_register(ifp);
1347                         } else {
1348                                 ether_poll_deregister(ifp);
1349                         }
1350                 }
1351 #endif
1352 #ifdef IFPOLL_ENABLE
1353                 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1354                         if (new_flags & IFF_NPOLLING)
1355                                 ifpoll_register(ifp);
1356                         else
1357                                 ifpoll_deregister(ifp);
1358                 }
1359 #endif
1360
1361                 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1362                         (new_flags &~ IFF_CANTCHANGE);
1363                 if (new_flags & IFF_PPROMISC) {
1364                         /* Permanently promiscuous mode requested */
1365                         ifp->if_flags |= IFF_PROMISC;
1366                 } else if (ifp->if_pcount == 0) {
1367                         ifp->if_flags &= ~IFF_PROMISC;
1368                 }
1369                 if (ifp->if_ioctl) {
1370                         ifnet_serialize_all(ifp);
1371                         ifp->if_ioctl(ifp, cmd, data, cred);
1372                         ifnet_deserialize_all(ifp);
1373                 }
1374                 getmicrotime(&ifp->if_lastchange);
1375                 break;
1376
1377         case SIOCSIFCAP:
1378                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1379                 if (error)
1380                         return (error);
1381                 if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1382                         return (EINVAL);
1383                 ifnet_serialize_all(ifp);
1384                 ifp->if_ioctl(ifp, cmd, data, cred);
1385                 ifnet_deserialize_all(ifp);
1386                 break;
1387
1388         case SIOCSIFNAME:
1389                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1390                 if (error != 0)
1391                         return (error);
1392                 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1393                 if (error != 0)
1394                         return (error);
1395                 if (new_name[0] == '\0')
1396                         return (EINVAL);
1397                 if (ifunit(new_name) != NULL)
1398                         return (EEXIST);
1399
1400                 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1401
1402                 /* Announce the departure of the interface. */
1403                 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1404
1405                 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1406                 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1407                 /* XXX IFA_LOCK(ifa); */
1408                 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1409                 namelen = strlen(new_name);
1410                 onamelen = sdl->sdl_nlen;
1411                 /*
1412                  * Move the address if needed.  This is safe because we
1413                  * allocate space for a name of length IFNAMSIZ when we
1414                  * create this in if_attach().
1415                  */
1416                 if (namelen != onamelen) {
1417                         bcopy(sdl->sdl_data + onamelen,
1418                             sdl->sdl_data + namelen, sdl->sdl_alen);
1419                 }
1420                 bcopy(new_name, sdl->sdl_data, namelen);
1421                 sdl->sdl_nlen = namelen;
1422                 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1423                 bzero(sdl->sdl_data, onamelen);
1424                 while (namelen != 0)
1425                         sdl->sdl_data[--namelen] = 0xff;
1426                 /* XXX IFA_UNLOCK(ifa) */
1427
1428                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1429
1430                 /* Announce the return of the interface. */
1431                 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1432                 break;
1433
1434         case SIOCSIFMETRIC:
1435                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1436                 if (error)
1437                         return (error);
1438                 ifp->if_metric = ifr->ifr_metric;
1439                 getmicrotime(&ifp->if_lastchange);
1440                 break;
1441
1442         case SIOCSIFPHYS:
1443                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1444                 if (error)
1445                         return error;
1446                 if (!ifp->if_ioctl)
1447                         return EOPNOTSUPP;
1448                 ifnet_serialize_all(ifp);
1449                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1450                 ifnet_deserialize_all(ifp);
1451                 if (error == 0)
1452                         getmicrotime(&ifp->if_lastchange);
1453                 return (error);
1454
1455         case SIOCSIFMTU:
1456         {
1457                 u_long oldmtu = ifp->if_mtu;
1458
1459                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1460                 if (error)
1461                         return (error);
1462                 if (ifp->if_ioctl == NULL)
1463                         return (EOPNOTSUPP);
1464                 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1465                         return (EINVAL);
1466                 ifnet_serialize_all(ifp);
1467                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1468                 ifnet_deserialize_all(ifp);
1469                 if (error == 0) {
1470                         getmicrotime(&ifp->if_lastchange);
1471                         rt_ifmsg(ifp);
1472                 }
1473                 /*
1474                  * If the link MTU changed, do network layer specific procedure.
1475                  */
1476                 if (ifp->if_mtu != oldmtu) {
1477 #ifdef INET6
1478                         nd6_setmtu(ifp);
1479 #endif
1480                 }
1481                 return (error);
1482         }
1483
1484         case SIOCADDMULTI:
1485         case SIOCDELMULTI:
1486                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1487                 if (error)
1488                         return (error);
1489
1490                 /* Don't allow group membership on non-multicast interfaces. */
1491                 if ((ifp->if_flags & IFF_MULTICAST) == 0)
1492                         return EOPNOTSUPP;
1493
1494                 /* Don't let users screw up protocols' entries. */
1495                 if (ifr->ifr_addr.sa_family != AF_LINK)
1496                         return EINVAL;
1497
1498                 if (cmd == SIOCADDMULTI) {
1499                         struct ifmultiaddr *ifma;
1500                         error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1501                 } else {
1502                         error = if_delmulti(ifp, &ifr->ifr_addr);
1503                 }
1504                 if (error == 0)
1505                         getmicrotime(&ifp->if_lastchange);
1506                 return error;
1507
1508         case SIOCSIFPHYADDR:
1509         case SIOCDIFPHYADDR:
1510 #ifdef INET6
1511         case SIOCSIFPHYADDR_IN6:
1512 #endif
1513         case SIOCSLIFPHYADDR:
1514         case SIOCSIFMEDIA:
1515         case SIOCSIFGENERIC:
1516                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1517                 if (error)
1518                         return (error);
1519                 if (ifp->if_ioctl == 0)
1520                         return (EOPNOTSUPP);
1521                 ifnet_serialize_all(ifp);
1522                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1523                 ifnet_deserialize_all(ifp);
1524                 if (error == 0)
1525                         getmicrotime(&ifp->if_lastchange);
1526                 return error;
1527
1528         case SIOCGIFSTATUS:
1529                 ifs = (struct ifstat *)data;
1530                 ifs->ascii[0] = '\0';
1531
1532         case SIOCGIFPSRCADDR:
1533         case SIOCGIFPDSTADDR:
1534         case SIOCGLIFPHYADDR:
1535         case SIOCGIFMEDIA:
1536         case SIOCGIFGENERIC:
1537                 if (ifp->if_ioctl == NULL)
1538                         return (EOPNOTSUPP);
1539                 ifnet_serialize_all(ifp);
1540                 error = ifp->if_ioctl(ifp, cmd, data, cred);
1541                 ifnet_deserialize_all(ifp);
1542                 return (error);
1543
1544         case SIOCSIFLLADDR:
1545                 error = priv_check_cred(cred, PRIV_ROOT, 0);
1546                 if (error)
1547                         return (error);
1548                 return if_setlladdr(ifp,
1549                     ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1550
1551         default:
1552                 oif_flags = ifp->if_flags;
1553                 if (so->so_proto == 0)
1554                         return (EOPNOTSUPP);
1555 #ifndef COMPAT_43
1556                 error = so_pru_control(so, cmd, data, ifp);
1557 #else
1558             {
1559                 int ocmd = cmd;
1560
1561                 switch (cmd) {
1562
1563                 case SIOCSIFDSTADDR:
1564                 case SIOCSIFADDR:
1565                 case SIOCSIFBRDADDR:
1566                 case SIOCSIFNETMASK:
1567 #if BYTE_ORDER != BIG_ENDIAN
1568                         if (ifr->ifr_addr.sa_family == 0 &&
1569                             ifr->ifr_addr.sa_len < 16) {
1570                                 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1571                                 ifr->ifr_addr.sa_len = 16;
1572                         }
1573 #else
1574                         if (ifr->ifr_addr.sa_len == 0)
1575                                 ifr->ifr_addr.sa_len = 16;
1576 #endif
1577                         break;
1578
1579                 case OSIOCGIFADDR:
1580                         cmd = SIOCGIFADDR;
1581                         break;
1582
1583                 case OSIOCGIFDSTADDR:
1584                         cmd = SIOCGIFDSTADDR;
1585                         break;
1586
1587                 case OSIOCGIFBRDADDR:
1588                         cmd = SIOCGIFBRDADDR;
1589                         break;
1590
1591                 case OSIOCGIFNETMASK:
1592                         cmd = SIOCGIFNETMASK;
1593                 }
1594                 error =  so_pru_control(so, cmd, data, ifp);
1595                 switch (ocmd) {
1596
1597                 case OSIOCGIFADDR:
1598                 case OSIOCGIFDSTADDR:
1599                 case OSIOCGIFBRDADDR:
1600                 case OSIOCGIFNETMASK:
1601                         *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1602
1603                 }
1604             }
1605 #endif /* COMPAT_43 */
1606
1607                 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1608 #ifdef INET6
1609                         DELAY(100);/* XXX: temporary workaround for fxp issue*/
1610                         if (ifp->if_flags & IFF_UP) {
1611                                 crit_enter();
1612                                 in6_if_up(ifp);
1613                                 crit_exit();
1614                         }
1615 #endif
1616                 }
1617                 return (error);
1618
1619         }
1620         return (0);
1621 }
1622
1623 /*
1624  * Set/clear promiscuous mode on interface ifp based on the truth value
1625  * of pswitch.  The calls are reference counted so that only the first
1626  * "on" request actually has an effect, as does the final "off" request.
1627  * Results are undefined if the "off" and "on" requests are not matched.
1628  */
1629 int
1630 ifpromisc(struct ifnet *ifp, int pswitch)
1631 {
1632         struct ifreq ifr;
1633         int error;
1634         int oldflags;
1635
1636         oldflags = ifp->if_flags;
1637         if (ifp->if_flags & IFF_PPROMISC) {
1638                 /* Do nothing if device is in permanently promiscuous mode */
1639                 ifp->if_pcount += pswitch ? 1 : -1;
1640                 return (0);
1641         }
1642         if (pswitch) {
1643                 /*
1644                  * If the device is not configured up, we cannot put it in
1645                  * promiscuous mode.
1646                  */
1647                 if ((ifp->if_flags & IFF_UP) == 0)
1648                         return (ENETDOWN);
1649                 if (ifp->if_pcount++ != 0)
1650                         return (0);
1651                 ifp->if_flags |= IFF_PROMISC;
1652                 log(LOG_INFO, "%s: promiscuous mode enabled\n",
1653                     ifp->if_xname);
1654         } else {
1655                 if (--ifp->if_pcount > 0)
1656                         return (0);
1657                 ifp->if_flags &= ~IFF_PROMISC;
1658                 log(LOG_INFO, "%s: promiscuous mode disabled\n",
1659                     ifp->if_xname);
1660         }
1661         ifr.ifr_flags = ifp->if_flags;
1662         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1663         ifnet_serialize_all(ifp);
1664         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1665         ifnet_deserialize_all(ifp);
1666         if (error == 0)
1667                 rt_ifmsg(ifp);
1668         else
1669                 ifp->if_flags = oldflags;
1670         return error;
1671 }
1672
1673 /*
1674  * Return interface configuration
1675  * of system.  List may be used
1676  * in later ioctl's (above) to get
1677  * other information.
1678  */
1679 static int
1680 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1681 {
1682         struct ifconf *ifc = (struct ifconf *)data;
1683         struct ifnet *ifp;
1684         struct sockaddr *sa;
1685         struct ifreq ifr, *ifrp;
1686         int space = ifc->ifc_len, error = 0;
1687
1688         ifrp = ifc->ifc_req;
1689         TAILQ_FOREACH(ifp, &ifnet, if_link) {
1690                 struct ifaddr_container *ifac;
1691                 int addrs;
1692
1693                 if (space <= sizeof ifr)
1694                         break;
1695
1696                 /*
1697                  * Zero the stack declared structure first to prevent
1698                  * memory disclosure.
1699                  */
1700                 bzero(&ifr, sizeof(ifr));
1701                 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1702                     >= sizeof(ifr.ifr_name)) {
1703                         error = ENAMETOOLONG;
1704                         break;
1705                 }
1706
1707                 addrs = 0;
1708                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1709                         struct ifaddr *ifa = ifac->ifa;
1710
1711                         if (space <= sizeof ifr)
1712                                 break;
1713                         sa = ifa->ifa_addr;
1714                         if (cred->cr_prison &&
1715                             prison_if(cred, sa))
1716                                 continue;
1717                         addrs++;
1718 #ifdef COMPAT_43
1719                         if (cmd == OSIOCGIFCONF) {
1720                                 struct osockaddr *osa =
1721                                          (struct osockaddr *)&ifr.ifr_addr;
1722                                 ifr.ifr_addr = *sa;
1723                                 osa->sa_family = sa->sa_family;
1724                                 error = copyout(&ifr, ifrp, sizeof ifr);
1725                                 ifrp++;
1726                         } else
1727 #endif
1728                         if (sa->sa_len <= sizeof(*sa)) {
1729                                 ifr.ifr_addr = *sa;
1730                                 error = copyout(&ifr, ifrp, sizeof ifr);
1731                                 ifrp++;
1732                         } else {
1733                                 if (space < (sizeof ifr) + sa->sa_len -
1734                                             sizeof(*sa))
1735                                         break;
1736                                 space -= sa->sa_len - sizeof(*sa);
1737                                 error = copyout(&ifr, ifrp,
1738                                                 sizeof ifr.ifr_name);
1739                                 if (error == 0)
1740                                         error = copyout(sa, &ifrp->ifr_addr,
1741                                                         sa->sa_len);
1742                                 ifrp = (struct ifreq *)
1743                                         (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1744                         }
1745                         if (error)
1746                                 break;
1747                         space -= sizeof ifr;
1748                 }
1749                 if (error)
1750                         break;
1751                 if (!addrs) {
1752                         bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
1753                         error = copyout(&ifr, ifrp, sizeof ifr);
1754                         if (error)
1755                                 break;
1756                         space -= sizeof ifr;
1757                         ifrp++;
1758                 }
1759         }
1760         ifc->ifc_len -= space;
1761         return (error);
1762 }
1763
1764 /*
1765  * Just like if_promisc(), but for all-multicast-reception mode.
1766  */
1767 int
1768 if_allmulti(struct ifnet *ifp, int onswitch)
1769 {
1770         int error = 0;
1771         struct ifreq ifr;
1772
1773         crit_enter();
1774
1775         if (onswitch) {
1776                 if (ifp->if_amcount++ == 0) {
1777                         ifp->if_flags |= IFF_ALLMULTI;
1778                         ifr.ifr_flags = ifp->if_flags;
1779                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1780                         ifnet_serialize_all(ifp);
1781                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1782                                               NULL);
1783                         ifnet_deserialize_all(ifp);
1784                 }
1785         } else {
1786                 if (ifp->if_amcount > 1) {
1787                         ifp->if_amcount--;
1788                 } else {
1789                         ifp->if_amcount = 0;
1790                         ifp->if_flags &= ~IFF_ALLMULTI;
1791                         ifr.ifr_flags = ifp->if_flags;
1792                         ifr.ifr_flagshigh = ifp->if_flags >> 16;
1793                         ifnet_serialize_all(ifp);
1794                         error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1795                                               NULL);
1796                         ifnet_deserialize_all(ifp);
1797                 }
1798         }
1799
1800         crit_exit();
1801
1802         if (error == 0)
1803                 rt_ifmsg(ifp);
1804         return error;
1805 }
1806
1807 /*
1808  * Add a multicast listenership to the interface in question.
1809  * The link layer provides a routine which converts
1810  */
1811 int
1812 if_addmulti(
1813         struct ifnet *ifp,      /* interface to manipulate */
1814         struct sockaddr *sa,    /* address to add */
1815         struct ifmultiaddr **retifma)
1816 {
1817         struct sockaddr *llsa, *dupsa;
1818         int error;
1819         struct ifmultiaddr *ifma;
1820
1821         /*
1822          * If the matching multicast address already exists
1823          * then don't add a new one, just add a reference
1824          */
1825         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1826                 if (sa_equal(sa, ifma->ifma_addr)) {
1827                         ifma->ifma_refcount++;
1828                         if (retifma)
1829                                 *retifma = ifma;
1830                         return 0;
1831                 }
1832         }
1833
1834         /*
1835          * Give the link layer a chance to accept/reject it, and also
1836          * find out which AF_LINK address this maps to, if it isn't one
1837          * already.
1838          */
1839         if (ifp->if_resolvemulti) {
1840                 ifnet_serialize_all(ifp);
1841                 error = ifp->if_resolvemulti(ifp, &llsa, sa);
1842                 ifnet_deserialize_all(ifp);
1843                 if (error) 
1844                         return error;
1845         } else {
1846                 llsa = 0;
1847         }
1848
1849         MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1850         MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1851         bcopy(sa, dupsa, sa->sa_len);
1852
1853         ifma->ifma_addr = dupsa;
1854         ifma->ifma_lladdr = llsa;
1855         ifma->ifma_ifp = ifp;
1856         ifma->ifma_refcount = 1;
1857         ifma->ifma_protospec = 0;
1858         rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1859
1860         /*
1861          * Some network interfaces can scan the address list at
1862          * interrupt time; lock them out.
1863          */
1864         crit_enter();
1865         LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1866         crit_exit();
1867         *retifma = ifma;
1868
1869         if (llsa != 0) {
1870                 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1871                         if (sa_equal(ifma->ifma_addr, llsa))
1872                                 break;
1873                 }
1874                 if (ifma) {
1875                         ifma->ifma_refcount++;
1876                 } else {
1877                         MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1878                                M_IFMADDR, M_WAITOK);
1879                         MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1880                                M_IFMADDR, M_WAITOK);
1881                         bcopy(llsa, dupsa, llsa->sa_len);
1882                         ifma->ifma_addr = dupsa;
1883                         ifma->ifma_ifp = ifp;
1884                         ifma->ifma_refcount = 1;
1885                         crit_enter();
1886                         LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1887                         crit_exit();
1888                 }
1889         }
1890         /*
1891          * We are certain we have added something, so call down to the
1892          * interface to let them know about it.
1893          */
1894         crit_enter();
1895         ifnet_serialize_all(ifp);
1896         ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
1897         ifnet_deserialize_all(ifp);
1898         crit_exit();
1899
1900         return 0;
1901 }
1902
1903 /*
1904  * Remove a reference to a multicast address on this interface.  Yell
1905  * if the request does not match an existing membership.
1906  */
1907 int
1908 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1909 {
1910         struct ifmultiaddr *ifma;
1911
1912         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1913                 if (sa_equal(sa, ifma->ifma_addr))
1914                         break;
1915         if (ifma == 0)
1916                 return ENOENT;
1917
1918         if (ifma->ifma_refcount > 1) {
1919                 ifma->ifma_refcount--;
1920                 return 0;
1921         }
1922
1923         rt_newmaddrmsg(RTM_DELMADDR, ifma);
1924         sa = ifma->ifma_lladdr;
1925         crit_enter();
1926         LIST_REMOVE(ifma, ifma_link);
1927         /*
1928          * Make sure the interface driver is notified
1929          * in the case of a link layer mcast group being left.
1930          */
1931         if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0) {
1932                 ifnet_serialize_all(ifp);
1933                 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
1934                 ifnet_deserialize_all(ifp);
1935         }
1936         crit_exit();
1937         kfree(ifma->ifma_addr, M_IFMADDR);
1938         kfree(ifma, M_IFMADDR);
1939         if (sa == 0)
1940                 return 0;
1941
1942         /*
1943          * Now look for the link-layer address which corresponds to
1944          * this network address.  It had been squirreled away in
1945          * ifma->ifma_lladdr for this purpose (so we don't have
1946          * to call ifp->if_resolvemulti() again), and we saved that
1947          * value in sa above.  If some nasty deleted the
1948          * link-layer address out from underneath us, we can deal because
1949          * the address we stored was is not the same as the one which was
1950          * in the record for the link-layer address.  (So we don't complain
1951          * in that case.)
1952          */
1953         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1954                 if (sa_equal(sa, ifma->ifma_addr))
1955                         break;
1956         if (ifma == 0)
1957                 return 0;
1958
1959         if (ifma->ifma_refcount > 1) {
1960                 ifma->ifma_refcount--;
1961                 return 0;
1962         }
1963
1964         crit_enter();
1965         ifnet_serialize_all(ifp);
1966         LIST_REMOVE(ifma, ifma_link);
1967         ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
1968         ifnet_deserialize_all(ifp);
1969         crit_exit();
1970         kfree(ifma->ifma_addr, M_IFMADDR);
1971         kfree(sa, M_IFMADDR);
1972         kfree(ifma, M_IFMADDR);
1973
1974         return 0;
1975 }
1976
1977 /*
1978  * Set the link layer address on an interface.
1979  *
1980  * At this time we only support certain types of interfaces,
1981  * and we don't allow the length of the address to change.
1982  */
1983 int
1984 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1985 {
1986         struct sockaddr_dl *sdl;
1987         struct ifreq ifr;
1988
1989         sdl = IF_LLSOCKADDR(ifp);
1990         if (sdl == NULL)
1991                 return (EINVAL);
1992         if (len != sdl->sdl_alen)       /* don't allow length to change */
1993                 return (EINVAL);
1994         switch (ifp->if_type) {
1995         case IFT_ETHER:                 /* these types use struct arpcom */
1996         case IFT_XETHER:
1997         case IFT_L2VLAN:
1998                 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
1999                 bcopy(lladdr, LLADDR(sdl), len);
2000                 break;
2001         default:
2002                 return (ENODEV);
2003         }
2004         /*
2005          * If the interface is already up, we need
2006          * to re-init it in order to reprogram its
2007          * address filter.
2008          */
2009         ifnet_serialize_all(ifp);
2010         if ((ifp->if_flags & IFF_UP) != 0) {
2011                 struct ifaddr_container *ifac;
2012
2013                 ifp->if_flags &= ~IFF_UP;
2014                 ifr.ifr_flags = ifp->if_flags;
2015                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2016                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2017                               NULL);
2018                 ifp->if_flags |= IFF_UP;
2019                 ifr.ifr_flags = ifp->if_flags;
2020                 ifr.ifr_flagshigh = ifp->if_flags >> 16;
2021                 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2022                                  NULL);
2023 #ifdef INET
2024                 /*
2025                  * Also send gratuitous ARPs to notify other nodes about
2026                  * the address change.
2027                  */
2028                 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2029                         struct ifaddr *ifa = ifac->ifa;
2030
2031                         if (ifa->ifa_addr != NULL &&
2032                             ifa->ifa_addr->sa_family == AF_INET)
2033                                 arp_ifinit(ifp, ifa);
2034                 }
2035 #endif
2036         }
2037         ifnet_deserialize_all(ifp);
2038         return (0);
2039 }
2040
2041 struct ifmultiaddr *
2042 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2043 {
2044         struct ifmultiaddr *ifma;
2045
2046         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2047                 if (sa_equal(ifma->ifma_addr, sa))
2048                         break;
2049
2050         return ifma;
2051 }
2052
2053 /*
2054  * This function locates the first real ethernet MAC from a network
2055  * card and loads it into node, returning 0 on success or ENOENT if
2056  * no suitable interfaces were found.  It is used by the uuid code to
2057  * generate a unique 6-byte number.
2058  */
2059 int
2060 if_getanyethermac(uint16_t *node, int minlen)
2061 {
2062         struct ifnet *ifp;
2063         struct sockaddr_dl *sdl;
2064
2065         TAILQ_FOREACH(ifp, &ifnet, if_link) {
2066                 if (ifp->if_type != IFT_ETHER)
2067                         continue;
2068                 sdl = IF_LLSOCKADDR(ifp);
2069                 if (sdl->sdl_alen < minlen)
2070                         continue;
2071                 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2072                       minlen);
2073                 return(0);
2074         }
2075         return (ENOENT);
2076 }
2077
2078 /*
2079  * The name argument must be a pointer to storage which will last as
2080  * long as the interface does.  For physical devices, the result of
2081  * device_get_name(dev) is a good choice and for pseudo-devices a
2082  * static string works well.
2083  */
2084 void
2085 if_initname(struct ifnet *ifp, const char *name, int unit)
2086 {
2087         ifp->if_dname = name;
2088         ifp->if_dunit = unit;
2089         if (unit != IF_DUNIT_NONE)
2090                 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2091         else
2092                 strlcpy(ifp->if_xname, name, IFNAMSIZ);
2093 }
2094
2095 int
2096 if_printf(struct ifnet *ifp, const char *fmt, ...)
2097 {
2098         __va_list ap;
2099         int retval;
2100
2101         retval = kprintf("%s: ", ifp->if_xname);
2102         __va_start(ap, fmt);
2103         retval += kvprintf(fmt, ap);
2104         __va_end(ap);
2105         return (retval);
2106 }
2107
2108 struct ifnet *
2109 if_alloc(uint8_t type)
2110 {
2111         struct ifnet *ifp;
2112
2113         ifp = kmalloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
2114
2115         ifp->if_type = type;
2116
2117         return (ifp);
2118 }
2119
2120 void
2121 if_free(struct ifnet *ifp)
2122 {
2123         kfree(ifp, M_IFNET);
2124 }
2125
2126 void
2127 ifq_set_classic(struct ifaltq *ifq)
2128 {
2129         ifq->altq_enqueue = ifq_classic_enqueue;
2130         ifq->altq_dequeue = ifq_classic_dequeue;
2131         ifq->altq_request = ifq_classic_request;
2132 }
2133
2134 int
2135 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2136                     struct altq_pktattr *pa __unused)
2137 {
2138         logifq(enqueue, ifq);
2139         if (IF_QFULL(ifq)) {
2140                 m_freem(m);
2141                 return(ENOBUFS);
2142         } else {
2143                 IF_ENQUEUE(ifq, m);
2144                 return(0);
2145         }       
2146 }
2147
2148 struct mbuf *
2149 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2150 {
2151         struct mbuf *m;
2152
2153         switch (op) {
2154         case ALTDQ_POLL:
2155                 IF_POLL(ifq, m);
2156                 break;
2157         case ALTDQ_REMOVE:
2158                 logifq(dequeue, ifq);
2159                 IF_DEQUEUE(ifq, m);
2160                 break;
2161         default:
2162                 panic("unsupported ALTQ dequeue op: %d", op);
2163         }
2164         KKASSERT(mpolled == NULL || mpolled == m);
2165         return(m);
2166 }
2167
2168 int
2169 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2170 {
2171         switch (req) {
2172         case ALTRQ_PURGE:
2173                 IF_DRAIN(ifq);
2174                 break;
2175         default:
2176                 panic("unsupported ALTQ request: %d", req);
2177         }
2178         return(0);
2179 }
2180
2181 int
2182 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2183 {
2184         struct ifaltq *ifq = &ifp->if_snd;
2185         int running = 0, error, start = 0;
2186
2187         ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2188
2189         ALTQ_LOCK(ifq);
2190         error = ifq_enqueue_locked(ifq, m, pa);
2191         if (error) {
2192                 ALTQ_UNLOCK(ifq);
2193                 return error;
2194         }
2195         if (!ifq->altq_started) {
2196                 /*
2197                  * Hold the interlock of ifnet.if_start
2198                  */
2199                 ifq->altq_started = 1;
2200                 start = 1;
2201         }
2202         ALTQ_UNLOCK(ifq);
2203
2204         ifp->if_obytes += m->m_pkthdr.len;
2205         if (m->m_flags & M_MCAST)
2206                 ifp->if_omcasts++;
2207
2208         if (!start) {
2209                 logifstart(avoid, ifp);
2210                 return 0;
2211         }
2212
2213         if (ifq_dispatch_schedonly) {
2214                 /*
2215                  * Always schedule ifnet.if_start on ifnet's CPU,
2216                  * short circuit the rest of this function.
2217                  */
2218                 logifstart(sched, ifp);
2219                 if_start_schedule(ifp);
2220                 return 0;
2221         }
2222
2223         /*
2224          * Try to do direct ifnet.if_start first, if there is
2225          * contention on ifnet's serializer, ifnet.if_start will
2226          * be scheduled on ifnet's CPU.
2227          */
2228         if (!ifnet_tryserialize_tx(ifp)) {
2229                 /*
2230                  * ifnet serializer contention happened,
2231                  * ifnet.if_start is scheduled on ifnet's
2232                  * CPU, and we keep going.
2233                  */
2234                 logifstart(contend_sched, ifp);
2235                 if_start_schedule(ifp);
2236                 return 0;
2237         }
2238
2239         if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2240                 logifstart(run, ifp);
2241                 ifp->if_start(ifp);
2242                 if ((ifp->if_flags &
2243                      (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2244                         running = 1;
2245         }
2246
2247         ifnet_deserialize_tx(ifp);
2248
2249         if (ifq_dispatch_schednochk || if_start_need_schedule(ifq, running)) {
2250                 /*
2251                  * More data need to be transmitted, ifnet.if_start is
2252                  * scheduled on ifnet's CPU, and we keep going.
2253                  * NOTE: ifnet.if_start interlock is not released.
2254                  */
2255                 logifstart(sched, ifp);
2256                 if_start_schedule(ifp);
2257         }
2258         return 0;
2259 }
2260
2261 void *
2262 ifa_create(int size, int flags)
2263 {
2264         struct ifaddr *ifa;
2265         int i;
2266
2267         KASSERT(size >= sizeof(*ifa), ("ifaddr size too small\n"));
2268
2269         ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2270         if (ifa == NULL)
2271                 return NULL;
2272
2273         ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2274                                       M_IFADDR, M_WAITOK | M_ZERO);
2275         ifa->ifa_ncnt = ncpus;
2276         for (i = 0; i < ncpus; ++i) {
2277                 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2278
2279                 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2280                 ifac->ifa = ifa;
2281                 ifac->ifa_refcnt = 1;
2282         }
2283 #ifdef IFADDR_DEBUG
2284         kprintf("alloc ifa %p %d\n", ifa, size);
2285 #endif
2286         return ifa;
2287 }
2288
2289 void
2290 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2291 {
2292         struct ifaddr *ifa = ifac->ifa;
2293
2294         KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2295         KKASSERT(ifac->ifa_refcnt == 0);
2296         KASSERT(ifac->ifa_listmask == 0,
2297                 ("ifa is still on %#x lists\n", ifac->ifa_listmask));
2298
2299         ifac->ifa_magic = IFA_CONTAINER_DEAD;
2300
2301 #ifdef IFADDR_DEBUG_VERBOSE
2302         kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2303 #endif
2304
2305         KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2306                 ("invalid # of ifac, %d\n", ifa->ifa_ncnt));
2307         if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2308 #ifdef IFADDR_DEBUG
2309                 kprintf("free ifa %p\n", ifa);
2310 #endif
2311                 kfree(ifa->ifa_containers, M_IFADDR);
2312                 kfree(ifa, M_IFADDR);
2313         }
2314 }
2315
2316 static void
2317 ifa_iflink_dispatch(struct netmsg *nmsg)
2318 {
2319         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2320         struct ifaddr *ifa = msg->ifa;
2321         struct ifnet *ifp = msg->ifp;
2322         int cpu = mycpuid;
2323         struct ifaddr_container *ifac;
2324
2325         crit_enter();
2326
2327         ifac = &ifa->ifa_containers[cpu];
2328         ASSERT_IFAC_VALID(ifac);
2329         KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2330                 ("ifaddr is on if_addrheads\n"));
2331
2332         ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2333         if (msg->tail)
2334                 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2335         else
2336                 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2337
2338         crit_exit();
2339
2340         ifa_forwardmsg(&nmsg->nm_lmsg, cpu + 1);
2341 }
2342
2343 void
2344 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2345 {
2346         struct netmsg_ifaddr msg;
2347
2348         netmsg_init(&msg.netmsg, NULL, &curthread->td_msgport,
2349                     0, ifa_iflink_dispatch);
2350         msg.ifa = ifa;
2351         msg.ifp = ifp;
2352         msg.tail = tail;
2353
2354         ifa_domsg(&msg.netmsg.nm_lmsg, 0);
2355 }
2356
2357 static void
2358 ifa_ifunlink_dispatch(struct netmsg *nmsg)
2359 {
2360         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2361         struct ifaddr *ifa = msg->ifa;
2362         struct ifnet *ifp = msg->ifp;
2363         int cpu = mycpuid;
2364         struct ifaddr_container *ifac;
2365
2366         crit_enter();
2367
2368         ifac = &ifa->ifa_containers[cpu];
2369         ASSERT_IFAC_VALID(ifac);
2370         KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2371                 ("ifaddr is not on if_addrhead\n"));
2372
2373         TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2374         ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2375
2376         crit_exit();
2377
2378         ifa_forwardmsg(&nmsg->nm_lmsg, cpu + 1);
2379 }
2380
2381 void
2382 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2383 {
2384         struct netmsg_ifaddr msg;
2385
2386         netmsg_init(&msg.netmsg, NULL, &curthread->td_msgport,
2387                     0, ifa_ifunlink_dispatch);
2388         msg.ifa = ifa;
2389         msg.ifp = ifp;
2390
2391         ifa_domsg(&msg.netmsg.nm_lmsg, 0);
2392 }
2393
2394 static void
2395 ifa_destroy_dispatch(struct netmsg *nmsg)
2396 {
2397         struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2398
2399         IFAFREE(msg->ifa);
2400         ifa_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2401 }
2402
2403 void
2404 ifa_destroy(struct ifaddr *ifa)
2405 {
2406         struct netmsg_ifaddr msg;
2407
2408         netmsg_init(&msg.netmsg, NULL, &curthread->td_msgport,
2409                     0, ifa_destroy_dispatch);
2410         msg.ifa = ifa;
2411
2412         ifa_domsg(&msg.netmsg.nm_lmsg, 0);
2413 }
2414
2415 struct lwkt_port *
2416 ifnet_portfn(int cpu)
2417 {
2418         return &ifnet_threads[cpu].td_msgport;
2419 }
2420
2421 void
2422 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2423 {
2424         KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2425
2426         if (next_cpu < ncpus)
2427                 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2428         else
2429                 lwkt_replymsg(lmsg, 0);
2430 }
2431
2432 int
2433 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2434 {
2435         KKASSERT(cpu < ncpus);
2436         return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2437 }
2438
2439 void
2440 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2441 {
2442         KKASSERT(cpu < ncpus);
2443         lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2444 }
2445
2446 static void
2447 ifnetinit(void *dummy __unused)
2448 {
2449         int i;
2450
2451         for (i = 0; i < ncpus; ++i) {
2452                 struct thread *thr = &ifnet_threads[i];
2453
2454                 lwkt_create(netmsg_service_loop, &ifnet_mpsafe_thread, NULL,
2455                             thr, TDF_NETWORK | TDF_MPSAFE, i, "ifnet %d", i);
2456                 netmsg_service_port_init(&thr->td_msgport);
2457         }
2458 }
2459
2460 struct ifnet *
2461 ifnet_byindex(unsigned short idx)
2462 {
2463         if (idx > if_index)
2464                 return NULL;
2465         return ifindex2ifnet[idx];
2466 }
2467
2468 struct ifaddr *
2469 ifaddr_byindex(unsigned short idx)
2470 {
2471         struct ifnet *ifp;
2472
2473         ifp = ifnet_byindex(idx);
2474         if (!ifp)
2475                 return NULL;
2476         return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2477 }