if: Add power of 2 mask based CPUID to subqueue mapping
[dragonfly.git] / sys / net / if.c
1/*
2 * Copyright (c) 1980, 1986, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)if.c 8.3 (Berkeley) 1/4/94
f23061d4 34 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
984263bc
MD
35 */
36
37#include "opt_compat.h"
38#include "opt_inet6.h"
39#include "opt_inet.h"
b3a7093f 40#include "opt_ifpoll.h"
984263bc
MD
41
42#include <sys/param.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/systm.h>
46#include <sys/proc.h>
895c1f85 47#include <sys/priv.h>
6b6e0885 48#include <sys/protosw.h>
984263bc
MD
49#include <sys/socket.h>
50#include <sys/socketvar.h>
6b6e0885 51#include <sys/socketops.h>
984263bc
MD
52#include <sys/protosw.h>
53#include <sys/kernel.h>
9db4b353 54#include <sys/ktr.h>
9683f229 55#include <sys/mutex.h>
984263bc
MD
56#include <sys/sockio.h>
57#include <sys/syslog.h>
58#include <sys/sysctl.h>
698ac46c 59#include <sys/domain.h>
e9cb6d99 60#include <sys/thread.h>
78195a76 61#include <sys/serialize.h>
71fc104f 62#include <sys/bus.h>
984263bc 63
9683f229
MD
64#include <sys/thread2.h>
65#include <sys/msgport2.h>
66#include <sys/mutex2.h>
67
984263bc
MD
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_types.h>
72#include <net/if_var.h>
4d723e5a 73#include <net/ifq_var.h>
984263bc
MD
74#include <net/radix.h>
75#include <net/route.h>
65a24520 76#include <net/if_clone.h>
9db4b353 77#include <net/netisr.h>
b2632176
SZ
78#include <net/netmsg2.h>
79
d5a2b87c 80#include <machine/atomic.h>
984263bc 81#include <machine/stdarg.h>
b2632176 82#include <machine/smp.h>
984263bc
MD
83
84#if defined(INET) || defined(INET6)
85/*XXX*/
86#include <netinet/in.h>
87#include <netinet/in_var.h>
88#include <netinet/if_ether.h>
89#ifdef INET6
984263bc
MD
90#include <netinet6/in6_var.h>
91#include <netinet6/in6_ifattach.h>
92#endif
93#endif
94
9eee10d0
DRJ
95#if defined(COMPAT_43)
96#include <emulation/43bsd/43bsd_socket.h>
97#endif /* COMPAT_43 */
98
b2632176 99struct netmsg_ifaddr {
002c1265 100 struct netmsg_base base;
b2632176
SZ
101 struct ifaddr *ifa;
102 struct ifnet *ifp;
103 int tail;
104};
105
f0a26983
SZ
106struct ifsubq_stage_head {
107 TAILQ_HEAD(, ifsubq_stage) stg_head;
28cc0c29
SZ
108} __cachealign;
109
984263bc
MD
110/*
111 * System initialization
112 */
698ac46c
HS
113static void if_attachdomain(void *);
114static void if_attachdomain1(struct ifnet *);
436c57ea
SZ
115static int ifconf(u_long, caddr_t, struct ucred *);
116static void ifinit(void *);
90af4fd3 117static void ifnetinit(void *);
436c57ea
SZ
118static void if_slowtimo(void *);
119static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
120static int if_rtdel(struct radix_node *, void *);
984263bc 121
8a248085
SZ
122/* Helper functions */
123static void ifsq_watchdog_reset(struct ifsubq_watchdog *);
124
984263bc
MD
125#ifdef INET6
126/*
127 * XXX: declare here to avoid to include many inet6 related files..
128 * should be more generalized?
129 */
436c57ea 130extern void nd6_setmtu(struct ifnet *);
984263bc
MD
131#endif
132
436c57ea
SZ
133SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
134SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
135
f0a26983
SZ
136static int ifsq_stage_cntmax = 4;
137TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
28cc0c29 138SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
f0a26983 139 &ifsq_stage_cntmax, 0, "ifq staging packet count max");
28cc0c29 140
436c57ea 141SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
b2632176 142/* Must be after netisr_init */
90af4fd3 143SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)
436c57ea 144
aeb3c11e
RP
145static if_com_alloc_t *if_com_alloc[256];
146static if_com_free_t *if_com_free[256];
147
436c57ea
SZ
148MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
149MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
cb80735c 150MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
984263bc 151
436c57ea 152int ifqmaxlen = IFQ_MAXLEN;
b64bfcc3 153struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
984263bc 154
436c57ea
SZ
155struct callout if_slowtimo_timer;
156
157int if_index = 0;
158struct ifnet **ifindex2ifnet = NULL;
90af4fd3 159static struct thread ifnet_threads[MAXCPU];
abbb44bb 160
f0a26983 161static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU];
28cc0c29 162
f0a26983 163#ifdef notyet
9db4b353 164#define IFQ_KTR_STRING "ifq=%p"
5bf48697 165#define IFQ_KTR_ARGS struct ifaltq *ifq
9db4b353
SZ
166#ifndef KTR_IFQ
167#define KTR_IFQ KTR_ALL
168#endif
169KTR_INFO_MASTER(ifq);
5bf48697
AE
170KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
171KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
9db4b353
SZ
172#define logifq(name, arg) KTR_LOG(ifq_ ## name, arg)
173
174#define IF_START_KTR_STRING "ifp=%p"
5bf48697 175#define IF_START_KTR_ARGS struct ifnet *ifp
9db4b353
SZ
176#ifndef KTR_IF_START
177#define KTR_IF_START KTR_ALL
178#endif
179KTR_INFO_MASTER(if_start);
180KTR_INFO(KTR_IF_START, if_start, run, 0,
5bf48697 181 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 182KTR_INFO(KTR_IF_START, if_start, sched, 1,
5bf48697 183 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 184KTR_INFO(KTR_IF_START, if_start, avoid, 2,
5bf48697 185 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 186KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
5bf48697 187 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 188KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
5bf48697 189 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 190#define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg)
f0a26983 191#endif
9db4b353 192
743da179 193TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
315a7da3 194
984263bc
MD
195/*
196 * Network interface utility routines.
197 *
198 * Routines with ifa_ifwith* names take sockaddr *'s as
199 * parameters.
200 */
201/* ARGSUSED*/
202void
f23061d4 203ifinit(void *dummy)
984263bc
MD
204{
205 struct ifnet *ifp;
984263bc 206
abbb44bb
JS
207 callout_init(&if_slowtimo_timer);
208
4986965b 209 crit_enter();
984263bc 210 TAILQ_FOREACH(ifp, &ifnet, if_link) {
f0a26983 211 if (ifp->if_snd.altq_maxlen == 0) {
3e4a09e7 212 if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
ef9870ec 213 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
984263bc
MD
214 }
215 }
4986965b 216 crit_exit();
abbb44bb 217
984263bc
MD
218 if_slowtimo(0);
219}
220
9db4b353 221static void
f0a26983 222ifsq_ifstart_ipifunc(void *arg)
9db4b353 223{
f0a26983
SZ
224 struct ifaltq_subque *ifsq = arg;
225 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);
9db4b353
SZ
226
227 crit_enter();
228 if (lmsg->ms_flags & MSGF_DONE)
ff5fbdd8 229 lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
9db4b353
SZ
230 crit_exit();
231}
232
3cab6b0d 233static __inline void
f0a26983 234ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
3cab6b0d 235{
f0a26983
SZ
236 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
237 TAILQ_REMOVE(&head->stg_head, stage, stg_link);
238 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
239 stage->stg_cnt = 0;
240 stage->stg_len = 0;
3cab6b0d
SZ
241}
242
243static __inline void
f0a26983 244ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
3cab6b0d 245{
f0a26983
SZ
246 KKASSERT((stage->stg_flags &
247 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
248 stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
249 TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
3cab6b0d
SZ
250}
251
9db4b353
SZ
252/*
253 * Schedule ifnet.if_start on ifnet's CPU
254 */
255static void
f0a26983 256ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
9db4b353 257{
9db4b353
SZ
258 int cpu;
259
3cab6b0d 260 if (!force && curthread->td_type == TD_TYPE_NETISR &&
f0a26983
SZ
261 ifsq_stage_cntmax > 0) {
262 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
263
264 stage->stg_cnt = 0;
265 stage->stg_len = 0;
266 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
267 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
268 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
3cab6b0d
SZ
269 return;
270 }
271
f0a26983 272 cpu = ifsq_get_cpuid(ifsq);
9db4b353 273 if (cpu != mycpuid)
f0a26983 274 lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
9db4b353 275 else
f0a26983 276 ifsq_ifstart_ipifunc(ifsq);
9db4b353
SZ
277}
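/*
 * Staging note (summary of ifsq_ifstart_schedule() above, hedged): when
 * the enqueue happens from a netisr thread and net.link.stage_cntmax is
 * greater than zero, the subqueue is parked on the per-cpu staging list
 * (ifsubq_stage_heads[]) with IFSQ_STAGE_FLAG_SCHED set instead of
 * scheduling ifnet.if_start right away; the actual flush happens later
 * (outside this section), which lets several enqueues share one
 * if_start call.  Setting net.link.stage_cntmax to 0 disables this
 * shortcut.
 */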
278
279/*
280 * NOTE:
281 * This function releases the ifnet.if_start interlock
282 * if ifnet.if_start does not need to be scheduled
283 */
284static __inline int
f0a26983 285ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
9db4b353 286{
f0a26983 287 if (!running || ifsq_is_empty(ifsq)
9db4b353 288#ifdef ALTQ
f0a26983 289 || ifsq->ifsq_altq->altq_tbr != NULL
9db4b353
SZ
290#endif
291 ) {
f0a26983 292 ALTQ_SQ_LOCK(ifsq);
9db4b353
SZ
293 /*
294 * ifnet.if_start interlock is released, if:
295 * 1) Hardware cannot take any packets, due to
296 * o interface is marked down
9ed293e0 297 * o hardware queue is full (ifq_is_oactive)
9db4b353
SZ
298 * Under the second situation, hardware interrupt
299 * or polling(4) will call/schedule ifnet.if_start
300 * when hardware queue is ready
301 * 2) There are no packets in ifnet.if_snd.
302 * Further ifq_dispatch or ifq_handoff will call/
303 * schedule ifnet.if_start
304 * 3) TBR is used and it does not allow further
305 * dequeueing.
306 * TBR callout will call ifnet.if_start
307 */
f0a26983
SZ
308 if (!running || !ifsq_data_ready(ifsq)) {
309 ifsq_clr_started(ifsq);
310 ALTQ_SQ_UNLOCK(ifsq);
9db4b353
SZ
311 return 0;
312 }
f0a26983 313 ALTQ_SQ_UNLOCK(ifsq);
9db4b353
SZ
314 }
315 return 1;
316}
317
318static void
f0a26983 319ifsq_ifstart_dispatch(netmsg_t msg)
9db4b353 320{
002c1265 321 struct lwkt_msg *lmsg = &msg->base.lmsg;
f0a26983
SZ
322 struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
323 struct ifnet *ifp = ifsq_get_ifp(ifsq);
404c9fd9 324 int running = 0, need_sched;
9db4b353
SZ
325
326 crit_enter();
327 lwkt_replymsg(lmsg, 0); /* reply ASAP */
328 crit_exit();
329
f0a26983 330 if (mycpuid != ifsq_get_cpuid(ifsq)) {
9db4b353 331 /*
404c9fd9 332 * We need to chase the ifnet CPU change.
9db4b353 333 */
f0a26983 334 ifsq_ifstart_schedule(ifsq, 1);
404c9fd9 335 return;
9db4b353 336 }
9db4b353 337
3c4cd924 338 ifnet_serialize_tx(ifp, ifsq);
f0a26983
SZ
339 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
340 ifp->if_start(ifp, ifsq);
341 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
404c9fd9 342 running = 1;
9db4b353 343 }
f0a26983 344 need_sched = ifsq_ifstart_need_schedule(ifsq, running);
3c4cd924 345 ifnet_deserialize_tx(ifp, ifsq);
404c9fd9
SZ
346
347 if (need_sched) {
2b2f1d64
SZ
348 /*
349 * More data needs to be transmitted, ifnet.if_start is
350 * scheduled on ifnet's CPU, and we keep going.
351 * NOTE: ifnet.if_start interlock is not released.
352 */
f0a26983 353 ifsq_ifstart_schedule(ifsq, 0);
9db4b353
SZ
354 }
355}
356
357/* Device driver ifnet.if_start helper function */
358void
f0a26983 359ifsq_devstart(struct ifaltq_subque *ifsq)
9db4b353 360{
f0a26983 361 struct ifnet *ifp = ifsq_get_ifp(ifsq);
9db4b353
SZ
362 int running = 0;
363
3c4cd924 364 ASSERT_IFNET_SERIALIZED_TX(ifp, ifsq);
9db4b353 365
f0a26983
SZ
366 ALTQ_SQ_LOCK(ifsq);
367 if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
368 ALTQ_SQ_UNLOCK(ifsq);
9db4b353
SZ
369 return;
370 }
f0a26983
SZ
371 ifsq_set_started(ifsq);
372 ALTQ_SQ_UNLOCK(ifsq);
9db4b353 373
f0a26983 374 ifp->if_start(ifp, ifsq);
9db4b353 375
f0a26983 376 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
9db4b353
SZ
377 running = 1;
378
f0a26983 379 if (ifsq_ifstart_need_schedule(ifsq, running)) {
9db4b353
SZ
380 /*
381 * More data needs to be transmitted, ifnet.if_start is
382 * scheduled on ifnet's CPU, and we keep going.
383 * NOTE: ifnet.if_start interlock is not released.
384 */
f0a26983 385 ifsq_ifstart_schedule(ifsq, 0);
9db4b353
SZ
386 }
387}
388
f0a26983
SZ
389void
390if_devstart(struct ifnet *ifp)
391{
392 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
393}
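/*
 * Usage sketch (hypothetical driver, not part of this file): DragonFly
 * drivers call if_devstart() instead of invoking ifnet.if_start
 * directly, typically from the TX-completion path while the TX
 * serializer is held (if_devstart() asserts this).  The "xx" driver and
 * its softc layout below are invented for illustration only.
 */
#if 0
static void
xx_txeof(struct xx_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	/* ... reclaim completed TX descriptors, clear the oactive state ... */

	ifp->if_timer = 0;	/* cancel the watchdog, nothing pending */
	if_devstart(ifp);	/* kick the send queue; a no-op if it is empty */
}
#endif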
394
2dffecda
SZ
395/* Device driver ifnet.if_start schedule helper function */
396void
f0a26983
SZ
397ifsq_devstart_sched(struct ifaltq_subque *ifsq)
398{
399 ifsq_ifstart_schedule(ifsq, 1);
400}
401
402void
2dffecda
SZ
403if_devstart_sched(struct ifnet *ifp)
404{
f0a26983 405 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
2dffecda
SZ
406}
407
a3dd34d2
SZ
408static void
409if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
410{
411 lwkt_serialize_enter(ifp->if_serializer);
412}
413
414static void
415if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
416{
417 lwkt_serialize_exit(ifp->if_serializer);
418}
419
420static int
421if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
422{
423 return lwkt_serialize_try(ifp->if_serializer);
424}
425
2c9effcf
SZ
426#ifdef INVARIANTS
427static void
428if_default_serialize_assert(struct ifnet *ifp,
429 enum ifnet_serialize slz __unused,
430 boolean_t serialized)
431{
432 if (serialized)
433 ASSERT_SERIALIZED(ifp->if_serializer);
434 else
435 ASSERT_NOT_SERIALIZED(ifp->if_serializer);
436}
437#endif
438
984263bc 439/*
78195a76
MD
440 * Attach an interface to the list of "active" interfaces.
441 *
442 * The serializer is optional. If non-NULL, access to the interface
443 * may be MPSAFE.
984263bc
MD
444 */
445void
78195a76 446if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
984263bc
MD
447{
448 unsigned socksize, ifasize;
449 int namelen, masklen;
82ed7fc2
RG
450 struct sockaddr_dl *sdl;
451 struct ifaddr *ifa;
e3e4574a 452 struct ifaltq *ifq;
f0a26983 453 int i, q;
590b8cd4 454
984263bc 455 static int if_indexlim = 8;
984263bc 456
a3dd34d2
SZ
457 if (ifp->if_serialize != NULL) {
458 KASSERT(ifp->if_deserialize != NULL &&
2c9effcf
SZ
459 ifp->if_tryserialize != NULL &&
460 ifp->if_serialize_assert != NULL,
ed20d0e3 461 ("serialize functions are partially setup"));
ae474cfa
SZ
462
463 /*
464 * If the device supplies serialize functions,
465 * then clear if_serializer to catch any invalid
466 * usage of this field.
467 */
468 KASSERT(serializer == NULL,
469 ("both serialize functions and default serializer "
ed20d0e3 470 "are supplied"));
ae474cfa 471 ifp->if_serializer = NULL;
a3dd34d2
SZ
472 } else {
473 KASSERT(ifp->if_deserialize == NULL &&
2c9effcf
SZ
474 ifp->if_tryserialize == NULL &&
475 ifp->if_serialize_assert == NULL,
ed20d0e3 476 ("serialize functions are partially setup"));
a3dd34d2
SZ
477 ifp->if_serialize = if_default_serialize;
478 ifp->if_deserialize = if_default_deserialize;
479 ifp->if_tryserialize = if_default_tryserialize;
2c9effcf
SZ
480#ifdef INVARIANTS
481 ifp->if_serialize_assert = if_default_serialize_assert;
482#endif
ae474cfa
SZ
483
484 /*
485 * The serializer can be passed in from the device,
486 * allowing the same serializer to be used for both
487 * the interrupt interlock and the device queue.
488 * If not specified, the netif structure will use an
489 * embedded serializer.
490 */
491 if (serializer == NULL) {
492 serializer = &ifp->if_default_serializer;
493 lwkt_serialize_init(serializer);
494 }
495 ifp->if_serializer = serializer;
a3dd34d2
SZ
496 }
497
9683f229
MD
498 mtx_init(&ifp->if_ioctl_mtx);
499 mtx_lock(&ifp->if_ioctl_mtx);
500
984263bc
MD
501 TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
502 ifp->if_index = ++if_index;
b2632176 503
984263bc
MD
504 /*
505 * XXX -
506 * The old code would work if the interface passed a pre-existing
507 * chain of ifaddrs to this code. We don't trust our callers to
508 * properly initialize the tailq, however, so we no longer allow
509 * this unlikely case.
510 */
b2632176
SZ
511 ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
512 M_IFADDR, M_WAITOK | M_ZERO);
513 for (i = 0; i < ncpus; ++i)
514 TAILQ_INIT(&ifp->if_addrheads[i]);
515
984263bc 516 TAILQ_INIT(&ifp->if_prefixhead);
441d34b2 517 TAILQ_INIT(&ifp->if_multiaddrs);
2097a299 518 TAILQ_INIT(&ifp->if_groups);
984263bc 519 getmicrotime(&ifp->if_lastchange);
141697b6 520 if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
590b8cd4 521 unsigned int n;
141697b6 522 struct ifnet **q;
590b8cd4
JH
523
524 if_indexlim <<= 1;
984263bc
MD
525
526 /* grow ifindex2ifnet */
141697b6 527 n = if_indexlim * sizeof(*q);
efda3bd0 528 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
984263bc 529 if (ifindex2ifnet) {
f23061d4 530 bcopy(ifindex2ifnet, q, n/2);
efda3bd0 531 kfree(ifindex2ifnet, M_IFADDR);
984263bc 532 }
141697b6 533 ifindex2ifnet = q;
984263bc
MD
534 }
535
536 ifindex2ifnet[if_index] = ifp;
537
538 /*
539 * create a Link Level name for this device
540 */
3e4a09e7 541 namelen = strlen(ifp->if_xname);
60615e94 542 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
984263bc
MD
543 socksize = masklen + ifp->if_addrlen;
544#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
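/*
 * ROUNDUP() pads a size up to the alignment of a long; e.g. with
 * sizeof(long) == 8, ROUNDUP(17) = 1 + ((17 - 1) | 7) = 1 + 23 = 24.
 */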
545 if (socksize < sizeof(*sdl))
546 socksize = sizeof(*sdl);
547 socksize = ROUNDUP(socksize);
60615e94 548#undef ROUNDUP
590b8cd4 549 ifasize = sizeof(struct ifaddr) + 2 * socksize;
b2632176 550 ifa = ifa_create(ifasize, M_WAITOK);
590b8cd4
JH
551 sdl = (struct sockaddr_dl *)(ifa + 1);
552 sdl->sdl_len = socksize;
553 sdl->sdl_family = AF_LINK;
554 bcopy(ifp->if_xname, sdl->sdl_data, namelen);
555 sdl->sdl_nlen = namelen;
556 sdl->sdl_index = ifp->if_index;
557 sdl->sdl_type = ifp->if_type;
141697b6 558 ifp->if_lladdr = ifa;
590b8cd4
JH
559 ifa->ifa_ifp = ifp;
560 ifa->ifa_rtrequest = link_rtrequest;
561 ifa->ifa_addr = (struct sockaddr *)sdl;
562 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
563 ifa->ifa_netmask = (struct sockaddr *)sdl;
564 sdl->sdl_len = masklen;
565 while (namelen != 0)
566 sdl->sdl_data[--namelen] = 0xff;
b2632176 567 ifa_iflink(ifa, ifp, 0 /* Insert head */);
984263bc 568
f2bd8b67 569 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
71fc104f 570 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
f2bd8b67 571
2cc2f639
SZ
572 if (ifp->if_mapsubq == NULL)
573 ifp->if_mapsubq = ifq_mapsubq_default;
574
e3e4574a
JS
575 ifq = &ifp->if_snd;
576 ifq->altq_type = 0;
577 ifq->altq_disc = NULL;
578 ifq->altq_flags &= ALTQF_CANTCHANGE;
579 ifq->altq_tbr = NULL;
580 ifq->altq_ifp = ifp;
4d723e5a 581
f0a26983
SZ
582 if (ifq->altq_subq_cnt <= 0)
583 ifq->altq_subq_cnt = 1;
584 ifq->altq_subq = kmalloc_cachealign(
585 ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
28cc0c29 586 M_DEVBUF, M_WAITOK | M_ZERO);
28cc0c29 587
f0a26983
SZ
588 if (ifq->altq_maxlen == 0) {
589 if_printf(ifp, "driver didn't set ifq_maxlen\n");
590 ifq_set_maxlen(ifq, ifqmaxlen);
42fdf81e
SZ
591 }
592
f0a26983
SZ
593 for (q = 0; q < ifq->altq_subq_cnt; ++q) {
594 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
595
596 ALTQ_SQ_LOCK_INIT(ifsq);
597 ifsq->ifsq_index = q;
598
599 ifsq->ifsq_altq = ifq;
600 ifsq->ifsq_ifp = ifp;
601
602 ifsq->ifq_maxlen = ifq->altq_maxlen;
603 ifsq->ifsq_prepended = NULL;
604 ifsq->ifsq_started = 0;
605 ifsq->ifsq_hw_oactive = 0;
606 ifsq_set_cpuid(ifsq, 0);
607
608 ifsq->ifsq_stage =
609 kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
610 M_DEVBUF, M_WAITOK | M_ZERO);
611 for (i = 0; i < ncpus; ++i)
612 ifsq->ifsq_stage[i].stg_subq = ifsq;
613
614 ifsq->ifsq_ifstart_nmsg =
615 kmalloc(ncpus * sizeof(struct netmsg_base),
616 M_LWKTMSG, M_WAITOK);
617 for (i = 0; i < ncpus; ++i) {
618 netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
619 &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
620 ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
621 }
622 }
623 ifq_set_classic(ifq);
624
9c70fe43 625 if (!SLIST_EMPTY(&domains))
698ac46c
HS
626 if_attachdomain1(ifp);
627
984263bc
MD
628 /* Announce the interface. */
629 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
9683f229
MD
630
631 mtx_unlock(&ifp->if_ioctl_mtx);
984263bc
MD
632}
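/*
 * Attach sketch (hypothetical Ethernet driver, not part of this file):
 * passing a NULL serializer makes if_attach() fall back to the embedded
 * if_default_serializer, while a driver may hand in its own serializer
 * so one lock covers both its interrupt handler and the TX path.  The
 * xx_* names are invented; ether_ifattach() is assumed to forward the
 * serializer argument to if_attach().
 */
#if 0
static int
xx_attach(device_t dev)
{
	struct xx_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = xx_init;
	ifp->if_ioctl = xx_ioctl;
	ifp->if_start = xx_start;
	ifq_set_maxlen(&ifp->if_snd, 256);
	ifq_set_ready(&ifp->if_snd);

	/* NULL => if_attach() uses the embedded default serializer */
	ether_ifattach(ifp, sc->xx_enaddr, NULL);
	return (0);
}
#endif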
633
698ac46c
HS
634static void
635if_attachdomain(void *dummy)
636{
637 struct ifnet *ifp;
698ac46c 638
4986965b
JS
639 crit_enter();
640 TAILQ_FOREACH(ifp, &ifnet, if_list)
698ac46c 641 if_attachdomain1(ifp);
4986965b 642 crit_exit();
698ac46c
HS
643}
644SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
645 if_attachdomain, NULL);
646
647static void
648if_attachdomain1(struct ifnet *ifp)
649{
650 struct domain *dp;
698ac46c 651
4986965b 652 crit_enter();
698ac46c
HS
653
654 /* address family dependent data region */
655 bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
9c70fe43 656 SLIST_FOREACH(dp, &domains, dom_next)
698ac46c
HS
657 if (dp->dom_ifattach)
658 ifp->if_afdata[dp->dom_family] =
659 (*dp->dom_ifattach)(ifp);
4986965b 660 crit_exit();
698ac46c
HS
661}
662
984263bc 663/*
c727e142
SZ
664 * Purge all addresses whose type is _not_ AF_LINK
665 */
666void
667if_purgeaddrs_nolink(struct ifnet *ifp)
668{
b2632176
SZ
669 struct ifaddr_container *ifac, *next;
670
671 TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
672 ifa_link, next) {
673 struct ifaddr *ifa = ifac->ifa;
c727e142 674
c727e142
SZ
675 /* Leave link ifaddr as it is */
676 if (ifa->ifa_addr->sa_family == AF_LINK)
677 continue;
678#ifdef INET
679 /* XXX: Ugly!! ad hoc just for INET */
680 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
681 struct ifaliasreq ifr;
b2632176
SZ
682#ifdef IFADDR_DEBUG_VERBOSE
683 int i;
684
685 kprintf("purge in4 addr %p: ", ifa);
686 for (i = 0; i < ncpus; ++i)
687 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
688 kprintf("\n");
689#endif
c727e142
SZ
690
691 bzero(&ifr, sizeof ifr);
692 ifr.ifra_addr = *ifa->ifa_addr;
693 if (ifa->ifa_dstaddr)
694 ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
695 if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
696 NULL) == 0)
697 continue;
698 }
699#endif /* INET */
700#ifdef INET6
701 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
b2632176
SZ
702#ifdef IFADDR_DEBUG_VERBOSE
703 int i;
704
705 kprintf("purge in6 addr %p: ", ifa);
706 for (i = 0; i < ncpus; ++i)
707 kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
708 kprintf("\n");
709#endif
710
c727e142
SZ
711 in6_purgeaddr(ifa);
712 /* ifp_addrhead is already updated */
713 continue;
714 }
715#endif /* INET6 */
b2632176
SZ
716 ifa_ifunlink(ifa, ifp);
717 ifa_destroy(ifa);
c727e142
SZ
718 }
719}
720
5804f3d1
SZ
721static void
722ifq_stage_detach_handler(netmsg_t nmsg)
723{
724 struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
f0a26983 725 int q;
5804f3d1 726
f0a26983
SZ
727 for (q = 0; q < ifq->altq_subq_cnt; ++q) {
728 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
729 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
730
731 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
732 ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
733 }
5804f3d1
SZ
734 lwkt_replymsg(&nmsg->lmsg, 0);
735}
736
737static void
738ifq_stage_detach(struct ifaltq *ifq)
739{
740 struct netmsg_base base;
741 int cpu;
742
743 netmsg_init(&base, NULL, &curthread->td_msgport, 0,
744 ifq_stage_detach_handler);
745 base.lmsg.u.ms_resultp = ifq;
746
747 for (cpu = 0; cpu < ncpus; ++cpu)
748 lwkt_domsg(netisr_portfn(cpu), &base.lmsg, 0);
749}
750
c727e142 751/*
984263bc
MD
752 * Detach an interface, removing it from the
753 * list of "active" interfaces.
754 */
755void
f23061d4 756if_detach(struct ifnet *ifp)
984263bc 757{
984263bc 758 struct radix_node_head *rnh;
f0a26983 759 int i, q;
ecdefdda 760 int cpu, origcpu;
698ac46c 761 struct domain *dp;
984263bc 762
f2bd8b67
JS
763 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
764
984263bc
MD
765 /*
766 * Remove routes and flush queues.
767 */
4986965b 768 crit_enter();
b3a7093f
SZ
769#ifdef IFPOLL_ENABLE
770 if (ifp->if_flags & IFF_NPOLLING)
771 ifpoll_deregister(ifp);
772#endif
984263bc
MD
773 if_down(ifp);
774
5b1156d4 775#ifdef ALTQ
4d723e5a
JS
776 if (ifq_is_enabled(&ifp->if_snd))
777 altq_disable(&ifp->if_snd);
778 if (ifq_is_attached(&ifp->if_snd))
779 altq_detach(&ifp->if_snd);
5b1156d4 780#endif
4d723e5a 781
984263bc 782 /*
984263bc
MD
783 * Clean up all addresses.
784 */
141697b6 785 ifp->if_lladdr = NULL;
984263bc 786
c727e142 787 if_purgeaddrs_nolink(ifp);
b2632176 788 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
c727e142
SZ
789 struct ifaddr *ifa;
790
b2632176 791 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
c727e142 792 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
27eaa4f1 793 ("non-link ifaddr is left on if_addrheads"));
984263bc 794
b2632176
SZ
795 ifa_ifunlink(ifa, ifp);
796 ifa_destroy(ifa);
797 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
27eaa4f1 798 ("there are still ifaddrs left on if_addrheads"));
984263bc
MD
799 }
800
a98eb818
JS
801#ifdef INET
802 /*
803 * Remove all IPv4 kernel structures related to ifp.
804 */
805 in_ifdetach(ifp);
806#endif
807
984263bc
MD
808#ifdef INET6
809 /*
810 * Remove all IPv6 kernel structs related to ifp. This should be done
811 * before removing routing entries below, since IPv6 interface direct
812 * routes are expected to be removed by the IPv6-specific kernel API.
813 * Otherwise, the kernel will detect some inconsistency and complain.
814 */
815 in6_ifdetach(ifp);
816#endif
817
818 /*
819 * Delete all remaining routes using this interface
820 * Unfortunately the only way to do this is to slog through
821 * the entire routing table looking for routes which point
822 * to this interface...oh well...
823 */
ecdefdda 824 origcpu = mycpuid;
271d38c4 825 for (cpu = 0; cpu < ncpus; cpu++) {
ecdefdda
MD
826 lwkt_migratecpu(cpu);
827 for (i = 1; i <= AF_MAX; i++) {
b2632176 828 if ((rnh = rt_tables[cpu][i]) == NULL)
ecdefdda
MD
829 continue;
830 rnh->rnh_walktree(rnh, if_rtdel, ifp);
831 }
984263bc 832 }
ecdefdda 833 lwkt_migratecpu(origcpu);
984263bc
MD
834
835 /* Announce that the interface is gone. */
836 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
71fc104f 837 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
984263bc 838
9c70fe43 839 SLIST_FOREACH(dp, &domains, dom_next)
698ac46c
HS
840 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
841 (*dp->dom_ifdetach)(ifp,
842 ifp->if_afdata[dp->dom_family]);
698ac46c 843
141697b6
JS
844 /*
845 * Remove interface from ifindex2ifp[] and maybe decrement if_index.
846 */
75857e7c 847 ifindex2ifnet[ifp->if_index] = NULL;
141697b6
JS
848 while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
849 if_index--;
75857e7c 850
984263bc 851 TAILQ_REMOVE(&ifnet, ifp, if_link);
b2632176 852 kfree(ifp->if_addrheads, M_IFADDR);
5804f3d1
SZ
853
854 lwkt_synchronize_ipiqs("if_detach");
855 ifq_stage_detach(&ifp->if_snd);
856
f0a26983
SZ
857 for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
858 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];
859
860 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
861 kfree(ifsq->ifsq_stage, M_DEVBUF);
862 }
407cde39
SZ
863 kfree(ifp->if_snd.altq_subq, M_DEVBUF);
864
4986965b 865 crit_exit();
984263bc
MD
866}
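/*
 * Detach sketch (hypothetical driver, not part of this file): a driver
 * normally stops the hardware under the interface serializer and then
 * calls ether_ifdetach() (assumed here to end up in if_detach()) from
 * its device detach method.  The xx_* names are invented for
 * illustration.
 */
#if 0
static int
xx_detach(device_t dev)
{
	struct xx_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);
	xx_stop(sc);			/* quiesce the hardware */
	bus_teardown_intr(dev, sc->xx_irq_res, sc->xx_irq_handle);
	ifnet_deserialize_all(ifp);

	ether_ifdetach(ifp);
	/* ... release bus resources ... */
	return (0);
}
#endif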
867
868/*
315a7da3
JL
869 * Create interface group without members
870 */
871struct ifg_group *
872if_creategroup(const char *groupname)
873{
874 struct ifg_group *ifg = NULL;
875
876 if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
877 M_TEMP, M_NOWAIT)) == NULL)
878 return (NULL);
879
880 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
881 ifg->ifg_refcnt = 0;
882 ifg->ifg_carp_demoted = 0;
883 TAILQ_INIT(&ifg->ifg_members);
884#if NPF > 0
885 pfi_attach_ifgroup(ifg);
886#endif
887 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
888
889 return (ifg);
890}
891
892/*
893 * Add a group to an interface
894 */
895int
896if_addgroup(struct ifnet *ifp, const char *groupname)
897{
898 struct ifg_list *ifgl;
899 struct ifg_group *ifg = NULL;
900 struct ifg_member *ifgm;
901
902 if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
903 groupname[strlen(groupname) - 1] <= '9')
904 return (EINVAL);
905
906 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
907 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
908 return (EEXIST);
909
910 if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
911 return (ENOMEM);
912
913 if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
914 kfree(ifgl, M_TEMP);
915 return (ENOMEM);
916 }
917
918 TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
919 if (!strcmp(ifg->ifg_group, groupname))
920 break;
921
922 if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
923 kfree(ifgl, M_TEMP);
924 kfree(ifgm, M_TEMP);
925 return (ENOMEM);
926 }
927
928 ifg->ifg_refcnt++;
929 ifgl->ifgl_group = ifg;
930 ifgm->ifgm_ifp = ifp;
931
932 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
933 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
934
935#if NPF > 0
936 pfi_group_change(groupname);
937#endif
938
939 return (0);
940}
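/*
 * Example (hedged, not from this file): interface cloners typically put
 * each instance into a group named after the cloner, so that e.g. pf
 * rules can match "vlan" and catch vlan0, vlan1, ...  Note that
 * if_addgroup() rejects group names ending in a digit (see the check
 * above).
 */
#if 0
	/* inside a hypothetical clone-create path */
	if_addgroup(ifp, "vlan");
#endif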
941
942/*
943 * Remove a group from an interface
944 */
945int
946if_delgroup(struct ifnet *ifp, const char *groupname)
947{
948 struct ifg_list *ifgl;
949 struct ifg_member *ifgm;
950
951 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
952 if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
953 break;
954 if (ifgl == NULL)
955 return (ENOENT);
956
957 TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
958
959 TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
960 if (ifgm->ifgm_ifp == ifp)
961 break;
962
963 if (ifgm != NULL) {
964 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
965 kfree(ifgm, M_TEMP);
966 }
967
968 if (--ifgl->ifgl_group->ifg_refcnt == 0) {
969 TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
970#if NPF > 0
971 pfi_detach_ifgroup(ifgl->ifgl_group);
972#endif
973 kfree(ifgl->ifgl_group, M_TEMP);
974 }
975
976 kfree(ifgl, M_TEMP);
977
978#if NPF > 0
979 pfi_group_change(groupname);
980#endif
981
982 return (0);
983}
984
985/*
986 * Stores all groups from an interface in memory pointed
987 * to by data
988 */
989int
990if_getgroup(caddr_t data, struct ifnet *ifp)
991{
992 int len, error;
993 struct ifg_list *ifgl;
994 struct ifg_req ifgrq, *ifgp;
995 struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
996
997 if (ifgr->ifgr_len == 0) {
998 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
999 ifgr->ifgr_len += sizeof(struct ifg_req);
1000 return (0);
1001 }
1002
1003 len = ifgr->ifgr_len;
1004 ifgp = ifgr->ifgr_groups;
1005 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1006 if (len < sizeof(ifgrq))
1007 return (EINVAL);
1008 bzero(&ifgrq, sizeof ifgrq);
1009 strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1010 sizeof(ifgrq.ifgrq_group));
1011 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1012 sizeof(struct ifg_req))))
1013 return (error);
1014 len -= sizeof(ifgrq);
1015 ifgp++;
1016 }
1017
1018 return (0);
1019}
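/*
 * Userland sketch (hedged): if_getgroup() implements the usual two-pass
 * protocol -- call once with ifgr_len == 0 to learn the needed size,
 * allocate, then call again.  The SIOCGIFGROUP ioctl name is an
 * assumption here (it is how the OpenBSD-derived group code is normally
 * reached); only the length-negotiation pattern is taken from the code
 * above.
 */
#if 0
	struct ifgroupreq ifgr;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifgr, 0, sizeof(ifgr));
	strlcpy(ifgr.ifgr_name, "em0", sizeof(ifgr.ifgr_name));
	if (ioctl(s, SIOCGIFGROUP, &ifgr) == 0 && ifgr.ifgr_len != 0) {
		ifgr.ifgr_groups = malloc(ifgr.ifgr_len);
		if (ifgr.ifgr_groups != NULL &&
		    ioctl(s, SIOCGIFGROUP, &ifgr) == 0) {
			/* ifgr.ifgr_len bytes of struct ifg_req entries */
		}
	}
#endif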
1020
1021/*
1022 * Stores all members of a group in memory pointed to by data
1023 */
1024int
1025if_getgroupmembers(caddr_t data)
1026{
1027 struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1028 struct ifg_group *ifg;
1029 struct ifg_member *ifgm;
1030 struct ifg_req ifgrq, *ifgp;
1031 int len, error;
1032
1033 TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
1034 if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1035 break;
1036 if (ifg == NULL)
1037 return (ENOENT);
1038
1039 if (ifgr->ifgr_len == 0) {
1040 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1041 ifgr->ifgr_len += sizeof(ifgrq);
1042 return (0);
1043 }
1044
1045 len = ifgr->ifgr_len;
1046 ifgp = ifgr->ifgr_groups;
1047 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
1048 if (len < sizeof(ifgrq))
1049 return (EINVAL);
1050 bzero(&ifgrq, sizeof ifgrq);
1051 strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
1052 sizeof(ifgrq.ifgrq_member));
1053 if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1054 sizeof(struct ifg_req))))
1055 return (error);
1056 len -= sizeof(ifgrq);
1057 ifgp++;
1058 }
1059
1060 return (0);
1061}
1062
1063/*
984263bc 1064 * Delete Routes for a Network Interface
f23061d4 1065 *
984263bc
MD
1066 * Called for each routing entry via the rnh->rnh_walktree() call above
1067 * to delete all route entries referencing a detaching network interface.
1068 *
1069 * Arguments:
1070 * rn pointer to node in the routing table
1071 * arg argument passed to rnh->rnh_walktree() - detaching interface
1072 *
1073 * Returns:
1074 * 0 successful
1075 * errno failed - reason indicated
1076 *
1077 */
1078static int
f23061d4 1079if_rtdel(struct radix_node *rn, void *arg)
984263bc
MD
1080{
1081 struct rtentry *rt = (struct rtentry *)rn;
1082 struct ifnet *ifp = arg;
1083 int err;
1084
1085 if (rt->rt_ifp == ifp) {
1086
1087 /*
1088 * Protect (sorta) against walktree recursion problems
1089 * with cloned routes
1090 */
f23061d4 1091 if (!(rt->rt_flags & RTF_UP))
984263bc
MD
1092 return (0);
1093
1094 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1095 rt_mask(rt), rt->rt_flags,
2038fb68 1096 NULL);
984263bc
MD
1097 if (err) {
1098 log(LOG_WARNING, "if_rtdel: error %d\n", err);
1099 }
1100 }
1101
1102 return (0);
1103}
1104
1105/*
984263bc
MD
1106 * Locate an interface based on a complete address.
1107 */
984263bc 1108struct ifaddr *
f23061d4 1109ifa_ifwithaddr(struct sockaddr *addr)
984263bc 1110{
82ed7fc2 1111 struct ifnet *ifp;
984263bc 1112
b2632176
SZ
1113 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1114 struct ifaddr_container *ifac;
1115
1116 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1117 struct ifaddr *ifa = ifac->ifa;
1118
1119 if (ifa->ifa_addr->sa_family != addr->sa_family)
1120 continue;
1121 if (sa_equal(addr, ifa->ifa_addr))
1122 return (ifa);
1123 if ((ifp->if_flags & IFF_BROADCAST) &&
1124 ifa->ifa_broadaddr &&
1125 /* IPv6 doesn't have broadcast */
1126 ifa->ifa_broadaddr->sa_len != 0 &&
1127 sa_equal(ifa->ifa_broadaddr, addr))
1128 return (ifa);
1129 }
984263bc 1130 }
b2632176 1131 return (NULL);
984263bc
MD
1132}
1133/*
1134 * Locate the point to point interface with a given destination address.
1135 */
984263bc 1136struct ifaddr *
f23061d4 1137ifa_ifwithdstaddr(struct sockaddr *addr)
984263bc 1138{
82ed7fc2 1139 struct ifnet *ifp;
984263bc 1140
b2632176
SZ
1141 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1142 struct ifaddr_container *ifac;
1143
1144 if (!(ifp->if_flags & IFF_POINTOPOINT))
1145 continue;
1146
1147 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1148 struct ifaddr *ifa = ifac->ifa;
1149
984263bc
MD
1150 if (ifa->ifa_addr->sa_family != addr->sa_family)
1151 continue;
0c3c561c
JH
1152 if (ifa->ifa_dstaddr &&
1153 sa_equal(addr, ifa->ifa_dstaddr))
984263bc 1154 return (ifa);
b2632176 1155 }
984263bc 1156 }
b2632176 1157 return (NULL);
984263bc
MD
1158}
1159
1160/*
1161 * Find an interface on a specific network. If many, choice
1162 * is most specific found.
1163 */
1164struct ifaddr *
f23061d4 1165ifa_ifwithnet(struct sockaddr *addr)
984263bc 1166{
82ed7fc2 1167 struct ifnet *ifp;
b2632176 1168 struct ifaddr *ifa_maybe = NULL;
984263bc
MD
1169 u_int af = addr->sa_family;
1170 char *addr_data = addr->sa_data, *cplim;
1171
1172 /*
1173 * AF_LINK addresses can be looked up directly by their index number,
1174 * so do that if we can.
1175 */
1176 if (af == AF_LINK) {
b2632176 1177 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
590b8cd4 1178
b2632176
SZ
1179 if (sdl->sdl_index && sdl->sdl_index <= if_index)
1180 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
984263bc
MD
1181 }
1182
1183 /*
1184 * Scan though each interface, looking for ones that have
1185 * addresses in this address family.
1186 */
1187 TAILQ_FOREACH(ifp, &ifnet, if_link) {
b2632176
SZ
1188 struct ifaddr_container *ifac;
1189
1190 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1191 struct ifaddr *ifa = ifac->ifa;
82ed7fc2 1192 char *cp, *cp2, *cp3;
984263bc
MD
1193
1194 if (ifa->ifa_addr->sa_family != af)
1195next: continue;
1196 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1197 /*
1198 * This is a bit broken as it doesn't
1199 * take into account that the remote end may
1200 * be a single node in the network we are
1201 * looking for.
1202 * The trouble is that we don't know the
1203 * netmask for the remote end.
1204 */
0c3c561c
JH
1205 if (ifa->ifa_dstaddr != NULL &&
1206 sa_equal(addr, ifa->ifa_dstaddr))
f23061d4 1207 return (ifa);
984263bc
MD
1208 } else {
1209 /*
1210 * if we have a special address handler,
1211 * then use it instead of the generic one.
1212 */
f23061d4 1213 if (ifa->ifa_claim_addr) {
984263bc
MD
1214 if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1215 return (ifa);
1216 } else {
1217 continue;
1218 }
1219 }
1220
1221 /*
1222 * Scan all the bits in the ifa's address.
1223 * If a bit disagrees with what we are
1224 * looking for, mask it with the netmask
1225 * to see if it really matters.
1226 * (A byte at a time)
1227 */
1228 if (ifa->ifa_netmask == 0)
1229 continue;
1230 cp = addr_data;
1231 cp2 = ifa->ifa_addr->sa_data;
1232 cp3 = ifa->ifa_netmask->sa_data;
590b8cd4
JH
1233 cplim = ifa->ifa_netmask->sa_len +
1234 (char *)ifa->ifa_netmask;
984263bc
MD
1235 while (cp3 < cplim)
1236 if ((*cp++ ^ *cp2++) & *cp3++)
1237 goto next; /* next address! */
1238 /*
1239 * If the netmask of what we just found
1240 * is more specific than what we had before
1241 * (if we had one) then remember the new one
1242 * before continuing to search
1243 * for an even better one.
1244 */
4090d6ff 1245 if (ifa_maybe == NULL ||
f23061d4
JH
1246 rn_refines((char *)ifa->ifa_netmask,
1247 (char *)ifa_maybe->ifa_netmask))
984263bc
MD
1248 ifa_maybe = ifa;
1249 }
1250 }
1251 }
1252 return (ifa_maybe);
1253}
1254
1255/*
1256 * Find an interface address specific to an interface best matching
1257 * a given address.
1258 */
1259struct ifaddr *
f23061d4 1260ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
984263bc 1261{
b2632176 1262 struct ifaddr_container *ifac;
82ed7fc2
RG
1263 char *cp, *cp2, *cp3;
1264 char *cplim;
4090d6ff 1265 struct ifaddr *ifa_maybe = NULL;
984263bc
MD
1266 u_int af = addr->sa_family;
1267
1268 if (af >= AF_MAX)
1269 return (0);
b2632176
SZ
1270 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1271 struct ifaddr *ifa = ifac->ifa;
1272
984263bc
MD
1273 if (ifa->ifa_addr->sa_family != af)
1274 continue;
4090d6ff 1275 if (ifa_maybe == NULL)
984263bc 1276 ifa_maybe = ifa;
0c3c561c
JH
1277 if (ifa->ifa_netmask == NULL) {
1278 if (sa_equal(addr, ifa->ifa_addr) ||
1279 (ifa->ifa_dstaddr != NULL &&
1280 sa_equal(addr, ifa->ifa_dstaddr)))
984263bc
MD
1281 return (ifa);
1282 continue;
1283 }
1284 if (ifp->if_flags & IFF_POINTOPOINT) {
0c3c561c 1285 if (sa_equal(addr, ifa->ifa_dstaddr))
984263bc
MD
1286 return (ifa);
1287 } else {
1288 cp = addr->sa_data;
1289 cp2 = ifa->ifa_addr->sa_data;
1290 cp3 = ifa->ifa_netmask->sa_data;
1291 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1292 for (; cp3 < cplim; cp3++)
1293 if ((*cp++ ^ *cp2++) & *cp3)
1294 break;
1295 if (cp3 == cplim)
1296 return (ifa);
1297 }
1298 }
1299 return (ifa_maybe);
1300}
1301
984263bc
MD
1302/*
1303 * Default action when installing a route with a Link Level gateway.
1304 * Lookup an appropriate real ifa to point to.
1305 * This should be moved to /sys/net/link.c eventually.
1306 */
1307static void
f23061d4 1308link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
984263bc 1309{
82ed7fc2 1310 struct ifaddr *ifa;
984263bc
MD
1311 struct sockaddr *dst;
1312 struct ifnet *ifp;
1313
f23061d4
JH
1314 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1315 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
984263bc
MD
1316 return;
1317 ifa = ifaof_ifpforaddr(dst, ifp);
f23061d4 1318 if (ifa != NULL) {
984263bc 1319 IFAFREE(rt->rt_ifa);
f23061d4 1320 IFAREF(ifa);
984263bc 1321 rt->rt_ifa = ifa;
984263bc
MD
1322 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1323 ifa->ifa_rtrequest(cmd, rt, info);
1324 }
1325}
1326
1327/*
1328 * Mark an interface down and notify protocols of
1329 * the transition.
1330 * NOTE: must be called at splnet or equivalent.
1331 */
1332void
f23061d4 1333if_unroute(struct ifnet *ifp, int flag, int fam)
984263bc 1334{
b2632176 1335 struct ifaddr_container *ifac;
984263bc
MD
1336
1337 ifp->if_flags &= ~flag;
1338 getmicrotime(&ifp->if_lastchange);
b2632176
SZ
1339 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1340 struct ifaddr *ifa = ifac->ifa;
1341
984263bc 1342 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
91be174d 1343 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
b2632176 1344 }
9275f515 1345 ifq_purge_all(&ifp->if_snd);
984263bc
MD
1346 rt_ifmsg(ifp);
1347}
1348
1349/*
1350 * Mark an interface up and notify protocols of
1351 * the transition.
1352 * NOTE: must be called at splnet or equivalent.
1353 */
1354void
f23061d4 1355if_route(struct ifnet *ifp, int flag, int fam)
984263bc 1356{
b2632176 1357 struct ifaddr_container *ifac;
984263bc 1358
9275f515 1359 ifq_purge_all(&ifp->if_snd);
984263bc
MD
1360 ifp->if_flags |= flag;
1361 getmicrotime(&ifp->if_lastchange);
b2632176
SZ
1362 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1363 struct ifaddr *ifa = ifac->ifa;
1364
984263bc 1365 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
91be174d 1366 kpfctlinput(PRC_IFUP, ifa->ifa_addr);
b2632176 1367 }
984263bc
MD
1368 rt_ifmsg(ifp);
1369#ifdef INET6
1370 in6_if_up(ifp);
1371#endif
1372}
1373
1374/*
5c703385
MD
1375 * Mark an interface down and notify protocols of the transition. An
1376 * interface going down is also considered to be a synchronizing event.
1377 * We must ensure that all packet processing related to the interface
1378 * has completed before we return so e.g. the caller can free the ifnet
1379 * structure that the mbufs may be referencing.
1380 *
984263bc
MD
1381 * NOTE: must be called at splnet or equivalent.
1382 */
1383void
f23061d4 1384if_down(struct ifnet *ifp)
984263bc 1385{
984263bc 1386 if_unroute(ifp, IFF_UP, AF_UNSPEC);
5c703385 1387 netmsg_service_sync();
984263bc
MD
1388}
1389
1390/*
1391 * Mark an interface up and notify protocols of
1392 * the transition.
1393 * NOTE: must be called at splnet or equivalent.
1394 */
1395void
f23061d4 1396if_up(struct ifnet *ifp)
984263bc 1397{
984263bc
MD
1398 if_route(ifp, IFF_UP, AF_UNSPEC);
1399}
1400
1401/*
6de83abe
SZ
1402 * Process a link state change.
1403 * NOTE: must be called at splsoftnet or equivalent.
1404 */
1405void
1406if_link_state_change(struct ifnet *ifp)
1407{
71fc104f
HT
1408 int link_state = ifp->if_link_state;
1409
6de83abe 1410 rt_ifmsg(ifp);
71fc104f
HT
1411 devctl_notify("IFNET", ifp->if_xname,
1412 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
6de83abe
SZ
1413}
1414
1415/*
984263bc
MD
1416 * Handle interface watchdog timer routines. Called
1417 * from softclock, we decrement timers (if set) and
1418 * call the appropriate interface routine on expiration.
1419 */
1420static void
f23061d4 1421if_slowtimo(void *arg)
984263bc 1422{
82ed7fc2 1423 struct ifnet *ifp;
4986965b
JS
1424
1425 crit_enter();
984263bc
MD
1426
1427 TAILQ_FOREACH(ifp, &ifnet, if_link) {
1428 if (ifp->if_timer == 0 || --ifp->if_timer)
1429 continue;
78195a76 1430 if (ifp->if_watchdog) {
a3dd34d2 1431 if (ifnet_tryserialize_all(ifp)) {
78195a76 1432 (*ifp->if_watchdog)(ifp);
a3dd34d2 1433 ifnet_deserialize_all(ifp);
78195a76
MD
1434 } else {
1435 /* try again next timeout */
1436 ++ifp->if_timer;
1437 }
1438 }
984263bc 1439 }
4986965b
JS
1440
1441 crit_exit();
1442
abbb44bb 1443 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
984263bc
MD
1444}
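/*
 * Watchdog sketch (hypothetical driver, not part of this file): a driver
 * arms ifp->if_timer when it hands packets to the hardware and clears it
 * when they complete; if the counter reaches zero, if_slowtimo() above
 * calls ifp->if_watchdog with the interface serialized.  The xx_* names
 * are invented for illustration.
 */
#if 0
static void
xx_watchdog(struct ifnet *ifp)
{
	struct xx_softc *sc = ifp->if_softc;

	if_printf(ifp, "transmit timed out\n");
	ifp->if_oerrors++;
	xx_init(sc);		/* reset the hardware */
	if_devstart(ifp);	/* restart the send queue */
}
#endif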
1445
1446/*
1447 * Map interface name to
1448 * interface structure pointer.
1449 */
1450struct ifnet *
1451ifunit(const char *name)
1452{
984263bc 1453 struct ifnet *ifp;
984263bc 1454
984263bc 1455 /*
3e4a09e7 1456 * Search all the interfaces for this name/number
984263bc 1457 */
3e4a09e7 1458
984263bc 1459 TAILQ_FOREACH(ifp, &ifnet, if_link) {
3e4a09e7 1460 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
984263bc
MD
1461 break;
1462 }
1463 return (ifp);
1464}
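/*
 * Example (sketch): callers such as ifioctl() below resolve a name to an
 * ifnet with ifunit(); the name must match if_xname exactly.
 */
#if 0
	struct ifnet *ifp = ifunit("em0");

	if (ifp == NULL)
		return (ENXIO);
#endif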
1465
1466
1467/*
1468 * Map interface name in a sockaddr_dl to
1469 * interface structure pointer.
1470 */
1471struct ifnet *
f23061d4 1472if_withname(struct sockaddr *sa)
984263bc
MD
1473{
1474 char ifname[IFNAMSIZ+1];
1475 struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1476
1477 if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1478 (sdl->sdl_nlen > IFNAMSIZ) )
1479 return NULL;
1480
1481 /*
1482 * ifunit wants a null-terminated name. It may not be null-terminated
1483 * in the sockaddr. We don't want to change the caller's sockaddr,
1484 * and there might not be room to put the trailing null anyway, so we
1485 * make a local copy that we know we can null terminate safely.
1486 */
1487
1488 bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1489 ifname[sdl->sdl_nlen] = '\0';
1490 return ifunit(ifname);
1491}
1492
1493
1494/*
1495 * Interface ioctls.
1496 */
1497int
87de5057 1498ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
984263bc 1499{
41c20dac
MD
1500 struct ifnet *ifp;
1501 struct ifreq *ifr;
984263bc
MD
1502 struct ifstat *ifs;
1503 int error;
1504 short oif_flags;
1505 int new_flags;
9683f229
MD
1506#ifdef COMPAT_43
1507 int ocmd;
1508#endif
1fdf0954
HP
1509 size_t namelen, onamelen;
1510 char new_name[IFNAMSIZ];
1511 struct ifaddr *ifa;
1512 struct sockaddr_dl *sdl;
984263bc
MD
1513
1514 switch (cmd) {
984263bc
MD
1515 case SIOCGIFCONF:
1516 case OSIOCGIFCONF:
87de5057 1517 return (ifconf(cmd, data, cred));
9683f229
MD
1518 default:
1519 break;
984263bc 1520 }
9683f229 1521
984263bc
MD
1522 ifr = (struct ifreq *)data;
1523
1524 switch (cmd) {
1525 case SIOCIFCREATE:
c5e14c14
RP
1526 case SIOCIFCREATE2:
1527 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1528 return (error);
1529 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1530 cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
984263bc 1531 case SIOCIFDESTROY:
895c1f85 1532 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
984263bc 1533 return (error);
c5e14c14 1534 return (if_clone_destroy(ifr->ifr_name));
984263bc
MD
1535 case SIOCIFGCLONERS:
1536 return (if_clone_list((struct if_clonereq *)data));
9683f229
MD
1537 default:
1538 break;
984263bc
MD
1539 }
1540
9683f229
MD
1541 /*
1542 * Nominal ioctl through interface, lookup the ifp and obtain a
1543 * lock to serialize the ifconfig ioctl operation.
1544 */
984263bc 1545 ifp = ifunit(ifr->ifr_name);
9683f229 1546 if (ifp == NULL)
984263bc 1547 return (ENXIO);
9683f229
MD
1548 error = 0;
1549 mtx_lock(&ifp->if_ioctl_mtx);
984263bc 1550
9683f229 1551 switch (cmd) {
12b71966
PA
1552 case SIOCGIFINDEX:
1553 ifr->ifr_index = ifp->if_index;
1554 break;
1555
984263bc
MD
1556 case SIOCGIFFLAGS:
1557 ifr->ifr_flags = ifp->if_flags;
46f25451 1558 ifr->ifr_flagshigh = ifp->if_flags >> 16;
984263bc
MD
1559 break;
1560
1561 case SIOCGIFCAP:
1562 ifr->ifr_reqcap = ifp->if_capabilities;
1563 ifr->ifr_curcap = ifp->if_capenable;
1564 break;
1565
1566 case SIOCGIFMETRIC:
1567 ifr->ifr_metric = ifp->if_metric;
1568 break;
1569
1570 case SIOCGIFMTU:
1571 ifr->ifr_mtu = ifp->if_mtu;
1572 break;
1573
e41e61d5
SZ
1574 case SIOCGIFTSOLEN:
1575 ifr->ifr_tsolen = ifp->if_tsolen;
1576 break;
1577
315a7da3
JL
1578 case SIOCGIFDATA:
1579 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
9683f229 1580 sizeof(ifp->if_data));
315a7da3
JL
1581 break;
1582
984263bc
MD
1583 case SIOCGIFPHYS:
1584 ifr->ifr_phys = ifp->if_physical;
1585 break;
1586
1630efc5 1587 case SIOCGIFPOLLCPU:
1630efc5 1588 ifr->ifr_pollcpu = -1;
1630efc5
SZ
1589 break;
1590
1591 case SIOCSIFPOLLCPU:
1630efc5
SZ
1592 break;
1593
984263bc 1594 case SIOCSIFFLAGS:
895c1f85 1595 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1596 if (error)
9683f229 1597 break;
984263bc
MD
1598 new_flags = (ifr->ifr_flags & 0xffff) |
1599 (ifr->ifr_flagshigh << 16);
1600 if (ifp->if_flags & IFF_SMART) {
1601 /* Smart drivers twiddle their own routes */
1602 } else if (ifp->if_flags & IFF_UP &&
1603 (new_flags & IFF_UP) == 0) {
4986965b 1604 crit_enter();
984263bc 1605 if_down(ifp);
4986965b 1606 crit_exit();
984263bc
MD
1607 } else if (new_flags & IFF_UP &&
1608 (ifp->if_flags & IFF_UP) == 0) {
4986965b 1609 crit_enter();
984263bc 1610 if_up(ifp);
4986965b 1611 crit_exit();
984263bc 1612 }
9c095379 1613
b3a7093f
SZ
1614#ifdef IFPOLL_ENABLE
1615 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1616 if (new_flags & IFF_NPOLLING)
1617 ifpoll_register(ifp);
1618 else
1619 ifpoll_deregister(ifp);
1620 }
1621#endif
9c095379 1622
984263bc
MD
1623 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1624 (new_flags &~ IFF_CANTCHANGE);
984263bc
MD
1625 if (new_flags & IFF_PPROMISC) {
1626 /* Permanently promiscuous mode requested */
1627 ifp->if_flags |= IFF_PROMISC;
1628 } else if (ifp->if_pcount == 0) {
1629 ifp->if_flags &= ~IFF_PROMISC;
1630 }
78195a76 1631 if (ifp->if_ioctl) {
a3dd34d2 1632 ifnet_serialize_all(ifp);
87de5057 1633 ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 1634 ifnet_deserialize_all(ifp);
78195a76 1635 }
984263bc
MD
1636 getmicrotime(&ifp->if_lastchange);
1637 break;
1638
1639 case SIOCSIFCAP:
895c1f85 1640 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1641 if (error)
9683f229
MD
1642 break;
1643 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1644 error = EINVAL;
1645 break;
1646 }
a3dd34d2 1647 ifnet_serialize_all(ifp);
87de5057 1648 ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 1649 ifnet_deserialize_all(ifp);
984263bc
MD
1650 break;
1651
f23061d4 1652 case SIOCSIFNAME:
895c1f85 1653 error = priv_check_cred(cred, PRIV_ROOT, 0);
9683f229
MD
1654 if (error)
1655 break;
f23061d4 1656 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
9683f229
MD
1657 if (error)
1658 break;
1659 if (new_name[0] == '\0') {
1660 error = EINVAL;
1661 break;
1662 }
1663 if (ifunit(new_name) != NULL) {
1664 error = EEXIST;
1665 break;
1666 }
f2bd8b67
JS
1667
1668 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
f23061d4
JH
1669
1670 /* Announce the departure of the interface. */
1671 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1672
1673 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
b2632176 1674 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
f23061d4
JH
1675 /* XXX IFA_LOCK(ifa); */
1676 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1677 namelen = strlen(new_name);
1678 onamelen = sdl->sdl_nlen;
1679 /*
1680 * Move the address if needed. This is safe because we
1681 * allocate space for a name of length IFNAMSIZ when we
1682 * create this in if_attach().
1683 */
1684 if (namelen != onamelen) {
1685 bcopy(sdl->sdl_data + onamelen,
1686 sdl->sdl_data + namelen, sdl->sdl_alen);
1687 }
1688 bcopy(new_name, sdl->sdl_data, namelen);
1689 sdl->sdl_nlen = namelen;
1690 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1691 bzero(sdl->sdl_data, onamelen);
1692 while (namelen != 0)
1693 sdl->sdl_data[--namelen] = 0xff;
1694 /* XXX IFA_UNLOCK(ifa) */
f2bd8b67
JS
1695
1696 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
f23061d4
JH
1697
1698 /* Announce the return of the interface. */
1699 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1700 break;
1fdf0954 1701
984263bc 1702 case SIOCSIFMETRIC:
895c1f85 1703 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1704 if (error)
9683f229 1705 break;
984263bc
MD
1706 ifp->if_metric = ifr->ifr_metric;
1707 getmicrotime(&ifp->if_lastchange);
1708 break;
1709
1710 case SIOCSIFPHYS:
895c1f85 1711 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1712 if (error)
9683f229
MD
1713 break;
1714 if (ifp->if_ioctl == NULL) {
1715 error = EOPNOTSUPP;
1716 break;
1717 }
a3dd34d2 1718 ifnet_serialize_all(ifp);
87de5057 1719 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 1720 ifnet_deserialize_all(ifp);
984263bc
MD
1721 if (error == 0)
1722 getmicrotime(&ifp->if_lastchange);
9683f229 1723 break;
984263bc
MD
1724
1725 case SIOCSIFMTU:
1726 {
1727 u_long oldmtu = ifp->if_mtu;
1728
895c1f85 1729 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1730 if (error)
9683f229
MD
1731 break;
1732 if (ifp->if_ioctl == NULL) {
1733 error = EOPNOTSUPP;
1734 break;
1735 }
1736 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1737 error = EINVAL;
1738 break;
1739 }
a3dd34d2 1740 ifnet_serialize_all(ifp);
87de5057 1741 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 1742 ifnet_deserialize_all(ifp);
984263bc
MD
1743 if (error == 0) {
1744 getmicrotime(&ifp->if_lastchange);
1745 rt_ifmsg(ifp);
1746 }
1747 /*
1748 * If the link MTU changed, do the network-layer-specific processing.
1749 */
1750 if (ifp->if_mtu != oldmtu) {
1751#ifdef INET6
1752 nd6_setmtu(ifp);
1753#endif
1754 }
9683f229 1755 break;
984263bc
MD
1756 }
1757
e41e61d5
SZ
1758 case SIOCSIFTSOLEN:
1759 error = priv_check_cred(cred, PRIV_ROOT, 0);
1760 if (error)
1761 break;
1762
1763 /* XXX need driver supplied upper limit */
1764 if (ifr->ifr_tsolen <= 0) {
1765 error = EINVAL;
1766 break;
1767 }
1768 ifp->if_tsolen = ifr->ifr_tsolen;
1769 break;
1770
984263bc
MD
1771 case SIOCADDMULTI:
1772 case SIOCDELMULTI:
895c1f85 1773 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1774 if (error)
9683f229 1775 break;
984263bc
MD
1776
1777 /* Don't allow group membership on non-multicast interfaces. */
9683f229
MD
1778 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1779 error = EOPNOTSUPP;
1780 break;
1781 }
984263bc
MD
1782
1783 /* Don't let users screw up protocols' entries. */
9683f229
MD
1784 if (ifr->ifr_addr.sa_family != AF_LINK) {
1785 error = EINVAL;
1786 break;
1787 }
984263bc
MD
1788
1789 if (cmd == SIOCADDMULTI) {
1790 struct ifmultiaddr *ifma;
1791 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1792 } else {
1793 error = if_delmulti(ifp, &ifr->ifr_addr);
1794 }
1795 if (error == 0)
1796 getmicrotime(&ifp->if_lastchange);
9683f229 1797 break;
984263bc
MD
1798
1799 case SIOCSIFPHYADDR:
1800 case SIOCDIFPHYADDR:
1801#ifdef INET6
1802 case SIOCSIFPHYADDR_IN6:
1803#endif
1804 case SIOCSLIFPHYADDR:
1805 case SIOCSIFMEDIA:
1806 case SIOCSIFGENERIC:
895c1f85 1807 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1808 if (error)
9683f229
MD
1809 break;
1810 if (ifp->if_ioctl == 0) {
1811 error = EOPNOTSUPP;
1812 break;
1813 }
a3dd34d2 1814 ifnet_serialize_all(ifp);
87de5057 1815 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 1816 ifnet_deserialize_all(ifp);
984263bc
MD
1817 if (error == 0)
1818 getmicrotime(&ifp->if_lastchange);
9683f229 1819 break;
984263bc
MD
1820
1821 case SIOCGIFSTATUS:
1822 ifs = (struct ifstat *)data;
1823 ifs->ascii[0] = '\0';
9683f229 1824 /* fall through */
984263bc
MD
1825 case SIOCGIFPSRCADDR:
1826 case SIOCGIFPDSTADDR:
1827 case SIOCGLIFPHYADDR:
1828 case SIOCGIFMEDIA:
1829 case SIOCGIFGENERIC:
9683f229
MD
1830 if (ifp->if_ioctl == NULL) {
1831 error = EOPNOTSUPP;
1832 break;
1833 }
a3dd34d2 1834 ifnet_serialize_all(ifp);
87de5057 1835 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 1836 ifnet_deserialize_all(ifp);
9683f229 1837 break;
984263bc
MD
1838
1839 case SIOCSIFLLADDR:
895c1f85 1840 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 1841 if (error)
9683f229
MD
1842 break;
1843 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1844 ifr->ifr_addr.sa_len);
19f10c78 1845 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
9683f229 1846 break;
984263bc
MD
1847
1848 default:
1849 oif_flags = ifp->if_flags;
9683f229
MD
1850 if (so->so_proto == 0) {
1851 error = EOPNOTSUPP;
1852 break;
1853 }
984263bc 1854#ifndef COMPAT_43
04951810 1855 error = so_pru_control_direct(so, cmd, data, ifp);
984263bc 1856#else
9683f229 1857 ocmd = cmd;
984263bc
MD
1858
1859 switch (cmd) {
984263bc
MD
1860 case SIOCSIFDSTADDR:
1861 case SIOCSIFADDR:
1862 case SIOCSIFBRDADDR:
1863 case SIOCSIFNETMASK:
1864#if BYTE_ORDER != BIG_ENDIAN
1865 if (ifr->ifr_addr.sa_family == 0 &&
1866 ifr->ifr_addr.sa_len < 16) {
1867 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1868 ifr->ifr_addr.sa_len = 16;
1869 }
1870#else
1871 if (ifr->ifr_addr.sa_len == 0)
1872 ifr->ifr_addr.sa_len = 16;
1873#endif
1874 break;
984263bc
MD
1875 case OSIOCGIFADDR:
1876 cmd = SIOCGIFADDR;
1877 break;
984263bc
MD
1878 case OSIOCGIFDSTADDR:
1879 cmd = SIOCGIFDSTADDR;
1880 break;
984263bc
MD
1881 case OSIOCGIFBRDADDR:
1882 cmd = SIOCGIFBRDADDR;
1883 break;
984263bc
MD
1884 case OSIOCGIFNETMASK:
1885 cmd = SIOCGIFNETMASK;
9683f229
MD
1886 break;
1887 default:
1888 break;
984263bc 1889 }
984263bc 1890
002c1265
MD
1891 error = so_pru_control_direct(so, cmd, data, ifp);
1892
1893 switch (ocmd) {
984263bc
MD
1894 case OSIOCGIFADDR:
1895 case OSIOCGIFDSTADDR:
1896 case OSIOCGIFBRDADDR:
1897 case OSIOCGIFNETMASK:
1898 *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
002c1265 1899 break;
984263bc 1900 }
984263bc
MD
1901#endif /* COMPAT_43 */
1902
1903 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1904#ifdef INET6
1905			DELAY(100); /* XXX: temporary workaround for fxp issue */
1906 if (ifp->if_flags & IFF_UP) {
4986965b 1907 crit_enter();
984263bc 1908 in6_if_up(ifp);
4986965b 1909 crit_exit();
984263bc
MD
1910 }
1911#endif
1912 }
9683f229 1913 break;
984263bc 1914 }
9683f229
MD
1915
1916 mtx_unlock(&ifp->if_ioctl_mtx);
1917 return (error);
984263bc
MD
1918}
1919
1920/*
1921 * Set/clear promiscuous mode on interface ifp based on the truth value
1922 * of pswitch. The calls are reference counted so that only the first
1923 * "on" request actually has an effect, as does the final "off" request.
1924 * Results are undefined if the "off" and "on" requests are not matched.
1925 */
1926int
f23061d4 1927ifpromisc(struct ifnet *ifp, int pswitch)
984263bc
MD
1928{
1929 struct ifreq ifr;
1930 int error;
1931 int oldflags;
1932
1933 oldflags = ifp->if_flags;
46f25451 1934 if (ifp->if_flags & IFF_PPROMISC) {
984263bc
MD
1935 /* Do nothing if device is in permanently promiscuous mode */
1936 ifp->if_pcount += pswitch ? 1 : -1;
1937 return (0);
1938 }
1939 if (pswitch) {
1940 /*
1941 * If the device is not configured up, we cannot put it in
1942 * promiscuous mode.
1943 */
1944 if ((ifp->if_flags & IFF_UP) == 0)
1945 return (ENETDOWN);
1946 if (ifp->if_pcount++ != 0)
1947 return (0);
1948 ifp->if_flags |= IFF_PROMISC;
3e4a09e7
MD
1949 log(LOG_INFO, "%s: promiscuous mode enabled\n",
1950 ifp->if_xname);
984263bc
MD
1951 } else {
1952 if (--ifp->if_pcount > 0)
1953 return (0);
1954 ifp->if_flags &= ~IFF_PROMISC;
3e4a09e7
MD
1955 log(LOG_INFO, "%s: promiscuous mode disabled\n",
1956 ifp->if_xname);
984263bc
MD
1957 }
1958 ifr.ifr_flags = ifp->if_flags;
46f25451 1959 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2
SZ
1960 ifnet_serialize_all(ifp);
1961 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1962 ifnet_deserialize_all(ifp);
984263bc
MD
1963 if (error == 0)
1964 rt_ifmsg(ifp);
1965 else
1966 ifp->if_flags = oldflags;
1967 return error;
1968}
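
/*
 * Illustrative sketch (not part of if.c): how a hypothetical consumer,
 * e.g. a packet-capture hook, would bracket its use of ifpromisc().
 * The names mycap_open/mycap_close are assumptions for illustration
 * only; the point is that every "on" request must be matched by
 * exactly one "off" request, since only the first increment and the
 * last decrement of the reference count actually touch IFF_PROMISC.
 */
#ifdef notdef
static int
mycap_open(struct ifnet *ifp)
{
	/*
	 * First matched "on": sets IFF_PROMISC; later calls only bump
	 * ifp->if_pcount.
	 */
	return (ifpromisc(ifp, 1));
}

static void
mycap_close(struct ifnet *ifp)
{
	/* Must pair 1:1 with the earlier ifpromisc(ifp, 1). */
	ifpromisc(ifp, 0);
}
#endif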
1969
1970/*
1971 * Return the interface configuration
1972 * of the system. The list may be used
1973 * in later ioctls (above) to get
1974 * other per-interface information.
1975 */
984263bc 1976static int
87de5057 1977ifconf(u_long cmd, caddr_t data, struct ucred *cred)
984263bc 1978{
41c20dac
MD
1979 struct ifconf *ifc = (struct ifconf *)data;
1980 struct ifnet *ifp;
984263bc
MD
1981 struct sockaddr *sa;
1982 struct ifreq ifr, *ifrp;
1983 int space = ifc->ifc_len, error = 0;
1984
1985 ifrp = ifc->ifc_req;
1986 TAILQ_FOREACH(ifp, &ifnet, if_link) {
b2632176 1987 struct ifaddr_container *ifac;
3e4a09e7 1988 int addrs;
984263bc 1989
f23061d4 1990 if (space <= sizeof ifr)
984263bc 1991 break;
623c059e
JS
1992
1993 /*
95f018e8
MD
1994 * Zero the stack declared structure first to prevent
1995 * memory disclosure.
623c059e 1996 */
95f018e8 1997 bzero(&ifr, sizeof(ifr));
3e4a09e7
MD
1998 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1999 >= sizeof(ifr.ifr_name)) {
984263bc
MD
2000 error = ENAMETOOLONG;
2001 break;
984263bc
MD
2002 }
2003
2004 addrs = 0;
b2632176
SZ
2005 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2006 struct ifaddr *ifa = ifac->ifa;
2007
f23061d4 2008 if (space <= sizeof ifr)
984263bc
MD
2009 break;
2010 sa = ifa->ifa_addr;
87de5057
MD
2011 if (cred->cr_prison &&
2012 prison_if(cred, sa))
984263bc
MD
2013 continue;
2014 addrs++;
2015#ifdef COMPAT_43
2016 if (cmd == OSIOCGIFCONF) {
2017 struct osockaddr *osa =
2018 (struct osockaddr *)&ifr.ifr_addr;
2019 ifr.ifr_addr = *sa;
2020 osa->sa_family = sa->sa_family;
f23061d4 2021 error = copyout(&ifr, ifrp, sizeof ifr);
984263bc
MD
2022 ifrp++;
2023 } else
2024#endif
2025 if (sa->sa_len <= sizeof(*sa)) {
2026 ifr.ifr_addr = *sa;
f23061d4 2027 error = copyout(&ifr, ifrp, sizeof ifr);
984263bc
MD
2028 ifrp++;
2029 } else {
f23061d4 2030 if (space < (sizeof ifr) + sa->sa_len -
984263bc
MD
2031 sizeof(*sa))
2032 break;
2033 space -= sa->sa_len - sizeof(*sa);
f23061d4
JH
2034 error = copyout(&ifr, ifrp,
2035 sizeof ifr.ifr_name);
984263bc 2036 if (error == 0)
f23061d4
JH
2037 error = copyout(sa, &ifrp->ifr_addr,
2038 sa->sa_len);
984263bc
MD
2039 ifrp = (struct ifreq *)
2040 (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
2041 }
2042 if (error)
2043 break;
f23061d4 2044 space -= sizeof ifr;
984263bc
MD
2045 }
2046 if (error)
2047 break;
2048 if (!addrs) {
f23061d4
JH
2049 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
2050 error = copyout(&ifr, ifrp, sizeof ifr);
984263bc
MD
2051 if (error)
2052 break;
f23061d4 2053 space -= sizeof ifr;
984263bc
MD
2054 ifrp++;
2055 }
2056 }
2057 ifc->ifc_len -= space;
2058 return (error);
2059}
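
/*
 * Illustrative userland sketch (not part of if.c, build separately) of
 * the SIOCGIFCONF request served by ifconf() above.  Note the
 * variable-length entries: when sa_len exceeds sizeof(struct sockaddr)
 * the walk must advance by sa_len, mirroring how ifconf() packed the
 * buffer.  Buffer size and error handling are assumptions.
 */
#ifdef notdef
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char buf[8192], *ptr;
	struct ifconf ifc;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return (1);
	memset(&ifc, 0, sizeof(ifc));
	ifc.ifc_len = sizeof(buf);
	ifc.ifc_buf = buf;
	if (ioctl(s, SIOCGIFCONF, &ifc) < 0)
		return (1);
	for (ptr = buf; ptr < buf + ifc.ifc_len; ) {
		struct ifreq *ifr = (struct ifreq *)ptr;
		size_t salen = ifr->ifr_addr.sa_len;

		printf("%s\n", ifr->ifr_name);
		/* Entries grow past sizeof(struct ifreq) for big addresses. */
		if (salen < sizeof(struct sockaddr))
			salen = sizeof(struct sockaddr);
		ptr += sizeof(ifr->ifr_name) + salen;
	}
	close(s);
	return (0);
}
#endif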
2060
2061/*
2062 * Just like ifpromisc(), but for all-multicast-reception mode.
2063 */
2064int
f23061d4 2065if_allmulti(struct ifnet *ifp, int onswitch)
984263bc
MD
2066{
2067 int error = 0;
984263bc
MD
2068 struct ifreq ifr;
2069
4986965b
JS
2070 crit_enter();
2071
984263bc
MD
2072 if (onswitch) {
2073 if (ifp->if_amcount++ == 0) {
2074 ifp->if_flags |= IFF_ALLMULTI;
2075 ifr.ifr_flags = ifp->if_flags;
46f25451 2076 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2 2077 ifnet_serialize_all(ifp);
bd4539cc 2078 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2079 NULL);
a3dd34d2 2080 ifnet_deserialize_all(ifp);
984263bc
MD
2081 }
2082 } else {
2083 if (ifp->if_amcount > 1) {
2084 ifp->if_amcount--;
2085 } else {
2086 ifp->if_amcount = 0;
2087 ifp->if_flags &= ~IFF_ALLMULTI;
2088 ifr.ifr_flags = ifp->if_flags;
46f25451 2089 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2 2090 ifnet_serialize_all(ifp);
bd4539cc 2091 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2092 NULL);
a3dd34d2 2093 ifnet_deserialize_all(ifp);
984263bc
MD
2094 }
2095 }
4986965b
JS
2096
2097 crit_exit();
984263bc
MD
2098
2099 if (error == 0)
2100 rt_ifmsg(ifp);
2101 return error;
2102}
2103
2104/*
2105 * Add a multicast listenership to the interface in question.
2106 * The link layer provides a routine which converts the address to its
link-layer form, if one is needed.
2107 */
2108int
f23061d4
JH
2109if_addmulti(
2110 struct ifnet *ifp, /* interface to manipulate */
2111 struct sockaddr *sa, /* address to add */
2112 struct ifmultiaddr **retifma)
984263bc
MD
2113{
2114 struct sockaddr *llsa, *dupsa;
4986965b 2115 int error;
984263bc
MD
2116 struct ifmultiaddr *ifma;
2117
2118 /*
2119 * If the matching multicast address already exists
2120 * then don't add a new one, just add a reference
2121 */
441d34b2 2122 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
0c3c561c 2123 if (sa_equal(sa, ifma->ifma_addr)) {
984263bc
MD
2124 ifma->ifma_refcount++;
2125 if (retifma)
2126 *retifma = ifma;
2127 return 0;
2128 }
2129 }
2130
2131 /*
2132 * Give the link layer a chance to accept/reject it, and also
2133 * find out which AF_LINK address this maps to, if it isn't one
2134 * already.
2135 */
2136 if (ifp->if_resolvemulti) {
a3dd34d2 2137 ifnet_serialize_all(ifp);
984263bc 2138 error = ifp->if_resolvemulti(ifp, &llsa, sa);
a3dd34d2 2139 ifnet_deserialize_all(ifp);
78195a76
MD
2140 if (error)
2141 return error;
984263bc 2142 } else {
4090d6ff 2143 llsa = NULL;
984263bc
MD
2144 }
2145
884717e1
SW
2146 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2147 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
984263bc
MD
2148 bcopy(sa, dupsa, sa->sa_len);
2149
2150 ifma->ifma_addr = dupsa;
2151 ifma->ifma_lladdr = llsa;
2152 ifma->ifma_ifp = ifp;
2153 ifma->ifma_refcount = 1;
2154 ifma->ifma_protospec = 0;
2155 rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2156
2157 /*
2158 * Some network interfaces can scan the address list at
2159 * interrupt time; lock them out.
2160 */
4986965b 2161 crit_enter();
441d34b2 2162 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
4986965b 2163 crit_exit();
6cd0715f
RP
2164 if (retifma)
2165 *retifma = ifma;
984263bc 2166
4090d6ff 2167 if (llsa != NULL) {
441d34b2 2168 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
0c3c561c 2169 if (sa_equal(ifma->ifma_addr, llsa))
984263bc
MD
2170 break;
2171 }
2172 if (ifma) {
2173 ifma->ifma_refcount++;
2174 } else {
884717e1
SW
2175 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2176 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
984263bc
MD
2177 bcopy(llsa, dupsa, llsa->sa_len);
2178 ifma->ifma_addr = dupsa;
2179 ifma->ifma_ifp = ifp;
2180 ifma->ifma_refcount = 1;
4986965b 2181 crit_enter();
441d34b2 2182 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
4986965b 2183 crit_exit();
984263bc
MD
2184 }
2185 }
2186 /*
2187 * We are certain we have added something, so call down to the
2188	 * interface to let it know about it.
2189 */
4986965b 2190 crit_enter();
a3dd34d2 2191 ifnet_serialize_all(ifp);
6cd0715f
RP
2192 if (ifp->if_ioctl)
2193 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
a3dd34d2 2194 ifnet_deserialize_all(ifp);
4986965b 2195 crit_exit();
984263bc
MD
2196
2197 return 0;
2198}
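
/*
 * Illustrative sketch (not part of if.c): joining a link-layer
 * multicast group from a hypothetical helper.  The function name and
 * the caller-supplied 6-byte group address are assumptions; the group
 * must be a link-layer multicast address for the interface's
 * if_resolvemulti routine to accept it.
 */
#ifdef notdef
static int
example_join_group(struct ifnet *ifp, const u_char group[6])
{
	struct sockaddr_dl sdl;
	struct ifmultiaddr *ifma;

	/* Describe the link-layer group as an AF_LINK sockaddr. */
	bzero(&sdl, sizeof(sdl));
	sdl.sdl_len = sizeof(sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_alen = 6;
	bcopy(group, LLADDR(&sdl), 6);

	/*
	 * Reference counted: joining the same group twice only bumps
	 * ifma_refcount; each join must eventually be matched by
	 * if_delmulti(ifp, (struct sockaddr *)&sdl).
	 */
	return (if_addmulti(ifp, (struct sockaddr *)&sdl, &ifma));
}
#endif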
2199
2200/*
2201 * Remove a reference to a multicast address on this interface. Yell
2202 * if the request does not match an existing membership.
2203 */
2204int
f23061d4 2205if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
984263bc
MD
2206{
2207 struct ifmultiaddr *ifma;
984263bc 2208
441d34b2 2209 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
0c3c561c 2210 if (sa_equal(sa, ifma->ifma_addr))
984263bc 2211 break;
4090d6ff 2212 if (ifma == NULL)
984263bc
MD
2213 return ENOENT;
2214
2215 if (ifma->ifma_refcount > 1) {
2216 ifma->ifma_refcount--;
2217 return 0;
2218 }
2219
2220 rt_newmaddrmsg(RTM_DELMADDR, ifma);
2221 sa = ifma->ifma_lladdr;
4986965b 2222 crit_enter();
441d34b2 2223 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
984263bc
MD
2224 /*
2225 * Make sure the interface driver is notified
2226 * in the case of a link layer mcast group being left.
2227 */
4090d6ff 2228 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
a3dd34d2 2229 ifnet_serialize_all(ifp);
2038fb68 2230 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
a3dd34d2 2231 ifnet_deserialize_all(ifp);
78195a76 2232 }
4986965b 2233 crit_exit();
efda3bd0
MD
2234 kfree(ifma->ifma_addr, M_IFMADDR);
2235 kfree(ifma, M_IFMADDR);
4090d6ff 2236 if (sa == NULL)
984263bc
MD
2237 return 0;
2238
2239 /*
2240 * Now look for the link-layer address which corresponds to
2241 * this network address. It had been squirreled away in
2242 * ifma->ifma_lladdr for this purpose (so we don't have
2243 * to call ifp->if_resolvemulti() again), and we saved that
2244 * value in sa above. If some nasty deleted the
2245 * link-layer address out from underneath us, we can deal because
2246	 * the address we stored is not the same as the one which was
2247 * in the record for the link-layer address. (So we don't complain
2248 * in that case.)
2249 */
441d34b2 2250 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
0c3c561c 2251 if (sa_equal(sa, ifma->ifma_addr))
984263bc 2252 break;
4090d6ff 2253 if (ifma == NULL)
984263bc
MD
2254 return 0;
2255
2256 if (ifma->ifma_refcount > 1) {
2257 ifma->ifma_refcount--;
2258 return 0;
2259 }
2260
4986965b 2261 crit_enter();
a3dd34d2 2262 ifnet_serialize_all(ifp);
441d34b2 2263 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2038fb68 2264 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
a3dd34d2 2265 ifnet_deserialize_all(ifp);
4986965b 2266 crit_exit();
efda3bd0
MD
2267 kfree(ifma->ifma_addr, M_IFMADDR);
2268 kfree(sa, M_IFMADDR);
2269 kfree(ifma, M_IFMADDR);
984263bc
MD
2270
2271 return 0;
2272}
2273
2274/*
3976c93a
RP
2275 * Delete all multicast group membership for an interface.
2276 * Should be used to quickly flush all multicast filters.
2277 */
2278void
2279if_delallmulti(struct ifnet *ifp)
2280{
2281 struct ifmultiaddr *ifma;
2282 struct ifmultiaddr *next;
2283
441d34b2 2284 TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
3976c93a
RP
2285 if_delmulti(ifp, ifma->ifma_addr);
2286}
2287
2288
2289/*
984263bc
MD
2290 * Set the link layer address on an interface.
2291 *
2292 * At this time we only support certain types of interfaces,
2293 * and we don't allow the length of the address to change.
2294 */
2295int
2296if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2297{
2298 struct sockaddr_dl *sdl;
984263bc
MD
2299 struct ifreq ifr;
2300
f2682cb9 2301 sdl = IF_LLSOCKADDR(ifp);
984263bc
MD
2302 if (sdl == NULL)
2303 return (EINVAL);
2304 if (len != sdl->sdl_alen) /* don't allow length to change */
2305 return (EINVAL);
2306 switch (ifp->if_type) {
2307 case IFT_ETHER: /* these types use struct arpcom */
984263bc 2308 case IFT_XETHER:
984263bc
MD
2309 case IFT_L2VLAN:
2310 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
984263bc
MD
2311 bcopy(lladdr, LLADDR(sdl), len);
2312 break;
2313 default:
2314 return (ENODEV);
2315 }
2316 /*
2317 * If the interface is already up, we need
2318 * to re-init it in order to reprogram its
2319 * address filter.
2320 */
a3dd34d2 2321 ifnet_serialize_all(ifp);
984263bc 2322 if ((ifp->if_flags & IFF_UP) != 0) {
c97d9b76 2323#ifdef INET
b2632176 2324 struct ifaddr_container *ifac;
c97d9b76 2325#endif
b2632176 2326
984263bc
MD
2327 ifp->if_flags &= ~IFF_UP;
2328 ifr.ifr_flags = ifp->if_flags;
46f25451 2329 ifr.ifr_flagshigh = ifp->if_flags >> 16;
78195a76 2330 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2331 NULL);
984263bc
MD
2332 ifp->if_flags |= IFF_UP;
2333 ifr.ifr_flags = ifp->if_flags;
46f25451 2334 ifr.ifr_flagshigh = ifp->if_flags >> 16;
78195a76 2335 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2336 NULL);
984263bc
MD
2337#ifdef INET
2338 /*
2339 * Also send gratuitous ARPs to notify other nodes about
2340 * the address change.
2341 */
b2632176
SZ
2342 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2343 struct ifaddr *ifa = ifac->ifa;
2344
984263bc
MD
2345 if (ifa->ifa_addr != NULL &&
2346 ifa->ifa_addr->sa_family == AF_INET)
69b66ae8 2347 arp_gratuitous(ifp, ifa);
984263bc
MD
2348 }
2349#endif
2350 }
a3dd34d2 2351 ifnet_deserialize_all(ifp);
984263bc
MD
2352 return (0);
2353}
2354
2355struct ifmultiaddr *
f23061d4 2356ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
984263bc
MD
2357{
2358 struct ifmultiaddr *ifma;
2359
441d34b2 2360 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
0c3c561c 2361 if (sa_equal(ifma->ifma_addr, sa))
984263bc
MD
2362 break;
2363
2364 return ifma;
2365}
2366
1550dfd9 2367/*
e9bd1548
MD
2368 * This function locates the first real ethernet MAC from a network
2369 * card and loads it into node, returning 0 on success or ENOENT if
2370 * no suitable interfaces were found. It is used by the uuid code to
2371 * generate a unique 6-byte number.
2372 */
2373int
2374if_getanyethermac(uint16_t *node, int minlen)
2375{
2376 struct ifnet *ifp;
2377 struct sockaddr_dl *sdl;
2378
2379 TAILQ_FOREACH(ifp, &ifnet, if_link) {
2380 if (ifp->if_type != IFT_ETHER)
2381 continue;
2382 sdl = IF_LLSOCKADDR(ifp);
2383 if (sdl->sdl_alen < minlen)
2384 continue;
2385 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2386 minlen);
2387 return(0);
2388 }
2389 return (ENOENT);
2390}
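
/*
 * Illustrative sketch (not part of if.c): the kind of call the uuid
 * code makes to pick up a node identifier.  The 3-element uint16_t
 * buffer holds the 6 MAC bytes; the fallback is an assumption.
 */
#ifdef notdef
static void
example_uuid_node(void)
{
	uint16_t node[3];	/* 6 bytes, e.g. a UUID node field */

	if (if_getanyethermac(node, sizeof(node)) != 0) {
		/*
		 * No ethernet interface with a full MAC was found;
		 * a caller would fall back to a random node id here.
		 */
	}
}
#endif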
2391
2392/*
1550dfd9
MD
2393 * The name argument must be a pointer to storage which will last as
2394 * long as the interface does. For physical devices, the result of
2395 * device_get_name(dev) is a good choice and for pseudo-devices a
2396 * static string works well.
2397 */
2398void
2399if_initname(struct ifnet *ifp, const char *name, int unit)
2400{
3e4a09e7
MD
2401 ifp->if_dname = name;
2402 ifp->if_dunit = unit;
1550dfd9 2403 if (unit != IF_DUNIT_NONE)
f8c7a42d 2404 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1550dfd9
MD
2405 else
2406 strlcpy(ifp->if_xname, name, IFNAMSIZ);
2407}
2408
984263bc
MD
2409int
2410if_printf(struct ifnet *ifp, const char *fmt, ...)
2411{
e2565a42 2412 __va_list ap;
984263bc
MD
2413 int retval;
2414
4b1cf444 2415 retval = kprintf("%s: ", ifp->if_xname);
e2565a42 2416 __va_start(ap, fmt);
379210cb 2417 retval += kvprintf(fmt, ap);
e2565a42 2418 __va_end(ap);
984263bc
MD
2419 return (retval);
2420}
2421
cb80735c
RP
2422struct ifnet *
2423if_alloc(uint8_t type)
2424{
2425 struct ifnet *ifp;
7e395935 2426 size_t size;
cb80735c 2427
7e395935
MD
2428 /*
2429 * XXX temporary hack until arpcom is setup in if_l2com
2430 */
2431 if (type == IFT_ETHER)
2432 size = sizeof(struct arpcom);
2433 else
2434 size = sizeof(struct ifnet);
2435
2436 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
cb80735c
RP
2437
2438 ifp->if_type = type;
2439
aeb3c11e
RP
2440 if (if_com_alloc[type] != NULL) {
2441 ifp->if_l2com = if_com_alloc[type](type, ifp);
2442 if (ifp->if_l2com == NULL) {
2443 kfree(ifp, M_IFNET);
2444 return (NULL);
2445 }
2446 }
cb80735c
RP
2447 return (ifp);
2448}
2449
2450void
2451if_free(struct ifnet *ifp)
2452{
2453 kfree(ifp, M_IFNET);
2454}
2455
b2f93efe
JS
2456void
2457ifq_set_classic(struct ifaltq *ifq)
2458{
2cc2f639
SZ
2459 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
2460 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
f0a26983
SZ
2461}
2462
2463void
2cc2f639
SZ
2464ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
2465 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
f0a26983
SZ
2466{
2467 int q;
2468
2cc2f639
SZ
2469 KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
2470 KASSERT(enqueue != NULL, ("enqueue is not specified"));
2471 KASSERT(dequeue != NULL, ("dequeue is not specified"));
2472 KASSERT(request != NULL, ("request is not specified"));
2473
2474 ifq->altq_mapsubq = mapsubq;
f0a26983
SZ
2475 for (q = 0; q < ifq->altq_subq_cnt; ++q) {
2476 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
2477
2478 ifsq->ifsq_enqueue = enqueue;
2479 ifsq->ifsq_dequeue = dequeue;
2480 ifsq->ifsq_request = request;
2481 }
b2f93efe
JS
2482}
2483
9db4b353 2484int
f0a26983
SZ
2485ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
2486 struct altq_pktattr *pa __unused)
e3e4574a 2487{
f0a26983 2488 if (IF_QFULL(ifsq)) {
e3e4574a
JS
2489 m_freem(m);
2490 return(ENOBUFS);
2491 } else {
f0a26983 2492 IF_ENQUEUE(ifsq, m);
e3e4574a
JS
2493 return(0);
2494 }
2495}
2496
9db4b353 2497struct mbuf *
f0a26983 2498ifsq_classic_dequeue(struct ifaltq_subque *ifsq, struct mbuf *mpolled, int op)
e3e4574a
JS
2499{
2500 struct mbuf *m;
2501
2502 switch (op) {
2503 case ALTDQ_POLL:
f0a26983 2504 IF_POLL(ifsq, m);
e3e4574a
JS
2505 break;
2506 case ALTDQ_REMOVE:
f0a26983 2507 IF_DEQUEUE(ifsq, m);
e3e4574a
JS
2508 break;
2509 default:
2510 panic("unsupported ALTQ dequeue op: %d", op);
2511 }
d2c71fa0 2512 KKASSERT(mpolled == NULL || mpolled == m);
e3e4574a
JS
2513 return(m);
2514}
2515
9db4b353 2516int
f0a26983 2517ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
e3e4574a
JS
2518{
2519 switch (req) {
2520 case ALTRQ_PURGE:
f0a26983 2521 IF_DRAIN(ifsq);
e3e4574a
JS
2522 break;
2523 default:
3f625015 2524 panic("unsupported ALTQ request: %d", req);
e3e4574a 2525 }
e3e4574a
JS
2526 return(0);
2527}
b2632176 2528
28cc0c29 2529static void
f0a26983 2530ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
28cc0c29 2531{
f0a26983 2532 struct ifnet *ifp = ifsq_get_ifp(ifsq);
28cc0c29
SZ
2533 int running = 0, need_sched;
2534
2535 /*
2536	 * Try to do direct ifnet.if_start first; if there is
2537 * contention on ifnet's serializer, ifnet.if_start will
2538 * be scheduled on ifnet's CPU.
2539 */
3c4cd924 2540 if (!ifnet_tryserialize_tx(ifp, ifsq)) {
28cc0c29
SZ
2541 /*
2542 * ifnet serializer contention happened,
2543 * ifnet.if_start is scheduled on ifnet's
2544 * CPU, and we keep going.
2545 */
f0a26983 2546 ifsq_ifstart_schedule(ifsq, 1);
28cc0c29
SZ
2547 return;
2548 }
2549
f0a26983
SZ
2550 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
2551 ifp->if_start(ifp, ifsq);
2552 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
28cc0c29
SZ
2553 running = 1;
2554 }
f0a26983 2555 need_sched = ifsq_ifstart_need_schedule(ifsq, running);
28cc0c29 2556
3c4cd924 2557 ifnet_deserialize_tx(ifp, ifsq);
28cc0c29
SZ
2558
2559 if (need_sched) {
2560 /*
2561		 * More data needs to be transmitted; ifnet.if_start is
2562 * scheduled on ifnet's CPU, and we keep going.
2563 * NOTE: ifnet.if_start interlock is not released.
2564 */
f0a26983 2565 ifsq_ifstart_schedule(ifsq, force_sched);
28cc0c29
SZ
2566 }
2567}
2568
2aa7f7f8 2569/*
f0a26983 2570 * IFSUBQ packets staging mechanism:
2aa7f7f8 2571 *
f0a26983 2572 * The packets enqueued into IFSUBQ are staged up to a certain amount before the
2aa7f7f8
SZ
2573 * ifnet's if_start is called.  In this way, the driver could avoid writing
2574 * to hardware registers upon every packet; instead, hardware registers
2575 * could be written once a certain amount of packets has been put onto the
2576 * hardware TX ring.  Measurements on several modern NICs (emx(4), igb(4),
2577 * bnx(4), bge(4), jme(4)) show that this hardware register write
2578 * aggregation could save ~20% CPU time when 18-byte UDP datagrams are
2579 * transmitted at 1.48Mpps.  The performance improvement from hardware
2580 * register write aggregation is also mentioned by Luigi Rizzo's netmap paper
2581 * (http://info.iet.unipi.it/~luigi/netmap/).
2582 *
f0a26983 2583 * IFSUBQ packets staging is performed for two entry points into a driver's
2aa7f7f8 2584 * transmission function:
f0a26983
SZ
2585 * - Direct ifnet's if_start calling, i.e. ifsq_ifstart_try()
2586 * - ifnet's if_start scheduling, i.e. ifsq_ifstart_schedule()
2aa7f7f8 2587 *
f0a26983 2588 * IFSUBQ packets staging will be stopped upon any of the following conditions:
2aa7f7f8 2589 * - If the count of packets enqueued on the current CPU is greater than or
f0a26983 2590 * equal to ifsq_stage_cntmax. (XXX this should be per-interface)
2aa7f7f8
SZ
2591 * - If the total length of packets enqueued on the current CPU is greater
2592 *   than or equal to the hardware's MTU - max_protohdr.  max_protohdr is
2593 *   cut from the hardware's MTU mainly because a full TCP segment's size
2594 *   is usually less than the hardware's MTU.
f0a26983 2595 * - ifsq_ifstart_schedule() is not pending on the current CPU and if_start
2aa7f7f8
SZ
2596 * interlock (if_snd.altq_started) is not released.
2597 * - The if_start_rollup(), which is registered as low priority netisr
2598 * rollup function, is called; probably because no more work is pending
2599 * for netisr.
2600 *
2601 * NOTE:
f0a26983 2602 * Currently IFSUBQ packet staging is only performed in netisr threads.
2aa7f7f8 2603 */
9db4b353
SZ
2604int
2605ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2606{
2607 struct ifaltq *ifq = &ifp->if_snd;
f0a26983 2608 struct ifaltq_subque *ifsq;
28cc0c29 2609 int error, start = 0, len, mcast = 0, avoid_start = 0;
f0a26983
SZ
2610 struct ifsubq_stage_head *head = NULL;
2611 struct ifsubq_stage *stage = NULL;
57dff79c 2612
2cc2f639 2613 ifsq = ifq_map_subq(ifq, mycpuid);
3c4cd924 2614 ASSERT_IFNET_NOT_SERIALIZED_TX(ifp, ifsq);
9db4b353 2615
fe53d127
SZ
2616 len = m->m_pkthdr.len;
2617 if (m->m_flags & M_MCAST)
2618 mcast = 1;
2619
28cc0c29 2620 if (curthread->td_type == TD_TYPE_NETISR) {
f0a26983
SZ
2621 head = &ifsubq_stage_heads[mycpuid];
2622 stage = ifsq_get_stage(ifsq, mycpuid);
28cc0c29 2623
f0a26983
SZ
2624 stage->stg_cnt++;
2625 stage->stg_len += len;
2626 if (stage->stg_cnt < ifsq_stage_cntmax &&
2627 stage->stg_len < (ifp->if_mtu - max_protohdr))
28cc0c29
SZ
2628 avoid_start = 1;
2629 }
2630
f0a26983
SZ
2631 ALTQ_SQ_LOCK(ifsq);
2632 error = ifsq_enqueue_locked(ifsq, m, pa);
9db4b353 2633 if (error) {
f0a26983
SZ
2634 if (!ifsq_data_ready(ifsq)) {
2635 ALTQ_SQ_UNLOCK(ifsq);
087561ef
SZ
2636 return error;
2637 }
28cc0c29 2638 avoid_start = 0;
9db4b353 2639 }
f0a26983 2640 if (!ifsq_is_started(ifsq)) {
28cc0c29 2641 if (avoid_start) {
f0a26983 2642 ALTQ_SQ_UNLOCK(ifsq);
28cc0c29
SZ
2643
2644 KKASSERT(!error);
f0a26983
SZ
2645 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
2646 ifsq_stage_insert(head, stage);
28cc0c29
SZ
2647
2648 ifp->if_obytes += len;
2649 if (mcast)
2650 ifp->if_omcasts++;
28cc0c29
SZ
2651 return error;
2652 }
2653
9db4b353
SZ
2654 /*
2655 * Hold the interlock of ifnet.if_start
2656 */
f0a26983 2657 ifsq_set_started(ifsq);
9db4b353
SZ
2658 start = 1;
2659 }
f0a26983 2660 ALTQ_SQ_UNLOCK(ifsq);
9db4b353 2661
fe53d127
SZ
2662 if (!error) {
2663 ifp->if_obytes += len;
2664 if (mcast)
2665 ifp->if_omcasts++;
2666 }
9db4b353 2667
28cc0c29 2668 if (stage != NULL) {
f0a26983
SZ
2669 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
2670 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
3cab6b0d 2671 if (!avoid_start) {
f0a26983
SZ
2672 ifsq_stage_remove(head, stage);
2673 ifsq_ifstart_schedule(ifsq, 1);
3cab6b0d
SZ
2674 }
2675 return error;
2676 }
2677
f0a26983
SZ
2678 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
2679 ifsq_stage_remove(head, stage);
28cc0c29 2680 } else {
f0a26983
SZ
2681 stage->stg_cnt = 0;
2682 stage->stg_len = 0;
28cc0c29 2683 }
9db4b353
SZ
2684 }
2685
f0a26983 2686 if (!start)
087561ef 2687 return error;
9db4b353 2688
f0a26983 2689 ifsq_ifstart_try(ifsq, 0);
087561ef 2690 return error;
9db4b353
SZ
2691}
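
/*
 * Illustrative sketch (not part of if.c) of the staging cut-off applied
 * in ifq_dispatch() above, written out as a predicate.  With assumed
 * numbers if_mtu = 1500 and max_protohdr = 60, a netisr thread stops
 * staging once ~1440 bytes, or ifsq_stage_cntmax packets, have been
 * queued on this CPU.
 */
#ifdef notdef
static __inline int
example_stage_full(const struct ifsubq_stage *stage, const struct ifnet *ifp)
{
	/* Mirrors the avoid_start test in ifq_dispatch(). */
	return (stage->stg_cnt >= ifsq_stage_cntmax ||
	    stage->stg_len >= ifp->if_mtu - max_protohdr);
}
#endif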
2692
b2632176
SZ
2693void *
2694ifa_create(int size, int flags)
2695{
2696 struct ifaddr *ifa;
2697 int i;
2698
ed20d0e3 2699 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
b2632176
SZ
2700
2701 ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2702 if (ifa == NULL)
2703 return NULL;
2704
2705 ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2706 M_IFADDR, M_WAITOK | M_ZERO);
d5a2b87c 2707 ifa->ifa_ncnt = ncpus;
b2632176
SZ
2708 for (i = 0; i < ncpus; ++i) {
2709 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2710
2711 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2712 ifac->ifa = ifa;
2713 ifac->ifa_refcnt = 1;
2714 }
2715#ifdef IFADDR_DEBUG
2716 kprintf("alloc ifa %p %d\n", ifa, size);
2717#endif
2718 return ifa;
2719}
2720
b2632176
SZ
2721void
2722ifac_free(struct ifaddr_container *ifac, int cpu_id)
2723{
d5a2b87c 2724 struct ifaddr *ifa = ifac->ifa;
b2632176
SZ
2725
2726 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2727 KKASSERT(ifac->ifa_refcnt == 0);
40f667f2 2728 KASSERT(ifac->ifa_listmask == 0,
ed20d0e3 2729 ("ifa is still on %#x lists", ifac->ifa_listmask));
b2632176
SZ
2730
2731 ifac->ifa_magic = IFA_CONTAINER_DEAD;
2732
b2632176 2733#ifdef IFADDR_DEBUG_VERBOSE
8967ddc7 2734 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
b2632176
SZ
2735#endif
2736
d5a2b87c 2737 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
ed20d0e3 2738 ("invalid # of ifac, %d", ifa->ifa_ncnt));
d5a2b87c
SZ
2739 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2740#ifdef IFADDR_DEBUG
2741 kprintf("free ifa %p\n", ifa);
2742#endif
2743 kfree(ifa->ifa_containers, M_IFADDR);
2744 kfree(ifa, M_IFADDR);
2745 }
b2632176
SZ
2746}
2747
2748static void
002c1265 2749ifa_iflink_dispatch(netmsg_t nmsg)
b2632176
SZ
2750{
2751 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2752 struct ifaddr *ifa = msg->ifa;
2753 struct ifnet *ifp = msg->ifp;
2754 int cpu = mycpuid;
40f667f2 2755 struct ifaddr_container *ifac;
b2632176
SZ
2756
2757 crit_enter();
23027d35 2758
40f667f2 2759 ifac = &ifa->ifa_containers[cpu];
2adb7bc2 2760 ASSERT_IFAC_VALID(ifac);
40f667f2 2761 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
ed20d0e3 2762 ("ifaddr is on if_addrheads"));
23027d35 2763
40f667f2
SZ
2764 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2765 if (msg->tail)
2766 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2767 else
2768 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
23027d35 2769
b2632176
SZ
2770 crit_exit();
2771
002c1265 2772 ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
b2632176
SZ
2773}
2774
2775void
2776ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2777{
2778 struct netmsg_ifaddr msg;
2779
002c1265 2780 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
48e7b118 2781 0, ifa_iflink_dispatch);
b2632176
SZ
2782 msg.ifa = ifa;
2783 msg.ifp = ifp;
2784 msg.tail = tail;
2785
002c1265 2786 ifa_domsg(&msg.base.lmsg, 0);
b2632176
SZ
2787}
2788
2789static void
002c1265 2790ifa_ifunlink_dispatch(netmsg_t nmsg)
b2632176
SZ
2791{
2792 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2793 struct ifaddr *ifa = msg->ifa;
2794 struct ifnet *ifp = msg->ifp;
2795 int cpu = mycpuid;
40f667f2 2796 struct ifaddr_container *ifac;
b2632176
SZ
2797
2798 crit_enter();
23027d35 2799
40f667f2 2800 ifac = &ifa->ifa_containers[cpu];
2adb7bc2 2801 ASSERT_IFAC_VALID(ifac);
40f667f2 2802 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
ed20d0e3 2803 ("ifaddr is not on if_addrhead"));
23027d35 2804
40f667f2
SZ
2805 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2806 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
23027d35 2807
b2632176
SZ
2808 crit_exit();
2809
002c1265 2810 ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
b2632176
SZ
2811}
2812
2813void
2814ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2815{
2816 struct netmsg_ifaddr msg;
2817
002c1265 2818 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
48e7b118 2819 0, ifa_ifunlink_dispatch);
b2632176
SZ
2820 msg.ifa = ifa;
2821 msg.ifp = ifp;
2822
002c1265 2823 ifa_domsg(&msg.base.lmsg, 0);
b2632176
SZ
2824}
2825
2826static void
002c1265 2827ifa_destroy_dispatch(netmsg_t nmsg)
b2632176
SZ
2828{
2829 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2830
2831 IFAFREE(msg->ifa);
002c1265 2832 ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
b2632176
SZ
2833}
2834
2835void
2836ifa_destroy(struct ifaddr *ifa)
2837{
2838 struct netmsg_ifaddr msg;
2839
002c1265 2840 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
48e7b118 2841 0, ifa_destroy_dispatch);
b2632176
SZ
2842 msg.ifa = ifa;
2843
002c1265 2844 ifa_domsg(&msg.base.lmsg, 0);
b2632176
SZ
2845}
2846
2847struct lwkt_port *
d7944f0b 2848ifnet_portfn(int cpu)
b2632176 2849{
90af4fd3 2850 return &ifnet_threads[cpu].td_msgport;
b2632176
SZ
2851}
2852
c4882b7e
SZ
2853void
2854ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2855{
ea2e6532
SZ
2856 KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2857
c4882b7e
SZ
2858 if (next_cpu < ncpus)
2859 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2860 else
2861 lwkt_replymsg(lmsg, 0);
2862}
2863
2a3e1dbd 2864int
c4882b7e
SZ
2865ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2866{
2867 KKASSERT(cpu < ncpus);
2a3e1dbd 2868 return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
c4882b7e
SZ
2869}
2870
8967ddc7
SZ
2871void
2872ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2873{
2874 KKASSERT(cpu < ncpus);
2875 lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2876}
2877
c3c96e44
MD
2878/*
2879 * Generic netmsg service loop. Some protocols may roll their own but all
2880 * must do the basic command dispatch function call done here.
2881 */
2882static void
2883ifnet_service_loop(void *arg __unused)
2884{
002c1265 2885 netmsg_t msg;
c3c96e44
MD
2886
2887 while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
002c1265
MD
2888 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2889 msg->base.nm_dispatch(msg);
c3c96e44
MD
2890 }
2891}
2892
239bdb58
SZ
2893static void
2894if_start_rollup(void)
2895{
f0a26983
SZ
2896 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
2897 struct ifsubq_stage *stage;
28cc0c29 2898
f0a26983
SZ
2899 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
2900 struct ifaltq_subque *ifsq = stage->stg_subq;
3cab6b0d 2901 int is_sched = 0;
28cc0c29 2902
f0a26983 2903 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
3cab6b0d 2904 is_sched = 1;
f0a26983 2905 ifsq_stage_remove(head, stage);
28cc0c29 2906
3cab6b0d 2907 if (is_sched) {
f0a26983 2908 ifsq_ifstart_schedule(ifsq, 1);
3cab6b0d
SZ
2909 } else {
2910 int start = 0;
28cc0c29 2911
f0a26983
SZ
2912 ALTQ_SQ_LOCK(ifsq);
2913 if (!ifsq_is_started(ifsq)) {
3cab6b0d
SZ
2914 /*
2915 * Hold the interlock of ifnet.if_start
2916 */
f0a26983 2917 ifsq_set_started(ifsq);
3cab6b0d
SZ
2918 start = 1;
2919 }
f0a26983 2920 ALTQ_SQ_UNLOCK(ifsq);
3cab6b0d
SZ
2921
2922 if (start)
f0a26983 2923 ifsq_ifstart_try(ifsq, 1);
3cab6b0d 2924 }
f0a26983
SZ
2925 KKASSERT((stage->stg_flags &
2926 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
28cc0c29 2927 }
239bdb58 2928}
239bdb58 2929
b2632176 2930static void
90af4fd3 2931ifnetinit(void *dummy __unused)
b2632176
SZ
2932{
2933 int i;
2934
2935 for (i = 0; i < ncpus; ++i) {
90af4fd3 2936 struct thread *thr = &ifnet_threads[i];
b2632176 2937
c3c96e44 2938 lwkt_create(ifnet_service_loop, NULL, NULL,
4643740a 2939 thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
392cd266 2940 i, "ifnet %d", i);
b2632176 2941 netmsg_service_port_init(&thr->td_msgport);
c3c96e44 2942 lwkt_schedule(thr);
b2632176 2943 }
28cc0c29
SZ
2944
2945 for (i = 0; i < ncpus; ++i)
f0a26983 2946 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
239bdb58 2947 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
b2632176 2948}
bd08b792
RP
2949
2950struct ifnet *
2951ifnet_byindex(unsigned short idx)
2952{
2953 if (idx > if_index)
2954 return NULL;
2955 return ifindex2ifnet[idx];
2956}
2957
2958struct ifaddr *
2959ifaddr_byindex(unsigned short idx)
2960{
2961 struct ifnet *ifp;
2962
2963 ifp = ifnet_byindex(idx);
ec27babc
RP
2964 if (!ifp)
2965 return NULL;
d79c4535 2966 return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
bd08b792 2967}
aeb3c11e
RP
2968
2969void
2970if_register_com_alloc(u_char type,
2971 if_com_alloc_t *a, if_com_free_t *f)
2972{
2973
2974 KASSERT(if_com_alloc[type] == NULL,
2975 ("if_register_com_alloc: %d already registered", type));
2976 KASSERT(if_com_free[type] == NULL,
2977 ("if_register_com_alloc: %d free already registered", type));
2978
2979 if_com_alloc[type] = a;
2980 if_com_free[type] = f;
2981}
2982
2983void
2984if_deregister_com_alloc(u_char type)
2985{
2986
2987 KASSERT(if_com_alloc[type] != NULL,
2988 ("if_deregister_com_alloc: %d not registered", type));
2989 KASSERT(if_com_free[type] != NULL,
2990 ("if_deregister_com_alloc: %d free not registered", type));
2991 if_com_alloc[type] = NULL;
2992 if_com_free[type] = NULL;
2993}
a317449e
SZ
2994
2995int
2996if_ring_count2(int cnt, int cnt_max)
2997{
2998 int shift = 0;
2999
3000 KASSERT(cnt_max >= 1 && powerof2(cnt_max),
ed20d0e3 3001 ("invalid ring count max %d", cnt_max));
a317449e
SZ
3002
3003 if (cnt <= 0)
3004 cnt = cnt_max;
3005 if (cnt > ncpus2)
3006 cnt = ncpus2;
3007 if (cnt > cnt_max)
3008 cnt = cnt_max;
3009
3010 while ((1 << (shift + 1)) <= cnt)
3011 ++shift;
3012 cnt = 1 << shift;
3013
3014 KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
ed20d0e3 3015 ("calculate cnt %d, ncpus2 %d, cnt max %d",
a317449e
SZ
3016 cnt, ncpus2, cnt_max));
3017 return cnt;
3018}
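
/*
 * Illustrative sketch (not part of if.c): how a hypothetical multi-ring
 * driver would size its RX ring count.  MYDRV_RX_RING_MAX and the
 * tunable are assumptions.  With the tunable left at 0 on a machine
 * where ncpus2 == 8, the result is 8: >= 1, a power of 2, and capped by
 * both ncpus2 and the driver's hardware limit.
 */
#ifdef notdef
#define MYDRV_RX_RING_MAX	16		/* hypothetical hw limit */
static int mydrv_rx_rings_tunable = 0;		/* 0 = auto */

static int
example_rx_ring_count(void)
{
	return (if_ring_count2(mydrv_rx_rings_tunable, MYDRV_RX_RING_MAX));
}
#endif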
b7a0c958
SZ
3019
3020void
3021ifq_set_maxlen(struct ifaltq *ifq, int len)
3022{
f0a26983 3023 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
b7a0c958 3024}
2cc2f639
SZ
3025
3026int
3027ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
3028{
3029 return ALTQ_SUBQ_INDEX_DEFAULT;
3030}
8a248085 3031
c3fb75dd
SZ
3032int
3033ifq_mapsubq_mask(struct ifaltq *ifq, int cpuid)
3034{
3035 return (cpuid & ifq->altq_subq_mask);
3036}
3037
8a248085
SZ
3038static void
3039ifsq_watchdog(void *arg)
3040{
3041 struct ifsubq_watchdog *wd = arg;
3042 struct ifnet *ifp;
3043
3044 if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer))
3045 goto done;
3046
3047 ifp = ifsq_get_ifp(wd->wd_subq);
3048 if (ifnet_tryserialize_all(ifp)) {
3049 wd->wd_watchdog(wd->wd_subq);
3050 ifnet_deserialize_all(ifp);
3051 } else {
3052 /* try again next timeout */
3053 wd->wd_timer = 1;
3054 }
3055done:
3056 ifsq_watchdog_reset(wd);
3057}
3058
3059static void
3060ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
3061{
3062 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
3063 ifsq_get_cpuid(wd->wd_subq));
3064}
3065
3066void
3067ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq,
3068 ifsq_watchdog_t watchdog)
3069{
3070 callout_init_mp(&wd->wd_callout);
3071 wd->wd_timer = 0;
3072 wd->wd_subq = ifsq;
3073 wd->wd_watchdog = watchdog;
3074}
3075
3076void
3077ifsq_watchdog_start(struct ifsubq_watchdog *wd)
3078{
3079 wd->wd_timer = 0;
3080 ifsq_watchdog_reset(wd);
3081}
3082
3083void
3084ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
3085{
3086 wd->wd_timer = 0;
3087 callout_stop(&wd->wd_callout);
3088}
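
/*
 * Illustrative sketch (not part of if.c): how a hypothetical driver
 * would wire up the per-subqueue watchdog.  struct mydrv_softc and the
 * mydrv_* names are assumptions for illustration; the transmit path
 * would arm the check by setting sc_watchdog.wd_timer to a timeout in
 * seconds whenever packets are handed to the hardware.
 */
#ifdef notdef
struct mydrv_softc {
	struct ifsubq_watchdog	sc_watchdog;
	/* ... the rest of the hypothetical driver's state ... */
};

static void
mydrv_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);

	if_printf(ifp, "transmit timed out, resetting\n");
	/* ... reset the hardware and restart transmission here ... */
}

static void
mydrv_watchdog_attach(struct mydrv_softc *sc, struct ifaltq_subque *ifsq)
{
	/* Bind a once-per-second check to the subqueue's CPU. */
	ifsq_watchdog_init(&sc->sc_watchdog, ifsq, mydrv_watchdog);
	ifsq_watchdog_start(&sc->sc_watchdog);
}

static void
mydrv_watchdog_detach(struct mydrv_softc *sc)
{
	ifsq_watchdog_stop(&sc->sc_watchdog);
}
#endif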