route: ensure RTM_IFINFO is sent first when bring interface down/up
[dragonfly.git] / sys / net / if.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1980, 1986, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
dc71b7ab 13 * 3. Neither the name of the University nor the names of its contributors
984263bc
MD
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)if.c 8.3 (Berkeley) 1/4/94
f23061d4 30 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
984263bc
MD
31 */
32
984263bc
MD
33#include "opt_inet6.h"
34#include "opt_inet.h"
b3a7093f 35#include "opt_ifpoll.h"
984263bc
MD
36
37#include <sys/param.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/systm.h>
41#include <sys/proc.h>
895c1f85 42#include <sys/priv.h>
6b6e0885 43#include <sys/protosw.h>
984263bc
MD
44#include <sys/socket.h>
45#include <sys/socketvar.h>
6b6e0885 46#include <sys/socketops.h>
984263bc 47#include <sys/kernel.h>
9db4b353 48#include <sys/ktr.h>
9683f229 49#include <sys/mutex.h>
233c8570 50#include <sys/lock.h>
984263bc
MD
51#include <sys/sockio.h>
52#include <sys/syslog.h>
53#include <sys/sysctl.h>
698ac46c 54#include <sys/domain.h>
e9cb6d99 55#include <sys/thread.h>
78195a76 56#include <sys/serialize.h>
71fc104f 57#include <sys/bus.h>
e1c6b0c1 58#include <sys/jail.h>
984263bc 59
9683f229
MD
60#include <sys/thread2.h>
61#include <sys/msgport2.h>
62#include <sys/mutex2.h>
63
984263bc
MD
64#include <net/if.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_types.h>
68#include <net/if_var.h>
afc5d5f3 69#include <net/if_ringmap.h>
4d723e5a 70#include <net/ifq_var.h>
984263bc
MD
71#include <net/radix.h>
72#include <net/route.h>
65a24520 73#include <net/if_clone.h>
5337421c 74#include <net/netisr2.h>
b2632176
SZ
75#include <net/netmsg2.h>
76
d5a2b87c 77#include <machine/atomic.h>
984263bc 78#include <machine/stdarg.h>
b2632176 79#include <machine/smp.h>
984263bc
MD
80
81#if defined(INET) || defined(INET6)
984263bc
MD
82#include <netinet/in.h>
83#include <netinet/in_var.h>
84#include <netinet/if_ether.h>
85#ifdef INET6
984263bc
MD
86#include <netinet6/in6_var.h>
87#include <netinet6/in6_ifattach.h>
233c8570
AL
88#endif /* INET6 */
89#endif /* INET || INET6 */
984263bc 90
b2632176 91struct netmsg_ifaddr {
002c1265 92 struct netmsg_base base;
b2632176
SZ
93 struct ifaddr *ifa;
94 struct ifnet *ifp;
95 int tail;
96};
97
f0a26983
SZ
98struct ifsubq_stage_head {
99 TAILQ_HEAD(, ifsubq_stage) stg_head;
28cc0c29
SZ
100} __cachealign;
101
68732d8f
SZ
102struct if_ringmap {
103 int rm_cnt;
104 int rm_grid;
105 int rm_cpumap[];
106};
107
434f3dd0
SZ
108#define RINGMAP_FLAG_NONE 0x0
109#define RINGMAP_FLAG_POWEROF2 0x1
110
984263bc
MD
111/*
112 * System initialization
113 */
698ac46c
HS
114static void if_attachdomain(void *);
115static void if_attachdomain1(struct ifnet *);
436c57ea
SZ
116static int ifconf(u_long, caddr_t, struct ucred *);
117static void ifinit(void *);
90af4fd3 118static void ifnetinit(void *);
436c57ea 119static void if_slowtimo(void *);
3ffea39d 120static void link_rtrequest(int, struct rtentry *);
436c57ea 121static int if_rtdel(struct radix_node *, void *);
b5df1a85 122static void if_slowtimo_dispatch(netmsg_t);
984263bc 123
8a248085
SZ
124/* Helper functions */
125static void ifsq_watchdog_reset(struct ifsubq_watchdog *);
72659ed0 126static int if_delmulti_serialized(struct ifnet *, struct sockaddr *);
b4051e25
SZ
127static struct ifnet_array *ifnet_array_alloc(int);
128static void ifnet_array_free(struct ifnet_array *);
129static struct ifnet_array *ifnet_array_add(struct ifnet *,
130 const struct ifnet_array *);
131static struct ifnet_array *ifnet_array_del(struct ifnet *,
132 const struct ifnet_array *);
233c8570
AL
133static struct ifg_group *if_creategroup(const char *);
134static int if_destroygroup(struct ifg_group *);
135static int if_delgroup_locked(struct ifnet *, const char *);
136static int if_getgroups(struct ifgroupreq *, struct ifnet *);
137static int if_getgroupmembers(struct ifgroupreq *);
8a248085 138
984263bc
MD
139#ifdef INET6
140/*
141 * XXX: declare here to avoid to include many inet6 related files..
142 * should be more generalized?
143 */
436c57ea 144extern void nd6_setmtu(struct ifnet *);
984263bc
MD
145#endif
146
436c57ea
SZ
147SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
148SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
68732d8f 149SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap");
436c57ea 150
335a88d5 151static int ifsq_stage_cntmax = 16;
f0a26983 152TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
28cc0c29 153SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
f0a26983 154 &ifsq_stage_cntmax, 0, "ifq staging packet count max");
28cc0c29 155
6517ec3f
SZ
156static int if_stats_compat = 0;
157SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
158 &if_stats_compat, 0, "Compat the old ifnet stats");
159
68732d8f
SZ
160static int if_ringmap_dumprdr = 0;
161SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW,
162 &if_ringmap_dumprdr, 0, "dump redirect table");
163
f3f3eadb 164SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
3c5b1eb8 165SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL);
436c57ea 166
2949c680
AL
167static if_com_alloc_t *if_com_alloc[256];
168static if_com_free_t *if_com_free[256];
aeb3c11e 169
436c57ea
SZ
170MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
171MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
cb80735c 172MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
984263bc 173
436c57ea 174int ifqmaxlen = IFQ_MAXLEN;
b64bfcc3 175struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
233c8570
AL
176struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
177static struct lock ifgroup_lock;
984263bc 178
b4051e25
SZ
179static struct ifnet_array ifnet_array0;
180static struct ifnet_array *ifnet_array = &ifnet_array0;
181
b5df1a85
SZ
182static struct callout if_slowtimo_timer;
183static struct netmsg_base if_slowtimo_netmsg;
436c57ea
SZ
184
185int if_index = 0;
186struct ifnet **ifindex2ifnet = NULL;
cabfc9f6 187static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet");
abbb44bb 188
f0a26983 189static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU];
28cc0c29 190
f0a26983 191#ifdef notyet
9db4b353 192#define IFQ_KTR_STRING "ifq=%p"
2949c680 193#define IFQ_KTR_ARGS struct ifaltq *ifq
9db4b353
SZ
194#ifndef KTR_IFQ
195#define KTR_IFQ KTR_ALL
196#endif
197KTR_INFO_MASTER(ifq);
5bf48697
AE
198KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
199KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
9db4b353
SZ
200#define logifq(name, arg) KTR_LOG(ifq_ ## name, arg)
201
202#define IF_START_KTR_STRING "ifp=%p"
5bf48697 203#define IF_START_KTR_ARGS struct ifnet *ifp
9db4b353
SZ
204#ifndef KTR_IF_START
205#define KTR_IF_START KTR_ALL
206#endif
207KTR_INFO_MASTER(if_start);
208KTR_INFO(KTR_IF_START, if_start, run, 0,
5bf48697 209 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 210KTR_INFO(KTR_IF_START, if_start, sched, 1,
5bf48697 211 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 212KTR_INFO(KTR_IF_START, if_start, avoid, 2,
5bf48697 213 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 214KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
5bf48697 215 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 216KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
5bf48697 217 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 218#define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg)
233c8570 219#endif /* notyet */
315a7da3 220
984263bc
MD
221/*
222 * Network interface utility routines.
223 *
224 * Routines with ifa_ifwith* names take sockaddr *'s as
225 * parameters.
226 */
/* ARGSUSED */
/*
 * File-level initialization (SYSINIT at SI_SUB_PROTO_IF): set up the
 * interface-group lock and kick off the periodic if_slowtimo handler
 * by sending its netmsg to netisr0.
 */
static void
ifinit(void *dummy)
{
	lockinit(&ifgroup_lock, "ifgroup", 0, 0);

	/* MP-safe callout; the message is re-sent from the dispatch side. */
	callout_init_mp(&if_slowtimo_timer);
	netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, if_slowtimo_dispatch);

	/* Start if_slowtimo */
	lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg);
}
240
/*
 * IPI handler: (re)send the per-CPU if_start netmsg for the subqueue to
 * the local netisr.  The MSGF_DONE check ensures the message is only sent
 * when it is not already in flight; crit_enter protects the check-and-send
 * against local preemption.
 */
static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
	crit_exit();
}
252
/*
 * Unlink a staged subqueue entry from the per-CPU staging list and reset
 * its state.  Caller must guarantee the entry is currently queued
 * (asserted via IFSQ_STAGE_FLAG_QUED).
 */
static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	/* Clear both queued and scheduled state and the byte/packet counts. */
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}
262
/*
 * Link a subqueue staging entry onto the per-CPU staging list.  The entry
 * must be neither queued nor scheduled yet (asserted).
 */
static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}
271
/*
 * Schedule ifnet.if_start on the subqueue owner CPU
 *
 * If 'force' is zero and we are running in a netisr thread with staging
 * enabled (ifsq_stage_cntmax > 0), the start request is merely staged on
 * the local CPU's staging list and marked SCHED; an actual dispatch will
 * happen later.  Otherwise the request is delivered to the owner CPU
 * immediately, via IPI when the owner is a remote CPU.
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		/* Reset counters; a scheduled stage bypasses count limits. */
		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}
298
/*
 * NOTE:
 * This function will release ifnet.if_start subqueue interlock,
 * if ifnet.if_start for the subqueue does not need to be scheduled
 *
 * Returns 1 when the caller should (re)schedule if_start, 0 when the
 * "started" interlock has been cleared and no further scheduling is
 * needed.  'running' reflects whether the hardware can still transmit.
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when hardware queue is ready
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}
338
/*
 * Netisr handler for the per-CPU if_start message: run ifnet.if_start on
 * the subqueue owner CPU.  The message is replied to immediately so that
 * it can be re-sent while we are still transmitting.  If the subqueue's
 * owner CPU changed since the message was sent, the request is forwarded
 * (forced) to the new owner instead.
 */
static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0);	/* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on the subqueue owner CPU, and we keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}
381
/* Device driver ifnet.if_start helper function */
/*
 * Start transmission on a subqueue from driver context.  The caller must
 * already hold the subqueue's hardware serializer (asserted).  The
 * "started" flag acts as an interlock: if another start is in progress,
 * or there is no data ready, this is a no-op.
 */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	/* Hardware still accepting packets after the start? */
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}
413
f0a26983
SZ
414void
415if_devstart(struct ifnet *ifp)
416{
417 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
418}
419
/* Device driver ifnet.if_start schedule helper function */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	const int force = 1;	/* bypass staging; deliver to owner CPU now */

	ifsq_ifstart_schedule(ifsq, force);
}
426
2dffecda
SZ
427void
428if_devstart_sched(struct ifnet *ifp)
429{
f0a26983 430 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
2dffecda
SZ
431}
432
a3dd34d2
SZ
433static void
434if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
435{
436 lwkt_serialize_enter(ifp->if_serializer);
437}
438
439static void
440if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
441{
442 lwkt_serialize_exit(ifp->if_serializer);
443}
444
445static int
446if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
447{
448 return lwkt_serialize_try(ifp->if_serializer);
449}
450
#ifdef INVARIANTS
/*
 * Default if_serialize_assert method: assert that the embedded serializer
 * is held (serialized != 0) or not held by the current thread.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif
463
/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.  Either the driver supplies its own
 * (de)serialize methods (all four must be set and 'serializer' must be
 * NULL), or the default methods are installed around 'serializer' (or an
 * embedded serializer if 'serializer' is NULL).
 *
 * Sets up the per-CPU address/data arrays, the AF_LINK lladdr, the
 * transmit subqueues, and finally publishes the ifp in ifindex2ifnet,
 * the ifnet list and the ifnet array under the ifnet lock before
 * announcing the arrival.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q, qlen;
	char qlenname[64];

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/*
	 * Make if_addrhead available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if_addgroup(ifp, IFG_ALL);

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	/* One allocation holds the ifaddr plus the addr and netmask sdl's. */
	ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize);
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Second sdl in the allocation is the all-ones name mask. */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	/*
	 * Make if_data available on all CPUs, since they could
	 * be updated by hardware interrupt routing, which could
	 * be bound to any CPU.
	 */
	ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu),
				    M_DEVBUF,
				    M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq =
		kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
			M_DEVBUF,
			M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	/* Allow user to override driver's setting. */
	ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname);
	qlen = -1;
	TUNABLE_INT_FETCH(qlenname, &qlen);
	if (qlen > 0) {
		if_printf(ifp, "qlenmax -> %d\n", qlen);
		ifq_set_maxlen(ifq, qlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		/* XXX: netisr_ncpus */
		ifsq->ifsq_stage =
			kmalloc(ncpus * sizeof(struct ifsubq_stage),
				M_DEVBUF,
				M_WAITOK | M_ZERO | M_CACHEALIGN);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		/*
		 * Allocate one if_start message for each CPU, since
		 * the hardware TX ring could be assigned to any CPU.
		 *
		 * NOTE:
		 * If the hardware TX ring polling CPU and the hardware
		 * TX ring interrupt CPU are same, one if_start message
		 * should be enough.
		 */
		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Increase mbuf cluster/jcluster limits for the mbufs that
	 * could sit on the device queues for quite some time.
	 */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(ifp->if_nmbjclusters);

	/*
	 * Install this ifp into ifindex2inet, ifnet queue and ifnet
	 * array after it is setup.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			bcopy(ifindex2ifnet, q, n/2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs could get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
734
/*
 * SYSINIT hook: attach all registered protocol domains to every
 * interface that already exists at SI_SUB_PROTO_IFATTACHDOMAIN time.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);
747
/*
 * Attach every registered domain's per-interface data to 'ifp' by
 * invoking each domain's dom_ifattach hook; results are stored in
 * ifp->if_afdata indexed by address family.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}
763
c727e142
SZ
/*
 * Purge all addresses whose type is _not_ AF_LINK
 *
 * Runs in netisr0 (asserted).  AF_UNSPEC entries are list markers and are
 * skipped; AF_INET addresses are removed through in_control(SIOCDIFADDR)
 * and AF_INET6 through in6_purgeaddr(); anything else is unlinked and
 * destroyed directly.
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	ASSERT_NETISR0;

	/*
	 * The ifaddr processing in the following loop will block,
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so we don't care about the
	 * blockness of the ifaddr processing here.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
			struct sockaddr_in saved_addr, saved_dst;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			/* Save information for panic. */
			memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr));
			if (ifa->ifa_dstaddr != NULL) {
				memcpy(&saved_dst, ifa->ifa_dstaddr,
				    sizeof(saved_dst));
			} else {
				memset(&saved_dst, 0, sizeof(saved_dst));
			}

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;

			/* MUST NOT HAPPEN */
			panic("%s: in_control failed %x, dst %x", ifp->if_xname,
			    ntohl(saved_addr.sin_addr.s_addr),
			    ntohl(saved_dst.sin_addr.s_addr));
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Fallback for other address families. */
		if_printf(ifp, "destroy ifaddr family %d\n",
		    ifa->ifa_addr->sa_family);
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	netisr_replymsg(&nmsg->base, 0);
}
857
/*
 * Remove all non-AF_LINK addresses from 'ifp'.  Synchronously dispatches
 * the purge to netisr0, where ifaddr list changes are serialized.
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct netmsg_base nmsg;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    if_purgeaddrs_nolink_dispatch);
	nmsg.lmsg.u.ms_resultp = ifp;
	netisr_domsg(&nmsg, 0);
}
868
/*
 * Per-CPU handler for ifq_stage_detach(): drop any staged entries for
 * every subqueue of 'ifq' from this CPU's staging list.
 */
static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}
884
/*
 * Remove all staging state for 'ifq' by running the detach handler
 * synchronously on every CPU, one after another.
 */
static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	/* XXX netisr_ncpus */
	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
}
899
a29ef6e8
SZ
/* Message carrying the interface whose routes are being deleted. */
struct netmsg_if_rtdel {
	struct netmsg_base base;
	struct ifnet *ifp;
};

/*
 * Per-CPU handler: walk every address family's routing table on this CPU
 * and delete routes referencing rmsg->ifp (via if_rtdel), then forward
 * the message to the next netisr CPU.
 */
static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, cpu;

	cpu = mycpuid;
	ASSERT_NETISR_NCPUS(cpu);

	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head *rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}
	netisr_forwardmsg(&msg->base, cpu + 1);
}
923
984263bc
MD
924/*
925 * Detach an interface, removing it from the
926 * list of "active" interfaces.
927 */
928void
f23061d4 929if_detach(struct ifnet *ifp)
984263bc 930{
b4051e25 931 struct ifnet_array *old_ifnet_array;
233c8570 932 struct ifg_list *ifgl;
a29ef6e8 933 struct netmsg_if_rtdel msg;
698ac46c 934 struct domain *dp;
a29ef6e8 935 int q;
984263bc 936
b4051e25 937 /* Announce that the interface is gone. */
f2bd8b67 938 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
b4051e25
SZ
939 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
940 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
941
942 /*
943 * Remove this ifp from ifindex2inet, ifnet queue and ifnet
944 * array before it is whacked.
945 *
946 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
947 * by ifnet lock, so that non-netisr threads could get a
948 * consistent view.
949 */
950 ifnet_lock();
951
952 /*
953 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
954 */
955 ifindex2ifnet[ifp->if_index] = NULL;
956 while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
957 if_index--;
958
959 /*
960 * Remove this ifp from ifnet queue.
961 */
962 TAILQ_REMOVE(&ifnetlist, ifp, if_link);
963
964 /*
965 * Remove this ifp from ifnet array.
966 */
967 /* Free old ifnet array after sync all netisrs */
968 old_ifnet_array = ifnet_array;
969 ifnet_array = ifnet_array_del(ifp, old_ifnet_array);
970
971 ifnet_unlock();
972
233c8570
AL
973 ifgroup_lockmgr(LK_EXCLUSIVE);
974 while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL)
975 if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group);
976 ifgroup_lockmgr(LK_RELEASE);
977
b4051e25
SZ
978 /*
979 * Sync all netisrs so that the old ifnet array is no longer
980 * accessed and we can free it safely later on.
981 */
982 netmsg_service_sync();
983 ifnet_array_free(old_ifnet_array);
f2bd8b67 984
984263bc
MD
985 /*
986 * Remove routes and flush queues.
987 */
4986965b 988 crit_enter();
b3a7093f
SZ
989#ifdef IFPOLL_ENABLE
990 if (ifp->if_flags & IFF_NPOLLING)
991 ifpoll_deregister(ifp);
323f031d 992#endif
984263bc
MD
993 if_down(ifp);
994
ae6d2ace
SZ
995 /* Decrease the mbuf clusters/jclusters limits increased by us */
996 if (ifp->if_nmbclusters > 0)
997 mcl_inclimit(-ifp->if_nmbclusters);
998 if (ifp->if_nmbjclusters > 0)
999 mjcl_inclimit(-ifp->if_nmbjclusters);
1000
5b1156d4 1001#ifdef ALTQ
4d723e5a
JS
1002 if (ifq_is_enabled(&ifp->if_snd))
1003 altq_disable(&ifp->if_snd);
1004 if (ifq_is_attached(&ifp->if_snd))
1005 altq_detach(&ifp->if_snd);
5b1156d4 1006#endif
4d723e5a 1007
984263bc 1008 /*
984263bc
MD
1009 * Clean up all addresses.
1010 */
141697b6 1011 ifp->if_lladdr = NULL;
984263bc 1012
c727e142 1013 if_purgeaddrs_nolink(ifp);
b2632176 1014 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
c727e142
SZ
1015 struct ifaddr *ifa;
1016
b2632176 1017 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
c727e142 1018 KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
27eaa4f1 1019 ("non-link ifaddr is left on if_addrheads"));
984263bc 1020
b2632176
SZ
1021 ifa_ifunlink(ifa, ifp);
1022 ifa_destroy(ifa);
1023 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
27eaa4f1 1024 ("there are still ifaddrs left on if_addrheads"));
984263bc
MD
1025 }
1026
a98eb818
JS
1027#ifdef INET
1028 /*
1029 * Remove all IPv4 kernel structures related to ifp.
1030 */
1031 in_ifdetach(ifp);
1032#endif
1033
984263bc
MD
1034#ifdef INET6
1035 /*
1036 * Remove all IPv6 kernel structs related to ifp. This should be done
1037 * before removing routing entries below, since IPv6 interface direct
1038 * routes are expected to be removed by the IPv6-specific kernel API.
1039 * Otherwise, the kernel will detect some inconsistency and bark it.
1040 */
1041 in6_ifdetach(ifp);
1042#endif
1043
1044 /*
1045 * Delete all remaining routes using this interface
984263bc 1046 */
d20d6787 1047 netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
a29ef6e8
SZ
1048 if_rtdel_dispatch);
1049 msg.ifp = ifp;
43dbcc2a 1050 netisr_domsg_global(&msg.base);
984263bc 1051
2949c680 1052 SLIST_FOREACH(dp, &domains, dom_next) {
698ac46c
HS
1053 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1054 (*dp->dom_ifdetach)(ifp,
1055 ifp->if_afdata[dp->dom_family]);
2949c680 1056 }
698ac46c 1057
b2632176 1058 kfree(ifp->if_addrheads, M_IFADDR);
5804f3d1
SZ
1059
1060 lwkt_synchronize_ipiqs("if_detach");
1061 ifq_stage_detach(&ifp->if_snd);
1062
f0a26983
SZ
1063 for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
1064 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];
1065
1066 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
1067 kfree(ifsq->ifsq_stage, M_DEVBUF);
1068 }
407cde39
SZ
1069 kfree(ifp->if_snd.altq_subq, M_DEVBUF);
1070
e1fcdad7
SZ
1071 kfree(ifp->if_data_pcpu, M_DEVBUF);
1072
4986965b 1073 crit_exit();
984263bc
MD
1074}
1075
233c8570
AL
1076int
1077ifgroup_lockmgr(u_int flags)
1078{
1079 return lockmgr(&ifgroup_lock, flags);
1080}
1081
315a7da3 1082/*
233c8570 1083 * Create an empty interface group.
315a7da3 1084 */
233c8570 1085static struct ifg_group *
315a7da3
JL
1086if_creategroup(const char *groupname)
1087{
233c8570
AL
1088 struct ifg_group *ifg;
1089
1090 ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK);
1091 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1092 ifg->ifg_refcnt = 0;
1093 ifg->ifg_carp_demoted = 0;
1094 TAILQ_INIT(&ifg->ifg_members);
1095
1096 ifgroup_lockmgr(LK_EXCLUSIVE);
1097 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
1098 ifgroup_lockmgr(LK_RELEASE);
315a7da3 1099
233c8570
AL
1100 EVENTHANDLER_INVOKE(group_attach_event, ifg);
1101
1102 return (ifg);
315a7da3
JL
1103}
1104
1105/*
233c8570
AL
1106 * Destroy an empty interface group.
1107 */
1108static int
1109if_destroygroup(struct ifg_group *ifg)
1110{
1111 KASSERT(ifg->ifg_refcnt == 0,
1112 ("trying to delete a non-empty interface group"));
1113
1114 ifgroup_lockmgr(LK_EXCLUSIVE);
1115 TAILQ_REMOVE(&ifg_head, ifg, ifg_next);
1116 ifgroup_lockmgr(LK_RELEASE);
1117
1118 EVENTHANDLER_INVOKE(group_detach_event, ifg);
1119 kfree(ifg, M_IFNET);
1120
1121 return (0);
1122}
1123
1124/*
1125 * Add the interface to a group.
1126 * The target group will be created if it doesn't exist.
315a7da3
JL
1127 */
1128int
1129if_addgroup(struct ifnet *ifp, const char *groupname)
1130{
233c8570
AL
1131 struct ifg_list *ifgl;
1132 struct ifg_group *ifg;
1133 struct ifg_member *ifgm;
315a7da3 1134
233c8570
AL
1135 if (groupname[0] &&
1136 groupname[strlen(groupname) - 1] >= '0' &&
315a7da3
JL
1137 groupname[strlen(groupname) - 1] <= '9')
1138 return (EINVAL);
1139
233c8570 1140 ifgroup_lockmgr(LK_SHARED);
315a7da3 1141
233c8570
AL
1142 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1143 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) {
1144 ifgroup_lockmgr(LK_RELEASE);
1145 return (EEXIST);
1146 }
315a7da3
JL
1147 }
1148
233c8570
AL
1149 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
1150 if (strcmp(ifg->ifg_group, groupname) == 0)
315a7da3 1151 break;
315a7da3
JL
1152 }
1153
233c8570
AL
1154 ifgroup_lockmgr(LK_RELEASE);
1155
1156 if (ifg == NULL)
1157 ifg = if_creategroup(groupname);
1158
1159 ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK);
1160 ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK);
315a7da3
JL
1161 ifgl->ifgl_group = ifg;
1162 ifgm->ifgm_ifp = ifp;
233c8570 1163 ifg->ifg_refcnt++;
315a7da3 1164
233c8570 1165 ifgroup_lockmgr(LK_EXCLUSIVE);
315a7da3
JL
1166 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1167 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
233c8570 1168 ifgroup_lockmgr(LK_RELEASE);
315a7da3 1169
233c8570 1170 EVENTHANDLER_INVOKE(group_change_event, groupname);
315a7da3
JL
1171
1172 return (0);
1173}
1174
1175/*
233c8570
AL
1176 * Remove the interface from a group.
1177 * The group will be destroyed if it becomes empty.
1178 *
1179 * The 'ifgroup_lock' must be hold exclusively when calling this.
315a7da3 1180 */
233c8570
AL
1181static int
1182if_delgroup_locked(struct ifnet *ifp, const char *groupname)
315a7da3 1183{
233c8570
AL
1184 struct ifg_list *ifgl;
1185 struct ifg_member *ifgm;
315a7da3 1186
233c8570
AL
1187 KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE);
1188
1189 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1190 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
315a7da3 1191 break;
233c8570 1192 }
315a7da3
JL
1193 if (ifgl == NULL)
1194 return (ENOENT);
1195
1196 TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1197
233c8570 1198 TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
315a7da3
JL
1199 if (ifgm->ifgm_ifp == ifp)
1200 break;
233c8570 1201 }
315a7da3
JL
1202
1203 if (ifgm != NULL) {
1204 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
315a7da3 1205
233c8570
AL
1206 ifgroup_lockmgr(LK_RELEASE);
1207 EVENTHANDLER_INVOKE(group_change_event, groupname);
1208 ifgroup_lockmgr(LK_EXCLUSIVE);
1209
1210 kfree(ifgm, M_IFNET);
1211 ifgl->ifgl_group->ifg_refcnt--;
315a7da3
JL
1212 }
1213
233c8570
AL
1214 if (ifgl->ifgl_group->ifg_refcnt == 0) {
1215 ifgroup_lockmgr(LK_RELEASE);
1216 if_destroygroup(ifgl->ifgl_group);
1217 ifgroup_lockmgr(LK_EXCLUSIVE);
1218 }
315a7da3 1219
233c8570 1220 kfree(ifgl, M_IFNET);
315a7da3
JL
1221
1222 return (0);
1223}
1224
233c8570
AL
1225int
1226if_delgroup(struct ifnet *ifp, const char *groupname)
1227{
1228 int error;
1229
1230 ifgroup_lockmgr(LK_EXCLUSIVE);
1231 error = if_delgroup_locked(ifp, groupname);
1232 ifgroup_lockmgr(LK_RELEASE);
1233
1234 return (error);
1235}
1236
315a7da3 1237/*
233c8570
AL
1238 * Store all the groups that the interface belongs to in memory
1239 * pointed to by data.
315a7da3 1240 */
233c8570
AL
1241static int
1242if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp)
315a7da3 1243{
233c8570
AL
1244 struct ifg_list *ifgl;
1245 struct ifg_req *ifgrq, *p;
1246 int len, error;
1247
1248 len = 0;
1249 ifgroup_lockmgr(LK_SHARED);
1250 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1251 len += sizeof(struct ifg_req);
1252 ifgroup_lockmgr(LK_RELEASE);
315a7da3
JL
1253
1254 if (ifgr->ifgr_len == 0) {
233c8570
AL
1255 /*
1256 * Caller is asking how much memory should be allocated in
1257 * the next request in order to hold all the groups.
1258 */
1259 ifgr->ifgr_len = len;
315a7da3 1260 return (0);
233c8570
AL
1261 } else if (ifgr->ifgr_len != len) {
1262 return (EINVAL);
315a7da3
JL
1263 }
1264
233c8570
AL
1265 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
1266 if (ifgrq == NULL)
1267 return (ENOMEM);
1268
1269 ifgroup_lockmgr(LK_SHARED);
1270 p = ifgrq;
315a7da3 1271 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
233c8570
AL
1272 if (len < sizeof(struct ifg_req)) {
1273 ifgroup_lockmgr(LK_RELEASE);
315a7da3 1274 return (EINVAL);
233c8570
AL
1275 }
1276
1277 strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group,
1278 sizeof(ifgrq->ifgrq_group));
1279 len -= sizeof(struct ifg_req);
1280 p++;
315a7da3 1281 }
233c8570
AL
1282 ifgroup_lockmgr(LK_RELEASE);
1283
1284 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
1285 kfree(ifgrq, M_TEMP);
1286 if (error)
1287 return (error);
315a7da3
JL
1288
1289 return (0);
1290}
1291
1292/*
233c8570 1293 * Store all the members of a group in memory pointed to by data.
315a7da3 1294 */
233c8570
AL
1295static int
1296if_getgroupmembers(struct ifgroupreq *ifgr)
315a7da3 1297{
233c8570
AL
1298 struct ifg_group *ifg;
1299 struct ifg_member *ifgm;
1300 struct ifg_req *ifgrq, *p;
1301 int len, error;
1302
1303 ifgroup_lockmgr(LK_SHARED);
1304
1305 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
1306 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
315a7da3 1307 break;
233c8570
AL
1308 }
1309 if (ifg == NULL) {
1310 ifgroup_lockmgr(LK_RELEASE);
315a7da3 1311 return (ENOENT);
233c8570
AL
1312 }
1313
1314 len = 0;
1315 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
1316 len += sizeof(struct ifg_req);
1317
1318 ifgroup_lockmgr(LK_RELEASE);
315a7da3
JL
1319
1320 if (ifgr->ifgr_len == 0) {
233c8570 1321 ifgr->ifgr_len = len;
315a7da3 1322 return (0);
233c8570
AL
1323 } else if (ifgr->ifgr_len != len) {
1324 return (EINVAL);
315a7da3
JL
1325 }
1326
233c8570
AL
1327 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
1328 if (ifgrq == NULL)
1329 return (ENOMEM);
1330
1331 ifgroup_lockmgr(LK_SHARED);
1332 p = ifgrq;
315a7da3 1333 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
233c8570
AL
1334 if (len < sizeof(struct ifg_req)) {
1335 ifgroup_lockmgr(LK_RELEASE);
315a7da3 1336 return (EINVAL);
233c8570
AL
1337 }
1338
1339 strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname,
1340 sizeof(p->ifgrq_member));
1341 len -= sizeof(struct ifg_req);
1342 p++;
315a7da3 1343 }
233c8570
AL
1344 ifgroup_lockmgr(LK_RELEASE);
1345
1346 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
1347 kfree(ifgrq, M_TEMP);
1348 if (error)
1349 return (error);
315a7da3
JL
1350
1351 return (0);
1352}
1353
984263bc
MD
1354/*
1355 * Delete Routes for a Network Interface
f23061d4 1356 *
984263bc
MD
1357 * Called for each routing entry via the rnh->rnh_walktree() call above
1358 * to delete all route entries referencing a detaching network interface.
1359 *
1360 * Arguments:
1361 * rn pointer to node in the routing table
1362 * arg argument passed to rnh->rnh_walktree() - detaching interface
1363 *
1364 * Returns:
1365 * 0 successful
1366 * errno failed - reason indicated
1367 *
1368 */
1369static int
f23061d4 1370if_rtdel(struct radix_node *rn, void *arg)
984263bc
MD
1371{
1372 struct rtentry *rt = (struct rtentry *)rn;
1373 struct ifnet *ifp = arg;
1374 int err;
1375
1376 if (rt->rt_ifp == ifp) {
1377
1378 /*
1379 * Protect (sorta) against walktree recursion problems
1380 * with cloned routes
1381 */
f23061d4 1382 if (!(rt->rt_flags & RTF_UP))
984263bc
MD
1383 return (0);
1384
1385 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1386 rt_mask(rt), rt->rt_flags,
2038fb68 1387 NULL);
984263bc
MD
1388 if (err) {
1389 log(LOG_WARNING, "if_rtdel: error %d\n", err);
1390 }
1391 }
1392
1393 return (0);
1394}
1395
0925f9d8
SZ
1396static __inline boolean_t
1397ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa)
1398{
1399 if (old_ifa == NULL)
1400 return TRUE;
1401
1402 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 &&
1403 (cur_ifa->ifa_ifp->if_flags & IFF_UP))
1404 return TRUE;
1405 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 &&
1406 (cur_ifa->ifa_flags & IFA_ROUTE))
1407 return TRUE;
1408 return FALSE;
1409}
1410
984263bc
MD
1411/*
1412 * Locate an interface based on a complete address.
1413 */
984263bc 1414struct ifaddr *
f23061d4 1415ifa_ifwithaddr(struct sockaddr *addr)
984263bc 1416{
b4051e25
SZ
1417 const struct ifnet_array *arr;
1418 int i;
984263bc 1419
b4051e25
SZ
1420 arr = ifnet_array_get();
1421 for (i = 0; i < arr->ifnet_count; ++i) {
1422 struct ifnet *ifp = arr->ifnet_arr[i];
b2632176
SZ
1423 struct ifaddr_container *ifac;
1424
1425 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1426 struct ifaddr *ifa = ifac->ifa;
1427
1428 if (ifa->ifa_addr->sa_family != addr->sa_family)
1429 continue;
1430 if (sa_equal(addr, ifa->ifa_addr))
1431 return (ifa);
1432 if ((ifp->if_flags & IFF_BROADCAST) &&
1433 ifa->ifa_broadaddr &&
1434 /* IPv6 doesn't have broadcast */
1435 ifa->ifa_broadaddr->sa_len != 0 &&
1436 sa_equal(ifa->ifa_broadaddr, addr))
1437 return (ifa);
1438 }
984263bc 1439 }
b2632176 1440 return (NULL);
984263bc 1441}
0925f9d8 1442
984263bc
MD
1443/*
1444 * Locate the point to point interface with a given destination address.
1445 */
984263bc 1446struct ifaddr *
f23061d4 1447ifa_ifwithdstaddr(struct sockaddr *addr)
984263bc 1448{
b4051e25
SZ
1449 const struct ifnet_array *arr;
1450 int i;
984263bc 1451
b4051e25
SZ
1452 arr = ifnet_array_get();
1453 for (i = 0; i < arr->ifnet_count; ++i) {
1454 struct ifnet *ifp = arr->ifnet_arr[i];
b2632176
SZ
1455 struct ifaddr_container *ifac;
1456
1457 if (!(ifp->if_flags & IFF_POINTOPOINT))
1458 continue;
1459
1460 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1461 struct ifaddr *ifa = ifac->ifa;
1462
984263bc
MD
1463 if (ifa->ifa_addr->sa_family != addr->sa_family)
1464 continue;
0c3c561c
JH
1465 if (ifa->ifa_dstaddr &&
1466 sa_equal(addr, ifa->ifa_dstaddr))
984263bc 1467 return (ifa);
b2632176 1468 }
984263bc 1469 }
b2632176 1470 return (NULL);
984263bc
MD
1471}
1472
1473/*
1474 * Find an interface on a specific network. If many, choice
1475 * is most specific found.
1476 */
1477struct ifaddr *
f23061d4 1478ifa_ifwithnet(struct sockaddr *addr)
984263bc 1479{
b2632176 1480 struct ifaddr *ifa_maybe = NULL;
984263bc
MD
1481 u_int af = addr->sa_family;
1482 char *addr_data = addr->sa_data, *cplim;
b4051e25
SZ
1483 const struct ifnet_array *arr;
1484 int i;
984263bc
MD
1485
1486 /*
1487 * AF_LINK addresses can be looked up directly by their index number,
1488 * so do that if we can.
1489 */
1490 if (af == AF_LINK) {
b2632176 1491 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
590b8cd4 1492
b2632176
SZ
1493 if (sdl->sdl_index && sdl->sdl_index <= if_index)
1494 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
984263bc
MD
1495 }
1496
1497 /*
1498 * Scan though each interface, looking for ones that have
1499 * addresses in this address family.
1500 */
b4051e25
SZ
1501 arr = ifnet_array_get();
1502 for (i = 0; i < arr->ifnet_count; ++i) {
1503 struct ifnet *ifp = arr->ifnet_arr[i];
b2632176
SZ
1504 struct ifaddr_container *ifac;
1505
1506 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1507 struct ifaddr *ifa = ifac->ifa;
82ed7fc2 1508 char *cp, *cp2, *cp3;
984263bc
MD
1509
1510 if (ifa->ifa_addr->sa_family != af)
1511next: continue;
1512 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1513 /*
1514 * This is a bit broken as it doesn't
1515 * take into account that the remote end may
1516 * be a single node in the network we are
1517 * looking for.
1518 * The trouble is that we don't know the
1519 * netmask for the remote end.
1520 */
0c3c561c
JH
1521 if (ifa->ifa_dstaddr != NULL &&
1522 sa_equal(addr, ifa->ifa_dstaddr))
f23061d4 1523 return (ifa);
984263bc
MD
1524 } else {
1525 /*
1526 * if we have a special address handler,
1527 * then use it instead of the generic one.
1528 */
f23061d4 1529 if (ifa->ifa_claim_addr) {
984263bc
MD
1530 if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1531 return (ifa);
1532 } else {
1533 continue;
1534 }
1535 }
1536
1537 /*
1538 * Scan all the bits in the ifa's address.
1539 * If a bit dissagrees with what we are
1540 * looking for, mask it with the netmask
1541 * to see if it really matters.
1542 * (A byte at a time)
1543 */
1544 if (ifa->ifa_netmask == 0)
1545 continue;
1546 cp = addr_data;
1547 cp2 = ifa->ifa_addr->sa_data;
1548 cp3 = ifa->ifa_netmask->sa_data;
590b8cd4
JH
1549 cplim = ifa->ifa_netmask->sa_len +
1550 (char *)ifa->ifa_netmask;
984263bc
MD
1551 while (cp3 < cplim)
1552 if ((*cp++ ^ *cp2++) & *cp3++)
1553 goto next; /* next address! */
1554 /*
1555 * If the netmask of what we just found
1556 * is more specific than what we had before
1557 * (if we had one) then remember the new one
0925f9d8
SZ
1558 * before continuing to search for an even
1559 * better one. If the netmasks are equal,
1560 * we prefer the this ifa based on the result
1561 * of ifa_prefer().
984263bc 1562 */
4090d6ff 1563 if (ifa_maybe == NULL ||
f23061d4 1564 rn_refines((char *)ifa->ifa_netmask,
0925f9d8
SZ
1565 (char *)ifa_maybe->ifa_netmask) ||
1566 (sa_equal(ifa_maybe->ifa_netmask,
1567 ifa->ifa_netmask) &&
1568 ifa_prefer(ifa, ifa_maybe)))
984263bc
MD
1569 ifa_maybe = ifa;
1570 }
1571 }
1572 }
1573 return (ifa_maybe);
1574}
1575
1576/*
1577 * Find an interface address specific to an interface best matching
1578 * a given address.
1579 */
1580struct ifaddr *
f23061d4 1581ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
984263bc 1582{
b2632176 1583 struct ifaddr_container *ifac;
82ed7fc2
RG
1584 char *cp, *cp2, *cp3;
1585 char *cplim;
4090d6ff 1586 struct ifaddr *ifa_maybe = NULL;
984263bc
MD
1587 u_int af = addr->sa_family;
1588
1589 if (af >= AF_MAX)
1590 return (0);
b2632176
SZ
1591 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1592 struct ifaddr *ifa = ifac->ifa;
1593
984263bc
MD
1594 if (ifa->ifa_addr->sa_family != af)
1595 continue;
4090d6ff 1596 if (ifa_maybe == NULL)
984263bc 1597 ifa_maybe = ifa;
0c3c561c
JH
1598 if (ifa->ifa_netmask == NULL) {
1599 if (sa_equal(addr, ifa->ifa_addr) ||
1600 (ifa->ifa_dstaddr != NULL &&
1601 sa_equal(addr, ifa->ifa_dstaddr)))
984263bc
MD
1602 return (ifa);
1603 continue;
1604 }
1605 if (ifp->if_flags & IFF_POINTOPOINT) {
0c3c561c 1606 if (sa_equal(addr, ifa->ifa_dstaddr))
984263bc
MD
1607 return (ifa);
1608 } else {
1609 cp = addr->sa_data;
1610 cp2 = ifa->ifa_addr->sa_data;
1611 cp3 = ifa->ifa_netmask->sa_data;
1612 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1613 for (; cp3 < cplim; cp3++)
1614 if ((*cp++ ^ *cp2++) & *cp3)
1615 break;
1616 if (cp3 == cplim)
1617 return (ifa);
1618 }
1619 }
1620 return (ifa_maybe);
1621}
1622
984263bc
MD
1623/*
1624 * Default action when installing a route with a Link Level gateway.
1625 * Lookup an appropriate real ifa to point to.
1626 * This should be moved to /sys/net/link.c eventually.
1627 */
1628static void
3ffea39d 1629link_rtrequest(int cmd, struct rtentry *rt)
984263bc 1630{
82ed7fc2 1631 struct ifaddr *ifa;
984263bc
MD
1632 struct sockaddr *dst;
1633 struct ifnet *ifp;
1634
f23061d4
JH
1635 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1636 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
984263bc
MD
1637 return;
1638 ifa = ifaof_ifpforaddr(dst, ifp);
f23061d4 1639 if (ifa != NULL) {
984263bc 1640 IFAFREE(rt->rt_ifa);
f23061d4 1641 IFAREF(ifa);
984263bc 1642 rt->rt_ifa = ifa;
984263bc 1643 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
3ffea39d 1644 ifa->ifa_rtrequest(cmd, rt);
984263bc
MD
1645 }
1646}
1647
9a74b592
SZ
1648struct netmsg_ifroute {
1649 struct netmsg_base base;
1650 struct ifnet *ifp;
1651 int flag;
1652 int fam;
1653};
1654
984263bc 1655/*
9a74b592 1656 * Mark an interface down and notify protocols of the transition.
984263bc 1657 */
9a74b592
SZ
1658static void
1659if_unroute_dispatch(netmsg_t nmsg)
984263bc 1660{
9a74b592
SZ
1661 struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg;
1662 struct ifnet *ifp = msg->ifp;
1663 int flag = msg->flag, fam = msg->fam;
b2632176 1664 struct ifaddr_container *ifac;
984263bc 1665
43dbcc2a
SZ
1666 ASSERT_NETISR0;
1667
984263bc
MD
1668 ifp->if_flags &= ~flag;
1669 getmicrotime(&ifp->if_lastchange);
4d2ff05c
RM
1670 rt_ifmsg(ifp);
1671
9a74b592
SZ
1672 /*
1673 * The ifaddr processing in the following loop will block,
1674 * however, this function is called in netisr0, in which
1675 * ifaddr list changes happen, so we don't care about the
1676 * blockness of the ifaddr processing here.
1677 */
b2632176
SZ
1678 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1679 struct ifaddr *ifa = ifac->ifa;
1680
9a74b592
SZ
1681 /* Ignore marker */
1682 if (ifa->ifa_addr->sa_family == AF_UNSPEC)
1683 continue;
1684
984263bc 1685 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
91be174d 1686 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
b2632176 1687 }
9a74b592 1688
4d2ff05c 1689 ifq_purge_all(&ifp->if_snd);
5204e13c 1690 netisr_replymsg(&nmsg->base, 0);
9a74b592
SZ
1691}
1692
4d2ff05c 1693static void
9a74b592
SZ
1694if_unroute(struct ifnet *ifp, int flag, int fam)
1695{
1696 struct netmsg_ifroute msg;
1697
9a74b592
SZ
1698 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
1699 if_unroute_dispatch);
1700 msg.ifp = ifp;
1701 msg.flag = flag;
1702 msg.fam = fam;
5204e13c 1703 netisr_domsg(&msg.base, 0);
984263bc
MD
1704}
1705
1706/*
9a74b592 1707 * Mark an interface up and notify protocols of the transition.
984263bc 1708 */
9a74b592
SZ
1709static void
1710if_route_dispatch(netmsg_t nmsg)
984263bc 1711{
9a74b592
SZ
1712 struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg;
1713 struct ifnet *ifp = msg->ifp;
1714 int flag = msg->flag, fam = msg->fam;
b2632176 1715 struct ifaddr_container *ifac;
984263bc 1716
43dbcc2a
SZ
1717 ASSERT_NETISR0;
1718
9275f515 1719 ifq_purge_all(&ifp->if_snd);
984263bc
MD
1720 ifp->if_flags |= flag;
1721 getmicrotime(&ifp->if_lastchange);
4d2ff05c
RM
1722 rt_ifmsg(ifp);
1723
9a74b592
SZ
1724 /*
1725 * The ifaddr processing in the following loop will block,
1726 * however, this function is called in netisr0, in which
1727 * ifaddr list changes happen, so we don't care about the
1728 * blockness of the ifaddr processing here.
1729 */
b2632176
SZ
1730 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1731 struct ifaddr *ifa = ifac->ifa;
1732
9a74b592
SZ
1733 /* Ignore marker */
1734 if (ifa->ifa_addr->sa_family == AF_UNSPEC)
1735 continue;
1736
984263bc 1737 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
91be174d 1738 kpfctlinput(PRC_IFUP, ifa->ifa_addr);
b2632176 1739 }
984263bc
MD
1740#ifdef INET6
1741 in6_if_up(ifp);
1742#endif
9a74b592 1743
5204e13c 1744 netisr_replymsg(&nmsg->base, 0);
9a74b592
SZ
1745}
1746
4d2ff05c 1747static void
9a74b592
SZ
1748if_route(struct ifnet *ifp, int flag, int fam)
1749{
1750 struct netmsg_ifroute msg;
1751
9a74b592
SZ
1752 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
1753 if_route_dispatch);
1754 msg.ifp = ifp;
1755 msg.flag = flag;
1756 msg.fam = fam;
5204e13c 1757 netisr_domsg(&msg.base, 0);
984263bc
MD
1758}
1759
1760/*
5c703385
MD
1761 * Mark an interface down and notify protocols of the transition. An
1762 * interface going down is also considered to be a synchronizing event.
1763 * We must ensure that all packet processing related to the interface
1764 * has completed before we return so e.g. the caller can free the ifnet
1765 * structure that the mbufs may be referencing.
1766 *
984263bc
MD
1767 * NOTE: must be called at splnet or eqivalent.
1768 */
1769void
f23061d4 1770if_down(struct ifnet *ifp)
984263bc 1771{
fcddd1b6 1772 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
984263bc 1773 if_unroute(ifp, IFF_UP, AF_UNSPEC);
5c703385 1774 netmsg_service_sync();
984263bc
MD
1775}
1776
1777/*
1778 * Mark an interface up and notify protocols of
1779 * the transition.
1780 * NOTE: must be called at splnet or eqivalent.
1781 */
1782void
f23061d4 1783if_up(struct ifnet *ifp)
984263bc 1784{
984263bc 1785 if_route(ifp, IFF_UP, AF_UNSPEC);
fcddd1b6 1786 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
984263bc
MD
1787}
1788
6de83abe
SZ
1789/*
1790 * Process a link state change.
1791 * NOTE: must be called at splsoftnet or equivalent.
1792 */
1793void
1794if_link_state_change(struct ifnet *ifp)
1795{
71fc104f
HT
1796 int link_state = ifp->if_link_state;
1797
6de83abe 1798 rt_ifmsg(ifp);
71fc104f
HT
1799 devctl_notify("IFNET", ifp->if_xname,
1800 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
bc1a39e2
AL
1801
1802 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
6de83abe
SZ
1803}
1804
984263bc
MD
1805/*
1806 * Handle interface watchdog timer routines. Called
1807 * from softclock, we decrement timers (if set) and
1808 * call the appropriate interface routine on expiration.
1809 */
1810static void
b5df1a85 1811if_slowtimo_dispatch(netmsg_t nmsg)
984263bc 1812{
b5df1a85 1813 struct globaldata *gd = mycpu;
b4051e25
SZ
1814 const struct ifnet_array *arr;
1815 int i;
4986965b 1816
5204e13c 1817 ASSERT_NETISR0;
b5df1a85
SZ
1818
1819 crit_enter_gd(gd);
1820 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */
1821 crit_exit_gd(gd);
984263bc 1822
b4051e25
SZ
1823 arr = ifnet_array_get();
1824 for (i = 0; i < arr->ifnet_count; ++i) {
1825 struct ifnet *ifp = arr->ifnet_arr[i];
1826
b5df1a85
SZ
1827 crit_enter_gd(gd);
1828
6517ec3f
SZ
1829 if (if_stats_compat) {
1830 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
1831 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
1832 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
1833 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
1834 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
1835 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
1836 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
1837 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
1838 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
1839 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
1840 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
6de344ba 1841 IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops);
6517ec3f
SZ
1842 }
1843
b5df1a85
SZ
1844 if (ifp->if_timer == 0 || --ifp->if_timer) {
1845 crit_exit_gd(gd);
984263bc 1846 continue;
b5df1a85 1847 }
78195a76 1848 if (ifp->if_watchdog) {
a3dd34d2 1849 if (ifnet_tryserialize_all(ifp)) {
78195a76 1850 (*ifp->if_watchdog)(ifp);
a3dd34d2 1851 ifnet_deserialize_all(ifp);
78195a76
MD
1852 } else {
1853 /* try again next timeout */
1854 ++ifp->if_timer;
1855 }
1856 }
4986965b 1857
b5df1a85
SZ
1858 crit_exit_gd(gd);
1859 }
4986965b 1860
abbb44bb 1861 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
984263bc
MD
1862}
1863
b5df1a85
SZ
1864static void
1865if_slowtimo(void *arg __unused)
1866{
1867 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg;
1868
1869 KASSERT(mycpuid == 0, ("not on cpu0"));
1870 crit_enter();
1871 if (lmsg->ms_flags & MSGF_DONE)
1872 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
1873 crit_exit();
1874}
1875
984263bc
MD
1876/*
1877 * Map interface name to
1878 * interface structure pointer.
1879 */
1880struct ifnet *
1881ifunit(const char *name)
1882{
984263bc 1883 struct ifnet *ifp;
984263bc 1884
984263bc 1885 /*
3e4a09e7 1886 * Search all the interfaces for this name/number
984263bc 1887 */
b4051e25 1888 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked"));
3e4a09e7 1889
b4051e25 1890 TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
3e4a09e7 1891 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
984263bc
MD
1892 break;
1893 }
1894 return (ifp);
1895}
1896
984263bc 1897struct ifnet *
b4051e25 1898ifunit_netisr(const char *name)
984263bc 1899{
b4051e25
SZ
1900 const struct ifnet_array *arr;
1901 int i;
984263bc
MD
1902
1903 /*
b4051e25 1904 * Search all the interfaces for this name/number
984263bc
MD
1905 */
1906
b4051e25
SZ
1907 arr = ifnet_array_get();
1908 for (i = 0; i < arr->ifnet_count; ++i) {
1909 struct ifnet *ifp = arr->ifnet_arr[i];
984263bc 1910
b4051e25
SZ
1911 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1912 return ifp;
1913 }
1914 return NULL;
1915}
984263bc
MD
1916
1917/*
1918 * Interface ioctls.
1919 */
1920int
87de5057 1921ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
984263bc 1922{
41c20dac 1923 struct ifnet *ifp;
233c8570 1924 struct ifgroupreq *ifgr;
41c20dac 1925 struct ifreq *ifr;
984263bc 1926 struct ifstat *ifs;
e612af50 1927 int error, do_ifup = 0;
984263bc
MD
1928 short oif_flags;
1929 int new_flags;
1fdf0954
HP
1930 size_t namelen, onamelen;
1931 char new_name[IFNAMSIZ];
1932 struct ifaddr *ifa;
1933 struct sockaddr_dl *sdl;
984263bc
MD
1934
1935 switch (cmd) {
984263bc 1936 case SIOCGIFCONF:
87de5057 1937 return (ifconf(cmd, data, cred));
9683f229
MD
1938 default:
1939 break;
984263bc 1940 }
9683f229 1941
984263bc
MD
1942 ifr = (struct ifreq *)data;
1943
1944 switch (cmd) {
1945 case SIOCIFCREATE:
c5e14c14
RP
1946 case SIOCIFCREATE2:
1947 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1948 return (error);
1949 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
233c8570 1950 cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
984263bc 1951 case SIOCIFDESTROY:
895c1f85 1952 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
984263bc 1953 return (error);
c5e14c14 1954 return (if_clone_destroy(ifr->ifr_name));
984263bc
MD
1955 case SIOCIFGCLONERS:
1956 return (if_clone_list((struct if_clonereq *)data));
233c8570
AL
1957 case SIOCGIFGMEMB:
1958 return (if_getgroupmembers((struct ifgroupreq *)data));
9683f229
MD
1959 default:
1960 break;
984263bc
MD
1961 }
1962
9683f229
MD
1963 /*
1964 * Nominal ioctl through interface, lookup the ifp and obtain a
1965 * lock to serialize the ifconfig ioctl operation.
1966 */
b4051e25
SZ
1967 ifnet_lock();
1968
984263bc 1969 ifp = ifunit(ifr->ifr_name);
b4051e25
SZ
1970 if (ifp == NULL) {
1971 ifnet_unlock();
984263bc 1972 return (ENXIO);
b4051e25 1973 }
9683f229 1974 error = 0;
984263bc 1975
9683f229 1976 switch (cmd) {
12b71966
PA
1977 case SIOCGIFINDEX:
1978 ifr->ifr_index = ifp->if_index;
1979 break;
1980
984263bc
MD
1981 case SIOCGIFFLAGS:
1982 ifr->ifr_flags = ifp->if_flags;
46f25451 1983 ifr->ifr_flagshigh = ifp->if_flags >> 16;
984263bc
MD
1984 break;
1985
1986 case SIOCGIFCAP:
1987 ifr->ifr_reqcap = ifp->if_capabilities;
1988 ifr->ifr_curcap = ifp->if_capenable;
1989 break;
1990
1991 case SIOCGIFMETRIC:
1992 ifr->ifr_metric = ifp->if_metric;
1993 break;
1994
1995 case SIOCGIFMTU:
1996 ifr->ifr_mtu = ifp->if_mtu;
1997 break;
1998
e41e61d5
SZ
1999 case SIOCGIFTSOLEN:
2000 ifr->ifr_tsolen = ifp->if_tsolen;
2001 break;
2002
315a7da3
JL
2003 case SIOCGIFDATA:
2004 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
9683f229 2005 sizeof(ifp->if_data));
315a7da3
JL
2006 break;
2007
984263bc
MD
2008 case SIOCGIFPHYS:
2009 ifr->ifr_phys = ifp->if_physical;
2010 break;
2011
1630efc5 2012 case SIOCGIFPOLLCPU:
1630efc5 2013 ifr->ifr_pollcpu = -1;
1630efc5
SZ
2014 break;
2015
2016 case SIOCSIFPOLLCPU:
1630efc5
SZ
2017 break;
2018
984263bc 2019 case SIOCSIFFLAGS:
895c1f85 2020 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2021 if (error)
9683f229 2022 break;
984263bc
MD
2023 new_flags = (ifr->ifr_flags & 0xffff) |
2024 (ifr->ifr_flagshigh << 16);
2025 if (ifp->if_flags & IFF_SMART) {
2026 /* Smart drivers twiddle their own routes */
2027 } else if (ifp->if_flags & IFF_UP &&
2028 (new_flags & IFF_UP) == 0) {
984263bc 2029 if_down(ifp);
984263bc
MD
2030 } else if (new_flags & IFF_UP &&
2031 (ifp->if_flags & IFF_UP) == 0) {
e612af50 2032 do_ifup = 1;
984263bc 2033 }
9c095379 2034
b3a7093f
SZ
2035#ifdef IFPOLL_ENABLE
2036 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
2037 if (new_flags & IFF_NPOLLING)
2038 ifpoll_register(ifp);
2039 else
2040 ifpoll_deregister(ifp);
2041 }
2042#endif
9c095379 2043
984263bc
MD
2044 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2045 (new_flags &~ IFF_CANTCHANGE);
984263bc
MD
2046 if (new_flags & IFF_PPROMISC) {
2047 /* Permanently promiscuous mode requested */
2048 ifp->if_flags |= IFF_PROMISC;
2049 } else if (ifp->if_pcount == 0) {
2050 ifp->if_flags &= ~IFF_PROMISC;
2051 }
78195a76 2052 if (ifp->if_ioctl) {
a3dd34d2 2053 ifnet_serialize_all(ifp);
87de5057 2054 ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2055 ifnet_deserialize_all(ifp);
78195a76 2056 }
e612af50
SZ
2057 if (do_ifup)
2058 if_up(ifp);
984263bc
MD
2059 getmicrotime(&ifp->if_lastchange);
2060 break;
2061
2062 case SIOCSIFCAP:
895c1f85 2063 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2064 if (error)
9683f229
MD
2065 break;
2066 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
2067 error = EINVAL;
2068 break;
2069 }
a3dd34d2 2070 ifnet_serialize_all(ifp);
87de5057 2071 ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2072 ifnet_deserialize_all(ifp);
984263bc
MD
2073 break;
2074
f23061d4 2075 case SIOCSIFNAME:
895c1f85 2076 error = priv_check_cred(cred, PRIV_ROOT, 0);
9683f229
MD
2077 if (error)
2078 break;
f23061d4 2079 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
9683f229
MD
2080 if (error)
2081 break;
2082 if (new_name[0] == '\0') {
2083 error = EINVAL;
2084 break;
2085 }
2086 if (ifunit(new_name) != NULL) {
2087 error = EEXIST;
2088 break;
2089 }
f2bd8b67
JS
2090
2091 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
f23061d4
JH
2092
2093 /* Announce the departure of the interface. */
2094 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2095
2096 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
b2632176 2097 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
f23061d4
JH
2098 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2099 namelen = strlen(new_name);
2100 onamelen = sdl->sdl_nlen;
2101 /*
2102 * Move the address if needed. This is safe because we
2103 * allocate space for a name of length IFNAMSIZ when we
2104 * create this in if_attach().
2105 */
2106 if (namelen != onamelen) {
2107 bcopy(sdl->sdl_data + onamelen,
2108 sdl->sdl_data + namelen, sdl->sdl_alen);
2109 }
2110 bcopy(new_name, sdl->sdl_data, namelen);
2111 sdl->sdl_nlen = namelen;
2112 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2113 bzero(sdl->sdl_data, onamelen);
2114 while (namelen != 0)
2115 sdl->sdl_data[--namelen] = 0xff;
f2bd8b67
JS
2116
2117 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
f23061d4
JH
2118
2119 /* Announce the return of the interface. */
2120 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2121 break;
1fdf0954 2122
984263bc 2123 case SIOCSIFMETRIC:
895c1f85 2124 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2125 if (error)
9683f229 2126 break;
984263bc
MD
2127 ifp->if_metric = ifr->ifr_metric;
2128 getmicrotime(&ifp->if_lastchange);
2129 break;
2130
2131 case SIOCSIFPHYS:
895c1f85 2132 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2133 if (error)
9683f229
MD
2134 break;
2135 if (ifp->if_ioctl == NULL) {
2136 error = EOPNOTSUPP;
2137 break;
2138 }
a3dd34d2 2139 ifnet_serialize_all(ifp);
87de5057 2140 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2141 ifnet_deserialize_all(ifp);
984263bc
MD
2142 if (error == 0)
2143 getmicrotime(&ifp->if_lastchange);
9683f229 2144 break;
984263bc
MD
2145
2146 case SIOCSIFMTU:
2147 {
2148 u_long oldmtu = ifp->if_mtu;
2149
895c1f85 2150 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2151 if (error)
9683f229
MD
2152 break;
2153 if (ifp->if_ioctl == NULL) {
2154 error = EOPNOTSUPP;
2155 break;
2156 }
2157 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
2158 error = EINVAL;
2159 break;
2160 }
a3dd34d2 2161 ifnet_serialize_all(ifp);
87de5057 2162 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2163 ifnet_deserialize_all(ifp);
984263bc
MD
2164 if (error == 0) {
2165 getmicrotime(&ifp->if_lastchange);
2166 rt_ifmsg(ifp);
2167 }
2168 /*
2169 * If the link MTU changed, do network layer specific procedure.
2170 */
2171 if (ifp->if_mtu != oldmtu) {
2172#ifdef INET6
2173 nd6_setmtu(ifp);
2174#endif
2175 }
9683f229 2176 break;
984263bc
MD
2177 }
2178
e41e61d5
SZ
2179 case SIOCSIFTSOLEN:
2180 error = priv_check_cred(cred, PRIV_ROOT, 0);
2181 if (error)
2182 break;
2183
2184 /* XXX need driver supplied upper limit */
2185 if (ifr->ifr_tsolen <= 0) {
2186 error = EINVAL;
2187 break;
2188 }
2189 ifp->if_tsolen = ifr->ifr_tsolen;
2190 break;
2191
984263bc
MD
2192 case SIOCADDMULTI:
2193 case SIOCDELMULTI:
895c1f85 2194 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2195 if (error)
9683f229 2196 break;
984263bc
MD
2197
2198 /* Don't allow group membership on non-multicast interfaces. */
9683f229
MD
2199 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2200 error = EOPNOTSUPP;
2201 break;
2202 }
984263bc
MD
2203
2204 /* Don't let users screw up protocols' entries. */
9683f229
MD
2205 if (ifr->ifr_addr.sa_family != AF_LINK) {
2206 error = EINVAL;
2207 break;
2208 }
984263bc
MD
2209
2210 if (cmd == SIOCADDMULTI) {
2211 struct ifmultiaddr *ifma;
2212 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2213 } else {
2214 error = if_delmulti(ifp, &ifr->ifr_addr);
2215 }
2216 if (error == 0)
2217 getmicrotime(&ifp->if_lastchange);
9683f229 2218 break;
984263bc
MD
2219
2220 case SIOCSIFPHYADDR:
2221 case SIOCDIFPHYADDR:
2222#ifdef INET6
2223 case SIOCSIFPHYADDR_IN6:
2224#endif
2225 case SIOCSLIFPHYADDR:
233c8570 2226 case SIOCSIFMEDIA:
984263bc 2227 case SIOCSIFGENERIC:
895c1f85 2228 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2229 if (error)
9683f229 2230 break;
baf84f0a 2231 if (ifp->if_ioctl == NULL) {
9683f229
MD
2232 error = EOPNOTSUPP;
2233 break;
2234 }
a3dd34d2 2235 ifnet_serialize_all(ifp);
87de5057 2236 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2237 ifnet_deserialize_all(ifp);
984263bc
MD
2238 if (error == 0)
2239 getmicrotime(&ifp->if_lastchange);
9683f229 2240 break;
984263bc
MD
2241
2242 case SIOCGIFSTATUS:
2243 ifs = (struct ifstat *)data;
2244 ifs->ascii[0] = '\0';
9683f229 2245 /* fall through */
984263bc
MD
2246 case SIOCGIFPSRCADDR:
2247 case SIOCGIFPDSTADDR:
2248 case SIOCGLIFPHYADDR:
2249 case SIOCGIFMEDIA:
2250 case SIOCGIFGENERIC:
9683f229
MD
2251 if (ifp->if_ioctl == NULL) {
2252 error = EOPNOTSUPP;
2253 break;
2254 }
a3dd34d2 2255 ifnet_serialize_all(ifp);
87de5057 2256 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2257 ifnet_deserialize_all(ifp);
9683f229 2258 break;
984263bc
MD
2259
2260 case SIOCSIFLLADDR:
895c1f85 2261 error = priv_check_cred(cred, PRIV_ROOT, 0);
984263bc 2262 if (error)
9683f229
MD
2263 break;
2264 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
2265 ifr->ifr_addr.sa_len);
19f10c78 2266 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
9683f229 2267 break;
984263bc 2268
233c8570
AL
2269 case SIOCAIFGROUP:
2270 ifgr = (struct ifgroupreq *)ifr;
2271 if ((error = priv_check_cred(cred, PRIV_NET_ADDIFGROUP, 0)))
2272 return (error);
2273 if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2274 return (error);
2275 break;
2276
2277 case SIOCDIFGROUP:
2278 ifgr = (struct ifgroupreq *)ifr;
2279 if ((error = priv_check_cred(cred, PRIV_NET_DELIFGROUP, 0)))
2280 return (error);
2281 if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2282 return (error);
2283 break;
2284
2285 case SIOCGIFGROUP:
2286 ifgr = (struct ifgroupreq *)ifr;
2287 if ((error = if_getgroups(ifgr, ifp)))
2288 return (error);
2289 break;
2290
984263bc
MD
2291 default:
2292 oif_flags = ifp->if_flags;
9683f229
MD
2293 if (so->so_proto == 0) {
2294 error = EOPNOTSUPP;
2295 break;
2296 }
002c1265
MD
2297 error = so_pru_control_direct(so, cmd, data, ifp);
2298
baf84f0a
AL
2299 /*
2300 * If the socket control method returns EOPNOTSUPP, pass the
2301 * request directly to the interface.
2302 *
2303 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls,
2304 * because drivers may trust these ioctls to come from an
2305 * already privileged layer and thus do not perform credentials
2306 * checks or input validation.
2307 */
2308 if (error == EOPNOTSUPP &&
2309 ifp->if_ioctl != NULL &&
2310 cmd != SIOCSIFADDR &&
2311 cmd != SIOCSIFBRDADDR &&
2312 cmd != SIOCSIFDSTADDR &&
2313 cmd != SIOCSIFNETMASK) {
2314 ifnet_serialize_all(ifp);
2315 error = ifp->if_ioctl(ifp, cmd, data, cred);
2316 ifnet_deserialize_all(ifp);
2317 }
2318
984263bc
MD
2319 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2320#ifdef INET6
2321 DELAY(100);/* XXX: temporary workaround for fxp issue*/
2322 if (ifp->if_flags & IFF_UP) {
4986965b 2323 crit_enter();
984263bc 2324 in6_if_up(ifp);
4986965b 2325 crit_exit();
984263bc
MD
2326 }
2327#endif
2328 }
9683f229 2329 break;
984263bc 2330 }
9683f229 2331
b4051e25 2332 ifnet_unlock();
9683f229 2333 return (error);
984263bc
MD
2334}
2335
2336/*
2337 * Set/clear promiscuous mode on interface ifp based on the truth value
2338 * of pswitch. The calls are reference counted so that only the first
2339 * "on" request actually has an effect, as does the final "off" request.
2340 * Results are undefined if the "off" and "on" requests are not matched.
2341 */
/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  Requests are reference counted via if_pcount: only the
 * first "on" and the final "off" actually touch IFF_PROMISC and notify
 * the driver.  Returns 0 on success or an errno; on driver failure the
 * previous flag state is restored.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags;		/* saved for rollback if the driver rejects */

	oldflags = ifp->if_flags;
	if (ifp->if_flags & IFF_PPROMISC) {
		/* Do nothing if device is in permanently promiscuous mode */
		ifp->if_pcount += pswitch ? 1 : -1;
		return (0);
	}
	if (pswitch) {
		/*
		 * If the device is not configured up, we cannot put it in
		 * promiscuous mode.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);
		/* Only the first reference changes the flag. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode enabled\n",
		    ifp->if_xname);
	} else {
		/* Only the last reference clears the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode disabled\n",
		    ifp->if_xname);
	}
	/* Push the new flag state down to the driver. */
	ifr.ifr_flags = ifp->if_flags;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	ifnet_serialize_all(ifp);
	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
	ifnet_deserialize_all(ifp);
	if (error == 0)
		rt_ifmsg(ifp);		/* announce the flag change */
	else
		ifp->if_flags = oldflags;
	return error;
}
2385
2386/*
2387 * Return interface configuration
2388 * of system. List may be used
2389 * in later ioctl's (above) to get
2390 * other information.
2391 */
/*
 * Return interface configuration of the system (SIOCGIFCONF): copy one
 * struct ifreq per interface address into the user buffer described by
 * the struct ifconf at 'data'.  List may be used in later ioctl's to get
 * other information.  On return ifc_len is adjusted to the number of
 * bytes actually used.
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct sockaddr *sa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;	/* bytes left in user buffer */

	ifrp = ifc->ifc_req;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		struct ifaddr_container *ifac, *ifac_mark;
		struct ifaddr_marker mark;
		struct ifaddrhead *head;
		int addrs;

		if (space <= sizeof ifr)
			break;

		/*
		 * Zero the stack declared structure first to prevent
		 * memory disclosure.
		 */
		bzero(&ifr, sizeof(ifr));
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		}

		/*
		 * Add a marker, since copyout() could block and during that
		 * period the list could be changed.  Inserting the marker to
		 * the header of the list will not cause trouble for the code
		 * assuming that the first element of the list is AF_LINK; the
		 * marker will be moved to the next position w/o blocking.
		 */
		ifa_marker_init(&mark, ifp);
		ifac_mark = &mark.ifac;
		head = &ifp->if_addrheads[mycpuid];

		addrs = 0;
		TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link);
		while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
			struct ifaddr *ifa = ifac->ifa;

			/* Advance the marker past the entry being visited. */
			TAILQ_REMOVE(head, ifac_mark, ifa_link);
			TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);

			/* Ignore marker */
			if (ifa->ifa_addr->sa_family == AF_UNSPEC)
				continue;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			/* Hide addresses not visible from this jail. */
			if (cred->cr_prison &&
			    prison_if(cred, sa))
				continue;
			addrs++;
			/*
			 * Keep a reference on this ifaddr, so that it will
			 * not be destroyed when its address is copied to
			 * the userland, which could block.
			 */
			IFAREF(ifa);
			if (sa->sa_len <= sizeof(*sa)) {
				/* Address fits inline in the ifreq. */
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				/* Oversized address: name + trailing sa. */
				if (space < (sizeof ifr) + sa->sa_len -
				    sizeof(*sa)) {
					IFAFREE(ifa);
					break;
				}
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
						sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
							sa->sa_len);
				ifrp = (struct ifreq *)
				    (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			IFAFREE(ifa);
			if (error)
				break;
			space -= sizeof ifr;
		}
		TAILQ_REMOVE(head, ifac_mark, ifa_link);
		if (error)
			break;
		if (!addrs) {
			/* Interface with no addresses: emit name only. */
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifnet_unlock();

	ifc->ifc_len -= space;
	return (error);
}
2501
2502/*
2503 * Just like if_promisc(), but for all-multicast-reception mode.
2504 */
2505int
f23061d4 2506if_allmulti(struct ifnet *ifp, int onswitch)
984263bc
MD
2507{
2508 int error = 0;
984263bc
MD
2509 struct ifreq ifr;
2510
4986965b
JS
2511 crit_enter();
2512
984263bc
MD
2513 if (onswitch) {
2514 if (ifp->if_amcount++ == 0) {
2515 ifp->if_flags |= IFF_ALLMULTI;
2516 ifr.ifr_flags = ifp->if_flags;
46f25451 2517 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2 2518 ifnet_serialize_all(ifp);
bd4539cc 2519 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2520 NULL);
a3dd34d2 2521 ifnet_deserialize_all(ifp);
984263bc
MD
2522 }
2523 } else {
2524 if (ifp->if_amcount > 1) {
2525 ifp->if_amcount--;
2526 } else {
2527 ifp->if_amcount = 0;
2528 ifp->if_flags &= ~IFF_ALLMULTI;
2529 ifr.ifr_flags = ifp->if_flags;
46f25451 2530 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2 2531 ifnet_serialize_all(ifp);
bd4539cc 2532 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2533 NULL);
a3dd34d2 2534 ifnet_deserialize_all(ifp);
984263bc
MD
2535 }
2536 }
4986965b
JS
2537
2538 crit_exit();
984263bc
MD
2539
2540 if (error == 0)
2541 rt_ifmsg(ifp);
2542 return error;
2543}
2544
2545/*
2546 * Add a multicast listenership to the interface in question.
2547 * The link layer provides a routine which converts
2548 */
2549int
72659ed0
SZ
2550if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa,
2551 struct ifmultiaddr **retifma)
984263bc
MD
2552{
2553 struct sockaddr *llsa, *dupsa;
4986965b 2554 int error;
984263bc
MD
2555 struct ifmultiaddr *ifma;
2556
72659ed0
SZ
2557 ASSERT_IFNET_SERIALIZED_ALL(ifp);
2558
984263bc
MD
2559 /*
2560 * If the matching multicast address already exists
2561 * then don't add a new one, just add a reference
2562 */
441d34b2 2563 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
0c3c561c 2564 if (sa_equal(sa, ifma->ifma_addr)) {
984263bc
MD
2565 ifma->ifma_refcount++;
2566 if (retifma)
2567 *retifma = ifma;
2568 return 0;
2569 }
2570 }
2571
2572 /*
2573 * Give the link layer a chance to accept/reject it, and also
2574 * find out which AF_LINK address this maps to, if it isn't one
2575 * already.
2576 */
2577 if (ifp->if_resolvemulti) {
2578 error = ifp->if_resolvemulti(ifp, &llsa, sa);
72659ed0 2579 if (error)
78195a76 2580 return error;
984263bc 2581 } else {
4090d6ff 2582 llsa = NULL;
984263bc
MD
2583 }
2584
c1e12ca9
SZ
2585 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
2586 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT);
984263bc
MD
2587 bcopy(sa, dupsa, sa->sa_len);
2588
2589 ifma->ifma_addr = dupsa;
2590 ifma->ifma_lladdr = llsa;
2591 ifma->ifma_ifp = ifp;
2592 ifma->ifma_refcount = 1;
e333f801 2593 ifma->ifma_protospec = NULL;
984263bc
MD
2594 rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2595
441d34b2 2596 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
6cd0715f
RP
2597 if (retifma)
2598 *retifma = ifma;
984263bc 2599
4090d6ff 2600 if (llsa != NULL) {
441d34b2 2601 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
0c3c561c 2602 if (sa_equal(ifma->ifma_addr, llsa))
984263bc
MD
2603 break;
2604 }
2605 if (ifma) {
2606 ifma->ifma_refcount++;
2607 } else {
c1e12ca9
SZ
2608 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
2609 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT);
984263bc
MD
2610 bcopy(llsa, dupsa, llsa->sa_len);
2611 ifma->ifma_addr = dupsa;
2612 ifma->ifma_ifp = ifp;
2613 ifma->ifma_refcount = 1;
441d34b2 2614 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
984263bc
MD
2615 }
2616 }
2617 /*
2618 * We are certain we have added something, so call down to the
2619 * interface to let them know about it.
2620 */
6cd0715f
RP
2621 if (ifp->if_ioctl)
2622 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
984263bc
MD
2623
2624 return 0;
2625}
2626
72659ed0
SZ
/*
 * Serialized wrapper around if_addmulti_serialized(): acquire the
 * interface's all-serializer for the duration of the add.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	int rc;

	ifnet_serialize_all(ifp);
	rc = if_addmulti_serialized(ifp, sa, retifma);
	ifnet_deserialize_all(ifp);

	return rc;
}
2639
984263bc
MD
2640/*
2641 * Remove a reference to a multicast address on this interface. Yell
2642 * if the request does not match an existing membership.
2643 */
72659ed0
SZ
/*
 * Remove a reference to a multicast address on this interface.  Yell
 * (return ENOENT) if the request does not match an existing membership.
 * When the last reference goes away the entry is freed, the routing
 * socket is notified, and the companion AF_LINK membership (if any)
 * is dereferenced the same way.
 */
static int
if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	if (ifma->ifma_refcount > 1) {
		/* Not the last reference; just drop the count. */
		ifma->ifma_refcount--;
		return 0;
	}

	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	/* Reuse 'sa' to carry the saved link-layer address, if any. */
	sa = ifma->ifma_lladdr;
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL)
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address.  It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above.  If some nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored was is not the same as the one which was
	 * in the record for the link-layer address.  (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	/* Last reference on the link-layer entry: free it and tell driver. */
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}
2706
72659ed0
SZ
/*
 * Serialized wrapper around if_delmulti_serialized(): hold the
 * interface's all-serializer while removing the membership.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	int rc;

	ifnet_serialize_all(ifp);
	rc = if_delmulti_serialized(ifp, sa);
	ifnet_deserialize_all(ifp);

	return rc;
}
2718
3976c93a
RP
2719/*
2720 * Delete all multicast group membership for an interface.
2721 * Should be used to quickly flush all multicast filters.
2722 */
/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 *
 * A stack-allocated AF_UNSPEC marker is walked through the list so the
 * iteration survives if_delmulti_serialized() unlinking entries (and
 * any blocking it may do).
 */
void
if_delallmulti_serialized(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma, mark;
	struct sockaddr sa;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* AF_UNSPEC address identifies 'mark' as a marker, not a member. */
	bzero(&sa, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	sa.sa_len = sizeof(sa);

	bzero(&mark, sizeof(mark));
	mark.ifma_addr = &sa;

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link);
	while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) {
		/* Step the marker past the entry about to be deleted. */
		TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
		TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark,
		    ifma_link);

		/* Skip other markers. */
		if (ifma->ifma_addr->sa_family == AF_UNSPEC)
			continue;

		if_delmulti_serialized(ifp, ifma->ifma_addr);
	}
	TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
}
2751
2752
984263bc
MD
2753/*
2754 * Set the link layer address on an interface.
2755 *
2756 * At this time we only support certain types of interfaces,
2757 * and we don't allow the length of the address to change.
2758 */
/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 * Returns 0, EINVAL (no sdl / length mismatch), or ENODEV
 * (unsupported interface type).
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
	case IFT_IEEE8023ADLAG:
		/* Update both the arpcom copy and the AF_LINK sockaddr. */
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	ifnet_serialize_all(ifp);
	if ((ifp->if_flags & IFF_UP) != 0) {
#ifdef INET
		struct ifaddr_container *ifac;
#endif

		/* Bounce the interface: down ... */
		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
		/* ... then back up, so the driver reloads its filter. */
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_gratuitous(ifp, ifa);
		}
#endif
	}
	ifnet_deserialize_all(ifp);
	return (0);
}
2819
c42bebbd
RM
2820
2821/*
2822 * Locate an interface based on a complete address.
2823 */
2824struct ifnet *
2825if_bylla(const void *lla, unsigned char lla_len)
2826{
2827 const struct ifnet_array *arr;
2828 struct ifnet *ifp;
2829 struct sockaddr_dl *sdl;
2830 int i;
2831
2832 arr = ifnet_array_get();
2833 for (i = 0; i < arr->ifnet_count; ++i) {
2834 ifp = arr->ifnet_arr[i];
2835 if (ifp->if_addrlen != lla_len)
2836 continue;
2837
2838 sdl = IF_LLSOCKADDR(ifp);
2839 if (memcmp(lla, LLADDR(sdl), lla_len) == 0)
2840 return (ifp);
2841 }
2842 return (NULL);
2843}
2844
984263bc 2845struct ifmultiaddr *
f23061d4 2846ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
984263bc
MD
2847{
2848 struct ifmultiaddr *ifma;
2849
72659ed0
SZ
2850 /* TODO: need ifnet_serialize_main */
2851 ifnet_serialize_all(ifp);
441d34b2 2852 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
0c3c561c 2853 if (sa_equal(ifma->ifma_addr, sa))
984263bc 2854 break;
72659ed0 2855 ifnet_deserialize_all(ifp);
984263bc
MD
2856
2857 return ifma;
2858}
2859
e9bd1548
MD
2860/*
2861 * This function locates the first real ethernet MAC from a network
2862 * card and loads it into node, returning 0 on success or ENOENT if
2863 * no suitable interfaces were found. It is used by the uuid code to
2864 * generate a unique 6-byte number.
2865 */
2866int
2867if_getanyethermac(uint16_t *node, int minlen)
2868{
2869 struct ifnet *ifp;
2870 struct sockaddr_dl *sdl;
2871
b4051e25
SZ
2872 ifnet_lock();
2873 TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
e9bd1548
MD
2874 if (ifp->if_type != IFT_ETHER)
2875 continue;
2876 sdl = IF_LLSOCKADDR(ifp);
2877 if (sdl->sdl_alen < minlen)
2878 continue;
2879 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2880 minlen);
b4051e25 2881 ifnet_unlock();
e9bd1548
MD
2882 return(0);
2883 }
b4051e25 2884 ifnet_unlock();
e9bd1548
MD
2885 return (ENOENT);
2886}
2887
1550dfd9
MD
2888/*
2889 * The name argument must be a pointer to storage which will last as
2890 * long as the interface does. For physical devices, the result of
2891 * device_get_name(dev) is a good choice and for pseudo-devices a
2892 * static string works well.
2893 */
2894void
2895if_initname(struct ifnet *ifp, const char *name, int unit)
2896{
3e4a09e7
MD
2897 ifp->if_dname = name;
2898 ifp->if_dunit = unit;
1550dfd9 2899 if (unit != IF_DUNIT_NONE)
f8c7a42d 2900 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1550dfd9
MD
2901 else
2902 strlcpy(ifp->if_xname, name, IFNAMSIZ);
2903}
2904
984263bc
MD
/*
 * kprintf()-style output prefixed with "ifname: ".  Returns the total
 * number of characters emitted (prefix plus formatted message).
 */
int
if_printf(struct ifnet *ifp, const char *fmt, ...)
{
	__va_list ap;
	int retval;

	retval = kprintf("%s: ", ifp->if_xname);
	__va_start(ap, fmt);
	retval += kvprintf(fmt, ap);
	__va_end(ap);
	return (retval);
}
2917
cb80735c
RP
/*
 * Allocate a zeroed ifnet of the given interface type and run the
 * type-specific l2com allocator, if one is registered.  Returns the
 * new ifnet, or NULL when the l2com allocator fails.
 */
struct ifnet *
if_alloc(uint8_t type)
{
	struct ifnet *ifp;
	size_t size;

	/*
	 * XXX temporary hack until arpcom is setup in if_l2com
	 */
	if (type == IFT_ETHER)
		size = sizeof(struct arpcom);
	else
		size = sizeof(struct ifnet);

	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);

	ifp->if_type = type;

	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			kfree(ifp, M_IFNET);
			return (NULL);
		}
	}
	return (ifp);
}
2945
/*
 * Release an ifnet previously obtained from if_alloc().
 *
 * NOTE(review): if_alloc() may have attached type-specific state via
 * if_com_alloc[]; only the ifnet allocation is freed here — confirm
 * the if_l2com state is released elsewhere (or shares this allocation).
 */
void
if_free(struct ifnet *ifp)
{
	kfree(ifp, M_IFNET);
}
2951
b2f93efe
JS
/*
 * Install the classic (priority + normal FIFO) enqueue/dequeue/request
 * methods on every subqueue of ifq, using the interface's own
 * subqueue-mapping function.
 */
void
ifq_set_classic(struct ifaltq *ifq)
{
	ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
	    ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
}
2958
2959void
2cc2f639
SZ
2960ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
2961 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
f0a26983
SZ
2962{
2963 int q;
2964
2cc2f639
SZ
2965 KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
2966 KASSERT(enqueue != NULL, ("enqueue is not specified"));
2967 KASSERT(dequeue != NULL, ("dequeue is not specified"));
2968 KASSERT(request != NULL, ("request is not specified"));
2969
2970 ifq->altq_mapsubq = mapsubq;
f0a26983
SZ
2971 for (q = 0; q < ifq->altq_subq_cnt; ++q) {
2972 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
2973
2974 ifsq->ifsq_enqueue = enqueue;
2975 ifsq->ifsq_dequeue = dequeue;
2976 ifsq->ifsq_request = request;
2977 }
b2f93efe
JS
2978}
2979
4cc8caef
SZ
2980static void
2981ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
2982{
e7d68516
SZ
2983
2984 classq_add(&ifsq->ifsq_norm, m);
4cc8caef
SZ
2985 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
2986}
2987
2988static void
2989ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
2990{
e7d68516
SZ
2991
2992 classq_add(&ifsq->ifsq_prio, m);
4cc8caef
SZ
2993 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
2994 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len);
2995}
2996
2997static struct mbuf *
2998ifsq_norm_dequeue(struct ifaltq_subque *ifsq)
2999{
3000 struct mbuf *m;
3001
e7d68516
SZ
3002 m = classq_get(&ifsq->ifsq_norm);
3003 if (m != NULL)
4cc8caef 3004 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
e7d68516 3005 return (m);
4cc8caef
SZ
3006}
3007
3008static struct mbuf *
3009ifsq_prio_dequeue(struct ifaltq_subque *ifsq)
3010{
3011 struct mbuf *m;
3012
e7d68516 3013 m = classq_get(&ifsq->ifsq_prio);
4cc8caef 3014 if (m != NULL) {
4cc8caef
SZ
3015 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
3016 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len);
3017 }
e7d68516 3018 return (m);
4cc8caef
SZ
3019}
3020
/*
 * Classic enqueue: high-priority (M_PRIO) packets go to the prio queue,
 * everything else to the normal queue.  When the subqueue is over its
 * packet or byte limit, older packets are dropped to make room — a
 * priority packet first tries to evict from the normal queue (as long
 * as the prio queue uses less than half the limits), then from the prio
 * queue itself.  Returns 0 on enqueue or ENOBUFS when nothing could be
 * evicted (the new packet is freed; the caller accounts the drop).
 */
int
ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
    struct altq_pktattr *pa __unused)
{

	M_ASSERTPKTHDR(m);
again:
	if (ifsq->ifsq_len >= ifsq->ifsq_maxlen ||
	    ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) {
		struct mbuf *m_drop;

		if (m->m_flags & M_PRIO) {
			m_drop = NULL;
			if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) &&
			    ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) {
				/* Try dropping some from normal queue. */
				m_drop = ifsq_norm_dequeue(ifsq);
			}
			if (m_drop == NULL)
				m_drop = ifsq_prio_dequeue(ifsq);
		} else {
			m_drop = ifsq_norm_dequeue(ifsq);
		}
		if (m_drop != NULL) {
			IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1);
			m_freem(m_drop);
			/* Re-check the limits; one drop may not suffice. */
			goto again;
		}
		/*
		 * No old packets could be dropped!
		 * NOTE: Caller increases oqdrops.
		 */
		m_freem(m);
		return (ENOBUFS);
	} else {
		if (m->m_flags & M_PRIO)
			ifsq_prio_enqueue(ifsq, m);
		else
			ifsq_norm_enqueue(ifsq, m);
		return (0);
	}
}
3063
9db4b353 3064struct mbuf *
6dadc833 3065ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op)
e3e4574a
JS
3066{
3067 struct mbuf *m;
3068
3069 switch (op) {
3070 case ALTDQ_POLL:
e7d68516 3071 m = classq_head(&ifsq->ifsq_prio);
4cc8caef 3072 if (m == NULL)
e7d68516 3073 m = classq_head(&ifsq->ifsq_norm);
e3e4574a 3074 break;
338bb46c 3075
e3e4574a 3076 case ALTDQ_REMOVE:
4cc8caef
SZ
3077 m = ifsq_prio_dequeue(ifsq);
3078 if (m == NULL)
3079 m = ifsq_norm_dequeue(ifsq);
e3e4574a 3080 break;
338bb46c 3081
e3e4574a
JS
3082 default:
3083 panic("unsupported ALTQ dequeue op: %d", op);
3084 }
338bb46c 3085 return m;
e3e4574a
JS
3086}
3087
9db4b353 3088int
f0a26983 3089ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
e3e4574a
JS
3090{
3091 switch (req) {
3092 case ALTRQ_PURGE:
338bb46c
SZ
3093 for (;;) {
3094 struct mbuf *m;
3095
6dadc833 3096 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE);
338bb46c
SZ
3097 if (m == NULL)
3098 break;
3099 m_freem(m);
3100 }
e3e4574a 3101 break;
338bb46c 3102
e3e4574a 3103 default:
3f625015 3104 panic("unsupported ALTQ request: %d", req);
e3e4574a 3105 }
338bb46c 3106 return 0;
e3e4574a 3107}
b2632176 3108
/*
 * Try to run ifnet.if_start on the given subqueue directly from the
 * current CPU.
 *
 * The subqueue hardware serializer is acquired with a try-lock; on
 * contention the start is instead scheduled on the subqueue owner CPU.
 * After a direct start, if more data still needs to be transmitted the
 * start is rescheduled (force_sched is passed through to
 * ifsq_ifstart_schedule() in that case).
 */
static void
ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	/*
	 * Try to do direct ifnet.if_start on the subqueue first, if there is
	 * contention on the subqueue hardware serializer, ifnet.if_start on
	 * the subqueue will be scheduled on the subqueue owner CPU.
	 */
	if (!ifsq_tryserialize_hw(ifsq)) {
		/*
		 * Subqueue hardware serializer contention happened,
		 * ifnet.if_start on the subqueue is scheduled on
		 * the subqueue owner CPU, and we keep going.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	/*
	 * Only call the driver's if_start while the interface is RUNNING
	 * and the subqueue is not marked output-active; re-check the same
	 * conditions afterwards to decide whether the start "ran" cleanly.
	 */
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);

	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start on the
		 * subqueue is scheduled on the subqueue owner CPU, and we
		 * keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, force_sched);
	}
}
3149
2aa7f7f8 3150/*
5c593c2a 3151 * Subqueue packets staging mechanism:
2aa7f7f8 3152 *
5c593c2a
SZ
3153 * The packets enqueued into the subqueue are staged to a certain amount
3154 * before the ifnet.if_start on the subqueue is called. In this way, the
3155 * driver could avoid writing to hardware registers upon every packet,
3156 * instead, hardware registers could be written when certain amount of
3157 * packets are put onto hardware TX ring. The measurement on several modern
3158 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware
3159 * registers writing aggregation could save ~20% CPU time when 18bytes UDP
3160 * datagrams are transmitted at 1.48Mpps. The performance improvement by
 3161 * hardware registers writing aggregation is also mentioned by Luigi Rizzo's
3162 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
2aa7f7f8 3163 *
5c593c2a 3164 * Subqueue packets staging is performed for two entry points into drivers'
2aa7f7f8 3165 * transmission function:
5c593c2a
SZ
3166 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try()
3167 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule()
2aa7f7f8 3168 *
5c593c2a
SZ
3169 * Subqueue packets staging will be stopped upon any of the following
3170 * conditions:
2aa7f7f8 3171 * - If the count of packets enqueued on the current CPU is great than or
f0a26983 3172 * equal to ifsq_stage_cntmax. (XXX this should be per-interface)
2aa7f7f8
SZ
 3173 * - If the total length of packets enqueued on the current CPU is greater
3174 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is
 3175 * cut from the hardware's MTU mainly because a full TCP segment's size
3176 * is usually less than hardware's MTU.
5c593c2a
SZ
3177 * - ifsq_ifstart_schedule() is not pending on the current CPU and
3178 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
3179 * released.
2aa7f7f8
SZ
3180 * - The if_start_rollup(), which is registered as low priority netisr
3181 * rollup function, is called; probably because no more work is pending
3182 * for netisr.
3183 *
3184 * NOTE:
5c593c2a 3185 * Currently subqueue packet staging is only performed in netisr threads.
2aa7f7f8 3186 */
/*
 * Enqueue mbuf m onto ifp's send queue (the subqueue is selected by the
 * current CPU id) and either start transmission, schedule it, or stage
 * the packet for later batched transmission.
 *
 * Returns 0 on success or the error from ifsq_enqueue_locked()
 * (the packet has been freed by the enqueue path on error).
 *
 * When called from a netisr thread the packet may be "staged": output
 * statistics are updated but ifnet.if_start is deferred until enough
 * packets/bytes have accumulated or if_start_rollup() runs.
 */
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;

	crit_enter_quick(td);

	ifsq = ifq_map_subq(ifq, gd->gd_cpuid);
	ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);

	/* Record length/mcast before the enqueue may consume the mbuf. */
	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	/*
	 * Staging is only performed in netisr threads; account this
	 * packet against the per-CPU staging thresholds and decide
	 * whether to avoid an immediate if_start.
	 */
	if (td->td_type == TD_TYPE_NETISR) {
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		IFNET_STAT_INC(ifp, oqdrops, 1);
		if (!ifsq_data_ready(ifsq)) {
			/* Nothing queued at all; bail out early. */
			ALTQ_SQ_UNLOCK(ifsq);
			crit_exit_quick(td);
			return error;
		}
		/* Old data is still queued; make sure it gets started. */
		avoid_start = 0;
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			/* Stage this subqueue instead of starting it now. */
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			IFNET_STAT_INC(ifp, obytes, len);
			if (mcast)
				IFNET_STAT_INC(ifp, omcasts, 1);
			crit_exit_quick(td);
			return error;
		}

		/*
		 * Hold the subqueue interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (!error) {
		IFNET_STAT_INC(ifp, obytes, len);
		if (mcast)
			IFNET_STAT_INC(ifp, omcasts, 1);
	}

	if (stage != NULL) {
		/*
		 * A start was already scheduled elsewhere; if we are not
		 * staging, hand the work over to the scheduled start.
		 */
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			crit_exit_quick(td);
			return error;
		}

		/* We start the subqueue ourselves; reset staging state. */
		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (!start) {
		/* Someone else holds the if_start interlock. */
		crit_exit_quick(td);
		return error;
	}

	ifsq_ifstart_try(ifsq, 0);

	crit_exit_quick(td);
	return error;
}
3287
b2632176 3288void *
52fbd92a 3289ifa_create(int size)
b2632176
SZ
3290{
3291 struct ifaddr *ifa;
3292 int i;
3293
ed20d0e3 3294 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
b2632176 3295
52fbd92a 3296 ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO);
43dbcc2a
SZ
3297
3298 /*
3299 * Make ifa_container availabel on all CPUs, since they
3300 * could be accessed by any threads.
3301 */
7d1c3473 3302 ifa->ifa_containers =
62938642
MD
3303 kmalloc(ncpus * sizeof(struct ifaddr_container),
3304 M_IFADDR,
3305 M_INTWAIT | M_ZERO | M_CACHEALIGN);
52fbd92a 3306
d5a2b87c 3307 ifa->ifa_ncnt = ncpus;
b2632176
SZ
3308 for (i = 0; i < ncpus; ++i) {
3309 struct ifaddr_container *ifac = &ifa->ifa_containers[i];
3310
3311 ifac->ifa_magic = IFA_CONTAINER_MAGIC;
3312 ifac->ifa = ifa;
3313 ifac->ifa_refcnt = 1;
3314 }
3315#ifdef IFADDR_DEBUG
3316 kprintf("alloc ifa %p %d\n", ifa, size);
3317#endif
3318 return ifa;
3319}
3320
b2632176
SZ
3321void
3322ifac_free(struct ifaddr_container *ifac, int cpu_id)
3323{
d5a2b87c 3324 struct ifaddr *ifa = ifac->ifa;
b2632176
SZ
3325
3326 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
3327 KKASSERT(ifac->ifa_refcnt == 0);
40f667f2 3328 KASSERT(ifac->ifa_listmask == 0,
ed20d0e3 3329 ("ifa is still on %#x lists", ifac->ifa_listmask));
b2632176
SZ
3330
3331 ifac->ifa_magic = IFA_CONTAINER_DEAD;
3332
b2632176 3333#ifdef IFADDR_DEBUG_VERBOSE
8967ddc7 3334 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
b2632176
SZ
3335#endif
3336
d5a2b87c 3337 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
ed20d0e3 3338 ("invalid # of ifac, %d", ifa->ifa_ncnt));
d5a2b87c
SZ
3339 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
3340#ifdef IFADDR_DEBUG
3341 kprintf("free ifa %p\n", ifa);
3342#endif
3343 kfree(ifa->ifa_containers, M_IFADDR);
3344 kfree(ifa, M_IFADDR);
3345 }
b2632176
SZ
3346}
3347
3348static void
002c1265 3349ifa_iflink_dispatch(netmsg_t nmsg)
b2632176
SZ
3350{
3351 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
3352 struct ifaddr *ifa = msg->ifa;
3353 struct ifnet *ifp = msg->ifp;
3354 int cpu = mycpuid;
40f667f2 3355 struct ifaddr_container *ifac;
b2632176
SZ
3356
3357 crit_enter();
23027d35 3358
40f667f2 3359 ifac = &ifa->ifa_containers[cpu];
2adb7bc2 3360 ASSERT_IFAC_VALID(ifac);
40f667f2 3361 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
ed20d0e3 3362 ("ifaddr is on if_addrheads"));
23027d35 3363
40f667f2
SZ
3364 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
3365 if (msg->tail)
3366 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
3367 else
3368 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
23027d35 3369
b2632176
SZ
3370 crit_exit();
3371
c3b4f1bf 3372 netisr_forwardmsg_all(&nmsg->base, cpu + 1);
b2632176
SZ
3373}
3374
3375void
3376ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
3377{
3378 struct netmsg_ifaddr msg;
3379
002c1265 3380 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
48e7b118 3381 0, ifa_iflink_dispatch);
b2632176
SZ
3382 msg.ifa = ifa;
3383 msg.ifp = ifp;
3384 msg.tail = tail;
3385
92b34312 3386 netisr_domsg(&msg.base, 0);
b2632176
SZ
3387}
3388
3389static void
002c1265 3390ifa_ifunlink_dispatch(netmsg_t nmsg)
b2632176
SZ
3391{
3392 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
3393 struct ifaddr *ifa = msg->ifa;
3394 struct ifnet *ifp = msg->ifp;
3395 int cpu = mycpuid;
40f667f2 3396 struct ifaddr_container *ifac;
b2632176
SZ
3397
3398 crit_enter();
23027d35 3399
40f667f2 3400 ifac = &ifa->ifa_containers[cpu];
2adb7bc2 3401 ASSERT_IFAC_VALID(ifac);
40f667f2 3402 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
ed20d0e3 3403 ("ifaddr is not on if_addrhead"));
23027d35 3404
40f667f2
SZ
3405 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
3406 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
23027d35 3407
b2632176
SZ
3408 crit_exit();
3409
c3b4f1bf 3410 netisr_forwardmsg_all(&nmsg->base, cpu + 1);
b2632176
SZ
3411}
3412
3413void
3414ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
3415{
3416 struct netmsg_ifaddr msg;
3417
002c1265 3418 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
48e7b118 3419 0, ifa_ifunlink_dispatch);
b2632176
SZ
3420 msg.ifa = ifa;
3421 msg.ifp = ifp;
3422
92b34312 3423 netisr_domsg(&msg.base, 0);
b2632176
SZ
3424}
3425
3426static void
002c1265 3427ifa_destroy_dispatch(netmsg_t nmsg)
b2632176
SZ
3428{
3429 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
3430
3431 IFAFREE(msg->ifa);
c3b4f1bf 3432 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1);
b2632176
SZ
3433}
3434
3435void
3436ifa_destroy(struct ifaddr *ifa)
3437{
3438 struct netmsg_ifaddr msg;
3439
002c1265 3440 netmsg_init(&msg.base, NULL, &curthread->td_msgport,
48e7b118 3441 0, ifa_destroy_dispatch);
b2632176
SZ
3442 msg.ifa = ifa;
3443
92b34312 3444 netisr_domsg(&msg.base, 0);
b2632176
SZ
3445}
3446
239bdb58
SZ
3447static void
3448if_start_rollup(void)
3449{
f0a26983
SZ
3450 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
3451 struct ifsubq_stage *stage;
28cc0c29 3452
ac7fc6f0
SZ
3453 crit_enter();
3454
f0a26983
SZ
3455 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
3456 struct ifaltq_subque *ifsq = stage->stg_subq;
3cab6b0d 3457 int is_sched = 0;
28cc0c29 3458
f0a26983 3459 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
3cab6b0d 3460 is_sched = 1;
f0a26983 3461 ifsq_stage_remove(head, stage);
28cc0c29 3462
3cab6b0d 3463 if (is_sched) {
f0a26983 3464 ifsq_ifstart_schedule(ifsq, 1);
3cab6b0d
SZ
3465 } else {
3466 int start = 0;
28cc0c29 3467
f0a26983
SZ
3468 ALTQ_SQ_LOCK(ifsq);
3469 if (!ifsq_is_started(ifsq)) {
3cab6b0d 3470 /*
5c593c2a
SZ
3471 * Hold the subqueue interlock of
3472 * ifnet.if_start
3cab6b0d 3473 */
f0a26983 3474 ifsq_set_started(ifsq);
3cab6b0d
SZ
3475 start = 1;
3476 }
f0a26983 3477 ALTQ_SQ_UNLOCK(ifsq);
3cab6b0d
SZ
3478
3479 if (start)
f0a26983 3480 ifsq_ifstart_try(ifsq, 1);
3cab6b0d 3481 }
f0a26983
SZ
3482 KKASSERT((stage->stg_flags &
3483 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
28cc0c29 3484 }
ac7fc6f0
SZ
3485
3486 crit_exit();
239bdb58