nrelease - fix/improve livecd
[dragonfly.git] / sys / net / if.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1980, 1986, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
dc71b7ab 13 * 3. Neither the name of the University nor the names of its contributors
984263bc
MD
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)if.c 8.3 (Berkeley) 1/4/94
f23061d4 30 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
984263bc
MD
31 */
32
984263bc
MD
33#include "opt_inet6.h"
34#include "opt_inet.h"
b3a7093f 35#include "opt_ifpoll.h"
984263bc
MD
36
37#include <sys/param.h>
38#include <sys/malloc.h>
39#include <sys/mbuf.h>
40#include <sys/systm.h>
41#include <sys/proc.h>
2b3f93ea 42#include <sys/caps.h>
6b6e0885 43#include <sys/protosw.h>
984263bc
MD
44#include <sys/socket.h>
45#include <sys/socketvar.h>
6b6e0885 46#include <sys/socketops.h>
984263bc 47#include <sys/kernel.h>
9db4b353 48#include <sys/ktr.h>
9683f229 49#include <sys/mutex.h>
233c8570 50#include <sys/lock.h>
984263bc
MD
51#include <sys/sockio.h>
52#include <sys/syslog.h>
53#include <sys/sysctl.h>
698ac46c 54#include <sys/domain.h>
e9cb6d99 55#include <sys/thread.h>
78195a76 56#include <sys/serialize.h>
71fc104f 57#include <sys/bus.h>
e1c6b0c1 58#include <sys/jail.h>
984263bc 59
9683f229
MD
60#include <sys/thread2.h>
61#include <sys/msgport2.h>
62#include <sys/mutex2.h>
63
984263bc
MD
64#include <net/if.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_types.h>
68#include <net/if_var.h>
afc5d5f3 69#include <net/if_ringmap.h>
4d723e5a 70#include <net/ifq_var.h>
984263bc
MD
71#include <net/radix.h>
72#include <net/route.h>
65a24520 73#include <net/if_clone.h>
5337421c 74#include <net/netisr2.h>
b2632176
SZ
75#include <net/netmsg2.h>
76
d5a2b87c 77#include <machine/atomic.h>
984263bc 78#include <machine/stdarg.h>
b2632176 79#include <machine/smp.h>
984263bc
MD
80
81#if defined(INET) || defined(INET6)
984263bc
MD
82#include <netinet/in.h>
83#include <netinet/in_var.h>
84#include <netinet/if_ether.h>
85#ifdef INET6
984263bc
MD
86#include <netinet6/in6_var.h>
87#include <netinet6/in6_ifattach.h>
233c8570
AL
88#endif /* INET6 */
89#endif /* INET || INET6 */
984263bc 90
b2632176 91struct netmsg_ifaddr {
002c1265 92 struct netmsg_base base;
b2632176
SZ
93 struct ifaddr *ifa;
94 struct ifnet *ifp;
95 int tail;
96};
97
/*
 * Per-CPU list head of staged ifnet subqueues (packets parked for a
 * deferred if_start).  Cache-line aligned to avoid false sharing
 * between CPUs.
 */
struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage) stg_head;
} __cachealign;

/*
 * Maps hardware ring indices to CPU ids; rm_cpumap is a C99 flexible
 * array member sized at allocation time.
 */
struct if_ringmap {
	int rm_cnt;		/* number of rings / entries in rm_cpumap */
	int rm_grid;		/* NOTE(review): grid stride for the mapping -- confirm */
	int rm_cpumap[];	/* per-ring target CPU id */
};
107
434f3dd0
SZ
108#define RINGMAP_FLAG_NONE 0x0
109#define RINGMAP_FLAG_POWEROF2 0x1
110
984263bc
MD
111/*
112 * System initialization
113 */
698ac46c
HS
114static void if_attachdomain(void *);
115static void if_attachdomain1(struct ifnet *);
436c57ea
SZ
116static int ifconf(u_long, caddr_t, struct ucred *);
117static void ifinit(void *);
90af4fd3 118static void ifnetinit(void *);
436c57ea 119static void if_slowtimo(void *);
436c57ea 120static int if_rtdel(struct radix_node *, void *);
b5df1a85 121static void if_slowtimo_dispatch(netmsg_t);
984263bc 122
8a248085
SZ
123/* Helper functions */
124static void ifsq_watchdog_reset(struct ifsubq_watchdog *);
72659ed0 125static int if_delmulti_serialized(struct ifnet *, struct sockaddr *);
b4051e25
SZ
126static struct ifnet_array *ifnet_array_alloc(int);
127static void ifnet_array_free(struct ifnet_array *);
128static struct ifnet_array *ifnet_array_add(struct ifnet *,
129 const struct ifnet_array *);
130static struct ifnet_array *ifnet_array_del(struct ifnet *,
131 const struct ifnet_array *);
233c8570
AL
132static struct ifg_group *if_creategroup(const char *);
133static int if_destroygroup(struct ifg_group *);
134static int if_delgroup_locked(struct ifnet *, const char *);
135static int if_getgroups(struct ifgroupreq *, struct ifnet *);
136static int if_getgroupmembers(struct ifgroupreq *);
8a248085 137
984263bc
MD
138#ifdef INET6
139/*
140 * XXX: declare here to avoid to include many inet6 related files..
141 * should be more generalized?
142 */
436c57ea 143extern void nd6_setmtu(struct ifnet *);
984263bc
MD
144#endif
145
436c57ea
SZ
146SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
147SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
68732d8f 148SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap");
436c57ea 149
335a88d5 150static int ifsq_stage_cntmax = 16;
f0a26983 151TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
28cc0c29 152SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
f0a26983 153 &ifsq_stage_cntmax, 0, "ifq staging packet count max");
28cc0c29 154
6517ec3f
SZ
155static int if_stats_compat = 0;
156SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
157 &if_stats_compat, 0, "Compat the old ifnet stats");
158
68732d8f
SZ
159static int if_ringmap_dumprdr = 0;
160SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW,
161 &if_ringmap_dumprdr, 0, "dump redirect table");
162
f6994c54
AHJ
163/* Interface description */
164static unsigned int ifdescr_maxlen = 1024;
165SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
166 &ifdescr_maxlen, 0,
167 "administrative maximum length for interface description");
168
f3f3eadb 169SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
3c5b1eb8 170SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL);
436c57ea 171
2949c680
AL
172static if_com_alloc_t *if_com_alloc[256];
173static if_com_free_t *if_com_free[256];
aeb3c11e 174
436c57ea
SZ
175MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
176MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
cb80735c 177MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
f6994c54 178MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");
984263bc 179
436c57ea 180int ifqmaxlen = IFQ_MAXLEN;
b64bfcc3 181struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
233c8570
AL
182struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
183static struct lock ifgroup_lock;
984263bc 184
b4051e25
SZ
185static struct ifnet_array ifnet_array0;
186static struct ifnet_array *ifnet_array = &ifnet_array0;
187
b5df1a85
SZ
188static struct callout if_slowtimo_timer;
189static struct netmsg_base if_slowtimo_netmsg;
436c57ea
SZ
190
191int if_index = 0;
192struct ifnet **ifindex2ifnet = NULL;
cabfc9f6 193static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet");
abbb44bb 194
f0a26983 195static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU];
28cc0c29 196
f0a26983 197#ifdef notyet
9db4b353 198#define IFQ_KTR_STRING "ifq=%p"
2949c680 199#define IFQ_KTR_ARGS struct ifaltq *ifq
9db4b353
SZ
200#ifndef KTR_IFQ
201#define KTR_IFQ KTR_ALL
202#endif
203KTR_INFO_MASTER(ifq);
5bf48697
AE
204KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
205KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
9db4b353
SZ
206#define logifq(name, arg) KTR_LOG(ifq_ ## name, arg)
207
208#define IF_START_KTR_STRING "ifp=%p"
5bf48697 209#define IF_START_KTR_ARGS struct ifnet *ifp
9db4b353
SZ
210#ifndef KTR_IF_START
211#define KTR_IF_START KTR_ALL
212#endif
213KTR_INFO_MASTER(if_start);
214KTR_INFO(KTR_IF_START, if_start, run, 0,
5bf48697 215 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 216KTR_INFO(KTR_IF_START, if_start, sched, 1,
5bf48697 217 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 218KTR_INFO(KTR_IF_START, if_start, avoid, 2,
5bf48697 219 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 220KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
5bf48697 221 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 222KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
5bf48697 223 IF_START_KTR_STRING, IF_START_KTR_ARGS);
9db4b353 224#define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg)
233c8570 225#endif /* notyet */
315a7da3 226
984263bc
MD
227/*
228 * Network interface utility routines.
229 *
230 * Routines with ifa_ifwith* names take sockaddr *'s as
231 * parameters.
232 */
2949c680 233/* ARGSUSED */
c660ad18 234static void
f23061d4 235ifinit(void *dummy)
984263bc 236{
233c8570 237 lockinit(&ifgroup_lock, "ifgroup", 0, 0);
984263bc 238
b5df1a85
SZ
239 callout_init_mp(&if_slowtimo_timer);
240 netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport,
241 MSGF_PRIORITY, if_slowtimo_dispatch);
abbb44bb 242
b5df1a85
SZ
243 /* Start if_slowtimo */
244 lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg);
984263bc
MD
245}
246
9db4b353 247static void
f0a26983 248ifsq_ifstart_ipifunc(void *arg)
9db4b353 249{
f0a26983
SZ
250 struct ifaltq_subque *ifsq = arg;
251 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);
9db4b353
SZ
252
253 crit_enter();
254 if (lmsg->ms_flags & MSGF_DONE)
f6192acf 255 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
9db4b353
SZ
256 crit_exit();
257}
258
3cab6b0d 259static __inline void
f0a26983 260ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
3cab6b0d 261{
f0a26983
SZ
262 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
263 TAILQ_REMOVE(&head->stg_head, stage, stg_link);
264 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
265 stage->stg_cnt = 0;
266 stage->stg_len = 0;
3cab6b0d
SZ
267}
268
/*
 * Append a subqueue entry to the per-CPU staging list.  The entry
 * must not already be queued or scheduled.
 */
static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}
277
9db4b353 278/*
5c593c2a 279 * Schedule ifnet.if_start on the subqueue owner CPU
9db4b353
SZ
280 */
281static void
f0a26983 282ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
9db4b353 283{
9db4b353
SZ
284 int cpu;
285
3cab6b0d 286 if (!force && curthread->td_type == TD_TYPE_NETISR &&
f0a26983
SZ
287 ifsq_stage_cntmax > 0) {
288 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);
289
290 stage->stg_cnt = 0;
291 stage->stg_len = 0;
292 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
293 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
294 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
3cab6b0d
SZ
295 return;
296 }
297
f0a26983 298 cpu = ifsq_get_cpuid(ifsq);
9db4b353 299 if (cpu != mycpuid)
f0a26983 300 lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
9db4b353 301 else
f0a26983 302 ifsq_ifstart_ipifunc(ifsq);
9db4b353
SZ
303}
304
/*
 * Decide whether ifnet.if_start must be rescheduled for this subqueue.
 * Returns 1 if rescheduling is needed, 0 otherwise.
 *
 * NOTE:
 * This function will release ifnet.if_start subqueue interlock,
 * if ifnet.if_start for the subqueue does not need to be scheduled
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when hardware queue is ready
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}
344
/*
 * Netmsg handler that actually runs ifnet.if_start for a subqueue.
 *
 * Replies to the message immediately so the per-CPU if_start message
 * can be reused, then verifies we are on the subqueue owner CPU
 * (rescheduling if ownership moved), runs if_start under the hardware
 * serializer, and reschedules itself if more data remains.
 */
static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0);	/* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		/* Only counted as running if hardware can still take packets. */
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on the subqueue owner CPU, and we keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}
387
/*
 * Device driver ifnet.if_start helper function.
 *
 * Runs if_start synchronously for a subqueue if it is not already
 * started and has data ready.  Caller must hold the subqueue's
 * hardware serializer.
 */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);

	/* Take the if_start interlock; bail if already started or idle. */
	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}
419
f0a26983
SZ
420void
421if_devstart(struct ifnet *ifp)
422{
423 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
424}
425
2dffecda 426/* Device driver ifnet.if_start schedule helper function */
f0a26983
SZ
427void
428ifsq_devstart_sched(struct ifaltq_subque *ifsq)
429{
430 ifsq_ifstart_schedule(ifsq, 1);
431}
432
2dffecda
SZ
433void
434if_devstart_sched(struct ifnet *ifp)
435{
f0a26983 436 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
2dffecda
SZ
437}
438
/*
 * Default ifnet serialize/deserialize/tryserialize implementations,
 * used when the driver does not supply its own.  They all operate on
 * the single per-interface serializer; the 'slz' selector is ignored.
 */
static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
/*
 * Debug-build assertion helper: verify whether the interface
 * serializer is (or is not) currently held.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif
469
/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.  A driver may either supply its own
 * serialize/deserialize/tryserialize methods (in which case
 * 'serializer' must be NULL), or rely on the default implementations
 * backed by a single serializer (passed in, or embedded in the ifnet).
 *
 * This also builds the AF_LINK address, sets up the per-CPU address
 * lists and if_data, initializes the send queue and its subqueues,
 * and finally publishes the ifnet in ifindex2ifnet, the ifnet array
 * and the ifnet list under the ifnet lock.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q, qlen;
	char qlenname[64];

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/*
	 * Make if_addrhead available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if_addgroup(ifp, IFG_ALL);

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	/* One allocation holds the ifaddr plus the address and netmask. */
	ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize);
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	/* All-ones mask over the name portion. */
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	/*
	 * Make if_data available on all CPUs, since they could
	 * be updated by hardware interrupt routing, which could
	 * be bound to any CPU.
	 */
	ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu),
				    M_DEVBUF,
				    M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq =
		kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
			M_DEVBUF,
			M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	/* Allow user to override driver's setting. */
	ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname);
	qlen = -1;
	TUNABLE_INT_FETCH(qlenname, &qlen);
	if (qlen > 0) {
		if_printf(ifp, "qlenmax -> %d\n", qlen);
		ifq_set_maxlen(ifq, qlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		/* XXX: netisr_ncpus */
		ifsq->ifsq_stage =
			kmalloc(ncpus * sizeof(struct ifsubq_stage),
				M_DEVBUF,
				M_WAITOK | M_ZERO | M_CACHEALIGN);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		/*
		 * Allocate one if_start message for each CPU, since
		 * the hardware TX ring could be assigned to any CPU.
		 *
		 * NOTE:
		 * If the hardware TX ring polling CPU and the hardware
		 * TX ring interrupt CPU are same, one if_start message
		 * should be enough.
		 */
		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Increase mbuf cluster/jcluster limits for the mbufs that
	 * could sit on the device queues for quite some time.
	 */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(ifp->if_nmbjclusters);

	/*
	 * Install this ifp into ifindex2inet, ifnet queue and ifnet
	 * array after it is setup.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			bcopy(ifindex2ifnet, q, n/2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs could get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
739
/*
 * SYSINIT hook: run if_attachdomain1() on every interface that was
 * attached before the protocol domains were registered.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
750SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
751 if_attachdomain, NULL);
752
/*
 * Give every registered protocol domain a chance to attach its
 * per-interface data (if_afdata slot) to this ifnet.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}
768
/*
 * Purge all addresses whose type is _not_ AF_LINK
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	ASSERT_NETISR0;

	/*
	 * The ifaddr processing in the following loop will block,
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so we don't care about the
	 * blockness of the ifaddr processing here.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
			struct sockaddr_in saved_addr, saved_dst;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			/* Save information for panic. */
			memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr));
			if (ifa->ifa_dstaddr != NULL) {
				memcpy(&saved_dst, ifa->ifa_dstaddr,
				    sizeof(saved_dst));
			} else {
				memset(&saved_dst, 0, sizeof(saved_dst));
			}

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;

			/* MUST NOT HAPPEN */
			panic("%s: in_control failed %x, dst %x", ifp->if_xname,
			    ntohl(saved_addr.sin_addr.s_addr),
			    ntohl(saved_dst.sin_addr.s_addr));
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Fallback for other address families: unlink and destroy. */
		if_printf(ifp, "destroy ifaddr family %d\n",
		    ifa->ifa_addr->sa_family);
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	netisr_replymsg(&nmsg->base, 0);
}
862
/*
 * Synchronously purge all non-AF_LINK addresses from an interface by
 * dispatching the work to netisr0 (where ifaddr list changes occur).
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct netmsg_base nmsg;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    if_purgeaddrs_nolink_dispatch);
	nmsg.lmsg.u.ms_resultp = ifp;
	netisr_domsg(&nmsg, 0);
}
873
/*
 * Per-CPU handler: remove any staged entries belonging to this ifq's
 * subqueues from the local CPU's staging list.
 */
static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}
889
/*
 * Detach an ifq from the staging machinery: synchronously run the
 * stage-removal handler on every CPU's netisr.
 */
static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	/* XXX netisr_ncpus */
	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
}
904
a29ef6e8
SZ
/* Netmsg carrying the interface whose routes should be deleted. */
struct netmsg_if_rtdel {
	struct netmsg_base base;	/* embedded netmsg header */
	struct ifnet *ifp;		/* interface being detached */
};
909
/*
 * Per-CPU handler: walk every address family's radix routing table on
 * this CPU and delete routes pointing at the detaching interface, then
 * forward the message to the next netisr CPU.
 */
static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, cpu;

	cpu = mycpuid;
	ASSERT_NETISR_NCPUS(cpu);

	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head *rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}
	/* Chain to the next CPU's routing table replica. */
	netisr_forwardmsg(&msg->base, cpu + 1);
}
928
984263bc
MD
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * Teardown order matters throughout: the ifp is first made invisible
 * (index table, list, array), then all netisrs are synchronized so no
 * cpu can still see the old array, and only then are the per-ifp
 * resources torn down and freed.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifnet_array *old_ifnet_array;
	struct ifg_list *ifgl;
	struct netmsg_if_rtdel msg;
	struct domain *dp;
	int q;

	/* Announce that the interface is gone. */
	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/*
	 * Remove this ifp from ifindex2inet, ifnet queue and ifnet
	 * array before it is whacked.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/*
	 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/*
	 * Remove this ifp from ifnet queue.
	 */
	TAILQ_REMOVE(&ifnetlist, ifp, if_link);

	/*
	 * Remove this ifp from ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_del(ifp, old_ifnet_array);

	ifnet_unlock();

	/* Drop membership in every interface group. */
	ifgroup_lockmgr(LK_EXCLUSIVE);
	while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group);
	ifgroup_lockmgr(LK_RELEASE);

	/*
	 * Sync all netisrs so that the old ifnet array is no longer
	 * accessed and we can free it safely later on.
	 */
	netmsg_service_sync();
	ifnet_array_free(old_ifnet_array);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

	/* Decrease the mbuf clusters/jclusters limits increased by us */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(-ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(-ifp->if_nmbjclusters);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		/* Only the AF_LINK address may remain at this point. */
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
		    ("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
		    ("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp. This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 */
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    if_rtdel_dispatch);
	msg.ifp = ifp;
	netisr_domsg_global(&msg.base);

	/* Let each domain tear down its per-ifp AF data. */
	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);
	}

	kfree(ifp->if_addrheads, M_IFADDR);

	/* Make sure no IPIs still reference this ifp before freeing. */
	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}
1080
233c8570
AL
1081int
1082ifgroup_lockmgr(u_int flags)
1083{
1084 return lockmgr(&ifgroup_lock, flags);
1085}
1086
315a7da3 1087/*
233c8570 1088 * Create an empty interface group.
315a7da3 1089 */
233c8570 1090static struct ifg_group *
315a7da3
JL
1091if_creategroup(const char *groupname)
1092{
233c8570
AL
1093 struct ifg_group *ifg;
1094
1095 ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK);
1096 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1097 ifg->ifg_refcnt = 0;
1098 ifg->ifg_carp_demoted = 0;
1099 TAILQ_INIT(&ifg->ifg_members);
1100
1101 ifgroup_lockmgr(LK_EXCLUSIVE);
1102 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
1103 ifgroup_lockmgr(LK_RELEASE);
315a7da3 1104
233c8570
AL
1105 EVENTHANDLER_INVOKE(group_attach_event, ifg);
1106
1107 return (ifg);
315a7da3
JL
1108}
1109
/*
 * Destroy an empty interface group.
 *
 * The group must have no remaining members (refcnt == 0).  Unlinks it
 * from the global list under the exclusive group lock, fires the
 * group_detach event without the lock, and frees the group.
 */
static int
if_destroygroup(struct ifg_group *ifg)
{
	KASSERT(ifg->ifg_refcnt == 0,
	    ("trying to delete a non-empty interface group"));

	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_REMOVE(&ifg_head, ifg, ifg_next);
	ifgroup_lockmgr(LK_RELEASE);

	EVENTHANDLER_INVOKE(group_detach_event, ifg);
	kfree(ifg, M_IFNET);

	return (0);
}
1128
/*
 * Add the interface to a group.
 * The target group will be created if it doesn't exist.
 *
 * Returns EINVAL if the group name ends in a digit (reserved for
 * per-interface unit names), EEXIST if the interface already belongs
 * to the group, 0 on success.
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;

	/* Group names may not end in a digit. */
	if (groupname[0] &&
	    groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	ifgroup_lockmgr(LK_SHARED);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) {
			ifgroup_lockmgr(LK_RELEASE);
			return (EEXIST);
		}
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, groupname) == 0)
			break;
	}

	/*
	 * NOTE(review): the shared lock is dropped here before the group
	 * is (possibly) created and re-linked below, so a concurrent
	 * if_addgroup() of the same name could race — presumably callers
	 * are serialized at a higher level; verify.
	 */
	ifgroup_lockmgr(LK_RELEASE);

	if (ifg == NULL)
		ifg = if_creategroup(groupname);

	ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK);
	ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK);
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;
	ifg->ifg_refcnt++;

	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
	ifgroup_lockmgr(LK_RELEASE);

	/* Invoked without the group lock held. */
	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
}
1179
/*
 * Remove the interface from a group.
 * The group will be destroyed if it becomes empty.
 *
 * The 'ifgroup_lock' must be hold exclusively when calling this.
 * NOTE: the lock is temporarily dropped around the group_change
 * event and the group destruction, and reacquired before return.
 */
static int
if_delgroup_locked(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE);

	/* Find the interface's membership record for this group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	}
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Find and remove the group's back-reference to this ifp. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp)
			break;
	}

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);

		/* Event handlers run without the group lock. */
		ifgroup_lockmgr(LK_RELEASE);
		EVENTHANDLER_INVOKE(group_change_event, groupname);
		ifgroup_lockmgr(LK_EXCLUSIVE);

		kfree(ifgm, M_IFNET);
		ifgl->ifgl_group->ifg_refcnt--;
	}

	if (ifgl->ifgl_group->ifg_refcnt == 0) {
		/* if_destroygroup() takes the lock itself; drop ours. */
		ifgroup_lockmgr(LK_RELEASE);
		if_destroygroup(ifgl->ifgl_group);
		ifgroup_lockmgr(LK_EXCLUSIVE);
	}

	kfree(ifgl, M_IFNET);

	return (0);
}
1229
233c8570
AL
1230int
1231if_delgroup(struct ifnet *ifp, const char *groupname)
1232{
1233 int error;
1234
1235 ifgroup_lockmgr(LK_EXCLUSIVE);
1236 error = if_delgroup_locked(ifp, groupname);
1237 ifgroup_lockmgr(LK_RELEASE);
1238
1239 return (error);
1240}
1241
315a7da3 1242/*
233c8570
AL
1243 * Store all the groups that the interface belongs to in memory
1244 * pointed to by data.
315a7da3 1245 */
233c8570
AL
1246static int
1247if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp)
315a7da3 1248{
233c8570
AL
1249 struct ifg_list *ifgl;
1250 struct ifg_req *ifgrq, *p;
1251 int len, error;
1252
1253 len = 0;
1254 ifgroup_lockmgr(LK_SHARED);
1255 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1256 len += sizeof(struct ifg_req);
1257 ifgroup_lockmgr(LK_RELEASE);
315a7da3
JL
1258
1259 if (ifgr->ifgr_len == 0) {
233c8570
AL
1260 /*
1261 * Caller is asking how much memory should be allocated in
1262 * the next request in order to hold all the groups.
1263 */
1264 ifgr->ifgr_len = len;
315a7da3 1265 return (0);
233c8570
AL
1266 } else if (ifgr->ifgr_len != len) {
1267 return (EINVAL);
315a7da3
JL
1268 }
1269
233c8570
AL
1270 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
1271 if (ifgrq == NULL)
1272 return (ENOMEM);
1273
1274 ifgroup_lockmgr(LK_SHARED);
1275 p = ifgrq;
315a7da3 1276 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
233c8570
AL
1277 if (len < sizeof(struct ifg_req)) {
1278 ifgroup_lockmgr(LK_RELEASE);
b8b9f56e
MD
1279 error = EINVAL;
1280 goto failed;
233c8570
AL
1281 }
1282
1283 strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group,
1284 sizeof(ifgrq->ifgrq_group));
1285 len -= sizeof(struct ifg_req);
1286 p++;
315a7da3 1287 }
233c8570
AL
1288 ifgroup_lockmgr(LK_RELEASE);
1289
1290 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
b8b9f56e 1291failed:
233c8570 1292 kfree(ifgrq, M_TEMP);
b8b9f56e 1293 return error;
315a7da3
JL
1294}
1295
/*
 * Store all the members of a group in memory pointed to by data.
 *
 * Same two-phase size/copy protocol as if_getgroups(): ifgr_len == 0
 * queries the required size, otherwise it must match exactly.
 * Returns ENOENT if the named group does not exist.
 */
static int
if_getgroupmembers(struct ifgroupreq *ifgr)
{
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req *ifgrq, *p;
	int len, error;

	ifgroup_lockmgr(LK_SHARED);

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
			break;
	}
	if (ifg == NULL) {
		ifgroup_lockmgr(LK_RELEASE);
		return (ENOENT);
	}

	len = 0;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		len += sizeof(struct ifg_req);

	ifgroup_lockmgr(LK_RELEASE);

	if (ifgr->ifgr_len == 0) {
		ifgr->ifgr_len = len;
		return (0);
	} else if (ifgr->ifgr_len != len) {
		return (EINVAL);
	}

	ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
	if (ifgrq == NULL)
		return (ENOMEM);

	ifgroup_lockmgr(LK_SHARED);
	p = ifgrq;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		/* Membership grew while unlocked; buffer is too small. */
		if (len < sizeof(struct ifg_req)) {
			ifgroup_lockmgr(LK_RELEASE);
			error = EINVAL;
			goto failed;
		}

		strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(p->ifgrq_member));
		len -= sizeof(struct ifg_req);
		p++;
	}
	ifgroup_lockmgr(LK_RELEASE);

	error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
failed:
	kfree(ifgrq, M_TEMP);
	return error;
}
1356
951ecd7f
AL
/*
 * Add or delete (cmd = RTM_ADD / RTM_DELETE) the loopback route for
 * an interface address 'ia' owned by 'ifa'.  Benign outcomes
 * (EEXIST on add, ESRCH/ENOENT on delete) are passed through without
 * logging; other failures are logged at LOG_DEBUG.
 */
static int
ifa_maintain_loopback_route(int cmd, struct ifaddr *ifa, struct sockaddr *ia)
{
	struct sockaddr_dl null_sdl;
	struct rt_addrinfo info;
	struct ifaddr *rti_ifa;
	struct ifnet *ifp;
	int error;

	/* RTM_CHANGE is unsupported in rtrequest1() yet. */
	KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);

	rti_ifa = NULL;
	ifp = ifa->ifa_ifp;

	/* Gateway is a null AF_LINK sockaddr naming this interface. */
	bzero(&null_sdl, sizeof(null_sdl));
	null_sdl.sdl_len = sizeof(null_sdl);
	null_sdl.sdl_family = AF_LINK;
	null_sdl.sdl_index = ifp->if_index;
	null_sdl.sdl_type = ifp->if_type;

	bzero(&info, sizeof(info));
	if (cmd != RTM_DELETE)
		info.rti_ifp = loif;
	if (cmd == RTM_ADD) {
		/*
		 * Explicitly specify the loopback IFA.
		 */
		rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
		if (rti_ifa != NULL) {
			/*
			 * The loopback IFA wouldn't disappear, but ref it
			 * for safety.
			 */
			IFAREF(rti_ifa);
			info.rti_ifa = rti_ifa;
		}
	}
	info.rti_info[RTAX_DST] = ia;
	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
	/*
	 * Manually set RTF_LOCAL so that the IFA and IFP wouldn't be
	 * overridden to be the owner of the destination address (ia)
	 * by in_addroute().
	 */
	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_LOCAL;

	error = rtrequest1_global(cmd, &info, NULL, NULL, RTREQ_PRIO_NORM);

	if (rti_ifa != NULL)
		IFAFREE(rti_ifa);

	/* Expected results and benign races are not logged. */
	if (error == 0 ||
	    (cmd == RTM_ADD && error == EEXIST) ||
	    (cmd == RTM_DELETE && (error == ESRCH || error == ENOENT)))
		return (error);

	log(LOG_DEBUG, "%s: %s failed for interface %s: %d\n",
	    __func__, (cmd == RTM_ADD ? "insertion" : "deletion"),
	    ifp->if_xname, error);
	return (error);
}
1419
/* Install the loopback route for interface address 'ia'. */
int
ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{
	return ifa_maintain_loopback_route(RTM_ADD, ifa, ia);
}
1425
/* Remove the loopback route for interface address 'ia'. */
int
ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{
	return ifa_maintain_loopback_route(RTM_DELETE, ifa, ia);
}
1431
984263bc
MD
1432/*
1433 * Delete Routes for a Network Interface
f23061d4 1434 *
984263bc
MD
1435 * Called for each routing entry via the rnh->rnh_walktree() call above
1436 * to delete all route entries referencing a detaching network interface.
1437 *
1438 * Arguments:
1439 * rn pointer to node in the routing table
1440 * arg argument passed to rnh->rnh_walktree() - detaching interface
1441 *
1442 * Returns:
1443 * 0 successful
1444 * errno failed - reason indicated
1445 *
1446 */
1447static int
f23061d4 1448if_rtdel(struct radix_node *rn, void *arg)
984263bc
MD
1449{
1450 struct rtentry *rt = (struct rtentry *)rn;
1451 struct ifnet *ifp = arg;
1452 int err;
1453
1454 if (rt->rt_ifp == ifp) {
1455
1456 /*
1457 * Protect (sorta) against walktree recursion problems
1458 * with cloned routes
1459 */
f23061d4 1460 if (!(rt->rt_flags & RTF_UP))
984263bc
MD
1461 return (0);
1462
1463 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1464 rt_mask(rt), rt->rt_flags,
2038fb68 1465 NULL);
984263bc
MD
1466 if (err) {
1467 log(LOG_WARNING, "if_rtdel: error %d\n", err);
1468 }
1469 }
1470
1471 return (0);
1472}
1473
c008937e
AL
1474static __inline boolean_t
1475ifa_match_withmask(const struct ifaddr *ifa, const struct sockaddr *addr)
1476{
1477 const char *cp, *cp2, *cp3, *cplim;
1478
1479 KKASSERT(ifa->ifa_addr->sa_family == addr->sa_family);
1480
1481 cp = addr->sa_data;
1482 cp2 = ifa->ifa_addr->sa_data;
1483 cp3 = ifa->ifa_netmask->sa_data;
1484 cplim = (const char *)ifa->ifa_netmask + ifa->ifa_netmask->sa_len;
1485
1486 while (cp3 < cplim) {
1487 if ((*cp++ ^ *cp2++) & *cp3++)
1488 return (FALSE);
1489 }
1490
1491 return (TRUE);
1492}
1493
0925f9d8
SZ
1494static __inline boolean_t
1495ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa)
1496{
1497 if (old_ifa == NULL)
c008937e 1498 return (TRUE);
0925f9d8
SZ
1499
1500 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 &&
1501 (cur_ifa->ifa_ifp->if_flags & IFF_UP))
c008937e 1502 return (TRUE);
0925f9d8
SZ
1503 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 &&
1504 (cur_ifa->ifa_flags & IFA_ROUTE))
c008937e
AL
1505 return (TRUE);
1506
1507 return (FALSE);
0925f9d8
SZ
1508}
1509
984263bc
MD
/*
 * Locate an interface based on a complete address.
 *
 * Scans every interface's per-cpu ifaddr list for an exact address
 * match, also matching an interface's broadcast address.  Returns
 * NULL if no interface owns the address.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	const struct ifnet_array *arr;
	int i;

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				return (ifa);
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IPv6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				return (ifa);
		}
	}
	return (NULL);
}
0925f9d8 1541
984263bc 1542/*
2976dea7 1543 * Locate the point-to-point interface with a given destination address.
984263bc 1544 */
984263bc 1545struct ifaddr *
f23061d4 1546ifa_ifwithdstaddr(struct sockaddr *addr)
984263bc 1547{
b4051e25
SZ
1548 const struct ifnet_array *arr;
1549 int i;
984263bc 1550
b4051e25
SZ
1551 arr = ifnet_array_get();
1552 for (i = 0; i < arr->ifnet_count; ++i) {
1553 struct ifnet *ifp = arr->ifnet_arr[i];
b2632176
SZ
1554 struct ifaddr_container *ifac;
1555
1556 if (!(ifp->if_flags & IFF_POINTOPOINT))
1557 continue;
1558
1559 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1560 struct ifaddr *ifa = ifac->ifa;
1561
984263bc
MD
1562 if (ifa->ifa_addr->sa_family != addr->sa_family)
1563 continue;
0c3c561c
JH
1564 if (ifa->ifa_dstaddr &&
1565 sa_equal(addr, ifa->ifa_dstaddr))
984263bc 1566 return (ifa);
b2632176 1567 }
984263bc 1568 }
b2632176 1569 return (NULL);
984263bc
MD
1570}
1571
/*
 * Find an interface on a specific network. If many, choice
 * is most specific found.
 *
 * AF_LINK lookups short-circuit through ifindex2ifnet; other
 * families scan all interfaces, tracking the best (most specific
 * netmask, then ifa_prefer()) candidate in ifa_maybe.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	const struct ifnet_array *arr;
	int i;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan though each interface, looking for ones that have
	 * addresses in this address family.
	 */
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != af)
				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * If we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				if (ifa->ifa_netmask == NULL ||
				    !ifa_match_withmask(ifa, addr))
					continue;

				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search for an even
				 * better one. If the netmasks are equal,
				 * we prefer the this ifa based on the result
				 * of ifa_prefer().
				 */
				if (ifa_maybe == NULL ||
				    rn_refines(ifa->ifa_netmask,
				        ifa_maybe->ifa_netmask) ||
				    (sa_equal(ifa_maybe->ifa_netmask,
				        ifa->ifa_netmask) &&
				     ifa_prefer(ifa, ifa_maybe)))
					ifa_maybe = ifa;
			}
		}
	}

	return (ifa_maybe);
}
1660
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 *
 * The first ifaddr of the right family is kept as a fallback
 * (ifa_maybe); an exact address/dstaddr match or a netmask match
 * returns immediately.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);

	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			/* No mask: only exact (or dst) equality can match. */
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			if (ifa_match_withmask(ifa, addr))
				return (ifa);
		}
	}

	return (ifa_maybe);
}
1700
e782981c 1701struct netmsg_if {
9a74b592
SZ
1702 struct netmsg_base base;
1703 struct ifnet *ifp;
9a74b592
SZ
1704};
1705
984263bc 1706/*
9a74b592 1707 * Mark an interface down and notify protocols of the transition.
984263bc 1708 */
9a74b592 1709static void
e782981c 1710if_down_dispatch(netmsg_t nmsg)
984263bc 1711{
e782981c 1712 struct netmsg_if *msg = (struct netmsg_if *)nmsg;
9a74b592 1713 struct ifnet *ifp = msg->ifp;
b2632176 1714 struct ifaddr_container *ifac;
a2b099dd 1715 struct domain *dp;
984263bc 1716
43dbcc2a
SZ
1717 ASSERT_NETISR0;
1718
e782981c 1719 ifp->if_flags &= ~IFF_UP;
984263bc 1720 getmicrotime(&ifp->if_lastchange);
4d2ff05c
RM
1721 rt_ifmsg(ifp);
1722
9a74b592
SZ
1723 /*
1724 * The ifaddr processing in the following loop will block,
1725 * however, this function is called in netisr0, in which
1726 * ifaddr list changes happen, so we don't care about the
1727 * blockness of the ifaddr processing here.
1728 */
b2632176
SZ
1729 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1730 struct ifaddr *ifa = ifac->ifa;
1731
9a74b592
SZ
1732 /* Ignore marker */
1733 if (ifa->ifa_addr->sa_family == AF_UNSPEC)
1734 continue;
1735
e782981c 1736 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
b2632176 1737 }
9a74b592 1738
a2b099dd
RM
1739 SLIST_FOREACH(dp, &domains, dom_next)
1740 if (dp->dom_if_down != NULL)
1741 dp->dom_if_down(ifp);
1742
4d2ff05c 1743 ifq_purge_all(&ifp->if_snd);
5204e13c 1744 netisr_replymsg(&nmsg->base, 0);
9a74b592
SZ
1745}
1746
984263bc 1747/*
9a74b592 1748 * Mark an interface up and notify protocols of the transition.
984263bc 1749 */
9a74b592 1750static void
e782981c 1751if_up_dispatch(netmsg_t nmsg)
984263bc 1752{
e782981c 1753 struct netmsg_if *msg = (struct netmsg_if *)nmsg;
9a74b592 1754 struct ifnet *ifp = msg->ifp;
b2632176 1755 struct ifaddr_container *ifac;
a2b099dd 1756 struct domain *dp;
984263bc 1757
43dbcc2a
SZ
1758 ASSERT_NETISR0;
1759
9275f515 1760 ifq_purge_all(&ifp->if_snd);
e782981c 1761 ifp->if_flags |= IFF_UP;
984263bc 1762 getmicrotime(&ifp->if_lastchange);
4d2ff05c
RM
1763 rt_ifmsg(ifp);
1764
9a74b592
SZ
1765 /*
1766 * The ifaddr processing in the following loop will block,
1767 * however, this function is called in netisr0, in which
1768 * ifaddr list changes happen, so we don't care about the
1769 * blockness of the ifaddr processing here.
1770 */
b2632176
SZ
1771 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1772 struct ifaddr *ifa = ifac->ifa;
1773
9a74b592
SZ
1774 /* Ignore marker */
1775 if (ifa->ifa_addr->sa_family == AF_UNSPEC)
1776 continue;
1777
e782981c 1778 kpfctlinput(PRC_IFUP, ifa->ifa_addr);
b2632176 1779 }
a2b099dd
RM
1780
1781 SLIST_FOREACH(dp, &domains, dom_next)
1782 if (dp->dom_if_up != NULL)
1783 dp->dom_if_up(ifp);
9a74b592 1784
5204e13c 1785 netisr_replymsg(&nmsg->base, 0);
9a74b592
SZ
1786}
1787
984263bc 1788/*
5c703385
MD
1789 * Mark an interface down and notify protocols of the transition. An
1790 * interface going down is also considered to be a synchronizing event.
1791 * We must ensure that all packet processing related to the interface
1792 * has completed before we return so e.g. the caller can free the ifnet
1793 * structure that the mbufs may be referencing.
1794 *
984263bc
MD
1795 * NOTE: must be called at splnet or eqivalent.
1796 */
1797void
f23061d4 1798if_down(struct ifnet *ifp)
984263bc 1799{
e782981c
RM
1800 struct netmsg_if msg;
1801
fcddd1b6 1802 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
e782981c
RM
1803 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
1804 if_down_dispatch);
1805 msg.ifp = ifp;
1806 netisr_domsg(&msg.base, 0);
5c703385 1807 netmsg_service_sync();
984263bc
MD
1808}
1809
/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or eqivalent.
 */
void
if_up(struct ifnet *ifp)
{
	struct netmsg_if msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_up_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	/* Event fires after netisr0 has completed the transition. */
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}
1826
6de83abe
SZ
1827/*
1828 * Process a link state change.
1829 * NOTE: must be called at splsoftnet or equivalent.
1830 */
1831void
1832if_link_state_change(struct ifnet *ifp)
1833{
71fc104f
HT
1834 int link_state = ifp->if_link_state;
1835
6de83abe 1836 rt_ifmsg(ifp);
71fc104f
HT
1837 devctl_notify("IFNET", ifp->if_xname,
1838 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
bc1a39e2
AL
1839
1840 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
6de83abe
SZ
1841}
1842
984263bc
MD
/*
 * Handle interface watchdog timer routines. Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 *
 * Runs in netisr0; replies to the trigger message immediately so the
 * next tick can be scheduled even if a watchdog blocks, then
 * reschedules itself via callout at the end.
 */
static void
if_slowtimo_dispatch(netmsg_t nmsg)
{
	struct globaldata *gd = mycpu;
	const struct ifnet_array *arr;
	int i;

	ASSERT_NETISR0;

	crit_enter_gd(gd);
	lwkt_replymsg(&nmsg->lmsg, 0);	/* reply ASAP */
	crit_exit_gd(gd);

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];

		crit_enter_gd(gd);

		/* Mirror per-cpu stats into the legacy if_data fields. */
		if (if_stats_compat) {
			IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
			IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
			IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
			IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
			IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
			IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
			IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
			IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
			IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
			IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
			IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
			IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops);
		}

		/* Timer disarmed, or armed but not yet expired. */
		if (ifp->if_timer == 0 || --ifp->if_timer) {
			crit_exit_gd(gd);
			continue;
		}
		if (ifp->if_watchdog) {
			if (ifnet_tryserialize_all(ifp)) {
				(*ifp->if_watchdog)(ifp);
				ifnet_deserialize_all(ifp);
			} else {
				/* try again next timeout */
				++ifp->if_timer;
			}
		}

		crit_exit_gd(gd);
	}

	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
}
1901
b5df1a85
SZ
/*
 * Callout handler for the slow timer: kick the dispatch message into
 * netisr0, but only if the previous one has already been replied
 * (MSGF_DONE) — prevents double-queueing the single static message.
 */
static void
if_slowtimo(void *arg __unused)
{
	struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg;

	KASSERT(mycpuid == 0, ("not on cpu0"));
	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
	crit_exit();
}
1913
984263bc
MD
1914/*
1915 * Map interface name to
1916 * interface structure pointer.
1917 */
1918struct ifnet *
1919ifunit(const char *name)
1920{
984263bc 1921 struct ifnet *ifp;
984263bc 1922
984263bc 1923 /*
3e4a09e7 1924 * Search all the interfaces for this name/number
984263bc 1925 */
b4051e25 1926 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked"));
3e4a09e7 1927
b4051e25 1928 TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
3e4a09e7 1929 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
984263bc
MD
1930 break;
1931 }
1932 return (ifp);
1933}
1934
984263bc 1935struct ifnet *
b4051e25 1936ifunit_netisr(const char *name)
984263bc 1937{
b4051e25
SZ
1938 const struct ifnet_array *arr;
1939 int i;
984263bc
MD
1940
1941 /*
b4051e25 1942 * Search all the interfaces for this name/number
984263bc
MD
1943 */
1944
b4051e25
SZ
1945 arr = ifnet_array_get();
1946 for (i = 0; i < arr->ifnet_count; ++i) {
1947 struct ifnet *ifp = arr->ifnet_arr[i];
984263bc 1948
b4051e25
SZ
1949 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1950 return ifp;
1951 }
1952 return NULL;
1953}
984263bc
MD
1954
1955/*
1956 * Interface ioctls.
1957 */
1958int
87de5057 1959ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
984263bc 1960{
41c20dac 1961 struct ifnet *ifp;
233c8570 1962 struct ifgroupreq *ifgr;
41c20dac 1963 struct ifreq *ifr;
984263bc 1964 struct ifstat *ifs;
e612af50 1965 int error, do_ifup = 0;
984263bc
MD
1966 short oif_flags;
1967 int new_flags;
1fdf0954 1968 size_t namelen, onamelen;
f6994c54
AHJ
1969 size_t descrlen;
1970 char *descrbuf, *odescrbuf;
1fdf0954
HP
1971 char new_name[IFNAMSIZ];
1972 struct ifaddr *ifa;
1973 struct sockaddr_dl *sdl;
984263bc
MD
1974
1975 switch (cmd) {
984263bc 1976 case SIOCGIFCONF:
87de5057 1977 return (ifconf(cmd, data, cred));
9683f229
MD
1978 default:
1979 break;
984263bc 1980 }
9683f229 1981
984263bc
MD
1982 ifr = (struct ifreq *)data;
1983
1984 switch (cmd) {
1985 case SIOCIFCREATE:
c5e14c14 1986 case SIOCIFCREATE2:
2b3f93ea
MD
1987 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
1988 if (error)
c5e14c14
RP
1989 return (error);
1990 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
bb54c3a2 1991 (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL));
984263bc 1992 case SIOCIFDESTROY:
2b3f93ea
MD
1993 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
1994 if (error)
984263bc 1995 return (error);
c5e14c14 1996 return (if_clone_destroy(ifr->ifr_name));
984263bc
MD
1997 case SIOCIFGCLONERS:
1998 return (if_clone_list((struct if_clonereq *)data));
233c8570
AL
1999 case SIOCGIFGMEMB:
2000 return (if_getgroupmembers((struct ifgroupreq *)data));
9683f229
MD
2001 default:
2002 break;
984263bc
MD
2003 }
2004
9683f229
MD
2005 /*
2006 * Nominal ioctl through interface, lookup the ifp and obtain a
2007 * lock to serialize the ifconfig ioctl operation.
2008 */
b4051e25
SZ
2009 ifnet_lock();
2010
984263bc 2011 ifp = ifunit(ifr->ifr_name);
b4051e25
SZ
2012 if (ifp == NULL) {
2013 ifnet_unlock();
984263bc 2014 return (ENXIO);
b4051e25 2015 }
9683f229 2016 error = 0;
984263bc 2017
9683f229 2018 switch (cmd) {
12b71966
PA
2019 case SIOCGIFINDEX:
2020 ifr->ifr_index = ifp->if_index;
2021 break;
2022
984263bc
MD
2023 case SIOCGIFFLAGS:
2024 ifr->ifr_flags = ifp->if_flags;
46f25451 2025 ifr->ifr_flagshigh = ifp->if_flags >> 16;
984263bc
MD
2026 break;
2027
2028 case SIOCGIFCAP:
2029 ifr->ifr_reqcap = ifp->if_capabilities;
2030 ifr->ifr_curcap = ifp->if_capenable;
2031 break;
2032
2033 case SIOCGIFMETRIC:
2034 ifr->ifr_metric = ifp->if_metric;
2035 break;
2036
2037 case SIOCGIFMTU:
2038 ifr->ifr_mtu = ifp->if_mtu;
2039 break;
2040
e41e61d5
SZ
2041 case SIOCGIFTSOLEN:
2042 ifr->ifr_tsolen = ifp->if_tsolen;
2043 break;
2044
315a7da3
JL
2045 case SIOCGIFDATA:
2046 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
9683f229 2047 sizeof(ifp->if_data));
315a7da3
JL
2048 break;
2049
984263bc
MD
2050 case SIOCGIFPHYS:
2051 ifr->ifr_phys = ifp->if_physical;
2052 break;
2053
1630efc5 2054 case SIOCGIFPOLLCPU:
1630efc5 2055 ifr->ifr_pollcpu = -1;
1630efc5
SZ
2056 break;
2057
2058 case SIOCSIFPOLLCPU:
1630efc5
SZ
2059 break;
2060
f6994c54
AHJ
2061 case SIOCGIFDESCR:
2062 error = 0;
2063 ifnet_lock();
2064 if (ifp->if_description == NULL) {
2065 ifr->ifr_buffer.length = 0;
2066 error = ENOMSG;
2067 } else {
2068 /* space for terminating nul */
2069 descrlen = strlen(ifp->if_description) + 1;
2070 if (ifr->ifr_buffer.length < descrlen)
2071 error = ENAMETOOLONG;
2072 else
2073 error = copyout(ifp->if_description,
2074 ifr->ifr_buffer.buffer, descrlen);
2075 ifr->ifr_buffer.length = descrlen;
2076 }
2077 ifnet_unlock();
2078 break;
2079
2080 case SIOCSIFDESCR:
2b3f93ea 2081 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
f6994c54
AHJ
2082 if (error)
2083 break;
2084
2085 /*
2086 * Copy only (length-1) bytes to make sure that
2087 * if_description is always nul terminated. The
2088 * length parameter is supposed to count the
2089 * terminating nul in.
2090 */
2091 if (ifr->ifr_buffer.length > ifdescr_maxlen)
2092 return (ENAMETOOLONG);
2093 else if (ifr->ifr_buffer.length == 0)
2094 descrbuf = NULL;
2095 else {
2096 descrbuf = kmalloc(ifr->ifr_buffer.length, M_IFDESCR,
2097 M_WAITOK | M_ZERO);
2098 error = copyin(ifr->ifr_buffer.buffer, descrbuf,
2099 ifr->ifr_buffer.length - 1);
2100 if (error) {
2101 kfree(descrbuf, M_IFDESCR);
2102 break;
2103 }
2104 }
2105
2106 ifnet_lock();
2107 odescrbuf = ifp->if_description;
2108 ifp->if_description = descrbuf;
2109 ifnet_unlock();
2110
2111 if (odescrbuf)
2112 kfree(odescrbuf, M_IFDESCR);
2113
984263bc 2114 case SIOCSIFFLAGS:
2b3f93ea 2115 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2116 if (error)
9683f229 2117 break;
984263bc
MD
2118 new_flags = (ifr->ifr_flags & 0xffff) |
2119 (ifr->ifr_flagshigh << 16);
2120 if (ifp->if_flags & IFF_SMART) {
2121 /* Smart drivers twiddle their own routes */
2122 } else if (ifp->if_flags & IFF_UP &&
2123 (new_flags & IFF_UP) == 0) {
984263bc 2124 if_down(ifp);
984263bc
MD
2125 } else if (new_flags & IFF_UP &&
2126 (ifp->if_flags & IFF_UP) == 0) {
e612af50 2127 do_ifup = 1;
984263bc 2128 }
9c095379 2129
b3a7093f
SZ
2130#ifdef IFPOLL_ENABLE
2131 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
2132 if (new_flags & IFF_NPOLLING)
2133 ifpoll_register(ifp);
2134 else
2135 ifpoll_deregister(ifp);
2136 }
2137#endif
9c095379 2138
984263bc
MD
2139 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2140 (new_flags &~ IFF_CANTCHANGE);
984263bc
MD
2141 if (new_flags & IFF_PPROMISC) {
2142 /* Permanently promiscuous mode requested */
2143 ifp->if_flags |= IFF_PROMISC;
2144 } else if (ifp->if_pcount == 0) {
2145 ifp->if_flags &= ~IFF_PROMISC;
2146 }
78195a76 2147 if (ifp->if_ioctl) {
a3dd34d2 2148 ifnet_serialize_all(ifp);
87de5057 2149 ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2150 ifnet_deserialize_all(ifp);
78195a76 2151 }
e612af50
SZ
2152 if (do_ifup)
2153 if_up(ifp);
984263bc
MD
2154 getmicrotime(&ifp->if_lastchange);
2155 break;
2156
2157 case SIOCSIFCAP:
2b3f93ea 2158 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2159 if (error)
9683f229
MD
2160 break;
2161 if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
2162 error = EINVAL;
2163 break;
2164 }
a3dd34d2 2165 ifnet_serialize_all(ifp);
87de5057 2166 ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2167 ifnet_deserialize_all(ifp);
984263bc
MD
2168 break;
2169
f23061d4 2170 case SIOCSIFNAME:
2b3f93ea 2171 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
9683f229
MD
2172 if (error)
2173 break;
f23061d4 2174 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
9683f229
MD
2175 if (error)
2176 break;
2177 if (new_name[0] == '\0') {
2178 error = EINVAL;
2179 break;
2180 }
2181 if (ifunit(new_name) != NULL) {
2182 error = EEXIST;
2183 break;
2184 }
f2bd8b67
JS
2185
2186 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
f23061d4
JH
2187
2188 /* Announce the departure of the interface. */
2189 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
2190
2191 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
b2632176 2192 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
f23061d4
JH
2193 sdl = (struct sockaddr_dl *)ifa->ifa_addr;
2194 namelen = strlen(new_name);
2195 onamelen = sdl->sdl_nlen;
2196 /*
2197 * Move the address if needed. This is safe because we
2198 * allocate space for a name of length IFNAMSIZ when we
2199 * create this in if_attach().
2200 */
2201 if (namelen != onamelen) {
2202 bcopy(sdl->sdl_data + onamelen,
2203 sdl->sdl_data + namelen, sdl->sdl_alen);
2204 }
2205 bcopy(new_name, sdl->sdl_data, namelen);
2206 sdl->sdl_nlen = namelen;
2207 sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
2208 bzero(sdl->sdl_data, onamelen);
2209 while (namelen != 0)
2210 sdl->sdl_data[--namelen] = 0xff;
f2bd8b67
JS
2211
2212 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
f23061d4
JH
2213
2214 /* Announce the return of the interface. */
2215 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
2216 break;
1fdf0954 2217
984263bc 2218 case SIOCSIFMETRIC:
2b3f93ea 2219 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2220 if (error)
9683f229 2221 break;
984263bc
MD
2222 ifp->if_metric = ifr->ifr_metric;
2223 getmicrotime(&ifp->if_lastchange);
2224 break;
2225
2226 case SIOCSIFPHYS:
2b3f93ea 2227 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2228 if (error)
9683f229
MD
2229 break;
2230 if (ifp->if_ioctl == NULL) {
2231 error = EOPNOTSUPP;
2232 break;
2233 }
a3dd34d2 2234 ifnet_serialize_all(ifp);
87de5057 2235 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2236 ifnet_deserialize_all(ifp);
984263bc
MD
2237 if (error == 0)
2238 getmicrotime(&ifp->if_lastchange);
9683f229 2239 break;
984263bc
MD
2240
2241 case SIOCSIFMTU:
2242 {
2243 u_long oldmtu = ifp->if_mtu;
2244
2b3f93ea 2245 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2246 if (error)
9683f229
MD
2247 break;
2248 if (ifp->if_ioctl == NULL) {
2249 error = EOPNOTSUPP;
2250 break;
2251 }
2252 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
2253 error = EINVAL;
2254 break;
2255 }
a3dd34d2 2256 ifnet_serialize_all(ifp);
87de5057 2257 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2258 ifnet_deserialize_all(ifp);
984263bc
MD
2259 if (error == 0) {
2260 getmicrotime(&ifp->if_lastchange);
2261 rt_ifmsg(ifp);
2262 }
2263 /*
2264 * If the link MTU changed, do network layer specific procedure.
2265 */
2266 if (ifp->if_mtu != oldmtu) {
2267#ifdef INET6
2268 nd6_setmtu(ifp);
2269#endif
2270 }
9683f229 2271 break;
984263bc
MD
2272 }
2273
e41e61d5 2274 case SIOCSIFTSOLEN:
2b3f93ea 2275 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
e41e61d5
SZ
2276 if (error)
2277 break;
2278
2279 /* XXX need driver supplied upper limit */
2280 if (ifr->ifr_tsolen <= 0) {
2281 error = EINVAL;
2282 break;
2283 }
2284 ifp->if_tsolen = ifr->ifr_tsolen;
2285 break;
2286
984263bc
MD
2287 case SIOCADDMULTI:
2288 case SIOCDELMULTI:
2b3f93ea 2289 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2290 if (error)
9683f229 2291 break;
984263bc
MD
2292
2293 /* Don't allow group membership on non-multicast interfaces. */
9683f229
MD
2294 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2295 error = EOPNOTSUPP;
2296 break;
2297 }
984263bc
MD
2298
2299 /* Don't let users screw up protocols' entries. */
9683f229
MD
2300 if (ifr->ifr_addr.sa_family != AF_LINK) {
2301 error = EINVAL;
2302 break;
2303 }
984263bc
MD
2304
2305 if (cmd == SIOCADDMULTI) {
2306 struct ifmultiaddr *ifma;
2307 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
2308 } else {
2309 error = if_delmulti(ifp, &ifr->ifr_addr);
2310 }
2311 if (error == 0)
2312 getmicrotime(&ifp->if_lastchange);
9683f229 2313 break;
984263bc
MD
2314
2315 case SIOCSIFPHYADDR:
2316 case SIOCDIFPHYADDR:
2317#ifdef INET6
2318 case SIOCSIFPHYADDR_IN6:
2319#endif
2320 case SIOCSLIFPHYADDR:
233c8570 2321 case SIOCSIFMEDIA:
984263bc 2322 case SIOCSIFGENERIC:
2b3f93ea 2323 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2324 if (error)
9683f229 2325 break;
baf84f0a 2326 if (ifp->if_ioctl == NULL) {
9683f229
MD
2327 error = EOPNOTSUPP;
2328 break;
2329 }
a3dd34d2 2330 ifnet_serialize_all(ifp);
87de5057 2331 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2332 ifnet_deserialize_all(ifp);
984263bc
MD
2333 if (error == 0)
2334 getmicrotime(&ifp->if_lastchange);
9683f229 2335 break;
984263bc
MD
2336
2337 case SIOCGIFSTATUS:
2338 ifs = (struct ifstat *)data;
2339 ifs->ascii[0] = '\0';
9683f229 2340 /* fall through */
984263bc
MD
2341 case SIOCGIFPSRCADDR:
2342 case SIOCGIFPDSTADDR:
2343 case SIOCGLIFPHYADDR:
2344 case SIOCGIFMEDIA:
1e1c5fac 2345 case SIOCGIFXMEDIA:
984263bc 2346 case SIOCGIFGENERIC:
9683f229
MD
2347 if (ifp->if_ioctl == NULL) {
2348 error = EOPNOTSUPP;
2349 break;
2350 }
a3dd34d2 2351 ifnet_serialize_all(ifp);
87de5057 2352 error = ifp->if_ioctl(ifp, cmd, data, cred);
a3dd34d2 2353 ifnet_deserialize_all(ifp);
9683f229 2354 break;
984263bc
MD
2355
2356 case SIOCSIFLLADDR:
2b3f93ea 2357 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT);
984263bc 2358 if (error)
9683f229
MD
2359 break;
2360 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
2361 ifr->ifr_addr.sa_len);
19f10c78 2362 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
9683f229 2363 break;
984263bc 2364
233c8570
AL
2365 case SIOCAIFGROUP:
2366 ifgr = (struct ifgroupreq *)ifr;
2b3f93ea
MD
2367 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG);
2368 if (error)
233c8570
AL
2369 return (error);
2370 if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2371 return (error);
2372 break;
2373
2374 case SIOCDIFGROUP:
2375 ifgr = (struct ifgroupreq *)ifr;
2b3f93ea
MD
2376 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG);
2377 if (error)
233c8570
AL
2378 return (error);
2379 if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2380 return (error);
2381 break;
2382
2383 case SIOCGIFGROUP:
2384 ifgr = (struct ifgroupreq *)ifr;
2385 if ((error = if_getgroups(ifgr, ifp)))
2386 return (error);
2387 break;
2388
984263bc
MD
2389 default:
2390 oif_flags = ifp->if_flags;
9683f229
MD
2391 if (so->so_proto == 0) {
2392 error = EOPNOTSUPP;
2393 break;
2394 }
002c1265
MD
2395 error = so_pru_control_direct(so, cmd, data, ifp);
2396
baf84f0a
AL
2397 /*
2398 * If the socket control method returns EOPNOTSUPP, pass the
2399 * request directly to the interface.
2400 *
2401 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls,
2402 * because drivers may trust these ioctls to come from an
2403 * already privileged layer and thus do not perform credentials
2404 * checks or input validation.
2405 */
2406 if (error == EOPNOTSUPP &&
2407 ifp->if_ioctl != NULL &&
2408 cmd != SIOCSIFADDR &&
2409 cmd != SIOCSIFBRDADDR &&
2410 cmd != SIOCSIFDSTADDR &&
2411 cmd != SIOCSIFNETMASK) {
2412 ifnet_serialize_all(ifp);
2413 error = ifp->if_ioctl(ifp, cmd, data, cred);
2414 ifnet_deserialize_all(ifp);
2415 }
2416
984263bc
MD
2417 if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
2418#ifdef INET6
2419 DELAY(100);/* XXX: temporary workaround for fxp issue*/
2420 if (ifp->if_flags & IFF_UP) {
4986965b 2421 crit_enter();
984263bc 2422 in6_if_up(ifp);
4986965b 2423 crit_exit();
984263bc
MD
2424 }
2425#endif
2426 }
9683f229 2427 break;
984263bc 2428 }
9683f229 2429
b4051e25 2430 ifnet_unlock();
9683f229 2431 return (error);
984263bc
MD
2432}
2433
2434/*
2435 * Set/clear promiscuous mode on interface ifp based on the truth value
2436 * of pswitch. The calls are reference counted so that only the first
2437 * "on" request actually has an effect, as does the final "off" request.
2438 * Results are undefined if the "off" and "on" requests are not matched.
2439 */
2440int
f23061d4 2441ifpromisc(struct ifnet *ifp, int pswitch)
984263bc
MD
2442{
2443 struct ifreq ifr;
2444 int error;
2445 int oldflags;
2446
2447 oldflags = ifp->if_flags;
46f25451 2448 if (ifp->if_flags & IFF_PPROMISC) {
984263bc
MD
2449 /* Do nothing if device is in permanently promiscuous mode */
2450 ifp->if_pcount += pswitch ? 1 : -1;
2451 return (0);
2452 }
2453 if (pswitch) {
2454 /*
2455 * If the device is not configured up, we cannot put it in
2456 * promiscuous mode.
2457 */
2458 if ((ifp->if_flags & IFF_UP) == 0)
2459 return (ENETDOWN);
2460 if (ifp->if_pcount++ != 0)
2461 return (0);
2462 ifp->if_flags |= IFF_PROMISC;
3e4a09e7
MD
2463 log(LOG_INFO, "%s: promiscuous mode enabled\n",
2464 ifp->if_xname);
984263bc
MD
2465 } else {
2466 if (--ifp->if_pcount > 0)
2467 return (0);
2468 ifp->if_flags &= ~IFF_PROMISC;
3e4a09e7
MD
2469 log(LOG_INFO, "%s: promiscuous mode disabled\n",
2470 ifp->if_xname);
984263bc
MD
2471 }
2472 ifr.ifr_flags = ifp->if_flags;
46f25451 2473 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2
SZ
2474 ifnet_serialize_all(ifp);
2475 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
2476 ifnet_deserialize_all(ifp);
984263bc
MD
2477 if (error == 0)
2478 rt_ifmsg(ifp);
2479 else
2480 ifp->if_flags = oldflags;
2481 return error;
2482}
2483
/*
 * Return interface configuration
 * of system. List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * Copies one struct ifreq per interface address into the user buffer
 * described by the struct ifconf at 'data'; addresses longer than
 * sizeof(struct sockaddr) are copied out inline after ifr_name.  On
 * return ifc_len is trimmed to the number of bytes actually used.
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct sockaddr *sa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	ifrp = ifc->ifc_req;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		struct ifaddr_container *ifac, *ifac_mark;
		struct ifaddr_marker mark;
		struct ifaddrhead *head;
		int addrs;

		if (space <= sizeof ifr)
			break;

		/*
		 * Zero the stack declared structure first to prevent
		 * memory disclosure.
		 */
		bzero(&ifr, sizeof(ifr));
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		}

		/*
		 * Add a marker, since copyout() could block and during that
		 * period the list could be changed.  Inserting the marker to
		 * the header of the list will not cause trouble for the code
		 * assuming that the first element of the list is AF_LINK; the
		 * marker will be moved to the next position w/o blocking.
		 */
		ifa_marker_init(&mark, ifp);
		ifac_mark = &mark.ifac;
		head = &ifp->if_addrheads[mycpuid];

		addrs = 0;
		TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link);
		while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
			struct ifaddr *ifa = ifac->ifa;

			/* Advance the marker past the entry being visited. */
			TAILQ_REMOVE(head, ifac_mark, ifa_link);
			TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);

			/* Ignore marker */
			if (ifa->ifa_addr->sa_family == AF_UNSPEC)
				continue;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			/* Jailed callers only see jail-visible addresses. */
			if (cred->cr_prison && prison_if(cred, sa))
				continue;
			addrs++;
			/*
			 * Keep a reference on this ifaddr, so that it will
			 * not be destroyed when its address is copied to
			 * the userland, which could block.
			 */
			IFAREF(ifa);
			if (sa->sa_len <= sizeof(*sa)) {
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				/* Oversized sockaddr: name + inline address. */
				if (space < (sizeof ifr) + sa->sa_len -
				    sizeof(*sa)) {
					IFAFREE(ifa);
					break;
				}
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
						sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
							sa->sa_len);
				ifrp = (struct ifreq *)
					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			IFAFREE(ifa);
			if (error)
				break;
			space -= sizeof ifr;
		}
		TAILQ_REMOVE(head, ifac_mark, ifa_link);
		if (error)
			break;
		if (!addrs) {
			/*
			 * Interface with no (visible) addresses: emit a
			 * single record with a zeroed address.
			 */
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifnet_unlock();

	ifc->ifc_len -= space;
	return (error);
}
2598
2599/*
2600 * Just like if_promisc(), but for all-multicast-reception mode.
2601 */
2602int
f23061d4 2603if_allmulti(struct ifnet *ifp, int onswitch)
984263bc
MD
2604{
2605 int error = 0;
984263bc
MD
2606 struct ifreq ifr;
2607
4986965b
JS
2608 crit_enter();
2609
984263bc
MD
2610 if (onswitch) {
2611 if (ifp->if_amcount++ == 0) {
2612 ifp->if_flags |= IFF_ALLMULTI;
2613 ifr.ifr_flags = ifp->if_flags;
46f25451 2614 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2 2615 ifnet_serialize_all(ifp);
bd4539cc 2616 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2617 NULL);
a3dd34d2 2618 ifnet_deserialize_all(ifp);
984263bc
MD
2619 }
2620 } else {
2621 if (ifp->if_amcount > 1) {
2622 ifp->if_amcount--;
2623 } else {
2624 ifp->if_amcount = 0;
2625 ifp->if_flags &= ~IFF_ALLMULTI;
2626 ifr.ifr_flags = ifp->if_flags;
46f25451 2627 ifr.ifr_flagshigh = ifp->if_flags >> 16;
a3dd34d2 2628 ifnet_serialize_all(ifp);
bd4539cc 2629 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2038fb68 2630 NULL);
a3dd34d2 2631 ifnet_deserialize_all(ifp);
984263bc
MD
2632 }
2633 }
4986965b
JS
2634
2635 crit_exit();
984263bc
MD
2636
2637 if (error == 0)
2638 rt_ifmsg(ifp);
2639 return error;
2640}
2641
/*
 * Add a multicast listenership to the interface in question.
 * The link layer provides a routine which converts
 * the network address to a link-layer address (if_resolvemulti);
 * that link-layer membership is tracked by a second ifmultiaddr.
 *
 * Caller must hold the interface's all-serializer (asserted below).
 * Returns 0 on success; on success *retifma (if non-NULL) points at
 * the (possibly pre-existing, re-referenced) network-level record.
 */
int
if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa,
		       struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	/* Create the network-level membership record. */
	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = NULL;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	if (retifma)
		*retifma = ifma;

	/*
	 * If the membership maps to a link-layer address, reference
	 * (or create) the corresponding AF_LINK record as well.
	 */
	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.
	 */
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);

	return 0;
}
2723
72659ed0
SZ
/*
 * Serialized wrapper around if_addmulti_serialized(); takes and
 * releases the interface's all-serializer around the call.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
	    struct ifmultiaddr **retifma)
{
	int rc;

	ifnet_serialize_all(ifp);
	rc = if_addmulti_serialized(ifp, sa, retifma);
	ifnet_deserialize_all(ifp);

	return rc;
}
2736
984263bc
MD
/*
 * Remove a reference to a multicast address on this interface. Yell
 * if the request does not match an existing membership.
 *
 * Caller must hold the interface's all-serializer (asserted below).
 * Drops one reference on the network-level record; when that hits
 * zero the record is destroyed and the companion link-layer record
 * (saved in ifma_lladdr) is dereferenced the same way.
 */
static int
if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Locate the network-level membership record. */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	/* Last reference: announce and tear down the record. */
	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	sa = ifma->ifma_lladdr;		/* reuse 'sa' for pass two below */
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL)
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address. It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above. If some nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored was is not the same as the one which was
	 * in the record for the link-layer address. (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	/* Drop the link-layer record as well and notify the driver. */
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}
2803
72659ed0
SZ
/*
 * Serialized wrapper around if_delmulti_serialized(); takes and
 * releases the interface's all-serializer around the call.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	int rc;

	ifnet_serialize_all(ifp);
	rc = if_delmulti_serialized(ifp, sa);
	ifnet_deserialize_all(ifp);

	return rc;
}
2815
3976c93a
RP
/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 *
 * Caller must hold the interface's all-serializer.  A stack-local
 * marker entry (AF_UNSPEC address) is threaded through the list so
 * iteration survives if_delmulti_serialized() removing entries.
 */
void
if_delallmulti_serialized(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma, mark;
	struct sockaddr sa;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* AF_UNSPEC identifies the marker; real entries skip it below. */
	bzero(&sa, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	sa.sa_len = sizeof(sa);

	bzero(&mark, sizeof(mark));
	mark.ifma_addr = &sa;

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link);
	while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) {
		/* Advance the marker past the entry about to be deleted. */
		TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
		TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark,
		    ifma_link);

		/* Skip other markers. */
		if (ifma->ifma_addr->sa_family == AF_UNSPEC)
			continue;

		if_delmulti_serialized(ifp, ifma->ifma_addr);
	}
	TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
}
2848
2849
984263bc
MD
/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 *
 * Returns 0 on success, EINVAL if there is no link-level sockaddr or
 * the length differs, ENODEV for unsupported interface types.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
	case IFT_IEEE8023ADLAG:
		/* Update both the arpcom copy and the AF_LINK sockaddr. */
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	ifnet_serialize_all(ifp);
	if ((ifp->if_flags & IFF_UP) != 0) {
#ifdef INET
		struct ifaddr_container *ifac;
#endif

		/* Bounce the interface: down then up via SIOCSIFFLAGS. */
		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_gratuitous(ifp, ifa);
		}
#endif
	}
	ifnet_deserialize_all(ifp);
	return (0);
}
2916
c42bebbd 2917
b44c913f
AL
2918/*
2919 * Tunnel interfaces can nest, also they may cause infinite recursion
2920 * calls when misconfigured. Introduce an upper limit to prevent infinite
2921 * recursions, as well as to constrain the nesting depth.
2922 *
2923 * Return 0, if tunnel nesting count is equal or less than limit.
2924 */
2925int
2926if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie,
2927 int limit)
2928{
2929 struct m_tag *mtag;
2930 int count;
2931
2932 count = 1;
2933 mtag = m_tag_locate(m, cookie, 0 /* type */, NULL);
2934 if (mtag != NULL)
2935 count += *(int *)(mtag + 1);
2936 if (count > limit) {
2937 log(LOG_NOTICE,
2938 "%s: packet looped too many times (%d), limit %d\n",
2939 ifp->if_xname, count, limit);
2940 return (ELOOP);
2941 }
2942
2943 if (mtag == NULL) {
2944 mtag = m_tag_alloc(cookie, 0, sizeof(int), M_NOWAIT);
2945 if (mtag == NULL)
2946 return (ENOMEM);
2947 m_tag_prepend(m, mtag);
2948 }
2949
2950 *(int *)(mtag + 1) = count;
2951 return (0);
2952}
2953
2954
c42bebbd
RM
2955/*
2956 * Locate an interface based on a complete address.
2957 */
2958struct ifnet *
2959if_bylla(const void *lla, unsigned char lla_len)
2960{
2961 const struct ifnet_array *arr;
2962 struct ifnet *ifp;
2963 struct sockaddr_dl *sdl;
2964 int i;
2965
2966 arr = ifnet_array_get();
2967 for (i = 0; i < arr->ifnet_count; ++i) {
2968 ifp = arr->ifnet_arr[i];
2969 if (ifp->if_addrlen != lla_len)
2970 continue;
2971
2972 sdl = IF_LLSOCKADDR(ifp);
2973 if (memcmp(lla, LLADDR(sdl), lla_len) == 0)
2974 return (ifp);
2975 }
2976 return (NULL);
2977}
2978
984263bc 2979struct ifmultiaddr *
f23061d4 2980ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
984263bc
MD
2981{
2982 struct ifmultiaddr *ifma;
2983
72659ed0
SZ
2984 /* TODO: need ifnet_serialize_main */
2985 ifnet_serialize_all(ifp);
441d34b2 2986 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
0c3c561c 2987 if (sa_equal(ifma->ifma_addr, sa))
984263bc 2988 break;
72659ed0 2989 ifnet_deserialize_all(ifp);
984263bc
MD
2990
2991 return ifma;
2992}
2993
e9bd1548
MD
2994/*
2995 * This function locates the first real ethernet MAC from a network
2996 * card and loads it into node, returning 0 on success or ENOENT if
2997 * no suitable interfaces were found. It is used by the uuid code to
2998 * generate a unique 6-byte number.
2999 */
3000int
3001if_getanyethermac(uint16_t *node, int minlen)
3002{
3003 struct ifnet *ifp;
3004 struct sockaddr_dl *sdl;
3005
b4051e25
SZ
3006 ifnet_lock();
3007 TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
e9bd1548
MD
3008 if (ifp->if_type != IFT_ETHER)
3009 continue;
3010 sdl = IF_LLSOCKADDR(ifp);
3011 if (sdl->sdl_alen < minlen)
3012 continue;
3013 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
3014 minlen);
b4051e25 3015 ifnet_unlock();
e9bd1548
MD
3016 return(0);
3017 }
b4051e25 3018 ifnet_unlock();
e9bd1548
MD
3019 return (ENOENT);
3020}
3021
1550dfd9
MD
3022/*
3023 * The name argument must be a pointer to storage which will last as
3024 * long as the interface does. For physical devices, the result of
3025 * device_get_name(dev) is a good choice and for pseudo-devices a
3026 * static string works well.
3027 */
3028void
3029if_initname(struct ifnet *ifp, const char *name, int unit)
3030{
3e4a09e7
MD
3031 ifp->if_dname = name;
3032 ifp->if_dunit = unit;
1550dfd9 3033 if (unit != IF_DUNIT_NONE)
f8c7a42d 3034 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1550dfd9
MD
3035 else
3036 strlcpy(ifp->if_xname, name, IFNAMSIZ);
3037}
3038
984263bc
MD
3039int
3040if_printf(struct ifnet *ifp, const char *fmt, ...)
3041{
e2565a42 3042 __va_list ap;
984263bc
MD
3043 int retval;
3044
4b1cf444 3045 retval = kprintf("%s: ", ifp->if_xname);
e2565a42 3046 __va_start(ap, fmt);
379210cb 3047 retval += kvprintf(fmt, ap);
e2565a42 3048 __va_end(ap);
984263bc
MD
3049 return (retval);
3050}
3051
cb80735c
RP
/*
 * Allocate a zeroed ifnet for the given interface type (IFT_*).
 * For ethernet a containing struct arpcom is allocated instead.  If a
 * type-specific L2 allocator was registered via if_register_com_alloc(),
 * it is invoked to set up if_l2com; on its failure the ifnet is freed
 * and NULL is returned.
 */
struct ifnet *
if_alloc(uint8_t type)
{
	struct ifnet *ifp;
	size_t size;

	/*
	 * XXX temporary hack until arpcom is setup in if_l2com
	 */
	if (type == IFT_ETHER)
		size = sizeof(struct arpcom);
	else
		size = sizeof(struct ifnet);

	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);

	ifp->if_type = type;

	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			kfree(ifp, M_IFNET);
			return (NULL);
		}
	}
	return (ifp);
}
3079
/*
 * Release an ifnet allocated with if_alloc(), including any attached
 * description string.
 */
void
if_free(struct ifnet *ifp)
{
	if (ifp->if_description != NULL)
		kfree(ifp->if_description, M_IFDESCR);
	kfree(ifp, M_IFNET);
}
3087
b2f93efe
JS
/*
 * Install the classic (priority + normal FIFO) queueing discipline on
 * the given ifaltq, keeping the interface's current subqueue mapper.
 */
void
ifq_set_classic(struct ifaltq *ifq)
{
	ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
	    ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
}
3094
3095void
2cc2f639
SZ
3096ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
3097 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
f0a26983
SZ
3098{
3099 int q;
3100
2cc2f639
SZ
3101 KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
3102 KASSERT(enqueue != NULL, ("enqueue is not specified"));
3103 KASSERT(dequeue != NULL, ("dequeue is not specified"));
3104 KASSERT(request != NULL, ("request is not specified"));
3105
3106 ifq->altq_mapsubq = mapsubq;
f0a26983
SZ
3107 for (q = 0; q < ifq->altq_subq_cnt; ++q) {
3108 struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
3109
3110 ifsq->ifsq_enqueue = enqueue;
3111 ifsq->ifsq_dequeue = dequeue;
3112 ifsq->ifsq_request = request;
3113 }
b2f93efe
JS
3114}
3115
4cc8caef
SZ
/*
 * Enqueue a packet on the subqueue's normal (low priority) FIFO and
 * update the subqueue packet/byte counters.
 */
static void
ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
{

	classq_add(&ifsq->ifsq_norm, m);
	ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
}

/*
 * Enqueue a packet on the subqueue's high priority FIFO, updating both
 * the overall and the priority-only counters.
 */
static void
ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
{

	classq_add(&ifsq->ifsq_prio, m);
	ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
	ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len);
}

/*
 * Dequeue the next packet from the normal FIFO, adjusting the counters.
 * Returns NULL if the FIFO is empty.
 */
static struct mbuf *
ifsq_norm_dequeue(struct ifaltq_subque *ifsq)
{
	struct mbuf *m;

	m = classq_get(&ifsq->ifsq_norm);
	if (m != NULL)
		ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
	return (m);
}

/*
 * Dequeue the next packet from the high priority FIFO, adjusting both
 * counter sets.  Returns NULL if the FIFO is empty.
 */
static struct mbuf *
ifsq_prio_dequeue(struct ifaltq_subque *ifsq)
{
	struct mbuf *m;

	m = classq_get(&ifsq->ifsq_prio);
	if (m != NULL) {
		ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len);
		ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len);
	}
	return (m);
}
3156
/*
 * Classic enqueue method.  M_PRIO packets go onto the high priority
 * FIFO, all other packets onto the normal FIFO.  When the subqueue is
 * over its packet or byte limit, room is made by dropping older
 * packets: for a priority packet, normal packets are victimized first
 * (as long as the priority FIFO stays under half the limits), for a
 * normal packet only other normal packets are dropped.  If no packet
 * can be dropped the new packet itself is freed and ENOBUFS returned.
 */
int
ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
    struct altq_pktattr *pa __unused)
{

	M_ASSERTPKTHDR(m);
again:
	if (ifsq->ifsq_len >= ifsq->ifsq_maxlen ||
	    ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) {
		struct mbuf *m_drop;

		if (m->m_flags & M_PRIO) {
			m_drop = NULL;
			if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) &&
			    ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) {
				/* Try dropping some from normal queue. */
				m_drop = ifsq_norm_dequeue(ifsq);
			}
			if (m_drop == NULL)
				m_drop = ifsq_prio_dequeue(ifsq);
		} else {
			m_drop = ifsq_norm_dequeue(ifsq);
		}
		if (m_drop != NULL) {
			IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1);
			m_freem(m_drop);
			/* Re-test the limits; may need to drop more. */
			goto again;
		}
		/*
		 * No old packets could be dropped!
		 * NOTE: Caller increases oqdrops.
		 */
		m_freem(m);
		return (ENOBUFS);
	} else {
		if (m->m_flags & M_PRIO)
			ifsq_prio_enqueue(ifsq, m);
		else
			ifsq_norm_enqueue(ifsq, m);
		return (0);
	}
}
3199
/*
 * Classic dequeue method.  The high priority FIFO is always serviced
 * before the normal FIFO.  ALTDQ_POLL peeks at the head without
 * removing it, ALTDQ_REMOVE dequeues the packet.
 */
struct mbuf *
ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct mbuf *m;

	switch (op) {
	case ALTDQ_POLL:
		m = classq_head(&ifsq->ifsq_prio);
		if (m == NULL)
			m = classq_head(&ifsq->ifsq_norm);
		break;

	case ALTDQ_REMOVE:
		m = ifsq_prio_dequeue(ifsq);
		if (m == NULL)
			m = ifsq_norm_dequeue(ifsq);
		break;

	default:
		panic("unsupported ALTQ dequeue op: %d", op);
	}
	return m;
}
3223
/*
 * Classic request method.  Only ALTRQ_PURGE is supported: drain and
 * free every packet queued on the subqueue.
 */
int
ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	switch (req) {
	case ALTRQ_PURGE:
		for (;;) {
			struct mbuf *m;

			m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE);
			if (m == NULL)
				break;
			m_freem(m);
		}
		break;

	default:
		panic("unsupported ALTQ request: %d", req);
	}
	return 0;
}
b2632176 3244
/*
 * Attempt to run ifnet.if_start on the subqueue directly from the
 * current CPU.  On serializer contention, or when more work remains
 * after if_start returns, if_start is scheduled on the subqueue owner
 * CPU instead.
 */
static void
ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	/*
	 * Try to do direct ifnet.if_start on the subqueue first, if there is
	 * contention on the subqueue hardware serializer, ifnet.if_start on
	 * the subqueue will be scheduled on the subqueue owner CPU.
	 */
	if (!ifsq_tryserialize_hw(ifsq)) {
		/*
		 * Subqueue hardware serializer contention happened,
		 * ifnet.if_start on the subqueue is scheduled on
		 * the subqueue owner CPU, and we keep going.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);

	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start on the
		 * subqueue is scheduled on the subqueue owner CPU, and we
		 * keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, force_sched);
	}
}
3285
2aa7f7f8 3286/*
5c593c2a 3287 * Subqeue packets staging mechanism:
2aa7f7f8 3288 *
5c593c2a
SZ
3289 * The packets enqueued into the subqueue are staged to a certain amount
3290 * before the ifnet.if_start on the subqueue is called. In this way, the
3291 * driver could avoid writing to hardware registers upon every packet,
3292 * instead, hardware registers could be written when certain amount of
3293 * packets are put onto hardware TX ring. The measurement on several modern
3294 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware
3295 * registers writing aggregation could save ~20% CPU time when 18bytes UDP
3296 * datagrams are transmitted at 1.48Mpps. The performance improvement by
 3297 * hardware registers writing aggregation is also mentioned by Luigi Rizzo's
3298 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
2aa7f7f8 3299 *
5c593c2a 3300 * Subqueue packets staging is performed for two entry points into drivers'
2aa7f7f8 3301 * transmission function:
5c593c2a
SZ
3302 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try()
3303 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule()
2aa7f7f8 3304 *
5c593c2a
SZ
3305 * Subqueue packets staging will be stopped upon any of the following
3306 * conditions:
2aa7f7f8 3307 * - If the count of packets enqueued on the current CPU is great than or
f0a26983 3308 * equal to ifsq_stage_cntmax. (XXX this should be per-interface)
2aa7f7f8
SZ
 3309 * - If the total length of packets enqueued on the current CPU is greater
3310 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is
 3311 * cut from the hardware's MTU mainly because a full TCP segment's size
3312 * is usually less than hardware's MTU.
5c593c2a
SZ
3313 * - ifsq_ifstart_schedule() is not pending on the current CPU and
3314 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
3315 * released.
2aa7f7f8
SZ
3316 * - The if_start_rollup(), which is registered as low priority netisr
3317 * rollup function, is called; probably because no more work is pending
3318 * for netisr.
3319 *
3320 * NOTE:
5c593c2a 3321 * Currently subqueue packet staging is only performed in netisr threads.
2aa7f7f8 3322 */
9db4b353
SZ
/*
 * Enqueue a packet on the per-CPU mapped subqueue of ifp's send queue
 * and kick (or schedule) ifnet.if_start as needed.  When called from a
 * netisr thread the if_start call may be "staged", i.e. deferred until
 * enough packets/bytes have accumulated or the netisr rollup runs (see
 * the staging notes above).  Returns 0 on success or ENOBUFS if the
 * packet had to be dropped.
 */
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;

	crit_enter_quick(td);

	ifsq = ifq_map_subq(ifq, gd->gd_cpuid);
	ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);

	/* Record length/mcast before the mbuf may be freed by enqueue. */
	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	if (td->td_type == TD_TYPE_NETISR) {
		/* Only netisr threads participate in packet staging. */
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		IFNET_STAT_INC(ifp, oqdrops, 1);
		if (!ifsq_data_ready(ifsq)) {
			ALTQ_SQ_UNLOCK(ifsq);
			goto done;
		}
		/* Queue still has data; make sure if_start runs. */
		avoid_start = 0;
	} else {
		IFNET_STAT_INC(ifp, obytes, len);
		if (mcast)
			IFNET_STAT_INC(ifp, omcasts, 1);
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			goto done;
		}

		/*
		 * Hold the subqueue interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (stage != NULL) {
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			goto done;
		}

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (start)
		ifsq_ifstart_try(ifsq, 0);

done:
	crit_exit_quick(td);
	return error;
}
3412
/*
 * Allocate a zeroed ifaddr of at least the given size, along with one
 * reference-counted ifaddr_container per CPU.  Each container starts
 * with a single reference.
 */
void *
ifa_create(int size)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO);

	/*
	 * Make ifa_container available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifa->ifa_containers =
	    kmalloc(ncpus * sizeof(struct ifaddr_container),
		M_IFADDR,
		M_INTWAIT | M_ZERO | M_CACHEALIGN);

	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}
3445
b2632176
SZ
/*
 * Release a per-CPU ifaddr_container whose refcount has dropped to
 * zero.  When the last container of the ifaddr is released, the
 * container array and the ifaddr itself are freed.
 */
void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
	    ("ifa is still on %#x lists", ifac->ifa_listmask));

	/* Poison the magic to catch use-after-free of this container. */
	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
	    ("invalid # of ifac, %d", ifa->ifa_ncnt));
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
		/* Last container released; free everything. */
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}
3472
/*
 * Per-CPU handler: link the ifaddr's container onto this CPU's
 * if_addrheads list (head or tail per msg->tail), then forward the
 * message to the next CPU.
 */
static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
	    ("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}

/*
 * Link an ifaddr to an interface on all CPUs by circulating a netmsg
 * through the netisrs; blocks until every CPU has processed it.
 */
void
ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_iflink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;
	msg.tail = tail;

	netisr_domsg(&msg.base, 0);
}
3513
/*
 * Per-CPU handler: remove the ifaddr's container from this CPU's
 * if_addrheads list, then forward the message to the next CPU.
 */
static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
	    ("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;

	crit_exit();

	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}

/*
 * Unlink an ifaddr from an interface on all CPUs by circulating a
 * netmsg through the netisrs; blocks until every CPU has processed it.
 */
void
ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_ifunlink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;

	netisr_domsg(&msg.base, 0);
}
3550
/*
 * Per-CPU handler: drop this CPU's reference on the ifaddr, then
 * forward the message to the next CPU.
 */
static void
ifa_destroy_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;

	IFAFREE(msg->ifa);
	netisr_forwardmsg_all(&nmsg->base, mycpuid + 1);
}

/*
 * Destroy an ifaddr by releasing its per-CPU references on all CPUs;
 * blocks until every CPU has processed the message.
 */
void
ifa_destroy(struct ifaddr *ifa)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_destroy_dispatch);
	msg.ifa = ifa;

	netisr_domsg(&msg.base, 0);
}
3571
239bdb58
SZ
/*
 * Low priority netisr rollup function: flush all packets staged on the
 * current CPU by either scheduling if_start on the subqueue owner CPU
 * (when a schedule was already requested) or trying a direct if_start
 * under the subqueue interlock.
 */
static void
if_start_rollup(void)
{
	struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
	struct ifsubq_stage *stage;

	crit_enter();

	while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
		struct ifaltq_subque *ifsq = stage->stg_subq;
		int is_sched = 0;

		if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
			is_sched = 1;
		ifsq_stage_remove(head, stage);

		if (is_sched) {
			ifsq_ifstart_schedule(ifsq, 1);
		} else {
			int start = 0;

			ALTQ_SQ_LOCK(ifsq);
			if (!ifsq_is_started(ifsq)) {
				/*
				 * Hold the subqueue interlock of
				 * ifnet.if_start
				 */
				ifsq_set_started(ifsq);
				start = 1;
			}
			ALTQ_SQ_UNLOCK(ifsq);

			if (start)
				ifsq_ifstart_try(ifsq, 1);
		}
		KKASSERT((stage->stg_flags &
		    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	}

	crit_exit();
}
239bdb58 3613
/*
 * Subsystem initialization: set up the per-CPU staging queues and
 * register the if_start rollup with the netisr framework.
 */
static void
ifnetinit(void *dummy __unused)
{
	int i;

	/* XXX netisr_ncpus */
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
}
bd08b792 3624
aeb3c11e
RP
/*
 * Register type-specific L2 common-structure allocate/free hooks for
 * the given interface type; used by if_alloc()/if_free().  A type may
 * only be registered once.
 */
void
if_register_com_alloc(u_char type,
    if_com_alloc_t *a, if_com_free_t *f)
{

	KASSERT(if_com_alloc[type] == NULL,
	    ("if_register_com_alloc: %d already registered", type));
	KASSERT(if_com_free[type] == NULL,
	    ("if_register_com_alloc: %d free already registered", type));

	if_com_alloc[type] = a;
	if_com_free[type] = f;
}

/*
 * Remove the L2 allocate/free hooks for the given interface type.
 */
void
if_deregister_com_alloc(u_char type)
{

	KASSERT(if_com_alloc[type] != NULL,
	    ("if_deregister_com_alloc: %d not registered", type));
	KASSERT(if_com_free[type] != NULL,
	    ("if_deregister_com_alloc: %d free not registered", type));
	if_com_alloc[type] = NULL;
	if_com_free[type] = NULL;
}
a317449e 3650
b7a0c958
SZ
/*
 * Set the send queue length limit, padded by the maximum number of
 * packets which may be held in the per-CPU staging queues.
 */
void
ifq_set_maxlen(struct ifaltq *ifq, int len)
{
	ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
}
2cc2f639
SZ
3656
/*
 * Subqueue mapper which maps every CPU to the default subqueue.
 */
int
ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
{
	return ALTQ_SUBQ_INDEX_DEFAULT;
}

/*
 * Subqueue mapper which distributes CPUs round-robin across the
 * subqueues, using altq_subq_mappriv as the modulus.
 */
int
ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid)
{

	return (cpuid % ifq->altq_subq_mappriv);
}
3669
e2292763
MD
/*
 * Watchdog timeout.  Process callback as appropriate.  If we cannot
 * serialize the ifnet just try again on the next timeout.
 *
 * NOTE: The ifnet can adjust wd_timer while holding the serializer.  We
 *	 can only safely adjust it under the same circumstances.
 */
static void
ifsq_watchdog(void *arg)
{
	struct ifsubq_watchdog *wd = arg;
	struct ifnet *ifp;
	int count;

	/*
	 * Fast track.  Try to avoid acquiring the serializer when not
	 * near the terminal count, unless asked to.  If the atomic op
	 * to decrement the count fails just retry on the next callout.
	 */
	count = wd->wd_timer;
	cpu_ccfence();
	if (count == 0)
		goto done;
	if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) {
		(void)atomic_cmpset_int(&wd->wd_timer, count, count - 1);
		goto done;
	}

	/*
	 * Obtain the serializer and then re-test all wd_timer conditions
	 * as it may have changed.  NICs do not mess with wd_timer without
	 * holding the serializer.
	 *
	 * If we are unable to obtain the serializer just retry the same
	 * count on the next callout.
	 *
	 * - call watchdog in terminal count (0)
	 * - call watchdog on last tick (1) if requested
	 * - call watchdog on all ticks if requested
	 */
	ifp = ifsq_get_ifp(wd->wd_subq);
	if (ifnet_tryserialize_all(ifp) == 0)
		goto done;
	if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) {
		--count;
		if (count == 0 ||
		    (wd->wd_flags & IF_WDOG_ALLTICKS) ||
		    ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) {
			wd->wd_watchdog(wd->wd_subq);
		}
	}
	ifnet_deserialize_all(ifp);
done:
	/* Always re-arm; the watchdog runs for the life of the subqueue. */
	ifsq_watchdog_reset(wd);
}
3725
/*
 * Re-arm the watchdog callout for one tick interval (hz) on the
 * subqueue's owner CPU.
 */
static void
ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
{
	callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
	    ifsq_get_cpuid(wd->wd_subq));
}

/*
 * Initialize a subqueue watchdog.  flags may include IF_WDOG_ALLTICKS
 * and/or IF_WDOG_LASTTICK to control when the callback fires (see
 * ifsq_watchdog()).  The callout is not armed until
 * ifsq_watchdog_start().
 */
void
ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq,
    ifsq_watchdog_t watchdog, int flags)
{
	callout_init_mp(&wd->wd_callout);
	wd->wd_timer = 0;
	wd->wd_flags = flags;
	wd->wd_subq = ifsq;
	wd->wd_watchdog = watchdog;
}

/*
 * Arm the watchdog callout with the timer cleared.
 */
void
ifsq_watchdog_start(struct ifsubq_watchdog *wd)
{
	atomic_swap_int(&wd->wd_timer, 0);
	ifsq_watchdog_reset(wd);
}

/*
 * Clear the timer and cancel the pending callout.
 */
void
ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
{
	atomic_swap_int(&wd->wd_timer, 0);
	callout_stop(&wd->wd_callout);
}

/*
 * Set the countdown (in ticks) after which the watchdog callback will
 * fire.
 */
void
ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count)
{
	atomic_swap_int(&wd->wd_timer, count);
}
3763
b4051e25
SZ
/*
 * Acquire the global ifnet list mutex.  May not be called from a
 * netisr thread.
 */
void
ifnet_lock(void)
{
	KASSERT(curthread->td_type != TD_TYPE_NETISR,
	    ("try holding ifnet lock in netisr"));
	mtx_lock(&ifnet_mtx);
}

/*
 * Release the global ifnet list mutex.  May not be called from a
 * netisr thread.
 */
void
ifnet_unlock(void)
{
	KASSERT(curthread->td_type != TD_TYPE_NETISR,
	    ("try holding ifnet lock in netisr"));
	mtx_unlock(&ifnet_mtx);
}
3779
/*
 * Allocate an ifnet_array sized to hold 'count' interface pointers.
 * The pointer slots themselves are left for the caller to fill in.
 */
static struct ifnet_array *
ifnet_array_alloc(int count)
{
	struct ifnet_array *arr;

	arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]),
	    M_IFNET, M_WAITOK);
	arr->ifnet_count = count;

	return arr;
}

/*
 * Free an ifnet_array; the static empty array ifnet_array0 is never
 * freed.
 */
static void
ifnet_array_free(struct ifnet_array *arr)
{
	if (arr == &ifnet_array0)
		return;
	kfree(arr, M_IFNET);
}
3799
/*
 * Build a new ifnet_array consisting of the old array's contents with
 * ifp appended at the end.  The old array is not modified or freed.
 */
static struct ifnet_array *
ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr)
{
	struct ifnet_array *arr;
	int count, i;

	KASSERT(old_arr->ifnet_count >= 0,
	    ("invalid ifnet array count %d", old_arr->ifnet_count));
	count = old_arr->ifnet_count + 1;
	arr = ifnet_array_alloc(count);

	/*
	 * Save the old ifnet array and append this ifp to the end of
	 * the new ifnet array.
	 */
	for (i = 0; i < old_arr->ifnet_count; ++i) {
		KASSERT(old_arr->ifnet_arr[i] != ifp,
		    ("%s is already in ifnet array", ifp->if_xname));
		arr->ifnet_arr[i] = old_arr->ifnet_arr[i];
	}
	KASSERT(i == count - 1,
	    ("add %s, ifnet array index mismatch, should be %d, but got %d",
	     ifp->if_xname, count - 1, i));
	arr->ifnet_arr[i] = ifp;

	return arr;
}
3827
/*
 * Build a new ifnet_array consisting of the old array's contents with
 * ifp removed.  ifp must be present exactly once; the old array is not
 * modified or freed.
 */
static struct ifnet_array *
ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr)
{
	struct ifnet_array *arr;
	int count, i, idx, found = 0;

	KASSERT(old_arr->ifnet_count > 0,
	    ("invalid ifnet array count %d", old_arr->ifnet_count));
	count = old_arr->ifnet_count - 1;
	arr = ifnet_array_alloc(count);

	/*
	 * Save the old ifnet array, but skip this ifp.
	 */
	idx = 0;
	for (i = 0; i < old_arr->ifnet_count; ++i) {
		if (old_arr->ifnet_arr[i] == ifp) {
			KASSERT(!found,
			    ("dup %s is in ifnet array", ifp->if_xname));
			found = 1;
			continue;
		}
		KASSERT(idx < count,
		    ("invalid ifnet array index %d, count %d", idx, count));
		arr->ifnet_arr[idx] = old_arr->ifnet_arr[i];
		++idx;
	}
	KASSERT(found, ("%s is not in ifnet array", ifp->if_xname));
	KASSERT(idx == count,
	    ("del %s, ifnet array count mismatch, should be %d, but got %d ",
	     ifp->if_xname, count, idx));

	return arr;
}
3862
/*
 * Return the current snapshot of the ifnet array.  Must be called from
 * a netisr thread, which guarantees the snapshot stays valid while it
 * is being used.
 */
const struct ifnet_array *
ifnet_array_get(void)
{
	const struct ifnet_array *ret;

	KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr"));
	ret = ifnet_array;
	/* Make sure 'ret' is really used. */
	cpu_ccfence();
	return (ret);
}
3874
3875int
3876ifnet_array_isempty(void)
3877{
3878 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr"));
3879 if (ifnet_array->ifnet_count == 0)
3880 return 1;
3881 else
3882 return 0;
3883}
9a74b592
SZ
3884
/*
 * Initialize a stack-allocated ifaddr marker used to iterate interface
 * address lists.  The embedded ifaddr points at the marker's own
 * sockaddr storage and at the given interface.
 */
void
ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp)
{
	struct ifaddr *ifa;

	memset(mark, 0, sizeof(*mark));
	ifa = &mark->ifa;

	mark->ifac.ifa = ifa;

	ifa->ifa_addr = &mark->addr;
	ifa->ifa_dstaddr = &mark->dstaddr;
	ifa->ifa_netmask = &mark->netmask;
	ifa->ifa_ifp = ifp;
}
68732d8f
SZ
3900
/*
 * Clamp a requested ring count into the valid range: non-positive or
 * over-limit requests fall back to ring_cntmax, and the result never
 * exceeds the number of netisr CPUs.
 */
static int
if_ringcnt_fixup(int ring_cnt, int ring_cntmax)
{

	KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax));

	if (ring_cnt <= 0 || ring_cnt > ring_cntmax)
		ring_cnt = ring_cntmax;
	if (ring_cnt > netisr_ncpus)
		ring_cnt = netisr_ncpus;
	return (ring_cnt);
}
3913
/*
 * Assign the ringmap's grid size and compute the ring->CPU mapping:
 * rings occupy consecutive CPUs starting at an offset derived from the
 * device unit, so multiple units spread across the netisr CPUs.
 */
static void
if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid)
{
	int i, offset;

	KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid));
	KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d",
	    grid, rm->rm_cnt));
	rm->rm_grid = grid;

	offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus;
	for (i = 0; i < rm->rm_cnt; ++i) {
		rm->rm_cpumap[i] = offset + i;
		KASSERT(rm->rm_cpumap[i] < netisr_ncpus,
		    ("invalid cpumap[%d] = %d, offset %d", i,
		     rm->rm_cpumap[i], offset));
	}
}
3932
434f3dd0
SZ
/*
 * Allocate a ring->CPU map for a device.  The ring count is clamped by
 * if_ringcnt_fixup() and, with RINGMAP_FLAG_POWEROF2, rounded down to a
 * power of 2.  The grid is chosen as the smallest divisor-based chunk
 * of netisr_ncpus that still holds all rings, then the CPU mapping is
 * derived via if_ringmap_set_grid().
 */
static struct if_ringmap *
if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax,
    uint32_t flags)
{
	struct if_ringmap *rm;
	int i, grid = 0, prev_grid;

	ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax);
	rm = kmalloc(__offsetof(struct if_ringmap, rm_cpumap[ring_cnt]),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	rm->rm_cnt = ring_cnt;
	if (flags & RINGMAP_FLAG_POWEROF2)
		rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1);

	prev_grid = netisr_ncpus;
	for (i = 0; i < netisr_ncpus; ++i) {
		/* Only consider grids which evenly divide netisr_ncpus. */
		if (netisr_ncpus % (i + 1) != 0)
			continue;

		grid = netisr_ncpus / (i + 1);
		if (rm->rm_cnt > grid) {
			/* Too small; use the previous (larger) grid. */
			grid = prev_grid;
			break;
		}

		if (rm->rm_cnt > netisr_ncpus / (i + 2))
			break;
		prev_grid = grid;
	}
	if_ringmap_set_grid(dev, rm, grid);

	return (rm);
}
3967
434f3dd0
SZ
/*
 * Allocate a ring->CPU map; the ring count is only clamped, not
 * rounded.
 */
struct if_ringmap *
if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax)
{

	return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax,
	    RINGMAP_FLAG_NONE));
}

/*
 * Allocate a ring->CPU map whose ring count is additionally rounded
 * down to a power of 2 (for hardware which requires it).
 */
struct if_ringmap *
if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax)
{

	return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax,
	    RINGMAP_FLAG_POWEROF2));
}

/*
 * Free a ring->CPU map allocated by if_ringmap_alloc{,2}().
 */
void
if_ringmap_free(struct if_ringmap *rm)
{

	kfree(rm, M_DEVBUF);
}
3990
a0964e91
SZ
/*
 * Align the two ringmaps.
 *
 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings.
 *
 * Before:
 *
 * CPU      0  1  2  3   4  5  6  7
 * NIC_RX  n0 n1 n2 n3
 * NIC_TX          N0 N1
 *
 * After:
 *
 * CPU      0  1  2  3   4  5  6  7
 * NIC_RX  n0 n1 n2 n3
 * NIC_TX  N0 N1
 */
void
if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1)
{

	/* Grow the smaller grid to match the larger one. */
	if (rm0->rm_grid > rm1->rm_grid)
		if_ringmap_set_grid(dev, rm1, rm0->rm_grid);
	else if (rm0->rm_grid < rm1->rm_grid)
		if_ringmap_set_grid(dev, rm0, rm1->rm_grid);
}
4017
/*
 * Align two ringmaps of the same device and then re-shuffle the smaller
 * ("subset") map inside the larger one's grid so rings are distributed
 * more evenly across CPUs (see the example below).
 */
void
if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1)
{
	int subset_grid, cnt, divisor, mod, offset, i;
	struct if_ringmap *subset_rm, *rm;
	int old_rm0_grid, old_rm1_grid;

	if (rm0->rm_grid == rm1->rm_grid)
		return;

	/* Save grid for later use */
	old_rm0_grid = rm0->rm_grid;
	old_rm1_grid = rm1->rm_grid;

	if_ringmap_align(dev, rm0, rm1);

	/*
	 * Re-shuffle rings to get more even distribution.
	 *
	 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings.
	 *
	 * CPU       0  1  2  3  4  5  6  7  8  9 10 11
	 *
	 * NIC_RX   a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3
	 * NIC_TX   A0 A1       B0 B1       C0 C1
	 *
	 * NIC_RX   d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3
	 * NIC_TX         D0 D1       E0 E1       F0 F1
	 */

	if (rm0->rm_cnt >= (2 * old_rm1_grid)) {
		cnt = rm0->rm_cnt;
		subset_grid = old_rm1_grid;
		subset_rm = rm1;
		rm = rm0;
	} else if (rm1->rm_cnt > (2 * old_rm0_grid)) {
		cnt = rm1->rm_cnt;
		subset_grid = old_rm0_grid;
		subset_rm = rm0;
		rm = rm1;
	} else {
		/* No space to shuffle. */
		return;
	}

	mod = cnt / subset_grid;
	KKASSERT(mod >= 2);
	divisor = netisr_ncpus / rm->rm_grid;
	offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid;

	for (i = 0; i < subset_rm->rm_cnt; ++i) {
		subset_rm->rm_cpumap[i] += offset;
		KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus,
		    ("match: invalid cpumap[%d] = %d, offset %d",
		     i, subset_rm->rm_cpumap[i], offset));
	}
#ifdef INVARIANTS
	/* Every subset CPU must also appear in the superset map. */
	for (i = 0; i < subset_rm->rm_cnt; ++i) {
		int j;

		for (j = 0; j < rm->rm_cnt; ++j) {
			if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i])
				break;
		}
		KASSERT(j < rm->rm_cnt,
		    ("subset cpumap[%d] = %d not found in superset",
		     i, subset_rm->rm_cpumap[i]));
	}
#endif
}
4088
/*
 * Return the number of rings in the map.
 */
int
if_ringmap_count(const struct if_ringmap *rm)
{

	return (rm->rm_cnt);
}

/*
 * Return the CPU that the given ring is mapped to.
 */
int
if_ringmap_cpumap(const struct if_ringmap *rm, int ring)
{

	KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring));
	return (rm->rm_cpumap[ring]);
}
4103
/*
 * Fill an RSS redirect table (ring indices) for the given ringmap.
 * table_nent must be a positive multiple of NETISR_CPUMAX; entries
 * beyond the first NETISR_CPUMAX are copies of the first chunk.
 */
void
if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent)
{
	int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy;

	KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0,
	    ("invalid redirect table entries %d", table_nent));

	/* Base pattern: cycle the rings inside each grid. */
	grid_idx = 0;
	for (i = 0; i < NETISR_CPUMAX; ++i) {
		table[i] = grid_idx++ % rm->rm_cnt;

		if (grid_idx == rm->rm_grid)
			grid_idx = 0;
	}

	/*
	 * Make the ring distributed more evenly for the remainder
	 * of each grid.
	 *
	 * e.g. 12 netisrs, rm contains 8 rings.
	 *
	 * Redirect table before:
	 *
	 *  0  1  2  3  4  5  6  7  0  1  2  3  0  1  2  3
	 *  4  5  6  7  0  1  2  3  0  1  2  3  4  5  6  7
	 *  0  1  2  3  0  1  2  3  4  5  6  7  0  1  2  3
	 *  ....
	 *
	 * Redirect table after being patched (pX, patched entries):
	 *
	 *  0  1  2  3  4  5  6  7 p0 p1 p2 p3  0  1  2  3
	 *  4  5  6  7 p4 p5 p6 p7  0  1  2  3  4  5  6  7
	 * p0 p1 p2 p3  0  1  2  3  4  5  6  7 p4 p5 p6 p7
	 *  ....
	 */
	patch_cnt = rm->rm_grid % rm->rm_cnt;
	if (patch_cnt == 0)
		goto done;
	patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt);

	grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid;
	grid_idx = 0;
	for (i = 0; i < grid_cnt; ++i) {
		int j;

		for (j = 0; j < patch_cnt; ++j) {
			int fix_idx;

			fix_idx = (i * rm->rm_grid) + patch_off + j;
			if (fix_idx >= NETISR_CPUMAX)
				goto done;
			table[fix_idx] = grid_idx++ % rm->rm_cnt;
		}
	}
done:
	/*
	 * If the device supports larger redirect table, duplicate
	 * the first NETISR_CPUMAX entries to the rest of the table,
	 * so that it matches upper layer's expectation:
	 * (hash & NETISR_CPUMASK) % netisr_ncpus
	 */
	ncopy = table_nent / NETISR_CPUMAX;
	for (i = 1; i < ncopy; ++i) {
		memcpy(&table[i * NETISR_CPUMAX], table,
		    NETISR_CPUMAX * sizeof(table[0]));
	}
	if (if_ringmap_dumprdr) {
		for (i = 0; i < table_nent; ++i) {
			if (i != 0 && i % 16 == 0)
				kprintf("\n");
			kprintf("%03d ", table[i]);
		}
		kprintf("\n");
	}
}
4180
/*
 * Sysctl handler exporting the ring->CPU map (arg1) as an array of
 * ints, one entry per ring.
 */
int
if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct if_ringmap *rm = arg1;
	int i, error = 0;

	for (i = 0; i < rm->rm_cnt; ++i) {
		int cpu = rm->rm_cpumap[i];

		error = SYSCTL_OUT(req, &cpu, sizeof(cpu));
		if (error)
			break;
	}
	return (error);
}