Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /* |
2 | * Copyright (c) 1980, 1986, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
dc71b7ab | 13 | * 3. Neither the name of the University nor the names of its contributors |
984263bc MD |
14 | * may be used to endorse or promote products derived from this software |
15 | * without specific prior written permission. | |
16 | * | |
17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
27 | * SUCH DAMAGE. | |
28 | * | |
29 | * @(#)if.c 8.3 (Berkeley) 1/4/94 | |
f23061d4 | 30 | * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ |
984263bc MD |
31 | */ |
32 | ||
984263bc MD |
33 | #include "opt_inet6.h" |
34 | #include "opt_inet.h" | |
b3a7093f | 35 | #include "opt_ifpoll.h" |
984263bc MD |
36 | |
37 | #include <sys/param.h> | |
38 | #include <sys/malloc.h> | |
39 | #include <sys/mbuf.h> | |
40 | #include <sys/systm.h> | |
41 | #include <sys/proc.h> | |
2b3f93ea | 42 | #include <sys/caps.h> |
6b6e0885 | 43 | #include <sys/protosw.h> |
984263bc MD |
44 | #include <sys/socket.h> |
45 | #include <sys/socketvar.h> | |
6b6e0885 | 46 | #include <sys/socketops.h> |
984263bc | 47 | #include <sys/kernel.h> |
9db4b353 | 48 | #include <sys/ktr.h> |
9683f229 | 49 | #include <sys/mutex.h> |
233c8570 | 50 | #include <sys/lock.h> |
984263bc MD |
51 | #include <sys/sockio.h> |
52 | #include <sys/syslog.h> | |
53 | #include <sys/sysctl.h> | |
698ac46c | 54 | #include <sys/domain.h> |
e9cb6d99 | 55 | #include <sys/thread.h> |
78195a76 | 56 | #include <sys/serialize.h> |
71fc104f | 57 | #include <sys/bus.h> |
e1c6b0c1 | 58 | #include <sys/jail.h> |
984263bc | 59 | |
9683f229 MD |
60 | #include <sys/thread2.h> |
61 | #include <sys/msgport2.h> | |
62 | #include <sys/mutex2.h> | |
63 | ||
984263bc MD |
64 | #include <net/if.h> |
65 | #include <net/if_arp.h> | |
66 | #include <net/if_dl.h> | |
67 | #include <net/if_types.h> | |
68 | #include <net/if_var.h> | |
afc5d5f3 | 69 | #include <net/if_ringmap.h> |
4d723e5a | 70 | #include <net/ifq_var.h> |
984263bc MD |
71 | #include <net/radix.h> |
72 | #include <net/route.h> | |
65a24520 | 73 | #include <net/if_clone.h> |
5337421c | 74 | #include <net/netisr2.h> |
b2632176 SZ |
75 | #include <net/netmsg2.h> |
76 | ||
d5a2b87c | 77 | #include <machine/atomic.h> |
984263bc | 78 | #include <machine/stdarg.h> |
b2632176 | 79 | #include <machine/smp.h> |
984263bc MD |
80 | |
81 | #if defined(INET) || defined(INET6) | |
984263bc MD |
82 | #include <netinet/in.h> |
83 | #include <netinet/in_var.h> | |
84 | #include <netinet/if_ether.h> | |
85 | #ifdef INET6 | |
984263bc MD |
86 | #include <netinet6/in6_var.h> |
87 | #include <netinet6/in6_ifattach.h> | |
233c8570 AL |
88 | #endif /* INET6 */ |
89 | #endif /* INET || INET6 */ | |
984263bc | 90 | |
/*
 * Netmsg carrying an ifaddr add/delete request.
 * NOTE(review): the consumer is not visible in this chunk; presumably
 * this message is forwarded along the netisr ports so each CPU can
 * update its per-CPU ifaddr list -- confirm against ifa_iflink()/
 * ifa_ifunlink() in the rest of the file.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;	/* embedded netmsg header */
	struct ifaddr *ifa;		/* address being operated on */
	struct ifnet *ifp;		/* owning interface */
	int tail;			/* non-zero: insert at list tail */
};
97 | ||
f0a26983 SZ |
/*
 * Head of a per-CPU list of staged ifnet subqueues (see
 * ifsubq_stage_heads[]).  __cachealign keeps each CPU's head on its
 * own cache line to avoid false sharing.
 */
struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage) stg_head;
} __cachealign;
101 | ||
68732d8f SZ |
/*
 * Hardware ring to CPU mapping.
 */
struct if_ringmap {
	int rm_cnt;		/* number of rings in rm_cpumap[] */
	int rm_grid;		/* NOTE(review): grouping/stride factor,
				 * semantics not visible here -- confirm
				 * against if_ringmap_alloc() */
	int rm_cpumap[];	/* flexible array: per-ring target CPU */
};
107 | ||
434f3dd0 SZ |
108 | #define RINGMAP_FLAG_NONE 0x0 |
109 | #define RINGMAP_FLAG_POWEROF2 0x1 | |
110 | ||
984263bc MD |
111 | /* |
112 | * System initialization | |
113 | */ | |
698ac46c HS |
114 | static void if_attachdomain(void *); |
115 | static void if_attachdomain1(struct ifnet *); | |
436c57ea SZ |
116 | static int ifconf(u_long, caddr_t, struct ucred *); |
117 | static void ifinit(void *); | |
90af4fd3 | 118 | static void ifnetinit(void *); |
436c57ea | 119 | static void if_slowtimo(void *); |
436c57ea | 120 | static int if_rtdel(struct radix_node *, void *); |
b5df1a85 | 121 | static void if_slowtimo_dispatch(netmsg_t); |
984263bc | 122 | |
8a248085 SZ |
123 | /* Helper functions */ |
124 | static void ifsq_watchdog_reset(struct ifsubq_watchdog *); | |
72659ed0 | 125 | static int if_delmulti_serialized(struct ifnet *, struct sockaddr *); |
b4051e25 SZ |
126 | static struct ifnet_array *ifnet_array_alloc(int); |
127 | static void ifnet_array_free(struct ifnet_array *); | |
128 | static struct ifnet_array *ifnet_array_add(struct ifnet *, | |
129 | const struct ifnet_array *); | |
130 | static struct ifnet_array *ifnet_array_del(struct ifnet *, | |
131 | const struct ifnet_array *); | |
233c8570 AL |
132 | static struct ifg_group *if_creategroup(const char *); |
133 | static int if_destroygroup(struct ifg_group *); | |
134 | static int if_delgroup_locked(struct ifnet *, const char *); | |
135 | static int if_getgroups(struct ifgroupreq *, struct ifnet *); | |
136 | static int if_getgroupmembers(struct ifgroupreq *); | |
8a248085 | 137 | |
984263bc MD |
138 | #ifdef INET6 |
139 | /* | |
140 | * XXX: declare here to avoid to include many inet6 related files.. | |
141 | * should be more generalized? | |
142 | */ | |
436c57ea | 143 | extern void nd6_setmtu(struct ifnet *); |
984263bc MD |
144 | #endif |
145 | ||
436c57ea SZ |
146 | SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); |
147 | SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); | |
68732d8f | 148 | SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap"); |
436c57ea | 149 | |
335a88d5 | 150 | static int ifsq_stage_cntmax = 16; |
f0a26983 | 151 | TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax); |
28cc0c29 | 152 | SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW, |
f0a26983 | 153 | &ifsq_stage_cntmax, 0, "ifq staging packet count max"); |
28cc0c29 | 154 | |
6517ec3f SZ |
155 | static int if_stats_compat = 0; |
156 | SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW, | |
157 | &if_stats_compat, 0, "Compat the old ifnet stats"); | |
158 | ||
68732d8f SZ |
159 | static int if_ringmap_dumprdr = 0; |
160 | SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW, | |
161 | &if_ringmap_dumprdr, 0, "dump redirect table"); | |
162 | ||
f6994c54 AHJ |
163 | /* Interface description */ |
164 | static unsigned int ifdescr_maxlen = 1024; | |
165 | SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, | |
166 | &ifdescr_maxlen, 0, | |
167 | "administrative maximum length for interface description"); | |
168 | ||
f3f3eadb | 169 | SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL); |
3c5b1eb8 | 170 | SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL); |
436c57ea | 171 | |
2949c680 AL |
172 | static if_com_alloc_t *if_com_alloc[256]; |
173 | static if_com_free_t *if_com_free[256]; | |
aeb3c11e | 174 | |
436c57ea SZ |
175 | MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); |
176 | MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); | |
cb80735c | 177 | MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure"); |
f6994c54 | 178 | MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); |
984263bc | 179 | |
436c57ea | 180 | int ifqmaxlen = IFQ_MAXLEN; |
b64bfcc3 | 181 | struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet); |
233c8570 AL |
182 | struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head); |
183 | static struct lock ifgroup_lock; | |
984263bc | 184 | |
b4051e25 SZ |
185 | static struct ifnet_array ifnet_array0; |
186 | static struct ifnet_array *ifnet_array = &ifnet_array0; | |
187 | ||
b5df1a85 SZ |
188 | static struct callout if_slowtimo_timer; |
189 | static struct netmsg_base if_slowtimo_netmsg; | |
436c57ea SZ |
190 | |
191 | int if_index = 0; | |
192 | struct ifnet **ifindex2ifnet = NULL; | |
cabfc9f6 | 193 | static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet"); |
abbb44bb | 194 | |
f0a26983 | 195 | static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU]; |
28cc0c29 | 196 | |
f0a26983 | 197 | #ifdef notyet |
9db4b353 | 198 | #define IFQ_KTR_STRING "ifq=%p" |
2949c680 | 199 | #define IFQ_KTR_ARGS struct ifaltq *ifq |
9db4b353 SZ |
200 | #ifndef KTR_IFQ |
201 | #define KTR_IFQ KTR_ALL | |
202 | #endif | |
203 | KTR_INFO_MASTER(ifq); | |
5bf48697 AE |
204 | KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS); |
205 | KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS); | |
9db4b353 SZ |
206 | #define logifq(name, arg) KTR_LOG(ifq_ ## name, arg) |
207 | ||
208 | #define IF_START_KTR_STRING "ifp=%p" | |
5bf48697 | 209 | #define IF_START_KTR_ARGS struct ifnet *ifp |
9db4b353 SZ |
210 | #ifndef KTR_IF_START |
211 | #define KTR_IF_START KTR_ALL | |
212 | #endif | |
213 | KTR_INFO_MASTER(if_start); | |
214 | KTR_INFO(KTR_IF_START, if_start, run, 0, | |
5bf48697 | 215 | IF_START_KTR_STRING, IF_START_KTR_ARGS); |
9db4b353 | 216 | KTR_INFO(KTR_IF_START, if_start, sched, 1, |
5bf48697 | 217 | IF_START_KTR_STRING, IF_START_KTR_ARGS); |
9db4b353 | 218 | KTR_INFO(KTR_IF_START, if_start, avoid, 2, |
5bf48697 | 219 | IF_START_KTR_STRING, IF_START_KTR_ARGS); |
9db4b353 | 220 | KTR_INFO(KTR_IF_START, if_start, contend_sched, 3, |
5bf48697 | 221 | IF_START_KTR_STRING, IF_START_KTR_ARGS); |
9db4b353 | 222 | KTR_INFO(KTR_IF_START, if_start, chase_sched, 4, |
5bf48697 | 223 | IF_START_KTR_STRING, IF_START_KTR_ARGS); |
9db4b353 | 224 | #define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg) |
233c8570 | 225 | #endif /* notyet */ |
315a7da3 | 226 | |
984263bc MD |
227 | /* |
228 | * Network interface utility routines. | |
229 | * | |
230 | * Routines with ifa_ifwith* names take sockaddr *'s as | |
231 | * parameters. | |
232 | */ | |
/*
 * Network interface subsystem initialization: set up the interface
 * group lock and kick off the periodic if_slowtimo scan by sending
 * its netmsg to netisr0.
 */
/* ARGSUSED */
static void
ifinit(void *dummy)
{
	lockinit(&ifgroup_lock, "ifgroup", 0, 0);

	callout_init_mp(&if_slowtimo_timer);
	netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, if_slowtimo_dispatch);

	/* Start if_slowtimo */
	lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg);
}
246 | ||
/*
 * IPI handler: send this subqueue's per-CPU if_start netmsg to the
 * local netisr port, unless the message is still in flight (MSGF_DONE
 * cleared), in which case the pending dispatch already covers us.
 */
static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
	crit_exit();
}
258 | ||
/*
 * Unlink a staged subqueue from the given per-CPU staging list and
 * reset its staging state (QUED/SCHED flags, packet and byte counts).
 * Caller must have staged it (IFSQ_STAGE_FLAG_QUED asserted).
 */
static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}
268 | ||
/*
 * Link a subqueue onto the given per-CPU staging list and mark it
 * queued.  Caller must not have staged or scheduled it yet.
 */
static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}
277 | ||
/*
 * Schedule ifnet.if_start on the subqueue owner CPU
 *
 * Fast path: when called from a netisr thread with staging enabled
 * and 'force' clear, only mark the local stage as scheduled; the
 * actual dispatch is deferred until the staging queue is flushed.
 * Otherwise run (or IPI) ifsq_ifstart_ipifunc() on the owner CPU.
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		/* Reset counters; SCHED means "dispatch on flush". */
		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}
304 | ||
/*
 * Decide whether ifnet.if_start must be (re)scheduled for this
 * subqueue.  Returns non-zero if the caller should reschedule.
 *
 * NOTE:
 * This function will release ifnet.if_start subqueue interlock,
 * if ifnet.if_start for the subqueue does not need to be scheduled
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when hardware queue is ready
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}
344 | ||
/*
 * Netisr handler for a subqueue's per-CPU if_start netmsg: run
 * ifnet.if_start under the hardware serializer, then reschedule if
 * more data remains, or chase the owner CPU if it changed.
 */
static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0);	/* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		/* Still able to transmit after the run? */
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on the subqueue owner CPU, and we keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}
387 | ||
388 | /* Device driver ifnet.if_start helper function */ | |
389 | void | |
f0a26983 | 390 | ifsq_devstart(struct ifaltq_subque *ifsq) |
9db4b353 | 391 | { |
f0a26983 | 392 | struct ifnet *ifp = ifsq_get_ifp(ifsq); |
9db4b353 SZ |
393 | int running = 0; |
394 | ||
bfefe4a6 | 395 | ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq); |
9db4b353 | 396 | |
f0a26983 SZ |
397 | ALTQ_SQ_LOCK(ifsq); |
398 | if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) { | |
399 | ALTQ_SQ_UNLOCK(ifsq); | |
9db4b353 SZ |
400 | return; |
401 | } | |
f0a26983 SZ |
402 | ifsq_set_started(ifsq); |
403 | ALTQ_SQ_UNLOCK(ifsq); | |
9db4b353 | 404 | |
f0a26983 | 405 | ifp->if_start(ifp, ifsq); |
9db4b353 | 406 | |
f0a26983 | 407 | if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) |
9db4b353 SZ |
408 | running = 1; |
409 | ||
f0a26983 | 410 | if (ifsq_ifstart_need_schedule(ifsq, running)) { |
9db4b353 SZ |
411 | /* |
412 | * More data need to be transmitted, ifnet.if_start is | |
413 | * scheduled on ifnet's CPU, and we keep going. | |
414 | * NOTE: ifnet.if_start interlock is not released. | |
415 | */ | |
f0a26983 | 416 | ifsq_ifstart_schedule(ifsq, 0); |
9db4b353 SZ |
417 | } |
418 | } | |
419 | ||
f0a26983 SZ |
420 | void |
421 | if_devstart(struct ifnet *ifp) | |
422 | { | |
423 | ifsq_devstart(ifq_get_subq_default(&ifp->if_snd)); | |
424 | } | |
425 | ||
/* Device driver ifnet.if_start schedule helper function */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	/* force=1: always dispatch, never use the staging queue */
	ifsq_ifstart_schedule(ifsq, 1);
}
432 | ||
2dffecda SZ |
433 | void |
434 | if_devstart_sched(struct ifnet *ifp) | |
435 | { | |
f0a26983 | 436 | ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd)); |
2dffecda SZ |
437 | } |
438 | ||
a3dd34d2 SZ |
/*
 * Default ifnet.if_serialize method: enter the interface's single
 * serializer, ignoring which serialize type was requested.
 */
static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}
444 | ||
/*
 * Default ifnet.if_deserialize method: exit the interface's single
 * serializer, ignoring which serialize type was requested.
 */
static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}
450 | ||
/*
 * Default ifnet.if_tryserialize method: attempt to enter the
 * interface's single serializer; returns non-zero on success.
 */
static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}
456 | ||
2c9effcf SZ |
#ifdef INVARIANTS
/*
 * Default ifnet.if_serialize_assert method: assert that the
 * interface's single serializer is held (or not held), ignoring
 * which serialize type was requested.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif
469 | ||
/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.
 *
 * Sets up the serializer methods, per-CPU address lists and stats,
 * the link-level sockaddr_dl address/netmask pair, the send queue
 * and its subqueues (with per-CPU staging and if_start netmsgs),
 * then publishes the ifp in ifindex2ifnet, the ifnet list and the
 * ifnet array under the ifnet lock, and finally announces it.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q, qlen;
	char qlenname[64];

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/*
	 * Make if_addrhead available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if_addgroup(ifp, IFG_ALL);

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	/* One allocation holds the ifaddr, the address and the netmask. */
	ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize);
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Netmask lives in the second socksize-sized slot. */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	/* Netmask covers the whole name portion with 0xff. */
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	/*
	 * Make if_data available on all CPUs, since they could
	 * be updated by hardware interrupt routing, which could
	 * be bound to any CPU.
	 */
	ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu),
				    M_DEVBUF,
				    M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq =
		kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
			M_DEVBUF,
			M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	/* Allow user to override driver's setting. */
	ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname);
	qlen = -1;
	TUNABLE_INT_FETCH(qlenname, &qlen);
	if (qlen > 0) {
		if_printf(ifp, "qlenmax -> %d\n", qlen);
		ifq_set_maxlen(ifq, qlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		/* XXX: netisr_ncpus */
		ifsq->ifsq_stage =
			kmalloc(ncpus * sizeof(struct ifsubq_stage),
				M_DEVBUF,
				M_WAITOK | M_ZERO | M_CACHEALIGN);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		/*
		 * Allocate one if_start message for each CPU, since
		 * the hardware TX ring could be assigned to any CPU.
		 *
		 * NOTE:
		 * If the hardware TX ring polling CPU and the hardware
		 * TX ring interrupt CPU are same, one if_start message
		 * should be enough.
		 */
		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Increase mbuf cluster/jcluster limits for the mbufs that
	 * could sit on the device queues for quite some time.
	 */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(ifp->if_nmbjclusters);

	/*
	 * Install this ifp into ifindex2inet, ifnet queue and ifnet
	 * array after it is setup.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			bcopy(ifindex2ifnet, q, n/2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs could get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
739 | ||
698ac46c HS |
/*
 * Boot-time hook (SI_SUB_PROTO_IFATTACHDOMAIN): run
 * if_attachdomain1() on every interface already attached, so that
 * interfaces attached before the protocol domains were registered
 * get their per-domain data initialized.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);
752 | ||
/*
 * Initialize one interface's per-domain (address family) data:
 * invoke each registered domain's dom_ifattach hook and record the
 * returned cookie in ifp->if_afdata[family].
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}
768 | ||
c727e142 SZ |
/*
 * Purge all addresses whose type is _not_ AF_LINK
 *
 * Netisr0 dispatch handler; the target ifnet is carried in
 * lmsg.u.ms_resultp.  IPv4 addresses are removed through the
 * SIOCDIFADDR ioctl path, IPv6 addresses through in6_purgeaddr(),
 * and anything else is unlinked and destroyed directly.
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	ASSERT_NETISR0;

	/*
	 * The ifaddr processing in the following loop will block,
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so we don't care about the
	 * blocking nature of the ifaddr processing here.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
			struct sockaddr_in saved_addr, saved_dst;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			/* Save information for panic. */
			memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr));
			if (ifa->ifa_dstaddr != NULL) {
				memcpy(&saved_dst, ifa->ifa_dstaddr,
				    sizeof(saved_dst));
			} else {
				memset(&saved_dst, 0, sizeof(saved_dst));
			}

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;

			/* MUST NOT HAPPEN */
			panic("%s: in_control failed %x, dst %x", ifp->if_xname,
			    ntohl(saved_addr.sin_addr.s_addr),
			    ntohl(saved_dst.sin_addr.s_addr));
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Unknown family: unlink and destroy the ifaddr directly. */
		if_printf(ifp, "destroy ifaddr family %d\n",
		    ifa->ifa_addr->sa_family);
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	netisr_replymsg(&nmsg->base, 0);
}
862 | ||
863 | void | |
864 | if_purgeaddrs_nolink(struct ifnet *ifp) | |
865 | { | |
866 | struct netmsg_base nmsg; | |
9a74b592 SZ |
867 | |
868 | netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, | |
869 | if_purgeaddrs_nolink_dispatch); | |
5204e13c SZ |
870 | nmsg.lmsg.u.ms_resultp = ifp; |
871 | netisr_domsg(&nmsg, 0); | |
c727e142 SZ |
872 | } |
873 | ||
5804f3d1 SZ |
874 | static void |
875 | ifq_stage_detach_handler(netmsg_t nmsg) | |
876 | { | |
877 | struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp; | |
f0a26983 | 878 | int q; |
5804f3d1 | 879 | |
f0a26983 SZ |
880 | for (q = 0; q < ifq->altq_subq_cnt; ++q) { |
881 | struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; | |
882 | struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); | |
883 | ||
884 | if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) | |
885 | ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage); | |
886 | } | |
5804f3d1 SZ |
887 | lwkt_replymsg(&nmsg->lmsg, 0); |
888 | } | |
889 | ||
890 | static void | |
891 | ifq_stage_detach(struct ifaltq *ifq) | |
892 | { | |
893 | struct netmsg_base base; | |
894 | int cpu; | |
895 | ||
896 | netmsg_init(&base, NULL, &curthread->td_msgport, 0, | |
897 | ifq_stage_detach_handler); | |
898 | base.lmsg.u.ms_resultp = ifq; | |
899 | ||
43dbcc2a | 900 | /* XXX netisr_ncpus */ |
5804f3d1 | 901 | for (cpu = 0; cpu < ncpus; ++cpu) |
ec7f7fc8 | 902 | lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0); |
5804f3d1 SZ |
903 | } |
904 | ||
a29ef6e8 SZ |
/* Message used to run the per-cpu route deletion for a detaching ifnet. */
struct netmsg_if_rtdel {
	struct netmsg_base base;
	struct ifnet *ifp;	/* interface whose routes are purged */
};
909 | ||
/*
 * Per-cpu handler: walk every address family's routing table on this
 * cpu and delete all route entries referencing the detaching interface
 * (via the if_rtdel() walktree callback), then forward the message to
 * the next netisr cpu in the chain.
 */
static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, cpu;

	cpu = mycpuid;
	ASSERT_NETISR_NCPUS(cpu);

	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head *rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}
	netisr_forwardmsg(&msg->base, cpu + 1);
}
928 | ||
984263bc MD |
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * The teardown order below is deliberate: announce departure first,
 * unlink the ifnet from the global lookup structures under the ifnet
 * lock, leave all interface groups, sync netisrs, and only then tear
 * down addresses, routes, per-domain data and queue resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifnet_array *old_ifnet_array;
	struct ifg_list *ifgl;
	struct netmsg_if_rtdel msg;
	struct domain *dp;
	int q;

	/* Announce that the interface is gone. */
	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/*
	 * Remove this ifp from ifindex2ifnet, ifnet queue and ifnet
	 * array before it is whacked.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/*
	 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/*
	 * Remove this ifp from ifnet queue.
	 */
	TAILQ_REMOVE(&ifnetlist, ifp, if_link);

	/*
	 * Remove this ifp from ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_del(ifp, old_ifnet_array);

	ifnet_unlock();

	/* Leave every interface group this ifp is a member of. */
	ifgroup_lockmgr(LK_EXCLUSIVE);
	while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group);
	ifgroup_lockmgr(LK_RELEASE);

	/*
	 * Sync all netisrs so that the old ifnet array is no longer
	 * accessed and we can free it safely later on.
	 */
	netmsg_service_sync();
	ifnet_array_free(old_ifnet_array);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

	/* Decrease the mbuf clusters/jclusters limits increased by us */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(-ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(-ifp->if_nmbjclusters);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		/* Only the AF_LINK ifaddr may remain at this point. */
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
		    ("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
		    ("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp. This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 */
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    if_rtdel_dispatch);
	msg.ifp = ifp;
	netisr_domsg_global(&msg.base);

	/* Let each domain release its per-interface data. */
	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	kfree(ifp->if_addrheads, M_IFADDR);

	/* Make sure no cpu still stages packets for this interface. */
	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}
1080 | ||
233c8570 AL |
1081 | int |
1082 | ifgroup_lockmgr(u_int flags) | |
1083 | { | |
1084 | return lockmgr(&ifgroup_lock, flags); | |
1085 | } | |
1086 | ||
315a7da3 | 1087 | /* |
233c8570 | 1088 | * Create an empty interface group. |
315a7da3 | 1089 | */ |
233c8570 | 1090 | static struct ifg_group * |
315a7da3 JL |
1091 | if_creategroup(const char *groupname) |
1092 | { | |
233c8570 AL |
1093 | struct ifg_group *ifg; |
1094 | ||
1095 | ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK); | |
1096 | strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); | |
1097 | ifg->ifg_refcnt = 0; | |
1098 | ifg->ifg_carp_demoted = 0; | |
1099 | TAILQ_INIT(&ifg->ifg_members); | |
1100 | ||
1101 | ifgroup_lockmgr(LK_EXCLUSIVE); | |
1102 | TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next); | |
1103 | ifgroup_lockmgr(LK_RELEASE); | |
315a7da3 | 1104 | |
233c8570 AL |
1105 | EVENTHANDLER_INVOKE(group_attach_event, ifg); |
1106 | ||
1107 | return (ifg); | |
315a7da3 JL |
1108 | } |
1109 | ||
1110 | /* | |
233c8570 AL |
1111 | * Destroy an empty interface group. |
1112 | */ | |
static int
if_destroygroup(struct ifg_group *ifg)
{
	/* Only groups without member interfaces may be destroyed. */
	KASSERT(ifg->ifg_refcnt == 0,
	    ("trying to delete a non-empty interface group"));

	/* Unlink from the global group list under the group lock. */
	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_REMOVE(&ifg_head, ifg, ifg_next);
	ifgroup_lockmgr(LK_RELEASE);

	/* Notify listeners after the group is unlinked, then free it. */
	EVENTHANDLER_INVOKE(group_detach_event, ifg);
	kfree(ifg, M_IFNET);

	return (0);
}
1128 | ||
1129 | /* | |
1130 | * Add the interface to a group. | |
1131 | * The target group will be created if it doesn't exist. | |
315a7da3 JL |
1132 | */ |
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;

	/*
	 * Reject group names ending in a digit; presumably so they
	 * cannot collide with interface names (which end in a unit
	 * number) — TODO confirm.
	 */
	if (groupname[0] &&
	    groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	ifgroup_lockmgr(LK_SHARED);

	/* Fail if the interface is already a member of this group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) {
			ifgroup_lockmgr(LK_RELEASE);
			return (EEXIST);
		}
	}

	/* Look for an existing group of that name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, groupname) == 0)
			break;
	}

	ifgroup_lockmgr(LK_RELEASE);

	/*
	 * NOTE(review): the group lock is dropped before the group is
	 * (possibly) created and before ifg_refcnt is bumped below, so
	 * concurrent if_addgroup() calls for the same name could race
	 * here — confirm callers are serialized.
	 */
	if (ifg == NULL)
		ifg = if_creategroup(groupname);

	ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK);
	ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK);
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;
	ifg->ifg_refcnt++;

	/* Link the membership records under the group lock. */
	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
	ifgroup_lockmgr(LK_RELEASE);

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
}
1179 | ||
1180 | /* | |
233c8570 AL |
1181 | * Remove the interface from a group. |
1182 | * The group will be destroyed if it becomes empty. | |
1183 | * | |
 1184 |  * The 'ifgroup_lock' must be held exclusively when calling this. |
315a7da3 | 1185 | */ |
233c8570 AL |
static int
if_delgroup_locked(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE);

	/* Find the membership record on the interface side. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	}
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Find the matching record on the group side. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp)
			break;
	}

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);

		/*
		 * Event handlers may block; drop the group lock around
		 * the invocation and reacquire it afterwards.
		 */
		ifgroup_lockmgr(LK_RELEASE);
		EVENTHANDLER_INVOKE(group_change_event, groupname);
		ifgroup_lockmgr(LK_EXCLUSIVE);

		kfree(ifgm, M_IFNET);
		ifgl->ifgl_group->ifg_refcnt--;
	}

	/* Destroy the group once its last member is gone. */
	if (ifgl->ifgl_group->ifg_refcnt == 0) {
		ifgroup_lockmgr(LK_RELEASE);
		if_destroygroup(ifgl->ifgl_group);
		ifgroup_lockmgr(LK_EXCLUSIVE);
	}

	kfree(ifgl, M_IFNET);

	return (0);
}
1229 | ||
233c8570 AL |
1230 | int |
1231 | if_delgroup(struct ifnet *ifp, const char *groupname) | |
1232 | { | |
1233 | int error; | |
1234 | ||
1235 | ifgroup_lockmgr(LK_EXCLUSIVE); | |
1236 | error = if_delgroup_locked(ifp, groupname); | |
1237 | ifgroup_lockmgr(LK_RELEASE); | |
1238 | ||
1239 | return (error); | |
1240 | } | |
1241 | ||
315a7da3 | 1242 | /* |
233c8570 AL |
1243 | * Store all the groups that the interface belongs to in memory |
1244 | * pointed to by data. | |
315a7da3 | 1245 | */ |
233c8570 AL |
1246 | static int |
1247 | if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp) | |
315a7da3 | 1248 | { |
233c8570 AL |
1249 | struct ifg_list *ifgl; |
1250 | struct ifg_req *ifgrq, *p; | |
1251 | int len, error; | |
1252 | ||
1253 | len = 0; | |
1254 | ifgroup_lockmgr(LK_SHARED); | |
1255 | TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) | |
1256 | len += sizeof(struct ifg_req); | |
1257 | ifgroup_lockmgr(LK_RELEASE); | |
315a7da3 JL |
1258 | |
1259 | if (ifgr->ifgr_len == 0) { | |
233c8570 AL |
1260 | /* |
1261 | * Caller is asking how much memory should be allocated in | |
1262 | * the next request in order to hold all the groups. | |
1263 | */ | |
1264 | ifgr->ifgr_len = len; | |
315a7da3 | 1265 | return (0); |
233c8570 AL |
1266 | } else if (ifgr->ifgr_len != len) { |
1267 | return (EINVAL); | |
315a7da3 JL |
1268 | } |
1269 | ||
233c8570 AL |
1270 | ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); |
1271 | if (ifgrq == NULL) | |
1272 | return (ENOMEM); | |
1273 | ||
1274 | ifgroup_lockmgr(LK_SHARED); | |
1275 | p = ifgrq; | |
315a7da3 | 1276 | TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { |
233c8570 AL |
1277 | if (len < sizeof(struct ifg_req)) { |
1278 | ifgroup_lockmgr(LK_RELEASE); | |
b8b9f56e MD |
1279 | error = EINVAL; |
1280 | goto failed; | |
233c8570 AL |
1281 | } |
1282 | ||
1283 | strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group, | |
1284 | sizeof(ifgrq->ifgrq_group)); | |
1285 | len -= sizeof(struct ifg_req); | |
1286 | p++; | |
315a7da3 | 1287 | } |
233c8570 AL |
1288 | ifgroup_lockmgr(LK_RELEASE); |
1289 | ||
1290 | error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); | |
b8b9f56e | 1291 | failed: |
233c8570 | 1292 | kfree(ifgrq, M_TEMP); |
b8b9f56e | 1293 | return error; |
315a7da3 JL |
1294 | } |
1295 | ||
1296 | /* | |
233c8570 | 1297 | * Store all the members of a group in memory pointed to by data. |
315a7da3 | 1298 | */ |
233c8570 AL |
static int
if_getgroupmembers(struct ifgroupreq *ifgr)
{
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req *ifgrq, *p;
	int len, error;

	ifgroup_lockmgr(LK_SHARED);

	/* Look the group up by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
			break;
	}
	if (ifg == NULL) {
		ifgroup_lockmgr(LK_RELEASE);
		return (ENOENT);
	}

	/* Measure how much memory the reply needs. */
	len = 0;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		len += sizeof(struct ifg_req);

	ifgroup_lockmgr(LK_RELEASE);

	if (ifgr->ifgr_len == 0) {
		/* Caller only asks for the required buffer size. */
		ifgr->ifgr_len = len;
		return (0);
	} else if (ifgr->ifgr_len != len) {
		return (EINVAL);
	}

	ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
	if (ifgrq == NULL)
		return (ENOMEM);

	/*
	 * NOTE(review): 'ifg' is dereferenced again after the lock was
	 * dropped above; this relies on the group not being destroyed
	 * concurrently — confirm caller context.
	 */
	ifgroup_lockmgr(LK_SHARED);
	p = ifgrq;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		/*
		 * The member list may have grown while the lock was
		 * dropped; bail out instead of overflowing the buffer.
		 */
		if (len < sizeof(struct ifg_req)) {
			ifgroup_lockmgr(LK_RELEASE);
			error = EINVAL;
			goto failed;
		}

		strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(p->ifgrq_member));
		len -= sizeof(struct ifg_req);
		p++;
	}
	ifgroup_lockmgr(LK_RELEASE);

	error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
failed:
	kfree(ifgrq, M_TEMP);
	return error;
}
1356 | ||
951ecd7f AL |
/*
 * Install (RTM_ADD) or remove (RTM_DELETE) the RTF_LOCAL loopback
 * route for the interface address 'ia' owned by 'ifa'.  EEXIST on
 * add and ESRCH/ENOENT on delete are treated as success-like and
 * returned quietly; any other failure is logged.
 */
static int
ifa_maintain_loopback_route(int cmd, struct ifaddr *ifa, struct sockaddr *ia)
{
	struct sockaddr_dl null_sdl;
	struct rt_addrinfo info;
	struct ifaddr *rti_ifa;
	struct ifnet *ifp;
	int error;

	/* RTM_CHANGE is unsupported in rtrequest1() yet. */
	KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);

	rti_ifa = NULL;
	ifp = ifa->ifa_ifp;

	/* The gateway is a null AF_LINK sockaddr naming this interface. */
	bzero(&null_sdl, sizeof(null_sdl));
	null_sdl.sdl_len = sizeof(null_sdl);
	null_sdl.sdl_family = AF_LINK;
	null_sdl.sdl_index = ifp->if_index;
	null_sdl.sdl_type = ifp->if_type;

	bzero(&info, sizeof(info));
	if (cmd != RTM_DELETE)
		info.rti_ifp = loif;
	if (cmd == RTM_ADD) {
		/*
		 * Explicitly specify the loopback IFA.
		 */
		rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
		if (rti_ifa != NULL) {
			/*
			 * The loopback IFA wouldn't disappear, but ref it
			 * for safety.
			 */
			IFAREF(rti_ifa);
			info.rti_ifa = rti_ifa;
		}
	}
	info.rti_info[RTAX_DST] = ia;
	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
	/*
	 * Manually set RTF_LOCAL so that the IFA and IFP wouldn't be
	 * overridden to be the owner of the destination address (ia)
	 * by in_addroute().
	 */
	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_LOCAL;

	error = rtrequest1_global(cmd, &info, NULL, NULL, RTREQ_PRIO_NORM);

	if (rti_ifa != NULL)
		IFAFREE(rti_ifa);

	/* "Already there" on add, "already gone" on delete: tolerated. */
	if (error == 0 ||
	    (cmd == RTM_ADD && error == EEXIST) ||
	    (cmd == RTM_DELETE && (error == ESRCH || error == ENOENT)))
		return (error);

	log(LOG_DEBUG, "%s: %s failed for interface %s: %d\n",
	    __func__, (cmd == RTM_ADD ? "insertion" : "deletion"),
	    ifp->if_xname, error);
	return (error);
}
1419 | ||
1420 | int | |
1421 | ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) | |
1422 | { | |
1423 | return ifa_maintain_loopback_route(RTM_ADD, ifa, ia); | |
1424 | } | |
1425 | ||
1426 | int | |
1427 | ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) | |
1428 | { | |
1429 | return ifa_maintain_loopback_route(RTM_DELETE, ifa, ia); | |
1430 | } | |
1431 | ||
984263bc MD |
1432 | /* |
1433 | * Delete Routes for a Network Interface | |
f23061d4 | 1434 | * |
984263bc MD |
1435 | * Called for each routing entry via the rnh->rnh_walktree() call above |
1436 | * to delete all route entries referencing a detaching network interface. | |
1437 | * | |
1438 | * Arguments: | |
1439 | * rn pointer to node in the routing table | |
1440 | * arg argument passed to rnh->rnh_walktree() - detaching interface | |
1441 | * | |
1442 | * Returns: | |
1443 | * 0 successful | |
1444 | * errno failed - reason indicated | |
1445 | * | |
1446 | */ | |
1447 | static int | |
f23061d4 | 1448 | if_rtdel(struct radix_node *rn, void *arg) |
984263bc MD |
1449 | { |
1450 | struct rtentry *rt = (struct rtentry *)rn; | |
1451 | struct ifnet *ifp = arg; | |
1452 | int err; | |
1453 | ||
1454 | if (rt->rt_ifp == ifp) { | |
1455 | ||
1456 | /* | |
1457 | * Protect (sorta) against walktree recursion problems | |
1458 | * with cloned routes | |
1459 | */ | |
f23061d4 | 1460 | if (!(rt->rt_flags & RTF_UP)) |
984263bc MD |
1461 | return (0); |
1462 | ||
1463 | err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, | |
1464 | rt_mask(rt), rt->rt_flags, | |
2038fb68 | 1465 | NULL); |
984263bc MD |
1466 | if (err) { |
1467 | log(LOG_WARNING, "if_rtdel: error %d\n", err); | |
1468 | } | |
1469 | } | |
1470 | ||
1471 | return (0); | |
1472 | } | |
1473 | ||
c008937e AL |
1474 | static __inline boolean_t |
1475 | ifa_match_withmask(const struct ifaddr *ifa, const struct sockaddr *addr) | |
1476 | { | |
1477 | const char *cp, *cp2, *cp3, *cplim; | |
1478 | ||
1479 | KKASSERT(ifa->ifa_addr->sa_family == addr->sa_family); | |
1480 | ||
1481 | cp = addr->sa_data; | |
1482 | cp2 = ifa->ifa_addr->sa_data; | |
1483 | cp3 = ifa->ifa_netmask->sa_data; | |
1484 | cplim = (const char *)ifa->ifa_netmask + ifa->ifa_netmask->sa_len; | |
1485 | ||
1486 | while (cp3 < cplim) { | |
1487 | if ((*cp++ ^ *cp2++) & *cp3++) | |
1488 | return (FALSE); | |
1489 | } | |
1490 | ||
1491 | return (TRUE); | |
1492 | } | |
1493 | ||
0925f9d8 SZ |
1494 | static __inline boolean_t |
1495 | ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa) | |
1496 | { | |
1497 | if (old_ifa == NULL) | |
c008937e | 1498 | return (TRUE); |
0925f9d8 SZ |
1499 | |
1500 | if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 && | |
1501 | (cur_ifa->ifa_ifp->if_flags & IFF_UP)) | |
c008937e | 1502 | return (TRUE); |
0925f9d8 SZ |
1503 | if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 && |
1504 | (cur_ifa->ifa_flags & IFA_ROUTE)) | |
c008937e AL |
1505 | return (TRUE); |
1506 | ||
1507 | return (FALSE); | |
0925f9d8 SZ |
1508 | } |
1509 | ||
984263bc MD |
1510 | /* |
1511 | * Locate an interface based on a complete address. | |
1512 | */ | |
984263bc | 1513 | struct ifaddr * |
f23061d4 | 1514 | ifa_ifwithaddr(struct sockaddr *addr) |
984263bc | 1515 | { |
b4051e25 SZ |
1516 | const struct ifnet_array *arr; |
1517 | int i; | |
984263bc | 1518 | |
b4051e25 SZ |
1519 | arr = ifnet_array_get(); |
1520 | for (i = 0; i < arr->ifnet_count; ++i) { | |
1521 | struct ifnet *ifp = arr->ifnet_arr[i]; | |
b2632176 SZ |
1522 | struct ifaddr_container *ifac; |
1523 | ||
1524 | TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { | |
1525 | struct ifaddr *ifa = ifac->ifa; | |
1526 | ||
1527 | if (ifa->ifa_addr->sa_family != addr->sa_family) | |
1528 | continue; | |
1529 | if (sa_equal(addr, ifa->ifa_addr)) | |
1530 | return (ifa); | |
1531 | if ((ifp->if_flags & IFF_BROADCAST) && | |
1532 | ifa->ifa_broadaddr && | |
1533 | /* IPv6 doesn't have broadcast */ | |
1534 | ifa->ifa_broadaddr->sa_len != 0 && | |
1535 | sa_equal(ifa->ifa_broadaddr, addr)) | |
1536 | return (ifa); | |
1537 | } | |
984263bc | 1538 | } |
b2632176 | 1539 | return (NULL); |
984263bc | 1540 | } |
0925f9d8 | 1541 | |
984263bc | 1542 | /* |
2976dea7 | 1543 | * Locate the point-to-point interface with a given destination address. |
984263bc | 1544 | */ |
984263bc | 1545 | struct ifaddr * |
f23061d4 | 1546 | ifa_ifwithdstaddr(struct sockaddr *addr) |
984263bc | 1547 | { |
b4051e25 SZ |
1548 | const struct ifnet_array *arr; |
1549 | int i; | |
984263bc | 1550 | |
b4051e25 SZ |
1551 | arr = ifnet_array_get(); |
1552 | for (i = 0; i < arr->ifnet_count; ++i) { | |
1553 | struct ifnet *ifp = arr->ifnet_arr[i]; | |
b2632176 SZ |
1554 | struct ifaddr_container *ifac; |
1555 | ||
1556 | if (!(ifp->if_flags & IFF_POINTOPOINT)) | |
1557 | continue; | |
1558 | ||
1559 | TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { | |
1560 | struct ifaddr *ifa = ifac->ifa; | |
1561 | ||
984263bc MD |
1562 | if (ifa->ifa_addr->sa_family != addr->sa_family) |
1563 | continue; | |
0c3c561c JH |
1564 | if (ifa->ifa_dstaddr && |
1565 | sa_equal(addr, ifa->ifa_dstaddr)) | |
984263bc | 1566 | return (ifa); |
b2632176 | 1567 | } |
984263bc | 1568 | } |
b2632176 | 1569 | return (NULL); |
984263bc MD |
1570 | } |
1571 | ||
/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	const struct ifnet_array *arr;
	int i;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != af)
				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * If we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				if (ifa->ifa_netmask == NULL ||
				    !ifa_match_withmask(ifa, addr))
					continue;

				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search for an even
				 * better one.  If the netmasks are equal,
				 * we prefer this ifa based on the result
				 * of ifa_prefer().
				 */
				if (ifa_maybe == NULL ||
				    rn_refines(ifa->ifa_netmask,
					ifa_maybe->ifa_netmask) ||
				    (sa_equal(ifa_maybe->ifa_netmask,
					ifa->ifa_netmask) &&
				     ifa_prefer(ifa, ifa_maybe)))
					ifa_maybe = ifa;
			}
		}
	}

	return (ifa_maybe);
}
1660 | ||
1661 | /* | |
1662 | * Find an interface address specific to an interface best matching | |
1663 | * a given address. | |
1664 | */ | |
1665 | struct ifaddr * | |
f23061d4 | 1666 | ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) |
984263bc | 1667 | { |
b2632176 | 1668 | struct ifaddr_container *ifac; |
4090d6ff | 1669 | struct ifaddr *ifa_maybe = NULL; |
984263bc MD |
1670 | u_int af = addr->sa_family; |
1671 | ||
1672 | if (af >= AF_MAX) | |
c008937e AL |
1673 | return (NULL); |
1674 | ||
b2632176 SZ |
1675 | TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { |
1676 | struct ifaddr *ifa = ifac->ifa; | |
1677 | ||
984263bc MD |
1678 | if (ifa->ifa_addr->sa_family != af) |
1679 | continue; | |
4090d6ff | 1680 | if (ifa_maybe == NULL) |
984263bc | 1681 | ifa_maybe = ifa; |
0c3c561c JH |
1682 | if (ifa->ifa_netmask == NULL) { |
1683 | if (sa_equal(addr, ifa->ifa_addr) || | |
1684 | (ifa->ifa_dstaddr != NULL && | |
1685 | sa_equal(addr, ifa->ifa_dstaddr))) | |
984263bc MD |
1686 | return (ifa); |
1687 | continue; | |
1688 | } | |
1689 | if (ifp->if_flags & IFF_POINTOPOINT) { | |
0c3c561c | 1690 | if (sa_equal(addr, ifa->ifa_dstaddr)) |
984263bc MD |
1691 | return (ifa); |
1692 | } else { | |
c008937e | 1693 | if (ifa_match_withmask(ifa, addr)) |
984263bc MD |
1694 | return (ifa); |
1695 | } | |
1696 | } | |
c008937e | 1697 | |
984263bc MD |
1698 | return (ifa_maybe); |
1699 | } | |
1700 | ||
e782981c | 1701 | struct netmsg_if { |
9a74b592 SZ |
1702 | struct netmsg_base base; |
1703 | struct ifnet *ifp; | |
9a74b592 SZ |
1704 | }; |
1705 | ||
984263bc | 1706 | /* |
9a74b592 | 1707 | * Mark an interface down and notify protocols of the transition. |
984263bc | 1708 | */ |
9a74b592 | 1709 | static void |
e782981c | 1710 | if_down_dispatch(netmsg_t nmsg) |
984263bc | 1711 | { |
e782981c | 1712 | struct netmsg_if *msg = (struct netmsg_if *)nmsg; |
9a74b592 | 1713 | struct ifnet *ifp = msg->ifp; |
b2632176 | 1714 | struct ifaddr_container *ifac; |
a2b099dd | 1715 | struct domain *dp; |
984263bc | 1716 | |
43dbcc2a SZ |
1717 | ASSERT_NETISR0; |
1718 | ||
e782981c | 1719 | ifp->if_flags &= ~IFF_UP; |
984263bc | 1720 | getmicrotime(&ifp->if_lastchange); |
4d2ff05c RM |
1721 | rt_ifmsg(ifp); |
1722 | ||
9a74b592 SZ |
1723 | /* |
1724 | * The ifaddr processing in the following loop will block, | |
1725 | * however, this function is called in netisr0, in which | |
1726 | * ifaddr list changes happen, so we don't care about the | |
1727 | * blockness of the ifaddr processing here. | |
1728 | */ | |
b2632176 SZ |
1729 | TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { |
1730 | struct ifaddr *ifa = ifac->ifa; | |
1731 | ||
9a74b592 SZ |
1732 | /* Ignore marker */ |
1733 | if (ifa->ifa_addr->sa_family == AF_UNSPEC) | |
1734 | continue; | |
1735 | ||
e782981c | 1736 | kpfctlinput(PRC_IFDOWN, ifa->ifa_addr); |
b2632176 | 1737 | } |
9a74b592 | 1738 | |
a2b099dd RM |
1739 | SLIST_FOREACH(dp, &domains, dom_next) |
1740 | if (dp->dom_if_down != NULL) | |
1741 | dp->dom_if_down(ifp); | |
1742 | ||
4d2ff05c | 1743 | ifq_purge_all(&ifp->if_snd); |
5204e13c | 1744 | netisr_replymsg(&nmsg->base, 0); |
9a74b592 SZ |
1745 | } |
1746 | ||
984263bc | 1747 | /* |
9a74b592 | 1748 | * Mark an interface up and notify protocols of the transition. |
984263bc | 1749 | */ |
9a74b592 | 1750 | static void |
e782981c | 1751 | if_up_dispatch(netmsg_t nmsg) |
984263bc | 1752 | { |
e782981c | 1753 | struct netmsg_if *msg = (struct netmsg_if *)nmsg; |
9a74b592 | 1754 | struct ifnet *ifp = msg->ifp; |
b2632176 | 1755 | struct ifaddr_container *ifac; |
a2b099dd | 1756 | struct domain *dp; |
984263bc | 1757 | |
43dbcc2a SZ |
1758 | ASSERT_NETISR0; |
1759 | ||
9275f515 | 1760 | ifq_purge_all(&ifp->if_snd); |
e782981c | 1761 | ifp->if_flags |= IFF_UP; |
984263bc | 1762 | getmicrotime(&ifp->if_lastchange); |
4d2ff05c RM |
1763 | rt_ifmsg(ifp); |
1764 | ||
9a74b592 SZ |
1765 | /* |
1766 | * The ifaddr processing in the following loop will block, | |
1767 | * however, this function is called in netisr0, in which | |
1768 | * ifaddr list changes happen, so we don't care about the | |
1769 | * blockness of the ifaddr processing here. | |
1770 | */ | |
b2632176 SZ |
1771 | TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { |
1772 | struct ifaddr *ifa = ifac->ifa; | |
1773 | ||
9a74b592 SZ |
1774 | /* Ignore marker */ |
1775 | if (ifa->ifa_addr->sa_family == AF_UNSPEC) | |
1776 | continue; | |
1777 | ||
e782981c | 1778 | kpfctlinput(PRC_IFUP, ifa->ifa_addr); |
b2632176 | 1779 | } |
a2b099dd RM |
1780 | |
1781 | SLIST_FOREACH(dp, &domains, dom_next) | |
1782 | if (dp->dom_if_up != NULL) | |
1783 | dp->dom_if_up(ifp); | |
9a74b592 | 1784 | |
5204e13c | 1785 | netisr_replymsg(&nmsg->base, 0); |
9a74b592 SZ |
1786 | } |
1787 | ||
984263bc | 1788 | /* |
5c703385 MD |
1789 | * Mark an interface down and notify protocols of the transition. An |
1790 | * interface going down is also considered to be a synchronizing event. | |
1791 | * We must ensure that all packet processing related to the interface | |
1792 | * has completed before we return so e.g. the caller can free the ifnet | |
1793 | * structure that the mbufs may be referencing. | |
1794 | * | |
984263bc MD |
1795 | * NOTE: must be called at splnet or eqivalent. |
1796 | */ | |
1797 | void | |
f23061d4 | 1798 | if_down(struct ifnet *ifp) |
984263bc | 1799 | { |
e782981c RM |
1800 | struct netmsg_if msg; |
1801 | ||
fcddd1b6 | 1802 | EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); |
e782981c RM |
1803 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, |
1804 | if_down_dispatch); | |
1805 | msg.ifp = ifp; | |
1806 | netisr_domsg(&msg.base, 0); | |
5c703385 | 1807 | netmsg_service_sync(); |
984263bc MD |
1808 | } |
1809 | ||
1810 | /* | |
1811 | * Mark an interface up and notify protocols of | |
1812 | * the transition. | |
1813 | * NOTE: must be called at splnet or eqivalent. | |
1814 | */ | |
1815 | void | |
f23061d4 | 1816 | if_up(struct ifnet *ifp) |
984263bc | 1817 | { |
e782981c RM |
1818 | struct netmsg_if msg; |
1819 | ||
1820 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, | |
1821 | if_up_dispatch); | |
1822 | msg.ifp = ifp; | |
1823 | netisr_domsg(&msg.base, 0); | |
fcddd1b6 | 1824 | EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); |
984263bc MD |
1825 | } |
1826 | ||
6de83abe SZ |
1827 | /* |
1828 | * Process a link state change. | |
1829 | * NOTE: must be called at splsoftnet or equivalent. | |
1830 | */ | |
1831 | void | |
1832 | if_link_state_change(struct ifnet *ifp) | |
1833 | { | |
71fc104f HT |
1834 | int link_state = ifp->if_link_state; |
1835 | ||
6de83abe | 1836 | rt_ifmsg(ifp); |
71fc104f HT |
1837 | devctl_notify("IFNET", ifp->if_xname, |
1838 | (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); | |
bc1a39e2 AL |
1839 | |
1840 | EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); | |
6de83abe SZ |
1841 | } |
1842 | ||
984263bc MD |
1843 | /* |
1844 | * Handle interface watchdog timer routines. Called | |
1845 | * from softclock, we decrement timers (if set) and | |
1846 | * call the appropriate interface routine on expiration. | |
1847 | */ | |
1848 | static void | |
b5df1a85 | 1849 | if_slowtimo_dispatch(netmsg_t nmsg) |
984263bc | 1850 | { |
b5df1a85 | 1851 | struct globaldata *gd = mycpu; |
b4051e25 SZ |
1852 | const struct ifnet_array *arr; |
1853 | int i; | |
4986965b | 1854 | |
5204e13c | 1855 | ASSERT_NETISR0; |
b5df1a85 SZ |
1856 | |
1857 | crit_enter_gd(gd); | |
1858 | lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ | |
1859 | crit_exit_gd(gd); | |
984263bc | 1860 | |
b4051e25 SZ |
1861 | arr = ifnet_array_get(); |
1862 | for (i = 0; i < arr->ifnet_count; ++i) { | |
1863 | struct ifnet *ifp = arr->ifnet_arr[i]; | |
1864 | ||
b5df1a85 SZ |
1865 | crit_enter_gd(gd); |
1866 | ||
6517ec3f SZ |
1867 | if (if_stats_compat) { |
1868 | IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); | |
1869 | IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); | |
1870 | IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); | |
1871 | IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); | |
1872 | IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); | |
1873 | IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); | |
1874 | IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); | |
1875 | IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); | |
1876 | IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); | |
1877 | IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); | |
1878 | IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); | |
6de344ba | 1879 | IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); |
6517ec3f SZ |
1880 | } |
1881 | ||
b5df1a85 SZ |
1882 | if (ifp->if_timer == 0 || --ifp->if_timer) { |
1883 | crit_exit_gd(gd); | |
984263bc | 1884 | continue; |
b5df1a85 | 1885 | } |
78195a76 | 1886 | if (ifp->if_watchdog) { |
a3dd34d2 | 1887 | if (ifnet_tryserialize_all(ifp)) { |
78195a76 | 1888 | (*ifp->if_watchdog)(ifp); |
a3dd34d2 | 1889 | ifnet_deserialize_all(ifp); |
78195a76 MD |
1890 | } else { |
1891 | /* try again next timeout */ | |
1892 | ++ifp->if_timer; | |
1893 | } | |
1894 | } | |
4986965b | 1895 | |
b5df1a85 SZ |
1896 | crit_exit_gd(gd); |
1897 | } | |
4986965b | 1898 | |
abbb44bb | 1899 | callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); |
984263bc MD |
1900 | } |
1901 | ||
b5df1a85 SZ |
1902 | static void |
1903 | if_slowtimo(void *arg __unused) | |
1904 | { | |
1905 | struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; | |
1906 | ||
1907 | KASSERT(mycpuid == 0, ("not on cpu0")); | |
1908 | crit_enter(); | |
1909 | if (lmsg->ms_flags & MSGF_DONE) | |
1910 | lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); | |
1911 | crit_exit(); | |
1912 | } | |
1913 | ||
984263bc MD |
1914 | /* |
1915 | * Map interface name to | |
1916 | * interface structure pointer. | |
1917 | */ | |
1918 | struct ifnet * | |
1919 | ifunit(const char *name) | |
1920 | { | |
984263bc | 1921 | struct ifnet *ifp; |
984263bc | 1922 | |
984263bc | 1923 | /* |
3e4a09e7 | 1924 | * Search all the interfaces for this name/number |
984263bc | 1925 | */ |
b4051e25 | 1926 | KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); |
3e4a09e7 | 1927 | |
b4051e25 | 1928 | TAILQ_FOREACH(ifp, &ifnetlist, if_link) { |
3e4a09e7 | 1929 | if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) |
984263bc MD |
1930 | break; |
1931 | } | |
1932 | return (ifp); | |
1933 | } | |
1934 | ||
984263bc | 1935 | struct ifnet * |
b4051e25 | 1936 | ifunit_netisr(const char *name) |
984263bc | 1937 | { |
b4051e25 SZ |
1938 | const struct ifnet_array *arr; |
1939 | int i; | |
984263bc MD |
1940 | |
1941 | /* | |
b4051e25 | 1942 | * Search all the interfaces for this name/number |
984263bc MD |
1943 | */ |
1944 | ||
b4051e25 SZ |
1945 | arr = ifnet_array_get(); |
1946 | for (i = 0; i < arr->ifnet_count; ++i) { | |
1947 | struct ifnet *ifp = arr->ifnet_arr[i]; | |
984263bc | 1948 | |
b4051e25 SZ |
1949 | if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) |
1950 | return ifp; | |
1951 | } | |
1952 | return NULL; | |
1953 | } | |
984263bc MD |
1954 | |
1955 | /* | |
1956 | * Interface ioctls. | |
1957 | */ | |
1958 | int | |
87de5057 | 1959 | ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) |
984263bc | 1960 | { |
41c20dac | 1961 | struct ifnet *ifp; |
233c8570 | 1962 | struct ifgroupreq *ifgr; |
41c20dac | 1963 | struct ifreq *ifr; |
984263bc | 1964 | struct ifstat *ifs; |
e612af50 | 1965 | int error, do_ifup = 0; |
984263bc MD |
1966 | short oif_flags; |
1967 | int new_flags; | |
1fdf0954 | 1968 | size_t namelen, onamelen; |
f6994c54 AHJ |
1969 | size_t descrlen; |
1970 | char *descrbuf, *odescrbuf; | |
1fdf0954 HP |
1971 | char new_name[IFNAMSIZ]; |
1972 | struct ifaddr *ifa; | |
1973 | struct sockaddr_dl *sdl; | |
984263bc MD |
1974 | |
1975 | switch (cmd) { | |
984263bc | 1976 | case SIOCGIFCONF: |
87de5057 | 1977 | return (ifconf(cmd, data, cred)); |
9683f229 MD |
1978 | default: |
1979 | break; | |
984263bc | 1980 | } |
9683f229 | 1981 | |
984263bc MD |
1982 | ifr = (struct ifreq *)data; |
1983 | ||
1984 | switch (cmd) { | |
1985 | case SIOCIFCREATE: | |
c5e14c14 | 1986 | case SIOCIFCREATE2: |
2b3f93ea MD |
1987 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
1988 | if (error) | |
c5e14c14 RP |
1989 | return (error); |
1990 | return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), | |
bb54c3a2 | 1991 | (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL)); |
984263bc | 1992 | case SIOCIFDESTROY: |
2b3f93ea MD |
1993 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
1994 | if (error) | |
984263bc | 1995 | return (error); |
c5e14c14 | 1996 | return (if_clone_destroy(ifr->ifr_name)); |
984263bc MD |
1997 | case SIOCIFGCLONERS: |
1998 | return (if_clone_list((struct if_clonereq *)data)); | |
233c8570 AL |
1999 | case SIOCGIFGMEMB: |
2000 | return (if_getgroupmembers((struct ifgroupreq *)data)); | |
9683f229 MD |
2001 | default: |
2002 | break; | |
984263bc MD |
2003 | } |
2004 | ||
9683f229 MD |
2005 | /* |
2006 | * Nominal ioctl through interface, lookup the ifp and obtain a | |
2007 | * lock to serialize the ifconfig ioctl operation. | |
2008 | */ | |
b4051e25 SZ |
2009 | ifnet_lock(); |
2010 | ||
984263bc | 2011 | ifp = ifunit(ifr->ifr_name); |
b4051e25 SZ |
2012 | if (ifp == NULL) { |
2013 | ifnet_unlock(); | |
984263bc | 2014 | return (ENXIO); |
b4051e25 | 2015 | } |
9683f229 | 2016 | error = 0; |
984263bc | 2017 | |
9683f229 | 2018 | switch (cmd) { |
12b71966 PA |
2019 | case SIOCGIFINDEX: |
2020 | ifr->ifr_index = ifp->if_index; | |
2021 | break; | |
2022 | ||
984263bc MD |
2023 | case SIOCGIFFLAGS: |
2024 | ifr->ifr_flags = ifp->if_flags; | |
46f25451 | 2025 | ifr->ifr_flagshigh = ifp->if_flags >> 16; |
984263bc MD |
2026 | break; |
2027 | ||
2028 | case SIOCGIFCAP: | |
2029 | ifr->ifr_reqcap = ifp->if_capabilities; | |
2030 | ifr->ifr_curcap = ifp->if_capenable; | |
2031 | break; | |
2032 | ||
2033 | case SIOCGIFMETRIC: | |
2034 | ifr->ifr_metric = ifp->if_metric; | |
2035 | break; | |
2036 | ||
2037 | case SIOCGIFMTU: | |
2038 | ifr->ifr_mtu = ifp->if_mtu; | |
2039 | break; | |
2040 | ||
e41e61d5 SZ |
2041 | case SIOCGIFTSOLEN: |
2042 | ifr->ifr_tsolen = ifp->if_tsolen; | |
2043 | break; | |
2044 | ||
315a7da3 JL |
2045 | case SIOCGIFDATA: |
2046 | error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, | |
9683f229 | 2047 | sizeof(ifp->if_data)); |
315a7da3 JL |
2048 | break; |
2049 | ||
984263bc MD |
2050 | case SIOCGIFPHYS: |
2051 | ifr->ifr_phys = ifp->if_physical; | |
2052 | break; | |
2053 | ||
1630efc5 | 2054 | case SIOCGIFPOLLCPU: |
1630efc5 | 2055 | ifr->ifr_pollcpu = -1; |
1630efc5 SZ |
2056 | break; |
2057 | ||
2058 | case SIOCSIFPOLLCPU: | |
1630efc5 SZ |
2059 | break; |
2060 | ||
f6994c54 AHJ |
2061 | case SIOCGIFDESCR: |
2062 | error = 0; | |
2063 | ifnet_lock(); | |
2064 | if (ifp->if_description == NULL) { | |
2065 | ifr->ifr_buffer.length = 0; | |
2066 | error = ENOMSG; | |
2067 | } else { | |
2068 | /* space for terminating nul */ | |
2069 | descrlen = strlen(ifp->if_description) + 1; | |
2070 | if (ifr->ifr_buffer.length < descrlen) | |
2071 | error = ENAMETOOLONG; | |
2072 | else | |
2073 | error = copyout(ifp->if_description, | |
2074 | ifr->ifr_buffer.buffer, descrlen); | |
2075 | ifr->ifr_buffer.length = descrlen; | |
2076 | } | |
2077 | ifnet_unlock(); | |
2078 | break; | |
2079 | ||
2080 | case SIOCSIFDESCR: | |
2b3f93ea | 2081 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
f6994c54 AHJ |
2082 | if (error) |
2083 | break; | |
2084 | ||
2085 | /* | |
2086 | * Copy only (length-1) bytes to make sure that | |
2087 | * if_description is always nul terminated. The | |
2088 | * length parameter is supposed to count the | |
2089 | * terminating nul in. | |
2090 | */ | |
2091 | if (ifr->ifr_buffer.length > ifdescr_maxlen) | |
2092 | return (ENAMETOOLONG); | |
2093 | else if (ifr->ifr_buffer.length == 0) | |
2094 | descrbuf = NULL; | |
2095 | else { | |
2096 | descrbuf = kmalloc(ifr->ifr_buffer.length, M_IFDESCR, | |
2097 | M_WAITOK | M_ZERO); | |
2098 | error = copyin(ifr->ifr_buffer.buffer, descrbuf, | |
2099 | ifr->ifr_buffer.length - 1); | |
2100 | if (error) { | |
2101 | kfree(descrbuf, M_IFDESCR); | |
2102 | break; | |
2103 | } | |
2104 | } | |
2105 | ||
2106 | ifnet_lock(); | |
2107 | odescrbuf = ifp->if_description; | |
2108 | ifp->if_description = descrbuf; | |
2109 | ifnet_unlock(); | |
2110 | ||
2111 | if (odescrbuf) | |
2112 | kfree(odescrbuf, M_IFDESCR); | |
2113 | ||
984263bc | 2114 | case SIOCSIFFLAGS: |
2b3f93ea | 2115 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2116 | if (error) |
9683f229 | 2117 | break; |
984263bc MD |
2118 | new_flags = (ifr->ifr_flags & 0xffff) | |
2119 | (ifr->ifr_flagshigh << 16); | |
2120 | if (ifp->if_flags & IFF_SMART) { | |
2121 | /* Smart drivers twiddle their own routes */ | |
2122 | } else if (ifp->if_flags & IFF_UP && | |
2123 | (new_flags & IFF_UP) == 0) { | |
984263bc | 2124 | if_down(ifp); |
984263bc MD |
2125 | } else if (new_flags & IFF_UP && |
2126 | (ifp->if_flags & IFF_UP) == 0) { | |
e612af50 | 2127 | do_ifup = 1; |
984263bc | 2128 | } |
9c095379 | 2129 | |
b3a7093f SZ |
2130 | #ifdef IFPOLL_ENABLE |
2131 | if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { | |
2132 | if (new_flags & IFF_NPOLLING) | |
2133 | ifpoll_register(ifp); | |
2134 | else | |
2135 | ifpoll_deregister(ifp); | |
2136 | } | |
2137 | #endif | |
9c095379 | 2138 | |
984263bc MD |
2139 | ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | |
2140 | (new_flags &~ IFF_CANTCHANGE); | |
984263bc MD |
2141 | if (new_flags & IFF_PPROMISC) { |
2142 | /* Permanently promiscuous mode requested */ | |
2143 | ifp->if_flags |= IFF_PROMISC; | |
2144 | } else if (ifp->if_pcount == 0) { | |
2145 | ifp->if_flags &= ~IFF_PROMISC; | |
2146 | } | |
78195a76 | 2147 | if (ifp->if_ioctl) { |
a3dd34d2 | 2148 | ifnet_serialize_all(ifp); |
87de5057 | 2149 | ifp->if_ioctl(ifp, cmd, data, cred); |
a3dd34d2 | 2150 | ifnet_deserialize_all(ifp); |
78195a76 | 2151 | } |
e612af50 SZ |
2152 | if (do_ifup) |
2153 | if_up(ifp); | |
984263bc MD |
2154 | getmicrotime(&ifp->if_lastchange); |
2155 | break; | |
2156 | ||
2157 | case SIOCSIFCAP: | |
2b3f93ea | 2158 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2159 | if (error) |
9683f229 MD |
2160 | break; |
2161 | if (ifr->ifr_reqcap & ~ifp->if_capabilities) { | |
2162 | error = EINVAL; | |
2163 | break; | |
2164 | } | |
a3dd34d2 | 2165 | ifnet_serialize_all(ifp); |
87de5057 | 2166 | ifp->if_ioctl(ifp, cmd, data, cred); |
a3dd34d2 | 2167 | ifnet_deserialize_all(ifp); |
984263bc MD |
2168 | break; |
2169 | ||
f23061d4 | 2170 | case SIOCSIFNAME: |
2b3f93ea | 2171 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
9683f229 MD |
2172 | if (error) |
2173 | break; | |
f23061d4 | 2174 | error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); |
9683f229 MD |
2175 | if (error) |
2176 | break; | |
2177 | if (new_name[0] == '\0') { | |
2178 | error = EINVAL; | |
2179 | break; | |
2180 | } | |
2181 | if (ifunit(new_name) != NULL) { | |
2182 | error = EEXIST; | |
2183 | break; | |
2184 | } | |
f2bd8b67 JS |
2185 | |
2186 | EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); | |
f23061d4 JH |
2187 | |
2188 | /* Announce the departure of the interface. */ | |
2189 | rt_ifannouncemsg(ifp, IFAN_DEPARTURE); | |
2190 | ||
2191 | strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); | |
b2632176 | 2192 | ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; |
f23061d4 JH |
2193 | sdl = (struct sockaddr_dl *)ifa->ifa_addr; |
2194 | namelen = strlen(new_name); | |
2195 | onamelen = sdl->sdl_nlen; | |
2196 | /* | |
2197 | * Move the address if needed. This is safe because we | |
2198 | * allocate space for a name of length IFNAMSIZ when we | |
2199 | * create this in if_attach(). | |
2200 | */ | |
2201 | if (namelen != onamelen) { | |
2202 | bcopy(sdl->sdl_data + onamelen, | |
2203 | sdl->sdl_data + namelen, sdl->sdl_alen); | |
2204 | } | |
2205 | bcopy(new_name, sdl->sdl_data, namelen); | |
2206 | sdl->sdl_nlen = namelen; | |
2207 | sdl = (struct sockaddr_dl *)ifa->ifa_netmask; | |
2208 | bzero(sdl->sdl_data, onamelen); | |
2209 | while (namelen != 0) | |
2210 | sdl->sdl_data[--namelen] = 0xff; | |
f2bd8b67 JS |
2211 | |
2212 | EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); | |
f23061d4 JH |
2213 | |
2214 | /* Announce the return of the interface. */ | |
2215 | rt_ifannouncemsg(ifp, IFAN_ARRIVAL); | |
2216 | break; | |
1fdf0954 | 2217 | |
984263bc | 2218 | case SIOCSIFMETRIC: |
2b3f93ea | 2219 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2220 | if (error) |
9683f229 | 2221 | break; |
984263bc MD |
2222 | ifp->if_metric = ifr->ifr_metric; |
2223 | getmicrotime(&ifp->if_lastchange); | |
2224 | break; | |
2225 | ||
2226 | case SIOCSIFPHYS: | |
2b3f93ea | 2227 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2228 | if (error) |
9683f229 MD |
2229 | break; |
2230 | if (ifp->if_ioctl == NULL) { | |
2231 | error = EOPNOTSUPP; | |
2232 | break; | |
2233 | } | |
a3dd34d2 | 2234 | ifnet_serialize_all(ifp); |
87de5057 | 2235 | error = ifp->if_ioctl(ifp, cmd, data, cred); |
a3dd34d2 | 2236 | ifnet_deserialize_all(ifp); |
984263bc MD |
2237 | if (error == 0) |
2238 | getmicrotime(&ifp->if_lastchange); | |
9683f229 | 2239 | break; |
984263bc MD |
2240 | |
2241 | case SIOCSIFMTU: | |
2242 | { | |
2243 | u_long oldmtu = ifp->if_mtu; | |
2244 | ||
2b3f93ea | 2245 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2246 | if (error) |
9683f229 MD |
2247 | break; |
2248 | if (ifp->if_ioctl == NULL) { | |
2249 | error = EOPNOTSUPP; | |
2250 | break; | |
2251 | } | |
2252 | if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { | |
2253 | error = EINVAL; | |
2254 | break; | |
2255 | } | |
a3dd34d2 | 2256 | ifnet_serialize_all(ifp); |
87de5057 | 2257 | error = ifp->if_ioctl(ifp, cmd, data, cred); |
a3dd34d2 | 2258 | ifnet_deserialize_all(ifp); |
984263bc MD |
2259 | if (error == 0) { |
2260 | getmicrotime(&ifp->if_lastchange); | |
2261 | rt_ifmsg(ifp); | |
2262 | } | |
2263 | /* | |
2264 | * If the link MTU changed, do network layer specific procedure. | |
2265 | */ | |
2266 | if (ifp->if_mtu != oldmtu) { | |
2267 | #ifdef INET6 | |
2268 | nd6_setmtu(ifp); | |
2269 | #endif | |
2270 | } | |
9683f229 | 2271 | break; |
984263bc MD |
2272 | } |
2273 | ||
e41e61d5 | 2274 | case SIOCSIFTSOLEN: |
2b3f93ea | 2275 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
e41e61d5 SZ |
2276 | if (error) |
2277 | break; | |
2278 | ||
2279 | /* XXX need driver supplied upper limit */ | |
2280 | if (ifr->ifr_tsolen <= 0) { | |
2281 | error = EINVAL; | |
2282 | break; | |
2283 | } | |
2284 | ifp->if_tsolen = ifr->ifr_tsolen; | |
2285 | break; | |
2286 | ||
984263bc MD |
2287 | case SIOCADDMULTI: |
2288 | case SIOCDELMULTI: | |
2b3f93ea | 2289 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2290 | if (error) |
9683f229 | 2291 | break; |
984263bc MD |
2292 | |
2293 | /* Don't allow group membership on non-multicast interfaces. */ | |
9683f229 MD |
2294 | if ((ifp->if_flags & IFF_MULTICAST) == 0) { |
2295 | error = EOPNOTSUPP; | |
2296 | break; | |
2297 | } | |
984263bc MD |
2298 | |
2299 | /* Don't let users screw up protocols' entries. */ | |
9683f229 MD |
2300 | if (ifr->ifr_addr.sa_family != AF_LINK) { |
2301 | error = EINVAL; | |
2302 | break; | |
2303 | } | |
984263bc MD |
2304 | |
2305 | if (cmd == SIOCADDMULTI) { | |
2306 | struct ifmultiaddr *ifma; | |
2307 | error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); | |
2308 | } else { | |
2309 | error = if_delmulti(ifp, &ifr->ifr_addr); | |
2310 | } | |
2311 | if (error == 0) | |
2312 | getmicrotime(&ifp->if_lastchange); | |
9683f229 | 2313 | break; |
984263bc MD |
2314 | |
2315 | case SIOCSIFPHYADDR: | |
2316 | case SIOCDIFPHYADDR: | |
2317 | #ifdef INET6 | |
2318 | case SIOCSIFPHYADDR_IN6: | |
2319 | #endif | |
2320 | case SIOCSLIFPHYADDR: | |
233c8570 | 2321 | case SIOCSIFMEDIA: |
984263bc | 2322 | case SIOCSIFGENERIC: |
2b3f93ea | 2323 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2324 | if (error) |
9683f229 | 2325 | break; |
baf84f0a | 2326 | if (ifp->if_ioctl == NULL) { |
9683f229 MD |
2327 | error = EOPNOTSUPP; |
2328 | break; | |
2329 | } | |
a3dd34d2 | 2330 | ifnet_serialize_all(ifp); |
87de5057 | 2331 | error = ifp->if_ioctl(ifp, cmd, data, cred); |
a3dd34d2 | 2332 | ifnet_deserialize_all(ifp); |
984263bc MD |
2333 | if (error == 0) |
2334 | getmicrotime(&ifp->if_lastchange); | |
9683f229 | 2335 | break; |
984263bc MD |
2336 | |
2337 | case SIOCGIFSTATUS: | |
2338 | ifs = (struct ifstat *)data; | |
2339 | ifs->ascii[0] = '\0'; | |
9683f229 | 2340 | /* fall through */ |
984263bc MD |
2341 | case SIOCGIFPSRCADDR: |
2342 | case SIOCGIFPDSTADDR: | |
2343 | case SIOCGLIFPHYADDR: | |
2344 | case SIOCGIFMEDIA: | |
1e1c5fac | 2345 | case SIOCGIFXMEDIA: |
984263bc | 2346 | case SIOCGIFGENERIC: |
9683f229 MD |
2347 | if (ifp->if_ioctl == NULL) { |
2348 | error = EOPNOTSUPP; | |
2349 | break; | |
2350 | } | |
a3dd34d2 | 2351 | ifnet_serialize_all(ifp); |
87de5057 | 2352 | error = ifp->if_ioctl(ifp, cmd, data, cred); |
a3dd34d2 | 2353 | ifnet_deserialize_all(ifp); |
9683f229 | 2354 | break; |
984263bc MD |
2355 | |
2356 | case SIOCSIFLLADDR: | |
2b3f93ea | 2357 | error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); |
984263bc | 2358 | if (error) |
9683f229 MD |
2359 | break; |
2360 | error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, | |
2361 | ifr->ifr_addr.sa_len); | |
19f10c78 | 2362 | EVENTHANDLER_INVOKE(iflladdr_event, ifp); |
9683f229 | 2363 | break; |
984263bc | 2364 | |
233c8570 AL |
2365 | case SIOCAIFGROUP: |
2366 | ifgr = (struct ifgroupreq *)ifr; | |
2b3f93ea MD |
2367 | error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); |
2368 | if (error) | |
233c8570 AL |
2369 | return (error); |
2370 | if ((error = if_addgroup(ifp, ifgr->ifgr_group))) | |
2371 | return (error); | |
2372 | break; | |
2373 | ||
2374 | case SIOCDIFGROUP: | |
2375 | ifgr = (struct ifgroupreq *)ifr; | |
2b3f93ea MD |
2376 | error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); |
2377 | if (error) | |
233c8570 AL |
2378 | return (error); |
2379 | if ((error = if_delgroup(ifp, ifgr->ifgr_group))) | |
2380 | return (error); | |
2381 | break; | |
2382 | ||
2383 | case SIOCGIFGROUP: | |
2384 | ifgr = (struct ifgroupreq *)ifr; | |
2385 | if ((error = if_getgroups(ifgr, ifp))) | |
2386 | return (error); | |
2387 | break; | |
2388 | ||
984263bc MD |
2389 | default: |
2390 | oif_flags = ifp->if_flags; | |
9683f229 MD |
2391 | if (so->so_proto == 0) { |
2392 | error = EOPNOTSUPP; | |
2393 | break; | |
2394 | } | |
002c1265 MD |
2395 | error = so_pru_control_direct(so, cmd, data, ifp); |
2396 | ||
baf84f0a AL |
2397 | /* |
2398 | * If the socket control method returns EOPNOTSUPP, pass the | |
2399 | * request directly to the interface. | |
2400 | * | |
2401 | * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls, | |
2402 | * because drivers may trust these ioctls to come from an | |
2403 | * already privileged layer and thus do not perform credentials | |
2404 | * checks or input validation. | |
2405 | */ | |
2406 | if (error == EOPNOTSUPP && | |
2407 | ifp->if_ioctl != NULL && | |
2408 | cmd != SIOCSIFADDR && | |
2409 | cmd != SIOCSIFBRDADDR && | |
2410 | cmd != SIOCSIFDSTADDR && | |
2411 | cmd != SIOCSIFNETMASK) { | |
2412 | ifnet_serialize_all(ifp); | |
2413 | error = ifp->if_ioctl(ifp, cmd, data, cred); | |
2414 | ifnet_deserialize_all(ifp); | |
2415 | } | |
2416 | ||
984263bc MD |
2417 | if ((oif_flags ^ ifp->if_flags) & IFF_UP) { |
2418 | #ifdef INET6 | |
2419 | DELAY(100);/* XXX: temporary workaround for fxp issue*/ | |
2420 | if (ifp->if_flags & IFF_UP) { | |
4986965b | 2421 | crit_enter(); |
984263bc | 2422 | in6_if_up(ifp); |
4986965b | 2423 | crit_exit(); |
984263bc MD |
2424 | } |
2425 | #endif | |
2426 | } | |
9683f229 | 2427 | break; |
984263bc | 2428 | } |
9683f229 | 2429 | |
b4051e25 | 2430 | ifnet_unlock(); |
9683f229 | 2431 | return (error); |
984263bc MD |
2432 | } |
2433 | ||
2434 | /* | |
2435 | * Set/clear promiscuous mode on interface ifp based on the truth value | |
2436 | * of pswitch. The calls are reference counted so that only the first | |
2437 | * "on" request actually has an effect, as does the final "off" request. | |
2438 | * Results are undefined if the "off" and "on" requests are not matched. | |
2439 | */ | |
2440 | int | |
f23061d4 | 2441 | ifpromisc(struct ifnet *ifp, int pswitch) |
984263bc MD |
2442 | { |
2443 | struct ifreq ifr; | |
2444 | int error; | |
2445 | int oldflags; | |
2446 | ||
2447 | oldflags = ifp->if_flags; | |
46f25451 | 2448 | if (ifp->if_flags & IFF_PPROMISC) { |
984263bc MD |
2449 | /* Do nothing if device is in permanently promiscuous mode */ |
2450 | ifp->if_pcount += pswitch ? 1 : -1; | |
2451 | return (0); | |
2452 | } | |
2453 | if (pswitch) { | |
2454 | /* | |
2455 | * If the device is not configured up, we cannot put it in | |
2456 | * promiscuous mode. | |
2457 | */ | |
2458 | if ((ifp->if_flags & IFF_UP) == 0) | |
2459 | return (ENETDOWN); | |
2460 | if (ifp->if_pcount++ != 0) | |
2461 | return (0); | |
2462 | ifp->if_flags |= IFF_PROMISC; | |
3e4a09e7 MD |
2463 | log(LOG_INFO, "%s: promiscuous mode enabled\n", |
2464 | ifp->if_xname); | |
984263bc MD |
2465 | } else { |
2466 | if (--ifp->if_pcount > 0) | |
2467 | return (0); | |
2468 | ifp->if_flags &= ~IFF_PROMISC; | |
3e4a09e7 MD |
2469 | log(LOG_INFO, "%s: promiscuous mode disabled\n", |
2470 | ifp->if_xname); | |
984263bc MD |
2471 | } |
2472 | ifr.ifr_flags = ifp->if_flags; | |
46f25451 | 2473 | ifr.ifr_flagshigh = ifp->if_flags >> 16; |
a3dd34d2 SZ |
2474 | ifnet_serialize_all(ifp); |
2475 | error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); | |
2476 | ifnet_deserialize_all(ifp); | |
984263bc MD |
2477 | if (error == 0) |
2478 | rt_ifmsg(ifp); | |
2479 | else | |
2480 | ifp->if_flags = oldflags; | |
2481 | return error; | |
2482 | } | |
2483 | ||
2484 | /* | |
2485 | * Return interface configuration | |
2486 | * of system. List may be used | |
2487 | * in later ioctl's (above) to get | |
2488 | * other information. | |
2489 | */ | |
984263bc | 2490 | static int |
87de5057 | 2491 | ifconf(u_long cmd, caddr_t data, struct ucred *cred) |
984263bc | 2492 | { |
41c20dac MD |
2493 | struct ifconf *ifc = (struct ifconf *)data; |
2494 | struct ifnet *ifp; | |
984263bc MD |
2495 | struct sockaddr *sa; |
2496 | struct ifreq ifr, *ifrp; | |
2497 | int space = ifc->ifc_len, error = 0; | |
2498 | ||
2499 | ifrp = ifc->ifc_req; | |
b4051e25 SZ |
2500 | |
2501 | ifnet_lock(); | |
2502 | TAILQ_FOREACH(ifp, &ifnetlist, if_link) { | |
9a74b592 SZ |
2503 | struct ifaddr_container *ifac, *ifac_mark; |
2504 | struct ifaddr_marker mark; | |
2505 | struct ifaddrhead *head; | |
3e4a09e7 | 2506 | int addrs; |
984263bc | 2507 | |
f23061d4 | 2508 | if (space <= sizeof ifr) |
984263bc | 2509 | break; |
623c059e JS |
2510 | |
2511 | /* | |
95f018e8 MD |
2512 | * Zero the stack declared structure first to prevent |
2513 | * memory disclosure. | |
623c059e | 2514 | */ |
95f018e8 | 2515 | bzero(&ifr, sizeof(ifr)); |
3e4a09e7 MD |
2516 | if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) |
2517 | >= sizeof(ifr.ifr_name)) { | |
984263bc MD |
2518 | error = ENAMETOOLONG; |
2519 | break; | |
984263bc MD |
2520 | } |
2521 | ||
9a74b592 SZ |
2522 | /* |
2523 | * Add a marker, since copyout() could block and during that | |
2524 | * period the list could be changed. Inserting the marker to | |
2525 | * the header of the list will not cause trouble for the code | |
2526 | * assuming that the first element of the list is AF_LINK; the | |
2527 | * marker will be moved to the next position w/o blocking. | |
2528 | */ | |
2529 | ifa_marker_init(&mark, ifp); | |
2530 | ifac_mark = &mark.ifac; | |
2531 | head = &ifp->if_addrheads[mycpuid]; | |
2532 | ||
984263bc | 2533 | addrs = 0; |
9a74b592 SZ |
2534 | TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); |
2535 | while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { | |
b2632176 SZ |
2536 | struct ifaddr *ifa = ifac->ifa; |
2537 | ||
9a74b592 SZ |
2538 | TAILQ_REMOVE(head, ifac_mark, ifa_link); |
2539 | TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); | |
2540 | ||
2541 | /* Ignore marker */ | |
2542 | if (ifa->ifa_addr->sa_family == AF_UNSPEC) | |
2543 | continue; | |
2544 | ||
f23061d4 | 2545 | if (space <= sizeof ifr) |
984263bc MD |
2546 | break; |
2547 | sa = ifa->ifa_addr; | |
2ea2781e | 2548 | if (cred->cr_prison && prison_if(cred, sa)) |
984263bc MD |
2549 | continue; |
2550 | addrs++; | |
9a74b592 SZ |
2551 | /* |
2552 | * Keep a reference on this ifaddr, so that it will | |
2553 | * not be destroyed when its address is copied to | |
2554 | * the userland, which could block. | |
2555 | */ | |
2556 | IFAREF(ifa); | |
984263bc MD |
2557 | if (sa->sa_len <= sizeof(*sa)) { |
2558 | ifr.ifr_addr = *sa; | |
f23061d4 | 2559 | error = copyout(&ifr, ifrp, sizeof ifr); |
984263bc MD |
2560 | ifrp++; |
2561 | } else { | |
f23061d4 | 2562 | if (space < (sizeof ifr) + sa->sa_len - |
9a74b592 SZ |
2563 | sizeof(*sa)) { |
2564 | IFAFREE(ifa); | |
984263bc | 2565 | break; |
9a74b592 | 2566 | } |
984263bc | 2567 | space -= sa->sa_len - sizeof(*sa); |
f23061d4 JH |
2568 | error = copyout(&ifr, ifrp, |
2569 | sizeof ifr.ifr_name); | |
984263bc | 2570 | if (error == 0) |
f23061d4 JH |
2571 | error = copyout(sa, &ifrp->ifr_addr, |
2572 | sa->sa_len); | |
984263bc MD |
2573 | ifrp = (struct ifreq *) |
2574 | (sa->sa_len + (caddr_t)&ifrp->ifr_addr); | |
2575 | } | |
9a74b592 | 2576 | IFAFREE(ifa); |
984263bc MD |
2577 | if (error) |
2578 | break; | |
f23061d4 | 2579 | space -= sizeof ifr; |
984263bc | 2580 | } |
9a74b592 | 2581 | TAILQ_REMOVE(head, ifac_mark, ifa_link); |
984263bc MD |
2582 | if (error) |
2583 | break; | |
2584 | if (!addrs) { | |
f23061d4 JH |
2585 | bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); |
2586 | error = copyout(&ifr, ifrp, sizeof ifr); | |
984263bc MD |
2587 | if (error) |
2588 | break; | |
f23061d4 | 2589 | space -= sizeof ifr; |
984263bc MD |
2590 | ifrp++; |
2591 | } | |
2592 | } | |
b4051e25 SZ |
2593 | ifnet_unlock(); |
2594 | ||
984263bc MD |
2595 | ifc->ifc_len -= space; |
2596 | return (error); | |
2597 | } | |
2598 | ||
2599 | /* | |
2600 | * Just like if_promisc(), but for all-multicast-reception mode. | |
2601 | */ | |
2602 | int | |
f23061d4 | 2603 | if_allmulti(struct ifnet *ifp, int onswitch) |
984263bc MD |
2604 | { |
2605 | int error = 0; | |
984263bc MD |
2606 | struct ifreq ifr; |
2607 | ||
4986965b JS |
2608 | crit_enter(); |
2609 | ||
984263bc MD |
2610 | if (onswitch) { |
2611 | if (ifp->if_amcount++ == 0) { | |
2612 | ifp->if_flags |= IFF_ALLMULTI; | |
2613 | ifr.ifr_flags = ifp->if_flags; | |
46f25451 | 2614 | ifr.ifr_flagshigh = ifp->if_flags >> 16; |
a3dd34d2 | 2615 | ifnet_serialize_all(ifp); |
bd4539cc | 2616 | error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, |
2038fb68 | 2617 | NULL); |
a3dd34d2 | 2618 | ifnet_deserialize_all(ifp); |
984263bc MD |
2619 | } |
2620 | } else { | |
2621 | if (ifp->if_amcount > 1) { | |
2622 | ifp->if_amcount--; | |
2623 | } else { | |
2624 | ifp->if_amcount = 0; | |
2625 | ifp->if_flags &= ~IFF_ALLMULTI; | |
2626 | ifr.ifr_flags = ifp->if_flags; | |
46f25451 | 2627 | ifr.ifr_flagshigh = ifp->if_flags >> 16; |
a3dd34d2 | 2628 | ifnet_serialize_all(ifp); |
bd4539cc | 2629 | error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, |
2038fb68 | 2630 | NULL); |
a3dd34d2 | 2631 | ifnet_deserialize_all(ifp); |
984263bc MD |
2632 | } |
2633 | } | |
4986965b JS |
2634 | |
2635 | crit_exit(); | |
984263bc MD |
2636 | |
2637 | if (error == 0) | |
2638 | rt_ifmsg(ifp); | |
2639 | return error; | |
2640 | } | |
2641 | ||
/*
 * Add a multicast listenership to the interface in question.
 * The link layer provides a routine (if_resolvemulti) which converts
 * the protocol-level group address into the matching AF_LINK address.
 *
 * Returns 0 on success (and optionally the membership record through
 * *retifma), or the error from if_resolvemulti().  The caller must
 * hold all ifnet serializers.
 */
int
if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	/* Record the protocol-level membership with a private copy of sa. */
	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = NULL;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	if (retifma)
		*retifma = ifma;

	/*
	 * If the protocol address maps to a distinct link-layer group,
	 * reference (or create) the membership record for that AF_LINK
	 * address as well.
	 */
	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			/*
			 * NOTE(review): kmalloc here is not M_ZERO and
			 * ifma_lladdr/ifma_protospec are not assigned for
			 * this link-layer record — presumably never read
			 * for AF_LINK entries; confirm before relying on
			 * those fields.
			 */
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.
	 */
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);

	return 0;
}
2723 | ||
/*
 * Serializing wrapper around if_addmulti_serialized(): acquire all
 * ifnet serializers for the duration of the membership addition.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	int rc;

	ifnet_serialize_all(ifp);
	rc = if_addmulti_serialized(ifp, sa, retifma);
	ifnet_deserialize_all(ifp);
	return rc;
}
2736 | ||
/*
 * Remove a reference to a multicast address on this interface.  Yell
 * if the request does not match an existing membership.
 *
 * Dropping the last reference removes the record, announces
 * RTM_DELMADDR, and — for a pure link-layer group — notifies the
 * driver via SIOCDELMULTI.  Returns 0 on success, ENOENT when no
 * matching membership exists.  Caller must hold all ifnet serializers.
 */
static int
if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Locate the membership matching 'sa'. */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	/* More references remain: just drop one and we are done. */
	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	/*
	 * Reuse 'sa' to remember the companion link-layer address before
	 * the record is freed; it drives the second pass below.
	 */
	sa = ifma->ifma_lladdr;
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL)
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	/* No separate link-layer record to release. */
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address.  It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above.  If some nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored was is not the same as the one which was
	 * in the record for the link-layer address.  (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	/* Last reference on the link-layer group: remove and notify. */
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}
2803 | ||
/*
 * Serializing wrapper around if_delmulti_serialized(): acquire all
 * ifnet serializers for the duration of the membership removal.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	int rc;

	ifnet_serialize_all(ifp);
	rc = if_delmulti_serialized(ifp, sa);
	ifnet_deserialize_all(ifp);
	return rc;
}
2815 | ||
/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 *
 * Uses an on-stack marker element (family AF_UNSPEC) so iteration can
 * survive list modification by if_delmulti_serialized(); the marker is
 * advanced past each real element before that element is deleted.
 * Caller must hold all ifnet serializers.
 */
void
if_delallmulti_serialized(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma, mark;
	struct sockaddr sa;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Build the AF_UNSPEC sentinel address identifying the marker. */
	bzero(&sa, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	sa.sa_len = sizeof(sa);

	bzero(&mark, sizeof(mark));
	mark.ifma_addr = &sa;

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link);
	while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) {
		/* Move the marker past 'ifma' before touching it. */
		TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
		TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark,
		    ifma_link);

		/* Skip other markers (their family is AF_UNSPEC too). */
		if (ifma->ifma_addr->sa_family == AF_UNSPEC)
			continue;

		if_delmulti_serialized(ifp, ifma->ifma_addr);
	}
	TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
}
2848 | ||
2849 | ||
/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 *
 * Returns 0 on success, EINVAL when the interface has no link-level
 * sockaddr or the length differs, ENODEV for unsupported if_type.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
	case IFT_IEEE8023ADLAG:
		/* Update both the arpcom copy and the AF_LINK sockaddr. */
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	ifnet_serialize_all(ifp);
	if ((ifp->if_flags & IFF_UP) != 0) {
#ifdef INET
		struct ifaddr_container *ifac;
#endif

		/* Bounce the interface: down then up via SIOCSIFFLAGS. */
		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_gratuitous(ifp, ifa);
		}
#endif
	}
	ifnet_deserialize_all(ifp);
	return (0);
}
2916 | ||
c42bebbd | 2917 | |
b44c913f AL |
2918 | /* |
2919 | * Tunnel interfaces can nest, also they may cause infinite recursion | |
2920 | * calls when misconfigured. Introduce an upper limit to prevent infinite | |
2921 | * recursions, as well as to constrain the nesting depth. | |
2922 | * | |
2923 | * Return 0, if tunnel nesting count is equal or less than limit. | |
2924 | */ | |
2925 | int | |
2926 | if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, | |
2927 | int limit) | |
2928 | { | |
2929 | struct m_tag *mtag; | |
2930 | int count; | |
2931 | ||
2932 | count = 1; | |
2933 | mtag = m_tag_locate(m, cookie, 0 /* type */, NULL); | |
2934 | if (mtag != NULL) | |
2935 | count += *(int *)(mtag + 1); | |
2936 | if (count > limit) { | |
2937 | log(LOG_NOTICE, | |
2938 | "%s: packet looped too many times (%d), limit %d\n", | |
2939 | ifp->if_xname, count, limit); | |
2940 | return (ELOOP); | |
2941 | } | |
2942 | ||
2943 | if (mtag == NULL) { | |
2944 | mtag = m_tag_alloc(cookie, 0, sizeof(int), M_NOWAIT); | |
2945 | if (mtag == NULL) | |
2946 | return (ENOMEM); | |
2947 | m_tag_prepend(m, mtag); | |
2948 | } | |
2949 | ||
2950 | *(int *)(mtag + 1) = count; | |
2951 | return (0); | |
2952 | } | |
2953 | ||
2954 | ||
c42bebbd RM |
2955 | /* |
2956 | * Locate an interface based on a complete address. | |
2957 | */ | |
2958 | struct ifnet * | |
2959 | if_bylla(const void *lla, unsigned char lla_len) | |
2960 | { | |
2961 | const struct ifnet_array *arr; | |
2962 | struct ifnet *ifp; | |
2963 | struct sockaddr_dl *sdl; | |
2964 | int i; | |
2965 | ||
2966 | arr = ifnet_array_get(); | |
2967 | for (i = 0; i < arr->ifnet_count; ++i) { | |
2968 | ifp = arr->ifnet_arr[i]; | |
2969 | if (ifp->if_addrlen != lla_len) | |
2970 | continue; | |
2971 | ||
2972 | sdl = IF_LLSOCKADDR(ifp); | |
2973 | if (memcmp(lla, LLADDR(sdl), lla_len) == 0) | |
2974 | return (ifp); | |
2975 | } | |
2976 | return (NULL); | |
2977 | } | |
2978 | ||
984263bc | 2979 | struct ifmultiaddr * |
f23061d4 | 2980 | ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) |
984263bc MD |
2981 | { |
2982 | struct ifmultiaddr *ifma; | |
2983 | ||
72659ed0 SZ |
2984 | /* TODO: need ifnet_serialize_main */ |
2985 | ifnet_serialize_all(ifp); | |
441d34b2 | 2986 | TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) |
0c3c561c | 2987 | if (sa_equal(ifma->ifma_addr, sa)) |
984263bc | 2988 | break; |
72659ed0 | 2989 | ifnet_deserialize_all(ifp); |
984263bc MD |
2990 | |
2991 | return ifma; | |
2992 | } | |
2993 | ||
e9bd1548 MD |
2994 | /* |
2995 | * This function locates the first real ethernet MAC from a network | |
2996 | * card and loads it into node, returning 0 on success or ENOENT if | |
2997 | * no suitable interfaces were found. It is used by the uuid code to | |
2998 | * generate a unique 6-byte number. | |
2999 | */ | |
3000 | int | |
3001 | if_getanyethermac(uint16_t *node, int minlen) | |
3002 | { | |
3003 | struct ifnet *ifp; | |
3004 | struct sockaddr_dl *sdl; | |
3005 | ||
b4051e25 SZ |
3006 | ifnet_lock(); |
3007 | TAILQ_FOREACH(ifp, &ifnetlist, if_link) { | |
e9bd1548 MD |
3008 | if (ifp->if_type != IFT_ETHER) |
3009 | continue; | |
3010 | sdl = IF_LLSOCKADDR(ifp); | |
3011 | if (sdl->sdl_alen < minlen) | |
3012 | continue; | |
3013 | bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, | |
3014 | minlen); | |
b4051e25 | 3015 | ifnet_unlock(); |
e9bd1548 MD |
3016 | return(0); |
3017 | } | |
b4051e25 | 3018 | ifnet_unlock(); |
e9bd1548 MD |
3019 | return (ENOENT); |
3020 | } | |
3021 | ||
1550dfd9 MD |
3022 | /* |
3023 | * The name argument must be a pointer to storage which will last as | |
3024 | * long as the interface does. For physical devices, the result of | |
3025 | * device_get_name(dev) is a good choice and for pseudo-devices a | |
3026 | * static string works well. | |
3027 | */ | |
3028 | void | |
3029 | if_initname(struct ifnet *ifp, const char *name, int unit) | |
3030 | { | |
3e4a09e7 MD |
3031 | ifp->if_dname = name; |
3032 | ifp->if_dunit = unit; | |
1550dfd9 | 3033 | if (unit != IF_DUNIT_NONE) |
f8c7a42d | 3034 | ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); |
1550dfd9 MD |
3035 | else |
3036 | strlcpy(ifp->if_xname, name, IFNAMSIZ); | |
3037 | } | |
3038 | ||
984263bc MD |
3039 | int |
3040 | if_printf(struct ifnet *ifp, const char *fmt, ...) | |
3041 | { | |
e2565a42 | 3042 | __va_list ap; |
984263bc MD |
3043 | int retval; |
3044 | ||
4b1cf444 | 3045 | retval = kprintf("%s: ", ifp->if_xname); |
e2565a42 | 3046 | __va_start(ap, fmt); |
379210cb | 3047 | retval += kvprintf(fmt, ap); |
e2565a42 | 3048 | __va_end(ap); |
984263bc MD |
3049 | return (retval); |
3050 | } | |
3051 | ||
cb80735c RP |
3052 | struct ifnet * |
3053 | if_alloc(uint8_t type) | |
3054 | { | |
2949c680 | 3055 | struct ifnet *ifp; |
7e395935 | 3056 | size_t size; |
cb80735c | 3057 | |
7e395935 MD |
3058 | /* |
3059 | * XXX temporary hack until arpcom is setup in if_l2com | |
3060 | */ | |
3061 | if (type == IFT_ETHER) | |
3062 | size = sizeof(struct arpcom); | |
3063 | else | |
3064 | size = sizeof(struct ifnet); | |
3065 | ||
3066 | ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); | |
cb80735c RP |
3067 | |
3068 | ifp->if_type = type; | |
3069 | ||
aeb3c11e RP |
3070 | if (if_com_alloc[type] != NULL) { |
3071 | ifp->if_l2com = if_com_alloc[type](type, ifp); | |
3072 | if (ifp->if_l2com == NULL) { | |
3073 | kfree(ifp, M_IFNET); | |
3074 | return (NULL); | |
3075 | } | |
3076 | } | |
cb80735c RP |
3077 | return (ifp); |
3078 | } | |
3079 | ||
/*
 * Release a struct ifnet obtained from if_alloc(), including its
 * optional description string.
 */
void
if_free(struct ifnet *ifp)
{
	if (ifp->if_description != NULL)
		kfree(ifp->if_description, M_IFDESCR);
	kfree(ifp, M_IFNET);
}
3087 | ||
b2f93efe JS |
3088 | void |
3089 | ifq_set_classic(struct ifaltq *ifq) | |
3090 | { | |
2cc2f639 SZ |
3091 | ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, |
3092 | ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); | |
f0a26983 SZ |
3093 | } |
3094 | ||
3095 | void | |
2cc2f639 SZ |
3096 | ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, |
3097 | ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) | |
f0a26983 SZ |
3098 | { |
3099 | int q; | |
3100 | ||
2cc2f639 SZ |
3101 | KASSERT(mapsubq != NULL, ("mapsubq is not specified")); |
3102 | KASSERT(enqueue != NULL, ("enqueue is not specified")); | |
3103 | KASSERT(dequeue != NULL, ("dequeue is not specified")); | |
3104 | KASSERT(request != NULL, ("request is not specified")); | |
3105 | ||
3106 | ifq->altq_mapsubq = mapsubq; | |
f0a26983 SZ |
3107 | for (q = 0; q < ifq->altq_subq_cnt; ++q) { |
3108 | struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; | |
3109 | ||
3110 | ifsq->ifsq_enqueue = enqueue; | |
3111 | ifsq->ifsq_dequeue = dequeue; | |
3112 | ifsq->ifsq_request = request; | |
3113 | } | |
b2f93efe JS |
3114 | } |
3115 | ||
4cc8caef SZ |
/*
 * Append 'm' to the subqueue's normal-priority class queue and bump
 * the subqueue packet/byte counters.
 */
static void
ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
{

	classq_add(&ifsq->ifsq_norm, m);
	ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
}
3123 | ||
/*
 * Append 'm' to the subqueue's high-priority class queue, bumping both
 * the overall and the priority-specific packet/byte counters.
 */
static void
ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m)
{

	classq_add(&ifsq->ifsq_prio, m);
	ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len);
	ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len);
}
3132 | ||
3133 | static struct mbuf * | |
3134 | ifsq_norm_dequeue(struct ifaltq_subque *ifsq) | |
3135 | { | |
3136 | struct mbuf *m; | |
3137 | ||
e7d68516 SZ |
3138 | m = classq_get(&ifsq->ifsq_norm); |
3139 | if (m != NULL) | |
4cc8caef | 3140 | ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); |
e7d68516 | 3141 | return (m); |
4cc8caef SZ |
3142 | } |
3143 | ||
3144 | static struct mbuf * | |
3145 | ifsq_prio_dequeue(struct ifaltq_subque *ifsq) | |
3146 | { | |
3147 | struct mbuf *m; | |
3148 | ||
e7d68516 | 3149 | m = classq_get(&ifsq->ifsq_prio); |
4cc8caef | 3150 | if (m != NULL) { |
4cc8caef SZ |
3151 | ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); |
3152 | ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); | |
3153 | } | |
e7d68516 | 3154 | return (m); |
4cc8caef SZ |
3155 | } |
3156 | ||
9db4b353 | 3157 | int |
f0a26983 SZ |
3158 | ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, |
3159 | struct altq_pktattr *pa __unused) | |
e3e4574a | 3160 | { |
2739afc4 | 3161 | |
0ec85f2e | 3162 | M_ASSERTPKTHDR(m); |
2739afc4 | 3163 | again: |
68dc1916 SZ |
3164 | if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || |
3165 | ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { | |
2739afc4 SZ |
3166 | struct mbuf *m_drop; |
3167 | ||
3168 | if (m->m_flags & M_PRIO) { | |
3169 | m_drop = NULL; | |
3170 | if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && | |
3171 | ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { | |
3172 | /* Try dropping some from normal queue. */ | |
3173 | m_drop = ifsq_norm_dequeue(ifsq); | |
4cc8caef | 3174 | } |
2739afc4 SZ |
3175 | if (m_drop == NULL) |
3176 | m_drop = ifsq_prio_dequeue(ifsq); | |
3177 | } else { | |
3178 | m_drop = ifsq_norm_dequeue(ifsq); | |
3179 | } | |
3180 | if (m_drop != NULL) { | |
3181 | IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); | |
3182 | m_freem(m_drop); | |
3183 | goto again; | |
4cc8caef | 3184 | } |
2739afc4 SZ |
3185 | /* |
3186 | * No old packets could be dropped! | |
3187 | * NOTE: Caller increases oqdrops. | |
3188 | */ | |
e3e4574a | 3189 | m_freem(m); |
2739afc4 | 3190 | return (ENOBUFS); |
e3e4574a | 3191 | } else { |
4cc8caef SZ |
3192 | if (m->m_flags & M_PRIO) |
3193 | ifsq_prio_enqueue(ifsq, m); | |
338bb46c | 3194 | else |
4cc8caef | 3195 | ifsq_norm_enqueue(ifsq, m); |
2739afc4 | 3196 | return (0); |
0ec85f2e | 3197 | } |
e3e4574a JS |
3198 | } |
3199 | ||
9db4b353 | 3200 | struct mbuf * |
6dadc833 | 3201 | ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) |
e3e4574a JS |
3202 | { |
3203 | struct mbuf *m; | |
3204 | ||
3205 | switch (op) { | |
3206 | case ALTDQ_POLL: | |
e7d68516 | 3207 | m = classq_head(&ifsq->ifsq_prio); |
4cc8caef | 3208 | if (m == NULL) |
e7d68516 | 3209 | m = classq_head(&ifsq->ifsq_norm); |
e3e4574a | 3210 | break; |
338bb46c | 3211 | |
e3e4574a | 3212 | case ALTDQ_REMOVE: |
4cc8caef SZ |
3213 | m = ifsq_prio_dequeue(ifsq); |
3214 | if (m == NULL) | |
3215 | m = ifsq_norm_dequeue(ifsq); | |
e3e4574a | 3216 | break; |
338bb46c | 3217 | |
e3e4574a JS |
3218 | default: |
3219 | panic("unsupported ALTQ dequeue op: %d", op); | |
3220 | } | |
338bb46c | 3221 | return m; |
e3e4574a JS |
3222 | } |
3223 | ||
9db4b353 | 3224 | int |
f0a26983 | 3225 | ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) |
e3e4574a JS |
3226 | { |
3227 | switch (req) { | |
3228 | case ALTRQ_PURGE: | |
338bb46c SZ |
3229 | for (;;) { |
3230 | struct mbuf *m; | |
3231 | ||
6dadc833 | 3232 | m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); |
338bb46c SZ |
3233 | if (m == NULL) |
3234 | break; | |
3235 | m_freem(m); | |
3236 | } | |
e3e4574a | 3237 | break; |
338bb46c | 3238 | |
e3e4574a | 3239 | default: |
3f625015 | 3240 | panic("unsupported ALTQ request: %d", req); |
e3e4574a | 3241 | } |
338bb46c | 3242 | return 0; |
e3e4574a | 3243 | } |
b2632176 | 3244 | |
/*
 * Attempt to run ifnet.if_start directly on 'ifsq'; fall back to
 * scheduling it on the subqueue owner CPU when the hardware serializer
 * is contended or more data remains afterwards.
 */
static void
ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	/*
	 * Try to do direct ifnet.if_start on the subqueue first, if there is
	 * contention on the subqueue hardware serializer, ifnet.if_start on
	 * the subqueue will be scheduled on the subqueue owner CPU.
	 */
	if (!ifsq_tryserialize_hw(ifsq)) {
		/*
		 * Subqueue hardware serializer contention happened,
		 * ifnet.if_start on the subqueue is scheduled on
		 * the subqueue owner CPU, and we keep going.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	/* Only call the driver while it is up and not output-active. */
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);

	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start on the
		 * subqueue is scheduled on the subqueue owner CPU, and we
		 * keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, force_sched);
	}
}
3285 | ||
/*
 * Subqeue packets staging mechanism:
 *
 * The packets enqueued into the subqueue are staged to a certain amount
 * before the ifnet.if_start on the subqueue is called.  In this way, the
 * driver could avoid writing to hardware registers upon every packet,
 * instead, hardware registers could be written when certain amount of
 * packets are put onto hardware TX ring.  The measurement on several modern
 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware
 * registers writing aggregation could save ~20% CPU time when 18bytes UDP
 * datagrams are transmitted at 1.48Mpps.  The performance improvement by
 * hardware registers writing aggeregation is also mentioned by Luigi Rizzo's
 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
 *
 * Subqueue packets staging is performed for two entry points into drivers'
 * transmission function:
 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try()
 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule()
 *
 * Subqueue packets staging will be stopped upon any of the following
 * conditions:
 * - If the count of packets enqueued on the current CPU is great than or
 *   equal to ifsq_stage_cntmax. (XXX this should be per-interface)
 * - If the total length of packets enqueued on the current CPU is great
 *   than or equal to the hardware's MTU - max_protohdr.  max_protohdr is
 *   cut from the hardware's MTU mainly bacause a full TCP segment's size
 *   is usually less than hardware's MTU.
 * - ifsq_ifstart_schedule() is not pending on the current CPU and
 *   ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
 *   released.
 * - The if_start_rollup(), which is registered as low priority netisr
 *   rollup function, is called; probably because no more work is pending
 *   for netisr.
 *
 * NOTE:
 * Currently subqueue packet staging is only performed in netisr threads.
 */
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;

	crit_enter_quick(td);

	/* Pick the subqueue owned by the current CPU. */
	ifsq = ifq_map_subq(ifq, gd->gd_cpuid);
	ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);

	/* Snapshot accounting data before the mbuf may be consumed. */
	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	/* Staging only applies when called from a netisr thread. */
	if (td->td_type == TD_TYPE_NETISR) {
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		IFNET_STAT_INC(ifp, oqdrops, 1);
		if (!ifsq_data_ready(ifsq)) {
			/* Nothing queued at all; nothing to start. */
			ALTQ_SQ_UNLOCK(ifsq);
			goto done;
		}
		/* Queue still has data: must not defer the start. */
		avoid_start = 0;
	} else {
		IFNET_STAT_INC(ifp, obytes, len);
		if (mcast)
			IFNET_STAT_INC(ifp, omcasts, 1);
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			/* Stage this CPU's packets; start later. */
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			goto done;
		}

		/*
		 * Hold the subqueue interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (stage != NULL) {
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				/* Hand off to the subqueue owner CPU. */
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			goto done;
		}

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			/* Staging ends here; reset the counters. */
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (start)
		ifsq_ifstart_try(ifsq, 0);

done:
	crit_exit_quick(td);
	return error;
}
3412 | ||
/*
 * Allocate an ifaddr of the given size (which must be at least
 * sizeof(struct ifaddr)) along with one ifaddr_container per CPU.
 * Each per-CPU container starts with a reference count of 1 and
 * points back at the ifaddr.  Returns the zeroed ifaddr.
 */
void *
ifa_create(int size)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO);

	/*
	 * Make ifa_container available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifa->ifa_containers =
	    kmalloc(ncpus * sizeof(struct ifaddr_container),
		M_IFADDR,
		M_INTWAIT | M_ZERO | M_CACHEALIGN);

	/* ifa_ncnt counts how many per-CPU containers are still alive. */
	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}
3445 | ||
b2632176 SZ |
/*
 * Release a per-CPU ifaddr_container whose reference count has reached
 * zero.  The container must no longer be on any address list.  When the
 * last container of the owning ifaddr goes away (ifa_ncnt drops to 0),
 * the container array and the ifaddr itself are freed.
 */
void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
	    ("ifa is still on %#x lists", ifac->ifa_listmask));

	/* Poison the magic so use-after-free of this container asserts. */
	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
	    ("invalid # of ifac, %d", ifa->ifa_ncnt));
	/* Atomically drop this CPU's container; last one frees the ifaddr. */
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}
3472 | ||
/*
 * Per-CPU handler for ifa_iflink(): insert this CPU's ifaddr_container
 * onto the interface's per-CPU address list (head or tail as requested),
 * then forward the message to the next netisr CPU.
 */
static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
	    ("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	/* Propagate the request to the remaining netisr CPUs. */
	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}
3499 | ||
3500 | void | |
3501 | ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) | |
3502 | { | |
3503 | struct netmsg_ifaddr msg; | |
3504 | ||
002c1265 | 3505 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, |
48e7b118 | 3506 | 0, ifa_iflink_dispatch); |
b2632176 SZ |
3507 | msg.ifa = ifa; |
3508 | msg.ifp = ifp; | |
3509 | msg.tail = tail; | |
3510 | ||
92b34312 | 3511 | netisr_domsg(&msg.base, 0); |
b2632176 SZ |
3512 | } |
3513 | ||
/*
 * Per-CPU handler for ifa_ifunlink(): remove this CPU's
 * ifaddr_container from the interface's per-CPU address list, then
 * forward the message to the next netisr CPU.
 */
static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
	    ("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;

	crit_exit();

	/* Propagate the request to the remaining netisr CPUs. */
	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}
3537 | ||
3538 | void | |
3539 | ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) | |
3540 | { | |
3541 | struct netmsg_ifaddr msg; | |
3542 | ||
002c1265 | 3543 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, |
48e7b118 | 3544 | 0, ifa_ifunlink_dispatch); |
b2632176 SZ |
3545 | msg.ifa = ifa; |
3546 | msg.ifp = ifp; | |
3547 | ||
92b34312 | 3548 | netisr_domsg(&msg.base, 0); |
b2632176 SZ |
3549 | } |
3550 | ||
/*
 * Per-CPU handler for ifa_destroy(): drop this CPU's reference on the
 * ifaddr, then forward the message to the next netisr CPU.
 */
static void
ifa_destroy_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;

	IFAFREE(msg->ifa);
	netisr_forwardmsg_all(&nmsg->base, mycpuid + 1);
}
3559 | ||
3560 | void | |
3561 | ifa_destroy(struct ifaddr *ifa) | |
3562 | { | |
3563 | struct netmsg_ifaddr msg; | |
3564 | ||
002c1265 | 3565 | netmsg_init(&msg.base, NULL, &curthread->td_msgport, |
48e7b118 | 3566 | 0, ifa_destroy_dispatch); |
b2632176 SZ |
3567 | msg.ifa = ifa; |
3568 | ||
92b34312 | 3569 | netisr_domsg(&msg.base, 0); |
b2632176 SZ |
3570 | } |
3571 | ||
239bdb58 SZ |
/*
 * Netisr rollup handler (registered in ifnetinit): drain this CPU's
 * list of staged subqueues.  A stage marked SCHED is handed to
 * ifsq_ifstart_schedule(); otherwise we try to start the subqueue
 * directly after grabbing the if_start interlock.
 */
static void
if_start_rollup(void)
{
	struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
	struct ifsubq_stage *stage;

	crit_enter();

	while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
		struct ifaltq_subque *ifsq = stage->stg_subq;
		int is_sched = 0;

		/* Latch the SCHED flag before the remove clears state. */
		if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
			is_sched = 1;
		ifsq_stage_remove(head, stage);

		if (is_sched) {
			ifsq_ifstart_schedule(ifsq, 1);
		} else {
			int start = 0;

			ALTQ_SQ_LOCK(ifsq);
			if (!ifsq_is_started(ifsq)) {
				/*
				 * Hold the subqueue interlock of
				 * ifnet.if_start
				 */
				ifsq_set_started(ifsq);
				start = 1;
			}
			ALTQ_SQ_UNLOCK(ifsq);

			if (start)
				ifsq_ifstart_try(ifsq, 1);
		}
		KKASSERT((stage->stg_flags &
		    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	}

	crit_exit();
}
239bdb58 | 3613 | |
b2632176 | 3614 | static void |
90af4fd3 | 3615 | ifnetinit(void *dummy __unused) |
b2632176 SZ |
3616 | { |
3617 | int i; | |
3618 | ||
43dbcc2a | 3619 | /* XXX netisr_ncpus */ |
28cc0c29 | 3620 | for (i = 0; i < ncpus; ++i) |
f0a26983 | 3621 | TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); |
239bdb58 | 3622 | netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); |
b2632176 | 3623 | } |
bd08b792 | 3624 | |
aeb3c11e RP |
3625 | void |
3626 | if_register_com_alloc(u_char type, | |
3627 | if_com_alloc_t *a, if_com_free_t *f) | |
3628 | { | |
3629 | ||
3630 | KASSERT(if_com_alloc[type] == NULL, | |
3631 | ("if_register_com_alloc: %d already registered", type)); | |
3632 | KASSERT(if_com_free[type] == NULL, | |
3633 | ("if_register_com_alloc: %d free already registered", type)); | |
3634 | ||
3635 | if_com_alloc[type] = a; | |
3636 | if_com_free[type] = f; | |
3637 | } | |
3638 | ||
3639 | void | |
3640 | if_deregister_com_alloc(u_char type) | |
3641 | { | |
3642 | ||
3643 | KASSERT(if_com_alloc[type] != NULL, | |
3644 | ("if_deregister_com_alloc: %d not registered", type)); | |
3645 | KASSERT(if_com_free[type] != NULL, | |
3646 | ("if_deregister_com_alloc: %d free not registered", type)); | |
3647 | if_com_alloc[type] = NULL; | |
3648 | if_com_free[type] = NULL; | |
3649 | } | |
a317449e | 3650 | |
b7a0c958 SZ |
3651 | void |
3652 | ifq_set_maxlen(struct ifaltq *ifq, int len) | |
3653 | { | |
f0a26983 | 3654 | ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); |
b7a0c958 | 3655 | } |
2cc2f639 SZ |
3656 | |
3657 | int | |
3658 | ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) | |
3659 | { | |
3660 | return ALTQ_SUBQ_INDEX_DEFAULT; | |
3661 | } | |
8a248085 | 3662 | |
68732d8f SZ |
3663 | int |
3664 | ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) | |
3665 | { | |
3666 | ||
3667 | return (cpuid % ifq->altq_subq_mappriv); | |
c3fb75dd SZ |
3668 | } |
3669 | ||
e2292763 MD |
/*
 * Watchdog timeout.  Process callback as appropriate.  If we cannot
 * serialize the ifnet just try again on the next timeout.
 *
 * NOTE: The ifnet can adjust wd_timer while holding the serializer.  We
 *	 can only safely adjust it under the same circumstances.
 */
static void
ifsq_watchdog(void *arg)
{
	struct ifsubq_watchdog *wd = arg;
	struct ifnet *ifp;
	int count;

	/*
	 * Fast track.  Try to avoid acquiring the serializer when not
	 * near the terminal count, unless asked to.  If the atomic op
	 * to decrement the count fails just retry on the next callout.
	 */
	count = wd->wd_timer;
	cpu_ccfence();
	if (count == 0)
		goto done;
	if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) {
		(void)atomic_cmpset_int(&wd->wd_timer, count, count - 1);
		goto done;
	}

	/*
	 * Obtain the serializer and then re-test all wd_timer conditions
	 * as it may have changed.  NICs do not mess with wd_timer without
	 * holding the serializer.
	 *
	 * If we are unable to obtain the serializer just retry the same
	 * count on the next callout.
	 *
	 * - call watchdog in terminal count (0)
	 * - call watchdog on last tick (1) if requested
	 * - call watchdog on all ticks if requested
	 */
	ifp = ifsq_get_ifp(wd->wd_subq);
	if (ifnet_tryserialize_all(ifp) == 0)
		goto done;
	if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) {
		--count;
		if (count == 0 ||
		    (wd->wd_flags & IF_WDOG_ALLTICKS) ||
		    ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) {
			wd->wd_watchdog(wd->wd_subq);
		}
	}
	ifnet_deserialize_all(ifp);
done:
	/* Re-arm for the next tick regardless of outcome. */
	ifsq_watchdog_reset(wd);
}
3725 | ||
/* Arm the watchdog callout for one tick on the subqueue's owner CPU. */
static void
ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
{
	callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
	    ifsq_get_cpuid(wd->wd_subq));
}
3732 | ||
3733 | void | |
3734 | ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, | |
e2292763 | 3735 | ifsq_watchdog_t watchdog, int flags) |
8a248085 SZ |
3736 | { |
3737 | callout_init_mp(&wd->wd_callout); | |
3738 | wd->wd_timer = 0; | |
e2292763 | 3739 | wd->wd_flags = flags; |
8a248085 SZ |
3740 | wd->wd_subq = ifsq; |
3741 | wd->wd_watchdog = watchdog; | |
3742 | } | |
3743 | ||
/* Start the watchdog: clear any stale timer value, then arm the callout. */
void
ifsq_watchdog_start(struct ifsubq_watchdog *wd)
{
	atomic_swap_int(&wd->wd_timer, 0);
	ifsq_watchdog_reset(wd);
}
3750 | ||
/* Stop the watchdog: clear the timer, then cancel the pending callout. */
void
ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
{
	atomic_swap_int(&wd->wd_timer, 0);
	callout_stop(&wd->wd_callout);
}
b4051e25 | 3757 | |
e2292763 MD |
/* Atomically (re)load the watchdog countdown, in ticks. */
void
ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count)
{
	atomic_swap_int(&wd->wd_timer, count);
}
3763 | ||
b4051e25 SZ |
/*
 * Acquire the global ifnet list mutex.  Must not be called from a
 * netisr thread (enforced by the assertion).
 */
void
ifnet_lock(void)
{
	KASSERT(curthread->td_type != TD_TYPE_NETISR,
	    ("try holding ifnet lock in netisr"));
	mtx_lock(&ifnet_mtx);
}
3771 | ||
/*
 * Release the global ifnet list mutex.  Must not be called from a
 * netisr thread (enforced by the assertion).
 */
void
ifnet_unlock(void)
{
	KASSERT(curthread->td_type != TD_TYPE_NETISR,
	    ("try holding ifnet lock in netisr"));
	mtx_unlock(&ifnet_mtx);
}
3779 | ||
3780 | static struct ifnet_array * | |
3781 | ifnet_array_alloc(int count) | |
3782 | { | |
3783 | struct ifnet_array *arr; | |
3784 | ||
3785 | arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), | |
3786 | M_IFNET, M_WAITOK); | |
3787 | arr->ifnet_count = count; | |
3788 | ||
3789 | return arr; | |
3790 | } | |
3791 | ||
3792 | static void | |
3793 | ifnet_array_free(struct ifnet_array *arr) | |
3794 | { | |
3795 | if (arr == &ifnet_array0) | |
3796 | return; | |
3797 | kfree(arr, M_IFNET); | |
3798 | } | |
3799 | ||
3800 | static struct ifnet_array * | |
3801 | ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) | |
3802 | { | |
3803 | struct ifnet_array *arr; | |
3804 | int count, i; | |
3805 | ||
3806 | KASSERT(old_arr->ifnet_count >= 0, | |
3807 | ("invalid ifnet array count %d", old_arr->ifnet_count)); | |
3808 | count = old_arr->ifnet_count + 1; | |
3809 | arr = ifnet_array_alloc(count); | |
3810 | ||
3811 | /* | |
3812 | * Save the old ifnet array and append this ifp to the end of | |
3813 | * the new ifnet array. | |
3814 | */ | |
3815 | for (i = 0; i < old_arr->ifnet_count; ++i) { | |
3816 | KASSERT(old_arr->ifnet_arr[i] != ifp, | |
3817 | ("%s is already in ifnet array", ifp->if_xname)); | |
3818 | arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; | |
3819 | } | |
3820 | KASSERT(i == count - 1, | |
3821 | ("add %s, ifnet array index mismatch, should be %d, but got %d", | |
3822 | ifp->if_xname, count - 1, i)); | |
3823 | arr->ifnet_arr[i] = ifp; | |
3824 | ||
3825 | return arr; | |
3826 | } | |
3827 | ||
3828 | static struct ifnet_array * | |
3829 | ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) | |
3830 | { | |
3831 | struct ifnet_array *arr; | |
3832 | int count, i, idx, found = 0; | |
3833 | ||
3834 | KASSERT(old_arr->ifnet_count > 0, | |
3835 | ("invalid ifnet array count %d", old_arr->ifnet_count)); | |
3836 | count = old_arr->ifnet_count - 1; | |
3837 | arr = ifnet_array_alloc(count); | |
3838 | ||
3839 | /* | |
3840 | * Save the old ifnet array, but skip this ifp. | |
3841 | */ | |
3842 | idx = 0; | |
3843 | for (i = 0; i < old_arr->ifnet_count; ++i) { | |
3844 | if (old_arr->ifnet_arr[i] == ifp) { | |
3845 | KASSERT(!found, | |
3846 | ("dup %s is in ifnet array", ifp->if_xname)); | |
3847 | found = 1; | |
3848 | continue; | |
3849 | } | |
3850 | KASSERT(idx < count, | |
3851 | ("invalid ifnet array index %d, count %d", idx, count)); | |
3852 | arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; | |
3853 | ++idx; | |
3854 | } | |
3855 | KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); | |
3856 | KASSERT(idx == count, | |
3857 | ("del %s, ifnet array count mismatch, should be %d, but got %d ", | |
3858 | ifp->if_xname, count, idx)); | |
3859 | ||
3860 | return arr; | |
3861 | } | |
3862 | ||
/*
 * Return the current ifnet array snapshot.  Only callable from a
 * netisr thread (asserted).
 */
const struct ifnet_array *
ifnet_array_get(void)
{
	const struct ifnet_array *ret;

	KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr"));
	ret = ifnet_array;
	/* Make sure 'ret' is really used. */
	cpu_ccfence();
	return (ret);
}
3874 | ||
3875 | int | |
3876 | ifnet_array_isempty(void) | |
3877 | { | |
3878 | KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); | |
3879 | if (ifnet_array->ifnet_count == 0) | |
3880 | return 1; | |
3881 | else | |
3882 | return 0; | |
3883 | } | |
9a74b592 SZ |
3884 | |
3885 | void | |
3886 | ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) | |
3887 | { | |
3888 | struct ifaddr *ifa; | |
3889 | ||
3890 | memset(mark, 0, sizeof(*mark)); | |
3891 | ifa = &mark->ifa; | |
3892 | ||
3893 | mark->ifac.ifa = ifa; | |
3894 | ||
3895 | ifa->ifa_addr = &mark->addr; | |
3896 | ifa->ifa_dstaddr = &mark->dstaddr; | |
3897 | ifa->ifa_netmask = &mark->netmask; | |
3898 | ifa->ifa_ifp = ifp; | |
3899 | } | |
68732d8f SZ |
3900 | |
3901 | static int | |
3902 | if_ringcnt_fixup(int ring_cnt, int ring_cntmax) | |
3903 | { | |
3904 | ||
3905 | KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); | |
68732d8f SZ |
3906 | |
3907 | if (ring_cnt <= 0 || ring_cnt > ring_cntmax) | |
3908 | ring_cnt = ring_cntmax; | |
3909 | if (ring_cnt > netisr_ncpus) | |
3910 | ring_cnt = netisr_ncpus; | |
3911 | return (ring_cnt); | |
3912 | } | |
3913 | ||
/*
 * Set the CPU grid size of a ringmap and map each ring to a CPU inside
 * the grid.  The grid must be large enough to hold all rings.  The
 * starting CPU offset is derived from the device unit number so that
 * different units of the same driver land on different CPUs.
 */
static void
if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid)
{
	int i, offset;

	KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid));
	KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d",
	    grid, rm->rm_cnt));
	rm->rm_grid = grid;

	/* Stagger this unit's rings across the netisr CPUs. */
	offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus;
	for (i = 0; i < rm->rm_cnt; ++i) {
		rm->rm_cpumap[i] = offset + i;
		KASSERT(rm->rm_cpumap[i] < netisr_ncpus,
		    ("invalid cpumap[%d] = %d, offset %d", i,
		     rm->rm_cpumap[i], offset));
	}
}
3932 | ||
434f3dd0 SZ |
/*
 * Allocate a ringmap.  The ring count is first clamped by
 * if_ringcnt_fixup(); RINGMAP_FLAG_POWEROF2 additionally rounds it
 * down to a power of 2.  The grid is chosen from the divisors of
 * netisr_ncpus: the smallest divisor-grid that still holds all rings.
 */
static struct if_ringmap *
if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax,
    uint32_t flags)
{
	struct if_ringmap *rm;
	int i, grid = 0, prev_grid;

	ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax);
	rm = kmalloc(__offsetof(struct if_ringmap, rm_cpumap[ring_cnt]),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	rm->rm_cnt = ring_cnt;
	if (flags & RINGMAP_FLAG_POWEROF2)
		rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1);

	/*
	 * Walk grids netisr_ncpus/1, netisr_ncpus/2, ... (divisors
	 * only, from largest to smallest); stop at the last grid that
	 * can still hold rm_cnt rings.
	 */
	prev_grid = netisr_ncpus;
	for (i = 0; i < netisr_ncpus; ++i) {
		if (netisr_ncpus % (i + 1) != 0)
			continue;

		grid = netisr_ncpus / (i + 1);
		if (rm->rm_cnt > grid) {
			/* Too small; fall back to the previous grid. */
			grid = prev_grid;
			break;
		}

		if (rm->rm_cnt > netisr_ncpus / (i + 2))
			break;
		prev_grid = grid;
	}
	if_ringmap_set_grid(dev, rm, grid);

	return (rm);
}
3967 | ||
434f3dd0 SZ |
/* Allocate a ringmap without any power-of-2 rounding of the ring count. */
struct if_ringmap *
if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax)
{

	return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax,
	    RINGMAP_FLAG_NONE));
}
3975 | ||
/* Allocate a ringmap, rounding the ring count down to a power of 2. */
struct if_ringmap *
if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax)
{

	return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax,
	    RINGMAP_FLAG_POWEROF2));
}
3983 | ||
68732d8f SZ |
/* Release a ringmap allocated by if_ringmap_alloc()/if_ringmap_alloc2(). */
void
if_ringmap_free(struct if_ringmap *rm)
{

	kfree(rm, M_DEVBUF);
}
3990 | ||
a0964e91 SZ |
3991 | /* |
3992 | * Align the two ringmaps. | |
3993 | * | |
3994 | * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. | |
3995 | * | |
3996 | * Before: | |
3997 | * | |
3998 | * CPU 0 1 2 3 4 5 6 7 | |
3999 | * NIC_RX n0 n1 n2 n3 | |
4000 | * NIC_TX N0 N1 | |
4001 | * | |
4002 | * After: | |
4003 | * | |
4004 | * CPU 0 1 2 3 4 5 6 7 | |
4005 | * NIC_RX n0 n1 n2 n3 | |
4006 | * NIC_TX N0 N1 | |
4007 | */ | |
68732d8f SZ |
4008 | void |
4009 | if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) | |
4010 | { | |
4011 | ||
4012 | if (rm0->rm_grid > rm1->rm_grid) | |
4013 | if_ringmap_set_grid(dev, rm1, rm0->rm_grid); | |
4014 | else if (rm0->rm_grid < rm1->rm_grid) | |
4015 | if_ringmap_set_grid(dev, rm0, rm1->rm_grid); | |
4016 | } | |
4017 | ||
/*
 * Match two ringmaps of the same device: align their grids, then, when
 * there is room, shift the smaller ("subset") map to a different slice
 * of the larger map's grid based on the device unit, so rings spread
 * more evenly over the CPUs.
 */
void
if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1)
{
	int subset_grid, cnt, divisor, mod, offset, i;
	struct if_ringmap *subset_rm, *rm;
	int old_rm0_grid, old_rm1_grid;

	if (rm0->rm_grid == rm1->rm_grid)
		return;

	/* Save grid for later use */
	old_rm0_grid = rm0->rm_grid;
	old_rm1_grid = rm1->rm_grid;

	if_ringmap_align(dev, rm0, rm1);

	/*
	 * Re-shuffle rings to get more even distribution.
	 *
	 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings.
	 *
	 * CPU       0  1  2  3   4  5  6  7   8  9  10 11
	 *
	 * NIC_RX   a0 a1 a2 a3  b0 b1 b2 b3  c0 c1 c2 c3
	 * NIC_TX   A0 A1        B0 B1        C0 C1
	 *
	 * NIC_RX   d0 d1 d2 d3  e0 e1 e2 e3  f0 f1 f2 f3
	 * NIC_TX         D0 D1        E0 E1        F0 F1
	 */

	/* Pick the larger map as the superset, the other as the subset. */
	if (rm0->rm_cnt >= (2 * old_rm1_grid)) {
		cnt = rm0->rm_cnt;
		subset_grid = old_rm1_grid;
		subset_rm = rm1;
		rm = rm0;
	} else if (rm1->rm_cnt > (2 * old_rm0_grid)) {
		cnt = rm1->rm_cnt;
		subset_grid = old_rm0_grid;
		subset_rm = rm0;
		rm = rm1;
	} else {
		/* No space to shuffle. */
		return;
	}

	/* Shift the subset by a unit-dependent multiple of its grid. */
	mod = cnt / subset_grid;
	KKASSERT(mod >= 2);
	divisor = netisr_ncpus / rm->rm_grid;
	offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid;

	for (i = 0; i < subset_rm->rm_cnt; ++i) {
		subset_rm->rm_cpumap[i] += offset;
		KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus,
		    ("match: invalid cpumap[%d] = %d, offset %d",
		     i, subset_rm->rm_cpumap[i], offset));
	}
#ifdef INVARIANTS
	/* Every subset CPU must also appear in the superset's cpumap. */
	for (i = 0; i < subset_rm->rm_cnt; ++i) {
		int j;

		for (j = 0; j < rm->rm_cnt; ++j) {
			if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i])
				break;
		}
		KASSERT(j < rm->rm_cnt,
		    ("subset cpumap[%d] = %d not found in superset",
		     i, subset_rm->rm_cpumap[i]));
	}
#endif
}
4088 | ||
/* Return the number of rings in the ringmap. */
int
if_ringmap_count(const struct if_ringmap *rm)
{

	return (rm->rm_cnt);
}
4095 | ||
/* Return the CPU assigned to the given ring (ring must be in range). */
int
if_ringmap_cpumap(const struct if_ringmap *rm, int ring)
{

	KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring));
	return (rm->rm_cpumap[ring]);
}
4103 | ||
/*
 * Fill an RSS redirect table of table_nent entries with ring indices.
 * table_nent must be positive and a multiple of NETISR_CPUMAX (checked
 * via NETISR_CPUMASK).
 */
void
if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent)
{
	int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy;

	KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0,
	    ("invalid redirect table entries %d", table_nent));

	/* Base pattern: cycle ring indices within each grid. */
	grid_idx = 0;
	for (i = 0; i < NETISR_CPUMAX; ++i) {
		table[i] = grid_idx++ % rm->rm_cnt;

		if (grid_idx == rm->rm_grid)
			grid_idx = 0;
	}

	/*
	 * Make the ring distributed more evenly for the remainder
	 * of each grid.
	 *
	 * e.g. 12 netisrs, rm contains 8 rings.
	 *
	 * Redirect table before:
	 *
	 *  0  1  2  3  4  5  6  7  0  1  2  3  0  1  2  3
	 *  4  5  6  7  0  1  2  3  0  1  2  3  4  5  6  7
	 *  0  1  2  3  0  1  2  3  4  5  6  7  0  1  2  3
	 *  ....
	 *
	 * Redirect table after being patched (pX, patched entries):
	 *
	 *  0  1  2  3  4  5  6  7 p0 p1 p2 p3  0  1  2  3
	 *  4  5  6  7 p4 p5 p6 p7  0  1  2  3  4  5  6  7
	 * p0 p1 p2 p3  0  1  2  3  4  5  6  7 p4 p5 p6 p7
	 *  ....
	 */
	patch_cnt = rm->rm_grid % rm->rm_cnt;
	if (patch_cnt == 0)
		goto done;
	patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt);

	grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid;
	grid_idx = 0;
	for (i = 0; i < grid_cnt; ++i) {
		int j;

		for (j = 0; j < patch_cnt; ++j) {
			int fix_idx;

			fix_idx = (i * rm->rm_grid) + patch_off + j;
			if (fix_idx >= NETISR_CPUMAX)
				goto done;
			table[fix_idx] = grid_idx++ % rm->rm_cnt;
		}
	}
done:
	/*
	 * If the device supports larger redirect table, duplicate
	 * the first NETISR_CPUMAX entries to the rest of the table,
	 * so that it matches upper layer's expectation:
	 * (hash & NETISR_CPUMASK) % netisr_ncpus
	 */
	ncopy = table_nent / NETISR_CPUMAX;
	for (i = 1; i < ncopy; ++i) {
		memcpy(&table[i * NETISR_CPUMAX], table,
		    NETISR_CPUMAX * sizeof(table[0]));
	}
	if (if_ringmap_dumprdr) {
		for (i = 0; i < table_nent; ++i) {
			if (i != 0 && i % 16 == 0)
				kprintf("\n");
			kprintf("%03d ", table[i]);
		}
		kprintf("\n");
	}
}
4180 | ||
4181 | int | |
4182 | if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) | |
4183 | { | |
4184 | struct if_ringmap *rm = arg1; | |
4185 | int i, error = 0; | |
4186 | ||
4187 | for (i = 0; i < rm->rm_cnt; ++i) { | |
4188 | int cpu = rm->rm_cpumap[i]; | |
4189 | ||
4190 | error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); | |
4191 | if (error) | |
4192 | break; | |
4193 | } | |
4194 | return (error); | |
4195 | } |