Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /* |
2 | * Copyright (c) 1982, 1986, 1988, 1990, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
e90fc8a2 | 13 | * 3. Neither the name of the University nor the names of its contributors |
984263bc MD |
14 | * may be used to endorse or promote products derived from this software |
15 | * without specific prior written permission. | |
16 | * | |
17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
27 | * SUCH DAMAGE. | |
28 | * | |
29 | * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 | |
30 | * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.37 2003/04/15 06:44:45 silby Exp $ | |
47d96de7 | 31 | * $DragonFly: src/sys/netinet/ip_output.c,v 1.67 2008/10/28 03:07:28 sephe Exp $ |
984263bc MD |
32 | */ |
33 | ||
34 | #define _IP_VHL | |
35 | ||
36 | #include "opt_ipfw.h" | |
37 | #include "opt_ipdn.h" | |
38 | #include "opt_ipdivert.h" | |
39 | #include "opt_ipfilter.h" | |
40 | #include "opt_ipsec.h" | |
984263bc | 41 | #include "opt_mbuf_stress_test.h" |
9b42cabe | 42 | #include "opt_mpls.h" |
984263bc MD |
43 | |
44 | #include <sys/param.h> | |
45 | #include <sys/systm.h> | |
46 | #include <sys/kernel.h> | |
47 | #include <sys/malloc.h> | |
48 | #include <sys/mbuf.h> | |
49 | #include <sys/protosw.h> | |
50 | #include <sys/socket.h> | |
51 | #include <sys/socketvar.h> | |
52 | #include <sys/proc.h> | |
895c1f85 | 53 | #include <sys/priv.h> |
984263bc | 54 | #include <sys/sysctl.h> |
1cae611f | 55 | #include <sys/thread2.h> |
3f9db7f8 | 56 | #include <sys/in_cksum.h> |
87b66be9 | 57 | #include <sys/lock.h> |
984263bc MD |
58 | |
59 | #include <net/if.h> | |
9eeaa8a9 | 60 | #include <net/netisr.h> |
e7e55f42 | 61 | #include <net/pfil.h> |
984263bc MD |
62 | #include <net/route.h> |
63 | ||
64 | #include <netinet/in.h> | |
65 | #include <netinet/in_systm.h> | |
66 | #include <netinet/ip.h> | |
67 | #include <netinet/in_pcb.h> | |
68 | #include <netinet/in_var.h> | |
69 | #include <netinet/ip_var.h> | |
70 | ||
9b42cabe NA |
71 | #include <netproto/mpls/mpls_var.h> |
72 | ||
984263bc MD |
73 | static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); |
74 | ||
75 | #ifdef IPSEC | |
76 | #include <netinet6/ipsec.h> | |
d2438d69 | 77 | #include <netproto/key/key.h> |
984263bc | 78 | #ifdef IPSEC_DEBUG |
d2438d69 | 79 | #include <netproto/key/key_debug.h> |
984263bc MD |
80 | #else |
81 | #define KEYDEBUG(lev,arg) | |
82 | #endif | |
83 | #endif /*IPSEC*/ | |
84 | ||
85 | #ifdef FAST_IPSEC | |
bf844ffa JH |
86 | #include <netproto/ipsec/ipsec.h> |
87 | #include <netproto/ipsec/xform.h> | |
88 | #include <netproto/ipsec/key.h> | |
984263bc MD |
89 | #endif /*FAST_IPSEC*/ |
90 | ||
1f2de5d4 MD |
91 | #include <net/ipfw/ip_fw.h> |
92 | #include <net/dummynet/ip_dummynet.h> | |
984263bc | 93 | |
a6ec04bc | 94 | #define print_ip(x, a, y) kprintf("%s %d.%d.%d.%d%s",\ |
984263bc MD |
95 | x, (ntohl(a.s_addr)>>24)&0xFF,\ |
96 | (ntohl(a.s_addr)>>16)&0xFF,\ | |
97 | (ntohl(a.s_addr)>>8)&0xFF,\ | |
98 | (ntohl(a.s_addr))&0xFF, y); | |
99 | ||
100 | u_short ip_id; | |
101 | ||
102 | #ifdef MBUF_STRESS_TEST | |
103 | int mbuf_frag_size = 0; | |
104 | SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, | |
105 | &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); | |
106 | #endif | |
107 | ||
108 | static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); | |
109 | static struct ifnet *ip_multicast_if(struct in_addr *, int *); | |
110 | static void ip_mloopback | |
111 | (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); | |
112 | static int ip_getmoptions | |
113 | (struct sockopt *, struct ip_moptions *); | |
114 | static int ip_pcbopts(int, struct mbuf **, struct mbuf *); | |
115 | static int ip_setmoptions | |
116 | (struct sockopt *, struct ip_moptions **); | |
117 | ||
118 | int ip_optcopy(struct ip *, struct ip *); | |
984263bc | 119 | |
facaabe1 | 120 | extern int route_assert_owner_access; /* nonzero: ip_output() panics instead of logging when IP_DEBUGROUTE catches a cached route owned by another cpu */ |
984263bc MD |
121 | |
122 | extern struct protosw inetsw[]; | |
123 | ||
768b3631 | 124 | static int |
47d96de7 | 125 | ip_localforward(struct mbuf *m, const struct sockaddr_in *dst, int hlen) |
768b3631 SZ |
126 | { |
127 | struct in_ifaddr_container *iac; | |
128 | ||
129 | /* | |
130 | * We need to figure out if we have been forwarded to a local | |
131 | * socket. If so, then we should somehow "loop back" to | |
132 | * ip_input(), and get directed to the PCB as if we had received | |
133 | * this packet. This is because it may be difficult to identify | |
134 | * the packets you want to forward until they are being output | |
135 | * and have selected an interface (e.g. locally initiated | |
136 | * packets). If we used the loopback interface, we would not be | |
137 | * able to control what happens as the packet runs through | |
138 | * ip_input() as it is done through an ISR. | |
139 | */ | |
140 | LIST_FOREACH(iac, INADDR_HASH(dst->sin_addr.s_addr), ia_hash) { | |
141 | /* | |
142 | * If the addr to forward to is one of ours, we pretend | |
143 | * to be the destination for this packet. | |
144 | */ | |
145 | if (IA_SIN(iac->ia)->sin_addr.s_addr == dst->sin_addr.s_addr) | |
146 | break; | |
147 | } | |
148 | if (iac != NULL) { | |
47d96de7 | 149 | struct ip *ip; |
768b3631 SZ |
150 | |
151 | if (m->m_pkthdr.rcvif == NULL) | |
152 | m->m_pkthdr.rcvif = ifunit("lo0"); | |
153 | if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { /* checksum still pending: flag the data as already valid (presumably so the input path skips verification -- confirm) */ | |
154 | m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | | |
155 | CSUM_PSEUDO_HDR; | |
156 | m->m_pkthdr.csum_data = 0xffff; | |
157 | } | |
158 | m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; | |
159 | ||
47d96de7 SZ |
160 | /* |
161 | * Make sure that the IP header is in one mbuf, | |
162 | * required by ip_input | |
163 | */ | |
164 | if (m->m_len < hlen) { | |
165 | m = m_pullup(m, hlen); | |
166 | if (m == NULL) { | |
167 | /* The packet was freed; we are done */ | |
168 | return 1; | |
169 | } | |
170 | } | |
171 | ip = mtod(m, struct ip *); | |
172 | ||
768b3631 SZ |
173 | ip->ip_len = htons(ip->ip_len); /* NOTE(review): presumably ip_input() expects network byte order here -- confirm */ |
174 | ip->ip_off = htons(ip->ip_off); | |
175 | ip_input(m); | |
176 | ||
47d96de7 | 177 | return 1; /* The packet gets forwarded locally */ |
768b3631 SZ |
178 | } |
179 | return 0; /* dst matched none of our addresses; m has not been touched */ | |
180 | } | |
181 | ||
984263bc MD |
182 | /* |
183 | * IP output. The packet in mbuf chain m contains a skeletal IP | |
184 | * header (with len, off, ttl, proto, tos, src, dst). | |
185 | * The mbuf chain containing the packet will be freed. | |
186 | * The mbuf opt, if present, will not be freed. | |
187 | */ | |
188 | int | |
f1f552f6 | 189 | ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, |
5fe66e68 | 190 | int flags, struct ip_moptions *imo, struct inpcb *inp) |
984263bc | 191 | { |
f1f552f6 | 192 | struct ip *ip; |
984263bc MD |
193 | struct ifnet *ifp = NULL; /* keep compiler happy */ |
194 | struct mbuf *m; | |
5fe66e68 | 195 | int hlen = sizeof(struct ip); |
8c6081b9 | 196 | int len, error = 0; |
984263bc MD |
197 | struct sockaddr_in *dst = NULL; /* keep compiler happy */ |
198 | struct in_ifaddr *ia = NULL; | |
199 | int isbroadcast, sw_csum; | |
200 | struct in_addr pkt_dst; | |
984263bc | 201 | struct route iproute; |
4d935b66 | 202 | struct m_tag *mtag; |
fade9ce3 | 203 | #ifdef IPSEC |
984263bc MD |
204 | struct secpolicy *sp = NULL; |
205 | struct socket *so = inp ? inp->inp_socket : NULL; | |
206 | #endif | |
207 | #ifdef FAST_IPSEC | |
984263bc MD |
208 | struct secpolicy *sp = NULL; |
209 | struct tdb_ident *tdbi; | |
984263bc | 210 | #endif /* FAST_IPSEC */ |
5de23090 | 211 | struct sockaddr_in *next_hop = NULL; |
984263bc MD |
212 | int src_was_INADDR_ANY = 0; /* as the name says... */ |
213 | ||
984263bc | 214 | m = m0; |
814907cb | 215 | M_ASSERTPKTHDR(m); |
fade9ce3 | 216 | |
4d935b66 SZ |
217 | if (ro == NULL) { |
218 | ro = &iproute; | |
219 | bzero(ro, sizeof *ro); | |
3eef1c4e | 220 | } else if (ro->ro_rt != NULL && ro->ro_rt->rt_cpuid != mycpuid) { |
facaabe1 SZ |
221 | if (flags & IP_DEBUGROUTE) { |
222 | if (route_assert_owner_access) { | |
223 | panic("ip_output: " | |
224 | "rt rt_cpuid %d accessed on cpu %d\n", | |
225 | ro->ro_rt->rt_cpuid, mycpuid); | |
226 | } else { | |
227 | kprintf("ip_output: " | |
228 | "rt rt_cpuid %d accessed on cpu %d\n", | |
229 | ro->ro_rt->rt_cpuid, mycpuid); | |
1e5fb84b | 230 | print_backtrace(); |
facaabe1 SZ |
231 | } |
232 | } | |
233 | ||
3eef1c4e SZ |
234 | /* |
235 | * XXX | |
236 | * If the cached rtentry's owner CPU is not the current CPU, | |
237 | * then don't touch the cached rtentry (remote free is too | |
238 | * expensive in this context); just relocate the route. | |
239 | */ | |
240 | ro = &iproute; | |
241 | bzero(ro, sizeof *ro); | |
4d935b66 SZ |
242 | } |
243 | ||
5de23090 | 244 | if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) { |
eb241549 | 245 | /* Next hop */ |
5de23090 SZ |
246 | mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); |
247 | KKASSERT(mtag != NULL); | |
248 | next_hop = m_tag_data(mtag); | |
249 | } | |
250 | ||
eb241549 SZ |
251 | if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { |
252 | struct dn_pkt *dn_pkt; | |
253 | ||
254 | /* Extract info from dummynet tag */ | |
4d935b66 SZ |
255 | mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); |
256 | KKASSERT(mtag != NULL); | |
257 | dn_pkt = m_tag_data(mtag); | |
4c7020ad SZ |
258 | |
259 | /* | |
260 | * The packet was already tagged, so part of the | |
261 | * processing was already done, and we need to go down. | |
4d935b66 | 262 | * Get the calculated parameters from the tag. |
4c7020ad | 263 | */ |
4c7020ad | 264 | ifp = dn_pkt->ifp; |
4c7020ad | 265 | |
4d935b66 SZ |
266 | KKASSERT(ro == &iproute); |
267 | *ro = dn_pkt->ro; /* structure copy */ | |
3eef1c4e | 268 | KKASSERT(ro->ro_rt == NULL || ro->ro_rt->rt_cpuid == mycpuid); |
4c7020ad | 269 | |
4d935b66 SZ |
270 | dst = dn_pkt->dn_dst; |
271 | if (dst == (struct sockaddr_in *)&(dn_pkt->ro.ro_dst)) { | |
272 | /* If 'dst' points into dummynet tag, adjust it */ | |
273 | dst = (struct sockaddr_in *)&(ro->ro_dst); | |
274 | } | |
984263bc | 275 | |
984263bc MD |
276 | ip = mtod(m, struct ip *); |
277 | hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; | |
278 | if (ro->ro_rt) | |
279 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
280 | goto sendit; | |
281 | } | |
282 | ||
283 | if (opt) { | |
284 | len = 0; | |
285 | m = ip_insertoptions(m, opt, &len); | |
286 | if (len != 0) | |
287 | hlen = len; | |
288 | } | |
289 | ip = mtod(m, struct ip *); | |
984263bc MD |
290 | |
291 | /* | |
292 | * Fill in IP header. | |
293 | */ | |
f23061d4 | 294 | if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) { |
984263bc MD |
295 | ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); |
296 | ip->ip_off &= IP_DF; | |
6277137d | 297 | ip->ip_id = ip_newid(); |
984263bc MD |
298 | ipstat.ips_localout++; |
299 | } else { | |
300 | hlen = IP_VHL_HL(ip->ip_vhl) << 2; | |
301 | } | |
302 | ||
768b3631 SZ |
303 | reroute: |
304 | pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst; | |
305 | ||
87b66be9 SZ |
306 | #ifdef INVARIANTS |
307 | if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { | |
308 | /* | |
309 | * XXX | |
310 | * Multicast is not MPSAFE yet. Caller must hold | |
311 | * BGL when outputting a multicast IP packet. | |
312 | */ | |
313 | ASSERT_MP_LOCK_HELD(curthread); | |
314 | } | |
315 | #endif | |
316 | ||
984263bc MD |
317 | dst = (struct sockaddr_in *)&ro->ro_dst; |
318 | /* | |
319 | * If there is a cached route, | |
320 | * check that it is to the same destination | |
321 | * and is still up. If not, free it and try again. | |
322 | * The address family should also be checked in case of sharing the | |
323 | * cache with IPv6. | |
324 | */ | |
f23061d4 JH |
325 | if (ro->ro_rt && |
326 | (!(ro->ro_rt->rt_flags & RTF_UP) || | |
327 | dst->sin_family != AF_INET || | |
328 | dst->sin_addr.s_addr != pkt_dst.s_addr)) { | |
5fe66e68 | 329 | rtfree(ro->ro_rt); |
f23061d4 | 330 | ro->ro_rt = (struct rtentry *)NULL; |
984263bc | 331 | } |
f23061d4 | 332 | if (ro->ro_rt == NULL) { |
5fe66e68 | 333 | bzero(dst, sizeof *dst); |
984263bc | 334 | dst->sin_family = AF_INET; |
5fe66e68 | 335 | dst->sin_len = sizeof *dst; |
984263bc MD |
336 | dst->sin_addr = pkt_dst; |
337 | } | |
338 | /* | |
339 | * If routing to interface only, | |
340 | * short circuit routing lookup. | |
341 | */ | |
342 | if (flags & IP_ROUTETOIF) { | |
f23061d4 JH |
343 | if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && |
344 | (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { | |
984263bc MD |
345 | ipstat.ips_noroute++; |
346 | error = ENETUNREACH; | |
347 | goto bad; | |
348 | } | |
349 | ifp = ia->ia_ifp; | |
350 | ip->ip_ttl = 1; | |
351 | isbroadcast = in_broadcast(dst->sin_addr, ifp); | |
f26d3013 | 352 | } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) && |
f23061d4 | 353 | imo != NULL && imo->imo_multicast_ifp != NULL) { |
984263bc MD |
354 | /* |
355 | * Bypass the normal routing lookup for multicast | |
356 | * packets if the interface is specified. | |
357 | */ | |
358 | ifp = imo->imo_multicast_ifp; | |
1b562c24 | 359 | ia = IFP_TO_IA(ifp); |
984263bc MD |
360 | isbroadcast = 0; /* fool gcc */ |
361 | } else { | |
362 | /* | |
363 | * If this is the case, we probably don't want to allocate | |
364 | * a protocol-cloned route since we didn't get one from the | |
365 | * ULP. This lets TCP do its thing, while not burdening | |
366 | * forwarding or ICMP with the overhead of cloning a route. | |
367 | * Of course, we still want to do any cloning requested by | |
368 | * the link layer, as this is probably required in all cases | |
369 | * for correct operation (as it is for ARP). | |
370 | */ | |
f23061d4 | 371 | if (ro->ro_rt == NULL) |
984263bc | 372 | rtalloc_ign(ro, RTF_PRCLONING); |
f23061d4 | 373 | if (ro->ro_rt == NULL) { |
984263bc MD |
374 | ipstat.ips_noroute++; |
375 | error = EHOSTUNREACH; | |
376 | goto bad; | |
377 | } | |
378 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
379 | ifp = ro->ro_rt->rt_ifp; | |
380 | ro->ro_rt->rt_use++; | |
381 | if (ro->ro_rt->rt_flags & RTF_GATEWAY) | |
382 | dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; | |
383 | if (ro->ro_rt->rt_flags & RTF_HOST) | |
384 | isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); | |
385 | else | |
386 | isbroadcast = in_broadcast(dst->sin_addr, ifp); | |
387 | } | |
388 | if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { | |
389 | struct in_multi *inm; | |
390 | ||
391 | m->m_flags |= M_MCAST; | |
392 | /* | |
393 | * IP destination address is multicast. Make sure "dst" | |
394 | * still points to the address in "ro". (It may have been | |
395 | * changed to point to a gateway address, above.) | |
396 | */ | |
397 | dst = (struct sockaddr_in *)&ro->ro_dst; | |
398 | /* | |
399 | * See if the caller provided any multicast options | |
400 | */ | |
401 | if (imo != NULL) { | |
402 | ip->ip_ttl = imo->imo_multicast_ttl; | |
459837b1 | 403 | if (imo->imo_multicast_vif != -1) { |
984263bc MD |
404 | ip->ip_src.s_addr = |
405 | ip_mcast_src ? | |
406 | ip_mcast_src(imo->imo_multicast_vif) : | |
407 | INADDR_ANY; | |
459837b1 SZ |
408 | } |
409 | } else { | |
984263bc | 410 | ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; |
459837b1 | 411 | } |
984263bc MD |
412 | /* |
413 | * Confirm that the outgoing interface supports multicast. | |
414 | */ | |
415 | if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { | |
f23061d4 | 416 | if (!(ifp->if_flags & IFF_MULTICAST)) { |
984263bc MD |
417 | ipstat.ips_noroute++; |
418 | error = ENETUNREACH; | |
419 | goto bad; | |
420 | } | |
421 | } | |
422 | /* | |
423 | * If source address not specified yet, use address | |
424 | * of outgoing interface. | |
425 | */ | |
426 | if (ip->ip_src.s_addr == INADDR_ANY) { | |
427 | /* Interface may have no addresses. */ | |
428 | if (ia != NULL) | |
429 | ip->ip_src = IA_SIN(ia)->sin_addr; | |
430 | } | |
431 | ||
432 | IN_LOOKUP_MULTI(pkt_dst, ifp, inm); | |
433 | if (inm != NULL && | |
459837b1 | 434 | (imo == NULL || imo->imo_multicast_loop)) { |
984263bc MD |
435 | /* |
436 | * If we belong to the destination multicast group | |
437 | * on the outgoing interface, and the caller did not | |
438 | * forbid loopback, loop back a copy. | |
439 | */ | |
440 | ip_mloopback(ifp, m, dst, hlen); | |
459837b1 | 441 | } else { |
984263bc MD |
442 | /* |
443 | * If we are acting as a multicast router, perform | |
444 | * multicast forwarding as if the packet had just | |
445 | * arrived on the interface to which we are about | |
446 | * to send. The multicast forwarding function | |
447 | * recursively calls this function, using the | |
448 | * IP_FORWARDING flag to prevent infinite recursion. | |
449 | * | |
450 | * Multicasts that are looped back by ip_mloopback(), | |
451 | * above, will be forwarded by the ip_input() routine, | |
452 | * if necessary. | |
453 | */ | |
f23061d4 | 454 | if (ip_mrouter && !(flags & IP_FORWARDING)) { |
984263bc MD |
455 | /* |
456 | * If rsvp daemon is not running, do not | |
457 | * set ip_moptions. This ensures that the packet | |
458 | * is multicast and not just sent down one link | |
459 | * as prescribed by rsvpd. | |
460 | */ | |
461 | if (!rsvp_on) | |
462 | imo = NULL; | |
463 | if (ip_mforward && | |
464 | ip_mforward(ip, ifp, m, imo) != 0) { | |
465 | m_freem(m); | |
466 | goto done; | |
467 | } | |
468 | } | |
469 | } | |
470 | ||
471 | /* | |
472 | * Multicasts with a time-to-live of zero may be looped- | |
473 | * back, above, but must not be transmitted on a network. | |
474 | * Also, multicasts addressed to the loopback interface | |
475 | * are not sent -- the above call to ip_mloopback() will | |
476 | * loop back a copy if this host actually belongs to the | |
477 | * destination group on the loopback interface. | |
478 | */ | |
479 | if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { | |
480 | m_freem(m); | |
481 | goto done; | |
482 | } | |
483 | ||
484 | goto sendit; | |
f26d3013 SZ |
485 | } else { |
486 | m->m_flags &= ~M_MCAST; | |
984263bc | 487 | } |
768b3631 | 488 | |
984263bc MD |
489 | /* |
490 | * If the source address is not specified yet, use the address | |
491 | * of the outgoing interface. In that case, keep note we did that, so | |
492 | * if the firewall changes the next-hop causing the output | |
493 | * interface to change, we can fix that. | |
494 | */ | |
768b3631 | 495 | if (ip->ip_src.s_addr == INADDR_ANY || src_was_INADDR_ANY) { |
984263bc MD |
496 | /* Interface may have no addresses. */ |
497 | if (ia != NULL) { | |
498 | ip->ip_src = IA_SIN(ia)->sin_addr; | |
499 | src_was_INADDR_ANY = 1; | |
500 | } | |
501 | } | |
768b3631 | 502 | |
4d723e5a JS |
503 | #ifdef ALTQ |
504 | /* | |
505 | * Disable packet drop hack. | |
506 | * Packetdrop should be done by queueing. | |
507 | */ | |
508 | #else /* !ALTQ */ | |
984263bc MD |
509 | /* |
510 | * Verify that we have any chance at all of being able to queue | |
511 | * the packet or packet fragments | |
512 | */ | |
513 | if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= | |
459837b1 SZ |
514 | ifp->if_snd.ifq_maxlen) { |
515 | error = ENOBUFS; | |
516 | ipstat.ips_odropped++; | |
517 | goto bad; | |
984263bc | 518 | } |
4d723e5a | 519 | #endif /* !ALTQ */ |
984263bc MD |
520 | |
521 | /* | |
522 | * Look for broadcast address and | |
523 | * verify user is allowed to send | |
524 | * such a packet. | |
525 | */ | |
526 | if (isbroadcast) { | |
f23061d4 | 527 | if (!(ifp->if_flags & IFF_BROADCAST)) { |
984263bc MD |
528 | error = EADDRNOTAVAIL; |
529 | goto bad; | |
530 | } | |
f23061d4 | 531 | if (!(flags & IP_ALLOWBROADCAST)) { |
984263bc MD |
532 | error = EACCES; |
533 | goto bad; | |
534 | } | |
535 | /* don't allow broadcast messages to be fragmented */ | |
f1f552f6 | 536 | if (ip->ip_len > ifp->if_mtu) { |
984263bc MD |
537 | error = EMSGSIZE; |
538 | goto bad; | |
539 | } | |
540 | m->m_flags |= M_BCAST; | |
541 | } else { | |
542 | m->m_flags &= ~M_BCAST; | |
543 | } | |
544 | ||
545 | sendit: | |
546 | #ifdef IPSEC | |
547 | /* get SP for this packet */ | |
548 | if (so == NULL) | |
549 | sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); | |
550 | else | |
551 | sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); | |
552 | ||
553 | if (sp == NULL) { | |
554 | ipsecstat.out_inval++; | |
555 | goto bad; | |
556 | } | |
557 | ||
558 | error = 0; | |
559 | ||
560 | /* check policy */ | |
561 | switch (sp->policy) { | |
562 | case IPSEC_POLICY_DISCARD: | |
563 | /* | |
564 | * This packet is just discarded. | |
565 | */ | |
566 | ipsecstat.out_polvio++; | |
567 | goto bad; | |
568 | ||
569 | case IPSEC_POLICY_BYPASS: | |
570 | case IPSEC_POLICY_NONE: | |
571 | /* no need to do IPsec. */ | |
572 | goto skip_ipsec; | |
5fe66e68 | 573 | |
984263bc MD |
574 | case IPSEC_POLICY_IPSEC: |
575 | if (sp->req == NULL) { | |
576 | /* acquire a policy */ | |
577 | error = key_spdacquire(sp); | |
578 | goto bad; | |
579 | } | |
580 | break; | |
581 | ||
582 | case IPSEC_POLICY_ENTRUST: | |
583 | default: | |
a6ec04bc | 584 | kprintf("ip_output: Invalid policy found. %d\n", sp->policy); |
984263bc MD |
585 | } |
586 | { | |
587 | struct ipsec_output_state state; | |
5fe66e68 | 588 | bzero(&state, sizeof state); |
984263bc MD |
589 | state.m = m; |
590 | if (flags & IP_ROUTETOIF) { | |
591 | state.ro = &iproute; | |
5fe66e68 | 592 | bzero(&iproute, sizeof iproute); |
984263bc MD |
593 | } else |
594 | state.ro = ro; | |
595 | state.dst = (struct sockaddr *)dst; | |
596 | ||
597 | ip->ip_sum = 0; | |
598 | ||
599 | /* | |
600 | * XXX | |
601 | * delayed checksums are not currently compatible with IPsec | |
602 | */ | |
603 | if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { | |
604 | in_delayed_cksum(m); | |
605 | m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; | |
606 | } | |
607 | ||
608 | ip->ip_len = htons(ip->ip_len); | |
609 | ip->ip_off = htons(ip->ip_off); | |
610 | ||
611 | error = ipsec4_output(&state, sp, flags); | |
612 | ||
613 | m = state.m; | |
614 | if (flags & IP_ROUTETOIF) { | |
615 | /* | |
616 | * if we have tunnel mode SA, we may need to ignore | |
617 | * IP_ROUTETOIF. | |
618 | */ | |
619 | if (state.ro != &iproute || state.ro->ro_rt != NULL) { | |
620 | flags &= ~IP_ROUTETOIF; | |
621 | ro = state.ro; | |
622 | } | |
623 | } else | |
624 | ro = state.ro; | |
625 | dst = (struct sockaddr_in *)state.dst; | |
626 | if (error) { | |
627 | /* mbuf is already reclaimed in ipsec4_output. */ | |
628 | m0 = NULL; | |
629 | switch (error) { | |
630 | case EHOSTUNREACH: | |
631 | case ENETUNREACH: | |
632 | case EMSGSIZE: | |
633 | case ENOBUFS: | |
634 | case ENOMEM: | |
635 | break; | |
636 | default: | |
a6ec04bc | 637 | kprintf("ip4_output (ipsec): error code %d\n", error); |
984263bc MD |
638 | /*fall through*/ |
639 | case ENOENT: | |
640 | /* don't show these error codes to the user */ | |
641 | error = 0; | |
642 | break; | |
643 | } | |
644 | goto bad; | |
645 | } | |
646 | } | |
647 | ||
648 | /* be sure to update variables that are affected by ipsec4_output() */ | |
649 | ip = mtod(m, struct ip *); | |
650 | #ifdef _IP_VHL | |
651 | hlen = IP_VHL_HL(ip->ip_vhl) << 2; | |
652 | #else | |
653 | hlen = ip->ip_hl << 2; | |
654 | #endif | |
655 | if (ro->ro_rt == NULL) { | |
f23061d4 | 656 | if (!(flags & IP_ROUTETOIF)) { |
a6ec04bc | 657 | kprintf("ip_output: " |
984263bc MD |
658 | "can't update route after IPsec processing\n"); |
659 | error = EHOSTUNREACH; /*XXX*/ | |
660 | goto bad; | |
661 | } | |
662 | } else { | |
663 | ia = ifatoia(ro->ro_rt->rt_ifa); | |
664 | ifp = ro->ro_rt->rt_ifp; | |
665 | } | |
666 | ||
667 | /* make it flipped, again. */ | |
668 | ip->ip_len = ntohs(ip->ip_len); | |
669 | ip->ip_off = ntohs(ip->ip_off); | |
670 | skip_ipsec: | |
671 | #endif /*IPSEC*/ | |
672 | #ifdef FAST_IPSEC | |
673 | /* | |
674 | * Check the security policy (SP) for the packet and, if | |
675 | * required, do IPsec-related processing. There are two | |
676 | * cases here; the first time a packet is sent through | |
677 | * it will be untagged and handled by ipsec4_checkpolicy. | |
678 | * If the packet is resubmitted to ip_output (e.g. after | |
679 | * AH, ESP, etc. processing), there will be a tag to bypass | |
680 | * the lookup and related policy checking. | |
681 | */ | |
682 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); | |
1cae611f | 683 | crit_enter(); |
984263bc | 684 | if (mtag != NULL) { |
d031aa80 | 685 | tdbi = (struct tdb_ident *)m_tag_data(mtag); |
984263bc MD |
686 | sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); |
687 | if (sp == NULL) | |
688 | error = -EINVAL; /* force silent drop */ | |
689 | m_tag_delete(m, mtag); | |
690 | } else { | |
691 | sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, | |
692 | &error, inp); | |
693 | } | |
694 | /* | |
695 | * There are four return cases: | |
f23061d4 | 696 | * sp != NULL apply IPsec policy |
984263bc MD |
697 | * sp == NULL, error == 0 no IPsec handling needed |
698 | * sp == NULL, error == -EINVAL discard packet w/o error | |
699 | * sp == NULL, error != 0 discard packet, report error | |
700 | */ | |
701 | if (sp != NULL) { | |
702 | /* Loop detection, check if ipsec processing already done */ | |
703 | KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); | |
704 | for (mtag = m_tag_first(m); mtag != NULL; | |
705 | mtag = m_tag_next(m, mtag)) { | |
706 | if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) | |
707 | continue; | |
708 | if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && | |
709 | mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) | |
710 | continue; | |
711 | /* | |
712 | * Check if policy has an SA associated with it. | |
713 | * This can happen when an SP has yet to acquire | |
714 | * an SA; e.g. on first reference. If it occurs, | |
715 | * then we let ipsec4_process_packet do its thing. | |
716 | */ | |
717 | if (sp->req->sav == NULL) | |
718 | break; | |
d031aa80 | 719 | tdbi = (struct tdb_ident *)m_tag_data(mtag); |
984263bc MD |
720 | if (tdbi->spi == sp->req->sav->spi && |
721 | tdbi->proto == sp->req->sav->sah->saidx.proto && | |
722 | bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, | |
5fe66e68 | 723 | sizeof(union sockaddr_union)) == 0) { |
984263bc MD |
724 | /* |
725 | * No IPsec processing is needed, free | |
726 | * reference to SP. | |
727 | * | |
728 | * NB: null pointer to avoid free at | |
729 | * done: below. | |
730 | */ | |
731 | KEY_FREESP(&sp), sp = NULL; | |
1cae611f | 732 | crit_exit(); |
984263bc MD |
733 | goto spd_done; |
734 | } | |
735 | } | |
736 | ||
737 | /* | |
738 | * Do delayed checksums now because we send before | |
739 | * this is done in the normal processing path. | |
740 | */ | |
741 | if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { | |
742 | in_delayed_cksum(m); | |
743 | m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; | |
744 | } | |
745 | ||
746 | ip->ip_len = htons(ip->ip_len); | |
747 | ip->ip_off = htons(ip->ip_off); | |
748 | ||
749 | /* NB: callee frees mbuf */ | |
750 | error = ipsec4_process_packet(m, sp->req, flags, 0); | |
751 | /* | |
752 | * Preserve KAME behaviour: ENOENT can be returned | |
753 | * when an SA acquire is in progress. Don't propagate | |
754 | * this to user-level; it confuses applications. | |
755 | * | |
756 | * XXX this will go away when the SADB is redone. | |
757 | */ | |
758 | if (error == ENOENT) | |
759 | error = 0; | |
1cae611f | 760 | crit_exit(); |
984263bc MD |
761 | goto done; |
762 | } else { | |
1cae611f | 763 | crit_exit(); |
984263bc MD |
764 | |
765 | if (error != 0) { | |
766 | /* | |
767 | * Hack: -EINVAL is used to signal that a packet | |
768 | * should be silently discarded. This is typically | |
769 | * because we asked key management for an SA and | |
770 | * it was delayed (e.g. kicked up to IKE). | |
771 | */ | |
772 | if (error == -EINVAL) | |
773 | error = 0; | |
774 | goto bad; | |
775 | } else { | |
776 | /* No IPsec processing for this packet. */ | |
777 | } | |
778 | #ifdef notyet | |
779 | /* | |
780 | * If deferred crypto processing is needed, check that | |
781 | * the interface supports it. | |
f23061d4 | 782 | */ |
984263bc | 783 | mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); |
f23061d4 | 784 | if (mtag != NULL && !(ifp->if_capenable & IFCAP_IPSEC)) { |
984263bc | 785 | /* notify IPsec to do its own crypto */ |
d031aa80 | 786 | ipsp_skipcrypto_unmark((struct tdb_ident *)m_tag_data(mtag)); |
984263bc MD |
787 | error = EHOSTUNREACH; |
788 | goto bad; | |
789 | } | |
790 | #endif | |
791 | } | |
792 | spd_done: | |
793 | #endif /* FAST_IPSEC */ | |
768b3631 SZ |
794 | |
795 | /* We are already being fwd'd from a firewall. */ | |
796 | if (next_hop != NULL) | |
797 | goto pass; | |
798 | ||
a93c9c2f SZ |
799 | /* No pfil hooks */ |
800 | if (!pfil_has_hooks(&inet_pfil_hook)) { | |
801 | if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { | |
802 | /* | |
803 | * Strip dummynet tags from stranded packets | |
804 | */ | |
805 | mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); | |
806 | KKASSERT(mtag != NULL); | |
807 | m_tag_delete(m, mtag); | |
808 | m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; | |
809 | } | |
810 | goto pass; | |
811 | } | |
812 | ||
984263bc MD |
813 | /* |
814 | * IpHack's section. | |
815 | * - Xlate: translate packet's addr/port (NAT). | |
816 | * - Firewall: deny/allow/etc. | |
817 | * - Wrap: fake packet's addr/port <unimpl.> | |
818 | * - Encapsulate: put it in another IP and send out. <unimp.> | |
f23061d4 | 819 | */ |
5e3f3b7a | 820 | |
e7e55f42 JR |
821 | /* |
822 | * Run through list of hooks for output packets. | |
823 | */ | |
a93c9c2f SZ |
824 | error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT); |
825 | if (error != 0 || m == NULL) | |
826 | goto done; | |
827 | ip = mtod(m, struct ip *); | |
984263bc | 828 | |
768b3631 | 829 | if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) { |
984263bc MD |
830 | /* |
831 | * Check dst to make sure it is directly reachable on the | |
832 | * interface we previously thought it was. | |
833 | * If it isn't (which may be likely in some situations) we have | |
834 | * to re-route it (ie, find a route for the next-hop and the | |
835 | * associated interface) and set them here. This is nested | |
836 | * forwarding which in most cases is undesirable, except where | |
837 | * such control is nigh impossible. So we do it here. | |
838 | * And I'm babbling. | |
839 | */ | |
768b3631 SZ |
840 | mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); |
841 | KKASSERT(mtag != NULL); | |
842 | next_hop = m_tag_data(mtag); | |
984263bc | 843 | |
768b3631 SZ |
844 | /* |
845 | * Try local forwarding first | |
846 | */ | |
47d96de7 | 847 | if (ip_localforward(m, next_hop, hlen)) |
768b3631 | 848 | goto done; |
984263bc | 849 | |
768b3631 SZ |
850 | /* |
851 | * Relocate the route based on next_hop. | |
852 | * If the current route is inp's cache, keep it untouched. | |
853 | */ | |
854 | if (ro == &iproute && ro->ro_rt != NULL) { | |
855 | RTFREE(ro->ro_rt); | |
856 | ro->ro_rt = NULL; | |
857 | } | |
858 | ro = &iproute; | |
859 | bzero(ro, sizeof *ro); | |
984263bc | 860 | |
768b3631 SZ |
861 | /* |
862 | * Forwarding to broadcast address is not allowed. | |
863 | * XXX Should we follow IP_ROUTETOIF? | |
864 | */ | |
865 | flags &= ~(IP_ALLOWBROADCAST | IP_ROUTETOIF); | |
984263bc | 866 | |
768b3631 SZ |
867 | /* We are doing forwarding now */ |
868 | flags |= IP_FORWARDING; | |
984263bc | 869 | |
768b3631 | 870 | goto reroute; |
984263bc MD |
871 | } |
872 | ||
e4d4f9c3 SZ |
873 | if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { |
874 | struct dn_pkt *dn_pkt; | |
875 | ||
876 | mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); | |
877 | KKASSERT(mtag != NULL); | |
878 | dn_pkt = m_tag_data(mtag); | |
879 | ||
880 | /* | |
881 | * Under certain cases it is not possible to recalculate | |
882 | * 'ro' and 'dst', let alone 'flags', so just save them in | |
883 | * dummynet tag and avoid the possible wrong recalculation | |
884 | * when we come back to ip_output() again. | |
885 | * | |
886 | * All other parameters have been already used and so they | |
887 | * are not needed anymore. | |
888 | * XXX if the ifp is deleted while a pkt is in dummynet, | |
889 | * we are in trouble! (TODO use ifnet_detach_event) | |
890 | * | |
891 | * We need to copy *ro because for ICMP pkts (and maybe | |
892 | * others) the caller passed a pointer into the stack; | |
893 | * dst might also be a pointer into *ro so it needs to | |
894 | * be updated. | |
895 | */ | |
896 | dn_pkt->ro = *ro; | |
897 | if (ro->ro_rt) | |
898 | ro->ro_rt->rt_refcnt++; | |
899 | if (dst == (struct sockaddr_in *)&ro->ro_dst) { | |
900 | /* 'dst' points into 'ro' */ | |
901 | dst = (struct sockaddr_in *)&(dn_pkt->ro.ro_dst); | |
902 | } | |
903 | dn_pkt->dn_dst = dst; | |
904 | dn_pkt->flags = flags; | |
905 | ||
906 | ip_dn_queue(m); | |
907 | goto done; | |
908 | } | |
984263bc MD |
909 | pass: |
910 | /* 127/8 must not appear on wire - RFC1122. */ | |
911 | if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || | |
912 | (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { | |
f23061d4 | 913 | if (!(ifp->if_flags & IFF_LOOPBACK)) { |
984263bc MD |
914 | ipstat.ips_badaddr++; |
915 | error = EADDRNOTAVAIL; | |
916 | goto bad; | |
917 | } | |
918 | } | |
919 | ||
920 | m->m_pkthdr.csum_flags |= CSUM_IP; | |
921 | sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; | |
922 | if (sw_csum & CSUM_DELAY_DATA) { | |
923 | in_delayed_cksum(m); | |
924 | sw_csum &= ~CSUM_DELAY_DATA; | |
925 | } | |
926 | m->m_pkthdr.csum_flags &= ifp->if_hwassist; | |
927 | ||
928 | /* | |
929 | * If small enough for interface, or the interface will take | |
930 | * care of the fragmentation for us, can just send directly. | |
931 | */ | |
7fb962be | 932 | if (ip->ip_len <= ifp->if_mtu || ((ifp->if_hwassist & CSUM_FRAGMENT) && |
f23061d4 | 933 | !(ip->ip_off & IP_DF))) { |
984263bc MD |
934 | ip->ip_len = htons(ip->ip_len); |
935 | ip->ip_off = htons(ip->ip_off); | |
936 | ip->ip_sum = 0; | |
937 | if (sw_csum & CSUM_DELAY_IP) { | |
459837b1 | 938 | if (ip->ip_vhl == IP_VHL_BORING) |
984263bc | 939 | ip->ip_sum = in_cksum_hdr(ip); |
459837b1 | 940 | else |
984263bc | 941 | ip->ip_sum = in_cksum(m, hlen); |
984263bc MD |
942 | } |
943 | ||
944 | /* Record statistics for this interface address. */ | |
945 | if (!(flags & IP_FORWARDING) && ia) { | |
946 | ia->ia_ifa.if_opackets++; | |
947 | ia->ia_ifa.if_obytes += m->m_pkthdr.len; | |
948 | } | |
949 | ||
950 | #ifdef IPSEC | |
951 | /* clean ipsec history once it goes out of the node */ | |
952 | ipsec_delaux(m); | |
953 | #endif | |
954 | ||
955 | #ifdef MBUF_STRESS_TEST | |
956 | if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) { | |
957 | struct mbuf *m1, *m2; | |
958 | int length, tmp; | |
959 | ||
960 | tmp = length = m->m_pkthdr.len; | |
961 | ||
962 | while ((length -= mbuf_frag_size) >= 1) { | |
74f1caca | 963 | m1 = m_split(m, length, MB_DONTWAIT); |
984263bc MD |
964 | if (m1 == NULL) |
965 | break; | |
984263bc MD |
966 | m2 = m; |
967 | while (m2->m_next != NULL) | |
968 | m2 = m2->m_next; | |
969 | m2->m_next = m1; | |
970 | } | |
971 | m->m_pkthdr.len = tmp; | |
972 | } | |
973 | #endif | |
9b42cabe NA |
974 | |
975 | #ifdef MPLS | |
cb8d752c | 976 | if (!mpls_output_process(m, ro->ro_rt)) |
9b42cabe NA |
977 | goto done; |
978 | #endif | |
9db4b353 SZ |
979 | error = ifp->if_output(ifp, m, (struct sockaddr *)dst, |
980 | ro->ro_rt); | |
984263bc MD |
981 | goto done; |
982 | } | |
f1f552f6 | 983 | |
984263bc MD |
984 | if (ip->ip_off & IP_DF) { |
985 | error = EMSGSIZE; | |
986 | /* | |
987 | * This case can happen if the user changed the MTU | |
988 | * of an interface after enabling IP on it. Because | |
989 | * most netifs don't keep track of routes pointing to | |
990 | * them, there is no way for one to update all its | |
991 | * routes when the MTU is changed. | |
992 | */ | |
f1f552f6 JH |
993 | if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && |
994 | !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && | |
995 | (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { | |
984263bc MD |
996 | ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; |
997 | } | |
998 | ipstat.ips_cantfrag++; | |
999 | goto bad; | |
1000 | } | |
f1f552f6 JH |
1001 | |
1002 | /* | |
1003 | * Too large for interface; fragment if possible. If successful, | |
1004 | * on return, m will point to a list of packets to be sent. | |
1005 | */ | |
1006 | error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); | |
1007 | if (error) | |
984263bc | 1008 | goto bad; |
f1f552f6 JH |
1009 | for (; m; m = m0) { |
1010 | m0 = m->m_nextpkt; | |
f23061d4 | 1011 | m->m_nextpkt = NULL; |
f1f552f6 JH |
1012 | #ifdef IPSEC |
1013 | /* clean ipsec history once it goes out of the node */ | |
1014 | ipsec_delaux(m); | |
1015 | #endif | |
1016 | if (error == 0) { | |
1017 | /* Record statistics for this interface address. */ | |
1018 | if (ia != NULL) { | |
1019 | ia->ia_ifa.if_opackets++; | |
1020 | ia->ia_ifa.if_obytes += m->m_pkthdr.len; | |
1021 | } | |
9b42cabe | 1022 | #ifdef MPLS |
cb8d752c | 1023 | if (!mpls_output_process(m, ro->ro_rt)) |
540ba0ec | 1024 | continue; |
9b42cabe | 1025 | #endif |
9db4b353 SZ |
1026 | error = ifp->if_output(ifp, m, (struct sockaddr *)dst, |
1027 | ro->ro_rt); | |
78195a76 | 1028 | } else { |
f1f552f6 | 1029 | m_freem(m); |
78195a76 | 1030 | } |
984263bc MD |
1031 | } |
1032 | ||
f1f552f6 JH |
1033 | if (error == 0) |
1034 | ipstat.ips_fragmented++; | |
1035 | ||
1036 | done: | |
5fe66e68 | 1037 | if (ro == &iproute && ro->ro_rt != NULL) { |
f1f552f6 JH |
1038 | RTFREE(ro->ro_rt); |
1039 | ro->ro_rt = NULL; | |
1040 | } | |
fade9ce3 | 1041 | #ifdef IPSEC |
f1f552f6 JH |
1042 | if (sp != NULL) { |
1043 | KEYDEBUG(KEYDEBUG_IPSEC_STAMP, | |
a6ec04bc | 1044 | kprintf("DP ip_output call free SP:%p\n", sp)); |
f1f552f6 JH |
1045 | key_freesp(sp); |
1046 | } | |
1047 | #endif | |
1048 | #ifdef FAST_IPSEC | |
f1f552f6 JH |
1049 | if (sp != NULL) |
1050 | KEY_FREESP(&sp); | |
1051 | #endif | |
1052 | return (error); | |
1053 | bad: | |
1054 | m_freem(m); | |
1055 | goto done; | |
1056 | } | |
1057 | ||
1058 | /* | |
1059 | * Create a chain of fragments which fit the given mtu. m_frag points to the | |
1060 | * mbuf to be fragmented; on return it points to the chain with the fragments. | |
1061 | * Return 0 if no error. If error, m_frag may contain a partially built | |
1062 | * chain of fragments that should be freed by the caller. | |
1063 | * | |
1064 | * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) | |
1065 | * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). | |
1066 | */ | |
1067 | int | |
1068 | ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, | |
1069 | u_long if_hwassist_flags, int sw_csum) | |
1070 | { | |
1071 | int error = 0; | |
1072 | int hlen = IP_VHL_HL(ip->ip_vhl) << 2; | |
1073 | int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ | |
1074 | int off; | |
1075 | struct mbuf *m0 = *m_frag; /* the original packet */ | |
1076 | int firstlen; | |
1077 | struct mbuf **mnext; | |
1078 | int nfrags; | |
1079 | ||
1080 | if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ | |
1081 | ipstat.ips_cantfrag++; | |
1082 | return EMSGSIZE; | |
1083 | } | |
1084 | ||
1085 | /* | |
1086 | * Must be able to put at least 8 bytes per fragment. | |
1087 | */ | |
1088 | if (len < 8) | |
1089 | return EMSGSIZE; | |
1090 | ||
984263bc | 1091 | /* |
f1f552f6 | 1092 | * If the interface will not calculate checksums on |
984263bc MD |
1093 | * fragmented packets, then do it here. |
1094 | */ | |
f23061d4 JH |
1095 | if ((m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) && |
1096 | !(if_hwassist_flags & CSUM_IP_FRAGS)) { | |
f1f552f6 JH |
1097 | in_delayed_cksum(m0); |
1098 | m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; | |
984263bc MD |
1099 | } |
1100 | ||
f1f552f6 | 1101 | if (len > PAGE_SIZE) { |
f23061d4 JH |
1102 | /* |
1103 | * Fragment large datagrams such that each segment | |
1104 | * contains a multiple of PAGE_SIZE amount of data, | |
1105 | * plus headers. This enables a receiver to perform | |
f1f552f6 JH |
1106 | * page-flipping zero-copy optimizations. |
1107 | * | |
1108 | * XXX When does this help given that sender and receiver | |
1109 | * could have different page sizes, and also mtu could | |
1110 | * be less than the receiver's page size ? | |
1111 | */ | |
1112 | int newlen; | |
1113 | struct mbuf *m; | |
1114 | ||
1115 | for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) | |
1116 | off += m->m_len; | |
1117 | ||
1118 | /* | |
f23061d4 | 1119 | * firstlen (off - hlen) must be aligned on an |
f1f552f6 JH |
1120 | * 8-byte boundary |
1121 | */ | |
1122 | if (off < hlen) | |
1123 | goto smart_frag_failure; | |
1124 | off = ((off - hlen) & ~7) + hlen; | |
1125 | newlen = (~PAGE_MASK) & mtu; | |
5fe66e68 | 1126 | if ((newlen + sizeof(struct ip)) > mtu) { |
f1f552f6 JH |
1127 | /* we failed, go back the default */ |
1128 | smart_frag_failure: | |
1129 | newlen = len; | |
1130 | off = hlen + len; | |
1131 | } | |
1132 | len = newlen; | |
1133 | ||
1134 | } else { | |
1135 | off = hlen + len; | |
1136 | } | |
1137 | ||
1138 | firstlen = off - hlen; | |
1139 | mnext = &m0->m_nextpkt; /* pointer to next packet */ | |
984263bc MD |
1140 | |
1141 | /* | |
1142 | * Loop through length of segment after first fragment, | |
1143 | * make new header and copy data of each part and link onto chain. | |
f1f552f6 JH |
1144 | * Here, m0 is the original packet, m is the fragment being created. |
1145 | * The fragments are linked off the m_nextpkt of the original | |
1146 | * packet, which after processing serves as the first fragment. | |
984263bc | 1147 | */ |
f1f552f6 JH |
1148 | for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { |
1149 | struct ip *mhip; /* ip header on the fragment */ | |
1150 | struct mbuf *m; | |
5fe66e68 | 1151 | int mhlen = sizeof(struct ip); |
f1f552f6 | 1152 | |
74f1caca | 1153 | MGETHDR(m, MB_DONTWAIT, MT_HEADER); |
f23061d4 | 1154 | if (m == NULL) { |
984263bc MD |
1155 | error = ENOBUFS; |
1156 | ipstat.ips_odropped++; | |
f1f552f6 | 1157 | goto done; |
984263bc MD |
1158 | } |
1159 | m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; | |
f1f552f6 JH |
1160 | /* |
1161 | * In the first mbuf, leave room for the link header, then | |
1162 | * copy the original IP header including options. The payload | |
1163 | * goes into an additional mbuf chain returned by m_copy(). | |
1164 | */ | |
984263bc MD |
1165 | m->m_data += max_linkhdr; |
1166 | mhip = mtod(m, struct ip *); | |
1167 | *mhip = *ip; | |
5fe66e68 JH |
1168 | if (hlen > sizeof(struct ip)) { |
1169 | mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip); | |
984263bc MD |
1170 | mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); |
1171 | } | |
1172 | m->m_len = mhlen; | |
f1f552f6 | 1173 | /* XXX do we need to add ip->ip_off below ? */ |
984263bc | 1174 | mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; |
f1f552f6 JH |
1175 | if (off + len >= ip->ip_len) { /* last fragment */ |
1176 | len = ip->ip_len - off; | |
1177 | m->m_flags |= M_LASTFRAG; | |
1178 | } else | |
984263bc MD |
1179 | mhip->ip_off |= IP_MF; |
1180 | mhip->ip_len = htons((u_short)(len + mhlen)); | |
1181 | m->m_next = m_copy(m0, off, len); | |
f23061d4 | 1182 | if (m->m_next == NULL) { /* copy failed */ |
f1f552f6 | 1183 | m_free(m); |
984263bc MD |
1184 | error = ENOBUFS; /* ??? */ |
1185 | ipstat.ips_odropped++; | |
f1f552f6 | 1186 | goto done; |
984263bc MD |
1187 | } |
1188 | m->m_pkthdr.len = mhlen + len; | |
f23061d4 | 1189 | m->m_pkthdr.rcvif = (struct ifnet *)NULL; |
984263bc MD |
1190 | m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; |
1191 | mhip->ip_off = htons(mhip->ip_off); | |
1192 | mhip->ip_sum = 0; | |
f1f552f6 JH |
1193 | if (sw_csum & CSUM_DELAY_IP) |
1194 | mhip->ip_sum = in_cksum(m, mhlen); | |
984263bc MD |
1195 | *mnext = m; |
1196 | mnext = &m->m_nextpkt; | |
984263bc MD |
1197 | } |
1198 | ipstat.ips_ofragments += nfrags; | |
1199 | ||
f1f552f6 | 1200 | /* set first marker for fragment chain */ |
984263bc MD |
1201 | m0->m_flags |= M_FIRSTFRAG | M_FRAG; |
1202 | m0->m_pkthdr.csum_data = nfrags; | |
1203 | ||
1204 | /* | |
1205 | * Update first fragment by trimming what's been copied out | |
f1f552f6 | 1206 | * and updating header. |
984263bc | 1207 | */ |
f1f552f6 JH |
1208 | m_adj(m0, hlen + firstlen - ip->ip_len); |
1209 | m0->m_pkthdr.len = hlen + firstlen; | |
1210 | ip->ip_len = htons((u_short)m0->m_pkthdr.len); | |
984263bc MD |
1211 | ip->ip_off |= IP_MF; |
1212 | ip->ip_off = htons(ip->ip_off); | |
1213 | ip->ip_sum = 0; | |
f1f552f6 JH |
1214 | if (sw_csum & CSUM_DELAY_IP) |
1215 | ip->ip_sum = in_cksum(m0, hlen); | |
984263bc | 1216 | |
984263bc | 1217 | done: |
f1f552f6 JH |
1218 | *m_frag = m0; |
1219 | return error; | |
984263bc MD |
1220 | } |
1221 | ||
1222 | void | |
1223 | in_delayed_cksum(struct mbuf *m) | |
1224 | { | |
1225 | struct ip *ip; | |
1226 | u_short csum, offset; | |
1227 | ||
1228 | ip = mtod(m, struct ip *); | |
1229 | offset = IP_VHL_HL(ip->ip_vhl) << 2 ; | |
1230 | csum = in_cksum_skip(m, ip->ip_len, offset); | |
1231 | if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) | |
1232 | csum = 0xffff; | |
1233 | offset += m->m_pkthdr.csum_data; /* checksum offset */ | |
1234 | ||
1235 | if (offset + sizeof(u_short) > m->m_len) { | |
a6ec04bc | 1236 | kprintf("delayed m_pullup, m->len: %d off: %d p: %d\n", |
984263bc MD |
1237 | m->m_len, offset, ip->ip_p); |
1238 | /* | |
1239 | * XXX | |
1240 | * this shouldn't happen, but if it does, the | |
1241 | * correct behavior may be to insert the checksum | |
1242 | * in the existing chain instead of rearranging it. | |
1243 | */ | |
1244 | m = m_pullup(m, offset + sizeof(u_short)); | |
1245 | } | |
1246 | *(u_short *)(m->m_data + offset) = csum; | |
1247 | } | |
1248 | ||
1249 | /* | |
1250 | * Insert IP options into preformed packet. | |
1251 | * Adjust IP destination as required for IP source routing, | |
1252 | * as indicated by a non-zero in_addr at the start of the options. | |
1253 | * | |
1254 | * XXX This routine assumes that the packet has no options in place. | |
1255 | */ | |
1256 | static struct mbuf * | |
5fe66e68 | 1257 | ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) |
984263bc | 1258 | { |
2256ba69 | 1259 | struct ipoption *p = mtod(opt, struct ipoption *); |
984263bc | 1260 | struct mbuf *n; |
2256ba69 | 1261 | struct ip *ip = mtod(m, struct ip *); |
984263bc MD |
1262 | unsigned optlen; |
1263 | ||
5fe66e68 | 1264 | optlen = opt->m_len - sizeof p->ipopt_dst; |
984263bc MD |
1265 | if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { |
1266 | *phlen = 0; | |
1267 | return (m); /* XXX should fail */ | |
1268 | } | |
1269 | if (p->ipopt_dst.s_addr) | |
1270 | ip->ip_dst = p->ipopt_dst; | |
1271 | if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { | |
74f1caca | 1272 | MGETHDR(n, MB_DONTWAIT, MT_HEADER); |
f23061d4 | 1273 | if (n == NULL) { |
984263bc MD |
1274 | *phlen = 0; |
1275 | return (m); | |
1276 | } | |
f23061d4 | 1277 | n->m_pkthdr.rcvif = (struct ifnet *)NULL; |
984263bc MD |
1278 | n->m_pkthdr.len = m->m_pkthdr.len + optlen; |
1279 | m->m_len -= sizeof(struct ip); | |
1280 | m->m_data += sizeof(struct ip); | |
1281 | n->m_next = m; | |
1282 | m = n; | |
1283 | m->m_len = optlen + sizeof(struct ip); | |
1284 | m->m_data += max_linkhdr; | |
f23061d4 | 1285 | memcpy(mtod(m, void *), ip, sizeof(struct ip)); |
984263bc MD |
1286 | } else { |
1287 | m->m_data -= optlen; | |
1288 | m->m_len += optlen; | |
1289 | m->m_pkthdr.len += optlen; | |
f23061d4 | 1290 | ovbcopy(ip, mtod(m, caddr_t), sizeof(struct ip)); |
984263bc MD |
1291 | } |
1292 | ip = mtod(m, struct ip *); | |
1293 | bcopy(p->ipopt_list, ip + 1, optlen); | |
1294 | *phlen = sizeof(struct ip) + optlen; | |
1295 | ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); | |
1296 | ip->ip_len += optlen; | |
1297 | return (m); | |
1298 | } | |
1299 | ||
1300 | /* | |
1301 | * Copy options from ip to jp, | |
1302 | * omitting those not copied during fragmentation. | |
1303 | */ | |
1304 | int | |
5fe66e68 | 1305 | ip_optcopy(struct ip *ip, struct ip *jp) |
984263bc | 1306 | { |
2256ba69 | 1307 | u_char *cp, *dp; |
984263bc MD |
1308 | int opt, optlen, cnt; |
1309 | ||
1310 | cp = (u_char *)(ip + 1); | |
1311 | dp = (u_char *)(jp + 1); | |
5fe66e68 | 1312 | cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip); |
984263bc MD |
1313 | for (; cnt > 0; cnt -= optlen, cp += optlen) { |
1314 | opt = cp[0]; | |
1315 | if (opt == IPOPT_EOL) | |
1316 | break; | |
1317 | if (opt == IPOPT_NOP) { | |
1318 | /* Preserve for IP mcast tunnel's LSRR alignment. */ | |
1319 | *dp++ = IPOPT_NOP; | |
1320 | optlen = 1; | |
1321 | continue; | |
1322 | } | |
1323 | ||
5fe66e68 | 1324 | KASSERT(cnt >= IPOPT_OLEN + sizeof *cp, |
984263bc MD |
1325 | ("ip_optcopy: malformed ipv4 option")); |
1326 | optlen = cp[IPOPT_OLEN]; | |
5fe66e68 | 1327 | KASSERT(optlen >= IPOPT_OLEN + sizeof *cp && optlen <= cnt, |
984263bc MD |
1328 | ("ip_optcopy: malformed ipv4 option")); |
1329 | ||
1330 | /* bogus lengths should have been caught by ip_dooptions */ | |
1331 | if (optlen > cnt) | |
1332 | optlen = cnt; | |
1333 | if (IPOPT_COPIED(opt)) { | |
1334 | bcopy(cp, dp, optlen); | |
1335 | dp += optlen; | |
1336 | } | |
1337 | } | |
1338 | for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) | |
1339 | *dp++ = IPOPT_EOL; | |
1340 | return (optlen); | |
1341 | } | |
1342 | ||
1343 | /* | |
1344 | * IP socket option processing. | |
1345 | */ | |
1346 | int | |
5fe66e68 | 1347 | ip_ctloutput(struct socket *so, struct sockopt *sopt) |
984263bc | 1348 | { |
ed894f8c | 1349 | struct inpcb *inp = so->so_pcb; |
984263bc MD |
1350 | int error, optval; |
1351 | ||
1352 | error = optval = 0; | |
1353 | if (sopt->sopt_level != IPPROTO_IP) { | |
1354 | return (EINVAL); | |
1355 | } | |
1356 | ||
1357 | switch (sopt->sopt_dir) { | |
1358 | case SOPT_SET: | |
1359 | switch (sopt->sopt_name) { | |
1360 | case IP_OPTIONS: | |
1361 | #ifdef notyet | |
1362 | case IP_RETOPTS: | |
1363 | #endif | |
1364 | { | |
1365 | struct mbuf *m; | |
1366 | if (sopt->sopt_valsize > MLEN) { | |
1367 | error = EMSGSIZE; | |
1368 | break; | |
1369 | } | |
74f1caca | 1370 | MGET(m, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_HEADER); |
f23061d4 | 1371 | if (m == NULL) { |
984263bc MD |
1372 | error = ENOBUFS; |
1373 | break; | |
1374 | } | |
1375 | m->m_len = sopt->sopt_valsize; | |
e71a125f AE |
1376 | error = soopt_to_kbuf(sopt, mtod(m, void *), m->m_len, |
1377 | m->m_len); | |
984263bc MD |
1378 | return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, |
1379 | m)); | |
1380 | } | |
1381 | ||
1382 | case IP_TOS: | |
1383 | case IP_TTL: | |
95926362 | 1384 | case IP_MINTTL: |
984263bc MD |
1385 | case IP_RECVOPTS: |
1386 | case IP_RECVRETOPTS: | |
1387 | case IP_RECVDSTADDR: | |
1388 | case IP_RECVIF: | |
95926362 | 1389 | case IP_RECVTTL: |
984263bc | 1390 | case IP_FAITH: |
e71a125f AE |
1391 | error = soopt_to_kbuf(sopt, &optval, sizeof optval, |
1392 | sizeof optval); | |
984263bc MD |
1393 | if (error) |
1394 | break; | |
984263bc MD |
1395 | switch (sopt->sopt_name) { |
1396 | case IP_TOS: | |
1397 | inp->inp_ip_tos = optval; | |
1398 | break; | |
1399 | ||
1400 | case IP_TTL: | |
1401 | inp->inp_ip_ttl = optval; | |
1402 | break; | |
95926362 MD |
1403 | case IP_MINTTL: |
1404 | if (optval > 0 && optval <= MAXTTL) | |
1405 | inp->inp_ip_minttl = optval; | |
1406 | else | |
1407 | error = EINVAL; | |
1408 | break; | |
984263bc MD |
1409 | #define OPTSET(bit) \ |
1410 | if (optval) \ | |
1411 | inp->inp_flags |= bit; \ | |
1412 | else \ | |
1413 | inp->inp_flags &= ~bit; | |
1414 | ||
1415 | case IP_RECVOPTS: | |
1416 | OPTSET(INP_RECVOPTS); | |
1417 | break; | |
1418 | ||
1419 | case IP_RECVRETOPTS: | |
1420 | OPTSET(INP_RECVRETOPTS); | |
1421 | break; | |
1422 | ||
1423 | case IP_RECVDSTADDR: | |
1424 | OPTSET(INP_RECVDSTADDR); | |
1425 | break; | |
1426 | ||
1427 | case IP_RECVIF: | |
1428 | OPTSET(INP_RECVIF); | |
1429 | break; | |
1430 | ||
95926362 MD |
1431 | case IP_RECVTTL: |
1432 | OPTSET(INP_RECVTTL); | |
1433 | break; | |
1434 | ||
984263bc MD |
1435 | case IP_FAITH: |
1436 | OPTSET(INP_FAITH); | |
1437 | break; | |
1438 | } | |
1439 | break; | |
1440 | #undef OPTSET | |
1441 | ||
1442 | case IP_MULTICAST_IF: | |
1443 | case IP_MULTICAST_VIF: | |
1444 | case IP_MULTICAST_TTL: | |
1445 | case IP_MULTICAST_LOOP: | |
1446 | case IP_ADD_MEMBERSHIP: | |
1447 | case IP_DROP_MEMBERSHIP: | |
1448 | error = ip_setmoptions(sopt, &inp->inp_moptions); | |
1449 | break; | |
1450 | ||
1451 | case IP_PORTRANGE: | |
e71a125f | 1452 | error = soopt_to_kbuf(sopt, &optval, sizeof optval, |
984263bc MD |
1453 | sizeof optval); |
1454 | if (error) | |
1455 | break; | |
1456 | ||
1457 | switch (optval) { | |
1458 | case IP_PORTRANGE_DEFAULT: | |
1459 | inp->inp_flags &= ~(INP_LOWPORT); | |
1460 | inp->inp_flags &= ~(INP_HIGHPORT); | |
1461 | break; | |
1462 | ||
1463 | case IP_PORTRANGE_HIGH: | |
1464 | inp->inp_flags &= ~(INP_LOWPORT); | |
1465 | inp->inp_flags |= INP_HIGHPORT; | |
1466 | break; | |
1467 | ||
1468 | case IP_PORTRANGE_LOW: | |
1469 | inp->inp_flags &= ~(INP_HIGHPORT); | |
1470 | inp->inp_flags |= INP_LOWPORT; | |
1471 | break; | |
1472 | ||
1473 | default: | |
1474 | error = EINVAL; | |
1475 | break; | |
1476 | } | |
1477 | break; | |
1478 | ||
1479 | #if defined(IPSEC) || defined(FAST_IPSEC) | |
1480 | case IP_IPSEC_POLICY: | |
1481 | { | |
1482 | caddr_t req; | |
1483 | size_t len = 0; | |
1484 | int priv; | |
1485 | struct mbuf *m; | |
1486 | int optname; | |
1487 | ||
1488 | if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ | |
1489 | break; | |
e71a125f | 1490 | soopt_to_mbuf(sopt, m); |
7b95be2a | 1491 | priv = (sopt->sopt_td != NULL && |
895c1f85 | 1492 | priv_check(sopt->sopt_td, PRIV_ROOT) != 0) ? 0 : 1; |
984263bc MD |
1493 | req = mtod(m, caddr_t); |
1494 | len = m->m_len; | |
1495 | optname = sopt->sopt_name; | |
1496 | error = ipsec4_set_policy(inp, optname, req, len, priv); | |
1497 | m_freem(m); | |
1498 | break; | |
1499 | } | |
1500 | #endif /*IPSEC*/ | |
1501 | ||
1502 | default: | |
1503 | error = ENOPROTOOPT; | |
1504 | break; | |
1505 | } | |
1506 | break; | |
1507 | ||
1508 | case SOPT_GET: | |
1509 | switch (sopt->sopt_name) { | |
1510 | case IP_OPTIONS: | |
1511 | case IP_RETOPTS: | |
1512 | if (inp->inp_options) | |
e71a125f AE |
1513 | soopt_from_kbuf(sopt, mtod(inp->inp_options, |
1514 | char *), | |
1515 | inp->inp_options->m_len); | |
984263bc MD |
1516 | else |
1517 | sopt->sopt_valsize = 0; | |
1518 | break; | |
1519 | ||
1520 | case IP_TOS: | |
1521 | case IP_TTL: | |
95926362 | 1522 | case IP_MINTTL: |
984263bc MD |
1523 | case IP_RECVOPTS: |
1524 | case IP_RECVRETOPTS: | |
1525 | case IP_RECVDSTADDR: | |
95926362 | 1526 | case IP_RECVTTL: |
984263bc MD |
1527 | case IP_RECVIF: |
1528 | case IP_PORTRANGE: | |
1529 | case IP_FAITH: | |
1530 | switch (sopt->sopt_name) { | |
1531 | ||
1532 | case IP_TOS: | |
1533 | optval = inp->inp_ip_tos; | |
1534 | break; | |
1535 | ||
1536 | case IP_TTL: | |
1537 | optval = inp->inp_ip_ttl; | |
1538 | break; | |
95926362 MD |
1539 | case IP_MINTTL: |
1540 | optval = inp->inp_ip_minttl; | |
1541 | break; | |
984263bc MD |
1542 | |
1543 | #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) | |
1544 | ||
1545 | case IP_RECVOPTS: | |
1546 | optval = OPTBIT(INP_RECVOPTS); | |
1547 | break; | |
1548 | ||
1549 | case IP_RECVRETOPTS: | |
1550 | optval = OPTBIT(INP_RECVRETOPTS); | |
1551 | break; | |
1552 | ||
1553 | case IP_RECVDSTADDR: | |
1554 | optval = OPTBIT(INP_RECVDSTADDR); | |
1555 | break; | |
1556 | ||
95926362 MD |
1557 | case IP_RECVTTL: |
1558 | optval = OPTBIT(INP_RECVTTL); | |
1559 | break; | |
1560 | ||
984263bc MD |
1561 | case IP_RECVIF: |
1562 | optval = OPTBIT(INP_RECVIF); | |
1563 | break; | |
1564 | ||
1565 | case IP_PORTRANGE: | |
1566 | if (inp->inp_flags & INP_HIGHPORT) | |
1567 | optval = IP_PORTRANGE_HIGH; | |
1568 | else if (inp->inp_flags & INP_LOWPORT) | |
1569 | optval = IP_PORTRANGE_LOW; | |
1570 | else | |
1571 | optval = 0; | |
1572 | break; | |
1573 | ||
1574 | case IP_FAITH: | |
1575 | optval = OPTBIT(INP_FAITH); | |
1576 | break; | |
1577 | } | |
e71a125f | 1578 | soopt_from_kbuf(sopt, &optval, sizeof optval); |
984263bc MD |
1579 | break; |
1580 | ||
1581 | case IP_MULTICAST_IF: | |
1582 | case IP_MULTICAST_VIF: | |
1583 | case IP_MULTICAST_TTL: | |
1584 | case IP_MULTICAST_LOOP: | |
1585 | case IP_ADD_MEMBERSHIP: | |
1586 | case IP_DROP_MEMBERSHIP: | |
1587 | error = ip_getmoptions(sopt, inp->inp_moptions); | |
1588 | break; | |
1589 | ||
1590 | #if defined(IPSEC) || defined(FAST_IPSEC) | |
1591 | case IP_IPSEC_POLICY: | |
1592 | { | |
1593 | struct mbuf *m = NULL; | |
1594 | caddr_t req = NULL; | |
1595 | size_t len = 0; | |
1596 | ||
f23061d4 | 1597 | if (m != NULL) { |
984263bc MD |
1598 | req = mtod(m, caddr_t); |
1599 | len = m->m_len; | |
1600 | } | |
ed894f8c | 1601 | error = ipsec4_get_policy(so->so_pcb, req, len, &m); |
984263bc | 1602 | if (error == 0) |
e71a125f | 1603 | error = soopt_from_mbuf(sopt, m); /* XXX */ |
984263bc MD |
1604 | if (error == 0) |
1605 | m_freem(m); | |
1606 | break; | |
1607 | } | |
1608 | #endif /*IPSEC*/ | |
1609 | ||
1610 | default: | |
1611 | error = ENOPROTOOPT; | |
1612 | break; | |
1613 | } | |
1614 | break; | |
1615 | } | |
1616 | return (error); | |
1617 | } | |
1618 | ||
1619 | /* | |
1620 | * Set up IP options in pcb for insertion in output packets. | |
1621 | * Store in mbuf with pointer in pcbopt, adding pseudo-option | |
1622 | * with destination address if source routed. | |
1623 | */ | |
1624 | static int | |
f23061d4 | 1625 | ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m) |
984263bc | 1626 | { |
2256ba69 RG |
1627 | int cnt, optlen; |
1628 | u_char *cp; | |
984263bc MD |
1629 | u_char opt; |
1630 | ||
1631 | /* turn off any old options */ | |
1632 | if (*pcbopt) | |
f23061d4 | 1633 | m_free(*pcbopt); |
984263bc | 1634 | *pcbopt = 0; |
f23061d4 | 1635 | if (m == NULL || m->m_len == 0) { |
984263bc MD |
1636 | /* |
1637 | * Only turning off any previous options. | |
1638 | */ | |
f23061d4 JH |
1639 | if (m != NULL) |
1640 | m_free(m); | |
984263bc MD |
1641 | return (0); |
1642 | } | |
1643 | ||
1644 | if (m->m_len % sizeof(int32_t)) | |
1645 | goto bad; | |
1646 | /* | |
1647 | * IP first-hop destination address will be stored before | |
1648 | * actual options; move other options back | |
1649 | * and clear it when none present. | |
1650 | */ | |
1651 | if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) | |
1652 | goto bad; | |
1653 | cnt = m->m_len; | |
1654 | m->m_len += sizeof(struct in_addr); | |
1655 | cp = mtod(m, u_char *) + sizeof(struct in_addr); | |
f23061d4 | 1656 | ovbcopy(mtod(m, caddr_t), cp, cnt); |
984263bc MD |
1657 | bzero(mtod(m, caddr_t), sizeof(struct in_addr)); |
1658 | ||
1659 | for (; cnt > 0; cnt -= optlen, cp += optlen) { | |
1660 | opt = cp[IPOPT_OPTVAL]; | |
1661 | if (opt == IPOPT_EOL) | |
1662 | break; | |
1663 | if (opt == IPOPT_NOP) | |
1664 | optlen = 1; | |
1665 | else { | |
5fe66e68 | 1666 | if (cnt < IPOPT_OLEN + sizeof *cp) |
984263bc MD |
1667 | goto bad; |
1668 | optlen = cp[IPOPT_OLEN]; | |
5fe66e68 | 1669 | if (optlen < IPOPT_OLEN + sizeof *cp || optlen > cnt) |
984263bc MD |
1670 | goto bad; |
1671 | } | |
1672 | switch (opt) { | |
1673 | ||
1674 | default: | |
1675 | break; | |
1676 | ||
1677 | case IPOPT_LSRR: | |
1678 | case IPOPT_SSRR: | |
1679 | /* | |
1680 | * user process specifies route as: | |
1681 | * ->A->B->C->D | |
1682 | * D must be our final destination (but we can't | |
1683 | * check that since we may not have connected yet). | |
1684 | * A is first hop destination, which doesn't appear in | |
1685 | * actual IP option, but is stored before the options. | |
1686 | */ | |
1687 | if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) | |
1688 | goto bad; | |
1689 | m->m_len -= sizeof(struct in_addr); | |
1690 | cnt -= sizeof(struct in_addr); | |
1691 | optlen -= sizeof(struct in_addr); | |
1692 | cp[IPOPT_OLEN] = optlen; | |
1693 | /* | |
1694 | * Move first hop before start of options. | |
1695 | */ | |
f23061d4 | 1696 | bcopy(&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), |
5fe66e68 | 1697 | sizeof(struct in_addr)); |
984263bc MD |
1698 | /* |
1699 | * Then copy rest of options back | |
1700 | * to close up the deleted entry. | |
1701 | */ | |
f23061d4 JH |
1702 | ovbcopy(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr), |
1703 | &cp[IPOPT_OFFSET+1], | |
1704 | cnt - (IPOPT_MINOFF - 1)); | |
984263bc MD |
1705 | break; |
1706 | } | |
1707 | } | |
1708 | if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) | |
1709 | goto bad; | |
1710 | *pcbopt = m; | |
1711 | return (0); | |
1712 | ||
1713 | bad: | |
f23061d4 | 1714 | m_free(m); |
984263bc MD |
1715 | return (EINVAL); |
1716 | } | |
1717 | ||
1718 | /* | |
1719 | * XXX | |
1720 | * The whole multicast option thing needs to be re-thought. | |
1721 | * Several of these options are equally applicable to non-multicast | |
1722 | * transmission, and one (IP_MULTICAST_TTL) totally duplicates a | |
1723 | * standard option (IP_TTL). | |
1724 | */ | |
1725 | ||
1726 | /* | |
1727 | * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. | |
1728 | */ | |
1729 | static struct ifnet * | |
f23061d4 | 1730 | ip_multicast_if(struct in_addr *a, int *ifindexp) |
984263bc MD |
1731 | { |
1732 | int ifindex; | |
1733 | struct ifnet *ifp; | |
1734 | ||
1735 | if (ifindexp) | |
1736 | *ifindexp = 0; | |
1737 | if (ntohl(a->s_addr) >> 24 == 0) { | |
1738 | ifindex = ntohl(a->s_addr) & 0xffffff; | |
1739 | if (ifindex < 0 || if_index < ifindex) | |
1740 | return NULL; | |
1741 | ifp = ifindex2ifnet[ifindex]; | |
1742 | if (ifindexp) | |
1743 | *ifindexp = ifindex; | |
1744 | } else { | |
f8983475 | 1745 | ifp = INADDR_TO_IFP(a); |
984263bc MD |
1746 | } |
1747 | return ifp; | |
1748 | } | |
1749 | ||
1750 | /* | |
1751 | * Set the IP multicast options in response to user setsockopt(). | |
1752 | */ | |
1753 | static int | |
f23061d4 | 1754 | ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop) |
984263bc MD |
1755 | { |
1756 | int error = 0; | |
1757 | int i; | |
1758 | struct in_addr addr; | |
1759 | struct ip_mreq mreq; | |
1760 | struct ifnet *ifp; | |
1761 | struct ip_moptions *imo = *imop; | |
984263bc | 1762 | int ifindex; |
984263bc MD |
1763 | |
1764 | if (imo == NULL) { | |
1765 | /* | |
1766 | * No multicast option buffer attached to the pcb; | |
1767 | * allocate one and initialize to default values. | |
1768 | */ | |
efda3bd0 | 1769 | imo = kmalloc(sizeof *imo, M_IPMOPTS, M_WAITOK); |
984263bc | 1770 | |
984263bc MD |
1771 | *imop = imo; |
1772 | imo->imo_multicast_ifp = NULL; | |
1773 | imo->imo_multicast_addr.s_addr = INADDR_ANY; | |
1774 | imo->imo_multicast_vif = -1; | |
1775 | imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; | |
1776 | imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; | |
1777 | imo->imo_num_memberships = 0; | |
1778 | } | |
984263bc MD |
1779 | switch (sopt->sopt_name) { |
1780 | /* store an index number for the vif you wanna use in the send */ | |
1781 | case IP_MULTICAST_VIF: | |
1782 | if (legal_vif_num == 0) { | |
1783 | error = EOPNOTSUPP; | |
1784 | break; | |
1785 | } | |
e71a125f | 1786 | error = soopt_to_kbuf(sopt, &i, sizeof i, sizeof i); |
984263bc MD |
1787 | if (error) |
1788 | break; | |
1789 | if (!legal_vif_num(i) && (i != -1)) { | |
1790 | error = EINVAL; | |
1791 | break; | |
1792 | } | |
1793 | imo->imo_multicast_vif = i; | |
1794 | break; | |
1795 | ||
1796 | case IP_MULTICAST_IF: | |
1797 | /* | |
1798 | * Select the interface for outgoing multicast packets. | |
1799 | */ | |
e71a125f | 1800 | error = soopt_to_kbuf(sopt, &addr, sizeof addr, sizeof addr); |
984263bc MD |
1801 | if (error) |
1802 | break; | |
e71a125f | 1803 | |
984263bc MD |
1804 | /* |
1805 | * INADDR_ANY is used to remove a previous selection. | |
1806 | * When no interface is selected, a default one is | |
1807 | * chosen every time a multicast packet is sent. | |
1808 | */ | |
1809 | if (addr.s_addr == INADDR_ANY) { | |
1810 | imo->imo_multicast_ifp = NULL; | |
1811 | break; | |
1812 | } | |
1813 | /* | |
1814 | * The selected interface is identified by its local | |
1815 | * IP address. Find the interface and confirm that | |
1816 | * it supports multicasting. | |
1817 | */ | |
1cae611f | 1818 | crit_enter(); |
984263bc | 1819 | ifp = ip_multicast_if(&addr, &ifindex); |
f23061d4 | 1820 | if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) { |
1cae611f | 1821 | crit_exit(); |
984263bc MD |
1822 | error = EADDRNOTAVAIL; |
1823 | break; | |
1824 | } | |
1825 | imo->imo_multicast_ifp = ifp; | |
1826 | if (ifindex) | |
1827 | imo->imo_multicast_addr = addr; | |
1828 | else | |
1829 | imo->imo_multicast_addr.s_addr = INADDR_ANY; | |
1cae611f | 1830 | crit_exit(); |
984263bc MD |
1831 | break; |
1832 | ||
1833 | case IP_MULTICAST_TTL: | |
1834 | /* | |
1835 | * Set the IP time-to-live for outgoing multicast packets. | |
1836 | * The original multicast API required a char argument, | |
1837 | * which is inconsistent with the rest of the socket API. | |
1838 | * We allow either a char or an int. | |
1839 | */ | |
1840 | if (sopt->sopt_valsize == 1) { | |
1841 | u_char ttl; | |
e71a125f | 1842 | error = soopt_to_kbuf(sopt, &ttl, 1, 1); |
984263bc MD |
1843 | if (error) |
1844 | break; | |
1845 | imo->imo_multicast_ttl = ttl; | |
1846 | } else { | |
1847 | u_int ttl; | |
e71a125f | 1848 | error = soopt_to_kbuf(sopt, &ttl, sizeof ttl, sizeof ttl); |
984263bc MD |
1849 | if (error) |
1850 | break; | |
1851 | if (ttl > 255) | |
1852 | error = EINVAL; | |
1853 | else | |
1854 | imo->imo_multicast_ttl = ttl; | |
1855 | } | |
1856 | break; | |
1857 | ||
1858 | case IP_MULTICAST_LOOP: | |
1859 | /* | |
1860 | * Set the loopback flag for outgoing multicast packets. | |
1861 | * Must be zero or one. The original multicast API required a | |
1862 | * char argument, which is inconsistent with the rest | |
1863 | * of the socket API. We allow either a char or an int. | |
1864 | */ | |
1865 | if (sopt->sopt_valsize == 1) { | |
1866 | u_char loop; | |
5fe66e68 | 1867 | |
e71a125f | 1868 | error = soopt_to_kbuf(sopt, &loop, 1, 1); |
984263bc MD |
1869 | if (error) |
1870 | break; | |
1871 | imo->imo_multicast_loop = !!loop; | |
1872 | } else { | |
1873 | u_int loop; | |
5fe66e68 | 1874 | |
e71a125f | 1875 | error = soopt_to_kbuf(sopt, &loop, sizeof loop, |
984263bc MD |
1876 | sizeof loop); |
1877 | if (error) | |
1878 | break; | |
1879 | imo->imo_multicast_loop = !!loop; | |
1880 | } | |
1881 | break; | |
1882 | ||
1883 | case IP_ADD_MEMBERSHIP: | |
1884 | /* | |
1885 | * Add a multicast group membership. | |
1886 | * Group must be a valid IP multicast address. | |
1887 | */ | |
e71a125f | 1888 | error = soopt_to_kbuf(sopt, &mreq, sizeof mreq, sizeof mreq); |
984263bc MD |
1889 | if (error) |
1890 | break; | |
1891 | ||
1892 | if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { | |
1893 | error = EINVAL; | |
1894 | break; | |
1895 | } | |
1cae611f | 1896 | crit_enter(); |
984263bc MD |
1897 | /* |
1898 | * If no interface address was provided, use the interface of | |
1899 | * the route to the given multicast address. | |
1900 | */ | |
1901 | if (mreq.imr_interface.s_addr == INADDR_ANY) { | |
1ce00f4f JH |
1902 | struct sockaddr_in dst; |
1903 | struct rtentry *rt; | |
1904 | ||
1905 | bzero(&dst, sizeof(struct sockaddr_in)); | |
1906 | dst.sin_len = sizeof(struct sockaddr_in); | |
1907 | dst.sin_family = AF_INET; | |
1908 | dst.sin_addr = mreq.imr_multiaddr; | |
1909 | rt = rtlookup((struct sockaddr *)&dst); | |
1910 | if (rt == NULL) { | |
984263bc | 1911 | error = EADDRNOTAVAIL; |
1cae611f | 1912 | crit_exit(); |
984263bc MD |
1913 | break; |
1914 | } | |
1ce00f4f JH |
1915 | --rt->rt_refcnt; |
1916 | ifp = rt->rt_ifp; | |
1917 | } else { | |
984263bc MD |
1918 | ifp = ip_multicast_if(&mreq.imr_interface, NULL); |
1919 | } | |
1920 | ||
1921 | /* | |
1922 | * See if we found an interface, and confirm that it | |
1923 | * supports multicast. | |
1924 | */ | |
f23061d4 | 1925 | if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) { |
984263bc | 1926 | error = EADDRNOTAVAIL; |
1cae611f | 1927 | crit_exit(); |
984263bc MD |
1928 | break; |
1929 | } | |
1930 | /* | |
1931 | * See if the membership already exists or if all the | |
1932 | * membership slots are full. | |
1933 | */ | |
1934 | for (i = 0; i < imo->imo_num_memberships; ++i) { | |
1935 | if (imo->imo_membership[i]->inm_ifp == ifp && | |
1936 | imo->imo_membership[i]->inm_addr.s_addr | |
1937 | == mreq.imr_multiaddr.s_addr) | |
1938 | break; | |
1939 | } | |
1940 | if (i < imo->imo_num_memberships) { | |
1941 | error = EADDRINUSE; | |
1cae611f | 1942 | crit_exit(); |
984263bc MD |
1943 | break; |
1944 | } | |
1945 | if (i == IP_MAX_MEMBERSHIPS) { | |
1946 | error = ETOOMANYREFS; | |
1cae611f | 1947 | crit_exit(); |
984263bc MD |
1948 | break; |
1949 | } | |
1950 | /* | |
1951 | * Everything looks good; add a new record to the multicast | |
1952 | * address list for the given interface. | |
1953 | */ | |
1954 | if ((imo->imo_membership[i] = | |
5fe66e68 | 1955 | in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { |
984263bc | 1956 | error = ENOBUFS; |
1cae611f | 1957 | crit_exit(); |
984263bc MD |
1958 | break; |
1959 | } | |
1960 | ++imo->imo_num_memberships; | |
1cae611f | 1961 | crit_exit(); |
984263bc MD |
1962 | break; |
1963 | ||
1964 | case IP_DROP_MEMBERSHIP: | |
1965 | /* | |
1966 | * Drop a multicast group membership. | |
1967 | * Group must be a valid IP multicast address. | |
1968 | */ | |
e71a125f | 1969 | error = soopt_to_kbuf(sopt, &mreq, sizeof mreq, sizeof mreq); |
984263bc MD |
1970 | if (error) |
1971 | break; | |
1972 | ||
1973 | if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { | |
1974 | error = EINVAL; | |
1975 | break; | |
1976 | } | |
1977 | ||
1cae611f | 1978 | crit_enter(); |
984263bc MD |
1979 | /* |
1980 | * If an interface address was specified, get a pointer | |
1981 | * to its ifnet structure. | |
1982 | */ | |
1983 | if (mreq.imr_interface.s_addr == INADDR_ANY) | |
1984 | ifp = NULL; | |
1985 | else { | |
1986 | ifp = ip_multicast_if(&mreq.imr_interface, NULL); | |
1987 | if (ifp == NULL) { | |
1988 | error = EADDRNOTAVAIL; | |
1cae611f | 1989 | crit_exit(); |
984263bc MD |
1990 | break; |
1991 | } | |
1992 | } | |
1993 | /* | |
1994 | * Find the membership in the membership array. | |
1995 | */ | |
1996 | for (i = 0; i < imo->imo_num_memberships; ++i) { | |
1997 | if ((ifp == NULL || | |
1998 | imo->imo_membership[i]->inm_ifp == ifp) && | |
5fe66e68 JH |
1999 | imo->imo_membership[i]->inm_addr.s_addr == |
2000 | mreq.imr_multiaddr.s_addr) | |
984263bc MD |
2001 | break; |
2002 | } | |
2003 | if (i == imo->imo_num_memberships) { | |
2004 | error = EADDRNOTAVAIL; | |
1cae611f | 2005 | crit_exit(); |
984263bc MD |
2006 | break; |
2007 | } | |
2008 | /* | |
2009 | * Give up the multicast address record to which the | |
2010 | * membership points. | |
2011 | */ | |
2012 | in_delmulti(imo->imo_membership[i]); | |
2013 | /* | |
2014 | * Remove the gap in the membership array. | |
2015 | */ | |
2016 | for (++i; i < imo->imo_num_memberships; ++i) | |
2017 | imo->imo_membership[i-1] = imo->imo_membership[i]; | |
2018 | --imo->imo_num_memberships; | |
1cae611f | 2019 | crit_exit(); |
984263bc MD |
2020 | break; |
2021 | ||
2022 | default: | |
2023 | error = EOPNOTSUPP; | |
2024 | break; | |
2025 | } | |
2026 | ||
2027 | /* | |
2028 | * If all options have default values, no need to keep the mbuf. | |
2029 | */ | |
2030 | if (imo->imo_multicast_ifp == NULL && | |
2031 | imo->imo_multicast_vif == -1 && | |
2032 | imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && | |
2033 | imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && | |
2034 | imo->imo_num_memberships == 0) { | |
efda3bd0 | 2035 | kfree(*imop, M_IPMOPTS); |
984263bc MD |
2036 | *imop = NULL; |
2037 | } | |
2038 | ||
2039 | return (error); | |
2040 | } | |
2041 | ||
2042 | /* | |
2043 | * Return the IP multicast options in response to user getsockopt(). | |
2044 | */ | |
2045 | static int | |
f23061d4 | 2046 | ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo) |
984263bc MD |
2047 | { |
2048 | struct in_addr addr; | |
2049 | struct in_ifaddr *ia; | |
2050 | int error, optval; | |
2051 | u_char coptval; | |
2052 | ||
2053 | error = 0; | |
2054 | switch (sopt->sopt_name) { | |
f23061d4 | 2055 | case IP_MULTICAST_VIF: |
984263bc MD |
2056 | if (imo != NULL) |
2057 | optval = imo->imo_multicast_vif; | |
2058 | else | |
2059 | optval = -1; | |
e71a125f | 2060 | soopt_from_kbuf(sopt, &optval, sizeof optval); |
984263bc MD |
2061 | break; |
2062 | ||
2063 | case IP_MULTICAST_IF: | |
2064 | if (imo == NULL || imo->imo_multicast_ifp == NULL) | |
2065 | addr.s_addr = INADDR_ANY; | |
2066 | else if (imo->imo_multicast_addr.s_addr) { | |
2067 | /* return the value user has set */ | |
2068 | addr = imo->imo_multicast_addr; | |
2069 | } else { | |
1b562c24 | 2070 | ia = IFP_TO_IA(imo->imo_multicast_ifp); |
984263bc MD |
2071 | addr.s_addr = (ia == NULL) ? INADDR_ANY |
2072 | : IA_SIN(ia)->sin_addr.s_addr; | |
2073 | } | |
e71a125f | 2074 | soopt_from_kbuf(sopt, &addr, sizeof addr); |
984263bc MD |
2075 | break; |
2076 | ||
2077 | case IP_MULTICAST_TTL: | |
f23061d4 | 2078 | if (imo == NULL) |
984263bc MD |
2079 | optval = coptval = IP_DEFAULT_MULTICAST_TTL; |
2080 | else | |
2081 | optval = coptval = imo->imo_multicast_ttl; | |
2082 | if (sopt->sopt_valsize == 1) | |
e71a125f | 2083 | soopt_from_kbuf(sopt, &coptval, 1); |
984263bc | 2084 | else |
e71a125f | 2085 | soopt_from_kbuf(sopt, &optval, sizeof optval); |
984263bc MD |
2086 | break; |
2087 | ||
2088 | case IP_MULTICAST_LOOP: | |
f23061d4 | 2089 | if (imo == NULL) |
984263bc MD |
2090 | optval = coptval = IP_DEFAULT_MULTICAST_LOOP; |
2091 | else | |
2092 | optval = coptval = imo->imo_multicast_loop; | |
2093 | if (sopt->sopt_valsize == 1) | |
e71a125f | 2094 | soopt_from_kbuf(sopt, &coptval, 1); |
984263bc | 2095 | else |
e71a125f | 2096 | soopt_from_kbuf(sopt, &optval, sizeof optval); |
984263bc MD |
2097 | break; |
2098 | ||
2099 | default: | |
2100 | error = ENOPROTOOPT; | |
2101 | break; | |
2102 | } | |
2103 | return (error); | |
2104 | } | |
2105 | ||
2106 | /* | |
2107 | * Discard the IP multicast options. | |
2108 | */ | |
2109 | void | |
f23061d4 | 2110 | ip_freemoptions(struct ip_moptions *imo) |
984263bc | 2111 | { |
2256ba69 | 2112 | int i; |
984263bc MD |
2113 | |
2114 | if (imo != NULL) { | |
2115 | for (i = 0; i < imo->imo_num_memberships; ++i) | |
2116 | in_delmulti(imo->imo_membership[i]); | |
efda3bd0 | 2117 | kfree(imo, M_IPMOPTS); |
984263bc MD |
2118 | } |
2119 | } | |
2120 | ||
2121 | /* | |
2122 | * Routine called from ip_output() to loop back a copy of an IP multicast | |
2123 | * packet to the input queue of a specified interface. Note that this | |
2124 | * calls the output routine of the loopback "driver", but with an interface | |
2125 | * pointer that might NOT be a loopback interface -- evil, but easier than | |
2126 | * replicating that code here. | |
2127 | */ | |
2128 | static void | |
5fe66e68 JH |
2129 | ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, |
2130 | int hlen) | |
984263bc | 2131 | { |
2256ba69 | 2132 | struct ip *ip; |
984263bc MD |
2133 | struct mbuf *copym; |
2134 | ||
f23061d4 | 2135 | copym = m_copypacket(m, MB_DONTWAIT); |
984263bc MD |
2136 | if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) |
2137 | copym = m_pullup(copym, hlen); | |
2138 | if (copym != NULL) { | |
f23061d4 | 2139 | /* |
7db7d2da MD |
2140 | * if the checksum hasn't been computed, mark it as valid |
2141 | */ | |
2142 | if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { | |
2143 | in_delayed_cksum(copym); | |
2144 | copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; | |
2145 | copym->m_pkthdr.csum_flags |= | |
2146 | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; | |
2147 | copym->m_pkthdr.csum_data = 0xffff; | |
2148 | } | |
984263bc MD |
2149 | /* |
2150 | * We don't bother to fragment if the IP length is greater | |
2151 | * than the interface's MTU. Can this possibly matter? | |
2152 | */ | |
2153 | ip = mtod(copym, struct ip *); | |
2154 | ip->ip_len = htons(ip->ip_len); | |
2155 | ip->ip_off = htons(ip->ip_off); | |
2156 | ip->ip_sum = 0; | |
2157 | if (ip->ip_vhl == IP_VHL_BORING) { | |
2158 | ip->ip_sum = in_cksum_hdr(ip); | |
2159 | } else { | |
2160 | ip->ip_sum = in_cksum(copym, hlen); | |
2161 | } | |
2162 | /* | |
2163 | * NB: | |
2164 | * It's not clear whether there are any lingering | |
2165 | * reentrancy problems in other areas which might | |
2166 | * be exposed by using ip_input directly (in | |
2167 | * particular, everything which modifies the packet | |
2168 | * in-place). Yet another option is using the | |
2169 | * protosw directly to deliver the looped back | |
2170 | * packet. For the moment, we'll err on the side | |
2171 | * of safety by using if_simloop(). | |
2172 | */ | |
2173 | #if 1 /* XXX */ | |
2174 | if (dst->sin_family != AF_INET) { | |
a6ec04bc | 2175 | kprintf("ip_mloopback: bad address family %d\n", |
984263bc MD |
2176 | dst->sin_family); |
2177 | dst->sin_family = AF_INET; | |
2178 | } | |
2179 | #endif | |
984263bc | 2180 | if_simloop(ifp, copym, dst->sin_family, 0); |
984263bc MD |
2181 | } |
2182 | } |