kernel - network protocol thread routing
[dragonfly.git] / sys / netinet6 / ip6_input.c
CommitLineData
984263bc 1/* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.11.2.15 2003/01/24 05:11:35 sam Exp $ */
95af0087 2/* $DragonFly: src/sys/netinet6/ip6_input.c,v 1.38 2008/09/24 14:26:39 sephe Exp $ */
984263bc
MD
3/* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */
4
5/*
6 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
35 * Copyright (c) 1982, 1986, 1988, 1993
36 * The Regents of the University of California. All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
67 */
68
69#include "opt_ip6fw.h"
70#include "opt_inet.h"
71#include "opt_inet6.h"
72#include "opt_ipsec.h"
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/malloc.h>
77#include <sys/mbuf.h>
78#include <sys/domain.h>
79#include <sys/protosw.h>
80#include <sys/socket.h>
81#include <sys/socketvar.h>
82#include <sys/errno.h>
83#include <sys/time.h>
84#include <sys/kernel.h>
85#include <sys/syslog.h>
86#include <sys/proc.h>
895c1f85 87#include <sys/priv.h>
984263bc
MD
88
89#include <net/if.h>
90#include <net/if_types.h>
91#include <net/if_dl.h>
92#include <net/route.h>
93#include <net/netisr.h>
e7e55f42 94#include <net/pfil.h>
984263bc 95
6a704092
MD
96#include <sys/thread2.h>
97#include <sys/msgport2.h>
98#include <net/netmsg2.h>
99
984263bc
MD
100#include <netinet/in.h>
101#include <netinet/in_systm.h>
102#ifdef INET
103#include <netinet/ip.h>
104#include <netinet/ip_icmp.h>
105#endif /* INET */
106#include <netinet/ip6.h>
107#include <netinet6/in6_var.h>
108#include <netinet6/ip6_var.h>
109#include <netinet/in_pcb.h>
110#include <netinet/icmp6.h>
698ac46c 111#include <netinet6/scope6_var.h>
984263bc
MD
112#include <netinet6/in6_ifattach.h>
113#include <netinet6/nd6.h>
114#include <netinet6/in6_prefix.h>
115
116#ifdef IPSEC
117#include <netinet6/ipsec.h>
118#ifdef INET6
119#include <netinet6/ipsec6.h>
120#endif
121#endif
122
123#ifdef FAST_IPSEC
bf844ffa
JH
124#include <netproto/ipsec/ipsec.h>
125#include <netproto/ipsec/ipsec6.h>
984263bc
MD
126#define IPSEC
127#endif /* FAST_IPSEC */
128
1f2de5d4 129#include <net/ip6fw/ip6_fw.h>
984263bc
MD
130
131#include <netinet6/ip6protosw.h>
132
133/* we need it for NLOOP. */
1f2de5d4 134#include "use_loop.h"
984263bc
MD
135
136#include <net/net_osdep.h>
137
984263bc
MD
/* inet6 domain and its protocol switch table, defined elsewhere */
138extern struct domain inet6domain;
139extern struct ip6protosw inet6sw[];
140
/* next-header value -> index into inet6sw[]; filled in by ip6_init() */
141u_char ip6_protox[IPPROTO_MAX];
984263bc
MD
142struct in6_ifaddr *in6_ifaddr;
143
/* callout armed in ip6_init2() to run in6_tmpaddrtimer() */
144extern struct callout in6_tmpaddrtimer_ch;
145
146int ip6_forward_srcrt; /* XXX */
147int ip6_sourcecheck; /* XXX */
148int ip6_sourcecheck_interval; /* XXX */
984263bc
MD
149
/*
 * Selector for the unicast "is it ours" switch in ip6_input(); only a
 * default case is present there.
 */
150int ip6_ours_check_algorithm;
151
/* pfil hook head for AF_INET6: registered in ip6_init(), run in ip6_input() */
e7e55f42 152struct pfil_head inet6_pfil_hook;
e7e55f42 153
984263bc
MD
154/* firewall hooks */
155ip6_fw_chk_t *ip6_fw_chk_ptr;
156ip6_fw_ctl_t *ip6_fw_ctl_ptr;
/* ip6_input() calls ip6_fw_chk_ptr only while this is non-zero */
157int ip6_fw_enable = 1;
158
/* IPv6 statistics counters (ip6s_* fields) updated throughout this file */
159struct ip6stat ip6stat;
160
56d702cc
RG
/* forward declarations of file-local functions */
161static void ip6_init2 (void *);
162static struct ip6aux *ip6_setdstifaddr (struct mbuf *, struct in6_ifaddr *);
163static int ip6_hopopts_input (u_int32_t *, u_int32_t *, struct mbuf **, int *);
4599cf19 164static void ip6_input(struct netmsg *msg);
984263bc 165#ifdef PULLDOWN_TEST
56d702cc 166static struct mbuf *ip6_pullexthdr (struct mbuf *, size_t, int);
984263bc 167#endif
6a704092 168static void transport6_processing_handler(netmsg_t netmsg);
984263bc 169
984263bc
MD
170/*
171 * IP6 initialization: fill in IP6 protocol switch table.
172 * All protocols not implemented in kernel go to raw IP6 protocol handler.
173 */
174void
122ebd49 175ip6_init(void)
984263bc
MD
176{
177 struct ip6protosw *pr;
178 int i;
179 struct timeval tv;
180
181#ifdef DIAGNOSTIC
182 if (sizeof(struct protosw) != sizeof(struct ip6protosw))
183 panic("sizeof(protosw) != sizeof(ip6protosw)");
184#endif
185 pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
186 if (pr == 0)
187 panic("ip6_init");
188 for (i = 0; i < IPPROTO_MAX; i++)
189 ip6_protox[i] = pr - inet6sw;
190 for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
191 pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
192 if (pr->pr_domain->dom_family == PF_INET6 &&
193 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
194 ip6_protox[pr->pr_protocol] = pr - inet6sw;
e7e55f42 195
e7e55f42
JR
196 inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
197 inet6_pfil_hook.ph_af = AF_INET6;
5e3f3b7a 198 if ((i = pfil_head_register(&inet6_pfil_hook)) != 0) {
a6ec04bc 199 kprintf("%s: WARNING: unable to register pfil hook, "
e7e55f42 200 "error %d\n", __func__, i);
5e3f3b7a 201 }
e7e55f42 202
f92bfd8c
SZ
203 netisr_register(NETISR_IPV6, cpu0_portfn, pktinfo_portfn_cpu0,
204 ip6_input, NETISR_FLAG_NOTMPSAFE);
698ac46c 205 scope6_init();
02809d29 206 addrsel_policy_init();
984263bc
MD
207 nd6_init();
208 frag6_init();
209 /*
210 * in many cases, random() here does NOT return random number
211 * as initialization during bootstrap time occur in fixed order.
212 */
213 microtime(&tv);
cddfb7bb 214 ip6_flow_seq = krandom() ^ tv.tv_usec;
984263bc 215 microtime(&tv);
cddfb7bb 216 ip6_desync_factor = (krandom() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR;
984263bc
MD
217}
218
219static void
122ebd49 220ip6_init2(void *dummy)
984263bc 221{
984263bc
MD
222 /* nd6_timer_init */
223 callout_init(&nd6_timer_ch);
224 callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);
225
226 /* router renumbering prefix list maintenance */
227 callout_init(&in6_rr_timer_ch);
228 callout_reset(&in6_rr_timer_ch, hz, in6_rr_timer, NULL);
229
230 /* timer for regeneranation of temporary addresses randomize ID */
5db3a956 231 callout_init(&in6_tmpaddrtimer_ch);
984263bc
MD
232 callout_reset(&in6_tmpaddrtimer_ch,
233 (ip6_temp_preferred_lifetime - ip6_desync_factor -
234 ip6_temp_regen_advance) * hz,
235 in6_tmpaddrtimer, NULL);
236}
237
238/* cheat */
239/* This must be after route_init(), which is now SI_ORDER_THIRD */
240SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
241
984263bc
MD
242extern struct route_in6 ip6_forward_rt;
243
/*
 * ip6_input: handler registered for NETISR_IPV6 in ip6_init().
 *
 * Takes the packet mbuf out of the netmsg_packet, checks the IPv6 header
 * (version, source/destination address sanity), runs the pfil hooks and
 * the optional ip6fw check, and decides whether the packet is addressed
 * to this node ("ours") or has to be forwarded.  A leading Hop-by-Hop
 * options header is processed, the mbuf is trimmed to the payload length,
 * and the extension header chain is walked: a header whose protosw entry
 * has PR_LASTHDR set is sent as a message to the port returned by
 * pr_soport(), any other header is handled synchronously via pr_input().
 *
 * Exit labels: "bad" frees the mbuf; "bad2" is used when the mbuf has
 * already been consumed or freed.  msg is never replied to.
 */
b76bed62 244static
4599cf19 245void
9eeaa8a9 246ip6_input(struct netmsg *msg)
984263bc 247{
9eeaa8a9 248 struct mbuf *m = ((struct netmsg_packet *)msg)->nm_packet;
984263bc
MD
249 struct ip6_hdr *ip6;
250 int off = sizeof(struct ip6_hdr), nest;
251 u_int32_t plen;
252 u_int32_t rtalert = ~0;
d4da173d 253 int nxt, ours = 0, rh_present = 0;
984263bc 254 struct ifnet *deliverifp = NULL;
e7e55f42 255 struct in6_addr odst;
e7e55f42 256 int srcrt = 0;
984263bc
MD
257
258#ifdef IPSEC
259 /*
260 * should the inner packet be considered authentic?
261 * see comment in ah4_input().
262 */
263 if (m) {
264 m->m_flags &= ~M_AUTHIPHDR;
265 m->m_flags &= ~M_AUTHIPDGM;
266 }
267#endif
268
269 /*
270 * make sure we don't have onion peeling information into m_aux.
271 */
272 ip6_delaux(m);
273
274 /*
275 * mbuf statistics
276 */
277 if (m->m_flags & M_EXT) {
278 if (m->m_next)
279 ip6stat.ip6s_mext2m++;
280 else
281 ip6stat.ip6s_mext1++;
282 } else {
283#define M2MMAX (sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0]))
284 if (m->m_next) {
285 if (m->m_flags & M_LOOP) {
286 ip6stat.ip6s_m2m[loif[0].if_index]++; /* XXX */
287 } else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
288 ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
289 else
290 ip6stat.ip6s_m2m[0]++;
291 } else
292 ip6stat.ip6s_m1++;
293#undef M2MMAX
294 }
295
296 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
297 ip6stat.ip6s_total++;
298
299#ifndef PULLDOWN_TEST
300 /*
301 * L2 bridge code and some other code can return mbuf chain
302 * that does not conform to KAME requirement. too bad.
303 * XXX: fails to join if interface MTU > MCLBYTES. jumbogram?
304 */
305 if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
306 struct mbuf *n;
307
42947373 308 n = m_getb(m->m_pkthdr.len, MB_DONTWAIT, MT_HEADER, M_PKTHDR);
a80cf23b
JH
309 if (n == NULL)
310 goto bad;
42947373 311 M_MOVE_PKTHDR(n, m);
984263bc
MD
312
313 m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
314 n->m_len = n->m_pkthdr.len;
315 m_freem(m);
316 m = n;
317 }
4599cf19 318 IP6_EXTHDR_CHECK_VOIDRET(m, 0, sizeof(struct ip6_hdr));
984263bc
MD
319#endif
320
 /*
  * The fixed IPv6 header must be readable through mtod() below; pull
  * it up into the first mbuf if it is not there yet.  rcvif is saved
  * first because m is NULL after a failed m_pullup().
  */
321 if (m->m_len < sizeof(struct ip6_hdr)) {
322 struct ifnet *inifp;
323 inifp = m->m_pkthdr.rcvif;
324 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) {
325 ip6stat.ip6s_toosmall++;
326 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
a80cf23b 327 goto bad2;
984263bc
MD
328 }
329 }
330
331 ip6 = mtod(m, struct ip6_hdr *);
332
333 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
334 ip6stat.ip6s_badvers++;
335 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
336 goto bad;
337 }
338
339 /*
e7e55f42
JR
340 * Run through list of hooks for input packets.
341 *
342 * NB: Beware of the destination address changing
343 * (e.g. by NAT rewriting). When this happens,
344 * tell ip6_forward to do the right thing.
984263bc 345 */
afabe90c
MD
346 if (pfil_has_hooks(&inet6_pfil_hook)) {
347 odst = ip6->ip6_dst;
348 if (pfil_run_hooks(&inet6_pfil_hook, &m,
349 m->m_pkthdr.rcvif, PFIL_IN)) {
350 goto bad2;
351 }
352 if (m == NULL) /* consumed by filter */
353 goto bad2;
354 ip6 = mtod(m, struct ip6_hdr *);
355 srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
356 }
984263bc
MD
357
358 ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
359
4d723e5a
JS
360#ifdef ALTQ
361 if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
362 /* packet is dropped by traffic conditioner */
4599cf19 363 return;
4d723e5a
JS
364 }
365#endif
366
984263bc
MD
367 /*
368 * Check with the firewall...
369 */
370 if (ip6_fw_enable && ip6_fw_chk_ptr) {
371 u_short port = 0;
372 /* If ipfw says divert, we have to just drop packet */
373 /* use port as a dummy argument */
374 if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
375 m_freem(m);
376 m = NULL;
377 }
378 if (!m)
a80cf23b 379 goto bad2;
984263bc
MD
380 }
381
382 /*
383 * Check against address spoofing/corruption.
384 */
385 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
386 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
387 /*
388 * XXX: "badscope" is not very suitable for a multicast source.
389 */
390 ip6stat.ip6s_badscope++;
391 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
392 goto bad;
393 }
394 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
395 IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) &&
396 (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
397 ip6stat.ip6s_badscope++;
398 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
399 goto bad;
400 }
401
402 /*
403 * The following check is not documented in specs. A malicious
404 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
405 * and bypass security checks (act as if it was from 127.0.0.1 by using
406 * IPv6 src ::ffff:127.0.0.1). Be cautious.
407 *
408 * This check chokes if we are in an SIIT cloud. As none of BSDs
409 * support IPv4-less kernel compilation, we cannot support SIIT
410 * environment at all. So, it makes more sense for us to reject any
411 * malicious packets for non-SIIT environment, than try to do a
412 * partial support for SIIT environment.
413 */
414 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
415 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
416 ip6stat.ip6s_badscope++;
417 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
418 goto bad;
419 }
420#if 0
421 /*
422 * Reject packets with IPv4 compatible addresses (auto tunnel).
423 *
424 * The code forbids auto tunnel relay case in RFC1933 (the check is
425 * stronger than RFC1933). We may want to re-enable it if mech-xx
426 * is revised to forbid relaying case.
427 */
428 if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
429 IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
430 ip6stat.ip6s_badscope++;
431 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
432 goto bad;
433 }
434#endif
435
436 /* drop packets if interface ID portion is already filled */
437 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
438 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src) &&
439 ip6->ip6_src.s6_addr16[1]) {
440 ip6stat.ip6s_badscope++;
441 goto bad;
442 }
443 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
444 ip6->ip6_dst.s6_addr16[1]) {
445 ip6stat.ip6s_badscope++;
446 goto bad;
447 }
448 }
449
 /*
  * Store the receiving interface index in the second 16-bit word of
  * link-local scoped source/destination addresses.
  */
450 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
451 ip6->ip6_src.s6_addr16[1]
452 = htons(m->m_pkthdr.rcvif->if_index);
453 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
454 ip6->ip6_dst.s6_addr16[1]
455 = htons(m->m_pkthdr.rcvif->if_index);
456
457#if 0 /* this case seems to be unnecessary. (jinmei, 20010401) */
458 /*
459 * We use rt->rt_ifp to determine if the address is ours or not.
460 * If rt_ifp is lo0, the address is ours.
461 * The problem here is, rt->rt_ifp for fe80::%lo0/64 is set to lo0,
462 * so any address under fe80::%lo0/64 will be mistakenly considered
463 * local. The special case is supplied to handle the case properly
464 * by actually looking at interface addresses
465 * (using in6ifa_ifpwithaddr).
466 */
bde3511a 467 if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) &&
984263bc
MD
468 IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) {
469 if (!in6ifa_ifpwithaddr(m->m_pkthdr.rcvif, &ip6->ip6_dst)) {
470 icmp6_error(m, ICMP6_DST_UNREACH,
471 ICMP6_DST_UNREACH_ADDR, 0);
472 /* m is already freed */
a80cf23b 473 goto bad2;
984263bc
MD
474 }
475
476 ours = 1;
477 deliverifp = m->m_pkthdr.rcvif;
478 goto hbhcheck;
479 }
480#endif
481
482 /*
483 * Multicast check
484 */
485 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
486 struct in6_multi *in6m = 0;
487
488 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
489 /*
490 * See if we belong to the destination multicast group on the
491 * arrival interface.
492 */
493 IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
494 if (in6m)
495 ours = 1;
496 else if (!ip6_mrouter) {
497 ip6stat.ip6s_notmember++;
498 ip6stat.ip6s_cantforward++;
499 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
500 goto bad;
501 }
502 deliverifp = m->m_pkthdr.rcvif;
503 goto hbhcheck;
504 }
505
506 /*
507 * Unicast check
508 */
509 switch (ip6_ours_check_algorithm) {
510 default:
511 /*
512 * XXX: I intentionally broke our indentation rule here,
513 * since this switch-case is just for measurement and
514 * therefore should soon be removed.
515 */
516 if (ip6_forward_rt.ro_rt != NULL &&
bde3511a 517 (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) &&
984263bc
MD
518 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
519 &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr))
520 ip6stat.ip6s_forward_cachehit++;
521 else {
522 struct sockaddr_in6 *dst6;
523
524 if (ip6_forward_rt.ro_rt) {
525 /* route is down or destination is different */
526 ip6stat.ip6s_forward_cachemiss++;
527 RTFREE(ip6_forward_rt.ro_rt);
528 ip6_forward_rt.ro_rt = 0;
529 }
530
531 bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
f23061d4 532 dst6 = &ip6_forward_rt.ro_dst;
984263bc
MD
533 dst6->sin6_len = sizeof(struct sockaddr_in6);
534 dst6->sin6_family = AF_INET6;
535 dst6->sin6_addr = ip6->ip6_dst;
984263bc
MD
536
537 rtalloc_ign((struct route *)&ip6_forward_rt, RTF_PRCLONING);
538 }
539
540#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
541
542 /*
543 * Accept the packet if the forwarding interface to the destination
544 * according to the routing table is the loopback interface,
545 * unless the associated route has a gateway.
546 * Note that this approach causes to accept a packet if there is a
547 * route to the loopback interface for the destination of the packet.
548 * But we think it's even useful in some situations, e.g. when using
549 * a special daemon which wants to intercept the packet.
550 *
551 * XXX: some OSes automatically make a cloned route for the destination
552 * of an outgoing packet. If the outgoing interface of the packet
553 * is a loopback one, the kernel would consider the packet to be
554 * accepted, even if we have no such address assigned on the interface.
555 * We check the cloned flag of the route entry to reject such cases,
556 * assuming that route entries for our own addresses are not made by
557 * cloning (it should be true because in6_addloop explicitly installs
558 * the host route). However, we might have to do an explicit check
559 * while it would be less efficient. Or, should we rather install a
560 * reject route for such a case?
561 */
562 if (ip6_forward_rt.ro_rt &&
563 (ip6_forward_rt.ro_rt->rt_flags &
564 (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
565#ifdef RTF_WASCLONED
566 !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
567#endif
568#ifdef RTF_CLONED
569 !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
570#endif
571#if 0
572 /*
573 * The check below is redundant since the comparison of
574 * the destination and the key of the rtentry has
575 * already done through looking up the routing table.
576 */
577 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
578 &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr)
579#endif
580 ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
581 struct in6_ifaddr *ia6 =
582 (struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa;
583
584 /*
585 * record address information into m_aux.
586 */
0bdb1448 587 ip6_setdstifaddr(m, ia6);
984263bc
MD
588
589 /*
590 * packets to a tentative, duplicated, or somehow invalid
591 * address must not be accepted.
592 */
593 if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
594 /* this address is ready */
595 ours = 1;
596 deliverifp = ia6->ia_ifp; /* correct? */
597 /* Count the packet in the ip address stats */
598 ia6->ia_ifa.if_ipackets++;
599 ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
600 goto hbhcheck;
601 } else {
602 /* address is not ready, so discard the packet. */
603 nd6log((LOG_INFO,
604 "ip6_input: packet to an unready address %s->%s\n",
605 ip6_sprintf(&ip6->ip6_src),
606 ip6_sprintf(&ip6->ip6_dst)));
607
608 goto bad;
609 }
610 }
611 } /* XXX indentation (see above) */
612
613 /*
614 * FAITH(Firewall Aided Internet Translator)
615 */
616 if (ip6_keepfaith) {
617 if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp
618 && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
619 /* XXX do we need more sanity checks? */
620 ours = 1;
621 deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */
622 goto hbhcheck;
623 }
624 }
625
626 /*
627 * Now there is no reason to process the packet if it's not our own
628 * and we're not a router.
629 */
630 if (!ip6_forwarding) {
631 ip6stat.ip6s_cantforward++;
632 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
633 goto bad;
634 }
635
bde3511a 636hbhcheck:
984263bc
MD
637 /*
638 * record address information into m_aux, if we don't have one yet.
639 * note that we are unable to record it, if the address is not listed
640 * as our interface address (e.g. multicast addresses, addresses
641 * within FAITH prefixes and such).
642 */
643 if (deliverifp && !ip6_getdstifaddr(m)) {
644 struct in6_ifaddr *ia6;
645
646 ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
647 if (ia6) {
648 if (!ip6_setdstifaddr(m, ia6)) {
649 /*
650 * XXX maybe we should drop the packet here,
651 * as we could not provide enough information
652 * to the upper layers.
653 */
654 }
655 }
656 }
657
658 /*
659 * Process Hop-by-Hop options header if it's contained.
660 * m may be modified in ip6_hopopts_input().
661 * If a JumboPayload option is included, plen will also be modified.
662 */
663 plen = (u_int32_t)ntohs(ip6->ip6_plen);
664 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
665 struct ip6_hbh *hbh;
666
667 if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
668#if 0 /*touches NULL pointer*/
669 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
670#endif
a80cf23b 671 goto bad2; /* m has already been freed */
984263bc
MD
672 }
673
674 /* adjust pointer */
675 ip6 = mtod(m, struct ip6_hdr *);
676
677 /*
678 * if the payload length field is 0 and the next header field
679 * indicates Hop-by-Hop Options header, then a Jumbo Payload
680 * option MUST be included.
681 */
682 if (ip6->ip6_plen == 0 && plen == 0) {
683 /*
684 * Note that if a valid jumbo payload option is
685 * contained, ip6_hopopts_input() must set a valid
bde3511a 686 * (non-zero) payload length to the variable plen.
984263bc
MD
687 */
688 ip6stat.ip6s_badoptions++;
689 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
690 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
691 icmp6_error(m, ICMP6_PARAM_PROB,
692 ICMP6_PARAMPROB_HEADER,
693 (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
a80cf23b 694 goto bad2;
984263bc
MD
695 }
696#ifndef PULLDOWN_TEST
697 /* ip6_hopopts_input() ensures that mbuf is contiguous */
698 hbh = (struct ip6_hbh *)(ip6 + 1);
699#else
700 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
701 sizeof(struct ip6_hbh));
702 if (hbh == NULL) {
703 ip6stat.ip6s_tooshort++;
a80cf23b 704 goto bad2;
984263bc
MD
705 }
706#endif
707 nxt = hbh->ip6h_nxt;
708
709 /*
e754a851
HT
710 * If we are acting as a router and the packet contains a
711 * router alert option, see if we know the option value.
712 * Currently, we only support the option value for MLD, in which
713 * case we should pass the packet to the multicast routing
714 * daemon.
984263bc 715 */
e754a851
HT
716 if (rtalert != ~0 && ip6_forwarding) {
717 switch (rtalert) {
718 case IP6OPT_RTALERT_MLD:
719 ours = 1;
720 break;
721 default:
722 /*
723 * RFC2711 requires unrecognized values must be
724 * silently ignored.
725 */
726 break;
727 }
728 }
984263bc
MD
729 } else
730 nxt = ip6->ip6_nxt;
731
732 /*
733 * Check that the amount of data in the buffers
734 * is as at least much as the IPv6 header would have us expect.
735 * Trim mbufs if longer than we expect.
736 * Drop packet if shorter than we expect.
737 */
738 if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
739 ip6stat.ip6s_tooshort++;
740 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
741 goto bad;
742 }
743 if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
744 if (m->m_len == m->m_pkthdr.len) {
745 m->m_len = sizeof(struct ip6_hdr) + plen;
746 m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
747 } else
 /*
  * The count passed here is (wanted length - current length),
  * which is negative inside this branch.
  * NOTE(review): presumably m_adj() trims from the tail of the
  * chain for a negative count -- confirm against mbuf(9).
  */
748 m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
749 }
750
751 /*
752 * Forward if desirable.
753 */
754 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
755 /*
756 * If we are acting as a multicast router, all
757 * incoming multicast packets are passed to the
758 * kernel-level multicast forwarding function.
759 * The packet is returned (relatively) intact; if
760 * ip6_mforward() returns a non-zero value, the packet
761 * must be discarded, else it may be accepted below.
762 */
763 if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
764 ip6stat.ip6s_cantforward++;
a80cf23b 765 goto bad;
984263bc 766 }
a80cf23b
JH
767 if (!ours)
768 goto bad;
984263bc 769 } else if (!ours) {
 /* m is handed to ip6_forward(); it is not freed here (bad2) */
e7e55f42 770 ip6_forward(m, srcrt);
a80cf23b 771 goto bad2;
984263bc
MD
772 }
773
774 ip6 = mtod(m, struct ip6_hdr *);
775
776 /*
777 * Malicious party may be able to use IPv4 mapped addr to confuse
778 * tcp/udp stack and bypass security checks (act as if it was from
779 * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious.
780 *
781 * For SIIT end node behavior, you may want to disable the check.
782 * However, you will become vulnerable to attacks using IPv4 mapped
783 * source.
784 */
785 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
786 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
787 ip6stat.ip6s_badscope++;
788 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
789 goto bad;
790 }
791
792 /*
793 * Tell launch routine the next header
794 */
795 ip6stat.ip6s_delivered++;
796 in6_ifstat_inc(deliverifp, ifs6_in_deliver);
797 nest = 0;
798
d4da173d 799 rh_present = 0;
984263bc 800 while (nxt != IPPROTO_DONE) {
6a704092
MD
801 struct ip6protosw *sw6;
802
984263bc
MD
803 if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
804 ip6stat.ip6s_toomanyhdr++;
d4da173d 805 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
984263bc
MD
806 goto bad;
807 }
808
809 /*
810 * protection against faulty packet - there should be
811 * more sanity checks in header chain processing.
812 */
813 if (m->m_pkthdr.len < off) {
814 ip6stat.ip6s_tooshort++;
815 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
816 goto bad;
817 }
818
819#if 0
820 /*
821 * do we need to do it for every header? yeah, other
822 * functions can play with it (like re-allocate and copy).
823 */
824 mhist = ip6_addaux(m);
825 if (mhist && M_TRAILINGSPACE(mhist) >= sizeof(nxt)) {
826 hist = mtod(mhist, caddr_t) + mhist->m_len;
827 bcopy(&nxt, hist, sizeof(nxt));
828 mhist->m_len += sizeof(nxt);
829 } else {
830 ip6stat.ip6s_toomanyhdr++;
831 goto bad;
832 }
833#endif
834
d4da173d
HT
 /* a second Routing header in the same chain is rejected */
835 if (nxt == IPPROTO_ROUTING) {
836 if (rh_present++) {
837 in6_ifstat_inc(m->m_pkthdr.rcvif,
838 ifs6_in_hdrerr);
839 ip6stat.ip6s_badoptions++;
840 goto bad;
841 }
842 }
843
6a704092 844 sw6 = &inet6sw[ip6_protox[nxt]];
984263bc
MD
845#ifdef IPSEC
846 /*
847 * enforce IPsec policy checking if we are seeing last header.
848 * note that we do not visit this with protocols with pcb layer
849 * code - like udp/tcp/raw ip.
850 */
6a704092 851 if ((sw6->pr_flags & PR_LASTHDR) && ipsec6_in_reject(m, NULL)) {
984263bc
MD
852 ipsec6stat.in_polvio++;
853 goto bad;
854 }
855#endif
6a704092
MD
856 /*
857 * If this is a terminal header forward to the port, otherwise
858 * process synchronously for more headers.
859 */
860 if (sw6->pr_flags & PR_LASTHDR) {
861 struct netmsg_packet *pmsg;
862 lwkt_port_t port;
863
864 port = sw6->pr_soport(NULL, NULL, &m);
865 KKASSERT(port != NULL);
 /* the message lives inside the mbuf header itself */
866 pmsg = &m->m_hdr.mh_netmsg;
867 netmsg_init(&pmsg->nm_netmsg, NULL,
868 &netisr_apanic_rport,
869 MSGF_MPSAFE, transport6_processing_handler);
870 pmsg->nm_packet = m;
871 pmsg->nm_nxt = nxt;
 /* ms_result carries the header offset to transport6_processing_handler() */
872 pmsg->nm_netmsg.nm_lmsg.u.ms_result = off;
873 lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg);
874 /* done with m */
875 nxt = IPPROTO_DONE;
876 } else {
877 nxt = sw6->pr_input(&m, &off, nxt);
878 }
984263bc 879 }
a80cf23b
JH
880 goto bad2;
 /* bad: drop the packet, freeing the mbuf chain */
881bad:
984263bc 882 m_freem(m);
 /* bad2: the mbuf is already consumed or freed; nothing left to release */
a80cf23b 883bad2:
4599cf19 884 ;
6aad077d 885 /* msg was embedded in the mbuf, do not reply! */
984263bc
MD
886}
887
6a704092
MD
888/*
889 * We have to call the pr_input() function from the correct protocol
890 * thread. The sw6->pr_soport() request at the end of ip6_input()
891 * returns the port and we forward a netmsg to the port to execute
892 * this function.
893 */
894static void
895transport6_processing_handler(netmsg_t netmsg)
896{
897 struct netmsg_packet *pmsg = (struct netmsg_packet *)netmsg;
898 struct ip6protosw *sw6;
899 int hlen;
900 int nxt;
901
902 sw6 = &inet6sw[ip6_protox[pmsg->nm_nxt]];
903 hlen = pmsg->nm_netmsg.nm_lmsg.u.ms_result;
904
905 nxt = sw6->pr_input(&pmsg->nm_packet, &hlen, pmsg->nm_nxt);
906 KKASSERT(nxt == IPPROTO_DONE);
907 /* netmsg was embedded in the mbuf, do not reply! */
908}
909
984263bc
MD
910/*
911 * set/grab in6_ifaddr correspond to IPv6 destination address.
912 * XXX backward compatibility wrapper
913 */
914static struct ip6aux *
122ebd49 915ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
984263bc
MD
916{
917 struct ip6aux *n;
918
919 n = ip6_addaux(m);
920 if (n)
921 n->ip6a_dstia6 = ia6;
922 return n; /* NULL if failed to set */
923}
924
925struct in6_ifaddr *
122ebd49 926ip6_getdstifaddr(struct mbuf *m)
984263bc
MD
927{
928 struct ip6aux *n;
929
930 n = ip6_findaux(m);
931 if (n)
932 return n->ip6a_dstia6;
933 else
934 return NULL;
935}
936
937/*
938 * Hop-by-Hop options header processing. If a valid jumbo payload option is
939 * included, the real payload length will be stored in plenp.
940 */
941static int
122ebd49
CP
942ip6_hopopts_input(u_int32_t *plenp,
943 u_int32_t *rtalertp,/* XXX: should be stored more smart way */
944 struct mbuf **mp,
945 int *offp)
984263bc
MD
946{
947 struct mbuf *m = *mp;
948 int off = *offp, hbhlen;
949 struct ip6_hbh *hbh;
950 u_int8_t *opt;
951
952 /* validation of the length of the header */
953#ifndef PULLDOWN_TEST
954 IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
955 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
956 hbhlen = (hbh->ip6h_len + 1) << 3;
957
958 IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
959 hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
960#else
961 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
962 sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
963 if (hbh == NULL) {
964 ip6stat.ip6s_tooshort++;
965 return -1;
966 }
967 hbhlen = (hbh->ip6h_len + 1) << 3;
968 IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
969 hbhlen);
970 if (hbh == NULL) {
971 ip6stat.ip6s_tooshort++;
972 return -1;
973 }
974#endif
975 off += hbhlen;
976 hbhlen -= sizeof(struct ip6_hbh);
977 opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
978
979 if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
980 hbhlen, rtalertp, plenp) < 0)
bde3511a 981 return (-1);
984263bc
MD
982
983 *offp = off;
984 *mp = m;
bde3511a 985 return (0);
984263bc
MD
986}
987
988/*
989 * Search header for all Hop-by-hop options and process each option.
990 * This function is separate from ip6_hopopts_input() in order to
991 * handle a case where the sending node itself process its hop-by-hop
992 * options header. In such a case, the function is called from ip6_output().
993 *
994 * The function assumes that hbh header is located right after the IPv6 header
995 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
996 * opthead + hbhlen is located in continuous memory region.
997 */
998int
122ebd49
CP
999ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
1000 u_int32_t *rtalertp, u_int32_t *plenp)
984263bc
MD
1001{
1002 struct ip6_hdr *ip6;
1003 int optlen = 0;
1004 u_int8_t *opt = opthead;
1005 u_int16_t rtalert_val;
1006 u_int32_t jumboplen;
1007 const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
1008
1009 for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
1010 switch (*opt) {
1011 case IP6OPT_PAD1:
1012 optlen = 1;
1013 break;
1014 case IP6OPT_PADN:
1015 if (hbhlen < IP6OPT_MINLEN) {
1016 ip6stat.ip6s_toosmall++;
1017 goto bad;
1018 }
1019 optlen = *(opt + 1) + 2;
1020 break;
1021 case IP6OPT_RTALERT:
1022 /* XXX may need check for alignment */
1023 if (hbhlen < IP6OPT_RTALERT_LEN) {
1024 ip6stat.ip6s_toosmall++;
1025 goto bad;
1026 }
1027 if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
1028 /* XXX stat */
1029 icmp6_error(m, ICMP6_PARAM_PROB,
1030 ICMP6_PARAMPROB_HEADER,
1031 erroff + opt + 1 - opthead);
bde3511a 1032 return (-1);
984263bc
MD
1033 }
1034 optlen = IP6OPT_RTALERT_LEN;
1035 bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
1036 *rtalertp = ntohs(rtalert_val);
1037 break;
1038 case IP6OPT_JUMBO:
1039 /* XXX may need check for alignment */
1040 if (hbhlen < IP6OPT_JUMBO_LEN) {
1041 ip6stat.ip6s_toosmall++;
1042 goto bad;
1043 }
1044 if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
1045 /* XXX stat */
1046 icmp6_error(m, ICMP6_PARAM_PROB,
1047 ICMP6_PARAMPROB_HEADER,
1048 erroff + opt + 1 - opthead);
bde3511a 1049 return (-1);
984263bc
MD
1050 }
1051 optlen = IP6OPT_JUMBO_LEN;
1052
1053 /*
1054 * IPv6 packets that have non 0 payload length
1055 * must not contain a jumbo payload option.
1056 */
1057 ip6 = mtod(m, struct ip6_hdr *);
1058 if (ip6->ip6_plen) {
1059 ip6stat.ip6s_badoptions++;
1060 icmp6_error(m, ICMP6_PARAM_PROB,
1061 ICMP6_PARAMPROB_HEADER,
1062 erroff + opt - opthead);
bde3511a 1063 return (-1);
984263bc
MD
1064 }
1065
1066 /*
1067 * We may see jumbolen in unaligned location, so
1068 * we'd need to perform bcopy().
1069 */
1070 bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
1071 jumboplen = (u_int32_t)htonl(jumboplen);
1072
1073#if 1
1074 /*
1075 * if there are multiple jumbo payload options,
1076 * *plenp will be non-zero and the packet will be
1077 * rejected.
1078 * the behavior may need some debate in ipngwg -
1079 * multiple options does not make sense, however,
1080 * there's no explicit mention in specification.
1081 */
1082 if (*plenp != 0) {
1083 ip6stat.ip6s_badoptions++;
1084 icmp6_error(m, ICMP6_PARAM_PROB,
1085 ICMP6_PARAMPROB_HEADER,
1086 erroff + opt + 2 - opthead);
bde3511a 1087 return (-1);
984263bc
MD
1088 }
1089#endif
1090
1091 /*
1092 * jumbo payload length must be larger than 65535.
1093 */
1094 if (jumboplen <= IPV6_MAXPACKET) {
1095 ip6stat.ip6s_badoptions++;
1096 icmp6_error(m, ICMP6_PARAM_PROB,
1097 ICMP6_PARAMPROB_HEADER,
1098 erroff + opt + 2 - opthead);
bde3511a 1099 return (-1);
984263bc
MD
1100 }
1101 *plenp = jumboplen;
1102
1103 break;
1104 default: /* unknown option */
1105 if (hbhlen < IP6OPT_MINLEN) {
1106 ip6stat.ip6s_toosmall++;
1107 goto bad;
1108 }
1109 optlen = ip6_unknown_opt(opt, m,
1110 erroff + opt - opthead);
1111 if (optlen == -1)
bde3511a 1112 return (-1);
984263bc
MD
1113 optlen += 2;
1114 break;
1115 }
1116 }
1117
bde3511a 1118 return (0);
984263bc 1119
bde3511a 1120bad:
984263bc 1121 m_freem(m);
bde3511a 1122 return (-1);
984263bc
MD
1123}
1124
1125/*
1126 * Unknown option processing.
1127 * The third argument `off' is the offset from the IPv6 header to the option,
1128 * which is necessary if the IPv6 header the and option header and IPv6 header
1129 * is not continuous in order to return an ICMPv6 error.
1130 */
1131int
122ebd49 1132ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
984263bc
MD
1133{
1134 struct ip6_hdr *ip6;
1135
1136 switch (IP6OPT_TYPE(*optp)) {
1137 case IP6OPT_TYPE_SKIP: /* ignore the option */
bde3511a 1138 return ((int)*(optp + 1));
984263bc
MD
1139 case IP6OPT_TYPE_DISCARD: /* silently discard */
1140 m_freem(m);
bde3511a 1141 return (-1);
984263bc
MD
1142 case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
1143 ip6stat.ip6s_badoptions++;
1144 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
bde3511a 1145 return (-1);
984263bc
MD
1146 case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
1147 ip6stat.ip6s_badoptions++;
1148 ip6 = mtod(m, struct ip6_hdr *);
1149 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
1150 (m->m_flags & (M_BCAST|M_MCAST)))
1151 m_freem(m);
1152 else
1153 icmp6_error(m, ICMP6_PARAM_PROB,
1154 ICMP6_PARAMPROB_OPTION, off);
bde3511a 1155 return (-1);
984263bc
MD
1156 }
1157
1158 m_freem(m); /* XXX: NOTREACHED */
bde3511a 1159 return (-1);
984263bc
MD
1160}
1161
1162/*
1163 * Create the "control" list for this pcb.
1164 * The function will not modify mbuf chain at all.
1165 *
1166 * with KAME mbuf chain restriction:
1167 * The routine will be called from upper layer handlers like tcp6_input().
1168 * Thus the routine assumes that the caller (tcp6_input) have already
1169 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1170 * very first mbuf on the mbuf chain.
1171 */
1172void
122ebd49
CP
1173ip6_savecontrol(struct inpcb *in6p, struct mbuf **mp, struct ip6_hdr *ip6,
1174 struct mbuf *m)
984263bc 1175{
5aa41e7c 1176 #define IS2292(x, y) ((in6p->in6p_flags & IN6P_RFC2292) ? (x) : (y))
dadab5e9 1177 struct thread *td = curthread; /* XXX */
984263bc
MD
1178 int privileged = 0;
1179 int rthdr_exist = 0;
1180
1181
895c1f85 1182 if (priv_check(td, PRIV_ROOT) == 0)
984263bc
MD
1183 privileged++;
1184
1185#ifdef SO_TIMESTAMP
bde3511a 1186 if (in6p->in6p_socket->so_options & SO_TIMESTAMP) {
984263bc
MD
1187 struct timeval tv;
1188
1189 microtime(&tv);
1190 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1191 SCM_TIMESTAMP, SOL_SOCKET);
1192 if (*mp) {
1193 mp = &(*mp)->m_next;
1194 }
1195 }
1196#endif
1197
1198 /* RFC 2292 sec. 5 */
bde3511a 1199 if (in6p->in6p_flags & IN6P_PKTINFO) {
984263bc
MD
1200 struct in6_pktinfo pi6;
1201 bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
1202 if (IN6_IS_SCOPE_LINKLOCAL(&pi6.ipi6_addr))
1203 pi6.ipi6_addr.s6_addr16[1] = 0;
1204 pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif)
1205 ? m->m_pkthdr.rcvif->if_index
1206 : 0;
1207 *mp = sbcreatecontrol((caddr_t) &pi6,
5aa41e7c 1208 sizeof(struct in6_pktinfo), IS2292(IPV6_2292PKTINFO, IPV6_PKTINFO),
984263bc
MD
1209 IPPROTO_IPV6);
1210 if (*mp)
1211 mp = &(*mp)->m_next;
1212 }
1213
bde3511a 1214 if (in6p->in6p_flags & IN6P_HOPLIMIT) {
984263bc
MD
1215 int hlim = ip6->ip6_hlim & 0xff;
1216 *mp = sbcreatecontrol((caddr_t) &hlim,
5aa41e7c
HT
1217 sizeof(int), IS2292(IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6);
1218 if (*mp)
1219 mp = &(*mp)->m_next;
1220 }
1221
1222 if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
1223 u_int32_t flowinfo;
1224 int tclass;
1225
1226 flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1227 flowinfo >>= 20;
1228
1229 tclass = flowinfo & 0xff;
1230 *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
1231 IPV6_TCLASS, IPPROTO_IPV6);
984263bc
MD
1232 if (*mp)
1233 mp = &(*mp)->m_next;
1234 }
1235
1236 /*
1237 * IPV6_HOPOPTS socket option. We require super-user privilege
1238 * for the option, but it might be too strict, since there might
1239 * be some hop-by-hop options which can be returned to normal user.
1240 * See RFC 2292 section 6.
1241 */
bde3511a 1242 if ((in6p->in6p_flags & IN6P_HOPOPTS) && privileged) {
984263bc
MD
1243 /*
1244 * Check if a hop-by-hop options header is contatined in the
1245 * received packet, and if so, store the options as ancillary
1246 * data. Note that a hop-by-hop options header must be
1247 * just after the IPv6 header, which fact is assured through
1248 * the IPv6 input processing.
1249 */
1250 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1251 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1252 struct ip6_hbh *hbh;
1253 int hbhlen = 0;
1254#ifdef PULLDOWN_TEST
1255 struct mbuf *ext;
1256#endif
1257
1258#ifndef PULLDOWN_TEST
1259 hbh = (struct ip6_hbh *)(ip6 + 1);
1260 hbhlen = (hbh->ip6h_len + 1) << 3;
1261#else
1262 ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1263 ip6->ip6_nxt);
1264 if (ext == NULL) {
1265 ip6stat.ip6s_tooshort++;
1266 return;
1267 }
1268 hbh = mtod(ext, struct ip6_hbh *);
1269 hbhlen = (hbh->ip6h_len + 1) << 3;
1270 if (hbhlen != ext->m_len) {
1271 m_freem(ext);
1272 ip6stat.ip6s_tooshort++;
1273 return;
1274 }
1275#endif
1276
1277 /*
1278 * XXX: We copy whole the header even if a jumbo
1279 * payload option is included, which option is to
1280 * be removed before returning in the RFC 2292.
1281 * Note: this constraint is removed in 2292bis.
1282 */
1283 *mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
5aa41e7c 1284 IS2292(IPV6_2292HOPOPTS, IPV6_HOPOPTS), IPPROTO_IPV6);
984263bc
MD
1285 if (*mp)
1286 mp = &(*mp)->m_next;
1287#ifdef PULLDOWN_TEST
1288 m_freem(ext);
1289#endif
1290 }
1291 }
1292
1293 /* IPV6_DSTOPTS and IPV6_RTHDR socket options */
1294 if ((in6p->in6p_flags & (IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) {
1295 int proto, off, nxt;
1296
1297 /*
1298 * go through the header chain to see if a routing header is
1299 * contained in the packet. We need this information to store
1300 * destination options headers (if any) properly.
1301 * XXX: performance issue. We should record this info when
1302 * processing extension headers in incoming routine.
bde3511a 1303 * (todo) use m_aux?
984263bc
MD
1304 */
1305 proto = IPPROTO_IPV6;
1306 off = 0;
1307 nxt = -1;
1308 while (1) {
1309 int newoff;
1310
1311 newoff = ip6_nexthdr(m, off, proto, &nxt);
1312 if (newoff < 0)
1313 break;
1314 if (newoff < off) /* invalid, check for safety */
1315 break;
1316 if ((proto = nxt) == IPPROTO_ROUTING) {
1317 rthdr_exist = 1;
1318 break;
1319 }
1320 off = newoff;
1321 }
1322 }
1323
1324 if ((in6p->in6p_flags &
1325 (IN6P_RTHDR | IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) {
1326 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1327 int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1328
1329 /*
1330 * Search for destination options headers or routing
1331 * header(s) through the header chain, and stores each
1332 * header as ancillary data.
1333 * Note that the order of the headers remains in
1334 * the chain of ancillary data.
1335 */
1336 while (1) { /* is explicit loop prevention necessary? */
1337 struct ip6_ext *ip6e = NULL;
1338 int elen;
1339#ifdef PULLDOWN_TEST
1340 struct mbuf *ext = NULL;
1341#endif
1342
1343 /*
1344 * if it is not an extension header, don't try to
1345 * pull it from the chain.
1346 */
1347 switch (nxt) {
1348 case IPPROTO_DSTOPTS:
1349 case IPPROTO_ROUTING:
1350 case IPPROTO_HOPOPTS:
1351 case IPPROTO_AH: /* is it possible? */
1352 break;
1353 default:
1354 goto loopend;
1355 }
1356
1357#ifndef PULLDOWN_TEST
1358 if (off + sizeof(*ip6e) > m->m_len)
1359 goto loopend;
1360 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
1361 if (nxt == IPPROTO_AH)
1362 elen = (ip6e->ip6e_len + 2) << 2;
1363 else
1364 elen = (ip6e->ip6e_len + 1) << 3;
1365 if (off + elen > m->m_len)
1366 goto loopend;
1367#else
1368 ext = ip6_pullexthdr(m, off, nxt);
1369 if (ext == NULL) {
1370 ip6stat.ip6s_tooshort++;
1371 return;
1372 }
1373 ip6e = mtod(ext, struct ip6_ext *);
1374 if (nxt == IPPROTO_AH)
1375 elen = (ip6e->ip6e_len + 2) << 2;
1376 else
1377 elen = (ip6e->ip6e_len + 1) << 3;
1378 if (elen != ext->m_len) {
1379 m_freem(ext);
1380 ip6stat.ip6s_tooshort++;
1381 return;
1382 }
1383#endif
1384
1385 switch (nxt) {
1386 case IPPROTO_DSTOPTS:
1387 if ((in6p->in6p_flags & IN6P_DSTOPTS) == 0)
1388 break;
1389
1390 /*
1391 * We also require super-user privilege for
1392 * the option.
1393 * See the comments on IN6_HOPOPTS.
1394 */
1395 if (!privileged)
1396 break;
1397
1398 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
5aa41e7c 1399 IS2292(IPV6_2292DSTOPTS, IPV6_DSTOPTS),
984263bc
MD
1400 IPPROTO_IPV6);
1401 if (*mp)
1402 mp = &(*mp)->m_next;
1403 break;
1404 case IPPROTO_ROUTING:
1405 if (!in6p->in6p_flags & IN6P_RTHDR)
1406 break;
1407
1408 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
5aa41e7c 1409 IS2292(IPV6_2292RTHDR, IPV6_RTHDR),
984263bc
MD
1410 IPPROTO_IPV6);
1411 if (*mp)
1412 mp = &(*mp)->m_next;
1413 break;
1414 case IPPROTO_HOPOPTS:
1415 case IPPROTO_AH: /* is it possible? */
1416 break;
1417
1418 default:
1419 /*
1420 * other cases have been filtered in the above.
1421 * none will visit this case. here we supply
1422 * the code just in case (nxt overwritten or
1423 * other cases).
1424 */
1425#ifdef PULLDOWN_TEST
1426 m_freem(ext);
1427#endif
1428 goto loopend;
1429
1430 }
1431
1432 /* proceed with the next header. */
1433 off += elen;
1434 nxt = ip6e->ip6e_nxt;
1435 ip6e = NULL;
1436#ifdef PULLDOWN_TEST
1437 m_freem(ext);
1438 ext = NULL;
1439#endif
1440 }
1441 loopend:
1442 ;
1443 }
5aa41e7c
HT
1444#undef IS2292
1445}
1446
1447void
1448ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
1449{
1450 struct socket *so;
1451 struct mbuf *m_mtu;
1452 struct ip6_mtuinfo mtuctl;
1453
1454 so = in6p->inp_socket;
1455
1456 if (mtu == NULL)
1457 return;
1458
1459#ifdef DIAGNOSTIC
1460 if (so == NULL) /* I believe this is impossible */
1461 panic("ip6_notify_pmtu: socket is NULL");
1462#endif
1463
1464 bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */
1465 mtuctl.ip6m_mtu = *mtu;
1466 mtuctl.ip6m_addr = *dst;
1467
1468 if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
1469 IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
1470 return;
1471
d557216f 1472 if (sbappendaddr(&so->so_rcv.sb, (struct sockaddr *)dst, NULL, m_mtu)
5aa41e7c
HT
1473 == 0) {
1474 m_freem(m_mtu);
1475 /* XXX: should count statistics */
1476 } else
1477 sorwakeup(so);
984263bc 1478
5aa41e7c 1479 return;
984263bc
MD
1480}
1481
#ifdef PULLDOWN_TEST
/*
 * Pull a single extension header out of the mbuf chain.
 *
 * The header of type nxt found at offset off in m is copied into a mbuf
 * obtained from m_getb().  Returns that mbuf, with m_len set to the header
 * length, or NULL when the allocation fails or the header length is not
 * smaller than the trailing space of the new mbuf.
 */
static struct mbuf *
ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
{
	struct ip6_ext exthdr;
	struct mbuf *copy;
	size_t elen;

#ifdef DIAGNOSTIC
	if (nxt != IPPROTO_DSTOPTS && nxt != IPPROTO_ROUTING &&
	    nxt != IPPROTO_HOPOPTS &&
	    nxt != IPPROTO_AH)		/* is it possible? */
		kprintf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
#endif

	/* Peek at the generic header to learn the real header length. */
	m_copydata(m, off, sizeof(exthdr), (caddr_t)&exthdr);
	elen = (nxt == IPPROTO_AH) ? (exthdr.ip6e_len + 2) << 2
				   : (exthdr.ip6e_len + 1) << 3;

	copy = m_getb(elen, MB_DONTWAIT, MT_DATA, 0);
	if (copy == NULL)
		return (NULL);
	copy->m_len = 0;

	if (elen >= M_TRAILINGSPACE(copy)) {
		m_free(copy);
		return (NULL);
	}

	m_copydata(m, off, elen, mtod(copy, caddr_t));
	copy->m_len = elen;
	return (copy);
}
#endif
1527
1528/*
1529 * Get pointer to the previous header followed by the header
1530 * currently processed.
1531 * XXX: This function supposes that
1532 * M includes all headers,
1533 * the next header field and the header length field of each header
1534 * are valid, and
1535 * the sum of each header length equals to OFF.
1536 * Because of these assumptions, this function must be called very
1537 * carefully. Moreover, it will not be used in the near future when
1538 * we develop `neater' mechanism to process extension headers.
1539 */
1540char *
122ebd49 1541ip6_get_prevhdr(struct mbuf *m, int off)
984263bc
MD
1542{
1543 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1544
1545 if (off == sizeof(struct ip6_hdr))
bde3511a 1546 return (&ip6->ip6_nxt);
984263bc
MD
1547 else {
1548 int len, nxt;
1549 struct ip6_ext *ip6e = NULL;
1550
1551 nxt = ip6->ip6_nxt;
1552 len = sizeof(struct ip6_hdr);
1553 while (len < off) {
1554 ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);
1555
1556 switch (nxt) {
1557 case IPPROTO_FRAGMENT:
1558 len += sizeof(struct ip6_frag);
1559 break;
1560 case IPPROTO_AH:
1561 len += (ip6e->ip6e_len + 2) << 2;
1562 break;
1563 default:
1564 len += (ip6e->ip6e_len + 1) << 3;
1565 break;
1566 }
1567 nxt = ip6e->ip6e_nxt;
1568 }
1569 if (ip6e)
bde3511a 1570 return (&ip6e->ip6e_nxt);
984263bc
MD
1571 else
1572 return NULL;
1573 }
1574}
1575
1576/*
1577 * get next header offset. m will be retained.
1578 */
1579int
122ebd49 1580ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
984263bc
MD
1581{
1582 struct ip6_hdr ip6;
1583 struct ip6_ext ip6e;
1584 struct ip6_frag fh;
1585
1586 /* just in case */
1587 if (m == NULL)
1588 panic("ip6_nexthdr: m == NULL");
1589 if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1590 return -1;
1591
1592 switch (proto) {
1593 case IPPROTO_IPV6:
1594 if (m->m_pkthdr.len < off + sizeof(ip6))
1595 return -1;
1596 m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
1597 if (nxtp)
1598 *nxtp = ip6.ip6_nxt;
1599 off += sizeof(ip6);
1600 return off;
1601
1602 case IPPROTO_FRAGMENT:
1603 /*
1604 * terminate parsing if it is not the first fragment,
1605 * it does not make sense to parse through it.
1606 */
1607 if (m->m_pkthdr.len < off + sizeof(fh))
1608 return -1;
1609 m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
a990c205
JH
1610 /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
1611 if (fh.ip6f_offlg & IP6F_OFF_MASK)
984263bc
MD
1612 return -1;
1613 if (nxtp)
1614 *nxtp = fh.ip6f_nxt;
1615 off += sizeof(struct ip6_frag);
1616 return off;
1617
1618 case IPPROTO_AH:
1619 if (m->m_pkthdr.len < off + sizeof(ip6e))
1620 return -1;
1621 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1622 if (nxtp)
1623 *nxtp = ip6e.ip6e_nxt;
1624 off += (ip6e.ip6e_len + 2) << 2;
1625 return off;
1626
1627 case IPPROTO_HOPOPTS:
1628 case IPPROTO_ROUTING:
1629 case IPPROTO_DSTOPTS:
1630 if (m->m_pkthdr.len < off + sizeof(ip6e))
1631 return -1;
1632 m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
1633 if (nxtp)
1634 *nxtp = ip6e.ip6e_nxt;
1635 off += (ip6e.ip6e_len + 1) << 3;
1636 return off;
1637
1638 case IPPROTO_NONE:
1639 case IPPROTO_ESP:
1640 case IPPROTO_IPCOMP:
1641 /* give up */
1642 return -1;
1643
1644 default:
1645 return -1;
1646 }
1647
1648 return -1;
1649}
1650
/*
 * Get the offset of the last header in the chain by stepping with
 * ip6_nexthdr() until it cannot advance any further.  m will be kept
 * untainted.
 *
 * *nxtp (when nxtp is not NULL) receives the next-header values reported by
 * ip6_nexthdr() along the way.  Returns the offset of the last header
 * reached, or -1 when ip6_nexthdr() reports an offset that went backwards.
 */
int
ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
	int next_off;
	int scratch;

	/* Callers not interested in the protocol get a private slot. */
	if (nxtp == NULL) {
		scratch = -1;
		nxtp = &scratch;
	}

	for (;;) {
		next_off = ip6_nexthdr(m, off, proto, nxtp);
		if (next_off < 0)
			return (off);
		if (next_off < off)
			return (-1);	/* invalid */
		if (next_off == off)
			return (next_off);

		proto = *nxtp;
		off = next_off;
	}
}
1677
1678struct ip6aux *
122ebd49 1679ip6_addaux(struct mbuf *m)
984263bc
MD
1680{
1681 struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1682 if (!tag) {
1683 tag = m_tag_get(PACKET_TAG_IPV6_INPUT,
1684 sizeof (struct ip6aux),
74f1caca 1685 MB_DONTWAIT);
984263bc
MD
1686 if (tag)
1687 m_tag_prepend(m, tag);
1688 }
1689 if (tag)
d031aa80
MD
1690 bzero(m_tag_data(tag), sizeof (struct ip6aux));
1691 return tag ? (struct ip6aux *)m_tag_data(tag) : NULL;
984263bc
MD
1692}
1693
1694struct ip6aux *
122ebd49 1695ip6_findaux(struct mbuf *m)
984263bc
MD
1696{
1697 struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1698 return tag ? (struct ip6aux*)(tag+1) : NULL;
1699}
1700
1701void
122ebd49 1702ip6_delaux(struct mbuf *m)
984263bc
MD
1703{
1704 struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
1705 if (tag)
1706 m_tag_delete(m, tag);
1707}
1708
1709/*
1710 * System control for IP6
1711 */
1712
1713u_char inet6ctlerrmap[PRC_NCMDS] = {
1714 0, 0, 0, 0,
1715 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1716 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1717 EMSGSIZE, EHOSTUNREACH, 0, 0,
1718 0, 0, 0, 0,
1719 ENOPROTOOPT
1720};