kill db_print_backtrace()
[dragonfly.git] / sys / net / route.c
CommitLineData
f3ed2586
JH
1/*
2 * Copyright (c) 2004, 2005 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Jeffrey M. Hsu.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
984263bc
MD
33/*
34 * Copyright (c) 1980, 1986, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)route.c 8.3 (Berkeley) 1/9/95
66 * $FreeBSD: src/sys/net/route.c,v 1.59.2.10 2003/01/17 08:04:00 ru Exp $
22c452ab 67 * $DragonFly: src/sys/net/route.c,v 1.41 2008/11/09 10:50:15 sephe Exp $
984263bc
MD
68 */
69
70#include "opt_inet.h"
9b42cabe 71#include "opt_mpls.h"
984263bc
MD
72
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/socket.h>
78#include <sys/domain.h>
79#include <sys/kernel.h>
72ce0f6d
HP
80#include <sys/sysctl.h>
81#include <sys/globaldata.h>
82#include <sys/thread.h>
984263bc
MD
83
84#include <net/if.h>
85#include <net/route.h>
ecdefdda 86#include <net/netisr.h>
984263bc
MD
87
88#include <netinet/in.h>
1f2de5d4 89#include <net/ip_mroute/ip_mroute.h>
984263bc 90
4599cf19
MD
91#include <sys/thread2.h>
92#include <sys/msgport2.h>
93#include <net/netmsg2.h>
94
9b42cabe
NA
95#ifdef MPLS
96#include <netproto/mpls/mpls.h>
97#endif
98
72ce0f6d
HP
99static struct rtstatistics rtstatistics_percpu[MAXCPU];
100#ifdef SMP
101#define rtstat rtstatistics_percpu[mycpuid]
102#else
103#define rtstat rtstatistics_percpu[0]
104#endif
105
ecdefdda
MD
106struct radix_node_head *rt_tables[MAXCPU][AF_MAX+1];
107struct lwkt_port *rt_ports[MAXCPU];
984263bc 108
f23061d4
JH
109static void rt_maskedcopy (struct sockaddr *, struct sockaddr *,
110 struct sockaddr *);
ecdefdda
MD
111static void rtable_init(void);
112static void rtable_service_loop(void *dummy);
113static void rtinit_rtrequest_callback(int, int, struct rt_addrinfo *,
114 struct rtentry *, void *);
115
116#ifdef SMP
4599cf19
MD
117static void rtredirect_msghandler(struct netmsg *netmsg);
118static void rtrequest1_msghandler(struct netmsg *netmsg);
ecdefdda 119#endif
984263bc 120
9b42cabe
NA
121static int rt_setshims(struct rtentry *, struct sockaddr **);
122
72ce0f6d
HP
123SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW, 0, "Routing");
124
69cb4182
MD
125#ifdef ROUTE_DEBUG
126static int route_debug = 1;
127SYSCTL_INT(_net_route, OID_AUTO, route_debug, CTLFLAG_RW,
128 &route_debug, 0, "");
129#endif
130
facaabe1
SZ
131int route_assert_owner_access = 0;
132SYSCTL_INT(_net_route, OID_AUTO, assert_owner_access, CTLFLAG_RW,
133 &route_assert_owner_access, 0, "");
3eef1c4e 134
ecdefdda
MD
135/*
136 * Initialize the route table(s) for protocol domains and
137 * create a helper thread which will be responsible for updating
138 * route table entries on each cpu.
139 */
140void
141route_init(void)
142{
a4d91f46 143 int cpu;
ecdefdda
MD
144 thread_t rtd;
145
146 for (cpu = 0; cpu < ncpus; ++cpu)
147 bzero(&rtstatistics_percpu[cpu], sizeof(struct rtstatistics));
148 rn_init(); /* initialize all zeroes, all ones, mask table */
a4d91f46
SZ
149 rtable_init(); /* call dom_rtattach() on each cpu */
150
ecdefdda 151 for (cpu = 0; cpu < ncpus; cpu++) {
ecdefdda 152 lwkt_create(rtable_service_loop, NULL, &rtd, NULL,
a4d91f46 153 0, cpu, "rtable_cpu %d", cpu);
ecdefdda 154 rt_ports[cpu] = &rtd->td_msgport;
ecdefdda 155 }
ecdefdda
MD
156}
157
984263bc 158static void
a4d91f46 159rtable_init_oncpu(struct netmsg *nmsg)
984263bc
MD
160{
161 struct domain *dom;
a4d91f46 162 int cpu = mycpuid;
2e9572df 163
ecdefdda
MD
164 SLIST_FOREACH(dom, &domains, dom_next) {
165 if (dom->dom_rtattach) {
166 dom->dom_rtattach(
a4d91f46 167 (void **)&rt_tables[cpu][dom->dom_family],
ecdefdda
MD
168 dom->dom_rtoffset);
169 }
170 }
a4d91f46
SZ
171 ifnet_forwardmsg(&nmsg->nm_lmsg, cpu + 1);
172}
173
174static void
175rtable_init(void)
176{
177 struct netmsg nmsg;
178
179 netmsg_init(&nmsg, &curthread->td_msgport, 0, rtable_init_oncpu);
180 ifnet_domsg(&nmsg.nm_lmsg, 0);
984263bc
MD
181}
182
ecdefdda
MD
183/*
184 * Our per-cpu table management protocol thread. All route table operations
6dba1ac0 185 * are sequentially chained through all cpus starting at cpu #0 in order to
ecdefdda
MD
186 * maintain duplicate route tables on each cpu. Having a spearate route
187 * table management thread allows the protocol and interrupt threads to
188 * issue route table changes.
189 */
190static void
191rtable_service_loop(void *dummy __unused)
984263bc 192{
4599cf19 193 struct netmsg *netmsg;
ecdefdda 194 thread_t td = curthread;
72ce0f6d 195
1e3f8217 196 while ((netmsg = lwkt_waitport(&td->td_msgport, 0)) != NULL) {
4599cf19 197 netmsg->nm_dispatch(netmsg);
ecdefdda 198 }
984263bc
MD
199}
200
72ce0f6d
HP
201/*
202 * Routing statistics.
203 */
204#ifdef SMP
205static int
206sysctl_rtstatistics(SYSCTL_HANDLER_ARGS)
207{
208 int cpu, error = 0;
209
210 for (cpu = 0; cpu < ncpus; ++cpu) {
211 if ((error = SYSCTL_OUT(req, &rtstatistics_percpu[cpu],
212 sizeof(struct rtstatistics))))
213 break;
214 if ((error = SYSCTL_IN(req, &rtstatistics_percpu[cpu],
215 sizeof(struct rtstatistics))))
216 break;
217 }
218
219 return (error);
220}
221SYSCTL_PROC(_net_route, OID_AUTO, stats, (CTLTYPE_OPAQUE|CTLFLAG_RW),
222 0, 0, sysctl_rtstatistics, "S,rtstatistics", "Routing statistics");
223#else
224SYSCTL_STRUCT(_net_route, OID_AUTO, stats, CTLFLAG_RW, &rtstat, rtstatistics,
225"Routing statistics");
226#endif
227
984263bc
MD
228/*
229 * Packet routing routines.
230 */
f23061d4
JH
231
232/*
f3ed2586
JH
233 * Look up and fill in the "ro_rt" rtentry field in a route structure given
234 * an address in the "ro_dst" field. Always send a report on a miss and
235 * always clone routes.
f23061d4 236 */
984263bc 237void
2e9572df 238rtalloc(struct route *ro)
984263bc
MD
239{
240 rtalloc_ign(ro, 0UL);
241}
242
5fe66e68 243/*
f3ed2586
JH
244 * Look up and fill in the "ro_rt" rtentry field in a route structure given
245 * an address in the "ro_dst" field. Always send a report on a miss and
246 * optionally clone routes when RTF_CLONING or RTF_PRCLONING are not being
247 * ignored.
5fe66e68 248 */
984263bc 249void
f3ed2586 250rtalloc_ign(struct route *ro, u_long ignoreflags)
984263bc 251{
f23061d4
JH
252 if (ro->ro_rt != NULL) {
253 if (ro->ro_rt->rt_ifp != NULL && ro->ro_rt->rt_flags & RTF_UP)
984263bc 254 return;
f23061d4 255 rtfree(ro->ro_rt);
984263bc 256 ro->ro_rt = NULL;
984263bc 257 }
f3ed2586 258 ro->ro_rt = _rtlookup(&ro->ro_dst, RTL_REPORTMSG, ignoreflags);
984263bc
MD
259}
260
261/*
5fe66e68 262 * Look up the route that matches the given "dst" address.
f23061d4 263 *
f3ed2586 264 * Route lookup can have the side-effect of creating and returning
6554f2c4 265 * a cloned route instead when "dst" matches a cloning route and the
f3ed2586 266 * RTF_CLONING and RTF_PRCLONING flags are not being ignored.
f23061d4 267 *
f3ed2586 268 * Any route returned has its reference count incremented.
984263bc
MD
269 */
270struct rtentry *
6554f2c4 271_rtlookup(struct sockaddr *dst, boolean_t generate_report, u_long ignore)
984263bc 272{
ecdefdda 273 struct radix_node_head *rnh = rt_tables[mycpuid][dst->sa_family];
f3ed2586 274 struct rtentry *rt;
984263bc 275
f3ed2586
JH
276 if (rnh == NULL)
277 goto unreach;
278
279 /*
280 * Look up route in the radix tree.
281 */
590b8cd4
JH
282 rt = (struct rtentry *) rnh->rnh_matchaddr((char *)dst, rnh);
283 if (rt == NULL)
f3ed2586 284 goto unreach;
f3ed2586
JH
285
286 /*
287 * Handle cloning routes.
288 */
289 if ((rt->rt_flags & ~ignore & (RTF_CLONING | RTF_PRCLONING)) != 0) {
290 struct rtentry *clonedroute;
291 int error;
292
293 clonedroute = rt; /* copy in/copy out parameter */
294 error = rtrequest(RTM_RESOLVE, dst, NULL, NULL, 0,
295 &clonedroute); /* clone the route */
296 if (error != 0) { /* cloning failed */
6554f2c4 297 if (generate_report)
f3ed2586
JH
298 rt_dstmsg(RTM_MISS, dst, error);
299 rt->rt_refcnt++;
300 return (rt); /* return the uncloned route */
301 }
6554f2c4 302 if (generate_report) {
f3ed2586
JH
303 if (clonedroute->rt_flags & RTF_XRESOLVE)
304 rt_dstmsg(RTM_RESOLVE, dst, 0);
305 else
306 rt_rtmsg(RTM_ADD, clonedroute,
307 clonedroute->rt_ifp, 0);
984263bc 308 }
f3ed2586 309 return (clonedroute); /* return cloned route */
984263bc 310 }
f3ed2586
JH
311
312 /*
313 * Increment the reference count of the matched route and return.
314 */
315 rt->rt_refcnt++;
f23061d4 316 return (rt);
f3ed2586
JH
317
318unreach:
319 rtstat.rts_unreach++;
6554f2c4 320 if (generate_report)
f3ed2586
JH
321 rt_dstmsg(RTM_MISS, dst, 0);
322 return (NULL);
984263bc
MD
323}
324
984263bc 325void
2e9572df 326rtfree(struct rtentry *rt)
984263bc 327{
3eef1c4e
SZ
328 if (rt->rt_cpuid == mycpuid)
329 rtfree_oncpu(rt);
330 else
331 rtfree_remote(rt, 1);
332}
333
334void
335rtfree_oncpu(struct rtentry *rt)
336{
337 KKASSERT(rt->rt_cpuid == mycpuid);
0bf6c14c 338 KASSERT(rt->rt_refcnt > 0, ("rtfree: rt_refcnt %ld", rt->rt_refcnt));
984263bc 339
f23061d4 340 --rt->rt_refcnt;
0bf6c14c 341 if (rt->rt_refcnt == 0) {
ecdefdda
MD
342 struct radix_node_head *rnh =
343 rt_tables[mycpuid][rt_key(rt)->sa_family];
0bf6c14c
JH
344
345 if (rnh->rnh_close)
346 rnh->rnh_close((struct radix_node *)rt, rnh);
347 if (!(rt->rt_flags & RTF_UP)) {
348 /* deallocate route */
349 if (rt->rt_ifa != NULL)
350 IFAFREE(rt->rt_ifa);
351 if (rt->rt_parent != NULL)
352 RTFREE(rt->rt_parent); /* recursive call! */
353 Free(rt_key(rt));
354 Free(rt);
355 }
984263bc
MD
356 }
357}
358
3eef1c4e
SZ
359static void
360rtfree_remote_dispatch(struct netmsg *nmsg)
361{
362 struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
363 struct rtentry *rt = lmsg->u.ms_resultp;
364
365 rtfree_oncpu(rt);
366 lwkt_replymsg(lmsg, 0);
367}
368
369void
370rtfree_remote(struct rtentry *rt, int allow_panic)
371{
372 struct netmsg nmsg;
373 struct lwkt_msg *lmsg;
374
375 KKASSERT(rt->rt_cpuid != mycpuid);
376
facaabe1 377 if (route_assert_owner_access && allow_panic) {
3eef1c4e
SZ
378 panic("rt remote free rt_cpuid %d, mycpuid %d\n",
379 rt->rt_cpuid, mycpuid);
380 } else {
381 kprintf("rt remote free rt_cpuid %d, mycpuid %d\n",
382 rt->rt_cpuid, mycpuid);
1e5fb84b 383 print_backtrace();
3eef1c4e
SZ
384 }
385
386 netmsg_init(&nmsg, &curthread->td_msgport, 0, rtfree_remote_dispatch);
387 lmsg = &nmsg.nm_lmsg;
388 lmsg->u.ms_resultp = rt;
389
390 lwkt_domsg(rtable_portfn(rt->rt_cpuid), lmsg, 0);
391}
392
ecdefdda
MD
393static int
394rtredirect_oncpu(struct sockaddr *dst, struct sockaddr *gateway,
395 struct sockaddr *netmask, int flags, struct sockaddr *src)
984263bc 396{
6554f2c4
JH
397 struct rtentry *rt = NULL;
398 struct rt_addrinfo rtinfo;
984263bc 399 struct ifaddr *ifa;
72ce0f6d 400 u_long *stat = NULL;
f23061d4 401 int error;
984263bc
MD
402
403 /* verify the gateway is directly reachable */
2e9572df 404 if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
984263bc
MD
405 error = ENETUNREACH;
406 goto out;
407 }
f23061d4 408
984263bc 409 /*
6554f2c4
JH
410 * If the redirect isn't from our current router for this destination,
411 * it's either old or wrong.
984263bc 412 */
6554f2c4 413 if (!(flags & RTF_DONE) && /* XXX JH */
f3ed2586 414 (rt = rtpurelookup(dst)) != NULL &&
f23061d4 415 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) {
984263bc 416 error = EINVAL;
f23061d4 417 goto done;
6554f2c4
JH
418 }
419
420 /*
421 * If it redirects us to ourselves, we have a routing loop,
422 * perhaps as a result of an interface going down recently.
423 */
424 if (ifa_ifwithaddr(gateway)) {
984263bc 425 error = EHOSTUNREACH;
984263bc 426 goto done;
f23061d4
JH
427 }
428
984263bc 429 /*
6554f2c4
JH
430 * Create a new entry if the lookup failed or if we got back
431 * a wildcard entry for the default route. This is necessary
432 * for hosts which use routing redirects generated by smart
433 * gateways to dynamically build the routing tables.
984263bc 434 */
6554f2c4
JH
435 if (rt == NULL)
436 goto create;
437 if ((rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2)) {
438 rtfree(rt);
984263bc 439 goto create;
6554f2c4 440 }
f23061d4 441
6554f2c4
JH
442 /* Ignore redirects for directly connected hosts. */
443 if (!(rt->rt_flags & RTF_GATEWAY)) {
444 error = EHOSTUNREACH;
445 goto done;
446 }
447
448 if (!(rt->rt_flags & RTF_HOST) && (flags & RTF_HOST)) {
449 /*
450 * Changing from a network route to a host route.
451 * Create a new host route rather than smashing the
452 * network route.
453 */
f23061d4 454create:
6554f2c4
JH
455 flags |= RTF_GATEWAY | RTF_DYNAMIC;
456 bzero(&rtinfo, sizeof(struct rt_addrinfo));
457 rtinfo.rti_info[RTAX_DST] = dst;
458 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
459 rtinfo.rti_info[RTAX_NETMASK] = netmask;
460 rtinfo.rti_flags = flags;
461 rtinfo.rti_ifa = ifa;
462 rt = NULL; /* copy-in/copy-out parameter */
463 error = rtrequest1(RTM_ADD, &rtinfo, &rt);
464 if (rt != NULL)
465 flags = rt->rt_flags;
466 stat = &rtstat.rts_dynamic;
f23061d4 467 } else {
6554f2c4
JH
468 /*
469 * Smash the current notion of the gateway to this destination.
470 * Should check about netmask!!!
471 */
472 rt->rt_flags |= RTF_MODIFIED;
473 flags |= RTF_MODIFIED;
474 rt_setgate(rt, rt_key(rt), gateway);
475 error = 0;
476 stat = &rtstat.rts_newgateway;
f23061d4
JH
477 }
478
984263bc 479done:
f6870fe3
JH
480 if (rt != NULL)
481 rtfree(rt);
984263bc 482out:
f23061d4 483 if (error != 0)
984263bc
MD
484 rtstat.rts_badredirect++;
485 else if (stat != NULL)
486 (*stat)++;
f23061d4 487
ecdefdda
MD
488 return error;
489}
490
491#ifdef SMP
492
493struct netmsg_rtredirect {
4599cf19 494 struct netmsg netmsg;
ecdefdda
MD
495 struct sockaddr *dst;
496 struct sockaddr *gateway;
497 struct sockaddr *netmask;
498 int flags;
499 struct sockaddr *src;
500};
501
502#endif
503
504/*
505 * Force a routing table entry to the specified
506 * destination to go through the given gateway.
507 * Normally called as a result of a routing redirect
508 * message from the network layer.
509 *
510 * N.B.: must be called at splnet
511 */
512void
513rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
514 struct sockaddr *netmask, int flags, struct sockaddr *src)
515{
516 struct rt_addrinfo rtinfo;
517 int error;
518#ifdef SMP
519 struct netmsg_rtredirect msg;
520
4599cf19
MD
521 netmsg_init(&msg.netmsg, &curthread->td_msgport, 0,
522 rtredirect_msghandler);
ecdefdda
MD
523 msg.dst = dst;
524 msg.gateway = gateway;
525 msg.netmask = netmask;
526 msg.flags = flags;
527 msg.src = src;
a22c590e 528 error = lwkt_domsg(rtable_portfn(0), &msg.netmsg.nm_lmsg, 0);
ecdefdda
MD
529#else
530 error = rtredirect_oncpu(dst, gateway, netmask, flags, src);
531#endif
6554f2c4
JH
532 bzero(&rtinfo, sizeof(struct rt_addrinfo));
533 rtinfo.rti_info[RTAX_DST] = dst;
534 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
535 rtinfo.rti_info[RTAX_NETMASK] = netmask;
536 rtinfo.rti_info[RTAX_AUTHOR] = src;
537 rt_missmsg(RTM_REDIRECT, &rtinfo, flags, error);
984263bc
MD
538}
539
ecdefdda
MD
540#ifdef SMP
541
4599cf19
MD
542static void
543rtredirect_msghandler(struct netmsg *netmsg)
ecdefdda 544{
4599cf19 545 struct netmsg_rtredirect *msg = (void *)netmsg;
ecdefdda
MD
546 int nextcpu;
547
548 rtredirect_oncpu(msg->dst, msg->gateway, msg->netmask,
549 msg->flags, msg->src);
550 nextcpu = mycpuid + 1;
551 if (nextcpu < ncpus)
4599cf19 552 lwkt_forwardmsg(rtable_portfn(nextcpu), &netmsg->nm_lmsg);
ecdefdda 553 else
4599cf19 554 lwkt_replymsg(&netmsg->nm_lmsg, 0);
ecdefdda
MD
555}
556
557#endif
558
984263bc
MD
559/*
560* Routing table ioctl interface.
561*/
562int
87de5057 563rtioctl(u_long req, caddr_t data, struct ucred *cred)
984263bc
MD
564{
565#ifdef INET
566 /* Multicast goop, grrr... */
567 return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
2e9572df 568#else
984263bc 569 return ENXIO;
2e9572df 570#endif
984263bc
MD
571}
572
573struct ifaddr *
2e9572df 574ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
984263bc 575{
82ed7fc2 576 struct ifaddr *ifa;
2e9572df
JH
577
578 if (!(flags & RTF_GATEWAY)) {
984263bc
MD
579 /*
580 * If we are adding a route to an interface,
f23061d4 581 * and the interface is a point-to-point link,
984263bc
MD
582 * we should search for the destination
583 * as our clue to the interface. Otherwise
584 * we can use the local address.
585 */
2e9572df 586 ifa = NULL;
984263bc
MD
587 if (flags & RTF_HOST) {
588 ifa = ifa_ifwithdstaddr(dst);
589 }
2e9572df 590 if (ifa == NULL)
984263bc
MD
591 ifa = ifa_ifwithaddr(gateway);
592 } else {
593 /*
594 * If we are adding a route to a remote net
595 * or host, the gateway may still be on the
596 * other end of a pt to pt link.
597 */
598 ifa = ifa_ifwithdstaddr(gateway);
599 }
2e9572df 600 if (ifa == NULL)
984263bc 601 ifa = ifa_ifwithnet(gateway);
2e9572df 602 if (ifa == NULL) {
590b8cd4 603 struct rtentry *rt;
f23061d4 604
590b8cd4 605 rt = rtpurelookup(gateway);
2e9572df
JH
606 if (rt == NULL)
607 return (NULL);
984263bc 608 rt->rt_refcnt--;
2e9572df
JH
609 if ((ifa = rt->rt_ifa) == NULL)
610 return (NULL);
984263bc
MD
611 }
612 if (ifa->ifa_addr->sa_family != dst->sa_family) {
6554f2c4 613 struct ifaddr *oldifa = ifa;
f23061d4 614
984263bc 615 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
2e9572df 616 if (ifa == NULL)
6554f2c4 617 ifa = oldifa;
984263bc
MD
618 }
619 return (ifa);
620}
621
158abb01
RG
622static int rt_fixdelete (struct radix_node *, void *);
623static int rt_fixchange (struct radix_node *, void *);
984263bc
MD
624
/*
 * Argument bundle handed to the rt_fixchange() tree-walk callback.
 * Member order matters: the structure is filled in with a positional
 * initializer.
 */
struct rtfc_arg {
	struct rtentry		*rt0;	/* route the walk is performed for */
	struct radix_node_head	*rnh;	/* table that route lives in */
};
629
590b8cd4
JH
630/*
631 * Set rtinfo->rti_ifa and rtinfo->rti_ifp.
632 */
984263bc 633int
590b8cd4 634rt_getifa(struct rt_addrinfo *rtinfo)
984263bc 635{
590b8cd4
JH
636 struct sockaddr *gateway = rtinfo->rti_info[RTAX_GATEWAY];
637 struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
638 struct sockaddr *ifaaddr = rtinfo->rti_info[RTAX_IFA];
639 int flags = rtinfo->rti_flags;
984263bc
MD
640
641 /*
642 * ifp may be specified by sockaddr_dl
643 * when protocol address is ambiguous.
644 */
590b8cd4
JH
645 if (rtinfo->rti_ifp == NULL) {
646 struct sockaddr *ifpaddr;
647
648 ifpaddr = rtinfo->rti_info[RTAX_IFP];
649 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
650 struct ifaddr *ifa;
651
652 ifa = ifa_ifwithnet(ifpaddr);
653 if (ifa != NULL)
654 rtinfo->rti_ifp = ifa->ifa_ifp;
655 }
656 }
657
658 if (rtinfo->rti_ifa == NULL && ifaaddr != NULL)
659 rtinfo->rti_ifa = ifa_ifwithaddr(ifaaddr);
660 if (rtinfo->rti_ifa == NULL) {
984263bc
MD
661 struct sockaddr *sa;
662
663 sa = ifaaddr != NULL ? ifaaddr :
664 (gateway != NULL ? gateway : dst);
590b8cd4
JH
665 if (sa != NULL && rtinfo->rti_ifp != NULL)
666 rtinfo->rti_ifa = ifaof_ifpforaddr(sa, rtinfo->rti_ifp);
984263bc 667 else if (dst != NULL && gateway != NULL)
590b8cd4 668 rtinfo->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
984263bc 669 else if (sa != NULL)
590b8cd4 670 rtinfo->rti_ifa = ifa_ifwithroute(flags, sa, sa);
984263bc 671 }
590b8cd4
JH
672 if (rtinfo->rti_ifa == NULL)
673 return (ENETUNREACH);
674
675 if (rtinfo->rti_ifp == NULL)
676 rtinfo->rti_ifp = rtinfo->rti_ifa->ifa_ifp;
677 return (0);
984263bc
MD
678}
679
f23061d4
JH
680/*
681 * Do appropriate manipulations of a routing tree given
682 * all the bits of info needed
683 */
684int
685rtrequest(
686 int req,
687 struct sockaddr *dst,
688 struct sockaddr *gateway,
689 struct sockaddr *netmask,
690 int flags,
691 struct rtentry **ret_nrt)
692{
6554f2c4
JH
693 struct rt_addrinfo rtinfo;
694
695 bzero(&rtinfo, sizeof(struct rt_addrinfo));
696 rtinfo.rti_info[RTAX_DST] = dst;
697 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
698 rtinfo.rti_info[RTAX_NETMASK] = netmask;
699 rtinfo.rti_flags = flags;
700 return rtrequest1(req, &rtinfo, ret_nrt);
f23061d4
JH
701}
702
ecdefdda
MD
703int
704rtrequest_global(
705 int req,
706 struct sockaddr *dst,
707 struct sockaddr *gateway,
708 struct sockaddr *netmask,
709 int flags)
710{
711 struct rt_addrinfo rtinfo;
712
713 bzero(&rtinfo, sizeof(struct rt_addrinfo));
714 rtinfo.rti_info[RTAX_DST] = dst;
715 rtinfo.rti_info[RTAX_GATEWAY] = gateway;
716 rtinfo.rti_info[RTAX_NETMASK] = netmask;
717 rtinfo.rti_flags = flags;
718 return rtrequest1_global(req, &rtinfo, NULL, NULL);
719}
720
721#ifdef SMP
722
723struct netmsg_rtq {
4599cf19 724 struct netmsg netmsg;
ecdefdda
MD
725 int req;
726 struct rt_addrinfo *rtinfo;
727 rtrequest1_callback_func_t callback;
728 void *arg;
729};
730
731#endif
732
733int
734rtrequest1_global(int req, struct rt_addrinfo *rtinfo,
735 rtrequest1_callback_func_t callback, void *arg)
736{
737 int error;
738#ifdef SMP
739 struct netmsg_rtq msg;
740
4599cf19
MD
741 netmsg_init(&msg.netmsg, &curthread->td_msgport, 0,
742 rtrequest1_msghandler);
743 msg.netmsg.nm_lmsg.ms_error = -1;
ecdefdda
MD
744 msg.req = req;
745 msg.rtinfo = rtinfo;
746 msg.callback = callback;
747 msg.arg = arg;
a22c590e 748 error = lwkt_domsg(rtable_portfn(0), &msg.netmsg.nm_lmsg, 0);
ecdefdda
MD
749#else
750 struct rtentry *rt = NULL;
751
752 error = rtrequest1(req, rtinfo, &rt);
753 if (rt)
754 --rt->rt_refcnt;
755 if (callback)
756 callback(req, error, rtinfo, rt, arg);
757#endif
758 return (error);
759}
760
/*
 * Handle a route table request on the current cpu.  Since the route tables
 * are supposed to be identical on each cpu, an error occurring later in the
 * message chain is considered system-fatal.
 */
766#ifdef SMP
767
4599cf19
MD
768static void
769rtrequest1_msghandler(struct netmsg *netmsg)
ecdefdda 770{
4599cf19 771 struct netmsg_rtq *msg = (void *)netmsg;
ecdefdda
MD
772 struct rtentry *rt = NULL;
773 int nextcpu;
774 int error;
775
776 error = rtrequest1(msg->req, msg->rtinfo, &rt);
777 if (rt)
778 --rt->rt_refcnt;
779 if (msg->callback)
780 msg->callback(msg->req, error, msg->rtinfo, rt, msg->arg);
781
782 /*
783 * RTM_DELETE's are propogated even if an error occurs, since a
784 * cloned route might be undergoing deletion and cloned routes
785 * are not necessarily replicated. An overall error is returned
786 * only if no cpus have the route in question.
787 */
4599cf19
MD
788 if (msg->netmsg.nm_lmsg.ms_error < 0 || error == 0)
789 msg->netmsg.nm_lmsg.ms_error = error;
ecdefdda
MD
790
791 nextcpu = mycpuid + 1;
792 if (error && msg->req != RTM_DELETE) {
793 if (mycpuid != 0) {
794 panic("rtrequest1_msghandler: rtrequest table "
69cb4182 795 "error was not on cpu #0: %p", msg->rtinfo);
ecdefdda 796 }
4599cf19 797 lwkt_replymsg(&msg->netmsg.nm_lmsg, error);
ecdefdda 798 } else if (nextcpu < ncpus) {
4599cf19 799 lwkt_forwardmsg(rtable_portfn(nextcpu), &msg->netmsg.nm_lmsg);
ecdefdda 800 } else {
4599cf19
MD
801 lwkt_replymsg(&msg->netmsg.nm_lmsg,
802 msg->netmsg.nm_lmsg.ms_error);
ecdefdda 803 }
ecdefdda
MD
804}
805
806#endif
807
984263bc 808int
6554f2c4 809rtrequest1(int req, struct rt_addrinfo *rtinfo, struct rtentry **ret_nrt)
984263bc 810{
6554f2c4 811 struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
82ed7fc2
RG
812 struct rtentry *rt;
813 struct radix_node *rn;
814 struct radix_node_head *rnh;
984263bc
MD
815 struct ifaddr *ifa;
816 struct sockaddr *ndst;
2e9572df
JH
817 int error = 0;
818
819#define gotoerr(x) { error = x ; goto bad; }
984263bc 820
69cb4182
MD
821#ifdef ROUTE_DEBUG
822 if (route_debug)
823 rt_addrinfo_print(req, rtinfo);
824#endif
825
4986965b 826 crit_enter();
984263bc
MD
827 /*
828 * Find the correct routing tree to use for this Address Family
829 */
ecdefdda 830 if ((rnh = rt_tables[mycpuid][dst->sa_family]) == NULL)
2e9572df 831 gotoerr(EAFNOSUPPORT);
f23061d4 832
984263bc
MD
833 /*
834 * If we are adding a host route then we don't want to put
835 * a netmask in the tree, nor do we want to clone it.
836 */
6554f2c4
JH
837 if (rtinfo->rti_flags & RTF_HOST) {
838 rtinfo->rti_info[RTAX_NETMASK] = NULL;
839 rtinfo->rti_flags &= ~(RTF_CLONING | RTF_PRCLONING);
984263bc 840 }
f23061d4 841
984263bc
MD
842 switch (req) {
843 case RTM_DELETE:
f23061d4 844 /* Remove the item from the tree. */
6554f2c4
JH
845 rn = rnh->rnh_deladdr((char *)rtinfo->rti_info[RTAX_DST],
846 (char *)rtinfo->rti_info[RTAX_NETMASK],
f23061d4
JH
847 rnh);
848 if (rn == NULL)
2e9572df 849 gotoerr(ESRCH);
f23061d4
JH
850 KASSERT(!(rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)),
851 ("rnh_deladdr returned flags 0x%x", rn->rn_flags));
984263bc
MD
852 rt = (struct rtentry *)rn;
853
ecdefdda
MD
854 /* ref to prevent a deletion race */
855 ++rt->rt_refcnt;
856
f23061d4 857 /* Free any routes cloned from this one. */
984263bc 858 if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
2e9572df 859 rt_mask(rt) != NULL) {
f23061d4 860 rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
2e9572df 861 (char *)rt_mask(rt),
984263bc
MD
862 rt_fixdelete, rt);
863 }
864
f23061d4
JH
865 if (rt->rt_gwroute != NULL) {
866 RTFREE(rt->rt_gwroute);
867 rt->rt_gwroute = NULL;
984263bc
MD
868 }
869
870 /*
871 * NB: RTF_UP must be set during the search above,
872 * because we might delete the last ref, causing
873 * rt to get freed prematurely.
984263bc
MD
874 */
875 rt->rt_flags &= ~RTF_UP;
876
69cb4182
MD
877#ifdef ROUTE_DEBUG
878 if (route_debug)
879 rt_print(rtinfo, rt);
880#endif
881
f23061d4 882 /* Give the protocol a chance to keep things in sync. */
984263bc 883 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
6554f2c4 884 ifa->ifa_rtrequest(RTM_DELETE, rt, rtinfo);
984263bc 885
984263bc
MD
886 /*
887 * If the caller wants it, then it can have it,
888 * but it's up to it to free the rtentry as we won't be
889 * doing it.
890 */
a40401d5
JH
891 KASSERT(rt->rt_refcnt >= 0,
892 ("rtrequest1(DELETE): refcnt %ld", rt->rt_refcnt));
f23061d4 893 if (ret_nrt != NULL) {
ecdefdda 894 /* leave ref intact for return */
984263bc 895 *ret_nrt = rt;
ecdefdda
MD
896 } else {
897 /* deref / attempt to destroy */
984263bc
MD
898 rtfree(rt);
899 }
900 break;
901
902 case RTM_RESOLVE:
2e9572df
JH
903 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
904 gotoerr(EINVAL);
984263bc 905 ifa = rt->rt_ifa;
6554f2c4 906 rtinfo->rti_flags =
590b8cd4 907 rt->rt_flags & ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
6554f2c4
JH
908 rtinfo->rti_flags |= RTF_WASCLONED;
909 rtinfo->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
910 if ((rtinfo->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
911 rtinfo->rti_flags |= RTF_HOST;
9b42cabe
NA
912 rtinfo->rti_info[RTAX_MPLS1] = rt->rt_shim[0];
913 rtinfo->rti_info[RTAX_MPLS2] = rt->rt_shim[1];
914 rtinfo->rti_info[RTAX_MPLS3] = rt->rt_shim[2];
984263bc
MD
915 goto makeroute;
916
917 case RTM_ADD:
6554f2c4
JH
918 KASSERT(!(rtinfo->rti_flags & RTF_GATEWAY) ||
919 rtinfo->rti_info[RTAX_GATEWAY] != NULL,
f23061d4 920 ("rtrequest: GATEWAY but no gateway"));
984263bc 921
6554f2c4 922 if (rtinfo->rti_ifa == NULL && (error = rt_getifa(rtinfo)))
2e9572df 923 gotoerr(error);
6554f2c4 924 ifa = rtinfo->rti_ifa;
2e9572df 925makeroute:
6554f2c4 926 R_Malloc(rt, struct rtentry *, sizeof(struct rtentry));
2e9572df
JH
927 if (rt == NULL)
928 gotoerr(ENOBUFS);
6554f2c4
JH
929 bzero(rt, sizeof(struct rtentry));
930 rt->rt_flags = RTF_UP | rtinfo->rti_flags;
da14dbcf 931 rt->rt_cpuid = mycpuid;
6554f2c4 932 error = rt_setgate(rt, dst, rtinfo->rti_info[RTAX_GATEWAY]);
f23061d4 933 if (error != 0) {
984263bc 934 Free(rt);
2e9572df 935 gotoerr(error);
984263bc
MD
936 }
937
984263bc 938 ndst = rt_key(rt);
6554f2c4
JH
939 if (rtinfo->rti_info[RTAX_NETMASK] != NULL)
940 rt_maskedcopy(dst, ndst,
941 rtinfo->rti_info[RTAX_NETMASK]);
f23061d4 942 else
2e9572df 943 bcopy(dst, ndst, dst->sa_len);
984263bc 944
9b42cabe
NA
945 if (rtinfo->rti_info[RTAX_MPLS1] != NULL)
946 rt_setshims(rt, rtinfo->rti_info);
947
984263bc
MD
948 /*
949 * Note that we now have a reference to the ifa.
950 * This moved from below so that rnh->rnh_addaddr() can
951 * examine the ifa and ifa->ifa_ifp if it so desires.
952 */
f23061d4 953 IFAREF(ifa);
984263bc
MD
954 rt->rt_ifa = ifa;
955 rt->rt_ifp = ifa->ifa_ifp;
956 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
957
2e9572df 958 rn = rnh->rnh_addaddr((char *)ndst,
6554f2c4 959 (char *)rtinfo->rti_info[RTAX_NETMASK],
2e9572df
JH
960 rnh, rt->rt_nodes);
961 if (rn == NULL) {
f23061d4
JH
962 struct rtentry *oldrt;
963
984263bc 964 /*
f23061d4
JH
965 * We already have one of these in the tree.
966 * We do a special hack: if the old route was
967 * cloned, then we blow it away and try
968 * re-inserting the new one.
984263bc 969 */
f3ed2586 970 oldrt = rtpurelookup(ndst);
f23061d4
JH
971 if (oldrt != NULL) {
972 --oldrt->rt_refcnt;
973 if (oldrt->rt_flags & RTF_WASCLONED) {
974 rtrequest(RTM_DELETE, rt_key(oldrt),
975 oldrt->rt_gateway,
976 rt_mask(oldrt),
977 oldrt->rt_flags, NULL);
978 rn = rnh->rnh_addaddr((char *)ndst,
6554f2c4
JH
979 (char *)
980 rtinfo->rti_info[RTAX_NETMASK],
981 rnh, rt->rt_nodes);
f23061d4 982 }
984263bc
MD
983 }
984 }
985
986 /*
987 * If it still failed to go into the tree,
f23061d4 988 * then un-make it (this should be a function).
984263bc 989 */
2e9572df 990 if (rn == NULL) {
f23061d4 991 if (rt->rt_gwroute != NULL)
984263bc 992 rtfree(rt->rt_gwroute);
f23061d4 993 IFAFREE(ifa);
984263bc
MD
994 Free(rt_key(rt));
995 Free(rt);
2e9572df 996 gotoerr(EEXIST);
984263bc
MD
997 }
998
984263bc
MD
999 /*
1000 * If we got here from RESOLVE, then we are cloning
1001 * so clone the rest, and note that we
1002 * are a clone (and increment the parent's references)
1003 */
1004 if (req == RTM_RESOLVE) {
f23061d4
JH
1005 rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
1006 rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
2e9572df 1007 if ((*ret_nrt)->rt_flags &
f23061d4 1008 (RTF_CLONING | RTF_PRCLONING)) {
ef87f48d 1009 rt->rt_parent = *ret_nrt;
984263bc
MD
1010 (*ret_nrt)->rt_refcnt++;
1011 }
1012 }
1013
1014 /*
1015 * if this protocol has something to add to this then
1016 * allow it to do that as well.
1017 */
f23061d4 1018 if (ifa->ifa_rtrequest != NULL)
6554f2c4 1019 ifa->ifa_rtrequest(req, rt, rtinfo);
984263bc
MD
1020
1021 /*
1022 * We repeat the same procedure from rt_setgate() here because
1023 * it doesn't fire when we call it there because the node
1024 * hasn't been added to the tree yet.
1025 */
2e9572df
JH
1026 if (req == RTM_ADD && !(rt->rt_flags & RTF_HOST) &&
1027 rt_mask(rt) != NULL) {
f23061d4
JH
1028 struct rtfc_arg arg = { rt, rnh };
1029
2e9572df
JH
1030 rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
1031 (char *)rt_mask(rt),
984263bc
MD
1032 rt_fixchange, &arg);
1033 }
1034
69cb4182
MD
1035#ifdef ROUTE_DEBUG
1036 if (route_debug)
1037 rt_print(rtinfo, rt);
1038#endif
984263bc 1039 /*
f23061d4
JH
1040 * Return the resulting rtentry,
1041 * increasing the number of references by one.
984263bc 1042 */
ef87f48d 1043 if (ret_nrt != NULL) {
984263bc 1044 rt->rt_refcnt++;
f23061d4 1045 *ret_nrt = rt;
984263bc
MD
1046 }
1047 break;
1048 default:
1049 error = EOPNOTSUPP;
1050 }
1051bad:
69cb4182
MD
1052#ifdef ROUTE_DEBUG
1053 if (route_debug) {
1054 if (error)
4b1cf444 1055 kprintf("rti %p failed error %d\n", rtinfo, error);
69cb4182 1056 else
4b1cf444 1057 kprintf("rti %p succeeded\n", rtinfo);
69cb4182
MD
1058 }
1059#endif
4986965b 1060 crit_exit();
984263bc 1061 return (error);
984263bc
MD
1062}
1063
1064/*
1065 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
1066 * (i.e., the routes related to it by the operation of cloning). This
1067 * routine is iterated over all potential former-child-routes by way of
1068 * rnh->rnh_walktree_from() above, and those that actually are children of
1069 * the late parent (passed in as VP here) are themselves deleted.
1070 */
1071static int
2e9572df 1072rt_fixdelete(struct radix_node *rn, void *vp)
984263bc
MD
1073{
1074 struct rtentry *rt = (struct rtentry *)rn;
1075 struct rtentry *rt0 = vp;
1076
1077 if (rt->rt_parent == rt0 &&
1078 !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
f23061d4
JH
1079 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
1080 rt->rt_flags, NULL);
984263bc
MD
1081 }
1082 return 0;
1083}
1084
1085/*
1086 * This routine is called from rt_setgate() to do the analogous thing for
1087 * adds and changes. There is the added complication in this case of a
1088 * middle insert; i.e., insertion of a new network route between an older
1089 * network route and (cloned) host routes. For this reason, a simple check
1090 * of rt->rt_parent is insufficient; each candidate route must be tested
1091 * against the (mask, value) of the new route (passed as before in vp)
1092 * to see if the new route matches it.
1093 *
1094 * XXX - it may be possible to do fixdelete() for changes and reserve this
1095 * routine just for adds. I'm not sure why I thought it was necessary to do
1096 * changes this way.
1097 */
1098#ifdef DEBUG
1099static int rtfcdebug = 0;
1100#endif
1101
1102static int
2e9572df 1103rt_fixchange(struct radix_node *rn, void *vp)
984263bc
MD
1104{
1105 struct rtentry *rt = (struct rtentry *)rn;
1106 struct rtfc_arg *ap = vp;
1107 struct rtentry *rt0 = ap->rt0;
1108 struct radix_node_head *rnh = ap->rnh;
1109 u_char *xk1, *xm1, *xk2, *xmp;
1110 int i, len, mlen;
1111
1112#ifdef DEBUG
1113 if (rtfcdebug)
4b1cf444 1114 kprintf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
984263bc
MD
1115#endif
1116
f23061d4 1117 if (rt->rt_parent == NULL ||
984263bc
MD
1118 (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
1119#ifdef DEBUG
4b1cf444 1120 if (rtfcdebug) kprintf("no parent, pinned or cloning\n");
984263bc
MD
1121#endif
1122 return 0;
1123 }
1124
1125 if (rt->rt_parent == rt0) {
1126#ifdef DEBUG
4b1cf444 1127 if (rtfcdebug) kprintf("parent match\n");
984263bc 1128#endif
f23061d4
JH
1129 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
1130 rt->rt_flags, NULL);
984263bc
MD
1131 }
1132
1133 /*
1134 * There probably is a function somewhere which does this...
1135 * if not, there should be.
1136 */
f23061d4 1137 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
984263bc
MD
1138
1139 xk1 = (u_char *)rt_key(rt0);
1140 xm1 = (u_char *)rt_mask(rt0);
1141 xk2 = (u_char *)rt_key(rt);
1142
1143 /* avoid applying a less specific route */
1144 xmp = (u_char *)rt_mask(rt->rt_parent);
f23061d4
JH
1145 mlen = rt_key(rt->rt_parent)->sa_len;
1146 if (mlen > rt_key(rt0)->sa_len) {
984263bc
MD
1147#ifdef DEBUG
1148 if (rtfcdebug)
4b1cf444 1149 kprintf("rt_fixchange: inserting a less "
984263bc
MD
1150 "specific route\n");
1151#endif
1152 return 0;
1153 }
1154 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
1155 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
1156#ifdef DEBUG
1157 if (rtfcdebug)
4b1cf444 1158 kprintf("rt_fixchange: inserting a less "
984263bc
MD
1159 "specific route\n");
1160#endif
1161 return 0;
1162 }
1163 }
1164
1165 for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
1166 if ((xk2[i] & xm1[i]) != xk1[i]) {
1167#ifdef DEBUG
4b1cf444 1168 if (rtfcdebug) kprintf("no match\n");
984263bc
MD
1169#endif
1170 return 0;
1171 }
1172 }
1173
1174 /*
1175 * OK, this node is a clone, and matches the node currently being
1176 * changed/added under the node's mask. So, get rid of it.
1177 */
1178#ifdef DEBUG
4b1cf444 1179 if (rtfcdebug) kprintf("deleting\n");
984263bc 1180#endif
f23061d4
JH
1181 return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
1182 rt->rt_flags, NULL);
984263bc
MD
1183}
1184
/*
 * Round a length up to the next multiple of sizeof(long); a length that is
 * not positive yields sizeof(long).  The argument is fully parenthesised
 * so that expressions such as ROUNDUP(x & y) are evaluated as a whole.
 * It is expanded more than once and must be free of side effects.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
1186
1187int
2e9572df 1188rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
984263bc 1189{
f23061d4 1190 char *space, *oldspace;
984263bc 1191 int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
82ed7fc2 1192 struct rtentry *rt = rt0;
ecdefdda 1193 struct radix_node_head *rnh = rt_tables[mycpuid][dst->sa_family];
984263bc
MD
1194
1195 /*
1196 * A host route with the destination equal to the gateway
1197 * will interfere with keeping LLINFO in the routing
1198 * table, so disallow it.
1199 */
f23061d4
JH
1200 if (((rt0->rt_flags & (RTF_HOST | RTF_GATEWAY | RTF_LLINFO)) ==
1201 (RTF_HOST | RTF_GATEWAY)) &&
1202 dst->sa_len == gate->sa_len &&
1203 sa_equal(dst, gate)) {
984263bc
MD
1204 /*
1205 * The route might already exist if this is an RTM_CHANGE
1206 * or a routing redirect, so try to delete it.
1207 */
2e9572df
JH
1208 if (rt_key(rt0) != NULL)
1209 rtrequest(RTM_DELETE, rt_key(rt0), rt0->rt_gateway,
f23061d4 1210 rt_mask(rt0), rt0->rt_flags, NULL);
984263bc
MD
1211 return EADDRNOTAVAIL;
1212 }
1213
1214 /*
f23061d4 1215 * Both dst and gateway are stored in the same malloc'ed chunk
984263bc
MD
1216 * (If I ever get my hands on....)
1217 * if we need to malloc a new chunk, then keep the old one around
1218 * till we don't need it any more.
1219 */
2e9572df 1220 if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
f23061d4
JH
1221 oldspace = (char *)rt_key(rt);
1222 R_Malloc(space, char *, dlen + glen);
1223 if (space == NULL)
984263bc 1224 return ENOBUFS;
f23061d4 1225 rt->rt_nodes->rn_key = space;
984263bc 1226 } else {
f23061d4
JH
1227 space = (char *)rt_key(rt); /* Just use the old space. */
1228 oldspace = NULL;
984263bc
MD
1229 }
1230
f23061d4
JH
1231 /* Set the gateway value. */
1232 rt->rt_gateway = (struct sockaddr *)(space + dlen);
2e9572df 1233 bcopy(gate, rt->rt_gateway, glen);
984263bc 1234
f23061d4
JH
1235 if (oldspace != NULL) {
1236 /*
1237 * If we allocated a new chunk, preserve the original dst.
1238 * This way, rt_setgate() really just sets the gate
1239 * and leaves the dst field alone.
1240 */
1241 bcopy(dst, space, dlen);
1242 Free(oldspace);
984263bc
MD
1243 }
1244
1245 /*
f23061d4 1246 * If there is already a gwroute, it's now almost definitely wrong
984263bc
MD
1247 * so drop it.
1248 */
1249 if (rt->rt_gwroute != NULL) {
1250 RTFREE(rt->rt_gwroute);
1251 rt->rt_gwroute = NULL;
1252 }
984263bc 1253 if (rt->rt_flags & RTF_GATEWAY) {
f23061d4
JH
1254 /*
1255 * Cloning loop avoidance: In the presence of
1256 * protocol-cloning and bad configuration, it is
1257 * possible to get stuck in bottomless mutual recursion
1258 * (rtrequest rt_setgate rtlookup). We avoid this
1259 * by not allowing protocol-cloning to operate for
1260 * gateways (which is probably the correct choice
1261 * anyway), and avoid the resulting reference loops
1262 * by disallowing any route to run through itself as
1263 * a gateway. This is obviously mandatory when we
1264 * get rt->rt_output().
1265 *
0c3c561c 1266 * This breaks TTCP for hosts outside the gateway! XXX JH
f23061d4 1267 */
f3ed2586 1268 rt->rt_gwroute = _rtlookup(gate, RTL_REPORTMSG, RTF_PRCLONING);
984263bc 1269 if (rt->rt_gwroute == rt) {
f23061d4
JH
1270 rt->rt_gwroute = NULL;
1271 --rt->rt_refcnt;
984263bc
MD
1272 return EDQUOT; /* failure */
1273 }
1274 }
1275
1276 /*
1277 * This isn't going to do anything useful for host routes, so
1278 * don't bother. Also make sure we have a reasonable mask
1279 * (we don't yet have one during adds).
1280 */
2e9572df 1281 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
f23061d4
JH
1282 struct rtfc_arg arg = { rt, rnh };
1283
1284 rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
2e9572df 1285 (char *)rt_mask(rt),
984263bc
MD
1286 rt_fixchange, &arg);
1287 }
1288
1289 return 0;
1290}
1291
/*
 * Copy SRC to DST with NETMASK applied.
 *
 * The first two bytes (sa_len and sa_family) are copied unmasked.  Bytes
 * from offset 2 up to the shorter of the two lengths (byte 0 of SRC and of
 * NETMASK) are ANDed with the mask; whatever remains up to SRC's length is
 * zeroed.  DST must have room for SRC's length (and at least two bytes).
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
	      struct sockaddr *netmask)
{
	const unsigned char *from = (const unsigned char *)src;
	const unsigned char *mask = (const unsigned char *)netmask;
	unsigned char *to = (unsigned char *)dst;
	int srclen = from[0];
	int masklen = mask[0];
	int limit = (masklen > srclen) ? srclen : masklen;
	int i;

	/* sa_len and sa_family are taken over as they are */
	to[0] = from[0];
	to[1] = from[1];

	for (i = 2; i < limit; i++)
		to[i] = from[i] & mask[i];

	/* the mask ran out before the address did: clear the rest */
	if (i < srclen)
		bzero(to + i, srclen - i);
}
1313
1314int
1315rt_llroute(struct sockaddr *dst, struct rtentry *rt0, struct rtentry **drt)
1316{
1317 struct rtentry *up_rt, *rt;
1318
1319 if (!(rt0->rt_flags & RTF_UP)) {
f3ed2586 1320 up_rt = rtlookup(dst);
f23061d4
JH
1321 if (up_rt == NULL)
1322 return (EHOSTUNREACH);
1323 up_rt->rt_refcnt--;
1324 } else
1325 up_rt = rt0;
1326 if (up_rt->rt_flags & RTF_GATEWAY) {
1327 if (up_rt->rt_gwroute == NULL) {
f3ed2586 1328 up_rt->rt_gwroute = rtlookup(up_rt->rt_gateway);
f23061d4
JH
1329 if (up_rt->rt_gwroute == NULL)
1330 return (EHOSTUNREACH);
1331 } else if (!(up_rt->rt_gwroute->rt_flags & RTF_UP)) {
1332 rtfree(up_rt->rt_gwroute);
f3ed2586 1333 up_rt->rt_gwroute = rtlookup(up_rt->rt_gateway);
f23061d4
JH
1334 if (up_rt->rt_gwroute == NULL)
1335 return (EHOSTUNREACH);
1336 }
1337 rt = up_rt->rt_gwroute;
1338 } else
1339 rt = up_rt;
1340 if (rt->rt_flags & RTF_REJECT &&
1341 (rt->rt_rmx.rmx_expire == 0 || /* rt doesn't expire */
1342 time_second < rt->rt_rmx.rmx_expire)) /* rt not expired */
1343 return (rt->rt_flags & RTF_HOST ? EHOSTDOWN : EHOSTUNREACH);
1344 *drt = rt;
1345 return 0;
984263bc
MD
1346}
1347
9b42cabe
NA
1348static int
1349rt_setshims(struct rtentry *rt, struct sockaddr **rt_shim){
1350 int i;
1351
1352 for (i=0; i<3; i++) {
1353 struct sockaddr *shim = rt_shim[RTAX_MPLS1 + i];
1354 int shimlen;
1355
1356 if (shim == NULL)
1357 break;
1358
1359 shimlen = ROUNDUP(shim->sa_len);
03c05fdf 1360 R_Malloc(rt->rt_shim[i], struct sockaddr *, shimlen);
9b42cabe
NA
1361 bcopy(shim, rt->rt_shim[i], shimlen);
1362 }
1363
1364 return 0;
1365}
1366
69cb4182
MD
1367#ifdef ROUTE_DEBUG
1368
1369/*
1370 * Print out a route table entry
1371 */
1372void
1373rt_print(struct rt_addrinfo *rtinfo, struct rtentry *rn)
1374{
4b1cf444 1375 kprintf("rti %p cpu %d route %p flags %08lx: ",
69cb4182
MD
1376 rtinfo, mycpuid, rn, rn->rt_flags);
1377 sockaddr_print(rt_key(rn));
4b1cf444 1378 kprintf(" mask ");
69cb4182 1379 sockaddr_print(rt_mask(rn));
4b1cf444 1380 kprintf(" gw ");
69cb4182 1381 sockaddr_print(rn->rt_gateway);
4b1cf444
SW
1382 kprintf(" ifc \"%s\"", rn->rt_ifp ? rn->rt_ifp->if_dname : "?");
1383 kprintf(" ifa %p\n", rn->rt_ifa);
69cb4182
MD
1384}
1385
1386void
1387rt_addrinfo_print(int cmd, struct rt_addrinfo *rti)
1388{
1389 int didit = 0;
1390 int i;
1391
1392#ifdef ROUTE_DEBUG
1393 if (cmd == RTM_DELETE && route_debug > 1)
1e5fb84b 1394 print_backtrace();
69cb4182
MD
1395#endif
1396
1397 switch(cmd) {
1398 case RTM_ADD:
4b1cf444 1399 kprintf("ADD ");
69cb4182
MD
1400 break;
1401 case RTM_RESOLVE:
4b1cf444 1402 kprintf("RES ");
69cb4182
MD
1403 break;
1404 case RTM_DELETE:
4b1cf444 1405 kprintf("DEL ");
69cb4182
MD
1406 break;
1407 default:
4b1cf444 1408 kprintf("C%02d ", cmd);
69cb4182
MD
1409 break;
1410 }
4b1cf444 1411 kprintf("rti %p cpu %d ", rti, mycpuid);
69cb4182
MD
1412 for (i = 0; i < rti->rti_addrs; ++i) {
1413 if (rti->rti_info[i] == NULL)
1414 continue;
1415 if (didit)
4b1cf444 1416 kprintf(" ,");
69cb4182
MD
1417 switch(i) {
1418 case RTAX_DST:
4b1cf444 1419 kprintf("(DST ");
69cb4182
MD
1420 break;
1421 case RTAX_GATEWAY:
4b1cf444 1422 kprintf("(GWY ");
69cb4182
MD
1423 break;
1424 case RTAX_NETMASK:
4b1cf444 1425 kprintf("(MSK ");
69cb4182
MD
1426 break;
1427 case RTAX_GENMASK:
4b1cf444 1428 kprintf("(GEN ");
69cb4182
MD
1429 break;
1430 case RTAX_IFP:
4b1cf444 1431 kprintf("(IFP ");
69cb4182
MD
1432 break;
1433 case RTAX_IFA:
4b1cf444 1434 kprintf("(IFA ");
69cb4182
MD
1435 break;
1436 case RTAX_AUTHOR:
4b1cf444 1437 kprintf("(AUT ");
69cb4182
MD
1438 break;
1439 case RTAX_BRD:
4b1cf444 1440 kprintf("(BRD ");
69cb4182
MD
1441 break;
1442 default:
4b1cf444 1443 kprintf("(?%02d ", i);
69cb4182
MD
1444 break;
1445 }
1446 sockaddr_print(rti->rti_info[i]);
4b1cf444 1447 kprintf(")");
69cb4182
MD
1448 didit = 1;
1449 }
4b1cf444 1450 kprintf("\n");
69cb4182
MD
1451}
1452
1453void
1454sockaddr_print(struct sockaddr *sa)
1455{
1456 struct sockaddr_in *sa4;
1457 struct sockaddr_in6 *sa6;
1458 int len;
1459 int i;
1460
1461 if (sa == NULL) {
4b1cf444 1462 kprintf("NULL");
69cb4182
MD
1463 return;
1464 }
1465
1466 len = sa->sa_len - offsetof(struct sockaddr, sa_data[0]);
1467
1468 switch(sa->sa_family) {
1469 case AF_INET:
1470 case AF_INET6:
1471 default:
1472 switch(sa->sa_family) {
1473 case AF_INET:
1474 sa4 = (struct sockaddr_in *)sa;
4b1cf444 1475 kprintf("INET %d %d.%d.%d.%d",
69cb4182
MD
1476 ntohs(sa4->sin_port),
1477 (ntohl(sa4->sin_addr.s_addr) >> 24) & 255,
1478 (ntohl(sa4->sin_addr.s_addr) >> 16) & 255,
1479 (ntohl(sa4->sin_addr.s_addr) >> 8) & 255,
1480 (ntohl(sa4->sin_addr.s_addr) >> 0) & 255
1481 );
1482 break;
1483 case AF_INET6:
1484 sa6 = (struct sockaddr_in6 *)sa;
4b1cf444 1485 kprintf("INET6 %d %04x:%04x%04x:%04x:%04x:%04x:%04x:%04x",
69cb4182
MD
1486 ntohs(sa6->sin6_port),
1487 sa6->sin6_addr.s6_addr16[0],
1488 sa6->sin6_addr.s6_addr16[1],
1489 sa6->sin6_addr.s6_addr16[2],
1490 sa6->sin6_addr.s6_addr16[3],
1491 sa6->sin6_addr.s6_addr16[4],
1492 sa6->sin6_addr.s6_addr16[5],
1493 sa6->sin6_addr.s6_addr16[6],
1494 sa6->sin6_addr.s6_addr16[7]
1495 );
1496 break;
1497 default:
4b1cf444 1498 kprintf("AF%d ", sa->sa_family);
69cb4182
MD
1499 while (len > 0 && sa->sa_data[len-1] == 0)
1500 --len;
1501
1502 for (i = 0; i < len; ++i) {
1503 if (i)
4b1cf444
SW
1504 kprintf(".");
1505 kprintf("%d", (unsigned char)sa->sa_data[i]);
69cb4182
MD
1506 }
1507 break;
1508 }
1509 }
1510}
1511
1512#endif
1513
984263bc 1514/*
6554f2c4 1515 * Set up a routing table entry, normally for an interface.
984263bc
MD
1516 */
1517int
2e9572df 1518rtinit(struct ifaddr *ifa, int cmd, int flags)
984263bc 1519{
f23061d4 1520 struct sockaddr *dst, *deldst, *netmask;
f23061d4 1521 struct mbuf *m = NULL;
984263bc
MD
1522 struct radix_node_head *rnh;
1523 struct radix_node *rn;
6554f2c4 1524 struct rt_addrinfo rtinfo;
f23061d4 1525 int error;
984263bc
MD
1526
1527 if (flags & RTF_HOST) {
1528 dst = ifa->ifa_dstaddr;
1529 netmask = NULL;
1530 } else {
1531 dst = ifa->ifa_addr;
1532 netmask = ifa->ifa_netmask;
1533 }
1534 /*
1535 * If it's a delete, check that if it exists, it's on the correct
1536 * interface or we might scrub a route to another ifa which would
1537 * be confusing at best and possibly worse.
1538 */
1539 if (cmd == RTM_DELETE) {
1540 /*
1541 * It's a delete, so it should already exist..
1542 * If it's a net, mask off the host bits
1543 * (Assuming we have a mask)
1544 */
1545 if (netmask != NULL) {
74f1caca 1546 m = m_get(MB_DONTWAIT, MT_SONAME);
984263bc 1547 if (m == NULL)
f23061d4 1548 return (ENOBUFS);
e9fa4b60 1549 mbuftrackid(m, 34);
984263bc
MD
1550 deldst = mtod(m, struct sockaddr *);
1551 rt_maskedcopy(dst, deldst, netmask);
1552 dst = deldst;
1553 }
1554 /*
1555 * Look up an rtentry that is in the routing tree and
1556 * contains the correct info.
1557 */
ecdefdda 1558 if ((rnh = rt_tables[mycpuid][dst->sa_family]) == NULL ||
f23061d4
JH
1559 (rn = rnh->rnh_lookup((char *)dst,
1560 (char *)netmask, rnh)) == NULL ||
984263bc 1561 ((struct rtentry *)rn)->rt_ifa != ifa ||
f23061d4 1562 !sa_equal((struct sockaddr *)rn->rn_key, dst)) {
ef87f48d 1563 if (m != NULL)
f23061d4 1564 m_free(m);
984263bc
MD
1565 return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1566 }
1567 /* XXX */
1568#if 0
1569 else {
1570 /*
1571 * One would think that as we are deleting, and we know
1572 * it doesn't exist, we could just return at this point
1573 * with an "ELSE" clause, but apparently not..
1574 */
1575 return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1576 }
1577#endif
1578 }
1579 /*
1580 * Do the actual request
1581 */
6554f2c4
JH
1582 bzero(&rtinfo, sizeof(struct rt_addrinfo));
1583 rtinfo.rti_info[RTAX_DST] = dst;
1584 rtinfo.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1585 rtinfo.rti_info[RTAX_NETMASK] = netmask;
1586 rtinfo.rti_flags = flags | ifa->ifa_flags;
1587 rtinfo.rti_ifa = ifa;
ecdefdda
MD
1588 error = rtrequest1_global(cmd, &rtinfo, rtinit_rtrequest_callback, ifa);
1589 if (m != NULL)
1590 m_free(m);
1591 return (error);
1592}
1593
1594static void
1595rtinit_rtrequest_callback(int cmd, int error,
1596 struct rt_addrinfo *rtinfo, struct rtentry *rt,
1597 void *arg)
1598{
1599 struct ifaddr *ifa = arg;
1600
1601 if (error == 0 && rt) {
1602 if (mycpuid == 0) {
1603 ++rt->rt_refcnt;
1604 rt_newaddrmsg(cmd, ifa, error, rt);
1605 --rt->rt_refcnt;
1606 }
984263bc 1607 if (cmd == RTM_DELETE) {
a40401d5 1608 if (rt->rt_refcnt == 0) {
ecdefdda 1609 ++rt->rt_refcnt;
984263bc
MD
1610 rtfree(rt);
1611 }
984263bc
MD
1612 }
1613 }
984263bc
MD
1614}
1615
1616/* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
1617SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);