/* * Copyright (c) 2004, 2005 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Jeffrey M. Hsu. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of The DragonFly Project nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific, prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD: src/sys/net/rtsock.c,v 1.44.2.11 2002/12/04 14:05:41 ru Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); static struct route_cb { int ip_count; int ip6_count; int ns_count; int any_count; } route_cb; static const struct sockaddr route_src = { 2, PF_ROUTE, }; struct walkarg { int w_tmemsize; int w_op, w_arg; void *w_tmem; struct sysctl_req *w_req; }; #ifndef RTTABLE_DUMP_MSGCNT_MAX /* Should be large enough for dupkeys */ #define RTTABLE_DUMP_MSGCNT_MAX 64 #endif struct rttable_walkarg { int w_op; int w_arg; int w_bufsz; void *w_buf; int w_buflen; const char *w_key; const char *w_mask; struct sockaddr_storage w_key0; struct sockaddr_storage w_mask0; }; struct netmsg_rttable_walk { struct netmsg_base base; int af; struct rttable_walkarg *w; }; struct routecb { struct rawcb rocb_rcb; unsigned int rocb_msgfilter; char *rocb_missfilter; size_t rocb_missfilterlen; }; #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) static struct mbuf * rt_msg_mbuf (int, struct rt_addrinfo *); static void rt_msg_buffer (int, struct rt_addrinfo *, void *buf, int len); static int rt_msgsize(int type, const struct rt_addrinfo *rtinfo); static int rt_xaddrs (char *, char *, struct rt_addrinfo *); static int sysctl_rttable(int af, struct sysctl_req *req, int op, int arg); static int if_addrflags(const struct ifaddr *ifa); static int sysctl_iflist (int af, struct walkarg *w); static int route_output(struct mbuf *, struct socket *, ...); static void rt_setmetrics (u_long, struct rt_metrics *, struct rt_metrics *); /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. XXX */ static void rts_abort(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_abort(msg); /* msg invalid now */ crit_exit(); } static int rts_filter(struct mbuf *m, const struct sockproto *proto, const struct rawcb *rp) { const struct routecb *rop = (const struct routecb *)rp; const struct rt_msghdr *rtm; KKASSERT(m != NULL); KKASSERT(proto != NULL); KKASSERT(rp != NULL); /* Wrong family for this socket. */ if (proto->sp_family != PF_ROUTE) return ENOPROTOOPT; /* If no filter set, just return. */ if (rop->rocb_msgfilter == 0 && rop->rocb_missfilterlen == 0) return 0; /* Ensure we can access rtm_type */ if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + sizeof(rtm->rtm_type)) return EINVAL; rtm = mtod(m, const struct rt_msghdr *); /* If the rtm type is filtered out, return a positive. */ if (rop->rocb_msgfilter != 0 && !(rop->rocb_msgfilter & ROUTE_FILTER(rtm->rtm_type))) return EEXIST; if (rop->rocb_missfilterlen != 0 && rtm->rtm_type == RTM_MISS) { CTASSERT(RTAX_DST == 0); struct sockaddr *sa; struct sockaddr_storage ss; struct sockaddr *dst = (struct sockaddr *)&ss; char *cp = rop->rocb_missfilter; char *ep = cp + rop->rocb_missfilterlen; /* Ensure we can access sa_len */ if (m->m_pkthdr.len < sizeof(*rtm) + offsetof(struct sockaddr, sa_len) + sizeof(dst->sa_len)) return EINVAL; m_copydata(m, sizeof(*rtm) + offsetof(struct sockaddr, sa_len), sizeof(ss.ss_len), (caddr_t)&ss); if (m->m_pkthdr.len < sizeof(*rtm) + ss.ss_len) return EINVAL; /* Copy out the destination sockaddr */ m_copydata(m, sizeof(*rtm), ss.ss_len, (caddr_t)&ss); /* Find a matching sockaddr in the filter */ while (cp < ep) { sa = (struct sockaddr *)cp; if (sa->sa_len == dst->sa_len && memcmp(sa, dst, sa->sa_len) == 0) break; cp += RT_ROUNDUP(sa->sa_len); } if (cp == ep) return EEXIST; } /* Passed the filter. */ return 0; } /* pru_accept is EOPNOTSUPP */ static void rts_attach(netmsg_t msg) { struct socket *so = msg->base.nm_so; struct pru_attach_info *ai = msg->attach.nm_ai; struct rawcb *rp; struct routecb *rop; int proto = msg->attach.nm_proto; int error; crit_enter(); if (sotorawcb(so) != NULL) { error = EISCONN; goto done; } rop = kmalloc(sizeof *rop, M_PCB, M_WAITOK | M_ZERO); rp = &rop->rocb_rcb; /* * The critical section is necessary to block protocols from sending * error notifications (like RTM_REDIRECT or RTM_LOSING) while * this PCB is extant but incompletely initialized. * Probably we should try to do more of this work beforehand and * eliminate the critical section. */ so->so_pcb = rp; soreference(so); /* so_pcb assignment */ error = raw_attach(so, proto, ai->sb_rlimit); rp = sotorawcb(so); if (error) { kfree(rop, M_PCB); goto done; } switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count++; break; case AF_INET6: route_cb.ip6_count++; break; } rp->rcb_faddr = &route_src; rp->rcb_filter = rts_filter; route_cb.any_count++; soisconnected(so); so->so_options |= SO_USELOOPBACK; error = 0; done: crit_exit(); lwkt_replymsg(&msg->lmsg, error); } static void rts_bind(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_bind(msg); /* xxx just EINVAL */ /* msg invalid now */ crit_exit(); } static void rts_connect(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_connect(msg); /* XXX just EINVAL */ /* msg invalid now */ crit_exit(); } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static void rts_detach(netmsg_t msg) { struct socket *so = msg->base.nm_so; struct rawcb *rp = sotorawcb(so); struct routecb *rop = (struct routecb *)rp; crit_enter(); if (rop->rocb_missfilterlen != 0) kfree(rop->rocb_missfilter, M_PCB); if (rp != NULL) { switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count--; break; case AF_INET6: route_cb.ip6_count--; break; } route_cb.any_count--; } raw_usrreqs.pru_detach(msg); /* msg invalid now */ crit_exit(); } static void rts_disconnect(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_disconnect(msg); /* msg invalid now */ crit_exit(); } /* pru_listen is EOPNOTSUPP */ static void rts_peeraddr(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_peeraddr(msg); /* msg invalid now */ crit_exit(); } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static void rts_send(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_send(msg); /* msg invalid now */ crit_exit(); } /* pru_sense is null */ static void rts_shutdown(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_shutdown(msg); /* msg invalid now */ crit_exit(); } static void rts_sockaddr(netmsg_t msg) { crit_enter(); raw_usrreqs.pru_sockaddr(msg); /* msg invalid now */ crit_exit(); } static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_accept = pr_generic_notsupp, .pru_attach = rts_attach, .pru_bind = rts_bind, .pru_connect = rts_connect, .pru_connect2 = pr_generic_notsupp, .pru_control = pr_generic_notsupp, .pru_detach = rts_detach, .pru_disconnect = rts_disconnect, .pru_listen = pr_generic_notsupp, .pru_peeraddr = rts_peeraddr, .pru_rcvd = pr_generic_notsupp, .pru_rcvoob = pr_generic_notsupp, .pru_send = rts_send, .pru_sense = pru_sense_null, .pru_shutdown = rts_shutdown, .pru_sockaddr = rts_sockaddr, .pru_sosend = sosend, .pru_soreceive = soreceive }; static __inline sa_family_t familyof(struct sockaddr *sa) { return (sa != NULL ? sa->sa_family : 0); } /* * Routing socket input function. The packet must be serialized onto cpu 0. * We use the cpu0_soport() netisr processing loop to handle it. * * This looks messy but it means that anyone, including interrupt code, * can send a message to the routing socket. */ static void rts_input_handler(netmsg_t msg) { static const struct sockaddr route_dst = { 2, PF_ROUTE, }; struct sockproto route_proto; struct netmsg_packet *pmsg = &msg->packet; struct mbuf *m; sa_family_t family; struct rawcb *skip; family = pmsg->base.lmsg.u.ms_result; route_proto.sp_family = PF_ROUTE; route_proto.sp_protocol = family; m = pmsg->nm_packet; M_ASSERTPKTHDR(m); skip = m->m_pkthdr.header; m->m_pkthdr.header = NULL; raw_input(m, &route_proto, &route_src, &route_dst, skip); } static void rts_input_skip(struct mbuf *m, sa_family_t family, struct rawcb *skip) { struct netmsg_packet *pmsg; lwkt_port_t port; M_ASSERTPKTHDR(m); port = netisr_cpuport(0); /* XXX same as for routing socket */ pmsg = &m->m_hdr.mh_netmsg; netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 0, rts_input_handler); pmsg->nm_packet = m; pmsg->base.lmsg.u.ms_result = family; m->m_pkthdr.header = skip; /* XXX steal field in pkthdr */ lwkt_sendmsg(port, &pmsg->base.lmsg); } static __inline void rts_input(struct mbuf *m, sa_family_t family) { rts_input_skip(m, family, NULL); } static void route_ctloutput(netmsg_t msg) { struct socket *so = msg->ctloutput.base.nm_so; struct sockopt *sopt = msg->ctloutput.nm_sopt; struct routecb *rop = sotoroutecb(so); int error; unsigned int msgfilter; unsigned char *cp, *ep; size_t len; struct sockaddr *sa; if (sopt->sopt_level != AF_ROUTE) { error = EINVAL; goto out; } error = 0; switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case ROUTE_MSGFILTER: error = soopt_to_kbuf(sopt, &msgfilter, sizeof(msgfilter), sizeof(msgfilter)); if (error == 0) rop->rocb_msgfilter = msgfilter; break; case RO_MISSFILTER: /* Validate the data */ len = 0; cp = sopt->sopt_val; ep = cp + sopt->sopt_valsize; while (cp < ep) { if (ep - cp < offsetof(struct sockaddr, sa_len) + sizeof(sa->sa_len)) break; if (++len > RO_FILTSA_MAX) { error = ENOBUFS; break; } sa = (struct sockaddr *)cp; cp += RT_ROUNDUP(sa->sa_len); } if (cp != ep) { if (error == 0) error = EINVAL; break; } if (rop->rocb_missfilterlen != 0) kfree(rop->rocb_missfilter, M_PCB); if (sopt->sopt_valsize != 0) { rop->rocb_missfilter = kmalloc(sopt->sopt_valsize, M_PCB, M_WAITOK | M_NULLOK); if (rop->rocb_missfilter == NULL) { rop->rocb_missfilterlen = 0; error = ENOBUFS; break; } } else rop->rocb_missfilter = NULL; rop->rocb_missfilterlen = sopt->sopt_valsize; if (rop->rocb_missfilterlen != 0) memcpy(rop->rocb_missfilter, sopt->sopt_val, rop->rocb_missfilterlen); break; default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (sopt->sopt_name) { case ROUTE_MSGFILTER: msgfilter = rop->rocb_msgfilter; soopt_from_kbuf(sopt, &msgfilter, sizeof(msgfilter)); break; case RO_MISSFILTER: soopt_from_kbuf(sopt, rop->rocb_missfilter, rop->rocb_missfilterlen); break; default: error = ENOPROTOOPT; break; } } out: lwkt_replymsg(&msg->ctloutput.base.lmsg, error); } static void * reallocbuf_nofree(void *ptr, size_t len, size_t olen) { void *newptr; newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK); if (newptr == NULL) return NULL; bcopy(ptr, newptr, olen); if (olen < len) bzero((char *)newptr + olen, len - olen); return (newptr); } /* * Internal helper routine for route_output(). */ static int _fillrtmsg(struct rt_msghdr **prtm, struct rtentry *rt, struct rt_addrinfo *rtinfo) { int msglen; struct rt_msghdr *rtm = *prtm; /* Fill in rt_addrinfo for call to rt_msg_buffer(). */ rtinfo->rti_dst = rt_key(rt); rtinfo->rti_gateway = rt->rt_gateway; rtinfo->rti_netmask = rt_mask(rt); /* might be NULL */ rtinfo->rti_genmask = rt->rt_genmask; /* might be NULL */ if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { if (rt->rt_ifp != NULL) { rtinfo->rti_ifpaddr = TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid]) ->ifa->ifa_addr; rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = rt->rt_ifp->if_index; } else { rtinfo->rti_ifpaddr = NULL; rtinfo->rti_ifaaddr = NULL; } } else if (rt->rt_ifp != NULL) { rtm->rtm_index = rt->rt_ifp->if_index; } msglen = rt_msgsize(rtm->rtm_type, rtinfo); if (rtm->rtm_msglen < msglen) { /* NOTE: Caller will free the old rtm accordingly */ rtm = reallocbuf_nofree(rtm, msglen, rtm->rtm_msglen); if (rtm == NULL) return (ENOBUFS); *prtm = rtm; } rt_msg_buffer(rtm->rtm_type, rtinfo, rtm, msglen); rtm->rtm_flags = rt->rt_flags; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_addrs = rtinfo->rti_addrs; return (0); } struct rtm_arg { struct rt_msghdr *bak_rtm; struct rt_msghdr *new_rtm; }; static int fillrtmsg(struct rtm_arg *arg, struct rtentry *rt, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm = arg->new_rtm; int error; error = _fillrtmsg(&rtm, rt, rtinfo); if (!error) { if (arg->new_rtm != rtm) { /* * _fillrtmsg() just allocated a new rtm; * if the previously allocated rtm is not * the backing rtm, it should be freed. */ if (arg->new_rtm != arg->bak_rtm) kfree(arg->new_rtm, M_RTABLE); arg->new_rtm = rtm; } } return error; } static void route_output_add_callback(int, int, struct rt_addrinfo *, struct rtentry *, void *); static void route_output_delete_callback(int, int, struct rt_addrinfo *, struct rtentry *, void *); static int route_output_get_callback(int, struct rt_addrinfo *, struct rtentry *, void *, int); static int route_output_change_callback(int, struct rt_addrinfo *, struct rtentry *, void *, int); static int route_output_lock_callback(int, struct rt_addrinfo *, struct rtentry *, void *, int); /*ARGSUSED*/ static int route_output(struct mbuf *m, struct socket *so, ...) { struct rtm_arg arg; struct rt_msghdr *rtm = NULL; struct rawcb *rp = NULL; struct pr_output_info *oi; struct rt_addrinfo rtinfo; sa_family_t family; int len, error = 0; __va_list ap; M_ASSERTPKTHDR(m); __va_start(ap, so); oi = __va_arg(ap, struct pr_output_info *); __va_end(ap); family = familyof(NULL); #define gotoerr(e) { error = e; goto flush;} if (m == NULL || (m->m_len < sizeof(long) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); len = m->m_pkthdr.len; if (len < sizeof(struct rt_msghdr) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) gotoerr(EINVAL); rtm = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK); if (rtm == NULL) gotoerr(ENOBUFS); m_copydata(m, 0, len, (caddr_t)rtm); if (rtm->rtm_version != RTM_VERSION) gotoerr(EPROTONOSUPPORT); rtm->rtm_pid = oi->p_pid; bzero(&rtinfo, sizeof(struct rt_addrinfo)); rtinfo.rti_addrs = rtm->rtm_addrs; if (rt_xaddrs((char *)(rtm + 1), (char *)rtm + len, &rtinfo) != 0) gotoerr(EINVAL); rtinfo.rti_flags = rtm->rtm_flags; if (rtinfo.rti_dst == NULL || rtinfo.rti_dst->sa_family >= AF_MAX || (rtinfo.rti_gateway && rtinfo.rti_gateway->sa_family >= AF_MAX)) gotoerr(EINVAL); family = familyof(rtinfo.rti_dst); /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET && priv_check_cred(so->so_cred, PRIV_ROOT, 0) != 0) gotoerr(EPERM); if (rtinfo.rti_genmask != NULL) { error = rtmask_add_global(rtinfo.rti_genmask, rtm->rtm_type != RTM_GET ? RTREQ_PRIO_HIGH : RTREQ_PRIO_NORM); if (error) goto flush; } switch (rtm->rtm_type) { case RTM_ADD: if (rtinfo.rti_gateway == NULL) { error = EINVAL; } else { error = rtrequest1_global(RTM_ADD, &rtinfo, route_output_add_callback, rtm, RTREQ_PRIO_HIGH); } break; case RTM_DELETE: /* * Backing rtm (bak_rtm) could _not_ be freed during * rtrequest1_global or rtsearch_global, even if the * callback reallocates the rtm due to its size changes, * since rtinfo points to the backing rtm's memory area. * After rtrequest1_global or rtsearch_global returns, * it is safe to free the backing rtm, since rtinfo will * not be used anymore. * * new_rtm will be used to save the new rtm allocated * by rtrequest1_global or rtsearch_global. */ arg.bak_rtm = rtm; arg.new_rtm = rtm; error = rtrequest1_global(RTM_DELETE, &rtinfo, route_output_delete_callback, &arg, RTREQ_PRIO_HIGH); rtm = arg.new_rtm; if (rtm != arg.bak_rtm) kfree(arg.bak_rtm, M_RTABLE); break; case RTM_GET: /* See the comment in RTM_DELETE */ arg.bak_rtm = rtm; arg.new_rtm = rtm; error = rtsearch_global(RTM_GET, &rtinfo, route_output_get_callback, &arg, RTS_NOEXACTMATCH, RTREQ_PRIO_NORM); rtm = arg.new_rtm; if (rtm != arg.bak_rtm) kfree(arg.bak_rtm, M_RTABLE); break; case RTM_CHANGE: error = rtsearch_global(RTM_CHANGE, &rtinfo, route_output_change_callback, rtm, RTS_EXACTMATCH, RTREQ_PRIO_HIGH); break; case RTM_LOCK: error = rtsearch_global(RTM_LOCK, &rtinfo, route_output_lock_callback, rtm, RTS_EXACTMATCH, RTREQ_PRIO_HIGH); break; default: error = EOPNOTSUPP; break; } flush: if (rtm != NULL) { if (error != 0) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; } /* * Check to see if we don't want our own messages. */ if (!(so->so_options & SO_USELOOPBACK)) { if (route_cb.any_count <= 1) { if (rtm != NULL) kfree(rtm, M_RTABLE); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm != NULL) { m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); kfree(rtm, M_RTABLE); } if (m != NULL) rts_input_skip(m, family, rp); return (error); } static void route_output_add_callback(int cmd, int error, struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg) { struct rt_msghdr *rtm = arg; if (error == 0 && rt != NULL) { rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); rt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); if (rtinfo->rti_genmask != NULL) { rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask); if (rt->rt_genmask == NULL) { /* * This should not happen, since we * have already installed genmask * on each CPU before we reach here. */ panic("genmask is gone!?"); } } else { rt->rt_genmask = NULL; } rtm->rtm_index = rt->rt_ifp->if_index; } } static void route_output_delete_callback(int cmd, int error, struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg) { if (error == 0 && rt) { ++rt->rt_refcnt; if (fillrtmsg(arg, rt, rtinfo) != 0) { error = ENOBUFS; /* XXX no way to return the error */ } --rt->rt_refcnt; } if (rt && rt->rt_refcnt == 0) { ++rt->rt_refcnt; rtfree(rt); } } static int route_output_get_callback(int cmd, struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg, int found_cnt) { int error, found = 0; if (((rtinfo->rti_flags ^ rt->rt_flags) & RTF_HOST) == 0) found = 1; error = fillrtmsg(arg, rt, rtinfo); if (!error && found) { /* Got the exact match, we could return now! */ error = EJUSTRETURN; } return error; } static int route_output_change_callback(int cmd, struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg, int found_cnt) { struct rt_msghdr *rtm = arg; struct ifaddr *ifa; int error = 0; /* * new gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && rtinfo->rti_gateway != NULL) || rtinfo->rti_ifpaddr != NULL || (rtinfo->rti_ifaaddr != NULL && !sa_equal(rtinfo->rti_ifaaddr, rt->rt_ifa->ifa_addr))) { error = rt_getifa(rtinfo); if (error != 0) goto done; } if (rtinfo->rti_gateway != NULL) { /* * We only need to generate rtmsg upon the * first route to be changed. */ error = rt_setgate(rt, rt_key(rt), rtinfo->rti_gateway); if (error != 0) goto done; } if ((ifa = rtinfo->rti_ifa) != NULL) { struct ifaddr *oifa = rt->rt_ifa; if (oifa != ifa) { if (oifa && oifa->ifa_rtrequest) oifa->ifa_rtrequest(RTM_DELETE, rt); IFAFREE(rt->rt_ifa); IFAREF(ifa); rt->rt_ifa = ifa; rt->rt_ifp = rtinfo->rti_ifp; } } rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt); if (rtinfo->rti_genmask != NULL) { rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask); if (rt->rt_genmask == NULL) { /* * This should not happen, since we * have already installed genmask * on each CPU before we reach here. */ panic("genmask is gone!?"); } } rtm->rtm_index = rt->rt_ifp->if_index; if (found_cnt == 1) rt_rtmsg(RTM_CHANGE, rt, rt->rt_ifp, 0); done: return error; } static int route_output_lock_callback(int cmd, struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg, int found_cnt __unused) { struct rt_msghdr *rtm = arg; rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); rt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); return 0; } static void rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out) { #define setmetric(flag, elt) if (which & (flag)) out->elt = in->elt; setmetric(RTV_RPIPE, rmx_recvpipe); setmetric(RTV_SPIPE, rmx_sendpipe); setmetric(RTV_SSTHRESH, rmx_ssthresh); setmetric(RTV_RTT, rmx_rtt); setmetric(RTV_RTTVAR, rmx_rttvar); setmetric(RTV_HOPCOUNT, rmx_hopcount); setmetric(RTV_MTU, rmx_mtu); setmetric(RTV_EXPIRE, rmx_expire); setmetric(RTV_MSL, rmx_msl); setmetric(RTV_IWMAXSEGS, rmx_iwmaxsegs); setmetric(RTV_IWCAPSEGS, rmx_iwcapsegs); #undef setmetric } /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(char *cp, char *cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if ((cp + sa->sa_len) > cplim) { return (EINVAL); } /* * There are no more... Quit now. * If there are more bits, they are in error. * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * For compatibility, if we see this, point to a safe address. */ if (sa->sa_len == 0) { static struct sockaddr sa_zero = { sizeof sa_zero, AF_INET, }; rtinfo->rti_info[i] = &sa_zero; kprintf("rtsock: received more addr bits than sockaddrs.\n"); return (0); /* should be EINVAL but for compat */ } /* Accept the sockaddr. */ rtinfo->rti_info[i] = sa; cp += RT_ROUNDUP(sa->sa_len); } return (0); } static int rt_msghdrsize(int type) { switch (type) { case RTM_DELADDR: case RTM_NEWADDR: return sizeof(struct ifa_msghdr); case RTM_DELMADDR: case RTM_NEWMADDR: return sizeof(struct ifma_msghdr); case RTM_IFINFO: return sizeof(struct if_msghdr); case RTM_IFANNOUNCE: case RTM_IEEE80211: return sizeof(struct if_announcemsghdr); default: return sizeof(struct rt_msghdr); } } static int rt_msgsize(int type, const struct rt_addrinfo *rtinfo) { int len, i; len = rt_msghdrsize(type); for (i = 0; i < RTAX_MAX; i++) { if (rtinfo->rti_info[i] != NULL) len += RT_ROUNDUP(rtinfo->rti_info[i]->sa_len); } len = ALIGN(len); return len; } /* * Build a routing message in a buffer. * Copy the addresses in the rtinfo->rti_info[] sockaddr array * to the end of the buffer after the message header. * * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[]. * This side-effect can be avoided if we reorder the addrs bitmask field in all * the route messages to line up so we can set it here instead of back in the * calling routine. * * NOTE! The buffer may already contain a partially filled-out rtm via * _fillrtmsg(). */ static void rt_msg_buffer(int type, struct rt_addrinfo *rtinfo, void *buf, int msglen) { struct rt_msghdr *rtm; char *cp; int dlen, i; rtm = (struct rt_msghdr *) buf; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = msglen; cp = (char *)buf + rt_msghdrsize(type); rtinfo->rti_addrs = 0; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = RT_ROUNDUP(sa->sa_len); bcopy(sa, cp, dlen); cp += dlen; } } /* * Build a routing message in a mbuf chain. * Copy the addresses in the rtinfo->rti_info[] sockaddr array * to the end of the mbuf after the message header. * * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[]. * This side-effect can be avoided if we reorder the addrs bitmask field in all * the route messages to line up so we can set it here instead of back in the * calling routine. */ static struct mbuf * rt_msg_mbuf(int type, struct rt_addrinfo *rtinfo) { struct mbuf *m; struct rt_msghdr *rtm; int hlen, len; int i; hlen = rt_msghdrsize(type); KASSERT(hlen <= MCLBYTES, ("rt_msg_mbuf: hlen %d doesn't fit", hlen)); m = m_getl(hlen, M_NOWAIT, MT_DATA, M_PKTHDR, NULL); if (m == NULL) return (NULL); mbuftrackid(m, 32); m->m_pkthdr.len = m->m_len = hlen; m->m_pkthdr.rcvif = NULL; rtinfo->rti_addrs = 0; len = hlen; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; int dlen; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = RT_ROUNDUP(sa->sa_len); m_copyback(m, len, dlen, (caddr_t)sa); /* can grow mbuf chain */ len += dlen; } if (m->m_pkthdr.len != len) { /* one of the m_copyback() calls failed */ m_freem(m); return (NULL); } rtm = mtod(m, struct rt_msghdr *); bzero(rtm, hlen); rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occurred, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { struct sockaddr *dst = rtinfo->rti_info[RTAX_DST]; struct rt_msghdr *rtm; struct mbuf *m; if (route_cb.any_count == 0) return; m = rt_msg_mbuf(type, rtinfo); if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rts_input(m, familyof(dst)); } void rt_dstmsg(int type, struct sockaddr *dst, int error) { struct rt_msghdr *rtm; struct rt_addrinfo addrs; struct mbuf *m; if (route_cb.any_count == 0) return; bzero(&addrs, sizeof(struct rt_addrinfo)); addrs.rti_info[RTAX_DST] = dst; m = rt_msg_mbuf(type, &addrs); if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE; rtm->rtm_errno = error; rtm->rtm_addrs = addrs.rti_addrs; rts_input(m, familyof(dst)); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo rtinfo; if (route_cb.any_count == 0) return; bzero(&rtinfo, sizeof(struct rt_addrinfo)); m = rt_msg_mbuf(RTM_IFINFO, &rtinfo); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = 0; rts_input(m, 0); } static void rt_ifamsg(int cmd, struct ifaddr *ifa) { struct ifa_msghdr *ifam; struct rt_addrinfo rtinfo; struct mbuf *m; struct ifnet *ifp = ifa->ifa_ifp; bzero(&rtinfo, sizeof(struct rt_addrinfo)); rtinfo.rti_ifaaddr = ifa->ifa_addr; rtinfo.rti_ifpaddr = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr; rtinfo.rti_netmask = ifa->ifa_netmask; rtinfo.rti_bcastaddr = ifa->ifa_dstaddr; m = rt_msg_mbuf(cmd, &rtinfo); if (m == NULL) return; ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = rtinfo.rti_addrs; ifam->ifam_addrflags = if_addrflags(ifa); ifam->ifam_metric = ifa->ifa_metric; rts_input(m, familyof(ifa->ifa_addr)); } void rt_rtmsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int error) { struct rt_msghdr *rtm; struct rt_addrinfo rtinfo; struct mbuf *m; struct sockaddr *dst; if (rt == NULL) return; bzero(&rtinfo, sizeof(struct rt_addrinfo)); rtinfo.rti_dst = dst = rt_key(rt); rtinfo.rti_gateway = rt->rt_gateway; rtinfo.rti_netmask = rt_mask(rt); if (ifp != NULL) { rtinfo.rti_ifpaddr = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr; } if (rt->rt_ifa != NULL) rtinfo.rti_ifaaddr = rt->rt_ifa->ifa_addr; m = rt_msg_mbuf(cmd, &rtinfo); if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); if (ifp != NULL) rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo.rti_addrs; rts_input(m, familyof(dst)); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. * if we ever reverse the logic and replace messages TO the routing * socket indicate a request to configure interfaces, then it will * be unnecessary as the routing socket will automatically generate * copies of it. */ void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) { if (route_cb.any_count == 0) return; if (cmd == RTM_ADD) { rt_ifamsg(RTM_NEWADDR, ifa); rt_rtmsg(RTM_ADD, rt, ifa->ifa_ifp, error); } else { KASSERT((cmd == RTM_DELETE), ("unknown cmd %d", cmd)); rt_rtmsg(RTM_DELETE, rt, ifa->ifa_ifp, error); rt_ifamsg(RTM_DELADDR, ifa); } } /* * This is the analogue to the rt_newaddrmsg which performs the same * function but for multicast group memberhips. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo rtinfo; struct mbuf *m = NULL; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (route_cb.any_count == 0) return; bzero(&rtinfo, sizeof(struct rt_addrinfo)); rtinfo.rti_ifaaddr = ifma->ifma_addr; if (ifp != NULL && !TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) { rtinfo.rti_ifpaddr = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr; } /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ rtinfo.rti_gateway = ifma->ifma_lladdr; m = rt_msg_mbuf(cmd, &rtinfo); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = rtinfo.rti_addrs; rts_input(m, familyof(ifma->ifma_addr)); } static struct mbuf * rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct rt_addrinfo *info) { struct if_announcemsghdr *ifan; struct mbuf *m; if (route_cb.any_count == 0) return NULL; bzero(info, sizeof(*info)); m = rt_msg_mbuf(type, info); if (m == NULL) return NULL; ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof ifan->ifan_name); ifan->ifan_what = what; return m; } /* * This is called to generate routing socket messages indicating * IEEE80211 wireless events. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. */ void rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) { struct rt_addrinfo info; struct mbuf *m; m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); if (m == NULL) return; /* * Append the ieee80211 data. Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { /* XXX use m_getb(data_len, M_NOWAIT, MT_DATA, 0); */ struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } KKASSERT(data_len <= M_TRAILINGSPACE(n)); bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } mbuftrackid(m, 33); if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rts_input(m, 0); } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct rt_addrinfo addrinfo; struct mbuf *m; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &addrinfo); if (m != NULL) rts_input(m, 0); } static int resizewalkarg(struct walkarg *w, int len) { void *newptr; newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK); if (newptr == NULL) return (ENOMEM); if (w->w_tmem != NULL) kfree(w->w_tmem, M_RTABLE); w->w_tmem = newptr; w->w_tmemsize = len; bzero(newptr, len); return (0); } static void ifnet_compute_stats(struct ifnet *ifp) { IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); } static int if_addrflags(const struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET6 case AF_INET6: return ((const struct in6_ifaddr *)ifa)->ia6_flags; #endif default: return 0; } } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct rt_addrinfo rtinfo; int msglen, error; bzero(&rtinfo, sizeof(struct rt_addrinfo)); ifnet_lock(); TAILQ_FOREACH(ifp, &ifnetlist, if_link) { struct ifaddr_container *ifac, *ifac_mark; struct ifaddr_marker mark; struct ifaddrhead *head; struct ifaddr *ifa; if (w->w_arg && w->w_arg != ifp->if_index) continue; head = &ifp->if_addrheads[mycpuid]; /* * There is no need to reference the first ifaddr * even if the following resizewalkarg() blocks, * since the first ifaddr will not be destroyed * when the ifnet lock is held. */ ifac = TAILQ_FIRST(head); ifa = ifac->ifa; rtinfo.rti_ifpaddr = ifa->ifa_addr; msglen = rt_msgsize(RTM_IFINFO, &rtinfo); if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0) { ifnet_unlock(); return (ENOMEM); } rt_msg_buffer(RTM_IFINFO, &rtinfo, w->w_tmem, msglen); rtinfo.rti_ifpaddr = NULL; if (w->w_req != NULL && w->w_tmem != NULL) { struct if_msghdr *ifm = w->w_tmem; ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags; ifnet_compute_stats(ifp); ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = rtinfo.rti_addrs; error = SYSCTL_OUT(w->w_req, ifm, msglen); if (error) { ifnet_unlock(); return (error); } } /* * Add a marker, since SYSCTL_OUT() could block and during * that period the list could be changed. */ ifa_marker_init(&mark, ifp); ifac_mark = &mark.ifac; TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { TAILQ_REMOVE(head, ifac_mark, ifa_link); TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); ifa = ifac->ifa; /* Ignore marker */ if (ifa->ifa_addr->sa_family == AF_UNSPEC) continue; if (af && af != ifa->ifa_addr->sa_family) continue; if (curproc->p_ucred->cr_prison && prison_if(curproc->p_ucred, ifa->ifa_addr)) continue; rtinfo.rti_ifaaddr = ifa->ifa_addr; rtinfo.rti_netmask = ifa->ifa_netmask; rtinfo.rti_bcastaddr = ifa->ifa_dstaddr; msglen = rt_msgsize(RTM_NEWADDR, &rtinfo); /* * Keep a reference on this ifaddr, so that it will * not be destroyed if the following resizewalkarg() * blocks. */ IFAREF(ifa); if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0) { IFAFREE(ifa); TAILQ_REMOVE(head, ifac_mark, ifa_link); ifnet_unlock(); return (ENOMEM); } rt_msg_buffer(RTM_NEWADDR, &rtinfo, w->w_tmem, msglen); if (w->w_req != NULL) { struct ifa_msghdr *ifam = w->w_tmem; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = rtinfo.rti_addrs; ifam->ifam_addrflags = if_addrflags(ifa); ifam->ifam_metric = ifa->ifa_metric; error = SYSCTL_OUT(w->w_req, w->w_tmem, msglen); if (error) { IFAFREE(ifa); TAILQ_REMOVE(head, ifac_mark, ifa_link); ifnet_unlock(); return (error); } } IFAFREE(ifa); } TAILQ_REMOVE(head, ifac_mark, ifa_link); rtinfo.rti_netmask = NULL; rtinfo.rti_ifaaddr = NULL; rtinfo.rti_bcastaddr = NULL; } ifnet_unlock(); return (0); } static int rttable_walkarg_create(struct rttable_walkarg *w, int op, int arg) { struct rt_addrinfo rtinfo; struct sockaddr_storage ss; int i, msglen; memset(w, 0, sizeof(*w)); w->w_op = op; w->w_arg = arg; memset(&ss, 0, sizeof(ss)); ss.ss_len = sizeof(ss); memset(&rtinfo, 0, sizeof(rtinfo)); for (i = 0; i < RTAX_MAX; ++i) rtinfo.rti_info[i] = (struct sockaddr *)&ss; msglen = rt_msgsize(RTM_GET, &rtinfo); w->w_bufsz = msglen * RTTABLE_DUMP_MSGCNT_MAX; w->w_buf = kmalloc(w->w_bufsz, M_TEMP, M_WAITOK | M_NULLOK); if (w->w_buf == NULL) return ENOMEM; return 0; } static void rttable_walkarg_destroy(struct rttable_walkarg *w) { kfree(w->w_buf, M_TEMP); } static void rttable_entry_rtinfo(struct rt_addrinfo *rtinfo, struct radix_node *rn) { struct rtentry *rt = (struct rtentry *)rn; bzero(rtinfo, sizeof(*rtinfo)); rtinfo->rti_dst = rt_key(rt); rtinfo->rti_gateway = rt->rt_gateway; rtinfo->rti_netmask = rt_mask(rt); rtinfo->rti_genmask = rt->rt_genmask; if (rt->rt_ifp != NULL) { rtinfo->rti_ifpaddr = TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid])->ifa->ifa_addr; rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr; } } static int rttable_walk_entry(struct radix_node *rn, void *xw) { struct rttable_walkarg *w = xw; struct rtentry *rt = (struct rtentry *)rn; struct rt_addrinfo rtinfo; struct rt_msghdr *rtm; boolean_t save = FALSE; int msglen, w_bufleft; void *ptr; rttable_entry_rtinfo(&rtinfo, rn); msglen = rt_msgsize(RTM_GET, &rtinfo); w_bufleft = w->w_bufsz - w->w_buflen; if (rn->rn_dupedkey != NULL) { struct radix_node *rn1 = rn; int total_msglen = msglen; /* * Make sure that we have enough space left for all * dupedkeys, since rn_walktree_at always starts * from the first dupedkey. */ while ((rn1 = rn1->rn_dupedkey) != NULL) { struct rt_addrinfo rtinfo1; int msglen1; if (rn1->rn_flags & RNF_ROOT) continue; rttable_entry_rtinfo(&rtinfo1, rn1); msglen1 = rt_msgsize(RTM_GET, &rtinfo1); total_msglen += msglen1; } if (total_msglen > w_bufleft) { if (total_msglen > w->w_bufsz) { static int logged = 0; if (!logged) { kprintf("buffer is too small for " "all dupedkeys, increase " "RTTABLE_DUMP_MSGCNT_MAX\n"); logged = 1; } return ENOMEM; } save = TRUE; } } else if (msglen > w_bufleft) { save = TRUE; } if (save) { /* * Not enough buffer left; remember the position * to start from upon next round. */ KASSERT(msglen <= w->w_bufsz, ("msg too long %d", msglen)); KASSERT(rtinfo.rti_dst->sa_len <= sizeof(w->w_key0), ("key too long %d", rtinfo.rti_dst->sa_len)); memset(&w->w_key0, 0, sizeof(w->w_key0)); memcpy(&w->w_key0, rtinfo.rti_dst, rtinfo.rti_dst->sa_len); w->w_key = (const char *)&w->w_key0; if (rtinfo.rti_netmask != NULL) { KASSERT( rtinfo.rti_netmask->sa_len <= sizeof(w->w_mask0), ("mask too long %d", rtinfo.rti_netmask->sa_len)); memset(&w->w_mask0, 0, sizeof(w->w_mask0)); memcpy(&w->w_mask0, rtinfo.rti_netmask, rtinfo.rti_netmask->sa_len); w->w_mask = (const char *)&w->w_mask0; } else { w->w_mask = NULL; } return EJUSTRETURN; } if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; ptr = ((uint8_t *)w->w_buf) + w->w_buflen; rt_msg_buffer(RTM_GET, &rtinfo, ptr, msglen); rtm = (struct rt_msghdr *)ptr; rtm->rtm_flags = rt->rt_flags; rtm->rtm_use = rt->rt_use; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = rtinfo.rti_addrs; w->w_buflen += msglen; return 0; } static void rttable_walk_dispatch(netmsg_t msg) { struct netmsg_rttable_walk *nmsg = (struct netmsg_rttable_walk *)msg; struct radix_node_head *rnh = rt_tables[mycpuid][nmsg->af]; struct rttable_walkarg *w = nmsg->w; int error; error = rnh->rnh_walktree_at(rnh, w->w_key, w->w_mask, rttable_walk_entry, w); lwkt_replymsg(&nmsg->base.lmsg, error); } static int sysctl_rttable(int af, struct sysctl_req *req, int op, int arg) { struct rttable_walkarg w; int error, i; error = rttable_walkarg_create(&w, op, arg); if (error) return error; error = EINVAL; for (i = 1; i <= AF_MAX; i++) { if (rt_tables[mycpuid][i] != NULL && (af == 0 || af == i)) { w.w_key = NULL; w.w_mask = NULL; for (;;) { struct netmsg_rttable_walk nmsg; netmsg_init(&nmsg.base, NULL, &curthread->td_msgport, 0, rttable_walk_dispatch); nmsg.af = i; nmsg.w = &w; w.w_buflen = 0; error = lwkt_domsg(netisr_cpuport(mycpuid), &nmsg.base.lmsg, 0); if (error && error != EJUSTRETURN) goto done; if (req != NULL && w.w_buflen > 0) { int error1; error1 = SYSCTL_OUT(req, w.w_buf, w.w_buflen); if (error1) { error = error1; goto done; } } if (error == 0) /* done */ break; } } } done: rttable_walkarg_destroy(&w); return error; } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; int error = EINVAL; int origcpu, cpu; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (namelen != 3 && namelen != 4) return (EINVAL); af = name[0]; bzero(&w, sizeof w); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; /* * Optional third argument specifies cpu, used primarily for * debugging the route table. */ if (namelen == 4) { if (name[3] < 0 || name[3] >= netisr_ncpus) return (EINVAL); cpu = name[3]; } else { /* * Target cpu is not specified, use cpu0 then, so that * the result set will be relatively stable. */ cpu = 0; } origcpu = mycpuid; lwkt_migratecpu(cpu); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: error = sysctl_rttable(af, w.w_req, w.w_op, w.w_arg); break; case NET_RT_IFLIST: error = sysctl_iflist(af, &w); break; } if (w.w_tmem != NULL) kfree(w.w_tmem, M_RTABLE); lwkt_migratecpu(origcpu); return (error); } SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ static struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { .pr_type = SOCK_RAW, .pr_domain = &routedomain, .pr_protocol = 0, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = NULL, .pr_output = route_output, .pr_ctlinput = raw_ctlinput, .pr_ctloutput = route_ctloutput, .pr_ctlport = cpu0_ctlport, .pr_init = raw_init, .pr_usrreqs = &route_usrreqs } }; static struct domain routedomain = { .dom_family = AF_ROUTE, .dom_name = "route", .dom_init = NULL, .dom_externalize = NULL, .dom_dispose = NULL, .dom_protosw = routesw, .dom_protoswNPROTOSW = &routesw[(sizeof routesw)/(sizeof routesw[0])], .dom_next = SLIST_ENTRY_INITIALIZER, .dom_rtattach = NULL, .dom_rtoffset = 0, .dom_maxrtkey = 0, .dom_ifattach = NULL, .dom_ifdetach = NULL }; DOMAIN_SET(route);