From f884afc578303ab662bba5f90d9b7b8dd773883d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 18 Jul 2011 15:02:26 -0700 Subject: [PATCH] kernel - Fix several IPV6 issues * Replace old critical sections with a lock around global structures, primarily the default route list for IPV6. This fixes numerous MP races. * Use rtrequest*_global() instead of rtrequest*() and remove user route socket notification (the rtrequest*_global() code should do the notification). This fixes several issues with the per-cpu route table getting out of sync and triggering panics, and also appears to fix several connectivity issues. Reported-by: Francois Tigeot --- sys/netinet6/nd6.c | 40 +++++++------ sys/netinet6/nd6.h | 4 ++ sys/netinet6/nd6_nbr.c | 22 +++---- sys/netinet6/nd6_rtr.c | 126 ++++++++++++++--------------------------- 4 files changed, 78 insertions(+), 114 deletions(-) diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index a3e3e52e93..4dda747aa7 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -55,7 +55,10 @@ #include #include #include +#include + #include +#include #include #include @@ -105,6 +108,7 @@ static int nd6_inuse, nd6_allocated; struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6}; struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix = { 0 }; +struct mtx nd6_mtx = MTX_INITIALIZER; int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; static struct sockaddr_in6 all1_sa; @@ -402,7 +406,7 @@ nd6_timer(void *ignored_arg) struct in6_ifaddr *ia6, *nia6; struct in6_addrlifetime *lt6; - crit_enter(); + mtx_lock(&nd6_mtx); callout_reset(&nd6_timer_ch, nd6_prune * hz, nd6_timer, NULL); @@ -618,7 +622,7 @@ addrloop: } else pr = pr->ndpr_next; } - crit_exit(); + mtx_unlock(&nd6_mtx); } static int @@ -926,7 +930,7 @@ nd6_free(struct rtentry *rt) */ if (!ip6_forwarding && ip6_accept_rtadv) { /* XXX: too restrictive? */ - crit_enter(); + mtx_lock(&nd6_mtx); dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, rt->rt_ifp); @@ -977,7 +981,7 @@ nd6_free(struct rtentry *rt) defrouter_select(); } } - crit_exit(); + mtx_unlock(&nd6_mtx); } /* @@ -1320,7 +1324,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * obsolete API, use sysctl under net.inet6.icmp6 */ bzero(drl, sizeof(*drl)); - crit_enter(); + mtx_lock(&nd6_mtx); dr = TAILQ_FIRST(&nd_defrouter); while (dr && i < DRLSTSIZ) { drl->defrouter[i].rtaddr = dr->rtaddr; @@ -1340,7 +1344,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) i++; dr = TAILQ_NEXT(dr, dr_entry); } - crit_exit(); + mtx_unlock(&nd6_mtx); break; case SIOCGPRLST_IN6: /* @@ -1352,7 +1356,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * how about separating ioctls into two? */ bzero(prl, sizeof(*prl)); - crit_enter(); + mtx_lock(&nd6_mtx); pr = nd_prefix.lh_first; while (pr && i < PRLSTSIZ) { struct nd_pfxrouter *pfr; @@ -1413,7 +1417,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) i++; } } - crit_exit(); + mtx_unlock(&nd6_mtx); break; case OSIOCGIFINFO_IN6: @@ -1451,7 +1455,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) /* flush all the prefix advertised by routers */ struct nd_prefix *pr, *next; - crit_enter(); + mtx_lock(&nd6_mtx); for (pr = nd_prefix.lh_first; pr; pr = next) { struct in6_ifaddr *ia, *ia_next; @@ -1473,7 +1477,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) } prelist_remove(pr); } - crit_exit(); + mtx_unlock(&nd6_mtx); break; } case SIOCSRTRFLUSH_IN6: @@ -1481,7 +1485,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) /* flush all the default routers */ struct nd_defrouter *dr, *next; - crit_enter(); + mtx_lock(&nd6_mtx); if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) { /* * The first entry of the list may be stored in @@ -1493,7 +1497,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) } defrtrlist_del(TAILQ_FIRST(&nd_defrouter)); } - crit_exit(); + mtx_unlock(&nd6_mtx); break; } case SIOCGNBRINFO_IN6: @@ -1513,10 +1517,10 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) *idp = htons(ifp->if_index); } - crit_enter(); + mtx_lock(&nd6_mtx); if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) { error = EINVAL; - crit_exit(); + mtx_unlock(&nd6_mtx); break; } ln = (struct llinfo_nd6 *)rt->rt_llinfo; @@ -1524,7 +1528,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) nbi->asked = ln->ln_asked; nbi->isrouter = ln->ln_router; nbi->expire = ln->ln_expire; - crit_exit(); + mtx_unlock(&nd6_mtx); break; } @@ -1776,9 +1780,9 @@ nd6_slowtimo(void *ignored_arg) struct nd_ifinfo *nd6if; struct ifnet *ifp; - crit_enter(); + mtx_lock(&nd6_mtx); callout_reset(&nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, - nd6_slowtimo, NULL); + nd6_slowtimo, NULL); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ @@ -1793,7 +1797,7 @@ nd6_slowtimo(void *ignored_arg) nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } - crit_exit(); + mtx_unlock(&nd6_mtx); } #define gotoerr(e) { error = (e); goto bad;} diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index b1de8ea701..40640d68fa 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -57,6 +57,9 @@ #ifndef _NETINET6_IN6_VAR_H_ #include #endif +#ifndef _SYS_MUTEX_H_ +#include +#endif struct llinfo_nd6 { struct llinfo_nd6 *ln_next; @@ -323,6 +326,7 @@ extern struct llinfo_nd6 llinfo_nd6; extern struct nd_ifinfo *nd_ifinfo; extern struct nd_drhead nd_defrouter; extern struct nd_prhead nd_prefix; +extern struct mtx nd6_mtx; extern int nd6_debug; #define nd6log(x) do { if (nd6_debug) log x; } while (0) diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 2c4a8e301b..4fd21af93c 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -48,7 +48,10 @@ #include #include #include +#include + #include +#include #include #include @@ -787,11 +790,13 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * is only called under the network software interrupt * context. However, we keep it just for safety. */ - crit_enter(); + mtx_lock(&nd6_mtx); dr = defrouter_lookup(in6, rt->rt_ifp); if (dr) defrtrlist_del(dr); - else if (!ip6_forwarding && ip6_accept_rtadv) { + mtx_unlock(&nd6_mtx); + + if (dr == NULL && !ip6_forwarding && ip6_accept_rtadv) { /* * Even if the neighbor is not in the default * router list, the neighbor may be used @@ -801,7 +806,6 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) */ rt6_flush(&ip6->ip6_src, rt->rt_ifp); } - crit_exit(); } ln->ln_router = is_router; } @@ -1102,9 +1106,7 @@ nd6_dad_start(struct ifaddr *ifa, * (re)initialization. */ dp->dad_ifa = ifa; - crit_enter(); /* XXX MP not MP safe */ _IFAREF(ifa, 0); /* just for safety */ - crit_exit(); dp->dad_count = ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; @@ -1145,9 +1147,7 @@ nd6_dad_stop(struct ifaddr *ifa) TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); kfree(dp, M_IP6NDP); dp = NULL; - crit_enter(); /* XXX MP not MP safe */ _IFAFREE(ifa, 0); - crit_exit(); } static void @@ -1156,7 +1156,7 @@ nd6_dad_timer(struct ifaddr *ifa) struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; - crit_enter(); /* XXX */ + mtx_lock(&nd6_mtx); /* Sanity check */ if (ia == NULL) { @@ -1191,9 +1191,7 @@ nd6_dad_timer(struct ifaddr *ifa) TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); kfree(dp, M_IP6NDP); dp = NULL; - crit_enter(); /* XXX MP not MP safe */ _IFAFREE(ifa, 0); - crit_exit(); goto done; } @@ -1276,7 +1274,7 @@ nd6_dad_timer(struct ifaddr *ifa) } done: - crit_exit(); + mtx_unlock(&nd6_mtx); } void @@ -1310,9 +1308,7 @@ nd6_dad_duplicated(struct ifaddr *ifa) TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); kfree(dp, M_IP6NDP); dp = NULL; - crit_enter(); /* XXX MP not MP safe */ _IFAFREE(ifa, 0); - crit_exit(); } static void diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 681d09bb40..87b52de0ca 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -46,7 +46,10 @@ #include #include #include +#include + #include +#include #include #include @@ -77,7 +80,6 @@ static void pfxrtr_del (struct nd_pfxrouter *); static struct nd_pfxrouter *find_pfxlist_reachable_router (struct nd_prefix *); static void defrouter_addifreq (struct ifnet *); -static void nd6_rtmsg (int, struct rtentry *); static void in6_init_address_ltimes (struct nd_prefix *ndpr, struct in6_addrlifetime *lt6); @@ -446,6 +448,7 @@ bad: * default router list proccessing sub routines */ +#if 0 /* tell the change to user processes watching the routing socket. */ static void nd6_rtmsg(int cmd, struct rtentry *rt) @@ -467,12 +470,12 @@ nd6_rtmsg(int cmd, struct rtentry *rt) rt_missmsg(cmd, &info, rt->rt_flags, 0); } +#endif void defrouter_addreq(struct nd_defrouter *new) { struct sockaddr_in6 def, mask, gate; - struct rtentry *newrt = NULL; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); @@ -483,14 +486,9 @@ defrouter_addreq(struct nd_defrouter *new) def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; - crit_enter(); - rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, - (struct sockaddr *)&mask, RTF_GATEWAY, &newrt); - if (newrt) { - nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ - newrt->rt_refcnt--; - } - crit_exit(); + rtrequest_global(RTM_ADD, (struct sockaddr *)&def, + (struct sockaddr *)&gate, (struct sockaddr *)&mask, + RTF_GATEWAY); return; } @@ -500,7 +498,6 @@ defrouter_addifreq(struct ifnet *ifp) { struct sockaddr_in6 def, mask; struct ifaddr *ifa; - struct rtentry *newrt = NULL; int error, flags; bzero(&def, sizeof(def)); @@ -522,21 +519,16 @@ defrouter_addifreq(struct ifnet *ifp) } flags = ifa->ifa_flags; - error = rtrequest(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr, - (struct sockaddr *)&mask, flags, &newrt); + error = rtrequest_global(RTM_ADD, + (struct sockaddr *)&def, + ifa->ifa_addr, + (struct sockaddr *)&mask, + flags); if (error != 0) { nd6log((LOG_ERR, "defrouter_addifreq: failed to install a route to " "interface %s (errno = %d)\n", if_name(ifp), error)); - - if (newrt) /* maybe unnecessary, but do it for safety */ - newrt->rt_refcnt--; - } else { - if (newrt) { - nd6_rtmsg(RTM_ADD, newrt); - newrt->rt_refcnt--; - } } } @@ -558,7 +550,6 @@ void defrouter_delreq(struct nd_defrouter *dr, int dofree) { struct sockaddr_in6 def, mask, gate; - struct rtentry *oldrt = NULL; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); @@ -569,22 +560,11 @@ defrouter_delreq(struct nd_defrouter *dr, int dofree) def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; - rtrequest(RTM_DELETE, (struct sockaddr *)&def, - (struct sockaddr *)&gate, - (struct sockaddr *)&mask, - RTF_GATEWAY, &oldrt); - if (oldrt) { - nd6_rtmsg(RTM_DELETE, oldrt); - if (oldrt->rt_refcnt <= 0) { - /* - * XXX: borrowed from the RTM_DELETE case of - * rtrequest(). - */ - oldrt->rt_refcnt++; - rtfree(oldrt); - } - } - + rtrequest_global(RTM_DELETE, + (struct sockaddr *)&def, + (struct sockaddr *)&gate, + (struct sockaddr *)&mask, + RTF_GATEWAY); if (dofree) /* XXX: necessary? */ kfree(dr, M_IP6NDP); } @@ -647,7 +627,7 @@ defrouter_select(void) struct rtentry *rt = NULL; struct llinfo_nd6 *ln = NULL; - crit_enter(); + mtx_lock(&nd6_mtx); /* * Search for a (probably) reachable router from the list. @@ -708,8 +688,7 @@ defrouter_select(void) } } } - - crit_exit(); + mtx_unlock(&nd6_mtx); return; } @@ -718,7 +697,7 @@ defrtrlist_update(struct nd_defrouter *new) { struct nd_defrouter *dr, *n; - crit_enter(); + mtx_lock(&nd6_mtx); if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { /* entry exists */ @@ -731,20 +710,20 @@ defrtrlist_update(struct nd_defrouter *new) dr->rtlifetime = new->rtlifetime; dr->expire = new->expire; } - crit_exit(); + mtx_unlock(&nd6_mtx); return (dr); } /* entry does not exist */ if (new->rtlifetime == 0) { - crit_exit(); + mtx_unlock(&nd6_mtx); return (NULL); } n = (struct nd_defrouter *)kmalloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO); if (n == NULL) { - crit_exit(); + mtx_unlock(&nd6_mtx); return (NULL); } *n = *new; @@ -757,7 +736,7 @@ defrtrlist_update(struct nd_defrouter *new) TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry); if (TAILQ_FIRST(&nd_defrouter) == n) defrouter_select(); - crit_exit(); + mtx_unlock(&nd6_mtx); return (n); } @@ -839,10 +818,10 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, new->ndpr_prefix.sin6_addr.s6_addr32[i] &= new->ndpr_mask.s6_addr32[i]; - crit_enter(); + mtx_lock(&nd6_mtx); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&nd_prefix, new, ndpr_entry); - crit_exit(); + mtx_unlock(&nd6_mtx); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { @@ -893,7 +872,7 @@ prelist_remove(struct nd_prefix *pr) if (pr->ndpr_refcnt > 0) return; /* notice here? */ - crit_enter(); + mtx_lock(&nd6_mtx); /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); @@ -904,7 +883,7 @@ prelist_remove(struct nd_prefix *pr) kfree(pfr, M_IP6NDP); } - crit_exit(); + mtx_unlock(&nd6_mtx); kfree(pr, M_IP6NDP); @@ -928,7 +907,7 @@ prelist_update(struct nd_prefix *new, struct nd_defrouter *dr, struct mbuf *m) struct in6_addrlifetime lt6_tmp; auth = 0; - crit_enter(); + mtx_lock(&nd6_mtx); if (m) { /* * Authenticity for NA consists authentication for @@ -1201,7 +1180,7 @@ prelist_update(struct nd_prefix *new, struct nd_defrouter *dr, struct mbuf *m) afteraddrconf: end: - crit_exit(); + mtx_unlock(&nd6_mtx); return error; } @@ -1394,7 +1373,6 @@ nd6_prefix_onlink(struct nd_prefix *pr) struct nd_prefix *opr; u_long rtflags; int error = 0; - struct rtentry *rt = NULL; /* sanity check */ if (pr->ndpr_stateflags & NDPRF_ONLINK) { @@ -1476,12 +1454,12 @@ nd6_prefix_onlink(struct nd_prefix *pr) */ rtflags &= ~RTF_CLONING; } - error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, - ifa->ifa_addr, (struct sockaddr *)&mask6, - rtflags, &rt); + error = rtrequest_global(RTM_ADD, + (struct sockaddr *)&pr->ndpr_prefix, + ifa->ifa_addr, + (struct sockaddr *)&mask6, + rtflags); if (error == 0) { - if (rt != NULL) /* this should be non NULL, though */ - nd6_rtmsg(RTM_ADD, rt); pr->ndpr_stateflags |= NDPRF_ONLINK; } else { @@ -1493,10 +1471,6 @@ nd6_prefix_onlink(struct nd_prefix *pr) ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr), ip6_sprintf(&mask6.sin6_addr), rtflags, error)); } - - if (rt != NULL) - rt->rt_refcnt--; - return (error); } @@ -1507,7 +1481,6 @@ nd6_prefix_offlink(struct nd_prefix *pr) struct ifnet *ifp = pr->ndpr_ifp; struct nd_prefix *opr; struct sockaddr_in6 sa6, mask6; - struct rtentry *rt = NULL; /* sanity check */ if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { @@ -1526,15 +1499,14 @@ nd6_prefix_offlink(struct nd_prefix *pr) mask6.sin6_family = AF_INET6; mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, - (struct sockaddr *)&mask6, 0, &rt); + error = rtrequest_global(RTM_DELETE, + (struct sockaddr *)&sa6, + NULL, + (struct sockaddr *)&mask6, + 0); if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; - /* report the route deletion to the routing socket. */ - if (rt != NULL) - nd6_rtmsg(RTM_DELETE, rt); - /* * There might be the same prefix on another interface, * the prefix which could not be on-link just because we have @@ -1583,14 +1555,6 @@ nd6_prefix_offlink(struct nd_prefix *pr) error)); } - if (rt != NULL) { - if (rt->rt_refcnt <= 0) { - /* XXX: we should free the entry ourselves. */ - rt->rt_refcnt++; - rtfree(rt); - } - } - return (error); } @@ -1882,6 +1846,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) /* * Delete all the routing table entries that use the specified gateway. + * * XXX: this function causes search through all entries of routing table, so * it shouldn't be called when acting as a router. */ @@ -1890,18 +1855,13 @@ rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { struct radix_node_head *rnh = rt_tables[mycpuid][AF_INET6]; - crit_enter(); - /* We'll care only link-local addresses */ - if (!IN6_IS_ADDR_LINKLOCAL(gateway)) { - crit_exit(); + if (!IN6_IS_ADDR_LINKLOCAL(gateway)) return; - } /* XXX: hack for KAME's link-local address kludge */ gateway->s6_addr16[1] = htons(ifp->if_index); rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); - crit_exit(); } static int -- 2.41.0