From 5bd4422ea3a838c306436350229ff4bc24397660 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 26 Dec 2008 19:14:19 +0800 Subject: [PATCH] Rework carp(4) IPv4 support. Generic layer changes: - Pass more detailed information to ifaddr_event handler. o The ifaddr which triggers the event is passed in o The action (add/delete/change) performed upon the ifaddr is passed in - Add ifa_prflags field in ifaddr_container. This field should be used to hold protocol specific flags. For inet addresses, IA_PRF_RTEXISTOK is defined to ignore rtinit() EEXIST error in in_ifinit(). carp(4) changes: - Add virtual address struct, which holds corresponding carp(4) inet address and backing address of a "real" interface (backing interface). - The list holding virtual address struct is sorted. This is mainly used to fix the bug in following case: host1: ifconfig carp0 192.168.5.1 ifconfig carp0 alias 192.168.5.2 host2: ifconfig carp0 192.168.5.2 ifconfig carp0 alias 192.168.5.1 Before this change, the inet addresses sha1 calculated for these two host will be different, thus CARP fails. Based-on: OpenBSD - Allow inet addresses to be added to carp(4) interface, even if no backing interface could be found or the backing interface is not running. - Don't abuse IFF_UP, which is administrative flag; use IFF_RUNNING instead. - Factor out carp_stop(). - Handle ifaddr_event; most of the carp(4) inet address configuration happens in this event handler. In carp_ioctl(), we just mark the carp(4) interface IFF_UP|IFF_RUNNING and set IA_PRF_RTEXISTOK on the inet address. - Fix the ifdetach_event handler: o Don't sit on the branch while we are sawing it off. o We always need to leave the joined multicast group. - Free carp_if to the proper kmalloc pool. - Simplify the carp_if struct; except the TAILQ_HEAD, rest of the fields are not used; nuke them. - Use 'void *' as ifnet.if_carp's type. This could ease upcoming carp(4) MPSAFE work. - M_NOWAIT -> MB_DONTWAIT - Throw in assertions - Cleanup: o Nuke SC2IFP o Nuke carp_softc.sc_ifp compat shim o Constify function parameters o ... --- sbin/ifconfig/ifcarp.c | 82 ++- sys/net/if_var.h | 27 +- sys/net/pf/pf_if.c | 8 +- sys/netinet/if_ether.c | 62 +- sys/netinet/if_ether.h | 2 +- sys/netinet/in.c | 27 +- sys/netinet/in_var.h | 3 + sys/netinet/ip_carp.c | 1360 +++++++++++++++++++++++++++------------- sys/netinet/ip_carp.h | 22 +- sys/netinet6/in6.c | 19 +- 10 files changed, 1101 insertions(+), 511 deletions(-) diff --git a/sbin/ifconfig/ifcarp.c b/sbin/ifconfig/ifcarp.c index e18fafa97f..38feeae9fb 100644 --- a/sbin/ifconfig/ifcarp.c +++ b/sbin/ifconfig/ifcarp.c @@ -39,9 +39,12 @@ #include #include +#include #include #include +#include + #include #include #include @@ -65,6 +68,9 @@ carp_status(int s) { const char *state; struct carpreq carpr; + struct ifdrv ifd; + char devname[IFNAMSIZ]; + int count, i; memset((char *)&carpr, 0, sizeof(struct carpreq)); ifr.ifr_data = (caddr_t)&carpr; @@ -83,8 +89,15 @@ carp_status(int s) carpr.carpr_advskew); } - return; - + memset(&ifd, 0, sizeof(ifd)); + strncpy(ifd.ifd_name, ifr.ifr_name, sizeof(ifd.ifd_name)); + ifd.ifd_cmd = CARPGDEVNAME; + ifd.ifd_len = sizeof(devname); + ifd.ifd_data = devname; + if (ioctl(s, SIOCGDRVSPEC, &ifd) < 0) + strlcpy(devname, "none", sizeof(devname)); + if (devname[0] != '\0') + printf("\tcarpdev: %s\n", devname); } void @@ -103,8 +116,6 @@ setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp) if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) err(1, "SIOCSVH"); - - return; } void @@ -128,8 +139,6 @@ setcarp_vhid(const char *val, int d, int s, const struct afswtch *afp) if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) err(1, "SIOCSVH"); - - return; } void @@ -150,8 +159,6 @@ setcarp_advskew(const char *val, int d, int s, const struct afswtch *afp) if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) err(1, "SIOCSVH"); - - return; } void @@ -172,8 +179,64 @@ setcarp_advbase(const char *val, int d, int s, const struct afswtch *afp) if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) err(1, "SIOCSVH"); +} + +static void +getcarp_vhaddr(const char *val, int d, int s, const struct afswtch *afp) +{ +#define VHADDR_PFMT "%-15s %-15s %s\n" + + struct ifdrv ifd; + struct ifcarpvhaddr *carpa; + char devname[IFNAMSIZ]; + int count, i; + + memset(&ifd, 0, sizeof(ifd)); + strncpy(ifd.ifd_name, ifr.ifr_name, sizeof(ifd.ifd_name)); + ifd.ifd_cmd = CARPGVHADDR; + if (ioctl(s, SIOCGDRVSPEC, &ifd) < 0) + return; + if (ifd.ifd_len != 0) { + carpa = malloc(ifd.ifd_len); + if (carpa == NULL) + return; + + ifd.ifd_cmd = CARPGVHADDR; + ifd.ifd_data = carpa; + if (ioctl(s, SIOCGDRVSPEC, &ifd) < 0) { + free(carpa); + return; + } + } else { + carpa = NULL; + } + count = ifd.ifd_len / sizeof(*carpa); + if (count != 0) + printf(VHADDR_PFMT, "virtual addr", "backing addr", "flags"); + for (i = 0; i < count; ++i) { + char flags[16]; + char baddr[INET_ADDRSTRLEN]; + int a = 0; + + memset(flags, 0, sizeof(flags)); + flags[a] = '*'; + if (carpa[i].carpa_flags & CARP_VHAF_OWNER) + flags[a++] = 'O'; + + memset(baddr, 0, sizeof(baddr)); + baddr[0] = '*'; + if (carpa[i].carpa_baddr.sin_addr.s_addr != INADDR_ANY) { + inet_ntop(AF_INET, &carpa[i].carpa_baddr.sin_addr, + baddr, sizeof(baddr)); + } + + printf(VHADDR_PFMT, inet_ntoa(carpa[i].carpa_addr.sin_addr), + baddr, flags); + } + if (carpa != NULL) + free(carpa); - return; +#undef VHADDR_PFMT } static struct cmd carp_cmds[] = { @@ -181,6 +244,7 @@ static struct cmd carp_cmds[] = { DEF_CMD_ARG("advskew", setcarp_advskew), DEF_CMD_ARG("pass", setcarp_passwd), DEF_CMD_ARG("vhid", setcarp_vhid), + DEF_CMD("vhaddr", 1, getcarp_vhaddr) }; static struct afswtch af_carp = { .af_name = "af_carp", diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 841fd577e1..ae2a7e8114 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -82,7 +82,6 @@ struct rtentry; struct rt_addrinfo; struct socket; struct ether_header; -struct carp_if; struct ucred; struct lwkt_serialize; struct ifaddr_container; @@ -187,7 +186,7 @@ struct ifnet { void *if_vlantrunks; /* vlan trunks */ struct ifaddrhead *if_addrheads; /* array[NCPU] of TAILQs of addresses per if */ int if_pcount; /* number of promiscuous listeners */ - struct carp_if *if_carp; /* carp interface structure */ + void *if_carp; /* carp interfaces */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_timer; /* time 'til if_watchdog called */ @@ -381,7 +380,8 @@ struct ifaddr_container { struct ifaddr *ifa; TAILQ_ENTRY(ifaddr_container) ifa_link; /* queue macro glue */ u_int ifa_refcnt; /* references to this structure */ - uint32_t ifa_listmask; /* IFA_LIST_ */ + uint16_t ifa_listmask; /* IFA_LIST_ */ + uint16_t ifa_prflags; /* protocol specific flags */ /* * Protocol specific states @@ -391,9 +391,14 @@ struct ifaddr_container { } ifa_proto_u; }; -#define IFA_LIST_IFADDRHEAD 0x1 /* on ifnet.if_addrheads[cpuid] */ -#define IFA_LIST_IN_IFADDRHEAD 0x2 /* on in_ifaddrheads[cpuid] */ -#define IFA_LIST_IN_IFADDRHASH 0x4 /* on in_ifaddrhashtbls[cpuid] */ +#define IFA_LIST_IFADDRHEAD 0x01 /* on ifnet.if_addrheads[cpuid] */ +#define IFA_LIST_IN_IFADDRHEAD 0x02 /* on in_ifaddrheads[cpuid] */ +#define IFA_LIST_IN_IFADDRHASH 0x04 /* on in_ifaddrhashtbls[cpuid] */ + +#define IFA_PRF_FLAG0 0x01 +#define IFA_PRF_FLAG1 0x02 +#define IFA_PRF_FLAG2 0x04 +#define IFA_PRF_FLAG3 0x08 /* * The ifaddr structure contains information about one address @@ -457,8 +462,16 @@ struct ifmultiaddr { }; #ifdef _KERNEL + +enum ifaddr_event { + IFADDR_EVENT_ADD, + IFADDR_EVENT_DELETE, + IFADDR_EVENT_CHANGE +}; + /* interface address change event */ -typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); +typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *, + enum ifaddr_event, struct ifaddr *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); /* new interface attach event */ typedef void (*ifnet_attach_event_handler_t)(void *, struct ifnet *); diff --git a/sys/net/pf/pf_if.c b/sys/net/pf/pf_if.c index 6a3e5ec5c7..b68ac99c7e 100644 --- a/sys/net/pf/pf_if.c +++ b/sys/net/pf/pf_if.c @@ -110,7 +110,8 @@ void pfi_newgroup(const char *, int); int pfi_skip_if(const char *, struct pfi_kif *, int); int pfi_unmask(void *); void pfi_dohooks(struct pfi_kif *); -void pfi_kifaddr_update_event(void *, struct ifnet *); +void pfi_kifaddr_update_event(void *, struct ifnet *, + enum ifaddr_event, struct ifaddr *); void pfi_attach_clone_event(void *, struct if_clone *); void pfi_attach_ifnet_event(void *, struct ifnet *); void pfi_detach_ifnet_event(void *, struct ifnet *); @@ -187,10 +188,11 @@ pfi_cleanup(void) * Wrapper functions for FreeBSD eventhandler */ void -pfi_kifaddr_update_event(void *arg, struct ifnet *ifp) +pfi_kifaddr_update_event(void *arg, struct ifnet *ifp, + enum ifaddr_event event __unused, struct ifaddr *ifa __unused) { struct pfi_kif *p = arg; - + /* * Check to see if it is 'our' interface as we do not have per * interface hooks and thus get an update for every interface. diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 5e762ea19e..5fddc358ce 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -772,13 +772,13 @@ in_arpinput(struct mbuf *m) struct rtentry *rt; struct ifaddr_container *ifac; struct in_ifaddr_container *iac; - struct in_ifaddr *ia; + struct in_ifaddr *ia = NULL; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; #ifdef SMP struct netmsg_arp_update msg; #endif - u_int8_t *enaddr = NULL; + uint8_t *enaddr = NULL; int op; int req_len; @@ -792,6 +792,21 @@ in_arpinput(struct mbuf *m) op = ntohs(ah->ar_op); memcpy(&isaddr, ar_spa(ah), sizeof isaddr); memcpy(&itaddr, ar_tpa(ah), sizeof itaddr); + + myaddr.s_addr = INADDR_ANY; +#ifdef CARP + if (ifp->if_carp != NULL) { + get_mplock(); + if (ifp->if_carp != NULL && + carp_iamatch(ifp->if_carp, &itaddr, &isaddr, &enaddr)) { + rel_mplock(); + myaddr = itaddr; + goto match; + } + rel_mplock(); + } +#endif + /* * Check both target and sender IP addresses: * @@ -808,31 +823,16 @@ in_arpinput(struct mbuf *m) /* Skip all ia's which don't match */ if (itaddr.s_addr != ia->ia_addr.sin_addr.s_addr) continue; - +#ifdef CARP + if (ia->ia_ifp->if_type == IFT_CARP) + continue; +#endif if (ia->ia_ifp == ifp) goto match; if (ifp->if_bridge && ia->ia_ifp && ifp->if_bridge == ia->ia_ifp->if_bridge) goto match; - -#ifdef CARP - /* - * If the interface does not match, but the recieving interface - * is part of carp, we call carp_iamatch to see if this is a - * request for the virtual host ip. - * XXX: This is really ugly! - */ - if (ifp->if_carp != NULL) { - get_mplock(); - if (ifp->if_carp != NULL && - carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr)) { - rel_mplock(); - goto match; - } - rel_mplock(); - } -#endif } LIST_FOREACH(iac, INADDR_HASH(isaddr.s_addr), ia_hash) { ia = iac->ia; @@ -840,7 +840,10 @@ in_arpinput(struct mbuf *m) /* Skip all ia's which don't match */ if (isaddr.s_addr != ia->ia_addr.sin_addr.s_addr) continue; - +#ifdef CARP + if (ia->ia_ifp->if_type == IFT_CARP) + continue; +#endif if (ia->ia_ifp == ifp) goto match; @@ -869,8 +872,9 @@ in_arpinput(struct mbuf *m) match: if (!enaddr) - enaddr = (u_int8_t *)IF_LLADDR(ifp); - myaddr = ia->ia_addr.sin_addr; + enaddr = (uint8_t *)IF_LLADDR(ifp); + if (myaddr.s_addr == INADDR_ANY) + myaddr = ia->ia_addr.sin_addr; if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) { m_freem(m); /* it's from me, ignore it. */ return; @@ -1081,14 +1085,10 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) } void -arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr) +arp_iainit(struct ifnet *ifp, const struct in_addr *addr, const u_char *enaddr) { - if (IA_SIN(ifa)->sin_addr.s_addr != INADDR_ANY) { - arprequest_async(ifp, &IA_SIN(ifa)->sin_addr, - &IA_SIN(ifa)->sin_addr, enaddr); - } - ifa->ifa_rtrequest = arp_rtrequest; - ifa->ifa_flags |= RTF_CLONING; + if (addr->s_addr != INADDR_ANY) + arprequest_async(ifp, addr, addr, enaddr); } static void diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h index 7e068db6c6..039c04aa61 100644 --- a/sys/netinet/if_ether.h +++ b/sys/netinet/if_ether.h @@ -124,7 +124,7 @@ extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN]; int arpresolve (struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *); void arp_ifinit (struct ifnet *, struct ifaddr *); -void arp_ifinit2 (struct ifnet *, struct ifaddr *, u_char *); +void arp_iainit (struct ifnet *, const struct in_addr *, const u_char *); #endif #endif diff --git a/sys/netinet/in.c b/sys/netinet/in.c index ab41137f82..8e984e9710 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -635,8 +635,11 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp, (const struct sockaddr_in *)&ifr->ifr_addr, 1); if (error != 0 && iaIsNew) break; - if (error == 0) - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + if (error == 0) { + EVENTHANDLER_INVOKE(ifaddr_event, ifp, + iaIsNew ? IFADDR_EVENT_ADD : IFADDR_EVENT_CHANGE, + &ia->ia_ifa); + } return (0); case SIOCSIFNETMASK: @@ -681,8 +684,11 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp, if ((ifp->if_flags & IFF_BROADCAST) && ifra->ifra_broadaddr.sin_family == AF_INET) ia->ia_broadaddr = ifra->ifra_broadaddr; - if (error == 0) - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + if (error == 0) { + EVENTHANDLER_INVOKE(ifaddr_event, ifp, + iaIsNew ? IFADDR_EVENT_ADD : IFADDR_EVENT_CHANGE, + &ia->ia_ifa); + } return (error); case SIOCDIFADDR: @@ -697,7 +703,8 @@ in_control_internal(u_long cmd, caddr_t data, struct ifnet *ifp, * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 1); - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + EVENTHANDLER_INVOKE(ifaddr_event, ifp, IFADDR_EVENT_DELETE, + &ia->ia_ifa); error = 0; break; @@ -1037,9 +1044,13 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, if (ia->ia_addr.sin_addr.s_addr != INADDR_ANY || ia->ia_netmask != IN_CLASSA_NET || ia->ia_dstaddr.sin_addr.s_addr != htonl(IN_CLASSA_HOST)) { - if ((error = rtinit(&ia->ia_ifa, RTM_ADD, flags)) != 0) - goto fail; - ia->ia_flags |= IFA_ROUTE; + if ((error = rtinit(&ia->ia_ifa, RTM_ADD, flags)) != 0) { + if (error != EEXIST || + !(ifac->ifa_prflags & IA_PRF_RTEXISTOK)) + goto fail; + } else { + ia->ia_flags |= IFA_ROUTE; + } } /* diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 3085f69385..21be456f50 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -96,6 +96,9 @@ struct in_aliasreq { #ifdef _KERNEL struct in_ifaddr_container; +#define IA_PRF_RTEXISTOK IFA_PRF_FLAG0 /* don't fail even if rtentry + * already exist */ + extern struct in_addr zeroin_addr; extern u_char inetctlerrmap[]; diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index a20b558108..5c34573a83 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -80,12 +80,22 @@ #define CARP_IS_RUNNING(ifp) \ (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) +struct carp_vhaddr { + uint32_t vha_flags; /* CARP_VHAF_ */ + const struct in_ifaddr *vha_ia; /* carp address */ + const struct in_ifaddr *vha_iaback; /* backing address */ + TAILQ_ENTRY(carp_vhaddr) vha_link; +}; +TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr); + struct carp_softc { struct ifnet sc_if; - struct ifnet *sc_ifp; /* compat shim */ struct ifnet *sc_carpdev; /* parent interface */ - struct in_ifaddr *sc_ia; /* primary iface address */ + struct carp_vhaddr_list sc_vha_list; /* virtual addr list */ + + const struct in_ifaddr *sc_ia; /* primary iface address v4 */ struct ip_moptions sc_imo; + #ifdef INET6 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ struct ip6_moptions sc_im6o; @@ -94,8 +104,8 @@ struct carp_softc { enum { INIT = 0, BACKUP, MASTER } sc_state; + int sc_dead; - int sc_flags_backup; int sc_suppress; int sc_sendad_errors; @@ -105,7 +115,7 @@ struct carp_softc { int sc_vhid; int sc_advskew; - int sc_naddrs; + int sc_naddrs; /* actually used IPv4 vha */ int sc_naddrs6; int sc_advbase; /* seconds */ int sc_init_counter; @@ -123,13 +133,9 @@ struct carp_softc { LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ }; -#define SC2IFP(sc) ((sc)->sc_ifp) struct carp_if { TAILQ_HEAD(, carp_softc) vhif_vrs; - int vhif_nvrs; - - struct ifnet *vhif_ifp; }; enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; @@ -174,34 +180,51 @@ static void carp_setroute(struct carp_softc *, int); static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static int carp_clone_create(struct if_clone *, int); static void carp_clone_destroy(struct ifnet *); -static void carpdetach(struct carp_softc *, int); +static void carp_detach(struct carp_softc *, int); static int carp_prepare_ad(struct mbuf *, struct carp_softc *, struct carp_header *); static void carp_send_ad_all(void); -static void carp_send_ad(void *); -static void carp_send_ad_locked(struct carp_softc *); +static void carp_send_ad_timeout(void *); +static void carp_send_ad(struct carp_softc *); static void carp_send_arp(struct carp_softc *); -static void carp_master_down(void *); -static void carp_master_down_locked(struct carp_softc *); +static void carp_master_down_timeout(void *); +static void carp_master_down(struct carp_softc *); static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static void carp_start(struct ifnet *); static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_set_state(struct carp_softc *, int); -static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); static void carp_multicast_cleanup(struct carp_softc *); -static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); -static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); -static void carp_carpdev_state_locked(struct carp_if *); -static void carp_sc_state_locked(struct carp_softc *); +static void carp_add_addr(struct carp_softc *, struct ifaddr *); +static void carp_del_addr(struct carp_softc *, struct ifaddr *); +static void carp_config_addr(struct carp_softc *, struct ifaddr *); +static void carp_link_addrs(struct carp_softc *, struct ifnet *, + struct ifaddr *); +static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, + struct ifaddr *); + +static int carp_get_vhaddr(struct carp_softc *, struct ifdrv *); +static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *); +static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, + struct ifnet *, const struct in_ifaddr *, int); +static void carp_deactivate_vhaddr(struct carp_softc *, + struct carp_vhaddr *); + +static void carp_sc_state(struct carp_softc *); #ifdef INET6 static void carp_send_na(struct carp_softc *); static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); static void carp_multicast6_cleanup(struct carp_softc *); #endif +static void carp_stop(struct carp_softc *, int); +static void carp_reset(struct carp_softc *, int); + +static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, + struct ifaddr *); +static void carp_ifdetach(void *, struct ifnet *); static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); @@ -212,6 +235,50 @@ IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 0, IF_MAXUNIT); static eventhandler_tag carp_ifdetach_event; +static eventhandler_tag carp_ifaddr_event; + +static __inline void +ifa_set_prflags(struct ifaddr *ifa, uint16_t prflags) +{ + int cpu; + + for (cpu = 0; cpu < ncpus; ++cpu) + ifa->ifa_containers[cpu].ifa_prflags |= prflags; +} + +static __inline void +carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) +{ + struct carp_vhaddr *vha; + u_long new_addr, addr; + + KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); + + /* + * Virtual address list is sorted; smaller one first + */ + new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); + + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); + + if (addr > new_addr) + break; + } + if (vha == NULL) + TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); + else + TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); + vha_new->vha_flags |= CARP_VHAF_ONLIST; +} + +static __inline void +carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) +{ + KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); + vha->vha_flags &= ~CARP_VHAF_ONLIST; + TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); +} static void carp_hmac_prepare(struct carp_softc *sc) @@ -223,6 +290,9 @@ carp_hmac_prepare(struct carp_softc *sc) #ifdef INET6 struct in6_addr in6; #endif +#ifdef INET + struct carp_vhaddr *vha; +#endif /* XXX: possible race here */ @@ -239,17 +309,14 @@ carp_hmac_prepare(struct carp_softc *sc) SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); #ifdef INET - TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) { - struct ifaddr *ifa = ifac->ifa; - - if (ifa->ifa_addr->sa_family == AF_INET) - SHA1Update(&sc->sc_sha1, - (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr, - sizeof(struct in_addr)); + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + SHA1Update(&sc->sc_sha1, + (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, + sizeof(struct in_addr)); } #endif /* INET */ #ifdef INET6 - TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) { + TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { struct ifaddr *ifa = ifac->ifa; if (ifa->ifa_addr->sa_family == AF_INET6) { @@ -297,32 +364,22 @@ carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], static void carp_setroute(struct carp_softc *sc, int cmd) { +#ifdef INET6 struct ifaddr_container *ifac; crit_enter(); - TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) { + TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { struct ifaddr *ifa = ifac->ifa; - if (ifa->ifa_addr->sa_family == AF_INET && - sc->sc_carpdev != NULL) { - int count = carp_addrcount( - (struct carp_if *)sc->sc_carpdev->if_carp, - ifatoia(ifa), CARP_COUNT_MASTER); - - if ((cmd == RTM_ADD && count == 1) || - (cmd == RTM_DELETE && count == 0)) - rtinit(ifa, cmd, RTF_UP | RTF_HOST); - } -#ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { if (cmd == RTM_ADD) in6_ifaddloop(ifa); else in6_ifremloop(ifa); } -#endif /* INET6 */ } crit_exit(); +#endif /* INET6 */ } static int @@ -332,15 +389,17 @@ carp_clone_create(struct if_clone *ifc, int unit) struct ifnet *ifp; sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); - ifp = sc->sc_ifp = &sc->sc_if; + ifp = &sc->sc_if; - sc->sc_flags_backup = 0; sc->sc_suppress = 0; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_advskew = 0; sc->sc_init_counter = 1; - sc->sc_naddrs = sc->sc_naddrs6 = 0; + sc->sc_naddrs = 0; + sc->sc_naddrs6 = 0; + + TAILQ_INIT(&sc->sc_vha_list); #ifdef INET6 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; @@ -375,58 +434,49 @@ carp_clone_destroy(struct ifnet *ifp) { struct carp_softc *sc = ifp->if_softc; - carpdetach(sc, 1); + sc->sc_dead = 1; + carp_detach(sc, 1); crit_enter(); LIST_REMOVE(sc, sc_next); crit_exit(); bpfdetach(ifp); if_detach(ifp); + + KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n")); kfree(sc, M_CARP); } -/* - * This function can be called on CARP interface destroy path, - * and in case of the removal of the underlying interface as - * well. We differentiate these two cases. In the latter case - * we do not cleanup our multicast memberships, since they - * are already freed. - */ static void -carpdetach(struct carp_softc *sc, int unlock) +carp_detach(struct carp_softc *sc, int detach) { struct carp_if *cif; - callout_stop(&sc->sc_ad_tmo); - callout_stop(&sc->sc_md_tmo); - callout_stop(&sc->sc_md6_tmo); + carp_reset(sc, detach); - if (sc->sc_suppress) - carp_suppress_preempt--; - sc->sc_suppress = 0; - - if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) - carp_suppress_preempt--; - sc->sc_sendad_errors = 0; - - carp_set_state(sc, INIT); - SC2IFP(sc)->if_flags &= ~IFF_UP; - carp_setrun(sc, 0); - if (unlock) - carp_multicast_cleanup(sc); + carp_multicast_cleanup(sc); #ifdef INET6 carp_multicast6_cleanup(sc); #endif + if (!sc->sc_dead && detach) { + struct carp_vhaddr *vha; + + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) + carp_deactivate_vhaddr(sc, vha); + KKASSERT(sc->sc_naddrs == 0); + } + if (sc->sc_carpdev != NULL) { - cif = (struct carp_if *)sc->sc_carpdev->if_carp; + cif = sc->sc_carpdev->if_carp; TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); - if (!--cif->vhif_nvrs) { + if (TAILQ_EMPTY(&cif->vhif_vrs)) { ifpromisc(sc->sc_carpdev, 0); sc->sc_carpdev->if_carp = NULL; - kfree(cif, M_IFADDR); + kfree(cif, M_CARP); } sc->sc_carpdev = NULL; + sc->sc_ia = NULL; } } @@ -434,16 +484,12 @@ carpdetach(struct carp_softc *sc, int unlock) static void carp_ifdetach(void *arg __unused, struct ifnet *ifp) { - struct carp_if *cif = (struct carp_if *)ifp->if_carp; - struct carp_softc *sc, *nextsc; - - if (cif == NULL) - return; + struct carp_if *cif = ifp->if_carp; + struct carp_softc *sc; - for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { - nextsc = TAILQ_NEXT(sc, sc_list); - carpdetach(sc, 0); - } + while (ifp->if_carp && + (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL) + carp_detach(sc, 1); } /* @@ -591,6 +637,7 @@ static void carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) { struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifnet *cifp; struct carp_softc *sc; uint64_t tmp_counter; struct timeval sc_tv, ch_tv; @@ -600,32 +647,32 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) if (sc->sc_vhid == ch->carp_vhid) break; - if (!sc || !CARP_IS_RUNNING(SC2IFP(sc))) { + if (!sc || !CARP_IS_RUNNING(&sc->sc_if)) { carpstats.carps_badvhid++; m_freem(m); return; } + cifp = &sc->sc_if; - getmicrotime(&SC2IFP(sc)->if_lastchange); - SC2IFP(sc)->if_ipackets++; - SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; + getmicrotime(&cifp->if_lastchange); + cifp->if_ipackets++; + cifp->if_ibytes += m->m_pkthdr.len; - if (SC2IFP(sc)->if_bpf) { + if (cifp->if_bpf) { struct ip *ip = mtod(m, struct ip *); /* BPF wants net byte order */ ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); ip->ip_off = htons(ip->ip_off); - bpf_mtap(SC2IFP(sc)->if_bpf, m); + bpf_mtap(cifp->if_bpf, m); } /* verify the CARP version. */ if (ch->carp_version != CARP_VERSION) { carpstats.carps_badver++; - SC2IFP(sc)->if_ierrors++; - CARP_LOG("%s; invalid version %d\n", - SC2IFP(sc)->if_xname, - ch->carp_version); + cifp->if_ierrors++; + CARP_LOG("%s; invalid version %d\n", cifp->if_xname, + ch->carp_version); m_freem(m); return; } @@ -633,8 +680,8 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) /* verify the hash */ if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { carpstats.carps_badauth++; - SC2IFP(sc)->if_ierrors++; - CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); + cifp->if_ierrors++; + CARP_LOG("%s: incorrect hash\n", cifp->if_xname); m_freem(m); return; } @@ -670,7 +717,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) callout_stop(&sc->sc_ad_tmo); CARP_DEBUG("%s: MASTER -> BACKUP " "(more frequent advertisement received)\n", - SC2IFP(sc)->if_xname); + cifp->if_xname); carp_set_state(sc, BACKUP); carp_setrun(sc, 0); carp_setroute(sc, RTM_DELETE); @@ -685,9 +732,8 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) if (carp_opts[CARPCTL_PREEMPT] && timevalcmp(&sc_tv, &ch_tv, <)) { CARP_DEBUG("%s: BACKUP -> MASTER " - "(preempting a slower master)\n", - SC2IFP(sc)->if_xname); - carp_master_down_locked(sc); + "(preempting a slower master)\n", cifp->if_xname); + carp_master_down(sc); break; } @@ -698,10 +744,9 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) */ sc_tv.tv_sec = sc->sc_advbase * 3; if (timevalcmp(&sc_tv, &ch_tv, <)) { - CARP_DEBUG("%s: BACKUP -> MASTER " - "(master timed out)\n", - SC2IFP(sc)->if_xname); - carp_master_down_locked(sc); + CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", + cifp->if_xname); + carp_master_down(sc); break; } @@ -718,8 +763,8 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) static int carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { + struct ifnet *cifp = &sc->sc_if; struct m_tag *mtag; - struct ifnet *ifp = SC2IFP(sc); if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ @@ -730,8 +775,8 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) sc->sc_counter++; } - ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); - ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); + ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); + ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); @@ -739,13 +784,13 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); if (mtag == NULL) { m_freem(m); - SC2IFP(sc)->if_oerrors++; - return (ENOMEM); + cifp->if_oerrors++; + return ENOMEM; } - bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); + bcopy(&cifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); m_tag_prepend(m, mtag); - return (0); + return 0; } static void @@ -757,30 +802,29 @@ carp_send_ad_all(void) if (sc->sc_carpdev == NULL) continue; - if (CARP_IS_RUNNING(SC2IFP(sc)) && sc->sc_state == MASTER) - carp_send_ad_locked(sc); + if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) + carp_send_ad(sc); } } static void -carp_send_ad(void *v) +carp_send_ad_timeout(void *xsc) { - struct carp_softc *sc = v; - - carp_send_ad_locked(sc); + carp_send_ad(xsc); } static void -carp_send_ad_locked(struct carp_softc *sc) +carp_send_ad(struct carp_softc *sc) { + struct ifnet *cifp = &sc->sc_if; struct carp_header ch; struct timeval tv; struct carp_header *ch_ptr; struct mbuf *m; int len, advbase, advskew; - /* bow out if we've lost our UPness or RUNNINGuiness */ - if (!CARP_IS_RUNNING(SC2IFP(sc))) { + if (!CARP_IS_RUNNING(cifp)) { + /* Bow out */ advbase = 255; advskew = 255; } else { @@ -803,17 +847,17 @@ carp_send_ad_locked(struct carp_softc *sc) ch.carp_cksum = 0; #ifdef INET - if (sc->sc_ia) { + if (sc->sc_ia != NULL) { struct ip *ip; - MGETHDR(m, M_NOWAIT, MT_HEADER); + MGETHDR(m, MB_DONTWAIT, MT_HEADER); if (m == NULL) { - SC2IFP(sc)->if_oerrors++; + cifp->if_oerrors++; carpstats.carps_onomem++; /* XXX maybe less ? */ if (advbase != 255 || advskew != 255) callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), - carp_send_ad, sc); + carp_send_ad_timeout, sc); return; } len = sizeof(*ip) + sizeof(ch); @@ -832,7 +876,7 @@ carp_send_ad_locked(struct carp_softc *sc) ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; - ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; + ip->ip_src = sc->sc_ia->ia_addr.sin_addr; ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); ch_ptr = (struct carp_header *)(&ip[1]); @@ -841,13 +885,13 @@ carp_send_ad_locked(struct carp_softc *sc) return; ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); - getmicrotime(&SC2IFP(sc)->if_lastchange); - SC2IFP(sc)->if_opackets++; - SC2IFP(sc)->if_obytes += len; + getmicrotime(&cifp->if_lastchange); + cifp->if_opackets++; + cifp->if_obytes += len; carpstats.carps_opackets++; if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { - SC2IFP(sc)->if_oerrors++; + cifp->if_oerrors++; if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { @@ -874,14 +918,14 @@ carp_send_ad_locked(struct carp_softc *sc) if (sc->sc_ia6) { struct ip6_hdr *ip6; - MGETHDR(m, M_NOWAIT, MT_HEADER); + MGETHDR(m, MB_DONTWAIT, MT_HEADER); if (m == NULL) { - SC2IFP(sc)->if_oerrors++; + cifp->if_oerrors++; carpstats.carps_onomem++; /* XXX maybe less ? */ if (advbase != 255 || advskew != 255) callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), - carp_send_ad, sc); + carp_send_ad_timeout, sc); return; } len = sizeof(*ip6) + sizeof(ch); @@ -902,7 +946,7 @@ carp_send_ad_locked(struct carp_softc *sc) ip6->ip6_dst.s6_addr16[0] = htons(0xff02); ip6->ip6_dst.s6_addr8[15] = 0x12; if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { - SC2IFP(sc)->if_oerrors++; + cifp->if_oerrors++; m_freem(m); CARP_LOG("%s: in6_setscope failed\n", __func__); return; @@ -914,13 +958,13 @@ carp_send_ad_locked(struct carp_softc *sc) return; ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); - getmicrotime(&SC2IFP(sc)->if_lastchange); - SC2IFP(sc)->if_opackets++; - SC2IFP(sc)->if_obytes += len; + getmicrotime(&cifp->if_lastchange); + cifp->if_opackets++; + cifp->if_obytes += len; carpstats.carps_opackets6++; if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { - SC2IFP(sc)->if_oerrors++; + cifp->if_oerrors++; if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { @@ -946,7 +990,7 @@ carp_send_ad_locked(struct carp_softc *sc) if (advbase != 255 || advskew != 255) callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), - carp_send_ad, sc); + carp_send_ad_timeout, sc); } /* @@ -957,16 +1001,14 @@ carp_send_ad_locked(struct carp_softc *sc) static void carp_send_arp(struct carp_softc *sc) { - struct ifaddr_container *ifac; - - TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) { - struct ifaddr *ifa = ifac->ifa; + const struct carp_vhaddr *vha; - if (ifa->ifa_addr->sa_family != AF_INET) + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + if (vha->vha_iaback == NULL) continue; - arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); - DELAY(1000); /* XXX */ + arp_iainit(sc->sc_carpdev, &vha->vha_ia->ia_addr.sin_addr, + IF_LLADDR(&sc->sc_if)); } } @@ -978,7 +1020,7 @@ carp_send_na(struct carp_softc *sc) struct in6_addr *in6; static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; - TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) { + TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { struct ifaddr *ifa = ifac->ifa; if (ifa->ifa_addr->sa_family != AF_INET6) @@ -992,93 +1034,90 @@ carp_send_na(struct carp_softc *sc) } #endif /* INET6 */ +static __inline const struct carp_vhaddr * +carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) +{ + struct carp_vhaddr *vha; + + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + if (vha->vha_iaback == NULL) + continue; + + if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) + return vha; + } + return NULL; +} + static int -carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) +carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, + const struct in_addr *isaddr, uint8_t **enaddr) { - struct carp_softc *vh; - int count = 0; + const struct carp_softc *vh; + int index, count = 0; + + /* + * XXX proof of concept implementation. + * We use the source ip to decide which virtual host should + * handle the request. If we're master of that virtual host, + * then we respond, otherwise, just drop the arp packet on + * the floor. + */ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - if ((type == CARP_COUNT_RUNNING && - CARP_IS_RUNNING(SC2IFP(vh))) || - (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { - struct ifaddr_container *ifac; - - TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid], - ifa_link) { - struct ifaddr *ifa = ifac->ifa; - - if (ifa->ifa_addr->sa_family == AF_INET && - ia->ia_addr.sin_addr.s_addr == - ifatoia(ifa)->ia_addr.sin_addr.s_addr) - count++; + if (!CARP_IS_RUNNING(&vh->sc_if)) + continue; + + if (carp_find_addr(vh, itaddr) != NULL) + count++; + } + if (count == 0) + return 0; + + /* this should be a hash, like pf_hash() */ + index = ntohl(isaddr->s_addr) % count; + count = 0; + + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if (!CARP_IS_RUNNING(&vh->sc_if)) + continue; + + if (carp_find_addr(vh, itaddr) == NULL) + continue; + + if (count == index) { + if (vh->sc_state == MASTER) { + *enaddr = IF_LLADDR(&vh->sc_if); + return 1; + } else { + return 0; } } + count++; } - return (count); + return 0; } int -carp_iamatch(void *v, struct in_ifaddr *ia, - struct in_addr *isaddr, uint8_t **enaddr) +carp_iamatch(const void *v, const struct in_addr *itaddr, + const struct in_addr *isaddr, uint8_t **enaddr) { - struct carp_if *cif = v; - struct carp_softc *vh; - int index, count = 0; + const struct carp_if *cif = v; + const struct carp_softc *vh; - if (carp_opts[CARPCTL_ARPBALANCE]) { - /* - * XXX proof of concept implementation. - * We use the source ip to decide which virtual host should - * handle the request. If we're master of that virtual host, - * then we respond, otherwise, just drop the arp packet on - * the floor. - */ - count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); - if (count == 0) { - /* should never reach this */ - return (0); - } + if (carp_opts[CARPCTL_ARPBALANCE]) + return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); - /* this should be a hash, like pf_hash() */ - index = ntohl(isaddr->s_addr) % count; - count = 0; - - TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - if (CARP_IS_RUNNING(SC2IFP(vh))) { - struct ifaddr_container *ifac; - - TAILQ_FOREACH(ifac, - &SC2IFP(vh)->if_addrheads[mycpuid], ifa_link) { - struct ifaddr *ifa = ifac->ifa; - - if (ifa->ifa_addr->sa_family == - AF_INET && - ia->ia_addr.sin_addr.s_addr == - ifatoia(ifa)->ia_addr.sin_addr.s_addr) { - if (count == index) { - if (vh->sc_state == MASTER) { - *enaddr = IF_LLADDR(vh->sc_ifp); - return (1); - } else { - return (0); - } - } - count++; - } - } - } - } - } else { - TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - if (CARP_IS_RUNNING(SC2IFP(vh)) && - vh->sc_state == MASTER) { - *enaddr = IF_LLADDR(vh->sc_ifp); - return (1); - } + TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { + if (!CARP_IS_RUNNING(&vh->sc_if) || vh->sc_state != MASTER) + continue; + + if (carp_find_addr(vh, itaddr) != NULL) { + *enaddr = IF_LLADDR(&vh->sc_if); + return 1; } } - return(0); + return 0; } #ifdef INET6 @@ -1091,13 +1130,13 @@ carp_iamatch6(void *v, struct in6_addr *taddr) TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { struct ifaddr_container *ifac; - TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid], + TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], ifa_link) { struct ifaddr *ifa = ifac->ifa; if (IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) && - CARP_IS_RUNNING(SC2IFP(vh)) && + CARP_IS_RUNNING(&vh->sc_if) && vh->sc_state == MASTER) { return (ifa); } @@ -1116,25 +1155,26 @@ carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { struct ifaddr_container *ifac; - TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], + TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { struct ifaddr *ifa = ifac->ifa; if (IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) && - CARP_IS_RUNNING(SC2IFP(sc))) { - struct ifnet *ifp = SC2IFP(sc); + CARP_IS_RUNNING(&sc->sc_if)) { + struct ifnet *ifp = &sc->sc_if; + mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); if (mtag == NULL) { /* better a bit than nothing */ - return (IF_LLADDR(sc->sc_ifp)); + return (IF_LLADDR(ifp)); } bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); m_tag_prepend(m, mtag); - return (IF_LLADDR(sc->sc_ifp)); + return (IF_LLADDR(ifp)); } } } @@ -1142,47 +1182,43 @@ carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) } #endif -struct ifnet * -carp_forus(void *v, void *dhost) +int +carp_forus(const void *v, const void *dhost) { - struct carp_if *cif = v; - struct carp_softc *vh; - uint8_t *ena = dhost; - - /** - * XXX: See here for check on MAC adr is not for virtual use - * - **/ + const struct carp_if *cif = v; + const struct carp_softc *vh; + const uint8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) - return (NULL); + return 0; TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - if (CARP_IS_RUNNING(SC2IFP(vh)) && vh->sc_state == MASTER && - !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { - return (SC2IFP(vh)); - } + const struct ifnet *cifp = &vh->sc_if; + + if (CARP_IS_RUNNING(cifp) && vh->sc_state == MASTER && + !bcmp(dhost, IF_LLADDR(cifp), ETHER_ADDR_LEN)) + return 1; } - return (NULL); + return 0; } static void -carp_master_down(void *v) +carp_master_down_timeout(void *xsc) { - struct carp_softc *sc = v; + struct carp_softc *sc = xsc; - lwkt_serialize_enter(sc->sc_ifp->if_serializer); - carp_master_down_locked(sc); - lwkt_serialize_exit(sc->sc_ifp->if_serializer); + CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", + sc->sc_if.if_xname); + carp_master_down(sc); } static void -carp_master_down_locked(struct carp_softc *sc) +carp_master_down(struct carp_softc *sc) { switch (sc->sc_state) { case INIT: kprintf("%s: master_down event in INIT state\n", - SC2IFP(sc)->if_xname); + sc->sc_if.if_xname); break; case MASTER: @@ -1190,7 +1226,7 @@ carp_master_down_locked(struct carp_softc *sc) case BACKUP: carp_set_state(sc, MASTER); - carp_send_ad_locked(sc); + carp_send_ad(sc); carp_send_arp(sc); #ifdef INET6 carp_send_na(sc); @@ -1208,19 +1244,18 @@ carp_master_down_locked(struct carp_softc *sc) static void carp_setrun(struct carp_softc *sc, sa_family_t af) { + struct ifnet *cifp = &sc->sc_if; struct timeval tv; if (sc->sc_carpdev == NULL) { - SC2IFP(sc)->if_flags &= ~IFF_RUNNING; carp_set_state(sc, INIT); return; } - if (SC2IFP(sc)->if_flags & IFF_UP && - sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) { - SC2IFP(sc)->if_flags |= IFF_RUNNING; + if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && + (sc->sc_naddrs || sc->sc_naddrs6)) { + /* Nothing */ } else { - SC2IFP(sc)->if_flags &= ~IFF_RUNNING; carp_setroute(sc, RTM_DELETE); return; } @@ -1228,18 +1263,17 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) switch (sc->sc_state) { case INIT: if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { - carp_send_ad_locked(sc); + carp_send_ad(sc); carp_send_arp(sc); #ifdef INET6 carp_send_na(sc); #endif /* INET6 */ CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", - SC2IFP(sc)->if_xname); + cifp->if_xname); carp_set_state(sc, MASTER); carp_setroute(sc, RTM_ADD); } else { - CARP_DEBUG("%s: INIT -> BACKUP\n", - SC2IFP(sc)->if_xname); + CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); carp_set_state(sc, BACKUP); carp_setroute(sc, RTM_DELETE); carp_setrun(sc, 0); @@ -1254,22 +1288,22 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) #ifdef INET case AF_INET: callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), - carp_master_down, sc); + carp_master_down_timeout, sc); break; #endif /* INET */ #ifdef INET6 case AF_INET6: callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), - carp_master_down, sc); + carp_master_down_timeout, sc); break; #endif /* INET6 */ default: if (sc->sc_naddrs) callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), - carp_master_down, sc); + carp_master_down_timeout, sc); if (sc->sc_naddrs6) callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), - carp_master_down, sc); + carp_master_down_timeout, sc); break; } break; @@ -1278,7 +1312,7 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) tv.tv_sec = sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), - carp_send_ad, sc); + carp_send_ad_timeout, sc); break; } } @@ -1287,15 +1321,13 @@ static void carp_multicast_cleanup(struct carp_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; - uint16_t n = imo->imo_num_memberships; - /* Clean up our own multicast memberships */ - while (n-- > 0) { - if (imo->imo_membership[n] != NULL) { - in_delmulti(imo->imo_membership[n]); - imo->imo_membership[n] = NULL; - } - } + if (imo->imo_num_memberships == 0) + return; + KKASSERT(imo->imo_num_memberships == 1); + + in_delmulti(imo->imo_membership[0]); + imo->imo_membership[0] = NULL; imo->imo_num_memberships = 0; imo->imo_multicast_ifp = NULL; } @@ -1318,144 +1350,205 @@ carp_multicast6_cleanup(struct carp_softc *sc) #endif static int -carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) +carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd) +{ + const struct carp_vhaddr *vha; + struct ifcarpvhaddr *carpa, *carpa0; + int count, len, error; + + count = 0; + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) + ++count; + + if (ifd->ifd_len == 0) { + ifd->ifd_len = count * sizeof(*carpa); + return 0; + } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) { + ifd->ifd_len = 0; + return 0; + } + len = min(ifd->ifd_len, sizeof(*carpa) * count); + KKASSERT(len >= sizeof(*carpa)); + + carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); + if (carpa == NULL) + return ENOMEM; + + count = 0; + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + if (len < sizeof(*carpa)) + break; + + carpa->carpa_flags = vha->vha_flags; + carpa->carpa_addr.sin_family = AF_INET; + carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; + + carpa->carpa_baddr.sin_family = AF_INET; + if (vha->vha_iaback == NULL) { + carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; + } else { + carpa->carpa_baddr.sin_addr = + vha->vha_iaback->ia_addr.sin_addr; + } + + ++carpa; + ++count; + len -= sizeof(*carpa); + } + ifd->ifd_len = sizeof(*carpa) * count; + KKASSERT(ifd->ifd_len > 0); + + error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len); + kfree(carpa0, M_TEMP); + return error; +} + +static int +carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) { struct ifnet *ifp; - struct carp_if *cif; - struct in_ifaddr *ia, *ia_if; + struct in_ifaddr *ia_if; struct in_ifaddr_container *iac; - struct ip_moptions *imo = &sc->sc_imo; - struct in_addr addr; - u_long iaddr = htonl(sin->sin_addr.s_addr); - int own, error; - - if (sin->sin_addr.s_addr == 0) { - if (!(SC2IFP(sc)->if_flags & IFF_UP)) - carp_set_state(sc, INIT); - if (sc->sc_naddrs) - SC2IFP(sc)->if_flags |= IFF_UP; - carp_setrun(sc, 0); - return (0); - } - /* we have to do it by hands to check we won't match on us */ - ia_if = NULL; own = 0; + const struct sockaddr_in *sin; + u_long iaddr; + int own; + + KKASSERT(vha->vha_ia != NULL); + + sin = &vha->vha_ia->ia_addr; + iaddr = ntohl(sin->sin_addr.s_addr); + + ia_if = NULL; + own = 0; TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { - ia = iac->ia; + struct in_ifaddr *ia = iac->ia; + + if ((ia->ia_flags & IFA_ROUTE) == 0) + continue; + + if (ia->ia_ifp->if_type == IFT_CARP) + continue; /* and, yeah, we need a multicast-capable iface too */ - if (ia->ia_ifp != SC2IFP(sc) && - (ia->ia_ifp->if_flags & IFF_MULTICAST) && - (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { - if (!ia_if) - ia_if = ia; + if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) + continue; + + if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { if (sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) - own++; + own = 1; + if (ia_if == NULL) + ia_if = ia; + else if (sc->sc_carpdev != NULL && + sc->sc_carpdev == ia->ia_ifp) + ia_if = ia; } } + carp_deactivate_vhaddr(sc, vha); if (!ia_if) - return (EADDRNOTAVAIL); + return ENOENT; - ia = ia_if; - ifp = ia->ia_ifp; + ifp = ia_if->ia_ifp; - if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || - (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) - return (EADDRNOTAVAIL); + /* XXX Don't allow parent iface to be changed */ + if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) + return EEXIST; - if (imo->imo_num_memberships == 0) { - addr.s_addr = htonl(INADDR_CARP_GROUP); - if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) - return (ENOBUFS); - imo->imo_num_memberships++; - imo->imo_multicast_ifp = ifp; - imo->imo_multicast_ttl = CARP_DFLTTL; - imo->imo_multicast_loop = 0; - } + return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); +} - if (!ifp->if_carp) { - cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); - if ((error = ifpromisc(ifp, 1))) { - kfree(cif, M_CARP); - goto cleanup; - } - - cif->vhif_ifp = ifp; - TAILQ_INIT(&cif->vhif_vrs); - ifp->if_carp = cif; - } else { - struct carp_softc *vr; +static void +carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) +{ + struct carp_vhaddr *vha_new; + struct in_ifaddr *carp_ia; +#ifdef INVARIANTS + struct carp_vhaddr *vha; +#endif - cif = (struct carp_if *)ifp->if_carp; - TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { - if (vr != sc && vr->sc_vhid == sc->sc_vhid) { - error = EINVAL; - goto cleanup; - } - } - } - sc->sc_ia = ia; - sc->sc_carpdev = ifp; + KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); + carp_ia = ifatoia(carp_ifa); - { /* XXX prevent endless loop if already in queue */ - struct carp_softc *vr, *after = NULL; - int myself = 0; - cif = (struct carp_if *)ifp->if_carp; +#ifdef INVARIANTS + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) + KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); +#endif - TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { - if (vr == sc) - myself = 1; - if (vr->sc_vhid < sc->sc_vhid) - after = vr; - } + vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); + vha_new->vha_ia = carp_ia; + carp_insert_vhaddr(sc, vha_new); - if (!myself) { - /* We're trying to keep things in order */ - if (after == NULL) - TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); - else - TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); - cif->vhif_nvrs++; - } + if (carp_config_vhaddr(sc, vha_new) != 0) { + /* + * If the above configuration fails, it may only mean + * that the new address is problematic. However, the + * carp(4) interface may already have several working + * addresses. Since the expected behaviour of + * SIOC[AS]IFADDR is to put the NIC into working state, + * we try starting the state machine manually here with + * the hope that the carp(4)'s previously working + * addresses still could be brought up. + */ + carp_hmac_prepare(sc); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); } +} - sc->sc_naddrs++; - SC2IFP(sc)->if_flags |= IFF_UP; - if (own) - sc->sc_advskew = 0; +static void +carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) +{ + struct carp_vhaddr *vha; + struct in_ifaddr *carp_ia; - carp_sc_state_locked(sc); - carp_setrun(sc, 0); + KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); + carp_ia = ifatoia(carp_ifa); - return (0); + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + KKASSERT(vha->vha_ia != NULL); + if (vha->vha_ia == carp_ia) + break; + } + KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); -cleanup: - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); - return (error); + /* + * Remove the vhaddr from the list before deactivating + * the vhaddr, so that the HMAC could be correctly + * updated in carp_deactivate_vhaddr() + */ + carp_remove_vhaddr(sc, vha); + + carp_deactivate_vhaddr(sc, vha); + kfree(vha, M_CARP); } -static int -carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) +static void +carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) { - int error = 0; + struct carp_vhaddr *vha; + struct in_ifaddr *carp_ia; - if (!--sc->sc_naddrs) { - struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; - struct ip_moptions *imo = &sc->sc_imo; + KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); + carp_ia = ifatoia(carp_ifa); - callout_stop(&sc->sc_ad_tmo); - SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING); - sc->sc_vhid = -1; - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); - imo->imo_multicast_ifp = NULL; - TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); - if (!--cif->vhif_nvrs) { - sc->sc_carpdev->if_carp = NULL; - kfree(cif, M_IFADDR); - } + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + KKASSERT(vha->vha_ia != NULL); + if (vha->vha_ia == carp_ia) + break; + } + KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); + + /* Remove then reinsert, to keep the vhaddr list sorted */ + carp_remove_vhaddr(sc, vha); + carp_insert_vhaddr(sc, vha); + + if (carp_config_vhaddr(sc, vha) != 0) { + /* See the comment in carp_add_addr() */ + carp_hmac_prepare(sc); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); } - return (error); } #ifdef INET6 @@ -1471,10 +1564,6 @@ carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) int own, error; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - if (!(SC2IFP(sc)->if_flags & IFF_UP)) - carp_set_state(sc, INIT); - if (sc->sc_naddrs6) - SC2IFP(sc)->if_flags |= IFF_UP; carp_setrun(sc, 0); return (0); } @@ -1492,7 +1581,7 @@ carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) break; } /* and, yeah, we need a multicast-capable iface too */ - if (ia->ia_ifp != SC2IFP(sc) && + if (ia->ia_ifp != &sc->sc_if && (ia->ia_ifp->if_flags & IFF_MULTICAST) && (i == 4)) { if (!ia_if) @@ -1541,18 +1630,18 @@ carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) if (!ifp->if_carp) { cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); + if ((error = ifpromisc(ifp, 1))) { kfree(cif, M_CARP); goto cleanup; } - cif->vhif_ifp = ifp; TAILQ_INIT(&cif->vhif_vrs); ifp->if_carp = cif; } else { struct carp_softc *vr; - cif = (struct carp_if *)ifp->if_carp; + cif = ifp->if_carp; TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { if (vr != sc && vr->sc_vhid == sc->sc_vhid) { error = EINVAL; @@ -1566,7 +1655,7 @@ carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) { /* XXX prevent endless loop if already in queue */ struct carp_softc *vr, *after = NULL; int myself = 0; - cif = (struct carp_if *)ifp->if_carp; + cif = ifp->if_carp; TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { if (vr == sc) @@ -1581,15 +1670,13 @@ carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); else TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); - cif->vhif_nvrs++; } } sc->sc_naddrs6++; - SC2IFP(sc)->if_flags |= IFF_UP; if (own) sc->sc_advskew = 0; - carp_sc_state_locked(sc); + carp_sc_state(sc); carp_setrun(sc, 0); return (0); @@ -1612,11 +1699,10 @@ carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) int error = 0; if (!--sc->sc_naddrs6) { - struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; + struct carp_if *cif = sc->sc_carpdev->if_carp; struct ip6_moptions *im6o = &sc->sc_im6o; callout_stop(&sc->sc_ad_tmo); - SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING); sc->sc_vhid = -1; while (!LIST_EMPTY(&im6o->im6o_memberships)) { struct in6_multi_mship *imm = @@ -1627,7 +1713,7 @@ carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) } im6o->im6o_multicast_ifp = NULL; TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); - if (!--cif->vhif_nvrs) { + if (TAILQ_EMPTY(&cif->vhif_vrs)) { sc->sc_carpdev->if_carp = NULL; kfree(cif, M_IFADDR); } @@ -1644,26 +1730,27 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) struct ifaddr *ifa; struct ifreq *ifr; struct ifaliasreq *ifra; + struct ifdrv *ifd; + char devname[IFNAMSIZ]; int error = 0; ifa = (struct ifaddr *)addr; ifra = (struct ifaliasreq *)addr; ifr = (struct ifreq *)addr; + ifd = (struct ifdrv *)addr; switch (cmd) { case SIOCSIFADDR: switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: - SC2IFP(sc)->if_flags |= IFF_UP; - bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, - sizeof(struct sockaddr)); - error = carp_set_addr(sc, satosin(ifa->ifa_addr)); + ifp->if_flags |= IFF_UP | IFF_RUNNING; + ifa_set_prflags(ifa, IA_PRF_RTEXISTOK); break; #endif /* INET */ #ifdef INET6 case AF_INET6: - SC2IFP(sc)->if_flags |= IFF_UP; + ifp->if_flags |= IFF_UP | IFF_RUNNING; error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); break; #endif /* INET6 */ @@ -1677,15 +1764,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: - SC2IFP(sc)->if_flags |= IFF_UP; - bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, - sizeof(struct sockaddr)); - error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); - break; + panic("SIOCAIFADDR should never be seen\n"); #endif /* INET */ #ifdef INET6 case AF_INET6: - SC2IFP(sc)->if_flags |= IFF_UP; + ifp->if_flags |= IFF_UP | IFF_RUNNING; error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); break; #endif /* INET6 */ @@ -1699,8 +1782,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: - error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); - break; + panic("SIOCDIFADDR should never be seen\n"); #endif /* INET */ #ifdef INET6 case AF_INET6: @@ -1714,17 +1796,14 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) break; case SIOCSIFFLAGS: - if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { - callout_stop(&sc->sc_ad_tmo); - callout_stop(&sc->sc_md_tmo); - callout_stop(&sc->sc_md6_tmo); - if (sc->sc_state == MASTER) - carp_send_ad_locked(sc); - carp_set_state(sc, INIT); - carp_setrun(sc, 0); - } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { - SC2IFP(sc)->if_flags |= IFF_UP; - carp_setrun(sc, 0); + if (ifp->if_flags & IFF_UP) { + if ((ifp->if_flags & IFF_RUNNING) == 0) { + ifp->if_flags |= IFF_RUNNING; + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + } + } else if (ifp->if_flags & IFF_RUNNING) { + carp_stop(sc, 0); } break; @@ -1732,10 +1811,13 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) error = suser_cred(cr, NULL_CRED_OKAY); if (error) break; - if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) + error = copyin(ifr->ifr_data, &carpr, sizeof(carpr)); + if (error) break; + error = 1; - if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { + if ((ifp->if_flags & IFF_RUNNING) && + sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { switch (carpr.carpr_state) { case BACKUP: callout_stop(&sc->sc_ad_tmo); @@ -1745,7 +1827,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) break; case MASTER: - carp_master_down_locked(sc); + carp_master_down(sc); break; default: @@ -1758,8 +1840,8 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) break; } if (sc->sc_carpdev) { - struct carp_if *cif; - cif = (struct carp_if *)sc->sc_carpdev->if_carp; + struct carp_if *cif = sc->sc_carpdev->if_carp; + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { if (vr != sc && vr->sc_vhid == carpr.carpr_vhid) @@ -1767,12 +1849,12 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) } } sc->sc_vhid = carpr.carpr_vhid; - IF_LLADDR(sc->sc_ifp)[0] = 0; - IF_LLADDR(sc->sc_ifp)[1] = 0; - IF_LLADDR(sc->sc_ifp)[2] = 0x5e; - IF_LLADDR(sc->sc_ifp)[3] = 0; - IF_LLADDR(sc->sc_ifp)[4] = 1; - IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; + IF_LLADDR(ifp)[0] = 0; + IF_LLADDR(ifp)[1] = 0; + IF_LLADDR(ifp)[2] = 0x5e; + IF_LLADDR(ifp)[3] = 0; + IF_LLADDR(ifp)[4] = 1; + IF_LLADDR(ifp)[5] = sc->sc_vhid; error--; } if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { @@ -1803,19 +1885,56 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) carpr.carpr_vhid = sc->sc_vhid; carpr.carpr_advbase = sc->sc_advbase; carpr.carpr_advskew = sc->sc_advskew; + error = suser_cred(cr, NULL_CRED_OKAY); if (error == 0) { bcopy(sc->sc_key, carpr.carpr_key, - sizeof(carpr.carpr_key)); + sizeof(carpr.carpr_key)); } + error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); break; + case SIOCGDRVSPEC: + switch (ifd->ifd_cmd) { + case CARPGDEVNAME: + if (ifd->ifd_len != sizeof(devname)) + error = EINVAL; + break; + + case CARPGVHADDR: + break; + + default: + error = EINVAL; + break; + } + if (error) + break; + + switch (ifd->ifd_cmd) { + case CARPGVHADDR: + error = carp_get_vhaddr(sc, ifd); + break; + + case CARPGDEVNAME: + bzero(devname, sizeof(devname)); + if (sc->sc_carpdev != NULL) { + strlcpy(devname, sc->sc_carpdev->if_xname, + sizeof(devname)); + } + error = copyout(devname, ifd->ifd_data, + sizeof(devname)); + break; + } + break; + default: error = EINVAL; + break; } carp_hmac_prepare(sc); - return (error); + return error; } /* @@ -1927,49 +2046,42 @@ carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, static void carp_set_state(struct carp_softc *sc, int state) { + struct ifnet *cifp = &sc->sc_if; + if (sc->sc_state == state) return; - sc->sc_state = state; - switch (state) { + + switch (sc->sc_state) { case BACKUP: - SC2IFP(sc)->if_link_state = LINK_STATE_DOWN; + cifp->if_link_state = LINK_STATE_DOWN; break; case MASTER: - SC2IFP(sc)->if_link_state = LINK_STATE_UP; + cifp->if_link_state = LINK_STATE_UP; break; default: - SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN; + cifp->if_link_state = LINK_STATE_UNKNOWN; break; } - rt_ifmsg(SC2IFP(sc)); + rt_ifmsg(cifp); } void carp_carpdev_state(void *v) { struct carp_if *cif = v; - - carp_carpdev_state_locked(cif); -} - -static void -carp_carpdev_state_locked(struct carp_if *cif) -{ struct carp_softc *sc; TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) - carp_sc_state_locked(sc); + carp_sc_state(sc); } static void -carp_sc_state_locked(struct carp_softc *sc) +carp_sc_state(struct carp_softc *sc) { if (!(sc->sc_carpdev->if_flags & IFF_UP)) { - sc->sc_flags_backup = SC2IFP(sc)->if_flags; - SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING); callout_stop(&sc->sc_ad_tmo); callout_stop(&sc->sc_md_tmo); callout_stop(&sc->sc_md6_tmo); @@ -1982,7 +2094,6 @@ carp_sc_state_locked(struct carp_softc *sc) } sc->sc_suppress = 1; } else { - SC2IFP(sc)->if_flags |= sc->sc_flags_backup; carp_set_state(sc, INIT); carp_setrun(sc, 0); if (sc->sc_suppress) @@ -1991,6 +2102,366 @@ carp_sc_state_locked(struct carp_softc *sc) } } +static void +carp_stop(struct carp_softc *sc, int detach) +{ + sc->sc_if.if_flags &= ~IFF_RUNNING; + + callout_stop(&sc->sc_ad_tmo); + callout_stop(&sc->sc_md_tmo); + callout_stop(&sc->sc_md6_tmo); + + if (!detach && sc->sc_state == MASTER) + carp_send_ad(sc); + + if (sc->sc_suppress) + carp_suppress_preempt--; + sc->sc_suppress = 0; + + if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) + carp_suppress_preempt--; + sc->sc_sendad_errors = 0; + sc->sc_sendad_success = 0; + + carp_set_state(sc, INIT); + carp_setrun(sc, 0); +} + +static void +carp_reset(struct carp_softc *sc, int detach) +{ + struct ifnet *cifp = &sc->sc_if; + + carp_stop(sc, detach); + if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) + cifp->if_flags |= IFF_RUNNING; +} + +static int +carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, + struct ifnet *ifp, const struct in_ifaddr *ia_if, int own) +{ + struct ip_moptions *imo = &sc->sc_imo; + struct carp_if *cif; + struct carp_softc *vr, *after = NULL; + int onlist, error; +#ifdef INVARIANTS + int assert_onlist; +#endif + + KKASSERT(vha->vha_ia != NULL); + + KASSERT(ia_if != NULL, ("NULL backing address\n")); + KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha)); + KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, + ("inactive vhaddr %p is the address owner\n", vha)); + + KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, + ("%s is already on %s\n", sc->sc_if.if_xname, + sc->sc_carpdev->if_xname)); + + KASSERT(imo->imo_multicast_ifp == NULL || + imo->imo_multicast_ifp == ifp, + ("%s didn't leave mcast group on %s\n", + sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); + + if (imo->imo_num_memberships == 0) { + struct in_addr addr; + + addr.s_addr = htonl(INADDR_CARP_GROUP); + if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) + return ENOBUFS; + imo->imo_num_memberships++; + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_ttl = CARP_DFLTTL; + imo->imo_multicast_loop = 0; + } + + if (!ifp->if_carp) { + KASSERT(sc->sc_carpdev == NULL, + ("%s is already on %s\n", sc->sc_if.if_xname, + sc->sc_carpdev->if_xname)); + + cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); + + error = ifpromisc(ifp, 1); + if (error) { + kfree(cif, M_CARP); + goto cleanup; + } + + TAILQ_INIT(&cif->vhif_vrs); + ifp->if_carp = cif; + } else { + cif = ifp->if_carp; + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { + if (vr != sc && vr->sc_vhid == sc->sc_vhid) { + error = EINVAL; + goto cleanup; + } + } + } + +#ifdef INVARIANTS + if (sc->sc_carpdev != NULL) + assert_onlist = 1; + else + assert_onlist = 0; +#endif + sc->sc_ia = ia_if; + sc->sc_carpdev = ifp; + + cif = ifp->if_carp; + onlist = 0; + TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { + if (vr == sc) + onlist = 1; + if (vr->sc_vhid < sc->sc_vhid) + after = vr; + } + +#ifdef INVARIANTS + if (assert_onlist) { + KASSERT(onlist, ("%s is not on %s carp list\n", + sc->sc_if.if_xname, ifp->if_xname)); + } else { + KASSERT(!onlist, ("%s is already on %s carp list\n", + sc->sc_if.if_xname, ifp->if_xname)); + } +#endif + + if (!onlist) { + /* We're trying to keep things in order */ + if (after == NULL) + TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); + else + TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); + } + + vha->vha_iaback = ia_if; + sc->sc_naddrs++; + + if (own) { + vha->vha_flags |= CARP_VHAF_OWNER; + + /* XXX save user configured advskew? */ + sc->sc_advskew = 0; + } + + carp_hmac_prepare(sc); + carp_set_state(sc, INIT); + carp_setrun(sc, 0); + return 0; +cleanup: + carp_multicast_cleanup(sc); + return error; +} + +static void +carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) +{ + KKASSERT(vha->vha_ia != NULL); + + carp_hmac_prepare(sc); + + if (vha->vha_iaback == NULL) { + KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, + ("inactive vhaddr %p is the address owner\n", vha)); + return; + } + + vha->vha_flags &= ~CARP_VHAF_OWNER; + + KKASSERT(sc->sc_naddrs > 0); + vha->vha_iaback = NULL; + sc->sc_naddrs--; + if (!sc->sc_naddrs) { + if (sc->sc_naddrs6) { + carp_multicast_cleanup(sc); + sc->sc_ia = NULL; + } else { + carp_detach(sc, 0); + } + } +} + +static void +carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) +{ + struct carp_vhaddr *vha; + struct in_ifaddr *ia_if; + + KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); + ia_if = ifatoia(ifa_if); + + if ((ia_if->ia_flags & IFA_ROUTE) == 0) + return; + + /* + * Test each inactive vhaddr against the newly added address. + * If the newly added address could be the backing address, + * then activate the matching vhaddr. + */ + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + const struct in_ifaddr *ia; + u_long iaddr; + int own; + + if (vha->vha_iaback != NULL) + continue; + + ia = vha->vha_ia; + iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); + + if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) + continue; + + own = 0; + if (ia->ia_addr.sin_addr.s_addr == + ia_if->ia_addr.sin_addr.s_addr) + own = 1; + + carp_activate_vhaddr(sc, vha, ifp, ia_if, own); + } +} + +static void +carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, + struct ifaddr *ifa_if) +{ + struct carp_vhaddr *vha; + struct in_ifaddr *ia_if; + + KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); + ia_if = ifatoia(ifa_if); + + /* + * Ad src address is deleted; set it to NULL. + * Following loop will try pick up a new ad src address + * if one of the vhaddr could retain its backing address. + */ + if (sc->sc_ia == ia_if) + sc->sc_ia = NULL; + + /* + * Test each active vhaddr against the deleted address. + * If the deleted address is vhaddr address's backing + * address, then deactivate the vhaddr. + */ + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { + if (vha->vha_iaback == NULL) + continue; + + if (vha->vha_iaback == ia_if) + carp_deactivate_vhaddr(sc, vha); + else if (sc->sc_ia == NULL) + sc->sc_ia = vha->vha_iaback; + } +} + +static void +carp_update_addrs(struct carp_softc *sc) +{ + struct carp_vhaddr *vha; + + KKASSERT(sc->sc_carpdev == NULL); + + TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) + carp_config_vhaddr(sc, vha); +} + +static void +carp_ifaddr(void *arg __unused, struct ifnet *ifp, + enum ifaddr_event event, struct ifaddr *ifa) +{ + struct carp_softc *sc; + + if (ifa->ifa_addr->sa_family != AF_INET) + return; + + if (ifp->if_type == IFT_CARP) { + /* + * Address is changed on carp(4) interface + */ + switch (event) { + case IFADDR_EVENT_ADD: + carp_add_addr(ifp->if_softc, ifa); + break; + + case IFADDR_EVENT_CHANGE: + carp_config_addr(ifp->if_softc, ifa); + break; + + case IFADDR_EVENT_DELETE: + carp_del_addr(ifp->if_softc, ifa); + break; + } + return; + } + + /* + * Address is changed on non-carp(4) interface + */ + if ((ifp->if_flags & IFF_MULTICAST) == 0) + return; + + crit_enter(); + LIST_FOREACH(sc, &carpif_list, sc_next) { + if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { + /* Not the parent iface; skip */ + continue; + } + + switch (event) { + case IFADDR_EVENT_ADD: + carp_link_addrs(sc, ifp, ifa); + break; + + case IFADDR_EVENT_DELETE: + if (sc->sc_carpdev != NULL) { + carp_unlink_addrs(sc, ifp, ifa); + if (sc->sc_carpdev == NULL) + carp_update_addrs(sc); + } else { + /* + * The carp(4) interface didn't have a + * parent iface, so it is not possible + * that it will contain any address to + * be unlinked. + */ + } + break; + + case IFADDR_EVENT_CHANGE: + if (sc->sc_carpdev == NULL) { + /* + * The carp(4) interface didn't have a + * parent iface, so it is not possible + * that it will contain any address to + * be updated. + */ + carp_link_addrs(sc, ifp, ifa); + } else { + /* + * First try breaking tie with the old + * address. Then see whether we could + * link certain vhaddr to the new address. + * If that fails, i.e. carpdev is NULL, + * we try a global update. + * + * NOTE: The above order is critical. + */ + carp_unlink_addrs(sc, ifp, ifa); + carp_link_addrs(sc, ifp, ifa); + if (sc->sc_carpdev == NULL) + carp_update_addrs(sc); + } + break; + } + } + crit_exit(); +} + static int carp_modevent(module_t mod, int type, void *data) { @@ -2000,12 +2471,17 @@ carp_modevent(module_t mod, int type, void *data) carp_ifdetach_event = EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); + carp_ifaddr_event = + EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, + EVENTHANDLER_PRI_ANY); if_clone_attach(&carp_cloner); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_detach_event, carp_ifdetach_event); + EVENTHANDLER_DEREGISTER(ifaddr_event, + carp_ifaddr_event); if_clone_detach(&carp_cloner); break; diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h index 3c5a56df46..2560ab6eaa 100644 --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -132,8 +132,20 @@ struct carpreq { int carpr_advbase; unsigned char carpr_key[CARP_KEY_LEN]; }; -#define SIOCSVH _IOWR('i', 245, struct ifreq) -#define SIOCGVH _IOWR('i', 246, struct ifreq) +#define SIOCSVH _IOWR('i', 245, struct ifreq) +#define SIOCGVH _IOWR('i', 246, struct ifreq) + + +struct ifcarpvhaddr { + uint32_t carpa_flags; /* CARP_VHAF_ */ + struct sockaddr_in carpa_addr; /* carp address */ + struct sockaddr_in carpa_baddr; /* backing address */ +}; +#define CARP_VHAF_OWNER 0x1 +#define CARP_VHAF_ONLIST 0x2 + +#define CARPGDEVNAME 0 /* SIOCGDRVSPEC char[IFNAMSIZ] */ +#define CARPGVHADDR 1 /* SIOCGDRVSPEC ifcarpvhaddr array */ /* * Names for CARP sysctl objects @@ -160,11 +172,11 @@ void carp_input(struct mbuf *, ...); int carp6_input(struct mbuf **, int *, int); int carp_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); -int carp_iamatch(void *, struct in_ifaddr *, struct in_addr *, - uint8_t **); +int carp_iamatch(const void *, const struct in_addr *, + const struct in_addr *, uint8_t **); struct ifaddr *carp_iamatch6(void *, struct in6_addr *); void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *); -struct ifnet *carp_forus(void *, void *); +int carp_forus(const void *, const void *); #endif /* _KERNEL */ #endif /* _IP_CARP_H */ diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 71316a30db..4e249f9ea5 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -611,9 +611,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, case SIOCAIFADDR_IN6: { - int i, error = 0; + int i, error = 0, iaIsNew; struct nd_prefix pr0, *pr; + if (ia != NULL) + iaIsNew = 0; + else + iaIsNew = 1; + /* * first, make or update the interface address structure, * and link it to the list. @@ -708,8 +713,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, */ pfxlist_onlink_check(); } - if (error == 0 && ia) - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + if (error == 0 && ia) { + EVENTHANDLER_INVOKE(ifaddr_event, ifp, + iaIsNew ? IFADDR_EVENT_ADD : IFADDR_EVENT_CHANGE, + &ia->ia_ifa); + } break; } @@ -755,9 +763,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, pr->ndpr_expire = 1; /* XXX: just for expiration */ } - purgeaddr: +purgeaddr: + EVENTHANDLER_INVOKE(ifaddr_event, ifp, IFADDR_EVENT_DELETE, + &ia->ia_ifa); in6_purgeaddr(&ia->ia_ifa); - EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } -- 2.41.0