From: Matthew Dillon Date: Thu, 24 Feb 2011 22:14:42 +0000 (-0800) Subject: kernel - Major bridging functionality added (bonding) X-Git-Tag: v2.11.0~267^2~19 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/1e8583743e3c4e239dbff20b8d85e6b25fe43c3f kernel - Major bridging functionality added (bonding) * Add channel bonding support to if_bridge. This utilizes the link2 flag on the bridge interface. Participating member interfaces must be programmed to the same MAC address. Multiple bonding groups can be created. Note that both sides of participating member interfaces must implement the link2 feature. If the other side does not it will likely be blocking (due to the spanning tree protocol) some of the member interfaces and you will get packet loss. * Document all the link flags and new features. --- diff --git a/sbin/ifconfig/ifbridge.c b/sbin/ifconfig/ifbridge.c index 042ff1f9b3..6d8fa3ae3f 100644 --- a/sbin/ifconfig/ifbridge.c +++ b/sbin/ifconfig/ifbridge.c @@ -116,6 +116,7 @@ bridge_interfaces(int s, const char *prefix) "learning", "forwarding", "blocking", + "bonded", "blocking (link1)" }; struct ifbifconf bifc; @@ -156,7 +157,7 @@ bridge_interfaces(int s, const char *prefix) printf("%s", pad); printf("port %u priority %u", req->ifbr_portno, req->ifbr_priority); - printf(" path cost %u", req->ifbr_path_cost); + printf(" pathcost %u", req->ifbr_path_cost); if (req->ifbr_state < sizeof(stpstates) / sizeof(stpstates[0])) printf(" %s", stpstates[req->ifbr_state]); @@ -164,6 +165,8 @@ bridge_interfaces(int s, const char *prefix) printf(" ", req->ifbr_state); printf("\n"); + printf("%sbondweight %u\n", + pad, req->ifbr_bond_weight); printf("%sdesignated root: %016jx\n", pad, (intmax_t)req->ifbr_designated_root); printf("%sdesignated bridge: %016jx\n", @@ -521,6 +524,27 @@ setbridge_ifpathcost(const char *ifn, const char *cost, int s, err(1, "BRDGSIFCOST %s", cost); } +static void +setbridge_ifbondweight(const char *ifn, const char *cost, int s, + const struct afswtch *afp) +{ + struct ifbreq req; + u_long val; + + memset(&req, 0, sizeof(req)); + + if (get_val(cost, &val) < 0 || (val & ~0xff) != 0) + errx(1, "invalid value: %s", cost); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + if (val > 65535) + val = 65535; + req.ifbr_bond_weight = (uint16_t)val; + + if (do_cmd(s, BRDGSBONDWGHT, &req, sizeof(req), 1) < 0) + err(1, "BRDGSBONDWGHT %s", cost); +} + static void setbridge_timeout(const char *arg, int d, int s, const struct afswtch *afp) { @@ -559,6 +583,7 @@ static struct cmd bridge_cmds[] = { DEF_CMD_ARG("priority", setbridge_priority), DEF_CMD_ARG2("ifpriority", setbridge_ifpriority), DEF_CMD_ARG2("ifpathcost", setbridge_ifpathcost), + DEF_CMD_ARG2("ifbondweight", setbridge_ifbondweight), DEF_CMD_ARG("timeout", setbridge_timeout), }; static struct afswtch af_bridge = { diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index 8d96867c4b..85090940bd 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -2053,6 +2053,16 @@ to .Ar value . The default is 128. The minimum is 0 and the maximum is 255. +.Pp +The priority is used to select which interface out of all +forwarding and bonded interfaces with the same MAC +to output a packet on whe +.Cm link2 +mode is not being used. +Note that interfaces in the 'blocking' state do not participate +in the priority selection. +If the priorities are the same on a non-bonded member, the +designated member will be used. .It Cm ifpathcost Ar interface Ar value Set the Spanning Tree path cost of .Ar interface @@ -2060,6 +2070,46 @@ to .Ar value . The default is 55. The minimum is 0 and the maximum is 65535. +.Pp +The path cost is added to both incoming and outgoing packets on the +member, lower values will make the member more valuable. +.It Cm ifbondweight Ar interface Ar value +Set the number of packets to output on a bonded member before +round-robining to the next member. +The default is 1. +Larger values or different values for each member can be used +if bursting would be beneficial or if the outgoing bandwidth +on each of the members is assymetric. +For example, one specify a value of 6 on tap0 and 4 on tap1 +for a 6:4 ratio. +Remember that this also controls packet bursting. +.It Cm link0 +The link0 option enables transparent bridging mode. +The bridge will make every effort to retain the ethernet header +when forwarding packets between interfaces, making the bridging +function work more like a hardware bridge device. +.It Cm link1 +The link1 option enables keepalive transmission and automatically +places a member into a special blocked mode if no keepalive reception +occurs. +If either sides of the link uses this option then both sides must use +this option. +This option is impemented by sending CFG updates on the hello interval +to the remote. +The link is considered lost after 10 intervals (typically 20 seconds). +.It Cm link2 +The link2 option enables channel bonding (see also ifbondweight). +All member interfaces with the same mac address are considered to +be in a bonding group. +When something like +.Xr tap 4 +is used, you can manually control or copy the mac to create bonding groups. +When interface bonding is enabled normally blocked interfaces belonging +to the same bonding group as an active forwarding interface will be +changed to the bonding state. +Both sides of link the member represents must operate in bonding mode +for this to work, otherwise the remote end may decide to throw away +half your packets. .El .Pp The following parameters are specific to IP tunnel interfaces, diff --git a/share/man/man4/bridge.4 b/share/man/man4/bridge.4 index ef7b50f998..defd8dbd6b 100644 --- a/share/man/man4/bridge.4 +++ b/share/man/man4/bridge.4 @@ -77,11 +77,38 @@ The bridge operates in a safe mode by default, setting the MAC source in the link header on outgoing packets to the outgoing interface MAC. This reduces the chance that the layer-2 switching in your switches will become confused. -To operate the bridge in transparent MAC mode you must set the -.Cm link0 -flag on the bridge interface via -.Xr ifconfig 8 -and then carefully check that your network is still fully operational. +.Pp +The bridge supports various special features via +.Cm link +options. +.Pp +.It Cm link0 +The link0 option enables transparent bridging mode. +The bridge will make every effort to retain the ethernet header +when forwarding packets between interfaces, making the bridging +function work more like a hardware bridge device. +.It Cm link1 +The link1 option enables keepalive transmission and automatically +places a member into a special blocked mode if no keepalive reception +occurs. +If either sides of the link uses this option then both sides must use +this option. +This option is impemented by sending CFG updates on the hello interval +to the remote. +The link is considered lost after 10 intervals (typically 20 seconds). +.It Cm link2 +The link2 option enables channel bonding (see also ifbondweight). +All member interfaces with the same mac address are considered to +be in a bonding group. +When something like +.Xr tap 4 +is used, you can manually control or copy the mac to create bonding groups. +When interface bonding is enabled normally blocked interfaces belonging +to the same bonding group as an active forwarding interface will be +changed to the bonding state. +Both sides of link the member represents must operate in bonding mode +for this to work, otherwise the remote end may decide to throw away +half your packets. .Pp If your network becomes glitchy, with long pauses in tcp sessions, then transparent bridging mode is likely the cause. This mode should only be @@ -89,6 +116,14 @@ used when you are bridging networks with devices that do MAC-based security or firewalling (for example, the supremely braindead at&t uverse router), or which impose severe limitations on MAC:IP assignments. .Pp +If member interfaces constantly enter a 'blocked (link1)' state then the +other end of those interfaces is not implementing the link1 keepalive. +Both sides must implement the keepalive. +.Pp +If you get an enormous amount of packet loss and are using link2-based +bonding, then the other side of those member interfaces are probably +not implementing link2-based bonding. +.Pp The .Nm driver implements the IEEE 802.1D Spanning Tree protocol (STP). diff --git a/sys/net/bridge/bridgestp.c b/sys/net/bridge/bridgestp.c index dacfcc39e5..a7e7961d0f 100644 --- a/sys/net/bridge/bridgestp.c +++ b/sys/net/bridge/bridgestp.c @@ -155,6 +155,8 @@ static void bstp_make_blocking(struct bridge_softc *, struct bridge_iflist *); static void bstp_make_l1blocking(struct bridge_softc *sc, struct bridge_iflist *bif); +static void bstp_adjust_bonded_states(struct bridge_softc *sc, + struct bridge_iflist *obif); static void bstp_set_port_state(struct bridge_iflist *, uint8_t); #ifdef notused static void bstp_set_bridge_priority(struct bridge_softc *, uint64_t); @@ -607,7 +609,7 @@ set_port2: * (yet), or the peer who is closest to the root. We push this port towards * a FORWARDING state as well. * - * Any remaining ports are pushed towards a BLOCKED state. Both sides of + * Any remaining ports are pushed towards a BLOCKING state. Both sides of * the port (us and our peer) should wind up placing the two ends in this * state or bad things happen. */ @@ -661,7 +663,8 @@ bstp_clear_peer_info(struct bridge_softc *sc, struct bridge_iflist *bif) static void bstp_make_forwarding(struct bridge_softc *sc, struct bridge_iflist *bif) { - if (bif->bif_state == BSTP_IFSTATE_BLOCKING) { + if (bif->bif_state == BSTP_IFSTATE_BLOCKING || + bif->bif_state == BSTP_IFSTATE_BONDED) { bstp_set_port_state(bif, BSTP_IFSTATE_LISTENING); bstp_timer_start(&bif->bif_forward_delay_timer, 0); } @@ -672,6 +675,7 @@ bstp_make_blocking(struct bridge_softc *sc, struct bridge_iflist *bif) { if (bif->bif_state != BSTP_IFSTATE_DISABLED && bif->bif_state != BSTP_IFSTATE_BLOCKING && + bif->bif_state != BSTP_IFSTATE_BONDED && bif->bif_state != BSTP_IFSTATE_L1BLOCKING) { if ((bif->bif_state == BSTP_IFSTATE_FORWARDING) || (bif->bif_state == BSTP_IFSTATE_LEARNING)) { @@ -682,17 +686,22 @@ bstp_make_blocking(struct bridge_softc *sc, struct bridge_iflist *bif) bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING); bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN); bstp_timer_stop(&bif->bif_forward_delay_timer); + if (sc->sc_ifp->if_flags & IFF_LINK2) + bstp_adjust_bonded_states(sc, bif); } } static void bstp_make_l1blocking(struct bridge_softc *sc, struct bridge_iflist *bif) { + int was_forwarding = (bif->bif_state == BSTP_IFSTATE_FORWARDING); + switch(bif->bif_state) { case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_LEARNING: case BSTP_IFSTATE_FORWARDING: case BSTP_IFSTATE_BLOCKING: + case BSTP_IFSTATE_BONDED: bstp_set_port_state(bif, BSTP_IFSTATE_L1BLOCKING); bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN); bstp_timer_stop(&bif->bif_forward_delay_timer); @@ -702,12 +711,59 @@ bstp_make_l1blocking(struct bridge_softc *sc, struct bridge_iflist *bif) bstp_configuration_update(sc); bstp_port_state_selection(sc); } + if (was_forwarding && (sc->sc_ifp->if_flags & IFF_LINK2)) + bstp_adjust_bonded_states(sc, bif); break; default: break; } } +/* + * Member (bif) changes to or from a FORWARDING state. All members in the + * same bonding group which are in a BLOCKING or BONDED state must be set + * to either BLOCKING or BONDED based on whether any members in the bonding + * group remain in the FORWARDING state. + * + * Going between the BLOCKING and BONDED states does not require a + * configuration update. + */ +static void +bstp_adjust_bonded_states(struct bridge_softc *sc, struct bridge_iflist *obif) +{ + struct bridge_iflist *bif; + int state = BSTP_IFSTATE_BLOCKING; + + TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) { + if ((bif->bif_flags & IFBIF_STP) == 0) + continue; + if (bif->bif_state != BSTP_IFSTATE_FORWARDING) + continue; + if (memcmp(IF_LLADDR(bif->bif_ifp), IF_LLADDR(obif->bif_ifp), + ETHER_ADDR_LEN) != 0) { + continue; + } + state = BSTP_IFSTATE_BONDED; + break; + } + TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) { + if ((bif->bif_flags & IFBIF_STP) == 0) + continue; + if (bif->bif_state != BSTP_IFSTATE_BLOCKING && + bif->bif_state != BSTP_IFSTATE_BONDED) { + continue; + } + if (memcmp(IF_LLADDR(bif->bif_ifp), IF_LLADDR(obif->bif_ifp), + ETHER_ADDR_LEN) != 0) { + continue; + } + if (bif->bif_bond_weight == 0) + bif->bif_state = BSTP_IFSTATE_BLOCKING; + else + bif->bif_state = state; + } +} + static void bstp_set_port_state(struct bridge_iflist *bif, uint8_t state) { @@ -978,6 +1034,8 @@ bstp_forward_delay_timer_expiry(struct bridge_softc *sc, bif->bif_change_detection_enabled) { bstp_topology_change_detection(sc); } + if (sc->sc_ifp->if_flags & IFF_LINK2) + bstp_adjust_bonded_states(sc, bif); } } @@ -1150,6 +1208,10 @@ bstp_stop(struct bridge_softc *sc) static void bstp_initialize_port(struct bridge_softc *sc, struct bridge_iflist *bif) { + int needs_adjust = (bif->bif_state == BSTP_IFSTATE_FORWARDING || + bif->bif_state == BSTP_IFSTATE_BLOCKING || + bif->bif_state == BSTP_IFSTATE_BONDED); + bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING); bstp_clear_peer_info(sc, bif); bif->bif_topology_change_acknowledge = 0; @@ -1159,6 +1221,8 @@ bstp_initialize_port(struct bridge_softc *sc, struct bridge_iflist *bif) bstp_timer_stop(&bif->bif_forward_delay_timer); bstp_timer_stop(&bif->bif_hold_timer); bstp_timer_stop(&bif->bif_link1_timer); + if (needs_adjust && (sc->sc_ifp->if_flags & IFF_LINK2)) + bstp_adjust_bonded_states(sc, bif); } static void @@ -1173,6 +1237,7 @@ bstp_enable_port(struct bridge_softc *sc, struct bridge_iflist *bif) static void bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif) { + int was_forwarding = (bif->bif_state == BSTP_IFSTATE_FORWARDING); int iamroot; iamroot = bstp_root_bridge(sc); @@ -1187,6 +1252,8 @@ bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif) bstp_configuration_update(sc); bstp_port_state_selection(sc); bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN); + if (was_forwarding && (sc->sc_ifp->if_flags & IFF_LINK2)) + bstp_adjust_bonded_states(sc, bif); if (iamroot == 0 && bstp_root_bridge(sc)) { sc->sc_max_age = sc->sc_bridge_max_age; diff --git a/sys/net/bridge/if_bridge.c b/sys/net/bridge/if_bridge.c index deb12583de..d9cd8d9881 100644 --- a/sys/net/bridge/if_bridge.c +++ b/sys/net/bridge/if_bridge.c @@ -454,6 +454,7 @@ static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); +static int bridge_ioctl_sifbondwght(struct bridge_softc *, void *); static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); static int bridge_ip_checkbasic(struct mbuf **mp); @@ -577,6 +578,10 @@ const struct bridge_control bridge_control_table[] = { BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_delspan, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_sifbondwght, sizeof(struct ifbreq), + BC_F_COPYIN|BC_F_SUSER }, + }; static const int bridge_control_table_size = NELEM(bridge_control_table); @@ -1173,6 +1178,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY; bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST; bif_info->bifi_ifp = ifs; + bif_info->bifi_bond_weight = 1; /* * Release bridge interface's serializer: @@ -1265,6 +1271,7 @@ bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif, req->ifbr_state = bif->bif_state; req->ifbr_priority = bif->bif_priority; req->ifbr_path_cost = bif->bif_path_cost; + req->ifbr_bond_weight = bif->bif_bond_weight; req->ifbr_portno = bif->bif_ifp->if_index & 0xff; if (bif->bif_flags & IFBIF_STP) { req->ifbr_peer_root = bif->bif_peer_root; @@ -1685,6 +1692,23 @@ bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) return (0); } +static int +bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg) +{ + struct ifbreq *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbr_ifsname); + if (bif == NULL) + return (ENOENT); + + bif->bif_bond_weight = req->ifbr_bond_weight; + + /* no reinit needed */ + + return (0); +} + static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { @@ -1907,9 +1931,10 @@ bridge_output(struct ifnet *ifp, struct mbuf *m) struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif, *nbif; struct ether_header *eh; - struct ifnet *dst_if, *bifp; + struct ifnet *dst_if, *alt_if, *bifp; int from_us; int priority; + int alt_priority; ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp); @@ -1957,13 +1982,17 @@ bridge_output(struct ifnet *ifp, struct mbuf *m) if (dst_if == NULL) { struct mbuf *mc; int used = 0; + int found = 0; if (sc->sc_span) bridge_span(sc, m); + alt_if = NULL; + alt_priority = 0; TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) { dst_if = bif->bif_ifp; + if ((dst_if->if_flags & IFF_RUNNING) == 0) continue; @@ -1972,18 +2001,55 @@ bridge_output(struct ifnet *ifp, struct mbuf *m) * and the interface is participating in spanning * tree, make sure the port is in a state that * allows forwarding. + * + * We keep track of a possible backup IF if we are + * unable to find any interfaces to forward through. + * + * NOTE: Currently round-robining is not implemented + * across bonded interface groups (needs an + * algorithm to track each group somehow). + * + * Similarly we track only one alternative + * interface if no suitable interfaces are + * found. */ if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) != 0) { switch (bif->bif_state) { - case BSTP_IFSTATE_L1BLOCKING: + case BSTP_IFSTATE_BONDED: + if (bif->bif_priority + 512 > + alt_priority) { + alt_priority = + bif->bif_priority + 512; + alt_if = bif->bif_ifp; + } + continue; case BSTP_IFSTATE_BLOCKING: + if (bif->bif_priority + 256 > + alt_priority) { + alt_priority = + bif->bif_priority + 256; + alt_if = bif->bif_ifp; + } + continue; + case BSTP_IFSTATE_LEARNING: + if (bif->bif_priority > alt_priority) { + alt_priority = + bif->bif_priority; + alt_if = bif->bif_ifp; + } + continue; + case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_DISABLED: continue; + default: + /* FORWARDING */ + break; } } + KKASSERT(used == 0); if (TAILQ_NEXT(bif, bif_next) == NULL) { used = 1; mc = m; @@ -1999,12 +2065,25 @@ bridge_output(struct ifnet *ifp, struct mbuf *m) * If the packet is 'from' us override ether_shost. */ bridge_handoff(sc, dst_if, mc, from_us); + found = 1; if (nbif != NULL && !nbif->bif_onlist) { KKASSERT(bif->bif_onlist); nbif = TAILQ_NEXT(bif, bif_next); } } + + /* + * If we couldn't find anything use the backup interface + * if we have one. + */ + if (found == 0 && alt_if) { + KKASSERT(used == 0); + mc = m; + used = 1; + bridge_handoff(sc, alt_if, mc, from_us); + } + if (used == 0) m_freem(m); return (0); @@ -2023,14 +2102,30 @@ sendunicast: * * If LINK2 is set the matching links are bonded and we-round robin. * (the MAC address must be the same for the participating links). - * In this case links in a STP BLOCKING state are allowed for unicast - * packets. + * In this case links in a STP FORWARDING or BONDED state are + * allowed for unicast packets. */ bif = bridge_lookup_member_if(sc, dst_if); if (bif->bif_flags & IFBIF_STP) { + alt_if = NULL; priority = 0; + alt_priority = 0; + TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) { + /* + * Ignore member interfaces which aren't running. + */ + if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0) + continue; + + /* + * member interfaces with the same MAC (usually TAPs) + * are considered to be the same. Select the best + * one from BONDED or FORWARDING and keep track of + * the best one in the BLOCKING state if no + * candidates are available otherwise. + */ if (memcmp(IF_LLADDR(bif->bif_ifp), IF_LLADDR(dst_if), ETHER_ADDR_LEN) != 0) { @@ -2039,18 +2134,25 @@ sendunicast: switch(bif->bif_state) { case BSTP_IFSTATE_BLOCKING: - if (sc->sc_ifp->if_flags & IFF_LINK2) - break; - /* fall through */ + if (bif->bif_priority > alt_priority + 256) { + alt_priority = bif->bif_priority + 256; + alt_if = bif->bif_ifp; + } + continue; + case BSTP_IFSTATE_LEARNING: + if (bif->bif_priority > alt_priority) { + alt_priority = bif->bif_priority; + alt_if = bif->bif_ifp; + } + continue; case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_DISABLED: continue; default: + /* bonded, forwarding */ break; } - if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0) - continue; /* * XXX we need to use the toepliz hash or @@ -2059,11 +2161,16 @@ sendunicast: */ if (sc->sc_ifp->if_flags & IFF_LINK2) { dst_if = bif->bif_ifp; - TAILQ_REMOVE(&sc->sc_iflists[mycpuid], - bif, bif_next); - TAILQ_INSERT_TAIL( - &sc->sc_iflists[mycpuid], - bif, bif_next); + if (++bif->bif_bond_count >= + bif->bif_bond_weight) { + bif->bif_bond_count = 0; + TAILQ_REMOVE(&sc->sc_iflists[mycpuid], + bif, bif_next); + TAILQ_INSERT_TAIL( + &sc->sc_iflists[mycpuid], + bif, bif_next); + } + priority = 1; break; } if (bif->bif_priority > priority) { @@ -2071,6 +2178,12 @@ sendunicast: dst_if = bif->bif_ifp; } } + + /* + * Interface of last resort if nothing was found. + */ + if (priority == 0 && alt_if) + dst_if = alt_if; } if (sc->sc_span) @@ -2150,15 +2263,17 @@ bridge_start(struct ifnet *ifp) * Forward packets received on a bridge interface via the input * path. * - * The forwarding function of the bridge. + * This implements the forwarding function of the bridge. */ static void bridge_forward(struct bridge_softc *sc, struct mbuf *m) { struct bridge_iflist *bif, *nbif; - struct ifnet *src_if, *dst_if, *ifp; + struct ifnet *src_if, *dst_if, *alt_if, *ifp; struct ether_header *eh; int priority; + int alt_priority; + int from_blocking; src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; @@ -2178,23 +2293,29 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) return; } + /* + * In spanning tree mode receiving a packet from an interface + * in a BLOCKING state is allowed, it could be a member of last + * resort from the sender's point of view, but forwarding it is + * not allowed. + * + * The sender's spanning tree will eventually sync up and the + * sender will go into a BLOCKING state too (but this still may be + * an interface of last resort during state changes). + */ if (bif->bif_flags & IFBIF_STP) { switch (bif->bif_state) { - case BSTP_IFSTATE_BLOCKING: - if ((sc->sc_ifp->if_flags & IFF_LINK2) && - (m->m_flags & (M_BCAST|M_MCAST)) == 0) { - break; - } - /* fall through */ case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_DISABLED: m_freem(m); return; default: + /* learning, blocking, bonded, forwarding */ break; } } + from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING); eh = mtod(m, struct ether_header *); @@ -2204,7 +2325,7 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) * the address. */ if ((bif->bif_flags & IFBIF_LEARNING) != 0 && - bif->bif_state != BSTP_IFSTATE_BLOCKING && + from_blocking == 0 && ETHER_IS_MULTICAST(eh->ether_shost) == 0 && (eh->ether_shost[0] == 0 && eh->ether_shost[1] == 0 && @@ -2215,8 +2336,18 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC); } + /* + * Don't forward from an interface in the listening or learning + * state. That is, in the learning state we learn information + * but we throw away the packets. + * + * We let through packets on interfaces in the blocking state. + * The blocking state is applicable to the send side, not the + * receive side. + */ if ((bif->bif_flags & IFBIF_STP) != 0 && - bif->bif_state == BSTP_IFSTATE_LEARNING) { + (bif->bif_state == BSTP_IFSTATE_LISTENING || + bif->bif_state == BSTP_IFSTATE_LEARNING)) { m_freem(m); return; } @@ -2242,8 +2373,16 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) dst_if = NULL; } + /* + * Brodcast if we do not have forwarding information. However, if + * we received the packet on a blocking interface we do not do this + * (unless you really want to blow up your network). + */ if (dst_if == NULL) { - bridge_broadcast(sc, src_if, m); + if (from_blocking) + m_freem(m); + else + bridge_broadcast(sc, src_if, m); return; } @@ -2258,7 +2397,10 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) } if (bif->bif_flags & IFBIF_STP) { + alt_if = NULL; + alt_priority = 0; priority = 0; + TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) { if (memcmp(IF_LLADDR(bif->bif_ifp), @@ -2267,22 +2409,43 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) continue; } + if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0) + continue; + + /* + * NOTE: We allow tranmissions through a BLOCKING + * or LEARNING interface only as a last resort. + * We DISALLOW both cases if the receiving + * + * NOTE: If we send a packet through a learning + * interface the receiving end (if also in + * LEARNING) will throw it away, so this is + * the ultimate last resort. + */ switch(bif->bif_state) { case BSTP_IFSTATE_BLOCKING: - if (sc->sc_ifp->if_flags & IFF_LINK2) - break; - /* fall through */ + if (from_blocking == 0 && + bif->bif_priority + 256 > alt_priority) { + alt_priority = bif->bif_priority + 256; + alt_if = bif->bif_ifp; + } + continue; + case BSTP_IFSTATE_LEARNING: + if (from_blocking == 0 && + bif->bif_priority > alt_priority) { + alt_priority = bif->bif_priority; + alt_if = bif->bif_ifp; + } + continue; case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_DISABLED: continue; default: + /* FORWARDING, BONDED */ break; } - if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0) - continue; - /* * XXX we need to use the toepliz hash or * something like that instead of @@ -2290,18 +2453,39 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m) */ if (sc->sc_ifp->if_flags & IFF_LINK2) { dst_if = bif->bif_ifp; - TAILQ_REMOVE(&sc->sc_iflists[mycpuid], - bif, bif_next); - TAILQ_INSERT_TAIL( - &sc->sc_iflists[mycpuid], - bif, bif_next); + if (++bif->bif_bond_count >= + bif->bif_bond_weight) { + bif->bif_bond_count = 0; + TAILQ_REMOVE(&sc->sc_iflists[mycpuid], + bif, bif_next); + TAILQ_INSERT_TAIL( + &sc->sc_iflists[mycpuid], + bif, bif_next); + } + priority = 1; break; } + + /* + * Select best interface in the FORWARDING or + * BONDED set. Well, there shouldn't be any + * in a BONDED state if LINK2 is not set (they + * will all be in a BLOCKING) state, but there + * could be a transitory condition here. + */ if (bif->bif_priority > priority) { priority = bif->bif_priority; dst_if = bif->bif_ifp; } } + + /* + * If no suitable interfaces were found but a suitable + * alternative interface was found, use the alternative + * interface. + */ + if (priority == 0 && alt_if) + dst_if = alt_if; } /* @@ -2345,6 +2529,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) struct ifnet *bifp, *new_ifp; struct ether_header *eh; struct mbuf *mc, *mc2; + int from_blocking; ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp); @@ -2501,14 +2686,24 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) /* * Other than 802.11d packets, ignore packets if the * interface is not in a good state. + * + * NOTE: Broadcast/mcast packets received on a blocking or + * learning interface are allowed for local processing. + * + * The sending side of a blocked port will stop + * transmitting when a better alternative is found. + * However, later on we will disallow the forwarding + * of bcast/mcsat packets over a blocking interface. */ if (bif->bif_flags & IFBIF_STP) { switch (bif->bif_state) { case BSTP_IFSTATE_L1BLOCKING: - case BSTP_IFSTATE_BLOCKING: case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_DISABLED: goto out; + default: + /* blocking, learning, bonded, forwarding */ + break; } } @@ -2521,7 +2716,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) if (mc == NULL) goto out; - bridge_forward(sc, mc); + /* + * It's just too dangerous to allow bcast/mcast over a + * blocked interface, eventually the network will sort + * itself out and a better path will be found. + */ + if ((bif->bif_flags & IFBIF_STP) == 0 || + bif->bif_state != BSTP_IFSTATE_BLOCKING) { + bridge_forward(sc, mc); + } /* * Reinject the mbuf as arriving on the bridge so we have a @@ -2560,7 +2763,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) /* * Input of a unicast packet. We have to allow unicast packets - * input from links in the BLOCKING state. + * input from links in the BLOCKING state as this might be an + * interface of last resort. * * NOTE: We explicitly ignore normal packets received on a link * in the BLOCKING state. The point of being in that state @@ -2572,19 +2776,18 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) * case (hence we only do it in LINK2), but it isn't quite as * bad as a broadcast packet looping. */ + from_blocking = 0; if (bif->bif_flags & IFBIF_STP) { switch (bif->bif_state) { -#if 0 - case BSTP_IFSTATE_BLOCKING: - if (sc->sc_ifp->if_flags & IFF_LINK2) - break; - /* fall through */ -#endif case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_LISTENING: case BSTP_IFSTATE_DISABLED: goto out; + case BSTP_IFSTATE_BLOCKING: + from_blocking = 1; + /* fall through */ default: + /* blocking, bonded, forwarding, learning */ break; } } @@ -2606,6 +2809,10 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) * i.e. if you ping an IP on a target interface associated * with the bridge, the arp is-at response should indicate * the bridge MAC. + * + * Only update our addr list when learning if the port + * is not in a blocking state. If it is we still allow + * the packet but we do not try to learn from it. */ if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost, ETHER_ADDR_LEN) == 0) { @@ -2613,7 +2820,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) /* XXX loop prevention */ m->m_flags |= M_ETHER_BRIDGED; } - if (bif->bif_flags & IFBIF_LEARNING) { + if ((bif->bif_flags & IFBIF_LEARNING) && + bif->bif_state != BSTP_IFSTATE_BLOCKING) { bridge_rtupdate(sc, eh->ether_shost, ifp, IFBAF_DYNAMIC); } @@ -2633,7 +2841,13 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) } } - /* Perform the bridge forwarding function. */ + /* + * It isn't for us. + * + * Perform the bridge forwarding function, but disallow bridging + * to interfaces in the blocking state if the packet came in on + * an interface in the blocking state. + */ bridge_forward(sc, m); m = NULL; @@ -2667,8 +2881,10 @@ bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m) { struct bridge_iflist *bif; struct mbuf *mc; - struct ifnet *dst_if, *bifp; + struct ifnet *dst_if, *alt_if, *bifp; int used = 0; + int found = 0; + int alt_priority; bifp = sc->sc_ifp; ASSERT_IFNET_SERIALIZED_ALL(bifp); @@ -2676,16 +2892,30 @@ bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m) /* * Following loop is MPSAFE; nothing is blocking * in the loop body. + * + * NOTE: We transmit through an member in the BLOCKING state only + * as a last resort. */ + alt_if = NULL; + alt_priority = 0; + TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) { dst_if = bif->bif_ifp; if (bif->bif_flags & IFBIF_STP) { switch (bif->bif_state) { - case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_BLOCKING: + if (bif->bif_priority > alt_priority) { + alt_priority = bif->bif_priority; + alt_if = bif->bif_ifp; + } + /* fall through */ + case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_DISABLED: continue; + default: + /* listening, learning, bonded, forwarding */ + break; } } @@ -2706,8 +2936,17 @@ bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m) continue; } } + found = 1; bridge_enqueue(dst_if, mc); } + + if (found == 0 && alt_if) { + KKASSERT(used == 0); + mc = m; + used = 1; + bridge_enqueue(alt_if, mc); + } + if (used == 0) m_freem(m); } @@ -2726,8 +2965,10 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, struct bridge_iflist *bif, *nbif; struct ether_header *eh; struct mbuf *mc; - struct ifnet *dst_if, *bifp; - int used = 0; + struct ifnet *dst_if, *alt_if, *bifp; + int used; + int found; + int alt_priority; int from_us; bifp = sc->sc_ifp; @@ -2753,26 +2994,59 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, return; } + alt_if = 0; + alt_priority = 0; + found = 0; + used = 0; + TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) { dst_if = bif->bif_ifp; if (dst_if == src_if) continue; + if ((dst_if->if_flags & IFF_RUNNING) == 0) + continue; + + /* + * Generally speaking we only broadcast through forwarding + * interfaces. If no interfaces are available we select + * a BONDED, BLOCKING, or LEARNING interface to forward + * through. + */ if (bif->bif_flags & IFBIF_STP) { switch (bif->bif_state) { - case BSTP_IFSTATE_L1BLOCKING: + case BSTP_IFSTATE_BONDED: + if (bif->bif_priority + 512 > alt_priority) { + alt_priority = bif->bif_priority + 512; + alt_if = bif->bif_ifp; + } + continue; case BSTP_IFSTATE_BLOCKING: + if (bif->bif_priority + 256 > alt_priority) { + alt_priority = bif->bif_priority + 256; + alt_if = bif->bif_ifp; + } + continue; + case BSTP_IFSTATE_LEARNING: + if (bif->bif_priority > alt_priority) { + alt_priority = bif->bif_priority; + alt_if = bif->bif_ifp; + } + continue; + case BSTP_IFSTATE_L1BLOCKING: case BSTP_IFSTATE_DISABLED: + case BSTP_IFSTATE_LISTENING: continue; + default: + /* forwarding */ + break; } } if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && - (m->m_flags & (M_BCAST|M_MCAST)) == 0) - continue; - - if ((dst_if->if_flags & IFF_RUNNING) == 0) + (m->m_flags & (M_BCAST|M_MCAST)) == 0) { continue; + } if (TAILQ_NEXT(bif, bif_next) == NULL) { mc = m; @@ -2784,6 +3058,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, continue; } } + found = 1; /* * Filter on the output interface. Pass a NULL bridge @@ -2807,6 +3082,14 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, nbif = TAILQ_NEXT(bif, bif_next); } } + + if (found == 0 && alt_if) { + KKASSERT(used == 0); + mc = m; + used = 1; + bridge_enqueue(alt_if, mc); + } + if (used == 0) m_freem(m); } diff --git a/sys/net/bridge/if_bridgevar.h b/sys/net/bridge/if_bridgevar.h index f40f8b9f90..5d9dd92be1 100644 --- a/sys/net/bridge/if_bridgevar.h +++ b/sys/net/bridge/if_bridgevar.h @@ -109,6 +109,7 @@ #define BRDGSIFCOST 22 /* set if path cost (ifbreq) */ #define BRDGADDS 23 /* add bridge span member (ifbreq) */ #define BRDGDELS 24 /* delete bridge span member (ifbreq) */ +#define BRDGSBONDWGHT 25 /* set bonding weighting (ifbreq) */ /* * Generic bridge control request. @@ -130,6 +131,9 @@ struct ifbreq { uint32_t ifbr_peer_cost; /* from peer */ uint16_t ifbr_peer_port; /* from peer */ uint16_t unused02; + uint16_t ifbr_bond_weight; + uint16_t unused03; + uint32_t unused04[8]; }; /* BRDGGIFFLAGS, BRDGSIFFLAGS */ @@ -156,7 +160,8 @@ struct ifbreq { #define BSTP_IFSTATE_LEARNING 2 #define BSTP_IFSTATE_FORWARDING 3 #define BSTP_IFSTATE_BLOCKING 4 -#define BSTP_IFSTATE_L1BLOCKING 5 /* link1 blocking mode no-activity */ +#define BSTP_IFSTATE_BONDED 5 /* link2 bonding mode */ +#define BSTP_IFSTATE_L1BLOCKING 6 /* link1 blocking mode no-activity */ /* * Interface list structure. @@ -268,6 +273,7 @@ struct bridge_ifinfo { uint8_t bifi_priority; struct ifnet *bifi_ifp; /* member if */ int bifi_mutecap; /* member muted caps */ + int bifi_bond_weight; /* when link2 active */ }; #define bif_peer_root bif_info->bifi_peer_root @@ -289,6 +295,7 @@ struct bridge_ifinfo { #define bif_change_detection_enabled bif_info->bifi_change_detection_enabled #define bif_priority bif_info->bifi_priority #define bif_message_age_timer bif_info->bifi_message_age_timer +#define bif_bond_weight bif_info->bifi_bond_weight /* * Bridge interface list entry. @@ -298,6 +305,7 @@ struct bridge_iflist { struct ifnet *bif_ifp; /* member if */ int bif_onlist; struct bridge_ifinfo *bif_info; + int bif_bond_count; /* when link2 active */ }; TAILQ_HEAD(bridge_iflist_head, bridge_iflist);