kernel - Major bridging functionality completed
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 24 Feb 2011 08:28:23 +0000 (00:28 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 24 Feb 2011 09:03:13 +0000 (01:03 -0800)
* Rewrite the spanning tree algorithm.  Not well tested but both sides
  properly calculate the blocking pairs for the ports whereas before
  they did not.

  Document the code as needed.  The poor documentation created a lot of
  unnecessary headaches.

  Separate out the peer state from the aggregated state.

* Greatly enhance the 'ifconfig [-v] bridgeN' status output so one
  can see exactly what the state of the sub-interfaces is.

* The bridge interface's ether address can now be modified.  It is also
  possible to add IP addresses to the bridge interface but this has
  not been tested well and might not work.

* Nearly all traffic sourced from interfaces attached to the bridge
  now uses the bridge's MAC address.  This includes ARP.  Theoretically
  this means that ganged links between bridges (bonding is NOT yet
  supported! Strictly master/backup)... should be able to failover
  without destroying the ARP tables on various systems.

* Add an experimental LINK2 option to the bridge.  This will eventually
  be channel bonding but doesn't work so hot right now.  At the moment
  it just round-robins output on sub-interfaces with the same MAC (usually
  TAP interfaces).  It all gets aggregated using the bridge's MAC but the
  comparison is used to create bonding groups.

  This one needs considerably more work on properly adjusting its state
  to DESIGNATED instead of hacking packets over members in the BLOCKING
  state.

* Clean up some of the state transitions used by the LINK1 failover
  feature.

* Change the bridge interface to IFT_ETHER to allow IP and MAC assignments
  and for it to be properly handled in the rest of the stack.

* Aggregate input from all member interfaces into the bridge proper and
  re-output/forward/route as appropriate using the stateful information
  available in the bif lists to handle failover and other features.

* ARP handles MAC snafus due to bridging a little better.

* Changeover to TAILQs from LISTs for bifs.

* Move bif_flags to the bif_info structure so we can use it to hold
  active state.

* Implement SIOCGIFMEDIA in IF_TAP (still needs some work).  This
  is required by the bridge code to properly be able to use TAP
  interfaces as members.

sbin/ifconfig/ifbridge.c
sys/net/bridge/bridgestp.c
sys/net/bridge/if_bridge.c
sys/net/bridge/if_bridgevar.h
sys/net/if_ethersubr.c
sys/net/if_var.h
sys/net/tap/if_tap.c
sys/netinet/if_ether.c

index 5f86d6e..042ff1f 100644 (file)
@@ -172,6 +172,16 @@ bridge_interfaces(int s, const char *prefix)
                                pad, req->ifbr_designated_cost);
                        printf("%sdesignated port:   %u\n",
                                pad, req->ifbr_designated_port);
+                       if (verbose) {
+                               printf("%speer root:   %016jx\n",
+                                       pad, (intmax_t)req->ifbr_peer_root);
+                               printf("%speer bridge: %016jx\n",
+                                       pad, (intmax_t)req->ifbr_peer_bridge);
+                               printf("%speer cost:   %u\n",
+                                       pad, req->ifbr_peer_cost);
+                               printf("%speer port:   %u\n",
+                                       pad, req->ifbr_peer_port);
+                       }
                }
        }
 
index f3a4690..dacfcc3 100644 (file)
@@ -131,11 +131,6 @@ static void        bstp_enable_change_detection(struct bridge_iflist *);
 static void    bstp_disable_change_detection(struct bridge_iflist *);
 #endif /* notused */
 static int     bstp_root_bridge(struct bridge_softc *sc);
-static int     bstp_supersedes_port_info(struct bridge_softc *,
-                   struct bridge_iflist *, struct bstp_config_unit *);
-static int     bstp_designated_port(struct bridge_softc *,
-                   struct bridge_iflist *);
-static int     bstp_designated_for_some_port(struct bridge_softc *);
 static void    bstp_transmit_config(struct bridge_softc *,
                    struct bridge_iflist *);
 static void    bstp_transmit_tcn(struct bridge_softc *);
@@ -151,11 +146,9 @@ static void        bstp_config_bpdu_generation(struct bridge_softc *);
 static void    bstp_send_config_bpdu(struct bridge_softc *,
                    struct bridge_iflist *, struct bstp_config_unit *);
 static void    bstp_configuration_update(struct bridge_softc *);
-static void    bstp_root_selection(struct bridge_softc *);
-static void    bstp_designated_port_selection(struct bridge_softc *);
-static void    bstp_become_designated_port(struct bridge_softc *,
-                   struct bridge_iflist *);
 static void    bstp_port_state_selection(struct bridge_softc *);
+static void    bstp_clear_peer_info(struct bridge_softc *,
+                   struct bridge_iflist *);
 static void    bstp_make_forwarding(struct bridge_softc *,
                    struct bridge_iflist *);
 static void    bstp_make_blocking(struct bridge_softc *,
@@ -191,6 +184,10 @@ static void        bstp_tcn_timer_expiry(struct bridge_softc *);
 static void    bstp_hello_timer_expiry(struct bridge_softc *);
 static int     bstp_addr_cmp(const uint8_t *, const uint8_t *);
 
+/*
+ * When transmitting a config we tack on our path cost to
+ * our aggregated path-to-root cost.
+ */
 static void
 bstp_transmit_config(struct bridge_softc *sc, struct bridge_iflist *bif)
 {
@@ -201,16 +198,19 @@ bstp_transmit_config(struct bridge_softc *sc, struct bridge_iflist *bif)
 
        bif->bif_config_bpdu.cu_message_type = BSTP_MSGTYPE_CFG;
        bif->bif_config_bpdu.cu_rootid = sc->sc_designated_root;
-       bif->bif_config_bpdu.cu_root_path_cost = sc->sc_root_path_cost;
+       bif->bif_config_bpdu.cu_root_path_cost = sc->sc_designated_cost +
+                                                bif->bif_path_cost;
        bif->bif_config_bpdu.cu_bridge_id = sc->sc_bridge_id;
        bif->bif_config_bpdu.cu_port_id = bif->bif_port_id;
 
        if (bstp_root_bridge(sc)) {
                bif->bif_config_bpdu.cu_message_age = 0;
-       } else {
+       } else if (sc->sc_root_port) {
                bif->bif_config_bpdu.cu_message_age =
-                   sc->sc_root_port->bifi_message_age_timer.value +
-                   BSTP_MESSAGE_AGE_INCR;
+                       sc->sc_root_port->bif_message_age_timer.value +
+                       BSTP_MESSAGE_AGE_INCR;
+       } else {
+               bif->bif_config_bpdu.cu_message_age = BSTP_MESSAGE_AGE_INCR;
        }
 
        bif->bif_config_bpdu.cu_max_age = sc->sc_max_age;
@@ -285,7 +285,11 @@ bstp_send_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
        bpdu.cbu_hellotime = htons(cu->cu_hello_time);
        bpdu.cbu_forwarddelay = htons(cu->cu_forward_delay);
 
-       memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+       /*
+        * Packets sent from the bridge always use the bridge MAC
+        * as the source.
+        */
+       memcpy(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
        memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
        eh->ether_type = htons(sizeof(bpdu));
 
@@ -300,46 +304,62 @@ bstp_root_bridge(struct bridge_softc *sc)
        return (sc->sc_designated_root == sc->sc_bridge_id);
 }
 
-static int
-bstp_supersedes_port_info(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_config_unit *cu)
+/*
+ * Returns TRUE if the recorded information from our peer has a shorter
+ * graph distance than our current best.
+ */
+int
+bstp_supersedes_port_info(struct bridge_softc *sc, struct bridge_iflist *bif)
 {
-       if (cu->cu_rootid < bif->bif_designated_root)
+       if (bif->bif_peer_root < sc->sc_designated_root)
                return (1);
-       if (cu->cu_rootid > bif->bif_designated_root)
+       if (bif->bif_peer_root > sc->sc_designated_root)
                return (0);
 
-       if (cu->cu_root_path_cost < bif->bif_designated_cost)
+       /*
+        * Both bif_peer_cost and sc_designated_cost have NOT added in
+        * bif->bif_path_cost, so we can optimize it out.
+        */
+       if (bif->bif_peer_cost < sc->sc_designated_cost)
                return (1);
-       if (cu->cu_root_path_cost > bif->bif_designated_cost)
+       if (bif->bif_peer_cost > sc->sc_designated_cost)
                return (0);
 
-       if (cu->cu_bridge_id < bif->bif_designated_bridge)
+       if (bif->bif_peer_bridge < sc->sc_designated_bridge)
                return (1);
-       if (cu->cu_bridge_id > bif->bif_designated_bridge)
+       if (bif->bif_peer_bridge > sc->sc_designated_bridge)
                return (0);
 
-       if (sc->sc_bridge_id != cu->cu_bridge_id)
+       /* bridge_id or bridge+port collision w/peer returns TRUE */
+       if (bif->bif_peer_bridge != sc->sc_bridge_id)
                return (1);
-       if (cu->cu_port_id <= bif->bif_designated_port)
+       if (bif->bif_peer_port <= sc->sc_designated_port)
                return (1);
        return (0);
 }
 
+/*
+ * The shorter graph distance represented by cu (most of which is also
+ * already stored in our bif_peer_* fields) becomes the designated info.
+ *
+ * NOTE: sc_designated_cost does not include bif_path_cost, it is added
+ *      in later on a port-by-port basis as needed.
+ */
 static void
 bstp_record_config_information(struct bridge_softc *sc,
-    struct bridge_iflist *bif, struct bstp_config_unit *cu)
+                              struct bridge_iflist *bif,
+                              struct bstp_config_unit *cu)
 {
-       bif->bif_designated_root = cu->cu_rootid;
-       bif->bif_designated_cost = cu->cu_root_path_cost;
-       bif->bif_designated_bridge = cu->cu_bridge_id;
-       bif->bif_designated_port = cu->cu_port_id;
+       sc->sc_designated_root = bif->bif_peer_root;
+       sc->sc_designated_cost = bif->bif_peer_cost;
+       sc->sc_designated_bridge = bif->bif_peer_bridge;
+       sc->sc_designated_port = bif->bif_peer_port;
        bstp_timer_start(&bif->bif_message_age_timer, cu->cu_message_age);
 }
 
 static void
 bstp_record_config_timeout_values(struct bridge_softc *sc,
-    struct bstp_config_unit *config)
+                                 struct bstp_config_unit *config)
 {
        sc->sc_max_age = config->cu_max_age;
        sc->sc_hello_time = config->cu_hello_time;
@@ -352,37 +372,34 @@ bstp_config_bpdu_generation(struct bridge_softc *sc)
 {
        struct bridge_iflist *bif, *nbif;
 
-       LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
+       TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
                if (bif->bif_state != BSTP_IFSTATE_DISABLED &&
                    ((sc->sc_ifp->if_flags & IFF_LINK1) ||
-                    bstp_designated_port(sc, bif))) {
+                    (bif->bif_flags & IFBIF_DESIGNATED))) {
                        bstp_transmit_config(sc, bif);
                }
 
                if (nbif != NULL && !nbif->bif_onlist) {
                        KKASSERT(bif->bif_onlist);
-                       nbif = LIST_NEXT(bif, bif_next);
+                       nbif = TAILQ_NEXT(bif, bif_next);
                }
        }
 }
 
-static int
-bstp_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
-{
-       return ((bif->bif_designated_bridge == sc->sc_bridge_id)
-           && (bif->bif_designated_port == bif->bif_port_id));
-}
-
 static void
 bstp_transmit_tcn(struct bridge_softc *sc)
 {
        struct bstp_tbpdu bpdu;
-       struct ifnet *ifp = sc->sc_root_port->bifi_ifp;
+       struct ifnet *ifp;
        struct ether_header *eh;
        struct mbuf *m;
 
+       if (sc->sc_root_port == NULL)   /* all interfaces disabled */
+               return;
+
+       ifp = sc->sc_root_port->bif_ifp;
        if ((ifp->if_flags & IFF_RUNNING) == 0)
                return;
 
@@ -396,7 +413,11 @@ bstp_transmit_tcn(struct bridge_softc *sc)
 
        eh = mtod(m, struct ether_header *);
 
-       memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+       /*
+        * Packets sent from the bridge always use the bridge MAC
+        * as the source.
+        */
+       memcpy(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
        memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
        eh->ether_type = htons(sizeof(bpdu));
 
@@ -411,124 +432,199 @@ bstp_transmit_tcn(struct bridge_softc *sc)
        bridge_enqueue(ifp, m);
 }
 
+/*
+ * Recalculate sc->sc_designated* and sc->sc_root_port (if our bridge
+ * is calculated to be the root bridge).  We do this by initializing
+ * the designated variables to point at us and then scan our peers.
+ * Any uninitialized peers will have a max-value root.
+ *
+ * Clear IFBIF_DESIGNATED on any ports which no longer match the criteria
+ * required to be a designated port.  Only aged out ports and the root
+ * port can be designated.
+ *
+ * If we win we do a second scan to determine which port on our bridge
+ * is the best.
+ */
 static void
 bstp_configuration_update(struct bridge_softc *sc)
 {
-       bstp_root_selection(sc);
-       bstp_designated_port_selection(sc);
-}
-
-static void
-bstp_root_selection(struct bridge_softc *sc)
-{
-       struct bridge_iflist *root_port = NULL, *bif;
+       uint64_t        designated_root = sc->sc_bridge_id;
+       uint64_t        designated_bridge = sc->sc_bridge_id;
+       uint32_t        designated_cost = 0xFFFFFFFFU;
+       uint16_t        designated_port = 65535;
+       struct bridge_iflist *root_port = NULL;
+       struct bridge_iflist *bif;
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       /*
+        * Resolve information from our peers.  Aged peers will have
+        * a maxed bif_peer_root and not come under consideration.
+        */
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
-               if (bstp_designated_port(sc, bif))
-                       continue;
-               if (bif->bif_state == BSTP_IFSTATE_DISABLED)
+               if (bif->bif_state == BSTP_IFSTATE_DISABLED ||
+                   bif->bif_state == BSTP_IFSTATE_L1BLOCKING) {
                        continue;
-               if (bif->bif_designated_root >= sc->sc_bridge_id)
+               }
+
+               if (bif->bif_peer_root > designated_root)
                        continue;
-               if (root_port == NULL)
+               if (bif->bif_peer_root < designated_root)
                        goto set_port;
 
-               if (bif->bif_designated_root < root_port->bif_designated_root)
-                       goto set_port;
-               if (bif->bif_designated_root > root_port->bif_designated_root)
+               /*
+                * NOTE: The designated_cost temporary variable has already
+                *       added in the path cost of the related root port.
+                */
+               if (bif->bif_peer_cost + bif->bif_path_cost > designated_cost)
                        continue;
-
-               if ((bif->bif_designated_cost + bif->bif_path_cost) <
-                   (root_port->bif_designated_cost + root_port->bif_path_cost))
+               if (bif->bif_peer_cost + bif->bif_path_cost < designated_cost)
                        goto set_port;
-               if ((bif->bif_designated_cost + bif->bif_path_cost) >
-                   (root_port->bif_designated_cost + root_port->bif_path_cost))
-                       continue;
 
-               if (bif->bif_designated_bridge <
-                   root_port->bif_designated_bridge)
-                       goto set_port;
-               if (bif->bif_designated_bridge >
-                   root_port->bif_designated_bridge)
+               if (bif->bif_peer_bridge > designated_bridge)
                        continue;
-
-               if (bif->bif_designated_port < root_port->bif_designated_port)
+               if (bif->bif_peer_bridge < designated_bridge)
                        goto set_port;
-               if (bif->bif_designated_port > root_port->bif_designated_port)
+
+               if (bif->bif_peer_port > designated_port)
                        continue;
+               if (bif->bif_peer_port < designated_port)
+                       goto set_port;
 
-               if (bif->bif_port_id >= root_port->bif_port_id)
+               /*
+                * Same root, path cost, bridge, and port.  Set the root
+                * only if we do not already have it.
+                */
+               if (root_port)
                        continue;
+
+               /*
+                * New root port (from peers)
+                *
+                * NOTE: Temporarily add bif_path_cost into the designated
+                *       cost to reduce complexity in the loop, it will be
+                *       subtracted out when we are done.
+                */
 set_port:
+               designated_root = bif->bif_peer_root;
+               designated_cost = bif->bif_peer_cost + bif->bif_path_cost;
+               designated_bridge = bif->bif_peer_bridge;
+               designated_port = bif->bif_peer_port;
                root_port = bif;
        }
 
-       if (root_port == NULL) {
-               sc->sc_root_port = NULL;
-               sc->sc_designated_root = sc->sc_bridge_id;
-               sc->sc_root_path_cost = 0;
-       } else {
-               sc->sc_root_port = root_port->bif_info;
-               sc->sc_designated_root = root_port->bif_designated_root;
-               sc->sc_root_path_cost = root_port->bif_designated_cost +
-                   root_port->bif_path_cost;
-       }
-}
-
-static void
-bstp_designated_port_selection(struct bridge_softc *sc)
-{
-       struct bridge_iflist *bif;
-
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       /*
+        * root_port will be NULL at the start here if all of our
+        * peers are aged or are not as good a root as our bridge would
+        * be.  It can also be NULL due to all related links being
+        * disabled.
+        *
+        * If the root winds up being our bridge scan again against local
+        * information.  Unconditionally update IFBIF_DESIGNATED.
+        */
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+               bif->bif_flags &= ~(IFBIF_DESIGNATED | IFBIF_ROOT);
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
-               if (bstp_designated_port(sc, bif))
-                       goto designated;
-               if (bif->bif_designated_root != sc->sc_designated_root)
-                       goto designated;
-
-               if (sc->sc_root_path_cost < bif->bif_designated_cost)
-                       goto designated;
-               if (sc->sc_root_path_cost > bif->bif_designated_cost)
+               if (bif->bif_state == BSTP_IFSTATE_DISABLED ||
+                   bif->bif_state == BSTP_IFSTATE_L1BLOCKING) {
+                       continue;
+               }
+
+               /*
+                * Set DESIGNATED for an aged or unknown peer.
+                */
+               if (bif->bif_peer_bridge == 0xFFFFFFFFFFFFFFFFLLU)
+                       bif->bif_flags |= IFBIF_DESIGNATED;
+               if (designated_root != sc->sc_bridge_id)
                        continue;
 
-               if (sc->sc_bridge_id < bif->bif_designated_bridge)
-                       goto designated;
-               if (sc->sc_bridge_id > bif->bif_designated_bridge)
+               /*
+                * This is only reached if our bridge is the root bridge,
+                * select the root port (IFBIF_DESIGNATED is set at the
+                * end).
+                *
+                * We do NOT use peer info here.
+                */
+               if (bif->bif_path_cost > designated_cost)
                        continue;
+               if (bif->bif_path_cost < designated_cost)
+                       goto set_port2;
 
-               if (bif->bif_port_id > bif->bif_designated_port)
+               if (bif->bif_port_id > designated_port)
                        continue;
-designated:
-               bstp_become_designated_port(sc, bif);
+               if (bif->bif_port_id < designated_port)
+                       goto set_port2;
+               /* degenerate case (possible peer collision w/our key) */
+
+               /*
+                * New port
+                */
+set_port2:
+               designated_cost = bif->bif_path_cost;
+               designated_bridge = sc->sc_bridge_id;
+               designated_port = bif->bif_port_id;
+               root_port = bif;
        }
-}
 
-static void
-bstp_become_designated_port(struct bridge_softc *sc, struct bridge_iflist *bif)
-{
-       bif->bif_designated_root = sc->sc_designated_root;
-       bif->bif_designated_cost = sc->sc_root_path_cost;
-       bif->bif_designated_bridge = sc->sc_bridge_id;
-       bif->bif_designated_port = bif->bif_port_id;
+       /*
+        * Update aggregate information.  The selected root port always
+        * becomes a designated port (along with aged ports).  This can
+        * either be the port whose peer is closest to the root or it
+        * can be one of our ports if our bridge is the root.
+        *
+        * The root cost we record in sc_designated_cost does not include
+        * bif_path_cost of the root port, since we may transmit this
+        * out of any port we will add the cost back in later on on
+        * a per-port basis.
+        *
+        * root_port can be NULL here (if all links are disabled)
+        */
+       if (root_port) {
+               sc->sc_designated_root = designated_root;
+               sc->sc_designated_cost = designated_cost -
+                                        root_port->bif_path_cost;
+               sc->sc_designated_bridge = designated_bridge;
+               sc->sc_designated_port = designated_port;
+               root_port->bif_flags |= IFBIF_DESIGNATED | IFBIF_ROOT;
+       } else {
+               sc->sc_designated_root = designated_root;
+               sc->sc_designated_cost = designated_cost;
+               sc->sc_designated_bridge = designated_bridge;
+               sc->sc_designated_port = designated_port;
+       }
+       sc->sc_root_port = root_port;
 }
 
+/*
+ * Calculate the desired state for each interface link on our bridge.
+ *
+ * The best port will match against sc->sc_root_port (whether we are root
+ * or whether that port is the closest to the root).  We push this port
+ * towards a FORWARDING state.
+ *
+ * Next come designated ports, either aged ports or ports with no peer info
+ * (yet), or the peer who is closest to the root. We push this port towards
+ * a FORWARDING state as well.
+ *
+ * Any remaining ports are pushed towards a BLOCKED state.  Both sides of
+ * the port (us and our peer) should wind up placing the two ends in this
+ * state or bad things happen.
+ */
 static void
 bstp_port_state_selection(struct bridge_softc *sc)
 {
        struct bridge_iflist *bif, *nbif;
 
-       LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
+       TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
-               if (bif->bif_info == sc->sc_root_port) {
+               if (sc->sc_root_port &&
+                   bif->bif_info == sc->sc_root_port->bif_info) {
                        bif->bif_config_pending = 0;
                        bif->bif_topology_change_acknowledge = 0;
                        bstp_make_forwarding(sc, bif);
-               } else if (bstp_designated_port(sc, bif)) {
+               } else if (bif->bif_flags & IFBIF_DESIGNATED) {
                        bstp_timer_stop(&bif->bif_message_age_timer);
                        bstp_make_forwarding(sc, bif);
                } else {
@@ -539,11 +635,29 @@ bstp_port_state_selection(struct bridge_softc *sc)
 
                if (nbif != NULL && !nbif->bif_onlist) {
                        KKASSERT(bif->bif_onlist);
-                       nbif = LIST_NEXT(bif, bif_next);
+                       nbif = TAILQ_NEXT(bif, bif_next);
                }
        }
 }
 
+/*
+ * Clear peer info, effectively makes the port look aged out.
+ * It becomes a designated go-to port.
+ */
+static void
+bstp_clear_peer_info(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+       bif->bif_peer_root = 0xFFFFFFFFFFFFFFFFLLU;
+       bif->bif_peer_cost = 0xFFFFFFFFU;
+       bif->bif_peer_bridge = 0xFFFFFFFFFFFFFFFFLLU;
+       bif->bif_peer_port = 0xFFFFU;
+
+       if (bif->bif_state != BSTP_IFSTATE_DISABLED &&
+           bif->bif_state != BSTP_IFSTATE_L1BLOCKING) {
+               bif->bif_flags |= IFBIF_DESIGNATED;
+       }
+}
+
 static void
 bstp_make_forwarding(struct bridge_softc *sc, struct bridge_iflist *bif)
 {
@@ -583,6 +697,11 @@ bstp_make_l1blocking(struct bridge_softc *sc, struct bridge_iflist *bif)
                bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN);
                bstp_timer_stop(&bif->bif_forward_delay_timer);
                bstp_timer_stop(&bif->bif_link1_timer);
+               if (bif->bif_flags & IFBIF_DESIGNATED) {
+                       bif->bif_flags &= ~IFBIF_DESIGNATED;
+                       bstp_configuration_update(sc);
+                       bstp_port_state_selection(sc);
+               }
                break;
        default:
                break;
@@ -617,7 +736,7 @@ bstp_topology_change_acknowledged(struct bridge_softc *sc)
 
 static void
 bstp_acknowledge_topology_change(struct bridge_softc *sc,
-    struct bridge_iflist *bif)
+                                struct bridge_iflist *bif)
 {
        bif->bif_topology_change_acknowledge = 1;
        bstp_transmit_config(sc, bif);
@@ -724,24 +843,38 @@ out:
 
 static void
 bstp_received_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_config_unit *cu)
+                         struct bstp_config_unit *cu)
 {
-       int root;
+       int iamroot;
 
-       root = bstp_root_bridge(sc);
+       iamroot = bstp_root_bridge(sc);
 
        if (bif->bif_state != BSTP_IFSTATE_DISABLED) {
-               if (bstp_supersedes_port_info(sc, bif, cu)) {
+               /*
+                * Record information from peer.  The peer_cost field
+                * does not include the local bif->bif_path_cost, it will
+                * be added in as needed (since it can be modified manually
+                * this way we don't have to worry about fixups).
+                */
+               bif->bif_peer_root = cu->cu_rootid;
+               bif->bif_peer_cost = cu->cu_root_path_cost;
+               bif->bif_peer_bridge = cu->cu_bridge_id;
+               bif->bif_peer_port = cu->cu_port_id;
+
+               if (bstp_supersedes_port_info(sc, bif)) {
                        bstp_record_config_information(sc, bif, cu);
                        bstp_configuration_update(sc);
                        bstp_port_state_selection(sc);
 
-                       if ((bstp_root_bridge(sc) == 0) && root) {
-                               /*
-                                * We continuously transmit hello's if
-                                * link1 is set (topology change bit will
-                                * be zero so they shouldn't propagate).
-                                */
+                       /*
+                        * If our bridge loses its root status (?)
+                        *
+                        * Hello's (unsolicited CFG packets) are generated
+                        * every hello period if LINK1 is set, otherwise
+                        * we are no longer the root bridge and must stop
+                        * generating unsolicited CFG packets.
+                        */
+                       if (iamroot && bstp_root_bridge(sc) == 0) {
                                if ((sc->sc_ifp->if_flags & IFF_LINK1) == 0)
                                        bstp_timer_stop(&sc->sc_hello_timer);
 
@@ -753,24 +886,38 @@ bstp_received_config_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
                                }
                        }
 
-                       if (bif->bif_info == sc->sc_root_port) {
+                       if (sc->sc_root_port &&
+                           bif->bif_info == sc->sc_root_port->bif_info) {
                                bstp_record_config_timeout_values(sc, cu);
                                bstp_config_bpdu_generation(sc);
 
                                if (cu->cu_topology_change_acknowledgment)
                                        bstp_topology_change_acknowledged(sc);
                        }
-               } else if (bstp_designated_port(sc, bif))
+               } else if (bif->bif_flags & IFBIF_DESIGNATED) {
+                       /*
+                        * Update designated ports (aged out peers or
+                        * the port closest to the root) at a faster pace.
+                        *
+                        * Clear our designated flag if we aren't marked
+                        * as the root port.
+                        */
                        bstp_transmit_config(sc, bif);
+                       if ((bif->bif_flags & IFBIF_ROOT) == 0) {
+                               bif->bif_flags &= ~IFBIF_DESIGNATED;
+                               bstp_configuration_update(sc);
+                               bstp_port_state_selection(sc);
+                       }
+               }
        }
 }
 
 static void
 bstp_received_tcn_bpdu(struct bridge_softc *sc, struct bridge_iflist *bif,
-    struct bstp_tcn_unit *tcn)
+                      struct bstp_tcn_unit *tcn)
 {
        if (bif->bif_state != BSTP_IFSTATE_DISABLED &&
-           bstp_designated_port(sc, bif)) {
+           (bif->bif_flags & IFBIF_DESIGNATED)) {
                bstp_topology_change_detection(sc);
                bstp_acknowledge_topology_change(sc, bif);
        }
@@ -794,10 +941,10 @@ static void
 bstp_message_age_timer_expiry(struct bridge_softc *sc,
                              struct bridge_iflist *bif)
 {
-       int root;
+       int iamroot;
 
-       root = bstp_root_bridge(sc);
-       bstp_become_designated_port(sc, bif);
+       iamroot = bstp_root_bridge(sc);
+       bstp_clear_peer_info(sc, bif);
        bstp_configuration_update(sc);
        bstp_port_state_selection(sc);
 
@@ -806,7 +953,7 @@ bstp_message_age_timer_expiry(struct bridge_softc *sc,
         * we have some cleanup to do.  This also occurs if we
         * wind up being completely isolated.
         */
-       if ((bstp_root_bridge(sc)) && (root == 0)) {
+       if (iamroot == 0 && bstp_root_bridge(sc)) {
                sc->sc_max_age = sc->sc_bridge_max_age;
                sc->sc_hello_time = sc->sc_bridge_hello_time;
                sc->sc_forward_delay = sc->sc_bridge_forward_delay;
@@ -820,34 +967,20 @@ bstp_message_age_timer_expiry(struct bridge_softc *sc,
 
 static void
 bstp_forward_delay_timer_expiry(struct bridge_softc *sc,
-    struct bridge_iflist *bif)
+                               struct bridge_iflist *bif)
 {
        if (bif->bif_state == BSTP_IFSTATE_LISTENING) {
                bstp_set_port_state(bif, BSTP_IFSTATE_LEARNING);
                bstp_timer_start(&bif->bif_forward_delay_timer, 0);
        } else if (bif->bif_state == BSTP_IFSTATE_LEARNING) {
                bstp_set_port_state(bif, BSTP_IFSTATE_FORWARDING);
-               if (bstp_designated_for_some_port(sc) &&
-                   bif->bif_change_detection_enabled)
+               if (sc->sc_designated_bridge == sc->sc_bridge_id &&
+                   bif->bif_change_detection_enabled) {
                        bstp_topology_change_detection(sc);
+               }
        }
 }
 
-static int
-bstp_designated_for_some_port(struct bridge_softc *sc)
-{
-
-       struct bridge_iflist *bif;
-
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
-               if ((bif->bif_flags & IFBIF_STP) == 0)
-                       continue;
-               if (bif->bif_designated_bridge == sc->sc_bridge_id)
-                       return (1);
-       }
-       return (0);
-}
-
 static void
 bstp_tcn_timer_expiry(struct bridge_softc *sc)
 {
@@ -901,21 +1034,27 @@ bstp_initialization(struct bridge_softc *sc)
 
        KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
 
+       /*
+        * Figure out our bridge ID, use the lowest-valued MAC.
+        * Include the bridge's own random MAC in the calculation.
+        */
        mif = NULL;
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
                if (bif->bif_ifp->if_type != IFT_ETHER)
                        continue;
-               bif->bif_port_id = (bif->bif_priority << 8) |
-                   (bif->bif_ifp->if_index & 0xff);
+               /* Assign before the mif check so the first port gets an id. */
+               bif->bif_port_id = (bif->bif_priority << 8) |
+                                  (bif->bif_ifp->if_index & 0xff);
 
                if (mif == NULL) {
                        mif = bif;
                        continue;
                }
                if (bstp_addr_cmp(IF_LLADDR(bif->bif_ifp),
-                   IF_LLADDR(mif->bif_ifp)) < 0) {
+                                 IF_LLADDR(mif->bif_ifp)) < 0) {
                        mif = bif;
                        continue;
                }
@@ -925,7 +1063,11 @@ bstp_initialization(struct bridge_softc *sc)
                return;
        }
 
-       e_addr = IF_LLADDR(mif->bif_ifp);
+       if (bstp_addr_cmp(IF_LLADDR(sc->sc_ifp), IF_LLADDR(mif->bif_ifp)) < 0)
+               e_addr = IF_LLADDR(sc->sc_ifp);
+       else
+               e_addr = IF_LLADDR(mif->bif_ifp);
+
        sc->sc_bridge_id =
            (((uint64_t)sc->sc_bridge_priority) << 48) |
            (((uint64_t)e_addr[0]) << 40) |
@@ -935,8 +1077,12 @@ bstp_initialization(struct bridge_softc *sc)
            (((uint64_t)e_addr[4]) << 8) |
            (((uint64_t)e_addr[5]));
 
+       /*
+        * Remainder of setup.
+        */
+
        sc->sc_designated_root = sc->sc_bridge_id;
-       sc->sc_root_path_cost = 0;
+       sc->sc_designated_cost = 0;
        sc->sc_root_port = NULL;
 
        sc->sc_max_age = sc->sc_bridge_max_age;
@@ -951,7 +1097,7 @@ bstp_initialization(struct bridge_softc *sc)
                callout_reset(&sc->sc_bstpcallout, hz,
                    bstp_tick, sc);
 
-       LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
+       TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
                if (sc->sc_ifp->if_flags & IFF_LINK1)
                        bstp_timer_start(&bif->bif_link1_timer, 0);
                if (bif->bif_flags & IFBIF_STP)
@@ -961,7 +1107,7 @@ bstp_initialization(struct bridge_softc *sc)
 
                if (nbif != NULL && !nbif->bif_onlist) {
                        KKASSERT(bif->bif_onlist);
-                       nbif = LIST_NEXT(bif, bif_next);
+                       nbif = TAILQ_NEXT(bif, bif_next);
                }
        }
 
@@ -978,7 +1124,7 @@ bstp_stop(struct bridge_softc *sc)
 
        KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
                bstp_timer_stop(&bif->bif_hold_timer);
                bstp_timer_stop(&bif->bif_message_age_timer);
@@ -1004,8 +1150,8 @@ bstp_stop(struct bridge_softc *sc)
 static void
 bstp_initialize_port(struct bridge_softc *sc, struct bridge_iflist *bif)
 {
-       bstp_become_designated_port(sc, bif);
        bstp_set_port_state(bif, BSTP_IFSTATE_BLOCKING);
+       bstp_clear_peer_info(sc, bif);
        bif->bif_topology_change_acknowledge = 0;
        bif->bif_config_pending = 0;
        bif->bif_change_detection_enabled = 1;
@@ -1027,10 +1173,11 @@ bstp_enable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
 static void
 bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
 {
-       int root;
+       int iamroot;
 
-       root = bstp_root_bridge(sc);
-       bstp_become_designated_port(sc, bif);
+       iamroot = bstp_root_bridge(sc);
+
+       bstp_clear_peer_info(sc, bif);
        bstp_set_port_state(bif, BSTP_IFSTATE_DISABLED);
        bif->bif_topology_change_acknowledge = 0;
        bif->bif_config_pending = 0;
@@ -1041,7 +1188,7 @@ bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
        bstp_port_state_selection(sc);
        bridge_rtdelete(sc, bif->bif_ifp, IFBF_FLUSHDYN);
 
-       if (bstp_root_bridge(sc) && (root == 0)) {
+       if (iamroot == 0 && bstp_root_bridge(sc)) {
                sc->sc_max_age = sc->sc_bridge_max_age;
                sc->sc_hello_time = sc->sc_bridge_hello_time;
                sc->sc_forward_delay = sc->sc_bridge_forward_delay;
@@ -1053,77 +1200,6 @@ bstp_disable_port(struct bridge_softc *sc, struct bridge_iflist *bif)
        }
 }
 
-#ifdef notused
-static void
-bstp_set_bridge_priority(struct bridge_softc *sc, uint64_t new_bridge_id)
-{
-       struct bridge_iflist *bif;
-       int root;
-
-       root = bstp_root_bridge(sc);
-
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
-               if ((bif->bif_flags & IFBIF_STP) == 0)
-                       continue;
-               if (bstp_designated_port(sc, bif))
-                       bif->bif_designated_bridge = new_bridge_id;
-       }
-
-       sc->sc_bridge_id = new_bridge_id;
-
-       bstp_configuration_update(sc);
-       bstp_port_state_selection(sc);
-
-       if (bstp_root_bridge(sc) && (root == 0)) {
-               sc->sc_max_age = sc->sc_bridge_max_age;
-               sc->sc_hello_time = sc->sc_bridge_hello_time;
-               sc->sc_forward_delay = sc->sc_bridge_forward_delay;
-
-               bstp_topology_change_detection(sc);
-               bstp_timer_stop(&sc->sc_tcn_timer);
-               bstp_config_bpdu_generation(sc);
-               bstp_timer_start(&sc->sc_hello_timer, 0);
-       }
-}
-
-static void
-bstp_set_port_priority(struct bridge_softc *sc, struct bridge_iflist *bif,
-    uint16_t new_port_id)
-{
-       if (bstp_designated_port(sc, bif))
-               bif->bif_designated_port = new_port_id;
-
-       bif->bif_port_id = new_port_id;
-
-       if ((sc->sc_bridge_id == bif->bif_designated_bridge) &&
-           (bif->bif_port_id < bif->bif_designated_port)) {
-               bstp_become_designated_port(sc, bif);
-               bstp_port_state_selection(sc);
-       }
-}
-
-static void
-bstp_set_path_cost(struct bridge_softc *sc, struct bridge_iflist *bif,
-    uint32_t path_cost)
-{
-       bif->bif_path_cost = path_cost;
-       bstp_configuration_update(sc);
-       bstp_port_state_selection(sc);
-}
-
-static void
-bstp_enable_change_detection(struct bridge_iflist *bif)
-{
-       bif->bif_change_detection_enabled = 1;
-}
-
-static void
-bstp_disable_change_detection(struct bridge_iflist *bif)
-{
-       bif->bif_change_detection_enabled = 0;
-}
-#endif /* notused */
-
 void
 bstp_linkstate(struct ifnet *ifp, int state)
 {
@@ -1139,7 +1215,7 @@ bstp_linkstate(struct ifnet *ifp, int state)
-        * don't need to use LIST_FOREACH_MUTABLE()+bif_onlist
+        * don't need to use TAILQ_FOREACH_MUTABLE()+bif_onlist
         * check here.
         */
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
 
@@ -1225,7 +1301,7 @@ bstp_tick_handler(netmsg_t msg)
         * BRIDGE_CFGPORT
         */
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
                /*
@@ -1248,7 +1324,7 @@ bstp_tick_handler(netmsg_t msg)
            sc->sc_topology_change_time))
                bstp_topology_change_timer_expiry(sc);
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
                if (bstp_timer_expired(&bif->bif_message_age_timer,
@@ -1256,7 +1332,7 @@ bstp_tick_handler(netmsg_t msg)
                        bstp_message_age_timer_expiry(sc, bif);
        }
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if ((bif->bif_flags & IFBIF_STP) == 0)
                        continue;
                if (bstp_timer_expired(&bif->bif_forward_delay_timer,
index a9f35f0..deb1258 100644 (file)
  * interface is ripped from the percpu list during the blocking operation,
  * the iteration still could keep going:
  *
- * LIST_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
+ * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
  *     blocking operation;
  *     blocking operation;
  *     ...
  *     ...
  *     if (nbif != NULL && !nbif->bif_onlist) {
  *         KKASSERT(bif->bif_onlist);
- *         nbif = LIST_NEXT(bif, bif_next);
+ *         nbif = TAILQ_NEXT(bif, bif_next);
  *     }
  * }
  *
@@ -355,6 +355,7 @@ eventhandler_tag    bridge_detach_cookie = NULL;
 extern struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
 extern int (*bridge_output_p)(struct ifnet *, struct mbuf *);
 extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);
 
 static int     bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
 
@@ -365,10 +366,12 @@ static int        bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
 static void    bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
 static void    bridge_ifdetach(void *, struct ifnet *);
 static void    bridge_init(void *);
+static int     bridge_from_us(struct bridge_softc *, struct ether_header *);
 static void    bridge_stop(struct ifnet *);
 static void    bridge_start(struct ifnet *);
 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
 static int     bridge_output(struct ifnet *, struct mbuf *);
+static struct ifnet *bridge_interface(void *if_bridge);
 
 static void    bridge_forward(struct bridge_softc *, struct mbuf *m);
 
@@ -426,6 +429,8 @@ static int  bridge_ioctl_init(struct bridge_softc *, void *);
 static int     bridge_ioctl_stop(struct bridge_softc *, void *);
 static int     bridge_ioctl_add(struct bridge_softc *, void *);
 static int     bridge_ioctl_del(struct bridge_softc *, void *);
+static void    bridge_ioctl_fillflags(struct bridge_softc *sc,
+                               struct bridge_iflist *bif, struct ifbreq *req);
 static int     bridge_ioctl_gifflags(struct bridge_softc *, void *);
 static int     bridge_ioctl_sifflags(struct bridge_softc *, void *);
 static int     bridge_ioctl_scache(struct bridge_softc *, void *);
@@ -458,17 +463,15 @@ static int        bridge_ip6_checkbasic(struct mbuf **mp);
 static int     bridge_fragment(struct ifnet *, struct mbuf *,
                    struct ether_header *, int, struct llc *);
 static void    bridge_enqueue_handler(netmsg_t);
-static void    bridge_handoff(struct ifnet *, struct mbuf *, int);
+static void    bridge_handoff(struct bridge_softc *, struct ifnet *,
+                   struct mbuf *, int);
 
 static void    bridge_del_bif_handler(netmsg_t);
 static void    bridge_add_bif_handler(netmsg_t);
-static void    bridge_set_bifflags_handler(netmsg_t);
 static void    bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
                    struct bridge_iflist_head *);
 static void    bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
                    struct ifnet *);
-static void    bridge_set_bifflags(struct bridge_softc *,
-                   struct bridge_ifinfo *, uint32_t);
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
@@ -476,12 +479,15 @@ SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
 static int pfil_member = 1; /* run pfil hooks on the member interface */
+static int bridge_debug;
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
     &pfil_bridge, 0, "Packet filter on the bridge interface");
 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
     &pfil_member, 0, "Packet filter on the member interface");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
+    &bridge_debug, 0, "Bridge debug mode");
 
 struct bridge_control_arg {
        union {
@@ -589,6 +595,7 @@ bridge_modevent(module_t mod, int type, void *data)
                if_clone_attach(&bridge_cloner);
                bridge_input_p = bridge_input;
                bridge_output_p = bridge_output;
+               bridge_interface_p = bridge_interface;
                bridge_detach_cookie = EVENTHANDLER_REGISTER(
                    ifnet_detach_event, bridge_ifdetach, NULL,
                    EVENTHANDLER_PRI_ANY);
@@ -604,6 +611,7 @@ bridge_modevent(module_t mod, int type, void *data)
                if_clone_detach(&bridge_cloner);
                bridge_input_p = NULL;
                bridge_output_p = NULL;
+               bridge_interface_p = NULL;
 #if notyet
                bstp_linkstate_p = NULL;
 #endif
@@ -664,9 +672,9 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
        sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
                                 M_DEVBUF, M_WAITOK);
        for (cpu = 0; cpu < ncpus; ++cpu)
-               LIST_INIT(&sc->sc_iflists[cpu]);
+               TAILQ_INIT(&sc->sc_iflists[cpu]);
 
-       LIST_INIT(&sc->sc_spanlist);
+       TAILQ_INIT(&sc->sc_spanlist);
 
        ifp->if_softc = sc;
        if_initname(ifp, ifc->ifc_name, unit);
@@ -675,7 +683,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
        ifp->if_ioctl = bridge_ioctl;
        ifp->if_start = bridge_start;
        ifp->if_init = bridge_init;
-       ifp->if_type = IFT_BRIDGE;
+       ifp->if_type = IFT_ETHER;
        ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
        ifq_set_ready(&ifp->if_snd);
        ifp->if_hdrlen = ETHER_HDR_LEN;
@@ -695,7 +703,7 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
        ether_ifattach(ifp, eaddr, NULL);
        /* Now undo some of the damage... */
        ifp->if_baudrate = 0;
-       ifp->if_type = IFT_BRIDGE;
+       /*ifp->if_type = IFT_BRIDGE;*/
 
        crit_enter();   /* XXX MP */
        LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
@@ -713,10 +721,10 @@ bridge_delete_dispatch(netmsg_t msg)
 
        ifnet_serialize_all(bifp);
 
-       while ((bif = LIST_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
+       while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
                bridge_delete_member(sc, bif, 0);
 
-       while ((bif = LIST_FIRST(&sc->sc_spanlist)) != NULL)
+       while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
                bridge_delete_span(sc, bif);
 
        ifnet_deserialize_all(bifp);
@@ -920,7 +928,7 @@ bridge_lookup_member(struct bridge_softc *sc, const char *name)
 {
        struct bridge_iflist *bif;
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (strcmp(bif->bif_ifp->if_xname, name) == 0)
                        return (bif);
        }
@@ -937,7 +945,7 @@ bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
 {
        struct bridge_iflist *bif;
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (bif->bif_ifp == member_ifp)
                        return (bif);
        }
@@ -955,7 +963,7 @@ bridge_lookup_member_ifinfo(struct bridge_softc *sc,
 {
        struct bridge_iflist *bif;
 
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (bif->bif_info == bif_info)
                        return (bif);
        }
@@ -1016,7 +1024,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
         * after we make sure that no one is accessing them,
         * i.e. after following netmsg_service_sync()
         */
-       LIST_INIT(&saved_bifs);
+       TAILQ_INIT(&saved_bifs);
        bridge_del_bif(sc, bif_info, &saved_bifs);
 
        /*
@@ -1029,9 +1037,9 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
        /*
         * Free the removed bifs
         */
-       KKASSERT(!LIST_EMPTY(&saved_bifs));
-       while ((bif = LIST_FIRST(&saved_bifs)) != NULL) {
-               LIST_REMOVE(bif, bif_next);
+       KKASSERT(!TAILQ_EMPTY(&saved_bifs));
+       while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
+               TAILQ_REMOVE(&saved_bifs, bif, bif_next);
                kfree(bif, M_DEVBUF);
        }
 
@@ -1063,7 +1071,10 @@ bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
        KASSERT(bif->bif_ifp->if_bridge == NULL,
            ("%s: not a span interface", __func__));
 
-       LIST_REMOVE(bif, bif_next);
+       /* Span bifs are linked on sc_spanlist, never on the per-cpu lists. */
+       TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
+       /* Free the bridge_ifinfo that addspan allocated for this bif. */
+       kfree(bif->bif_info, M_DEVBUF);
        kfree(bif, M_DEVBUF);
 }
 }
 
@@ -1141,13 +1149,13 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
                return (ENOENT);
 
        /* If it's in the span list, it can't be a member. */
-       LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
                if (ifs == bif->bif_ifp)
                        return (EBUSY);
 
        /* Allow the first Ethernet member to define the MTU */
        if (ifs->if_type != IFT_GIF) {
-               if (LIST_EMPTY(&sc->sc_iflists[mycpuid])) {
+               if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
                        bifp->if_mtu = ifs->if_mtu;
                } else if (bifp->if_mtu != ifs->if_mtu) {
                        if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
@@ -1245,18 +1253,46 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
        bif = bridge_lookup_member(sc, req->ifbr_ifsname);
        if (bif == NULL)
                return (ENOENT);
+       bridge_ioctl_fillflags(sc, bif, req);
+       return (0);
+}
 
+static void
+bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
+                      struct ifbreq *req)
+{
        req->ifbr_ifsflags = bif->bif_flags;
        req->ifbr_state = bif->bif_state;
        req->ifbr_priority = bif->bif_priority;
        req->ifbr_path_cost = bif->bif_path_cost;
        req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
-       req->ifbr_designated_root = bif->bif_designated_root;
-       req->ifbr_designated_bridge = bif->bif_designated_bridge;
-       req->ifbr_designated_cost = bif->bif_designated_cost;
-       req->ifbr_designated_port = bif->bif_designated_port;
-
-       return (0);
+       if (bif->bif_flags & IFBIF_STP) {
+               req->ifbr_peer_root = bif->bif_peer_root;
+               req->ifbr_peer_bridge = bif->bif_peer_bridge;
+               req->ifbr_peer_cost = bif->bif_peer_cost;
+               req->ifbr_peer_port = bif->bif_peer_port;
+               if (bstp_supersedes_port_info(sc, bif)) {
+                       req->ifbr_designated_root = bif->bif_peer_root;
+                       req->ifbr_designated_bridge = bif->bif_peer_bridge;
+                       req->ifbr_designated_cost = bif->bif_peer_cost;
+                       req->ifbr_designated_port = bif->bif_peer_port;
+               } else {
+                       req->ifbr_designated_root = sc->sc_bridge_id;
+                       req->ifbr_designated_bridge = sc->sc_bridge_id;
+                       req->ifbr_designated_cost = bif->bif_path_cost +
+                                                   bif->bif_peer_cost;
+                       req->ifbr_designated_port = bif->bif_port_id;
+               }
+       } else {
+               req->ifbr_peer_root = 0;
+               req->ifbr_peer_bridge = 0;
+               req->ifbr_peer_cost = 0;
+               req->ifbr_peer_port = 0;
+               req->ifbr_designated_root = 0;
+               req->ifbr_designated_bridge = 0;
+               req->ifbr_designated_cost = 0;
+               req->ifbr_designated_port = 0;
+       }
 }
 
 static int
@@ -1287,10 +1323,8 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
                }
        }
 
-       ifnet_deserialize_all(bifp);
-       bridge_set_bifflags(sc, bif->bif_info, req->ifbr_ifsflags);
-       ifnet_serialize_all(bifp);
-
+       bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
+                        (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
        if (bifp->if_flags & IFF_RUNNING)
                bstp_initialization(sc);
 
@@ -1332,9 +1366,9 @@ bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
        int count, len;
 
        count = 0;
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
                count++;
-       LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
                count++;
 
        if (bifc->ifbic_len == 0) {
@@ -1356,26 +1390,18 @@ bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
        bc_arg->bca_kptr = breq;
 
        count = 0;
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (len < sizeof(*breq))
                        break;
 
                strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
                        sizeof(breq->ifbr_ifsname));
-               breq->ifbr_ifsflags = bif->bif_flags;
-               breq->ifbr_state = bif->bif_state;
-               breq->ifbr_priority = bif->bif_priority;
-               breq->ifbr_path_cost = bif->bif_path_cost;
-               breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
-               breq->ifbr_designated_root = bif->bif_designated_root;
-               breq->ifbr_designated_bridge = bif->bif_designated_bridge;
-               breq->ifbr_designated_cost = bif->bif_designated_cost;
-               breq->ifbr_designated_port = bif->bif_designated_port;
+               bridge_ioctl_fillflags(sc, bif, breq);
                breq++;
                count++;
                len -= sizeof(*breq);
        }
-       LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
                if (len < sizeof(*breq))
                        break;
 
@@ -1665,12 +1691,13 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
        struct ifbreq *req = arg;
        struct bridge_iflist *bif;
        struct ifnet *ifs;
+       struct bridge_ifinfo *bif_info;
 
        ifs = ifunit(req->ifbr_ifsname);
        if (ifs == NULL)
                return (ENOENT);
 
-       LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
                if (ifs == bif->bif_ifp)
                        return (EBUSY);
 
@@ -1687,12 +1714,19 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
                return (EINVAL);
        }
 
+       /*
+        * bif_info is needed for bif_flags
+        */
+        bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
+        bif_info->bifi_ifp = ifs;
+
        bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
        bif->bif_ifp = ifs;
+       bif->bif_info = bif_info;
        bif->bif_flags = IFBIF_SPAN;
-       /* NOTE: span bif does not need bridge_ifinfo */
+       /* NOTE: span bif gets a private bridge_ifinfo (backs bif_flags) */
 
-       LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
+       TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
 
        sc->sc_span = 1;
 
@@ -1710,7 +1744,7 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
        if (ifs == NULL)
                return (ENOENT);
 
-       LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
                if (ifs == bif->bif_ifp)
                        break;
 
@@ -1719,7 +1753,7 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
 
        bridge_delete_span(sc, bif);
 
-       if (LIST_EMPTY(&sc->sc_spanlist))
+       if (TAILQ_EMPTY(&sc->sc_spanlist))
                sc->sc_span = 0;
 
        return (0);
@@ -1760,7 +1794,7 @@ bridge_ifdetach_dispatch(netmsg_t msg)
 
                ifnet_serialize_all(bifp);
 
-               LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
+               TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
                        if (ifp == bif->bif_ifp) {
                                bridge_delete_span(sc, bif);
                                break;
@@ -1816,6 +1850,28 @@ bridge_stop(struct ifnet *ifp)
 }
 
 /*
+ * Returns TRUE if the packet is being sent 'from us'... from our bridge
+ * interface or from any member of our bridge interface.  This is used
+ * later on to force the MAC to be the MAC of our bridge interface.
+ */
+static int
+bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
+{
+       struct bridge_iflist *bif;
+
+       if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
+               return (1);
+
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+               if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
+                          ETHER_ADDR_LEN) == 0) {
+                       return (1);
+               }
+       }
+       return (0);
+}
+
+/*
  * bridge_enqueue:
  *
  *     Enqueue a packet on a bridge member interface.
@@ -1853,6 +1909,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
        struct ether_header *eh;
        struct ifnet *dst_if, *bifp;
        int from_us;
+       int priority;
 
        ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
 
@@ -1865,17 +1922,18 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
        }
        bifp = sc->sc_ifp;
 
+       /*
+        * Acquire header
+        */
        if (m->m_len < ETHER_HDR_LEN) {
                m = m_pullup(m, ETHER_HDR_LEN);
-               if (m == NULL)
+               if (m == NULL) {
+                       bifp->if_oerrors++;
                        return (0);
+               }
        }
        eh = mtod(m, struct ether_header *);
-
-       if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0)
-               from_us = 1;
-       else
-               from_us = 0;
+       from_us = bridge_from_us(sc, eh);
 
        /*
         * If bridge is down, but the original output interface is up,
@@ -1895,6 +1953,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
                dst_if = NULL;
        else
                dst_if = bridge_rtlookup(sc, eh->ether_dhost);
+
        if (dst_if == NULL) {
                struct mbuf *mc;
                int used = 0;
@@ -1902,7 +1961,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
                if (sc->sc_span)
                        bridge_span(sc, m);
 
-               LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
+               TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
                                     bif_next, nbif) {
                        dst_if = bif->bif_ifp;
                        if ((dst_if->if_flags & IFF_RUNNING) == 0)
@@ -1925,7 +1984,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
                                }
                        }
 
-                       if (LIST_NEXT(bif, bif_next) == NULL) {
+                       if (TAILQ_NEXT(bif, bif_next) == NULL) {
                                used = 1;
                                mc = m;
                        } else {
@@ -1939,11 +1998,11 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
                        /*
                         * If the packet is 'from' us override ether_shost.
                         */
-                       bridge_handoff(dst_if, mc, from_us);
+                       bridge_handoff(sc, dst_if, mc, from_us);
 
                        if (nbif != NULL && !nbif->bif_onlist) {
                                KKASSERT(bif->bif_onlist);
-                               nbif = LIST_NEXT(bif, bif_next);
+                               nbif = TAILQ_NEXT(bif, bif_next);
                        }
                }
                if (used == 0)
@@ -1953,41 +2012,64 @@ bridge_output(struct ifnet *ifp, struct mbuf *m)
 
 sendunicast:
        /*
-        * If STP is enabled on the target and it is not in a good state
-        * scan all bridged interfaces for any with a matching MAC which is
-        * in a good state and use that one.
+        * If STP is enabled on the target we are an equal opportunity
+        * employer and do not necessarily output to dst_if.  Instead
+        * scan available links with the same MAC as the current dst_if
+        * and choose the best one.
         *
-        * We need to do this because arp entries tag onto a particular
+        * We also need to do this because arp entries tag onto a particular
         * interface and if it happens to be dead then the packets will
         * go into a bit bucket.
+        *
+        * If LINK2 is set the matching links are bonded and we round-robin.
+        * (the MAC address must be the same for the participating links).
+        * In this case links in a STP BLOCKING state are allowed for unicast
+        * packets.
         */
        bif = bridge_lookup_member_if(sc, dst_if);
        if (bif->bif_flags & IFBIF_STP) {
-               switch (bif->bif_state) {
-               case BSTP_IFSTATE_L1BLOCKING:
-               case BSTP_IFSTATE_BLOCKING:
-               case BSTP_IFSTATE_LISTENING:
-               case BSTP_IFSTATE_DISABLED:
-                       LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
-                                            bif_next, nbif) {
-                               if (memcmp(IF_LLADDR(bif->bif_ifp),
-                                          IF_LLADDR(dst_if),
-                                          ETHER_ADDR_LEN) != 0) {
-                                       continue;
-                               }
-                               if (bif->bif_state == BSTP_IFSTATE_L1BLOCKING||
-                                   bif->bif_state == BSTP_IFSTATE_BLOCKING ||
-                                   bif->bif_state == BSTP_IFSTATE_LISTENING||
-                                   bif->bif_state == BSTP_IFSTATE_DISABLED) {
-                                       continue;
-                               }
+               priority = 0;
+               TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
+                                    bif_next, nbif) {
+                       if (memcmp(IF_LLADDR(bif->bif_ifp),
+                                  IF_LLADDR(dst_if),
+                                  ETHER_ADDR_LEN) != 0) {
+                               continue;
+                       }
+
+                       switch(bif->bif_state) {
+                       case BSTP_IFSTATE_BLOCKING:
+                               if (sc->sc_ifp->if_flags & IFF_LINK2)
+                                       break;
+                               /* fall through */
+                       case BSTP_IFSTATE_L1BLOCKING:
+                       case BSTP_IFSTATE_LISTENING:
+                       case BSTP_IFSTATE_DISABLED:
+                               continue;
+                       default:
+                               break;
+                       }
+                       if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
+                               continue;
+
+                       /*
+                        * XXX we need to use the toeplitz hash or
+                        *     something like that instead of
+                        *     round-robining.
+                        */
+                       if (sc->sc_ifp->if_flags & IFF_LINK2) {
                                dst_if = bif->bif_ifp;
+                               TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
+                                       bif, bif_next);
+                               TAILQ_INSERT_TAIL(
+                                       &sc->sc_iflists[mycpuid],
+                                       bif, bif_next);
                                break;
                        }
-                       break;
-               default:
-                       /* keep dst_if */
-                       break;
+                       if (bif->bif_priority > priority) {
+                               priority = bif->bif_priority;
+                               dst_if = bif->bif_ifp;
+                       }
                }
        }
 
@@ -1996,11 +2078,28 @@ sendunicast:
        if ((dst_if->if_flags & IFF_RUNNING) == 0)
                m_freem(m);
        else
-               bridge_handoff(dst_if, m, from_us);
+               bridge_handoff(sc, dst_if, m, from_us);
        return (0);
 }
 
 /*
+ * Returns the bridge interface associated with an ifp.
+ * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
+ * code to supply the bridge for the is-at info, making
+ * the bridge responsible for matching local addresses.
+ *
+ * Without this the ARP code will supply bridge member interfaces
+ * for the is-at which makes it difficult for the bridge to fail-over
+ * interfaces (among other things).
+ */
+static struct ifnet *
+bridge_interface(void *if_bridge)
+{
+       struct bridge_softc *sc = if_bridge;
+       return (sc->sc_ifp);
+}
+
+/*
  * bridge_start:
  *
  *     Start output on a bridge.
@@ -2056,9 +2155,10 @@ bridge_start(struct ifnet *ifp)
 static void
 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
 {
-       struct bridge_iflist *bif;
+       struct bridge_iflist *bif, *nbif;
        struct ifnet *src_if, *dst_if, *ifp;
        struct ether_header *eh;
+       int priority;
 
        src_if = m->m_pkthdr.rcvif;
        ifp = sc->sc_ifp;
@@ -2080,12 +2180,19 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m)
 
        if (bif->bif_flags & IFBIF_STP) {
                switch (bif->bif_state) {
-               case BSTP_IFSTATE_L1BLOCKING:
                case BSTP_IFSTATE_BLOCKING:
+                       if ((sc->sc_ifp->if_flags & IFF_LINK2) &&
+                           (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+                               break;
+                       }
+                       /* fall through */
+               case BSTP_IFSTATE_L1BLOCKING:
                case BSTP_IFSTATE_LISTENING:
                case BSTP_IFSTATE_DISABLED:
                        m_freem(m);
                        return;
+               default:
+                       break;
                }
        }
 
@@ -2097,14 +2204,16 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m)
         * the address.
         */
        if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+           bif->bif_state != BSTP_IFSTATE_BLOCKING &&
            ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
            (eh->ether_shost[0] == 0 &&
             eh->ether_shost[1] == 0 &&
             eh->ether_shost[2] == 0 &&
             eh->ether_shost[3] == 0 &&
             eh->ether_shost[4] == 0 &&
-            eh->ether_shost[5] == 0) == 0)
+            eh->ether_shost[5] == 0) == 0) {
                bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
+       }
 
        if ((bif->bif_flags & IFBIF_STP) != 0 &&
            bif->bif_state == BSTP_IFSTATE_LEARNING) {
@@ -2139,13 +2248,8 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m)
        }
 
        /*
-        * At this point, we're dealing with a unicast frame
-        * going to a different interface.
+        * Unicast, kinda replicates the output side of bridge_output().
         */
-       if ((dst_if->if_flags & IFF_RUNNING) == 0) {
-               m_freem(m);
-               return;
-       }
        bif = bridge_lookup_member_if(sc, dst_if);
        if (bif == NULL) {
                /* Not a member of the bridge (anymore?) */
@@ -2154,15 +2258,61 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m)
        }
 
        if (bif->bif_flags & IFBIF_STP) {
-               switch (bif->bif_state) {
-               case BSTP_IFSTATE_DISABLED:
-               case BSTP_IFSTATE_BLOCKING:
-               case BSTP_IFSTATE_L1BLOCKING:
-                       m_freem(m);
-                       return;
+               priority = 0;
+               TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
+                                    bif_next, nbif) {
+                       if (memcmp(IF_LLADDR(bif->bif_ifp),
+                                  IF_LLADDR(dst_if),
+                                  ETHER_ADDR_LEN) != 0) {
+                               continue;
+                       }
+
+                       switch(bif->bif_state) {
+                       case BSTP_IFSTATE_BLOCKING:
+                               if (sc->sc_ifp->if_flags & IFF_LINK2)
+                                       break;
+                               /* fall through */
+                       case BSTP_IFSTATE_L1BLOCKING:
+                       case BSTP_IFSTATE_LISTENING:
+                       case BSTP_IFSTATE_DISABLED:
+                               continue;
+                       default:
+                               break;
+                       }
+
+                       if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
+                               continue;
+
+                       /*
+                        * XXX we need to use the toeplitz hash or
+                        *     something like that instead of
+                        *     round-robining.
+                        */
+                       if (sc->sc_ifp->if_flags & IFF_LINK2) {
+                               dst_if = bif->bif_ifp;
+                               TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
+                                       bif, bif_next);
+                               TAILQ_INSERT_TAIL(
+                                       &sc->sc_iflists[mycpuid],
+                                       bif, bif_next);
+                               break;
+                       }
+                       if (bif->bif_priority > priority) {
+                               priority = bif->bif_priority;
+                               dst_if = bif->bif_ifp;
+                       }
                }
        }
 
+       /*
+        * At this point, we're dealing with a unicast frame
+        * going to a different interface.
+        */
+       if ((dst_if->if_flags & IFF_RUNNING) == 0) {
+               m_freem(m);
+               return;
+       }
+
        if (inet_pfil_hook.ph_hashooks > 0
 #ifdef INET6
            || inet6_pfil_hook.ph_hashooks > 0
@@ -2178,7 +2328,7 @@ bridge_forward(struct bridge_softc *sc, struct mbuf *m)
                if (m == NULL)
                        return;
        }
-       bridge_handoff(dst_if, m, 0);
+       bridge_handoff(sc, dst_if, m, 0);
 }
 
 /*
@@ -2239,11 +2389,62 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
         * In all cases if the packet is destined for us via our MAC
         * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
         * repeat the source MAC out the same interface.
+        *
+        * This first test against our bridge MAC is the fast-path.
+        *
+        * NOTE!  The bridge interface can serve as an endpoint for
+        *        communication but normally there are no IPs associated
+        *        with it so you cannot route through it.  Instead what
+        *        you do is point your default route *THROUGH* the bridge
+        *        to the actual default router for one of the bridged spaces.
+        *
+        *        Another possibility is to put all your IP specifications
+        *        on the bridge instead of on the individual interfaces.  If
+        *        you do this it should be possible to use the bridge as an
+        *        end point and route (rather than switch) through it using
+        *        the default route or ipfw forwarding rules.
         */
+
+       /*
+        * Acquire header
+        */
+       if (m->m_len < ETHER_HDR_LEN) {
+               m = m_pullup(m, ETHER_HDR_LEN);
+               if (m == NULL)
+                       goto out;
+       }
        eh = mtod(m, struct ether_header *);
        m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
        bcopy(eh, &m->m_pkthdr.br.ether, sizeof(*eh));
 
+       if ((bridge_debug & 1) &&
+           (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
+           ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
+               kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
+                       "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
+                       "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
+                       eh->ether_dhost[0],
+                       eh->ether_dhost[1],
+                       eh->ether_dhost[2],
+                       eh->ether_dhost[3],
+                       eh->ether_dhost[4],
+                       eh->ether_dhost[5],
+                       eh->ether_shost[0],
+                       eh->ether_shost[1],
+                       eh->ether_shost[2],
+                       eh->ether_shost[3],
+                       eh->ether_shost[4],
+                       eh->ether_shost[5],
+                       eh->ether_type,
+                       ((u_char *)IF_LLADDR(bifp))[0],
+                       ((u_char *)IF_LLADDR(bifp))[1],
+                       ((u_char *)IF_LLADDR(bifp))[2],
+                       ((u_char *)IF_LLADDR(bifp))[3],
+                       ((u_char *)IF_LLADDR(bifp))[4],
+                       ((u_char *)IF_LLADDR(bifp))[5]
+               );
+       }
+
        if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
                /*
                 * If the packet is for us, set the packets source as the
@@ -2283,7 +2484,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
                bridge_span(sc, m);
 
        if (m->m_flags & (M_BCAST | M_MCAST)) {
-               /* Tap off 802.1D packets; they do not get forwarded. */
+               /*
+                * Tap off 802.1D packets; they do not get forwarded.
+                */
                if (memcmp(eh->ether_dhost, bstp_etheraddr,
                            ETHER_ADDR_LEN) == 0) {
                        ifnet_serialize_all(bifp);
@@ -2295,6 +2498,10 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
                        goto out;
                }
 
+               /*
+                * Other than 802.1D packets, ignore packets if the
+                * interface is not in a good state.
+                */
                if (bif->bif_flags & IFBIF_STP) {
                        switch (bif->bif_state) {
                        case BSTP_IFSTATE_L1BLOCKING:
@@ -2344,20 +2551,41 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
                         * case the packet gets routed.
                         */
                        mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
-                       ether_reinput_oncpu(bifp, mc2, REINPUT_KEEPRCVIF);
+                       ether_reinput_oncpu(bifp, mc2, 0);
                }
 
                /* Return the original packet for local processing. */
                goto out;
        }
 
+       /*
+        * Input of a unicast packet.  We have to allow unicast packets
+        * input from links in the BLOCKING state.
+        *
+        * NOTE: We explicitly ignore normal packets received on a link
+        *       in the BLOCKING state.  The point of being in that state
+        *       is to avoid getting duplicate packets.
+        *
+        *       HOWEVER, if LINK2 is set the normal spanning tree code
+        *       will mark an interface BLOCKING to avoid multi-cast/broadcast
+        *       loops.  Unicast packets CAN still loop if we allow the
+        *       case (hence we only do it in LINK2), but it isn't quite as
+        *       bad as a broadcast packet looping.
+        */
        if (bif->bif_flags & IFBIF_STP) {
                switch (bif->bif_state) {
-               case BSTP_IFSTATE_L1BLOCKING:
+#if 0
                case BSTP_IFSTATE_BLOCKING:
+                       if (sc->sc_ifp->if_flags & IFF_LINK2)
+                               break;
+                       /* fall through */
+#endif
+               case BSTP_IFSTATE_L1BLOCKING:
                case BSTP_IFSTATE_LISTENING:
                case BSTP_IFSTATE_DISABLED:
                        goto out;
+               default:
+                       break;
                }
        }
 
@@ -2367,34 +2595,38 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
         * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
         * is followed by breaking out of the loop.
         */
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (bif->bif_ifp->if_type != IFT_ETHER)
                        continue;
 
                /*
-                * It is destined for us.  Reinput on the same interface
-                * it came in on so things like ARP responses get assigned
-                * to the correct member (the incoming interface) and not
-                * to the member which happens to have the matching dhost.
+                * It is destined for an interface linked to the bridge.
+                * We want the bridge itself to take care of link level
+                * forwarding to member interfaces so reinput on the bridge.
+                * i.e. if you ping an IP on a target interface associated
+                * with the bridge, the arp is-at response should indicate
+                * the bridge MAC.
                 */
                if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
-                   ETHER_ADDR_LEN) == 0) {
+                          ETHER_ADDR_LEN) == 0) {
                        if (bif->bif_ifp != ifp) {
                                /* XXX loop prevention */
                                m->m_flags |= M_ETHER_BRIDGED;
-                               new_ifp = bif->bif_ifp;
                        }
                        if (bif->bif_flags & IFBIF_LEARNING) {
                                bridge_rtupdate(sc, eh->ether_shost,
                                                ifp, IFBAF_DYNAMIC);
                        }
+                       new_ifp = bifp; /* not bif->bif_ifp */
                        m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
                        goto out;
                }
 
-               /* We just received a packet that we sent out. */
+               /*
+                * Ignore received packets that were sent by us.
+                */
                if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
-                   ETHER_ADDR_LEN) == 0) {
+                          ETHER_ADDR_LEN) == 0) {
                        m_freem(m);
                        m = NULL;
                        goto out;
@@ -2406,8 +2638,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
        m = NULL;
 
        /*
-        * Leave m_pkthdr.rcvif alone, so ARP replies are
-        * processed as coming in on the correct interface.
+        * ether_reinput_oncpu() will reprocess rcvif as
+        * coming from new_ifp (since we do not specify
+        * REINPUT_KEEPRCVIF).
         */
 out:
        if (new_ifp != NULL) {
@@ -2415,8 +2648,7 @@ out:
                 * Clear the bridge flag for local processing in
                 * case the packet gets routed.
                 */
-               ether_reinput_oncpu(new_ifp, m,
-                                   REINPUT_KEEPRCVIF|REINPUT_RUNBPF);
+               ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
                m = NULL;
        }
        return (m);
@@ -2445,7 +2677,7 @@ bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
         * Following loop is MPSAFE; nothing is blocking
         * in the loop body.
         */
-       LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                dst_if = bif->bif_ifp;
 
                if (bif->bif_flags & IFBIF_STP) {
@@ -2464,7 +2696,7 @@ bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
                if ((dst_if->if_flags & IFF_RUNNING) == 0)
                        continue;
 
-               if (LIST_NEXT(bif, bif_next) == NULL) {
+               if (TAILQ_NEXT(bif, bif_next) == NULL) {
                        mc = m;
                        used = 1;
                } else {
@@ -2502,10 +2734,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
        ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
 
        eh = mtod(m, struct ether_header *);
-       if (memcmp(eh->ether_dhost, IF_LLADDR(src_if), ETHER_ADDR_LEN) == 0)
-               from_us = 1;
-       else
-               from_us = 0;
+       from_us = bridge_from_us(sc, eh);
 
        if (inet_pfil_hook.ph_hashooks > 0
 #ifdef INET6
@@ -2524,7 +2753,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                        return;
        }
 
-       LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
+       TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
                dst_if = bif->bif_ifp;
                if (dst_if == src_if)
                        continue;
@@ -2545,7 +2774,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                if ((dst_if->if_flags & IFF_RUNNING) == 0)
                        continue;
 
-               if (LIST_NEXT(bif, bif_next) == NULL) {
+               if (TAILQ_NEXT(bif, bif_next) == NULL) {
                        mc = m;
                        used = 1;
                } else {
@@ -2571,11 +2800,11 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
                        if (mc == NULL)
                                continue;
                }
-               bridge_handoff(dst_if, mc, from_us);
+               bridge_handoff(sc, dst_if, mc, from_us);
 
                if (nbif != NULL && !nbif->bif_onlist) {
                        KKASSERT(bif->bif_onlist);
-                       nbif = LIST_NEXT(bif, bif_next);
+                       nbif = TAILQ_NEXT(bif, bif_next);
                }
        }
        if (used == 0)
@@ -2598,7 +2827,7 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m)
        bifp = sc->sc_ifp;
        ifnet_serialize_all(bifp);
 
-       LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+       TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
                dst_if = bif->bif_ifp;
 
                if ((dst_if->if_flags & IFF_RUNNING) == 0)
@@ -3214,7 +3443,8 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
 
        lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
        if (lbrt == NULL) {
-               LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash], brt, brt_hash);
+               LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
+                                 brt, brt_hash);
                goto out;
        }
 
@@ -3756,16 +3986,17 @@ bridge_enqueue_handler(netmsg_t msg)
        m = nmp->nm_packet;
        dst_ifp = nmp->base.lmsg.u.ms_resultp;
 
-       bridge_handoff(dst_ifp, m, 1);
+       bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
 }
 
 static void
-bridge_handoff(struct ifnet *dst_ifp, struct mbuf *m, int from_us)
+bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
+              struct mbuf *m, int from_us)
 {
        struct mbuf *m0;
        struct ifnet *bifp;
 
-       bifp = ((struct bridge_softc *)dst_ifp->if_bridge)->sc_ifp;
+       bifp = sc->sc_ifp;
 
        /* We may be sending a fragment so traverse the mbuf */
        for (; m; m = m0) {
@@ -3776,9 +4007,10 @@ bridge_handoff(struct ifnet *dst_ifp, struct mbuf *m, int from_us)
 
                /*
                 * If being sent from our host override ether_shost
-                * so any replies go the correct interface.  This is
-                * mandatory or ARP replies will wind up on the wrong
-                * interface.
+                * with the bridge MAC.  This is mandatory for ARP
+                * so things don't get confused.  In particular we
+                * don't want ARPs to get associated with link interfaces
+                * under the bridge which might or might not stay valid.
                 *
                 * Also override ether_shost when relaying a packet out
                 * the same interface it came in on, due to multi-homed
@@ -3790,14 +4022,14 @@ bridge_handoff(struct ifnet *dst_ifp, struct mbuf *m, int from_us)
                if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
                        m_copyback(m,
                                   offsetof(struct ether_header, ether_shost),
-                                  ETHER_ADDR_LEN, IF_LLADDR(dst_ifp));
+                                  ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
                } else if ((bifp->if_flags & IFF_LINK0) &&
                           (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
                        m_copyback(m,
                                   offsetof(struct ether_header, ether_shost),
                                   ETHER_ADDR_LEN,
                                   m->m_pkthdr.br.ether.ether_shost);
-               }
+               } /* else retain shost */
 
                if (ifq_is_enabled(&dst_ifp->if_snd))
                        altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
@@ -3855,11 +4087,15 @@ bridge_add_bif_handler(netmsg_t msg)
 
        bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
        bif->bif_ifp = amsg->br_bif_ifp;
-       bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
        bif->bif_onlist = 1;
        bif->bif_info = amsg->br_bif_info;
 
-       LIST_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
+       /*
+        * runs through bif_info
+        */
+       bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+
+       TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
 
        ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
 }
@@ -3899,10 +4135,10 @@ bridge_del_bif_handler(netmsg_t msg)
 
        /* Remove the bif from the current CPU's iflist */
        bif->bif_onlist = 0;
-       LIST_REMOVE(bif, bif_next);
+       TAILQ_REMOVE(dmsg->br_bif_list, bif, bif_next);
 
        /* Save the removed bif for later freeing */
-       LIST_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
+       TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
 
        ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
 }
@@ -3923,41 +4159,3 @@ bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
 
        ifnet_domsg(&dmsg.base.lmsg, 0);
 }
-
-static void
-bridge_set_bifflags_handler(netmsg_t msg)
-{
-       struct netmsg_brsflags *smsg = (struct netmsg_brsflags *)msg;
-       struct bridge_softc *sc;
-       struct bridge_iflist *bif;
-
-       sc = smsg->br_softc;
-
-       /*
-        * Locate the bif associated with the br_bif_info
-        * on the current CPU
-        */
-       bif = bridge_lookup_member_ifinfo(sc, smsg->br_bif_info);
-       KKASSERT(bif != NULL && bif->bif_onlist);
-
-       bif->bif_flags = smsg->br_bif_flags;
-
-       ifnet_forwardmsg(&smsg->base.lmsg, mycpuid + 1);
-}
-
-static void
-bridge_set_bifflags(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
-                   uint32_t bif_flags)
-{
-       struct netmsg_brsflags smsg;
-
-       ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
-
-       netmsg_init(&smsg.base, NULL, &curthread->td_msgport,
-                   0, bridge_set_bifflags_handler);
-       smsg.br_softc = sc;
-       smsg.br_bif_info = bif_info;
-       smsg.br_bif_flags = bif_flags;
-
-       ifnet_domsg(&smsg.base.lmsg, 0);
-}
index 0bb866f..f40f8b9 100644 (file)
@@ -120,11 +120,16 @@ struct ifbreq {
        uint8_t         ifbr_priority;          /* member if STP priority */
        uint8_t         ifbr_path_cost;         /* member if STP cost */
        uint8_t         ifbr_portno;            /* member if port number */
-       uint64_t        ifbr_designated_root;   /* current root id */
-       uint64_t        ifbr_designated_bridge; /* current bridge id */
-       uint32_t        ifbr_designated_cost;   /* current cost calc */
-       uint16_t        ifbr_designated_port;   /* current port calc */
+       uint64_t        ifbr_designated_root;   /* synthesized */
+       uint64_t        ifbr_designated_bridge;
+       uint32_t        ifbr_designated_cost;
+       uint16_t        ifbr_designated_port;
        uint16_t        unused01;
+       uint64_t        ifbr_peer_root;         /* from peer */
+       uint64_t        ifbr_peer_bridge;       /* from peer */
+       uint32_t        ifbr_peer_cost;         /* from peer */
+       uint16_t        ifbr_peer_port;         /* from peer */
+       uint16_t        unused02;
 };
 
 /* BRDGGIFFLAGS, BRDGSIFFLAGS */
@@ -132,8 +137,13 @@ struct ifbreq {
 #define        IFBIF_DISCOVER          0x02    /* if sends packets w/ unknown dest. */
 #define        IFBIF_STP               0x04    /* if participates in spanning tree */
 #define        IFBIF_SPAN              0x08    /* if is a span port */
+#define        IFBIF_DESIGNATED        0x10    /* mostly age timer expired */
+#define        IFBIF_ROOT              0x20    /* selected root or near-root */
 
-#define        IFBIFBITS       "\020\1LEARNING\2DISCOVER\3STP\4SPAN"
+#define IFBIF_KEEPMASK         (IFBIF_SPAN | IFBIF_DESIGNATED | IFBIF_ROOT)
+
+#define        IFBIFBITS       "\020\1LEARNING\2DISCOVER\3STP\4SPAN\5DESIGNATED" \
+                       "\6ROOT"
 
 /* BRDGFLUSH */
 #define        IFBF_FLUSHDYN           0x00    /* flush learned addresses only */
@@ -238,17 +248,19 @@ struct bstp_tcn_unit {
  * Bridge interface entry.
  */
 struct bridge_ifinfo {
-       uint64_t                bifi_designated_root;
-       uint64_t                bifi_designated_bridge;
+       uint64_t                bifi_peer_root;
+       uint64_t                bifi_peer_bridge;
+       uint32_t                bifi_peer_cost;
+       uint16_t                bifi_peer_port;
+       uint16_t                bifi_unused02;
+       uint16_t                bifi_port_id;
        uint32_t                bifi_path_cost;
-       uint32_t                bifi_designated_cost;
        struct bridge_timer     bifi_hold_timer;
        struct bridge_timer     bifi_message_age_timer;
        struct bridge_timer     bifi_forward_delay_timer;
        struct bridge_timer     bifi_link1_timer;
        struct bstp_config_unit bifi_config_bpdu;
-       uint16_t                bifi_port_id;
-       uint16_t                bifi_designated_port;
+       uint32_t                bifi_flags;     /* member if flags */
        uint8_t                 bifi_state;
        uint8_t                 bifi_topology_change_acknowledge;
        uint8_t                 bifi_config_pending;
@@ -258,35 +270,36 @@ struct bridge_ifinfo {
        int                     bifi_mutecap;   /* member muted caps */
 };
 
-#define bif_designated_root            bif_info->bifi_designated_root
-#define bif_designated_bridge          bif_info->bifi_designated_bridge
+#define bif_peer_root                  bif_info->bifi_peer_root
+#define bif_peer_bridge                        bif_info->bifi_peer_bridge
+#define bif_peer_cost                  bif_info->bifi_peer_cost
+#define bif_peer_port                  bif_info->bifi_peer_port
 #define bif_path_cost                  bif_info->bifi_path_cost
-#define bif_designated_cost            bif_info->bifi_designated_cost
 #define bif_hold_timer                 bif_info->bifi_hold_timer
 #define bif_message_age_timer          bif_info->bifi_message_age_timer
 #define bif_forward_delay_timer                bif_info->bifi_forward_delay_timer
 #define bif_link1_timer                        bif_info->bifi_link1_timer
 #define bif_config_bpdu                        bif_info->bifi_config_bpdu
 #define bif_port_id                    bif_info->bifi_port_id
-#define bif_designated_port            bif_info->bifi_designated_port
 #define bif_state                      bif_info->bifi_state
+#define bif_flags                      bif_info->bifi_flags
 #define bif_topology_change_acknowledge        \
        bif_info->bifi_topology_change_acknowledge
 #define bif_config_pending             bif_info->bifi_config_pending
 #define bif_change_detection_enabled   bif_info->bifi_change_detection_enabled
 #define bif_priority                   bif_info->bifi_priority
+#define bif_message_age_timer          bif_info->bifi_message_age_timer
 
 /*
  * Bridge interface list entry.
  */
 struct bridge_iflist {
-       LIST_ENTRY(bridge_iflist) bif_next;
+       TAILQ_ENTRY(bridge_iflist) bif_next;
        struct ifnet            *bif_ifp;       /* member if */
-       uint32_t                bif_flags;      /* member if flags */
        int                     bif_onlist;
        struct bridge_ifinfo    *bif_info;
 };
-LIST_HEAD(bridge_iflist_head, bridge_iflist);
+TAILQ_HEAD(bridge_iflist_head, bridge_iflist);
 
 /*
  * Bridge route info.
@@ -318,10 +331,13 @@ struct bridge_softc {
        struct arpcom           sc_arp;
        struct ifnet            *sc_ifp;        /* make this an interface */
        LIST_ENTRY(bridge_softc) sc_list;
-       uint64_t                sc_designated_root;
        uint64_t                sc_bridge_id;
-       struct bridge_ifinfo    *sc_root_port;
-       uint32_t                sc_root_path_cost;
+       uint64_t                sc_designated_root;
+       uint64_t                sc_designated_bridge;
+       uint32_t                sc_designated_cost;     /* root path cost */
+       uint16_t                sc_designated_port;
+       uint16_t                sc_unused01;
+       struct bridge_iflist    *sc_root_port;
        uint16_t                sc_max_age;
        uint16_t                sc_hello_time;
        uint16_t                sc_forward_delay;
@@ -368,6 +384,9 @@ void        bstp_stop(struct bridge_softc *);
 void   bstp_input(struct bridge_softc *, struct bridge_iflist *,
                   struct mbuf *);
 void   bstp_tick_handler(netmsg_t);
+int    bstp_supersedes_port_info(struct bridge_softc *,
+                  struct bridge_iflist *);
+
 
 void   bridge_enqueue(struct ifnet *, struct mbuf *);
 
index 41d93c3..5dcc9d7 100644 (file)
@@ -134,6 +134,7 @@ static void ether_restore_header(struct mbuf **, const struct ether_header *,
 struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
 int (*bridge_output_p)(struct ifnet *, struct mbuf *);
 void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+struct ifnet *(*bridge_interface_p)(void *if_bridge);
 
 static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
                              struct sockaddr *);
@@ -152,6 +153,7 @@ static boolean_t ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
 static int ether_ipfw;
 static u_int ether_restore_hdr;
 static u_int ether_prepend_hdr;
+static int ether_debug;
 
 #ifdef RSS_DEBUG
 static u_int ether_pktinfo_try;
@@ -162,6 +164,8 @@ static u_int ether_rss_nohash;
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
+SYSCTL_INT(_net_link_ether, OID_AUTO, debug, CTLFLAG_RW,
+          &ether_debug, 0, "Ether debug");
 SYSCTL_INT(_net_link_ether, OID_AUTO, ipfw, CTLFLAG_RW,
           &ether_ipfw, 0, "Pass ether pkts through firewall");
 SYSCTL_UINT(_net_link_ether, OID_AUTO, restore_hdr, CTLFLAG_RW,
@@ -427,6 +431,20 @@ bad:
 }
 
 /*
+ * Returns the bridge interface an ifp is associated with, or ifp
+ * itself if the bridge_interface_p hook is not set.
+ *
+ * Only call if ifp->if_bridge != NULL.
+ */
+struct ifnet *
+ether_bridge_interface(struct ifnet *ifp)
+{
+       if (bridge_interface_p)
+               return(bridge_interface_p(ifp->if_bridge));
+       return (ifp);
+}
+
+/*
  * Ethernet link layer output routine to send a raw frame to the device.
  *
  * This assumes that the 14 byte Ethernet header is present and contiguous
@@ -1081,8 +1099,35 @@ ether_demux_oncpu(struct ifnet *ifp, struct mbuf *m)
         */
        if (((ifp->if_flags & (IFF_PROMISC | IFF_PPROMISC)) == IFF_PROMISC) &&
            !ETHER_IS_MULTICAST(eh->ether_dhost) &&
-           bcmp(eh->ether_dhost, IFP2AC(ifp)->ac_enaddr, ETHER_ADDR_LEN))
-               discard = 1;
+           bcmp(eh->ether_dhost, IFP2AC(ifp)->ac_enaddr, ETHER_ADDR_LEN)) {
+               if (ether_debug & 1) {
+                       kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
+                               "%02x:%02x:%02x:%02x:%02x:%02x "
+                               "%04x vs %02x:%02x:%02x:%02x:%02x:%02x\n",
+                               eh->ether_dhost[0],
+                               eh->ether_dhost[1],
+                               eh->ether_dhost[2],
+                               eh->ether_dhost[3],
+                               eh->ether_dhost[4],
+                               eh->ether_dhost[5],
+                               eh->ether_shost[0],
+                               eh->ether_shost[1],
+                               eh->ether_shost[2],
+                               eh->ether_shost[3],
+                               eh->ether_shost[4],
+                               eh->ether_shost[5],
+                               eh->ether_type,
+                               ((u_char *)IFP2AC(ifp)->ac_enaddr)[0],
+                               ((u_char *)IFP2AC(ifp)->ac_enaddr)[1],
+                               ((u_char *)IFP2AC(ifp)->ac_enaddr)[2],
+                               ((u_char *)IFP2AC(ifp)->ac_enaddr)[3],
+                               ((u_char *)IFP2AC(ifp)->ac_enaddr)[4],
+                               ((u_char *)IFP2AC(ifp)->ac_enaddr)[5]
+                       );
+               }
+               if ((ether_debug & 2) == 0)
+                       discard = 1;
+       }
 
 post_stats:
        if (IPFW_LOADED && ether_ipfw != 0 && !discard) {
index 976bb71..6cd1c82 100644 (file)
@@ -739,6 +739,7 @@ void        ether_input_chain_init(struct mbuf_chain *);
 void   ether_input_dispatch(struct mbuf_chain *);
 int    ether_output_frame(struct ifnet *, struct mbuf *);
 int    ether_ioctl(struct ifnet *, int, caddr_t);
+struct ifnet *ether_bridge_interface(struct ifnet *ifp);
 uint32_t       ether_crc32_le(const uint8_t *, size_t);
 uint32_t       ether_crc32_be(const uint8_t *, size_t);
 
index 016f5f2..35152ea 100644 (file)
@@ -66,6 +66,7 @@
 #include <net/ifq_var.h>
 #include <net/if_arp.h>
 #include <net/if_clone.h>
+#include <net/if_media.h>
 #include <net/route.h>
 #include <sys/devfs.h>
 
@@ -107,6 +108,7 @@ static void         tapifinit       (void *);
 static void            tapifstop(struct tap_softc *, int);
 static void            tapifflags(struct tap_softc *);
 
+
 /* character device */
 static d_open_t                tapopen;
 static d_clone_t       tapclone;
@@ -550,14 +552,16 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
 {
        struct tap_softc        *tp = (struct tap_softc *)(ifp->if_softc);
        struct ifstat           *ifs = NULL;
-       int                      dummy;
+       struct ifmediareq       *ifmr = NULL;
+       int                     error = 0;
+       int                     dummy;
 
        switch (cmd) {
                case SIOCSIFADDR:
                case SIOCGIFADDR:
                case SIOCSIFMTU:
-                       dummy = ether_ioctl(ifp, cmd, data);
-                       return (dummy);
+                       error = ether_ioctl(ifp, cmd, data);
+                       break;
 
                case SIOCSIFFLAGS:
                        tapifflags(tp);
@@ -567,6 +571,27 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
                case SIOCDELMULTI:
                        break;
 
+               case SIOCGIFMEDIA:
+                       /*
+                        * The bridge code needs this when running the
+                        * spanning tree protocol.
+                        */
+                       ifmr = (struct ifmediareq *)data;
+                       dummy = ifmr->ifm_count;
+                       ifmr->ifm_count = 1;
+                       ifmr->ifm_status = IFM_AVALID;
+                       ifmr->ifm_active = IFM_ETHER;
+                       if (tp->tap_flags & TAP_OPEN)
+                               ifmr->ifm_status |= IFM_ACTIVE;
+                       ifmr->ifm_current = ifmr->ifm_active;
+                       if (dummy >= 1) {
+                               int media = IFM_ETHER;
+                               error = copyout(&media,
+                                               ifmr->ifm_ulist,
+                                               sizeof(int));
+                       }
+                       break;
+
                case SIOCGIFSTATUS:
                        ifs = (struct ifstat *)data;
                        dummy = strlen(ifs->ascii);
@@ -586,10 +611,11 @@ tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
                        break;
 
                default:
-                       return (EINVAL);
+                       error = EINVAL;
+                       break;
        }
 
-       return (0);
+       return (error);
 }
 
 
index 5c9bdb0..10f2353 100644 (file)
@@ -141,6 +141,8 @@ static      LIST_HEAD(, llinfo_arp) llinfo_arp_list[MAXCPU];
 static int     arp_maxtries = 5;
 static int     useloopback = 1; /* use loopback interface for local traffic */
 static int     arp_proxyall = 0;
+static int     arp_refresh = 60; /* refresh arp cache ~60 (not impl yet) */
+static int     arp_restricted_match = 0;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
           &arp_maxtries, 0, "ARP resolution attempts before returning error");
@@ -148,6 +150,10 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
           &useloopback, 0, "Use the loopback interface for local traffic");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
           &arp_proxyall, 0, "Enable proxy ARP for all suitable requests");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, restricted_match, CTLFLAG_RW,
+          &arp_restricted_match, 0, "Only match against the sender");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, refresh, CTLFLAG_RW,
+          &arp_refresh, 0, "Preemptively refresh the ARP");
 
 static void    arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static void    arprequest(struct ifnet *, const struct in_addr *,
@@ -405,6 +411,14 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
 {
        struct mbuf *m;
 
+       if (enaddr == NULL) {
+               if (ifp->if_bridge) {
+                       enaddr = IF_LLADDR(ether_bridge_interface(ifp));
+               } else {
+                       enaddr = IF_LLADDR(ifp);
+               }
+       }
+
        m = arpreq_alloc(ifp, sip, tip, enaddr);
        if (m == NULL)
                return;
@@ -423,6 +437,13 @@ arprequest_async(struct ifnet *ifp, const struct in_addr *sip,
        struct mbuf *m;
        struct netmsg_packet *pmsg;
 
+       if (enaddr == NULL) {
+               if (ifp->if_bridge) {
+                       enaddr = IF_LLADDR(ether_bridge_interface(ifp));
+               } else {
+                       enaddr = IF_LLADDR(ifp);
+               }
+       }
        m = arpreq_alloc(ifp, sip, tip, enaddr);
        if (m == NULL)
                return;
@@ -499,7 +520,7 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
                        arprequest(ifp,
                                   &SIN(rt->rt_ifa->ifa_addr)->sin_addr,
                                   &SIN(dst)->sin_addr,
-                                  IF_LLADDR(ifp));
+                                  NULL);
                        la->la_preempt--;
                }
 
@@ -533,7 +554,7 @@ arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
                                arprequest(ifp,
                                           &SIN(rt->rt_ifa->ifa_addr)->sin_addr,
                                           &SIN(dst)->sin_addr,
-                                          IF_LLADDR(ifp));
+                                          NULL);
                        } else {
                                rt->rt_flags |= RTF_REJECT;
                                rt->rt_expire += arpt_down;
@@ -655,18 +676,37 @@ arp_update_oncpu(struct mbuf *m, in_addr_t saddr, boolean_t create,
        if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
                struct in_addr isaddr = { saddr };
 
-               /* the following is not an error when doing bridging */
+               /*
+                * Normally arps coming in on the wrong interface are ignored,
+                * but if we are bridging and the two interfaces belong to
+                * the same bridge, or one is a member of the bridge which
+                * is the other, then it isn't an error.
+                */
                if (rt->rt_ifp != ifp) {
+                       /*
+                        * (1) ifp and rt_ifp both members of same bridge
+                        * (2) rt_ifp member of bridge ifp
+                        * (3) ifp member of bridge rt_ifp
+                        *
+                        * Always replace rt_ifp with the bridge ifc.
+                        */
+                       struct ifnet *nifp;
+
                        if (ifp->if_bridge &&
                            rt->rt_ifp->if_bridge == ifp->if_bridge) {
-                               rt->rt_ifp = ifp;
-                               sdl->sdl_type = ifp->if_type;
-                               sdl->sdl_index = ifp->if_index;
-                               if (dologging && log_arp_wrong_iface < 2)
-                                       dologging = 0;
+                               nifp = ether_bridge_interface(ifp);
+                       } else if (rt->rt_ifp->if_bridge &&
+                                  ether_bridge_interface(rt->rt_ifp) == ifp) {
+                               nifp = ifp;
+                       } else if (ifp->if_bridge &&
+                                  ether_bridge_interface(ifp) == rt->rt_ifp) {
+                               nifp = rt->rt_ifp;
+                       } else {
+                               nifp = NULL;
                        }
-                       if (dologging && log_arp_wrong_iface) {
 
+                       if ((log_arp_wrong_iface == 1 && nifp == NULL) ||
+                           log_arp_wrong_iface == 2) {
                                log(LOG_ERR,
                                    "arp: %s is on %s "
                                    "but got reply from %*D on %s\n",
@@ -675,7 +715,16 @@ arp_update_oncpu(struct mbuf *m, in_addr_t saddr, boolean_t create,
                                    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
                                    ifp->if_xname);
                        }
-                       return;
+                       if (nifp == NULL)
+                               return;
+
+                       /*
+                        * nifp is our man!  Replace rt_ifp and adjust
+                        * the sdl.
+                        */
+                       ifp = rt->rt_ifp = nifp;
+                       sdl->sdl_type = ifp->if_type;
+                       sdl->sdl_index = ifp->if_index;
                }
                if (sdl->sdl_alen &&
                    bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) {
@@ -722,8 +771,9 @@ arp_update_oncpu(struct mbuf *m, in_addr_t saddr, boolean_t create,
                        return;
                }
                memcpy(LLADDR(sdl), ar_sha(ah), sdl->sdl_alen = ah->ar_hln);
-               if (rt->rt_expire != 0)
+               if (rt->rt_expire != 0) {
                        rt->rt_expire = time_second + arpt_keep;
+               }
                rt->rt_flags &= ~RTF_REJECT;
                la->la_asked = 0;
                la->la_preempt = arp_maxtries;
@@ -832,8 +882,11 @@ in_arpinput(struct mbuf *m)
         * then accept the address.
         *
         * For a bridge, we accept the address if the receive interface and
-        * the interface owning the address are on the same bridge.
-        * (This will change slightly when we have clusters of interfaces).
+        * the interface owning the address are on the same bridge, and
+        * use the bridge MAC as the is-at response.  The bridge will be
+        * responsible for handling the packet.
+        *
+        * (1) Check target IP against our local IPs
         */
        LIST_FOREACH(iac, INADDR_HASH(itaddr.s_addr), ia_hash) {
                ia = iac->ia;
@@ -845,13 +898,27 @@ in_arpinput(struct mbuf *m)
                if (ia->ia_ifp->if_type == IFT_CARP)
                        continue;
 #endif
+               if (ifp->if_bridge && ia->ia_ifp &&
+                   ifp->if_bridge == ia->ia_ifp->if_bridge) {
+                       ifp = ether_bridge_interface(ifp);
+                       goto match;
+               }
+               if (ia->ia_ifp && ia->ia_ifp->if_bridge &&
+                   ether_bridge_interface(ia->ia_ifp) == ifp) {
+                       goto match;
+               }
+               if (ifp->if_bridge && ether_bridge_interface(ifp) ==
+                   ia->ia_ifp) {
+                       goto match;
+               }
                if (ia->ia_ifp == ifp)
                        goto match;
 
-               if (ifp->if_bridge && ia->ia_ifp && 
-                   ifp->if_bridge == ia->ia_ifp->if_bridge)
-                       goto match;
        }
+
+       /*
+        * (2) Check sender IP against our local IPs
+        */
        LIST_FOREACH(iac, INADDR_HASH(isaddr.s_addr), ia_hash) {
                ia = iac->ia;
 
@@ -862,13 +929,24 @@ in_arpinput(struct mbuf *m)
                if (ia->ia_ifp->if_type == IFT_CARP)
                        continue;
 #endif
-               if (ia->ia_ifp == ifp)
+               if (ifp->if_bridge && ia->ia_ifp &&
+                   ifp->if_bridge == ia->ia_ifp->if_bridge) {
+                       ifp = ether_bridge_interface(ifp);
                        goto match;
+               }
+               if (ia->ia_ifp && ia->ia_ifp->if_bridge &&
+                   ether_bridge_interface(ia->ia_ifp) == ifp) {
+                       goto match;
+               }
+               if (ifp->if_bridge && ether_bridge_interface(ifp) ==
+                   ia->ia_ifp) {
+                       goto match;
+               }
 
-               if (ifp->if_bridge && ia->ia_ifp &&
-                   ifp->if_bridge == ia->ia_ifp->if_bridge)
+               if (ia->ia_ifp == ifp)
                        goto match;
        }
+
        /*
         * No match, use the first inet address on the receive interface
         * as a dummy address for the rest of the function.
@@ -881,6 +959,7 @@ in_arpinput(struct mbuf *m)
                        goto match;
                }
        }
+
        /*
         * If we got here, we didn't find any suitable interface,
         * so drop the packet.
@@ -914,6 +993,17 @@ match:
        }
        if (ifp->if_flags & IFF_STATICARP)
                goto reply;
+
+       /*
+        * When arp_restricted_match is true and the ARP response is not
+        * specifically targeted to me, ignore it.  Otherwise the entry
+        * timeout may be updated for an old MAC.
+        */
+       if (arp_restricted_match && itaddr.s_addr != myaddr.s_addr) {
+               m_freem(m);
+               return;
+       }
+
 #ifdef SMP
        netmsg_init(&msg.base, NULL, &curthread->td_msgport,
                    0, arp_update_msghandler);
@@ -987,10 +1077,10 @@ reply:
        ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
        switch (ifp->if_type) {
        case IFT_ETHER:
-       /*
-        * May not be correct for types not explictly
-        * listed, but it is our best guess.
-        */
+               /*
+                * May not be correct for types not explicitly
+                * listed, but it is our best guess.
+                */
        default:
                eh = (struct ether_header *)sa.sa_data;
                memcpy(eh->ether_dhost, ar_tha(ah), sizeof eh->ether_dhost);
@@ -1107,7 +1197,7 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
        if (IA_SIN(ifa)->sin_addr.s_addr != INADDR_ANY) {
                arprequest_async(ifp, &IA_SIN(ifa)->sin_addr,
-                                &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
+                                &IA_SIN(ifa)->sin_addr, NULL);
        }
        ifa->ifa_rtrequest = arp_rtrequest;
        ifa->ifa_flags |= RTF_CLONING;