kernel - Major bridging functionality added (bonding)
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *      - Currently only supports Ethernet-like interfaces (Ethernet,
77  *        802.11, VLANs on Ethernet, etc.).  Figure out a nice way
78  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *        consider heterogeneous bridges).
80  *
81  *
82  * The bridge's route information is duplicated on each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
100  * We choose to put timeout and dst_ifp into the shared part, so updating
101  * them is cheaper than using message forwarding.  Also there is no need
102  * to use a spinlock to protect the updates: timeout and dst_ifp are not
103  * related, and the order in which each field is updated does not matter.
104  * Cache pollution from the shared part should not be heavy: in a stable
105  * setup, dst_ifp will probably not change during the rtnode's lifetime,
106  * while timeout is refreshed once per second; most of the time, timeout
107  * and dst_ifp are only read.
108  *
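 * Illustrative sketch of the split described above (not the actual
 * definitions, which live in the bridge headers and may differ in
 * detail; the field names are the ones used later in this file):
 *
 *	struct bridge_rtinfo {			// shared part, single instance
 *		struct ifnet	*bri_ifp;	// "dst_ifp" above
 *		unsigned long	bri_expire;	// "timeout" above
 *		uint8_t		bri_flags;	// "flags" above
 *	};
 *
 *	struct bridge_rtnode {			// per-cpu part, one per cpu
 *		uint8_t			 brt_addr[ETHER_ADDR_LEN]; // dst eaddr
 *		struct bridge_rtinfo	*brt_info; // -> shared bridge_rtinfo
 *	};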
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                               tcp_thread2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *      ifnet0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |
125  *  alloc rtinfo
126  *  alloc rtnode
127  * install rtnode
128  *        |
129  *        +---------->ifnet1
130  *        : fwd nmsg    |
131  *        : w/ rtinfo   |
132  *        :             |
133  *        :             |
134  *                 alloc rtnode
135  *               (w/ nmsg's rtinfo)
136  *                install rtnode
137  *                      |
138  *                      +---------->ifnet2
139  *                      : fwd nmsg    |
140  *                      : w/ rtinfo   |
141  *                      :             |
142  *                      :         same as ifnet1
143  *                                    |
144  *                                    +---------->ifnet3
145  *                                    : fwd nmsg    |
146  *                                    : w/ rtinfo   |
147  *                                    :             |
148  *                                    :         same as ifnet1
149  *                                               free nmsg
150  *                                                  :
151  *                                                  :
152  *
153  * The netmsgs forwarded between protocol threads and ifnet threads are
154  * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
155  * most cases (route information is too precious not to be installed :).
156  * Since multiple threads may try to install route information for the
157  * same dst eaddr, we look up the route information in ifnet0.  However,
158  * this lookup only needs to be performed on ifnet0, which is the starting
159  * point of the route information installation process.
160  *
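 * A minimal sketch of that allocation pattern (struct netmsg_brsaddr is
 * declared later in this file; the surrounding code and the malloc type
 * used here are illustrative only):
 *
 *	struct netmsg_brsaddr *nmsg;
 *
 *	nmsg = kmalloc(sizeof(*nmsg), M_TEMP, M_WAITOK | M_NULLOK);
 *	if (nmsg == NULL)	// possible only because of M_NULLOK
 *		return;		// give up; the address is simply not cached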
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1             CPU2             CPU3
165  *
166  * netisr0
167  *   |
168  * find suitable rtnodes,
169  * mark their rtinfo dead
170  *   |
171  *   | domsg <------------------------------------------+
172  *   |                                                  | replymsg
173  *   |                                                  |
174  *   V     fwdmsg           fwdmsg           fwdmsg     |
175  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
176  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
177  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
178  *                                                    free dead rtinfos
179  *
180  * All deleting/flushing operations are serialized by netisr0, so each
181  * operation only reaps the route information that it marked dead itself.
182  *
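 * A rough sketch of the per-cpu handler behind this flow.  The reap helper
 * and the next-cpu port lookup are placeholders (not the actual function
 * names); lwkt_forwardmsg()/lwkt_replymsg() are the LWKT message
 * primitives assumed here:
 *
 *	static void
 *	example_rtreap_handler(netmsg_t msg)
 *	{
 *		example_reap_dead_rtnodes_oncpu();  // placeholder: per-cpu reap
 *		if (mycpuid == ncpus - 1)
 *			lwkt_replymsg(&msg->lmsg, 0);  // last cpu: complete the domsg
 *		else
 *			lwkt_forwardmsg(example_next_cpu_port(), &msg->lmsg);
 *	}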
183  *
184  * Bridge route information adding/deleting/flushing:
185  * Since all operations are serialized by the fixed message flow between
186  * ifnet threads, it is not possible to create corrupted per-cpu route
187  * information.
188  *
189  *
190  *
191  * Percpu member interface list iteration with blocking operation:
192  * Since one bridge can only delete one member interface at a time and
193  * the deleted member interface is not freed until after netmsg_service_sync(),
194  * the following scheme is used to make sure that even if a member
195  * interface is ripped from the percpu list during the blocking operation,
196  * the iteration can still keep going:
197  *
198  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
199  *     blocking operation;
200  *     blocking operation;
201  *     ...
202  *     ...
203  *     if (nbif != NULL && !nbif->bif_onlist) {
204  *         KKASSERT(bif->bif_onlist);
205  *         nbif = TAILQ_NEXT(bif, bif_next);
206  *     }
207  * }
208  *
209  * As mentioned above, only one member interface can be unlinked from the
210  * percpu member interface list at a time, so either bif or nbif may be
211  * off the list, but _not_ both.  To keep the iteration going, we only care
212  * about nbif, not bif.  Since a removed member interface will only be freed
213  * after we finish our work, it is safe to access any field in an unlinked
214  * bif (here bif_onlist).  If nbif is no longer on the list, then bif must
215  * be on the list, so we change nbif to the next element of bif and keep going.
216  */
217
218 #include "opt_inet.h"
219 #include "opt_inet6.h"
220
221 #include <sys/param.h>
222 #include <sys/mbuf.h>
223 #include <sys/malloc.h>
224 #include <sys/protosw.h>
225 #include <sys/systm.h>
226 #include <sys/time.h>
227 #include <sys/socket.h> /* for net/if.h */
228 #include <sys/sockio.h>
229 #include <sys/ctype.h>  /* string functions */
230 #include <sys/kernel.h>
231 #include <sys/random.h>
232 #include <sys/sysctl.h>
233 #include <sys/module.h>
234 #include <sys/proc.h>
235 #include <sys/priv.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263
264 #include <net/route.h>
265 #include <sys/in_cksum.h>
266
267 /*
268  * Size of the route hash table.  Must be a power of two.
269  */
270 #ifndef BRIDGE_RTHASH_SIZE
271 #define BRIDGE_RTHASH_SIZE              1024
272 #endif
273
274 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
275
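/*
 * Sketch (not original code): the power-of-two requirement lets a hash
 * value be reduced to a bucket index with a plain bit mask instead of a
 * modulo operation:
 *
 *	idx = hash & BRIDGE_RTHASH_MASK;  // same as hash % BRIDGE_RTHASH_SIZE
 *
 * If the size were not a power of two, SIZE - 1 would not be an all-ones
 * mask and the masked result would not reach every bucket.
 */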
276 /*
277  * Maximum number of addresses to cache.
278  */
279 #ifndef BRIDGE_RTABLE_MAX
280 #define BRIDGE_RTABLE_MAX               100
281 #endif
282
283 /*
284  * Spanning tree defaults.
285  */
286 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
287 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
288 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
289 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
290 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
291 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
292 #define BSTP_DEFAULT_PATH_COST          55
293
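/*
 * The spanning tree timer values above are kept in units of 1/256 of a
 * second (hence the "* 256" in the defaults); the ioctl "get" handlers
 * further down (e.g. bridge_ioctl_ght()) convert back to whole seconds
 * with a right shift:
 *
 *	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;  // 512 -> 2s
 */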
294 /*
295  * Timeout (in seconds) for entries learned dynamically.
296  */
297 #ifndef BRIDGE_RTABLE_TIMEOUT
298 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
299 #endif
300
301 /*
302  * Number of seconds between walks of the route list.
303  */
304 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
305 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
306 #endif
307
308 /*
309  * List of capabilities to mask on the member interface.
310  */
311 #define BRIDGE_IFCAPS_MASK              IFCAP_TXCSUM
312
313 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
314
315 struct netmsg_brctl {
316         struct netmsg_base      base;
317         bridge_ctl_t            bc_func;
318         struct bridge_softc     *bc_sc;
319         void                    *bc_arg;
320 };
321
322 struct netmsg_brsaddr {
323         struct netmsg_base      base;
324         struct bridge_softc     *br_softc;
325         struct ifnet            *br_dst_if;
326         struct bridge_rtinfo    *br_rtinfo;
327         int                     br_setflags;
328         uint8_t                 br_dst[ETHER_ADDR_LEN];
329         uint8_t                 br_flags;
330 };
331
332 struct netmsg_braddbif {
333         struct netmsg_base      base;
334         struct bridge_softc     *br_softc;
335         struct bridge_ifinfo    *br_bif_info;
336         struct ifnet            *br_bif_ifp;
337 };
338
339 struct netmsg_brdelbif {
340         struct netmsg_base      base;
341         struct bridge_softc     *br_softc;
342         struct bridge_ifinfo    *br_bif_info;
343         struct bridge_iflist_head *br_bif_list;
344 };
345
346 struct netmsg_brsflags {
347         struct netmsg_base      base;
348         struct bridge_softc     *br_softc;
349         struct bridge_ifinfo    *br_bif_info;
350         uint32_t                br_bif_flags;
351 };
352
353 eventhandler_tag        bridge_detach_cookie = NULL;
354
355 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
356 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
357 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
358 extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);
359
360 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
361
362 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
363 static int      bridge_clone_destroy(struct ifnet *);
364
365 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
366 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
367 static void     bridge_ifdetach(void *, struct ifnet *);
368 static void     bridge_init(void *);
369 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
370 static void     bridge_stop(struct ifnet *);
371 static void     bridge_start(struct ifnet *);
372 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
373 static int      bridge_output(struct ifnet *, struct mbuf *);
374 static struct ifnet *bridge_interface(void *if_bridge);
375
376 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
377
378 static void     bridge_timer_handler(netmsg_t);
379 static void     bridge_timer(void *);
380
381 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
382 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
383                     struct mbuf *);
384 static void     bridge_span(struct bridge_softc *, struct mbuf *);
385
386 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
387                     struct ifnet *, uint8_t);
388 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
389 static void     bridge_rtreap(struct bridge_softc *);
390 static void     bridge_rtreap_async(struct bridge_softc *);
391 static void     bridge_rttrim(struct bridge_softc *);
392 static int      bridge_rtage_finddead(struct bridge_softc *);
393 static void     bridge_rtage(struct bridge_softc *);
394 static void     bridge_rtflush(struct bridge_softc *, int);
395 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
396 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
397                     struct ifnet *, uint8_t);
398 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
399 static void     bridge_rtreap_handler(netmsg_t);
400 static void     bridge_rtinstall_handler(netmsg_t);
401 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
402                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
403
404 static void     bridge_rtable_init(struct bridge_softc *);
405 static void     bridge_rtable_fini(struct bridge_softc *);
406
407 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
408 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
409                     const uint8_t *);
410 static void     bridge_rtnode_insert(struct bridge_softc *,
411                     struct bridge_rtnode *);
412 static void     bridge_rtnode_destroy(struct bridge_softc *,
413                     struct bridge_rtnode *);
414
415 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
416                     const char *name);
417 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
418                     struct ifnet *ifp);
419 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
420                     struct bridge_ifinfo *);
421 static void     bridge_delete_member(struct bridge_softc *,
422                     struct bridge_iflist *, int);
423 static void     bridge_delete_span(struct bridge_softc *,
424                     struct bridge_iflist *);
425
426 static int      bridge_control(struct bridge_softc *, u_long,
427                                bridge_ctl_t, void *);
428 static int      bridge_ioctl_init(struct bridge_softc *, void *);
429 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
430 static int      bridge_ioctl_add(struct bridge_softc *, void *);
431 static int      bridge_ioctl_del(struct bridge_softc *, void *);
432 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
433                                 struct bridge_iflist *bif, struct ifbreq *req);
434 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
435 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
436 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
437 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
439 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
440 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
441 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
442 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
443 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
444 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
446 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
447 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
448 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
449 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
450 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
451 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
452 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
453 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
455 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
456 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
457 static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
458 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
459                     int);
460 static int      bridge_ip_checkbasic(struct mbuf **mp);
461 #ifdef INET6
462 static int      bridge_ip6_checkbasic(struct mbuf **mp);
463 #endif /* INET6 */
464 static int      bridge_fragment(struct ifnet *, struct mbuf *,
465                     struct ether_header *, int, struct llc *);
466 static void     bridge_enqueue_handler(netmsg_t);
467 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
468                     struct mbuf *, int);
469
470 static void     bridge_del_bif_handler(netmsg_t);
471 static void     bridge_add_bif_handler(netmsg_t);
472 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
473                     struct bridge_iflist_head *);
474 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
475                     struct ifnet *);
476
477 SYSCTL_DECL(_net_link);
478 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
479
480 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
481 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
482 static int pfil_member = 1; /* run pfil hooks on the member interface */
483 static int bridge_debug;
484 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
485     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
486 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
487     &pfil_bridge, 0, "Packet filter on the bridge interface");
488 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
489     &pfil_member, 0, "Packet filter on the member interface");
490 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
491     &bridge_debug, 0, "Bridge debug mode");
492
493 struct bridge_control_arg {
494         union {
495                 struct ifbreq ifbreq;
496                 struct ifbifconf ifbifconf;
497                 struct ifbareq ifbareq;
498                 struct ifbaconf ifbaconf;
499                 struct ifbrparam ifbrparam;
500         } bca_u;
501         int     bca_len;
502         void    *bca_uptr;
503         void    *bca_kptr;
504 };
505
506 struct bridge_control {
507         bridge_ctl_t    bc_func;
508         int             bc_argsize;
509         int             bc_flags;
510 };
511
512 #define BC_F_COPYIN             0x01    /* copy arguments in */
513 #define BC_F_COPYOUT            0x02    /* copy arguments out */
514 #define BC_F_SUSER              0x04    /* do super-user check */
515
516 const struct bridge_control bridge_control_table[] = {
517         { bridge_ioctl_add,             sizeof(struct ifbreq),
518           BC_F_COPYIN|BC_F_SUSER },
519         { bridge_ioctl_del,             sizeof(struct ifbreq),
520           BC_F_COPYIN|BC_F_SUSER },
521
522         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
523           BC_F_COPYIN|BC_F_COPYOUT },
524         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
525           BC_F_COPYIN|BC_F_SUSER },
526
527         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
528           BC_F_COPYIN|BC_F_SUSER },
529         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
530           BC_F_COPYOUT },
531
532         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
533           BC_F_COPYIN|BC_F_COPYOUT },
534         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
535           BC_F_COPYIN|BC_F_COPYOUT },
536
537         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
538           BC_F_COPYIN|BC_F_SUSER },
539
540         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
541           BC_F_COPYIN|BC_F_SUSER },
542         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
543           BC_F_COPYOUT },
544
545         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
546           BC_F_COPYIN|BC_F_SUSER },
547
548         { bridge_ioctl_flush,           sizeof(struct ifbreq),
549           BC_F_COPYIN|BC_F_SUSER },
550
551         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
552           BC_F_COPYOUT },
553         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
554           BC_F_COPYIN|BC_F_SUSER },
555
556         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
557           BC_F_COPYOUT },
558         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
559           BC_F_COPYIN|BC_F_SUSER },
560
561         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
562           BC_F_COPYOUT },
563         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
564           BC_F_COPYIN|BC_F_SUSER },
565
566         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
567           BC_F_COPYOUT },
568         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
569           BC_F_COPYIN|BC_F_SUSER },
570
571         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
572           BC_F_COPYIN|BC_F_SUSER },
573
574         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
575           BC_F_COPYIN|BC_F_SUSER },
576
577         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
578           BC_F_COPYIN|BC_F_SUSER },
579         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
580           BC_F_COPYIN|BC_F_SUSER },
581
582         { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
583           BC_F_COPYIN|BC_F_SUSER },
584
585 };
586 static const int bridge_control_table_size = NELEM(bridge_control_table);
587
588 LIST_HEAD(, bridge_softc) bridge_list;
589
590 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
591                                 bridge_clone_create,
592                                 bridge_clone_destroy, 0, IF_MAXUNIT);
593
594 static int
595 bridge_modevent(module_t mod, int type, void *data)
596 {
597         switch (type) {
598         case MOD_LOAD:
599                 LIST_INIT(&bridge_list);
600                 if_clone_attach(&bridge_cloner);
601                 bridge_input_p = bridge_input;
602                 bridge_output_p = bridge_output;
603                 bridge_interface_p = bridge_interface;
604                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
605                     ifnet_detach_event, bridge_ifdetach, NULL,
606                     EVENTHANDLER_PRI_ANY);
607 #if notyet
608                 bstp_linkstate_p = bstp_linkstate;
609 #endif
610                 break;
611         case MOD_UNLOAD:
612                 if (!LIST_EMPTY(&bridge_list))
613                         return (EBUSY);
614                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
615                     bridge_detach_cookie);
616                 if_clone_detach(&bridge_cloner);
617                 bridge_input_p = NULL;
618                 bridge_output_p = NULL;
619                 bridge_interface_p = NULL;
620 #if notyet
621                 bstp_linkstate_p = NULL;
622 #endif
623                 break;
624         default:
625                 return (EOPNOTSUPP);
626         }
627         return (0);
628 }
629
630 static moduledata_t bridge_mod = {
631         "if_bridge",
632         bridge_modevent,
633         0
634 };
635
636 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
637
638
639 /*
640  * bridge_clone_create:
641  *
642  *      Create a new bridge instance.
643  */
644 static int
645 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
646 {
647         struct bridge_softc *sc;
648         struct ifnet *ifp;
649         u_char eaddr[6];
650         int cpu, rnd;
651
652         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
653         ifp = sc->sc_ifp = &sc->sc_if;
654
655         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
656         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
657         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
658         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
659         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
660         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
661         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
662
663         /* Initialize our routing table. */
664         bridge_rtable_init(sc);
665
666         callout_init(&sc->sc_brcallout);
667         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
668                     MSGF_DROPABLE, bridge_timer_handler);
669         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
670
671         callout_init(&sc->sc_bstpcallout);
672         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
673                     MSGF_DROPABLE, bstp_tick_handler);
674         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
675
676         /* Initialize per-cpu member iface lists */
677         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
678                                  M_DEVBUF, M_WAITOK);
679         for (cpu = 0; cpu < ncpus; ++cpu)
680                 TAILQ_INIT(&sc->sc_iflists[cpu]);
681
682         TAILQ_INIT(&sc->sc_spanlist);
683
684         ifp->if_softc = sc;
685         if_initname(ifp, ifc->ifc_name, unit);
686         ifp->if_mtu = ETHERMTU;
687         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
688         ifp->if_ioctl = bridge_ioctl;
689         ifp->if_start = bridge_start;
690         ifp->if_init = bridge_init;
691         ifp->if_type = IFT_ETHER;
692         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
693         ifq_set_ready(&ifp->if_snd);
694         ifp->if_hdrlen = ETHER_HDR_LEN;
695
696         /*
697          * Generate a random Ethernet address and mark it as a locally
698          * administered, non-multicast address.
699          */
700         rnd = karc4random();
701         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
702         rnd = karc4random();
703         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
704
705         eaddr[0] &= ~1; /* clear multicast bit */
706         eaddr[0] |= 2;  /* set the LAA bit */
707
708         ether_ifattach(ifp, eaddr, NULL);
709         /* Now undo some of the damage... */
710         ifp->if_baudrate = 0;
711         /*ifp->if_type = IFT_BRIDGE;*/
712
713         crit_enter();   /* XXX MP */
714         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
715         crit_exit();
716
717         return (0);
718 }
719
720 static void
721 bridge_delete_dispatch(netmsg_t msg)
722 {
723         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
724         struct ifnet *bifp = sc->sc_ifp;
725         struct bridge_iflist *bif;
726
727         ifnet_serialize_all(bifp);
728
729         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
730                 bridge_delete_member(sc, bif, 0);
731
732         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
733                 bridge_delete_span(sc, bif);
734
735         ifnet_deserialize_all(bifp);
736
737         lwkt_replymsg(&msg->lmsg, 0);
738 }
739
740 /*
741  * bridge_clone_destroy:
742  *
743  *      Destroy a bridge instance.
744  */
745 static int
746 bridge_clone_destroy(struct ifnet *ifp)
747 {
748         struct bridge_softc *sc = ifp->if_softc;
749         struct netmsg_base msg;
750
751         ifnet_serialize_all(ifp);
752
753         bridge_stop(ifp);
754         ifp->if_flags &= ~IFF_UP;
755
756         ifnet_deserialize_all(ifp);
757
758         netmsg_init(&msg, NULL, &curthread->td_msgport,
759                     0, bridge_delete_dispatch);
760         msg.lmsg.u.ms_resultp = sc;
761         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
762
763         crit_enter();   /* XXX MP */
764         LIST_REMOVE(sc, sc_list);
765         crit_exit();
766
767         ether_ifdetach(ifp);
768
769         /* Tear down the routing table. */
770         bridge_rtable_fini(sc);
771
772         /* Free per-cpu member iface lists */
773         kfree(sc->sc_iflists, M_DEVBUF);
774
775         kfree(sc, M_DEVBUF);
776
777         return 0;
778 }
779
780 /*
781  * bridge_ioctl:
782  *
783  *      Handle a control request from the operator.
784  */
785 static int
786 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
787 {
788         struct bridge_softc *sc = ifp->if_softc;
789         struct bridge_control_arg args;
790         struct ifdrv *ifd = (struct ifdrv *) data;
791         const struct bridge_control *bc;
792         int error = 0;
793
794         ASSERT_IFNET_SERIALIZED_ALL(ifp);
795
796         switch (cmd) {
797         case SIOCADDMULTI:
798         case SIOCDELMULTI:
799                 break;
800
801         case SIOCGDRVSPEC:
802         case SIOCSDRVSPEC:
803                 if (ifd->ifd_cmd >= bridge_control_table_size) {
804                         error = EINVAL;
805                         break;
806                 }
807                 bc = &bridge_control_table[ifd->ifd_cmd];
808
809                 if (cmd == SIOCGDRVSPEC &&
810                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
811                         error = EINVAL;
812                         break;
813                 } else if (cmd == SIOCSDRVSPEC &&
814                            (bc->bc_flags & BC_F_COPYOUT)) {
815                         error = EINVAL;
816                         break;
817                 }
818
819                 if (bc->bc_flags & BC_F_SUSER) {
820                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
821                         if (error)
822                                 break;
823                 }
824
825                 if (ifd->ifd_len != bc->bc_argsize ||
826                     ifd->ifd_len > sizeof(args.bca_u)) {
827                         error = EINVAL;
828                         break;
829                 }
830
831                 memset(&args, 0, sizeof(args));
832                 if (bc->bc_flags & BC_F_COPYIN) {
833                         error = copyin(ifd->ifd_data, &args.bca_u,
834                                        ifd->ifd_len);
835                         if (error)
836                                 break;
837                 }
838
839                 error = bridge_control(sc, cmd, bc->bc_func, &args);
840                 if (error) {
841                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
842                         break;
843                 }
844
845                 if (bc->bc_flags & BC_F_COPYOUT) {
846                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
847                         if (args.bca_len != 0) {
848                                 KKASSERT(args.bca_kptr != NULL);
849                                 if (!error) {
850                                         error = copyout(args.bca_kptr,
851                                                 args.bca_uptr, args.bca_len);
852                                 }
853                                 kfree(args.bca_kptr, M_TEMP);
854                         } else {
855                                 KKASSERT(args.bca_kptr == NULL);
856                         }
857                 } else {
858                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
859                 }
860                 break;
861
862         case SIOCSIFFLAGS:
863                 if (!(ifp->if_flags & IFF_UP) &&
864                     (ifp->if_flags & IFF_RUNNING)) {
865                         /*
866                          * If interface is marked down and it is running,
867                          * then stop it.
868                          */
869                         bridge_stop(ifp);
870                 } else if ((ifp->if_flags & IFF_UP) &&
871                     !(ifp->if_flags & IFF_RUNNING)) {
872                         /*
873                          * If interface is marked up and it is stopped, then
874                          * start it.
875                          */
876                         ifp->if_init(sc);
877                 }
878                 break;
879
880         case SIOCSIFMTU:
881                 /* Do not allow the MTU to be changed on the bridge */
882                 error = EINVAL;
883                 break;
884
885         default:
886                 error = ether_ioctl(ifp, cmd, data);
887                 break;
888         }
889         return (error);
890 }
891
892 /*
893  * bridge_mutecaps:
894  *
895  *      Clear or restore unwanted capabilities on the member interface
896  */
897 static void
898 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
899 {
900         struct ifreq ifr;
901         int error;
902
903         if (ifp->if_ioctl == NULL)
904                 return;
905
906         bzero(&ifr, sizeof(ifr));
907         ifr.ifr_reqcap = ifp->if_capenable;
908
909         if (mute) {
910                 /* mask off and save capabilities */
911                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
912                 if (bif_info->bifi_mutecap != 0)
913                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
914         } else {
915                 /* restore muted capabilities */
916                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
917         }
918
919         if (bif_info->bifi_mutecap != 0) {
920                 ifnet_serialize_all(ifp);
921                 error = ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
922                 ifnet_deserialize_all(ifp);
923         }
924 }
925
926 /*
927  * bridge_lookup_member:
928  *
929  *      Lookup a bridge member interface.
930  */
931 static struct bridge_iflist *
932 bridge_lookup_member(struct bridge_softc *sc, const char *name)
933 {
934         struct bridge_iflist *bif;
935
936         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
937                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
938                         return (bif);
939         }
940         return (NULL);
941 }
942
943 /*
944  * bridge_lookup_member_if:
945  *
946  *      Lookup a bridge member interface by ifnet*.
947  */
948 static struct bridge_iflist *
949 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
950 {
951         struct bridge_iflist *bif;
952
953         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
954                 if (bif->bif_ifp == member_ifp)
955                         return (bif);
956         }
957         return (NULL);
958 }
959
960 /*
961  * bridge_lookup_member_ifinfo:
962  *
963  *      Lookup a bridge member interface by bridge_ifinfo.
964  */
965 static struct bridge_iflist *
966 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
967                             struct bridge_ifinfo *bif_info)
968 {
969         struct bridge_iflist *bif;
970
971         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
972                 if (bif->bif_info == bif_info)
973                         return (bif);
974         }
975         return (NULL);
976 }
977
978 /*
979  * bridge_delete_member:
980  *
981  *      Delete the specified member interface.
982  */
983 static void
984 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
985     int gone)
986 {
987         struct ifnet *ifs = bif->bif_ifp;
988         struct ifnet *bifp = sc->sc_ifp;
989         struct bridge_ifinfo *bif_info = bif->bif_info;
990         struct bridge_iflist_head saved_bifs;
991
992         ASSERT_IFNET_SERIALIZED_ALL(bifp);
993         KKASSERT(bif_info != NULL);
994
995         ifs->if_bridge = NULL;
996
997         /*
998          * Release bridge interface's serializer:
999          * - To avoid a possible deadlock.
1000          * - Various sync operations will block the current thread.
1001          */
1002         ifnet_deserialize_all(bifp);
1003
1004         if (!gone) {
1005                 switch (ifs->if_type) {
1006                 case IFT_ETHER:
1007                 case IFT_L2VLAN:
1008                         /*
1009                          * Take the interface out of promiscuous mode.
1010                          */
1011                         ifpromisc(ifs, 0);
1012                         bridge_mutecaps(bif_info, ifs, 0);
1013                         break;
1014
1015                 case IFT_GIF:
1016                         break;
1017
1018                 default:
1019                         panic("bridge_delete_member: impossible");
1020                         break;
1021                 }
1022         }
1023
1024         /*
1025          * Remove bifs from percpu linked list.
1026          *
1027          * Removed bifs are not freed immediately; instead,
1028          * they are saved in saved_bifs.  They will be freed
1029          * after we make sure that no one is accessing them,
1030          * i.e. after the following netmsg_service_sync().
1031          */
1032         TAILQ_INIT(&saved_bifs);
1033         bridge_del_bif(sc, bif_info, &saved_bifs);
1034
1035         /*
1036          * Make sure that all protocol threads:
1037          * o  see that 'ifs' if_bridge has been changed
1038          * o  know that bif is removed from the percpu linked list
1039          */
1040         netmsg_service_sync();
1041
1042         /*
1043          * Free the removed bifs
1044          */
1045         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1046         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1047                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1048                 kfree(bif, M_DEVBUF);
1049         }
1050
1051         /* See the comment in bridge_ioctl_stop() */
1052         bridge_rtmsg_sync(sc);
1053         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1054
1055         ifnet_serialize_all(bifp);
1056
1057         if (bifp->if_flags & IFF_RUNNING)
1058                 bstp_initialization(sc);
1059
1060         /*
1061          * Free the bif_info after bstp_initialization(), so that
1062          * bridge_softc.sc_root_port will not reference a dangling
1063          * pointer.
1064          */
1065         kfree(bif_info, M_DEVBUF);
1066 }
1067
1068 /*
1069  * bridge_delete_span:
1070  *
1071  *      Delete the specified span interface.
1072  */
1073 static void
1074 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1075 {
1076         KASSERT(bif->bif_ifp->if_bridge == NULL,
1077             ("%s: not a span interface", __func__));
1078
1079         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1080         kfree(bif, M_DEVBUF);
1081 }
1082
1083 static int
1084 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1085 {
1086         struct ifnet *ifp = sc->sc_ifp;
1087
1088         if (ifp->if_flags & IFF_RUNNING)
1089                 return 0;
1090
1091         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1092             bridge_timer, sc);
1093
1094         ifp->if_flags |= IFF_RUNNING;
1095         bstp_initialization(sc);
1096         return 0;
1097 }
1098
1099 static int
1100 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1101 {
1102         struct ifnet *ifp = sc->sc_ifp;
1103         struct lwkt_msg *lmsg;
1104
1105         if ((ifp->if_flags & IFF_RUNNING) == 0)
1106                 return 0;
1107
1108         callout_stop(&sc->sc_brcallout);
1109
1110         crit_enter();
1111         lmsg = &sc->sc_brtimemsg.lmsg;
1112         if ((lmsg->ms_flags & MSGF_DONE) == 0) {
1113                 /* Pending to be processed; drop it */
1114                 lwkt_dropmsg(lmsg);
1115         }
1116         crit_exit();
1117
1118         bstp_stop(sc);
1119
1120         ifp->if_flags &= ~IFF_RUNNING;
1121
1122         ifnet_deserialize_all(ifp);
1123
1124         /* Let everyone know that we are stopped */
1125         netmsg_service_sync();
1126
1127         /*
1128          * Sync ifnetX msgports in the order we forward rtnode
1129          * installation messages.  This is used to make sure that
1130          * all rtnode installation messages sent by bridge_rtupdate()
1131          * during the above netmsg_service_sync() are flushed.
1132          */
1133         bridge_rtmsg_sync(sc);
1134         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1135
1136         ifnet_serialize_all(ifp);
1137         return 0;
1138 }
1139
1140 static int
1141 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1142 {
1143         struct ifbreq *req = arg;
1144         struct bridge_iflist *bif;
1145         struct bridge_ifinfo *bif_info;
1146         struct ifnet *ifs, *bifp;
1147         int error = 0;
1148
1149         bifp = sc->sc_ifp;
1150         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1151
1152         ifs = ifunit(req->ifbr_ifsname);
1153         if (ifs == NULL)
1154                 return (ENOENT);
1155
1156         /* If it's in the span list, it can't be a member. */
1157         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1158                 if (ifs == bif->bif_ifp)
1159                         return (EBUSY);
1160
1161         /* Allow the first Ethernet member to define the MTU */
1162         if (ifs->if_type != IFT_GIF) {
1163                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1164                         bifp->if_mtu = ifs->if_mtu;
1165                 } else if (bifp->if_mtu != ifs->if_mtu) {
1166                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1167                         return (EINVAL);
1168                 }
1169         }
1170
1171         if (ifs->if_bridge == sc)
1172                 return (EEXIST);
1173
1174         if (ifs->if_bridge != NULL)
1175                 return (EBUSY);
1176
1177         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1178         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1179         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1180         bif_info->bifi_ifp = ifs;
1181         bif_info->bifi_bond_weight = 1;
1182
1183         /*
1184          * Release bridge interface's serializer:
1185          * - To avoid a possible deadlock.
1186          * - Various sync operations will block the current thread.
1187          */
1188         ifnet_deserialize_all(bifp);
1189
1190         switch (ifs->if_type) {
1191         case IFT_ETHER:
1192         case IFT_L2VLAN:
1193                 /*
1194                  * Place the interface into promiscuous mode.
1195                  */
1196                 error = ifpromisc(ifs, 1);
1197                 if (error) {
1198                         ifnet_serialize_all(bifp);
1199                         goto out;
1200                 }
1201                 bridge_mutecaps(bif_info, ifs, 1);
1202                 break;
1203
1204         case IFT_GIF: /* :^) */
1205                 break;
1206
1207         default:
1208                 error = EINVAL;
1209                 ifnet_serialize_all(bifp);
1210                 goto out;
1211         }
1212
1213         /*
1214          * Add bifs to percpu linked lists
1215          */
1216         bridge_add_bif(sc, bif_info, ifs);
1217
1218         ifnet_serialize_all(bifp);
1219
1220         if (bifp->if_flags & IFF_RUNNING)
1221                 bstp_initialization(sc);
1222         else
1223                 bstp_stop(sc);
1224
1225         /*
1226          * Everything has been set up, so let the member interface
1227          * deliver packets to this bridge on its input/output path.
1228          */
1229         ifs->if_bridge = sc;
1230 out:
1231         if (error) {
1232                 if (bif_info != NULL)
1233                         kfree(bif_info, M_DEVBUF);
1234         }
1235         return (error);
1236 }
1237
1238 static int
1239 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1240 {
1241         struct ifbreq *req = arg;
1242         struct bridge_iflist *bif;
1243
1244         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1245         if (bif == NULL)
1246                 return (ENOENT);
1247
1248         bridge_delete_member(sc, bif, 0);
1249
1250         return (0);
1251 }
1252
1253 static int
1254 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1255 {
1256         struct ifbreq *req = arg;
1257         struct bridge_iflist *bif;
1258
1259         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1260         if (bif == NULL)
1261                 return (ENOENT);
1262         bridge_ioctl_fillflags(sc, bif, req);
1263         return (0);
1264 }
1265
1266 static void
1267 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1268                        struct ifbreq *req)
1269 {
1270         req->ifbr_ifsflags = bif->bif_flags;
1271         req->ifbr_state = bif->bif_state;
1272         req->ifbr_priority = bif->bif_priority;
1273         req->ifbr_path_cost = bif->bif_path_cost;
1274         req->ifbr_bond_weight = bif->bif_bond_weight;
1275         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1276         if (bif->bif_flags & IFBIF_STP) {
1277                 req->ifbr_peer_root = bif->bif_peer_root;
1278                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1279                 req->ifbr_peer_cost = bif->bif_peer_cost;
1280                 req->ifbr_peer_port = bif->bif_peer_port;
1281                 if (bstp_supersedes_port_info(sc, bif)) {
1282                         req->ifbr_designated_root = bif->bif_peer_root;
1283                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1284                         req->ifbr_designated_cost = bif->bif_peer_cost;
1285                         req->ifbr_designated_port = bif->bif_peer_port;
1286                 } else {
1287                         req->ifbr_designated_root = sc->sc_bridge_id;
1288                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1289                         req->ifbr_designated_cost = bif->bif_path_cost +
1290                                                     bif->bif_peer_cost;
1291                         req->ifbr_designated_port = bif->bif_port_id;
1292                 }
1293         } else {
1294                 req->ifbr_peer_root = 0;
1295                 req->ifbr_peer_bridge = 0;
1296                 req->ifbr_peer_cost = 0;
1297                 req->ifbr_peer_port = 0;
1298                 req->ifbr_designated_root = 0;
1299                 req->ifbr_designated_bridge = 0;
1300                 req->ifbr_designated_cost = 0;
1301                 req->ifbr_designated_port = 0;
1302         }
1303 }
1304
1305 static int
1306 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1307 {
1308         struct ifbreq *req = arg;
1309         struct bridge_iflist *bif;
1310         struct ifnet *bifp = sc->sc_ifp;
1311
1312         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1313         if (bif == NULL)
1314                 return (ENOENT);
1315
1316         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1317                 /* SPAN is readonly */
1318                 return (EINVAL);
1319         }
1320
1321         if (req->ifbr_ifsflags & IFBIF_STP) {
1322                 switch (bif->bif_ifp->if_type) {
1323                 case IFT_ETHER:
1324                         /* These can do spanning tree. */
1325                         break;
1326
1327                 default:
1328                         /* Nothing else can. */
1329                         return (EINVAL);
1330                 }
1331         }
1332
1333         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1334                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1335         if (bifp->if_flags & IFF_RUNNING)
1336                 bstp_initialization(sc);
1337
1338         return (0);
1339 }
1340
1341 static int
1342 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1343 {
1344         struct ifbrparam *param = arg;
1345         struct ifnet *ifp = sc->sc_ifp;
1346
1347         sc->sc_brtmax = param->ifbrp_csize;
1348
1349         ifnet_deserialize_all(ifp);
1350         bridge_rttrim(sc);
1351         ifnet_serialize_all(ifp);
1352
1353         return (0);
1354 }
1355
1356 static int
1357 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1358 {
1359         struct ifbrparam *param = arg;
1360
1361         param->ifbrp_csize = sc->sc_brtmax;
1362
1363         return (0);
1364 }
1365
1366 static int
1367 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1368 {
1369         struct bridge_control_arg *bc_arg = arg;
1370         struct ifbifconf *bifc = arg;
1371         struct bridge_iflist *bif;
1372         struct ifbreq *breq;
1373         int count, len;
1374
1375         count = 0;
1376         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1377                 count++;
1378         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1379                 count++;
1380
1381         if (bifc->ifbic_len == 0) {
1382                 bifc->ifbic_len = sizeof(*breq) * count;
1383                 return 0;
1384         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1385                 bifc->ifbic_len = 0;
1386                 return 0;
1387         }
1388
1389         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1390         KKASSERT(len >= sizeof(*breq));
1391
1392         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1393         if (breq == NULL) {
1394                 bifc->ifbic_len = 0;
1395                 return ENOMEM;
1396         }
1397         bc_arg->bca_kptr = breq;
1398
1399         count = 0;
1400         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1401                 if (len < sizeof(*breq))
1402                         break;
1403
1404                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1405                         sizeof(breq->ifbr_ifsname));
1406                 bridge_ioctl_fillflags(sc, bif, breq);
1407                 breq++;
1408                 count++;
1409                 len -= sizeof(*breq);
1410         }
1411         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1412                 if (len < sizeof(*breq))
1413                         break;
1414
1415                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1416                         sizeof(breq->ifbr_ifsname));
1417                 breq->ifbr_ifsflags = bif->bif_flags;
1418                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1419                 breq++;
1420                 count++;
1421                 len -= sizeof(*breq);
1422         }
1423
1424         bifc->ifbic_len = sizeof(*breq) * count;
1425         KKASSERT(bifc->ifbic_len > 0);
1426
1427         bc_arg->bca_len = bifc->ifbic_len;
1428         bc_arg->bca_uptr = bifc->ifbic_req;
1429         return 0;
1430 }
1431
1432 static int
1433 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1434 {
1435         struct bridge_control_arg *bc_arg = arg;
1436         struct ifbaconf *bac = arg;
1437         struct bridge_rtnode *brt;
1438         struct ifbareq *bareq;
1439         int count, len;
1440
1441         count = 0;
1442         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1443                 count++;
1444
1445         if (bac->ifbac_len == 0) {
1446                 bac->ifbac_len = sizeof(*bareq) * count;
1447                 return 0;
1448         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1449                 bac->ifbac_len = 0;
1450                 return 0;
1451         }
1452
1453         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1454         KKASSERT(len >= sizeof(*bareq));
1455
1456         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1457         if (bareq == NULL) {
1458                 bac->ifbac_len = 0;
1459                 return ENOMEM;
1460         }
1461         bc_arg->bca_kptr = bareq;
1462
1463         count = 0;
1464         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1465                 struct bridge_rtinfo *bri = brt->brt_info;
1466                 unsigned long expire;
1467
1468                 if (len < sizeof(*bareq))
1469                         break;
1470
1471                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1472                         sizeof(bareq->ifba_ifsname));
1473                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1474                 expire = bri->bri_expire;
1475                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1476                     time_second < expire)
1477                         bareq->ifba_expire = expire - time_second;
1478                 else
1479                         bareq->ifba_expire = 0;
1480                 bareq->ifba_flags = bri->bri_flags;
1481                 bareq++;
1482                 count++;
1483                 len -= sizeof(*bareq);
1484         }
1485
1486         bac->ifbac_len = sizeof(*bareq) * count;
1487         KKASSERT(bac->ifbac_len > 0);
1488
1489         bc_arg->bca_len = bac->ifbac_len;
1490         bc_arg->bca_uptr = bac->ifbac_req;
1491         return 0;
1492 }
1493
1494 static int
1495 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1496 {
1497         struct ifbareq *req = arg;
1498         struct bridge_iflist *bif;
1499         struct ifnet *ifp = sc->sc_ifp;
1500         int error;
1501
1502         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1503
1504         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1505         if (bif == NULL)
1506                 return (ENOENT);
1507
1508         ifnet_deserialize_all(ifp);
1509         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1510                                req->ifba_flags);
1511         ifnet_serialize_all(ifp);
1512         return (error);
1513 }
1514
1515 static int
1516 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1517 {
1518         struct ifbrparam *param = arg;
1519
1520         sc->sc_brttimeout = param->ifbrp_ctime;
1521
1522         return (0);
1523 }
1524
1525 static int
1526 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1527 {
1528         struct ifbrparam *param = arg;
1529
1530         param->ifbrp_ctime = sc->sc_brttimeout;
1531
1532         return (0);
1533 }
1534
1535 static int
1536 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1537 {
1538         struct ifbareq *req = arg;
1539         struct ifnet *ifp = sc->sc_ifp;
1540         int error;
1541
1542         ifnet_deserialize_all(ifp);
1543         error = bridge_rtdaddr(sc, req->ifba_dst);
1544         ifnet_serialize_all(ifp);
1545         return error;
1546 }
1547
1548 static int
1549 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1550 {
1551         struct ifbreq *req = arg;
1552         struct ifnet *ifp = sc->sc_ifp;
1553
1554         ifnet_deserialize_all(ifp);
1555         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1556         ifnet_serialize_all(ifp);
1557
1558         return (0);
1559 }
1560
1561 static int
1562 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1563 {
1564         struct ifbrparam *param = arg;
1565
1566         param->ifbrp_prio = sc->sc_bridge_priority;
1567
1568         return (0);
1569 }
1570
1571 static int
1572 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1573 {
1574         struct ifbrparam *param = arg;
1575
1576         sc->sc_bridge_priority = param->ifbrp_prio;
1577
1578         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1579                 bstp_initialization(sc);
1580
1581         return (0);
1582 }
1583
1584 static int
1585 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1586 {
1587         struct ifbrparam *param = arg;
1588
1589         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1590
1591         return (0);
1592 }
1593
1594 static int
1595 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1596 {
1597         struct ifbrparam *param = arg;
1598
1599         if (param->ifbrp_hellotime == 0)
1600                 return (EINVAL);
1601         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1602
1603         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1604                 bstp_initialization(sc);
1605
1606         return (0);
1607 }
1608
1609 static int
1610 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1611 {
1612         struct ifbrparam *param = arg;
1613
1614         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1615
1616         return (0);
1617 }
1618
1619 static int
1620 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1621 {
1622         struct ifbrparam *param = arg;
1623
1624         if (param->ifbrp_fwddelay == 0)
1625                 return (EINVAL);
1626         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1627
1628         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1629                 bstp_initialization(sc);
1630
1631         return (0);
1632 }
1633
1634 static int
1635 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1636 {
1637         struct ifbrparam *param = arg;
1638
1639         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1640
1641         return (0);
1642 }
1643
1644 static int
1645 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1646 {
1647         struct ifbrparam *param = arg;
1648
1649         if (param->ifbrp_maxage == 0)
1650                 return (EINVAL);
1651         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1652
1653         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1654                 bstp_initialization(sc);
1655
1656         return (0);
1657 }
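/*
 * The timer ioctls above store hello time, forward delay and max age
 * shifted left by 8 (i.e. in 1/256-second units) and shift right by 8
 * when reporting them back.  A minimal sketch of that conversion is
 * below; the helper names are made up, and the whole-seconds assumption
 * matches what the set handlers above accept.
 */
#if 0	/* illustrative sketch, not part of this driver */
static __inline unsigned int
bstp_seconds_to_units(unsigned int seconds)
{
	return (seconds << 8);		/* e.g. a 2 second hello -> 512 */
}

static __inline unsigned int
bstp_units_to_seconds(unsigned int units)
{
	return (units >> 8);		/* truncates any 1/256s fraction */
}
#endif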
1658
1659 static int
1660 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1661 {
1662         struct ifbreq *req = arg;
1663         struct bridge_iflist *bif;
1664
1665         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1666         if (bif == NULL)
1667                 return (ENOENT);
1668
1669         bif->bif_priority = req->ifbr_priority;
1670
1671         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1672                 bstp_initialization(sc);
1673
1674         return (0);
1675 }
1676
1677 static int
1678 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1679 {
1680         struct ifbreq *req = arg;
1681         struct bridge_iflist *bif;
1682
1683         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1684         if (bif == NULL)
1685                 return (ENOENT);
1686
1687         bif->bif_path_cost = req->ifbr_path_cost;
1688
1689         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1690                 bstp_initialization(sc);
1691
1692         return (0);
1693 }
1694
1695 static int
1696 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1697 {
1698         struct ifbreq *req = arg;
1699         struct bridge_iflist *bif;
1700
1701         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1702         if (bif == NULL)
1703                 return (ENOENT);
1704
1705         bif->bif_bond_weight = req->ifbr_bond_weight;
1706
1707         /* no reinit needed */
1708
1709         return (0);
1710 }
1711
1712 static int
1713 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1714 {
1715         struct ifbreq *req = arg;
1716         struct bridge_iflist *bif;
1717         struct ifnet *ifs;
1718         struct bridge_ifinfo *bif_info;
1719
1720         ifs = ifunit(req->ifbr_ifsname);
1721         if (ifs == NULL)
1722                 return (ENOENT);
1723
1724         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1725                 if (ifs == bif->bif_ifp)
1726                         return (EBUSY);
1727
1728         if (ifs->if_bridge != NULL)
1729                 return (EBUSY);
1730
1731         switch (ifs->if_type) {
1732         case IFT_ETHER:
1733         case IFT_GIF:
1734         case IFT_L2VLAN:
1735                 break;
1736
1737         default:
1738                 return (EINVAL);
1739         }
1740
1741         /*
1742          * bif_info is needed for bif_flags
1743          */
1744         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1745         bif_info->bifi_ifp = ifs;
1746
1747         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1748         bif->bif_ifp = ifs;
1749         bif->bif_info = bif_info;
1750         bif->bif_flags = IFBIF_SPAN;
1751         /* NOTE: span bif does not otherwise need bridge_ifinfo */
1752
1753         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1754
1755         sc->sc_span = 1;
1756
1757         return (0);
1758 }
1759
1760 static int
1761 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1762 {
1763         struct ifbreq *req = arg;
1764         struct bridge_iflist *bif;
1765         struct ifnet *ifs;
1766
1767         ifs = ifunit(req->ifbr_ifsname);
1768         if (ifs == NULL)
1769                 return (ENOENT);
1770
1771         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1772                 if (ifs == bif->bif_ifp)
1773                         break;
1774
1775         if (bif == NULL)
1776                 return (ENOENT);
1777
1778         bridge_delete_span(sc, bif);
1779
1780         if (TAILQ_EMPTY(&sc->sc_spanlist))
1781                 sc->sc_span = 0;
1782
1783         return (0);
1784 }
1785
1786 static void
1787 bridge_ifdetach_dispatch(netmsg_t msg)
1788 {
1789         struct ifnet *ifp, *bifp;
1790         struct bridge_softc *sc;
1791         struct bridge_iflist *bif;
1792
1793         ifp = msg->lmsg.u.ms_resultp;
1794         sc = ifp->if_bridge;
1795
1796         /* Check if the interface is a bridge member */
1797         if (sc != NULL) {
1798                 bifp = sc->sc_ifp;
1799
1800                 ifnet_serialize_all(bifp);
1801
1802                 bif = bridge_lookup_member_if(sc, ifp);
1803                 if (bif != NULL) {
1804                         bridge_delete_member(sc, bif, 1);
1805                 } else {
1806                         /* XXX Why would bif be NULL? */
1807                 }
1808
1809                 ifnet_deserialize_all(bifp);
1810                 goto reply;
1811         }
1812
1813         crit_enter();   /* XXX MP */
1814
1815         /* Check if the interface is a span port */
1816         LIST_FOREACH(sc, &bridge_list, sc_list) {
1817                 bifp = sc->sc_ifp;
1818
1819                 ifnet_serialize_all(bifp);
1820
1821                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1822                         if (ifp == bif->bif_ifp) {
1823                                 bridge_delete_span(sc, bif);
1824                                 break;
1825                         }
1826
1827                 ifnet_deserialize_all(bifp);
1828         }
1829
1830         crit_exit();
1831
1832 reply:
1833         lwkt_replymsg(&msg->lmsg, 0);
1834 }
1835
1836 /*
1837  * bridge_ifdetach:
1838  *
1839  *      Detach an interface from a bridge.  Called when a member
1840  *      interface is detaching.
1841  */
1842 static void
1843 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1844 {
1845         struct netmsg_base msg;
1846
1847         netmsg_init(&msg, NULL, &curthread->td_msgport,
1848                     0, bridge_ifdetach_dispatch);
1849         msg.lmsg.u.ms_resultp = ifp;
1850
1851         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1852 }
1853
1854 /*
1855  * bridge_init:
1856  *
1857  *      Initialize a bridge interface.
1858  */
1859 static void
1860 bridge_init(void *xsc)
1861 {
1862         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1863 }
1864
1865 /*
1866  * bridge_stop:
1867  *
1868  *      Stop the bridge interface.
1869  */
1870 static void
1871 bridge_stop(struct ifnet *ifp)
1872 {
1873         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1874 }
1875
1876 /*
1877  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1878  * interface or from any member of our bridge interface.  This is used
1879  * later on to force the MAC to be the MAC of our bridge interface.
1880  */
1881 static int
1882 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1883 {
1884         struct bridge_iflist *bif;
1885
1886         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1887                 return (1);
1888
1889         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1890                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1891                            ETHER_ADDR_LEN) == 0) {
1892                         return (1);
1893                 }
1894         }
1895         return (0);
1896 }
1897
1898 /*
1899  * bridge_enqueue:
1900  *
1901  *      Enqueue a packet on a bridge member interface.
1902  *
1903  */
1904 void
1905 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1906 {
1907         struct netmsg_packet *nmp;
1908
1909         nmp = &m->m_hdr.mh_netmsg;
1910         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1911                     0, bridge_enqueue_handler);
1912         nmp->nm_packet = m;
1913         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1914
1915         lwkt_sendmsg(ifnet_portfn(mycpu->gd_cpuid), &nmp->base.lmsg);
1916 }
1917
1918 /*
1919  * bridge_output:
1920  *
1921  *      Send output from a bridge member interface.  This
1922  *      performs the bridging function for locally originated
1923  *      packets.
1924  *
1925  *      The mbuf has the Ethernet header already attached.  We must
1926  *      enqueue or free the mbuf before returning.
1927  */
1928 static int
1929 bridge_output(struct ifnet *ifp, struct mbuf *m)
1930 {
1931         struct bridge_softc *sc = ifp->if_bridge;
1932         struct bridge_iflist *bif, *nbif;
1933         struct ether_header *eh;
1934         struct ifnet *dst_if, *alt_if, *bifp;
1935         int from_us;
1936         int priority;
1937         int alt_priority;
1938
1939         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
1940
1941         /*
1942          * Make sure that we are still a member of a bridge interface.
1943          */
1944         if (sc == NULL) {
1945                 m_freem(m);
1946                 return (0);
1947         }
1948         bifp = sc->sc_ifp;
1949
1950         /*
1951          * Acquire header
1952          */
1953         if (m->m_len < ETHER_HDR_LEN) {
1954                 m = m_pullup(m, ETHER_HDR_LEN);
1955                 if (m == NULL) {
1956                         bifp->if_oerrors++;
1957                         return (0);
1958                 }
1959         }
1960         eh = mtod(m, struct ether_header *);
1961         from_us = bridge_from_us(sc, eh);
1962
1963         /*
1964          * If bridge is down, but the original output interface is up,
1965          * go ahead and send out that interface.  Otherwise, the packet
1966          * is dropped below.
1967          */
1968         if ((bifp->if_flags & IFF_RUNNING) == 0) {
1969                 dst_if = ifp;
1970                 goto sendunicast;
1971         }
1972
1973         /*
1974          * If the packet is a multicast, or we don't know a better way to
1975          * get there, send to all interfaces.
1976          */
1977         if (ETHER_IS_MULTICAST(eh->ether_dhost))
1978                 dst_if = NULL;
1979         else
1980                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1981
1982         if (dst_if == NULL) {
1983                 struct mbuf *mc;
1984                 int used = 0;
1985                 int found = 0;
1986
1987                 if (sc->sc_span)
1988                         bridge_span(sc, m);
1989
1990                 alt_if = NULL;
1991                 alt_priority = 0;
1992                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1993                                      bif_next, nbif) {
1994                         dst_if = bif->bif_ifp;
1995
1996                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
1997                                 continue;
1998
1999                         /*
2000                          * If this is not the original output interface,
2001                          * and the interface is participating in spanning
2002                          * tree, make sure the port is in a state that
2003                          * allows forwarding.
2004                          *
2005                          * We keep track of a possible backup IF if we are
2006                          * unable to find any interfaces to forward through.
2007                          *
2008                          * NOTE: Currently round-robining is not implemented
2009                          *       across bonded interface groups (needs an
2010                          *       algorithm to track each group somehow).
2011                          *
2012                          *       Similarly we track only one alternative
2013                          *       interface if no suitable interfaces are
2014                          *       found.
2015                          */
2016                         if (dst_if != ifp &&
2017                             (bif->bif_flags & IFBIF_STP) != 0) {
2018                                 switch (bif->bif_state) {
2019                                 case BSTP_IFSTATE_BONDED:
2020                                         if (bif->bif_priority + 512 >
2021                                             alt_priority) {
2022                                                 alt_priority =
2023                                                     bif->bif_priority + 512;
2024                                                 alt_if = bif->bif_ifp;
2025                                         }
2026                                         continue;
2027                                 case BSTP_IFSTATE_BLOCKING:
2028                                         if (bif->bif_priority + 256 >
2029                                             alt_priority) {
2030                                                 alt_priority =
2031                                                     bif->bif_priority + 256;
2032                                                 alt_if = bif->bif_ifp;
2033                                         }
2034                                         continue;
2035                                 case BSTP_IFSTATE_LEARNING:
2036                                         if (bif->bif_priority > alt_priority) {
2037                                                 alt_priority =
2038                                                     bif->bif_priority;
2039                                                 alt_if = bif->bif_ifp;
2040                                         }
2041                                         continue;
2042                                 case BSTP_IFSTATE_L1BLOCKING:
2043                                 case BSTP_IFSTATE_LISTENING:
2044                                 case BSTP_IFSTATE_DISABLED:
2045                                         continue;
2046                                 default:
2047                                         /* FORWARDING */
2048                                         break;
2049                                 }
2050                         }
2051
2052                         KKASSERT(used == 0);
2053                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2054                                 used = 1;
2055                                 mc = m;
2056                         } else {
2057                                 mc = m_copypacket(m, MB_DONTWAIT);
2058                                 if (mc == NULL) {
2059                                         bifp->if_oerrors++;
2060                                         continue;
2061                                 }
2062                         }
2063
2064                         /*
2065                          * If the packet is 'from' us override ether_shost.
2066                          */
2067                         bridge_handoff(sc, dst_if, mc, from_us);
2068                         found = 1;
2069
2070                         if (nbif != NULL && !nbif->bif_onlist) {
2071                                 KKASSERT(bif->bif_onlist);
2072                                 nbif = TAILQ_NEXT(bif, bif_next);
2073                         }
2074                 }
2075
2076                 /*
2077                  * If we couldn't find anything use the backup interface
2078                  * if we have one.
2079                  */
2080                 if (found == 0 && alt_if) {
2081                         KKASSERT(used == 0);
2082                         mc = m;
2083                         used = 1;
2084                         bridge_handoff(sc, alt_if, mc, from_us);
2085                 }
2086
2087                 if (used == 0)
2088                         m_freem(m);
2089                 return (0);
2090         }
2091
2092 sendunicast:
2093         /*
2094          * If STP is enabled on the target we are an equal opportunity
2095          * employer and do not necessarily output to dst_if.  Instead
2096          * scan available links with the same MAC as the current dst_if
2097          * and choose the best one.
2098          *
2099          * We also need to do this because arp entries tag onto a particular
2100          * interface and if it happens to be dead then the packets will
2101          * go into a bit bucket.
2102          *
2103          * If LINK2 is set the matching links are bonded and we round-robin.
2104          * (the MAC address must be the same for the participating links).
2105          * In this case links in a STP FORWARDING or BONDED state are
2106          * allowed for unicast packets.
2107          */
2108         bif = bridge_lookup_member_if(sc, dst_if);
2109         if (bif->bif_flags & IFBIF_STP) {
2110                 alt_if = NULL;
2111                 priority = 0;
2112                 alt_priority = 0;
2113
2114                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2115                                      bif_next, nbif) {
2116                         /*
2117                          * Ignore member interfaces which aren't running.
2118                          */
2119                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
2120                                 continue;
2121
2122                         /*
2123                          * Member interfaces with the same MAC (usually TAPs)
2124                          * are considered to be the same.  Select the best
2125                          * one from the BONDED or FORWARDING set, and keep
2126                          * track of the best BLOCKING or LEARNING interface
2127                          * as a fallback when no better candidate is available.
2128                          */
2129                         if (memcmp(IF_LLADDR(bif->bif_ifp),
2130                                    IF_LLADDR(dst_if),
2131                                    ETHER_ADDR_LEN) != 0) {
2132                                 continue;
2133                         }
2134
2135                         switch(bif->bif_state) {
2136                         case BSTP_IFSTATE_BLOCKING:
2137                                 if (bif->bif_priority + 256 > alt_priority) {
2138                                         alt_priority = bif->bif_priority + 256;
2139                                         alt_if = bif->bif_ifp;
2140                                 }
2141                                 continue;
2142                         case BSTP_IFSTATE_LEARNING:
2143                                 if (bif->bif_priority > alt_priority) {
2144                                         alt_priority = bif->bif_priority;
2145                                         alt_if = bif->bif_ifp;
2146                                 }
2147                                 continue;
2148                         case BSTP_IFSTATE_L1BLOCKING:
2149                         case BSTP_IFSTATE_LISTENING:
2150                         case BSTP_IFSTATE_DISABLED:
2151                                 continue;
2152                         default:
2153                                 /* bonded, forwarding */
2154                                 break;
2155                         }
2156
2157                         /*
2158                          * XXX we need to use the Toeplitz hash or
2159                          *     something like that instead of
2160                          *     round-robining.
2161                          */
2162                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2163                                 dst_if = bif->bif_ifp;
2164                                 if (++bif->bif_bond_count >=
2165                                     bif->bif_bond_weight) {
2166                                         bif->bif_bond_count = 0;
2167                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2168                                                      bif, bif_next);
2169                                         TAILQ_INSERT_TAIL(
2170                                                      &sc->sc_iflists[mycpuid],
2171                                                      bif, bif_next);
2172                                 }
2173                                 priority = 1;
2174                                 break;
2175                         }
2176                         if (bif->bif_priority > priority) {
2177                                 priority = bif->bif_priority;
2178                                 dst_if = bif->bif_ifp;
2179                         }
2180                 }
2181
2182                 /*
2183                  * Interface of last resort if nothing was found.
2184                  */
2185                 if (priority == 0 && alt_if)
2186                         dst_if = alt_if;
2187         }
2188
2189         if (sc->sc_span)
2190                 bridge_span(sc, m);
2191         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2192                 m_freem(m);
2193         else
2194                 bridge_handoff(sc, dst_if, m, from_us);
2195         return (0);
2196 }
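/*
 * The LINK2 path above implements weighted round-robin for bonded
 * members: a member keeps transmitting until bif_bond_count reaches
 * bif_bond_weight, then its count is reset and it is rotated to the
 * tail of the per-cpu member list.  The userland sketch below mirrors
 * just that rotation over a plain TAILQ; the member names and weights
 * are made up, and the real code additionally restricts the choice to
 * running members whose MAC matches dst_if and whose STP state allows
 * transmission.
 */
#if 0	/* illustrative sketch, not part of this driver */
#include <sys/queue.h>
#include <stdio.h>

struct member {
	const char		*name;
	int			bond_weight;	/* packets per turn */
	int			bond_count;	/* packets sent this turn */
	TAILQ_ENTRY(member)	link;
};
TAILQ_HEAD(memberlist, member);

/* Use the head member; rotate it to the tail once its weight is used up. */
static struct member *
bond_select(struct memberlist *list)
{
	struct member *m = TAILQ_FIRST(list);

	if (m == NULL)
		return (NULL);
	if (++m->bond_count >= m->bond_weight) {
		m->bond_count = 0;
		TAILQ_REMOVE(list, m, link);
		TAILQ_INSERT_TAIL(list, m, link);
	}
	return (m);
}

int
main(void)
{
	struct memberlist list = TAILQ_HEAD_INITIALIZER(list);
	struct member a = { "em0", 2, 0 };
	struct member b = { "em1", 1, 0 };
	int i;

	TAILQ_INSERT_TAIL(&list, &a, link);
	TAILQ_INSERT_TAIL(&list, &b, link);
	for (i = 0; i < 6; ++i)		/* em0 em0 em1 em0 em0 em1 */
		printf("%s\n", bond_select(&list)->name);
	return (0);
}
#endif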
2197
2198 /*
2199  * Returns the bridge interface associated with an ifc.
2200  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2201  * code to supply the bridge for the is-at info, making
2202  * the bridge responsible for matching local addresses.
2203  *
2204  * Without this the ARP code will supply bridge member interfaces
2205  * for the is-at, which makes it difficult for the bridge to fail
2206  * over interfaces (among other things).
2207  */
2208 static struct ifnet *
2209 bridge_interface(void *if_bridge)
2210 {
2211         struct bridge_softc *sc = if_bridge;
2212         return (sc->sc_ifp);
2213 }
2214
2215 /*
2216  * bridge_start:
2217  *
2218  *      Start output on a bridge.
2219  */
2220 static void
2221 bridge_start(struct ifnet *ifp)
2222 {
2223         struct bridge_softc *sc = ifp->if_softc;
2224
2225         ASSERT_IFNET_SERIALIZED_TX(ifp);
2226
2227         ifp->if_flags |= IFF_OACTIVE;
2228         for (;;) {
2229                 struct ifnet *dst_if = NULL;
2230                 struct ether_header *eh;
2231                 struct mbuf *m;
2232
2233                 m = ifq_dequeue(&ifp->if_snd, NULL);
2234                 if (m == NULL)
2235                         break;
2236
2237                 if (m->m_len < sizeof(*eh)) {
2238                         m = m_pullup(m, sizeof(*eh));
2239                         if (m == NULL) {
2240                                 ifp->if_oerrors++;
2241                                 continue;
2242                         }
2243                 }
2244                 eh = mtod(m, struct ether_header *);
2245
2246                 BPF_MTAP(ifp, m);
2247                 ifp->if_opackets++;
2248
2249                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2250                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2251
2252                 if (dst_if == NULL)
2253                         bridge_start_bcast(sc, m);
2254                 else
2255                         bridge_enqueue(dst_if, m);
2256         }
2257         ifp->if_flags &= ~IFF_OACTIVE;
2258 }
2259
2260 /*
2261  * bridge_forward:
2262  *
2263  *      Forward packets received on a bridge interface via the input
2264  *      path.
2265  *
2266  *      This implements the forwarding function of the bridge.
2267  */
2268 static void
2269 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2270 {
2271         struct bridge_iflist *bif, *nbif;
2272         struct ifnet *src_if, *dst_if, *alt_if, *ifp;
2273         struct ether_header *eh;
2274         int priority;
2275         int alt_priority;
2276         int from_blocking;
2277
2278         src_if = m->m_pkthdr.rcvif;
2279         ifp = sc->sc_ifp;
2280
2281         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2282
2283         ifp->if_ipackets++;
2284         ifp->if_ibytes += m->m_pkthdr.len;
2285
2286         /*
2287          * Look up the bridge_iflist.
2288          */
2289         bif = bridge_lookup_member_if(sc, src_if);
2290         if (bif == NULL) {
2291                 /* Interface is not a bridge member (anymore?) */
2292                 m_freem(m);
2293                 return;
2294         }
2295
2296         /*
2297          * in a BLOCKING state is allowed; it could be a member of last
2298          * in a BLOCKING state is allowed, it could be a member of last
2299          * resort from the sender's point of view, but forwarding it is
2300          * not allowed.
2301          *
2302          * The sender's spanning tree will eventually sync up and the
2303          * sender will go into a BLOCKING state too (but this still may be
2304          * an interface of last resort during state changes).
2305          */
2306         if (bif->bif_flags & IFBIF_STP) {
2307                 switch (bif->bif_state) {
2308                 case BSTP_IFSTATE_L1BLOCKING:
2309                 case BSTP_IFSTATE_LISTENING:
2310                 case BSTP_IFSTATE_DISABLED:
2311                         m_freem(m);
2312                         return;
2313                 default:
2314                         /* learning, blocking, bonded, forwarding */
2315                         break;
2316                 }
2317         }
2318         from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2319
2320         eh = mtod(m, struct ether_header *);
2321
2322         /*
2323          * If the interface is learning, and the source
2324          * address is valid and not multicast, record
2325          * the address.
2326          */
2327         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2328             from_blocking == 0 &&
2329             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2330             (eh->ether_shost[0] == 0 &&
2331              eh->ether_shost[1] == 0 &&
2332              eh->ether_shost[2] == 0 &&
2333              eh->ether_shost[3] == 0 &&
2334              eh->ether_shost[4] == 0 &&
2335              eh->ether_shost[5] == 0) == 0) {
2336                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2337         }
2338
2339         /*
2340          * Don't forward from an interface in the listening or learning
2341          * state.  That is, in the learning state we learn information
2342          * but we throw away the packets.
2343          *
2344          * We let through packets on interfaces in the blocking state.
2345          * The blocking state is applicable to the send side, not the
2346          * receive side.
2347          */
2348         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2349             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2350              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2351                 m_freem(m);
2352                 return;
2353         }
2354
2355         /*
2356          * At this point, the port either doesn't participate
2357          * in spanning tree or it is in the forwarding state.
2358          */
2359
2360         /*
2361          * If the packet is unicast, destined for someone on
2362          * "this" side of the bridge, drop it.
2363          */
2364         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2365                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2366                 if (src_if == dst_if) {
2367                         m_freem(m);
2368                         return;
2369                 }
2370         } else {
2371                 /* ...forward it to all interfaces. */
2372                 ifp->if_imcasts++;
2373                 dst_if = NULL;
2374         }
2375
2376         /*
2377          * Broadcast if we do not have forwarding information.  However, if
2378          * we received the packet on a blocking interface we do not do this
2379          * (unless you really want to blow up your network).
2380          */
2381         if (dst_if == NULL) {
2382                 if (from_blocking)
2383                         m_freem(m);
2384                 else
2385                         bridge_broadcast(sc, src_if, m);
2386                 return;
2387         }
2388
2389         /*
2390          * Unicast, kinda replicates the output side of bridge_output().
2391          */
2392         bif = bridge_lookup_member_if(sc, dst_if);
2393         if (bif == NULL) {
2394                 /* Not a member of the bridge (anymore?) */
2395                 m_freem(m);
2396                 return;
2397         }
2398
2399         if (bif->bif_flags & IFBIF_STP) {
2400                 alt_if = NULL;
2401                 alt_priority = 0;
2402                 priority = 0;
2403
2404                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2405                                      bif_next, nbif) {
2406                         if (memcmp(IF_LLADDR(bif->bif_ifp),
2407                                    IF_LLADDR(dst_if),
2408                                    ETHER_ADDR_LEN) != 0) {
2409                                 continue;
2410                         }
2411
2412                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
2413                                 continue;
2414
2415                         /*
2416                          * NOTE: We allow transmissions through a BLOCKING
2417                          *       or LEARNING interface only as a last resort.
2418                          *       Both are DISALLOWED if the packet arrived
2419                          *       on a BLOCKING interface (from_blocking).
2420                          * NOTE: If we send a packet through a learning
2421                          *       interface the receiving end (if also in
2422                          *       LEARNING) will throw it away, so this is
2423                          *       the ultimate last resort.
2424                          */
2425                         switch(bif->bif_state) {
2426                         case BSTP_IFSTATE_BLOCKING:
2427                                 if (from_blocking == 0 &&
2428                                     bif->bif_priority + 256 > alt_priority) {
2429                                         alt_priority = bif->bif_priority + 256;
2430                                         alt_if = bif->bif_ifp;
2431                                 }
2432                                 continue;
2433                         case BSTP_IFSTATE_LEARNING:
2434                                 if (from_blocking == 0 &&
2435                                     bif->bif_priority > alt_priority) {
2436                                         alt_priority = bif->bif_priority;
2437                                         alt_if = bif->bif_ifp;
2438                                 }
2439                                 continue;
2440                         case BSTP_IFSTATE_L1BLOCKING:
2441                         case BSTP_IFSTATE_LISTENING:
2442                         case BSTP_IFSTATE_DISABLED:
2443                                 continue;
2444                         default:
2445                                 /* FORWARDING, BONDED */
2446                                 break;
2447                         }
2448
2449                         /*
2450                          * XXX we need to use the Toeplitz hash or
2451                          *     something like that instead of
2452                          *     round-robining.
2453                          */
2454                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2455                                 dst_if = bif->bif_ifp;
2456                                 if (++bif->bif_bond_count >=
2457                                     bif->bif_bond_weight) {
2458                                         bif->bif_bond_count = 0;
2459                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2460                                                      bif, bif_next);
2461                                         TAILQ_INSERT_TAIL(
2462                                                      &sc->sc_iflists[mycpuid],
2463                                                      bif, bif_next);
2464                                 }
2465                                 priority = 1;
2466                                 break;
2467                         }
2468
2469                         /*
2470                          * Select best interface in the FORWARDING or
2471                          * BONDED set.  Well, there shouldn't be any
2472                          * in a BONDED state if LINK2 is not set (they
2473                          * will all be in a BLOCKING state), but there
2474                          * could be a transitory condition here.
2475                          */
2476                         if (bif->bif_priority > priority) {
2477                                 priority = bif->bif_priority;
2478                                 dst_if = bif->bif_ifp;
2479                         }
2480                 }
2481
2482                 /*
2483                  * If no suitable interfaces were found but a suitable
2484                  * alternative interface was found, use the alternative
2485                  * interface.
2486                  */
2487                 if (priority == 0 && alt_if)
2488                         dst_if = alt_if;
2489         }
2490
2491         /*
2492          * At this point, we're dealing with a unicast frame
2493          * going to a different interface.
2494          */
2495         if ((dst_if->if_flags & IFF_RUNNING) == 0) {
2496                 m_freem(m);
2497                 return;
2498         }
2499
2500         if (inet_pfil_hook.ph_hashooks > 0
2501 #ifdef INET6
2502             || inet6_pfil_hook.ph_hashooks > 0
2503 #endif
2504             ) {
2505                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2506                         return;
2507                 if (m == NULL)
2508                         return;
2509
2510                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2511                         return;
2512                 if (m == NULL)
2513                         return;
2514         }
2515         bridge_handoff(sc, dst_if, m, 0);
2516 }
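/*
 * Both unicast selection loops above carry an XXX about replacing the
 * round-robin with something like the Toeplitz hash.  The sketch below
 * is only the generic RSS-style Toeplitz computation; the key, the
 * choice of input bytes (e.g. the destination MAC or an IP/port tuple)
 * and how the hash would be folded into a member index are assumptions,
 * not something this driver does today.
 */
#if 0	/* illustrative sketch, not part of this driver */
#include <stdint.h>
#include <stdio.h>

static uint32_t
toeplitz_hash(const uint8_t *key, size_t keylen,
	      const uint8_t *data, size_t datalen)
{
	uint32_t hash = 0;
	uint32_t window;
	size_t keybit;
	size_t i;
	int b;

	/* 32-bit window over the key, advanced one bit per data bit */
	window = ((uint32_t)key[0] << 24) | ((uint32_t)key[1] << 16) |
		 ((uint32_t)key[2] << 8) | (uint32_t)key[3];
	keybit = 32;

	for (i = 0; i < datalen; ++i) {
		for (b = 7; b >= 0; --b) {
			if (data[i] & (1U << b))
				hash ^= window;
			window <<= 1;
			if ((keybit >> 3) < keylen &&
			    (key[keybit >> 3] & (0x80 >> (keybit & 7))))
				window |= 1;
			++keybit;
		}
	}
	return (hash);
}

int
main(void)
{
	/* arbitrary demo key; a real setup would use a random 40-byte key */
	static const uint8_t key[40] = {
		0x6d, 0x5a, 0x1c, 0x3e, 0x91, 0x47, 0xb2, 0x08,
		0x55, 0xaa, 0x12, 0xef, 0x7d, 0xc4, 0x39, 0x60,
		0x6d, 0x5a, 0x1c, 0x3e, 0x91, 0x47, 0xb2, 0x08,
		0x55, 0xaa, 0x12, 0xef, 0x7d, 0xc4, 0x39, 0x60,
		0x6d, 0x5a, 0x1c, 0x3e, 0x91, 0x47, 0xb2, 0x08
	};
	const uint8_t dst_mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint32_t h = toeplitz_hash(key, sizeof(key), dst_mac, sizeof(dst_mac));

	/* e.g. pick one of two bonded members from the hash */
	printf("toeplitz %08x -> member %u\n", h, h & 1);
	return (0);
}
#endif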
2517
2518 /*
2519  * bridge_input:
2520  *
2521  *      Receive input from a member interface.  Queue the packet for
2522  *      bridging if it is not for us.
2523  */
2524 static struct mbuf *
2525 bridge_input(struct ifnet *ifp, struct mbuf *m)
2526 {
2527         struct bridge_softc *sc = ifp->if_bridge;
2528         struct bridge_iflist *bif;
2529         struct ifnet *bifp, *new_ifp;
2530         struct ether_header *eh;
2531         struct mbuf *mc, *mc2;
2532         int from_blocking;
2533
2534         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2535
2536         /*
2537          * Make sure that we are still a member of a bridge interface.
2538          */
2539         if (sc == NULL)
2540                 return m;
2541
2542         new_ifp = NULL;
2543         bifp = sc->sc_ifp;
2544
2545         if ((bifp->if_flags & IFF_RUNNING) == 0)
2546                 goto out;
2547
2548         /*
2549          * Implement support for bridge monitoring.  If this flag has been
2550          * set on this interface, discard the packet once we push it through
2551          * the bpf(4) machinery, but before we do, increment various counters
2552          * associated with this bridge.
2553          */
2554         if (bifp->if_flags & IFF_MONITOR) {
2555                 /* Change input interface to this bridge */
2556                 m->m_pkthdr.rcvif = bifp;
2557
2558                 BPF_MTAP(bifp, m);
2559
2560                 /* Update bridge's ifnet statistics */
2561                 bifp->if_ipackets++;
2562                 bifp->if_ibytes += m->m_pkthdr.len;
2563                 if (m->m_flags & (M_MCAST | M_BCAST))
2564                         bifp->if_imcasts++;
2565
2566                 m_freem(m);
2567                 m = NULL;
2568                 goto out;
2569         }
2570
2571         /*
2572          * Handle the ether_header
2573          *
2574          * In all cases if the packet is destined for us via our MAC
2575          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2576          * repeat the source MAC out the same interface.
2577          *
2578          * This first test against our bridge MAC is the fast-path.
2579          *
2580          * NOTE!  The bridge interface can serve as an endpoint for
2581          *        communication but normally there are no IPs associated
2582          *        with it so you cannot route through it.  Instead what
2583          *        you do is point your default route *THROUGH* the bridge
2584          *        to the actual default router for one of the bridged spaces.
2585          *
2586          *        Another possibility is to put all your IP specifications
2587          *        on the bridge instead of on the individual interfaces.  If
2588          *        you do this it should be possible to use the bridge as an
2589          *        end point and route (rather than switch) through it using
2590          *        the default route or ipfw forwarding rules.
2591          */
2592
2593         /*
2594          * Acquire header
2595          */
2596         if (m->m_len < ETHER_HDR_LEN) {
2597                 m = m_pullup(m, ETHER_HDR_LEN);
2598                 if (m == NULL)
2599                         goto out;
2600         }
2601         eh = mtod(m, struct ether_header *);
2602         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2603         bcopy(eh, &m->m_pkthdr.br.ether, sizeof(*eh));
2604
2605         if ((bridge_debug & 1) &&
2606             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2607             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2608                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2609                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2610                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2611                         eh->ether_dhost[0],
2612                         eh->ether_dhost[1],
2613                         eh->ether_dhost[2],
2614                         eh->ether_dhost[3],
2615                         eh->ether_dhost[4],
2616                         eh->ether_dhost[5],
2617                         eh->ether_shost[0],
2618                         eh->ether_shost[1],
2619                         eh->ether_shost[2],
2620                         eh->ether_shost[3],
2621                         eh->ether_shost[4],
2622                         eh->ether_shost[5],
2623                         eh->ether_type,
2624                         ((u_char *)IF_LLADDR(bifp))[0],
2625                         ((u_char *)IF_LLADDR(bifp))[1],
2626                         ((u_char *)IF_LLADDR(bifp))[2],
2627                         ((u_char *)IF_LLADDR(bifp))[3],
2628                         ((u_char *)IF_LLADDR(bifp))[4],
2629                         ((u_char *)IF_LLADDR(bifp))[5]
2630                 );
2631         }
2632
2633         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2634                 /*
2635                  * If the packet is for us, set the packets source as the
2636                  * bridge, and return the packet back to ifnet.if_input for
2637                  * local processing.
2638                  */
2639                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2640                 KASSERT(bifp->if_bridge == NULL,
2641                         ("loop created in bridge_input"));
2642                 if (pfil_member != 0) {
2643                         if (inet_pfil_hook.ph_hashooks > 0
2644 #ifdef INET6
2645                             || inet6_pfil_hook.ph_hashooks > 0
2646 #endif
2647                         ) {
2648                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2649                                         goto out;
2650                                 if (m == NULL)
2651                                         goto out;
2652                         }
2653                 }
2654                 new_ifp = bifp;
2655                 goto out;
2656         }
2657
2658         /*
2659          * Tap all packets arriving on the bridge, no matter if
2660          * they are local destinations or not.  In is in.
2661          */
2662         BPF_MTAP(bifp, m);
2663
2664         bif = bridge_lookup_member_if(sc, ifp);
2665         if (bif == NULL)
2666                 goto out;
2667
2668         if (sc->sc_span)
2669                 bridge_span(sc, m);
2670
2671         if (m->m_flags & (M_BCAST | M_MCAST)) {
2672                 /*
2673                  * Tap off 802.1D packets; they do not get forwarded.
2674                  */
2675                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2676                             ETHER_ADDR_LEN) == 0) {
2677                         ifnet_serialize_all(bifp);
2678                         bstp_input(sc, bif, m);
2679                         ifnet_deserialize_all(bifp);
2680
2681                         /* m is freed by bstp_input */
2682                         m = NULL;
2683                         goto out;
2684                 }
2685
2686                 /*
2687                  * Other than 802.1D packets, ignore packets if the
2688                  * interface is not in a good state.
2689                  *
2690                  * NOTE: Broadcast/mcast packets received on a blocking or
2691                  *       learning interface are allowed for local processing.
2692                  *
2693                  *       The sending side of a blocked port will stop
2694                  *       transmitting when a better alternative is found.
2695                  *       However, later on we will disallow the forwarding
2696                  *       of bcast/mcast packets over a blocking interface.
2697                  */
2698                 if (bif->bif_flags & IFBIF_STP) {
2699                         switch (bif->bif_state) {
2700                         case BSTP_IFSTATE_L1BLOCKING:
2701                         case BSTP_IFSTATE_LISTENING:
2702                         case BSTP_IFSTATE_DISABLED:
2703                                 goto out;
2704                         default:
2705                                 /* blocking, learning, bonded, forwarding */
2706                                 break;
2707                         }
2708                 }
2709
2710                 /*
2711                  * Make a deep copy of the packet and enqueue the copy
2712                  * for bridge processing; return the original packet for
2713                  * local processing.
2714                  */
2715                 mc = m_dup(m, MB_DONTWAIT);
2716                 if (mc == NULL)
2717                         goto out;
2718
2719                 /*
2720                  * It's just too dangerous to allow bcast/mcast over a
2721                  * blocked interface, eventually the network will sort
2722                  * itself out and a better path will be found.
2723                  */
2724                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2725                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2726                         bridge_forward(sc, mc);
2727                 }
2728
2729                 /*
2730                  * Reinject the mbuf as arriving on the bridge so we have a
2731                  * chance at claiming multicast packets. We can not loop back
2732                  * here from ether_input as a bridge is never a member of a
2733                  * bridge.
2734                  */
2735                 KASSERT(bifp->if_bridge == NULL,
2736                         ("loop created in bridge_input"));
2737                 mc2 = m_dup(m, MB_DONTWAIT);
2738 #ifdef notyet
2739                 if (mc2 != NULL) {
2740                         /* Keep the layer3 header aligned */
2741                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2742                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2743                 }
2744 #endif
2745                 if (mc2 != NULL) {
2746                         /*
2747                          * Don't tap to bpf(4) again; we have already done
2748                          * the tapping.
2749                          *
2750                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2751                          * processed as coming in on the correct interface.
2752                          *
2753                          * Clear the bridge flag for local processing in
2754                          * case the packet gets routed.
2755                          */
2756                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2757                         ether_reinput_oncpu(bifp, mc2, 0);
2758                 }
2759
2760                 /* Return the original packet for local processing. */
2761                 goto out;
2762         }
2763
2764         /*
2765          * Input of a unicast packet.  We have to allow unicast packets
2766          * input from links in the BLOCKING state as this might be an
2767          * interface of last resort.
2768          *
2769          * NOTE: We explicitly ignore normal packets received on a link
2770          *       in the BLOCKING state.  The point of being in that state
2771          *       is to avoid getting duplicate packets.
2772          *
2773          *       HOWEVER, if LINK2 is set the normal spanning tree code
2774          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2775          *       loops.  Unicast packets CAN still loop if we allow the
2776          *       case (hence we only do it in LINK2), but it isn't quite as
2777          *       bad as a broadcast packet looping.
2778          */
2779         from_blocking = 0;
2780         if (bif->bif_flags & IFBIF_STP) {
2781                 switch (bif->bif_state) {
2782                 case BSTP_IFSTATE_L1BLOCKING:
2783                 case BSTP_IFSTATE_LISTENING:
2784                 case BSTP_IFSTATE_DISABLED:
2785                         goto out;
2786                 case BSTP_IFSTATE_BLOCKING:
2787                         from_blocking = 1;
2788                         /* fall through */
2789                 default:
2790                         /* blocking, bonded, forwarding, learning */
2791                         break;
2792                 }
2793         }
2794
2795         /*
2796          * Unicast.  Make sure it's not for us.
2797          *
2798          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2799          * is followed by breaking out of the loop.
2800          */
2801         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2802                 if (bif->bif_ifp->if_type != IFT_ETHER)
2803                         continue;
2804
2805                 /*
2806                  * It is destined for an interface linked to the bridge.
2807                  * We want the bridge itself to take care of link level
2808                  * forwarding to member interfaces so reinput on the bridge.
2809                  * i.e. if you ping an IP on a target interface associated
2810                  * with the bridge, the arp is-at response should indicate
2811                  * the bridge MAC.
2812                  *
2813                  * Only update our addr list when learning if the port
2814                  * is not in a blocking state.  If it is we still allow
2815                  * the packet but we do not try to learn from it.
2816                  */
2817                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2818                            ETHER_ADDR_LEN) == 0) {
2819                         if (bif->bif_ifp != ifp) {
2820                                 /* XXX loop prevention */
2821                                 m->m_flags |= M_ETHER_BRIDGED;
2822                         }
2823                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2824                             bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2825                                 bridge_rtupdate(sc, eh->ether_shost,
2826                                                 ifp, IFBAF_DYNAMIC);
2827                         }
2828                         new_ifp = bifp; /* not bif->bif_ifp */
2829                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2830                         goto out;
2831                 }
2832
2833                 /*
2834                  * Ignore received packets that were sent by us.
2835                  */
2836                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2837                            ETHER_ADDR_LEN) == 0) {
2838                         m_freem(m);
2839                         m = NULL;
2840                         goto out;
2841                 }
2842         }
2843
2844         /*
2845          * It isn't for us.
2846          *
2847          * Perform the bridge forwarding function, but disallow bridging
2848          * to interfaces in the blocking state if the packet came in on
2849          * an interface in the blocking state.
2850          */
2851         bridge_forward(sc, m);
2852         m = NULL;
2853
2854         /*
2855          * ether_reinput_oncpu() will reprocess rcvif as
2856          * coming from new_ifp (since we do not specify
2857          * REINPUT_KEEPRCVIF).
2858          */
2859 out:
2860         if (new_ifp != NULL) {
2861                 /*
2862                  * Clear the bridge flag for local processing in
2863                  * case the packet gets routed.
2864                  */
2865                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2866                 m = NULL;
2867         }
2868         return (m);
2869 }
2870
2871 /*
2872  * bridge_start_bcast:
2873  *
2874  *      Broadcast a packet sent from the bridge to all member
2875  *      interfaces.
2876  *      This is a simplified version of bridge_broadcast(); unlike that
2877  *      function, it expects the caller to hold the bridge's serializer.
2878  */
2879 static void
2880 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2881 {
2882         struct bridge_iflist *bif;
2883         struct mbuf *mc;
2884         struct ifnet *dst_if, *alt_if, *bifp;
2885         int used = 0;
2886         int found = 0;
2887         int alt_priority;
2888
2889         bifp = sc->sc_ifp;
2890         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2891
2892         /*
2893          * Following loop is MPSAFE; nothing is blocking
2894          * in the loop body.
2895          *
2896          * NOTE: We transmit through a member in the BLOCKING state only
2897          *       as a last resort.
2898          */
2899         alt_if = NULL;
2900         alt_priority = 0;
2901
2902         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2903                 dst_if = bif->bif_ifp;
2904
2905                 if (bif->bif_flags & IFBIF_STP) {
2906                         switch (bif->bif_state) {
2907                         case BSTP_IFSTATE_BLOCKING:
2908                                 if (bif->bif_priority > alt_priority) {
2909                                         alt_priority = bif->bif_priority;
2910                                         alt_if = bif->bif_ifp;
2911                                 }
2912                                 /* fall through */
2913                         case BSTP_IFSTATE_L1BLOCKING:
2914                         case BSTP_IFSTATE_DISABLED:
2915                                 continue;
2916                         default:
2917                                 /* listening, learning, bonded, forwarding */
2918                                 break;
2919                         }
2920                 }
2921
2922                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2923                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2924                         continue;
2925
2926                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2927                         continue;
2928
2929                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2930                         mc = m;
2931                         used = 1;
2932                 } else {
2933                         mc = m_copypacket(m, MB_DONTWAIT);
2934                         if (mc == NULL) {
2935                                 bifp->if_oerrors++;
2936                                 continue;
2937                         }
2938                 }
2939                 found = 1;
2940                 bridge_enqueue(dst_if, mc);
2941         }
2942
2943         if (found == 0 && alt_if) {
2944                 KKASSERT(used == 0);
2945                 mc = m;
2946                 used = 1;
2947                 bridge_enqueue(alt_if, mc);
2948         }
2949
2950         if (used == 0)
2951                 m_freem(m);
2952 }
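
/*
 * Illustrative sketch (not part of the driver): the loop above uses the
 * usual mbuf broadcast pattern -- every member except the last gets a
 * copy via m_copypacket(), while the last member consumes the original
 * mbuf; 'used' records whether the original was handed off.  A minimal
 * sketch of the same pattern, assuming a hypothetical send_one():
 */
#if 0
	TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
		if (TAILQ_NEXT(bif, bif_next) == NULL) {
			mc = m;			/* last member consumes m */
			used = 1;
		} else {
			mc = m_copypacket(m, MB_DONTWAIT);
			if (mc == NULL)
				continue;	/* count an output error */
		}
		send_one(bif->bif_ifp, mc);	/* e.g. bridge_enqueue() */
	}
	if (used == 0)
		m_freem(m);			/* nobody consumed the original */
#endif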
2953
2954 /*
2955  * bridge_broadcast:
2956  *
2957  *      Send a frame to all interfaces that are members of
2958  *      the bridge, except for the one on which the packet
2959  *      arrived.
2960  */
2961 static void
2962 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2963                  struct mbuf *m)
2964 {
2965         struct bridge_iflist *bif, *nbif;
2966         struct ether_header *eh;
2967         struct mbuf *mc;
2968         struct ifnet *dst_if, *alt_if, *bifp;
2969         int used;
2970         int found;
2971         int alt_priority;
2972         int from_us;
2973
2974         bifp = sc->sc_ifp;
2975         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
2976
2977         eh = mtod(m, struct ether_header *);
2978         from_us = bridge_from_us(sc, eh);
2979
2980         if (inet_pfil_hook.ph_hashooks > 0
2981 #ifdef INET6
2982             || inet6_pfil_hook.ph_hashooks > 0
2983 #endif
2984             ) {
2985                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2986                         return;
2987                 if (m == NULL)
2988                         return;
2989
2990                 /* Filter on the bridge interface before broadcasting */
2991                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2992                         return;
2993                 if (m == NULL)
2994                         return;
2995         }
2996
2997         alt_if = NULL;
2998         alt_priority = 0;
2999         found = 0;
3000         used = 0;
3001
3002         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
3003                 dst_if = bif->bif_ifp;
3004                 if (dst_if == src_if)
3005                         continue;
3006
3007                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3008                         continue;
3009
3010                 /*
3011                  * Generally speaking we only broadcast through forwarding
3012                  * interfaces.  If no interfaces are available we select
3013                  * a BONDED, BLOCKING, or LEARNING interface to forward
3014                  * through.
3015                  */
3016                 if (bif->bif_flags & IFBIF_STP) {
3017                         switch (bif->bif_state) {
3018                         case BSTP_IFSTATE_BONDED:
3019                                 if (bif->bif_priority + 512 > alt_priority) {
3020                                         alt_priority = bif->bif_priority + 512;
3021                                         alt_if = bif->bif_ifp;
3022                                 }
3023                                 continue;
3024                         case BSTP_IFSTATE_BLOCKING:
3025                                 if (bif->bif_priority + 256 > alt_priority) {
3026                                         alt_priority = bif->bif_priority + 256;
3027                                         alt_if = bif->bif_ifp;
3028                                 }
3029                                 continue;
3030                         case BSTP_IFSTATE_LEARNING:
3031                                 if (bif->bif_priority > alt_priority) {
3032                                         alt_priority = bif->bif_priority;
3033                                         alt_if = bif->bif_ifp;
3034                                 }
3035                                 continue;
3036                         case BSTP_IFSTATE_L1BLOCKING:
3037                         case BSTP_IFSTATE_DISABLED:
3038                         case BSTP_IFSTATE_LISTENING:
3039                                 continue;
3040                         default:
3041                                 /* forwarding */
3042                                 break;
3043                         }
3044                 }
3045
3046                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3047                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3048                         continue;
3049                 }
3050
3051                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3052                         mc = m;
3053                         used = 1;
3054                 } else {
3055                         mc = m_copypacket(m, MB_DONTWAIT);
3056                         if (mc == NULL) {
3057                                 sc->sc_ifp->if_oerrors++;
3058                                 continue;
3059                         }
3060                 }
3061                 found = 1;
3062
3063                 /*
3064                  * Filter on the output interface.  Pass a NULL bridge
3065                  * interface pointer so we do not redundantly filter on
3066                  * the bridge for each interface we broadcast on.
3067                  */
3068                 if (inet_pfil_hook.ph_hashooks > 0
3069 #ifdef INET6
3070                     || inet6_pfil_hook.ph_hashooks > 0
3071 #endif
3072                     ) {
3073                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3074                                 continue;
3075                         if (mc == NULL)
3076                                 continue;
3077                 }
3078                 bridge_handoff(sc, dst_if, mc, from_us);
3079
3080                 if (nbif != NULL && !nbif->bif_onlist) {
3081                         KKASSERT(bif->bif_onlist);
3082                         nbif = TAILQ_NEXT(bif, bif_next);
3083                 }
3084         }
3085
3086         if (found == 0 && alt_if) {
3087                 KKASSERT(used == 0);
3088                 mc = m;
3089                 used = 1;
3090                 bridge_enqueue(alt_if, mc);
3091         }
3092
3093         if (used == 0)
3094                 m_freem(m);
3095 }
3096
3097 /*
3098  * bridge_span:
3099  *
3100  *      Duplicate a packet out one or more interfaces that are in span mode;
3101  *      the original mbuf is left unmodified.
3102  */
3103 static void
3104 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3105 {
3106         struct bridge_iflist *bif;
3107         struct ifnet *dst_if, *bifp;
3108         struct mbuf *mc;
3109
3110         bifp = sc->sc_ifp;
3111         ifnet_serialize_all(bifp);
3112
3113         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3114                 dst_if = bif->bif_ifp;
3115
3116                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3117                         continue;
3118
3119                 mc = m_copypacket(m, MB_DONTWAIT);
3120                 if (mc == NULL) {
3121                         sc->sc_ifp->if_oerrors++;
3122                         continue;
3123                 }
3124                 bridge_enqueue(dst_if, mc);
3125         }
3126
3127         ifnet_deserialize_all(bifp);
3128 }
3129
3130 static void
3131 bridge_rtmsg_sync_handler(netmsg_t msg)
3132 {
3133         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3134 }
3135
3136 static void
3137 bridge_rtmsg_sync(struct bridge_softc *sc)
3138 {
3139         struct netmsg_base msg;
3140
3141         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3142
3143         netmsg_init(&msg, NULL, &curthread->td_msgport,
3144                     0, bridge_rtmsg_sync_handler);
3145         ifnet_domsg(&msg.lmsg, 0);
3146 }
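
/*
 * Illustrative sketch (not part of the driver): the handler/dispatch pair
 * above is the per-cpu replication pattern used throughout this file.  A
 * message is started on CPU0 with ifnet_domsg(); each CPU's handler does
 * its per-cpu work and then hands the message to the next CPU with
 * ifnet_forwardmsg(mycpuid + 1).  Once the message has visited the last
 * CPU it is replied, which unblocks the ifnet_domsg() caller.
 */
#if 0
	static void
	example_percpu_handler(netmsg_t msg)	/* hypothetical handler */
	{
		/* ... touch only this CPU's copy of the data ... */
		ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
	}

	struct netmsg_base msg;

	netmsg_init(&msg, NULL, &curthread->td_msgport, 0,
		    example_percpu_handler);
	ifnet_domsg(&msg.lmsg, 0);	/* start on CPU0, wait for completion */
#endif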
3147
3148 static __inline void
3149 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3150                      int setflags, uint8_t flags, uint32_t timeo)
3151 {
3152         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3153             bri->bri_ifp != dst_if)
3154                 bri->bri_ifp = dst_if;
3155         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3156             bri->bri_expire != time_second + timeo)
3157                 bri->bri_expire = time_second + timeo;
3158         if (setflags)
3159                 bri->bri_flags = flags;
3160 }
3161
3162 static int
3163 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3164                        struct ifnet *dst_if, int setflags, uint8_t flags,
3165                        struct bridge_rtinfo **bri0)
3166 {
3167         struct bridge_rtnode *brt;
3168         struct bridge_rtinfo *bri;
3169
3170         if (mycpuid == 0) {
3171                 brt = bridge_rtnode_lookup(sc, dst);
3172                 if (brt != NULL) {
3173                         /*
3174                          * An rtnode for 'dst' already exists.  We inform the
3175                          * caller of this by leaving *bri0 as NULL.  The
3176                          * caller will terminate the installation upon getting
3177                          * a NULL *bri0.  However, we still need to update the
3178                          * rtinfo.
3179                          */
3180                         KKASSERT(*bri0 == NULL);
3181
3182                         /* Update rtinfo */
3183                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3184                                              flags, sc->sc_brttimeout);
3185                         return 0;
3186                 }
3187
3188                 /*
3189                  * We only need to check brtcnt on CPU0: if the limit
3190                  * would be exceeded, ENOSPC is returned.  The caller knows
3191                  * this and will terminate the installation.
3192                  */
3193                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3194                         return ENOSPC;
3195
3196                 KKASSERT(*bri0 == NULL);
3197                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3198                                   M_WAITOK | M_ZERO);
3199                 *bri0 = bri;
3200
3201                 /* Setup rtinfo */
3202                 bri->bri_flags = IFBAF_DYNAMIC;
3203                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3204                                      sc->sc_brttimeout);
3205         } else {
3206                 bri = *bri0;
3207                 KKASSERT(bri != NULL);
3208         }
3209
3210         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3211                       M_WAITOK | M_ZERO);
3212         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3213         brt->brt_info = bri;
3214
3215         bridge_rtnode_insert(sc, brt);
3216         return 0;
3217 }
3218
3219 static void
3220 bridge_rtinstall_handler(netmsg_t msg)
3221 {
3222         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3223         int error;
3224
3225         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3226                                        brmsg->br_dst, brmsg->br_dst_if,
3227                                        brmsg->br_setflags, brmsg->br_flags,
3228                                        &brmsg->br_rtinfo);
3229         if (error) {
3230                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3231                 lwkt_replymsg(&brmsg->base.lmsg, error);
3232                 return;
3233         } else if (brmsg->br_rtinfo == NULL) {
3234                 /* rtnode already exists for 'dst' */
3235                 KKASSERT(mycpuid == 0);
3236                 lwkt_replymsg(&brmsg->base.lmsg, 0);
3237                 return;
3238         }
3239         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
3240 }
3241
3242 /*
3243  * bridge_rtupdate:
3244  *
3245  *      Add/Update a bridge routing entry.
3246  */
3247 static int
3248 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3249                 struct ifnet *dst_if, uint8_t flags)
3250 {
3251         struct bridge_rtnode *brt;
3252
3253         /*
3254          * A route for this destination might already exist.  If so,
3255          * update it, otherwise create a new one.
3256          */
3257         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3258                 struct netmsg_brsaddr *brmsg;
3259
3260                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3261                         return ENOSPC;
3262
3263                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3264                 if (brmsg == NULL)
3265                         return ENOMEM;
3266
3267                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3268                             0, bridge_rtinstall_handler);
3269                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3270                 brmsg->br_dst_if = dst_if;
3271                 brmsg->br_flags = flags;
3272                 brmsg->br_setflags = 0;
3273                 brmsg->br_softc = sc;
3274                 brmsg->br_rtinfo = NULL;
3275
3276                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
3277                 return 0;
3278         }
3279         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3280                              sc->sc_brttimeout);
3281         return 0;
3282 }
3283
3284 static int
3285 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3286                struct ifnet *dst_if, uint8_t flags)
3287 {
3288         struct netmsg_brsaddr brmsg;
3289
3290         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3291
3292         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3293                     0, bridge_rtinstall_handler);
3294         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3295         brmsg.br_dst_if = dst_if;
3296         brmsg.br_flags = flags;
3297         brmsg.br_setflags = 1;
3298         brmsg.br_softc = sc;
3299         brmsg.br_rtinfo = NULL;
3300
3301         return ifnet_domsg(&brmsg.base.lmsg, 0);
3302 }
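
/*
 * Illustrative note: bridge_rtsaddr() (static entries, configuration path)
 * and bridge_rtupdate() (dynamic learning, forwarding path) both drive
 * bridge_rtinstall_handler(), but they dispatch it differently.  A
 * side-by-side sketch of the two dispatch styles used by the functions
 * above:
 */
#if 0
	/* Synchronous: reply returns to the calling thread's msgport. */
	netmsg_init(&brmsg.base, NULL, &curthread->td_msgport, 0,
		    bridge_rtinstall_handler);
	error = ifnet_domsg(&brmsg.base.lmsg, 0);	/* blocks until done */

	/* Asynchronous: auto-freeing reply port, the caller never waits. */
	brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
	netmsg_init(&brmsg->base, NULL, &netisr_afree_rport, 0,
		    bridge_rtinstall_handler);
	ifnet_sendmsg(&brmsg->base.lmsg, 0);		/* fire and forget */
#endif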
3303
3304 /*
3305  * bridge_rtlookup:
3306  *
3307  *      Lookup the destination interface for an address.
3308  */
3309 static struct ifnet *
3310 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3311 {
3312         struct bridge_rtnode *brt;
3313
3314         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3315                 return NULL;
3316         return brt->brt_info->bri_ifp;
3317 }
3318
3319 static void
3320 bridge_rtreap_handler(netmsg_t msg)
3321 {
3322         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3323         struct bridge_rtnode *brt, *nbrt;
3324
3325         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3326                 if (brt->brt_info->bri_dead)
3327                         bridge_rtnode_destroy(sc, brt);
3328         }
3329         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3330 }
3331
3332 static void
3333 bridge_rtreap(struct bridge_softc *sc)
3334 {
3335         struct netmsg_base msg;
3336
3337         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3338
3339         netmsg_init(&msg, NULL, &curthread->td_msgport,
3340                     0, bridge_rtreap_handler);
3341         msg.lmsg.u.ms_resultp = sc;
3342
3343         ifnet_domsg(&msg.lmsg, 0);
3344 }
3345
3346 static void
3347 bridge_rtreap_async(struct bridge_softc *sc)
3348 {
3349         struct netmsg_base *msg;
3350
3351         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3352
3353         netmsg_init(msg, NULL, &netisr_afree_rport,
3354                     0, bridge_rtreap_handler);
3355         msg->lmsg.u.ms_resultp = sc;
3356
3357         ifnet_sendmsg(&msg->lmsg, 0);
3358 }
3359
3360 /*
3361  * bridge_rttrim:
3362  *
3363  *      Trim the routing table so that the number of
3364  *      routing entries is less than or equal to the
3365  *      configured maximum.
3366  */
3367 static void
3368 bridge_rttrim(struct bridge_softc *sc)
3369 {
3370         struct bridge_rtnode *brt;
3371         int dead;
3372
3373         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3374
3375         /* Make sure we actually need to do this. */
3376         if (sc->sc_brtcnt <= sc->sc_brtmax)
3377                 return;
3378
3379         /*
3380          * Find out how many rtnodes are dead
3381          */
3382         dead = bridge_rtage_finddead(sc);
3383         KKASSERT(dead <= sc->sc_brtcnt);
3384
3385         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3386                 /* Enough dead rtnodes were found */
3387                 bridge_rtreap(sc);
3388                 return;
3389         }
3390
3391         /*
3392          * Kill some dynamic rtnodes to meet the brtmax
3393          */
3394         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3395                 struct bridge_rtinfo *bri = brt->brt_info;
3396
3397                 if (bri->bri_dead) {
3398                         /*
3399                          * We have counted this rtnode in
3400                          * bridge_rtage_finddead()
3401                          */
3402                         continue;
3403                 }
3404
3405                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3406                         bri->bri_dead = 1;
3407                         ++dead;
3408                         KKASSERT(dead <= sc->sc_brtcnt);
3409
3410                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3411                                 /* Enough rtnodes have been collected */
3412                                 break;
3413                         }
3414                 }
3415         }
3416         if (dead)
3417                 bridge_rtreap(sc);
3418 }
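
/*
 * Worked example of the trim arithmetic above (illustrative numbers):
 * with sc_brtcnt = 120, sc_brtmax = 100 and bridge_rtage_finddead()
 * reporting 5 expired entries, 120 - 5 = 115 is still above the limit,
 * so the loop keeps marking dynamic rtnodes dead until
 * sc_brtcnt - dead <= sc_brtmax, i.e. at least 15 more, and only then
 * reaps them all in one pass.
 */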
3419
3420 /*
3421  * bridge_timer:
3422  *
3423  *      Aging timer for the bridge.
3424  */
3425 static void
3426 bridge_timer(void *arg)
3427 {
3428         struct bridge_softc *sc = arg;
3429         struct netmsg_base *msg;
3430
3431         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3432
3433         crit_enter();
3434
3435         if (callout_pending(&sc->sc_brcallout) ||
3436             !callout_active(&sc->sc_brcallout)) {
3437                 crit_exit();
3438                 return;
3439         }
3440         callout_deactivate(&sc->sc_brcallout);
3441
3442         msg = &sc->sc_brtimemsg;
3443         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3444         lwkt_sendmsg(BRIDGE_CFGPORT, &msg->lmsg);
3445
3446         crit_exit();
3447 }
3448
3449 static void
3450 bridge_timer_handler(netmsg_t msg)
3451 {
3452         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3453
3454         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3455
3456         crit_enter();
3457         /* Reply ASAP */
3458         lwkt_replymsg(&msg->lmsg, 0);
3459         crit_exit();
3460
3461         bridge_rtage(sc);
3462         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3463                 callout_reset(&sc->sc_brcallout,
3464                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3465         }
3466 }
3467
3468 static int
3469 bridge_rtage_finddead(struct bridge_softc *sc)
3470 {
3471         struct bridge_rtnode *brt;
3472         int dead = 0;
3473
3474         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3475                 struct bridge_rtinfo *bri = brt->brt_info;
3476
3477                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3478                     time_second >= bri->bri_expire) {
3479                         bri->bri_dead = 1;
3480                         ++dead;
3481                         KKASSERT(dead <= sc->sc_brtcnt);
3482                 }
3483         }
3484         return dead;
3485 }
3486
3487 /*
3488  * bridge_rtage:
3489  *
3490  *      Perform an aging cycle.
3491  */
3492 static void
3493 bridge_rtage(struct bridge_softc *sc)
3494 {
3495         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3496
3497         if (bridge_rtage_finddead(sc))
3498                 bridge_rtreap(sc);
3499 }
3500
3501 /*
3502  * bridge_rtflush:
3503  *
3504  *      Remove all dynamic addresses from the bridge.
3505  */
3506 static void
3507 bridge_rtflush(struct bridge_softc *sc, int bf)
3508 {
3509         struct bridge_rtnode *brt;
3510         int reap;
3511
3512         reap = 0;
3513         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3514                 struct bridge_rtinfo *bri = brt->brt_info;
3515
3516                 if ((bf & IFBF_FLUSHALL) ||
3517                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3518                         bri->bri_dead = 1;
3519                         reap = 1;
3520                 }
3521         }
3522         if (reap) {
3523                 if (bf & IFBF_FLUSHSYNC)
3524                         bridge_rtreap(sc);
3525                 else
3526                         bridge_rtreap_async(sc);
3527         }
3528 }
3529
3530 /*
3531  * bridge_rtdaddr:
3532  *
3533  *      Remove an address from the table.
3534  */
3535 static int
3536 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3537 {
3538         struct bridge_rtnode *brt;
3539
3540         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3541
3542         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3543                 return (ENOENT);
3544
3545         /* TODO: add a cheaper delete operation */
3546         brt->brt_info->bri_dead = 1;
3547         bridge_rtreap(sc);
3548         return (0);
3549 }
3550
3551 /*
3552  * bridge_rtdelete:
3553  *
3554  *      Delete routes to a specific member interface.
3555  */
3556 void
3557 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3558 {
3559         struct bridge_rtnode *brt;
3560         int reap;
3561
3562         reap = 0;
3563         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3564                 struct bridge_rtinfo *bri = brt->brt_info;
3565
3566                 if (bri->bri_ifp == ifp &&
3567                     ((bf & IFBF_FLUSHALL) ||
3568                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3569                         bri->bri_dead = 1;
3570                         reap = 1;
3571                 }
3572         }
3573         if (reap) {
3574                 if (bf & IFBF_FLUSHSYNC)
3575                         bridge_rtreap(sc);
3576                 else
3577                         bridge_rtreap_async(sc);
3578         }
3579 }
3580
3581 /*
3582  * bridge_rtable_init:
3583  *
3584  *      Initialize the route table for this bridge.
3585  */
3586 static void
3587 bridge_rtable_init(struct bridge_softc *sc)
3588 {
3589         int cpu;
3590
3591         /*
3592          * Initialize per-cpu hash tables
3593          */
3594         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3595                                  M_DEVBUF, M_WAITOK);
3596         for (cpu = 0; cpu < ncpus; ++cpu) {
3597                 int i;
3598
3599                 sc->sc_rthashs[cpu] =
3600                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3601                         M_DEVBUF, M_WAITOK);
3602
3603                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3604                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3605         }
3606         sc->sc_rthash_key = karc4random();
3607
3608         /*
3609          * Initialize per-cpu lists
3610          */
3611         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3612                                  M_DEVBUF, M_WAITOK);
3613         for (cpu = 0; cpu < ncpus; ++cpu)
3614                 LIST_INIT(&sc->sc_rtlists[cpu]);
3615 }
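
/*
 * Illustrative note on the layout built above: every CPU gets a complete
 * copy of the hash table and of the flat rtnode list, so forwarding-path
 * lookups never touch another CPU's state.  The per-cpu access pattern,
 * as used by bridge_rtnode_lookup() below, looks like this:
 */
#if 0
	struct bridge_rtnode_head *chain;
	struct bridge_rtnode *brt;

	chain = &sc->sc_rthashs[mycpuid][bridge_rthash(sc, addr)];
	LIST_FOREACH(brt, chain, brt_hash) {
		/* compare brt->brt_addr against addr */
	}
#endif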
3616
3617 /*
3618  * bridge_rtable_fini:
3619  *
3620  *      Deconstruct the route table for this bridge.
3621  */
3622 static void
3623 bridge_rtable_fini(struct bridge_softc *sc)
3624 {
3625         int cpu;
3626
3627         /*
3628          * Free per-cpu hash tables
3629          */
3630         for (cpu = 0; cpu < ncpus; ++cpu)
3631                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3632         kfree(sc->sc_rthashs, M_DEVBUF);
3633
3634         /*
3635          * Free per-cpu lists
3636          */
3637         kfree(sc->sc_rtlists, M_DEVBUF);
3638 }
3639
3640 /*
3641  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3642  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3643  */
3644 #define mix(a, b, c)                                                    \
3645 do {                                                                    \
3646         a -= b; a -= c; a ^= (c >> 13);                                 \
3647         b -= c; b -= a; b ^= (a << 8);                                  \
3648         c -= a; c -= b; c ^= (b >> 13);                                 \
3649         a -= b; a -= c; a ^= (c >> 12);                                 \
3650         b -= c; b -= a; b ^= (a << 16);                                 \
3651         c -= a; c -= b; c ^= (b >> 5);                                  \
3652         a -= b; a -= c; a ^= (c >> 3);                                  \
3653         b -= c; b -= a; b ^= (a << 10);                                 \
3654         c -= a; c -= b; c ^= (b >> 15);                                 \
3655 } while (/*CONSTCOND*/0)
3656
3657 static __inline uint32_t
3658 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3659 {
3660         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3661
3662         b += addr[5] << 8;
3663         b += addr[4];
3664         a += addr[3] << 24;
3665         a += addr[2] << 16;
3666         a += addr[1] << 8;
3667         a += addr[0];
3668
3669         mix(a, b, c);
3670
3671         return (c & BRIDGE_RTHASH_MASK);
3672 }
3673
3674 #undef mix
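
/*
 * Worked example of the folding in bridge_rthash() (illustrative address):
 * for 00:11:22:33:44:55 the second word collects the last two octets,
 * b += (0x55 << 8) + 0x44, and the first word collects the first four,
 * a += (0x33 << 24) + (0x22 << 16) + (0x11 << 8) + 0x00.  mix() then
 * scrambles a, b and the per-bridge random key in c, and the low bits of
 * c (c & BRIDGE_RTHASH_MASK) select the bucket.  Seeding c with
 * karc4random() presumably keeps remote hosts from choosing MAC
 * addresses that all collide in a single bucket.
 */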
3675
3676 static int
3677 bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
3678 {
3679         int i, d;
3680
3681         for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
3682                 d = ((int)a[i]) - ((int)b[i]);
3683         }
3684
3685         return (d);
3686 }
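
/*
 * Illustrative note: the comparison above is a plain big-endian
 * (lexicographic) ordering of the six address octets; e.g. comparing
 * 00:11:22:33:44:55 against 00:11:22:33:44:66 returns a negative value
 * because the first differing octet is smaller (0x55 < 0x66).
 * bridge_rtnode_insert() keeps each hash chain sorted in descending
 * order under this comparison, which is why bridge_rtnode_lookup() can
 * stop and return NULL as soon as the comparison turns positive.
 */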
3687
3688 /*
3689  * bridge_rtnode_lookup:
3690  *
3691  *      Look up a bridge route node for the specified destination.
3692  */
3693 static struct bridge_rtnode *
3694 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3695 {
3696         struct bridge_rtnode *brt;
3697         uint32_t hash;
3698         int dir;
3699
3700         hash = bridge_rthash(sc, addr);
3701         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3702                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3703                 if (dir == 0)
3704                         return (brt);
3705                 if (dir > 0)
3706                         return (NULL);
3707         }
3708
3709         return (NULL);
3710 }
3711
3712 /*
3713  * bridge_rtnode_insert:
3714  *
3715  *      Insert the specified bridge node into the route table.
3716  *      Caller has to make sure that rtnode does not exist.
3717  */
3718 static void
3719 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3720 {
3721         struct bridge_rtnode *lbrt;
3722         uint32_t hash;
3723         int dir;
3724
3725         hash = bridge_rthash(sc, brt->brt_addr);
3726
3727         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3728         if (lbrt == NULL) {
3729                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3730                                   brt, brt_hash);
3731                 goto out;
3732         }
3733
3734         do {
3735                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3736                 KASSERT(dir != 0, ("rtnode already exists\n"));
3737
3738                 if (dir > 0) {
3739                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3740                         goto out;
3741                 }
3742                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3743                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3744                         goto out;
3745                 }
3746                 lbrt = LIST_NEXT(lbrt, brt_hash);
3747         } while (lbrt != NULL);
3748
3749         panic("no suitable position found for rtnode\n");
3750 out:
3751         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3752         if (mycpuid == 0) {
3753                 /*
3754                  * Update the brtcnt.
3755                  * We only need to do it once and we do it on CPU0.
3756                  */
3757                 sc->sc_brtcnt++;
3758         }
3759 }
3760
3761 /*
3762  * bridge_rtnode_destroy:
3763  *
3764  *      Destroy a bridge rtnode.
3765  */
3766 static void
3767 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3768 {
3769         LIST_REMOVE(brt, brt_hash);
3770         LIST_REMOVE(brt, brt_list);
3771
3772         if (mycpuid + 1 == ncpus) {
3773                 /* Free rtinfo associated with rtnode on the last cpu */
3774                 kfree(brt->brt_info, M_DEVBUF);
3775         }
3776         kfree(brt, M_DEVBUF);
3777
3778         if (mycpuid == 0) {
3779                 /* Update brtcnt only on CPU0 */
3780                 sc->sc_brtcnt--;
3781         }
3782 }
3783
3784 static __inline int
3785 bridge_post_pfil(struct mbuf *m)
3786 {
3787         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3788                 return EOPNOTSUPP;
3789
3790         /* Not yet */
3791         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3792                 return EOPNOTSUPP;
3793
3794         return 0;
3795 }
3796
3797 /*
3798  * Send bridge packets through pfil if they are one of the types pfil can deal
3799  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3800  * question.)  If bifp or ifp is NULL then packet filtering is skipped for
3801  * that interface.
3802  */
3803 static int
3804 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3805 {
3806         int snap, error, i, hlen;
3807         struct ether_header *eh1, eh2;
3808         struct ip *ip;
3809         struct llc llc1;
3810         u_int16_t ether_type;
3811
3812         snap = 0;
3813         error = -1;     /* Default to an error unless explicitly cleared below */
3814
3815         if (pfil_bridge == 0 && pfil_member == 0)
3816                 return (0); /* filtering is disabled */
3817
3818         i = min((*mp)->m_pkthdr.len, max_protohdr);
3819         if ((*mp)->m_len < i) {
3820                 *mp = m_pullup(*mp, i);
3821                 if (*mp == NULL) {
3822                         kprintf("%s: m_pullup failed\n", __func__);
3823                         return (-1);
3824                 }
3825         }
3826
3827         eh1 = mtod(*mp, struct ether_header *);
3828         ether_type = ntohs(eh1->ether_type);
3829
3830         /*
3831          * Check for SNAP/LLC.
3832          */
3833         if (ether_type < ETHERMTU) {
3834                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3835
3836                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3837                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3838                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3839                     llc2->llc_control == LLC_UI) {
3840                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3841                         snap = 1;
3842                 }
3843         }
3844
3845         /*
3846          * If we're trying to filter bridge traffic, don't look at anything
3847          * other than IP and ARP traffic.  If the filter doesn't understand
3848          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3849          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3850          * but of course we don't have an AppleTalk filter to begin with.
3851          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3852          * ARP traffic.)
3853          */
3854         switch (ether_type) {
3855         case ETHERTYPE_ARP:
3856         case ETHERTYPE_REVARP:
3857                 return (0); /* Automatically pass */
3858
3859         case ETHERTYPE_IP:
3860 #ifdef INET6
3861         case ETHERTYPE_IPV6:
3862 #endif /* INET6 */
3863                 break;
3864
3865         default:
3866                 /*
3867                  * Check to see if the user wants to pass non-IP
3868                  * packets; these will not be checked by pfil(9)
3869                  * and would be passed unconditionally, so the default is to drop.
3870                  */
3871                 if (pfil_onlyip)
3872                         goto bad;
3873         }
3874
3875         /* Strip off the Ethernet header and keep a copy. */
3876         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3877         m_adj(*mp, ETHER_HDR_LEN);
3878
3879         /* Strip off snap header, if present */
3880         if (snap) {
3881                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3882                 m_adj(*mp, sizeof(struct llc));
3883         }
3884
3885         /*
3886          * Check the IP header for alignment and errors
3887          */
3888         if (dir == PFIL_IN) {
3889                 switch (ether_type) {
3890                 case ETHERTYPE_IP:
3891                         error = bridge_ip_checkbasic(mp);
3892                         break;
3893 #ifdef INET6
3894                 case ETHERTYPE_IPV6:
3895                         error = bridge_ip6_checkbasic(mp);
3896                         break;
3897 #endif /* INET6 */
3898                 default:
3899                         error = 0;
3900                 }
3901                 if (error)
3902                         goto bad;
3903         }
3904
3905         error = 0;
3906
3907         /*
3908          * Run the packet through pfil
3909          */
3910         switch (ether_type) {
3911         case ETHERTYPE_IP:
3912                 /*
3913                  * Before calling the firewall, swap fields the same way
3914                  * IP input does.  Here we assume the header is contiguous.
3915                  */
3916                 ip = mtod(*mp, struct ip *);
3917
3918                 ip->ip_len = ntohs(ip->ip_len);
3919                 ip->ip_off = ntohs(ip->ip_off);
3920
3921                 /*
3922                  * Run pfil on the member interface and the bridge, both can
3923                  * be skipped by clearing pfil_member or pfil_bridge.
3924                  *
3925                  * Keep the order:
3926                  *   in_if -> bridge_if -> out_if
3927                  */
3928                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3929                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3930                         if (*mp == NULL || error != 0) /* filter may consume */
3931                                 break;
3932                         error = bridge_post_pfil(*mp);
3933                         if (error)
3934                                 break;
3935                 }
3936
3937                 if (pfil_member && ifp != NULL) {
3938                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3939                         if (*mp == NULL || error != 0) /* filter may consume */
3940                                 break;
3941                         error = bridge_post_pfil(*mp);
3942                         if (error)
3943                                 break;
3944                 }
3945
3946                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3947                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3948                         if (*mp == NULL || error != 0) /* filter may consume */
3949                                 break;
3950                         error = bridge_post_pfil(*mp);
3951                         if (error)
3952                                 break;
3953                 }
3954
3955                 /* check if we need to fragment the packet */
3956                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3957                         i = (*mp)->m_pkthdr.len;
3958                         if (i > ifp->if_mtu) {
3959                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3960                                             &llc1);
3961                                 return (error);
3962                         }
3963                 }
3964
3965                 /* Recalculate the ip checksum and restore byte ordering */
3966                 ip = mtod(*mp, struct ip *);
3967                 hlen = ip->ip_hl << 2;
3968                 if (hlen < sizeof(struct ip))
3969                         goto bad;
3970                 if (hlen > (*mp)->m_len) {
3971                         if ((*mp = m_pullup(*mp, hlen)) == NULL)
3972                                 goto bad;
3973                         ip = mtod(*mp, struct ip *);
3974                         if (ip == NULL)
3975                                 goto bad;
3976                 }
3977                 ip->ip_len = htons(ip->ip_len);
3978                 ip->ip_off = htons(ip->ip_off);
3979                 ip->ip_sum = 0;
3980                 if (hlen == sizeof(struct ip))
3981                         ip->ip_sum = in_cksum_hdr(ip);
3982                 else
3983                         ip->ip_sum = in_cksum(*mp, hlen);
3984
3985                 break;
3986 #ifdef INET6
3987         case ETHERTYPE_IPV6:
3988                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
3989                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3990                                         dir);
3991
3992                 if (*mp == NULL || error != 0) /* filter may consume */
3993                         break;
3994
3995                 if (pfil_member && ifp != NULL)
3996                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
3997                                         dir);
3998
3999                 if (*mp == NULL || error != 0) /* filter may consume */
4000                         break;
4001
4002                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4003                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4004                                         dir);
4005                 break;
4006 #endif
4007         default:
4008                 error = 0;
4009                 break;
4010         }
4011
4012         if (*mp == NULL)
4013                 return (error);
4014         if (error != 0)
4015                 goto bad;
4016
4017         error = -1;
4018
4019         /*
4020          * Finally, put everything back the way it was and return
4021          */
4022         if (snap) {
4023                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
4024                 if (*mp == NULL)
4025                         return (error);
4026                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4027         }
4028
4029         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
4030         if (*mp == NULL)
4031                 return (error);
4032         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4033
4034         return (0);
4035
4036 bad:
4037         m_freem(*mp);
4038         *mp = NULL;
4039         return (error);
4040 }
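
/*
 * Illustrative sketch of how callers in this file drive bridge_pfil():
 * the incoming member and the bridge interface are filtered once, and
 * each outgoing member is then filtered with a NULL bridge pointer so
 * the bridge itself is not filtered again per destination (compare
 * bridge_broadcast() above).
 */
#if 0
	/* inbound: member + bridge */
	if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0 || m == NULL)
		return;
	/* outbound on the bridge interface itself */
	if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0 || m == NULL)
		return;
	/* outbound per member; NULL bifp avoids re-filtering the bridge */
	if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0 || mc == NULL)
		continue;
#endif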
4041
4042 /*
4043  * Perform basic checks on the header size, since
4044  * pfil assumes ip_input has already processed
4045  * the packet.  Cut-and-pasted from ip_input.c.
4046  * Given how simple the IPv6 version is,
4047  * does the IPv4 version really need to be
4048  * this complicated?
4049  *
4050  * XXX Should we update ipstat here, or not?
4051  * XXX Right now we update ipstat but not
4052  * XXX csum_counter.
4053  */
4054 static int
4055 bridge_ip_checkbasic(struct mbuf **mp)
4056 {
4057         struct mbuf *m = *mp;
4058         struct ip *ip;
4059         int len, hlen;
4060         u_short sum;
4061
4062         if (*mp == NULL)
4063                 return (-1);
4064 #if notyet
4065         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4066                 if ((m = m_copyup(m, sizeof(struct ip),
4067                         (max_linkhdr + 3) & ~3)) == NULL) {
4068                         /* XXXJRT new stat, please */
4069                         ipstat.ips_toosmall++;
4070                         goto bad;
4071                 }
4072         } else
4073 #endif
4074 #ifndef __predict_false
4075 #define __predict_false(x) x
4076 #endif
4077          if (__predict_false(m->m_len < sizeof (struct ip))) {
4078                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4079                         ipstat.ips_toosmall++;
4080                         goto bad;
4081                 }
4082         }
4083         ip = mtod(m, struct ip *);
4084         if (ip == NULL) goto bad;
4085
4086         if (ip->ip_v != IPVERSION) {
4087                 ipstat.ips_badvers++;
4088                 goto bad;
4089         }
4090         hlen = ip->ip_hl << 2;
4091         if (hlen < sizeof(struct ip)) { /* minimum header length */
4092                 ipstat.ips_badhlen++;
4093                 goto bad;
4094         }
4095         if (hlen > m->m_len) {
4096                 if ((m = m_pullup(m, hlen)) == NULL) {
4097                         ipstat.ips_badhlen++;
4098                         goto bad;
4099                 }
4100                 ip = mtod(m, struct ip *);
4101                 if (ip == NULL) goto bad;
4102         }
4103
4104         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4105                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4106         } else {
4107                 if (hlen == sizeof(struct ip)) {
4108                         sum = in_cksum_hdr(ip);
4109                 } else {
4110                         sum = in_cksum(m, hlen);
4111                 }
4112         }
4113         if (sum) {
4114                 ipstat.ips_badsum++;
4115                 goto bad;
4116         }
4117
4118         /* Retrieve the packet length. */
4119         len = ntohs(ip->ip_len);
4120
4121         /*
4122          * Check for additional length bogosity
4123          */
4124         if (len < hlen) {
4125                 ipstat.ips_badlen++;
4126                 goto bad;
4127         }
4128
4129         /*
4130          * Check that the amount of data in the buffers
4131          * is at least as much as the IP header would have us expect.
4132          * Drop the packet if it is shorter than we expect.
4133          */
4134         if (m->m_pkthdr.len < len) {
4135                 ipstat.ips_tooshort++;
4136                 goto bad;
4137         }
4138
4139         /* Checks out, proceed */
4140         *mp = m;
4141         return (0);
4142
4143 bad:
4144         *mp = m;
4145         return (-1);
4146 }
4147
4148 #ifdef INET6
4149 /*
4150  * Same as above, but for IPv6.
4151  * Cut-and-pasted from ip6_input.c.
4152  * XXX Should we update ip6stat, or not?
4153  */
4154 static int
4155 bridge_ip6_checkbasic(struct mbuf **mp)
4156 {
4157         struct mbuf *m = *mp;
4158         struct ip6_hdr *ip6;
4159
4160         /*
4161          * If the IPv6 header is not aligned, slurp it up into a new
4162          * mbuf with space for link headers, in the event we forward
4163          * it.  Otherwise, if it is aligned, make sure the entire base
4164          * IPv6 header is in the first mbuf of the chain.
4165          */
4166 #if notyet
4167         if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4168                 struct ifnet *inifp = m->m_pkthdr.rcvif;
4169                 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
4170                             (max_linkhdr + 3) & ~3)) == NULL) {
4171                         /* XXXJRT new stat, please */
4172                         ip6stat.ip6s_toosmall++;
4173                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
4174                         goto bad;
4175                 }
4176         } else
4177 #endif
4178         if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
4179                 struct ifnet *inifp = m->m_pkthdr.rcvif;
4180                 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
4181                         ip6stat.ip6s_toosmall++;
4182                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
4183                         goto bad;
4184                 }
4185         }
4186
4187         ip6 = mtod(m, struct ip6_hdr *);
4188
4189         if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4190                 ip6stat.ip6s_badvers++;
4191                 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
4192                 goto bad;
4193         }
4194
4195         /* Checks out, proceed */
4196         *mp = m;
4197         return (0);
4198
4199 bad:
4200         *mp = m;
4201         return (-1);
4202 }
4203 #endif /* INET6 */
4204
4205 /*
4206  * bridge_fragment:
4207  *
4208  *      Return a fragmented mbuf chain.
4209  */
4210 static int
4211 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4212     int snap, struct llc *llc)
4213 {
4214         struct mbuf *m0;
4215         struct ip *ip;
4216         int error = -1;
4217
4218         if (m->m_len < sizeof(struct ip) &&
4219             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4220                 goto out;
4221         ip = mtod(m, struct ip *);
4222
4223         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4224                     CSUM_DELAY_IP);
4225         if (error)
4226                 goto out;
4227
4228         /* walk the chain and re-add the Ethernet header */
4229         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4230                 if (error == 0) {
4231                         if (snap) {
4232                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
4233                                 if (m0 == NULL) {
4234                                         error = ENOBUFS;
4235                                         continue;
4236                                 }
4237                                 bcopy(llc, mtod(m0, caddr_t),
4238                                     sizeof(struct llc));
4239                         }
4240                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
4241                         if (m0 == NULL) {
4242                                 error = ENOBUFS;
4243                                 continue;
4244                         }
4245                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4246                 } else 
4247                         m_freem(m);
4248         }
4249
4250         if (error == 0)
4251                 ipstat.ips_fragmented++;
4252
4253         return (error);
4254
4255 out:
4256         if (m != NULL)
4257                 m_freem(m);
4258         return (error);
4259 }
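
/*
 * Worked example (illustrative sizes): a 2900-byte IP packet leaving a
 * member with a 1500-byte MTU is split by ip_fragment() into a chain of
 * packets linked through m_nextpkt; the loop above then prepends the
 * saved Ethernet header (and the SNAP/LLC header, if one was stripped in
 * bridge_pfil()) onto each fragment, and bridge_handoff() later walks
 * the same m_nextpkt chain to transmit them one at a time.
 */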
4260
4261 static void
4262 bridge_enqueue_handler(netmsg_t msg)
4263 {
4264         struct netmsg_packet *nmp;
4265         struct ifnet *dst_ifp;
4266         struct mbuf *m;
4267
4268         nmp = &msg->packet;
4269         m = nmp->nm_packet;
4270         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4271
4272         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4273 }
4274
4275 static void
4276 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4277                struct mbuf *m, int from_us)
4278 {
4279         struct mbuf *m0;
4280         struct ifnet *bifp;
4281
4282         bifp = sc->sc_ifp;
4283
4284         /* We may be sending fragments, so traverse the mbuf packet chain */
4285         for (; m; m = m0) {
4286                 struct altq_pktattr pktattr;
4287
4288                 m0 = m->m_nextpkt;
4289                 m->m_nextpkt = NULL;
4290
4291                 /*
4292                  * If being sent from our host override ether_shost
4293                  * with the bridge MAC.  This is mandatory for ARP
4294                  * so things don't get confused.  In particular we
4295                  * don't want ARPs to get associated with link interfaces
4296                  * under the bridge which might or might not stay valid.
4297                  *
4298                  * Also override ether_shost when relaying a packet out
4299                  * the same interface it came in on, due to multi-homed
4300                  * addresses & default routes, otherwise switches will
4301                  * get very confused.
4302                  *
4303                  * In transparent mode (LINK0) restore the recorded shost.
4304                  */
4305                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4306                         m_copyback(m,
4307                                    offsetof(struct ether_header, ether_shost),
4308                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4309                 } else if ((bifp->if_flags & IFF_LINK0) &&
4310                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4311                         m_copyback(m,
4312                                    offsetof(struct ether_header, ether_shost),
4313                                    ETHER_ADDR_LEN,
4314                                    m->m_pkthdr.br.ether.ether_shost);
4315                 } /* else retain shost */
4316
4317                 if (ifq_is_enabled(&dst_ifp->if_snd))
4318                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4319
4320                 ifq_dispatch(dst_ifp, m, &pktattr);
4321         }
4322 }
4323
4324 static void
4325 bridge_control_dispatch(netmsg_t msg)
4326 {
4327         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4328         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4329         int error;
4330
4331         ifnet_serialize_all(bifp);
4332         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4333         ifnet_deserialize_all(bifp);
4334
4335         lwkt_replymsg(&bc_msg->base.lmsg, error);
4336 }
4337
4338 static int
4339 bridge_control(struct bridge_softc *sc, u_long cmd,
4340                bridge_ctl_t bc_func, void *bc_arg)
4341 {
4342         struct ifnet *bifp = sc->sc_ifp;
4343         struct netmsg_brctl bc_msg;
4344         int error;
4345
4346         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4347
4348         bzero(&bc_msg, sizeof(bc_msg));
4349
4350         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4351                     0, bridge_control_dispatch);
4352         bc_msg.bc_func = bc_func;
4353         bc_msg.bc_sc = sc;
4354         bc_msg.bc_arg = bc_arg;
4355
4356         ifnet_deserialize_all(bifp);
4357         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4358         ifnet_serialize_all(bifp);
4359         return error;
4360 }
4361
4362 static void
4363 bridge_add_bif_handler(netmsg_t msg)
4364 {
4365         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4366         struct bridge_softc *sc;
4367         struct bridge_iflist *bif;
4368
4369         sc = amsg->br_softc;
4370
4371         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4372         bif->bif_ifp = amsg->br_bif_ifp;
4373         bif->bif_onlist = 1;
4374         bif->bif_info = amsg->br_bif_info;
4375
4376         /*
4377          * runs through bif_info
4378          */
4379         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4380
4381         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4382
4383         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4384 }
4385
4386 static void
4387 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4388                struct ifnet *ifp)
4389 {
4390         struct netmsg_braddbif amsg;
4391
4392         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4393
4394         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4395                     0, bridge_add_bif_handler);
4396         amsg.br_softc = sc;
4397         amsg.br_bif_info = bif_info;
4398         amsg.br_bif_ifp = ifp;
4399
4400         ifnet_domsg(&amsg.base.lmsg, 0);
4401 }
4402
4403 static void
4404 bridge_del_bif_handler(netmsg_t msg)
4405 {
4406         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4407         struct bridge_softc *sc;
4408         struct bridge_iflist *bif;
4409
4410         sc = dmsg->br_softc;
4411
4412         /*
4413          * Locate the bif associated with the br_bif_info
4414          * on the current CPU
4415          */
4416         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4417         KKASSERT(bif != NULL && bif->bif_onlist);
4418
4419         /* Remove the bif from the current CPU's iflist */
4420         bif->bif_onlist = 0;
4421         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
4422
4423         /* Save the removed bif for later freeing */
4424         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4425
4426         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4427 }
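
/*
 * Illustrative sketch (not a verbatim copy of the detach path): once
 * bridge_del_bif() below has run the handler above on every CPU, each
 * per-cpu bif has been unhooked and parked on the caller-supplied list,
 * so the caller can free the bifs and the shared bif_info without
 * racing the forwarding path.
 */
#if 0
	struct bridge_iflist_head saved_bifs;
	struct bridge_iflist *bif;

	TAILQ_INIT(&saved_bifs);
	bridge_del_bif(sc, bif_info, &saved_bifs);

	while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
		TAILQ_REMOVE(&saved_bifs, bif, bif_next);
		kfree(bif, M_DEVBUF);
	}
	kfree(bif_info, M_DEVBUF);
#endif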
4428
4429 static void
4430 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4431                struct bridge_iflist_head *saved_bifs)
4432 {
4433         struct netmsg_brdelbif dmsg;
4434
4435         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4436
4437         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4438                     0, bridge_del_bif_handler);
4439         dmsg.br_softc = sc;
4440         dmsg.br_bif_info = bif_info;
4441         dmsg.br_bif_list = saved_bifs;
4442
4443         ifnet_domsg(&dmsg.base.lmsg, 0);
4444 }