deb12583de7ccabab58c34f7cf144df806aae5fe
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
 *      - Currently only supports Ethernet-like interfaces (Ethernet,
 *        802.11, VLANs on Ethernet, etc.).  Figure out a nice way
 *        to bridge other types of interfaces (FDDI-FDDI, and maybe
 *        consider heterogeneous bridges).
80  *
81  *
 * Bridge route information is duplicated on each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
 * We choose to put timeout and dst_ifp into the shared part, so updating
 * them is cheaper than using message forwarding.  Also, there is no
 * need to use a spinlock to protect the updates: timeout and dst_ifp
 * are not related, and the order in which the fields are updated does
 * not matter.  The cache pollution caused by the shared part should not
 * be heavy: in a stable setup, dst_ifp probably will not change during
 * the rtnode's lifetime, while timeout is refreshed once per second;
 * most of the time, timeout and dst_ifp are only read.
108  *
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                               tcp_thread2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *      ifnet0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |
125  *  alloc rtinfo
126  *  alloc rtnode
127  * install rtnode
128  *        |
129  *        +---------->ifnet1
130  *        : fwd nmsg    |
131  *        : w/ rtinfo   |
132  *        :             |
133  *        :             |
134  *                 alloc rtnode
135  *               (w/ nmsg's rtinfo)
136  *                install rtnode
137  *                      |
138  *                      +---------->ifnet2
139  *                      : fwd nmsg    |
140  *                      : w/ rtinfo   |
141  *                      :             |
142  *                      :         same as ifnet1
143  *                                    |
144  *                                    +---------->ifnet3
145  *                                    : fwd nmsg    |
146  *                                    : w/ rtinfo   |
147  *                                    :             |
148  *                                    :         same as ifnet1
149  *                                               free nmsg
150  *                                                  :
151  *                                                  :
152  *
 * The netmsgs forwarded between protocol threads and ifnet threads are
 * allocated with (M_WAITOK|M_NULLOK), so allocation will not fail in most
 * cases (route information is too precious to be not installed :).
 * Since multiple threads may try to install route information for the
 * same dst eaddr, we look up the route information on ifnet0.  This
 * lookup only needs to be performed on ifnet0, which is the starting
 * point of the route information installation process.
160  *
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1             CPU2             CPU3
165  *
166  * netisr0
167  *   |
168  * find suitable rtnodes,
169  * mark their rtinfo dead
170  *   |
171  *   | domsg <------------------------------------------+
172  *   |                                                  | replymsg
173  *   |                                                  |
174  *   V     fwdmsg           fwdmsg           fwdmsg     |
175  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
176  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
177  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
178  *                                                    free dead rtinfos
179  *
180  * All deleting/flushing operations are serialized by netisr0, so each
181  * operation only reaps the route information marked dead by itself.
182  *
183  *
 * Bridge route information adding/deleting/flushing:
 * Since all operations are serialized by the fixed message flow between
 * ifnet threads, it is not possible to create corrupted per-cpu route
 * information.
188  *
189  *
190  *
 * Percpu member interface list iteration with blocking operation:
 * Since one bridge can only delete one member interface at a time and
 * the deleted member interface is not freed until after
 * netmsg_service_sync(), the following approach is used to make sure
 * that even if a certain member interface is ripped from the percpu list
 * during a blocking operation, the iteration can still keep going:
197  *
198  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
199  *     blocking operation;
200  *     blocking operation;
201  *     ...
202  *     ...
203  *     if (nbif != NULL && !nbif->bif_onlist) {
204  *         KKASSERT(bif->bif_onlist);
205  *         nbif = TAILQ_NEXT(bif, bif_next);
206  *     }
207  * }
208  *
 * As mentioned above, only one member interface can be unlinked from the
 * percpu member interface list at a time, so either bif or nbif may be off
 * the list, but _not_ both.  To keep the list iteration going, we don't
 * care about bif, only nbif.  Since a removed member interface will only
 * be freed after we finish our work, it is safe to access any field in an
 * unlinked bif (here bif_onlist).  If nbif is no longer on the list, then
 * bif must be on the list, so we change nbif to the next element of bif
 * and keep going.
216  */
217
218 #include "opt_inet.h"
219 #include "opt_inet6.h"
220
221 #include <sys/param.h>
222 #include <sys/mbuf.h>
223 #include <sys/malloc.h>
224 #include <sys/protosw.h>
225 #include <sys/systm.h>
226 #include <sys/time.h>
227 #include <sys/socket.h> /* for net/if.h */
228 #include <sys/sockio.h>
229 #include <sys/ctype.h>  /* string functions */
230 #include <sys/kernel.h>
231 #include <sys/random.h>
232 #include <sys/sysctl.h>
233 #include <sys/module.h>
234 #include <sys/proc.h>
235 #include <sys/priv.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263
264 #include <net/route.h>
265 #include <sys/in_cksum.h>
266
267 /*
268  * Size of the route hash table.  Must be a power of two.
269  */
270 #ifndef BRIDGE_RTHASH_SIZE
271 #define BRIDGE_RTHASH_SIZE              1024
272 #endif
273
274 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
275
276 /*
277  * Maximum number of addresses to cache.
278  */
279 #ifndef BRIDGE_RTABLE_MAX
280 #define BRIDGE_RTABLE_MAX               100
281 #endif
282
283 /*
284  * Spanning tree defaults.
285  */
286 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
287 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
288 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
289 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
290 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
291 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
292 #define BSTP_DEFAULT_PATH_COST          55
293
294 /*
295  * Timeout (in seconds) for entries learned dynamically.
296  */
297 #ifndef BRIDGE_RTABLE_TIMEOUT
298 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
299 #endif
300
301 /*
302  * Number of seconds between walks of the route list.
303  */
304 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
305 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
306 #endif
307
308 /*
309  * List of capabilities to mask on the member interface.
310  */
311 #define BRIDGE_IFCAPS_MASK              IFCAP_TXCSUM
312
313 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
314
315 struct netmsg_brctl {
316         struct netmsg_base      base;
317         bridge_ctl_t            bc_func;
318         struct bridge_softc     *bc_sc;
319         void                    *bc_arg;
320 };
321
322 struct netmsg_brsaddr {
323         struct netmsg_base      base;
324         struct bridge_softc     *br_softc;
325         struct ifnet            *br_dst_if;
326         struct bridge_rtinfo    *br_rtinfo;
327         int                     br_setflags;
328         uint8_t                 br_dst[ETHER_ADDR_LEN];
329         uint8_t                 br_flags;
330 };
331
332 struct netmsg_braddbif {
333         struct netmsg_base      base;
334         struct bridge_softc     *br_softc;
335         struct bridge_ifinfo    *br_bif_info;
336         struct ifnet            *br_bif_ifp;
337 };
338
339 struct netmsg_brdelbif {
340         struct netmsg_base      base;
341         struct bridge_softc     *br_softc;
342         struct bridge_ifinfo    *br_bif_info;
343         struct bridge_iflist_head *br_bif_list;
344 };
345
346 struct netmsg_brsflags {
347         struct netmsg_base      base;
348         struct bridge_softc     *br_softc;
349         struct bridge_ifinfo    *br_bif_info;
350         uint32_t                br_bif_flags;
351 };
352
353 eventhandler_tag        bridge_detach_cookie = NULL;
354
355 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
356 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
357 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
358 extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);
359
360 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
361
362 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
363 static int      bridge_clone_destroy(struct ifnet *);
364
365 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
366 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
367 static void     bridge_ifdetach(void *, struct ifnet *);
368 static void     bridge_init(void *);
369 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
370 static void     bridge_stop(struct ifnet *);
371 static void     bridge_start(struct ifnet *);
372 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
373 static int      bridge_output(struct ifnet *, struct mbuf *);
374 static struct ifnet *bridge_interface(void *if_bridge);
375
376 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
377
378 static void     bridge_timer_handler(netmsg_t);
379 static void     bridge_timer(void *);
380
381 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
382 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
383                     struct mbuf *);
384 static void     bridge_span(struct bridge_softc *, struct mbuf *);
385
386 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
387                     struct ifnet *, uint8_t);
388 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
389 static void     bridge_rtreap(struct bridge_softc *);
390 static void     bridge_rtreap_async(struct bridge_softc *);
391 static void     bridge_rttrim(struct bridge_softc *);
392 static int      bridge_rtage_finddead(struct bridge_softc *);
393 static void     bridge_rtage(struct bridge_softc *);
394 static void     bridge_rtflush(struct bridge_softc *, int);
395 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
396 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
397                     struct ifnet *, uint8_t);
398 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
399 static void     bridge_rtreap_handler(netmsg_t);
400 static void     bridge_rtinstall_handler(netmsg_t);
401 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
402                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
403
404 static void     bridge_rtable_init(struct bridge_softc *);
405 static void     bridge_rtable_fini(struct bridge_softc *);
406
407 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
408 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
409                     const uint8_t *);
410 static void     bridge_rtnode_insert(struct bridge_softc *,
411                     struct bridge_rtnode *);
412 static void     bridge_rtnode_destroy(struct bridge_softc *,
413                     struct bridge_rtnode *);
414
415 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
416                     const char *name);
417 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
418                     struct ifnet *ifp);
419 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
420                     struct bridge_ifinfo *);
421 static void     bridge_delete_member(struct bridge_softc *,
422                     struct bridge_iflist *, int);
423 static void     bridge_delete_span(struct bridge_softc *,
424                     struct bridge_iflist *);
425
426 static int      bridge_control(struct bridge_softc *, u_long,
427                                bridge_ctl_t, void *);
428 static int      bridge_ioctl_init(struct bridge_softc *, void *);
429 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
430 static int      bridge_ioctl_add(struct bridge_softc *, void *);
431 static int      bridge_ioctl_del(struct bridge_softc *, void *);
432 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
433                                 struct bridge_iflist *bif, struct ifbreq *req);
434 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
435 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
436 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
437 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
439 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
440 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
441 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
442 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
443 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
444 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
446 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
447 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
448 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
449 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
450 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
451 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
452 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
453 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
455 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
456 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
457 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
458                     int);
459 static int      bridge_ip_checkbasic(struct mbuf **mp);
460 #ifdef INET6
461 static int      bridge_ip6_checkbasic(struct mbuf **mp);
462 #endif /* INET6 */
463 static int      bridge_fragment(struct ifnet *, struct mbuf *,
464                     struct ether_header *, int, struct llc *);
465 static void     bridge_enqueue_handler(netmsg_t);
466 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
467                     struct mbuf *, int);
468
469 static void     bridge_del_bif_handler(netmsg_t);
470 static void     bridge_add_bif_handler(netmsg_t);
471 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
472                     struct bridge_iflist_head *);
473 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
474                     struct ifnet *);
475
476 SYSCTL_DECL(_net_link);
477 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
478
479 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
480 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
481 static int pfil_member = 1; /* run pfil hooks on the member interface */
482 static int bridge_debug;
483 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
484     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
485 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
486     &pfil_bridge, 0, "Packet filter on the bridge interface");
487 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
488     &pfil_member, 0, "Packet filter on the member interface");
489 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
490     &bridge_debug, 0, "Bridge debug mode");
491
492 struct bridge_control_arg {
493         union {
494                 struct ifbreq ifbreq;
495                 struct ifbifconf ifbifconf;
496                 struct ifbareq ifbareq;
497                 struct ifbaconf ifbaconf;
498                 struct ifbrparam ifbrparam;
499         } bca_u;
500         int     bca_len;
501         void    *bca_uptr;
502         void    *bca_kptr;
503 };
504
505 struct bridge_control {
506         bridge_ctl_t    bc_func;
507         int             bc_argsize;
508         int             bc_flags;
509 };
510
511 #define BC_F_COPYIN             0x01    /* copy arguments in */
512 #define BC_F_COPYOUT            0x02    /* copy arguments out */
513 #define BC_F_SUSER              0x04    /* do super-user check */
514
515 const struct bridge_control bridge_control_table[] = {
516         { bridge_ioctl_add,             sizeof(struct ifbreq),
517           BC_F_COPYIN|BC_F_SUSER },
518         { bridge_ioctl_del,             sizeof(struct ifbreq),
519           BC_F_COPYIN|BC_F_SUSER },
520
521         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
522           BC_F_COPYIN|BC_F_COPYOUT },
523         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
524           BC_F_COPYIN|BC_F_SUSER },
525
526         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
527           BC_F_COPYIN|BC_F_SUSER },
528         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
529           BC_F_COPYOUT },
530
531         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
532           BC_F_COPYIN|BC_F_COPYOUT },
533         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
534           BC_F_COPYIN|BC_F_COPYOUT },
535
536         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
537           BC_F_COPYIN|BC_F_SUSER },
538
539         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
540           BC_F_COPYIN|BC_F_SUSER },
541         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
542           BC_F_COPYOUT },
543
544         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
545           BC_F_COPYIN|BC_F_SUSER },
546
547         { bridge_ioctl_flush,           sizeof(struct ifbreq),
548           BC_F_COPYIN|BC_F_SUSER },
549
550         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
551           BC_F_COPYOUT },
552         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
553           BC_F_COPYIN|BC_F_SUSER },
554
555         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
556           BC_F_COPYOUT },
557         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
558           BC_F_COPYIN|BC_F_SUSER },
559
560         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
561           BC_F_COPYOUT },
562         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
563           BC_F_COPYIN|BC_F_SUSER },
564
565         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
566           BC_F_COPYOUT },
567         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
568           BC_F_COPYIN|BC_F_SUSER },
569
570         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
571           BC_F_COPYIN|BC_F_SUSER },
572
573         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
574           BC_F_COPYIN|BC_F_SUSER },
575
576         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
577           BC_F_COPYIN|BC_F_SUSER },
578         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
579           BC_F_COPYIN|BC_F_SUSER },
580 };
581 static const int bridge_control_table_size = NELEM(bridge_control_table);
582
583 LIST_HEAD(, bridge_softc) bridge_list;
584
585 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
586                                 bridge_clone_create,
587                                 bridge_clone_destroy, 0, IF_MAXUNIT);
588
589 static int
590 bridge_modevent(module_t mod, int type, void *data)
591 {
592         switch (type) {
593         case MOD_LOAD:
594                 LIST_INIT(&bridge_list);
595                 if_clone_attach(&bridge_cloner);
596                 bridge_input_p = bridge_input;
597                 bridge_output_p = bridge_output;
598                 bridge_interface_p = bridge_interface;
599                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
600                     ifnet_detach_event, bridge_ifdetach, NULL,
601                     EVENTHANDLER_PRI_ANY);
602 #if notyet
603                 bstp_linkstate_p = bstp_linkstate;
604 #endif
605                 break;
606         case MOD_UNLOAD:
607                 if (!LIST_EMPTY(&bridge_list))
608                         return (EBUSY);
609                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
610                     bridge_detach_cookie);
611                 if_clone_detach(&bridge_cloner);
612                 bridge_input_p = NULL;
613                 bridge_output_p = NULL;
614                 bridge_interface_p = NULL;
615 #if notyet
616                 bstp_linkstate_p = NULL;
617 #endif
618                 break;
619         default:
620                 return (EOPNOTSUPP);
621         }
622         return (0);
623 }
624
625 static moduledata_t bridge_mod = {
626         "if_bridge",
627         bridge_modevent,
628         0
629 };
630
631 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
632
633
634 /*
635  * bridge_clone_create:
636  *
637  *      Create a new bridge instance.
638  */
639 static int
640 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
641 {
642         struct bridge_softc *sc;
643         struct ifnet *ifp;
644         u_char eaddr[6];
645         int cpu, rnd;
646
647         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
648         ifp = sc->sc_ifp = &sc->sc_if;
649
650         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
651         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
652         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
653         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
654         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
655         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
656         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
657
658         /* Initialize our routing table. */
659         bridge_rtable_init(sc);
660
661         callout_init(&sc->sc_brcallout);
662         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
663                     MSGF_DROPABLE, bridge_timer_handler);
664         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
665
666         callout_init(&sc->sc_bstpcallout);
667         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
668                     MSGF_DROPABLE, bstp_tick_handler);
669         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
670
671         /* Initialize per-cpu member iface lists */
672         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
673                                  M_DEVBUF, M_WAITOK);
674         for (cpu = 0; cpu < ncpus; ++cpu)
675                 TAILQ_INIT(&sc->sc_iflists[cpu]);
676
677         TAILQ_INIT(&sc->sc_spanlist);
678
679         ifp->if_softc = sc;
680         if_initname(ifp, ifc->ifc_name, unit);
681         ifp->if_mtu = ETHERMTU;
682         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
683         ifp->if_ioctl = bridge_ioctl;
684         ifp->if_start = bridge_start;
685         ifp->if_init = bridge_init;
686         ifp->if_type = IFT_ETHER;
687         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
688         ifq_set_ready(&ifp->if_snd);
689         ifp->if_hdrlen = ETHER_HDR_LEN;
690
691         /*
692          * Generate a random ethernet address and use the private AC:DE:48
693          * OUI code.
694          */
695         rnd = karc4random();
696         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
697         rnd = karc4random();
698         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
699
700         eaddr[0] &= ~1; /* clear multicast bit */
701         eaddr[0] |= 2;  /* set the LAA bit */
702
703         ether_ifattach(ifp, eaddr, NULL);
704         /* Now undo some of the damage... */
705         ifp->if_baudrate = 0;
706         /*ifp->if_type = IFT_BRIDGE;*/
707
708         crit_enter();   /* XXX MP */
709         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
710         crit_exit();
711
712         return (0);
713 }
714
715 static void
716 bridge_delete_dispatch(netmsg_t msg)
717 {
718         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
719         struct ifnet *bifp = sc->sc_ifp;
720         struct bridge_iflist *bif;
721
722         ifnet_serialize_all(bifp);
723
724         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
725                 bridge_delete_member(sc, bif, 0);
726
727         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
728                 bridge_delete_span(sc, bif);
729
730         ifnet_deserialize_all(bifp);
731
732         lwkt_replymsg(&msg->lmsg, 0);
733 }
734
735 /*
736  * bridge_clone_destroy:
737  *
738  *      Destroy a bridge instance.
739  */
740 static int
741 bridge_clone_destroy(struct ifnet *ifp)
742 {
743         struct bridge_softc *sc = ifp->if_softc;
744         struct netmsg_base msg;
745
746         ifnet_serialize_all(ifp);
747
748         bridge_stop(ifp);
749         ifp->if_flags &= ~IFF_UP;
750
751         ifnet_deserialize_all(ifp);
752
753         netmsg_init(&msg, NULL, &curthread->td_msgport,
754                     0, bridge_delete_dispatch);
755         msg.lmsg.u.ms_resultp = sc;
756         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
757
758         crit_enter();   /* XXX MP */
759         LIST_REMOVE(sc, sc_list);
760         crit_exit();
761
762         ether_ifdetach(ifp);
763
764         /* Tear down the routing table. */
765         bridge_rtable_fini(sc);
766
767         /* Free per-cpu member iface lists */
768         kfree(sc->sc_iflists, M_DEVBUF);
769
770         kfree(sc, M_DEVBUF);
771
772         return 0;
773 }
774
775 /*
776  * bridge_ioctl:
777  *
778  *      Handle a control request from the operator.
779  */
780 static int
781 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
782 {
783         struct bridge_softc *sc = ifp->if_softc;
784         struct bridge_control_arg args;
785         struct ifdrv *ifd = (struct ifdrv *) data;
786         const struct bridge_control *bc;
787         int error = 0;
788
789         ASSERT_IFNET_SERIALIZED_ALL(ifp);
790
791         switch (cmd) {
792         case SIOCADDMULTI:
793         case SIOCDELMULTI:
794                 break;
795
796         case SIOCGDRVSPEC:
797         case SIOCSDRVSPEC:
798                 if (ifd->ifd_cmd >= bridge_control_table_size) {
799                         error = EINVAL;
800                         break;
801                 }
802                 bc = &bridge_control_table[ifd->ifd_cmd];
803
804                 if (cmd == SIOCGDRVSPEC &&
805                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
806                         error = EINVAL;
807                         break;
808                 } else if (cmd == SIOCSDRVSPEC &&
809                            (bc->bc_flags & BC_F_COPYOUT)) {
810                         error = EINVAL;
811                         break;
812                 }
813
814                 if (bc->bc_flags & BC_F_SUSER) {
815                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
816                         if (error)
817                                 break;
818                 }
819
820                 if (ifd->ifd_len != bc->bc_argsize ||
821                     ifd->ifd_len > sizeof(args.bca_u)) {
822                         error = EINVAL;
823                         break;
824                 }
825
826                 memset(&args, 0, sizeof(args));
827                 if (bc->bc_flags & BC_F_COPYIN) {
828                         error = copyin(ifd->ifd_data, &args.bca_u,
829                                        ifd->ifd_len);
830                         if (error)
831                                 break;
832                 }
833
834                 error = bridge_control(sc, cmd, bc->bc_func, &args);
835                 if (error) {
836                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
837                         break;
838                 }
839
840                 if (bc->bc_flags & BC_F_COPYOUT) {
841                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
842                         if (args.bca_len != 0) {
843                                 KKASSERT(args.bca_kptr != NULL);
844                                 if (!error) {
845                                         error = copyout(args.bca_kptr,
846                                                 args.bca_uptr, args.bca_len);
847                                 }
848                                 kfree(args.bca_kptr, M_TEMP);
849                         } else {
850                                 KKASSERT(args.bca_kptr == NULL);
851                         }
852                 } else {
853                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
854                 }
855                 break;
856
857         case SIOCSIFFLAGS:
858                 if (!(ifp->if_flags & IFF_UP) &&
859                     (ifp->if_flags & IFF_RUNNING)) {
860                         /*
861                          * If interface is marked down and it is running,
862                          * then stop it.
863                          */
864                         bridge_stop(ifp);
865                 } else if ((ifp->if_flags & IFF_UP) &&
866                     !(ifp->if_flags & IFF_RUNNING)) {
867                         /*
868                          * If interface is marked up and it is stopped, then
869                          * start it.
870                          */
871                         ifp->if_init(sc);
872                 }
873                 break;
874
875         case SIOCSIFMTU:
876                 /* Do not allow the MTU to be changed on the bridge */
877                 error = EINVAL;
878                 break;
879
880         default:
881                 error = ether_ioctl(ifp, cmd, data);
882                 break;
883         }
884         return (error);
885 }
886
887 /*
888  * bridge_mutecaps:
889  *
890  *      Clear or restore unwanted capabilities on the member interface
891  */
892 static void
893 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
894 {
895         struct ifreq ifr;
896         int error;
897
898         if (ifp->if_ioctl == NULL)
899                 return;
900
901         bzero(&ifr, sizeof(ifr));
902         ifr.ifr_reqcap = ifp->if_capenable;
903
904         if (mute) {
905                 /* mask off and save capabilities */
906                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
907                 if (bif_info->bifi_mutecap != 0)
908                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
909         } else {
910                 /* restore muted capabilities */
911                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
912         }
913
914         if (bif_info->bifi_mutecap != 0) {
915                 ifnet_serialize_all(ifp);
916                 error = ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
917                 ifnet_deserialize_all(ifp);
918         }
919 }
920
921 /*
922  * bridge_lookup_member:
923  *
924  *      Lookup a bridge member interface.
925  */
926 static struct bridge_iflist *
927 bridge_lookup_member(struct bridge_softc *sc, const char *name)
928 {
929         struct bridge_iflist *bif;
930
931         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
932                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
933                         return (bif);
934         }
935         return (NULL);
936 }
937
938 /*
939  * bridge_lookup_member_if:
940  *
941  *      Lookup a bridge member interface by ifnet*.
942  */
943 static struct bridge_iflist *
944 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
945 {
946         struct bridge_iflist *bif;
947
948         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
949                 if (bif->bif_ifp == member_ifp)
950                         return (bif);
951         }
952         return (NULL);
953 }
954
955 /*
956  * bridge_lookup_member_ifinfo:
957  *
958  *      Lookup a bridge member interface by bridge_ifinfo.
959  */
960 static struct bridge_iflist *
961 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
962                             struct bridge_ifinfo *bif_info)
963 {
964         struct bridge_iflist *bif;
965
966         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
967                 if (bif->bif_info == bif_info)
968                         return (bif);
969         }
970         return (NULL);
971 }
972
973 /*
974  * bridge_delete_member:
975  *
976  *      Delete the specified member interface.
977  */
978 static void
979 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
980     int gone)
981 {
982         struct ifnet *ifs = bif->bif_ifp;
983         struct ifnet *bifp = sc->sc_ifp;
984         struct bridge_ifinfo *bif_info = bif->bif_info;
985         struct bridge_iflist_head saved_bifs;
986
987         ASSERT_IFNET_SERIALIZED_ALL(bifp);
988         KKASSERT(bif_info != NULL);
989
990         ifs->if_bridge = NULL;
991
992         /*
993          * Release bridge interface's serializer:
994          * - To avoid possible dead lock.
995          * - Various sync operation will block the current thread.
996          */
997         ifnet_deserialize_all(bifp);
998
999         if (!gone) {
1000                 switch (ifs->if_type) {
1001                 case IFT_ETHER:
1002                 case IFT_L2VLAN:
1003                         /*
1004                          * Take the interface out of promiscuous mode.
1005                          */
1006                         ifpromisc(ifs, 0);
1007                         bridge_mutecaps(bif_info, ifs, 0);
1008                         break;
1009
1010                 case IFT_GIF:
1011                         break;
1012
1013                 default:
1014                         panic("bridge_delete_member: impossible");
1015                         break;
1016                 }
1017         }
1018
1019         /*
1020          * Remove bifs from percpu linked list.
1021          *
1022          * Removed bifs are not freed immediately, instead,
1023          * they are saved in saved_bifs.  They will be freed
1024          * after we make sure that no one is accessing them,
1025          * i.e. after following netmsg_service_sync()
1026          */
1027         TAILQ_INIT(&saved_bifs);
1028         bridge_del_bif(sc, bif_info, &saved_bifs);
1029
1030         /*
1031          * Make sure that all protocol threads:
1032          * o  see 'ifs' if_bridge is changed
1033          * o  know that bif is removed from the percpu linked list
1034          */
1035         netmsg_service_sync();
1036
1037         /*
1038          * Free the removed bifs
1039          */
1040         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1041         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1042                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1043                 kfree(bif, M_DEVBUF);
1044         }
1045
1046         /* See the comment in bridge_ioctl_stop() */
1047         bridge_rtmsg_sync(sc);
1048         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1049
1050         ifnet_serialize_all(bifp);
1051
1052         if (bifp->if_flags & IFF_RUNNING)
1053                 bstp_initialization(sc);
1054
1055         /*
1056          * Free the bif_info after bstp_initialization(), so that
1057          * bridge_softc.sc_root_port will not reference a dangling
1058          * pointer.
1059          */
1060         kfree(bif_info, M_DEVBUF);
1061 }
1062
1063 /*
1064  * bridge_delete_span:
1065  *
1066  *      Delete the specified span interface.
1067  */
1068 static void
1069 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1070 {
1071         KASSERT(bif->bif_ifp->if_bridge == NULL,
1072             ("%s: not a span interface", __func__));
1073
1074         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1075         kfree(bif, M_DEVBUF);
1076 }
1077
1078 static int
1079 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1080 {
1081         struct ifnet *ifp = sc->sc_ifp;
1082
1083         if (ifp->if_flags & IFF_RUNNING)
1084                 return 0;
1085
1086         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1087             bridge_timer, sc);
1088
1089         ifp->if_flags |= IFF_RUNNING;
1090         bstp_initialization(sc);
1091         return 0;
1092 }
1093
1094 static int
1095 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1096 {
1097         struct ifnet *ifp = sc->sc_ifp;
1098         struct lwkt_msg *lmsg;
1099
1100         if ((ifp->if_flags & IFF_RUNNING) == 0)
1101                 return 0;
1102
1103         callout_stop(&sc->sc_brcallout);
1104
1105         crit_enter();
1106         lmsg = &sc->sc_brtimemsg.lmsg;
1107         if ((lmsg->ms_flags & MSGF_DONE) == 0) {
1108                 /* Pending to be processed; drop it */
1109                 lwkt_dropmsg(lmsg);
1110         }
1111         crit_exit();
1112
1113         bstp_stop(sc);
1114
1115         ifp->if_flags &= ~IFF_RUNNING;
1116
1117         ifnet_deserialize_all(ifp);
1118
1119         /* Let everyone know that we are stopped */
1120         netmsg_service_sync();
1121
1122         /*
1123          * Sync ifnetX msgports in the order we forward rtnode
1124          * installation message.  This is used to make sure that
1125          * all rtnode installation messages sent by bridge_rtupdate()
1126          * during above netmsg_service_sync() are flushed.
1127          */
1128         bridge_rtmsg_sync(sc);
1129         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1130
1131         ifnet_serialize_all(ifp);
1132         return 0;
1133 }
1134
1135 static int
1136 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1137 {
1138         struct ifbreq *req = arg;
1139         struct bridge_iflist *bif;
1140         struct bridge_ifinfo *bif_info;
1141         struct ifnet *ifs, *bifp;
1142         int error = 0;
1143
1144         bifp = sc->sc_ifp;
1145         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1146
1147         ifs = ifunit(req->ifbr_ifsname);
1148         if (ifs == NULL)
1149                 return (ENOENT);
1150
1151         /* If it's in the span list, it can't be a member. */
1152         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1153                 if (ifs == bif->bif_ifp)
1154                         return (EBUSY);
1155
1156         /* Allow the first Ethernet member to define the MTU */
1157         if (ifs->if_type != IFT_GIF) {
1158                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1159                         bifp->if_mtu = ifs->if_mtu;
1160                 } else if (bifp->if_mtu != ifs->if_mtu) {
1161                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1162                         return (EINVAL);
1163                 }
1164         }
1165
1166         if (ifs->if_bridge == sc)
1167                 return (EEXIST);
1168
1169         if (ifs->if_bridge != NULL)
1170                 return (EBUSY);
1171
1172         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1173         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1174         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1175         bif_info->bifi_ifp = ifs;
1176
1177         /*
1178          * Release bridge interface's serializer:
1179          * - To avoid possible dead lock.
1180          * - Various sync operation will block the current thread.
1181          */
1182         ifnet_deserialize_all(bifp);
1183
1184         switch (ifs->if_type) {
1185         case IFT_ETHER:
1186         case IFT_L2VLAN:
1187                 /*
1188                  * Place the interface into promiscuous mode.
1189                  */
1190                 error = ifpromisc(ifs, 1);
1191                 if (error) {
1192                         ifnet_serialize_all(bifp);
1193                         goto out;
1194                 }
1195                 bridge_mutecaps(bif_info, ifs, 1);
1196                 break;
1197
1198         case IFT_GIF: /* :^) */
1199                 break;
1200
1201         default:
1202                 error = EINVAL;
1203                 ifnet_serialize_all(bifp);
1204                 goto out;
1205         }
1206
1207         /*
1208          * Add bifs to percpu linked lists
1209          */
1210         bridge_add_bif(sc, bif_info, ifs);
1211
1212         ifnet_serialize_all(bifp);
1213
1214         if (bifp->if_flags & IFF_RUNNING)
1215                 bstp_initialization(sc);
1216         else
1217                 bstp_stop(sc);
1218
1219         /*
1220          * Everything has been setup, so let the member interface
1221          * deliver packets to this bridge on its input/output path.
1222          */
1223         ifs->if_bridge = sc;
1224 out:
1225         if (error) {
1226                 if (bif_info != NULL)
1227                         kfree(bif_info, M_DEVBUF);
1228         }
1229         return (error);
1230 }
1231
1232 static int
1233 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1234 {
1235         struct ifbreq *req = arg;
1236         struct bridge_iflist *bif;
1237
1238         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1239         if (bif == NULL)
1240                 return (ENOENT);
1241
1242         bridge_delete_member(sc, bif, 0);
1243
1244         return (0);
1245 }
1246
1247 static int
1248 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1249 {
1250         struct ifbreq *req = arg;
1251         struct bridge_iflist *bif;
1252
1253         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1254         if (bif == NULL)
1255                 return (ENOENT);
1256         bridge_ioctl_fillflags(sc, bif, req);
1257         return (0);
1258 }
1259
1260 static void
1261 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1262                        struct ifbreq *req)
1263 {
1264         req->ifbr_ifsflags = bif->bif_flags;
1265         req->ifbr_state = bif->bif_state;
1266         req->ifbr_priority = bif->bif_priority;
1267         req->ifbr_path_cost = bif->bif_path_cost;
1268         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1269         if (bif->bif_flags & IFBIF_STP) {
1270                 req->ifbr_peer_root = bif->bif_peer_root;
1271                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1272                 req->ifbr_peer_cost = bif->bif_peer_cost;
1273                 req->ifbr_peer_port = bif->bif_peer_port;
1274                 if (bstp_supersedes_port_info(sc, bif)) {
1275                         req->ifbr_designated_root = bif->bif_peer_root;
1276                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1277                         req->ifbr_designated_cost = bif->bif_peer_cost;
1278                         req->ifbr_designated_port = bif->bif_peer_port;
1279                 } else {
1280                         req->ifbr_designated_root = sc->sc_bridge_id;
1281                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1282                         req->ifbr_designated_cost = bif->bif_path_cost +
1283                                                     bif->bif_peer_cost;
1284                         req->ifbr_designated_port = bif->bif_port_id;
1285                 }
1286         } else {
1287                 req->ifbr_peer_root = 0;
1288                 req->ifbr_peer_bridge = 0;
1289                 req->ifbr_peer_cost = 0;
1290                 req->ifbr_peer_port = 0;
1291                 req->ifbr_designated_root = 0;
1292                 req->ifbr_designated_bridge = 0;
1293                 req->ifbr_designated_cost = 0;
1294                 req->ifbr_designated_port = 0;
1295         }
1296 }
1297
1298 static int
1299 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1300 {
1301         struct ifbreq *req = arg;
1302         struct bridge_iflist *bif;
1303         struct ifnet *bifp = sc->sc_ifp;
1304
1305         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1306         if (bif == NULL)
1307                 return (ENOENT);
1308
1309         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1310                 /* SPAN is readonly */
1311                 return (EINVAL);
1312         }
1313
1314         if (req->ifbr_ifsflags & IFBIF_STP) {
1315                 switch (bif->bif_ifp->if_type) {
1316                 case IFT_ETHER:
1317                         /* These can do spanning tree. */
1318                         break;
1319
1320                 default:
1321                         /* Nothing else can. */
1322                         return (EINVAL);
1323                 }
1324         }
1325
1326         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1327                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1328         if (bifp->if_flags & IFF_RUNNING)
1329                 bstp_initialization(sc);
1330
1331         return (0);
1332 }
1333
1334 static int
1335 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1336 {
1337         struct ifbrparam *param = arg;
1338         struct ifnet *ifp = sc->sc_ifp;
1339
1340         sc->sc_brtmax = param->ifbrp_csize;
1341
1342         ifnet_deserialize_all(ifp);
1343         bridge_rttrim(sc);
1344         ifnet_serialize_all(ifp);
1345
1346         return (0);
1347 }
1348
1349 static int
1350 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1351 {
1352         struct ifbrparam *param = arg;
1353
1354         param->ifbrp_csize = sc->sc_brtmax;
1355
1356         return (0);
1357 }
1358
1359 static int
1360 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1361 {
1362         struct bridge_control_arg *bc_arg = arg;
1363         struct ifbifconf *bifc = arg;
1364         struct bridge_iflist *bif;
1365         struct ifbreq *breq;
1366         int count, len;
1367
1368         count = 0;
1369         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1370                 count++;
1371         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1372                 count++;
1373
1374         if (bifc->ifbic_len == 0) {
1375                 bifc->ifbic_len = sizeof(*breq) * count;
1376                 return 0;
1377         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1378                 bifc->ifbic_len = 0;
1379                 return 0;
1380         }
1381
1382         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1383         KKASSERT(len >= sizeof(*breq));
1384
1385         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1386         if (breq == NULL) {
1387                 bifc->ifbic_len = 0;
1388                 return ENOMEM;
1389         }
1390         bc_arg->bca_kptr = breq;
1391
1392         count = 0;
1393         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1394                 if (len < sizeof(*breq))
1395                         break;
1396
1397                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1398                         sizeof(breq->ifbr_ifsname));
1399                 bridge_ioctl_fillflags(sc, bif, breq);
1400                 breq++;
1401                 count++;
1402                 len -= sizeof(*breq);
1403         }
1404         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1405                 if (len < sizeof(*breq))
1406                         break;
1407
1408                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1409                         sizeof(breq->ifbr_ifsname));
1410                 breq->ifbr_ifsflags = bif->bif_flags;
1411                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1412                 breq++;
1413                 count++;
1414                 len -= sizeof(*breq);
1415         }
1416
1417         bifc->ifbic_len = sizeof(*breq) * count;
1418         KKASSERT(bifc->ifbic_len > 0);
1419
1420         bc_arg->bca_len = bifc->ifbic_len;
1421         bc_arg->bca_uptr = bifc->ifbic_req;
1422         return 0;
1423 }
1424
1425 static int
1426 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1427 {
1428         struct bridge_control_arg *bc_arg = arg;
1429         struct ifbaconf *bac = arg;
1430         struct bridge_rtnode *brt;
1431         struct ifbareq *bareq;
1432         int count, len;
1433
1434         count = 0;
1435         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1436                 count++;
1437
1438         if (bac->ifbac_len == 0) {
1439                 bac->ifbac_len = sizeof(*bareq) * count;
1440                 return 0;
1441         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1442                 bac->ifbac_len = 0;
1443                 return 0;
1444         }
1445
1446         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1447         KKASSERT(len >= sizeof(*bareq));
1448
1449         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1450         if (bareq == NULL) {
1451                 bac->ifbac_len = 0;
1452                 return ENOMEM;
1453         }
1454         bc_arg->bca_kptr = bareq;
1455
1456         count = 0;
1457         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1458                 struct bridge_rtinfo *bri = brt->brt_info;
1459                 unsigned long expire;
1460
1461                 if (len < sizeof(*bareq))
1462                         break;
1463
1464                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1465                         sizeof(bareq->ifba_ifsname));
1466                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1467                 expire = bri->bri_expire;
1468                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1469                     time_second < expire)
1470                         bareq->ifba_expire = expire - time_second;
1471                 else
1472                         bareq->ifba_expire = 0;
1473                 bareq->ifba_flags = bri->bri_flags;
1474                 bareq++;
1475                 count++;
1476                 len -= sizeof(*bareq);
1477         }
1478
1479         bac->ifbac_len = sizeof(*bareq) * count;
1480         KKASSERT(bac->ifbac_len > 0);
1481
1482         bc_arg->bca_len = bac->ifbac_len;
1483         bc_arg->bca_uptr = bac->ifbac_req;
1484         return 0;
1485 }
1486
1487 static int
1488 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1489 {
1490         struct ifbareq *req = arg;
1491         struct bridge_iflist *bif;
1492         struct ifnet *ifp = sc->sc_ifp;
1493         int error;
1494
1495         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1496
1497         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1498         if (bif == NULL)
1499                 return (ENOENT);
1500
1501         ifnet_deserialize_all(ifp);
1502         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1503                                req->ifba_flags);
1504         ifnet_serialize_all(ifp);
1505         return (error);
1506 }
1507
1508 static int
1509 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1510 {
1511         struct ifbrparam *param = arg;
1512
1513         sc->sc_brttimeout = param->ifbrp_ctime;
1514
1515         return (0);
1516 }
1517
1518 static int
1519 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1520 {
1521         struct ifbrparam *param = arg;
1522
1523         param->ifbrp_ctime = sc->sc_brttimeout;
1524
1525         return (0);
1526 }
1527
1528 static int
1529 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1530 {
1531         struct ifbareq *req = arg;
1532         struct ifnet *ifp = sc->sc_ifp;
1533         int error;
1534
1535         ifnet_deserialize_all(ifp);
1536         error = bridge_rtdaddr(sc, req->ifba_dst);
1537         ifnet_serialize_all(ifp);
1538         return error;
1539 }
1540
1541 static int
1542 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1543 {
1544         struct ifbreq *req = arg;
1545         struct ifnet *ifp = sc->sc_ifp;
1546
1547         ifnet_deserialize_all(ifp);
1548         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1549         ifnet_serialize_all(ifp);
1550
1551         return (0);
1552 }
1553
1554 static int
1555 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1556 {
1557         struct ifbrparam *param = arg;
1558
1559         param->ifbrp_prio = sc->sc_bridge_priority;
1560
1561         return (0);
1562 }
1563
1564 static int
1565 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1566 {
1567         struct ifbrparam *param = arg;
1568
1569         sc->sc_bridge_priority = param->ifbrp_prio;
1570
1571         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1572                 bstp_initialization(sc);
1573
1574         return (0);
1575 }
1576
1577 static int
1578 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1579 {
1580         struct ifbrparam *param = arg;
1581
1582         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1583
1584         return (0);
1585 }
1586
1587 static int
1588 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1589 {
1590         struct ifbrparam *param = arg;
1591
1592         if (param->ifbrp_hellotime == 0)
1593                 return (EINVAL);
1594         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1595
1596         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1597                 bstp_initialization(sc);
1598
1599         return (0);
1600 }
1601
1602 static int
1603 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1604 {
1605         struct ifbrparam *param = arg;
1606
1607         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1608
1609         return (0);
1610 }
1611
1612 static int
1613 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1614 {
1615         struct ifbrparam *param = arg;
1616
1617         if (param->ifbrp_fwddelay == 0)
1618                 return (EINVAL);
1619         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1620
1621         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1622                 bstp_initialization(sc);
1623
1624         return (0);
1625 }
1626
1627 static int
1628 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1629 {
1630         struct ifbrparam *param = arg;
1631
1632         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1633
1634         return (0);
1635 }
1636
1637 static int
1638 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1639 {
1640         struct ifbrparam *param = arg;
1641
1642         if (param->ifbrp_maxage == 0)
1643                 return (EINVAL);
1644         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1645
1646         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1647                 bstp_initialization(sc);
1648
1649         return (0);
1650 }
1651
1652 static int
1653 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1654 {
1655         struct ifbreq *req = arg;
1656         struct bridge_iflist *bif;
1657
1658         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1659         if (bif == NULL)
1660                 return (ENOENT);
1661
1662         bif->bif_priority = req->ifbr_priority;
1663
1664         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1665                 bstp_initialization(sc);
1666
1667         return (0);
1668 }
1669
1670 static int
1671 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1672 {
1673         struct ifbreq *req = arg;
1674         struct bridge_iflist *bif;
1675
1676         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1677         if (bif == NULL)
1678                 return (ENOENT);
1679
1680         bif->bif_path_cost = req->ifbr_path_cost;
1681
1682         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1683                 bstp_initialization(sc);
1684
1685         return (0);
1686 }
1687
1688 static int
1689 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1690 {
1691         struct ifbreq *req = arg;
1692         struct bridge_iflist *bif;
1693         struct ifnet *ifs;
1694         struct bridge_ifinfo *bif_info;
1695
1696         ifs = ifunit(req->ifbr_ifsname);
1697         if (ifs == NULL)
1698                 return (ENOENT);
1699
1700         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1701                 if (ifs == bif->bif_ifp)
1702                         return (EBUSY);
1703
1704         if (ifs->if_bridge != NULL)
1705                 return (EBUSY);
1706
1707         switch (ifs->if_type) {
1708         case IFT_ETHER:
1709         case IFT_GIF:
1710         case IFT_L2VLAN:
1711                 break;
1712
1713         default:
1714                 return (EINVAL);
1715         }
1716
1717         /*
1718          * bif_info is needed for bif_flags
1719          */
1720         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1721         bif_info->bifi_ifp = ifs;
1722
1723         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1724         bif->bif_ifp = ifs;
1725         bif->bif_info = bif_info;
1726         bif->bif_flags = IFBIF_SPAN;
1727         /* NOTE: span bif does not need bridge_ifinfo */
1728
1729         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1730
1731         sc->sc_span = 1;
1732
1733         return (0);
1734 }
1735
1736 static int
1737 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1738 {
1739         struct ifbreq *req = arg;
1740         struct bridge_iflist *bif;
1741         struct ifnet *ifs;
1742
1743         ifs = ifunit(req->ifbr_ifsname);
1744         if (ifs == NULL)
1745                 return (ENOENT);
1746
1747         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1748                 if (ifs == bif->bif_ifp)
1749                         break;
1750
1751         if (bif == NULL)
1752                 return (ENOENT);
1753
1754         bridge_delete_span(sc, bif);
1755
1756         if (TAILQ_EMPTY(&sc->sc_spanlist))
1757                 sc->sc_span = 0;
1758
1759         return (0);
1760 }
1761
1762 static void
1763 bridge_ifdetach_dispatch(netmsg_t msg)
1764 {
1765         struct ifnet *ifp, *bifp;
1766         struct bridge_softc *sc;
1767         struct bridge_iflist *bif;
1768
1769         ifp = msg->lmsg.u.ms_resultp;
1770         sc = ifp->if_bridge;
1771
1772         /* Check if the interface is a bridge member */
1773         if (sc != NULL) {
1774                 bifp = sc->sc_ifp;
1775
1776                 ifnet_serialize_all(bifp);
1777
1778                 bif = bridge_lookup_member_if(sc, ifp);
1779                 if (bif != NULL) {
1780                         bridge_delete_member(sc, bif, 1);
1781                 } else {
1782                         /* XXX Why bif will be NULL? */
1783                 }
1784
1785                 ifnet_deserialize_all(bifp);
1786                 goto reply;
1787         }
1788
1789         crit_enter();   /* XXX MP */
1790
1791         /* Check if the interface is a span port */
1792         LIST_FOREACH(sc, &bridge_list, sc_list) {
1793                 bifp = sc->sc_ifp;
1794
1795                 ifnet_serialize_all(bifp);
1796
1797                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1798                         if (ifp == bif->bif_ifp) {
1799                                 bridge_delete_span(sc, bif);
1800                                 break;
1801                         }
1802
1803                 ifnet_deserialize_all(bifp);
1804         }
1805
1806         crit_exit();
1807
1808 reply:
1809         lwkt_replymsg(&msg->lmsg, 0);
1810 }
1811
1812 /*
1813  * bridge_ifdetach:
1814  *
1815  *      Detach an interface from a bridge.  Called when a member
1816  *      interface is detaching.
1817  */
1818 static void
1819 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1820 {
1821         struct netmsg_base msg;
1822
1823         netmsg_init(&msg, NULL, &curthread->td_msgport,
1824                     0, bridge_ifdetach_dispatch);
1825         msg.lmsg.u.ms_resultp = ifp;
1826
1827         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1828 }
1829
1830 /*
1831  * bridge_init:
1832  *
1833  *      Initialize a bridge interface.
1834  */
1835 static void
1836 bridge_init(void *xsc)
1837 {
1838         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1839 }
1840
1841 /*
1842  * bridge_stop:
1843  *
1844  *      Stop the bridge interface.
1845  */
1846 static void
1847 bridge_stop(struct ifnet *ifp)
1848 {
1849         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1850 }
1851
1852 /*
1853  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1854  * interface or from any member of our bridge interface.  This is used
1855  * later on to force the MAC to be the MAC of our bridge interface.
1856  */
1857 static int
1858 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1859 {
1860         struct bridge_iflist *bif;
1861
1862         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1863                 return (1);
1864
1865         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1866                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1867                            ETHER_ADDR_LEN) == 0) {
1868                         return (1);
1869                 }
1870         }
1871         return (0);
1872 }
1873
1874 /*
1875  * bridge_enqueue:
1876  *
1877  *      Enqueue a packet on a bridge member interface.
1878  *
1879  */
1880 void
1881 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1882 {
1883         struct netmsg_packet *nmp;
1884
1885         nmp = &m->m_hdr.mh_netmsg;
1886         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1887                     0, bridge_enqueue_handler);
1888         nmp->nm_packet = m;
1889         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1890
1891         lwkt_sendmsg(ifnet_portfn(mycpu->gd_cpuid), &nmp->base.lmsg);
1892 }
1893
1894 /*
1895  * bridge_output:
1896  *
1897  *      Send output from a bridge member interface.  This
1898  *      performs the bridging function for locally originated
1899  *      packets.
1900  *
1901  *      The mbuf has the Ethernet header already attached.  We must
1902  *      enqueue or free the mbuf before returning.
1903  */
1904 static int
1905 bridge_output(struct ifnet *ifp, struct mbuf *m)
1906 {
1907         struct bridge_softc *sc = ifp->if_bridge;
1908         struct bridge_iflist *bif, *nbif;
1909         struct ether_header *eh;
1910         struct ifnet *dst_if, *bifp;
1911         int from_us;
1912         int priority;
1913
1914         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
1915
1916         /*
1917          * Make sure that we are still a member of a bridge interface.
1918          */
1919         if (sc == NULL) {
1920                 m_freem(m);
1921                 return (0);
1922         }
1923         bifp = sc->sc_ifp;
1924
1925         /*
1926          * Acquire header
1927          */
1928         if (m->m_len < ETHER_HDR_LEN) {
1929                 m = m_pullup(m, ETHER_HDR_LEN);
1930                 if (m == NULL) {
1931                         bifp->if_oerrors++;
1932                         return (0);
1933                 }
1934         }
1935         eh = mtod(m, struct ether_header *);
1936         from_us = bridge_from_us(sc, eh);
1937
1938         /*
1939          * If bridge is down, but the original output interface is up,
1940          * go ahead and send out that interface.  Otherwise, the packet
1941          * is dropped below.
1942          */
1943         if ((bifp->if_flags & IFF_RUNNING) == 0) {
1944                 dst_if = ifp;
1945                 goto sendunicast;
1946         }
1947
1948         /*
1949          * If the packet is a multicast, or we don't know a better way to
1950          * get there, send to all interfaces.
1951          */
1952         if (ETHER_IS_MULTICAST(eh->ether_dhost))
1953                 dst_if = NULL;
1954         else
1955                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1956
1957         if (dst_if == NULL) {
1958                 struct mbuf *mc;
1959                 int used = 0;
1960
1961                 if (sc->sc_span)
1962                         bridge_span(sc, m);
1963
1964                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1965                                      bif_next, nbif) {
1966                         dst_if = bif->bif_ifp;
1967                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
1968                                 continue;
1969
1970                         /*
1971                          * If this is not the original output interface,
1972                          * and the interface is participating in spanning
1973                          * tree, make sure the port is in a state that
1974                          * allows forwarding.
1975                          */
1976                         if (dst_if != ifp &&
1977                             (bif->bif_flags & IFBIF_STP) != 0) {
1978                                 switch (bif->bif_state) {
1979                                 case BSTP_IFSTATE_L1BLOCKING:
1980                                 case BSTP_IFSTATE_BLOCKING:
1981                                 case BSTP_IFSTATE_LISTENING:
1982                                 case BSTP_IFSTATE_DISABLED:
1983                                         continue;
1984                                 }
1985                         }
1986
1987                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
1988                                 used = 1;
1989                                 mc = m;
1990                         } else {
1991                                 mc = m_copypacket(m, MB_DONTWAIT);
1992                                 if (mc == NULL) {
1993                                         bifp->if_oerrors++;
1994                                         continue;
1995                                 }
1996                         }
1997
1998                         /*
1999                          * If the packet is 'from' us override ether_shost.
2000                          */
2001                         bridge_handoff(sc, dst_if, mc, from_us);
2002
2003                         if (nbif != NULL && !nbif->bif_onlist) {
2004                                 KKASSERT(bif->bif_onlist);
2005                                 nbif = TAILQ_NEXT(bif, bif_next);
2006                         }
2007                 }
2008                 if (used == 0)
2009                         m_freem(m);
2010                 return (0);
2011         }
2012
2013 sendunicast:
2014         /*
2015          * If STP is enabled on the target we are an equal opportunity
2016          * employer and do not necessarily output to dst_if.  Instead
2017          * scan available links with the same MAC as the current dst_if
2018          * and choose the best one.
2019          *
2020          * We also need to do this because arp entries tag onto a particular
2021          * interface and if it happens to be dead then the packets will
2022          * go into a bit bucket.
2023          *
2024          * If LINK2 is set the matching links are bonded and we-round robin.
2025          * (the MAC address must be the same for the participating links).
2026          * In this case links in a STP BLOCKING state are allowed for unicast
2027          * packets.
2028          */
2029         bif = bridge_lookup_member_if(sc, dst_if);
2030         if (bif->bif_flags & IFBIF_STP) {
2031                 priority = 0;
2032                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2033                                      bif_next, nbif) {
2034                         if (memcmp(IF_LLADDR(bif->bif_ifp),
2035                                    IF_LLADDR(dst_if),
2036                                    ETHER_ADDR_LEN) != 0) {
2037                                 continue;
2038                         }
2039
2040                         switch(bif->bif_state) {
2041                         case BSTP_IFSTATE_BLOCKING:
2042                                 if (sc->sc_ifp->if_flags & IFF_LINK2)
2043                                         break;
2044                                 /* fall through */
2045                         case BSTP_IFSTATE_L1BLOCKING:
2046                         case BSTP_IFSTATE_LISTENING:
2047                         case BSTP_IFSTATE_DISABLED:
2048                                 continue;
2049                         default:
2050                                 break;
2051                         }
2052                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
2053                                 continue;
2054
2055                         /*
2056                          * XXX we need to use the toepliz hash or
2057                          *     something like that instead of
2058                          *     round-robining.
2059                          */
2060                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2061                                 dst_if = bif->bif_ifp;
2062                                 TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2063                                         bif, bif_next);
2064                                 TAILQ_INSERT_TAIL(
2065                                         &sc->sc_iflists[mycpuid],
2066                                         bif, bif_next);
2067                                 break;
2068                         }
2069                         if (bif->bif_priority > priority) {
2070                                 priority = bif->bif_priority;
2071                                 dst_if = bif->bif_ifp;
2072                         }
2073                 }
2074         }
2075
2076         if (sc->sc_span)
2077                 bridge_span(sc, m);
2078         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2079                 m_freem(m);
2080         else
2081                 bridge_handoff(sc, dst_if, m, from_us);
2082         return (0);
2083 }
2084
2085 /*
2086  * Returns the bridge interface associated with an ifc.
2087  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2088  * code to supply the bridge for the is-at info, making
2089  * the bridge responsible for matching local addresses.
2090  *
2091  * Without this the ARP code will supply bridge member interfaces
2092  * for the is-at which makes it difficult the bridge to fail-over
2093  * interfaces (amoung other things).
2094  */
2095 static struct ifnet *
2096 bridge_interface(void *if_bridge)
2097 {
2098         struct bridge_softc *sc = if_bridge;
2099         return (sc->sc_ifp);
2100 }
2101
2102 /*
2103  * bridge_start:
2104  *
2105  *      Start output on a bridge.
2106  */
2107 static void
2108 bridge_start(struct ifnet *ifp)
2109 {
2110         struct bridge_softc *sc = ifp->if_softc;
2111
2112         ASSERT_IFNET_SERIALIZED_TX(ifp);
2113
2114         ifp->if_flags |= IFF_OACTIVE;
2115         for (;;) {
2116                 struct ifnet *dst_if = NULL;
2117                 struct ether_header *eh;
2118                 struct mbuf *m;
2119
2120                 m = ifq_dequeue(&ifp->if_snd, NULL);
2121                 if (m == NULL)
2122                         break;
2123
2124                 if (m->m_len < sizeof(*eh)) {
2125                         m = m_pullup(m, sizeof(*eh));
2126                         if (m == NULL) {
2127                                 ifp->if_oerrors++;
2128                                 continue;
2129                         }
2130                 }
2131                 eh = mtod(m, struct ether_header *);
2132
2133                 BPF_MTAP(ifp, m);
2134                 ifp->if_opackets++;
2135
2136                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2137                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2138
2139                 if (dst_if == NULL)
2140                         bridge_start_bcast(sc, m);
2141                 else
2142                         bridge_enqueue(dst_if, m);
2143         }
2144         ifp->if_flags &= ~IFF_OACTIVE;
2145 }
2146
2147 /*
2148  * bridge_forward:
2149  *
2150  *      Forward packets received on a bridge interface via the input
2151  *      path.
2152  *
2153  *      The forwarding function of the bridge.
2154  */
2155 static void
2156 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2157 {
2158         struct bridge_iflist *bif, *nbif;
2159         struct ifnet *src_if, *dst_if, *ifp;
2160         struct ether_header *eh;
2161         int priority;
2162
2163         src_if = m->m_pkthdr.rcvif;
2164         ifp = sc->sc_ifp;
2165
2166         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2167
2168         ifp->if_ipackets++;
2169         ifp->if_ibytes += m->m_pkthdr.len;
2170
2171         /*
2172          * Look up the bridge_iflist.
2173          */
2174         bif = bridge_lookup_member_if(sc, src_if);
2175         if (bif == NULL) {
2176                 /* Interface is not a bridge member (anymore?) */
2177                 m_freem(m);
2178                 return;
2179         }
2180
2181         if (bif->bif_flags & IFBIF_STP) {
2182                 switch (bif->bif_state) {
2183                 case BSTP_IFSTATE_BLOCKING:
2184                         if ((sc->sc_ifp->if_flags & IFF_LINK2) &&
2185                             (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2186                                 break;
2187                         }
2188                         /* fall through */
2189                 case BSTP_IFSTATE_L1BLOCKING:
2190                 case BSTP_IFSTATE_LISTENING:
2191                 case BSTP_IFSTATE_DISABLED:
2192                         m_freem(m);
2193                         return;
2194                 default:
2195                         break;
2196                 }
2197         }
2198
2199         eh = mtod(m, struct ether_header *);
2200
2201         /*
2202          * If the interface is learning, and the source
2203          * address is valid and not multicast, record
2204          * the address.
2205          */
2206         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2207             bif->bif_state != BSTP_IFSTATE_BLOCKING &&
2208             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2209             (eh->ether_shost[0] == 0 &&
2210              eh->ether_shost[1] == 0 &&
2211              eh->ether_shost[2] == 0 &&
2212              eh->ether_shost[3] == 0 &&
2213              eh->ether_shost[4] == 0 &&
2214              eh->ether_shost[5] == 0) == 0) {
2215                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2216         }
2217
2218         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2219             bif->bif_state == BSTP_IFSTATE_LEARNING) {
2220                 m_freem(m);
2221                 return;
2222         }
2223
2224         /*
2225          * At this point, the port either doesn't participate
2226          * in spanning tree or it is in the forwarding state.
2227          */
2228
2229         /*
2230          * If the packet is unicast, destined for someone on
2231          * "this" side of the bridge, drop it.
2232          */
2233         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2234                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2235                 if (src_if == dst_if) {
2236                         m_freem(m);
2237                         return;
2238                 }
2239         } else {
2240                 /* ...forward it to all interfaces. */
2241                 ifp->if_imcasts++;
2242                 dst_if = NULL;
2243         }
2244
2245         if (dst_if == NULL) {
2246                 bridge_broadcast(sc, src_if, m);
2247                 return;
2248         }
2249
2250         /*
2251          * Unicast, kinda replicates the output side of bridge_output().
2252          */
2253         bif = bridge_lookup_member_if(sc, dst_if);
2254         if (bif == NULL) {
2255                 /* Not a member of the bridge (anymore?) */
2256                 m_freem(m);
2257                 return;
2258         }
2259
2260         if (bif->bif_flags & IFBIF_STP) {
2261                 priority = 0;
2262                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2263                                      bif_next, nbif) {
2264                         if (memcmp(IF_LLADDR(bif->bif_ifp),
2265                                    IF_LLADDR(dst_if),
2266                                    ETHER_ADDR_LEN) != 0) {
2267                                 continue;
2268                         }
2269
2270                         switch(bif->bif_state) {
2271                         case BSTP_IFSTATE_BLOCKING:
2272                                 if (sc->sc_ifp->if_flags & IFF_LINK2)
2273                                         break;
2274                                 /* fall through */
2275                         case BSTP_IFSTATE_L1BLOCKING:
2276                         case BSTP_IFSTATE_LISTENING:
2277                         case BSTP_IFSTATE_DISABLED:
2278                                 continue;
2279                         default:
2280                                 break;
2281                         }
2282
2283                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
2284                                 continue;
2285
2286                         /*
2287                          * XXX we need to use the toepliz hash or
2288                          *     something like that instead of
2289                          *     round-robining.
2290                          */
2291                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2292                                 dst_if = bif->bif_ifp;
2293                                 TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2294                                         bif, bif_next);
2295                                 TAILQ_INSERT_TAIL(
2296                                         &sc->sc_iflists[mycpuid],
2297                                         bif, bif_next);
2298                                 break;
2299                         }
2300                         if (bif->bif_priority > priority) {
2301                                 priority = bif->bif_priority;
2302                                 dst_if = bif->bif_ifp;
2303                         }
2304                 }
2305         }
2306
2307         /*
2308          * At this point, we're dealing with a unicast frame
2309          * going to a different interface.
2310          */
2311         if ((dst_if->if_flags & IFF_RUNNING) == 0) {
2312                 m_freem(m);
2313                 return;
2314         }
2315
2316         if (inet_pfil_hook.ph_hashooks > 0
2317 #ifdef INET6
2318             || inet6_pfil_hook.ph_hashooks > 0
2319 #endif
2320             ) {
2321                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2322                         return;
2323                 if (m == NULL)
2324                         return;
2325
2326                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2327                         return;
2328                 if (m == NULL)
2329                         return;
2330         }
2331         bridge_handoff(sc, dst_if, m, 0);
2332 }
2333
2334 /*
2335  * bridge_input:
2336  *
2337  *      Receive input from a member interface.  Queue the packet for
2338  *      bridging if it is not for us.
2339  */
2340 static struct mbuf *
2341 bridge_input(struct ifnet *ifp, struct mbuf *m)
2342 {
2343         struct bridge_softc *sc = ifp->if_bridge;
2344         struct bridge_iflist *bif;
2345         struct ifnet *bifp, *new_ifp;
2346         struct ether_header *eh;
2347         struct mbuf *mc, *mc2;
2348
2349         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2350
2351         /*
2352          * Make sure that we are still a member of a bridge interface.
2353          */
2354         if (sc == NULL)
2355                 return m;
2356
2357         new_ifp = NULL;
2358         bifp = sc->sc_ifp;
2359
2360         if ((bifp->if_flags & IFF_RUNNING) == 0)
2361                 goto out;
2362
2363         /*
2364          * Implement support for bridge monitoring.  If this flag has been
2365          * set on this interface, discard the packet once we push it through
2366          * the bpf(4) machinery, but before we do, increment various counters
2367          * associated with this bridge.
2368          */
2369         if (bifp->if_flags & IFF_MONITOR) {
2370                 /* Change input interface to this bridge */
2371                 m->m_pkthdr.rcvif = bifp;
2372
2373                 BPF_MTAP(bifp, m);
2374
2375                 /* Update bridge's ifnet statistics */
2376                 bifp->if_ipackets++;
2377                 bifp->if_ibytes += m->m_pkthdr.len;
2378                 if (m->m_flags & (M_MCAST | M_BCAST))
2379                         bifp->if_imcasts++;
2380
2381                 m_freem(m);
2382                 m = NULL;
2383                 goto out;
2384         }
2385
2386         /*
2387          * Handle the ether_header
2388          *
2389          * In all cases if the packet is destined for us via our MAC
2390          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2391          * repeat the source MAC out the same interface.
2392          *
2393          * This first test against our bridge MAC is the fast-path.
2394          *
2395          * NOTE!  The bridge interface can serve as an endpoint for
2396          *        communication but normally there are no IPs associated
2397          *        with it so you cannot route through it.  Instead what
2398          *        you do is point your default route *THROUGH* the bridge
2399          *        to the actual default router for one of the bridged spaces.
2400          *
2401          *        Another possibility is to put all your IP specifications
2402          *        on the bridge instead of on the individual interfaces.  If
2403          *        you do this it should be possible to use the bridge as an
2404          *        end point and route (rather than switch) through it using
2405          *        the default route or ipfw forwarding rules.
2406          */
2407
2408         /*
2409          * Acquire header
2410          */
2411         if (m->m_len < ETHER_HDR_LEN) {
2412                 m = m_pullup(m, ETHER_HDR_LEN);
2413                 if (m == NULL)
2414                         goto out;
2415         }
2416         eh = mtod(m, struct ether_header *);
2417         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2418         bcopy(eh, &m->m_pkthdr.br.ether, sizeof(*eh));
2419
2420         if ((bridge_debug & 1) &&
2421             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2422             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2423                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2424                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2425                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2426                         eh->ether_dhost[0],
2427                         eh->ether_dhost[1],
2428                         eh->ether_dhost[2],
2429                         eh->ether_dhost[3],
2430                         eh->ether_dhost[4],
2431                         eh->ether_dhost[5],
2432                         eh->ether_shost[0],
2433                         eh->ether_shost[1],
2434                         eh->ether_shost[2],
2435                         eh->ether_shost[3],
2436                         eh->ether_shost[4],
2437                         eh->ether_shost[5],
2438                         eh->ether_type,
2439                         ((u_char *)IF_LLADDR(bifp))[0],
2440                         ((u_char *)IF_LLADDR(bifp))[1],
2441                         ((u_char *)IF_LLADDR(bifp))[2],
2442                         ((u_char *)IF_LLADDR(bifp))[3],
2443                         ((u_char *)IF_LLADDR(bifp))[4],
2444                         ((u_char *)IF_LLADDR(bifp))[5]
2445                 );
2446         }
2447
2448         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2449                 /*
2450                  * If the packet is for us, set the packets source as the
2451                  * bridge, and return the packet back to ifnet.if_input for
2452                  * local processing.
2453                  */
2454                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2455                 KASSERT(bifp->if_bridge == NULL,
2456                         ("loop created in bridge_input"));
2457                 if (pfil_member != 0) {
2458                         if (inet_pfil_hook.ph_hashooks > 0
2459 #ifdef INET6
2460                             || inet6_pfil_hook.ph_hashooks > 0
2461 #endif
2462                         ) {
2463                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2464                                         goto out;
2465                                 if (m == NULL)
2466                                         goto out;
2467                         }
2468                 }
2469                 new_ifp = bifp;
2470                 goto out;
2471         }
2472
2473         /*
2474          * Tap all packets arriving on the bridge, no matter if
2475          * they are local destinations or not.  In is in.
2476          */
2477         BPF_MTAP(bifp, m);
2478
2479         bif = bridge_lookup_member_if(sc, ifp);
2480         if (bif == NULL)
2481                 goto out;
2482
2483         if (sc->sc_span)
2484                 bridge_span(sc, m);
2485
2486         if (m->m_flags & (M_BCAST | M_MCAST)) {
2487                 /*
2488                  * Tap off 802.1D packets; they do not get forwarded.
2489                  */
2490                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2491                             ETHER_ADDR_LEN) == 0) {
2492                         ifnet_serialize_all(bifp);
2493                         bstp_input(sc, bif, m);
2494                         ifnet_deserialize_all(bifp);
2495
2496                         /* m is freed by bstp_input */
2497                         m = NULL;
2498                         goto out;
2499                 }
2500
2501                 /*
2502                  * Other than 802.11d packets, ignore packets if the
2503                  * interface is not in a good state.
2504                  */
2505                 if (bif->bif_flags & IFBIF_STP) {
2506                         switch (bif->bif_state) {
2507                         case BSTP_IFSTATE_L1BLOCKING:
2508                         case BSTP_IFSTATE_BLOCKING:
2509                         case BSTP_IFSTATE_LISTENING:
2510                         case BSTP_IFSTATE_DISABLED:
2511                                 goto out;
2512                         }
2513                 }
2514
2515                 /*
2516                  * Make a deep copy of the packet and enqueue the copy
2517                  * for bridge processing; return the original packet for
2518                  * local processing.
2519                  */
2520                 mc = m_dup(m, MB_DONTWAIT);
2521                 if (mc == NULL)
2522                         goto out;
2523
2524                 bridge_forward(sc, mc);
2525
2526                 /*
2527                  * Reinject the mbuf as arriving on the bridge so we have a
2528                  * chance at claiming multicast packets. We can not loop back
2529                  * here from ether_input as a bridge is never a member of a
2530                  * bridge.
2531                  */
2532                 KASSERT(bifp->if_bridge == NULL,
2533                         ("loop created in bridge_input"));
2534                 mc2 = m_dup(m, MB_DONTWAIT);
2535 #ifdef notyet
2536                 if (mc2 != NULL) {
2537                         /* Keep the layer3 header aligned */
2538                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2539                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2540                 }
2541 #endif
2542                 if (mc2 != NULL) {
2543                         /*
2544                          * Don't tap to bpf(4) again; we have already done
2545                          * the tapping.
2546                          *
2547                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2548                          * processed as coming in on the correct interface.
2549                          *
2550                          * Clear the bridge flag for local processing in
2551                          * case the packet gets routed.
2552                          */
2553                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2554                         ether_reinput_oncpu(bifp, mc2, 0);
2555                 }
2556
2557                 /* Return the original packet for local processing. */
2558                 goto out;
2559         }
2560
2561         /*
2562          * Input of a unicast packet.  We have to allow unicast packets
2563          * input from links in the BLOCKING state.
2564          *
2565          * NOTE: We explicitly ignore normal packets received on a link
2566          *       in the BLOCKING state.  The point of being in that state
2567          *       is to avoid getting duplicate packets.
2568          *
2569          *       HOWEVER, if LINK2 is set the normal spanning tree code
2570          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2571          *       loops.  Unicast packets CAN still loop if we allow the
2572          *       case (hence we only do it in LINK2), but it isn't quite as
2573          *       bad as a broadcast packet looping.
2574          */
2575         if (bif->bif_flags & IFBIF_STP) {
2576                 switch (bif->bif_state) {
2577 #if 0
2578                 case BSTP_IFSTATE_BLOCKING:
2579                         if (sc->sc_ifp->if_flags & IFF_LINK2)
2580                                 break;
2581                         /* fall through */
2582 #endif
2583                 case BSTP_IFSTATE_L1BLOCKING:
2584                 case BSTP_IFSTATE_LISTENING:
2585                 case BSTP_IFSTATE_DISABLED:
2586                         goto out;
2587                 default:
2588                         break;
2589                 }
2590         }
2591
2592         /*
2593          * Unicast.  Make sure it's not for us.
2594          *
2595          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2596          * is followed by breaking out of the loop.
2597          */
2598         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2599                 if (bif->bif_ifp->if_type != IFT_ETHER)
2600                         continue;
2601
2602                 /*
2603                  * It is destined for an interface linked to the bridge.
2604                  * We want the bridge itself to take care of link level
2605                  * forwarding to member interfaces so reinput on the bridge.
2606                  * i.e. if you ping an IP on a target interface associated
2607                  * with the bridge, the arp is-at response should indicate
2608                  * the bridge MAC.
2609                  */
2610                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2611                            ETHER_ADDR_LEN) == 0) {
2612                         if (bif->bif_ifp != ifp) {
2613                                 /* XXX loop prevention */
2614                                 m->m_flags |= M_ETHER_BRIDGED;
2615                         }
2616                         if (bif->bif_flags & IFBIF_LEARNING) {
2617                                 bridge_rtupdate(sc, eh->ether_shost,
2618                                                 ifp, IFBAF_DYNAMIC);
2619                         }
2620                         new_ifp = bifp; /* not bif->bif_ifp */
2621                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2622                         goto out;
2623                 }
2624
2625                 /*
2626                  * Ignore received packets that were sent by us.
2627                  */
2628                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2629                            ETHER_ADDR_LEN) == 0) {
2630                         m_freem(m);
2631                         m = NULL;
2632                         goto out;
2633                 }
2634         }
2635
2636         /* Perform the bridge forwarding function. */
2637         bridge_forward(sc, m);
2638         m = NULL;
2639
2640         /*
2641          * ether_reinput_oncpu() will reprocess rcvif as
2642          * coming from new_ifp (since we do not specify
2643          * REINPUT_KEEPRCVIF).
2644          */
2645 out:
2646         if (new_ifp != NULL) {
2647                 /*
2648                  * Clear the bridge flag for local processing in
2649                  * case the packet gets routed.
2650                  */
2651                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2652                 m = NULL;
2653         }
2654         return (m);
2655 }
2656
2657 /*
2658  * bridge_start_bcast:
2659  *
2660  *      Broadcast the packet sent from bridge to all member
2661  *      interfaces.
2662  *      This is a simplified version of bridge_broadcast(), however,
2663  *      this function expects caller to hold bridge's serializer.
2664  */
2665 static void
2666 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2667 {
2668         struct bridge_iflist *bif;
2669         struct mbuf *mc;
2670         struct ifnet *dst_if, *bifp;
2671         int used = 0;
2672
2673         bifp = sc->sc_ifp;
2674         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2675
2676         /*
2677          * Following loop is MPSAFE; nothing is blocking
2678          * in the loop body.
2679          */
2680         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2681                 dst_if = bif->bif_ifp;
2682
2683                 if (bif->bif_flags & IFBIF_STP) {
2684                         switch (bif->bif_state) {
2685                         case BSTP_IFSTATE_L1BLOCKING:
2686                         case BSTP_IFSTATE_BLOCKING:
2687                         case BSTP_IFSTATE_DISABLED:
2688                                 continue;
2689                         }
2690                 }
2691
2692                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2693                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2694                         continue;
2695
2696                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2697                         continue;
2698
2699                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2700                         mc = m;
2701                         used = 1;
2702                 } else {
2703                         mc = m_copypacket(m, MB_DONTWAIT);
2704                         if (mc == NULL) {
2705                                 bifp->if_oerrors++;
2706                                 continue;
2707                         }
2708                 }
2709                 bridge_enqueue(dst_if, mc);
2710         }
2711         if (used == 0)
2712                 m_freem(m);
2713 }
2714
2715 /*
2716  * bridge_broadcast:
2717  *
2718  *      Send a frame to all interfaces that are members of
2719  *      the bridge, except for the one on which the packet
2720  *      arrived.
2721  */
2722 static void
2723 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2724                  struct mbuf *m)
2725 {
2726         struct bridge_iflist *bif, *nbif;
2727         struct ether_header *eh;
2728         struct mbuf *mc;
2729         struct ifnet *dst_if, *bifp;
2730         int used = 0;
2731         int from_us;
2732
2733         bifp = sc->sc_ifp;
2734         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
2735
2736         eh = mtod(m, struct ether_header *);
2737         from_us = bridge_from_us(sc, eh);
2738
2739         if (inet_pfil_hook.ph_hashooks > 0
2740 #ifdef INET6
2741             || inet6_pfil_hook.ph_hashooks > 0
2742 #endif
2743             ) {
2744                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2745                         return;
2746                 if (m == NULL)
2747                         return;
2748
2749                 /* Filter on the bridge interface before broadcasting */
2750                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2751                         return;
2752                 if (m == NULL)
2753                         return;
2754         }
2755
2756         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
2757                 dst_if = bif->bif_ifp;
2758                 if (dst_if == src_if)
2759                         continue;
2760
2761                 if (bif->bif_flags & IFBIF_STP) {
2762                         switch (bif->bif_state) {
2763                         case BSTP_IFSTATE_L1BLOCKING:
2764                         case BSTP_IFSTATE_BLOCKING:
2765                         case BSTP_IFSTATE_DISABLED:
2766                                 continue;
2767                         }
2768                 }
2769
2770                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2771                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2772                         continue;
2773
2774                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2775                         continue;
2776
2777                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2778                         mc = m;
2779                         used = 1;
2780                 } else {
2781                         mc = m_copypacket(m, MB_DONTWAIT);
2782                         if (mc == NULL) {
2783                                 sc->sc_ifp->if_oerrors++;
2784                                 continue;
2785                         }
2786                 }
2787
2788                 /*
2789                  * Filter on the output interface.  Pass a NULL bridge
2790                  * interface pointer so we do not redundantly filter on
2791                  * the bridge for each interface we broadcast on.
2792                  */
2793                 if (inet_pfil_hook.ph_hashooks > 0
2794 #ifdef INET6
2795                     || inet6_pfil_hook.ph_hashooks > 0
2796 #endif
2797                     ) {
2798                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
2799                                 continue;
2800                         if (mc == NULL)
2801                                 continue;
2802                 }
2803                 bridge_handoff(sc, dst_if, mc, from_us);
2804
2805                 if (nbif != NULL && !nbif->bif_onlist) {
2806                         KKASSERT(bif->bif_onlist);
2807                         nbif = TAILQ_NEXT(bif, bif_next);
2808                 }
2809         }
2810         if (used == 0)
2811                 m_freem(m);
2812 }
2813
2814 /*
2815  * bridge_span:
2816  *
2817  *      Duplicate a packet out one or more interfaces that are in span mode,
2818  *      the original mbuf is unmodified.
2819  */
2820 static void
2821 bridge_span(struct bridge_softc *sc, struct mbuf *m)
2822 {
2823         struct bridge_iflist *bif;
2824         struct ifnet *dst_if, *bifp;
2825         struct mbuf *mc;
2826
2827         bifp = sc->sc_ifp;
2828         ifnet_serialize_all(bifp);
2829
2830         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2831                 dst_if = bif->bif_ifp;
2832
2833                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2834                         continue;
2835
2836                 mc = m_copypacket(m, MB_DONTWAIT);
2837                 if (mc == NULL) {
2838                         sc->sc_ifp->if_oerrors++;
2839                         continue;
2840                 }
2841                 bridge_enqueue(dst_if, mc);
2842         }
2843
2844         ifnet_deserialize_all(bifp);
2845 }
2846
2847 static void
2848 bridge_rtmsg_sync_handler(netmsg_t msg)
2849 {
2850         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
2851 }
2852
2853 static void
2854 bridge_rtmsg_sync(struct bridge_softc *sc)
2855 {
2856         struct netmsg_base msg;
2857
2858         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2859
2860         netmsg_init(&msg, NULL, &curthread->td_msgport,
2861                     0, bridge_rtmsg_sync_handler);
2862         ifnet_domsg(&msg.lmsg, 0);
2863 }
2864
2865 static __inline void
2866 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
2867                      int setflags, uint8_t flags, uint32_t timeo)
2868 {
2869         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2870             bri->bri_ifp != dst_if)
2871                 bri->bri_ifp = dst_if;
2872         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2873             bri->bri_expire != time_second + timeo)
2874                 bri->bri_expire = time_second + timeo;
2875         if (setflags)
2876                 bri->bri_flags = flags;
2877 }
2878
2879 static int
2880 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
2881                        struct ifnet *dst_if, int setflags, uint8_t flags,
2882                        struct bridge_rtinfo **bri0)
2883 {
2884         struct bridge_rtnode *brt;
2885         struct bridge_rtinfo *bri;
2886
2887         if (mycpuid == 0) {
2888                 brt = bridge_rtnode_lookup(sc, dst);
2889                 if (brt != NULL) {
2890                         /*
2891                          * rtnode for 'dst' already exists.  We inform the
2892                          * caller about this by leaving bri0 as NULL.  The
2893                          * caller will terminate the intallation upon getting
2894                          * NULL bri0.  However, we still need to update the
2895                          * rtinfo.
2896                          */
2897                         KKASSERT(*bri0 == NULL);
2898
2899                         /* Update rtinfo */
2900                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
2901                                              flags, sc->sc_brttimeout);
2902                         return 0;
2903                 }
2904
2905                 /*
2906                  * We only need to check brtcnt on CPU0, since if limit
2907                  * is to be exceeded, ENOSPC is returned.  Caller knows
2908                  * this and will terminate the installation.
2909                  */
2910                 if (sc->sc_brtcnt >= sc->sc_brtmax)
2911                         return ENOSPC;
2912
2913                 KKASSERT(*bri0 == NULL);
2914                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
2915                                   M_WAITOK | M_ZERO);
2916                 *bri0 = bri;
2917
2918                 /* Setup rtinfo */
2919                 bri->bri_flags = IFBAF_DYNAMIC;
2920                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
2921                                      sc->sc_brttimeout);
2922         } else {
2923                 bri = *bri0;
2924                 KKASSERT(bri != NULL);
2925         }
2926
2927         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
2928                       M_WAITOK | M_ZERO);
2929         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2930         brt->brt_info = bri;
2931
2932         bridge_rtnode_insert(sc, brt);
2933         return 0;
2934 }
2935
2936 static void
2937 bridge_rtinstall_handler(netmsg_t msg)
2938 {
2939         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
2940         int error;
2941
2942         error = bridge_rtinstall_oncpu(brmsg->br_softc,
2943                                        brmsg->br_dst, brmsg->br_dst_if,
2944                                        brmsg->br_setflags, brmsg->br_flags,
2945                                        &brmsg->br_rtinfo);
2946         if (error) {
2947                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
2948                 lwkt_replymsg(&brmsg->base.lmsg, error);
2949                 return;
2950         } else if (brmsg->br_rtinfo == NULL) {
2951                 /* rtnode already exists for 'dst' */
2952                 KKASSERT(mycpuid == 0);
2953                 lwkt_replymsg(&brmsg->base.lmsg, 0);
2954                 return;
2955         }
2956         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
2957 }
2958
2959 /*
2960  * bridge_rtupdate:
2961  *
2962  *      Add/Update a bridge routing entry.
2963  */
2964 static int
2965 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2966                 struct ifnet *dst_if, uint8_t flags)
2967 {
2968         struct bridge_rtnode *brt;
2969
2970         /*
2971          * A route for this destination might already exist.  If so,
2972          * update it, otherwise create a new one.
2973          */
2974         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
2975                 struct netmsg_brsaddr *brmsg;
2976
2977                 if (sc->sc_brtcnt >= sc->sc_brtmax)
2978                         return ENOSPC;
2979
2980                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
2981                 if (brmsg == NULL)
2982                         return ENOMEM;
2983
2984                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
2985                             0, bridge_rtinstall_handler);
2986                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
2987                 brmsg->br_dst_if = dst_if;
2988                 brmsg->br_flags = flags;
2989                 brmsg->br_setflags = 0;
2990                 brmsg->br_softc = sc;
2991                 brmsg->br_rtinfo = NULL;
2992
2993                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
2994                 return 0;
2995         }
2996         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
2997                              sc->sc_brttimeout);
2998         return 0;
2999 }
3000
3001 static int
3002 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3003                struct ifnet *dst_if, uint8_t flags)
3004 {
3005         struct netmsg_brsaddr brmsg;
3006
3007         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3008
3009         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3010                     0, bridge_rtinstall_handler);
3011         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3012         brmsg.br_dst_if = dst_if;
3013         brmsg.br_flags = flags;
3014         brmsg.br_setflags = 1;
3015         brmsg.br_softc = sc;
3016         brmsg.br_rtinfo = NULL;
3017
3018         return ifnet_domsg(&brmsg.base.lmsg, 0);
3019 }
3020
3021 /*
3022  * bridge_rtlookup:
3023  *
3024  *      Lookup the destination interface for an address.
3025  */
3026 static struct ifnet *
3027 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3028 {
3029         struct bridge_rtnode *brt;
3030
3031         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3032                 return NULL;
3033         return brt->brt_info->bri_ifp;
3034 }
3035
3036 static void
3037 bridge_rtreap_handler(netmsg_t msg)
3038 {
3039         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3040         struct bridge_rtnode *brt, *nbrt;
3041
3042         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3043                 if (brt->brt_info->bri_dead)
3044                         bridge_rtnode_destroy(sc, brt);
3045         }
3046         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3047 }
3048
3049 static void
3050 bridge_rtreap(struct bridge_softc *sc)
3051 {
3052         struct netmsg_base msg;
3053
3054         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3055
3056         netmsg_init(&msg, NULL, &curthread->td_msgport,
3057                     0, bridge_rtreap_handler);
3058         msg.lmsg.u.ms_resultp = sc;
3059
3060         ifnet_domsg(&msg.lmsg, 0);
3061 }
3062
3063 static void
3064 bridge_rtreap_async(struct bridge_softc *sc)
3065 {
3066         struct netmsg_base *msg;
3067
3068         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3069
3070         netmsg_init(msg, NULL, &netisr_afree_rport,
3071                     0, bridge_rtreap_handler);
3072         msg->lmsg.u.ms_resultp = sc;
3073
3074         ifnet_sendmsg(&msg->lmsg, 0);
3075 }
3076
3077 /*
3078  * bridge_rttrim:
3079  *
3080  *      Trim the routine table so that we have a number
3081  *      of routing entries less than or equal to the
3082  *      maximum number.
3083  */
3084 static void
3085 bridge_rttrim(struct bridge_softc *sc)
3086 {
3087         struct bridge_rtnode *brt;
3088         int dead;
3089
3090         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3091
3092         /* Make sure we actually need to do this. */
3093         if (sc->sc_brtcnt <= sc->sc_brtmax)
3094                 return;
3095
3096         /*
3097          * Find out how many rtnodes are dead
3098          */
3099         dead = bridge_rtage_finddead(sc);
3100         KKASSERT(dead <= sc->sc_brtcnt);
3101
3102         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3103                 /* Enough dead rtnodes are found */
3104                 bridge_rtreap(sc);
3105                 return;
3106         }
3107
3108         /*
3109          * Kill some dynamic rtnodes to meet the brtmax
3110          */
3111         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3112                 struct bridge_rtinfo *bri = brt->brt_info;
3113
3114                 if (bri->bri_dead) {
3115                         /*
3116                          * We have counted this rtnode in
3117                          * bridge_rtage_finddead()
3118                          */
3119                         continue;
3120                 }
3121
3122                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3123                         bri->bri_dead = 1;
3124                         ++dead;
3125                         KKASSERT(dead <= sc->sc_brtcnt);
3126
3127                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3128                                 /* Enough rtnodes are collected */
3129                                 break;
3130                         }
3131                 }
3132         }
3133         if (dead)
3134                 bridge_rtreap(sc);
3135 }
3136
3137 /*
3138  * bridge_timer:
3139  *
3140  *      Aging timer for the bridge.
3141  */
3142 static void
3143 bridge_timer(void *arg)
3144 {
3145         struct bridge_softc *sc = arg;
3146         struct netmsg_base *msg;
3147
3148         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3149
3150         crit_enter();
3151
3152         if (callout_pending(&sc->sc_brcallout) ||
3153             !callout_active(&sc->sc_brcallout)) {
3154                 crit_exit();
3155                 return;
3156         }
3157         callout_deactivate(&sc->sc_brcallout);
3158
3159         msg = &sc->sc_brtimemsg;
3160         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3161         lwkt_sendmsg(BRIDGE_CFGPORT, &msg->lmsg);
3162
3163         crit_exit();
3164 }
3165
3166 static void
3167 bridge_timer_handler(netmsg_t msg)
3168 {
3169         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3170
3171         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3172
3173         crit_enter();
3174         /* Reply ASAP */
3175         lwkt_replymsg(&msg->lmsg, 0);
3176         crit_exit();
3177
3178         bridge_rtage(sc);
3179         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3180                 callout_reset(&sc->sc_brcallout,
3181                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3182         }
3183 }
3184
3185 static int
3186 bridge_rtage_finddead(struct bridge_softc *sc)
3187 {
3188         struct bridge_rtnode *brt;
3189         int dead = 0;
3190
3191         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3192                 struct bridge_rtinfo *bri = brt->brt_info;
3193
3194                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3195                     time_second >= bri->bri_expire) {
3196                         bri->bri_dead = 1;
3197                         ++dead;
3198                         KKASSERT(dead <= sc->sc_brtcnt);
3199                 }
3200         }
3201         return dead;
3202 }
3203
3204 /*
3205  * bridge_rtage:
3206  *
3207  *      Perform an aging cycle.
3208  */
3209 static void
3210 bridge_rtage(struct bridge_softc *sc)
3211 {
3212         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3213
3214         if (bridge_rtage_finddead(sc))
3215                 bridge_rtreap(sc);
3216 }
3217
3218 /*
3219  * bridge_rtflush:
3220  *
3221  *      Remove all dynamic addresses from the bridge.
3222  */
3223 static void
3224 bridge_rtflush(struct bridge_softc *sc, int bf)
3225 {
3226         struct bridge_rtnode *brt;
3227         int reap;
3228
3229         reap = 0;
3230         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3231                 struct bridge_rtinfo *bri = brt->brt_info;
3232
3233                 if ((bf & IFBF_FLUSHALL) ||
3234                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3235                         bri->bri_dead = 1;
3236                         reap = 1;
3237                 }
3238         }
3239         if (reap) {
3240                 if (bf & IFBF_FLUSHSYNC)
3241                         bridge_rtreap(sc);
3242                 else
3243                         bridge_rtreap_async(sc);
3244         }
3245 }
3246
3247 /*
3248  * bridge_rtdaddr:
3249  *
3250  *      Remove an address from the table.
3251  */
3252 static int
3253 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3254 {
3255         struct bridge_rtnode *brt;
3256
3257         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3258
3259         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3260                 return (ENOENT);
3261
3262         /* TODO: add a cheaper delete operation */
3263         brt->brt_info->bri_dead = 1;
3264         bridge_rtreap(sc);
3265         return (0);
3266 }
3267
3268 /*
3269  * bridge_rtdelete:
3270  *
3271  *      Delete routes to a speicifc member interface.
3272  */
3273 void
3274 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3275 {
3276         struct bridge_rtnode *brt;
3277         int reap;
3278
3279         reap = 0;
3280         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3281                 struct bridge_rtinfo *bri = brt->brt_info;
3282
3283                 if (bri->bri_ifp == ifp &&
3284                     ((bf & IFBF_FLUSHALL) ||
3285                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3286                         bri->bri_dead = 1;
3287                         reap = 1;
3288                 }
3289         }
3290         if (reap) {
3291                 if (bf & IFBF_FLUSHSYNC)
3292                         bridge_rtreap(sc);
3293                 else
3294                         bridge_rtreap_async(sc);
3295         }
3296 }
3297
3298 /*
3299  * bridge_rtable_init:
3300  *
3301  *      Initialize the route table for this bridge.
3302  */
3303 static void
3304 bridge_rtable_init(struct bridge_softc *sc)
3305 {
3306         int cpu;
3307
3308         /*
3309          * Initialize per-cpu hash tables
3310          */
3311         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3312                                  M_DEVBUF, M_WAITOK);
3313         for (cpu = 0; cpu < ncpus; ++cpu) {
3314                 int i;
3315
3316                 sc->sc_rthashs[cpu] =
3317                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3318                         M_DEVBUF, M_WAITOK);
3319
3320                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3321                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3322         }
3323         sc->sc_rthash_key = karc4random();
3324
3325         /*
3326          * Initialize per-cpu lists
3327          */
3328         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3329                                  M_DEVBUF, M_WAITOK);
3330         for (cpu = 0; cpu < ncpus; ++cpu)
3331                 LIST_INIT(&sc->sc_rtlists[cpu]);
3332 }
3333
3334 /*
3335  * bridge_rtable_fini:
3336  *
3337  *      Deconstruct the route table for this bridge.
3338  */
3339 static void
3340 bridge_rtable_fini(struct bridge_softc *sc)
3341 {
3342         int cpu;
3343
3344         /*
3345          * Free per-cpu hash tables
3346          */
3347         for (cpu = 0; cpu < ncpus; ++cpu)
3348                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3349         kfree(sc->sc_rthashs, M_DEVBUF);
3350
3351         /*
3352          * Free per-cpu lists
3353          */
3354         kfree(sc->sc_rtlists, M_DEVBUF);
3355 }
3356
3357 /*
3358  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3359  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3360  */
3361 #define mix(a, b, c)                                                    \
3362 do {                                                                    \
3363         a -= b; a -= c; a ^= (c >> 13);                                 \
3364         b -= c; b -= a; b ^= (a << 8);                                  \
3365         c -= a; c -= b; c ^= (b >> 13);                                 \
3366         a -= b; a -= c; a ^= (c >> 12);                                 \
3367         b -= c; b -= a; b ^= (a << 16);                                 \
3368         c -= a; c -= b; c ^= (b >> 5);                                  \
3369         a -= b; a -= c; a ^= (c >> 3);                                  \
3370         b -= c; b -= a; b ^= (a << 10);                                 \
3371         c -= a; c -= b; c ^= (b >> 15);                                 \
3372 } while (/*CONSTCOND*/0)
3373
3374 static __inline uint32_t
3375 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3376 {
3377         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3378
3379         b += addr[5] << 8;
3380         b += addr[4];
3381         a += addr[3] << 24;
3382         a += addr[2] << 16;
3383         a += addr[1] << 8;
3384         a += addr[0];
3385
3386         mix(a, b, c);
3387
3388         return (c & BRIDGE_RTHASH_MASK);
3389 }
3390
3391 #undef mix
3392
/*
 * Compare two Ethernet addresses octet by octet.
 *
 * Returns 0 when all ETHER_ADDR_LEN octets are equal; otherwise the
 * difference a[i] - b[i] at the first octet that differs (negative when
 * 'a' sorts before 'b', positive when it sorts after).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0)
			return (diff);
	}

	return (0);
}
3404
3405 /*
3406  * bridge_rtnode_lookup:
3407  *
3408  *      Look up a bridge route node for the specified destination.
3409  */
3410 static struct bridge_rtnode *
3411 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3412 {
3413         struct bridge_rtnode *brt;
3414         uint32_t hash;
3415         int dir;
3416
3417         hash = bridge_rthash(sc, addr);
3418         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3419                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3420                 if (dir == 0)
3421                         return (brt);
3422                 if (dir > 0)
3423                         return (NULL);
3424         }
3425
3426         return (NULL);
3427 }
3428
3429 /*
3430  * bridge_rtnode_insert:
3431  *
3432  *      Insert the specified bridge node into the route table.
3433  *      Caller has to make sure that rtnode does not exist.
3434  */
3435 static void
3436 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3437 {
3438         struct bridge_rtnode *lbrt;
3439         uint32_t hash;
3440         int dir;
3441
3442         hash = bridge_rthash(sc, brt->brt_addr);
3443
3444         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3445         if (lbrt == NULL) {
3446                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3447                                   brt, brt_hash);
3448                 goto out;
3449         }
3450
3451         do {
3452                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3453                 KASSERT(dir != 0, ("rtnode already exist\n"));
3454
3455                 if (dir > 0) {
3456                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3457                         goto out;
3458                 }
3459                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3460                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3461                         goto out;
3462                 }
3463                 lbrt = LIST_NEXT(lbrt, brt_hash);
3464         } while (lbrt != NULL);
3465
3466         panic("no suitable position found for rtnode\n");
3467 out:
3468         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3469         if (mycpuid == 0) {
3470                 /*
3471                  * Update the brtcnt.
3472                  * We only need to do it once and we do it on CPU0.
3473                  */
3474                 sc->sc_brtcnt++;
3475         }
3476 }
3477
3478 /*
3479  * bridge_rtnode_destroy:
3480  *
3481  *      Destroy a bridge rtnode.
3482  */
3483 static void
3484 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3485 {
3486         LIST_REMOVE(brt, brt_hash);
3487         LIST_REMOVE(brt, brt_list);
3488
3489         if (mycpuid + 1 == ncpus) {
3490                 /* Free rtinfo associated with rtnode on the last cpu */
3491                 kfree(brt->brt_info, M_DEVBUF);
3492         }
3493         kfree(brt, M_DEVBUF);
3494
3495         if (mycpuid == 0) {
3496                 /* Update brtcnt only on CPU0 */
3497                 sc->sc_brtcnt--;
3498         }
3499 }
3500
3501 static __inline int
3502 bridge_post_pfil(struct mbuf *m)
3503 {
3504         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3505                 return EOPNOTSUPP;
3506
3507         /* Not yet */
3508         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3509                 return EOPNOTSUPP;
3510
3511         return 0;
3512 }
3513
3514 /*
3515  * Send bridge packets through pfil if they are one of the types pfil can deal
3516  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3517  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3518  * that interface.
3519  */
3520 static int
3521 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3522 {
3523         int snap, error, i, hlen;
3524         struct ether_header *eh1, eh2;
3525         struct ip *ip;
3526         struct llc llc1;
3527         u_int16_t ether_type;
3528
3529         snap = 0;
3530         error = -1;     /* Default error if not error == 0 */
3531
3532         if (pfil_bridge == 0 && pfil_member == 0)
3533                 return (0); /* filtering is disabled */
3534
3535         i = min((*mp)->m_pkthdr.len, max_protohdr);
3536         if ((*mp)->m_len < i) {
3537                 *mp = m_pullup(*mp, i);
3538                 if (*mp == NULL) {
3539                         kprintf("%s: m_pullup failed\n", __func__);
3540                         return (-1);
3541                 }
3542         }
3543
3544         eh1 = mtod(*mp, struct ether_header *);
3545         ether_type = ntohs(eh1->ether_type);
3546
3547         /*
3548          * Check for SNAP/LLC.
3549          */
3550         if (ether_type < ETHERMTU) {
3551                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3552
3553                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3554                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3555                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3556                     llc2->llc_control == LLC_UI) {
3557                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3558                         snap = 1;
3559                 }
3560         }
3561
3562         /*
3563          * If we're trying to filter bridge traffic, don't look at anything
3564          * other than IP and ARP traffic.  If the filter doesn't understand
3565          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3566          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3567          * but of course we don't have an AppleTalk filter to begin with.
3568          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3569          * ARP traffic.)
3570          */
3571         switch (ether_type) {
3572         case ETHERTYPE_ARP:
3573         case ETHERTYPE_REVARP:
3574                 return (0); /* Automatically pass */
3575
3576         case ETHERTYPE_IP:
3577 #ifdef INET6
3578         case ETHERTYPE_IPV6:
3579 #endif /* INET6 */
3580                 break;
3581
3582         default:
3583                 /*
3584                  * Check to see if the user wants to pass non-ip
3585                  * packets, these will not be checked by pfil(9)
3586                  * and passed unconditionally so the default is to drop.
3587                  */
3588                 if (pfil_onlyip)
3589                         goto bad;
3590         }
3591
3592         /* Strip off the Ethernet header and keep a copy. */
3593         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3594         m_adj(*mp, ETHER_HDR_LEN);
3595
3596         /* Strip off snap header, if present */
3597         if (snap) {
3598                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3599                 m_adj(*mp, sizeof(struct llc));
3600         }
3601
3602         /*
3603          * Check the IP header for alignment and errors
3604          */
3605         if (dir == PFIL_IN) {
3606                 switch (ether_type) {
3607                 case ETHERTYPE_IP:
3608                         error = bridge_ip_checkbasic(mp);
3609                         break;
3610 #ifdef INET6
3611                 case ETHERTYPE_IPV6:
3612                         error = bridge_ip6_checkbasic(mp);
3613                         break;
3614 #endif /* INET6 */
3615                 default:
3616                         error = 0;
3617                 }
3618                 if (error)
3619                         goto bad;
3620         }
3621
3622         error = 0;
3623
3624         /*
3625          * Run the packet through pfil
3626          */
3627         switch (ether_type) {
3628         case ETHERTYPE_IP:
3629                 /*
3630                  * before calling the firewall, swap fields the same as
3631                  * IP does. here we assume the header is contiguous
3632                  */
3633                 ip = mtod(*mp, struct ip *);
3634
3635                 ip->ip_len = ntohs(ip->ip_len);
3636                 ip->ip_off = ntohs(ip->ip_off);
3637
3638                 /*
3639                  * Run pfil on the member interface and the bridge, both can
3640                  * be skipped by clearing pfil_member or pfil_bridge.
3641                  *
3642                  * Keep the order:
3643                  *   in_if -> bridge_if -> out_if
3644                  */
3645                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3646                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3647                         if (*mp == NULL || error != 0) /* filter may consume */
3648                                 break;
3649                         error = bridge_post_pfil(*mp);
3650                         if (error)
3651                                 break;
3652                 }
3653
3654                 if (pfil_member && ifp != NULL) {
3655                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3656                         if (*mp == NULL || error != 0) /* filter may consume */
3657                                 break;
3658                         error = bridge_post_pfil(*mp);
3659                         if (error)
3660                                 break;
3661                 }
3662
3663                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3664                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3665                         if (*mp == NULL || error != 0) /* filter may consume */
3666                                 break;
3667                         error = bridge_post_pfil(*mp);
3668                         if (error)
3669                                 break;
3670                 }
3671
3672                 /* check if we need to fragment the packet */
3673                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3674                         i = (*mp)->m_pkthdr.len;
3675                         if (i > ifp->if_mtu) {
3676                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3677                                             &llc1);
3678                                 return (error);
3679                         }
3680                 }
3681
3682                 /* Recalculate the ip checksum and restore byte ordering */
3683                 ip = mtod(*mp, struct ip *);
3684                 hlen = ip->ip_hl << 2;
3685                 if (hlen < sizeof(struct ip))
3686                         goto bad;
3687                 if (hlen > (*mp)->m_len) {
3688                         if ((*mp = m_pullup(*mp, hlen)) == 0)
3689                                 goto bad;
3690                         ip = mtod(*mp, struct ip *);
3691                         if (ip == NULL)
3692                                 goto bad;
3693                 }
3694                 ip->ip_len = htons(ip->ip_len);
3695                 ip->ip_off = htons(ip->ip_off);
3696                 ip->ip_sum = 0;
3697                 if (hlen == sizeof(struct ip))
3698                         ip->ip_sum = in_cksum_hdr(ip);
3699                 else
3700                         ip->ip_sum = in_cksum(*mp, hlen);
3701
3702                 break;
3703 #ifdef INET6
3704         case ETHERTYPE_IPV6:
3705                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
3706                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3707                                         dir);
3708
3709                 if (*mp == NULL || error != 0) /* filter may consume */
3710                         break;
3711
3712                 if (pfil_member && ifp != NULL)
3713                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
3714                                         dir);
3715
3716                 if (*mp == NULL || error != 0) /* filter may consume */
3717                         break;
3718
3719                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
3720                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3721                                         dir);
3722                 break;
3723 #endif
3724         default:
3725                 error = 0;
3726                 break;
3727         }
3728
3729         if (*mp == NULL)
3730                 return (error);
3731         if (error != 0)
3732                 goto bad;
3733
3734         error = -1;
3735
3736         /*
3737          * Finally, put everything back the way it was and return
3738          */
3739         if (snap) {
3740                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
3741                 if (*mp == NULL)
3742                         return (error);
3743                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
3744         }
3745
3746         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
3747         if (*mp == NULL)
3748                 return (error);
3749         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
3750
3751         return (0);
3752
3753 bad:
3754         m_freem(*mp);
3755         *mp = NULL;
3756         return (error);
3757 }
3758
3759 /*
3760  * Perform basic checks on header size since
3761  * pfil assumes ip_input has already processed
3762  * it for it.  Cut-and-pasted from ip_input.c.
3763  * Given how simple the IPv6 version is,
3764  * does the IPv4 version really need to be
3765  * this complicated?
3766  *
3767  * XXX Should we update ipstat here, or not?
3768  * XXX Right now we update ipstat but not
3769  * XXX csum_counter.
3770  */
3771 static int
3772 bridge_ip_checkbasic(struct mbuf **mp)
3773 {
3774         struct mbuf *m = *mp;
3775         struct ip *ip;
3776         int len, hlen;
3777         u_short sum;
3778
3779         if (*mp == NULL)
3780                 return (-1);
3781 #if notyet
3782         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3783                 if ((m = m_copyup(m, sizeof(struct ip),
3784                         (max_linkhdr + 3) & ~3)) == NULL) {
3785                         /* XXXJRT new stat, please */
3786                         ipstat.ips_toosmall++;
3787                         goto bad;
3788                 }
3789         } else
3790 #endif
3791 #ifndef __predict_false
3792 #define __predict_false(x) x
3793 #endif
3794          if (__predict_false(m->m_len < sizeof (struct ip))) {
3795                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
3796                         ipstat.ips_toosmall++;
3797                         goto bad;
3798                 }
3799         }
3800         ip = mtod(m, struct ip *);
3801         if (ip == NULL) goto bad;
3802
3803         if (ip->ip_v != IPVERSION) {
3804                 ipstat.ips_badvers++;
3805                 goto bad;
3806         }
3807         hlen = ip->ip_hl << 2;
3808         if (hlen < sizeof(struct ip)) { /* minimum header length */
3809                 ipstat.ips_badhlen++;
3810                 goto bad;
3811         }
3812         if (hlen > m->m_len) {
3813                 if ((m = m_pullup(m, hlen)) == 0) {
3814                         ipstat.ips_badhlen++;
3815                         goto bad;
3816                 }
3817                 ip = mtod(m, struct ip *);
3818                 if (ip == NULL) goto bad;
3819         }
3820
3821         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
3822                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
3823         } else {
3824                 if (hlen == sizeof(struct ip)) {
3825                         sum = in_cksum_hdr(ip);
3826                 } else {
3827                         sum = in_cksum(m, hlen);
3828                 }
3829         }
3830         if (sum) {
3831                 ipstat.ips_badsum++;
3832                 goto bad;
3833         }
3834
3835         /* Retrieve the packet length. */
3836         len = ntohs(ip->ip_len);
3837
3838         /*
3839          * Check for additional length bogosity
3840          */
3841         if (len < hlen) {
3842                 ipstat.ips_badlen++;
3843                 goto bad;
3844         }
3845
3846         /*
3847          * Check that the amount of data in the buffers
3848          * is as at least much as the IP header would have us expect.
3849          * Drop packet if shorter than we expect.
3850          */
3851         if (m->m_pkthdr.len < len) {
3852                 ipstat.ips_tooshort++;
3853                 goto bad;
3854         }
3855
3856         /* Checks out, proceed */
3857         *mp = m;
3858         return (0);
3859
3860 bad:
3861         *mp = m;
3862         return (-1);
3863 }
3864
#ifdef INET6
/*
 * IPv6 counterpart of bridge_ip_checkbasic(), lifted from ip6_input.c.
 *
 * Makes sure the base IPv6 header is contiguous in the first mbuf and
 * carries the right version.  *mp is always updated to the current
 * head of the chain (NULL if m_pullup() failed).  Returns 0 on success
 * and -1 otherwise; the packet is not freed here.
 *
 * XXX Should we update ip6stat, or not?
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *pkt = *mp;
	struct ip6_hdr *hdr6;
	int error = -1;

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
#if notyet
	if (IP6_HDR_ALIGNED_P(mtod(pkt, caddr_t)) == 0) {
		struct ifnet *rcvif = pkt->m_pkthdr.rcvif;
		if ((pkt = m_copyup(pkt, sizeof(struct ip6_hdr),
			    (max_linkhdr + 3) & ~3)) == NULL) {
			/* XXXJRT new stat, please */
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(rcvif, ifs6_in_hdrerr);
			goto done;
		}
	} else
#endif
	if (__predict_false(pkt->m_len < sizeof(struct ip6_hdr))) {
		/* Remember the interface, pkt is NULL if the pullup fails. */
		struct ifnet *rcvif = pkt->m_pkthdr.rcvif;

		pkt = m_pullup(pkt, sizeof(struct ip6_hdr));
		if (pkt == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(rcvif, ifs6_in_hdrerr);
			goto done;
		}
	}

	hdr6 = mtod(pkt, struct ip6_hdr *);

	if ((hdr6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(pkt->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto done;
	}

	/* Checks out, proceed */
	error = 0;

done:
	*mp = pkt;
	return (error);
}
#endif /* INET6 */
3921
3922 /*
3923  * bridge_fragment:
3924  *
3925  *      Return a fragmented mbuf chain.
3926  */
3927 static int
3928 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
3929     int snap, struct llc *llc)
3930 {
3931         struct mbuf *m0;
3932         struct ip *ip;
3933         int error = -1;
3934
3935         if (m->m_len < sizeof(struct ip) &&
3936             (m = m_pullup(m, sizeof(struct ip))) == NULL)
3937                 goto out;
3938         ip = mtod(m, struct ip *);
3939
3940         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
3941                     CSUM_DELAY_IP);
3942         if (error)
3943                 goto out;
3944
3945         /* walk the chain and re-add the Ethernet header */
3946         for (m0 = m; m0; m0 = m0->m_nextpkt) {
3947                 if (error == 0) {
3948                         if (snap) {
3949                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
3950                                 if (m0 == NULL) {
3951                                         error = ENOBUFS;
3952                                         continue;
3953                                 }
3954                                 bcopy(llc, mtod(m0, caddr_t),
3955                                     sizeof(struct llc));
3956                         }
3957                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
3958                         if (m0 == NULL) {
3959                                 error = ENOBUFS;
3960                                 continue;
3961                         }
3962                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
3963                 } else 
3964                         m_freem(m);
3965         }
3966
3967         if (error == 0)
3968                 ipstat.ips_fragmented++;
3969
3970         return (error);
3971
3972 out:
3973         if (m != NULL)
3974                 m_freem(m);
3975         return (error);
3976 }
3977
3978 static void
3979 bridge_enqueue_handler(netmsg_t msg)
3980 {
3981         struct netmsg_packet *nmp;
3982         struct ifnet *dst_ifp;
3983         struct mbuf *m;
3984
3985         nmp = &msg->packet;
3986         m = nmp->nm_packet;
3987         dst_ifp = nmp->base.lmsg.u.ms_resultp;
3988
3989         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
3990 }
3991
3992 static void
3993 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
3994                struct mbuf *m, int from_us)
3995 {
3996         struct mbuf *m0;
3997         struct ifnet *bifp;
3998
3999         bifp = sc->sc_ifp;
4000
4001         /* We may be sending a fragment so traverse the mbuf */
4002         for (; m; m = m0) {
4003                 struct altq_pktattr pktattr;
4004
4005                 m0 = m->m_nextpkt;
4006                 m->m_nextpkt = NULL;
4007
4008                 /*
4009                  * If being sent from our host override ether_shost
4010                  * with the bridge MAC.  This is mandatory for ARP
4011                  * so things don't get confused.  In particular we
4012                  * don't want ARPs to get associated with link interfaces
4013                  * under the bridge which might or might not stay valid.
4014                  *
4015                  * Also override ether_shost when relaying a packet out
4016                  * the same interface it came in on, due to multi-homed
4017                  * addresses & default routes, otherwise switches will
4018                  * get very confused.
4019                  *
4020                  * Otherwise if we are in transparent mode.
4021                  */
4022                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4023                         m_copyback(m,
4024                                    offsetof(struct ether_header, ether_shost),
4025                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4026                 } else if ((bifp->if_flags & IFF_LINK0) &&
4027                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4028                         m_copyback(m,
4029                                    offsetof(struct ether_header, ether_shost),
4030                                    ETHER_ADDR_LEN,
4031                                    m->m_pkthdr.br.ether.ether_shost);
4032                 } /* else retain shost */
4033
4034                 if (ifq_is_enabled(&dst_ifp->if_snd))
4035                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4036
4037                 ifq_dispatch(dst_ifp, m, &pktattr);
4038         }
4039 }
4040
4041 static void
4042 bridge_control_dispatch(netmsg_t msg)
4043 {
4044         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4045         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4046         int error;
4047
4048         ifnet_serialize_all(bifp);
4049         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4050         ifnet_deserialize_all(bifp);
4051
4052         lwkt_replymsg(&bc_msg->base.lmsg, error);
4053 }
4054
4055 static int
4056 bridge_control(struct bridge_softc *sc, u_long cmd,
4057                bridge_ctl_t bc_func, void *bc_arg)
4058 {
4059         struct ifnet *bifp = sc->sc_ifp;
4060         struct netmsg_brctl bc_msg;
4061         int error;
4062
4063         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4064
4065         bzero(&bc_msg, sizeof(bc_msg));
4066
4067         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4068                     0, bridge_control_dispatch);
4069         bc_msg.bc_func = bc_func;
4070         bc_msg.bc_sc = sc;
4071         bc_msg.bc_arg = bc_arg;
4072
4073         ifnet_deserialize_all(bifp);
4074         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4075         ifnet_serialize_all(bifp);
4076         return error;
4077 }
4078
4079 static void
4080 bridge_add_bif_handler(netmsg_t msg)
4081 {
4082         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4083         struct bridge_softc *sc;
4084         struct bridge_iflist *bif;
4085
4086         sc = amsg->br_softc;
4087
4088         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4089         bif->bif_ifp = amsg->br_bif_ifp;
4090         bif->bif_onlist = 1;
4091         bif->bif_info = amsg->br_bif_info;
4092
4093         /*
4094          * runs through bif_info
4095          */
4096         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4097
4098         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4099
4100         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4101 }
4102
4103 static void
4104 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4105                struct ifnet *ifp)
4106 {
4107         struct netmsg_braddbif amsg;
4108
4109         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4110
4111         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4112                     0, bridge_add_bif_handler);
4113         amsg.br_softc = sc;
4114         amsg.br_bif_info = bif_info;
4115         amsg.br_bif_ifp = ifp;
4116
4117         ifnet_domsg(&amsg.base.lmsg, 0);
4118 }
4119
4120 static void
4121 bridge_del_bif_handler(netmsg_t msg)
4122 {
4123         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4124         struct bridge_softc *sc;
4125         struct bridge_iflist *bif;
4126
4127         sc = dmsg->br_softc;
4128
4129         /*
4130          * Locate the bif associated with the br_bif_info
4131          * on the current CPU
4132          */
4133         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4134         KKASSERT(bif != NULL && bif->bif_onlist);
4135
4136         /* Remove the bif from the current CPU's iflist */
4137         bif->bif_onlist = 0;
4138         TAILQ_REMOVE(dmsg->br_bif_list, bif, bif_next);
4139
4140         /* Save the removed bif for later freeing */
4141         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4142
4143         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4144 }
4145
4146 static void
4147 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4148                struct bridge_iflist_head *saved_bifs)
4149 {
4150         struct netmsg_brdelbif dmsg;
4151
4152         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4153
4154         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4155                     0, bridge_del_bif_handler);
4156         dmsg.br_softc = sc;
4157         dmsg.br_bif_info = bif_info;
4158         dmsg.br_bif_list = saved_bifs;
4159
4160         ifnet_domsg(&dmsg.base.lmsg, 0);
4161 }