Merge branch 'vendor/GCC50'
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *      - Currently only supports Ethernet-like interfaces (Ethernet,
77  *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *        consider heterogeneous bridges).
80  *
81  *
82  * The bridge's route information is duplicated on each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
100  * We choose to put timeout and dst_ifp into the shared part, so updating
101  * them is cheaper than using message forwarding.  Also, there is no
102  * need for a spinlock to protect the updates: timeout and dst_ifp are
103  * unrelated, and the order in which the fields are updated does not matter.
104  * The cache pollution caused by the shared part should not be heavy: in a
105  * stable setup, dst_ifp will probably not change during the rtnode's
106  * lifetime, while timeout is refreshed once per second; most of the time,
107  * timeout and dst_ifp are only read.
108  *
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                               tcp_thread2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *      ifnet0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |
125  *  alloc rtinfo
126  *  alloc rtnode
127  * install rtnode
128  *        |
129  *        +---------->ifnet1
130  *        : fwd nmsg    |
131  *        : w/ rtinfo   |
132  *        :             |
133  *        :             |
134  *                 alloc rtnode
135  *               (w/ nmsg's rtinfo)
136  *                install rtnode
137  *                      |
138  *                      +---------->ifnet2
139  *                      : fwd nmsg    |
140  *                      : w/ rtinfo   |
141  *                      :             |
142  *                      :         same as ifnet1
143  *                                    |
144  *                                    +---------->ifnet3
145  *                                    : fwd nmsg    |
146  *                                    : w/ rtinfo   |
147  *                                    :             |
148  *                                    :         same as ifnet1
149  *                                               free nmsg
150  *                                                  :
151  *                                                  :
152  *
153  * The netmsgs forwarded between protocol threads and ifnet threads are
154  * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
155  * most cases (route information is too precious not to be installed :).
156  * Since multiple threads may try to install route information for the
157  * same dst eaddr, we look up the route information in ifnet0.  However,
158  * this lookup only needs to be performed on ifnet0, which is the starting
159  * point of the route information installation process.
160  *
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1             CPU2             CPU3
165  *
166  * netisr0
167  *   |
168  * find suitable rtnodes,
169  * mark their rtinfo dead
170  *   |
171  *   | domsg <------------------------------------------+
172  *   |                                                  | replymsg
173  *   |                                                  |
174  *   V     fwdmsg           fwdmsg           fwdmsg     |
175  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
176  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
177  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
178  *                                                    free dead rtinfos
179  *
180  * All deleting/flushing operations are serialized by netisr0, so each
181  * operation only reaps the route information marked dead by itself.
182  *
183  *
184  * Bridge route information adding/deleting/flushing:
185  * Since all operations are serialized by the fixed message flow between
186  * ifnet threads, it is not possible to create corrupted per-cpu route
187  * information.
188  *
189  *
190  *
191  * Percpu member interface list iteration with blocking operation:
192  * Since a bridge can only delete one member interface at a time and
193  * the deleted member interface is not freed until after
194  * netmsg_service_sync(), the following approach is used to make sure that
195  * even if a member interface is ripped from the percpu list during a
196  * blocking operation, the iteration can still keep going:
197  *
198  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
199  *     blocking operation;
200  *     blocking operation;
201  *     ...
202  *     ...
203  *     if (nbif != NULL && !nbif->bif_onlist) {
204  *         KKASSERT(bif->bif_onlist);
205  *         nbif = TAILQ_NEXT(bif, bif_next);
206  *     }
207  * }
208  *
209  * As mentioned above, only one member interface can be unlinked from the
210  * percpu member interface list, so either bif or nbif may be off the list,
211  * but _not_ both.  To keep the iteration going, we don't care about bif,
212  * only nbif.  Since a removed member interface is only freed after we
213  * finish our work, it is safe to access any field in an unlinked bif (here
214  * bif_onlist).  If nbif is no longer on the list, then bif must be on the
215  * list, so we change nbif to the next element of bif and keep going.
216  */
217
218 #include "opt_inet.h"
219 #include "opt_inet6.h"
220
221 #include <sys/param.h>
222 #include <sys/mbuf.h>
223 #include <sys/malloc.h>
224 #include <sys/protosw.h>
225 #include <sys/systm.h>
226 #include <sys/time.h>
227 #include <sys/socket.h> /* for net/if.h */
228 #include <sys/sockio.h>
229 #include <sys/ctype.h>  /* string functions */
230 #include <sys/kernel.h>
231 #include <sys/random.h>
232 #include <sys/sysctl.h>
233 #include <sys/module.h>
234 #include <sys/proc.h>
235 #include <sys/priv.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263 #include <net/netisr2.h>
264
265 #include <net/route.h>
266 #include <sys/in_cksum.h>
267
268 /*
269  * Size of the route hash table.  Must be a power of two.
270  */
271 #ifndef BRIDGE_RTHASH_SIZE
272 #define BRIDGE_RTHASH_SIZE              1024
273 #endif
274
275 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
276
277 /*
278  * Maximum number of addresses to cache.
279  */
280 #ifndef BRIDGE_RTABLE_MAX
281 #define BRIDGE_RTABLE_MAX               4096
282 #endif
283
284 /*
285  * Spanning tree defaults.
286  */
287 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
288 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
289 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
290 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
291 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
292 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
293 #define BSTP_DEFAULT_PATH_COST          55
294
295 /*
296  * Timeout (in seconds) for entries learned dynamically.
297  */
298 #ifndef BRIDGE_RTABLE_TIMEOUT
299 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
300 #endif
301
302 /*
303  * Number of seconds between walks of the route list.
304  */
305 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
306 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
307 #endif
308
309 /*
310  * List of capabilities to mask on the member interface.
311  */
312 #define BRIDGE_IFCAPS_MASK              (IFCAP_TXCSUM | IFCAP_TSO)
313
314 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
315
316 struct netmsg_brctl {               /* netmsg wrapping one bridge_ctl_t call; presumably built by bridge_control() -- confirm */
317         struct netmsg_base      base;
318         bridge_ctl_t            bc_func;        /* control function: int (*)(struct bridge_softc *, void *) */
319         struct bridge_softc     *bc_sc;         /* presumably bc_func's first argument -- confirm */
320         void                    *bc_arg;        /* presumably bc_func's second argument -- confirm */
321 };
322
323 struct netmsg_brsaddr {             /* fields parallel bridge_rtinstall_oncpu()'s arguments; presumably the route-install netmsg -- confirm */
324         struct netmsg_base      base;
325         struct bridge_softc     *br_softc;
326         struct ifnet            *br_dst_if;
327         struct bridge_rtinfo    *br_rtinfo;     /* NOTE(review): looks like the shared rtinfo forwarded between cpus (see file header) -- confirm */
328         int                     br_setflags;
329         uint8_t                 br_dst[ETHER_ADDR_LEN]; /* Ethernet-address-sized buffer */
330         uint8_t                 br_flags;
331 };
332
333 struct netmsg_braddbif {            /* fields parallel bridge_add_bif()'s arguments; presumably consumed by bridge_add_bif_handler() -- confirm */
334         struct netmsg_base      base;
335         struct bridge_softc     *br_softc;
336         struct bridge_ifinfo    *br_bif_info;
337         struct ifnet            *br_bif_ifp;
338 };
339
340 struct netmsg_brdelbif {            /* fields parallel bridge_del_bif()'s arguments; presumably consumed by bridge_del_bif_handler() -- confirm */
341         struct netmsg_base      base;
342         struct bridge_softc     *br_softc;
343         struct bridge_ifinfo    *br_bif_info;
344         struct bridge_iflist_head *br_bif_list;
345 };
346
347 struct netmsg_brsflags {            /* presumably carries new flags for one member's bridge_ifinfo -- confirm against the sender */
348         struct netmsg_base      base;
349         struct bridge_softc     *br_softc;
350         struct bridge_ifinfo    *br_bif_info;
351         uint32_t                br_bif_flags;
352 };
353
354 eventhandler_tag        bridge_detach_cookie = NULL;
355
356 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
357 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
358 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
359 extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);
360
361 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
362
363 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
364 static int      bridge_clone_destroy(struct ifnet *);
365
366 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
367 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
368 static void     bridge_ifdetach(void *, struct ifnet *);
369 static void     bridge_init(void *);
370 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
371 static void     bridge_stop(struct ifnet *);
372 static void     bridge_start(struct ifnet *, struct ifaltq_subque *);
373 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
374 static int      bridge_output(struct ifnet *, struct mbuf *);
375 static struct ifnet *bridge_interface(void *if_bridge);
376
377 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
378
379 static void     bridge_timer_handler(netmsg_t);
380 static void     bridge_timer(void *);
381
382 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
383 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
384                     struct mbuf *);
385 static void     bridge_span(struct bridge_softc *, struct mbuf *);
386
387 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
388                     struct ifnet *, uint8_t);
389 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
390 static void     bridge_rtreap(struct bridge_softc *);
391 static void     bridge_rtreap_async(struct bridge_softc *);
392 static void     bridge_rttrim(struct bridge_softc *);
393 static int      bridge_rtage_finddead(struct bridge_softc *);
394 static void     bridge_rtage(struct bridge_softc *);
395 static void     bridge_rtflush(struct bridge_softc *, int);
396 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
397 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
398                     struct ifnet *, uint8_t);
399 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
400 static void     bridge_rtreap_handler(netmsg_t);
401 static void     bridge_rtinstall_handler(netmsg_t);
402 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
403                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
404
405 static void     bridge_rtable_init(struct bridge_softc *);
406 static void     bridge_rtable_fini(struct bridge_softc *);
407
408 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
409 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
410                     const uint8_t *);
411 static void     bridge_rtnode_insert(struct bridge_softc *,
412                     struct bridge_rtnode *);
413 static void     bridge_rtnode_destroy(struct bridge_softc *,
414                     struct bridge_rtnode *);
415
416 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
417                     const char *name);
418 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
419                     struct ifnet *ifp);
420 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
421                     struct bridge_ifinfo *);
422 static void     bridge_delete_member(struct bridge_softc *,
423                     struct bridge_iflist *, int);
424 static void     bridge_delete_span(struct bridge_softc *,
425                     struct bridge_iflist *);
426
427 static int      bridge_control(struct bridge_softc *, u_long,
428                                bridge_ctl_t, void *);
429 static int      bridge_ioctl_init(struct bridge_softc *, void *);
430 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
431 static int      bridge_ioctl_add(struct bridge_softc *, void *);
432 static int      bridge_ioctl_del(struct bridge_softc *, void *);
433 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
434                                 struct bridge_iflist *bif, struct ifbreq *req);
435 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
436 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
437 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
439 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
440 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
441 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
442 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
443 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
444 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
445 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
446 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
447 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
448 static int      bridge_ioctl_reinit(struct bridge_softc *, void *);
449 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
450 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
451 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
452 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
453 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
455 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
456 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
457 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
458 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
459 static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
460 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
461                     int);
462 static int      bridge_ip_checkbasic(struct mbuf **mp);
463 #ifdef INET6
464 static int      bridge_ip6_checkbasic(struct mbuf **mp);
465 #endif /* INET6 */
466 static int      bridge_fragment(struct ifnet *, struct mbuf *,
467                     struct ether_header *, int, struct llc *);
468 static void     bridge_enqueue_handler(netmsg_t);
469 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
470                     struct mbuf *, int);
471
472 static void     bridge_del_bif_handler(netmsg_t);
473 static void     bridge_add_bif_handler(netmsg_t);
474 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
475                     struct bridge_iflist_head *);
476 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
477                     struct ifnet *);
478
479 SYSCTL_DECL(_net_link);
480 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
481
482 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
483 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
484 static int pfil_member = 1; /* run pfil hooks on the member interface */
485 static int bridge_debug;
486 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
487     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
488 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
489     &pfil_bridge, 0, "Packet filter on the bridge interface");
490 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
491     &pfil_member, 0, "Packet filter on the member interface");
492 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
493     &bridge_debug, 0, "Bridge debug mode");
494
495 struct bridge_control_arg {
496         union {                         /* one member per argument structure type sized in bridge_control_table */
497                 struct ifbreq ifbreq;
498                 struct ifbifconf ifbifconf;
499                 struct ifbareq ifbareq;
500                 struct ifbaconf ifbaconf;
501                 struct ifbrparam ifbrparam;
502         } bca_u;
503         int     bca_len;
504         void    *bca_uptr;              /* presumably a user-space buffer pointer -- confirm */
505         void    *bca_kptr;              /* presumably a kernel-space buffer pointer -- confirm */
506 };
507
508 struct bridge_control {             /* one entry of bridge_control_table */
509         bridge_ctl_t    bc_func;        /* handler for the command */
510         int             bc_argsize;     /* sizeof() the command's argument structure */
511         int             bc_flags;       /* BC_F_* bits */
512 };
513
514 #define BC_F_COPYIN             0x01    /* copy arguments in */
515 #define BC_F_COPYOUT            0x02    /* copy arguments out */
516 #define BC_F_SUSER              0x04    /* do super-user check */
517
518 const struct bridge_control bridge_control_table[] = {      /* indexed by ifd->ifd_cmd in bridge_ioctl(): entry order is the command numbering, so do not reorder */
519         { bridge_ioctl_add,             sizeof(struct ifbreq),
520           BC_F_COPYIN|BC_F_SUSER },
521         { bridge_ioctl_del,             sizeof(struct ifbreq),
522           BC_F_COPYIN|BC_F_SUSER },
523
524         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
525           BC_F_COPYIN|BC_F_COPYOUT },
526         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
527           BC_F_COPYIN|BC_F_SUSER },
528
529         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
530           BC_F_COPYIN|BC_F_SUSER },
531         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
532           BC_F_COPYOUT },
533
534         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
535           BC_F_COPYIN|BC_F_COPYOUT },
536         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
537           BC_F_COPYIN|BC_F_COPYOUT },
538
539         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
540           BC_F_COPYIN|BC_F_SUSER },
541
542         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
543           BC_F_COPYIN|BC_F_SUSER },
544         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
545           BC_F_COPYOUT },
546
547         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
548           BC_F_COPYIN|BC_F_SUSER },
549
550         { bridge_ioctl_flush,           sizeof(struct ifbreq),
551           BC_F_COPYIN|BC_F_SUSER },
552
553         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
554           BC_F_COPYOUT },
555         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
556           BC_F_COPYIN|BC_F_SUSER },
557
558         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
559           BC_F_COPYOUT },
560         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
561           BC_F_COPYIN|BC_F_SUSER },
562
563         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
564           BC_F_COPYOUT },
565         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
566           BC_F_COPYIN|BC_F_SUSER },
567
568         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
569           BC_F_COPYOUT },
570         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
571           BC_F_COPYIN|BC_F_SUSER },
572
573         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
574           BC_F_COPYIN|BC_F_SUSER },
575
576         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
577           BC_F_COPYIN|BC_F_SUSER },
578
579         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
580           BC_F_COPYIN|BC_F_SUSER },
581         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
582           BC_F_COPYIN|BC_F_SUSER },
583
584         { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
585           BC_F_COPYIN|BC_F_SUSER },
586
587 };
588 static const int bridge_control_table_size = NELEM(bridge_control_table);
589
590 LIST_HEAD(, bridge_softc) bridge_list;
591
592 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
593                                 bridge_clone_create,
594                                 bridge_clone_destroy, 0, IF_MAXUNIT);
595
596 static int
597 bridge_modevent(module_t mod, int type, void *data)
598 {
599         switch (type) {
600         case MOD_LOAD:
601                 LIST_INIT(&bridge_list);
602                 if_clone_attach(&bridge_cloner);
603                 bridge_input_p = bridge_input;
604                 bridge_output_p = bridge_output;
605                 bridge_interface_p = bridge_interface;
606                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
607                     ifnet_detach_event, bridge_ifdetach, NULL,
608                     EVENTHANDLER_PRI_ANY);
609 #if 0 /* notyet */
610                 bstp_linkstate_p = bstp_linkstate;
611 #endif
612                 break;
613         case MOD_UNLOAD:
614                 if (!LIST_EMPTY(&bridge_list))
615                         return (EBUSY);
616                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
617                     bridge_detach_cookie);
618                 if_clone_detach(&bridge_cloner);
619                 bridge_input_p = NULL;
620                 bridge_output_p = NULL;
621                 bridge_interface_p = NULL;
622 #if 0 /* notyet */
623                 bstp_linkstate_p = NULL;
624 #endif
625                 break;
626         default:
627                 return (EOPNOTSUPP);
628         }
629         return (0);
630 }
631
632 static moduledata_t bridge_mod = {
633         "if_bridge",
634         bridge_modevent,
635         0
636 };
637
638 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
639
640
641 /*
642  * bridge_clone_create:
643  *
644  *      Create a new bridge instance.
645  */
646 static int
647 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
648 {
649         struct bridge_softc *sc;
650         struct ifnet *ifp;
651         u_char eaddr[6];
652         int cpu, rnd;
653
654         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
655         ifp = sc->sc_ifp = &sc->sc_if;
656
657         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
658         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
659         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
660         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
661         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
662         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
663         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
664
665         /* Initialize our routing table. */
666         bridge_rtable_init(sc);
667
668         callout_init(&sc->sc_brcallout);
669         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
670                     MSGF_DROPABLE, bridge_timer_handler);
671         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
672
673         callout_init(&sc->sc_bstpcallout);
674         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
675                     MSGF_DROPABLE, bstp_tick_handler);
676         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
677
678         /* Initialize per-cpu member iface lists */
679         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
680                                  M_DEVBUF, M_WAITOK);
681         for (cpu = 0; cpu < ncpus; ++cpu)
682                 TAILQ_INIT(&sc->sc_iflists[cpu]);
683
684         TAILQ_INIT(&sc->sc_spanlist);
685
686         ifp->if_softc = sc;
687         if_initname(ifp, ifc->ifc_name, unit);
688         ifp->if_mtu = ETHERMTU;
689         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
690         ifp->if_ioctl = bridge_ioctl;
691         ifp->if_start = bridge_start;
692         ifp->if_init = bridge_init;
693         ifp->if_type = IFT_ETHER;
694         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
695         ifq_set_ready(&ifp->if_snd);
696         ifp->if_hdrlen = ETHER_HDR_LEN;
697
698         /*
699          * Generate a random ethernet address and use the private AC:DE:48
700          * OUI code.
701          */
702         rnd = karc4random();
703         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
704         rnd = karc4random();
705         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
706
707         eaddr[0] &= ~1; /* clear multicast bit */
708         eaddr[0] |= 2;  /* set the LAA bit */
709
710         ether_ifattach(ifp, eaddr, NULL);
711         /* Now undo some of the damage... */
712         ifp->if_baudrate = 0;
713         /*ifp->if_type = IFT_BRIDGE;*/
714
715         crit_enter();   /* XXX MP */
716         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
717         crit_exit();
718
719         return (0);
720 }
721
722 static void
723 bridge_delete_dispatch(netmsg_t msg)
724 {
725         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
726         struct ifnet *bifp = sc->sc_ifp;
727         struct bridge_iflist *bif;
728
729         ifnet_serialize_all(bifp);
730
731         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
732                 bridge_delete_member(sc, bif, 0);
733
734         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
735                 bridge_delete_span(sc, bif);
736
737         ifnet_deserialize_all(bifp);
738
739         lwkt_replymsg(&msg->lmsg, 0);
740 }
741
742 /*
743  * bridge_clone_destroy:
744  *
745  *      Destroy a bridge instance.
746  */
747 static int
748 bridge_clone_destroy(struct ifnet *ifp)
749 {
750         struct bridge_softc *sc = ifp->if_softc;
751         struct netmsg_base msg;
752
753         ifnet_serialize_all(ifp);
754
755         bridge_stop(ifp);
756         ifp->if_flags &= ~IFF_UP;
757
758         ifnet_deserialize_all(ifp);
759
760         netmsg_init(&msg, NULL, &curthread->td_msgport,
761                     0, bridge_delete_dispatch);
762         msg.lmsg.u.ms_resultp = sc;
763         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
764
765         crit_enter();   /* XXX MP */
766         LIST_REMOVE(sc, sc_list);
767         crit_exit();
768
769         ether_ifdetach(ifp);
770
771         /* Tear down the routing table. */
772         bridge_rtable_fini(sc);
773
774         /* Free per-cpu member iface lists */
775         kfree(sc->sc_iflists, M_DEVBUF);
776
777         kfree(sc, M_DEVBUF);
778
779         return 0;
780 }
781
782 /*
783  * bridge_ioctl:
784  *
785  *      Handle a control request from the operator.
786  */
787 static int
788 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
789 {
790         struct bridge_softc *sc = ifp->if_softc;
791         struct bridge_control_arg args;
792         struct ifdrv *ifd = (struct ifdrv *) data;
793         const struct bridge_control *bc;
794         int error = 0;
795
796         ASSERT_IFNET_SERIALIZED_ALL(ifp);
797
798         switch (cmd) {
799         case SIOCADDMULTI:
800         case SIOCDELMULTI:
801                 break;
802
803         case SIOCGDRVSPEC:
804         case SIOCSDRVSPEC:
805                 if (ifd->ifd_cmd >= bridge_control_table_size) {
806                         error = EINVAL;
807                         break;
808                 }
809                 bc = &bridge_control_table[ifd->ifd_cmd];
810
811                 if (cmd == SIOCGDRVSPEC &&
812                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
813                         error = EINVAL;
814                         break;
815                 } else if (cmd == SIOCSDRVSPEC &&
816                            (bc->bc_flags & BC_F_COPYOUT)) {
817                         error = EINVAL;
818                         break;
819                 }
820
821                 if (bc->bc_flags & BC_F_SUSER) {
822                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
823                         if (error)
824                                 break;
825                 }
826
827                 if (ifd->ifd_len != bc->bc_argsize ||
828                     ifd->ifd_len > sizeof(args.bca_u)) {
829                         error = EINVAL;
830                         break;
831                 }
832
833                 memset(&args, 0, sizeof(args));
834                 if (bc->bc_flags & BC_F_COPYIN) {
835                         error = copyin(ifd->ifd_data, &args.bca_u,
836                                        ifd->ifd_len);
837                         if (error)
838                                 break;
839                 }
840
841                 error = bridge_control(sc, cmd, bc->bc_func, &args);
842                 if (error) {
843                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
844                         break;
845                 }
846
847                 if (bc->bc_flags & BC_F_COPYOUT) {
848                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
849                         if (args.bca_len != 0) {
850                                 KKASSERT(args.bca_kptr != NULL);
851                                 if (!error) {
852                                         error = copyout(args.bca_kptr,
853                                                 args.bca_uptr, args.bca_len);
854                                 }
855                                 kfree(args.bca_kptr, M_TEMP);
856                         } else {
857                                 KKASSERT(args.bca_kptr == NULL);
858                         }
859                 } else {
860                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
861                 }
862                 break;
863
864         case SIOCSIFFLAGS:
865                 if (!(ifp->if_flags & IFF_UP) &&
866                     (ifp->if_flags & IFF_RUNNING)) {
867                         /*
868                          * If interface is marked down and it is running,
869                          * then stop it.
870                          */
871                         bridge_stop(ifp);
872                 } else if ((ifp->if_flags & IFF_UP) &&
873                     !(ifp->if_flags & IFF_RUNNING)) {
874                         /*
875                          * If interface is marked up and it is stopped, then
876                          * start it.
877                          */
878                         ifp->if_init(sc);
879                 }
880
881                 /*
882                  * If running and link flag state change we have to
883                  * reinitialize as well.
884                  */
885                 if ((ifp->if_flags & IFF_RUNNING) &&
886                     (ifp->if_flags & (IFF_LINK0|IFF_LINK1|IFF_LINK2)) !=
887                     sc->sc_copy_flags) {
888                         sc->sc_copy_flags = ifp->if_flags &
889                                         (IFF_LINK0|IFF_LINK1|IFF_LINK2);
890                         bridge_control(sc, 0, bridge_ioctl_reinit, NULL);
891                 }
892
893                 break;
894
895         case SIOCSIFMTU:
896                 /* Do not allow the MTU to be changed on the bridge */
897                 error = EINVAL;
898                 break;
899
900         default:
901                 error = ether_ioctl(ifp, cmd, data);
902                 break;
903         }
904         return (error);
905 }
906
907 /*
908  * bridge_mutecaps:
909  *
910  *      Clear or restore unwanted capabilities on the member interface
911  */
912 static void
913 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
914 {
915         struct ifreq ifr;
916
917         if (ifp->if_ioctl == NULL)
918                 return;
919
920         bzero(&ifr, sizeof(ifr));
921         ifr.ifr_reqcap = ifp->if_capenable;
922
923         if (mute) {
924                 /* mask off and save capabilities */
925                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
926                 if (bif_info->bifi_mutecap != 0)
927                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
928         } else {
929                 /* restore muted capabilities */
930                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
931         }
932
933         if (bif_info->bifi_mutecap != 0) {
934                 ifnet_serialize_all(ifp);
935                 ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
936                 ifnet_deserialize_all(ifp);
937         }
938 }
939
940 /*
941  * bridge_lookup_member:
942  *
943  *      Lookup a bridge member interface.
944  */
945 static struct bridge_iflist *
946 bridge_lookup_member(struct bridge_softc *sc, const char *name)
947 {
948         struct bridge_iflist *bif;
949
950         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
951                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
952                         return (bif);
953         }
954         return (NULL);
955 }
956
957 /*
958  * bridge_lookup_member_if:
959  *
960  *      Lookup a bridge member interface by ifnet*.
961  */
962 static struct bridge_iflist *
963 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
964 {
965         struct bridge_iflist *bif;
966
967         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
968                 if (bif->bif_ifp == member_ifp)
969                         return (bif);
970         }
971         return (NULL);
972 }
973
974 /*
975  * bridge_lookup_member_ifinfo:
976  *
977  *      Lookup a bridge member interface by bridge_ifinfo.
978  */
979 static struct bridge_iflist *
980 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
981                             struct bridge_ifinfo *bif_info)
982 {
983         struct bridge_iflist *bif;
984
985         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
986                 if (bif->bif_info == bif_info)
987                         return (bif);
988         }
989         return (NULL);
990 }
991
992 /*
993  * bridge_delete_member:
994  *
995  *      Delete the specified member interface.
996  */
997 static void
998 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
999     int gone)
1000 {
1001         struct ifnet *ifs = bif->bif_ifp;
1002         struct ifnet *bifp = sc->sc_ifp;
1003         struct bridge_ifinfo *bif_info = bif->bif_info;
1004         struct bridge_iflist_head saved_bifs;
1005
1006         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1007         KKASSERT(bif_info != NULL);
1008
1009         ifs->if_bridge = NULL;
1010
1011         /*
1012          * Release bridge interface's serializer:
1013          * - To avoid possible dead lock.
1014          * - Various sync operation will block the current thread.
1015          */
1016         ifnet_deserialize_all(bifp);
1017
1018         if (!gone) {
1019                 switch (ifs->if_type) {
1020                 case IFT_ETHER:
1021                 case IFT_L2VLAN:
1022                         /*
1023                          * Take the interface out of promiscuous mode.
1024                          */
1025                         ifpromisc(ifs, 0);
1026                         bridge_mutecaps(bif_info, ifs, 0);
1027                         break;
1028
1029                 case IFT_GIF:
1030                         break;
1031
1032                 default:
1033                         panic("bridge_delete_member: impossible");
1034                         break;
1035                 }
1036         }
1037
1038         /*
1039          * Remove bifs from percpu linked list.
1040          *
1041          * Removed bifs are not freed immediately, instead,
1042          * they are saved in saved_bifs.  They will be freed
1043          * after we make sure that no one is accessing them,
1044          * i.e. after following netmsg_service_sync()
1045          */
1046         TAILQ_INIT(&saved_bifs);
1047         bridge_del_bif(sc, bif_info, &saved_bifs);
1048
1049         /*
1050          * Make sure that all protocol threads:
1051          * o  see 'ifs' if_bridge is changed
1052          * o  know that bif is removed from the percpu linked list
1053          */
1054         netmsg_service_sync();
1055
1056         /*
1057          * Free the removed bifs
1058          */
1059         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1060         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1061                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1062                 kfree(bif, M_DEVBUF);
1063         }
1064
1065         /* See the comment in bridge_ioctl_stop() */
1066         bridge_rtmsg_sync(sc);
1067         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1068
1069         ifnet_serialize_all(bifp);
1070
1071         if (bifp->if_flags & IFF_RUNNING)
1072                 bstp_initialization(sc);
1073
1074         /*
1075          * Free the bif_info after bstp_initialization(), so that
1076          * bridge_softc.sc_root_port will not reference a dangling
1077          * pointer.
1078          */
1079         kfree(bif_info, M_DEVBUF);
1080 }
1081
1082 /*
1083  * bridge_delete_span:
1084  *
1085  *      Delete the specified span interface.
1086  */
1087 static void
1088 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1089 {
1090         KASSERT(bif->bif_ifp->if_bridge == NULL,
1091             ("%s: not a span interface", __func__));
1092
1093         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1094         kfree(bif, M_DEVBUF);
1095 }
1096
1097 static int
1098 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1099 {
1100         struct ifnet *ifp = sc->sc_ifp;
1101
1102         if (ifp->if_flags & IFF_RUNNING)
1103                 return 0;
1104
1105         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1106             bridge_timer, sc);
1107
1108         ifp->if_flags |= IFF_RUNNING;
1109         bstp_initialization(sc);
1110         return 0;
1111 }
1112
1113 static int
1114 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1115 {
1116         struct ifnet *ifp = sc->sc_ifp;
1117
1118         if ((ifp->if_flags & IFF_RUNNING) == 0)
1119                 return 0;
1120
1121         callout_stop(&sc->sc_brcallout);
1122
1123         crit_enter();
1124         lwkt_dropmsg(&sc->sc_brtimemsg.lmsg);
1125         crit_exit();
1126
1127         bstp_stop(sc);
1128
1129         ifp->if_flags &= ~IFF_RUNNING;
1130
1131         ifnet_deserialize_all(ifp);
1132
1133         /* Let everyone know that we are stopped */
1134         netmsg_service_sync();
1135
1136         /*
1137          * Sync ifnetX msgports in the order we forward rtnode
1138          * installation message.  This is used to make sure that
1139          * all rtnode installation messages sent by bridge_rtupdate()
1140          * during above netmsg_service_sync() are flushed.
1141          */
1142         bridge_rtmsg_sync(sc);
1143         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1144
1145         ifnet_serialize_all(ifp);
1146         return 0;
1147 }
1148
1149 static int
1150 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1151 {
1152         struct ifbreq *req = arg;
1153         struct bridge_iflist *bif;
1154         struct bridge_ifinfo *bif_info;
1155         struct ifnet *ifs, *bifp;
1156         int error = 0;
1157
1158         bifp = sc->sc_ifp;
1159         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1160
1161         ifs = ifunit_netisr(req->ifbr_ifsname);
1162         if (ifs == NULL)
1163                 return (ENOENT);
1164
1165         /* If it's in the span list, it can't be a member. */
1166         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1167                 if (ifs == bif->bif_ifp)
1168                         return (EBUSY);
1169
1170         /* Allow the first Ethernet member to define the MTU */
1171         if (ifs->if_type != IFT_GIF) {
1172                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1173                         bifp->if_mtu = ifs->if_mtu;
1174                 } else if (bifp->if_mtu != ifs->if_mtu) {
1175                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1176                         return (EINVAL);
1177                 }
1178         }
1179
1180         if (ifs->if_bridge == sc)
1181                 return (EEXIST);
1182
1183         if (ifs->if_bridge != NULL)
1184                 return (EBUSY);
1185
1186         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1187         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1188         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1189         bif_info->bifi_ifp = ifs;
1190         bif_info->bifi_bond_weight = 1;
1191
1192         /*
1193          * Release bridge interface's serializer:
1194          * - To avoid possible dead lock.
1195          * - Various sync operation will block the current thread.
1196          */
1197         ifnet_deserialize_all(bifp);
1198
1199         switch (ifs->if_type) {
1200         case IFT_ETHER:
1201         case IFT_L2VLAN:
1202                 /*
1203                  * Place the interface into promiscuous mode.
1204                  */
1205                 error = ifpromisc(ifs, 1);
1206                 if (error) {
1207                         ifnet_serialize_all(bifp);
1208                         goto out;
1209                 }
1210                 bridge_mutecaps(bif_info, ifs, 1);
1211                 break;
1212
1213         case IFT_GIF: /* :^) */
1214                 break;
1215
1216         default:
1217                 error = EINVAL;
1218                 ifnet_serialize_all(bifp);
1219                 goto out;
1220         }
1221
1222         /*
1223          * Add bifs to percpu linked lists
1224          */
1225         bridge_add_bif(sc, bif_info, ifs);
1226
1227         ifnet_serialize_all(bifp);
1228
1229         if (bifp->if_flags & IFF_RUNNING)
1230                 bstp_initialization(sc);
1231         else
1232                 bstp_stop(sc);
1233
1234         /*
1235          * Everything has been setup, so let the member interface
1236          * deliver packets to this bridge on its input/output path.
1237          */
1238         ifs->if_bridge = sc;
1239 out:
1240         if (error) {
1241                 if (bif_info != NULL)
1242                         kfree(bif_info, M_DEVBUF);
1243         }
1244         return (error);
1245 }
1246
1247 static int
1248 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1249 {
1250         struct ifbreq *req = arg;
1251         struct bridge_iflist *bif;
1252
1253         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1254         if (bif == NULL)
1255                 return (ENOENT);
1256
1257         bridge_delete_member(sc, bif, 0);
1258
1259         return (0);
1260 }
1261
1262 static int
1263 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1264 {
1265         struct ifbreq *req = arg;
1266         struct bridge_iflist *bif;
1267
1268         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1269         if (bif == NULL)
1270                 return (ENOENT);
1271         bridge_ioctl_fillflags(sc, bif, req);
1272         return (0);
1273 }
1274
1275 static void
1276 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1277                        struct ifbreq *req)
1278 {
1279         req->ifbr_ifsflags = bif->bif_flags;
1280         req->ifbr_state = bif->bif_state;
1281         req->ifbr_priority = bif->bif_priority;
1282         req->ifbr_path_cost = bif->bif_path_cost;
1283         req->ifbr_bond_weight = bif->bif_bond_weight;
1284         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1285         if (bif->bif_flags & IFBIF_STP) {
1286                 req->ifbr_peer_root = bif->bif_peer_root;
1287                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1288                 req->ifbr_peer_cost = bif->bif_peer_cost;
1289                 req->ifbr_peer_port = bif->bif_peer_port;
1290                 if (bstp_supersedes_port_info(sc, bif)) {
1291                         req->ifbr_designated_root = bif->bif_peer_root;
1292                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1293                         req->ifbr_designated_cost = bif->bif_peer_cost;
1294                         req->ifbr_designated_port = bif->bif_peer_port;
1295                 } else {
1296                         req->ifbr_designated_root = sc->sc_bridge_id;
1297                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1298                         req->ifbr_designated_cost = bif->bif_path_cost +
1299                                                     bif->bif_peer_cost;
1300                         req->ifbr_designated_port = bif->bif_port_id;
1301                 }
1302         } else {
1303                 req->ifbr_peer_root = 0;
1304                 req->ifbr_peer_bridge = 0;
1305                 req->ifbr_peer_cost = 0;
1306                 req->ifbr_peer_port = 0;
1307                 req->ifbr_designated_root = 0;
1308                 req->ifbr_designated_bridge = 0;
1309                 req->ifbr_designated_cost = 0;
1310                 req->ifbr_designated_port = 0;
1311         }
1312 }
1313
1314 static int
1315 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1316 {
1317         struct ifbreq *req = arg;
1318         struct bridge_iflist *bif;
1319         struct ifnet *bifp = sc->sc_ifp;
1320
1321         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1322         if (bif == NULL)
1323                 return (ENOENT);
1324
1325         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1326                 /* SPAN is readonly */
1327                 return (EINVAL);
1328         }
1329
1330         if (req->ifbr_ifsflags & IFBIF_STP) {
1331                 switch (bif->bif_ifp->if_type) {
1332                 case IFT_ETHER:
1333                         /* These can do spanning tree. */
1334                         break;
1335
1336                 default:
1337                         /* Nothing else can. */
1338                         return (EINVAL);
1339                 }
1340         }
1341
1342         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1343                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1344         if (bifp->if_flags & IFF_RUNNING)
1345                 bstp_initialization(sc);
1346
1347         return (0);
1348 }
1349
1350 static int
1351 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1352 {
1353         struct ifbrparam *param = arg;
1354         struct ifnet *ifp = sc->sc_ifp;
1355
1356         sc->sc_brtmax = param->ifbrp_csize;
1357
1358         ifnet_deserialize_all(ifp);
1359         bridge_rttrim(sc);
1360         ifnet_serialize_all(ifp);
1361
1362         return (0);
1363 }
1364
1365 static int
1366 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1367 {
1368         struct ifbrparam *param = arg;
1369
1370         param->ifbrp_csize = sc->sc_brtmax;
1371
1372         return (0);
1373 }
1374
1375 static int
1376 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1377 {
1378         struct bridge_control_arg *bc_arg = arg;
1379         struct ifbifconf *bifc = arg;
1380         struct bridge_iflist *bif;
1381         struct ifbreq *breq;
1382         int count, len;
1383
1384         count = 0;
1385         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1386                 count++;
1387         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1388                 count++;
1389
1390         if (bifc->ifbic_len == 0) {
1391                 bifc->ifbic_len = sizeof(*breq) * count;
1392                 return 0;
1393         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1394                 bifc->ifbic_len = 0;
1395                 return 0;
1396         }
1397
1398         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1399         KKASSERT(len >= sizeof(*breq));
1400
1401         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1402         if (breq == NULL) {
1403                 bifc->ifbic_len = 0;
1404                 return ENOMEM;
1405         }
1406         bc_arg->bca_kptr = breq;
1407
1408         count = 0;
1409         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1410                 if (len < sizeof(*breq))
1411                         break;
1412
1413                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1414                         sizeof(breq->ifbr_ifsname));
1415                 bridge_ioctl_fillflags(sc, bif, breq);
1416                 breq++;
1417                 count++;
1418                 len -= sizeof(*breq);
1419         }
1420         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1421                 if (len < sizeof(*breq))
1422                         break;
1423
1424                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1425                         sizeof(breq->ifbr_ifsname));
1426                 breq->ifbr_ifsflags = bif->bif_flags;
1427                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1428                 breq++;
1429                 count++;
1430                 len -= sizeof(*breq);
1431         }
1432
1433         bifc->ifbic_len = sizeof(*breq) * count;
1434         KKASSERT(bifc->ifbic_len > 0);
1435
1436         bc_arg->bca_len = bifc->ifbic_len;
1437         bc_arg->bca_uptr = bifc->ifbic_req;
1438         return 0;
1439 }
1440
1441 static int
1442 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1443 {
1444         struct bridge_control_arg *bc_arg = arg;
1445         struct ifbaconf *bac = arg;
1446         struct bridge_rtnode *brt;
1447         struct ifbareq *bareq;
1448         int count, len;
1449
1450         count = 0;
1451         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1452                 count++;
1453
1454         if (bac->ifbac_len == 0) {
1455                 bac->ifbac_len = sizeof(*bareq) * count;
1456                 return 0;
1457         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1458                 bac->ifbac_len = 0;
1459                 return 0;
1460         }
1461
1462         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1463         KKASSERT(len >= sizeof(*bareq));
1464
1465         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1466         if (bareq == NULL) {
1467                 bac->ifbac_len = 0;
1468                 return ENOMEM;
1469         }
1470         bc_arg->bca_kptr = bareq;
1471
1472         count = 0;
1473         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1474                 struct bridge_rtinfo *bri = brt->brt_info;
1475                 time_t expire;
1476
1477                 if (len < sizeof(*bareq))
1478                         break;
1479
1480                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1481                         sizeof(bareq->ifba_ifsname));
1482                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1483                 expire = bri->bri_expire;
1484                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1485                     time_uptime < expire)
1486                         bareq->ifba_expire = expire - time_uptime;
1487                 else
1488                         bareq->ifba_expire = 0;
1489                 bareq->ifba_flags = bri->bri_flags;
1490                 bareq++;
1491                 count++;
1492                 len -= sizeof(*bareq);
1493         }
1494
1495         bac->ifbac_len = sizeof(*bareq) * count;
1496         KKASSERT(bac->ifbac_len > 0);
1497
1498         bc_arg->bca_len = bac->ifbac_len;
1499         bc_arg->bca_uptr = bac->ifbac_req;
1500         return 0;
1501 }
1502
1503 static int
1504 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1505 {
1506         struct ifbareq *req = arg;
1507         struct bridge_iflist *bif;
1508         struct ifnet *ifp = sc->sc_ifp;
1509         int error;
1510
1511         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1512
1513         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1514         if (bif == NULL)
1515                 return (ENOENT);
1516
1517         ifnet_deserialize_all(ifp);
1518         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1519                                req->ifba_flags);
1520         ifnet_serialize_all(ifp);
1521         return (error);
1522 }
1523
1524 static int
1525 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1526 {
1527         struct ifbrparam *param = arg;
1528
1529         sc->sc_brttimeout = param->ifbrp_ctime;
1530
1531         return (0);
1532 }
1533
1534 static int
1535 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1536 {
1537         struct ifbrparam *param = arg;
1538
1539         param->ifbrp_ctime = sc->sc_brttimeout;
1540
1541         return (0);
1542 }
1543
1544 static int
1545 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1546 {
1547         struct ifbareq *req = arg;
1548         struct ifnet *ifp = sc->sc_ifp;
1549         int error;
1550
1551         ifnet_deserialize_all(ifp);
1552         error = bridge_rtdaddr(sc, req->ifba_dst);
1553         ifnet_serialize_all(ifp);
1554         return error;
1555 }
1556
1557 static int
1558 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1559 {
1560         struct ifbreq *req = arg;
1561         struct ifnet *ifp = sc->sc_ifp;
1562
1563         ifnet_deserialize_all(ifp);
1564         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1565         ifnet_serialize_all(ifp);
1566
1567         return (0);
1568 }
1569
1570 static int
1571 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1572 {
1573         struct ifbrparam *param = arg;
1574
1575         param->ifbrp_prio = sc->sc_bridge_priority;
1576
1577         return (0);
1578 }
1579
1580 static int
1581 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1582 {
1583         struct ifbrparam *param = arg;
1584
1585         sc->sc_bridge_priority = param->ifbrp_prio;
1586
1587         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1588                 bstp_initialization(sc);
1589
1590         return (0);
1591 }
1592
1593 static int
1594 bridge_ioctl_reinit(struct bridge_softc *sc, void *arg __unused)
1595 {
1596         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1597                 bstp_initialization(sc);
1598         return (0);
1599 }
1600
1601 static int
1602 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1603 {
1604         struct ifbrparam *param = arg;
1605
1606         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1607
1608         return (0);
1609 }
1610
1611 static int
1612 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1613 {
1614         struct ifbrparam *param = arg;
1615
1616         if (param->ifbrp_hellotime == 0)
1617                 return (EINVAL);
1618         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1619
1620         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1621                 bstp_initialization(sc);
1622
1623         return (0);
1624 }
1625
1626 static int
1627 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1628 {
1629         struct ifbrparam *param = arg;
1630
1631         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1632
1633         return (0);
1634 }
1635
1636 static int
1637 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1638 {
1639         struct ifbrparam *param = arg;
1640
1641         if (param->ifbrp_fwddelay == 0)
1642                 return (EINVAL);
1643         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1644
1645         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1646                 bstp_initialization(sc);
1647
1648         return (0);
1649 }
1650
1651 static int
1652 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1653 {
1654         struct ifbrparam *param = arg;
1655
1656         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1657
1658         return (0);
1659 }
1660
1661 static int
1662 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1663 {
1664         struct ifbrparam *param = arg;
1665
1666         if (param->ifbrp_maxage == 0)
1667                 return (EINVAL);
1668         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1669
1670         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1671                 bstp_initialization(sc);
1672
1673         return (0);
1674 }
1675
1676 static int
1677 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1678 {
1679         struct ifbreq *req = arg;
1680         struct bridge_iflist *bif;
1681
1682         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1683         if (bif == NULL)
1684                 return (ENOENT);
1685
1686         bif->bif_priority = req->ifbr_priority;
1687
1688         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1689                 bstp_initialization(sc);
1690
1691         return (0);
1692 }
1693
1694 static int
1695 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1696 {
1697         struct ifbreq *req = arg;
1698         struct bridge_iflist *bif;
1699
1700         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1701         if (bif == NULL)
1702                 return (ENOENT);
1703
1704         bif->bif_path_cost = req->ifbr_path_cost;
1705
1706         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1707                 bstp_initialization(sc);
1708
1709         return (0);
1710 }
1711
1712 static int
1713 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1714 {
1715         struct ifbreq *req = arg;
1716         struct bridge_iflist *bif;
1717
1718         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1719         if (bif == NULL)
1720                 return (ENOENT);
1721
1722         bif->bif_bond_weight = req->ifbr_bond_weight;
1723
1724         /* no reinit needed */
1725
1726         return (0);
1727 }
1728
1729 static int
1730 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1731 {
1732         struct ifbreq *req = arg;
1733         struct bridge_iflist *bif;
1734         struct ifnet *ifs;
1735         struct bridge_ifinfo *bif_info;
1736
1737         ifs = ifunit_netisr(req->ifbr_ifsname);
1738         if (ifs == NULL)
1739                 return (ENOENT);
1740
1741         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1742                 if (ifs == bif->bif_ifp)
1743                         return (EBUSY);
1744
1745         if (ifs->if_bridge != NULL)
1746                 return (EBUSY);
1747
1748         switch (ifs->if_type) {
1749         case IFT_ETHER:
1750         case IFT_GIF:
1751         case IFT_L2VLAN:
1752                 break;
1753
1754         default:
1755                 return (EINVAL);
1756         }
1757
1758         /*
1759          * bif_info is needed for bif_flags
1760          */
1761         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1762         bif_info->bifi_ifp = ifs;
1763
1764         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1765         bif->bif_ifp = ifs;
1766         bif->bif_info = bif_info;
1767         bif->bif_flags = IFBIF_SPAN;
1768         /* NOTE: span bif does not need bridge_ifinfo */
1769
1770         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1771
1772         sc->sc_span = 1;
1773
1774         return (0);
1775 }
1776
1777 static int
1778 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1779 {
1780         struct ifbreq *req = arg;
1781         struct bridge_iflist *bif;
1782         struct ifnet *ifs;
1783
1784         ifs = ifunit_netisr(req->ifbr_ifsname);
1785         if (ifs == NULL)
1786                 return (ENOENT);
1787
1788         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1789                 if (ifs == bif->bif_ifp)
1790                         break;
1791
1792         if (bif == NULL)
1793                 return (ENOENT);
1794
1795         bridge_delete_span(sc, bif);
1796
1797         if (TAILQ_EMPTY(&sc->sc_spanlist))
1798                 sc->sc_span = 0;
1799
1800         return (0);
1801 }
1802
1803 static void
1804 bridge_ifdetach_dispatch(netmsg_t msg)
1805 {
1806         struct ifnet *ifp, *bifp;
1807         struct bridge_softc *sc;
1808         struct bridge_iflist *bif;
1809
1810         ifp = msg->lmsg.u.ms_resultp;
1811         sc = ifp->if_bridge;
1812
1813         /* Check if the interface is a bridge member */
1814         if (sc != NULL) {
1815                 bifp = sc->sc_ifp;
1816
1817                 ifnet_serialize_all(bifp);
1818
1819                 bif = bridge_lookup_member_if(sc, ifp);
1820                 if (bif != NULL) {
1821                         bridge_delete_member(sc, bif, 1);
1822                 } else {
1823                         /* XXX Why bif will be NULL? */
1824                 }
1825
1826                 ifnet_deserialize_all(bifp);
1827                 goto reply;
1828         }
1829
1830         crit_enter();   /* XXX MP */
1831
1832         /* Check if the interface is a span port */
1833         LIST_FOREACH(sc, &bridge_list, sc_list) {
1834                 bifp = sc->sc_ifp;
1835
1836                 ifnet_serialize_all(bifp);
1837
1838                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1839                         if (ifp == bif->bif_ifp) {
1840                                 bridge_delete_span(sc, bif);
1841                                 break;
1842                         }
1843
1844                 ifnet_deserialize_all(bifp);
1845         }
1846
1847         crit_exit();
1848
1849 reply:
1850         lwkt_replymsg(&msg->lmsg, 0);
1851 }
1852
1853 /*
1854  * bridge_ifdetach:
1855  *
1856  *      Detach an interface from a bridge.  Called when a member
1857  *      interface is detaching.
1858  */
1859 static void
1860 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1861 {
1862         struct netmsg_base msg;
1863
1864         netmsg_init(&msg, NULL, &curthread->td_msgport,
1865                     0, bridge_ifdetach_dispatch);
1866         msg.lmsg.u.ms_resultp = ifp;
1867
1868         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1869 }
1870
1871 /*
1872  * bridge_init:
1873  *
1874  *      Initialize a bridge interface.
1875  */
1876 static void
1877 bridge_init(void *xsc)
1878 {
1879         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1880 }
1881
1882 /*
1883  * bridge_stop:
1884  *
1885  *      Stop the bridge interface.
1886  */
1887 static void
1888 bridge_stop(struct ifnet *ifp)
1889 {
1890         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1891 }
1892
1893 /*
1894  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1895  * interface or from any member of our bridge interface.  This is used
1896  * later on to force the MAC to be the MAC of our bridge interface.
1897  */
1898 static int
1899 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1900 {
1901         struct bridge_iflist *bif;
1902
1903         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1904                 return (1);
1905
1906         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1907                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1908                            ETHER_ADDR_LEN) == 0) {
1909                         return (1);
1910                 }
1911         }
1912         return (0);
1913 }
1914
1915 /*
1916  * bridge_enqueue:
1917  *
1918  *      Enqueue a packet on a bridge member interface.
1919  *
1920  */
1921 void
1922 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1923 {
1924         struct netmsg_packet *nmp;
1925
1926         mbuftrackid(m, 64);
1927
1928         nmp = &m->m_hdr.mh_netmsg;
1929         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1930                     0, bridge_enqueue_handler);
1931         nmp->nm_packet = m;
1932         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1933
1934         lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), &nmp->base.lmsg);
1935 }
1936
1937 /*
1938  * After looking up dst_if in our forwarding table we still have to
1939  * deal with channel bonding.  Find the best interface in the bonding set.
1940  */
1941 static struct ifnet *
1942 bridge_select_unicast(struct bridge_softc *sc, struct ifnet *dst_if,
1943                       int from_blocking, struct mbuf *m)
1944 {
1945         struct bridge_iflist *bif, *nbif;
1946         struct ifnet *alt_if;
1947         int alt_priority;
1948         int priority;
1949
1950         /*
1951          * Unicast, kinda replicates the output side of bridge_output().
1952          *
1953          * Even though this is a uni-cast packet we may have to select
1954          * an interface from a bonding set.
1955          */
1956         bif = bridge_lookup_member_if(sc, dst_if);
1957         if (bif == NULL) {
1958                 /* Not a member of the bridge (anymore?) */
1959                 return NULL;
1960         }
1961
1962         /*
1963          * If STP is enabled on the target we are an equal opportunity
1964          * employer and do not necessarily output to dst_if.  Instead
1965          * scan available links with the same MAC as the current dst_if
1966          * and choose the best one.
1967          *
1968          * We also need to do this because arp entries tag onto a particular
1969          * interface and if it happens to be dead then the packets will
1970          * go into a bit bucket.
1971          *
1972          * If LINK2 is set the matching links are bonded and we-round robin.
1973          * (the MAC address must be the same for the participating links).
1974          * In this case links in a STP FORWARDING or BONDED state are
1975          * allowed for unicast packets.
1976          */
1977         if (bif->bif_flags & IFBIF_STP) {
1978                 alt_if = NULL;
1979                 alt_priority = 0;
1980                 priority = 0;
1981
1982                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1983                                      bif_next, nbif) {
1984                         /*
1985                          * dst_if may imply a bonding set so we must compare
1986                          * MAC addresses.
1987                          */
1988                         if (memcmp(IF_LLADDR(bif->bif_ifp),
1989                                    IF_LLADDR(dst_if),
1990                                    ETHER_ADDR_LEN) != 0) {
1991                                 continue;
1992                         }
1993
1994                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
1995                                 continue;
1996
1997                         /*
1998                          * NOTE: We allow tranmissions through a BLOCKING
1999                          *       or LEARNING interface only as a last resort.
2000                          *       We DISALLOW both cases if the receiving
2001                          *
2002                          * NOTE: If we send a packet through a learning
2003                          *       interface the receiving end (if also in
2004                          *       LEARNING) will throw it away, so this is
2005                          *       the ultimate last resort.
2006                          */
2007                         switch(bif->bif_state) {
2008                         case BSTP_IFSTATE_BLOCKING:
2009                                 if (from_blocking == 0 &&
2010                                     bif->bif_priority + 256 > alt_priority) {
2011                                         alt_priority = bif->bif_priority + 256;
2012                                         alt_if = bif->bif_ifp;
2013                                 }
2014                                 continue;
2015                         case BSTP_IFSTATE_LEARNING:
2016                                 if (from_blocking == 0 &&
2017                                     bif->bif_priority > alt_priority) {
2018                                         alt_priority = bif->bif_priority;
2019                                         alt_if = bif->bif_ifp;
2020                                 }
2021                                 continue;
2022                         case BSTP_IFSTATE_L1BLOCKING:
2023                         case BSTP_IFSTATE_LISTENING:
2024                         case BSTP_IFSTATE_DISABLED:
2025                                 continue;
2026                         default:
2027                                 /* FORWARDING, BONDED */
2028                                 break;
2029                         }
2030
2031                         /*
2032                          * XXX we need to use the toepliz hash or
2033                          *     something like that instead of
2034                          *     round-robining.
2035                          */
2036                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2037                                 dst_if = bif->bif_ifp;
2038                                 if (++bif->bif_bond_count >=
2039                                     bif->bif_bond_weight) {
2040                                         bif->bif_bond_count = 0;
2041                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2042                                                      bif, bif_next);
2043                                         TAILQ_INSERT_TAIL(
2044                                                      &sc->sc_iflists[mycpuid],
2045                                                      bif, bif_next);
2046                                 }
2047                                 priority = 1;
2048                                 break;
2049                         }
2050
2051                         /*
2052                          * Select best interface in the FORWARDING or
2053                          * BONDED set.  Well, there shouldn't be any
2054                          * in a BONDED state if LINK2 is not set (they
2055                          * will all be in a BLOCKING) state, but there
2056                          * could be a transitory condition here.
2057                          */
2058                         if (bif->bif_priority > priority) {
2059                                 priority = bif->bif_priority;
2060                                 dst_if = bif->bif_ifp;
2061                         }
2062                 }
2063
2064                 /*
2065                  * If no suitable interfaces were found but a suitable
2066                  * alternative interface was found, use the alternative
2067                  * interface.
2068                  */
2069                 if (priority == 0 && alt_if)
2070                         dst_if = alt_if;
2071         }
2072
2073         /*
2074          * At this point, we're dealing with a unicast frame
2075          * going to a different interface.
2076          */
2077         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2078                 dst_if = NULL;
2079         return (dst_if);
2080 }
2081
2082
2083 /*
2084  * bridge_output:
2085  *
2086  *      Send output from a bridge member interface.  This
2087  *      performs the bridging function for locally originated
2088  *      packets.
2089  *
2090  *      The mbuf has the Ethernet header already attached.  We must
2091  *      enqueue or free the mbuf before returning.
2092  */
2093 static int
2094 bridge_output(struct ifnet *ifp, struct mbuf *m)
2095 {
2096         struct bridge_softc *sc = ifp->if_bridge;
2097         struct bridge_iflist *bif, *nbif;
2098         struct ether_header *eh;
2099         struct ifnet *dst_if, *alt_if, *bifp;
2100         int from_us;
2101         int alt_priority;
2102
2103         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2104         mbuftrackid(m, 65);
2105
2106         /*
2107          * Make sure that we are still a member of a bridge interface.
2108          */
2109         if (sc == NULL) {
2110                 m_freem(m);
2111                 return (0);
2112         }
2113         bifp = sc->sc_ifp;
2114
2115         /*
2116          * Acquire header
2117          */
2118         if (m->m_len < ETHER_HDR_LEN) {
2119                 m = m_pullup(m, ETHER_HDR_LEN);
2120                 if (m == NULL) {
2121                         IFNET_STAT_INC(bifp, oerrors, 1);
2122                         return (0);
2123                 }
2124         }
2125         eh = mtod(m, struct ether_header *);
2126         from_us = bridge_from_us(sc, eh);
2127
2128         /*
2129          * If bridge is down, but the original output interface is up,
2130          * go ahead and send out that interface.  Otherwise, the packet
2131          * is dropped below.
2132          */
2133         if ((bifp->if_flags & IFF_RUNNING) == 0) {
2134                 dst_if = ifp;
2135                 goto sendunicast;
2136         }
2137
2138         /*
2139          * If the packet is a multicast, or we don't know a better way to
2140          * get there, send to all interfaces.
2141          */
2142         if (ETHER_IS_MULTICAST(eh->ether_dhost))
2143                 dst_if = NULL;
2144         else
2145                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2146
2147         if (dst_if == NULL) {
2148                 struct mbuf *mc;
2149                 int used = 0;
2150                 int found = 0;
2151
2152                 if (sc->sc_span)
2153                         bridge_span(sc, m);
2154
2155                 alt_if = NULL;
2156                 alt_priority = 0;
2157                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2158                                      bif_next, nbif) {
2159                         dst_if = bif->bif_ifp;
2160
2161                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2162                                 continue;
2163
2164                         /*
2165                          * If this is not the original output interface,
2166                          * and the interface is participating in spanning
2167                          * tree, make sure the port is in a state that
2168                          * allows forwarding.
2169                          *
2170                          * We keep track of a possible backup IF if we are
2171                          * unable to find any interfaces to forward through.
2172                          *
2173                          * NOTE: Currently round-robining is not implemented
2174                          *       across bonded interface groups (needs an
2175                          *       algorithm to track each group somehow).
2176                          *
2177                          *       Similarly we track only one alternative
2178                          *       interface if no suitable interfaces are
2179                          *       found.
2180                          */
2181                         if (dst_if != ifp &&
2182                             (bif->bif_flags & IFBIF_STP) != 0) {
2183                                 switch (bif->bif_state) {
2184                                 case BSTP_IFSTATE_BONDED:
2185                                         if (bif->bif_priority + 512 >
2186                                             alt_priority) {
2187                                                 alt_priority =
2188                                                     bif->bif_priority + 512;
2189                                                 alt_if = bif->bif_ifp;
2190                                         }
2191                                         continue;
2192                                 case BSTP_IFSTATE_BLOCKING:
2193                                         if (bif->bif_priority + 256 >
2194                                             alt_priority) {
2195                                                 alt_priority =
2196                                                     bif->bif_priority + 256;
2197                                                 alt_if = bif->bif_ifp;
2198                                         }
2199                                         continue;
2200                                 case BSTP_IFSTATE_LEARNING:
2201                                         if (bif->bif_priority > alt_priority) {
2202                                                 alt_priority =
2203                                                     bif->bif_priority;
2204                                                 alt_if = bif->bif_ifp;
2205                                         }
2206                                         continue;
2207                                 case BSTP_IFSTATE_L1BLOCKING:
2208                                 case BSTP_IFSTATE_LISTENING:
2209                                 case BSTP_IFSTATE_DISABLED:
2210                                         continue;
2211                                 default:
2212                                         /* FORWARDING */
2213                                         break;
2214                                 }
2215                         }
2216
2217                         KKASSERT(used == 0);
2218                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2219                                 used = 1;
2220                                 mc = m;
2221                         } else {
2222                                 mc = m_copypacket(m, M_NOWAIT);
2223                                 if (mc == NULL) {
2224                                         IFNET_STAT_INC(bifp, oerrors, 1);
2225                                         continue;
2226                                 }
2227                         }
2228
2229                         /*
2230                          * If the packet is 'from' us override ether_shost.
2231                          */
2232                         bridge_handoff(sc, dst_if, mc, from_us);
2233                         found = 1;
2234
2235                         if (nbif != NULL && !nbif->bif_onlist) {
2236                                 KKASSERT(bif->bif_onlist);
2237                                 nbif = TAILQ_NEXT(bif, bif_next);
2238                         }
2239                 }
2240
2241                 /*
2242                  * If we couldn't find anything use the backup interface
2243                  * if we have one.
2244                  */
2245                 if (found == 0 && alt_if) {
2246                         KKASSERT(used == 0);
2247                         mc = m;
2248                         used = 1;
2249                         bridge_handoff(sc, alt_if, mc, from_us);
2250                 }
2251
2252                 if (used == 0)
2253                         m_freem(m);
2254                 return (0);
2255         }
2256
2257         /*
2258          * Unicast
2259          */
2260 sendunicast:
2261         dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2262
2263         if (sc->sc_span)
2264                 bridge_span(sc, m);
2265         if (dst_if == NULL)
2266                 m_freem(m);
2267         else
2268                 bridge_handoff(sc, dst_if, m, from_us);
2269         return (0);
2270 }
2271
2272 /*
2273  * Returns the bridge interface associated with an ifc.
2274  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2275  * code to supply the bridge for the is-at info, making
2276  * the bridge responsible for matching local addresses.
2277  *
2278  * Without this the ARP code will supply bridge member interfaces
2279  * for the is-at which makes it difficult the bridge to fail-over
2280  * interfaces (amoung other things).
2281  */
2282 static struct ifnet *
2283 bridge_interface(void *if_bridge)
2284 {
2285         struct bridge_softc *sc = if_bridge;
2286         return (sc->sc_ifp);
2287 }
2288
2289 /*
2290  * bridge_start:
2291  *
2292  *      Start output on a bridge.
2293  */
2294 static void
2295 bridge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
2296 {
2297         struct bridge_softc *sc = ifp->if_softc;
2298
2299         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
2300         ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
2301
2302         ifsq_set_oactive(ifsq);
2303         for (;;) {
2304                 struct ifnet *dst_if = NULL;
2305                 struct ether_header *eh;
2306                 struct mbuf *m;
2307
2308                 m = ifsq_dequeue(ifsq);
2309                 if (m == NULL)
2310                         break;
2311                 mbuftrackid(m, 75);
2312
2313                 if (m->m_len < sizeof(*eh)) {
2314                         m = m_pullup(m, sizeof(*eh));
2315                         if (m == NULL) {
2316                                 IFNET_STAT_INC(ifp, oerrors, 1);
2317                                 continue;
2318                         }
2319                 }
2320                 eh = mtod(m, struct ether_header *);
2321
2322                 BPF_MTAP(ifp, m);
2323                 IFNET_STAT_INC(ifp, opackets, 1);
2324
2325                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2326                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2327
2328                 /*
2329                  * Multicast or broadcast
2330                  */
2331                 if (dst_if == NULL) {
2332                         bridge_start_bcast(sc, m);
2333                         continue;
2334                 }
2335
2336                 /*
2337                  * Unicast
2338                  */
2339                 dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2340
2341                 if (dst_if == NULL)
2342                         m_freem(m);
2343                 else
2344                         bridge_enqueue(dst_if, m);
2345         }
2346         ifsq_clr_oactive(ifsq);
2347 }
2348
2349 /*
2350  * bridge_forward:
2351  *
2352  *      Forward packets received on a bridge interface via the input
2353  *      path.
2354  *
2355  *      This implements the forwarding function of the bridge.
2356  */
2357 static void
2358 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2359 {
2360         struct bridge_iflist *bif;
2361         struct ifnet *src_if, *dst_if, *ifp;
2362         struct ether_header *eh;
2363         int from_blocking;
2364
2365         mbuftrackid(m, 66);
2366         src_if = m->m_pkthdr.rcvif;
2367         ifp = sc->sc_ifp;
2368
2369         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2370
2371         /*
2372          * packet coming in on the bridge is also going out on the bridge,
2373          * but ether code won't adjust output stats for the bridge because
2374          * we are changing the interface to something else.
2375          */
2376         IFNET_STAT_INC(ifp, opackets, 1);
2377         IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len);
2378
2379         /*
2380          * Look up the bridge_iflist.
2381          */
2382         bif = bridge_lookup_member_if(sc, src_if);
2383         if (bif == NULL) {
2384                 /* Interface is not a bridge member (anymore?) */
2385                 m_freem(m);
2386                 return;
2387         }
2388
2389         /*
2390          * In spanning tree mode receiving a packet from an interface
2391          * in a BLOCKING state is allowed, it could be a member of last
2392          * resort from the sender's point of view, but forwarding it is
2393          * not allowed.
2394          *
2395          * The sender's spanning tree will eventually sync up and the
2396          * sender will go into a BLOCKING state too (but this still may be
2397          * an interface of last resort during state changes).
2398          */
2399         if (bif->bif_flags & IFBIF_STP) {
2400                 switch (bif->bif_state) {
2401                 case BSTP_IFSTATE_L1BLOCKING:
2402                 case BSTP_IFSTATE_LISTENING:
2403                 case BSTP_IFSTATE_DISABLED:
2404                         m_freem(m);
2405                         return;
2406                 default:
2407                         /* learning, blocking, bonded, forwarding */
2408                         break;
2409                 }
2410                 from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2411         } else {
2412                 from_blocking = 0;
2413         }
2414
2415         eh = mtod(m, struct ether_header *);
2416
2417         /*
2418          * If the interface is learning, and the source
2419          * address is valid and not multicast, record
2420          * the address.
2421          */
2422         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2423             from_blocking == 0 &&
2424             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2425             (eh->ether_shost[0] == 0 &&
2426              eh->ether_shost[1] == 0 &&
2427              eh->ether_shost[2] == 0 &&
2428              eh->ether_shost[3] == 0 &&
2429              eh->ether_shost[4] == 0 &&
2430              eh->ether_shost[5] == 0) == 0) {
2431                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2432         }
2433
2434         /*
2435          * Don't forward from an interface in the listening or learning
2436          * state.  That is, in the learning state we learn information
2437          * but we throw away the packets.
2438          *
2439          * We let through packets on interfaces in the blocking state.
2440          * The blocking state is applicable to the send side, not the
2441          * receive side.
2442          */
2443         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2444             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2445              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2446                 m_freem(m);
2447                 return;
2448         }
2449
2450         /*
2451          * At this point, the port either doesn't participate
2452          * in spanning tree or it is in the forwarding state.
2453          */
2454
2455         /*
2456          * If the packet is unicast, destined for someone on
2457          * "this" side of the bridge, drop it.
2458          *
2459          * src_if implies the entire bonding set so we have to compare MAC
2460          * addresses and not just if pointers.
2461          */
2462         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2463                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2464                 if (dst_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
2465                                      ETHER_ADDR_LEN) == 0) {
2466                         m_freem(m);
2467                         return;
2468                 }
2469         } else {
2470                 /* ...forward it to all interfaces. */
2471                 IFNET_STAT_INC(ifp, imcasts, 1);
2472                 dst_if = NULL;
2473         }
2474
2475         /*
2476          * Brodcast if we do not have forwarding information.  However, if
2477          * we received the packet on a blocking interface we do not do this
2478          * (unless you really want to blow up your network).
2479          */
2480         if (dst_if == NULL) {
2481                 if (from_blocking)
2482                         m_freem(m);
2483                 else
2484                         bridge_broadcast(sc, src_if, m);
2485                 return;
2486         }
2487
2488         dst_if = bridge_select_unicast(sc, dst_if, from_blocking, m);
2489
2490         if (dst_if == NULL) {
2491                 m_freem(m);
2492                 return;
2493         }
2494
2495         if (inet_pfil_hook.ph_hashooks > 0
2496 #ifdef INET6
2497             || inet6_pfil_hook.ph_hashooks > 0
2498 #endif
2499             ) {
2500                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2501                         return;
2502                 if (m == NULL)
2503                         return;
2504
2505                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2506                         return;
2507                 if (m == NULL)
2508                         return;
2509         }
2510         bridge_handoff(sc, dst_if, m, 0);
2511 }
2512
2513 /*
2514  * bridge_input:
2515  *
2516  *      Receive input from a member interface.  Queue the packet for
2517  *      bridging if it is not for us.
2518  */
2519 static struct mbuf *
2520 bridge_input(struct ifnet *ifp, struct mbuf *m)
2521 {
2522         struct bridge_softc *sc = ifp->if_bridge;
2523         struct bridge_iflist *bif;
2524         struct ifnet *bifp, *new_ifp;
2525         struct ether_header *eh;
2526         struct mbuf *mc, *mc2;
2527
2528         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2529         mbuftrackid(m, 67);
2530
2531         /*
2532          * Make sure that we are still a member of a bridge interface.
2533          */
2534         if (sc == NULL)
2535                 return m;
2536
2537         new_ifp = NULL;
2538         bifp = sc->sc_ifp;
2539
2540         if ((bifp->if_flags & IFF_RUNNING) == 0)
2541                 goto out;
2542
2543         /*
2544          * Implement support for bridge monitoring.  If this flag has been
2545          * set on this interface, discard the packet once we push it through
2546          * the bpf(4) machinery, but before we do, increment various counters
2547          * associated with this bridge.
2548          */
2549         if (bifp->if_flags & IFF_MONITOR) {
2550                 /*
2551                  * Change input interface to this bridge
2552                  *
2553                  * Update bridge's ifnet statistics
2554                  */
2555                 m->m_pkthdr.rcvif = bifp;
2556
2557                 BPF_MTAP(bifp, m);
2558                 IFNET_STAT_INC(bifp, ipackets, 1);
2559                 IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2560                 if (m->m_flags & (M_MCAST | M_BCAST))
2561                         IFNET_STAT_INC(bifp, imcasts, 1);
2562
2563                 m_freem(m);
2564                 m = NULL;
2565                 goto out;
2566         }
2567
2568         /*
2569          * Handle the ether_header
2570          *
2571          * In all cases if the packet is destined for us via our MAC
2572          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2573          * repeat the source MAC out the same interface.
2574          *
2575          * This first test against our bridge MAC is the fast-path.
2576          *
2577          * NOTE!  The bridge interface can serve as an endpoint for
2578          *        communication but normally there are no IPs associated
2579          *        with it so you cannot route through it.  Instead what
2580          *        you do is point your default route *THROUGH* the bridge
2581          *        to the actual default router for one of the bridged spaces.
2582          *
2583          *        Another possibility is to put all your IP specifications
2584          *        on the bridge instead of on the individual interfaces.  If
2585          *        you do this it should be possible to use the bridge as an
2586          *        end point and route (rather than switch) through it using
2587          *        the default route or ipfw forwarding rules.
2588          */
2589
2590         /*
2591          * Acquire header
2592          */
2593         if (m->m_len < ETHER_HDR_LEN) {
2594                 m = m_pullup(m, ETHER_HDR_LEN);
2595                 if (m == NULL)
2596                         goto out;
2597         }
2598         eh = mtod(m, struct ether_header *);
2599         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2600         bcopy(eh->ether_shost, m->m_pkthdr.ether_br_shost, ETHER_ADDR_LEN);
2601
2602         if ((bridge_debug & 1) &&
2603             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2604             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2605                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2606                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2607                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2608                         eh->ether_dhost[0],
2609                         eh->ether_dhost[1],
2610                         eh->ether_dhost[2],
2611                         eh->ether_dhost[3],
2612                         eh->ether_dhost[4],
2613                         eh->ether_dhost[5],
2614                         eh->ether_shost[0],
2615                         eh->ether_shost[1],
2616                         eh->ether_shost[2],
2617                         eh->ether_shost[3],
2618                         eh->ether_shost[4],
2619                         eh->ether_shost[5],
2620                         eh->ether_type,
2621                         ((u_char *)IF_LLADDR(bifp))[0],
2622                         ((u_char *)IF_LLADDR(bifp))[1],
2623                         ((u_char *)IF_LLADDR(bifp))[2],
2624                         ((u_char *)IF_LLADDR(bifp))[3],
2625                         ((u_char *)IF_LLADDR(bifp))[4],
2626                         ((u_char *)IF_LLADDR(bifp))[5]
2627                 );
2628         }
2629
2630         /*
2631          * If the packet is for us, set the packets source as the
2632          * bridge, and return the packet back to ifnet.if_input for
2633          * local processing.
2634          */
2635         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2636                 /*
2637                  * We must still record the source interface in our
2638                  * addr cache, otherwise our bridge won't know where
2639                  * to send responses and will broadcast them.
2640                  */
2641                 bif = bridge_lookup_member_if(sc, ifp);
2642                 if ((bif->bif_flags & IFBIF_LEARNING) &&
2643                     ((bif->bif_flags & IFBIF_STP) == 0 ||
2644                      bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2645                         bridge_rtupdate(sc, eh->ether_shost,
2646                                         ifp, IFBAF_DYNAMIC);
2647                 }
2648
2649                 /*
2650                  * Perform pfil hooks.
2651                  */
2652                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2653                 KASSERT(bifp->if_bridge == NULL,
2654                         ("loop created in bridge_input"));
2655                 if (pfil_member != 0) {
2656                         if (inet_pfil_hook.ph_hashooks > 0
2657 #ifdef INET6
2658                             || inet6_pfil_hook.ph_hashooks > 0
2659 #endif
2660                         ) {
2661                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2662                                         goto out;
2663                                 if (m == NULL)
2664                                         goto out;
2665                         }
2666                 }
2667
2668                 /*
2669                  * Set new_ifp and skip to the end.  This will trigger code
2670                  * to reinput the packet and run it into our stack.
2671                  */
2672                 new_ifp = bifp;
2673                 goto out;
2674         }
2675
2676         /*
2677          * Tap all packets arriving on the bridge, no matter if
2678          * they are local destinations or not.  In is in.
2679          *
2680          * Update bridge's ifnet statistics
2681          */
2682         BPF_MTAP(bifp, m);
2683         IFNET_STAT_INC(bifp, ipackets, 1);
2684         IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2685         if (m->m_flags & (M_MCAST | M_BCAST))
2686                 IFNET_STAT_INC(bifp, imcasts, 1);
2687
2688         bif = bridge_lookup_member_if(sc, ifp);
2689         if (bif == NULL)
2690                 goto out;
2691
2692         if (sc->sc_span)
2693                 bridge_span(sc, m);
2694
2695         if (m->m_flags & (M_BCAST | M_MCAST)) {
2696                 /*
2697                  * Tap off 802.1D packets; they do not get forwarded.
2698                  */
2699                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2700                             ETHER_ADDR_LEN) == 0) {
2701                         ifnet_serialize_all(bifp);
2702                         bstp_input(sc, bif, m);
2703                         ifnet_deserialize_all(bifp);
2704
2705                         /* m is freed by bstp_input */
2706                         m = NULL;
2707                         goto out;
2708                 }
2709
2710                 /*
2711                  * Other than 802.11d packets, ignore packets if the
2712                  * interface is not in a good state.
2713                  *
2714                  * NOTE: Broadcast/mcast packets received on a blocking or
2715                  *       learning interface are allowed for local processing.
2716                  *
2717                  *       The sending side of a blocked port will stop
2718                  *       transmitting when a better alternative is found.
2719                  *       However, later on we will disallow the forwarding
2720                  *       of bcast/mcsat packets over a blocking interface.
2721                  */
2722                 if (bif->bif_flags & IFBIF_STP) {
2723                         switch (bif->bif_state) {
2724                         case BSTP_IFSTATE_L1BLOCKING:
2725                         case BSTP_IFSTATE_LISTENING:
2726                         case BSTP_IFSTATE_DISABLED:
2727                                 goto out;
2728                         default:
2729                                 /* blocking, learning, bonded, forwarding */
2730                                 break;
2731                         }
2732                 }
2733
2734                 /*
2735                  * Make a deep copy of the packet and enqueue the copy
2736                  * for bridge processing; return the original packet for
2737                  * local processing.
2738                  */
2739                 mc = m_dup(m, M_NOWAIT);
2740                 if (mc == NULL)
2741                         goto out;
2742
2743                 /*
2744                  * It's just too dangerous to allow bcast/mcast over a
2745                  * blocked interface, eventually the network will sort
2746                  * itself out and a better path will be found.
2747                  */
2748                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2749                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2750                         bridge_forward(sc, mc);
2751                 }
2752
2753                 /*
2754                  * Reinject the mbuf as arriving on the bridge so we have a
2755                  * chance at claiming multicast packets. We can not loop back
2756                  * here from ether_input as a bridge is never a member of a
2757                  * bridge.
2758                  */
2759                 KASSERT(bifp->if_bridge == NULL,
2760                         ("loop created in bridge_input"));
2761                 mc2 = m_dup(m, M_NOWAIT);
2762 #ifdef notyet
2763                 if (mc2 != NULL) {
2764                         /* Keep the layer3 header aligned */
2765                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2766                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2767                 }
2768 #endif
2769                 if (mc2 != NULL) {
2770                         /*
2771                          * Don't tap to bpf(4) again; we have already done
2772                          * the tapping.
2773                          *
2774                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2775                          * processed as coming in on the correct interface.
2776                          *
2777                          * Clear the bridge flag for local processing in
2778                          * case the packet gets routed.
2779                          */
2780                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2781                         ether_reinput_oncpu(bifp, mc2, 0);
2782                 }
2783
2784                 /* Return the original packet for local processing. */
2785                 goto out;
2786         }
2787
2788         /*
2789          * Input of a unicast packet.  We have to allow unicast packets
2790          * input from links in the BLOCKING state as this might be an
2791          * interface of last resort.
2792          *
2793          * NOTE: We explicitly ignore normal packets received on a link
2794          *       in the BLOCKING state.  The point of being in that state
2795          *       is to avoid getting duplicate packets.
2796          *
2797          *       HOWEVER, if LINK2 is set the normal spanning tree code
2798          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2799          *       loops.  Unicast packets CAN still loop if we allow the
2800          *       case (hence we only do it in LINK2), but it isn't quite as
2801          *       bad as a broadcast packet looping.
2802          */
2803         if (bif->bif_flags & IFBIF_STP) {
2804                 switch (bif->bif_state) {
2805                 case BSTP_IFSTATE_L1BLOCKING:
2806                 case BSTP_IFSTATE_LISTENING:
2807                 case BSTP_IFSTATE_DISABLED:
2808                         goto out;
2809                 default:
2810                         /* blocking, bonded, forwarding, learning */
2811                         break;
2812                 }
2813         }
2814
2815         /*
2816          * Unicast.  Make sure it's not for us.
2817          *
2818          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2819          * is followed by breaking out of the loop.
2820          */
2821         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2822                 if (bif->bif_ifp->if_type != IFT_ETHER)
2823                         continue;
2824
2825                 /*
2826                  * It is destined for an interface linked to the bridge.
2827                  * We want the bridge itself to take care of link level
2828                  * forwarding to member interfaces so reinput on the bridge.
2829                  * i.e. if you ping an IP on a target interface associated
2830                  * with the bridge, the arp is-at response should indicate
2831                  * the bridge MAC.
2832                  *
2833                  * Only update our addr list when learning if the port
2834                  * is not in a blocking state.  If it is we still allow
2835                  * the packet but we do not try to learn from it.
2836                  */
2837                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2838                            ETHER_ADDR_LEN) == 0) {
2839                         if (bif->bif_ifp != ifp) {
2840                                 /* XXX loop prevention */
2841                                 m->m_flags |= M_ETHER_BRIDGED;
2842                         }
2843                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2844                             ((bif->bif_flags & IFBIF_STP) == 0 ||
2845                              bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2846                                 bridge_rtupdate(sc, eh->ether_shost,
2847                                                 ifp, IFBAF_DYNAMIC);
2848                         }
2849                         new_ifp = bifp; /* not bif->bif_ifp */
2850                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2851                         goto out;
2852                 }
2853
2854                 /*
2855                  * Ignore received packets that were sent by us.
2856                  */
2857                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2858                            ETHER_ADDR_LEN) == 0) {
2859                         m_freem(m);
2860                         m = NULL;
2861                         goto out;
2862                 }
2863         }
2864
2865         /*
2866          * It isn't for us.
2867          *
2868          * Perform the bridge forwarding function, but disallow bridging
2869          * to interfaces in the blocking state if the packet came in on
2870          * an interface in the blocking state.
2871          *
2872          * (bridge_forward also updates the addr cache).
2873          */
2874         bridge_forward(sc, m);
2875         m = NULL;
2876
2877         /*
2878          * ether_reinput_oncpu() will reprocess rcvif as
2879          * coming from new_ifp (since we do not specify
2880          * REINPUT_KEEPRCVIF).
2881          */
2882 out:
2883         if (new_ifp != NULL) {
2884                 /*
2885                  * Clear the bridge flag for local processing in
2886                  * case the packet gets routed.
2887                  */
2888                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2889                 m = NULL;
2890         }
2891         return (m);
2892 }
2893
2894 /*
2895  * bridge_start_bcast:
2896  *
2897  *      Broadcast the packet sent from bridge to all member
2898  *      interfaces.
2899  *      This is a simplified version of bridge_broadcast(), however,
2900  *      this function expects caller to hold bridge's serializer.
2901  */
2902 static void
2903 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2904 {
2905         struct bridge_iflist *bif;
2906         struct mbuf *mc;
2907         struct ifnet *dst_if, *alt_if, *bifp;
2908         int used = 0;
2909         int found = 0;
2910         int alt_priority;
2911
2912         mbuftrackid(m, 68);
2913         bifp = sc->sc_ifp;
2914         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2915
2916         /*
2917          * Following loop is MPSAFE; nothing is blocking
2918          * in the loop body.
2919          *
2920          * NOTE: We transmit through an member in the BLOCKING state only
2921          *       as a last resort.
2922          */
2923         alt_if = NULL;
2924         alt_priority = 0;
2925
2926         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2927                 dst_if = bif->bif_ifp;
2928
2929                 if (bif->bif_flags & IFBIF_STP) {
2930                         switch (bif->bif_state) {
2931                         case BSTP_IFSTATE_BLOCKING:
2932                                 if (bif->bif_priority > alt_priority) {
2933                                         alt_priority = bif->bif_priority;
2934                                         alt_if = bif->bif_ifp;
2935                                 }
2936                                 /* fall through */
2937                         case BSTP_IFSTATE_L1BLOCKING:
2938                         case BSTP_IFSTATE_DISABLED:
2939                                 continue;
2940                         default:
2941                                 /* listening, learning, bonded, forwarding */
2942                                 break;
2943                         }
2944                 }
2945
2946                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2947                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2948                         continue;
2949
2950                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2951                         continue;
2952
2953                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2954                         mc = m;
2955                         used = 1;
2956                 } else {
2957                         mc = m_copypacket(m, M_NOWAIT);
2958                         if (mc == NULL) {
2959                                 IFNET_STAT_INC(bifp, oerrors, 1);
2960                                 continue;
2961                         }
2962                 }
2963                 found = 1;
2964                 bridge_enqueue(dst_if, mc);
2965         }
2966
2967         if (found == 0 && alt_if) {
2968                 KKASSERT(used == 0);
2969                 mc = m;
2970                 used = 1;
2971                 bridge_enqueue(alt_if, mc);
2972         }
2973
2974         if (used == 0)
2975                 m_freem(m);
2976 }
2977
2978 /*
2979  * bridge_broadcast:
2980  *
2981  *      Send a frame to all interfaces that are members of
2982  *      the bridge, except for the one on which the packet
2983  *      arrived.
2984  */
2985 static void
2986 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2987                  struct mbuf *m)
2988 {
2989         struct bridge_iflist *bif, *nbif;
2990         struct ether_header *eh;
2991         struct mbuf *mc;
2992         struct ifnet *dst_if, *alt_if, *bifp;
2993         int used;
2994         int found;
2995         int alt_priority;
2996         int from_us;
2997
2998         mbuftrackid(m, 69);
2999         bifp = sc->sc_ifp;
3000         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
3001
3002         eh = mtod(m, struct ether_header *);
3003         from_us = bridge_from_us(sc, eh);
3004
3005         if (inet_pfil_hook.ph_hashooks > 0
3006 #ifdef INET6
3007             || inet6_pfil_hook.ph_hashooks > 0
3008 #endif
3009             ) {
3010                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
3011                         return;
3012                 if (m == NULL)
3013                         return;
3014
3015                 /* Filter on the bridge interface before broadcasting */
3016                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
3017                         return;
3018                 if (m == NULL)
3019                         return;
3020         }
3021
3022         alt_if = NULL;
3023         alt_priority = 0;
3024         found = 0;
3025         used = 0;
3026
3027         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
3028                 dst_if = bif->bif_ifp;
3029
3030                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3031                         continue;
3032
3033                 /*
3034                  * Don't bounce the packet out the same interface it came
3035                  * in on.  We have to test MAC addresses because a packet
3036                  * can come in a bonded interface and we don't want it to
3037                  * be echod out the forwarding interface for the same bonding
3038                  * set.
3039                  */
3040                 if (src_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
3041                                      ETHER_ADDR_LEN) == 0) {
3042                         continue;
3043                 }
3044
3045                 /*
3046                  * Generally speaking we only broadcast through forwarding
3047                  * interfaces.  If no interfaces are available we select
3048                  * a BONDED, BLOCKING, or LEARNING interface to forward
3049                  * through.
3050                  */
3051                 if (bif->bif_flags & IFBIF_STP) {
3052                         switch (bif->bif_state) {
3053                         case BSTP_IFSTATE_BONDED:
3054                                 if (bif->bif_priority + 512 > alt_priority) {
3055                                         alt_priority = bif->bif_priority + 512;
3056                                         alt_if = bif->bif_ifp;
3057                                 }
3058                                 continue;
3059                         case BSTP_IFSTATE_BLOCKING:
3060                                 if (bif->bif_priority + 256 > alt_priority) {
3061                                         alt_priority = bif->bif_priority + 256;
3062                                         alt_if = bif->bif_ifp;
3063                                 }
3064                                 continue;
3065                         case BSTP_IFSTATE_LEARNING:
3066                                 if (bif->bif_priority > alt_priority) {
3067                                         alt_priority = bif->bif_priority;
3068                                         alt_if = bif->bif_ifp;
3069                                 }
3070                                 continue;
3071                         case BSTP_IFSTATE_L1BLOCKING:
3072                         case BSTP_IFSTATE_DISABLED:
3073                         case BSTP_IFSTATE_LISTENING:
3074                                 continue;
3075                         default:
3076                                 /* forwarding */
3077                                 break;
3078                         }
3079                 }
3080
3081                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3082                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3083                         continue;
3084                 }
3085
3086                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3087                         mc = m;
3088                         used = 1;
3089                 } else {
3090                         mc = m_copypacket(m, M_NOWAIT);
3091                         if (mc == NULL) {
3092                                 IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3093                                 continue;
3094                         }
3095                 }
3096                 found = 1;
3097
3098                 /*
3099                  * Filter on the output interface.  Pass a NULL bridge
3100                  * interface pointer so we do not redundantly filter on
3101                  * the bridge for each interface we broadcast on.
3102                  */
3103                 if (inet_pfil_hook.ph_hashooks > 0
3104 #ifdef INET6
3105                     || inet6_pfil_hook.ph_hashooks > 0
3106 #endif
3107                     ) {
3108                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3109                                 continue;
3110                         if (mc == NULL)
3111                                 continue;
3112                 }
3113                 bridge_handoff(sc, dst_if, mc, from_us);
3114
3115                 if (nbif != NULL && !nbif->bif_onlist) {
3116                         KKASSERT(bif->bif_onlist);
3117                         nbif = TAILQ_NEXT(bif, bif_next);
3118                 }
3119         }
3120
3121         if (found == 0 && alt_if) {
3122                 KKASSERT(used == 0);
3123                 mc = m;
3124                 used = 1;
3125                 bridge_enqueue(alt_if, mc);
3126         }
3127
3128         if (used == 0)
3129                 m_freem(m);
3130 }
3131
3132 /*
3133  * bridge_span:
3134  *
3135  *      Duplicate a packet out one or more interfaces that are in span mode,
3136  *      the original mbuf is unmodified.
3137  */
3138 static void
3139 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3140 {
3141         struct bridge_iflist *bif;
3142         struct ifnet *dst_if, *bifp;
3143         struct mbuf *mc;
3144
3145         mbuftrackid(m, 70);
3146         bifp = sc->sc_ifp;
3147         ifnet_serialize_all(bifp);
3148
3149         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3150                 dst_if = bif->bif_ifp;
3151
3152                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3153                         continue;
3154
3155                 mc = m_copypacket(m, M_NOWAIT);
3156                 if (mc == NULL) {
3157                         IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3158                         continue;
3159                 }
3160                 bridge_enqueue(dst_if, mc);
3161         }
3162
3163         ifnet_deserialize_all(bifp);
3164 }
3165
3166 static void
3167 bridge_rtmsg_sync_handler(netmsg_t msg)
3168 {
3169         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3170 }
3171
3172 static void
3173 bridge_rtmsg_sync(struct bridge_softc *sc)
3174 {
3175         struct netmsg_base msg;
3176
3177         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3178
3179         netmsg_init(&msg, NULL, &curthread->td_msgport,
3180                     0, bridge_rtmsg_sync_handler);
3181         ifnet_domsg(&msg.lmsg, 0);
3182 }
3183
3184 static __inline void
3185 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3186                      int setflags, uint8_t flags, uint32_t timeo)
3187 {
3188         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3189             bri->bri_ifp != dst_if)
3190                 bri->bri_ifp = dst_if;
3191         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3192             bri->bri_expire != time_uptime + timeo)
3193                 bri->bri_expire = time_uptime + timeo;
3194         if (setflags)
3195                 bri->bri_flags = flags;
3196 }
3197
3198 static int
3199 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3200                        struct ifnet *dst_if, int setflags, uint8_t flags,
3201                        struct bridge_rtinfo **bri0)
3202 {
3203         struct bridge_rtnode *brt;
3204         struct bridge_rtinfo *bri;
3205
3206         if (mycpuid == 0) {
3207                 brt = bridge_rtnode_lookup(sc, dst);
3208                 if (brt != NULL) {
3209                         /*
3210                          * rtnode for 'dst' already exists.  We inform the
3211                          * caller about this by leaving bri0 as NULL.  The
3212                          * caller will terminate the intallation upon getting
3213                          * NULL bri0.  However, we still need to update the
3214                          * rtinfo.
3215                          */
3216                         KKASSERT(*bri0 == NULL);
3217
3218                         /* Update rtinfo */
3219                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3220                                              flags, sc->sc_brttimeout);
3221                         return 0;
3222                 }
3223
3224                 /*
3225                  * We only need to check brtcnt on CPU0, since if limit
3226                  * is to be exceeded, ENOSPC is returned.  Caller knows
3227                  * this and will terminate the installation.
3228                  */
3229                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3230                         return ENOSPC;
3231
3232                 KKASSERT(*bri0 == NULL);
3233                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3234                                   M_WAITOK | M_ZERO);
3235                 *bri0 = bri;
3236
3237                 /* Setup rtinfo */
3238                 bri->bri_flags = IFBAF_DYNAMIC;
3239                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3240                                      sc->sc_brttimeout);
3241         } else {
3242                 bri = *bri0;
3243                 KKASSERT(bri != NULL);
3244         }
3245
3246         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3247                       M_WAITOK | M_ZERO);
3248         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3249         brt->brt_info = bri;
3250
3251         bridge_rtnode_insert(sc, brt);
3252         return 0;
3253 }
3254
3255 static void
3256 bridge_rtinstall_handler(netmsg_t msg)
3257 {
3258         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3259         int error;
3260
3261         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3262                                        brmsg->br_dst, brmsg->br_dst_if,
3263                                        brmsg->br_setflags, brmsg->br_flags,
3264                                        &brmsg->br_rtinfo);
3265         if (error) {
3266                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3267                 lwkt_replymsg(&brmsg->base.lmsg, error);
3268                 return;
3269         } else if (brmsg->br_rtinfo == NULL) {
3270                 /* rtnode already exists for 'dst' */
3271                 KKASSERT(mycpuid == 0);
3272                 lwkt_replymsg(&brmsg->base.lmsg, 0);
3273                 return;
3274         }
3275         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
3276 }
3277
3278 /*
3279  * bridge_rtupdate:
3280  *
3281  *      Add/Update a bridge routing entry.
3282  */
3283 static int
3284 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3285                 struct ifnet *dst_if, uint8_t flags)
3286 {
3287         struct bridge_rtnode *brt;
3288
3289         /*
3290          * A route for this destination might already exist.  If so,
3291          * update it, otherwise create a new one.
3292          */
3293         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3294                 struct netmsg_brsaddr *brmsg;
3295
3296                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3297                         return ENOSPC;
3298
3299                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3300                 if (brmsg == NULL)
3301                         return ENOMEM;
3302
3303                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3304                             0, bridge_rtinstall_handler);
3305                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3306                 brmsg->br_dst_if = dst_if;
3307                 brmsg->br_flags = flags;
3308                 brmsg->br_setflags = 0;
3309                 brmsg->br_softc = sc;
3310                 brmsg->br_rtinfo = NULL;
3311
3312                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
3313                 return 0;
3314         }
3315         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3316                              sc->sc_brttimeout);
3317         return 0;
3318 }
3319
3320 static int
3321 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3322                struct ifnet *dst_if, uint8_t flags)
3323 {
3324         struct netmsg_brsaddr brmsg;
3325
3326         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3327
3328         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3329                     0, bridge_rtinstall_handler);
3330         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3331         brmsg.br_dst_if = dst_if;
3332         brmsg.br_flags = flags;
3333         brmsg.br_setflags = 1;
3334         brmsg.br_softc = sc;
3335         brmsg.br_rtinfo = NULL;
3336
3337         return ifnet_domsg(&brmsg.base.lmsg, 0);
3338 }
3339
3340 /*
3341  * bridge_rtlookup:
3342  *
3343  *      Lookup the destination interface for an address.
3344  */
3345 static struct ifnet *
3346 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3347 {
3348         struct bridge_rtnode *brt;
3349
3350         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3351                 return NULL;
3352         return brt->brt_info->bri_ifp;
3353 }
3354
3355 static void
3356 bridge_rtreap_handler(netmsg_t msg)
3357 {
3358         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3359         struct bridge_rtnode *brt, *nbrt;
3360
3361         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3362                 if (brt->brt_info->bri_dead)
3363                         bridge_rtnode_destroy(sc, brt);
3364         }
3365         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3366 }
3367
3368 static void
3369 bridge_rtreap(struct bridge_softc *sc)
3370 {
3371         struct netmsg_base msg;
3372
3373         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3374
3375         netmsg_init(&msg, NULL, &curthread->td_msgport,
3376                     0, bridge_rtreap_handler);
3377         msg.lmsg.u.ms_resultp = sc;
3378
3379         ifnet_domsg(&msg.lmsg, 0);
3380 }
3381
3382 static void
3383 bridge_rtreap_async(struct bridge_softc *sc)
3384 {
3385         struct netmsg_base *msg;
3386
3387         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3388
3389         netmsg_init(msg, NULL, &netisr_afree_rport,
3390                     0, bridge_rtreap_handler);
3391         msg->lmsg.u.ms_resultp = sc;
3392
3393         ifnet_sendmsg(&msg->lmsg, 0);
3394 }
3395
3396 /*
3397  * bridge_rttrim:
3398  *
3399  *      Trim the routine table so that we have a number
3400  *      of routing entries less than or equal to the
3401  *      maximum number.
3402  */
3403 static void
3404 bridge_rttrim(struct bridge_softc *sc)
3405 {
3406         struct bridge_rtnode *brt;
3407         int dead;
3408
3409         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3410
3411         /* Make sure we actually need to do this. */
3412         if (sc->sc_brtcnt <= sc->sc_brtmax)
3413                 return;
3414
3415         /*
3416          * Find out how many rtnodes are dead
3417          */
3418         dead = bridge_rtage_finddead(sc);
3419         KKASSERT(dead <= sc->sc_brtcnt);
3420
3421         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3422                 /* Enough dead rtnodes are found */
3423                 bridge_rtreap(sc);
3424                 return;
3425         }
3426
3427         /*
3428          * Kill some dynamic rtnodes to meet the brtmax
3429          */
3430         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3431                 struct bridge_rtinfo *bri = brt->brt_info;
3432
3433                 if (bri->bri_dead) {
3434                         /*
3435                          * We have counted this rtnode in
3436                          * bridge_rtage_finddead()
3437                          */
3438                         continue;
3439                 }
3440
3441                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3442                         bri->bri_dead = 1;
3443                         ++dead;
3444                         KKASSERT(dead <= sc->sc_brtcnt);
3445
3446                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3447                                 /* Enough rtnodes are collected */
3448                                 break;
3449                         }
3450                 }
3451         }
3452         if (dead)
3453                 bridge_rtreap(sc);
3454 }
3455
3456 /*
3457  * bridge_timer:
3458  *
3459  *      Aging timer for the bridge.
3460  */
3461 static void
3462 bridge_timer(void *arg)
3463 {
3464         struct bridge_softc *sc = arg;
3465         struct netmsg_base *msg;
3466
3467         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3468
3469         crit_enter();
3470
3471         if (callout_pending(&sc->sc_brcallout) ||
3472             !callout_active(&sc->sc_brcallout)) {
3473                 crit_exit();
3474                 return;
3475         }
3476         callout_deactivate(&sc->sc_brcallout);
3477
3478         msg = &sc->sc_brtimemsg;
3479         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3480         lwkt_sendmsg_oncpu(BRIDGE_CFGPORT, &msg->lmsg);
3481
3482         crit_exit();
3483 }
3484
3485 static void
3486 bridge_timer_handler(netmsg_t msg)
3487 {
3488         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3489
3490         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3491
3492         crit_enter();
3493         /* Reply ASAP */
3494         lwkt_replymsg(&msg->lmsg, 0);
3495         crit_exit();
3496
3497         bridge_rtage(sc);
3498         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3499                 callout_reset(&sc->sc_brcallout,
3500                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3501         }
3502 }
3503
3504 static int
3505 bridge_rtage_finddead(struct bridge_softc *sc)
3506 {
3507         struct bridge_rtnode *brt;
3508         int dead = 0;
3509
3510         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3511                 struct bridge_rtinfo *bri = brt->brt_info;
3512
3513                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3514                     time_uptime >= bri->bri_expire) {
3515                         bri->bri_dead = 1;
3516                         ++dead;
3517                         KKASSERT(dead <= sc->sc_brtcnt);
3518                 }
3519         }
3520         return dead;
3521 }
3522
3523 /*
3524  * bridge_rtage:
3525  *
3526  *      Perform an aging cycle.
3527  */
3528 static void
3529 bridge_rtage(struct bridge_softc *sc)
3530 {
3531         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3532
3533         if (bridge_rtage_finddead(sc))
3534                 bridge_rtreap(sc);
3535 }
3536
3537 /*
3538  * bridge_rtflush:
3539  *
3540  *      Remove all dynamic addresses from the bridge.
3541  */
3542 static void
3543 bridge_rtflush(struct bridge_softc *sc, int bf)
3544 {
3545         struct bridge_rtnode *brt;
3546         int reap;
3547
3548         reap = 0;
3549         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3550                 struct bridge_rtinfo *bri = brt->brt_info;
3551
3552                 if ((bf & IFBF_FLUSHALL) ||
3553                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3554                         bri->bri_dead = 1;
3555                         reap = 1;
3556                 }
3557         }
3558         if (reap) {
3559                 if (bf & IFBF_FLUSHSYNC)
3560                         bridge_rtreap(sc);
3561                 else
3562                         bridge_rtreap_async(sc);
3563         }
3564 }
3565
3566 /*
3567  * bridge_rtdaddr:
3568  *
3569  *      Remove an address from the table.
3570  */
3571 static int
3572 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3573 {
3574         struct bridge_rtnode *brt;
3575
3576         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3577
3578         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3579                 return (ENOENT);
3580
3581         /* TODO: add a cheaper delete operation */
3582         brt->brt_info->bri_dead = 1;
3583         bridge_rtreap(sc);
3584         return (0);
3585 }
3586
3587 /*
3588  * bridge_rtdelete:
3589  *
3590  *      Delete routes to a speicifc member interface.
3591  */
3592 void
3593 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3594 {
3595         struct bridge_rtnode *brt;
3596         int reap;
3597
3598         reap = 0;
3599         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3600                 struct bridge_rtinfo *bri = brt->brt_info;
3601
3602                 if (bri->bri_ifp == ifp &&
3603                     ((bf & IFBF_FLUSHALL) ||
3604                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3605                         bri->bri_dead = 1;
3606                         reap = 1;
3607                 }
3608         }
3609         if (reap) {
3610                 if (bf & IFBF_FLUSHSYNC)
3611                         bridge_rtreap(sc);
3612                 else
3613                         bridge_rtreap_async(sc);
3614         }
3615 }
3616
3617 /*
3618  * bridge_rtable_init:
3619  *
3620  *      Initialize the route table for this bridge.
3621  */
3622 static void
3623 bridge_rtable_init(struct bridge_softc *sc)
3624 {
3625         int cpu;
3626
3627         /*
3628          * Initialize per-cpu hash tables
3629          */
3630         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3631                                  M_DEVBUF, M_WAITOK);
3632         for (cpu = 0; cpu < ncpus; ++cpu) {
3633                 int i;
3634
3635                 sc->sc_rthashs[cpu] =
3636                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3637                         M_DEVBUF, M_WAITOK);
3638
3639                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3640                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3641         }
3642         sc->sc_rthash_key = karc4random();
3643
3644         /*
3645          * Initialize per-cpu lists
3646          */
3647         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3648                                  M_DEVBUF, M_WAITOK);
3649         for (cpu = 0; cpu < ncpus; ++cpu)
3650                 LIST_INIT(&sc->sc_rtlists[cpu]);
3651 }
3652
3653 /*
3654  * bridge_rtable_fini:
3655  *
3656  *      Deconstruct the route table for this bridge.
3657  */
3658 static void
3659 bridge_rtable_fini(struct bridge_softc *sc)
3660 {
3661         int cpu;
3662
3663         /*
3664          * Free per-cpu hash tables
3665          */
3666         for (cpu = 0; cpu < ncpus; ++cpu)
3667                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3668         kfree(sc->sc_rthashs, M_DEVBUF);
3669
3670         /*
3671          * Free per-cpu lists
3672          */
3673         kfree(sc->sc_rtlists, M_DEVBUF);
3674 }
3675
3676 /*
3677  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3678  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3679  */
3680 #define mix(a, b, c)                                                    \
3681 do {                                                                    \
3682         a -= b; a -= c; a ^= (c >> 13);                                 \
3683         b -= c; b -= a; b ^= (a << 8);                                  \
3684         c -= a; c -= b; c ^= (b >> 13);                                 \
3685         a -= b; a -= c; a ^= (c >> 12);                                 \
3686         b -= c; b -= a; b ^= (a << 16);                                 \
3687         c -= a; c -= b; c ^= (b >> 5);                                  \
3688         a -= b; a -= c; a ^= (c >> 3);                                  \
3689         b -= c; b -= a; b ^= (a << 10);                                 \
3690         c -= a; c -= b; c ^= (b >> 15);                                 \
3691 } while (/*CONSTCOND*/0)
3692
3693 static __inline uint32_t
3694 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3695 {
3696         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3697
3698         b += addr[5] << 8;
3699         b += addr[4];
3700         a += addr[3] << 24;
3701         a += addr[2] << 16;
3702         a += addr[1] << 8;
3703         a += addr[0];
3704
3705         mix(a, b, c);
3706
3707         return (c & BRIDGE_RTHASH_MASK);
3708 }
3709
3710 #undef mix
3711
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns 0 when all
 *	ETHER_ADDR_LEN bytes match, otherwise the difference (a - b) of
 *	the first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int pos;

	for (pos = 0; pos < ETHER_ADDR_LEN; pos++) {
		if (a[pos] != b[pos])
			return ((int)a[pos] - (int)b[pos]);
	}
	return (0);
}
3723
3724 /*
3725  * bridge_rtnode_lookup:
3726  *
3727  *      Look up a bridge route node for the specified destination.
3728  */
3729 static struct bridge_rtnode *
3730 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3731 {
3732         struct bridge_rtnode *brt;
3733         uint32_t hash;
3734         int dir;
3735
3736         hash = bridge_rthash(sc, addr);
3737         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3738                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3739                 if (dir == 0)
3740                         return (brt);
3741                 if (dir > 0)
3742                         return (NULL);
3743         }
3744
3745         return (NULL);
3746 }
3747
3748 /*
3749  * bridge_rtnode_insert:
3750  *
3751  *      Insert the specified bridge node into the route table.
3752  *      Caller has to make sure that rtnode does not exist.
3753  */
3754 static void
3755 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3756 {
3757         struct bridge_rtnode *lbrt;
3758         uint32_t hash;
3759         int dir;
3760
3761         hash = bridge_rthash(sc, brt->brt_addr);
3762
3763         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3764         if (lbrt == NULL) {
3765                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3766                                   brt, brt_hash);
3767                 goto out;
3768         }
3769
3770         do {
3771                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3772                 KASSERT(dir != 0, ("rtnode already exist"));
3773
3774                 if (dir > 0) {
3775                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3776                         goto out;
3777                 }
3778                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3779                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3780                         goto out;
3781                 }
3782                 lbrt = LIST_NEXT(lbrt, brt_hash);
3783         } while (lbrt != NULL);
3784
3785         panic("no suitable position found for rtnode");
3786 out:
3787         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3788         if (mycpuid == 0) {
3789                 /*
3790                  * Update the brtcnt.
3791                  * We only need to do it once and we do it on CPU0.
3792                  */
3793                 sc->sc_brtcnt++;
3794         }
3795 }
3796
3797 /*
3798  * bridge_rtnode_destroy:
3799  *
3800  *      Destroy a bridge rtnode.
3801  */
3802 static void
3803 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3804 {
3805         LIST_REMOVE(brt, brt_hash);
3806         LIST_REMOVE(brt, brt_list);
3807
3808         if (mycpuid + 1 == ncpus) {
3809                 /* Free rtinfo associated with rtnode on the last cpu */
3810                 kfree(brt->brt_info, M_DEVBUF);
3811         }
3812         kfree(brt, M_DEVBUF);
3813
3814         if (mycpuid == 0) {
3815                 /* Update brtcnt only on CPU0 */
3816                 sc->sc_brtcnt--;
3817         }
3818 }
3819
3820 static __inline int
3821 bridge_post_pfil(struct mbuf *m)
3822 {
3823         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3824                 return EOPNOTSUPP;
3825
3826         /* Not yet */
3827         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3828                 return EOPNOTSUPP;
3829
3830         return 0;
3831 }
3832
3833 /*
3834  * Send bridge packets through pfil if they are one of the types pfil can deal
3835  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3836  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3837  * that interface.
3838  */
3839 static int
3840 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3841 {
3842         int snap, error, i, hlen;
3843         struct ether_header *eh1, eh2;
3844         struct ip *ip;
3845         struct llc llc1;
3846         u_int16_t ether_type;
3847
3848         snap = 0;
3849         error = -1;     /* Default error if not error == 0 */
3850
3851         if (pfil_bridge == 0 && pfil_member == 0)
3852                 return (0); /* filtering is disabled */
3853
3854         i = min((*mp)->m_pkthdr.len, max_protohdr);
3855         if ((*mp)->m_len < i) {
3856                 *mp = m_pullup(*mp, i);
3857                 if (*mp == NULL) {
3858                         kprintf("%s: m_pullup failed\n", __func__);
3859                         return (-1);
3860                 }
3861         }
3862
3863         eh1 = mtod(*mp, struct ether_header *);
3864         ether_type = ntohs(eh1->ether_type);
3865
3866         /*
3867          * Check for SNAP/LLC.
3868          */
3869         if (ether_type < ETHERMTU) {
3870                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3871
3872                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3873                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3874                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3875                     llc2->llc_control == LLC_UI) {
3876                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3877                         snap = 1;
3878                 }
3879         }
3880
3881         /*
3882          * If we're trying to filter bridge traffic, don't look at anything
3883          * other than IP and ARP traffic.  If the filter doesn't understand
3884          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3885          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3886          * but of course we don't have an AppleTalk filter to begin with.
3887          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3888          * ARP traffic.)
3889          */
3890         switch (ether_type) {
3891         case ETHERTYPE_ARP:
3892         case ETHERTYPE_REVARP:
3893                 return (0); /* Automatically pass */
3894
3895         case ETHERTYPE_IP:
3896 #ifdef INET6
3897         case ETHERTYPE_IPV6:
3898 #endif /* INET6 */
3899                 break;
3900
3901         default:
3902                 /*
3903                  * Check to see if the user wants to pass non-ip
3904                  * packets, these will not be checked by pfil(9)
3905                  * and passed unconditionally so the default is to drop.
3906                  */
3907                 if (pfil_onlyip)
3908                         goto bad;
3909         }
3910
3911         /* Strip off the Ethernet header and keep a copy. */
3912         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3913         m_adj(*mp, ETHER_HDR_LEN);
3914
3915         /* Strip off snap header, if present */
3916         if (snap) {
3917                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3918                 m_adj(*mp, sizeof(struct llc));
3919         }
3920
3921         /*
3922          * Check the IP header for alignment and errors
3923          */
3924         if (dir == PFIL_IN) {
3925                 switch (ether_type) {
3926                 case ETHERTYPE_IP:
3927                         error = bridge_ip_checkbasic(mp);
3928                         break;
3929 #ifdef INET6
3930                 case ETHERTYPE_IPV6:
3931                         error = bridge_ip6_checkbasic(mp);
3932                         break;
3933 #endif /* INET6 */
3934                 default:
3935                         error = 0;
3936                 }
3937                 if (error)
3938                         goto bad;
3939         }
3940
3941         error = 0;
3942
3943         /*
3944          * Run the packet through pfil
3945          */
3946         switch (ether_type) {
3947         case ETHERTYPE_IP:
3948                 /*
3949                  * before calling the firewall, swap fields the same as
3950                  * IP does. here we assume the header is contiguous
3951                  */
3952                 ip = mtod(*mp, struct ip *);
3953
3954                 ip->ip_len = ntohs(ip->ip_len);
3955                 ip->ip_off = ntohs(ip->ip_off);
3956
3957                 /*
3958                  * Run pfil on the member interface and the bridge, both can
3959                  * be skipped by clearing pfil_member or pfil_bridge.
3960                  *
3961                  * Keep the order:
3962                  *   in_if -> bridge_if -> out_if
3963                  */
3964                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3965                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3966                         if (*mp == NULL || error != 0) /* filter may consume */
3967                                 break;
3968                         error = bridge_post_pfil(*mp);
3969                         if (error)
3970                                 break;
3971                 }
3972
3973                 if (pfil_member && ifp != NULL) {
3974                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3975                         if (*mp == NULL || error != 0) /* filter may consume */
3976                                 break;
3977                         error = bridge_post_pfil(*mp);
3978                         if (error)
3979                                 break;
3980                 }
3981
3982                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3983                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3984                         if (*mp == NULL || error != 0) /* filter may consume */
3985                                 break;
3986                         error = bridge_post_pfil(*mp);
3987                         if (error)
3988                                 break;
3989                 }
3990
3991                 /* check if we need to fragment the packet */
3992                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3993                         i = (*mp)->m_pkthdr.len;
3994                         if (i > ifp->if_mtu) {
3995                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3996                                             &llc1);
3997                                 return (error);
3998                         }
3999                 }
4000
4001                 /* Recalculate the ip checksum and restore byte ordering */
4002                 ip = mtod(*mp, struct ip *);
4003                 hlen = ip->ip_hl << 2;
4004                 if (hlen < sizeof(struct ip))
4005                         goto bad;
4006                 if (hlen > (*mp)->m_len) {
4007                         if ((*mp = m_pullup(*mp, hlen)) == NULL)
4008                                 goto bad;
4009                         ip = mtod(*mp, struct ip *);
4010                         if (ip == NULL)
4011                                 goto bad;
4012                 }
4013                 ip->ip_len = htons(ip->ip_len);
4014                 ip->ip_off = htons(ip->ip_off);
4015                 ip->ip_sum = 0;
4016                 if (hlen == sizeof(struct ip))
4017                         ip->ip_sum = in_cksum_hdr(ip);
4018                 else
4019                         ip->ip_sum = in_cksum(*mp, hlen);
4020
4021                 break;
4022 #ifdef INET6
4023         case ETHERTYPE_IPV6:
4024                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
4025                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4026                                         dir);
4027
4028                 if (*mp == NULL || error != 0) /* filter may consume */
4029                         break;
4030
4031                 if (pfil_member && ifp != NULL)
4032                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
4033                                         dir);
4034
4035                 if (*mp == NULL || error != 0) /* filter may consume */
4036                         break;
4037
4038                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4039                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4040                                         dir);
4041                 break;
4042 #endif
4043         default:
4044                 error = 0;
4045                 break;
4046         }
4047
4048         if (*mp == NULL)
4049                 return (error);
4050         if (error != 0)
4051                 goto bad;
4052
4053         error = -1;
4054
4055         /*
4056          * Finally, put everything back the way it was and return
4057          */
4058         if (snap) {
4059                 M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
4060                 if (*mp == NULL)
4061                         return (error);
4062                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4063         }
4064
4065         M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
4066         if (*mp == NULL)
4067                 return (error);
4068         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4069
4070         return (0);
4071
4072 bad:
4073         m_freem(*mp);
4074         *mp = NULL;
4075         return (error);
4076 }
4077
4078 /*
4079  * Perform basic checks on header size since
4080  * pfil assumes ip_input has already processed
4081  * it for it.  Cut-and-pasted from ip_input.c.
4082  * Given how simple the IPv6 version is,
4083  * does the IPv4 version really need to be
4084  * this complicated?
4085  *
4086  * XXX Should we update ipstat here, or not?
4087  * XXX Right now we update ipstat but not
4088  * XXX csum_counter.
4089  */
4090 static int
4091 bridge_ip_checkbasic(struct mbuf **mp)
4092 {
4093         struct mbuf *m = *mp;
4094         struct ip *ip;
4095         int len, hlen;
4096         u_short sum;
4097
4098         if (*mp == NULL)
4099                 return (-1);
4100 #if 0 /* notyet */
4101         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4102                 if ((m = m_copyup(m, sizeof(struct ip),
4103                         (max_linkhdr + 3) & ~3)) == NULL) {
4104                         /* XXXJRT new stat, please */
4105                         ipstat.ips_toosmall++;
4106                         goto bad;
4107                 }
4108         } else
4109 #endif
4110 #ifndef __predict_false
4111 #define __predict_false(x) x
4112 #endif
4113          if (__predict_false(m->m_len < sizeof (struct ip))) {
4114                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4115                         ipstat.ips_toosmall++;
4116                         goto bad;
4117                 }
4118         }
4119         ip = mtod(m, struct ip *);
4120         if (ip == NULL) goto bad;
4121
4122         if (ip->ip_v != IPVERSION) {
4123                 ipstat.ips_badvers++;
4124                 goto bad;
4125         }
4126         hlen = ip->ip_hl << 2;
4127         if (hlen < sizeof(struct ip)) { /* minimum header length */
4128                 ipstat.ips_badhlen++;
4129                 goto bad;
4130         }
4131         if (hlen > m->m_len) {
4132                 if ((m = m_pullup(m, hlen)) == NULL) {
4133                         ipstat.ips_badhlen++;
4134                         goto bad;
4135                 }
4136                 ip = mtod(m, struct ip *);
4137                 if (ip == NULL) goto bad;
4138         }
4139
4140         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4141                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4142         } else {
4143                 if (hlen == sizeof(struct ip)) {
4144                         sum = in_cksum_hdr(ip);
4145                 } else {
4146                         sum = in_cksum(m, hlen);
4147                 }
4148         }
4149         if (sum) {
4150                 ipstat.ips_badsum++;
4151                 goto bad;
4152         }
4153
4154         /* Retrieve the packet length. */
4155         len = ntohs(ip->ip_len);
4156
4157         /*
4158          * Check for additional length bogosity
4159          */
4160         if (len < hlen) {
4161                 ipstat.ips_badlen++;
4162                 goto bad;
4163         }
4164
4165         /*
4166          * Check that the amount of data in the buffers
4167          * is as at least much as the IP header would have us expect.
4168          * Drop packet if shorter than we expect.
4169          */
4170         if (m->m_pkthdr.len < len) {
4171                 ipstat.ips_tooshort++;
4172                 goto bad;
4173         }
4174
4175         /* Checks out, proceed */
4176         *mp = m;
4177         return (0);
4178
4179 bad:
4180         *mp = m;
4181         return (-1);
4182 }
4183
#ifdef INET6
/*
 * bridge_ip6_checkbasic:
 *
 *	Same as bridge_ip_checkbasic(), but for IPv6.
 *	Cut-and-pasted from ip6_input.c.
 *
 *	Returns 0 if the base header is present and carries the IPv6
 *	version, -1 otherwise.  The packet is never freed here; *mp is
 *	updated to the (possibly replaced, and after a failed m_pullup
 *	NULL) mbuf in both cases.
 *
 * XXX Should we update ip6stat, or not?
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *hdr6;
	int error = -1;

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
#if 0 /* notyet */
	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
		struct ifnet *inifp = m->m_pkthdr.rcvif;
		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
			    (max_linkhdr + 3) & ~3)) == NULL) {
			/* XXXJRT new stat, please */
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto done;
		}
	} else
#endif
	if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
		/* Remember the interface; m may be gone after m_pullup. */
		struct ifnet *rcvif = m->m_pkthdr.rcvif;

		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(rcvif, ifs6_in_hdrerr);
			goto done;
		}
	}

	hdr6 = mtod(m, struct ip6_hdr *);
	if ((hdr6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto done;
	}

	/* Checks out, proceed */
	error = 0;
done:
	*mp = m;
	return (error);
}
#endif /* INET6 */
4240
4241 /*
4242  * bridge_fragment:
4243  *
4244  *      Return a fragmented mbuf chain.
4245  */
4246 static int
4247 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4248     int snap, struct llc *llc)
4249 {
4250         struct mbuf *m0;
4251         struct ip *ip;
4252         int error = -1;
4253
4254         if (m->m_len < sizeof(struct ip) &&
4255             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4256                 goto out;
4257         ip = mtod(m, struct ip *);
4258
4259         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4260                     CSUM_DELAY_IP);
4261         if (error)
4262                 goto out;
4263
4264         /* walk the chain and re-add the Ethernet header */
4265         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4266                 if (error == 0) {
4267                         if (snap) {
4268                                 M_PREPEND(m0, sizeof(struct llc), M_NOWAIT);
4269                                 if (m0 == NULL) {
4270                                         error = ENOBUFS;
4271                                         continue;
4272                                 }
4273                                 bcopy(llc, mtod(m0, caddr_t),
4274                                     sizeof(struct llc));
4275                         }
4276                         M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT);
4277                         if (m0 == NULL) {
4278                                 error = ENOBUFS;
4279                                 continue;
4280                         }
4281                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4282                 } else 
4283                         m_freem(m);
4284         }
4285
4286         if (error == 0)
4287                 ipstat.ips_fragmented++;
4288
4289         return (error);
4290
4291 out:
4292         if (m != NULL)
4293                 m_freem(m);
4294         return (error);
4295 }
4296
4297 static void
4298 bridge_enqueue_handler(netmsg_t msg)
4299 {
4300         struct netmsg_packet *nmp;
4301         struct ifnet *dst_ifp;
4302         struct mbuf *m;
4303
4304         nmp = &msg->packet;
4305         m = nmp->nm_packet;
4306         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4307         mbuftrackid(m, 71);
4308
4309         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4310 }
4311
4312 static void
4313 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4314                struct mbuf *m, int from_us)
4315 {
4316         struct mbuf *m0;
4317         struct ifnet *bifp;
4318
4319         bifp = sc->sc_ifp;
4320         mbuftrackid(m, 72);
4321
4322         /* We may be sending a fragment so traverse the mbuf */
4323         for (; m; m = m0) {
4324                 struct altq_pktattr pktattr;
4325
4326                 m0 = m->m_nextpkt;
4327                 m->m_nextpkt = NULL;
4328
4329                 /*
4330                  * If being sent from our host override ether_shost
4331                  * with the bridge MAC.  This is mandatory for ARP
4332                  * so things don't get confused.  In particular we
4333                  * don't want ARPs to get associated with link interfaces
4334                  * under the bridge which might or might not stay valid.
4335                  *
4336                  * Also override ether_shost when relaying a packet out
4337                  * the same interface it came in on, due to multi-homed
4338                  * addresses & default routes, otherwise switches will
4339                  * get very confused.
4340                  *
4341                  * Otherwise if we are in transparent mode.
4342                  */
4343                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4344                         m_copyback(m,
4345                                    offsetof(struct ether_header, ether_shost),
4346                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4347                 } else if ((bifp->if_flags & IFF_LINK0) &&
4348                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4349                         m_copyback(m,
4350                                    offsetof(struct ether_header, ether_shost),
4351                                    ETHER_ADDR_LEN,
4352                                    m->m_pkthdr.ether_br_shost);
4353                 } /* else retain shost */
4354
4355                 if (ifq_is_enabled(&dst_ifp->if_snd))
4356                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4357
4358                 ifq_dispatch(dst_ifp, m, &pktattr);
4359         }
4360 }
4361
4362 static void
4363 bridge_control_dispatch(netmsg_t msg)
4364 {
4365         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4366         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4367         int error;
4368
4369         ifnet_serialize_all(bifp);
4370         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4371         ifnet_deserialize_all(bifp);
4372
4373         lwkt_replymsg(&bc_msg->base.lmsg, error);
4374 }
4375
4376 static int
4377 bridge_control(struct bridge_softc *sc, u_long cmd,
4378                bridge_ctl_t bc_func, void *bc_arg)
4379 {
4380         struct ifnet *bifp = sc->sc_ifp;
4381         struct netmsg_brctl bc_msg;
4382         int error;
4383
4384         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4385
4386         bzero(&bc_msg, sizeof(bc_msg));
4387
4388         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4389                     0, bridge_control_dispatch);
4390         bc_msg.bc_func = bc_func;
4391         bc_msg.bc_sc = sc;
4392         bc_msg.bc_arg = bc_arg;
4393
4394         ifnet_deserialize_all(bifp);
4395         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4396         ifnet_serialize_all(bifp);
4397         return error;
4398 }
4399
4400 static void
4401 bridge_add_bif_handler(netmsg_t msg)
4402 {
4403         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4404         struct bridge_softc *sc;
4405         struct bridge_iflist *bif;
4406
4407         sc = amsg->br_softc;
4408
4409         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4410         bif->bif_ifp = amsg->br_bif_ifp;
4411         bif->bif_onlist = 1;
4412         bif->bif_info = amsg->br_bif_info;
4413
4414         /*
4415          * runs through bif_info
4416          */
4417         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4418
4419         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4420
4421         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4422 }
4423
4424 static void
4425 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4426                struct ifnet *ifp)
4427 {
4428         struct netmsg_braddbif amsg;
4429
4430         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4431
4432         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4433                     0, bridge_add_bif_handler);
4434         amsg.br_softc = sc;
4435         amsg.br_bif_info = bif_info;
4436         amsg.br_bif_ifp = ifp;
4437
4438         ifnet_domsg(&amsg.base.lmsg, 0);
4439 }
4440
4441 static void
4442 bridge_del_bif_handler(netmsg_t msg)
4443 {
4444         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4445         struct bridge_softc *sc;
4446         struct bridge_iflist *bif;
4447
4448         sc = dmsg->br_softc;
4449
4450         /*
4451          * Locate the bif associated with the br_bif_info
4452          * on the current CPU
4453          */
4454         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4455         KKASSERT(bif != NULL && bif->bif_onlist);
4456
4457         /* Remove the bif from the current CPU's iflist */
4458         bif->bif_onlist = 0;
4459         TAILQ_REMOVE(dmsg->br_bif_list, bif, bif_next);
4460
4461         /* Save the removed bif for later freeing */
4462         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4463
4464         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4465 }
4466
4467 static void
4468 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4469                struct bridge_iflist_head *saved_bifs)
4470 {
4471         struct netmsg_brdelbif dmsg;
4472
4473         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4474
4475         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4476                     0, bridge_del_bif_handler);
4477         dmsg.br_softc = sc;
4478         dmsg.br_bif_info = bif_info;
4479         dmsg.br_bif_list = saved_bifs;
4480
4481         ifnet_domsg(&dmsg.base.lmsg, 0);
4482 }