/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Jason L. Wright
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
 * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
 * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
 */

/*
 * Network interface bridge support.
 *
 * TODO:
 *
 *      - Currently only supports Ethernet-like interfaces (Ethernet,
 *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
 *        to bridge other types of interfaces (FDDI-FDDI, and maybe
 *        consider heterogeneous bridges).
 *
 *
 * The bridge's route information is duplicated to each CPU:
 *
 *      CPU0          CPU1          CPU2          CPU3
 * +-----------+ +-----------+ +-----------+ +-----------+
 * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
 * |           | |           | |           | |           |
 * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
 * +-----------+ +-----------+ +-----------+ +-----------+
 *       |         |                     |         |
 *       |         |                     |         |
 *       |         |     +----------+    |         |
 *       |         |     |  rtinfo  |    |         |
 *       |         +---->|          |<---+         |
 *       |               |  flags   |              |
 *       +-------------->|  timeout |<-------------+
 *                       |  dst_ifp |
 *                       +----------+
 *
 * We choose to put timeout and dst_ifp into the shared part, so updating
 * them is cheaper than using message forwarding.  Also there is no need
 * to use a spinlock to protect the updates: timeout and dst_ifp are not
 * related, and the order in which a specific field is updated does not
 * matter.  The cache pollution caused by the shared part should not be
 * heavy: in a stable setup, dst_ifp will probably not change during the
 * rtnode's lifetime, while timeout is refreshed once per second; most of
 * the time, timeout and dst_ifp are only read.
 *
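 * As a rough sketch of the split described above (field names follow the
 * brt_*/bri_* accessors used later in this file; the authoritative
 * definitions live in net/bridge/if_bridgevar.h and may carry more
 * fields than shown here):
 *
 *      struct bridge_rtinfo {                  -- shared part
 *              struct ifnet    *bri_ifp;       -- dst_ifp
 *              unsigned long   bri_expire;     -- timeout
 *              uint8_t         bri_flags;
 *      };
 *      struct bridge_rtnode {                  -- per-cpu part
 *              uint8_t         brt_addr[ETHER_ADDR_LEN];  -- dst eaddr
 *              struct bridge_rtinfo *brt_info; -- points to shared part
 *      };
 *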
 *
 * Bridge route information installation on bridge_input path:
 *
 *      CPU0           CPU1         CPU2          CPU3
 *
 *                               tcp_thread2
 *                                    |
 *                                alloc nmsg
 *                    snd nmsg        |
 *                    w/o rtinfo      |
 *      ifnet0<-----------------------+
 *        |                           :
 *    lookup dst                      :
 *   rtnode exists?(Y)free nmsg       :
 *        |(N)                        :
 *        |
 *  alloc rtinfo
 *  alloc rtnode
 * install rtnode
 *        |
 *        +---------->ifnet1
 *        : fwd nmsg    |
 *        : w/ rtinfo   |
 *        :             |
 *        :             |
 *                 alloc rtnode
 *               (w/ nmsg's rtinfo)
 *                install rtnode
 *                      |
 *                      +---------->ifnet2
 *                      : fwd nmsg    |
 *                      : w/ rtinfo   |
 *                      :             |
 *                      :         same as ifnet1
 *                                    |
 *                                    +---------->ifnet3
 *                                    : fwd nmsg    |
 *                                    : w/ rtinfo   |
 *                                    :             |
 *                                    :         same as ifnet1
 *                                               free nmsg
 *                                                  :
 *                                                  :
 *
 * The netmsgs forwarded between protocol threads and ifnet threads are
 * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
 * most cases (route information is too precious not to be installed :).
 * Since multiple threads may try to install route information for the
 * same dst eaddr, we look up the route information in ifnet0.  However,
 * this lookup only needs to be performed on ifnet0, which is the starting
 * point of the route information installation process.
 *
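 * As a rough sketch of that allocation pattern (the malloc type and the
 * error handling here are illustrative only; the real work is done by
 * bridge_rtupdate() and bridge_rtinstall_handler(), using the struct
 * netmsg_brsaddr defined below):
 *
 *      brmsg = kmalloc(sizeof(*brmsg), M_TEMP, M_WAITOK | M_NULLOK);
 *      if (brmsg == NULL)
 *              return;         -- rare; the address is simply re-learned
 *                              -- from a later packet
 *      -- fill in dst eaddr, dst_if and flags, then send the netmsg to
 *      -- the CPU0 (ifnet0) netisr, which starts the chain shown above
 *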
 *
 * Bridge route information deleting/flushing:
 *
 *  CPU0            CPU1             CPU2             CPU3
 *
 * netisr0
 *   |
 * find suitable rtnodes,
 * mark their rtinfo dead
 *   |
 *   | domsg <------------------------------------------+
 *   |                                                  | replymsg
 *   |                                                  |
 *   V     fwdmsg           fwdmsg           fwdmsg     |
 * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
 * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
 * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
 *                                                    free dead rtinfos
 *
 * All deleting/flushing operations are serialized by netisr0, so each
 * operation only reaps the route information marked dead by itself.
 *
 *
 * Bridge route information adding/deleting/flushing:
 * Since all operations are serialized by the fixed message flow between
 * ifnet threads, it is not possible to create corrupted per-cpu route
 * information.
 *
 *
 *
 * Percpu member interface list iteration with blocking operations:
 * Since a bridge can only delete one member interface at a time and the
 * deleted member interface is not freed until after netmsg_service_sync(),
 * the following pattern is used to make sure that even if a member
 * interface is ripped from the percpu list during a blocking operation,
 * the iteration can still keep going:
 *
 * TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
 *     blocking operation;
 *     blocking operation;
 *     ...
 *     ...
 *     if (nbif != NULL && !nbif->bif_onlist) {
 *         KKASSERT(bif->bif_onlist);
 *         nbif = TAILQ_NEXT(bif, bif_next);
 *     }
 * }
 *
 * As mentioned above, only one member interface can be unlinked from the
 * percpu member interface list at a time, so either bif or nbif may be off
 * the list, but _not_ both.  To keep the iteration going we only care about
 * nbif, not bif.  Since a removed member interface is only freed after we
 * finish our work, it is safe to access any field in an unlinked bif (here
 * bif_onlist).  If nbif is no longer on the list, then bif must be on the
 * list, so we change nbif to the next element of bif and keep going.
 */

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/protosw.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/socket.h> /* for net/if.h */
#include <sys/sockio.h>
#include <sys/ctype.h>  /* string functions */
#include <sys/kernel.h>
#include <sys/random.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/lock.h>
#include <sys/thread.h>
#include <sys/thread2.h>
#include <sys/mpipe.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/pfil.h>
#include <net/ifq_var.h>
#include <net/if_clone.h>

#include <netinet/in.h> /* for struct arpcom */
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
#include <netinet/if_ether.h> /* for struct arpcom */
#include <net/bridge/if_bridgevar.h>
#include <net/if_llc.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

#include <net/route.h>
#include <sys/in_cksum.h>

/*
 * Size of the route hash table.  Must be a power of two.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              1024
#endif

#define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)

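/*
 * Illustrative note: because the table size is a power of two, a bucket
 * index can be derived by masking instead of a modulo, e.g. something
 * like "hash & BRIDGE_RTHASH_MASK"; the actual hash computation is done
 * by the bridge_rtnode_* helpers implemented later in this file.
 */
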
/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Spanning tree defaults.
 */
#define BSTP_DEFAULT_MAX_AGE            (20 * 256)
#define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
#define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
#define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
#define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
#define BSTP_DEFAULT_PORT_PRIORITY      0x80
#define BSTP_DEFAULT_PATH_COST          55

/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * List of capabilities to mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TXCSUM | IFCAP_TSO)

typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);

struct netmsg_brctl {
        struct netmsg_base      base;
        bridge_ctl_t            bc_func;
        struct bridge_softc     *bc_sc;
        void                    *bc_arg;
};

struct netmsg_brsaddr {
        struct netmsg_base      base;
        struct bridge_softc     *br_softc;
        struct ifnet            *br_dst_if;
        struct bridge_rtinfo    *br_rtinfo;
        int                     br_setflags;
        uint8_t                 br_dst[ETHER_ADDR_LEN];
        uint8_t                 br_flags;
};

struct netmsg_braddbif {
        struct netmsg_base      base;
        struct bridge_softc     *br_softc;
        struct bridge_ifinfo    *br_bif_info;
        struct ifnet            *br_bif_ifp;
};

struct netmsg_brdelbif {
        struct netmsg_base      base;
        struct bridge_softc     *br_softc;
        struct bridge_ifinfo    *br_bif_info;
        struct bridge_iflist_head *br_bif_list;
};

struct netmsg_brsflags {
        struct netmsg_base      base;
        struct bridge_softc     *br_softc;
        struct bridge_ifinfo    *br_bif_info;
        uint32_t                br_bif_flags;
};

eventhandler_tag        bridge_detach_cookie = NULL;

extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);

static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

static int      bridge_clone_create(struct if_clone *, int, caddr_t);
static int      bridge_clone_destroy(struct ifnet *);

static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
static void     bridge_ifdetach(void *, struct ifnet *);
static void     bridge_init(void *);
static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
static void     bridge_stop(struct ifnet *);
static void     bridge_start(struct ifnet *, struct ifaltq_subque *);
static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
static int      bridge_output(struct ifnet *, struct mbuf *);
static struct ifnet *bridge_interface(void *if_bridge);

static void     bridge_forward(struct bridge_softc *, struct mbuf *m);

static void     bridge_timer_handler(netmsg_t);
static void     bridge_timer(void *);

static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
                    struct mbuf *);
static void     bridge_span(struct bridge_softc *, struct mbuf *);

static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
                    struct ifnet *, uint8_t);
static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
static void     bridge_rtreap(struct bridge_softc *);
static void     bridge_rtreap_async(struct bridge_softc *);
static void     bridge_rttrim(struct bridge_softc *);
static int      bridge_rtage_finddead(struct bridge_softc *);
static void     bridge_rtage(struct bridge_softc *);
static void     bridge_rtflush(struct bridge_softc *, int);
static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
                    struct ifnet *, uint8_t);
static void     bridge_rtmsg_sync(struct bridge_softc *sc);
static void     bridge_rtreap_handler(netmsg_t);
static void     bridge_rtinstall_handler(netmsg_t);
static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
                    struct ifnet *, int, uint8_t, struct bridge_rtinfo **);

static void     bridge_rtable_init(struct bridge_softc *);
static void     bridge_rtable_fini(struct bridge_softc *);

static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
                    const uint8_t *);
static void     bridge_rtnode_insert(struct bridge_softc *,
                    struct bridge_rtnode *);
static void     bridge_rtnode_destroy(struct bridge_softc *,
                    struct bridge_rtnode *);

static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
                    const char *name);
static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
                    struct ifnet *ifp);
static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
                    struct bridge_ifinfo *);
static void     bridge_delete_member(struct bridge_softc *,
                    struct bridge_iflist *, int);
static void     bridge_delete_span(struct bridge_softc *,
                    struct bridge_iflist *);

static int      bridge_control(struct bridge_softc *, u_long,
                               bridge_ctl_t, void *);
static int      bridge_ioctl_init(struct bridge_softc *, void *);
static int      bridge_ioctl_stop(struct bridge_softc *, void *);
static int      bridge_ioctl_add(struct bridge_softc *, void *);
static int      bridge_ioctl_del(struct bridge_softc *, void *);
static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
                                struct bridge_iflist *bif, struct ifbreq *req);
static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
static int      bridge_ioctl_scache(struct bridge_softc *, void *);
static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
static int      bridge_ioctl_rts(struct bridge_softc *, void *);
static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
static int      bridge_ioctl_sto(struct bridge_softc *, void *);
static int      bridge_ioctl_gto(struct bridge_softc *, void *);
static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
static int      bridge_ioctl_flush(struct bridge_softc *, void *);
static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
static int      bridge_ioctl_spri(struct bridge_softc *, void *);
static int      bridge_ioctl_reinit(struct bridge_softc *, void *);
static int      bridge_ioctl_ght(struct bridge_softc *, void *);
static int      bridge_ioctl_sht(struct bridge_softc *, void *);
static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
static int      bridge_ioctl_gma(struct bridge_softc *, void *);
static int      bridge_ioctl_sma(struct bridge_softc *, void *);
static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
                    int);
static int      bridge_ip_checkbasic(struct mbuf **mp);
#ifdef INET6
static int      bridge_ip6_checkbasic(struct mbuf **mp);
#endif /* INET6 */
static int      bridge_fragment(struct ifnet *, struct mbuf *,
                    struct ether_header *, int, struct llc *);
static void     bridge_enqueue_handler(netmsg_t);
static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
                    struct mbuf *, int);

static void     bridge_del_bif_handler(netmsg_t);
static void     bridge_add_bif_handler(netmsg_t);
static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
                    struct bridge_iflist_head *);
static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
                    struct ifnet *);

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");

static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
static int pfil_member = 1; /* run pfil hooks on the member interface */
static int bridge_debug;
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
    &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
    &pfil_bridge, 0, "Packet filter on the bridge interface");
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
    &pfil_member, 0, "Packet filter on the member interface");
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
    &bridge_debug, 0, "Bridge debug mode");

struct bridge_control_arg {
        union {
                struct ifbreq ifbreq;
                struct ifbifconf ifbifconf;
                struct ifbareq ifbareq;
                struct ifbaconf ifbaconf;
                struct ifbrparam ifbrparam;
        } bca_u;
        int     bca_len;
        void    *bca_uptr;
        void    *bca_kptr;
};

struct bridge_control {
        bridge_ctl_t    bc_func;
        int             bc_argsize;
        int             bc_flags;
};

#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */

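/*
 * The table below is indexed by the ifd_cmd value carried in the
 * SIOCGDRVSPEC/SIOCSDRVSPEC requests handled in bridge_ioctl().  As a
 * rough sketch of how userland drives it (the BRDGADD name and socket
 * setup are assumptions based on if_bridgevar.h, shown for illustration
 * only):
 *
 *      struct ifdrv ifd;
 *      struct ifbreq req;
 *
 *      strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
 *      strlcpy(req.ifbr_ifsname, "em0", sizeof(req.ifbr_ifsname));
 *      ifd.ifd_cmd = BRDGADD;          -- index 0: bridge_ioctl_add
 *      ifd.ifd_len = sizeof(req);
 *      ifd.ifd_data = &req;
 *      ioctl(sock, SIOCSDRVSPEC, &ifd);
 */
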
const struct bridge_control bridge_control_table[] = {
        { bridge_ioctl_add,             sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },
        { bridge_ioctl_del,             sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_COPYOUT },
        { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_scache,          sizeof(struct ifbrparam),
          BC_F_COPYIN|BC_F_SUSER },
        { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
          BC_F_COPYOUT },

        { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
          BC_F_COPYIN|BC_F_COPYOUT },
        { bridge_ioctl_rts,             sizeof(struct ifbaconf),
          BC_F_COPYIN|BC_F_COPYOUT },

        { bridge_ioctl_saddr,           sizeof(struct ifbareq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_sto,             sizeof(struct ifbrparam),
          BC_F_COPYIN|BC_F_SUSER },
        { bridge_ioctl_gto,             sizeof(struct ifbrparam),
          BC_F_COPYOUT },

        { bridge_ioctl_daddr,           sizeof(struct ifbareq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_flush,           sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
          BC_F_COPYOUT },
        { bridge_ioctl_spri,            sizeof(struct ifbrparam),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_ght,             sizeof(struct ifbrparam),
          BC_F_COPYOUT },
        { bridge_ioctl_sht,             sizeof(struct ifbrparam),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
          BC_F_COPYOUT },
        { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_gma,             sizeof(struct ifbrparam),
          BC_F_COPYOUT },
        { bridge_ioctl_sma,             sizeof(struct ifbrparam),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_addspan,         sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },
        { bridge_ioctl_delspan,         sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

        { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
          BC_F_COPYIN|BC_F_SUSER },

};
static const int bridge_control_table_size = NELEM(bridge_control_table);

LIST_HEAD(, bridge_softc) bridge_list;

struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
                                bridge_clone_create,
                                bridge_clone_destroy, 0, IF_MAXUNIT);

static int
bridge_modevent(module_t mod, int type, void *data)
{
        switch (type) {
        case MOD_LOAD:
                LIST_INIT(&bridge_list);
                if_clone_attach(&bridge_cloner);
                bridge_input_p = bridge_input;
                bridge_output_p = bridge_output;
                bridge_interface_p = bridge_interface;
                bridge_detach_cookie = EVENTHANDLER_REGISTER(
                    ifnet_detach_event, bridge_ifdetach, NULL,
                    EVENTHANDLER_PRI_ANY);
#if 0 /* notyet */
                bstp_linkstate_p = bstp_linkstate;
#endif
                break;
        case MOD_UNLOAD:
                if (!LIST_EMPTY(&bridge_list))
                        return (EBUSY);
                EVENTHANDLER_DEREGISTER(ifnet_detach_event,
                    bridge_detach_cookie);
                if_clone_detach(&bridge_cloner);
                bridge_input_p = NULL;
                bridge_output_p = NULL;
                bridge_interface_p = NULL;
#if 0 /* notyet */
                bstp_linkstate_p = NULL;
#endif
                break;
        default:
                return (EOPNOTSUPP);
        }
        return (0);
}

static moduledata_t bridge_mod = {
        "if_bridge",
        bridge_modevent,
        0
};

DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);


/*
 * bridge_clone_create:
 *
 *      Create a new bridge instance.
 */
static int
bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
        struct bridge_softc *sc;
        struct ifnet *ifp;
        u_char eaddr[6];
        int cpu, rnd;

        sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
        ifp = sc->sc_ifp = &sc->sc_if;

        sc->sc_brtmax = BRIDGE_RTABLE_MAX;
        sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
        sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
        sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
        sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
        sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
        sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;

        /* Initialize our routing table. */
        bridge_rtable_init(sc);

        callout_init(&sc->sc_brcallout);
        netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
                    MSGF_DROPABLE, bridge_timer_handler);
        sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;

        callout_init(&sc->sc_bstpcallout);
        netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
                    MSGF_DROPABLE, bstp_tick_handler);
        sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;

        /* Initialize per-cpu member iface lists */
        sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
                                 M_DEVBUF, M_WAITOK);
        for (cpu = 0; cpu < ncpus; ++cpu)
                TAILQ_INIT(&sc->sc_iflists[cpu]);

        TAILQ_INIT(&sc->sc_spanlist);

        ifp->if_softc = sc;
        if_initname(ifp, ifc->ifc_name, unit);
        ifp->if_mtu = ETHERMTU;
        ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
        ifp->if_ioctl = bridge_ioctl;
        ifp->if_start = bridge_start;
        ifp->if_init = bridge_init;
        ifp->if_type = IFT_ETHER;
        ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
        ifq_set_ready(&ifp->if_snd);
        ifp->if_hdrlen = ETHER_HDR_LEN;

        /*
         * Generate a random ethernet address and mark it as a locally
         * administered address (the LAA bit is set below).
         */
        rnd = karc4random();
        bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
        rnd = karc4random();
        bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */

        eaddr[0] &= ~1; /* clear multicast bit */
        eaddr[0] |= 2;  /* set the LAA bit */

        ether_ifattach(ifp, eaddr, NULL);
        /* Now undo some of the damage... */
        ifp->if_baudrate = 0;
        /*ifp->if_type = IFT_BRIDGE;*/

        crit_enter();   /* XXX MP */
        LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
        crit_exit();

        return (0);
}

static void
bridge_delete_dispatch(netmsg_t msg)
{
        struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
        struct ifnet *bifp = sc->sc_ifp;
        struct bridge_iflist *bif;

        ifnet_serialize_all(bifp);

        while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
                bridge_delete_member(sc, bif, 0);

        while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
                bridge_delete_span(sc, bif);

        ifnet_deserialize_all(bifp);

        lwkt_replymsg(&msg->lmsg, 0);
}

/*
 * bridge_clone_destroy:
 *
 *      Destroy a bridge instance.
 */
static int
bridge_clone_destroy(struct ifnet *ifp)
{
        struct bridge_softc *sc = ifp->if_softc;
        struct netmsg_base msg;

        ifnet_serialize_all(ifp);

        bridge_stop(ifp);
        ifp->if_flags &= ~IFF_UP;

        ifnet_deserialize_all(ifp);

        netmsg_init(&msg, NULL, &curthread->td_msgport,
                    0, bridge_delete_dispatch);
        msg.lmsg.u.ms_resultp = sc;
        lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);

        crit_enter();   /* XXX MP */
        LIST_REMOVE(sc, sc_list);
        crit_exit();

        ether_ifdetach(ifp);

        /* Tear down the routing table. */
        bridge_rtable_fini(sc);

        /* Free per-cpu member iface lists */
        kfree(sc->sc_iflists, M_DEVBUF);

        kfree(sc, M_DEVBUF);

        return 0;
}

/*
 * bridge_ioctl:
 *
 *      Handle a control request from the operator.
 */
static int
bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
        struct bridge_softc *sc = ifp->if_softc;
        struct bridge_control_arg args;
        struct ifdrv *ifd = (struct ifdrv *) data;
        const struct bridge_control *bc;
        int error = 0;

        ASSERT_IFNET_SERIALIZED_ALL(ifp);

        switch (cmd) {
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                break;

        case SIOCGDRVSPEC:
        case SIOCSDRVSPEC:
                if (ifd->ifd_cmd >= bridge_control_table_size) {
                        error = EINVAL;
                        break;
                }
                bc = &bridge_control_table[ifd->ifd_cmd];

                if (cmd == SIOCGDRVSPEC &&
                    (bc->bc_flags & BC_F_COPYOUT) == 0) {
                        error = EINVAL;
                        break;
                } else if (cmd == SIOCSDRVSPEC &&
                           (bc->bc_flags & BC_F_COPYOUT)) {
                        error = EINVAL;
                        break;
                }

                if (bc->bc_flags & BC_F_SUSER) {
                        error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
                        if (error)
                                break;
                }

                if (ifd->ifd_len != bc->bc_argsize ||
                    ifd->ifd_len > sizeof(args.bca_u)) {
                        error = EINVAL;
                        break;
                }

                memset(&args, 0, sizeof(args));
                if (bc->bc_flags & BC_F_COPYIN) {
                        error = copyin(ifd->ifd_data, &args.bca_u,
                                       ifd->ifd_len);
                        if (error)
                                break;
                }

                error = bridge_control(sc, cmd, bc->bc_func, &args);
                if (error) {
                        KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
                        break;
                }

                if (bc->bc_flags & BC_F_COPYOUT) {
                        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
                        if (args.bca_len != 0) {
                                KKASSERT(args.bca_kptr != NULL);
                                if (!error) {
                                        error = copyout(args.bca_kptr,
                                                args.bca_uptr, args.bca_len);
                                }
                                kfree(args.bca_kptr, M_TEMP);
                        } else {
                                KKASSERT(args.bca_kptr == NULL);
                        }
                } else {
                        KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
                }
                break;

        case SIOCSIFFLAGS:
                if (!(ifp->if_flags & IFF_UP) &&
                    (ifp->if_flags & IFF_RUNNING)) {
                        /*
                         * If interface is marked down and it is running,
                         * then stop it.
                         */
                        bridge_stop(ifp);
                } else if ((ifp->if_flags & IFF_UP) &&
                    !(ifp->if_flags & IFF_RUNNING)) {
                        /*
                         * If interface is marked up and it is stopped, then
                         * start it.
                         */
                        ifp->if_init(sc);
                }

                /*
                 * If running and the link flag state has changed, we have
                 * to reinitialize as well.
                 */
                if ((ifp->if_flags & IFF_RUNNING) &&
                    (ifp->if_flags & (IFF_LINK0|IFF_LINK1|IFF_LINK2)) !=
                    sc->sc_copy_flags) {
                        sc->sc_copy_flags = ifp->if_flags &
                                        (IFF_LINK0|IFF_LINK1|IFF_LINK2);
                        bridge_control(sc, 0, bridge_ioctl_reinit, NULL);
                }

                break;

        case SIOCSIFMTU:
                /* Do not allow the MTU to be changed on the bridge */
                error = EINVAL;
                break;

        default:
                error = ether_ioctl(ifp, cmd, data);
                break;
        }
        return (error);
}

/*
 * bridge_mutecaps:
 *
 *      Clear or restore unwanted capabilities on the member interface
 */
static void
bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
{
        struct ifreq ifr;

        if (ifp->if_ioctl == NULL)
                return;

        bzero(&ifr, sizeof(ifr));
        ifr.ifr_reqcap = ifp->if_capenable;

        if (mute) {
                /* mask off and save capabilities */
                bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
                if (bif_info->bifi_mutecap != 0)
                        ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
        } else {
                /* restore muted capabilities */
                ifr.ifr_reqcap |= bif_info->bifi_mutecap;
        }

        if (bif_info->bifi_mutecap != 0) {
                ifnet_serialize_all(ifp);
                ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
                ifnet_deserialize_all(ifp);
        }
}

/*
 * bridge_lookup_member:
 *
 *      Lookup a bridge member interface.
 */
static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc *sc, const char *name)
{
        struct bridge_iflist *bif;

        TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (strcmp(bif->bif_ifp->if_xname, name) == 0)
                        return (bif);
        }
        return (NULL);
}

/*
 * bridge_lookup_member_if:
 *
 *      Lookup a bridge member interface by ifnet*.
 */
static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
{
        struct bridge_iflist *bif;

        TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (bif->bif_ifp == member_ifp)
                        return (bif);
        }
        return (NULL);
}

/*
 * bridge_lookup_member_ifinfo:
 *
 *      Lookup a bridge member interface by bridge_ifinfo.
 */
static struct bridge_iflist *
bridge_lookup_member_ifinfo(struct bridge_softc *sc,
                            struct bridge_ifinfo *bif_info)
{
        struct bridge_iflist *bif;

        TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
                if (bif->bif_info == bif_info)
                        return (bif);
        }
        return (NULL);
}

/*
 * bridge_delete_member:
 *
 *      Delete the specified member interface.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    int gone)
{
        struct ifnet *ifs = bif->bif_ifp;
        struct ifnet *bifp = sc->sc_ifp;
        struct bridge_ifinfo *bif_info = bif->bif_info;
        struct bridge_iflist_head saved_bifs;

        ASSERT_IFNET_SERIALIZED_ALL(bifp);
        KKASSERT(bif_info != NULL);

        ifs->if_bridge = NULL;

        /*
         * Release the bridge interface's serializer:
         * - To avoid a possible deadlock.
         * - Various sync operations will block the current thread.
         */
        ifnet_deserialize_all(bifp);

        if (!gone) {
                switch (ifs->if_type) {
                case IFT_ETHER:
                case IFT_L2VLAN:
                        /*
                         * Take the interface out of promiscuous mode.
                         */
                        ifpromisc(ifs, 0);
                        bridge_mutecaps(bif_info, ifs, 0);
                        break;

                case IFT_GIF:
                        break;

                default:
                        panic("bridge_delete_member: impossible");
                        break;
                }
        }

        /*
         * Remove the bifs from the percpu linked lists.
         *
         * Removed bifs are not freed immediately; instead,
         * they are saved in saved_bifs.  They will be freed
         * after we make sure that no one is accessing them,
         * i.e. after the following netmsg_service_sync().
         */
        TAILQ_INIT(&saved_bifs);
        bridge_del_bif(sc, bif_info, &saved_bifs);

        /*
         * Make sure that all protocol threads:
         * o  see that 'ifs' if_bridge has been changed
         * o  know that bif has been removed from the percpu linked list
         */
        netmsg_service_sync();

        /*
         * Free the removed bifs
         */
        KKASSERT(!TAILQ_EMPTY(&saved_bifs));
        while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
                TAILQ_REMOVE(&saved_bifs, bif, bif_next);
                kfree(bif, M_DEVBUF);
        }

        /* See the comment in bridge_ioctl_stop() */
        bridge_rtmsg_sync(sc);
        bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);

        ifnet_serialize_all(bifp);

        if (bifp->if_flags & IFF_RUNNING)
                bstp_initialization(sc);

        /*
         * Free the bif_info after bstp_initialization(), so that
         * bridge_softc.sc_root_port will not reference a dangling
         * pointer.
         */
        kfree(bif_info, M_DEVBUF);
}

/*
 * bridge_delete_span:
 *
 *      Delete the specified span interface.
 */
static void
bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
{
        KASSERT(bif->bif_ifp->if_bridge == NULL,
            ("%s: not a span interface", __func__));

        TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
        kfree(bif, M_DEVBUF);
}

static int
bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
{
        struct ifnet *ifp = sc->sc_ifp;

        if (ifp->if_flags & IFF_RUNNING)
                return 0;

        callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
            bridge_timer, sc);

        ifp->if_flags |= IFF_RUNNING;
        bstp_initialization(sc);
        return 0;
}

static int
bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
{
        struct ifnet *ifp = sc->sc_ifp;

        if ((ifp->if_flags & IFF_RUNNING) == 0)
                return 0;

        callout_stop(&sc->sc_brcallout);

        crit_enter();
        lwkt_dropmsg(&sc->sc_brtimemsg.lmsg);
        crit_exit();

        bstp_stop(sc);

        ifp->if_flags &= ~IFF_RUNNING;

        ifnet_deserialize_all(ifp);

        /* Let everyone know that we are stopped */
        netmsg_service_sync();

        /*
         * Sync the ifnetX msgports in the order we forward rtnode
         * installation messages.  This is used to make sure that
         * all rtnode installation messages sent by bridge_rtupdate()
         * during the above netmsg_service_sync() are flushed.
         */
        bridge_rtmsg_sync(sc);
        bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);

        ifnet_serialize_all(ifp);
        return 0;
}

static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
        struct ifbreq *req = arg;
        struct bridge_iflist *bif;
        struct bridge_ifinfo *bif_info;
        struct ifnet *ifs, *bifp;
        int error = 0;

        bifp = sc->sc_ifp;
        ASSERT_IFNET_SERIALIZED_ALL(bifp);

        ifs = ifunit(req->ifbr_ifsname);
        if (ifs == NULL)
                return (ENOENT);

        /* If it's in the span list, it can't be a member. */
        TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
                if (ifs == bif->bif_ifp)
                        return (EBUSY);

        /* Allow the first Ethernet member to define the MTU */
        if (ifs->if_type != IFT_GIF) {
                if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
                        bifp->if_mtu = ifs->if_mtu;
                } else if (bifp->if_mtu != ifs->if_mtu) {
                        if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
                        return (EINVAL);
                }
        }

        if (ifs->if_bridge == sc)
                return (EEXIST);

        if (ifs->if_bridge != NULL)
                return (EBUSY);

        bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
        bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
        bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
        bif_info->bifi_ifp = ifs;
        bif_info->bifi_bond_weight = 1;

        /*
         * Release the bridge interface's serializer:
         * - To avoid a possible deadlock.
         * - Various sync operations will block the current thread.
         */
        ifnet_deserialize_all(bifp);

        switch (ifs->if_type) {
        case IFT_ETHER:
        case IFT_L2VLAN:
                /*
                 * Place the interface into promiscuous mode.
                 */
                error = ifpromisc(ifs, 1);
                if (error) {
                        ifnet_serialize_all(bifp);
                        goto out;
                }
                bridge_mutecaps(bif_info, ifs, 1);
                break;

        case IFT_GIF: /* :^) */
                break;

        default:
                error = EINVAL;
                ifnet_serialize_all(bifp);
                goto out;
        }

        /*
         * Add bifs to percpu linked lists
         */
        bridge_add_bif(sc, bif_info, ifs);

        ifnet_serialize_all(bifp);

        if (bifp->if_flags & IFF_RUNNING)
                bstp_initialization(sc);
        else
                bstp_stop(sc);

        /*
         * Everything has been set up, so let the member interface
         * deliver packets to this bridge on its input/output path.
         */
1238         ifs->if_bridge = sc;
1239 out:
1240         if (error) {
1241                 if (bif_info != NULL)
1242                         kfree(bif_info, M_DEVBUF);
1243         }
1244         return (error);
1245 }
1246
1247 static int
1248 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1249 {
1250         struct ifbreq *req = arg;
1251         struct bridge_iflist *bif;
1252
1253         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1254         if (bif == NULL)
1255                 return (ENOENT);
1256
1257         bridge_delete_member(sc, bif, 0);
1258
1259         return (0);
1260 }
1261
1262 static int
1263 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1264 {
1265         struct ifbreq *req = arg;
1266         struct bridge_iflist *bif;
1267
1268         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1269         if (bif == NULL)
1270                 return (ENOENT);
1271         bridge_ioctl_fillflags(sc, bif, req);
1272         return (0);
1273 }
1274
1275 static void
1276 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1277                        struct ifbreq *req)
1278 {
1279         req->ifbr_ifsflags = bif->bif_flags;
1280         req->ifbr_state = bif->bif_state;
1281         req->ifbr_priority = bif->bif_priority;
1282         req->ifbr_path_cost = bif->bif_path_cost;
1283         req->ifbr_bond_weight = bif->bif_bond_weight;
1284         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1285         if (bif->bif_flags & IFBIF_STP) {
1286                 req->ifbr_peer_root = bif->bif_peer_root;
1287                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1288                 req->ifbr_peer_cost = bif->bif_peer_cost;
1289                 req->ifbr_peer_port = bif->bif_peer_port;
1290                 if (bstp_supersedes_port_info(sc, bif)) {
1291                         req->ifbr_designated_root = bif->bif_peer_root;
1292                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1293                         req->ifbr_designated_cost = bif->bif_peer_cost;
1294                         req->ifbr_designated_port = bif->bif_peer_port;
1295                 } else {
1296                         req->ifbr_designated_root = sc->sc_bridge_id;
1297                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1298                         req->ifbr_designated_cost = bif->bif_path_cost +
1299                                                     bif->bif_peer_cost;
1300                         req->ifbr_designated_port = bif->bif_port_id;
1301                 }
1302         } else {
1303                 req->ifbr_peer_root = 0;
1304                 req->ifbr_peer_bridge = 0;
1305                 req->ifbr_peer_cost = 0;
1306                 req->ifbr_peer_port = 0;
1307                 req->ifbr_designated_root = 0;
1308                 req->ifbr_designated_bridge = 0;
1309                 req->ifbr_designated_cost = 0;
1310                 req->ifbr_designated_port = 0;
1311         }
1312 }
1313
1314 static int
1315 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1316 {
1317         struct ifbreq *req = arg;
1318         struct bridge_iflist *bif;
1319         struct ifnet *bifp = sc->sc_ifp;
1320
1321         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1322         if (bif == NULL)
1323                 return (ENOENT);
1324
1325         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1326                 /* SPAN is readonly */
1327                 return (EINVAL);
1328         }
1329
1330         if (req->ifbr_ifsflags & IFBIF_STP) {
1331                 switch (bif->bif_ifp->if_type) {
1332                 case IFT_ETHER:
1333                         /* These can do spanning tree. */
1334                         break;
1335
1336                 default:
1337                         /* Nothing else can. */
1338                         return (EINVAL);
1339                 }
1340         }
1341
1342         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1343                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1344         if (bifp->if_flags & IFF_RUNNING)
1345                 bstp_initialization(sc);
1346
1347         return (0);
1348 }
1349
1350 static int
1351 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1352 {
1353         struct ifbrparam *param = arg;
1354         struct ifnet *ifp = sc->sc_ifp;
1355
1356         sc->sc_brtmax = param->ifbrp_csize;
1357
1358         ifnet_deserialize_all(ifp);
1359         bridge_rttrim(sc);
1360         ifnet_serialize_all(ifp);
1361
1362         return (0);
1363 }
1364
1365 static int
1366 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1367 {
1368         struct ifbrparam *param = arg;
1369
1370         param->ifbrp_csize = sc->sc_brtmax;
1371
1372         return (0);
1373 }
1374
1375 static int
1376 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1377 {
1378         struct bridge_control_arg *bc_arg = arg;
1379         struct ifbifconf *bifc = arg;
1380         struct bridge_iflist *bif;
1381         struct ifbreq *breq;
1382         int count, len;
1383
1384         count = 0;
1385         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1386                 count++;
1387         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1388                 count++;
1389
1390         if (bifc->ifbic_len == 0) {
1391                 bifc->ifbic_len = sizeof(*breq) * count;
1392                 return 0;
1393         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1394                 bifc->ifbic_len = 0;
1395                 return 0;
1396         }
1397
1398         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1399         KKASSERT(len >= sizeof(*breq));
1400
1401         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1402         if (breq == NULL) {
1403                 bifc->ifbic_len = 0;
1404                 return ENOMEM;
1405         }
1406         bc_arg->bca_kptr = breq;
1407
1408         count = 0;
1409         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1410                 if (len < sizeof(*breq))
1411                         break;
1412
1413                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1414                         sizeof(breq->ifbr_ifsname));
1415                 bridge_ioctl_fillflags(sc, bif, breq);
1416                 breq++;
1417                 count++;
1418                 len -= sizeof(*breq);
1419         }
1420         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1421                 if (len < sizeof(*breq))
1422                         break;
1423
1424                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1425                         sizeof(breq->ifbr_ifsname));
1426                 breq->ifbr_ifsflags = bif->bif_flags;
1427                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1428                 breq++;
1429                 count++;
1430                 len -= sizeof(*breq);
1431         }
1432
1433         bifc->ifbic_len = sizeof(*breq) * count;
1434         KKASSERT(bifc->ifbic_len > 0);
1435
1436         bc_arg->bca_len = bifc->ifbic_len;
1437         bc_arg->bca_uptr = bifc->ifbic_req;
1438         return 0;
1439 }
1440
1441 static int
1442 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1443 {
1444         struct bridge_control_arg *bc_arg = arg;
1445         struct ifbaconf *bac = arg;
1446         struct bridge_rtnode *brt;
1447         struct ifbareq *bareq;
1448         int count, len;
1449
1450         count = 0;
1451         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1452                 count++;
1453
1454         if (bac->ifbac_len == 0) {
1455                 bac->ifbac_len = sizeof(*bareq) * count;
1456                 return 0;
1457         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1458                 bac->ifbac_len = 0;
1459                 return 0;
1460         }
1461
1462         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1463         KKASSERT(len >= sizeof(*bareq));
1464
1465         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1466         if (bareq == NULL) {
1467                 bac->ifbac_len = 0;
1468                 return ENOMEM;
1469         }
1470         bc_arg->bca_kptr = bareq;
1471
1472         count = 0;
1473         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1474                 struct bridge_rtinfo *bri = brt->brt_info;
1475                 unsigned long expire;
1476
1477                 if (len < sizeof(*bareq))
1478                         break;
1479
1480                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1481                         sizeof(bareq->ifba_ifsname));
1482                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1483                 expire = bri->bri_expire;
1484                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1485                     time_second < expire)
1486                         bareq->ifba_expire = expire - time_second;
1487                 else
1488                         bareq->ifba_expire = 0;
1489                 bareq->ifba_flags = bri->bri_flags;
1490                 bareq++;
1491                 count++;
1492                 len -= sizeof(*bareq);
1493         }
1494
1495         bac->ifbac_len = sizeof(*bareq) * count;
1496         KKASSERT(bac->ifbac_len > 0);
1497
1498         bc_arg->bca_len = bac->ifbac_len;
1499         bc_arg->bca_uptr = bac->ifbac_req;
1500         return 0;
1501 }
1502
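     /*
      * bridge_ioctl_saddr:
      *
      *      Add a static entry to the address table.  The ifnet
      *      serializer is held on entry and dropped around
      *      bridge_rtsaddr(), which may block.
      */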
1503 static int
1504 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1505 {
1506         struct ifbareq *req = arg;
1507         struct bridge_iflist *bif;
1508         struct ifnet *ifp = sc->sc_ifp;
1509         int error;
1510
1511         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1512
1513         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1514         if (bif == NULL)
1515                 return (ENOENT);
1516
1517         ifnet_deserialize_all(ifp);
1518         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1519                                req->ifba_flags);
1520         ifnet_serialize_all(ifp);
1521         return (error);
1522 }
1523
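     /*
      * bridge_ioctl_sto:
      *
      *      Set the timeout used to age dynamically learned addresses.
      */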
1524 static int
1525 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1526 {
1527         struct ifbrparam *param = arg;
1528
1529         sc->sc_brttimeout = param->ifbrp_ctime;
1530
1531         return (0);
1532 }
1533
1534 static int
1535 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1536 {
1537         struct ifbrparam *param = arg;
1538
1539         param->ifbrp_ctime = sc->sc_brttimeout;
1540
1541         return (0);
1542 }
1543
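     /*
      * bridge_ioctl_daddr:
      *
      *      Delete a single entry from the address table.  The
      *      serializer is dropped around bridge_rtdaddr().
      */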
1544 static int
1545 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1546 {
1547         struct ifbareq *req = arg;
1548         struct ifnet *ifp = sc->sc_ifp;
1549         int error;
1550
1551         ifnet_deserialize_all(ifp);
1552         error = bridge_rtdaddr(sc, req->ifba_dst);
1553         ifnet_serialize_all(ifp);
1554         return error;
1555 }
1556
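     /*
      * bridge_ioctl_flush:
      *
      *      Flush address table entries selected by ifbr_ifsflags;
      *      IFBF_FLUSHSYNC is always OR'd in here.
      */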
1557 static int
1558 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1559 {
1560         struct ifbreq *req = arg;
1561         struct ifnet *ifp = sc->sc_ifp;
1562
1563         ifnet_deserialize_all(ifp);
1564         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1565         ifnet_serialize_all(ifp);
1566
1567         return (0);
1568 }
1569
1570 static int
1571 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1572 {
1573         struct ifbrparam *param = arg;
1574
1575         param->ifbrp_prio = sc->sc_bridge_priority;
1576
1577         return (0);
1578 }
1579
1580 static int
1581 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1582 {
1583         struct ifbrparam *param = arg;
1584
1585         sc->sc_bridge_priority = param->ifbrp_prio;
1586
1587         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1588                 bstp_initialization(sc);
1589
1590         return (0);
1591 }
1592
1593 static int
1594 bridge_ioctl_reinit(struct bridge_softc *sc, void *arg __unused)
1595 {
1596         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1597                 bstp_initialization(sc);
1598         return (0);
1599 }
1600
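     /*
      * The STP hello time, forward delay and max age handled below are
      * stored in the softc shifted left by 8 bits; the get/set handlers
      * convert with >> 8 and << 8.
      */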
1601 static int
1602 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1603 {
1604         struct ifbrparam *param = arg;
1605
1606         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1607
1608         return (0);
1609 }
1610
1611 static int
1612 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1613 {
1614         struct ifbrparam *param = arg;
1615
1616         if (param->ifbrp_hellotime == 0)
1617                 return (EINVAL);
1618         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1619
1620         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1621                 bstp_initialization(sc);
1622
1623         return (0);
1624 }
1625
1626 static int
1627 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1628 {
1629         struct ifbrparam *param = arg;
1630
1631         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1632
1633         return (0);
1634 }
1635
1636 static int
1637 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1638 {
1639         struct ifbrparam *param = arg;
1640
1641         if (param->ifbrp_fwddelay == 0)
1642                 return (EINVAL);
1643         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1644
1645         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1646                 bstp_initialization(sc);
1647
1648         return (0);
1649 }
1650
1651 static int
1652 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1653 {
1654         struct ifbrparam *param = arg;
1655
1656         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1657
1658         return (0);
1659 }
1660
1661 static int
1662 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1663 {
1664         struct ifbrparam *param = arg;
1665
1666         if (param->ifbrp_maxage == 0)
1667                 return (EINVAL);
1668         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1669
1670         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1671                 bstp_initialization(sc);
1672
1673         return (0);
1674 }
1675
1676 static int
1677 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1678 {
1679         struct ifbreq *req = arg;
1680         struct bridge_iflist *bif;
1681
1682         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1683         if (bif == NULL)
1684                 return (ENOENT);
1685
1686         bif->bif_priority = req->ifbr_priority;
1687
1688         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1689                 bstp_initialization(sc);
1690
1691         return (0);
1692 }
1693
1694 static int
1695 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1696 {
1697         struct ifbreq *req = arg;
1698         struct bridge_iflist *bif;
1699
1700         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1701         if (bif == NULL)
1702                 return (ENOENT);
1703
1704         bif->bif_path_cost = req->ifbr_path_cost;
1705
1706         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1707                 bstp_initialization(sc);
1708
1709         return (0);
1710 }
1711
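     /*
      * bridge_ioctl_sifbondwght:
      *
      *      Set the bonding weight of a member interface: the number of
      *      packets sent out the member before bridge_select_unicast()
      *      rotates to the next link in the bonding set (LINK2 mode).
      */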
1712 static int
1713 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1714 {
1715         struct ifbreq *req = arg;
1716         struct bridge_iflist *bif;
1717
1718         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1719         if (bif == NULL)
1720                 return (ENOENT);
1721
1722         bif->bif_bond_weight = req->ifbr_bond_weight;
1723
1724         /* no reinit needed */
1725
1726         return (0);
1727 }
1728
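     /*
      * bridge_ioctl_addspan:
      *
      *      Add a span port.  A span port is not a normal member; it is
      *      only used to transmit copies of frames handled by the bridge
      *      (see bridge_span()).
      */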
1729 static int
1730 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1731 {
1732         struct ifbreq *req = arg;
1733         struct bridge_iflist *bif;
1734         struct ifnet *ifs;
1735         struct bridge_ifinfo *bif_info;
1736
1737         ifs = ifunit(req->ifbr_ifsname);
1738         if (ifs == NULL)
1739                 return (ENOENT);
1740
1741         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1742                 if (ifs == bif->bif_ifp)
1743                         return (EBUSY);
1744
1745         if (ifs->if_bridge != NULL)
1746                 return (EBUSY);
1747
1748         switch (ifs->if_type) {
1749         case IFT_ETHER:
1750         case IFT_GIF:
1751         case IFT_L2VLAN:
1752                 break;
1753
1754         default:
1755                 return (EINVAL);
1756         }
1757
1758         /*
1759          * bif_info is needed for bif_flags
1760          */
1761         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1762         bif_info->bifi_ifp = ifs;
1763
1764         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1765         bif->bif_ifp = ifs;
1766         bif->bif_info = bif_info;
1767         bif->bif_flags = IFBIF_SPAN;
1768         /* NOTE: span bifs only use bridge_ifinfo via bif_flags */
1769
1770         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1771
1772         sc->sc_span = 1;
1773
1774         return (0);
1775 }
1776
1777 static int
1778 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1779 {
1780         struct ifbreq *req = arg;
1781         struct bridge_iflist *bif;
1782         struct ifnet *ifs;
1783
1784         ifs = ifunit(req->ifbr_ifsname);
1785         if (ifs == NULL)
1786                 return (ENOENT);
1787
1788         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1789                 if (ifs == bif->bif_ifp)
1790                         break;
1791
1792         if (bif == NULL)
1793                 return (ENOENT);
1794
1795         bridge_delete_span(sc, bif);
1796
1797         if (TAILQ_EMPTY(&sc->sc_spanlist))
1798                 sc->sc_span = 0;
1799
1800         return (0);
1801 }
1802
1803 static void
1804 bridge_ifdetach_dispatch(netmsg_t msg)
1805 {
1806         struct ifnet *ifp, *bifp;
1807         struct bridge_softc *sc;
1808         struct bridge_iflist *bif;
1809
1810         ifp = msg->lmsg.u.ms_resultp;
1811         sc = ifp->if_bridge;
1812
1813         /* Check if the interface is a bridge member */
1814         if (sc != NULL) {
1815                 bifp = sc->sc_ifp;
1816
1817                 ifnet_serialize_all(bifp);
1818
1819                 bif = bridge_lookup_member_if(sc, ifp);
1820                 if (bif != NULL) {
1821                         bridge_delete_member(sc, bif, 1);
1822                 } else {
1823                         /* XXX Why would bif be NULL here? */
1824                 }
1825
1826                 ifnet_deserialize_all(bifp);
1827                 goto reply;
1828         }
1829
1830         crit_enter();   /* XXX MP */
1831
1832         /* Check if the interface is a span port */
1833         LIST_FOREACH(sc, &bridge_list, sc_list) {
1834                 bifp = sc->sc_ifp;
1835
1836                 ifnet_serialize_all(bifp);
1837
1838                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1839                         if (ifp == bif->bif_ifp) {
1840                                 bridge_delete_span(sc, bif);
1841                                 break;
1842                         }
1843
1844                 ifnet_deserialize_all(bifp);
1845         }
1846
1847         crit_exit();
1848
1849 reply:
1850         lwkt_replymsg(&msg->lmsg, 0);
1851 }
1852
1853 /*
1854  * bridge_ifdetach:
1855  *
1856  *      Detach an interface from a bridge.  Called when a member
1857  *      interface is detaching.
1858  */
1859 static void
1860 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1861 {
1862         struct netmsg_base msg;
1863
1864         netmsg_init(&msg, NULL, &curthread->td_msgport,
1865                     0, bridge_ifdetach_dispatch);
1866         msg.lmsg.u.ms_resultp = ifp;
1867
1868         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1869 }
1870
1871 /*
1872  * bridge_init:
1873  *
1874  *      Initialize a bridge interface.
1875  */
1876 static void
1877 bridge_init(void *xsc)
1878 {
1879         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1880 }
1881
1882 /*
1883  * bridge_stop:
1884  *
1885  *      Stop the bridge interface.
1886  */
1887 static void
1888 bridge_stop(struct ifnet *ifp)
1889 {
1890         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1891 }
1892
1893 /*
1894  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1895  * interface or from any member of our bridge interface.  This is used
1896  * later on to force the MAC to be the MAC of our bridge interface.
1897  */
1898 static int
1899 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1900 {
1901         struct bridge_iflist *bif;
1902
1903         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1904                 return (1);
1905
1906         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1907                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1908                            ETHER_ADDR_LEN) == 0) {
1909                         return (1);
1910                 }
1911         }
1912         return (0);
1913 }
1914
1915 /*
1916  * bridge_enqueue:
1917  *
1918  *      Enqueue a packet on a bridge member interface.
1919  *
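      *      The hand-off is asynchronous: a netmsg embedded in the mbuf
      *      header is sent to the current CPU's netisr, and
      *      bridge_enqueue_handler() performs the transmit on dst_ifp.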
1920  */
1921 void
1922 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1923 {
1924         struct netmsg_packet *nmp;
1925
1926         mbuftrackid(m, 64);
1927
1928         nmp = &m->m_hdr.mh_netmsg;
1929         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1930                     0, bridge_enqueue_handler);
1931         nmp->nm_packet = m;
1932         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1933
1934         lwkt_sendmsg(netisr_cpuport(mycpuid), &nmp->base.lmsg);
1935 }
1936
1937 /*
1938  * After looking up dst_if in our forwarding table we still have to
1939  * deal with channel bonding.  Find the best interface in the bonding set.
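      *
      * Returns NULL if dst_if is no longer a member of the bridge or if
      * the selected interface is not RUNNING; the caller drops the packet.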
1940  */
1941 static struct ifnet *
1942 bridge_select_unicast(struct bridge_softc *sc, struct ifnet *dst_if,
1943                       int from_blocking, struct mbuf *m)
1944 {
1945         struct bridge_iflist *bif, *nbif;
1946         struct ifnet *alt_if;
1947         int alt_priority;
1948         int priority;
1949
1950         /*
1951          * Unicast, kinda replicates the output side of bridge_output().
1952          *
1953          * Even though this is a uni-cast packet we may have to select
1954          * an interface from a bonding set.
1955          */
1956         bif = bridge_lookup_member_if(sc, dst_if);
1957         if (bif == NULL) {
1958                 /* Not a member of the bridge (anymore?) */
1959                 return NULL;
1960         }
1961
1962         /*
1963          * If STP is enabled on the target we are an equal opportunity
1964          * employer and do not necessarily output to dst_if.  Instead
1965          * scan available links with the same MAC as the current dst_if
1966          * and choose the best one.
1967          *
1968          * We also need to do this because arp entries tag onto a particular
1969          * interface and if it happens to be dead then the packets will
1970          * go into a bit bucket.
1971          *
1972          * If LINK2 is set the matching links are bonded and we round-robin.
1973          * (the MAC address must be the same for the participating links).
1974          * In this case links in a STP FORWARDING or BONDED state are
1975          * allowed for unicast packets.
1976          */
1977         if (bif->bif_flags & IFBIF_STP) {
1978                 alt_if = NULL;
1979                 alt_priority = 0;
1980                 priority = 0;
1981
1982                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1983                                      bif_next, nbif) {
1984                         /*
1985                          * dst_if may imply a bonding set so we must compare
1986                          * MAC addresses.
1987                          */
1988                         if (memcmp(IF_LLADDR(bif->bif_ifp),
1989                                    IF_LLADDR(dst_if),
1990                                    ETHER_ADDR_LEN) != 0) {
1991                                 continue;
1992                         }
1993
1994                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
1995                                 continue;
1996
1997                         /*
1998                          * NOTE: We allow transmissions through a BLOCKING
1999                          *       or LEARNING interface only as a last resort,
2000                          *       and disallow both if from_blocking is set.
2001                          *
2002                          * NOTE: If we send a packet through a learning
2003                          *       interface the receiving end (if also in
2004                          *       LEARNING) will throw it away, so this is
2005                          *       the ultimate last resort.
2006                          */
2007                         switch(bif->bif_state) {
2008                         case BSTP_IFSTATE_BLOCKING:
2009                                 if (from_blocking == 0 &&
2010                                     bif->bif_priority + 256 > alt_priority) {
2011                                         alt_priority = bif->bif_priority + 256;
2012                                         alt_if = bif->bif_ifp;
2013                                 }
2014                                 continue;
2015                         case BSTP_IFSTATE_LEARNING:
2016                                 if (from_blocking == 0 &&
2017                                     bif->bif_priority > alt_priority) {
2018                                         alt_priority = bif->bif_priority;
2019                                         alt_if = bif->bif_ifp;
2020                                 }
2021                                 continue;
2022                         case BSTP_IFSTATE_L1BLOCKING:
2023                         case BSTP_IFSTATE_LISTENING:
2024                         case BSTP_IFSTATE_DISABLED:
2025                                 continue;
2026                         default:
2027                                 /* FORWARDING, BONDED */
2028                                 break;
2029                         }
2030
2031                         /*
2032                          * XXX we need to use the Toeplitz hash or
2033                          *     something like that instead of
2034                          *     round-robining.
2035                          */
2036                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2037                                 dst_if = bif->bif_ifp;
2038                                 if (++bif->bif_bond_count >=
2039                                     bif->bif_bond_weight) {
2040                                         bif->bif_bond_count = 0;
2041                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2042                                                      bif, bif_next);
2043                                         TAILQ_INSERT_TAIL(
2044                                                      &sc->sc_iflists[mycpuid],
2045                                                      bif, bif_next);
2046                                 }
2047                                 priority = 1;
2048                                 break;
2049                         }
2050
2051                         /*
2052                          * Select best interface in the FORWARDING or
2053                          * BONDED set.  Well, there shouldn't be any
2054                          * in a BONDED state if LINK2 is not set (they
2055                          * will all be in a BLOCKING state), but there
2056                          * could be a transitory condition here.
2057                          */
2058                         if (bif->bif_priority > priority) {
2059                                 priority = bif->bif_priority;
2060                                 dst_if = bif->bif_ifp;
2061                         }
2062                 }
2063
2064                 /*
2065                  * If no suitable interfaces were found but a suitable
2066                  * alternative interface was found, use the alternative
2067                  * interface.
2068                  */
2069                 if (priority == 0 && alt_if)
2070                         dst_if = alt_if;
2071         }
2072
2073         /*
2074          * At this point, we're dealing with a unicast frame
2075          * going to a different interface.
2076          */
2077         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2078                 dst_if = NULL;
2079         return (dst_if);
2080 }
2081
2082
2083 /*
2084  * bridge_output:
2085  *
2086  *      Send output from a bridge member interface.  This
2087  *      performs the bridging function for locally originated
2088  *      packets.
2089  *
2090  *      The mbuf has the Ethernet header already attached.  We must
2091  *      enqueue or free the mbuf before returning.
2092  */
2093 static int
2094 bridge_output(struct ifnet *ifp, struct mbuf *m)
2095 {
2096         struct bridge_softc *sc = ifp->if_bridge;
2097         struct bridge_iflist *bif, *nbif;
2098         struct ether_header *eh;
2099         struct ifnet *dst_if, *alt_if, *bifp;
2100         int from_us;
2101         int alt_priority;
2102
2103         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2104         mbuftrackid(m, 65);
2105
2106         /*
2107          * Make sure that we are still a member of a bridge interface.
2108          */
2109         if (sc == NULL) {
2110                 m_freem(m);
2111                 return (0);
2112         }
2113         bifp = sc->sc_ifp;
2114
2115         /*
2116          * Acquire header
2117          */
2118         if (m->m_len < ETHER_HDR_LEN) {
2119                 m = m_pullup(m, ETHER_HDR_LEN);
2120                 if (m == NULL) {
2121                         IFNET_STAT_INC(bifp, oerrors, 1);
2122                         return (0);
2123                 }
2124         }
2125         eh = mtod(m, struct ether_header *);
2126         from_us = bridge_from_us(sc, eh);
2127
2128         /*
2129          * If bridge is down, but the original output interface is up,
2130          * go ahead and send out that interface.  Otherwise, the packet
2131          * is dropped below.
2132          */
2133         if ((bifp->if_flags & IFF_RUNNING) == 0) {
2134                 dst_if = ifp;
2135                 goto sendunicast;
2136         }
2137
2138         /*
2139          * If the packet is a multicast, or we don't know a better way to
2140          * get there, send to all interfaces.
2141          */
2142         if (ETHER_IS_MULTICAST(eh->ether_dhost))
2143                 dst_if = NULL;
2144         else
2145                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2146
2147         if (dst_if == NULL) {
2148                 struct mbuf *mc;
2149                 int used = 0;
2150                 int found = 0;
2151
2152                 if (sc->sc_span)
2153                         bridge_span(sc, m);
2154
2155                 alt_if = NULL;
2156                 alt_priority = 0;
2157                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2158                                      bif_next, nbif) {
2159                         dst_if = bif->bif_ifp;
2160
2161                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2162                                 continue;
2163
2164                         /*
2165                          * If this is not the original output interface,
2166                          * and the interface is participating in spanning
2167                          * tree, make sure the port is in a state that
2168                          * allows forwarding.
2169                          *
2170                          * We keep track of a possible backup IF if we are
2171                          * unable to find any interfaces to forward through.
2172                          *
2173                          * NOTE: Currently round-robining is not implemented
2174                          *       across bonded interface groups (needs an
2175                          *       algorithm to track each group somehow).
2176                          *
2177                          *       Similarly we track only one alternative
2178                          *       interface if no suitable interfaces are
2179                          *       found.
2180                          */
2181                         if (dst_if != ifp &&
2182                             (bif->bif_flags & IFBIF_STP) != 0) {
2183                                 switch (bif->bif_state) {
2184                                 case BSTP_IFSTATE_BONDED:
2185                                         if (bif->bif_priority + 512 >
2186                                             alt_priority) {
2187                                                 alt_priority =
2188                                                     bif->bif_priority + 512;
2189                                                 alt_if = bif->bif_ifp;
2190                                         }
2191                                         continue;
2192                                 case BSTP_IFSTATE_BLOCKING:
2193                                         if (bif->bif_priority + 256 >
2194                                             alt_priority) {
2195                                                 alt_priority =
2196                                                     bif->bif_priority + 256;
2197                                                 alt_if = bif->bif_ifp;
2198                                         }
2199                                         continue;
2200                                 case BSTP_IFSTATE_LEARNING:
2201                                         if (bif->bif_priority > alt_priority) {
2202                                                 alt_priority =
2203                                                     bif->bif_priority;
2204                                                 alt_if = bif->bif_ifp;
2205                                         }
2206                                         continue;
2207                                 case BSTP_IFSTATE_L1BLOCKING:
2208                                 case BSTP_IFSTATE_LISTENING:
2209                                 case BSTP_IFSTATE_DISABLED:
2210                                         continue;
2211                                 default:
2212                                         /* FORWARDING */
2213                                         break;
2214                                 }
2215                         }
2216
2217                         KKASSERT(used == 0);
2218                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2219                                 used = 1;
2220                                 mc = m;
2221                         } else {
2222                                 mc = m_copypacket(m, MB_DONTWAIT);
2223                                 if (mc == NULL) {
2224                                         IFNET_STAT_INC(bifp, oerrors, 1);
2225                                         continue;
2226                                 }
2227                         }
2228
2229                         /*
2230                          * If the packet is 'from' us override ether_shost.
2231                          */
2232                         bridge_handoff(sc, dst_if, mc, from_us);
2233                         found = 1;
2234
2235                         if (nbif != NULL && !nbif->bif_onlist) {
2236                                 KKASSERT(bif->bif_onlist);
2237                                 nbif = TAILQ_NEXT(bif, bif_next);
2238                         }
2239                 }
2240
2241                 /*
2242                  * If we couldn't find anything use the backup interface
2243                  * if we have one.
2244                  */
2245                 if (found == 0 && alt_if) {
2246                         KKASSERT(used == 0);
2247                         mc = m;
2248                         used = 1;
2249                         bridge_handoff(sc, alt_if, mc, from_us);
2250                 }
2251
2252                 if (used == 0)
2253                         m_freem(m);
2254                 return (0);
2255         }
2256
2257         /*
2258          * Unicast
2259          */
2260 sendunicast:
2261         dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2262
2263         if (sc->sc_span)
2264                 bridge_span(sc, m);
2265         if (dst_if == NULL)
2266                 m_freem(m);
2267         else
2268                 bridge_handoff(sc, dst_if, m, from_us);
2269         return (0);
2270 }
2271
2272 /*
2273  * Returns the bridge interface associated with an if_bridge pointer.
2274  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2275  * code to supply the bridge for the is-at info, making
2276  * the bridge responsible for matching local addresses.
2277  *
2278  * Without this the ARP code will supply bridge member interfaces
2279  * for the is-at, which makes it difficult for the bridge to fail over
2280  * interfaces (among other things).
2281  */
2282 static struct ifnet *
2283 bridge_interface(void *if_bridge)
2284 {
2285         struct bridge_softc *sc = if_bridge;
2286         return (sc->sc_ifp);
2287 }
2288
2289 /*
2290  * bridge_start:
2291  *
2292  *      Start output on a bridge.
2293  */
2294 static void
2295 bridge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
2296 {
2297         struct bridge_softc *sc = ifp->if_softc;
2298
2299         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
2300         ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
2301
2302         ifsq_set_oactive(ifsq);
2303         for (;;) {
2304                 struct ifnet *dst_if = NULL;
2305                 struct ether_header *eh;
2306                 struct mbuf *m;
2307
2308                 m = ifsq_dequeue(ifsq, NULL);
2309                 if (m == NULL)
2310                         break;
2311                 mbuftrackid(m, 75);
2312
2313                 if (m->m_len < sizeof(*eh)) {
2314                         m = m_pullup(m, sizeof(*eh));
2315                         if (m == NULL) {
2316                                 IFNET_STAT_INC(ifp, oerrors, 1);
2317                                 continue;
2318                         }
2319                 }
2320                 eh = mtod(m, struct ether_header *);
2321
2322                 BPF_MTAP(ifp, m);
2323                 IFNET_STAT_INC(ifp, opackets, 1);
2324
2325                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2326                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2327
2328                 /*
2329                  * Multicast or broadcast
2330                  */
2331                 if (dst_if == NULL) {
2332                         bridge_start_bcast(sc, m);
2333                         continue;
2334                 }
2335
2336                 /*
2337                  * Unicast
2338                  */
2339                 dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2340
2341                 if (dst_if == NULL)
2342                         m_freem(m);
2343                 else
2344                         bridge_enqueue(dst_if, m);
2345         }
2346         ifsq_clr_oactive(ifsq);
2347 }
2348
2349 /*
2350  * bridge_forward:
2351  *
2352  *      Forward packets received on a bridge interface via the input
2353  *      path.
2354  *
2355  *      This implements the forwarding function of the bridge.
2356  */
2357 static void
2358 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2359 {
2360         struct bridge_iflist *bif;
2361         struct ifnet *src_if, *dst_if, *ifp;
2362         struct ether_header *eh;
2363         int from_blocking;
2364
2365         mbuftrackid(m, 66);
2366         src_if = m->m_pkthdr.rcvif;
2367         ifp = sc->sc_ifp;
2368
2369         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2370
2371         IFNET_STAT_INC(ifp, ipackets, 1);
2372         IFNET_STAT_INC(ifp, ibytes, m->m_pkthdr.len);
2373
2374         /*
2375          * Look up the bridge_iflist.
2376          */
2377         bif = bridge_lookup_member_if(sc, src_if);
2378         if (bif == NULL) {
2379                 /* Interface is not a bridge member (anymore?) */
2380                 m_freem(m);
2381                 return;
2382         }
2383
2384         /*
2385          * In spanning tree mode receiving a packet from an interface
2386          * in a BLOCKING state is allowed, it could be a member of last
2387          * resort from the sender's point of view, but forwarding it is
2388          * not allowed.
2389          *
2390          * The sender's spanning tree will eventually sync up and the
2391          * sender will go into a BLOCKING state too (but this still may be
2392          * an interface of last resort during state changes).
2393          */
2394         if (bif->bif_flags & IFBIF_STP) {
2395                 switch (bif->bif_state) {
2396                 case BSTP_IFSTATE_L1BLOCKING:
2397                 case BSTP_IFSTATE_LISTENING:
2398                 case BSTP_IFSTATE_DISABLED:
2399                         m_freem(m);
2400                         return;
2401                 default:
2402                         /* learning, blocking, bonded, forwarding */
2403                         break;
2404                 }
2405                 from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2406         } else {
2407                 from_blocking = 0;
2408         }
2409
2410         eh = mtod(m, struct ether_header *);
2411
2412         /*
2413          * If the interface is learning, the packet did not arrive on
2414          * a blocking interface, and the source address is valid and
2415          * not multicast, record the address.
2416          */
2417         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2418             from_blocking == 0 &&
2419             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2420             (eh->ether_shost[0] == 0 &&
2421              eh->ether_shost[1] == 0 &&
2422              eh->ether_shost[2] == 0 &&
2423              eh->ether_shost[3] == 0 &&
2424              eh->ether_shost[4] == 0 &&
2425              eh->ether_shost[5] == 0) == 0) {
2426                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2427         }
2428
2429         /*
2430          * Don't forward from an interface in the listening or learning
2431          * state.  That is, in the learning state we learn information
2432          * but we throw away the packets.
2433          *
2434          * We let through packets on interfaces in the blocking state.
2435          * The blocking state is applicable to the send side, not the
2436          * receive side.
2437          */
2438         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2439             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2440              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2441                 m_freem(m);
2442                 return;
2443         }
2444
2445         /*
2446          * At this point, the port either doesn't participate
2447          * in spanning tree or it is in the forwarding state.
2448          */
2449
2450         /*
2451          * If the packet is unicast, destined for someone on
2452          * "this" side of the bridge, drop it.
2453          *
2454          * src_if implies the entire bonding set so we have to compare MAC
2455          * addresses and not just if pointers.
2456          */
2457         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2458                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2459                 if (dst_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
2460                                      ETHER_ADDR_LEN) == 0) {
2461                         m_freem(m);
2462                         return;
2463                 }
2464         } else {
2465                 /* ...forward it to all interfaces. */
2466                 IFNET_STAT_INC(ifp, imcasts, 1);
2467                 dst_if = NULL;
2468         }
2469
2470         /*
2471          * Broadcast if we do not have forwarding information.  However, if
2472          * we received the packet on a blocking interface we do not do this
2473          * (unless you really want to blow up your network).
2474          */
2475         if (dst_if == NULL) {
2476                 if (from_blocking)
2477                         m_freem(m);
2478                 else
2479                         bridge_broadcast(sc, src_if, m);
2480                 return;
2481         }
2482
2483         dst_if = bridge_select_unicast(sc, dst_if, from_blocking, m);
2484
2485         if (dst_if == NULL) {
2486                 m_freem(m);
2487                 return;
2488         }
2489
2490         if (inet_pfil_hook.ph_hashooks > 0
2491 #ifdef INET6
2492             || inet6_pfil_hook.ph_hashooks > 0
2493 #endif
2494             ) {
2495                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2496                         return;
2497                 if (m == NULL)
2498                         return;
2499
2500                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2501                         return;
2502                 if (m == NULL)
2503                         return;
2504         }
2505         bridge_handoff(sc, dst_if, m, 0);
2506 }
2507
2508 /*
2509  * bridge_input:
2510  *
2511  *      Receive input from a member interface.  Queue the packet for
2512  *      bridging if it is not for us.
2513  */
2514 static struct mbuf *
2515 bridge_input(struct ifnet *ifp, struct mbuf *m)
2516 {
2517         struct bridge_softc *sc = ifp->if_bridge;
2518         struct bridge_iflist *bif;
2519         struct ifnet *bifp, *new_ifp;
2520         struct ether_header *eh;
2521         struct mbuf *mc, *mc2;
2522
2523         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2524         mbuftrackid(m, 67);
2525
2526         /*
2527          * Make sure that we are still a member of a bridge interface.
2528          */
2529         if (sc == NULL)
2530                 return m;
2531
2532         new_ifp = NULL;
2533         bifp = sc->sc_ifp;
2534
2535         if ((bifp->if_flags & IFF_RUNNING) == 0)
2536                 goto out;
2537
2538         /*
2539          * Implement support for bridge monitoring.  If this flag has been
2540          * set on this interface, discard the packet once we push it through
2541          * the bpf(4) machinery, but before we do, increment various counters
2542          * associated with this bridge.
2543          */
2544         if (bifp->if_flags & IFF_MONITOR) {
2545                 /* Change input interface to this bridge */
2546                 m->m_pkthdr.rcvif = bifp;
2547
2548                 BPF_MTAP(bifp, m);
2549
2550                 /* Update bridge's ifnet statistics */
2551                 IFNET_STAT_INC(bifp, ipackets, 1);
2552                 IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2553                 if (m->m_flags & (M_MCAST | M_BCAST))
2554                         IFNET_STAT_INC(bifp, imcasts, 1);
2555
2556                 m_freem(m);
2557                 m = NULL;
2558                 goto out;
2559         }
2560
2561         /*
2562          * Handle the ether_header
2563          *
2564          * In all cases if the packet is destined for us via our MAC
2565          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2566          * repeat the source MAC out the same interface.
2567          *
2568          * This first test against our bridge MAC is the fast-path.
2569          *
2570          * NOTE!  The bridge interface can serve as an endpoint for
2571          *        communication but normally there are no IPs associated
2572          *        with it so you cannot route through it.  Instead what
2573          *        you do is point your default route *THROUGH* the bridge
2574          *        to the actual default router for one of the bridged spaces.
2575          *
2576          *        Another possibility is to put all your IP specifications
2577          *        on the bridge instead of on the individual interfaces.  If
2578          *        you do this it should be possible to use the bridge as an
2579          *        end point and route (rather than switch) through it using
2580          *        the default route or ipfw forwarding rules.
2581          */
2582
2583         /*
2584          * Acquire header
2585          */
2586         if (m->m_len < ETHER_HDR_LEN) {
2587                 m = m_pullup(m, ETHER_HDR_LEN);
2588                 if (m == NULL)
2589                         goto out;
2590         }
2591         eh = mtod(m, struct ether_header *);
2592         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2593         bcopy(eh, &m->m_pkthdr.br.ether, sizeof(*eh));
2594
2595         if ((bridge_debug & 1) &&
2596             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2597             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2598                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2599                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2600                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2601                         eh->ether_dhost[0],
2602                         eh->ether_dhost[1],
2603                         eh->ether_dhost[2],
2604                         eh->ether_dhost[3],
2605                         eh->ether_dhost[4],
2606                         eh->ether_dhost[5],
2607                         eh->ether_shost[0],
2608                         eh->ether_shost[1],
2609                         eh->ether_shost[2],
2610                         eh->ether_shost[3],
2611                         eh->ether_shost[4],
2612                         eh->ether_shost[5],
2613                         eh->ether_type,
2614                         ((u_char *)IF_LLADDR(bifp))[0],
2615                         ((u_char *)IF_LLADDR(bifp))[1],
2616                         ((u_char *)IF_LLADDR(bifp))[2],
2617                         ((u_char *)IF_LLADDR(bifp))[3],
2618                         ((u_char *)IF_LLADDR(bifp))[4],
2619                         ((u_char *)IF_LLADDR(bifp))[5]
2620                 );
2621         }
2622
2623         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2624                 /*
2625                  * If the packet is for us, set the packets source as the
2626                  * bridge, and return the packet back to ifnet.if_input for
2627                  * local processing.
2628                  */
2629                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2630                 KASSERT(bifp->if_bridge == NULL,
2631                         ("loop created in bridge_input"));
2632                 if (pfil_member != 0) {
2633                         if (inet_pfil_hook.ph_hashooks > 0
2634 #ifdef INET6
2635                             || inet6_pfil_hook.ph_hashooks > 0
2636 #endif
2637                         ) {
2638                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2639                                         goto out;
2640                                 if (m == NULL)
2641                                         goto out;
2642                         }
2643                 }
2644                 new_ifp = bifp;
2645                 goto out;
2646         }
2647
2648         /*
2649          * Tap all packets arriving on the bridge, whether they are
2650          * local destinations or not.  In is in.
2651          */
2652         BPF_MTAP(bifp, m);
2653
2654         bif = bridge_lookup_member_if(sc, ifp);
2655         if (bif == NULL)
2656                 goto out;
2657
2658         if (sc->sc_span)
2659                 bridge_span(sc, m);
2660
2661         if (m->m_flags & (M_BCAST | M_MCAST)) {
2662                 /*
2663                  * Tap off 802.1D packets; they do not get forwarded.
2664                  */
2665                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2666                             ETHER_ADDR_LEN) == 0) {
2667                         ifnet_serialize_all(bifp);
2668                         bstp_input(sc, bif, m);
2669                         ifnet_deserialize_all(bifp);
2670
2671                         /* m is freed by bstp_input */
2672                         m = NULL;
2673                         goto out;
2674                 }
2675
2676                 /*
2677                  * Other than 802.1D packets, ignore packets if the
2678                  * interface is not in a good state.
2679                  *
2680                  * NOTE: Broadcast/mcast packets received on a blocking or
2681                  *       learning interface are allowed for local processing.
2682                  *
2683                  *       The sending side of a blocked port will stop
2684                  *       transmitting when a better alternative is found.
2685                  *       However, later on we will disallow the forwarding
2686                  *       of bcast/mcast packets over a blocking interface.
2687                  */
2688                 if (bif->bif_flags & IFBIF_STP) {
2689                         switch (bif->bif_state) {
2690                         case BSTP_IFSTATE_L1BLOCKING:
2691                         case BSTP_IFSTATE_LISTENING:
2692                         case BSTP_IFSTATE_DISABLED:
2693                                 goto out;
2694                         default:
2695                                 /* blocking, learning, bonded, forwarding */
2696                                 break;
2697                         }
2698                 }
2699
2700                 /*
2701                  * Make a deep copy of the packet and enqueue the copy
2702                  * for bridge processing; return the original packet for
2703                  * local processing.
2704                  */
2705                 mc = m_dup(m, MB_DONTWAIT);
2706                 if (mc == NULL)
2707                         goto out;
2708
2709                 /*
2710                  * It's just too dangerous to allow bcast/mcast over a
2711                  * blocked interface; eventually the network will sort
2712                  * itself out and a better path will be found.
2713                  */
2714                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2715                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2716                         bridge_forward(sc, mc);
2717                 }
2718
2719                 /*
2720                  * Reinject the mbuf as arriving on the bridge so we have a
2721                  * chance at claiming multicast packets. We cannot loop back
2722                  * here from ether_input as a bridge is never a member of a
2723                  * bridge.
2724                  */
2725                 KASSERT(bifp->if_bridge == NULL,
2726                         ("loop created in bridge_input"));
2727                 mc2 = m_dup(m, MB_DONTWAIT);
2728 #ifdef notyet
2729                 if (mc2 != NULL) {
2730                         /* Keep the layer3 header aligned */
2731                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2732                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2733                 }
2734 #endif
2735                 if (mc2 != NULL) {
2736                         /*
2737                          * Don't tap to bpf(4) again; we have already done
2738                          * the tapping.
2739                          *
2740                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2741                          * processed as coming in on the correct interface.
2742                          *
2743                          * Clear the bridge flag for local processing in
2744                          * case the packet gets routed.
2745                          */
2746                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2747                         ether_reinput_oncpu(bifp, mc2, 0);
2748                 }
2749
2750                 /* Return the original packet for local processing. */
2751                 goto out;
2752         }
2753
2754         /*
2755          * Input of a unicast packet.  We have to allow unicast packets
2756          * input from links in the BLOCKING state as this might be an
2757          * interface of last resort.
2758          *
2759          * NOTE: We explicitly ignore normal packets received on a link
2760          *       in the BLOCKING state.  The point of being in that state
2761          *       is to avoid getting duplicate packets.
2762          *
2763          *       HOWEVER, if LINK2 is set the normal spanning tree code
2764          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2765          *       loops.  Unicast packets CAN still loop if we allow the
2766          *       case (hence we only do it in LINK2), but it isn't quite as
2767          *       bad as a broadcast packet looping.
2768          */
2769         if (bif->bif_flags & IFBIF_STP) {
2770                 switch (bif->bif_state) {
2771                 case BSTP_IFSTATE_L1BLOCKING:
2772                 case BSTP_IFSTATE_LISTENING:
2773                 case BSTP_IFSTATE_DISABLED:
2774                         goto out;
2775                 default:
2776                         /* blocking, bonded, forwarding, learning */
2777                         break;
2778                 }
2779         }
2780
2781         /*
2782          * Unicast.  Make sure it's not for us.
2783          *
2784          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2785          * is followed by breaking out of the loop.
2786          */
2787         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2788                 if (bif->bif_ifp->if_type != IFT_ETHER)
2789                         continue;
2790
2791                 /*
2792                  * It is destined for an interface linked to the bridge.
2793                  * We want the bridge itself to take care of link level
2794                  * forwarding to member interfaces so reinput on the bridge.
2795                  * i.e. if you ping an IP on a target interface associated
2796                  * with the bridge, the arp is-at response should indicate
2797                  * the bridge MAC.
2798                  *
2799                  * Only update our addr list when learning if the port
2800                  * is not in a blocking state.  If it is we still allow
2801                  * the packet but we do not try to learn from it.
2802                  */
2803                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2804                            ETHER_ADDR_LEN) == 0) {
2805                         if (bif->bif_ifp != ifp) {
2806                                 /* XXX loop prevention */
2807                                 m->m_flags |= M_ETHER_BRIDGED;
2808                         }
2809                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2810                             ((bif->bif_flags & IFBIF_STP) == 0 ||
2811                              bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2812                                 bridge_rtupdate(sc, eh->ether_shost,
2813                                                 ifp, IFBAF_DYNAMIC);
2814                         }
2815                         new_ifp = bifp; /* not bif->bif_ifp */
2816                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2817                         goto out;
2818                 }
2819
2820                 /*
2821                  * Ignore received packets that were sent by us.
2822                  */
2823                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2824                            ETHER_ADDR_LEN) == 0) {
2825                         m_freem(m);
2826                         m = NULL;
2827                         goto out;
2828                 }
2829         }
2830
2831         /*
2832          * It isn't for us.
2833          *
2834          * Perform the bridge forwarding function, but disallow bridging
2835          * to interfaces in the blocking state if the packet came in on
2836          * an interface in the blocking state.
2837          */
2838         bridge_forward(sc, m);
2839         m = NULL;
2840
2841         /*
2842          * ether_reinput_oncpu() will reprocess rcvif as
2843          * coming from new_ifp (since we do not specify
2844          * REINPUT_KEEPRCVIF).
2845          */
2846 out:
2847         if (new_ifp != NULL) {
2848                 /*
2849                  * Clear the bridge flag for local processing in
2850                  * case the packet gets routed.
2851                  */
2852                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2853                 m = NULL;
2854         }
2855         return (m);
2856 }
2857
2858 /*
2859  * bridge_start_bcast:
2860  *
2861  *      Broadcast the packet sent from bridge to all member
2862  *      interfaces.
2863  *      This is a simplified version of bridge_broadcast(), however,
2864  *      this function expects caller to hold bridge's serializer.
2865  */
2866 static void
2867 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2868 {
2869         struct bridge_iflist *bif;
2870         struct mbuf *mc;
2871         struct ifnet *dst_if, *alt_if, *bifp;
2872         int used = 0;
2873         int found = 0;
2874         int alt_priority;
2875
2876         mbuftrackid(m, 68);
2877         bifp = sc->sc_ifp;
2878         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2879
2880         /*
2881          * Following loop is MPSAFE; nothing is blocking
2882          * in the loop body.
2883          *
2884          * NOTE: We transmit through a member in the BLOCKING state only
2885          *       as a last resort.
2886          */
2887         alt_if = NULL;
2888         alt_priority = 0;
2889
2890         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2891                 dst_if = bif->bif_ifp;
2892
2893                 if (bif->bif_flags & IFBIF_STP) {
2894                         switch (bif->bif_state) {
2895                         case BSTP_IFSTATE_BLOCKING:
2896                                 if (bif->bif_priority > alt_priority) {
2897                                         alt_priority = bif->bif_priority;
2898                                         alt_if = bif->bif_ifp;
2899                                 }
2900                                 /* fall through */
2901                         case BSTP_IFSTATE_L1BLOCKING:
2902                         case BSTP_IFSTATE_DISABLED:
2903                                 continue;
2904                         default:
2905                                 /* listening, learning, bonded, forwarding */
2906                                 break;
2907                         }
2908                 }
2909
2910                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2911                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2912                         continue;
2913
2914                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2915                         continue;
2916
2917                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2918                         mc = m;
2919                         used = 1;
2920                 } else {
2921                         mc = m_copypacket(m, MB_DONTWAIT);
2922                         if (mc == NULL) {
2923                                 IFNET_STAT_INC(bifp, oerrors, 1);
2924                                 continue;
2925                         }
2926                 }
2927                 found = 1;
2928                 bridge_enqueue(dst_if, mc);
2929         }
2930
2931         if (found == 0 && alt_if) {
2932                 KKASSERT(used == 0);
2933                 mc = m;
2934                 used = 1;
2935                 bridge_enqueue(alt_if, mc);
2936         }
2937
2938         if (used == 0)
2939                 m_freem(m);
2940 }
2941
2942 /*
2943  * bridge_broadcast:
2944  *
2945  *      Send a frame to all interfaces that are members of
2946  *      the bridge, except for the one on which the packet
2947  *      arrived.
2948  */
2949 static void
2950 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2951                  struct mbuf *m)
2952 {
2953         struct bridge_iflist *bif, *nbif;
2954         struct ether_header *eh;
2955         struct mbuf *mc;
2956         struct ifnet *dst_if, *alt_if, *bifp;
2957         int used;
2958         int found;
2959         int alt_priority;
2960         int from_us;
2961
2962         mbuftrackid(m, 69);
2963         bifp = sc->sc_ifp;
2964         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
2965
2966         eh = mtod(m, struct ether_header *);
2967         from_us = bridge_from_us(sc, eh);
2968
2969         if (inet_pfil_hook.ph_hashooks > 0
2970 #ifdef INET6
2971             || inet6_pfil_hook.ph_hashooks > 0
2972 #endif
2973             ) {
2974                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2975                         return;
2976                 if (m == NULL)
2977                         return;
2978
2979                 /* Filter on the bridge interface before broadcasting */
2980                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2981                         return;
2982                 if (m == NULL)
2983                         return;
2984         }
2985
2986         alt_if = NULL;
2987         alt_priority = 0;
2988         found = 0;
2989         used = 0;
2990
2991         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
2992                 dst_if = bif->bif_ifp;
2993
2994                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2995                         continue;
2996
2997                 /*
2998                  * Don't bounce the packet out the same interface it came
2999                  * in on.  We have to test MAC addresses because a packet
3000                  * can come in on a bonded interface and we don't want it to
3001                  * be echoed out the forwarding interface for the same bonding
3002                  * set.
3003                  */
3004                 if (src_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
3005                                      ETHER_ADDR_LEN) == 0) {
3006                         continue;
3007                 }
3008
3009                 /*
3010                  * Generally speaking we only broadcast through forwarding
3011                  * interfaces.  If no interfaces are available we select
3012                  * a BONDED, BLOCKING, or LEARNING interface to forward
3013                  * through.
3014                  */
3015                 if (bif->bif_flags & IFBIF_STP) {
3016                         switch (bif->bif_state) {
3017                         case BSTP_IFSTATE_BONDED:
3018                                 if (bif->bif_priority + 512 > alt_priority) {
3019                                         alt_priority = bif->bif_priority + 512;
3020                                         alt_if = bif->bif_ifp;
3021                                 }
3022                                 continue;
3023                         case BSTP_IFSTATE_BLOCKING:
3024                                 if (bif->bif_priority + 256 > alt_priority) {
3025                                         alt_priority = bif->bif_priority + 256;
3026                                         alt_if = bif->bif_ifp;
3027                                 }
3028                                 continue;
3029                         case BSTP_IFSTATE_LEARNING:
3030                                 if (bif->bif_priority > alt_priority) {
3031                                         alt_priority = bif->bif_priority;
3032                                         alt_if = bif->bif_ifp;
3033                                 }
3034                                 continue;
3035                         case BSTP_IFSTATE_L1BLOCKING:
3036                         case BSTP_IFSTATE_DISABLED:
3037                         case BSTP_IFSTATE_LISTENING:
3038                                 continue;
3039                         default:
3040                                 /* forwarding */
3041                                 break;
3042                         }
3043                 }
3044
3045                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3046                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3047                         continue;
3048                 }
3049
3050                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3051                         mc = m;
3052                         used = 1;
3053                 } else {
3054                         mc = m_copypacket(m, MB_DONTWAIT);
3055                         if (mc == NULL) {
3056                                 IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3057                                 continue;
3058                         }
3059                 }
3060                 found = 1;
3061
3062                 /*
3063                  * Filter on the output interface.  Pass a NULL bridge
3064                  * interface pointer so we do not redundantly filter on
3065                  * the bridge for each interface we broadcast on.
3066                  */
3067                 if (inet_pfil_hook.ph_hashooks > 0
3068 #ifdef INET6
3069                     || inet6_pfil_hook.ph_hashooks > 0
3070 #endif
3071                     ) {
3072                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3073                                 continue;
3074                         if (mc == NULL)
3075                                 continue;
3076                 }
3077                 bridge_handoff(sc, dst_if, mc, from_us);
3078
3079                 if (nbif != NULL && !nbif->bif_onlist) {
3080                         KKASSERT(bif->bif_onlist);
3081                         nbif = TAILQ_NEXT(bif, bif_next);
3082                 }
3083         }
3084
3085         if (found == 0 && alt_if) {
3086                 KKASSERT(used == 0);
3087                 mc = m;
3088                 used = 1;
3089                 bridge_enqueue(alt_if, mc);
3090         }
3091
3092         if (used == 0)
3093                 m_freem(m);
3094 }
3095
3096 /*
3097  * bridge_span:
3098  *
3099  *      Duplicate a packet out one or more interfaces that are in span mode;
3100  *      the original mbuf is left unmodified.
3101  */
3102 static void
3103 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3104 {
3105         struct bridge_iflist *bif;
3106         struct ifnet *dst_if, *bifp;
3107         struct mbuf *mc;
3108
3109         mbuftrackid(m, 70);
3110         bifp = sc->sc_ifp;
3111         ifnet_serialize_all(bifp);
3112
3113         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3114                 dst_if = bif->bif_ifp;
3115
3116                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3117                         continue;
3118
3119                 mc = m_copypacket(m, MB_DONTWAIT);
3120                 if (mc == NULL) {
3121                         IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3122                         continue;
3123                 }
3124                 bridge_enqueue(dst_if, mc);
3125         }
3126
3127         ifnet_deserialize_all(bifp);
3128 }
3129
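/*
 * bridge_rtmsg_sync_handler / bridge_rtmsg_sync:
 *
 *      Route-table operations are replicated by chaining a netmsg through
 *      the per-cpu ifnet message ports: ifnet_domsg() starts on cpu0 and
 *      each handler passes the message on with ifnet_forwardmsg().  This
 *      handler does nothing but forward, so the round trip effectively
 *      acts as a barrier ensuring previously queued route messages have
 *      been processed on every cpu.
 */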
3130 static void
3131 bridge_rtmsg_sync_handler(netmsg_t msg)
3132 {
3133         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3134 }
3135
3136 static void
3137 bridge_rtmsg_sync(struct bridge_softc *sc)
3138 {
3139         struct netmsg_base msg;
3140
3141         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3142
3143         netmsg_init(&msg, NULL, &curthread->td_msgport,
3144                     0, bridge_rtmsg_sync_handler);
3145         ifnet_domsg(&msg.lmsg, 0);
3146 }
3147
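/*
 * bridge_rtinfo_update:
 *
 *      Refresh the rtinfo shared by the per-cpu rtnodes: repoint dynamic
 *      entries at dst_if, push out the expiration time when the new flags
 *      mark the entry dynamic, and overwrite the flags if setflags is set.
 *      The compare-before-store tests presumably avoid dirtying the shared
 *      cache line when nothing actually changed.
 */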
3148 static __inline void
3149 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3150                      int setflags, uint8_t flags, uint32_t timeo)
3151 {
3152         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3153             bri->bri_ifp != dst_if)
3154                 bri->bri_ifp = dst_if;
3155         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3156             bri->bri_expire != time_second + timeo)
3157                 bri->bri_expire = time_second + timeo;
3158         if (setflags)
3159                 bri->bri_flags = flags;
3160 }
3161
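/*
 * bridge_rtinstall_oncpu:
 *
 *      Install an rtnode for 'dst' on the current cpu.  cpu0 performs the
 *      global work: it detects an already existing entry (leaving *bri0
 *      NULL so the caller stops the chain), enforces sc_brtmax, and
 *      allocates the rtinfo shared by all cpus.  Every cpu then allocates
 *      its own rtnode pointing at that shared rtinfo and inserts it into
 *      its local hash table and list.
 */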
3162 static int
3163 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3164                        struct ifnet *dst_if, int setflags, uint8_t flags,
3165                        struct bridge_rtinfo **bri0)
3166 {
3167         struct bridge_rtnode *brt;
3168         struct bridge_rtinfo *bri;
3169
3170         if (mycpuid == 0) {
3171                 brt = bridge_rtnode_lookup(sc, dst);
3172                 if (brt != NULL) {
3173                         /*
3174                          * rtnode for 'dst' already exists.  We inform the
3175                          * caller about this by leaving bri0 as NULL.  The
3176                          * caller will terminate the installation upon getting
3177                          * NULL bri0.  However, we still need to update the
3178                          * rtinfo.
3179                          */
3180                         KKASSERT(*bri0 == NULL);
3181
3182                         /* Update rtinfo */
3183                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3184                                              flags, sc->sc_brttimeout);
3185                         return 0;
3186                 }
3187
3188                 /*
3189                  * We only need to check brtcnt on CPU0; if the limit
3190                  * would be exceeded, ENOSPC is returned.  The caller knows
3191                  * this and will terminate the installation.
3192                  */
3193                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3194                         return ENOSPC;
3195
3196                 KKASSERT(*bri0 == NULL);
3197                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3198                                   M_WAITOK | M_ZERO);
3199                 *bri0 = bri;
3200
3201                 /* Setup rtinfo */
3202                 bri->bri_flags = IFBAF_DYNAMIC;
3203                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3204                                      sc->sc_brttimeout);
3205         } else {
3206                 bri = *bri0;
3207                 KKASSERT(bri != NULL);
3208         }
3209
3210         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3211                       M_WAITOK | M_ZERO);
3212         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3213         brt->brt_info = bri;
3214
3215         bridge_rtnode_insert(sc, brt);
3216         return 0;
3217 }
3218
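/*
 * bridge_rtinstall_handler:
 *
 *      Per-cpu netmsg handler for route installation.  On cpu0 it replies
 *      early if the entry already exists or the installation fails;
 *      otherwise the message is forwarded to the next cpu.
 */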
3219 static void
3220 bridge_rtinstall_handler(netmsg_t msg)
3221 {
3222         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3223         int error;
3224
3225         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3226                                        brmsg->br_dst, brmsg->br_dst_if,
3227                                        brmsg->br_setflags, brmsg->br_flags,
3228                                        &brmsg->br_rtinfo);
3229         if (error) {
3230                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3231                 lwkt_replymsg(&brmsg->base.lmsg, error);
3232                 return;
3233         } else if (brmsg->br_rtinfo == NULL) {
3234                 /* rtnode already exists for 'dst' */
3235                 KKASSERT(mycpuid == 0);
3236                 lwkt_replymsg(&brmsg->base.lmsg, 0);
3237                 return;
3238         }
3239         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
3240 }
3241
3242 /*
3243  * bridge_rtupdate:
3244  *
3245  *      Add/Update a bridge routing entry.
3246  */
3247 static int
3248 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3249                 struct ifnet *dst_if, uint8_t flags)
3250 {
3251         struct bridge_rtnode *brt;
3252
3253         /*
3254          * A route for this destination might already exist.  If so,
3255          * update it, otherwise create a new one.
3256          */
3257         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3258                 struct netmsg_brsaddr *brmsg;
3259
3260                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3261                         return ENOSPC;
3262
3263                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3264                 if (brmsg == NULL)
3265                         return ENOMEM;
3266
3267                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3268                             0, bridge_rtinstall_handler);
3269                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3270                 brmsg->br_dst_if = dst_if;
3271                 brmsg->br_flags = flags;
3272                 brmsg->br_setflags = 0;
3273                 brmsg->br_softc = sc;
3274                 brmsg->br_rtinfo = NULL;
3275
3276                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
3277                 return 0;
3278         }
3279         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3280                              sc->sc_brttimeout);
3281         return 0;
3282 }
3283
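/*
 * bridge_rtsaddr:
 *
 *      Synchronously install a route entry with explicit flags (setflags
 *      is 1, so the supplied flags replace the default IFBAF_DYNAMIC),
 *      e.g. bridge_rtsaddr(sc, addr, ifp, IFBAF_STATIC) for a static
 *      address.  Returns the error from the per-cpu installation chain.
 */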
3284 static int
3285 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3286                struct ifnet *dst_if, uint8_t flags)
3287 {
3288         struct netmsg_brsaddr brmsg;
3289
3290         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3291
3292         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3293                     0, bridge_rtinstall_handler);
3294         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3295         brmsg.br_dst_if = dst_if;
3296         brmsg.br_flags = flags;
3297         brmsg.br_setflags = 1;
3298         brmsg.br_softc = sc;
3299         brmsg.br_rtinfo = NULL;
3300
3301         return ifnet_domsg(&brmsg.base.lmsg, 0);
3302 }
3303
3304 /*
3305  * bridge_rtlookup:
3306  *
3307  *      Lookup the destination interface for an address.
3308  */
3309 static struct ifnet *
3310 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3311 {
3312         struct bridge_rtnode *brt;
3313
3314         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3315                 return NULL;
3316         return brt->brt_info->bri_ifp;
3317 }
3318
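/*
 * bridge_rtreap_handler / bridge_rtreap / bridge_rtreap_async:
 *
 *      Reaping is a two phase operation: callers first mark bri_dead in
 *      the shared rtinfo, then a netmsg visits every cpu and each cpu
 *      destroys its local rtnodes whose rtinfo is dead (the rtinfo itself
 *      is freed on the last cpu, see bridge_rtnode_destroy()).
 *      bridge_rtreap() waits for the sweep; bridge_rtreap_async() fires
 *      and forgets via the auto-free reply port.
 */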
3319 static void
3320 bridge_rtreap_handler(netmsg_t msg)
3321 {
3322         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3323         struct bridge_rtnode *brt, *nbrt;
3324
3325         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3326                 if (brt->brt_info->bri_dead)
3327                         bridge_rtnode_destroy(sc, brt);
3328         }
3329         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3330 }
3331
3332 static void
3333 bridge_rtreap(struct bridge_softc *sc)
3334 {
3335         struct netmsg_base msg;
3336
3337         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3338
3339         netmsg_init(&msg, NULL, &curthread->td_msgport,
3340                     0, bridge_rtreap_handler);
3341         msg.lmsg.u.ms_resultp = sc;
3342
3343         ifnet_domsg(&msg.lmsg, 0);
3344 }
3345
3346 static void
3347 bridge_rtreap_async(struct bridge_softc *sc)
3348 {
3349         struct netmsg_base *msg;
3350
3351         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3352
3353         netmsg_init(msg, NULL, &netisr_afree_rport,
3354                     0, bridge_rtreap_handler);
3355         msg->lmsg.u.ms_resultp = sc;
3356
3357         ifnet_sendmsg(&msg->lmsg, 0);
3358 }
3359
3360 /*
3361  * bridge_rttrim:
3362  *
3363  *      Trim the routing table so that the number
3364  *      of routing entries is less than or equal to the
3365  *      maximum number.
3366  */
3367 static void
3368 bridge_rttrim(struct bridge_softc *sc)
3369 {
3370         struct bridge_rtnode *brt;
3371         int dead;
3372
3373         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3374
3375         /* Make sure we actually need to do this. */
3376         if (sc->sc_brtcnt <= sc->sc_brtmax)
3377                 return;
3378
3379         /*
3380          * Find out how many rtnodes are dead
3381          */
3382         dead = bridge_rtage_finddead(sc);
3383         KKASSERT(dead <= sc->sc_brtcnt);
3384
3385         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3386                 /* Enough dead rtnodes are found */
3387                 bridge_rtreap(sc);
3388                 return;
3389         }
3390
3391         /*
3392          * Kill some dynamic rtnodes to meet the brtmax
3393          */
3394         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3395                 struct bridge_rtinfo *bri = brt->brt_info;
3396
3397                 if (bri->bri_dead) {
3398                         /*
3399                          * We have counted this rtnode in
3400                          * bridge_rtage_finddead()
3401                          */
3402                         continue;
3403                 }
3404
3405                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3406                         bri->bri_dead = 1;
3407                         ++dead;
3408                         KKASSERT(dead <= sc->sc_brtcnt);
3409
3410                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3411                                 /* Enough rtnodes are collected */
3412                                 break;
3413                         }
3414                 }
3415         }
3416         if (dead)
3417                 bridge_rtreap(sc);
3418 }
3419
3420 /*
3421  * bridge_timer:
3422  *
3423  *      Aging timer for the bridge.
3424  */
3425 static void
3426 bridge_timer(void *arg)
3427 {
3428         struct bridge_softc *sc = arg;
3429         struct netmsg_base *msg;
3430
3431         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3432
3433         crit_enter();
3434
3435         if (callout_pending(&sc->sc_brcallout) ||
3436             !callout_active(&sc->sc_brcallout)) {
3437                 crit_exit();
3438                 return;
3439         }
3440         callout_deactivate(&sc->sc_brcallout);
3441
3442         msg = &sc->sc_brtimemsg;
3443         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3444         lwkt_sendmsg(BRIDGE_CFGPORT, &msg->lmsg);
3445
3446         crit_exit();
3447 }
3448
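/*
 * bridge_timer_handler:
 *
 *      Runs on BRIDGE_CFGPORT.  The timer message is replied to right
 *      away so it can be reused by the next tick (bridge_timer() asserts
 *      MSGF_DONE), then an aging cycle is performed and the callout is
 *      rearmed while the bridge is still running.
 */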
3449 static void
3450 bridge_timer_handler(netmsg_t msg)
3451 {
3452         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3453
3454         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3455
3456         crit_enter();
3457         /* Reply ASAP */
3458         lwkt_replymsg(&msg->lmsg, 0);
3459         crit_exit();
3460
3461         bridge_rtage(sc);
3462         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3463                 callout_reset(&sc->sc_brcallout,
3464                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3465         }
3466 }
3467
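/*
 * bridge_rtage_finddead:
 *
 *      Walk this cpu's rtnode list and mark expired dynamic entries dead.
 *      Returns the number of entries marked; the actual destruction is
 *      left to a subsequent bridge_rtreap().
 */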
3468 static int
3469 bridge_rtage_finddead(struct bridge_softc *sc)
3470 {
3471         struct bridge_rtnode *brt;
3472         int dead = 0;
3473
3474         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3475                 struct bridge_rtinfo *bri = brt->brt_info;
3476
3477                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3478                     time_second >= bri->bri_expire) {
3479                         bri->bri_dead = 1;
3480                         ++dead;
3481                         KKASSERT(dead <= sc->sc_brtcnt);
3482                 }
3483         }
3484         return dead;
3485 }
3486
3487 /*
3488  * bridge_rtage:
3489  *
3490  *      Perform an aging cycle.
3491  */
3492 static void
3493 bridge_rtage(struct bridge_softc *sc)
3494 {
3495         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3496
3497         if (bridge_rtage_finddead(sc))
3498                 bridge_rtreap(sc);
3499 }
3500
3501 /*
3502  * bridge_rtflush:
3503  *
3504  *      Remove all dynamic addresses from the bridge.
3505  */
3506 static void
3507 bridge_rtflush(struct bridge_softc *sc, int bf)
3508 {
3509         struct bridge_rtnode *brt;
3510         int reap;
3511
3512         reap = 0;
3513         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3514                 struct bridge_rtinfo *bri = brt->brt_info;
3515
3516                 if ((bf & IFBF_FLUSHALL) ||
3517                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3518                         bri->bri_dead = 1;
3519                         reap = 1;
3520                 }
3521         }
3522         if (reap) {
3523                 if (bf & IFBF_FLUSHSYNC)
3524                         bridge_rtreap(sc);
3525                 else
3526                         bridge_rtreap_async(sc);
3527         }
3528 }
3529
3530 /*
3531  * bridge_rtdaddr:
3532  *
3533  *      Remove an address from the table.
3534  */
3535 static int
3536 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3537 {
3538         struct bridge_rtnode *brt;
3539
3540         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3541
3542         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3543                 return (ENOENT);
3544
3545         /* TODO: add a cheaper delete operation */
3546         brt->brt_info->bri_dead = 1;
3547         bridge_rtreap(sc);
3548         return (0);
3549 }
3550
3551 /*
3552  * bridge_rtdelete:
3553  *
3554  *      Delete routes to a specific member interface.
3555  */
3556 void
3557 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3558 {
3559         struct bridge_rtnode *brt;
3560         int reap;
3561
3562         reap = 0;
3563         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3564                 struct bridge_rtinfo *bri = brt->brt_info;
3565
3566                 if (bri->bri_ifp == ifp &&
3567                     ((bf & IFBF_FLUSHALL) ||
3568                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3569                         bri->bri_dead = 1;
3570                         reap = 1;
3571                 }
3572         }
3573         if (reap) {
3574                 if (bf & IFBF_FLUSHSYNC)
3575                         bridge_rtreap(sc);
3576                 else
3577                         bridge_rtreap_async(sc);
3578         }
3579 }
3580
3581 /*
3582  * bridge_rtable_init:
3583  *
3584  *      Initialize the route table for this bridge.
3585  */
3586 static void
3587 bridge_rtable_init(struct bridge_softc *sc)
3588 {
3589         int cpu;
3590
3591         /*
3592          * Initialize per-cpu hash tables
3593          */
3594         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3595                                  M_DEVBUF, M_WAITOK);
3596         for (cpu = 0; cpu < ncpus; ++cpu) {
3597                 int i;
3598
3599                 sc->sc_rthashs[cpu] =
3600                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3601                         M_DEVBUF, M_WAITOK);
3602
3603                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3604                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3605         }
3606         sc->sc_rthash_key = karc4random();
3607
3608         /*
3609          * Initialize per-cpu lists
3610          */
3611         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3612                                  M_DEVBUF, M_WAITOK);
3613         for (cpu = 0; cpu < ncpus; ++cpu)
3614                 LIST_INIT(&sc->sc_rtlists[cpu]);
3615 }
3616
3617 /*
3618  * bridge_rtable_fini:
3619  *
3620  *      Deconstruct the route table for this bridge.
3621  */
3622 static void
3623 bridge_rtable_fini(struct bridge_softc *sc)
3624 {
3625         int cpu;
3626
3627         /*
3628          * Free per-cpu hash tables
3629          */
3630         for (cpu = 0; cpu < ncpus; ++cpu)
3631                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3632         kfree(sc->sc_rthashs, M_DEVBUF);
3633
3634         /*
3635          * Free per-cpu lists
3636          */
3637         kfree(sc->sc_rtlists, M_DEVBUF);
3638 }
3639
3640 /*
3641  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3642  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3643  */
3644 #define mix(a, b, c)                                                    \
3645 do {                                                                    \
3646         a -= b; a -= c; a ^= (c >> 13);                                 \
3647         b -= c; b -= a; b ^= (a << 8);                                  \
3648         c -= a; c -= b; c ^= (b >> 13);                                 \
3649         a -= b; a -= c; a ^= (c >> 12);                                 \
3650         b -= c; b -= a; b ^= (a << 16);                                 \
3651         c -= a; c -= b; c ^= (b >> 5);                                  \
3652         a -= b; a -= c; a ^= (c >> 3);                                  \
3653         b -= c; b -= a; b ^= (a << 10);                                 \
3654         c -= a; c -= b; c ^= (b >> 15);                                 \
3655 } while (/*CONSTCOND*/0)
3656
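/*
 * bridge_rthash:
 *
 *      Hash an Ethernet address into the route table: the six address
 *      bytes are folded into two 32 bit words, mixed with the per-bridge
 *      random key (sc_rthash_key) using the Jenkins mix above, and the
 *      result is masked with BRIDGE_RTHASH_MASK to select a bucket.
 */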
3657 static __inline uint32_t
3658 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3659 {
3660         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3661
3662         b += addr[5] << 8;
3663         b += addr[4];
3664         a += addr[3] << 24;
3665         a += addr[2] << 16;
3666         a += addr[1] << 8;
3667         a += addr[0];
3668
3669         mix(a, b, c);
3670
3671         return (c & BRIDGE_RTHASH_MASK);
3672 }
3673
3674 #undef mix
3675
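/*
 * bridge_rtnode_addr_cmp:
 *
 *      Memcmp-style comparison of two Ethernet addresses returning a
 *      negative, zero or positive value.  The hash chains are kept sorted
 *      by this ordering so lookups can terminate early (see
 *      bridge_rtnode_lookup() and bridge_rtnode_insert()).
 */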
3676 static int
3677 bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
3678 {
3679         int i, d;
3680
3681         for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
3682                 d = ((int)a[i]) - ((int)b[i]);
3683         }
3684
3685         return (d);
3686 }
3687
3688 /*
3689  * bridge_rtnode_lookup:
3690  *
3691  *      Look up a bridge route node for the specified destination.
3692  */
3693 static struct bridge_rtnode *
3694 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3695 {
3696         struct bridge_rtnode *brt;
3697         uint32_t hash;
3698         int dir;
3699
3700         hash = bridge_rthash(sc, addr);
3701         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3702                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3703                 if (dir == 0)
3704                         return (brt);
3705                 if (dir > 0)
3706                         return (NULL);
3707         }
3708
3709         return (NULL);
3710 }
3711
3712 /*
3713  * bridge_rtnode_insert:
3714  *
3715  *      Insert the specified bridge node into the route table.
3716  *      Caller has to make sure that rtnode does not exist.
3717  */
3718 static void
3719 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3720 {
3721         struct bridge_rtnode *lbrt;
3722         uint32_t hash;
3723         int dir;
3724
3725         hash = bridge_rthash(sc, brt->brt_addr);
3726
3727         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3728         if (lbrt == NULL) {
3729                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3730                                   brt, brt_hash);
3731                 goto out;
3732         }
3733
3734         do {
3735                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3736                 KASSERT(dir != 0, ("rtnode already exists"));
3737
3738                 if (dir > 0) {
3739                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3740                         goto out;
3741                 }
3742                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3743                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3744                         goto out;
3745                 }
3746                 lbrt = LIST_NEXT(lbrt, brt_hash);
3747         } while (lbrt != NULL);
3748
3749         panic("no suitable position found for rtnode");
3750 out:
3751         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3752         if (mycpuid == 0) {
3753                 /*
3754                  * Update the brtcnt.
3755                  * We only need to do it once and we do it on CPU0.
3756                  */
3757                 sc->sc_brtcnt++;
3758         }
3759 }
3760
3761 /*
3762  * bridge_rtnode_destroy:
3763  *
3764  *      Destroy a bridge rtnode.
3765  */
3766 static void
3767 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3768 {
3769         LIST_REMOVE(brt, brt_hash);
3770         LIST_REMOVE(brt, brt_list);
3771
3772         if (mycpuid + 1 == ncpus) {
3773                 /* Free rtinfo associated with rtnode on the last cpu */
3774                 kfree(brt->brt_info, M_DEVBUF);
3775         }
3776         kfree(brt, M_DEVBUF);
3777
3778         if (mycpuid == 0) {
3779                 /* Update brtcnt only on CPU0 */
3780                 sc->sc_brtcnt--;
3781         }
3782 }
3783
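/*
 * bridge_post_pfil:
 *
 *      After the pfil hooks have run, reject packets that the firewall
 *      tagged for ipfw forwarding or dummynet processing; neither is
 *      supported on the bridging path yet, so the caller treats this as
 *      an error and drops the packet.
 */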
3784 static __inline int
3785 bridge_post_pfil(struct mbuf *m)
3786 {
3787         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3788                 return EOPNOTSUPP;
3789
3790         /* Not yet */
3791         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3792                 return EOPNOTSUPP;
3793
3794         return 0;
3795 }
3796
3797 /*
3798  * Send bridge packets through pfil if they are one of the types pfil can deal
3799  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3800  * question.) If bifp or ifp is NULL then packet filtering is skipped for
3801  * that interface.
3802  */
3803 static int
3804 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3805 {
3806         int snap, error, i, hlen;
3807         struct ether_header *eh1, eh2;
3808         struct ip *ip;
3809         struct llc llc1;
3810         u_int16_t ether_type;
3811
3812         snap = 0;
3813         error = -1;     /* Default to an error unless explicitly cleared below */
3814
3815         if (pfil_bridge == 0 && pfil_member == 0)
3816                 return (0); /* filtering is disabled */
3817
3818         i = min((*mp)->m_pkthdr.len, max_protohdr);
3819         if ((*mp)->m_len < i) {
3820                 *mp = m_pullup(*mp, i);
3821                 if (*mp == NULL) {
3822                         kprintf("%s: m_pullup failed\n", __func__);
3823                         return (-1);
3824                 }
3825         }
3826
3827         eh1 = mtod(*mp, struct ether_header *);
3828         ether_type = ntohs(eh1->ether_type);
3829
3830         /*
3831          * Check for SNAP/LLC.
3832          */
3833         if (ether_type < ETHERMTU) {
3834                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3835
3836                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3837                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3838                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3839                     llc2->llc_control == LLC_UI) {
3840                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3841                         snap = 1;
3842                 }
3843         }
3844
3845         /*
3846          * If we're trying to filter bridge traffic, don't look at anything
3847          * other than IP and ARP traffic.  If the filter doesn't understand
3848          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3849          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3850          * but of course we don't have an AppleTalk filter to begin with.
3851          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3852          * ARP traffic.)
3853          */
3854         switch (ether_type) {
3855         case ETHERTYPE_ARP:
3856         case ETHERTYPE_REVARP:
3857                 return (0); /* Automatically pass */
3858
3859         case ETHERTYPE_IP:
3860 #ifdef INET6
3861         case ETHERTYPE_IPV6:
3862 #endif /* INET6 */
3863                 break;
3864
3865         default:
3866                 /*
3867                  * Check to see if the user wants to pass non-ip
3868                  * packets, these will not be checked by pfil(9)
3869                  * and passed unconditionally so the default is to drop.
3870                  */
3871                 if (pfil_onlyip)
3872                         goto bad;
3873         }
3874
3875         /* Strip off the Ethernet header and keep a copy. */
3876         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3877         m_adj(*mp, ETHER_HDR_LEN);
3878
3879         /* Strip off snap header, if present */
3880         if (snap) {
3881                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3882                 m_adj(*mp, sizeof(struct llc));
3883         }
3884
3885         /*
3886          * Check the IP header for alignment and errors
3887          */
3888         if (dir == PFIL_IN) {
3889                 switch (ether_type) {
3890                 case ETHERTYPE_IP:
3891                         error = bridge_ip_checkbasic(mp);
3892                         break;
3893 #ifdef INET6
3894                 case ETHERTYPE_IPV6:
3895                         error = bridge_ip6_checkbasic(mp);
3896                         break;
3897 #endif /* INET6 */
3898                 default:
3899                         error = 0;
3900                 }
3901                 if (error)
3902                         goto bad;
3903         }
3904
3905         error = 0;
3906
3907         /*
3908          * Run the packet through pfil
3909          */
3910         switch (ether_type) {
3911         case ETHERTYPE_IP:
3912                 /*
3913                  * Before calling the firewall, swap fields the same way
3914                  * IP input does.  Here we assume the header is contiguous.
3915                  */
3916                 ip = mtod(*mp, struct ip *);
3917
3918                 ip->ip_len = ntohs(ip->ip_len);
3919                 ip->ip_off = ntohs(ip->ip_off);
3920
3921                 /*
3922                  * Run pfil on the member interface and the bridge, both can
3923                  * be skipped by clearing pfil_member or pfil_bridge.
3924                  *
3925                  * Keep the order:
3926                  *   in_if -> bridge_if -> out_if
3927                  */
3928                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3929                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3930                         if (*mp == NULL || error != 0) /* filter may consume */
3931                                 break;
3932                         error = bridge_post_pfil(*mp);
3933                         if (error)
3934                                 break;
3935                 }
3936
3937                 if (pfil_member && ifp != NULL) {
3938                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3939                         if (*mp == NULL || error != 0) /* filter may consume */
3940                                 break;
3941                         error = bridge_post_pfil(*mp);
3942                         if (error)
3943                                 break;
3944                 }
3945
3946                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3947                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3948                         if (*mp == NULL || error != 0) /* filter may consume */
3949                                 break;
3950                         error = bridge_post_pfil(*mp);
3951                         if (error)
3952                                 break;
3953                 }
3954
3955                 /* check if we need to fragment the packet */
3956                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3957                         i = (*mp)->m_pkthdr.len;
3958                         if (i > ifp->if_mtu) {
3959                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3960                                             &llc1);
3961                                 return (error);
3962                         }
3963                 }
3964
3965                 /* Recalculate the ip checksum and restore byte ordering */
3966                 ip = mtod(*mp, struct ip *);
3967                 hlen = ip->ip_hl << 2;
3968                 if (hlen < sizeof(struct ip))
3969                         goto bad;
3970                 if (hlen > (*mp)->m_len) {
3971                         if ((*mp = m_pullup(*mp, hlen)) == NULL)
3972                                 goto bad;
3973                         ip = mtod(*mp, struct ip *);
3974                         if (ip == NULL)
3975                                 goto bad;
3976                 }
3977                 ip->ip_len = htons(ip->ip_len);
3978                 ip->ip_off = htons(ip->ip_off);
3979                 ip->ip_sum = 0;
3980                 if (hlen == sizeof(struct ip))
3981                         ip->ip_sum = in_cksum_hdr(ip);
3982                 else
3983                         ip->ip_sum = in_cksum(*mp, hlen);
3984
3985                 break;
3986 #ifdef INET6
3987         case ETHERTYPE_IPV6:
3988                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
3989                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3990                                         dir);
3991
3992                 if (*mp == NULL || error != 0) /* filter may consume */
3993                         break;
3994
3995                 if (pfil_member && ifp != NULL)
3996                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
3997                                         dir);
3998
3999                 if (*mp == NULL || error != 0) /* filter may consume */
4000                         break;
4001
4002                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4003                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4004                                         dir);
4005                 break;
4006 #endif
4007         default:
4008                 error = 0;
4009                 break;
4010         }
4011
4012         if (*mp == NULL)
4013                 return (error);
4014         if (error != 0)
4015                 goto bad;
4016
4017         error = -1;
4018
4019         /*
4020          * Finally, put everything back the way it was and return
4021          */
4022         if (snap) {
4023                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
4024                 if (*mp == NULL)
4025                         return (error);
4026                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4027         }
4028
4029         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
4030         if (*mp == NULL)
4031                 return (error);
4032         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4033
4034         return (0);
4035
4036 bad:
4037         m_freem(*mp);
4038         *mp = NULL;
4039         return (error);
4040 }
4041
4042 /*
4043  * Perform basic checks on header size since
4044  * pfil assumes ip_input has already processed
4045  * it on pfil's behalf.  Cut-and-pasted from ip_input.c.
4046  * Given how simple the IPv6 version is,
4047  * does the IPv4 version really need to be
4048  * this complicated?
4049  *
4050  * XXX Should we update ipstat here, or not?
4051  * XXX Right now we update ipstat but not
4052  * XXX csum_counter.
4053  */
4054 static int
4055 bridge_ip_checkbasic(struct mbuf **mp)
4056 {
4057         struct mbuf *m = *mp;
4058         struct ip *ip;
4059         int len, hlen;
4060         u_short sum;
4061
4062         if (*mp == NULL)
4063                 return (-1);
4064 #if 0 /* notyet */
4065         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4066                 if ((m = m_copyup(m, sizeof(struct ip),
4067                         (max_linkhdr + 3) & ~3)) == NULL) {
4068                         /* XXXJRT new stat, please */
4069                         ipstat.ips_toosmall++;
4070                         goto bad;
4071                 }
4072         } else
4073 #endif
4074 #ifndef __predict_false
4075 #define __predict_false(x) x
4076 #endif
4077          if (__predict_false(m->m_len < sizeof (struct ip))) {
4078                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4079                         ipstat.ips_toosmall++;
4080                         goto bad;
4081                 }
4082         }
4083         ip = mtod(m, struct ip *);
4084         if (ip == NULL) goto bad;
4085
4086         if (ip->ip_v != IPVERSION) {
4087                 ipstat.ips_badvers++;
4088                 goto bad;
4089         }
4090         hlen = ip->ip_hl << 2;
4091         if (hlen < sizeof(struct ip)) { /* minimum header length */
4092                 ipstat.ips_badhlen++;
4093                 goto bad;
4094         }
4095         if (hlen > m->m_len) {
4096                 if ((m = m_pullup(m, hlen)) == NULL) {
4097                         ipstat.ips_badhlen++;
4098                         goto bad;
4099                 }
4100                 ip = mtod(m, struct ip *);
4101                 if (ip == NULL) goto bad;
4102         }
4103
4104         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4105                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4106         } else {
4107                 if (hlen == sizeof(struct ip)) {
4108                         sum = in_cksum_hdr(ip);
4109                 } else {
4110                         sum = in_cksum(m, hlen);
4111                 }
4112         }
4113         if (sum) {
4114                 ipstat.ips_badsum++;
4115                 goto bad;
4116         }
4117
4118         /* Retrieve the packet length. */
4119         len = ntohs(ip->ip_len);
4120
4121         /*
4122          * Check for additional length bogosity
4123          */
4124         if (len < hlen) {
4125                 ipstat.ips_badlen++;
4126                 goto bad;
4127         }
4128
4129         /*
4130          * Check that the amount of data in the buffers
4131          * is at least as much as the IP header would have us expect.
4132          * Drop packet if shorter than we expect.
4133          */
4134         if (m->m_pkthdr.len < len) {
4135                 ipstat.ips_tooshort++;
4136                 goto bad;
4137         }
4138
4139         /* Checks out, proceed */
4140         *mp = m;
4141         return (0);
4142
4143 bad:
4144         *mp = m;
4145         return (-1);
4146 }
4147
4148 #ifdef INET6
4149 /*
4150  * Same as above, but for IPv6.
4151  * Cut-and-pasted from ip6_input.c.
4152  * XXX Should we update ip6stat, or not?
4153  */
4154 static int
4155 bridge_ip6_checkbasic(struct mbuf **mp)
4156 {
4157         struct mbuf *m = *mp;
4158         struct ip6_hdr *ip6;
4159
4160         /*
4161          * If the IPv6 header is not aligned, slurp it up into a new
4162          * mbuf with space for link headers, in the event we forward
4163          * it.  Otherwise, if it is aligned, make sure the entire base
4164          * IPv6 header is in the first mbuf of the chain.
4165          */
4166 #if 0 /* notyet */
4167         if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4168                 struct ifnet *inifp = m->m_pkthdr.rcvif;
4169                 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
4170                             (max_linkhdr + 3) & ~3)) == NULL) {
4171                         /* XXXJRT new stat, please */
4172                         ip6stat.ip6s_toosmall++;
4173                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
4174                         goto bad;
4175                 }
4176         } else
4177 #endif
4178         if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
4179                 struct ifnet *inifp = m->m_pkthdr.rcvif;
4180                 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
4181                         ip6stat.ip6s_toosmall++;
4182                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
4183                         goto bad;
4184                 }
4185         }
4186
4187         ip6 = mtod(m, struct ip6_hdr *);
4188
4189         if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4190                 ip6stat.ip6s_badvers++;
4191                 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
4192                 goto bad;
4193         }
4194
4195         /* Checks out, proceed */
4196         *mp = m;
4197         return (0);
4198
4199 bad:
4200         *mp = m;
4201         return (-1);
4202 }
4203 #endif /* INET6 */
4204
4205 /*
4206  * bridge_fragment:
4207  *
4208  *      Return a fragmented mbuf chain.
4209  */
4210 static int
4211 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4212     int snap, struct llc *llc)
4213 {
4214         struct mbuf *m0;
4215         struct ip *ip;
4216         int error = -1;
4217
4218         if (m->m_len < sizeof(struct ip) &&
4219             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4220                 goto out;
4221         ip = mtod(m, struct ip *);
4222
4223         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4224                     CSUM_DELAY_IP);
4225         if (error)
4226                 goto out;
4227
4228         /* walk the chain and re-add the Ethernet header */
4229         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4230                 if (error == 0) {
4231                         if (snap) {
4232                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
4233                                 if (m0 == NULL) {
4234                                         error = ENOBUFS;
4235                                         continue;
4236                                 }
4237                                 bcopy(llc, mtod(m0, caddr_t),
4238                                     sizeof(struct llc));
4239                         }
4240                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
4241                         if (m0 == NULL) {
4242                                 error = ENOBUFS;
4243                                 continue;
4244                         }
4245                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4246                 } else
4247                         m_freem(m);
4248         }
4249
4250         if (error == 0)
4251                 ipstat.ips_fragmented++;
4252
4253         return (error);
4254
4255 out:
4256         if (m != NULL)
4257                 m_freem(m);
4258         return (error);
4259 }
4260
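/*
 * bridge_enqueue_handler:
 *
 *      Netmsg handler used by bridge_enqueue(): the destination ifnet is
 *      carried in the message's result pointer and the mbuf is handed to
 *      bridge_handoff() with from_us set.
 */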
4261 static void
4262 bridge_enqueue_handler(netmsg_t msg)
4263 {
4264         struct netmsg_packet *nmp;
4265         struct ifnet *dst_ifp;
4266         struct mbuf *m;
4267
4268         nmp = &msg->packet;
4269         m = nmp->nm_packet;
4270         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4271         mbuftrackid(m, 71);
4272
4273         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4274 }
4275
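/*
 * bridge_handoff:
 *
 *      Final transmit step: walk the (possibly fragmented) packet chain,
 *      fix up the source MAC where required (see below), classify for
 *      ALTQ when enabled, and dispatch each packet to the destination
 *      interface's send queue.
 */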
4276 static void
4277 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4278                struct mbuf *m, int from_us)
4279 {
4280         struct mbuf *m0;
4281         struct ifnet *bifp;
4282
4283         bifp = sc->sc_ifp;
4284         mbuftrackid(m, 72);
4285
4286         /* We may be sending a fragment so traverse the mbuf */
4287         for (; m; m = m0) {
4288                 struct altq_pktattr pktattr;
4289
4290                 m0 = m->m_nextpkt;
4291                 m->m_nextpkt = NULL;
4292
4293                 /*
4294                  * If being sent from our host override ether_shost
4295                  * with the bridge MAC.  This is mandatory for ARP
4296                  * so things don't get confused.  In particular we
4297                  * don't want ARPs to get associated with link interfaces
4298                  * under the bridge which might or might not stay valid.
4299                  *
4300                  * Also override ether_shost when relaying a packet out
4301                  * the same interface it came in on, due to multi-homed
4302                  * addresses & default routes, otherwise switches will
4303                  * get very confused.
4304                  *
4305                  * In transparent mode (LINK0), restore the saved shost.
4306                  */
4307                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4308                         m_copyback(m,
4309                                    offsetof(struct ether_header, ether_shost),
4310                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4311                 } else if ((bifp->if_flags & IFF_LINK0) &&
4312                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4313                         m_copyback(m,
4314                                    offsetof(struct ether_header, ether_shost),
4315                                    ETHER_ADDR_LEN,
4316                                    m->m_pkthdr.br.ether.ether_shost);
4317                 } /* else retain shost */
4318
4319                 if (ifq_is_enabled(&dst_ifp->if_snd))
4320                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4321
4322                 ifq_dispatch(dst_ifp, m, &pktattr);
4323         }
4324 }
4325
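/*
 * bridge_control_dispatch / bridge_control:
 *
 *      Bridge ioctl helpers are wrapped in a netmsg and executed on
 *      BRIDGE_CFGPORT with the bridge serialized, e.g. (sketch):
 *
 *              error = bridge_control(sc, cmd, bridge_ioctl_add, &req);
 *
 *      bridge_control() drops the ifnet serializer around the synchronous
 *      lwkt_domsg(), presumably to avoid deadlocking the dispatcher,
 *      which re-acquires it before calling the handler.
 */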
4326 static void
4327 bridge_control_dispatch(netmsg_t msg)
4328 {
4329         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4330         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4331         int error;
4332
4333         ifnet_serialize_all(bifp);
4334         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4335         ifnet_deserialize_all(bifp);
4336
4337         lwkt_replymsg(&bc_msg->base.lmsg, error);
4338 }
4339
4340 static int
4341 bridge_control(struct bridge_softc *sc, u_long cmd,
4342                bridge_ctl_t bc_func, void *bc_arg)
4343 {
4344         struct ifnet *bifp = sc->sc_ifp;
4345         struct netmsg_brctl bc_msg;
4346         int error;
4347
4348         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4349
4350         bzero(&bc_msg, sizeof(bc_msg));
4351
4352         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4353                     0, bridge_control_dispatch);
4354         bc_msg.bc_func = bc_func;
4355         bc_msg.bc_sc = sc;
4356         bc_msg.bc_arg = bc_arg;
4357
4358         ifnet_deserialize_all(bifp);
4359         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4360         ifnet_serialize_all(bifp);
4361         return error;
4362 }
4363
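/*
 * bridge_add_bif_handler / bridge_add_bif:
 *
 *      Member addition follows the same per-cpu replication pattern as
 *      the route code: the netmsg visits every cpu in turn and each cpu
 *      allocates its own bridge_iflist entry, all of them sharing the
 *      single bridge_ifinfo supplied by the caller.
 */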
4364 static void
4365 bridge_add_bif_handler(netmsg_t msg)
4366 {
4367         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4368         struct bridge_softc *sc;
4369         struct bridge_iflist *bif;
4370
4371         sc = amsg->br_softc;
4372
4373         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4374         bif->bif_ifp = amsg->br_bif_ifp;
4375         bif->bif_onlist = 1;
4376         bif->bif_info = amsg->br_bif_info;
4377
4378         /*
4379          * The member's shared configuration runs through bif_info.
4380          */
4381         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4382
4383         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4384
4385         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4386 }
4387
4388 static void
4389 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4390                struct ifnet *ifp)
4391 {
4392         struct netmsg_braddbif amsg;
4393
4394         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4395
4396         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4397                     0, bridge_add_bif_handler);
4398         amsg.br_softc = sc;
4399         amsg.br_bif_info = bif_info;
4400         amsg.br_bif_ifp = ifp;
4401
4402         ifnet_domsg(&amsg.base.lmsg, 0);
4403 }
4404
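/*
 * bridge_del_bif_handler / bridge_del_bif:
 *
 *      Member removal mirrors the addition path: each cpu unlinks its
 *      bridge_iflist entry from its local iflist and parks it on the
 *      caller-supplied saved_bifs list so the caller can free all the
 *      entries once every cpu has processed the message.
 */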
4405 static void
4406 bridge_del_bif_handler(netmsg_t msg)
4407 {
4408         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4409         struct bridge_softc *sc;
4410         struct bridge_iflist *bif;
4411
4412         sc = dmsg->br_softc;
4413
4414         /*
4415          * Locate the bif associated with the br_bif_info
4416          * on the current CPU
4417          */
4418         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4419         KKASSERT(bif != NULL && bif->bif_onlist);
4420
4421         /* Remove the bif from the current CPU's iflist */
4422         bif->bif_onlist = 0;
4423                 TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
4424
4425         /* Save the removed bif for later freeing */
4426         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4427
4428         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4429 }
4430
4431 static void
4432 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4433                struct bridge_iflist_head *saved_bifs)
4434 {
4435         struct netmsg_brdelbif dmsg;
4436
4437         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4438
4439         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4440                     0, bridge_del_bif_handler);
4441         dmsg.br_softc = sc;
4442         dmsg.br_bif_info = bif_info;
4443         dmsg.br_bif_list = saved_bifs;
4444
4445         ifnet_domsg(&dmsg.base.lmsg, 0);
4446 }