Merge branch 'vendor/BINUTILS221'
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *      - Currently only supports Ethernet-like interfaces (Ethernet,
77  *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *        consider heterogeneous bridges).
80  *
81  *
82  * The bridge's route information is duplicated on each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
100  * We choose to put timeout and dst_ifp into the shared part, so updating
101  * them is cheaper than using message forwarding.  Also, there is no
102  * need for a spinlock to protect the updates: timeout and dst_ifp are
103  * unrelated, and the order in which the fields are updated does not matter.
104  * The cache pollution by the share part should not be heavy: in a stable
105  * setup, dst_ifp probably will be not changed in rtnode's life time,
106  * while timeout is refreshed once per second; most of the time, timeout
107  * and dst_ifp are read-only accessed.
108  *
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                               tcp_thread2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *      ifnet0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |
125  *  alloc rtinfo
126  *  alloc rtnode
127  * install rtnode
128  *        |
129  *        +---------->ifnet1
130  *        : fwd nmsg    |
131  *        : w/ rtinfo   |
132  *        :             |
133  *        :             |
134  *                 alloc rtnode
135  *               (w/ nmsg's rtinfo)
136  *                install rtnode
137  *                      |
138  *                      +---------->ifnet2
139  *                      : fwd nmsg    |
140  *                      : w/ rtinfo   |
141  *                      :             |
142  *                      :         same as ifnet1
143  *                                    |
144  *                                    +---------->ifnet3
145  *                                    : fwd nmsg    |
146  *                                    : w/ rtinfo   |
147  *                                    :             |
148  *                                    :         same as ifnet1
149  *                                               free nmsg
150  *                                                  :
151  *                                                  :
152  *
153  * The netmsgs forwarded between protocol threads and ifnet threads are
154  * allocated with (M_WAITOK|M_NULLOK), so it will not fail under most
155  * cases (route information is too precious to be not installed :).
156  * Since multiple threads may try to install route information for the
157  * same dst eaddr, we look up route information in ifnet0.  However, this
158  * lookup only needs to be performed on ifnet0, which is the starting
159  * point of the route information installation process.
160  *
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1             CPU2             CPU3
165  *
166  * netisr0
167  *   |
168  * find suitable rtnodes,
169  * mark their rtinfo dead
170  *   |
171  *   | domsg <------------------------------------------+
172  *   |                                                  | replymsg
173  *   |                                                  |
174  *   V     fwdmsg           fwdmsg           fwdmsg     |
175  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
176  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
177  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
178  *                                                    free dead rtinfos
179  *
180  * All deleting/flushing operations are serialized by netisr0, so each
181  * operation only reaps the route information marked dead by itself.
182  *
183  *
184  * Bridge route information adding/deleting/flushing:
185  * Since all operation is serialized by the fixed message flow between
186  * ifnet threads, it is not possible to create corrupted per-cpu route
187  * information.
188  *
189  *
190  *
191  * Percpu member interface list iteration with blocking operation:
192  * Since one bridge could only delete one member interface at a time and
193  * the deleted member interface is not freed after netmsg_service_sync(),
194  * following way is used to make sure that even if the certain member
195  * interface is ripped from the percpu list during the blocking operation,
196  * the iteration still could keep going:
197  *
198  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
199  *     blocking operation;
200  *     blocking operation;
201  *     ...
202  *     ...
203  *     if (nbif != NULL && !nbif->bif_onlist) {
204  *         KKASSERT(bif->bif_onlist);
205  *         nbif = TAILQ_NEXT(bif, bif_next);
206  *     }
207  * }
208  *
209  * As mentioned above only one member interface could be unlinked from the
210  * percpu member interface list, so either bif or nbif may be not on the list,
211  * but _not_ both.  To keep the list iteration, we don't care about bif, but
212  * only nbif.  Since removed member interface will only be freed after we
213  * finish our work, it is safe to access any field in an unlinked bif (here
214  * bif_onlist).  If nbif is no longer on the list, then bif must be on the
215  * list, so we change nbif to the next element of bif and keep going.
216  */
217
218 #include "opt_inet.h"
219 #include "opt_inet6.h"
220
221 #include <sys/param.h>
222 #include <sys/mbuf.h>
223 #include <sys/malloc.h>
224 #include <sys/protosw.h>
225 #include <sys/systm.h>
226 #include <sys/time.h>
227 #include <sys/socket.h> /* for net/if.h */
228 #include <sys/sockio.h>
229 #include <sys/ctype.h>  /* string functions */
230 #include <sys/kernel.h>
231 #include <sys/random.h>
232 #include <sys/sysctl.h>
233 #include <sys/module.h>
234 #include <sys/proc.h>
235 #include <sys/priv.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263
264 #include <net/route.h>
265 #include <sys/in_cksum.h>
266
/*
 * Size of the route hash table.  Must be a power of two, because
 * BRIDGE_RTHASH_MASK below is derived as (size - 1).  The size can be
 * overridden at build time, so the constraint is checked by the
 * preprocessor instead of being left to this comment alone.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              1024
#endif

#if (BRIDGE_RTHASH_SIZE) <= 0 || \
    ((BRIDGE_RTHASH_SIZE) & ((BRIDGE_RTHASH_SIZE) - 1)) != 0
#error "BRIDGE_RTHASH_SIZE must be a positive power of two"
#endif

#define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
275
/*
 * Upper bound on the number of addresses kept in the cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Default values for the spanning tree parameters.
 * NOTE(review): the "* 256" scaling suggests the timers are expressed in
 * 1/256-second units -- confirm against the bstp code.
 */
#define BSTP_DEFAULT_MAX_AGE            (20 * 256)
#define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
#define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
#define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
#define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
#define BSTP_DEFAULT_PORT_PRIORITY      0x80
#define BSTP_DEFAULT_PATH_COST          55

/*
 * Lifetime, in seconds, of a dynamically learned entry.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Interval, in seconds, between two walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Capabilities that are masked on a member interface.
 */
#define BRIDGE_IFCAPS_MASK              IFCAP_TXCSUM
312
313 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
314
315 struct netmsg_brctl {
316         struct netmsg_base      base;
317         bridge_ctl_t            bc_func;
318         struct bridge_softc     *bc_sc;
319         void                    *bc_arg;
320 };
321
322 struct netmsg_brsaddr {
323         struct netmsg_base      base;
324         struct bridge_softc     *br_softc;
325         struct ifnet            *br_dst_if;
326         struct bridge_rtinfo    *br_rtinfo;
327         int                     br_setflags;
328         uint8_t                 br_dst[ETHER_ADDR_LEN];
329         uint8_t                 br_flags;
330 };
331
332 struct netmsg_braddbif {
333         struct netmsg_base      base;
334         struct bridge_softc     *br_softc;
335         struct bridge_ifinfo    *br_bif_info;
336         struct ifnet            *br_bif_ifp;
337 };
338
339 struct netmsg_brdelbif {
340         struct netmsg_base      base;
341         struct bridge_softc     *br_softc;
342         struct bridge_ifinfo    *br_bif_info;
343         struct bridge_iflist_head *br_bif_list;
344 };
345
346 struct netmsg_brsflags {
347         struct netmsg_base      base;
348         struct bridge_softc     *br_softc;
349         struct bridge_ifinfo    *br_bif_info;
350         uint32_t                br_bif_flags;
351 };
352
353 eventhandler_tag        bridge_detach_cookie = NULL;
354
355 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
356 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
357 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
358 extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);
359
360 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
361
362 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
363 static int      bridge_clone_destroy(struct ifnet *);
364
365 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
366 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
367 static void     bridge_ifdetach(void *, struct ifnet *);
368 static void     bridge_init(void *);
369 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
370 static void     bridge_stop(struct ifnet *);
371 static void     bridge_start(struct ifnet *);
372 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
373 static int      bridge_output(struct ifnet *, struct mbuf *);
374 static struct ifnet *bridge_interface(void *if_bridge);
375
376 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
377
378 static void     bridge_timer_handler(netmsg_t);
379 static void     bridge_timer(void *);
380
381 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
382 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
383                     struct mbuf *);
384 static void     bridge_span(struct bridge_softc *, struct mbuf *);
385
386 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
387                     struct ifnet *, uint8_t);
388 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
389 static void     bridge_rtreap(struct bridge_softc *);
390 static void     bridge_rtreap_async(struct bridge_softc *);
391 static void     bridge_rttrim(struct bridge_softc *);
392 static int      bridge_rtage_finddead(struct bridge_softc *);
393 static void     bridge_rtage(struct bridge_softc *);
394 static void     bridge_rtflush(struct bridge_softc *, int);
395 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
396 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
397                     struct ifnet *, uint8_t);
398 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
399 static void     bridge_rtreap_handler(netmsg_t);
400 static void     bridge_rtinstall_handler(netmsg_t);
401 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
402                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
403
404 static void     bridge_rtable_init(struct bridge_softc *);
405 static void     bridge_rtable_fini(struct bridge_softc *);
406
407 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
408 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
409                     const uint8_t *);
410 static void     bridge_rtnode_insert(struct bridge_softc *,
411                     struct bridge_rtnode *);
412 static void     bridge_rtnode_destroy(struct bridge_softc *,
413                     struct bridge_rtnode *);
414
415 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
416                     const char *name);
417 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
418                     struct ifnet *ifp);
419 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
420                     struct bridge_ifinfo *);
421 static void     bridge_delete_member(struct bridge_softc *,
422                     struct bridge_iflist *, int);
423 static void     bridge_delete_span(struct bridge_softc *,
424                     struct bridge_iflist *);
425
426 static int      bridge_control(struct bridge_softc *, u_long,
427                                bridge_ctl_t, void *);
428 static int      bridge_ioctl_init(struct bridge_softc *, void *);
429 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
430 static int      bridge_ioctl_add(struct bridge_softc *, void *);
431 static int      bridge_ioctl_del(struct bridge_softc *, void *);
432 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
433                                 struct bridge_iflist *bif, struct ifbreq *req);
434 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
435 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
436 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
437 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
439 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
440 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
441 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
442 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
443 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
444 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
446 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
447 static int      bridge_ioctl_reinit(struct bridge_softc *, void *);
448 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
449 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
450 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
451 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
452 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
453 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
455 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
456 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
457 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
458 static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
459 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
460                     int);
461 static int      bridge_ip_checkbasic(struct mbuf **mp);
462 #ifdef INET6
463 static int      bridge_ip6_checkbasic(struct mbuf **mp);
464 #endif /* INET6 */
465 static int      bridge_fragment(struct ifnet *, struct mbuf *,
466                     struct ether_header *, int, struct llc *);
467 static void     bridge_enqueue_handler(netmsg_t);
468 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
469                     struct mbuf *, int);
470
471 static void     bridge_del_bif_handler(netmsg_t);
472 static void     bridge_add_bif_handler(netmsg_t);
473 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
474                     struct bridge_iflist_head *);
475 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
476                     struct ifnet *);
477
478 SYSCTL_DECL(_net_link);
479 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
480
481 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
482 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
483 static int pfil_member = 1; /* run pfil hooks on the member interface */
484 static int bridge_debug;
485 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
486     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
487 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
488     &pfil_bridge, 0, "Packet filter on the bridge interface");
489 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
490     &pfil_member, 0, "Packet filter on the member interface");
491 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
492     &bridge_debug, 0, "Bridge debug mode");
493
494 struct bridge_control_arg {
495         union {
496                 struct ifbreq ifbreq;
497                 struct ifbifconf ifbifconf;
498                 struct ifbareq ifbareq;
499                 struct ifbaconf ifbaconf;
500                 struct ifbrparam ifbrparam;
501         } bca_u;
502         int     bca_len;
503         void    *bca_uptr;
504         void    *bca_kptr;
505 };
506
507 struct bridge_control {
508         bridge_ctl_t    bc_func;
509         int             bc_argsize;
510         int             bc_flags;
511 };
512
513 #define BC_F_COPYIN             0x01    /* copy arguments in */
514 #define BC_F_COPYOUT            0x02    /* copy arguments out */
515 #define BC_F_SUSER              0x04    /* do super-user check */
516
517 const struct bridge_control bridge_control_table[] = {
518         { bridge_ioctl_add,             sizeof(struct ifbreq),
519           BC_F_COPYIN|BC_F_SUSER },
520         { bridge_ioctl_del,             sizeof(struct ifbreq),
521           BC_F_COPYIN|BC_F_SUSER },
522
523         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
524           BC_F_COPYIN|BC_F_COPYOUT },
525         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
526           BC_F_COPYIN|BC_F_SUSER },
527
528         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
529           BC_F_COPYIN|BC_F_SUSER },
530         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
531           BC_F_COPYOUT },
532
533         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
534           BC_F_COPYIN|BC_F_COPYOUT },
535         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
536           BC_F_COPYIN|BC_F_COPYOUT },
537
538         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
539           BC_F_COPYIN|BC_F_SUSER },
540
541         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
542           BC_F_COPYIN|BC_F_SUSER },
543         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
544           BC_F_COPYOUT },
545
546         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
547           BC_F_COPYIN|BC_F_SUSER },
548
549         { bridge_ioctl_flush,           sizeof(struct ifbreq),
550           BC_F_COPYIN|BC_F_SUSER },
551
552         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
553           BC_F_COPYOUT },
554         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
555           BC_F_COPYIN|BC_F_SUSER },
556
557         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
558           BC_F_COPYOUT },
559         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
560           BC_F_COPYIN|BC_F_SUSER },
561
562         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
563           BC_F_COPYOUT },
564         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
565           BC_F_COPYIN|BC_F_SUSER },
566
567         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
568           BC_F_COPYOUT },
569         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
570           BC_F_COPYIN|BC_F_SUSER },
571
572         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
573           BC_F_COPYIN|BC_F_SUSER },
574
575         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
576           BC_F_COPYIN|BC_F_SUSER },
577
578         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
579           BC_F_COPYIN|BC_F_SUSER },
580         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
581           BC_F_COPYIN|BC_F_SUSER },
582
583         { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
584           BC_F_COPYIN|BC_F_SUSER },
585
586 };
587 static const int bridge_control_table_size = NELEM(bridge_control_table);
588
589 LIST_HEAD(, bridge_softc) bridge_list;
590
591 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
592                                 bridge_clone_create,
593                                 bridge_clone_destroy, 0, IF_MAXUNIT);
594
595 static int
596 bridge_modevent(module_t mod, int type, void *data)
597 {
598         switch (type) {
599         case MOD_LOAD:
600                 LIST_INIT(&bridge_list);
601                 if_clone_attach(&bridge_cloner);
602                 bridge_input_p = bridge_input;
603                 bridge_output_p = bridge_output;
604                 bridge_interface_p = bridge_interface;
605                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
606                     ifnet_detach_event, bridge_ifdetach, NULL,
607                     EVENTHANDLER_PRI_ANY);
608 #if 0 /* notyet */
609                 bstp_linkstate_p = bstp_linkstate;
610 #endif
611                 break;
612         case MOD_UNLOAD:
613                 if (!LIST_EMPTY(&bridge_list))
614                         return (EBUSY);
615                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
616                     bridge_detach_cookie);
617                 if_clone_detach(&bridge_cloner);
618                 bridge_input_p = NULL;
619                 bridge_output_p = NULL;
620                 bridge_interface_p = NULL;
621 #if 0 /* notyet */
622                 bstp_linkstate_p = NULL;
623 #endif
624                 break;
625         default:
626                 return (EOPNOTSUPP);
627         }
628         return (0);
629 }
630
631 static moduledata_t bridge_mod = {
632         "if_bridge",
633         bridge_modevent,
634         0
635 };
636
637 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
638
639
640 /*
641  * bridge_clone_create:
642  *
643  *      Create a new bridge instance.
644  */
645 static int
646 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
647 {
648         struct bridge_softc *sc;
649         struct ifnet *ifp;
650         u_char eaddr[6];
651         int cpu, rnd;
652
653         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
654         ifp = sc->sc_ifp = &sc->sc_if;
655
656         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
657         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
658         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
659         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
660         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
661         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
662         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
663
664         /* Initialize our routing table. */
665         bridge_rtable_init(sc);
666
667         callout_init(&sc->sc_brcallout);
668         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
669                     MSGF_DROPABLE, bridge_timer_handler);
670         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
671
672         callout_init(&sc->sc_bstpcallout);
673         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
674                     MSGF_DROPABLE, bstp_tick_handler);
675         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
676
677         /* Initialize per-cpu member iface lists */
678         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
679                                  M_DEVBUF, M_WAITOK);
680         for (cpu = 0; cpu < ncpus; ++cpu)
681                 TAILQ_INIT(&sc->sc_iflists[cpu]);
682
683         TAILQ_INIT(&sc->sc_spanlist);
684
685         ifp->if_softc = sc;
686         if_initname(ifp, ifc->ifc_name, unit);
687         ifp->if_mtu = ETHERMTU;
688         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
689         ifp->if_ioctl = bridge_ioctl;
690         ifp->if_start = bridge_start;
691         ifp->if_init = bridge_init;
692         ifp->if_type = IFT_ETHER;
693         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
694         ifq_set_ready(&ifp->if_snd);
695         ifp->if_hdrlen = ETHER_HDR_LEN;
696
697         /*
698          * Generate a random ethernet address and use the private AC:DE:48
699          * OUI code.
700          */
701         rnd = karc4random();
702         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
703         rnd = karc4random();
704         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
705
706         eaddr[0] &= ~1; /* clear multicast bit */
707         eaddr[0] |= 2;  /* set the LAA bit */
708
709         ether_ifattach(ifp, eaddr, NULL);
710         /* Now undo some of the damage... */
711         ifp->if_baudrate = 0;
712         /*ifp->if_type = IFT_BRIDGE;*/
713
714         crit_enter();   /* XXX MP */
715         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
716         crit_exit();
717
718         return (0);
719 }
720
721 static void
722 bridge_delete_dispatch(netmsg_t msg)
723 {
724         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
725         struct ifnet *bifp = sc->sc_ifp;
726         struct bridge_iflist *bif;
727
728         ifnet_serialize_all(bifp);
729
730         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
731                 bridge_delete_member(sc, bif, 0);
732
733         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
734                 bridge_delete_span(sc, bif);
735
736         ifnet_deserialize_all(bifp);
737
738         lwkt_replymsg(&msg->lmsg, 0);
739 }
740
741 /*
742  * bridge_clone_destroy:
743  *
744  *      Destroy a bridge instance.
745  */
746 static int
747 bridge_clone_destroy(struct ifnet *ifp)
748 {
749         struct bridge_softc *sc = ifp->if_softc;
750         struct netmsg_base msg;
751
752         ifnet_serialize_all(ifp);
753
754         bridge_stop(ifp);
755         ifp->if_flags &= ~IFF_UP;
756
757         ifnet_deserialize_all(ifp);
758
759         netmsg_init(&msg, NULL, &curthread->td_msgport,
760                     0, bridge_delete_dispatch);
761         msg.lmsg.u.ms_resultp = sc;
762         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
763
764         crit_enter();   /* XXX MP */
765         LIST_REMOVE(sc, sc_list);
766         crit_exit();
767
768         ether_ifdetach(ifp);
769
770         /* Tear down the routing table. */
771         bridge_rtable_fini(sc);
772
773         /* Free per-cpu member iface lists */
774         kfree(sc->sc_iflists, M_DEVBUF);
775
776         kfree(sc, M_DEVBUF);
777
778         return 0;
779 }
780
781 /*
782  * bridge_ioctl:
783  *
784  *      Handle a control request from the operator.
785  */
786 static int
787 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
788 {
789         struct bridge_softc *sc = ifp->if_softc;
790         struct bridge_control_arg args;
791         struct ifdrv *ifd = (struct ifdrv *) data;
792         const struct bridge_control *bc;
793         int error = 0;
794
795         ASSERT_IFNET_SERIALIZED_ALL(ifp);
796
797         switch (cmd) {
798         case SIOCADDMULTI:
799         case SIOCDELMULTI:
800                 break;
801
802         case SIOCGDRVSPEC:
803         case SIOCSDRVSPEC:
804                 if (ifd->ifd_cmd >= bridge_control_table_size) {
805                         error = EINVAL;
806                         break;
807                 }
808                 bc = &bridge_control_table[ifd->ifd_cmd];
809
810                 if (cmd == SIOCGDRVSPEC &&
811                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
812                         error = EINVAL;
813                         break;
814                 } else if (cmd == SIOCSDRVSPEC &&
815                            (bc->bc_flags & BC_F_COPYOUT)) {
816                         error = EINVAL;
817                         break;
818                 }
819
820                 if (bc->bc_flags & BC_F_SUSER) {
821                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
822                         if (error)
823                                 break;
824                 }
825
826                 if (ifd->ifd_len != bc->bc_argsize ||
827                     ifd->ifd_len > sizeof(args.bca_u)) {
828                         error = EINVAL;
829                         break;
830                 }
831
832                 memset(&args, 0, sizeof(args));
833                 if (bc->bc_flags & BC_F_COPYIN) {
834                         error = copyin(ifd->ifd_data, &args.bca_u,
835                                        ifd->ifd_len);
836                         if (error)
837                                 break;
838                 }
839
840                 error = bridge_control(sc, cmd, bc->bc_func, &args);
841                 if (error) {
842                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
843                         break;
844                 }
845
846                 if (bc->bc_flags & BC_F_COPYOUT) {
847                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
848                         if (args.bca_len != 0) {
849                                 KKASSERT(args.bca_kptr != NULL);
850                                 if (!error) {
851                                         error = copyout(args.bca_kptr,
852                                                 args.bca_uptr, args.bca_len);
853                                 }
854                                 kfree(args.bca_kptr, M_TEMP);
855                         } else {
856                                 KKASSERT(args.bca_kptr == NULL);
857                         }
858                 } else {
859                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
860                 }
861                 break;
862
863         case SIOCSIFFLAGS:
864                 if (!(ifp->if_flags & IFF_UP) &&
865                     (ifp->if_flags & IFF_RUNNING)) {
866                         /*
867                          * If interface is marked down and it is running,
868                          * then stop it.
869                          */
870                         bridge_stop(ifp);
871                 } else if ((ifp->if_flags & IFF_UP) &&
872                     !(ifp->if_flags & IFF_RUNNING)) {
873                         /*
874                          * If interface is marked up and it is stopped, then
875                          * start it.
876                          */
877                         ifp->if_init(sc);
878                 }
879
880                 /*
881                  * If running and link flag state change we have to
882                  * reinitialize as well.
883                  */
884                 if ((ifp->if_flags & IFF_RUNNING) &&
885                     (ifp->if_flags & (IFF_LINK0|IFF_LINK1|IFF_LINK2)) !=
886                     sc->sc_copy_flags) {
887                         sc->sc_copy_flags = ifp->if_flags &
888                                         (IFF_LINK0|IFF_LINK1|IFF_LINK2);
889                         bridge_control(sc, 0, bridge_ioctl_reinit, NULL);
890                 }
891
892                 break;
893
894         case SIOCSIFMTU:
895                 /* Do not allow the MTU to be changed on the bridge */
896                 error = EINVAL;
897                 break;
898
899         default:
900                 error = ether_ioctl(ifp, cmd, data);
901                 break;
902         }
903         return (error);
904 }
905
906 /*
907  * bridge_mutecaps:
908  *
909  *      Clear or restore unwanted capabilities on the member interface
910  */
911 static void
912 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
913 {
914         struct ifreq ifr;
915         int error;
916
917         if (ifp->if_ioctl == NULL)
918                 return;
919
920         bzero(&ifr, sizeof(ifr));
921         ifr.ifr_reqcap = ifp->if_capenable;
922
923         if (mute) {
924                 /* mask off and save capabilities */
925                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
926                 if (bif_info->bifi_mutecap != 0)
927                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
928         } else {
929                 /* restore muted capabilities */
930                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
931         }
932
933         if (bif_info->bifi_mutecap != 0) {
934                 ifnet_serialize_all(ifp);
935                 error = ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
936                 ifnet_deserialize_all(ifp);
937         }
938 }
939
940 /*
941  * bridge_lookup_member:
942  *
943  *      Lookup a bridge member interface.
944  */
945 static struct bridge_iflist *
946 bridge_lookup_member(struct bridge_softc *sc, const char *name)
947 {
948         struct bridge_iflist *bif;
949
950         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
951                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
952                         return (bif);
953         }
954         return (NULL);
955 }
956
957 /*
958  * bridge_lookup_member_if:
959  *
960  *      Lookup a bridge member interface by ifnet*.
961  */
962 static struct bridge_iflist *
963 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
964 {
965         struct bridge_iflist *bif;
966
967         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
968                 if (bif->bif_ifp == member_ifp)
969                         return (bif);
970         }
971         return (NULL);
972 }
973
974 /*
975  * bridge_lookup_member_ifinfo:
976  *
977  *      Lookup a bridge member interface by bridge_ifinfo.
978  */
979 static struct bridge_iflist *
980 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
981                             struct bridge_ifinfo *bif_info)
982 {
983         struct bridge_iflist *bif;
984
985         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
986                 if (bif->bif_info == bif_info)
987                         return (bif);
988         }
989         return (NULL);
990 }
991
992 /*
993  * bridge_delete_member:
994  *
995  *      Delete the specified member interface.
996  */
997 static void
998 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
999     int gone)
1000 {
1001         struct ifnet *ifs = bif->bif_ifp;
1002         struct ifnet *bifp = sc->sc_ifp;
1003         struct bridge_ifinfo *bif_info = bif->bif_info;
1004         struct bridge_iflist_head saved_bifs;
1005
1006         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1007         KKASSERT(bif_info != NULL);
1008
1009         ifs->if_bridge = NULL;
1010
1011         /*
1012          * Release bridge interface's serializer:
1013          * - To avoid possible dead lock.
1014          * - Various sync operation will block the current thread.
1015          */
1016         ifnet_deserialize_all(bifp);
1017
1018         if (!gone) {
1019                 switch (ifs->if_type) {
1020                 case IFT_ETHER:
1021                 case IFT_L2VLAN:
1022                         /*
1023                          * Take the interface out of promiscuous mode.
1024                          */
1025                         ifpromisc(ifs, 0);
1026                         bridge_mutecaps(bif_info, ifs, 0);
1027                         break;
1028
1029                 case IFT_GIF:
1030                         break;
1031
1032                 default:
1033                         panic("bridge_delete_member: impossible");
1034                         break;
1035                 }
1036         }
1037
1038         /*
1039          * Remove bifs from percpu linked list.
1040          *
1041          * Removed bifs are not freed immediately, instead,
1042          * they are saved in saved_bifs.  They will be freed
1043          * after we make sure that no one is accessing them,
1044          * i.e. after following netmsg_service_sync()
1045          */
1046         TAILQ_INIT(&saved_bifs);
1047         bridge_del_bif(sc, bif_info, &saved_bifs);
1048
1049         /*
1050          * Make sure that all protocol threads:
1051          * o  see 'ifs' if_bridge is changed
1052          * o  know that bif is removed from the percpu linked list
1053          */
1054         netmsg_service_sync();
1055
1056         /*
1057          * Free the removed bifs
1058          */
1059         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1060         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1061                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1062                 kfree(bif, M_DEVBUF);
1063         }
1064
1065         /* See the comment in bridge_ioctl_stop() */
1066         bridge_rtmsg_sync(sc);
1067         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1068
1069         ifnet_serialize_all(bifp);
1070
1071         if (bifp->if_flags & IFF_RUNNING)
1072                 bstp_initialization(sc);
1073
1074         /*
1075          * Free the bif_info after bstp_initialization(), so that
1076          * bridge_softc.sc_root_port will not reference a dangling
1077          * pointer.
1078          */
1079         kfree(bif_info, M_DEVBUF);
1080 }
1081
1082 /*
1083  * bridge_delete_span:
1084  *
1085  *      Delete the specified span interface.
1086  */
1087 static void
1088 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1089 {
1090         KASSERT(bif->bif_ifp->if_bridge == NULL,
1091             ("%s: not a span interface", __func__));
1092
1093         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1094         kfree(bif, M_DEVBUF);
1095 }
1096
1097 static int
1098 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1099 {
1100         struct ifnet *ifp = sc->sc_ifp;
1101
1102         if (ifp->if_flags & IFF_RUNNING)
1103                 return 0;
1104
1105         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1106             bridge_timer, sc);
1107
1108         ifp->if_flags |= IFF_RUNNING;
1109         bstp_initialization(sc);
1110         return 0;
1111 }
1112
1113 static int
1114 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1115 {
1116         struct ifnet *ifp = sc->sc_ifp;
1117         struct lwkt_msg *lmsg;
1118
1119         if ((ifp->if_flags & IFF_RUNNING) == 0)
1120                 return 0;
1121
1122         callout_stop(&sc->sc_brcallout);
1123
1124         crit_enter();
1125         lmsg = &sc->sc_brtimemsg.lmsg;
1126         if ((lmsg->ms_flags & MSGF_DONE) == 0) {
1127                 /* Pending to be processed; drop it */
1128                 lwkt_dropmsg(lmsg);
1129         }
1130         crit_exit();
1131
1132         bstp_stop(sc);
1133
1134         ifp->if_flags &= ~IFF_RUNNING;
1135
1136         ifnet_deserialize_all(ifp);
1137
1138         /* Let everyone know that we are stopped */
1139         netmsg_service_sync();
1140
1141         /*
1142          * Sync ifnetX msgports in the order we forward rtnode
1143          * installation message.  This is used to make sure that
1144          * all rtnode installation messages sent by bridge_rtupdate()
1145          * during above netmsg_service_sync() are flushed.
1146          */
1147         bridge_rtmsg_sync(sc);
1148         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1149
1150         ifnet_serialize_all(ifp);
1151         return 0;
1152 }
1153
1154 static int
1155 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1156 {
1157         struct ifbreq *req = arg;
1158         struct bridge_iflist *bif;
1159         struct bridge_ifinfo *bif_info;
1160         struct ifnet *ifs, *bifp;
1161         int error = 0;
1162
1163         bifp = sc->sc_ifp;
1164         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1165
1166         ifs = ifunit(req->ifbr_ifsname);
1167         if (ifs == NULL)
1168                 return (ENOENT);
1169
1170         /* If it's in the span list, it can't be a member. */
1171         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1172                 if (ifs == bif->bif_ifp)
1173                         return (EBUSY);
1174
1175         /* Allow the first Ethernet member to define the MTU */
1176         if (ifs->if_type != IFT_GIF) {
1177                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1178                         bifp->if_mtu = ifs->if_mtu;
1179                 } else if (bifp->if_mtu != ifs->if_mtu) {
1180                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1181                         return (EINVAL);
1182                 }
1183         }
1184
1185         if (ifs->if_bridge == sc)
1186                 return (EEXIST);
1187
1188         if (ifs->if_bridge != NULL)
1189                 return (EBUSY);
1190
1191         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1192         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1193         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1194         bif_info->bifi_ifp = ifs;
1195         bif_info->bifi_bond_weight = 1;
1196
1197         /*
1198          * Release bridge interface's serializer:
1199          * - To avoid possible dead lock.
1200          * - Various sync operation will block the current thread.
1201          */
1202         ifnet_deserialize_all(bifp);
1203
1204         switch (ifs->if_type) {
1205         case IFT_ETHER:
1206         case IFT_L2VLAN:
1207                 /*
1208                  * Place the interface into promiscuous mode.
1209                  */
1210                 error = ifpromisc(ifs, 1);
1211                 if (error) {
1212                         ifnet_serialize_all(bifp);
1213                         goto out;
1214                 }
1215                 bridge_mutecaps(bif_info, ifs, 1);
1216                 break;
1217
1218         case IFT_GIF: /* :^) */
1219                 break;
1220
1221         default:
1222                 error = EINVAL;
1223                 ifnet_serialize_all(bifp);
1224                 goto out;
1225         }
1226
1227         /*
1228          * Add bifs to percpu linked lists
1229          */
1230         bridge_add_bif(sc, bif_info, ifs);
1231
1232         ifnet_serialize_all(bifp);
1233
1234         if (bifp->if_flags & IFF_RUNNING)
1235                 bstp_initialization(sc);
1236         else
1237                 bstp_stop(sc);
1238
1239         /*
1240          * Everything has been setup, so let the member interface
1241          * deliver packets to this bridge on its input/output path.
1242          */
1243         ifs->if_bridge = sc;
1244 out:
1245         if (error) {
1246                 if (bif_info != NULL)
1247                         kfree(bif_info, M_DEVBUF);
1248         }
1249         return (error);
1250 }
1251
1252 static int
1253 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1254 {
1255         struct ifbreq *req = arg;
1256         struct bridge_iflist *bif;
1257
1258         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1259         if (bif == NULL)
1260                 return (ENOENT);
1261
1262         bridge_delete_member(sc, bif, 0);
1263
1264         return (0);
1265 }
1266
1267 static int
1268 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1269 {
1270         struct ifbreq *req = arg;
1271         struct bridge_iflist *bif;
1272
1273         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1274         if (bif == NULL)
1275                 return (ENOENT);
1276         bridge_ioctl_fillflags(sc, bif, req);
1277         return (0);
1278 }
1279
1280 static void
1281 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1282                        struct ifbreq *req)
1283 {
1284         req->ifbr_ifsflags = bif->bif_flags;
1285         req->ifbr_state = bif->bif_state;
1286         req->ifbr_priority = bif->bif_priority;
1287         req->ifbr_path_cost = bif->bif_path_cost;
1288         req->ifbr_bond_weight = bif->bif_bond_weight;
1289         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1290         if (bif->bif_flags & IFBIF_STP) {
1291                 req->ifbr_peer_root = bif->bif_peer_root;
1292                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1293                 req->ifbr_peer_cost = bif->bif_peer_cost;
1294                 req->ifbr_peer_port = bif->bif_peer_port;
1295                 if (bstp_supersedes_port_info(sc, bif)) {
1296                         req->ifbr_designated_root = bif->bif_peer_root;
1297                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1298                         req->ifbr_designated_cost = bif->bif_peer_cost;
1299                         req->ifbr_designated_port = bif->bif_peer_port;
1300                 } else {
1301                         req->ifbr_designated_root = sc->sc_bridge_id;
1302                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1303                         req->ifbr_designated_cost = bif->bif_path_cost +
1304                                                     bif->bif_peer_cost;
1305                         req->ifbr_designated_port = bif->bif_port_id;
1306                 }
1307         } else {
1308                 req->ifbr_peer_root = 0;
1309                 req->ifbr_peer_bridge = 0;
1310                 req->ifbr_peer_cost = 0;
1311                 req->ifbr_peer_port = 0;
1312                 req->ifbr_designated_root = 0;
1313                 req->ifbr_designated_bridge = 0;
1314                 req->ifbr_designated_cost = 0;
1315                 req->ifbr_designated_port = 0;
1316         }
1317 }
1318
1319 static int
1320 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1321 {
1322         struct ifbreq *req = arg;
1323         struct bridge_iflist *bif;
1324         struct ifnet *bifp = sc->sc_ifp;
1325
1326         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1327         if (bif == NULL)
1328                 return (ENOENT);
1329
1330         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1331                 /* SPAN is readonly */
1332                 return (EINVAL);
1333         }
1334
1335         if (req->ifbr_ifsflags & IFBIF_STP) {
1336                 switch (bif->bif_ifp->if_type) {
1337                 case IFT_ETHER:
1338                         /* These can do spanning tree. */
1339                         break;
1340
1341                 default:
1342                         /* Nothing else can. */
1343                         return (EINVAL);
1344                 }
1345         }
1346
1347         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1348                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1349         if (bifp->if_flags & IFF_RUNNING)
1350                 bstp_initialization(sc);
1351
1352         return (0);
1353 }
1354
1355 static int
1356 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1357 {
1358         struct ifbrparam *param = arg;
1359         struct ifnet *ifp = sc->sc_ifp;
1360
1361         sc->sc_brtmax = param->ifbrp_csize;
1362
1363         ifnet_deserialize_all(ifp);
1364         bridge_rttrim(sc);
1365         ifnet_serialize_all(ifp);
1366
1367         return (0);
1368 }
1369
1370 static int
1371 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1372 {
1373         struct ifbrparam *param = arg;
1374
1375         param->ifbrp_csize = sc->sc_brtmax;
1376
1377         return (0);
1378 }
1379
1380 static int
1381 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1382 {
1383         struct bridge_control_arg *bc_arg = arg;
1384         struct ifbifconf *bifc = arg;
1385         struct bridge_iflist *bif;
1386         struct ifbreq *breq;
1387         int count, len;
1388
1389         count = 0;
1390         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1391                 count++;
1392         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1393                 count++;
1394
1395         if (bifc->ifbic_len == 0) {
1396                 bifc->ifbic_len = sizeof(*breq) * count;
1397                 return 0;
1398         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1399                 bifc->ifbic_len = 0;
1400                 return 0;
1401         }
1402
1403         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1404         KKASSERT(len >= sizeof(*breq));
1405
1406         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1407         if (breq == NULL) {
1408                 bifc->ifbic_len = 0;
1409                 return ENOMEM;
1410         }
1411         bc_arg->bca_kptr = breq;
1412
1413         count = 0;
1414         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1415                 if (len < sizeof(*breq))
1416                         break;
1417
1418                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1419                         sizeof(breq->ifbr_ifsname));
1420                 bridge_ioctl_fillflags(sc, bif, breq);
1421                 breq++;
1422                 count++;
1423                 len -= sizeof(*breq);
1424         }
1425         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1426                 if (len < sizeof(*breq))
1427                         break;
1428
1429                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1430                         sizeof(breq->ifbr_ifsname));
1431                 breq->ifbr_ifsflags = bif->bif_flags;
1432                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1433                 breq++;
1434                 count++;
1435                 len -= sizeof(*breq);
1436         }
1437
1438         bifc->ifbic_len = sizeof(*breq) * count;
1439         KKASSERT(bifc->ifbic_len > 0);
1440
1441         bc_arg->bca_len = bifc->ifbic_len;
1442         bc_arg->bca_uptr = bifc->ifbic_req;
1443         return 0;
1444 }
1445
1446 static int
1447 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1448 {
1449         struct bridge_control_arg *bc_arg = arg;
1450         struct ifbaconf *bac = arg;
1451         struct bridge_rtnode *brt;
1452         struct ifbareq *bareq;
1453         int count, len;
1454
1455         count = 0;
1456         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1457                 count++;
1458
1459         if (bac->ifbac_len == 0) {
1460                 bac->ifbac_len = sizeof(*bareq) * count;
1461                 return 0;
1462         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1463                 bac->ifbac_len = 0;
1464                 return 0;
1465         }
1466
1467         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1468         KKASSERT(len >= sizeof(*bareq));
1469
1470         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1471         if (bareq == NULL) {
1472                 bac->ifbac_len = 0;
1473                 return ENOMEM;
1474         }
1475         bc_arg->bca_kptr = bareq;
1476
1477         count = 0;
1478         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1479                 struct bridge_rtinfo *bri = brt->brt_info;
1480                 unsigned long expire;
1481
1482                 if (len < sizeof(*bareq))
1483                         break;
1484
1485                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1486                         sizeof(bareq->ifba_ifsname));
1487                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1488                 expire = bri->bri_expire;
1489                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1490                     time_second < expire)
1491                         bareq->ifba_expire = expire - time_second;
1492                 else
1493                         bareq->ifba_expire = 0;
1494                 bareq->ifba_flags = bri->bri_flags;
1495                 bareq++;
1496                 count++;
1497                 len -= sizeof(*bareq);
1498         }
1499
1500         bac->ifbac_len = sizeof(*bareq) * count;
1501         KKASSERT(bac->ifbac_len > 0);
1502
1503         bc_arg->bca_len = bac->ifbac_len;
1504         bc_arg->bca_uptr = bac->ifbac_req;
1505         return 0;
1506 }
1507
1508 static int
1509 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1510 {
1511         struct ifbareq *req = arg;
1512         struct bridge_iflist *bif;
1513         struct ifnet *ifp = sc->sc_ifp;
1514         int error;
1515
1516         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1517
1518         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1519         if (bif == NULL)
1520                 return (ENOENT);
1521
1522         ifnet_deserialize_all(ifp);
1523         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1524                                req->ifba_flags);
1525         ifnet_serialize_all(ifp);
1526         return (error);
1527 }
1528
1529 static int
1530 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1531 {
1532         struct ifbrparam *param = arg;
1533
1534         sc->sc_brttimeout = param->ifbrp_ctime;
1535
1536         return (0);
1537 }
1538
1539 static int
1540 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1541 {
1542         struct ifbrparam *param = arg;
1543
1544         param->ifbrp_ctime = sc->sc_brttimeout;
1545
1546         return (0);
1547 }
1548
1549 static int
1550 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1551 {
1552         struct ifbareq *req = arg;
1553         struct ifnet *ifp = sc->sc_ifp;
1554         int error;
1555
1556         ifnet_deserialize_all(ifp);
1557         error = bridge_rtdaddr(sc, req->ifba_dst);
1558         ifnet_serialize_all(ifp);
1559         return error;
1560 }
1561
1562 static int
1563 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1564 {
1565         struct ifbreq *req = arg;
1566         struct ifnet *ifp = sc->sc_ifp;
1567
1568         ifnet_deserialize_all(ifp);
1569         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1570         ifnet_serialize_all(ifp);
1571
1572         return (0);
1573 }
1574
1575 static int
1576 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1577 {
1578         struct ifbrparam *param = arg;
1579
1580         param->ifbrp_prio = sc->sc_bridge_priority;
1581
1582         return (0);
1583 }
1584
1585 static int
1586 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1587 {
1588         struct ifbrparam *param = arg;
1589
1590         sc->sc_bridge_priority = param->ifbrp_prio;
1591
1592         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1593                 bstp_initialization(sc);
1594
1595         return (0);
1596 }
1597
1598 static int
1599 bridge_ioctl_reinit(struct bridge_softc *sc, void *arg __unused)
1600 {
1601         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1602                 bstp_initialization(sc);
1603         return (0);
1604 }
1605
1606 static int
1607 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1608 {
1609         struct ifbrparam *param = arg;
1610
1611         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1612
1613         return (0);
1614 }
1615
1616 static int
1617 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1618 {
1619         struct ifbrparam *param = arg;
1620
1621         if (param->ifbrp_hellotime == 0)
1622                 return (EINVAL);
1623         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1624
1625         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1626                 bstp_initialization(sc);
1627
1628         return (0);
1629 }
1630
1631 static int
1632 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1633 {
1634         struct ifbrparam *param = arg;
1635
1636         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1637
1638         return (0);
1639 }
1640
1641 static int
1642 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1643 {
1644         struct ifbrparam *param = arg;
1645
1646         if (param->ifbrp_fwddelay == 0)
1647                 return (EINVAL);
1648         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1649
1650         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1651                 bstp_initialization(sc);
1652
1653         return (0);
1654 }
1655
1656 static int
1657 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1658 {
1659         struct ifbrparam *param = arg;
1660
1661         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1662
1663         return (0);
1664 }
1665
1666 static int
1667 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1668 {
1669         struct ifbrparam *param = arg;
1670
1671         if (param->ifbrp_maxage == 0)
1672                 return (EINVAL);
1673         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1674
1675         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1676                 bstp_initialization(sc);
1677
1678         return (0);
1679 }
1680
1681 static int
1682 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1683 {
1684         struct ifbreq *req = arg;
1685         struct bridge_iflist *bif;
1686
1687         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1688         if (bif == NULL)
1689                 return (ENOENT);
1690
1691         bif->bif_priority = req->ifbr_priority;
1692
1693         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1694                 bstp_initialization(sc);
1695
1696         return (0);
1697 }
1698
1699 static int
1700 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1701 {
1702         struct ifbreq *req = arg;
1703         struct bridge_iflist *bif;
1704
1705         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1706         if (bif == NULL)
1707                 return (ENOENT);
1708
1709         bif->bif_path_cost = req->ifbr_path_cost;
1710
1711         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1712                 bstp_initialization(sc);
1713
1714         return (0);
1715 }
1716
1717 static int
1718 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1719 {
1720         struct ifbreq *req = arg;
1721         struct bridge_iflist *bif;
1722
1723         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1724         if (bif == NULL)
1725                 return (ENOENT);
1726
1727         bif->bif_bond_weight = req->ifbr_bond_weight;
1728
1729         /* no reinit needed */
1730
1731         return (0);
1732 }
1733
1734 static int
1735 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1736 {
1737         struct ifbreq *req = arg;
1738         struct bridge_iflist *bif;
1739         struct ifnet *ifs;
1740         struct bridge_ifinfo *bif_info;
1741
1742         ifs = ifunit(req->ifbr_ifsname);
1743         if (ifs == NULL)
1744                 return (ENOENT);
1745
1746         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1747                 if (ifs == bif->bif_ifp)
1748                         return (EBUSY);
1749
1750         if (ifs->if_bridge != NULL)
1751                 return (EBUSY);
1752
1753         switch (ifs->if_type) {
1754         case IFT_ETHER:
1755         case IFT_GIF:
1756         case IFT_L2VLAN:
1757                 break;
1758
1759         default:
1760                 return (EINVAL);
1761         }
1762
1763         /*
1764          * bif_info is needed for bif_flags
1765          */
1766         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1767         bif_info->bifi_ifp = ifs;
1768
1769         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1770         bif->bif_ifp = ifs;
1771         bif->bif_info = bif_info;
1772         bif->bif_flags = IFBIF_SPAN;
1773         /* NOTE: span bif does not need bridge_ifinfo */
1774
1775         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1776
1777         sc->sc_span = 1;
1778
1779         return (0);
1780 }
1781
1782 static int
1783 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1784 {
1785         struct ifbreq *req = arg;
1786         struct bridge_iflist *bif;
1787         struct ifnet *ifs;
1788
1789         ifs = ifunit(req->ifbr_ifsname);
1790         if (ifs == NULL)
1791                 return (ENOENT);
1792
1793         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1794                 if (ifs == bif->bif_ifp)
1795                         break;
1796
1797         if (bif == NULL)
1798                 return (ENOENT);
1799
1800         bridge_delete_span(sc, bif);
1801
1802         if (TAILQ_EMPTY(&sc->sc_spanlist))
1803                 sc->sc_span = 0;
1804
1805         return (0);
1806 }
1807
1808 static void
1809 bridge_ifdetach_dispatch(netmsg_t msg)
1810 {
1811         struct ifnet *ifp, *bifp;
1812         struct bridge_softc *sc;
1813         struct bridge_iflist *bif;
1814
1815         ifp = msg->lmsg.u.ms_resultp;
1816         sc = ifp->if_bridge;
1817
1818         /* Check if the interface is a bridge member */
1819         if (sc != NULL) {
1820                 bifp = sc->sc_ifp;
1821
1822                 ifnet_serialize_all(bifp);
1823
1824                 bif = bridge_lookup_member_if(sc, ifp);
1825                 if (bif != NULL) {
1826                         bridge_delete_member(sc, bif, 1);
1827                 } else {
1828                         /* XXX Why bif will be NULL? */
1829                 }
1830
1831                 ifnet_deserialize_all(bifp);
1832                 goto reply;
1833         }
1834
1835         crit_enter();   /* XXX MP */
1836
1837         /* Check if the interface is a span port */
1838         LIST_FOREACH(sc, &bridge_list, sc_list) {
1839                 bifp = sc->sc_ifp;
1840
1841                 ifnet_serialize_all(bifp);
1842
1843                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1844                         if (ifp == bif->bif_ifp) {
1845                                 bridge_delete_span(sc, bif);
1846                                 break;
1847                         }
1848
1849                 ifnet_deserialize_all(bifp);
1850         }
1851
1852         crit_exit();
1853
1854 reply:
1855         lwkt_replymsg(&msg->lmsg, 0);
1856 }
1857
1858 /*
1859  * bridge_ifdetach:
1860  *
1861  *      Detach an interface from a bridge.  Called when a member
1862  *      interface is detaching.
1863  */
1864 static void
1865 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1866 {
1867         struct netmsg_base msg;
1868
1869         netmsg_init(&msg, NULL, &curthread->td_msgport,
1870                     0, bridge_ifdetach_dispatch);
1871         msg.lmsg.u.ms_resultp = ifp;
1872
1873         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1874 }
1875
1876 /*
1877  * bridge_init:
1878  *
1879  *      Initialize a bridge interface.
1880  */
1881 static void
1882 bridge_init(void *xsc)
1883 {
1884         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1885 }
1886
1887 /*
1888  * bridge_stop:
1889  *
1890  *      Stop the bridge interface.
1891  */
1892 static void
1893 bridge_stop(struct ifnet *ifp)
1894 {
1895         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1896 }
1897
1898 /*
1899  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1900  * interface or from any member of our bridge interface.  This is used
1901  * later on to force the MAC to be the MAC of our bridge interface.
1902  */
1903 static int
1904 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1905 {
1906         struct bridge_iflist *bif;
1907
1908         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1909                 return (1);
1910
1911         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1912                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1913                            ETHER_ADDR_LEN) == 0) {
1914                         return (1);
1915                 }
1916         }
1917         return (0);
1918 }
1919
1920 /*
1921  * bridge_enqueue:
1922  *
1923  *      Enqueue a packet on a bridge member interface.
1924  *
1925  */
1926 void
1927 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1928 {
1929         struct netmsg_packet *nmp;
1930
1931         nmp = &m->m_hdr.mh_netmsg;
1932         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1933                     0, bridge_enqueue_handler);
1934         nmp->nm_packet = m;
1935         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1936
1937         lwkt_sendmsg(ifnet_portfn(mycpu->gd_cpuid), &nmp->base.lmsg);
1938 }
1939
1940 /*
1941  * bridge_output:
1942  *
1943  *      Send output from a bridge member interface.  This
1944  *      performs the bridging function for locally originated
1945  *      packets.
1946  *
1947  *      The mbuf has the Ethernet header already attached.  We must
1948  *      enqueue or free the mbuf before returning.
1949  */
1950 static int
1951 bridge_output(struct ifnet *ifp, struct mbuf *m)
1952 {
1953         struct bridge_softc *sc = ifp->if_bridge;
1954         struct bridge_iflist *bif, *nbif;
1955         struct ether_header *eh;
1956         struct ifnet *dst_if, *alt_if, *bifp;
1957         int from_us;
1958         int priority;
1959         int alt_priority;
1960
1961         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
1962
1963         /*
1964          * Make sure that we are still a member of a bridge interface.
1965          */
1966         if (sc == NULL) {
1967                 m_freem(m);
1968                 return (0);
1969         }
1970         bifp = sc->sc_ifp;
1971
1972         /*
1973          * Acquire header
1974          */
1975         if (m->m_len < ETHER_HDR_LEN) {
1976                 m = m_pullup(m, ETHER_HDR_LEN);
1977                 if (m == NULL) {
1978                         bifp->if_oerrors++;
1979                         return (0);
1980                 }
1981         }
1982         eh = mtod(m, struct ether_header *);
1983         from_us = bridge_from_us(sc, eh);
1984
1985         /*
1986          * If bridge is down, but the original output interface is up,
1987          * go ahead and send out that interface.  Otherwise, the packet
1988          * is dropped below.
1989          */
1990         if ((bifp->if_flags & IFF_RUNNING) == 0) {
1991                 dst_if = ifp;
1992                 goto sendunicast;
1993         }
1994
1995         /*
1996          * If the packet is a multicast, or we don't know a better way to
1997          * get there, send to all interfaces.
1998          */
1999         if (ETHER_IS_MULTICAST(eh->ether_dhost))
2000                 dst_if = NULL;
2001         else
2002                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2003
2004         if (dst_if == NULL) {
2005                 struct mbuf *mc;
2006                 int used = 0;
2007                 int found = 0;
2008
2009                 if (sc->sc_span)
2010                         bridge_span(sc, m);
2011
2012                 alt_if = NULL;
2013                 alt_priority = 0;
2014                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2015                                      bif_next, nbif) {
2016                         dst_if = bif->bif_ifp;
2017
2018                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2019                                 continue;
2020
2021                         /*
2022                          * If this is not the original output interface,
2023                          * and the interface is participating in spanning
2024                          * tree, make sure the port is in a state that
2025                          * allows forwarding.
2026                          *
2027                          * We keep track of a possible backup IF if we are
2028                          * unable to find any interfaces to forward through.
2029                          *
2030                          * NOTE: Currently round-robining is not implemented
2031                          *       across bonded interface groups (needs an
2032                          *       algorithm to track each group somehow).
2033                          *
2034                          *       Similarly we track only one alternative
2035                          *       interface if no suitable interfaces are
2036                          *       found.
2037                          */
2038                         if (dst_if != ifp &&
2039                             (bif->bif_flags & IFBIF_STP) != 0) {
2040                                 switch (bif->bif_state) {
2041                                 case BSTP_IFSTATE_BONDED:
2042                                         if (bif->bif_priority + 512 >
2043                                             alt_priority) {
2044                                                 alt_priority =
2045                                                     bif->bif_priority + 512;
2046                                                 alt_if = bif->bif_ifp;
2047                                         }
2048                                         continue;
2049                                 case BSTP_IFSTATE_BLOCKING:
2050                                         if (bif->bif_priority + 256 >
2051                                             alt_priority) {
2052                                                 alt_priority =
2053                                                     bif->bif_priority + 256;
2054                                                 alt_if = bif->bif_ifp;
2055                                         }
2056                                         continue;
2057                                 case BSTP_IFSTATE_LEARNING:
2058                                         if (bif->bif_priority > alt_priority) {
2059                                                 alt_priority =
2060                                                     bif->bif_priority;
2061                                                 alt_if = bif->bif_ifp;
2062                                         }
2063                                         continue;
2064                                 case BSTP_IFSTATE_L1BLOCKING:
2065                                 case BSTP_IFSTATE_LISTENING:
2066                                 case BSTP_IFSTATE_DISABLED:
2067                                         continue;
2068                                 default:
2069                                         /* FORWARDING */
2070                                         break;
2071                                 }
2072                         }
2073
2074                         KKASSERT(used == 0);
2075                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2076                                 used = 1;
2077                                 mc = m;
2078                         } else {
2079                                 mc = m_copypacket(m, MB_DONTWAIT);
2080                                 if (mc == NULL) {
2081                                         bifp->if_oerrors++;
2082                                         continue;
2083                                 }
2084                         }
2085
2086                         /*
2087                          * If the packet is 'from' us override ether_shost.
2088                          */
2089                         bridge_handoff(sc, dst_if, mc, from_us);
2090                         found = 1;
2091
2092                         if (nbif != NULL && !nbif->bif_onlist) {
2093                                 KKASSERT(bif->bif_onlist);
2094                                 nbif = TAILQ_NEXT(bif, bif_next);
2095                         }
2096                 }
2097
2098                 /*
2099                  * If we couldn't find anything use the backup interface
2100                  * if we have one.
2101                  */
2102                 if (found == 0 && alt_if) {
2103                         KKASSERT(used == 0);
2104                         mc = m;
2105                         used = 1;
2106                         bridge_handoff(sc, alt_if, mc, from_us);
2107                 }
2108
2109                 if (used == 0)
2110                         m_freem(m);
2111                 return (0);
2112         }
2113
2114 sendunicast:
2115         /*
2116          * If STP is enabled on the target we are an equal opportunity
2117          * employer and do not necessarily output to dst_if.  Instead
2118          * scan available links with the same MAC as the current dst_if
2119          * and choose the best one.
2120          *
2121          * We also need to do this because arp entries tag onto a particular
2122          * interface and if it happens to be dead then the packets will
2123          * go into a bit bucket.
2124          *
2125          * If LINK2 is set the matching links are bonded and we-round robin.
2126          * (the MAC address must be the same for the participating links).
2127          * In this case links in a STP FORWARDING or BONDED state are
2128          * allowed for unicast packets.
2129          */
2130         bif = bridge_lookup_member_if(sc, dst_if);
2131         if (bif->bif_flags & IFBIF_STP) {
2132                 alt_if = NULL;
2133                 priority = 0;
2134                 alt_priority = 0;
2135
2136                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2137                                      bif_next, nbif) {
2138                         /*
2139                          * Ignore member interfaces which aren't running.
2140                          */
2141                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
2142                                 continue;
2143
2144                         /*
2145                          * member interfaces with the same MAC (usually TAPs)
2146                          * are considered to be the same.  Select the best
2147                          * one from BONDED or FORWARDING and keep track of
2148                          * the best one in the BLOCKING state if no
2149                          * candidates are available otherwise.
2150                          */
2151                         if (memcmp(IF_LLADDR(bif->bif_ifp),
2152                                    IF_LLADDR(dst_if),
2153                                    ETHER_ADDR_LEN) != 0) {
2154                                 continue;
2155                         }
2156
2157                         switch(bif->bif_state) {
2158                         case BSTP_IFSTATE_BLOCKING:
2159                                 if (bif->bif_priority > alt_priority + 256) {
2160                                         alt_priority = bif->bif_priority + 256;
2161                                         alt_if = bif->bif_ifp;
2162                                 }
2163                                 continue;
2164                         case BSTP_IFSTATE_LEARNING:
2165                                 if (bif->bif_priority > alt_priority) {
2166                                         alt_priority = bif->bif_priority;
2167                                         alt_if = bif->bif_ifp;
2168                                 }
2169                                 continue;
2170                         case BSTP_IFSTATE_L1BLOCKING:
2171                         case BSTP_IFSTATE_LISTENING:
2172                         case BSTP_IFSTATE_DISABLED:
2173                                 continue;
2174                         default:
2175                                 /* bonded, forwarding */
2176                                 break;
2177                         }
2178
2179                         /*
2180                          * XXX we need to use the toepliz hash or
2181                          *     something like that instead of
2182                          *     round-robining.
2183                          */
2184                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2185                                 dst_if = bif->bif_ifp;
2186                                 if (++bif->bif_bond_count >=
2187                                     bif->bif_bond_weight) {
2188                                         bif->bif_bond_count = 0;
2189                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2190                                                      bif, bif_next);
2191                                         TAILQ_INSERT_TAIL(
2192                                                      &sc->sc_iflists[mycpuid],
2193                                                      bif, bif_next);
2194                                 }
2195                                 priority = 1;
2196                                 break;
2197                         }
2198                         if (bif->bif_priority > priority) {
2199                                 priority = bif->bif_priority;
2200                                 dst_if = bif->bif_ifp;
2201                         }
2202                 }
2203
2204                 /*
2205                  * Interface of last resort if nothing was found.
2206                  */
2207                 if (priority == 0 && alt_if)
2208                         dst_if = alt_if;
2209         }
2210
2211         if (sc->sc_span)
2212                 bridge_span(sc, m);
2213         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2214                 m_freem(m);
2215         else
2216                 bridge_handoff(sc, dst_if, m, from_us);
2217         return (0);
2218 }
2219
2220 /*
2221  * Returns the bridge interface associated with an ifc.
2222  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2223  * code to supply the bridge for the is-at info, making
2224  * the bridge responsible for matching local addresses.
2225  *
2226  * Without this the ARP code will supply bridge member interfaces
2227  * for the is-at which makes it difficult the bridge to fail-over
2228  * interfaces (amoung other things).
2229  */
2230 static struct ifnet *
2231 bridge_interface(void *if_bridge)
2232 {
2233         struct bridge_softc *sc = if_bridge;
2234         return (sc->sc_ifp);
2235 }
2236
2237 /*
2238  * bridge_start:
2239  *
2240  *      Start output on a bridge.
2241  */
2242 static void
2243 bridge_start(struct ifnet *ifp)
2244 {
2245         struct bridge_softc *sc = ifp->if_softc;
2246
2247         ASSERT_IFNET_SERIALIZED_TX(ifp);
2248
2249         ifp->if_flags |= IFF_OACTIVE;
2250         for (;;) {
2251                 struct ifnet *dst_if = NULL;
2252                 struct ether_header *eh;
2253                 struct mbuf *m;
2254
2255                 m = ifq_dequeue(&ifp->if_snd, NULL);
2256                 if (m == NULL)
2257                         break;
2258
2259                 if (m->m_len < sizeof(*eh)) {
2260                         m = m_pullup(m, sizeof(*eh));
2261                         if (m == NULL) {
2262                                 ifp->if_oerrors++;
2263                                 continue;
2264                         }
2265                 }
2266                 eh = mtod(m, struct ether_header *);
2267
2268                 BPF_MTAP(ifp, m);
2269                 ifp->if_opackets++;
2270
2271                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2272                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2273
2274                 if (dst_if == NULL)
2275                         bridge_start_bcast(sc, m);
2276                 else
2277                         bridge_enqueue(dst_if, m);
2278         }
2279         ifp->if_flags &= ~IFF_OACTIVE;
2280 }
2281
2282 /*
2283  * bridge_forward:
2284  *
2285  *      Forward packets received on a bridge interface via the input
2286  *      path.
2287  *
2288  *      This implements the forwarding function of the bridge.
2289  */
2290 static void
2291 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2292 {
2293         struct bridge_iflist *bif, *nbif;
2294         struct ifnet *src_if, *dst_if, *alt_if, *ifp;
2295         struct ether_header *eh;
2296         int priority;
2297         int alt_priority;
2298         int from_blocking;
2299
2300         src_if = m->m_pkthdr.rcvif;
2301         ifp = sc->sc_ifp;
2302
2303         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2304
2305         ifp->if_ipackets++;
2306         ifp->if_ibytes += m->m_pkthdr.len;
2307
2308         /*
2309          * Look up the bridge_iflist.
2310          */
2311         bif = bridge_lookup_member_if(sc, src_if);
2312         if (bif == NULL) {
2313                 /* Interface is not a bridge member (anymore?) */
2314                 m_freem(m);
2315                 return;
2316         }
2317
2318         /*
2319          * In spanning tree mode receiving a packet from an interface
2320          * in a BLOCKING state is allowed, it could be a member of last
2321          * resort from the sender's point of view, but forwarding it is
2322          * not allowed.
2323          *
2324          * The sender's spanning tree will eventually sync up and the
2325          * sender will go into a BLOCKING state too (but this still may be
2326          * an interface of last resort during state changes).
2327          */
2328         if (bif->bif_flags & IFBIF_STP) {
2329                 switch (bif->bif_state) {
2330                 case BSTP_IFSTATE_L1BLOCKING:
2331                 case BSTP_IFSTATE_LISTENING:
2332                 case BSTP_IFSTATE_DISABLED:
2333                         m_freem(m);
2334                         return;
2335                 default:
2336                         /* learning, blocking, bonded, forwarding */
2337                         break;
2338                 }
2339         }
2340         from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2341
2342         eh = mtod(m, struct ether_header *);
2343
2344         /*
2345          * If the interface is learning, and the source
2346          * address is valid and not multicast, record
2347          * the address.
2348          */
2349         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2350             from_blocking == 0 &&
2351             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2352             (eh->ether_shost[0] == 0 &&
2353              eh->ether_shost[1] == 0 &&
2354              eh->ether_shost[2] == 0 &&
2355              eh->ether_shost[3] == 0 &&
2356              eh->ether_shost[4] == 0 &&
2357              eh->ether_shost[5] == 0) == 0) {
2358                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2359         }
2360
2361         /*
2362          * Don't forward from an interface in the listening or learning
2363          * state.  That is, in the learning state we learn information
2364          * but we throw away the packets.
2365          *
2366          * We let through packets on interfaces in the blocking state.
2367          * The blocking state is applicable to the send side, not the
2368          * receive side.
2369          */
2370         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2371             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2372              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2373                 m_freem(m);
2374                 return;
2375         }
2376
2377         /*
2378          * At this point, the port either doesn't participate
2379          * in spanning tree or it is in the forwarding state.
2380          */
2381
2382         /*
2383          * If the packet is unicast, destined for someone on
2384          * "this" side of the bridge, drop it.
2385          *
2386          * src_if implies the entire bonding set so we have to compare MAC
2387          * addresses and not just if pointers.
2388          */
2389         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2390                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2391                 if (dst_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
2392                                      ETHER_ADDR_LEN) == 0) {
2393                         m_freem(m);
2394                         return;
2395                 }
2396         } else {
2397                 /* ...forward it to all interfaces. */
2398                 ifp->if_imcasts++;
2399                 dst_if = NULL;
2400         }
2401
2402         /*
2403          * Brodcast if we do not have forwarding information.  However, if
2404          * we received the packet on a blocking interface we do not do this
2405          * (unless you really want to blow up your network).
2406          */
2407         if (dst_if == NULL) {
2408                 if (from_blocking)
2409                         m_freem(m);
2410                 else
2411                         bridge_broadcast(sc, src_if, m);
2412                 return;
2413         }
2414
2415         /*
2416          * Unicast, kinda replicates the output side of bridge_output().
2417          *
2418          * Even though this is a uni-cast packet we may have to select
2419          * an interface from a bonding set.
2420          */
2421         bif = bridge_lookup_member_if(sc, dst_if);
2422         if (bif == NULL) {
2423                 /* Not a member of the bridge (anymore?) */
2424                 m_freem(m);
2425                 return;
2426         }
2427
2428         if (bif->bif_flags & IFBIF_STP) {
2429                 alt_if = NULL;
2430                 alt_priority = 0;
2431                 priority = 0;
2432
2433                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2434                                      bif_next, nbif) {
2435                         /*
2436                          * dst_if may imply a bonding set so we must compare
2437                          * MAC addresses.
2438                          */
2439                         if (memcmp(IF_LLADDR(bif->bif_ifp),
2440                                    IF_LLADDR(dst_if),
2441                                    ETHER_ADDR_LEN) != 0) {
2442                                 continue;
2443                         }
2444
2445                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
2446                                 continue;
2447
2448                         /*
2449                          * NOTE: We allow tranmissions through a BLOCKING
2450                          *       or LEARNING interface only as a last resort.
2451                          *       We DISALLOW both cases if the receiving
2452                          *
2453                          * NOTE: If we send a packet through a learning
2454                          *       interface the receiving end (if also in
2455                          *       LEARNING) will throw it away, so this is
2456                          *       the ultimate last resort.
2457                          */
2458                         switch(bif->bif_state) {
2459                         case BSTP_IFSTATE_BLOCKING:
2460                                 if (from_blocking == 0 &&
2461                                     bif->bif_priority + 256 > alt_priority) {
2462                                         alt_priority = bif->bif_priority + 256;
2463                                         alt_if = bif->bif_ifp;
2464                                 }
2465                                 continue;
2466                         case BSTP_IFSTATE_LEARNING:
2467                                 if (from_blocking == 0 &&
2468                                     bif->bif_priority > alt_priority) {
2469                                         alt_priority = bif->bif_priority;
2470                                         alt_if = bif->bif_ifp;
2471                                 }
2472                                 continue;
2473                         case BSTP_IFSTATE_L1BLOCKING:
2474                         case BSTP_IFSTATE_LISTENING:
2475                         case BSTP_IFSTATE_DISABLED:
2476                                 continue;
2477                         default:
2478                                 /* FORWARDING, BONDED */
2479                                 break;
2480                         }
2481
2482                         /*
2483                          * XXX we need to use the toepliz hash or
2484                          *     something like that instead of
2485                          *     round-robining.
2486                          */
2487                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2488                                 dst_if = bif->bif_ifp;
2489                                 if (++bif->bif_bond_count >=
2490                                     bif->bif_bond_weight) {
2491                                         bif->bif_bond_count = 0;
2492                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2493                                                      bif, bif_next);
2494                                         TAILQ_INSERT_TAIL(
2495                                                      &sc->sc_iflists[mycpuid],
2496                                                      bif, bif_next);
2497                                 }
2498                                 priority = 1;
2499                                 break;
2500                         }
2501
2502                         /*
2503                          * Select best interface in the FORWARDING or
2504                          * BONDED set.  Well, there shouldn't be any
2505                          * in a BONDED state if LINK2 is not set (they
2506                          * will all be in a BLOCKING) state, but there
2507                          * could be a transitory condition here.
2508                          */
2509                         if (bif->bif_priority > priority) {
2510                                 priority = bif->bif_priority;
2511                                 dst_if = bif->bif_ifp;
2512                         }
2513                 }
2514
2515                 /*
2516                  * If no suitable interfaces were found but a suitable
2517                  * alternative interface was found, use the alternative
2518                  * interface.
2519                  */
2520                 if (priority == 0 && alt_if)
2521                         dst_if = alt_if;
2522         }
2523
2524         /*
2525          * At this point, we're dealing with a unicast frame
2526          * going to a different interface.
2527          */
2528         if ((dst_if->if_flags & IFF_RUNNING) == 0) {
2529                 m_freem(m);
2530                 return;
2531         }
2532
2533         if (inet_pfil_hook.ph_hashooks > 0
2534 #ifdef INET6
2535             || inet6_pfil_hook.ph_hashooks > 0
2536 #endif
2537             ) {
2538                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2539                         return;
2540                 if (m == NULL)
2541                         return;
2542
2543                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2544                         return;
2545                 if (m == NULL)
2546                         return;
2547         }
2548         bridge_handoff(sc, dst_if, m, 0);
2549 }
2550
2551 /*
2552  * bridge_input:
2553  *
2554  *      Receive input from a member interface.  Queue the packet for
2555  *      bridging if it is not for us.
2556  */
2557 static struct mbuf *
2558 bridge_input(struct ifnet *ifp, struct mbuf *m)
2559 {
2560         struct bridge_softc *sc = ifp->if_bridge;
2561         struct bridge_iflist *bif;
2562         struct ifnet *bifp, *new_ifp;
2563         struct ether_header *eh;
2564         struct mbuf *mc, *mc2;
2565         int from_blocking;
2566
2567         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2568
2569         /*
2570          * Make sure that we are still a member of a bridge interface.
2571          */
2572         if (sc == NULL)
2573                 return m;
2574
2575         new_ifp = NULL;
2576         bifp = sc->sc_ifp;
2577
2578         if ((bifp->if_flags & IFF_RUNNING) == 0)
2579                 goto out;
2580
2581         /*
2582          * Implement support for bridge monitoring.  If this flag has been
2583          * set on this interface, discard the packet once we push it through
2584          * the bpf(4) machinery, but before we do, increment various counters
2585          * associated with this bridge.
2586          */
2587         if (bifp->if_flags & IFF_MONITOR) {
2588                 /* Change input interface to this bridge */
2589                 m->m_pkthdr.rcvif = bifp;
2590
2591                 BPF_MTAP(bifp, m);
2592
2593                 /* Update bridge's ifnet statistics */
2594                 bifp->if_ipackets++;
2595                 bifp->if_ibytes += m->m_pkthdr.len;
2596                 if (m->m_flags & (M_MCAST | M_BCAST))
2597                         bifp->if_imcasts++;
2598
2599                 m_freem(m);
2600                 m = NULL;
2601                 goto out;
2602         }
2603
2604         /*
2605          * Handle the ether_header
2606          *
2607          * In all cases if the packet is destined for us via our MAC
2608          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2609          * repeat the source MAC out the same interface.
2610          *
2611          * This first test against our bridge MAC is the fast-path.
2612          *
2613          * NOTE!  The bridge interface can serve as an endpoint for
2614          *        communication but normally there are no IPs associated
2615          *        with it so you cannot route through it.  Instead what
2616          *        you do is point your default route *THROUGH* the bridge
2617          *        to the actual default router for one of the bridged spaces.
2618          *
2619          *        Another possibility is to put all your IP specifications
2620          *        on the bridge instead of on the individual interfaces.  If
2621          *        you do this it should be possible to use the bridge as an
2622          *        end point and route (rather than switch) through it using
2623          *        the default route or ipfw forwarding rules.
2624          */
2625
2626         /*
2627          * Acquire header
2628          */
2629         if (m->m_len < ETHER_HDR_LEN) {
2630                 m = m_pullup(m, ETHER_HDR_LEN);
2631                 if (m == NULL)
2632                         goto out;
2633         }
2634         eh = mtod(m, struct ether_header *);
2635         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2636         bcopy(eh, &m->m_pkthdr.br.ether, sizeof(*eh));
2637
2638         if ((bridge_debug & 1) &&
2639             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2640             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2641                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2642                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2643                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2644                         eh->ether_dhost[0],
2645                         eh->ether_dhost[1],
2646                         eh->ether_dhost[2],
2647                         eh->ether_dhost[3],
2648                         eh->ether_dhost[4],
2649                         eh->ether_dhost[5],
2650                         eh->ether_shost[0],
2651                         eh->ether_shost[1],
2652                         eh->ether_shost[2],
2653                         eh->ether_shost[3],
2654                         eh->ether_shost[4],
2655                         eh->ether_shost[5],
2656                         eh->ether_type,
2657                         ((u_char *)IF_LLADDR(bifp))[0],
2658                         ((u_char *)IF_LLADDR(bifp))[1],
2659                         ((u_char *)IF_LLADDR(bifp))[2],
2660                         ((u_char *)IF_LLADDR(bifp))[3],
2661                         ((u_char *)IF_LLADDR(bifp))[4],
2662                         ((u_char *)IF_LLADDR(bifp))[5]
2663                 );
2664         }
2665
2666         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2667                 /*
2668                  * If the packet is for us, set the packets source as the
2669                  * bridge, and return the packet back to ifnet.if_input for
2670                  * local processing.
2671                  */
2672                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2673                 KASSERT(bifp->if_bridge == NULL,
2674                         ("loop created in bridge_input"));
2675                 if (pfil_member != 0) {
2676                         if (inet_pfil_hook.ph_hashooks > 0
2677 #ifdef INET6
2678                             || inet6_pfil_hook.ph_hashooks > 0
2679 #endif
2680                         ) {
2681                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2682                                         goto out;
2683                                 if (m == NULL)
2684                                         goto out;
2685                         }
2686                 }
2687                 new_ifp = bifp;
2688                 goto out;
2689         }
2690
2691         /*
2692          * Tap all packets arriving on the bridge, no matter if
2693          * they are local destinations or not.  In is in.
2694          */
2695         BPF_MTAP(bifp, m);
2696
2697         bif = bridge_lookup_member_if(sc, ifp);
2698         if (bif == NULL)
2699                 goto out;
2700
2701         if (sc->sc_span)
2702                 bridge_span(sc, m);
2703
2704         if (m->m_flags & (M_BCAST | M_MCAST)) {
2705                 /*
2706                  * Tap off 802.1D packets; they do not get forwarded.
2707                  */
2708                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2709                             ETHER_ADDR_LEN) == 0) {
2710                         ifnet_serialize_all(bifp);
2711                         bstp_input(sc, bif, m);
2712                         ifnet_deserialize_all(bifp);
2713
2714                         /* m is freed by bstp_input */
2715                         m = NULL;
2716                         goto out;
2717                 }
2718
2719                 /*
2720                  * Other than 802.11d packets, ignore packets if the
2721                  * interface is not in a good state.
2722                  *
2723                  * NOTE: Broadcast/mcast packets received on a blocking or
2724                  *       learning interface are allowed for local processing.
2725                  *
2726                  *       The sending side of a blocked port will stop
2727                  *       transmitting when a better alternative is found.
2728                  *       However, later on we will disallow the forwarding
2729                  *       of bcast/mcsat packets over a blocking interface.
2730                  */
2731                 if (bif->bif_flags & IFBIF_STP) {
2732                         switch (bif->bif_state) {
2733                         case BSTP_IFSTATE_L1BLOCKING:
2734                         case BSTP_IFSTATE_LISTENING:
2735                         case BSTP_IFSTATE_DISABLED:
2736                                 goto out;
2737                         default:
2738                                 /* blocking, learning, bonded, forwarding */
2739                                 break;
2740                         }
2741                 }
2742
2743                 /*
2744                  * Make a deep copy of the packet and enqueue the copy
2745                  * for bridge processing; return the original packet for
2746                  * local processing.
2747                  */
2748                 mc = m_dup(m, MB_DONTWAIT);
2749                 if (mc == NULL)
2750                         goto out;
2751
2752                 /*
2753                  * It's just too dangerous to allow bcast/mcast over a
2754                  * blocked interface, eventually the network will sort
2755                  * itself out and a better path will be found.
2756                  */
2757                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2758                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2759                         bridge_forward(sc, mc);
2760                 }
2761
2762                 /*
2763                  * Reinject the mbuf as arriving on the bridge so we have a
2764                  * chance at claiming multicast packets. We can not loop back
2765                  * here from ether_input as a bridge is never a member of a
2766                  * bridge.
2767                  */
2768                 KASSERT(bifp->if_bridge == NULL,
2769                         ("loop created in bridge_input"));
2770                 mc2 = m_dup(m, MB_DONTWAIT);
2771 #ifdef notyet
2772                 if (mc2 != NULL) {
2773                         /* Keep the layer3 header aligned */
2774                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2775                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2776                 }
2777 #endif
2778                 if (mc2 != NULL) {
2779                         /*
2780                          * Don't tap to bpf(4) again; we have already done
2781                          * the tapping.
2782                          *
2783                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2784                          * processed as coming in on the correct interface.
2785                          *
2786                          * Clear the bridge flag for local processing in
2787                          * case the packet gets routed.
2788                          */
2789                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2790                         ether_reinput_oncpu(bifp, mc2, 0);
2791                 }
2792
2793                 /* Return the original packet for local processing. */
2794                 goto out;
2795         }
2796
2797         /*
2798          * Input of a unicast packet.  We have to allow unicast packets
2799          * input from links in the BLOCKING state as this might be an
2800          * interface of last resort.
2801          *
2802          * NOTE: We explicitly ignore normal packets received on a link
2803          *       in the BLOCKING state.  The point of being in that state
2804          *       is to avoid getting duplicate packets.
2805          *
2806          *       HOWEVER, if LINK2 is set the normal spanning tree code
2807          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2808          *       loops.  Unicast packets CAN still loop if we allow the
2809          *       case (hence we only do it in LINK2), but it isn't quite as
2810          *       bad as a broadcast packet looping.
2811          */
2812         from_blocking = 0;
2813         if (bif->bif_flags & IFBIF_STP) {
2814                 switch (bif->bif_state) {
2815                 case BSTP_IFSTATE_L1BLOCKING:
2816                 case BSTP_IFSTATE_LISTENING:
2817                 case BSTP_IFSTATE_DISABLED:
2818                         goto out;
2819                 case BSTP_IFSTATE_BLOCKING:
2820                         from_blocking = 1;
2821                         /* fall through */
2822                 default:
2823                         /* blocking, bonded, forwarding, learning */
2824                         break;
2825                 }
2826         }
2827
2828         /*
2829          * Unicast.  Make sure it's not for us.
2830          *
2831          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2832          * is followed by breaking out of the loop.
2833          */
2834         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2835                 if (bif->bif_ifp->if_type != IFT_ETHER)
2836                         continue;
2837
2838                 /*
2839                  * It is destined for an interface linked to the bridge.
2840                  * We want the bridge itself to take care of link level
2841                  * forwarding to member interfaces so reinput on the bridge.
2842                  * i.e. if you ping an IP on a target interface associated
2843                  * with the bridge, the arp is-at response should indicate
2844                  * the bridge MAC.
2845                  *
2846                  * Only update our addr list when learning if the port
2847                  * is not in a blocking state.  If it is we still allow
2848                  * the packet but we do not try to learn from it.
2849                  */
2850                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2851                            ETHER_ADDR_LEN) == 0) {
2852                         if (bif->bif_ifp != ifp) {
2853                                 /* XXX loop prevention */
2854                                 m->m_flags |= M_ETHER_BRIDGED;
2855                         }
2856                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2857                             bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2858                                 bridge_rtupdate(sc, eh->ether_shost,
2859                                                 ifp, IFBAF_DYNAMIC);
2860                         }
2861                         new_ifp = bifp; /* not bif->bif_ifp */
2862                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2863                         goto out;
2864                 }
2865
2866                 /*
2867                  * Ignore received packets that were sent by us.
2868                  */
2869                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2870                            ETHER_ADDR_LEN) == 0) {
2871                         m_freem(m);
2872                         m = NULL;
2873                         goto out;
2874                 }
2875         }
2876
2877         /*
2878          * It isn't for us.
2879          *
2880          * Perform the bridge forwarding function, but disallow bridging
2881          * to interfaces in the blocking state if the packet came in on
2882          * an interface in the blocking state.
2883          */
2884         bridge_forward(sc, m);
2885         m = NULL;
2886
2887         /*
2888          * ether_reinput_oncpu() will reprocess rcvif as
2889          * coming from new_ifp (since we do not specify
2890          * REINPUT_KEEPRCVIF).
2891          */
2892 out:
2893         if (new_ifp != NULL) {
2894                 /*
2895                  * Clear the bridge flag for local processing in
2896                  * case the packet gets routed.
2897                  */
2898                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2899                 m = NULL;
2900         }
2901         return (m);
2902 }
2903
2904 /*
2905  * bridge_start_bcast:
2906  *
2907  *      Broadcast the packet sent from bridge to all member
2908  *      interfaces.
2909  *      This is a simplified version of bridge_broadcast(), however,
2910  *      this function expects caller to hold bridge's serializer.
2911  */
2912 static void
2913 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2914 {
2915         struct bridge_iflist *bif;
2916         struct mbuf *mc;
2917         struct ifnet *dst_if, *alt_if, *bifp;
2918         int used = 0;
2919         int found = 0;
2920         int alt_priority;
2921
2922         bifp = sc->sc_ifp;
2923         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2924
2925         /*
2926          * Following loop is MPSAFE; nothing is blocking
2927          * in the loop body.
2928          *
2929          * NOTE: We transmit through an member in the BLOCKING state only
2930          *       as a last resort.
2931          */
2932         alt_if = NULL;
2933         alt_priority = 0;
2934
2935         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2936                 dst_if = bif->bif_ifp;
2937
2938                 if (bif->bif_flags & IFBIF_STP) {
2939                         switch (bif->bif_state) {
2940                         case BSTP_IFSTATE_BLOCKING:
2941                                 if (bif->bif_priority > alt_priority) {
2942                                         alt_priority = bif->bif_priority;
2943                                         alt_if = bif->bif_ifp;
2944                                 }
2945                                 /* fall through */
2946                         case BSTP_IFSTATE_L1BLOCKING:
2947                         case BSTP_IFSTATE_DISABLED:
2948                                 continue;
2949                         default:
2950                                 /* listening, learning, bonded, forwarding */
2951                                 break;
2952                         }
2953                 }
2954
2955                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2956                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2957                         continue;
2958
2959                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2960                         continue;
2961
2962                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2963                         mc = m;
2964                         used = 1;
2965                 } else {
2966                         mc = m_copypacket(m, MB_DONTWAIT);
2967                         if (mc == NULL) {
2968                                 bifp->if_oerrors++;
2969                                 continue;
2970                         }
2971                 }
2972                 found = 1;
2973                 bridge_enqueue(dst_if, mc);
2974         }
2975
2976         if (found == 0 && alt_if) {
2977                 KKASSERT(used == 0);
2978                 mc = m;
2979                 used = 1;
2980                 bridge_enqueue(alt_if, mc);
2981         }
2982
2983         if (used == 0)
2984                 m_freem(m);
2985 }
2986
2987 /*
2988  * bridge_broadcast:
2989  *
2990  *      Send a frame to all interfaces that are members of
2991  *      the bridge, except for the one on which the packet
2992  *      arrived.
2993  */
2994 static void
2995 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2996                  struct mbuf *m)
2997 {
2998         struct bridge_iflist *bif, *nbif;
2999         struct ether_header *eh;
3000         struct mbuf *mc;
3001         struct ifnet *dst_if, *alt_if, *bifp;
3002         int used;
3003         int found;
3004         int alt_priority;
3005         int from_us;
3006
3007         bifp = sc->sc_ifp;
3008         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
3009
3010         eh = mtod(m, struct ether_header *);
3011         from_us = bridge_from_us(sc, eh);
3012
3013         if (inet_pfil_hook.ph_hashooks > 0
3014 #ifdef INET6
3015             || inet6_pfil_hook.ph_hashooks > 0
3016 #endif
3017             ) {
3018                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
3019                         return;
3020                 if (m == NULL)
3021                         return;
3022
3023                 /* Filter on the bridge interface before broadcasting */
3024                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
3025                         return;
3026                 if (m == NULL)
3027                         return;
3028         }
3029
3030         alt_if = 0;
3031         alt_priority = 0;
3032         found = 0;
3033         used = 0;
3034
3035         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
3036                 dst_if = bif->bif_ifp;
3037
3038                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3039                         continue;
3040
3041                 /*
3042                  * Don't bounce the packet out the same interface it came
3043                  * in on.  We have to test MAC addresses because a packet
3044                  * can come in a bonded interface and we don't want it to
3045                  * be echod out the forwarding interface for the same bonding
3046                  * set.
3047                  */
3048                 if (src_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
3049                                      ETHER_ADDR_LEN) == 0) {
3050                         continue;
3051                 }
3052
3053                 /*
3054                  * Generally speaking we only broadcast through forwarding
3055                  * interfaces.  If no interfaces are available we select
3056                  * a BONDED, BLOCKING, or LEARNING interface to forward
3057                  * through.
3058                  */
3059                 if (bif->bif_flags & IFBIF_STP) {
3060                         switch (bif->bif_state) {
3061                         case BSTP_IFSTATE_BONDED:
3062                                 if (bif->bif_priority + 512 > alt_priority) {
3063                                         alt_priority = bif->bif_priority + 512;
3064                                         alt_if = bif->bif_ifp;
3065                                 }
3066                                 continue;
3067                         case BSTP_IFSTATE_BLOCKING:
3068                                 if (bif->bif_priority + 256 > alt_priority) {
3069                                         alt_priority = bif->bif_priority + 256;
3070                                         alt_if = bif->bif_ifp;
3071                                 }
3072                                 continue;
3073                         case BSTP_IFSTATE_LEARNING:
3074                                 if (bif->bif_priority > alt_priority) {
3075                                         alt_priority = bif->bif_priority;
3076                                         alt_if = bif->bif_ifp;
3077                                 }
3078                                 continue;
3079                         case BSTP_IFSTATE_L1BLOCKING:
3080                         case BSTP_IFSTATE_DISABLED:
3081                         case BSTP_IFSTATE_LISTENING:
3082                                 continue;
3083                         default:
3084                                 /* forwarding */
3085                                 break;
3086                         }
3087                 }
3088
3089                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3090                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3091                         continue;
3092                 }
3093
3094                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3095                         mc = m;
3096                         used = 1;
3097                 } else {
3098                         mc = m_copypacket(m, MB_DONTWAIT);
3099                         if (mc == NULL) {
3100                                 sc->sc_ifp->if_oerrors++;
3101                                 continue;
3102                         }
3103                 }
3104                 found = 1;
3105
3106                 /*
3107                  * Filter on the output interface.  Pass a NULL bridge
3108                  * interface pointer so we do not redundantly filter on
3109                  * the bridge for each interface we broadcast on.
3110                  */
3111                 if (inet_pfil_hook.ph_hashooks > 0
3112 #ifdef INET6
3113                     || inet6_pfil_hook.ph_hashooks > 0
3114 #endif
3115                     ) {
3116                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3117                                 continue;
3118                         if (mc == NULL)
3119                                 continue;
3120                 }
3121                 bridge_handoff(sc, dst_if, mc, from_us);
3122
3123                 if (nbif != NULL && !nbif->bif_onlist) {
3124                         KKASSERT(bif->bif_onlist);
3125                         nbif = TAILQ_NEXT(bif, bif_next);
3126                 }
3127         }
3128
3129         if (found == 0 && alt_if) {
3130                 KKASSERT(used == 0);
3131                 mc = m;
3132                 used = 1;
3133                 bridge_enqueue(alt_if, mc);
3134         }
3135
3136         if (used == 0)
3137                 m_freem(m);
3138 }
3139
3140 /*
3141  * bridge_span:
3142  *
3143  *      Duplicate a packet out one or more interfaces that are in span mode,
3144  *      the original mbuf is unmodified.
3145  */
3146 static void
3147 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3148 {
3149         struct bridge_iflist *bif;
3150         struct ifnet *dst_if, *bifp;
3151         struct mbuf *mc;
3152
3153         bifp = sc->sc_ifp;
3154         ifnet_serialize_all(bifp);
3155
3156         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3157                 dst_if = bif->bif_ifp;
3158
3159                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3160                         continue;
3161
3162                 mc = m_copypacket(m, MB_DONTWAIT);
3163                 if (mc == NULL) {
3164                         sc->sc_ifp->if_oerrors++;
3165                         continue;
3166                 }
3167                 bridge_enqueue(dst_if, mc);
3168         }
3169
3170         ifnet_deserialize_all(bifp);
3171 }
3172
3173 static void
3174 bridge_rtmsg_sync_handler(netmsg_t msg)
3175 {
3176         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3177 }
3178
3179 static void
3180 bridge_rtmsg_sync(struct bridge_softc *sc)
3181 {
3182         struct netmsg_base msg;
3183
3184         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3185
3186         netmsg_init(&msg, NULL, &curthread->td_msgport,
3187                     0, bridge_rtmsg_sync_handler);
3188         ifnet_domsg(&msg.lmsg, 0);
3189 }
3190
3191 static __inline void
3192 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3193                      int setflags, uint8_t flags, uint32_t timeo)
3194 {
3195         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3196             bri->bri_ifp != dst_if)
3197                 bri->bri_ifp = dst_if;
3198         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3199             bri->bri_expire != time_second + timeo)
3200                 bri->bri_expire = time_second + timeo;
3201         if (setflags)
3202                 bri->bri_flags = flags;
3203 }
3204
3205 static int
3206 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3207                        struct ifnet *dst_if, int setflags, uint8_t flags,
3208                        struct bridge_rtinfo **bri0)
3209 {
3210         struct bridge_rtnode *brt;
3211         struct bridge_rtinfo *bri;
3212
3213         if (mycpuid == 0) {
3214                 brt = bridge_rtnode_lookup(sc, dst);
3215                 if (brt != NULL) {
3216                         /*
3217                          * rtnode for 'dst' already exists.  We inform the
3218                          * caller about this by leaving bri0 as NULL.  The
3219                          * caller will terminate the intallation upon getting
3220                          * NULL bri0.  However, we still need to update the
3221                          * rtinfo.
3222                          */
3223                         KKASSERT(*bri0 == NULL);
3224
3225                         /* Update rtinfo */
3226                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3227                                              flags, sc->sc_brttimeout);
3228                         return 0;
3229                 }
3230
3231                 /*
3232                  * We only need to check brtcnt on CPU0, since if limit
3233                  * is to be exceeded, ENOSPC is returned.  Caller knows
3234                  * this and will terminate the installation.
3235                  */
3236                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3237                         return ENOSPC;
3238
3239                 KKASSERT(*bri0 == NULL);
3240                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3241                                   M_WAITOK | M_ZERO);
3242                 *bri0 = bri;
3243
3244                 /* Setup rtinfo */
3245                 bri->bri_flags = IFBAF_DYNAMIC;
3246                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3247                                      sc->sc_brttimeout);
3248         } else {
3249                 bri = *bri0;
3250                 KKASSERT(bri != NULL);
3251         }
3252
3253         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3254                       M_WAITOK | M_ZERO);
3255         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3256         brt->brt_info = bri;
3257
3258         bridge_rtnode_insert(sc, brt);
3259         return 0;
3260 }
3261
3262 static void
3263 bridge_rtinstall_handler(netmsg_t msg)
3264 {
3265         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3266         int error;
3267
3268         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3269                                        brmsg->br_dst, brmsg->br_dst_if,
3270                                        brmsg->br_setflags, brmsg->br_flags,
3271                                        &brmsg->br_rtinfo);
3272         if (error) {
3273                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3274                 lwkt_replymsg(&brmsg->base.lmsg, error);
3275                 return;
3276         } else if (brmsg->br_rtinfo == NULL) {
3277                 /* rtnode already exists for 'dst' */
3278                 KKASSERT(mycpuid == 0);
3279                 lwkt_replymsg(&brmsg->base.lmsg, 0);
3280                 return;
3281         }
3282         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
3283 }
3284
3285 /*
3286  * bridge_rtupdate:
3287  *
3288  *      Add/Update a bridge routing entry.
3289  */
3290 static int
3291 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3292                 struct ifnet *dst_if, uint8_t flags)
3293 {
3294         struct bridge_rtnode *brt;
3295
3296         /*
3297          * A route for this destination might already exist.  If so,
3298          * update it, otherwise create a new one.
3299          */
3300         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3301                 struct netmsg_brsaddr *brmsg;
3302
3303                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3304                         return ENOSPC;
3305
3306                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3307                 if (brmsg == NULL)
3308                         return ENOMEM;
3309
3310                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3311                             0, bridge_rtinstall_handler);
3312                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3313                 brmsg->br_dst_if = dst_if;
3314                 brmsg->br_flags = flags;
3315                 brmsg->br_setflags = 0;
3316                 brmsg->br_softc = sc;
3317                 brmsg->br_rtinfo = NULL;
3318
3319                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
3320                 return 0;
3321         }
3322         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3323                              sc->sc_brttimeout);
3324         return 0;
3325 }
3326
3327 static int
3328 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3329                struct ifnet *dst_if, uint8_t flags)
3330 {
3331         struct netmsg_brsaddr brmsg;
3332
3333         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3334
3335         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3336                     0, bridge_rtinstall_handler);
3337         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3338         brmsg.br_dst_if = dst_if;
3339         brmsg.br_flags = flags;
3340         brmsg.br_setflags = 1;
3341         brmsg.br_softc = sc;
3342         brmsg.br_rtinfo = NULL;
3343
3344         return ifnet_domsg(&brmsg.base.lmsg, 0);
3345 }
3346
3347 /*
3348  * bridge_rtlookup:
3349  *
3350  *      Lookup the destination interface for an address.
3351  */
3352 static struct ifnet *
3353 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3354 {
3355         struct bridge_rtnode *brt;
3356
3357         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3358                 return NULL;
3359         return brt->brt_info->bri_ifp;
3360 }
3361
3362 static void
3363 bridge_rtreap_handler(netmsg_t msg)
3364 {
3365         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3366         struct bridge_rtnode *brt, *nbrt;
3367
3368         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3369                 if (brt->brt_info->bri_dead)
3370                         bridge_rtnode_destroy(sc, brt);
3371         }
3372         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3373 }
3374
3375 static void
3376 bridge_rtreap(struct bridge_softc *sc)
3377 {
3378         struct netmsg_base msg;
3379
3380         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3381
3382         netmsg_init(&msg, NULL, &curthread->td_msgport,
3383                     0, bridge_rtreap_handler);
3384         msg.lmsg.u.ms_resultp = sc;
3385
3386         ifnet_domsg(&msg.lmsg, 0);
3387 }
3388
3389 static void
3390 bridge_rtreap_async(struct bridge_softc *sc)
3391 {
3392         struct netmsg_base *msg;
3393
3394         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3395
3396         netmsg_init(msg, NULL, &netisr_afree_rport,
3397                     0, bridge_rtreap_handler);
3398         msg->lmsg.u.ms_resultp = sc;
3399
3400         ifnet_sendmsg(&msg->lmsg, 0);
3401 }
3402
3403 /*
3404  * bridge_rttrim:
3405  *
3406  *      Trim the routine table so that we have a number
3407  *      of routing entries less than or equal to the
3408  *      maximum number.
3409  */
3410 static void
3411 bridge_rttrim(struct bridge_softc *sc)
3412 {
3413         struct bridge_rtnode *brt;
3414         int dead;
3415
3416         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3417
3418         /* Make sure we actually need to do this. */
3419         if (sc->sc_brtcnt <= sc->sc_brtmax)
3420                 return;
3421
3422         /*
3423          * Find out how many rtnodes are dead
3424          */
3425         dead = bridge_rtage_finddead(sc);
3426         KKASSERT(dead <= sc->sc_brtcnt);
3427
3428         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3429                 /* Enough dead rtnodes are found */
3430                 bridge_rtreap(sc);
3431                 return;
3432         }
3433
3434         /*
3435          * Kill some dynamic rtnodes to meet the brtmax
3436          */
3437         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3438                 struct bridge_rtinfo *bri = brt->brt_info;
3439
3440                 if (bri->bri_dead) {
3441                         /*
3442                          * We have counted this rtnode in
3443                          * bridge_rtage_finddead()
3444                          */
3445                         continue;
3446                 }
3447
3448                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3449                         bri->bri_dead = 1;
3450                         ++dead;
3451                         KKASSERT(dead <= sc->sc_brtcnt);
3452
3453                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3454                                 /* Enough rtnodes are collected */
3455                                 break;
3456                         }
3457                 }
3458         }
3459         if (dead)
3460                 bridge_rtreap(sc);
3461 }
3462
3463 /*
3464  * bridge_timer:
3465  *
3466  *      Aging timer for the bridge.
3467  */
3468 static void
3469 bridge_timer(void *arg)
3470 {
3471         struct bridge_softc *sc = arg;
3472         struct netmsg_base *msg;
3473
3474         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3475
3476         crit_enter();
3477
3478         if (callout_pending(&sc->sc_brcallout) ||
3479             !callout_active(&sc->sc_brcallout)) {
3480                 crit_exit();
3481                 return;
3482         }
3483         callout_deactivate(&sc->sc_brcallout);
3484
3485         msg = &sc->sc_brtimemsg;
3486         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3487         lwkt_sendmsg(BRIDGE_CFGPORT, &msg->lmsg);
3488
3489         crit_exit();
3490 }
3491
3492 static void
3493 bridge_timer_handler(netmsg_t msg)
3494 {
3495         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3496
3497         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3498
3499         crit_enter();
3500         /* Reply ASAP */
3501         lwkt_replymsg(&msg->lmsg, 0);
3502         crit_exit();
3503
3504         bridge_rtage(sc);
3505         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3506                 callout_reset(&sc->sc_brcallout,
3507                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3508         }
3509 }
3510
3511 static int
3512 bridge_rtage_finddead(struct bridge_softc *sc)
3513 {
3514         struct bridge_rtnode *brt;
3515         int dead = 0;
3516
3517         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3518                 struct bridge_rtinfo *bri = brt->brt_info;
3519
3520                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3521                     time_second >= bri->bri_expire) {
3522                         bri->bri_dead = 1;
3523                         ++dead;
3524                         KKASSERT(dead <= sc->sc_brtcnt);
3525                 }
3526         }
3527         return dead;
3528 }
3529
3530 /*
3531  * bridge_rtage:
3532  *
3533  *      Perform an aging cycle.
3534  */
3535 static void
3536 bridge_rtage(struct bridge_softc *sc)
3537 {
3538         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3539
3540         if (bridge_rtage_finddead(sc))
3541                 bridge_rtreap(sc);
3542 }
3543
3544 /*
3545  * bridge_rtflush:
3546  *
3547  *      Remove all dynamic addresses from the bridge.
3548  */
3549 static void
3550 bridge_rtflush(struct bridge_softc *sc, int bf)
3551 {
3552         struct bridge_rtnode *brt;
3553         int reap;
3554
3555         reap = 0;
3556         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3557                 struct bridge_rtinfo *bri = brt->brt_info;
3558
3559                 if ((bf & IFBF_FLUSHALL) ||
3560                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3561                         bri->bri_dead = 1;
3562                         reap = 1;
3563                 }
3564         }
3565         if (reap) {
3566                 if (bf & IFBF_FLUSHSYNC)
3567                         bridge_rtreap(sc);
3568                 else
3569                         bridge_rtreap_async(sc);
3570         }
3571 }
3572
3573 /*
3574  * bridge_rtdaddr:
3575  *
3576  *      Remove an address from the table.
3577  */
3578 static int
3579 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3580 {
3581         struct bridge_rtnode *brt;
3582
3583         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3584
3585         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3586                 return (ENOENT);
3587
3588         /* TODO: add a cheaper delete operation */
3589         brt->brt_info->bri_dead = 1;
3590         bridge_rtreap(sc);
3591         return (0);
3592 }
3593
3594 /*
3595  * bridge_rtdelete:
3596  *
3597  *      Delete routes to a speicifc member interface.
3598  */
3599 void
3600 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3601 {
3602         struct bridge_rtnode *brt;
3603         int reap;
3604
3605         reap = 0;
3606         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3607                 struct bridge_rtinfo *bri = brt->brt_info;
3608
3609                 if (bri->bri_ifp == ifp &&
3610                     ((bf & IFBF_FLUSHALL) ||
3611                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3612                         bri->bri_dead = 1;
3613                         reap = 1;
3614                 }
3615         }
3616         if (reap) {
3617                 if (bf & IFBF_FLUSHSYNC)
3618                         bridge_rtreap(sc);
3619                 else
3620                         bridge_rtreap_async(sc);
3621         }
3622 }
3623
3624 /*
3625  * bridge_rtable_init:
3626  *
3627  *      Initialize the route table for this bridge.
3628  */
3629 static void
3630 bridge_rtable_init(struct bridge_softc *sc)
3631 {
3632         int cpu;
3633
3634         /*
3635          * Initialize per-cpu hash tables
3636          */
3637         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3638                                  M_DEVBUF, M_WAITOK);
3639         for (cpu = 0; cpu < ncpus; ++cpu) {
3640                 int i;
3641
3642                 sc->sc_rthashs[cpu] =
3643                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3644                         M_DEVBUF, M_WAITOK);
3645
3646                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3647                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3648         }
3649         sc->sc_rthash_key = karc4random();
3650
3651         /*
3652          * Initialize per-cpu lists
3653          */
3654         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3655                                  M_DEVBUF, M_WAITOK);
3656         for (cpu = 0; cpu < ncpus; ++cpu)
3657                 LIST_INIT(&sc->sc_rtlists[cpu]);
3658 }
3659
3660 /*
3661  * bridge_rtable_fini:
3662  *
3663  *      Deconstruct the route table for this bridge.
3664  */
3665 static void
3666 bridge_rtable_fini(struct bridge_softc *sc)
3667 {
3668         int cpu;
3669
3670         /*
3671          * Free per-cpu hash tables
3672          */
3673         for (cpu = 0; cpu < ncpus; ++cpu)
3674                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3675         kfree(sc->sc_rthashs, M_DEVBUF);
3676
3677         /*
3678          * Free per-cpu lists
3679          */
3680         kfree(sc->sc_rtlists, M_DEVBUF);
3681 }
3682
3683 /*
3684  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3685  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3686  */
3687 #define mix(a, b, c)                                                    \
3688 do {                                                                    \
3689         a -= b; a -= c; a ^= (c >> 13);                                 \
3690         b -= c; b -= a; b ^= (a << 8);                                  \
3691         c -= a; c -= b; c ^= (b >> 13);                                 \
3692         a -= b; a -= c; a ^= (c >> 12);                                 \
3693         b -= c; b -= a; b ^= (a << 16);                                 \
3694         c -= a; c -= b; c ^= (b >> 5);                                  \
3695         a -= b; a -= c; a ^= (c >> 3);                                  \
3696         b -= c; b -= a; b ^= (a << 10);                                 \
3697         c -= a; c -= b; c ^= (b >> 15);                                 \
3698 } while (/*CONSTCOND*/0)
3699
3700 static __inline uint32_t
3701 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3702 {
3703         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3704
3705         b += addr[5] << 8;
3706         b += addr[4];
3707         a += addr[3] << 24;
3708         a += addr[2] << 16;
3709         a += addr[1] << 8;
3710         a += addr[0];
3711
3712         mix(a, b, c);
3713
3714         return (c & BRIDGE_RTHASH_MASK);
3715 }
3716
3717 #undef mix
3718
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two ETHER_ADDR_LEN byte addresses, byte by byte from
 *	index 0.
 *
 *	Returns 0 when they are equal; otherwise the difference
 *	a[i] - b[i] at the first index where they differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0)
			return (diff);
	}
	return (0);
}
3730
3731 /*
3732  * bridge_rtnode_lookup:
3733  *
3734  *      Look up a bridge route node for the specified destination.
3735  */
3736 static struct bridge_rtnode *
3737 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3738 {
3739         struct bridge_rtnode *brt;
3740         uint32_t hash;
3741         int dir;
3742
3743         hash = bridge_rthash(sc, addr);
3744         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3745                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3746                 if (dir == 0)
3747                         return (brt);
3748                 if (dir > 0)
3749                         return (NULL);
3750         }
3751
3752         return (NULL);
3753 }
3754
3755 /*
3756  * bridge_rtnode_insert:
3757  *
3758  *      Insert the specified bridge node into the route table.
3759  *      Caller has to make sure that rtnode does not exist.
3760  */
3761 static void
3762 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3763 {
3764         struct bridge_rtnode *lbrt;
3765         uint32_t hash;
3766         int dir;
3767
3768         hash = bridge_rthash(sc, brt->brt_addr);
3769
3770         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3771         if (lbrt == NULL) {
3772                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3773                                   brt, brt_hash);
3774                 goto out;
3775         }
3776
3777         do {
3778                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3779                 KASSERT(dir != 0, ("rtnode already exist\n"));
3780
3781                 if (dir > 0) {
3782                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3783                         goto out;
3784                 }
3785                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3786                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3787                         goto out;
3788                 }
3789                 lbrt = LIST_NEXT(lbrt, brt_hash);
3790         } while (lbrt != NULL);
3791
3792         panic("no suitable position found for rtnode\n");
3793 out:
3794         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3795         if (mycpuid == 0) {
3796                 /*
3797                  * Update the brtcnt.
3798                  * We only need to do it once and we do it on CPU0.
3799                  */
3800                 sc->sc_brtcnt++;
3801         }
3802 }
3803
3804 /*
3805  * bridge_rtnode_destroy:
3806  *
3807  *      Destroy a bridge rtnode.
3808  */
3809 static void
3810 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3811 {
3812         LIST_REMOVE(brt, brt_hash);
3813         LIST_REMOVE(brt, brt_list);
3814
3815         if (mycpuid + 1 == ncpus) {
3816                 /* Free rtinfo associated with rtnode on the last cpu */
3817                 kfree(brt->brt_info, M_DEVBUF);
3818         }
3819         kfree(brt, M_DEVBUF);
3820
3821         if (mycpuid == 0) {
3822                 /* Update brtcnt only on CPU0 */
3823                 sc->sc_brtcnt--;
3824         }
3825 }
3826
3827 static __inline int
3828 bridge_post_pfil(struct mbuf *m)
3829 {
3830         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3831                 return EOPNOTSUPP;
3832
3833         /* Not yet */
3834         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3835                 return EOPNOTSUPP;
3836
3837         return 0;
3838 }
3839
3840 /*
3841  * Send bridge packets through pfil if they are one of the types pfil can deal
3842  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3843  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3844  * that interface.
3845  */
3846 static int
3847 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3848 {
3849         int snap, error, i, hlen;
3850         struct ether_header *eh1, eh2;
3851         struct ip *ip;
3852         struct llc llc1;
3853         u_int16_t ether_type;
3854
3855         snap = 0;
3856         error = -1;     /* Default error if not error == 0 */
3857
3858         if (pfil_bridge == 0 && pfil_member == 0)
3859                 return (0); /* filtering is disabled */
3860
3861         i = min((*mp)->m_pkthdr.len, max_protohdr);
3862         if ((*mp)->m_len < i) {
3863                 *mp = m_pullup(*mp, i);
3864                 if (*mp == NULL) {
3865                         kprintf("%s: m_pullup failed\n", __func__);
3866                         return (-1);
3867                 }
3868         }
3869
3870         eh1 = mtod(*mp, struct ether_header *);
3871         ether_type = ntohs(eh1->ether_type);
3872
3873         /*
3874          * Check for SNAP/LLC.
3875          */
3876         if (ether_type < ETHERMTU) {
3877                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3878
3879                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3880                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3881                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3882                     llc2->llc_control == LLC_UI) {
3883                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3884                         snap = 1;
3885                 }
3886         }
3887
3888         /*
3889          * If we're trying to filter bridge traffic, don't look at anything
3890          * other than IP and ARP traffic.  If the filter doesn't understand
3891          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3892          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3893          * but of course we don't have an AppleTalk filter to begin with.
3894          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3895          * ARP traffic.)
3896          */
3897         switch (ether_type) {
3898         case ETHERTYPE_ARP:
3899         case ETHERTYPE_REVARP:
3900                 return (0); /* Automatically pass */
3901
3902         case ETHERTYPE_IP:
3903 #ifdef INET6
3904         case ETHERTYPE_IPV6:
3905 #endif /* INET6 */
3906                 break;
3907
3908         default:
3909                 /*
3910                  * Check to see if the user wants to pass non-ip
3911                  * packets, these will not be checked by pfil(9)
3912                  * and passed unconditionally so the default is to drop.
3913                  */
3914                 if (pfil_onlyip)
3915                         goto bad;
3916         }
3917
3918         /* Strip off the Ethernet header and keep a copy. */
3919         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3920         m_adj(*mp, ETHER_HDR_LEN);
3921
3922         /* Strip off snap header, if present */
3923         if (snap) {
3924                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3925                 m_adj(*mp, sizeof(struct llc));
3926         }
3927
3928         /*
3929          * Check the IP header for alignment and errors
3930          */
3931         if (dir == PFIL_IN) {
3932                 switch (ether_type) {
3933                 case ETHERTYPE_IP:
3934                         error = bridge_ip_checkbasic(mp);
3935                         break;
3936 #ifdef INET6
3937                 case ETHERTYPE_IPV6:
3938                         error = bridge_ip6_checkbasic(mp);
3939                         break;
3940 #endif /* INET6 */
3941                 default:
3942                         error = 0;
3943                 }
3944                 if (error)
3945                         goto bad;
3946         }
3947
3948         error = 0;
3949
3950         /*
3951          * Run the packet through pfil
3952          */
3953         switch (ether_type) {
3954         case ETHERTYPE_IP:
3955                 /*
3956                  * before calling the firewall, swap fields the same as
3957                  * IP does. here we assume the header is contiguous
3958                  */
3959                 ip = mtod(*mp, struct ip *);
3960
3961                 ip->ip_len = ntohs(ip->ip_len);
3962                 ip->ip_off = ntohs(ip->ip_off);
3963
3964                 /*
3965                  * Run pfil on the member interface and the bridge, both can
3966                  * be skipped by clearing pfil_member or pfil_bridge.
3967                  *
3968                  * Keep the order:
3969                  *   in_if -> bridge_if -> out_if
3970                  */
3971                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3972                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3973                         if (*mp == NULL || error != 0) /* filter may consume */
3974                                 break;
3975                         error = bridge_post_pfil(*mp);
3976                         if (error)
3977                                 break;
3978                 }
3979
3980                 if (pfil_member && ifp != NULL) {
3981                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3982                         if (*mp == NULL || error != 0) /* filter may consume */
3983                                 break;
3984                         error = bridge_post_pfil(*mp);
3985                         if (error)
3986                                 break;
3987                 }
3988
3989                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3990                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3991                         if (*mp == NULL || error != 0) /* filter may consume */
3992                                 break;
3993                         error = bridge_post_pfil(*mp);
3994                         if (error)
3995                                 break;
3996                 }
3997
3998                 /* check if we need to fragment the packet */
3999                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
4000                         i = (*mp)->m_pkthdr.len;
4001                         if (i > ifp->if_mtu) {
4002                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
4003                                             &llc1);
4004                                 return (error);
4005                         }
4006                 }
4007
4008                 /* Recalculate the ip checksum and restore byte ordering */
4009                 ip = mtod(*mp, struct ip *);
4010                 hlen = ip->ip_hl << 2;
4011                 if (hlen < sizeof(struct ip))
4012                         goto bad;
4013                 if (hlen > (*mp)->m_len) {
4014                         if ((*mp = m_pullup(*mp, hlen)) == 0)
4015                                 goto bad;
4016                         ip = mtod(*mp, struct ip *);
4017                         if (ip == NULL)
4018                                 goto bad;
4019                 }
4020                 ip->ip_len = htons(ip->ip_len);
4021                 ip->ip_off = htons(ip->ip_off);
4022                 ip->ip_sum = 0;
4023                 if (hlen == sizeof(struct ip))
4024                         ip->ip_sum = in_cksum_hdr(ip);
4025                 else
4026                         ip->ip_sum = in_cksum(*mp, hlen);
4027
4028                 break;
4029 #ifdef INET6
4030         case ETHERTYPE_IPV6:
4031                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
4032                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4033                                         dir);
4034
4035                 if (*mp == NULL || error != 0) /* filter may consume */
4036                         break;
4037
4038                 if (pfil_member && ifp != NULL)
4039                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
4040                                         dir);
4041
4042                 if (*mp == NULL || error != 0) /* filter may consume */
4043                         break;
4044
4045                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4046                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4047                                         dir);
4048                 break;
4049 #endif
4050         default:
4051                 error = 0;
4052                 break;
4053         }
4054
4055         if (*mp == NULL)
4056                 return (error);
4057         if (error != 0)
4058                 goto bad;
4059
4060         error = -1;
4061
4062         /*
4063          * Finally, put everything back the way it was and return
4064          */
4065         if (snap) {
4066                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
4067                 if (*mp == NULL)
4068                         return (error);
4069                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4070         }
4071
4072         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
4073         if (*mp == NULL)
4074                 return (error);
4075         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4076
4077         return (0);
4078
4079 bad:
4080         m_freem(*mp);
4081         *mp = NULL;
4082         return (error);
4083 }
4084
4085 /*
4086  * Perform basic checks on header size since
4087  * pfil assumes ip_input has already processed
4088  * it for it.  Cut-and-pasted from ip_input.c.
4089  * Given how simple the IPv6 version is,
4090  * does the IPv4 version really need to be
4091  * this complicated?
4092  *
4093  * XXX Should we update ipstat here, or not?
4094  * XXX Right now we update ipstat but not
4095  * XXX csum_counter.
4096  */
4097 static int
4098 bridge_ip_checkbasic(struct mbuf **mp)
4099 {
4100         struct mbuf *m = *mp;
4101         struct ip *ip;
4102         int len, hlen;
4103         u_short sum;
4104
4105         if (*mp == NULL)
4106                 return (-1);
4107 #if 0 /* notyet */
4108         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4109                 if ((m = m_copyup(m, sizeof(struct ip),
4110                         (max_linkhdr + 3) & ~3)) == NULL) {
4111                         /* XXXJRT new stat, please */
4112                         ipstat.ips_toosmall++;
4113                         goto bad;
4114                 }
4115         } else
4116 #endif
4117 #ifndef __predict_false
4118 #define __predict_false(x) x
4119 #endif
4120          if (__predict_false(m->m_len < sizeof (struct ip))) {
4121                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4122                         ipstat.ips_toosmall++;
4123                         goto bad;
4124                 }
4125         }
4126         ip = mtod(m, struct ip *);
4127         if (ip == NULL) goto bad;
4128
4129         if (ip->ip_v != IPVERSION) {
4130                 ipstat.ips_badvers++;
4131                 goto bad;
4132         }
4133         hlen = ip->ip_hl << 2;
4134         if (hlen < sizeof(struct ip)) { /* minimum header length */
4135                 ipstat.ips_badhlen++;
4136                 goto bad;
4137         }
4138         if (hlen > m->m_len) {
4139                 if ((m = m_pullup(m, hlen)) == 0) {
4140                         ipstat.ips_badhlen++;
4141                         goto bad;
4142                 }
4143                 ip = mtod(m, struct ip *);
4144                 if (ip == NULL) goto bad;
4145         }
4146
4147         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4148                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4149         } else {
4150                 if (hlen == sizeof(struct ip)) {
4151                         sum = in_cksum_hdr(ip);
4152                 } else {
4153                         sum = in_cksum(m, hlen);
4154                 }
4155         }
4156         if (sum) {
4157                 ipstat.ips_badsum++;
4158                 goto bad;
4159         }
4160
4161         /* Retrieve the packet length. */
4162         len = ntohs(ip->ip_len);
4163
4164         /*
4165          * Check for additional length bogosity
4166          */
4167         if (len < hlen) {
4168                 ipstat.ips_badlen++;
4169                 goto bad;
4170         }
4171
4172         /*
4173          * Check that the amount of data in the buffers
4174          * is as at least much as the IP header would have us expect.
4175          * Drop packet if shorter than we expect.
4176          */
4177         if (m->m_pkthdr.len < len) {
4178                 ipstat.ips_tooshort++;
4179                 goto bad;
4180         }
4181
4182         /* Checks out, proceed */
4183         *mp = m;
4184         return (0);
4185
4186 bad:
4187         *mp = m;
4188         return (-1);
4189 }
4190
#ifdef INET6
/*
 * bridge_ip6_checkbasic:
 *
 *	Same as bridge_ip_checkbasic(), but for IPv6.
 *	Cut-and-pasted from ip6_input.c.
 *
 *	Makes sure the entire base IPv6 header is in the first mbuf of
 *	the chain and that the version field is IPV6_VERSION.
 *
 *	Returns 0 when the header checks out and -1 otherwise.  Unless
 *	*mp was NULL on entry, *mp is updated to the current mbuf on
 *	return, which is NULL after a failed m_pullup().  The packet is
 *	not freed here.
 *
 *	XXX Should we update ip6stat, or not?
 *	XXX Fixing up an unaligned header with m_copyup() is not done yet.
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;

	/* Same guard as the IPv4 version: nothing to check without a packet. */
	if (m == NULL)
		return (-1);

	if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
		/* Remember the interface first; a failed pullup leaves m NULL. */
		struct ifnet *inifp = m->m_pkthdr.rcvif;

		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto bad;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto bad;
	}

	/* Checks out, proceed */
	*mp = m;
	return (0);

bad:
	*mp = m;
	return (-1);
}
#endif /* INET6 */
4247
4248 /*
4249  * bridge_fragment:
4250  *
4251  *      Return a fragmented mbuf chain.
4252  */
4253 static int
4254 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4255     int snap, struct llc *llc)
4256 {
4257         struct mbuf *m0;
4258         struct ip *ip;
4259         int error = -1;
4260
4261         if (m->m_len < sizeof(struct ip) &&
4262             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4263                 goto out;
4264         ip = mtod(m, struct ip *);
4265
4266         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4267                     CSUM_DELAY_IP);
4268         if (error)
4269                 goto out;
4270
4271         /* walk the chain and re-add the Ethernet header */
4272         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4273                 if (error == 0) {
4274                         if (snap) {
4275                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
4276                                 if (m0 == NULL) {
4277                                         error = ENOBUFS;
4278                                         continue;
4279                                 }
4280                                 bcopy(llc, mtod(m0, caddr_t),
4281                                     sizeof(struct llc));
4282                         }
4283                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
4284                         if (m0 == NULL) {
4285                                 error = ENOBUFS;
4286                                 continue;
4287                         }
4288                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4289                 } else 
4290                         m_freem(m);
4291         }
4292
4293         if (error == 0)
4294                 ipstat.ips_fragmented++;
4295
4296         return (error);
4297
4298 out:
4299         if (m != NULL)
4300                 m_freem(m);
4301         return (error);
4302 }
4303
4304 static void
4305 bridge_enqueue_handler(netmsg_t msg)
4306 {
4307         struct netmsg_packet *nmp;
4308         struct ifnet *dst_ifp;
4309         struct mbuf *m;
4310
4311         nmp = &msg->packet;
4312         m = nmp->nm_packet;
4313         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4314
4315         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4316 }
4317
4318 static void
4319 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4320                struct mbuf *m, int from_us)
4321 {
4322         struct mbuf *m0;
4323         struct ifnet *bifp;
4324
4325         bifp = sc->sc_ifp;
4326
4327         /* We may be sending a fragment so traverse the mbuf */
4328         for (; m; m = m0) {
4329                 struct altq_pktattr pktattr;
4330
4331                 m0 = m->m_nextpkt;
4332                 m->m_nextpkt = NULL;
4333
4334                 /*
4335                  * If being sent from our host override ether_shost
4336                  * with the bridge MAC.  This is mandatory for ARP
4337                  * so things don't get confused.  In particular we
4338                  * don't want ARPs to get associated with link interfaces
4339                  * under the bridge which might or might not stay valid.
4340                  *
4341                  * Also override ether_shost when relaying a packet out
4342                  * the same interface it came in on, due to multi-homed
4343                  * addresses & default routes, otherwise switches will
4344                  * get very confused.
4345                  *
4346                  * Otherwise if we are in transparent mode.
4347                  */
4348                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4349                         m_copyback(m,
4350                                    offsetof(struct ether_header, ether_shost),
4351                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4352                 } else if ((bifp->if_flags & IFF_LINK0) &&
4353                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4354                         m_copyback(m,
4355                                    offsetof(struct ether_header, ether_shost),
4356                                    ETHER_ADDR_LEN,
4357                                    m->m_pkthdr.br.ether.ether_shost);
4358                 } /* else retain shost */
4359
4360                 if (ifq_is_enabled(&dst_ifp->if_snd))
4361                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4362
4363                 ifq_dispatch(dst_ifp, m, &pktattr);
4364         }
4365 }
4366
4367 static void
4368 bridge_control_dispatch(netmsg_t msg)
4369 {
4370         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4371         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4372         int error;
4373
4374         ifnet_serialize_all(bifp);
4375         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4376         ifnet_deserialize_all(bifp);
4377
4378         lwkt_replymsg(&bc_msg->base.lmsg, error);
4379 }
4380
4381 static int
4382 bridge_control(struct bridge_softc *sc, u_long cmd,
4383                bridge_ctl_t bc_func, void *bc_arg)
4384 {
4385         struct ifnet *bifp = sc->sc_ifp;
4386         struct netmsg_brctl bc_msg;
4387         int error;
4388
4389         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4390
4391         bzero(&bc_msg, sizeof(bc_msg));
4392
4393         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4394                     0, bridge_control_dispatch);
4395         bc_msg.bc_func = bc_func;
4396         bc_msg.bc_sc = sc;
4397         bc_msg.bc_arg = bc_arg;
4398
4399         ifnet_deserialize_all(bifp);
4400         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4401         ifnet_serialize_all(bifp);
4402         return error;
4403 }
4404
4405 static void
4406 bridge_add_bif_handler(netmsg_t msg)
4407 {
4408         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4409         struct bridge_softc *sc;
4410         struct bridge_iflist *bif;
4411
4412         sc = amsg->br_softc;
4413
4414         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4415         bif->bif_ifp = amsg->br_bif_ifp;
4416         bif->bif_onlist = 1;
4417         bif->bif_info = amsg->br_bif_info;
4418
4419         /*
4420          * runs through bif_info
4421          */
4422         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4423
4424         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4425
4426         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4427 }
4428
4429 static void
4430 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4431                struct ifnet *ifp)
4432 {
4433         struct netmsg_braddbif amsg;
4434
4435         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4436
4437         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4438                     0, bridge_add_bif_handler);
4439         amsg.br_softc = sc;
4440         amsg.br_bif_info = bif_info;
4441         amsg.br_bif_ifp = ifp;
4442
4443         ifnet_domsg(&amsg.base.lmsg, 0);
4444 }
4445
4446 static void
4447 bridge_del_bif_handler(netmsg_t msg)
4448 {
4449         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4450         struct bridge_softc *sc;
4451         struct bridge_iflist *bif;
4452
4453         sc = dmsg->br_softc;
4454
4455         /*
4456          * Locate the bif associated with the br_bif_info
4457          * on the current CPU
4458          */
4459         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4460         KKASSERT(bif != NULL && bif->bif_onlist);
4461
4462         /* Remove the bif from the current CPU's iflist */
4463         bif->bif_onlist = 0;
4464         TAILQ_REMOVE(dmsg->br_bif_list, bif, bif_next);
4465
4466         /* Save the removed bif for later freeing */
4467         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4468
4469         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4470 }
4471
4472 static void
4473 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4474                struct bridge_iflist_head *saved_bifs)
4475 {
4476         struct netmsg_brdelbif dmsg;
4477
4478         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4479
4480         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4481                     0, bridge_del_bif_handler);
4482         dmsg.br_softc = sc;
4483         dmsg.br_bif_info = bif_info;
4484         dmsg.br_bif_list = saved_bifs;
4485
4486         ifnet_domsg(&dmsg.base.lmsg, 0);
4487 }