kernel - Change time_second to time_uptime for all expiration calculations
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *      - Currently only supports Ethernet-like interfaces (Ethernet,
77  *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *        consider heterogenous bridges).
80  *
81  *
 * The bridge's route information is duplicated on each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
 * We choose to put timeout and dst_ifp into the shared part, so updating
 * them is cheaper than using message forwarding.  Also, there is no need
 * to use a spinlock to protect the updates: timeout and dst_ifp are not
 * related, and the order in which the individual fields are updated does
 * not matter.  The cache pollution caused by the shared part should not
 * be heavy: in a stable setup, dst_ifp will probably not change during
 * the rtnode's lifetime, while timeout is refreshed once per second; most
 * of the time, timeout and dst_ifp are only read.
108  *
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                               tcp_thread2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *      ifnet0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |
125  *  alloc rtinfo
126  *  alloc rtnode
127  * install rtnode
128  *        |
129  *        +---------->ifnet1
130  *        : fwd nmsg    |
131  *        : w/ rtinfo   |
132  *        :             |
133  *        :             |
134  *                 alloc rtnode
135  *               (w/ nmsg's rtinfo)
136  *                install rtnode
137  *                      |
138  *                      +---------->ifnet2
139  *                      : fwd nmsg    |
140  *                      : w/ rtinfo   |
141  *                      :             |
142  *                      :         same as ifnet1
143  *                                    |
144  *                                    +---------->ifnet3
145  *                                    : fwd nmsg    |
146  *                                    : w/ rtinfo   |
147  *                                    :             |
148  *                                    :         same as ifnet1
149  *                                               free nmsg
150  *                                                  :
151  *                                                  :
152  *
 * The netmsgs forwarded between protocol threads and ifnet threads are
 * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
 * most cases (route information is too precious not to be installed :).
 * Since multiple threads may try to install route information for the
 * same dst eaddr, we look up the route information in ifnet0.  This
 * lookup only needs to be performed on ifnet0, which is the starting
 * point of the route information installation process.
160  *
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1             CPU2             CPU3
165  *
166  * netisr0
167  *   |
168  * find suitable rtnodes,
169  * mark their rtinfo dead
170  *   |
171  *   | domsg <------------------------------------------+
172  *   |                                                  | replymsg
173  *   |                                                  |
174  *   V     fwdmsg           fwdmsg           fwdmsg     |
175  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
176  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
177  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
178  *                                                    free dead rtinfos
179  *
180  * All deleting/flushing operations are serialized by netisr0, so each
181  * operation only reaps the route information marked dead by itself.
182  *
183  *
184  * Bridge route information adding/deleting/flushing:
 * Since all operations are serialized by the fixed message flow between
 * ifnet threads, it is not possible to create corrupted per-cpu route
 * information.
188  *
189  *
190  *
191  * Percpu member interface list iteration with blocking operation:
192  * Since one bridge could only delete one member interface at a time and
193  * the deleted member interface is not freed after netmsg_service_sync(),
194  * following way is used to make sure that even if the certain member
195  * interface is ripped from the percpu list during the blocking operation,
196  * the iteration still could keep going:
197  *
198  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
199  *     blocking operation;
200  *     blocking operation;
201  *     ...
202  *     ...
203  *     if (nbif != NULL && !nbif->bif_onlist) {
204  *         KKASSERT(bif->bif_onlist);
205  *         nbif = TAILQ_NEXT(bif, bif_next);
206  *     }
207  * }
208  *
209  * As mentioned above only one member interface could be unlinked from the
210  * percpu member interface list, so either bif or nbif may be not on the list,
211  * but _not_ both.  To keep the list iteration, we don't care about bif, but
212  * only nbif.  Since removed member interface will only be freed after we
213  * finish our work, it is safe to access any field in an unlinked bif (here
214  * bif_onlist).  If nbif is no longer on the list, then bif must be on the
215  * list, so we change nbif to the next element of bif and keep going.
216  */
217
218 #include "opt_inet.h"
219 #include "opt_inet6.h"
220
221 #include <sys/param.h>
222 #include <sys/mbuf.h>
223 #include <sys/malloc.h>
224 #include <sys/protosw.h>
225 #include <sys/systm.h>
226 #include <sys/time.h>
227 #include <sys/socket.h> /* for net/if.h */
228 #include <sys/sockio.h>
229 #include <sys/ctype.h>  /* string functions */
230 #include <sys/kernel.h>
231 #include <sys/random.h>
232 #include <sys/sysctl.h>
233 #include <sys/module.h>
234 #include <sys/proc.h>
235 #include <sys/priv.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263 #include <net/netisr2.h>
264
265 #include <net/route.h>
266 #include <sys/in_cksum.h>
267
268 /*
269  * Size of the route hash table.  Must be a power of two.
270  */
271 #ifndef BRIDGE_RTHASH_SIZE
272 #define BRIDGE_RTHASH_SIZE              1024
273 #endif
274
275 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
276
277 /*
278  * Maximum number of addresses to cache.
279  */
280 #ifndef BRIDGE_RTABLE_MAX
281 #define BRIDGE_RTABLE_MAX               100
282 #endif
283
284 /*
285  * Spanning tree defaults.
286  */
287 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
288 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
289 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
290 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
291 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
292 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
293 #define BSTP_DEFAULT_PATH_COST          55
294
295 /*
296  * Timeout (in seconds) for entries learned dynamically.
297  */
298 #ifndef BRIDGE_RTABLE_TIMEOUT
299 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
300 #endif
301
302 /*
303  * Number of seconds between walks of the route list.
304  */
305 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
306 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
307 #endif
308
309 /*
310  * List of capabilities to mask on the member interface.
311  */
312 #define BRIDGE_IFCAPS_MASK              (IFCAP_TXCSUM | IFCAP_TSO)
313
/*
 * Signature shared by all bridge_ioctl_*() control handlers: takes the
 * bridge softc and an opaque argument, returns an int status.
 */
typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
315
316 struct netmsg_brctl {
317         struct netmsg_base      base;
318         bridge_ctl_t            bc_func;
319         struct bridge_softc     *bc_sc;
320         void                    *bc_arg;
321 };
322
323 struct netmsg_brsaddr {
324         struct netmsg_base      base;
325         struct bridge_softc     *br_softc;
326         struct ifnet            *br_dst_if;
327         struct bridge_rtinfo    *br_rtinfo;
328         int                     br_setflags;
329         uint8_t                 br_dst[ETHER_ADDR_LEN];
330         uint8_t                 br_flags;
331 };
332
333 struct netmsg_braddbif {
334         struct netmsg_base      base;
335         struct bridge_softc     *br_softc;
336         struct bridge_ifinfo    *br_bif_info;
337         struct ifnet            *br_bif_ifp;
338 };
339
340 struct netmsg_brdelbif {
341         struct netmsg_base      base;
342         struct bridge_softc     *br_softc;
343         struct bridge_ifinfo    *br_bif_info;
344         struct bridge_iflist_head *br_bif_list;
345 };
346
347 struct netmsg_brsflags {
348         struct netmsg_base      base;
349         struct bridge_softc     *br_softc;
350         struct bridge_ifinfo    *br_bif_info;
351         uint32_t                br_bif_flags;
352 };
353
/*
 * Tag returned by EVENTHANDLER_REGISTER() for the ifnet_detach_event
 * hook; set on module load and used to deregister on unload.
 */
eventhandler_tag        bridge_detach_cookie = NULL;

/*
 * Bridge hook pointers, defined outside this file.  bridge_modevent()
 * installs/clears the input, output and interface hooks.
 */
extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);

/* Starts out as BRIDGE_RTABLE_PRUNE_PERIOD (seconds between route list walks). */
static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
362
363 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
364 static int      bridge_clone_destroy(struct ifnet *);
365
366 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
367 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
368 static void     bridge_ifdetach(void *, struct ifnet *);
369 static void     bridge_init(void *);
370 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
371 static void     bridge_stop(struct ifnet *);
372 static void     bridge_start(struct ifnet *, struct ifaltq_subque *);
373 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
374 static int      bridge_output(struct ifnet *, struct mbuf *);
375 static struct ifnet *bridge_interface(void *if_bridge);
376
377 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
378
379 static void     bridge_timer_handler(netmsg_t);
380 static void     bridge_timer(void *);
381
382 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
383 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
384                     struct mbuf *);
385 static void     bridge_span(struct bridge_softc *, struct mbuf *);
386
387 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
388                     struct ifnet *, uint8_t);
389 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
390 static void     bridge_rtreap(struct bridge_softc *);
391 static void     bridge_rtreap_async(struct bridge_softc *);
392 static void     bridge_rttrim(struct bridge_softc *);
393 static int      bridge_rtage_finddead(struct bridge_softc *);
394 static void     bridge_rtage(struct bridge_softc *);
395 static void     bridge_rtflush(struct bridge_softc *, int);
396 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
397 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
398                     struct ifnet *, uint8_t);
399 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
400 static void     bridge_rtreap_handler(netmsg_t);
401 static void     bridge_rtinstall_handler(netmsg_t);
402 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
403                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
404
405 static void     bridge_rtable_init(struct bridge_softc *);
406 static void     bridge_rtable_fini(struct bridge_softc *);
407
408 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
409 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
410                     const uint8_t *);
411 static void     bridge_rtnode_insert(struct bridge_softc *,
412                     struct bridge_rtnode *);
413 static void     bridge_rtnode_destroy(struct bridge_softc *,
414                     struct bridge_rtnode *);
415
416 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
417                     const char *name);
418 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
419                     struct ifnet *ifp);
420 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
421                     struct bridge_ifinfo *);
422 static void     bridge_delete_member(struct bridge_softc *,
423                     struct bridge_iflist *, int);
424 static void     bridge_delete_span(struct bridge_softc *,
425                     struct bridge_iflist *);
426
427 static int      bridge_control(struct bridge_softc *, u_long,
428                                bridge_ctl_t, void *);
429 static int      bridge_ioctl_init(struct bridge_softc *, void *);
430 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
431 static int      bridge_ioctl_add(struct bridge_softc *, void *);
432 static int      bridge_ioctl_del(struct bridge_softc *, void *);
433 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
434                                 struct bridge_iflist *bif, struct ifbreq *req);
435 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
436 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
437 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
439 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
440 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
441 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
442 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
443 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
444 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
445 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
446 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
447 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
448 static int      bridge_ioctl_reinit(struct bridge_softc *, void *);
449 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
450 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
451 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
452 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
453 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
455 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
456 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
457 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
458 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
459 static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
460 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
461                     int);
462 static int      bridge_ip_checkbasic(struct mbuf **mp);
463 #ifdef INET6
464 static int      bridge_ip6_checkbasic(struct mbuf **mp);
465 #endif /* INET6 */
466 static int      bridge_fragment(struct ifnet *, struct mbuf *,
467                     struct ether_header *, int, struct llc *);
468 static void     bridge_enqueue_handler(netmsg_t);
469 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
470                     struct mbuf *, int);
471
472 static void     bridge_del_bif_handler(netmsg_t);
473 static void     bridge_add_bif_handler(netmsg_t);
474 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
475                     struct bridge_iflist_head *);
476 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
477                     struct ifnet *);
478
/* Sysctl node "bridge" under _net_link, followed by its integer knobs. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");

static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
static int pfil_member = 1; /* run pfil hooks on the member interface */
static int bridge_debug;    /* bridge debug mode, exported as "debug" */
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
    &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
    &pfil_bridge, 0, "Packet filter on the bridge interface");
SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
    &pfil_member, 0, "Packet filter on the member interface");
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
    &bridge_debug, 0, "Bridge debug mode");
494
495 struct bridge_control_arg {
496         union {
497                 struct ifbreq ifbreq;
498                 struct ifbifconf ifbifconf;
499                 struct ifbareq ifbareq;
500                 struct ifbaconf ifbaconf;
501                 struct ifbrparam ifbrparam;
502         } bca_u;
503         int     bca_len;
504         void    *bca_uptr;
505         void    *bca_kptr;
506 };
507
508 struct bridge_control {
509         bridge_ctl_t    bc_func;
510         int             bc_argsize;
511         int             bc_flags;
512 };
513
514 #define BC_F_COPYIN             0x01    /* copy arguments in */
515 #define BC_F_COPYOUT            0x02    /* copy arguments out */
516 #define BC_F_SUSER              0x04    /* do super-user check */
517
518 const struct bridge_control bridge_control_table[] = {
519         { bridge_ioctl_add,             sizeof(struct ifbreq),
520           BC_F_COPYIN|BC_F_SUSER },
521         { bridge_ioctl_del,             sizeof(struct ifbreq),
522           BC_F_COPYIN|BC_F_SUSER },
523
524         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
525           BC_F_COPYIN|BC_F_COPYOUT },
526         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
527           BC_F_COPYIN|BC_F_SUSER },
528
529         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
530           BC_F_COPYIN|BC_F_SUSER },
531         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
532           BC_F_COPYOUT },
533
534         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
535           BC_F_COPYIN|BC_F_COPYOUT },
536         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
537           BC_F_COPYIN|BC_F_COPYOUT },
538
539         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
540           BC_F_COPYIN|BC_F_SUSER },
541
542         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
543           BC_F_COPYIN|BC_F_SUSER },
544         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
545           BC_F_COPYOUT },
546
547         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
548           BC_F_COPYIN|BC_F_SUSER },
549
550         { bridge_ioctl_flush,           sizeof(struct ifbreq),
551           BC_F_COPYIN|BC_F_SUSER },
552
553         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
554           BC_F_COPYOUT },
555         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
556           BC_F_COPYIN|BC_F_SUSER },
557
558         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
559           BC_F_COPYOUT },
560         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
561           BC_F_COPYIN|BC_F_SUSER },
562
563         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
564           BC_F_COPYOUT },
565         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
566           BC_F_COPYIN|BC_F_SUSER },
567
568         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
569           BC_F_COPYOUT },
570         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
571           BC_F_COPYIN|BC_F_SUSER },
572
573         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
574           BC_F_COPYIN|BC_F_SUSER },
575
576         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
577           BC_F_COPYIN|BC_F_SUSER },
578
579         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
580           BC_F_COPYIN|BC_F_SUSER },
581         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
582           BC_F_COPYIN|BC_F_SUSER },
583
584         { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
585           BC_F_COPYIN|BC_F_SUSER },
586
587 };
588 static const int bridge_control_table_size = NELEM(bridge_control_table);
589
/* List of all bridge instances, linked through sc_list. */
LIST_HEAD(, bridge_softc) bridge_list;

/*
 * Interface cloner for "bridge" devices, wired to
 * bridge_clone_create() and bridge_clone_destroy().
 */
struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
                                bridge_clone_create,
                                bridge_clone_destroy, 0, IF_MAXUNIT);
595
596 static int
597 bridge_modevent(module_t mod, int type, void *data)
598 {
599         switch (type) {
600         case MOD_LOAD:
601                 LIST_INIT(&bridge_list);
602                 if_clone_attach(&bridge_cloner);
603                 bridge_input_p = bridge_input;
604                 bridge_output_p = bridge_output;
605                 bridge_interface_p = bridge_interface;
606                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
607                     ifnet_detach_event, bridge_ifdetach, NULL,
608                     EVENTHANDLER_PRI_ANY);
609 #if 0 /* notyet */
610                 bstp_linkstate_p = bstp_linkstate;
611 #endif
612                 break;
613         case MOD_UNLOAD:
614                 if (!LIST_EMPTY(&bridge_list))
615                         return (EBUSY);
616                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
617                     bridge_detach_cookie);
618                 if_clone_detach(&bridge_cloner);
619                 bridge_input_p = NULL;
620                 bridge_output_p = NULL;
621                 bridge_interface_p = NULL;
622 #if 0 /* notyet */
623                 bstp_linkstate_p = NULL;
624 #endif
625                 break;
626         default:
627                 return (EOPNOTSUPP);
628         }
629         return (0);
630 }
631
/*
 * Module description: module name, event handler (bridge_modevent),
 * and a third field left as 0.
 */
static moduledata_t bridge_mod = {
        "if_bridge",
        bridge_modevent,
        0
};

DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
639
640
641 /*
642  * bridge_clone_create:
643  *
644  *      Create a new bridge instance.
645  */
646 static int
647 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
648 {
649         struct bridge_softc *sc;
650         struct ifnet *ifp;
651         u_char eaddr[6];
652         int cpu, rnd;
653
654         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
655         ifp = sc->sc_ifp = &sc->sc_if;
656
657         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
658         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
659         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
660         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
661         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
662         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
663         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
664
665         /* Initialize our routing table. */
666         bridge_rtable_init(sc);
667
668         callout_init(&sc->sc_brcallout);
669         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
670                     MSGF_DROPABLE, bridge_timer_handler);
671         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
672
673         callout_init(&sc->sc_bstpcallout);
674         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
675                     MSGF_DROPABLE, bstp_tick_handler);
676         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
677
678         /* Initialize per-cpu member iface lists */
679         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
680                                  M_DEVBUF, M_WAITOK);
681         for (cpu = 0; cpu < ncpus; ++cpu)
682                 TAILQ_INIT(&sc->sc_iflists[cpu]);
683
684         TAILQ_INIT(&sc->sc_spanlist);
685
686         ifp->if_softc = sc;
687         if_initname(ifp, ifc->ifc_name, unit);
688         ifp->if_mtu = ETHERMTU;
689         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
690         ifp->if_ioctl = bridge_ioctl;
691         ifp->if_start = bridge_start;
692         ifp->if_init = bridge_init;
693         ifp->if_type = IFT_ETHER;
694         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
695         ifq_set_ready(&ifp->if_snd);
696         ifp->if_hdrlen = ETHER_HDR_LEN;
697
698         /*
699          * Generate a random ethernet address and use the private AC:DE:48
700          * OUI code.
701          */
702         rnd = karc4random();
703         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
704         rnd = karc4random();
705         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
706
707         eaddr[0] &= ~1; /* clear multicast bit */
708         eaddr[0] |= 2;  /* set the LAA bit */
709
710         ether_ifattach(ifp, eaddr, NULL);
711         /* Now undo some of the damage... */
712         ifp->if_baudrate = 0;
713         /*ifp->if_type = IFT_BRIDGE;*/
714
715         crit_enter();   /* XXX MP */
716         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
717         crit_exit();
718
719         return (0);
720 }
721
722 static void
723 bridge_delete_dispatch(netmsg_t msg)
724 {
725         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
726         struct ifnet *bifp = sc->sc_ifp;
727         struct bridge_iflist *bif;
728
729         ifnet_serialize_all(bifp);
730
731         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
732                 bridge_delete_member(sc, bif, 0);
733
734         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
735                 bridge_delete_span(sc, bif);
736
737         ifnet_deserialize_all(bifp);
738
739         lwkt_replymsg(&msg->lmsg, 0);
740 }
741
742 /*
743  * bridge_clone_destroy:
744  *
745  *      Destroy a bridge instance.
746  */
747 static int
748 bridge_clone_destroy(struct ifnet *ifp)
749 {
750         struct bridge_softc *sc = ifp->if_softc;
751         struct netmsg_base msg;
752
753         ifnet_serialize_all(ifp);
754
755         bridge_stop(ifp);
756         ifp->if_flags &= ~IFF_UP;
757
758         ifnet_deserialize_all(ifp);
759
760         netmsg_init(&msg, NULL, &curthread->td_msgport,
761                     0, bridge_delete_dispatch);
762         msg.lmsg.u.ms_resultp = sc;
763         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
764
765         crit_enter();   /* XXX MP */
766         LIST_REMOVE(sc, sc_list);
767         crit_exit();
768
769         ether_ifdetach(ifp);
770
771         /* Tear down the routing table. */
772         bridge_rtable_fini(sc);
773
774         /* Free per-cpu member iface lists */
775         kfree(sc->sc_iflists, M_DEVBUF);
776
777         kfree(sc, M_DEVBUF);
778
779         return 0;
780 }
781
782 /*
783  * bridge_ioctl:
784  *
785  *      Handle a control request from the operator.
786  */
787 static int
788 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
789 {
790         struct bridge_softc *sc = ifp->if_softc;
791         struct bridge_control_arg args;
792         struct ifdrv *ifd = (struct ifdrv *) data;
793         const struct bridge_control *bc;
794         int error = 0;
795
796         ASSERT_IFNET_SERIALIZED_ALL(ifp);
797
798         switch (cmd) {
799         case SIOCADDMULTI:
800         case SIOCDELMULTI:
801                 break;
802
803         case SIOCGDRVSPEC:
804         case SIOCSDRVSPEC:
805                 if (ifd->ifd_cmd >= bridge_control_table_size) {
806                         error = EINVAL;
807                         break;
808                 }
809                 bc = &bridge_control_table[ifd->ifd_cmd];
810
811                 if (cmd == SIOCGDRVSPEC &&
812                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
813                         error = EINVAL;
814                         break;
815                 } else if (cmd == SIOCSDRVSPEC &&
816                            (bc->bc_flags & BC_F_COPYOUT)) {
817                         error = EINVAL;
818                         break;
819                 }
820
821                 if (bc->bc_flags & BC_F_SUSER) {
822                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
823                         if (error)
824                                 break;
825                 }
826
827                 if (ifd->ifd_len != bc->bc_argsize ||
828                     ifd->ifd_len > sizeof(args.bca_u)) {
829                         error = EINVAL;
830                         break;
831                 }
832
833                 memset(&args, 0, sizeof(args));
834                 if (bc->bc_flags & BC_F_COPYIN) {
835                         error = copyin(ifd->ifd_data, &args.bca_u,
836                                        ifd->ifd_len);
837                         if (error)
838                                 break;
839                 }
840
841                 error = bridge_control(sc, cmd, bc->bc_func, &args);
842                 if (error) {
843                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
844                         break;
845                 }
846
847                 if (bc->bc_flags & BC_F_COPYOUT) {
848                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
849                         if (args.bca_len != 0) {
850                                 KKASSERT(args.bca_kptr != NULL);
851                                 if (!error) {
852                                         error = copyout(args.bca_kptr,
853                                                 args.bca_uptr, args.bca_len);
854                                 }
855                                 kfree(args.bca_kptr, M_TEMP);
856                         } else {
857                                 KKASSERT(args.bca_kptr == NULL);
858                         }
859                 } else {
860                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
861                 }
862                 break;
863
864         case SIOCSIFFLAGS:
865                 if (!(ifp->if_flags & IFF_UP) &&
866                     (ifp->if_flags & IFF_RUNNING)) {
867                         /*
868                          * If interface is marked down and it is running,
869                          * then stop it.
870                          */
871                         bridge_stop(ifp);
872                 } else if ((ifp->if_flags & IFF_UP) &&
873                     !(ifp->if_flags & IFF_RUNNING)) {
874                         /*
875                          * If interface is marked up and it is stopped, then
876                          * start it.
877                          */
878                         ifp->if_init(sc);
879                 }
880
881                 /*
882                  * If running and link flag state change we have to
883                  * reinitialize as well.
884                  */
885                 if ((ifp->if_flags & IFF_RUNNING) &&
886                     (ifp->if_flags & (IFF_LINK0|IFF_LINK1|IFF_LINK2)) !=
887                     sc->sc_copy_flags) {
888                         sc->sc_copy_flags = ifp->if_flags &
889                                         (IFF_LINK0|IFF_LINK1|IFF_LINK2);
890                         bridge_control(sc, 0, bridge_ioctl_reinit, NULL);
891                 }
892
893                 break;
894
895         case SIOCSIFMTU:
896                 /* Do not allow the MTU to be changed on the bridge */
897                 error = EINVAL;
898                 break;
899
900         default:
901                 error = ether_ioctl(ifp, cmd, data);
902                 break;
903         }
904         return (error);
905 }
906
907 /*
908  * bridge_mutecaps:
909  *
910  *      Clear or restore unwanted capabilities on the member interface
911  */
912 static void
913 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
914 {
915         struct ifreq ifr;
916
917         if (ifp->if_ioctl == NULL)
918                 return;
919
920         bzero(&ifr, sizeof(ifr));
921         ifr.ifr_reqcap = ifp->if_capenable;
922
923         if (mute) {
924                 /* mask off and save capabilities */
925                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
926                 if (bif_info->bifi_mutecap != 0)
927                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
928         } else {
929                 /* restore muted capabilities */
930                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
931         }
932
933         if (bif_info->bifi_mutecap != 0) {
934                 ifnet_serialize_all(ifp);
935                 ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
936                 ifnet_deserialize_all(ifp);
937         }
938 }
939
940 /*
941  * bridge_lookup_member:
942  *
943  *      Lookup a bridge member interface.
944  */
945 static struct bridge_iflist *
946 bridge_lookup_member(struct bridge_softc *sc, const char *name)
947 {
948         struct bridge_iflist *bif;
949
950         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
951                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
952                         return (bif);
953         }
954         return (NULL);
955 }
956
957 /*
958  * bridge_lookup_member_if:
959  *
960  *      Lookup a bridge member interface by ifnet*.
961  */
962 static struct bridge_iflist *
963 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
964 {
965         struct bridge_iflist *bif;
966
967         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
968                 if (bif->bif_ifp == member_ifp)
969                         return (bif);
970         }
971         return (NULL);
972 }
973
974 /*
975  * bridge_lookup_member_ifinfo:
976  *
977  *      Lookup a bridge member interface by bridge_ifinfo.
978  */
979 static struct bridge_iflist *
980 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
981                             struct bridge_ifinfo *bif_info)
982 {
983         struct bridge_iflist *bif;
984
985         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
986                 if (bif->bif_info == bif_info)
987                         return (bif);
988         }
989         return (NULL);
990 }
991
992 /*
993  * bridge_delete_member:
994  *
995  *      Delete the specified member interface.
996  */
997 static void
998 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
999     int gone)
1000 {
1001         struct ifnet *ifs = bif->bif_ifp;
1002         struct ifnet *bifp = sc->sc_ifp;
1003         struct bridge_ifinfo *bif_info = bif->bif_info;
1004         struct bridge_iflist_head saved_bifs;
1005
1006         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1007         KKASSERT(bif_info != NULL);
1008
1009         ifs->if_bridge = NULL;
1010
1011         /*
1012          * Release bridge interface's serializer:
1013          * - To avoid possible dead lock.
1014          * - Various sync operation will block the current thread.
1015          */
1016         ifnet_deserialize_all(bifp);
1017
1018         if (!gone) {
1019                 switch (ifs->if_type) {
1020                 case IFT_ETHER:
1021                 case IFT_L2VLAN:
1022                         /*
1023                          * Take the interface out of promiscuous mode.
1024                          */
1025                         ifpromisc(ifs, 0);
1026                         bridge_mutecaps(bif_info, ifs, 0);
1027                         break;
1028
1029                 case IFT_GIF:
1030                         break;
1031
1032                 default:
1033                         panic("bridge_delete_member: impossible");
1034                         break;
1035                 }
1036         }
1037
1038         /*
1039          * Remove bifs from percpu linked list.
1040          *
1041          * Removed bifs are not freed immediately, instead,
1042          * they are saved in saved_bifs.  They will be freed
1043          * after we make sure that no one is accessing them,
1044          * i.e. after following netmsg_service_sync()
1045          */
1046         TAILQ_INIT(&saved_bifs);
1047         bridge_del_bif(sc, bif_info, &saved_bifs);
1048
1049         /*
1050          * Make sure that all protocol threads:
1051          * o  see 'ifs' if_bridge is changed
1052          * o  know that bif is removed from the percpu linked list
1053          */
1054         netmsg_service_sync();
1055
1056         /*
1057          * Free the removed bifs
1058          */
1059         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1060         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1061                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1062                 kfree(bif, M_DEVBUF);
1063         }
1064
1065         /* See the comment in bridge_ioctl_stop() */
1066         bridge_rtmsg_sync(sc);
1067         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1068
1069         ifnet_serialize_all(bifp);
1070
1071         if (bifp->if_flags & IFF_RUNNING)
1072                 bstp_initialization(sc);
1073
1074         /*
1075          * Free the bif_info after bstp_initialization(), so that
1076          * bridge_softc.sc_root_port will not reference a dangling
1077          * pointer.
1078          */
1079         kfree(bif_info, M_DEVBUF);
1080 }
1081
1082 /*
1083  * bridge_delete_span:
1084  *
1085  *      Delete the specified span interface.
1086  */
1087 static void
1088 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1089 {
1090         KASSERT(bif->bif_ifp->if_bridge == NULL,
1091             ("%s: not a span interface", __func__));
1092
1093         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1094         kfree(bif, M_DEVBUF);
1095 }
1096
1097 static int
1098 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1099 {
1100         struct ifnet *ifp = sc->sc_ifp;
1101
1102         if (ifp->if_flags & IFF_RUNNING)
1103                 return 0;
1104
1105         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1106             bridge_timer, sc);
1107
1108         ifp->if_flags |= IFF_RUNNING;
1109         bstp_initialization(sc);
1110         return 0;
1111 }
1112
1113 static int
1114 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1115 {
1116         struct ifnet *ifp = sc->sc_ifp;
1117
1118         if ((ifp->if_flags & IFF_RUNNING) == 0)
1119                 return 0;
1120
1121         callout_stop(&sc->sc_brcallout);
1122
1123         crit_enter();
1124         lwkt_dropmsg(&sc->sc_brtimemsg.lmsg);
1125         crit_exit();
1126
1127         bstp_stop(sc);
1128
1129         ifp->if_flags &= ~IFF_RUNNING;
1130
1131         ifnet_deserialize_all(ifp);
1132
1133         /* Let everyone know that we are stopped */
1134         netmsg_service_sync();
1135
1136         /*
1137          * Sync ifnetX msgports in the order we forward rtnode
1138          * installation message.  This is used to make sure that
1139          * all rtnode installation messages sent by bridge_rtupdate()
1140          * during above netmsg_service_sync() are flushed.
1141          */
1142         bridge_rtmsg_sync(sc);
1143         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1144
1145         ifnet_serialize_all(ifp);
1146         return 0;
1147 }
1148
1149 static int
1150 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1151 {
1152         struct ifbreq *req = arg;
1153         struct bridge_iflist *bif;
1154         struct bridge_ifinfo *bif_info;
1155         struct ifnet *ifs, *bifp;
1156         int error = 0;
1157
1158         bifp = sc->sc_ifp;
1159         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1160
1161         ifs = ifunit(req->ifbr_ifsname);
1162         if (ifs == NULL)
1163                 return (ENOENT);
1164
1165         /* If it's in the span list, it can't be a member. */
1166         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1167                 if (ifs == bif->bif_ifp)
1168                         return (EBUSY);
1169
1170         /* Allow the first Ethernet member to define the MTU */
1171         if (ifs->if_type != IFT_GIF) {
1172                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1173                         bifp->if_mtu = ifs->if_mtu;
1174                 } else if (bifp->if_mtu != ifs->if_mtu) {
1175                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1176                         return (EINVAL);
1177                 }
1178         }
1179
1180         if (ifs->if_bridge == sc)
1181                 return (EEXIST);
1182
1183         if (ifs->if_bridge != NULL)
1184                 return (EBUSY);
1185
1186         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1187         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1188         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1189         bif_info->bifi_ifp = ifs;
1190         bif_info->bifi_bond_weight = 1;
1191
1192         /*
1193          * Release bridge interface's serializer:
1194          * - To avoid possible dead lock.
1195          * - Various sync operation will block the current thread.
1196          */
1197         ifnet_deserialize_all(bifp);
1198
1199         switch (ifs->if_type) {
1200         case IFT_ETHER:
1201         case IFT_L2VLAN:
1202                 /*
1203                  * Place the interface into promiscuous mode.
1204                  */
1205                 error = ifpromisc(ifs, 1);
1206                 if (error) {
1207                         ifnet_serialize_all(bifp);
1208                         goto out;
1209                 }
1210                 bridge_mutecaps(bif_info, ifs, 1);
1211                 break;
1212
1213         case IFT_GIF: /* :^) */
1214                 break;
1215
1216         default:
1217                 error = EINVAL;
1218                 ifnet_serialize_all(bifp);
1219                 goto out;
1220         }
1221
1222         /*
1223          * Add bifs to percpu linked lists
1224          */
1225         bridge_add_bif(sc, bif_info, ifs);
1226
1227         ifnet_serialize_all(bifp);
1228
1229         if (bifp->if_flags & IFF_RUNNING)
1230                 bstp_initialization(sc);
1231         else
1232                 bstp_stop(sc);
1233
1234         /*
1235          * Everything has been setup, so let the member interface
1236          * deliver packets to this bridge on its input/output path.
1237          */
1238         ifs->if_bridge = sc;
1239 out:
1240         if (error) {
1241                 if (bif_info != NULL)
1242                         kfree(bif_info, M_DEVBUF);
1243         }
1244         return (error);
1245 }
1246
1247 static int
1248 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1249 {
1250         struct ifbreq *req = arg;
1251         struct bridge_iflist *bif;
1252
1253         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1254         if (bif == NULL)
1255                 return (ENOENT);
1256
1257         bridge_delete_member(sc, bif, 0);
1258
1259         return (0);
1260 }
1261
1262 static int
1263 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1264 {
1265         struct ifbreq *req = arg;
1266         struct bridge_iflist *bif;
1267
1268         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1269         if (bif == NULL)
1270                 return (ENOENT);
1271         bridge_ioctl_fillflags(sc, bif, req);
1272         return (0);
1273 }
1274
1275 static void
1276 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1277                        struct ifbreq *req)
1278 {
1279         req->ifbr_ifsflags = bif->bif_flags;
1280         req->ifbr_state = bif->bif_state;
1281         req->ifbr_priority = bif->bif_priority;
1282         req->ifbr_path_cost = bif->bif_path_cost;
1283         req->ifbr_bond_weight = bif->bif_bond_weight;
1284         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1285         if (bif->bif_flags & IFBIF_STP) {
1286                 req->ifbr_peer_root = bif->bif_peer_root;
1287                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1288                 req->ifbr_peer_cost = bif->bif_peer_cost;
1289                 req->ifbr_peer_port = bif->bif_peer_port;
1290                 if (bstp_supersedes_port_info(sc, bif)) {
1291                         req->ifbr_designated_root = bif->bif_peer_root;
1292                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1293                         req->ifbr_designated_cost = bif->bif_peer_cost;
1294                         req->ifbr_designated_port = bif->bif_peer_port;
1295                 } else {
1296                         req->ifbr_designated_root = sc->sc_bridge_id;
1297                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1298                         req->ifbr_designated_cost = bif->bif_path_cost +
1299                                                     bif->bif_peer_cost;
1300                         req->ifbr_designated_port = bif->bif_port_id;
1301                 }
1302         } else {
1303                 req->ifbr_peer_root = 0;
1304                 req->ifbr_peer_bridge = 0;
1305                 req->ifbr_peer_cost = 0;
1306                 req->ifbr_peer_port = 0;
1307                 req->ifbr_designated_root = 0;
1308                 req->ifbr_designated_bridge = 0;
1309                 req->ifbr_designated_cost = 0;
1310                 req->ifbr_designated_port = 0;
1311         }
1312 }
1313
1314 static int
1315 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1316 {
1317         struct ifbreq *req = arg;
1318         struct bridge_iflist *bif;
1319         struct ifnet *bifp = sc->sc_ifp;
1320
1321         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1322         if (bif == NULL)
1323                 return (ENOENT);
1324
1325         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1326                 /* SPAN is readonly */
1327                 return (EINVAL);
1328         }
1329
1330         if (req->ifbr_ifsflags & IFBIF_STP) {
1331                 switch (bif->bif_ifp->if_type) {
1332                 case IFT_ETHER:
1333                         /* These can do spanning tree. */
1334                         break;
1335
1336                 default:
1337                         /* Nothing else can. */
1338                         return (EINVAL);
1339                 }
1340         }
1341
1342         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1343                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1344         if (bifp->if_flags & IFF_RUNNING)
1345                 bstp_initialization(sc);
1346
1347         return (0);
1348 }
1349
1350 static int
1351 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1352 {
1353         struct ifbrparam *param = arg;
1354         struct ifnet *ifp = sc->sc_ifp;
1355
1356         sc->sc_brtmax = param->ifbrp_csize;
1357
1358         ifnet_deserialize_all(ifp);
1359         bridge_rttrim(sc);
1360         ifnet_serialize_all(ifp);
1361
1362         return (0);
1363 }
1364
1365 static int
1366 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1367 {
1368         struct ifbrparam *param = arg;
1369
1370         param->ifbrp_csize = sc->sc_brtmax;
1371
1372         return (0);
1373 }
1374
1375 static int
1376 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1377 {
1378         struct bridge_control_arg *bc_arg = arg;
1379         struct ifbifconf *bifc = arg;
1380         struct bridge_iflist *bif;
1381         struct ifbreq *breq;
1382         int count, len;
1383
1384         count = 0;
1385         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1386                 count++;
1387         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1388                 count++;
1389
1390         if (bifc->ifbic_len == 0) {
1391                 bifc->ifbic_len = sizeof(*breq) * count;
1392                 return 0;
1393         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1394                 bifc->ifbic_len = 0;
1395                 return 0;
1396         }
1397
1398         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1399         KKASSERT(len >= sizeof(*breq));
1400
1401         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1402         if (breq == NULL) {
1403                 bifc->ifbic_len = 0;
1404                 return ENOMEM;
1405         }
1406         bc_arg->bca_kptr = breq;
1407
1408         count = 0;
1409         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1410                 if (len < sizeof(*breq))
1411                         break;
1412
1413                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1414                         sizeof(breq->ifbr_ifsname));
1415                 bridge_ioctl_fillflags(sc, bif, breq);
1416                 breq++;
1417                 count++;
1418                 len -= sizeof(*breq);
1419         }
1420         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1421                 if (len < sizeof(*breq))
1422                         break;
1423
1424                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1425                         sizeof(breq->ifbr_ifsname));
1426                 breq->ifbr_ifsflags = bif->bif_flags;
1427                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1428                 breq++;
1429                 count++;
1430                 len -= sizeof(*breq);
1431         }
1432
1433         bifc->ifbic_len = sizeof(*breq) * count;
1434         KKASSERT(bifc->ifbic_len > 0);
1435
1436         bc_arg->bca_len = bifc->ifbic_len;
1437         bc_arg->bca_uptr = bifc->ifbic_req;
1438         return 0;
1439 }
1440
1441 static int
1442 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1443 {
1444         struct bridge_control_arg *bc_arg = arg;
1445         struct ifbaconf *bac = arg;
1446         struct bridge_rtnode *brt;
1447         struct ifbareq *bareq;
1448         int count, len;
1449
1450         count = 0;
1451         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1452                 count++;
1453
1454         if (bac->ifbac_len == 0) {
1455                 bac->ifbac_len = sizeof(*bareq) * count;
1456                 return 0;
1457         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1458                 bac->ifbac_len = 0;
1459                 return 0;
1460         }
1461
1462         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1463         KKASSERT(len >= sizeof(*bareq));
1464
1465         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1466         if (bareq == NULL) {
1467                 bac->ifbac_len = 0;
1468                 return ENOMEM;
1469         }
1470         bc_arg->bca_kptr = bareq;
1471
1472         count = 0;
1473         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1474                 struct bridge_rtinfo *bri = brt->brt_info;
1475                 time_t expire;
1476
1477                 if (len < sizeof(*bareq))
1478                         break;
1479
1480                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1481                         sizeof(bareq->ifba_ifsname));
1482                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1483                 expire = bri->bri_expire;
1484                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1485                     time_uptime < expire)
1486                         bareq->ifba_expire = expire - time_uptime;
1487                 else
1488                         bareq->ifba_expire = 0;
1489                 bareq->ifba_flags = bri->bri_flags;
1490                 bareq++;
1491                 count++;
1492                 len -= sizeof(*bareq);
1493         }
1494
1495         bac->ifbac_len = sizeof(*bareq) * count;
1496         KKASSERT(bac->ifbac_len > 0);
1497
1498         bc_arg->bca_len = bac->ifbac_len;
1499         bc_arg->bca_uptr = bac->ifbac_req;
1500         return 0;
1501 }
1502
1503 static int
1504 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1505 {
1506         struct ifbareq *req = arg;
1507         struct bridge_iflist *bif;
1508         struct ifnet *ifp = sc->sc_ifp;
1509         int error;
1510
1511         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1512
1513         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1514         if (bif == NULL)
1515                 return (ENOENT);
1516
1517         ifnet_deserialize_all(ifp);
1518         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1519                                req->ifba_flags);
1520         ifnet_serialize_all(ifp);
1521         return (error);
1522 }
1523
1524 static int
1525 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1526 {
1527         struct ifbrparam *param = arg;
1528
1529         sc->sc_brttimeout = param->ifbrp_ctime;
1530
1531         return (0);
1532 }
1533
1534 static int
1535 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1536 {
1537         struct ifbrparam *param = arg;
1538
1539         param->ifbrp_ctime = sc->sc_brttimeout;
1540
1541         return (0);
1542 }
1543
1544 static int
1545 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1546 {
1547         struct ifbareq *req = arg;
1548         struct ifnet *ifp = sc->sc_ifp;
1549         int error;
1550
1551         ifnet_deserialize_all(ifp);
1552         error = bridge_rtdaddr(sc, req->ifba_dst);
1553         ifnet_serialize_all(ifp);
1554         return error;
1555 }
1556
1557 static int
1558 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1559 {
1560         struct ifbreq *req = arg;
1561         struct ifnet *ifp = sc->sc_ifp;
1562
1563         ifnet_deserialize_all(ifp);
1564         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1565         ifnet_serialize_all(ifp);
1566
1567         return (0);
1568 }
1569
1570 static int
1571 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1572 {
1573         struct ifbrparam *param = arg;
1574
1575         param->ifbrp_prio = sc->sc_bridge_priority;
1576
1577         return (0);
1578 }
1579
1580 static int
1581 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1582 {
1583         struct ifbrparam *param = arg;
1584
1585         sc->sc_bridge_priority = param->ifbrp_prio;
1586
1587         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1588                 bstp_initialization(sc);
1589
1590         return (0);
1591 }
1592
1593 static int
1594 bridge_ioctl_reinit(struct bridge_softc *sc, void *arg __unused)
1595 {
1596         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1597                 bstp_initialization(sc);
1598         return (0);
1599 }
1600
1601 static int
1602 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1603 {
1604         struct ifbrparam *param = arg;
1605
1606         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1607
1608         return (0);
1609 }
1610
1611 static int
1612 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1613 {
1614         struct ifbrparam *param = arg;
1615
1616         if (param->ifbrp_hellotime == 0)
1617                 return (EINVAL);
1618         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1619
1620         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1621                 bstp_initialization(sc);
1622
1623         return (0);
1624 }
1625
1626 static int
1627 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1628 {
1629         struct ifbrparam *param = arg;
1630
1631         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1632
1633         return (0);
1634 }
1635
1636 static int
1637 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1638 {
1639         struct ifbrparam *param = arg;
1640
1641         if (param->ifbrp_fwddelay == 0)
1642                 return (EINVAL);
1643         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1644
1645         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1646                 bstp_initialization(sc);
1647
1648         return (0);
1649 }
1650
1651 static int
1652 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1653 {
1654         struct ifbrparam *param = arg;
1655
1656         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1657
1658         return (0);
1659 }
1660
1661 static int
1662 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1663 {
1664         struct ifbrparam *param = arg;
1665
1666         if (param->ifbrp_maxage == 0)
1667                 return (EINVAL);
1668         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1669
1670         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1671                 bstp_initialization(sc);
1672
1673         return (0);
1674 }
1675
1676 static int
1677 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1678 {
1679         struct ifbreq *req = arg;
1680         struct bridge_iflist *bif;
1681
1682         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1683         if (bif == NULL)
1684                 return (ENOENT);
1685
1686         bif->bif_priority = req->ifbr_priority;
1687
1688         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1689                 bstp_initialization(sc);
1690
1691         return (0);
1692 }
1693
1694 static int
1695 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1696 {
1697         struct ifbreq *req = arg;
1698         struct bridge_iflist *bif;
1699
1700         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1701         if (bif == NULL)
1702                 return (ENOENT);
1703
1704         bif->bif_path_cost = req->ifbr_path_cost;
1705
1706         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1707                 bstp_initialization(sc);
1708
1709         return (0);
1710 }
1711
1712 static int
1713 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1714 {
1715         struct ifbreq *req = arg;
1716         struct bridge_iflist *bif;
1717
1718         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1719         if (bif == NULL)
1720                 return (ENOENT);
1721
1722         bif->bif_bond_weight = req->ifbr_bond_weight;
1723
1724         /* no reinit needed */
1725
1726         return (0);
1727 }
1728
1729 static int
1730 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1731 {
1732         struct ifbreq *req = arg;
1733         struct bridge_iflist *bif;
1734         struct ifnet *ifs;
1735         struct bridge_ifinfo *bif_info;
1736
1737         ifs = ifunit(req->ifbr_ifsname);
1738         if (ifs == NULL)
1739                 return (ENOENT);
1740
1741         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1742                 if (ifs == bif->bif_ifp)
1743                         return (EBUSY);
1744
1745         if (ifs->if_bridge != NULL)
1746                 return (EBUSY);
1747
1748         switch (ifs->if_type) {
1749         case IFT_ETHER:
1750         case IFT_GIF:
1751         case IFT_L2VLAN:
1752                 break;
1753
1754         default:
1755                 return (EINVAL);
1756         }
1757
1758         /*
1759          * bif_info is needed for bif_flags
1760          */
1761         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1762         bif_info->bifi_ifp = ifs;
1763
1764         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1765         bif->bif_ifp = ifs;
1766         bif->bif_info = bif_info;
1767         bif->bif_flags = IFBIF_SPAN;
1768         /* NOTE: span bif does not need bridge_ifinfo */
1769
1770         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1771
1772         sc->sc_span = 1;
1773
1774         return (0);
1775 }
1776
1777 static int
1778 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1779 {
1780         struct ifbreq *req = arg;
1781         struct bridge_iflist *bif;
1782         struct ifnet *ifs;
1783
1784         ifs = ifunit(req->ifbr_ifsname);
1785         if (ifs == NULL)
1786                 return (ENOENT);
1787
1788         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1789                 if (ifs == bif->bif_ifp)
1790                         break;
1791
1792         if (bif == NULL)
1793                 return (ENOENT);
1794
1795         bridge_delete_span(sc, bif);
1796
1797         if (TAILQ_EMPTY(&sc->sc_spanlist))
1798                 sc->sc_span = 0;
1799
1800         return (0);
1801 }
1802
1803 static void
1804 bridge_ifdetach_dispatch(netmsg_t msg)
1805 {
1806         struct ifnet *ifp, *bifp;
1807         struct bridge_softc *sc;
1808         struct bridge_iflist *bif;
1809
1810         ifp = msg->lmsg.u.ms_resultp;
1811         sc = ifp->if_bridge;
1812
1813         /* Check if the interface is a bridge member */
1814         if (sc != NULL) {
1815                 bifp = sc->sc_ifp;
1816
1817                 ifnet_serialize_all(bifp);
1818
1819                 bif = bridge_lookup_member_if(sc, ifp);
1820                 if (bif != NULL) {
1821                         bridge_delete_member(sc, bif, 1);
1822                 } else {
1823                         /* XXX Why bif will be NULL? */
1824                 }
1825
1826                 ifnet_deserialize_all(bifp);
1827                 goto reply;
1828         }
1829
1830         crit_enter();   /* XXX MP */
1831
1832         /* Check if the interface is a span port */
1833         LIST_FOREACH(sc, &bridge_list, sc_list) {
1834                 bifp = sc->sc_ifp;
1835
1836                 ifnet_serialize_all(bifp);
1837
1838                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1839                         if (ifp == bif->bif_ifp) {
1840                                 bridge_delete_span(sc, bif);
1841                                 break;
1842                         }
1843
1844                 ifnet_deserialize_all(bifp);
1845         }
1846
1847         crit_exit();
1848
1849 reply:
1850         lwkt_replymsg(&msg->lmsg, 0);
1851 }
1852
1853 /*
1854  * bridge_ifdetach:
1855  *
1856  *      Detach an interface from a bridge.  Called when a member
1857  *      interface is detaching.
1858  */
1859 static void
1860 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1861 {
1862         struct netmsg_base msg;
1863
1864         netmsg_init(&msg, NULL, &curthread->td_msgport,
1865                     0, bridge_ifdetach_dispatch);
1866         msg.lmsg.u.ms_resultp = ifp;
1867
1868         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1869 }
1870
1871 /*
1872  * bridge_init:
1873  *
1874  *      Initialize a bridge interface.
1875  */
1876 static void
1877 bridge_init(void *xsc)
1878 {
1879         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1880 }
1881
1882 /*
1883  * bridge_stop:
1884  *
1885  *      Stop the bridge interface.
1886  */
1887 static void
1888 bridge_stop(struct ifnet *ifp)
1889 {
1890         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1891 }
1892
1893 /*
1894  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1895  * interface or from any member of our bridge interface.  This is used
1896  * later on to force the MAC to be the MAC of our bridge interface.
1897  */
1898 static int
1899 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1900 {
1901         struct bridge_iflist *bif;
1902
1903         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1904                 return (1);
1905
1906         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1907                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1908                            ETHER_ADDR_LEN) == 0) {
1909                         return (1);
1910                 }
1911         }
1912         return (0);
1913 }
1914
1915 /*
1916  * bridge_enqueue:
1917  *
1918  *      Enqueue a packet on a bridge member interface.
1919  *
1920  */
1921 void
1922 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1923 {
1924         struct netmsg_packet *nmp;
1925
1926         mbuftrackid(m, 64);
1927
1928         nmp = &m->m_hdr.mh_netmsg;
1929         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1930                     0, bridge_enqueue_handler);
1931         nmp->nm_packet = m;
1932         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1933
1934         lwkt_sendmsg(netisr_cpuport(mycpuid), &nmp->base.lmsg);
1935 }
1936
1937 /*
1938  * After looking up dst_if in our forwarding table we still have to
1939  * deal with channel bonding.  Find the best interface in the bonding set.
1940  */
1941 static struct ifnet *
1942 bridge_select_unicast(struct bridge_softc *sc, struct ifnet *dst_if,
1943                       int from_blocking, struct mbuf *m)
1944 {
1945         struct bridge_iflist *bif, *nbif;
1946         struct ifnet *alt_if;
1947         int alt_priority;
1948         int priority;
1949
1950         /*
1951          * Unicast, kinda replicates the output side of bridge_output().
1952          *
1953          * Even though this is a uni-cast packet we may have to select
1954          * an interface from a bonding set.
1955          */
1956         bif = bridge_lookup_member_if(sc, dst_if);
1957         if (bif == NULL) {
1958                 /* Not a member of the bridge (anymore?) */
1959                 return NULL;
1960         }
1961
1962         /*
1963          * If STP is enabled on the target we are an equal opportunity
1964          * employer and do not necessarily output to dst_if.  Instead
1965          * scan available links with the same MAC as the current dst_if
1966          * and choose the best one.
1967          *
1968          * We also need to do this because arp entries tag onto a particular
1969          * interface and if it happens to be dead then the packets will
1970          * go into a bit bucket.
1971          *
1972          * If LINK2 is set the matching links are bonded and we-round robin.
1973          * (the MAC address must be the same for the participating links).
1974          * In this case links in a STP FORWARDING or BONDED state are
1975          * allowed for unicast packets.
1976          */
1977         if (bif->bif_flags & IFBIF_STP) {
1978                 alt_if = NULL;
1979                 alt_priority = 0;
1980                 priority = 0;
1981
1982                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1983                                      bif_next, nbif) {
1984                         /*
1985                          * dst_if may imply a bonding set so we must compare
1986                          * MAC addresses.
1987                          */
1988                         if (memcmp(IF_LLADDR(bif->bif_ifp),
1989                                    IF_LLADDR(dst_if),
1990                                    ETHER_ADDR_LEN) != 0) {
1991                                 continue;
1992                         }
1993
1994                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
1995                                 continue;
1996
1997                         /*
1998                          * NOTE: We allow tranmissions through a BLOCKING
1999                          *       or LEARNING interface only as a last resort.
2000                          *       We DISALLOW both cases if the receiving
2001                          *
2002                          * NOTE: If we send a packet through a learning
2003                          *       interface the receiving end (if also in
2004                          *       LEARNING) will throw it away, so this is
2005                          *       the ultimate last resort.
2006                          */
2007                         switch(bif->bif_state) {
2008                         case BSTP_IFSTATE_BLOCKING:
2009                                 if (from_blocking == 0 &&
2010                                     bif->bif_priority + 256 > alt_priority) {
2011                                         alt_priority = bif->bif_priority + 256;
2012                                         alt_if = bif->bif_ifp;
2013                                 }
2014                                 continue;
2015                         case BSTP_IFSTATE_LEARNING:
2016                                 if (from_blocking == 0 &&
2017                                     bif->bif_priority > alt_priority) {
2018                                         alt_priority = bif->bif_priority;
2019                                         alt_if = bif->bif_ifp;
2020                                 }
2021                                 continue;
2022                         case BSTP_IFSTATE_L1BLOCKING:
2023                         case BSTP_IFSTATE_LISTENING:
2024                         case BSTP_IFSTATE_DISABLED:
2025                                 continue;
2026                         default:
2027                                 /* FORWARDING, BONDED */
2028                                 break;
2029                         }
2030
2031                         /*
2032                          * XXX we need to use the toepliz hash or
2033                          *     something like that instead of
2034                          *     round-robining.
2035                          */
2036                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2037                                 dst_if = bif->bif_ifp;
2038                                 if (++bif->bif_bond_count >=
2039                                     bif->bif_bond_weight) {
2040                                         bif->bif_bond_count = 0;
2041                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2042                                                      bif, bif_next);
2043                                         TAILQ_INSERT_TAIL(
2044                                                      &sc->sc_iflists[mycpuid],
2045                                                      bif, bif_next);
2046                                 }
2047                                 priority = 1;
2048                                 break;
2049                         }
2050
2051                         /*
2052                          * Select best interface in the FORWARDING or
2053                          * BONDED set.  Well, there shouldn't be any
2054                          * in a BONDED state if LINK2 is not set (they
2055                          * will all be in a BLOCKING) state, but there
2056                          * could be a transitory condition here.
2057                          */
2058                         if (bif->bif_priority > priority) {
2059                                 priority = bif->bif_priority;
2060                                 dst_if = bif->bif_ifp;
2061                         }
2062                 }
2063
2064                 /*
2065                  * If no suitable interfaces were found but a suitable
2066                  * alternative interface was found, use the alternative
2067                  * interface.
2068                  */
2069                 if (priority == 0 && alt_if)
2070                         dst_if = alt_if;
2071         }
2072
2073         /*
2074          * At this point, we're dealing with a unicast frame
2075          * going to a different interface.
2076          */
2077         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2078                 dst_if = NULL;
2079         return (dst_if);
2080 }
2081
2082
2083 /*
2084  * bridge_output:
2085  *
2086  *      Send output from a bridge member interface.  This
2087  *      performs the bridging function for locally originated
2088  *      packets.
2089  *
2090  *      The mbuf has the Ethernet header already attached.  We must
2091  *      enqueue or free the mbuf before returning.
2092  */
2093 static int
2094 bridge_output(struct ifnet *ifp, struct mbuf *m)
2095 {
2096         struct bridge_softc *sc = ifp->if_bridge;
2097         struct bridge_iflist *bif, *nbif;
2098         struct ether_header *eh;
2099         struct ifnet *dst_if, *alt_if, *bifp;
2100         int from_us;
2101         int alt_priority;
2102
2103         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2104         mbuftrackid(m, 65);
2105
2106         /*
2107          * Make sure that we are still a member of a bridge interface.
2108          */
2109         if (sc == NULL) {
2110                 m_freem(m);
2111                 return (0);
2112         }
2113         bifp = sc->sc_ifp;
2114
2115         /*
2116          * Acquire header
2117          */
2118         if (m->m_len < ETHER_HDR_LEN) {
2119                 m = m_pullup(m, ETHER_HDR_LEN);
2120                 if (m == NULL) {
2121                         IFNET_STAT_INC(bifp, oerrors, 1);
2122                         return (0);
2123                 }
2124         }
2125         eh = mtod(m, struct ether_header *);
2126         from_us = bridge_from_us(sc, eh);
2127
2128         /*
2129          * If bridge is down, but the original output interface is up,
2130          * go ahead and send out that interface.  Otherwise, the packet
2131          * is dropped below.
2132          */
2133         if ((bifp->if_flags & IFF_RUNNING) == 0) {
2134                 dst_if = ifp;
2135                 goto sendunicast;
2136         }
2137
2138         /*
2139          * If the packet is a multicast, or we don't know a better way to
2140          * get there, send to all interfaces.
2141          */
2142         if (ETHER_IS_MULTICAST(eh->ether_dhost))
2143                 dst_if = NULL;
2144         else
2145                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2146
2147         if (dst_if == NULL) {
2148                 struct mbuf *mc;
2149                 int used = 0;
2150                 int found = 0;
2151
2152                 if (sc->sc_span)
2153                         bridge_span(sc, m);
2154
2155                 alt_if = NULL;
2156                 alt_priority = 0;
2157                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2158                                      bif_next, nbif) {
2159                         dst_if = bif->bif_ifp;
2160
2161                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2162                                 continue;
2163
2164                         /*
2165                          * If this is not the original output interface,
2166                          * and the interface is participating in spanning
2167                          * tree, make sure the port is in a state that
2168                          * allows forwarding.
2169                          *
2170                          * We keep track of a possible backup IF if we are
2171                          * unable to find any interfaces to forward through.
2172                          *
2173                          * NOTE: Currently round-robining is not implemented
2174                          *       across bonded interface groups (needs an
2175                          *       algorithm to track each group somehow).
2176                          *
2177                          *       Similarly we track only one alternative
2178                          *       interface if no suitable interfaces are
2179                          *       found.
2180                          */
2181                         if (dst_if != ifp &&
2182                             (bif->bif_flags & IFBIF_STP) != 0) {
2183                                 switch (bif->bif_state) {
2184                                 case BSTP_IFSTATE_BONDED:
2185                                         if (bif->bif_priority + 512 >
2186                                             alt_priority) {
2187                                                 alt_priority =
2188                                                     bif->bif_priority + 512;
2189                                                 alt_if = bif->bif_ifp;
2190                                         }
2191                                         continue;
2192                                 case BSTP_IFSTATE_BLOCKING:
2193                                         if (bif->bif_priority + 256 >
2194                                             alt_priority) {
2195                                                 alt_priority =
2196                                                     bif->bif_priority + 256;
2197                                                 alt_if = bif->bif_ifp;
2198                                         }
2199                                         continue;
2200                                 case BSTP_IFSTATE_LEARNING:
2201                                         if (bif->bif_priority > alt_priority) {
2202                                                 alt_priority =
2203                                                     bif->bif_priority;
2204                                                 alt_if = bif->bif_ifp;
2205                                         }
2206                                         continue;
2207                                 case BSTP_IFSTATE_L1BLOCKING:
2208                                 case BSTP_IFSTATE_LISTENING:
2209                                 case BSTP_IFSTATE_DISABLED:
2210                                         continue;
2211                                 default:
2212                                         /* FORWARDING */
2213                                         break;
2214                                 }
2215                         }
2216
2217                         KKASSERT(used == 0);
2218                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2219                                 used = 1;
2220                                 mc = m;
2221                         } else {
2222                                 mc = m_copypacket(m, MB_DONTWAIT);
2223                                 if (mc == NULL) {
2224                                         IFNET_STAT_INC(bifp, oerrors, 1);
2225                                         continue;
2226                                 }
2227                         }
2228
2229                         /*
2230                          * If the packet is 'from' us override ether_shost.
2231                          */
2232                         bridge_handoff(sc, dst_if, mc, from_us);
2233                         found = 1;
2234
2235                         if (nbif != NULL && !nbif->bif_onlist) {
2236                                 KKASSERT(bif->bif_onlist);
2237                                 nbif = TAILQ_NEXT(bif, bif_next);
2238                         }
2239                 }
2240
2241                 /*
2242                  * If we couldn't find anything use the backup interface
2243                  * if we have one.
2244                  */
2245                 if (found == 0 && alt_if) {
2246                         KKASSERT(used == 0);
2247                         mc = m;
2248                         used = 1;
2249                         bridge_handoff(sc, alt_if, mc, from_us);
2250                 }
2251
2252                 if (used == 0)
2253                         m_freem(m);
2254                 return (0);
2255         }
2256
2257         /*
2258          * Unicast
2259          */
2260 sendunicast:
2261         dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2262
2263         if (sc->sc_span)
2264                 bridge_span(sc, m);
2265         if (dst_if == NULL)
2266                 m_freem(m);
2267         else
2268                 bridge_handoff(sc, dst_if, m, from_us);
2269         return (0);
2270 }
2271
2272 /*
2273  * Returns the bridge interface associated with an ifc.
2274  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2275  * code to supply the bridge for the is-at info, making
2276  * the bridge responsible for matching local addresses.
2277  *
2278  * Without this the ARP code will supply bridge member interfaces
2279  * for the is-at which makes it difficult the bridge to fail-over
2280  * interfaces (amoung other things).
2281  */
2282 static struct ifnet *
2283 bridge_interface(void *if_bridge)
2284 {
2285         struct bridge_softc *sc = if_bridge;
2286         return (sc->sc_ifp);
2287 }
2288
2289 /*
2290  * bridge_start:
2291  *
2292  *      Start output on a bridge.
2293  */
2294 static void
2295 bridge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
2296 {
2297         struct bridge_softc *sc = ifp->if_softc;
2298
2299         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
2300         ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
2301
2302         ifsq_set_oactive(ifsq);
2303         for (;;) {
2304                 struct ifnet *dst_if = NULL;
2305                 struct ether_header *eh;
2306                 struct mbuf *m;
2307
2308                 m = ifsq_dequeue(ifsq);
2309                 if (m == NULL)
2310                         break;
2311                 mbuftrackid(m, 75);
2312
2313                 if (m->m_len < sizeof(*eh)) {
2314                         m = m_pullup(m, sizeof(*eh));
2315                         if (m == NULL) {
2316                                 IFNET_STAT_INC(ifp, oerrors, 1);
2317                                 continue;
2318                         }
2319                 }
2320                 eh = mtod(m, struct ether_header *);
2321
2322                 BPF_MTAP(ifp, m);
2323                 IFNET_STAT_INC(ifp, opackets, 1);
2324
2325                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2326                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2327
2328                 /*
2329                  * Multicast or broadcast
2330                  */
2331                 if (dst_if == NULL) {
2332                         bridge_start_bcast(sc, m);
2333                         continue;
2334                 }
2335
2336                 /*
2337                  * Unicast
2338                  */
2339                 dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2340
2341                 if (dst_if == NULL)
2342                         m_freem(m);
2343                 else
2344                         bridge_enqueue(dst_if, m);
2345         }
2346         ifsq_clr_oactive(ifsq);
2347 }
2348
2349 /*
2350  * bridge_forward:
2351  *
2352  *      Forward packets received on a bridge interface via the input
2353  *      path.
2354  *
2355  *      This implements the forwarding function of the bridge.
2356  */
2357 static void
2358 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2359 {
2360         struct bridge_iflist *bif;
2361         struct ifnet *src_if, *dst_if, *ifp;
2362         struct ether_header *eh;
2363         int from_blocking;
2364
2365         mbuftrackid(m, 66);
2366         src_if = m->m_pkthdr.rcvif;
2367         ifp = sc->sc_ifp;
2368
2369         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2370
2371         /*
2372          * packet coming in on the bridge is also going out on the bridge,
2373          * but ether code won't adjust output stats for the bridge because
2374          * we are changing the interface to something else.
2375          */
2376         IFNET_STAT_INC(ifp, opackets, 1);
2377         IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len);
2378
2379         /*
2380          * Look up the bridge_iflist.
2381          */
2382         bif = bridge_lookup_member_if(sc, src_if);
2383         if (bif == NULL) {
2384                 /* Interface is not a bridge member (anymore?) */
2385                 m_freem(m);
2386                 return;
2387         }
2388
2389         /*
2390          * In spanning tree mode receiving a packet from an interface
2391          * in a BLOCKING state is allowed, it could be a member of last
2392          * resort from the sender's point of view, but forwarding it is
2393          * not allowed.
2394          *
2395          * The sender's spanning tree will eventually sync up and the
2396          * sender will go into a BLOCKING state too (but this still may be
2397          * an interface of last resort during state changes).
2398          */
2399         if (bif->bif_flags & IFBIF_STP) {
2400                 switch (bif->bif_state) {
2401                 case BSTP_IFSTATE_L1BLOCKING:
2402                 case BSTP_IFSTATE_LISTENING:
2403                 case BSTP_IFSTATE_DISABLED:
2404                         m_freem(m);
2405                         return;
2406                 default:
2407                         /* learning, blocking, bonded, forwarding */
2408                         break;
2409                 }
2410                 from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2411         } else {
2412                 from_blocking = 0;
2413         }
2414
2415         eh = mtod(m, struct ether_header *);
2416
2417         /*
2418          * If the interface is learning, and the source
2419          * address is valid and not multicast, record
2420          * the address.
2421          */
2422         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2423             from_blocking == 0 &&
2424             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2425             (eh->ether_shost[0] == 0 &&
2426              eh->ether_shost[1] == 0 &&
2427              eh->ether_shost[2] == 0 &&
2428              eh->ether_shost[3] == 0 &&
2429              eh->ether_shost[4] == 0 &&
2430              eh->ether_shost[5] == 0) == 0) {
2431                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2432         }
2433
2434         /*
2435          * Don't forward from an interface in the listening or learning
2436          * state.  That is, in the learning state we learn information
2437          * but we throw away the packets.
2438          *
2439          * We let through packets on interfaces in the blocking state.
2440          * The blocking state is applicable to the send side, not the
2441          * receive side.
2442          */
2443         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2444             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2445              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2446                 m_freem(m);
2447                 return;
2448         }
2449
2450         /*
2451          * At this point, the port either doesn't participate
2452          * in spanning tree or it is in the forwarding state.
2453          */
2454
2455         /*
2456          * If the packet is unicast, destined for someone on
2457          * "this" side of the bridge, drop it.
2458          *
2459          * src_if implies the entire bonding set so we have to compare MAC
2460          * addresses and not just if pointers.
2461          */
2462         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2463                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2464                 if (dst_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
2465                                      ETHER_ADDR_LEN) == 0) {
2466                         m_freem(m);
2467                         return;
2468                 }
2469         } else {
2470                 /* ...forward it to all interfaces. */
2471                 IFNET_STAT_INC(ifp, imcasts, 1);
2472                 dst_if = NULL;
2473         }
2474
2475         /*
2476          * Brodcast if we do not have forwarding information.  However, if
2477          * we received the packet on a blocking interface we do not do this
2478          * (unless you really want to blow up your network).
2479          */
2480         if (dst_if == NULL) {
2481                 if (from_blocking)
2482                         m_freem(m);
2483                 else
2484                         bridge_broadcast(sc, src_if, m);
2485                 return;
2486         }
2487
2488         dst_if = bridge_select_unicast(sc, dst_if, from_blocking, m);
2489
2490         if (dst_if == NULL) {
2491                 m_freem(m);
2492                 return;
2493         }
2494
2495         if (inet_pfil_hook.ph_hashooks > 0
2496 #ifdef INET6
2497             || inet6_pfil_hook.ph_hashooks > 0
2498 #endif
2499             ) {
2500                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2501                         return;
2502                 if (m == NULL)
2503                         return;
2504
2505                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2506                         return;
2507                 if (m == NULL)
2508                         return;
2509         }
2510         bridge_handoff(sc, dst_if, m, 0);
2511 }
2512
2513 /*
2514  * bridge_input:
2515  *
2516  *      Receive input from a member interface.  Queue the packet for
2517  *      bridging if it is not for us.
2518  */
2519 static struct mbuf *
2520 bridge_input(struct ifnet *ifp, struct mbuf *m)
2521 {
2522         struct bridge_softc *sc = ifp->if_bridge;
2523         struct bridge_iflist *bif;
2524         struct ifnet *bifp, *new_ifp;
2525         struct ether_header *eh;
2526         struct mbuf *mc, *mc2;
2527
2528         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2529         mbuftrackid(m, 67);
2530
2531         /*
2532          * Make sure that we are still a member of a bridge interface.
2533          */
2534         if (sc == NULL)
2535                 return m;
2536
2537         new_ifp = NULL;
2538         bifp = sc->sc_ifp;
2539
2540         if ((bifp->if_flags & IFF_RUNNING) == 0)
2541                 goto out;
2542
2543         /*
2544          * Implement support for bridge monitoring.  If this flag has been
2545          * set on this interface, discard the packet once we push it through
2546          * the bpf(4) machinery, but before we do, increment various counters
2547          * associated with this bridge.
2548          */
2549         if (bifp->if_flags & IFF_MONITOR) {
2550                 /*
2551                  * Change input interface to this bridge
2552                  *
2553                  * Update bridge's ifnet statistics
2554                  */
2555                 m->m_pkthdr.rcvif = bifp;
2556
2557                 BPF_MTAP(bifp, m);
2558                 IFNET_STAT_INC(bifp, ipackets, 1);
2559                 IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2560                 if (m->m_flags & (M_MCAST | M_BCAST))
2561                         IFNET_STAT_INC(bifp, imcasts, 1);
2562
2563                 m_freem(m);
2564                 m = NULL;
2565                 goto out;
2566         }
2567
2568         /*
2569          * Handle the ether_header
2570          *
2571          * In all cases if the packet is destined for us via our MAC
2572          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2573          * repeat the source MAC out the same interface.
2574          *
2575          * This first test against our bridge MAC is the fast-path.
2576          *
2577          * NOTE!  The bridge interface can serve as an endpoint for
2578          *        communication but normally there are no IPs associated
2579          *        with it so you cannot route through it.  Instead what
2580          *        you do is point your default route *THROUGH* the bridge
2581          *        to the actual default router for one of the bridged spaces.
2582          *
2583          *        Another possibility is to put all your IP specifications
2584          *        on the bridge instead of on the individual interfaces.  If
2585          *        you do this it should be possible to use the bridge as an
2586          *        end point and route (rather than switch) through it using
2587          *        the default route or ipfw forwarding rules.
2588          */
2589
2590         /*
2591          * Acquire header
2592          */
2593         if (m->m_len < ETHER_HDR_LEN) {
2594                 m = m_pullup(m, ETHER_HDR_LEN);
2595                 if (m == NULL)
2596                         goto out;
2597         }
2598         eh = mtod(m, struct ether_header *);
2599         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2600         bcopy(eh->ether_shost, m->m_pkthdr.ether_br_shost, ETHER_ADDR_LEN);
2601
2602         if ((bridge_debug & 1) &&
2603             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2604             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2605                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2606                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2607                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2608                         eh->ether_dhost[0],
2609                         eh->ether_dhost[1],
2610                         eh->ether_dhost[2],
2611                         eh->ether_dhost[3],
2612                         eh->ether_dhost[4],
2613                         eh->ether_dhost[5],
2614                         eh->ether_shost[0],
2615                         eh->ether_shost[1],
2616                         eh->ether_shost[2],
2617                         eh->ether_shost[3],
2618                         eh->ether_shost[4],
2619                         eh->ether_shost[5],
2620                         eh->ether_type,
2621                         ((u_char *)IF_LLADDR(bifp))[0],
2622                         ((u_char *)IF_LLADDR(bifp))[1],
2623                         ((u_char *)IF_LLADDR(bifp))[2],
2624                         ((u_char *)IF_LLADDR(bifp))[3],
2625                         ((u_char *)IF_LLADDR(bifp))[4],
2626                         ((u_char *)IF_LLADDR(bifp))[5]
2627                 );
2628         }
2629
2630         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2631                 /*
2632                  * If the packet is for us, set the packets source as the
2633                  * bridge, and return the packet back to ifnet.if_input for
2634                  * local processing.
2635                  */
2636                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2637                 KASSERT(bifp->if_bridge == NULL,
2638                         ("loop created in bridge_input"));
2639                 if (pfil_member != 0) {
2640                         if (inet_pfil_hook.ph_hashooks > 0
2641 #ifdef INET6
2642                             || inet6_pfil_hook.ph_hashooks > 0
2643 #endif
2644                         ) {
2645                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2646                                         goto out;
2647                                 if (m == NULL)
2648                                         goto out;
2649                         }
2650                 }
2651                 new_ifp = bifp;
2652                 goto out;
2653         }
2654
2655         /*
2656          * Tap all packets arriving on the bridge, no matter if
2657          * they are local destinations or not.  In is in.
2658          *
2659          * Update bridge's ifnet statistics
2660          */
2661         BPF_MTAP(bifp, m);
2662         IFNET_STAT_INC(bifp, ipackets, 1);
2663         IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2664         if (m->m_flags & (M_MCAST | M_BCAST))
2665                 IFNET_STAT_INC(bifp, imcasts, 1);
2666
2667         bif = bridge_lookup_member_if(sc, ifp);
2668         if (bif == NULL)
2669                 goto out;
2670
2671         if (sc->sc_span)
2672                 bridge_span(sc, m);
2673
2674         if (m->m_flags & (M_BCAST | M_MCAST)) {
2675                 /*
2676                  * Tap off 802.1D packets; they do not get forwarded.
2677                  */
2678                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2679                             ETHER_ADDR_LEN) == 0) {
2680                         ifnet_serialize_all(bifp);
2681                         bstp_input(sc, bif, m);
2682                         ifnet_deserialize_all(bifp);
2683
2684                         /* m is freed by bstp_input */
2685                         m = NULL;
2686                         goto out;
2687                 }
2688
2689                 /*
2690                  * Other than 802.11d packets, ignore packets if the
2691                  * interface is not in a good state.
2692                  *
2693                  * NOTE: Broadcast/mcast packets received on a blocking or
2694                  *       learning interface are allowed for local processing.
2695                  *
2696                  *       The sending side of a blocked port will stop
2697                  *       transmitting when a better alternative is found.
2698                  *       However, later on we will disallow the forwarding
2699                  *       of bcast/mcsat packets over a blocking interface.
2700                  */
2701                 if (bif->bif_flags & IFBIF_STP) {
2702                         switch (bif->bif_state) {
2703                         case BSTP_IFSTATE_L1BLOCKING:
2704                         case BSTP_IFSTATE_LISTENING:
2705                         case BSTP_IFSTATE_DISABLED:
2706                                 goto out;
2707                         default:
2708                                 /* blocking, learning, bonded, forwarding */
2709                                 break;
2710                         }
2711                 }
2712
2713                 /*
2714                  * Make a deep copy of the packet and enqueue the copy
2715                  * for bridge processing; return the original packet for
2716                  * local processing.
2717                  */
2718                 mc = m_dup(m, MB_DONTWAIT);
2719                 if (mc == NULL)
2720                         goto out;
2721
2722                 /*
2723                  * It's just too dangerous to allow bcast/mcast over a
2724                  * blocked interface, eventually the network will sort
2725                  * itself out and a better path will be found.
2726                  */
2727                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2728                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2729                         bridge_forward(sc, mc);
2730                 }
2731
2732                 /*
2733                  * Reinject the mbuf as arriving on the bridge so we have a
2734                  * chance at claiming multicast packets. We can not loop back
2735                  * here from ether_input as a bridge is never a member of a
2736                  * bridge.
2737                  */
2738                 KASSERT(bifp->if_bridge == NULL,
2739                         ("loop created in bridge_input"));
2740                 mc2 = m_dup(m, MB_DONTWAIT);
2741 #ifdef notyet
2742                 if (mc2 != NULL) {
2743                         /* Keep the layer3 header aligned */
2744                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2745                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2746                 }
2747 #endif
2748                 if (mc2 != NULL) {
2749                         /*
2750                          * Don't tap to bpf(4) again; we have already done
2751                          * the tapping.
2752                          *
2753                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2754                          * processed as coming in on the correct interface.
2755                          *
2756                          * Clear the bridge flag for local processing in
2757                          * case the packet gets routed.
2758                          */
2759                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2760                         ether_reinput_oncpu(bifp, mc2, 0);
2761                 }
2762
2763                 /* Return the original packet for local processing. */
2764                 goto out;
2765         }
2766
2767         /*
2768          * Input of a unicast packet.  We have to allow unicast packets
2769          * input from links in the BLOCKING state as this might be an
2770          * interface of last resort.
2771          *
2772          * NOTE: We explicitly ignore normal packets received on a link
2773          *       in the BLOCKING state.  The point of being in that state
2774          *       is to avoid getting duplicate packets.
2775          *
2776          *       HOWEVER, if LINK2 is set the normal spanning tree code
2777          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2778          *       loops.  Unicast packets CAN still loop if we allow the
2779          *       case (hence we only do it in LINK2), but it isn't quite as
2780          *       bad as a broadcast packet looping.
2781          */
2782         if (bif->bif_flags & IFBIF_STP) {
2783                 switch (bif->bif_state) {
2784                 case BSTP_IFSTATE_L1BLOCKING:
2785                 case BSTP_IFSTATE_LISTENING:
2786                 case BSTP_IFSTATE_DISABLED:
2787                         goto out;
2788                 default:
2789                         /* blocking, bonded, forwarding, learning */
2790                         break;
2791                 }
2792         }
2793
2794         /*
2795          * Unicast.  Make sure it's not for us.
2796          *
2797          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2798          * is followed by breaking out of the loop.
2799          */
2800         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2801                 if (bif->bif_ifp->if_type != IFT_ETHER)
2802                         continue;
2803
2804                 /*
2805                  * It is destined for an interface linked to the bridge.
2806                  * We want the bridge itself to take care of link level
2807                  * forwarding to member interfaces so reinput on the bridge.
2808                  * i.e. if you ping an IP on a target interface associated
2809                  * with the bridge, the arp is-at response should indicate
2810                  * the bridge MAC.
2811                  *
2812                  * Only update our addr list when learning if the port
2813                  * is not in a blocking state.  If it is we still allow
2814                  * the packet but we do not try to learn from it.
2815                  */
2816                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2817                            ETHER_ADDR_LEN) == 0) {
2818                         if (bif->bif_ifp != ifp) {
2819                                 /* XXX loop prevention */
2820                                 m->m_flags |= M_ETHER_BRIDGED;
2821                         }
2822                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2823                             ((bif->bif_flags & IFBIF_STP) == 0 ||
2824                              bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2825                                 bridge_rtupdate(sc, eh->ether_shost,
2826                                                 ifp, IFBAF_DYNAMIC);
2827                         }
2828                         new_ifp = bifp; /* not bif->bif_ifp */
2829                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2830                         goto out;
2831                 }
2832
2833                 /*
2834                  * Ignore received packets that were sent by us.
2835                  */
2836                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2837                            ETHER_ADDR_LEN) == 0) {
2838                         m_freem(m);
2839                         m = NULL;
2840                         goto out;
2841                 }
2842         }
2843
2844         /*
2845          * It isn't for us.
2846          *
2847          * Perform the bridge forwarding function, but disallow bridging
2848          * to interfaces in the blocking state if the packet came in on
2849          * an interface in the blocking state.
2850          */
2851         bridge_forward(sc, m);
2852         m = NULL;
2853
2854         /*
2855          * ether_reinput_oncpu() will reprocess rcvif as
2856          * coming from new_ifp (since we do not specify
2857          * REINPUT_KEEPRCVIF).
2858          */
2859 out:
2860         if (new_ifp != NULL) {
2861                 /*
2862                  * Clear the bridge flag for local processing in
2863                  * case the packet gets routed.
2864                  */
2865                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2866                 m = NULL;
2867         }
2868         return (m);
2869 }
2870
2871 /*
2872  * bridge_start_bcast:
2873  *
2874  *      Broadcast the packet sent from bridge to all member
2875  *      interfaces.
2876  *      This is a simplified version of bridge_broadcast(), however,
2877  *      this function expects caller to hold bridge's serializer.
2878  */
2879 static void
2880 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2881 {
2882         struct bridge_iflist *bif;
2883         struct mbuf *mc;
2884         struct ifnet *dst_if, *alt_if, *bifp;
2885         int used = 0;
2886         int found = 0;
2887         int alt_priority;
2888
2889         mbuftrackid(m, 68);
2890         bifp = sc->sc_ifp;
2891         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2892
2893         /*
2894          * Following loop is MPSAFE; nothing is blocking
2895          * in the loop body.
2896          *
2897          * NOTE: We transmit through an member in the BLOCKING state only
2898          *       as a last resort.
2899          */
2900         alt_if = NULL;
2901         alt_priority = 0;
2902
2903         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2904                 dst_if = bif->bif_ifp;
2905
2906                 if (bif->bif_flags & IFBIF_STP) {
2907                         switch (bif->bif_state) {
2908                         case BSTP_IFSTATE_BLOCKING:
2909                                 if (bif->bif_priority > alt_priority) {
2910                                         alt_priority = bif->bif_priority;
2911                                         alt_if = bif->bif_ifp;
2912                                 }
2913                                 /* fall through */
2914                         case BSTP_IFSTATE_L1BLOCKING:
2915                         case BSTP_IFSTATE_DISABLED:
2916                                 continue;
2917                         default:
2918                                 /* listening, learning, bonded, forwarding */
2919                                 break;
2920                         }
2921                 }
2922
2923                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2924                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2925                         continue;
2926
2927                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2928                         continue;
2929
2930                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2931                         mc = m;
2932                         used = 1;
2933                 } else {
2934                         mc = m_copypacket(m, MB_DONTWAIT);
2935                         if (mc == NULL) {
2936                                 IFNET_STAT_INC(bifp, oerrors, 1);
2937                                 continue;
2938                         }
2939                 }
2940                 found = 1;
2941                 bridge_enqueue(dst_if, mc);
2942         }
2943
2944         if (found == 0 && alt_if) {
2945                 KKASSERT(used == 0);
2946                 mc = m;
2947                 used = 1;
2948                 bridge_enqueue(alt_if, mc);
2949         }
2950
2951         if (used == 0)
2952                 m_freem(m);
2953 }
2954
2955 /*
2956  * bridge_broadcast:
2957  *
2958  *      Send a frame to all interfaces that are members of
2959  *      the bridge, except for the one on which the packet
2960  *      arrived.
2961  */
2962 static void
2963 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2964                  struct mbuf *m)
2965 {
2966         struct bridge_iflist *bif, *nbif;
2967         struct ether_header *eh;
2968         struct mbuf *mc;
2969         struct ifnet *dst_if, *alt_if, *bifp;
2970         int used;
2971         int found;
2972         int alt_priority;
2973         int from_us;
2974
2975         mbuftrackid(m, 69);
2976         bifp = sc->sc_ifp;
2977         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
2978
2979         eh = mtod(m, struct ether_header *);
2980         from_us = bridge_from_us(sc, eh);
2981
2982         if (inet_pfil_hook.ph_hashooks > 0
2983 #ifdef INET6
2984             || inet6_pfil_hook.ph_hashooks > 0
2985 #endif
2986             ) {
2987                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2988                         return;
2989                 if (m == NULL)
2990                         return;
2991
2992                 /* Filter on the bridge interface before broadcasting */
2993                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2994                         return;
2995                 if (m == NULL)
2996                         return;
2997         }
2998
2999         alt_if = NULL;
3000         alt_priority = 0;
3001         found = 0;
3002         used = 0;
3003
3004         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
3005                 dst_if = bif->bif_ifp;
3006
3007                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3008                         continue;
3009
3010                 /*
3011                  * Don't bounce the packet out the same interface it came
3012                  * in on.  We have to test MAC addresses because a packet
3013                  * can come in a bonded interface and we don't want it to
3014                  * be echod out the forwarding interface for the same bonding
3015                  * set.
3016                  */
3017                 if (src_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
3018                                      ETHER_ADDR_LEN) == 0) {
3019                         continue;
3020                 }
3021
3022                 /*
3023                  * Generally speaking we only broadcast through forwarding
3024                  * interfaces.  If no interfaces are available we select
3025                  * a BONDED, BLOCKING, or LEARNING interface to forward
3026                  * through.
3027                  */
3028                 if (bif->bif_flags & IFBIF_STP) {
3029                         switch (bif->bif_state) {
3030                         case BSTP_IFSTATE_BONDED:
3031                                 if (bif->bif_priority + 512 > alt_priority) {
3032                                         alt_priority = bif->bif_priority + 512;
3033                                         alt_if = bif->bif_ifp;
3034                                 }
3035                                 continue;
3036                         case BSTP_IFSTATE_BLOCKING:
3037                                 if (bif->bif_priority + 256 > alt_priority) {
3038                                         alt_priority = bif->bif_priority + 256;
3039                                         alt_if = bif->bif_ifp;
3040                                 }
3041                                 continue;
3042                         case BSTP_IFSTATE_LEARNING:
3043                                 if (bif->bif_priority > alt_priority) {
3044                                         alt_priority = bif->bif_priority;
3045                                         alt_if = bif->bif_ifp;
3046                                 }
3047                                 continue;
3048                         case BSTP_IFSTATE_L1BLOCKING:
3049                         case BSTP_IFSTATE_DISABLED:
3050                         case BSTP_IFSTATE_LISTENING:
3051                                 continue;
3052                         default:
3053                                 /* forwarding */
3054                                 break;
3055                         }
3056                 }
3057
3058                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3059                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3060                         continue;
3061                 }
3062
3063                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3064                         mc = m;
3065                         used = 1;
3066                 } else {
3067                         mc = m_copypacket(m, MB_DONTWAIT);
3068                         if (mc == NULL) {
3069                                 IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3070                                 continue;
3071                         }
3072                 }
3073                 found = 1;
3074
3075                 /*
3076                  * Filter on the output interface.  Pass a NULL bridge
3077                  * interface pointer so we do not redundantly filter on
3078                  * the bridge for each interface we broadcast on.
3079                  */
3080                 if (inet_pfil_hook.ph_hashooks > 0
3081 #ifdef INET6
3082                     || inet6_pfil_hook.ph_hashooks > 0
3083 #endif
3084                     ) {
3085                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3086                                 continue;
3087                         if (mc == NULL)
3088                                 continue;
3089                 }
3090                 bridge_handoff(sc, dst_if, mc, from_us);
3091
3092                 if (nbif != NULL && !nbif->bif_onlist) {
3093                         KKASSERT(bif->bif_onlist);
3094                         nbif = TAILQ_NEXT(bif, bif_next);
3095                 }
3096         }
3097
3098         if (found == 0 && alt_if) {
3099                 KKASSERT(used == 0);
3100                 mc = m;
3101                 used = 1;
3102                 bridge_enqueue(alt_if, mc);
3103         }
3104
3105         if (used == 0)
3106                 m_freem(m);
3107 }
3108
3109 /*
3110  * bridge_span:
3111  *
3112  *      Duplicate a packet out one or more interfaces that are in span mode,
3113  *      the original mbuf is unmodified.
3114  */
3115 static void
3116 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3117 {
3118         struct bridge_iflist *bif;
3119         struct ifnet *dst_if, *bifp;
3120         struct mbuf *mc;
3121
3122         mbuftrackid(m, 70);
3123         bifp = sc->sc_ifp;
3124         ifnet_serialize_all(bifp);
3125
3126         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3127                 dst_if = bif->bif_ifp;
3128
3129                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3130                         continue;
3131
3132                 mc = m_copypacket(m, MB_DONTWAIT);
3133                 if (mc == NULL) {
3134                         IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3135                         continue;
3136                 }
3137                 bridge_enqueue(dst_if, mc);
3138         }
3139
3140         ifnet_deserialize_all(bifp);
3141 }
3142
3143 static void
3144 bridge_rtmsg_sync_handler(netmsg_t msg)
3145 {
3146         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3147 }
3148
3149 static void
3150 bridge_rtmsg_sync(struct bridge_softc *sc)
3151 {
3152         struct netmsg_base msg;
3153
3154         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3155
3156         netmsg_init(&msg, NULL, &curthread->td_msgport,
3157                     0, bridge_rtmsg_sync_handler);
3158         ifnet_domsg(&msg.lmsg, 0);
3159 }
3160
3161 static __inline void
3162 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3163                      int setflags, uint8_t flags, uint32_t timeo)
3164 {
3165         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3166             bri->bri_ifp != dst_if)
3167                 bri->bri_ifp = dst_if;
3168         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3169             bri->bri_expire != time_uptime + timeo)
3170                 bri->bri_expire = time_uptime + timeo;
3171         if (setflags)
3172                 bri->bri_flags = flags;
3173 }
3174
3175 static int
3176 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3177                        struct ifnet *dst_if, int setflags, uint8_t flags,
3178                        struct bridge_rtinfo **bri0)
3179 {
3180         struct bridge_rtnode *brt;
3181         struct bridge_rtinfo *bri;
3182
3183         if (mycpuid == 0) {
3184                 brt = bridge_rtnode_lookup(sc, dst);
3185                 if (brt != NULL) {
3186                         /*
3187                          * rtnode for 'dst' already exists.  We inform the
3188                          * caller about this by leaving bri0 as NULL.  The
3189                          * caller will terminate the intallation upon getting
3190                          * NULL bri0.  However, we still need to update the
3191                          * rtinfo.
3192                          */
3193                         KKASSERT(*bri0 == NULL);
3194
3195                         /* Update rtinfo */
3196                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3197                                              flags, sc->sc_brttimeout);
3198                         return 0;
3199                 }
3200
3201                 /*
3202                  * We only need to check brtcnt on CPU0, since if limit
3203                  * is to be exceeded, ENOSPC is returned.  Caller knows
3204                  * this and will terminate the installation.
3205                  */
3206                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3207                         return ENOSPC;
3208
3209                 KKASSERT(*bri0 == NULL);
3210                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3211                                   M_WAITOK | M_ZERO);
3212                 *bri0 = bri;
3213
3214                 /* Setup rtinfo */
3215                 bri->bri_flags = IFBAF_DYNAMIC;
3216                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3217                                      sc->sc_brttimeout);
3218         } else {
3219                 bri = *bri0;
3220                 KKASSERT(bri != NULL);
3221         }
3222
3223         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3224                       M_WAITOK | M_ZERO);
3225         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3226         brt->brt_info = bri;
3227
3228         bridge_rtnode_insert(sc, brt);
3229         return 0;
3230 }
3231
3232 static void
3233 bridge_rtinstall_handler(netmsg_t msg)
3234 {
3235         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3236         int error;
3237
3238         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3239                                        brmsg->br_dst, brmsg->br_dst_if,
3240                                        brmsg->br_setflags, brmsg->br_flags,
3241                                        &brmsg->br_rtinfo);
3242         if (error) {
3243                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3244                 lwkt_replymsg(&brmsg->base.lmsg, error);
3245                 return;
3246         } else if (brmsg->br_rtinfo == NULL) {
3247                 /* rtnode already exists for 'dst' */
3248                 KKASSERT(mycpuid == 0);
3249                 lwkt_replymsg(&brmsg->base.lmsg, 0);
3250                 return;
3251         }
3252         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
3253 }
3254
3255 /*
3256  * bridge_rtupdate:
3257  *
3258  *      Add/Update a bridge routing entry.
3259  */
3260 static int
3261 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3262                 struct ifnet *dst_if, uint8_t flags)
3263 {
3264         struct bridge_rtnode *brt;
3265
3266         /*
3267          * A route for this destination might already exist.  If so,
3268          * update it, otherwise create a new one.
3269          */
3270         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3271                 struct netmsg_brsaddr *brmsg;
3272
3273                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3274                         return ENOSPC;
3275
3276                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3277                 if (brmsg == NULL)
3278                         return ENOMEM;
3279
3280                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3281                             0, bridge_rtinstall_handler);
3282                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3283                 brmsg->br_dst_if = dst_if;
3284                 brmsg->br_flags = flags;
3285                 brmsg->br_setflags = 0;
3286                 brmsg->br_softc = sc;
3287                 brmsg->br_rtinfo = NULL;
3288
3289                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
3290                 return 0;
3291         }
3292         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3293                              sc->sc_brttimeout);
3294         return 0;
3295 }
3296
3297 static int
3298 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3299                struct ifnet *dst_if, uint8_t flags)
3300 {
3301         struct netmsg_brsaddr brmsg;
3302
3303         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3304
3305         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3306                     0, bridge_rtinstall_handler);
3307         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3308         brmsg.br_dst_if = dst_if;
3309         brmsg.br_flags = flags;
3310         brmsg.br_setflags = 1;
3311         brmsg.br_softc = sc;
3312         brmsg.br_rtinfo = NULL;
3313
3314         return ifnet_domsg(&brmsg.base.lmsg, 0);
3315 }
3316
3317 /*
3318  * bridge_rtlookup:
3319  *
3320  *      Lookup the destination interface for an address.
3321  */
3322 static struct ifnet *
3323 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3324 {
3325         struct bridge_rtnode *brt;
3326
3327         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3328                 return NULL;
3329         return brt->brt_info->bri_ifp;
3330 }
3331
3332 static void
3333 bridge_rtreap_handler(netmsg_t msg)
3334 {
3335         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3336         struct bridge_rtnode *brt, *nbrt;
3337
3338         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3339                 if (brt->brt_info->bri_dead)
3340                         bridge_rtnode_destroy(sc, brt);
3341         }
3342         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3343 }
3344
3345 static void
3346 bridge_rtreap(struct bridge_softc *sc)
3347 {
3348         struct netmsg_base msg;
3349
3350         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3351
3352         netmsg_init(&msg, NULL, &curthread->td_msgport,
3353                     0, bridge_rtreap_handler);
3354         msg.lmsg.u.ms_resultp = sc;
3355
3356         ifnet_domsg(&msg.lmsg, 0);
3357 }
3358
3359 static void
3360 bridge_rtreap_async(struct bridge_softc *sc)
3361 {
3362         struct netmsg_base *msg;
3363
3364         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3365
3366         netmsg_init(msg, NULL, &netisr_afree_rport,
3367                     0, bridge_rtreap_handler);
3368         msg->lmsg.u.ms_resultp = sc;
3369
3370         ifnet_sendmsg(&msg->lmsg, 0);
3371 }
3372
3373 /*
3374  * bridge_rttrim:
3375  *
3376  *      Trim the routine table so that we have a number
3377  *      of routing entries less than or equal to the
3378  *      maximum number.
3379  */
3380 static void
3381 bridge_rttrim(struct bridge_softc *sc)
3382 {
3383         struct bridge_rtnode *brt;
3384         int dead;
3385
3386         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3387
3388         /* Make sure we actually need to do this. */
3389         if (sc->sc_brtcnt <= sc->sc_brtmax)
3390                 return;
3391
3392         /*
3393          * Find out how many rtnodes are dead
3394          */
3395         dead = bridge_rtage_finddead(sc);
3396         KKASSERT(dead <= sc->sc_brtcnt);
3397
3398         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3399                 /* Enough dead rtnodes are found */
3400                 bridge_rtreap(sc);
3401                 return;
3402         }
3403
3404         /*
3405          * Kill some dynamic rtnodes to meet the brtmax
3406          */
3407         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3408                 struct bridge_rtinfo *bri = brt->brt_info;
3409
3410                 if (bri->bri_dead) {
3411                         /*
3412                          * We have counted this rtnode in
3413                          * bridge_rtage_finddead()
3414                          */
3415                         continue;
3416                 }
3417
3418                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3419                         bri->bri_dead = 1;
3420                         ++dead;
3421                         KKASSERT(dead <= sc->sc_brtcnt);
3422
3423                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3424                                 /* Enough rtnodes are collected */
3425                                 break;
3426                         }
3427                 }
3428         }
3429         if (dead)
3430                 bridge_rtreap(sc);
3431 }
3432
3433 /*
3434  * bridge_timer:
3435  *
3436  *      Aging timer for the bridge.
3437  */
3438 static void
3439 bridge_timer(void *arg)
3440 {
3441         struct bridge_softc *sc = arg;
3442         struct netmsg_base *msg;
3443
3444         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3445
3446         crit_enter();
3447
3448         if (callout_pending(&sc->sc_brcallout) ||
3449             !callout_active(&sc->sc_brcallout)) {
3450                 crit_exit();
3451                 return;
3452         }
3453         callout_deactivate(&sc->sc_brcallout);
3454
3455         msg = &sc->sc_brtimemsg;
3456         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3457         lwkt_sendmsg(BRIDGE_CFGPORT, &msg->lmsg);
3458
3459         crit_exit();
3460 }
3461
3462 static void
3463 bridge_timer_handler(netmsg_t msg)
3464 {
3465         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3466
3467         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3468
3469         crit_enter();
3470         /* Reply ASAP */
3471         lwkt_replymsg(&msg->lmsg, 0);
3472         crit_exit();
3473
3474         bridge_rtage(sc);
3475         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3476                 callout_reset(&sc->sc_brcallout,
3477                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3478         }
3479 }
3480
3481 static int
3482 bridge_rtage_finddead(struct bridge_softc *sc)
3483 {
3484         struct bridge_rtnode *brt;
3485         int dead = 0;
3486
3487         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3488                 struct bridge_rtinfo *bri = brt->brt_info;
3489
3490                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3491                     time_uptime >= bri->bri_expire) {
3492                         bri->bri_dead = 1;
3493                         ++dead;
3494                         KKASSERT(dead <= sc->sc_brtcnt);
3495                 }
3496         }
3497         return dead;
3498 }
3499
3500 /*
3501  * bridge_rtage:
3502  *
3503  *      Perform an aging cycle.
3504  */
3505 static void
3506 bridge_rtage(struct bridge_softc *sc)
3507 {
3508         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3509
3510         if (bridge_rtage_finddead(sc))
3511                 bridge_rtreap(sc);
3512 }
3513
3514 /*
3515  * bridge_rtflush:
3516  *
3517  *      Remove all dynamic addresses from the bridge.
3518  */
3519 static void
3520 bridge_rtflush(struct bridge_softc *sc, int bf)
3521 {
3522         struct bridge_rtnode *brt;
3523         int reap;
3524
3525         reap = 0;
3526         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3527                 struct bridge_rtinfo *bri = brt->brt_info;
3528
3529                 if ((bf & IFBF_FLUSHALL) ||
3530                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3531                         bri->bri_dead = 1;
3532                         reap = 1;
3533                 }
3534         }
3535         if (reap) {
3536                 if (bf & IFBF_FLUSHSYNC)
3537                         bridge_rtreap(sc);
3538                 else
3539                         bridge_rtreap_async(sc);
3540         }
3541 }
3542
3543 /*
3544  * bridge_rtdaddr:
3545  *
3546  *      Remove an address from the table.
3547  */
3548 static int
3549 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3550 {
3551         struct bridge_rtnode *brt;
3552
3553         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3554
3555         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3556                 return (ENOENT);
3557
3558         /* TODO: add a cheaper delete operation */
3559         brt->brt_info->bri_dead = 1;
3560         bridge_rtreap(sc);
3561         return (0);
3562 }
3563
3564 /*
3565  * bridge_rtdelete:
3566  *
3567  *      Delete routes to a speicifc member interface.
3568  */
3569 void
3570 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3571 {
3572         struct bridge_rtnode *brt;
3573         int reap;
3574
3575         reap = 0;
3576         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3577                 struct bridge_rtinfo *bri = brt->brt_info;
3578
3579                 if (bri->bri_ifp == ifp &&
3580                     ((bf & IFBF_FLUSHALL) ||
3581                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3582                         bri->bri_dead = 1;
3583                         reap = 1;
3584                 }
3585         }
3586         if (reap) {
3587                 if (bf & IFBF_FLUSHSYNC)
3588                         bridge_rtreap(sc);
3589                 else
3590                         bridge_rtreap_async(sc);
3591         }
3592 }
3593
3594 /*
3595  * bridge_rtable_init:
3596  *
3597  *      Initialize the route table for this bridge.
3598  */
3599 static void
3600 bridge_rtable_init(struct bridge_softc *sc)
3601 {
3602         int cpu;
3603
3604         /*
3605          * Initialize per-cpu hash tables
3606          */
3607         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3608                                  M_DEVBUF, M_WAITOK);
3609         for (cpu = 0; cpu < ncpus; ++cpu) {
3610                 int i;
3611
3612                 sc->sc_rthashs[cpu] =
3613                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3614                         M_DEVBUF, M_WAITOK);
3615
3616                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3617                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3618         }
3619         sc->sc_rthash_key = karc4random();
3620
3621         /*
3622          * Initialize per-cpu lists
3623          */
3624         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3625                                  M_DEVBUF, M_WAITOK);
3626         for (cpu = 0; cpu < ncpus; ++cpu)
3627                 LIST_INIT(&sc->sc_rtlists[cpu]);
3628 }
3629
3630 /*
3631  * bridge_rtable_fini:
3632  *
3633  *      Deconstruct the route table for this bridge.
3634  */
3635 static void
3636 bridge_rtable_fini(struct bridge_softc *sc)
3637 {
3638         int cpu;
3639
3640         /*
3641          * Free per-cpu hash tables
3642          */
3643         for (cpu = 0; cpu < ncpus; ++cpu)
3644                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3645         kfree(sc->sc_rthashs, M_DEVBUF);
3646
3647         /*
3648          * Free per-cpu lists
3649          */
3650         kfree(sc->sc_rtlists, M_DEVBUF);
3651 }
3652
3653 /*
3654  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3655  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3656  */
3657 #define mix(a, b, c)                                                    \
3658 do {                                                                    \
3659         a -= b; a -= c; a ^= (c >> 13);                                 \
3660         b -= c; b -= a; b ^= (a << 8);                                  \
3661         c -= a; c -= b; c ^= (b >> 13);                                 \
3662         a -= b; a -= c; a ^= (c >> 12);                                 \
3663         b -= c; b -= a; b ^= (a << 16);                                 \
3664         c -= a; c -= b; c ^= (b >> 5);                                  \
3665         a -= b; a -= c; a ^= (c >> 3);                                  \
3666         b -= c; b -= a; b ^= (a << 10);                                 \
3667         c -= a; c -= b; c ^= (b >> 15);                                 \
3668 } while (/*CONSTCOND*/0)
3669
3670 static __inline uint32_t
3671 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3672 {
3673         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3674
3675         b += addr[5] << 8;
3676         b += addr[4];
3677         a += addr[3] << 24;
3678         a += addr[2] << 16;
3679         a += addr[1] << 8;
3680         a += addr[0];
3681
3682         mix(a, b, c);
3683
3684         return (c & BRIDGE_RTHASH_MASK);
3685 }
3686
3687 #undef mix
3688
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns 0 if all
 *	ETHER_ADDR_LEN bytes match, otherwise the difference a[i] - b[i]
 *	of the first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0)
			return (delta);
	}
	return (0);
}
3700
3701 /*
3702  * bridge_rtnode_lookup:
3703  *
3704  *      Look up a bridge route node for the specified destination.
3705  */
3706 static struct bridge_rtnode *
3707 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3708 {
3709         struct bridge_rtnode *brt;
3710         uint32_t hash;
3711         int dir;
3712
3713         hash = bridge_rthash(sc, addr);
3714         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3715                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3716                 if (dir == 0)
3717                         return (brt);
3718                 if (dir > 0)
3719                         return (NULL);
3720         }
3721
3722         return (NULL);
3723 }
3724
3725 /*
3726  * bridge_rtnode_insert:
3727  *
3728  *      Insert the specified bridge node into the route table.
3729  *      Caller has to make sure that rtnode does not exist.
3730  */
3731 static void
3732 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3733 {
3734         struct bridge_rtnode *lbrt;
3735         uint32_t hash;
3736         int dir;
3737
3738         hash = bridge_rthash(sc, brt->brt_addr);
3739
3740         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3741         if (lbrt == NULL) {
3742                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3743                                   brt, brt_hash);
3744                 goto out;
3745         }
3746
3747         do {
3748                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3749                 KASSERT(dir != 0, ("rtnode already exist"));
3750
3751                 if (dir > 0) {
3752                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3753                         goto out;
3754                 }
3755                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3756                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3757                         goto out;
3758                 }
3759                 lbrt = LIST_NEXT(lbrt, brt_hash);
3760         } while (lbrt != NULL);
3761
3762         panic("no suitable position found for rtnode");
3763 out:
3764         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3765         if (mycpuid == 0) {
3766                 /*
3767                  * Update the brtcnt.
3768                  * We only need to do it once and we do it on CPU0.
3769                  */
3770                 sc->sc_brtcnt++;
3771         }
3772 }
3773
3774 /*
3775  * bridge_rtnode_destroy:
3776  *
3777  *      Destroy a bridge rtnode.
3778  */
3779 static void
3780 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3781 {
3782         LIST_REMOVE(brt, brt_hash);
3783         LIST_REMOVE(brt, brt_list);
3784
3785         if (mycpuid + 1 == ncpus) {
3786                 /* Free rtinfo associated with rtnode on the last cpu */
3787                 kfree(brt->brt_info, M_DEVBUF);
3788         }
3789         kfree(brt, M_DEVBUF);
3790
3791         if (mycpuid == 0) {
3792                 /* Update brtcnt only on CPU0 */
3793                 sc->sc_brtcnt--;
3794         }
3795 }
3796
3797 static __inline int
3798 bridge_post_pfil(struct mbuf *m)
3799 {
3800         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3801                 return EOPNOTSUPP;
3802
3803         /* Not yet */
3804         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3805                 return EOPNOTSUPP;
3806
3807         return 0;
3808 }
3809
3810 /*
 * bridge_pfil:
 *
 *      Send bridge packets through pfil if they are one of the types pfil
 *      can deal with, or if they are ARP or REVARP.  (pfil will pass ARP and
 *      REVARP without question.)  If bifp or ifp is NULL then packet
 *      filtering is skipped for that interface.
 *
 *      mp      in/out packet, starting at the Ethernet header
 *      bifp    bridge interface; filtered only when pfil_bridge is set
 *      ifp     member interface; filtered only when pfil_member is set
 *      dir     PFIL_IN or PFIL_OUT
 *
 *      Returns 0 with the link-level headers restored when the packet
 *      passed.  On the drop paths the packet is freed, *mp is set to NULL
 *      and a non-zero value is returned.  When a filter consumes the packet
 *      *mp is NULL and the hook's own error value (which may be 0) is
 *      returned, so callers need to look at both the return value and *mp.
 *      When the packet had to be fragmented, the value of bridge_fragment()
 *      is returned directly.
 */
3816 static int
3817 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3818 {
3819         int snap, error, i, hlen;
3820         struct ether_header *eh1, eh2;
3821         struct ip *ip;
3822         struct llc llc1;
3823         u_int16_t ether_type;
3824
3825         snap = 0;
3826         error = -1;     /* Default error if not error == 0 */
3827
3828         if (pfil_bridge == 0 && pfil_member == 0)
3829                 return (0); /* filtering is disabled */
3830
             /*
              * Make the first min(packet length, max_protohdr) bytes
              * contiguous before looking at the headers.
              */
3831         i = min((*mp)->m_pkthdr.len, max_protohdr);
3832         if ((*mp)->m_len < i) {
3833                 *mp = m_pullup(*mp, i);
3834                 if (*mp == NULL) {
3835                         kprintf("%s: m_pullup failed\n", __func__);
3836                         return (-1);
3837                 }
3838         }
3839
3840         eh1 = mtod(*mp, struct ether_header *);
3841         ether_type = ntohs(eh1->ether_type);
3842
3843         /*
3844          * Check for SNAP/LLC.
3845          */
3846         if (ether_type < ETHERMTU) {
3847                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3848
3849                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3850                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3851                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3852                     llc2->llc_control == LLC_UI) {
                             /*
                              * NOTE(review): this converts a wire-format
                              * field to host order, so ntohs() would state
                              * the intent; htons() performs the same swap.
                              */
3853                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3854                         snap = 1;
3855                 }
3856         }
3857
3858         /*
3859          * If we're trying to filter bridge traffic, don't look at anything
3860          * other than IP and ARP traffic.  If the filter doesn't understand
3861          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3862          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3863          * but of course we don't have an AppleTalk filter to begin with.
3864          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3865          * ARP traffic.)
3866          */
3867         switch (ether_type) {
3868         case ETHERTYPE_ARP:
3869         case ETHERTYPE_REVARP:
3870                 return (0); /* Automatically pass */
3871
3872         case ETHERTYPE_IP:
3873 #ifdef INET6
3874         case ETHERTYPE_IPV6:
3875 #endif /* INET6 */
3876                 break;
3877
3878         default:
3879                 /*
3880                  * Check to see if the user wants to pass non-ip
3881                  * packets, these will not be checked by pfil(9)
3882                  * and passed unconditionally so the default is to drop.
3883                  */
3884                 if (pfil_onlyip)
3885                         goto bad;
3886         }
3887
3888         /* Strip off the Ethernet header and keep a copy. */
3889         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3890         m_adj(*mp, ETHER_HDR_LEN);
3891
3892         /* Strip off snap header, if present */
3893         if (snap) {
3894                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3895                 m_adj(*mp, sizeof(struct llc));
3896         }
3897
3898         /*
3899          * Check the IP header for alignment and errors
3900          */
3901         if (dir == PFIL_IN) {
3902                 switch (ether_type) {
3903                 case ETHERTYPE_IP:
3904                         error = bridge_ip_checkbasic(mp);
3905                         break;
3906 #ifdef INET6
3907                 case ETHERTYPE_IPV6:
3908                         error = bridge_ip6_checkbasic(mp);
3909                         break;
3910 #endif /* INET6 */
3911                 default:
3912                         error = 0;
3913                 }
3914                 if (error)
3915                         goto bad;
3916         }
3917
3918         error = 0;
3919
3920         /*
3921          * Run the packet through pfil
3922          */
3923         switch (ether_type) {
3924         case ETHERTYPE_IP:
3925                 /*
3926                  * before calling the firewall, swap fields the same as
3927                  * IP does. here we assume the header is contiguous
3928                  */
3929                 ip = mtod(*mp, struct ip *);
3930
3931                 ip->ip_len = ntohs(ip->ip_len);
3932                 ip->ip_off = ntohs(ip->ip_off);
3933
3934                 /*
3935                  * Run pfil on the member interface and the bridge, both can
3936                  * be skipped by clearing pfil_member or pfil_bridge.
3937                  *
3938                  * Keep the order:
3939                  *   in_if -> bridge_if -> out_if
3940                  */
3941                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3942                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3943                         if (*mp == NULL || error != 0) /* filter may consume */
3944                                 break;
3945                         error = bridge_post_pfil(*mp);
3946                         if (error)
3947                                 break;
3948                 }
3949
3950                 if (pfil_member && ifp != NULL) {
3951                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3952                         if (*mp == NULL || error != 0) /* filter may consume */
3953                                 break;
3954                         error = bridge_post_pfil(*mp);
3955                         if (error)
3956                                 break;
3957                 }
3958
3959                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3960                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3961                         if (*mp == NULL || error != 0) /* filter may consume */
3962                                 break;
3963                         error = bridge_post_pfil(*mp);
3964                         if (error)
3965                                 break;
3966                 }
3967
3968                 /* check if we need to fragment the packet */
3969                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3970                         i = (*mp)->m_pkthdr.len;
3971                         if (i > ifp->if_mtu) {
                                     /*
                                      * bridge_fragment() restores the link
                                      * headers itself and its result is
                                      * returned as is.  The mbuf is passed by
                                      * value, so *mp is not updated here.
                                      */
3972                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3973                                             &llc1);
3974                                 return (error);
3975                         }
3976                 }
3977
3978                 /* Recalculate the ip checksum and restore byte ordering */
3979                 ip = mtod(*mp, struct ip *);
3980                 hlen = ip->ip_hl << 2;
3981                 if (hlen < sizeof(struct ip))
3982                         goto bad;
3983                 if (hlen > (*mp)->m_len) {
3984                         if ((*mp = m_pullup(*mp, hlen)) == NULL)
3985                                 goto bad;
3986                         ip = mtod(*mp, struct ip *);
3987                         if (ip == NULL)
3988                                 goto bad;
3989                 }
3990                 ip->ip_len = htons(ip->ip_len);
3991                 ip->ip_off = htons(ip->ip_off);
3992                 ip->ip_sum = 0;
3993                 if (hlen == sizeof(struct ip))
3994                         ip->ip_sum = in_cksum_hdr(ip);
3995                 else
3996                         ip->ip_sum = in_cksum(*mp, hlen);
3997
3998                 break;
3999 #ifdef INET6
4000         case ETHERTYPE_IPV6:
4001                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
4002                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4003                                         dir);
4004
4005                 if (*mp == NULL || error != 0) /* filter may consume */
4006                         break;
4007
4008                 if (pfil_member && ifp != NULL)
4009                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
4010                                         dir);
4011
4012                 if (*mp == NULL || error != 0) /* filter may consume */
4013                         break;
4014
4015                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4016                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4017                                         dir);
4018                 break;
4019 #endif
4020         default:
4021                 error = 0;
4022                 break;
4023         }
4024
             /* A consumed packet is reported with the hook's own error. */
4025         if (*mp == NULL)
4026                 return (error);
4027         if (error != 0)
4028                 goto bad;
4029
             /* From here on a failed M_PREPEND is reported as -1. */
4030         error = -1;
4031
4032         /*
4033          * Finally, put everything back the way it was and return
4034          */
4035         if (snap) {
4036                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
4037                 if (*mp == NULL)
4038                         return (error);
4039                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4040         }
4041
4042         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
4043         if (*mp == NULL)
4044                 return (error);
4045         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4046
4047         return (0);
4048
     /* Drop path: free the packet and hand NULL back through *mp. */
4049 bad:
4050         m_freem(*mp);
4051         *mp = NULL;
4052         return (error);
4053 }
4054
4055 /*
4056  * Perform basic checks on header size since
4057  * pfil assumes ip_input has already processed
4058  * it for it.  Cut-and-pasted from ip_input.c.
4059  * Given how simple the IPv6 version is,
4060  * does the IPv4 version really need to be
4061  * this complicated?
4062  *
4063  * XXX Should we update ipstat here, or not?
4064  * XXX Right now we update ipstat but not
4065  * XXX csum_counter.
4066  */
4067 static int
4068 bridge_ip_checkbasic(struct mbuf **mp)
4069 {
4070         struct mbuf *m = *mp;
4071         struct ip *ip;
4072         int len, hlen;
4073         u_short sum;
4074
4075         if (*mp == NULL)
4076                 return (-1);
4077 #if 0 /* notyet */
4078         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4079                 if ((m = m_copyup(m, sizeof(struct ip),
4080                         (max_linkhdr + 3) & ~3)) == NULL) {
4081                         /* XXXJRT new stat, please */
4082                         ipstat.ips_toosmall++;
4083                         goto bad;
4084                 }
4085         } else
4086 #endif
4087 #ifndef __predict_false
4088 #define __predict_false(x) x
4089 #endif
4090          if (__predict_false(m->m_len < sizeof (struct ip))) {
4091                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4092                         ipstat.ips_toosmall++;
4093                         goto bad;
4094                 }
4095         }
4096         ip = mtod(m, struct ip *);
4097         if (ip == NULL) goto bad;
4098
4099         if (ip->ip_v != IPVERSION) {
4100                 ipstat.ips_badvers++;
4101                 goto bad;
4102         }
4103         hlen = ip->ip_hl << 2;
4104         if (hlen < sizeof(struct ip)) { /* minimum header length */
4105                 ipstat.ips_badhlen++;
4106                 goto bad;
4107         }
4108         if (hlen > m->m_len) {
4109                 if ((m = m_pullup(m, hlen)) == NULL) {
4110                         ipstat.ips_badhlen++;
4111                         goto bad;
4112                 }
4113                 ip = mtod(m, struct ip *);
4114                 if (ip == NULL) goto bad;
4115         }
4116
4117         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4118                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4119         } else {
4120                 if (hlen == sizeof(struct ip)) {
4121                         sum = in_cksum_hdr(ip);
4122                 } else {
4123                         sum = in_cksum(m, hlen);
4124                 }
4125         }
4126         if (sum) {
4127                 ipstat.ips_badsum++;
4128                 goto bad;
4129         }
4130
4131         /* Retrieve the packet length. */
4132         len = ntohs(ip->ip_len);
4133
4134         /*
4135          * Check for additional length bogosity
4136          */
4137         if (len < hlen) {
4138                 ipstat.ips_badlen++;
4139                 goto bad;
4140         }
4141
4142         /*
4143          * Check that the amount of data in the buffers
4144          * is as at least much as the IP header would have us expect.
4145          * Drop packet if shorter than we expect.
4146          */
4147         if (m->m_pkthdr.len < len) {
4148                 ipstat.ips_tooshort++;
4149                 goto bad;
4150         }
4151
4152         /* Checks out, proceed */
4153         *mp = m;
4154         return (0);
4155
4156 bad:
4157         *mp = m;
4158         return (-1);
4159 }
4160
#ifdef INET6
/*
 * bridge_ip6_checkbasic:
 *
 *	Same as bridge_ip_checkbasic(), but for IPv6: make sure the base
 *	IPv6 header is in the first mbuf and carries the right version.
 *	Cut-and-pasted from ip6_input.c.
 *
 *	Returns 0 if the header checks out and -1 otherwise; *mp is
 *	updated in both cases and a bad packet is not freed here.
 *
 * XXX Should we update ip6stat, or not?
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;
	int error = -1;

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
#if 0 /* notyet */
	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
		struct ifnet *inifp = m->m_pkthdr.rcvif;
		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
			    (max_linkhdr + 3) & ~3)) == NULL) {
			/* XXXJRT new stat, please */
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto out;
		}
	} else
#endif
	if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
		/* Grab rcvif first, m is gone if m_pullup() fails */
		struct ifnet *inifp = m->m_pkthdr.rcvif;

		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto out;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);
	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto out;
	}

	/* Checks out, proceed */
	error = 0;
out:
	*mp = m;
	return (error);
}
#endif /* INET6 */
4217
4218 /*
4219  * bridge_fragment:
4220  *
4221  *      Return a fragmented mbuf chain.
4222  */
4223 static int
4224 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4225     int snap, struct llc *llc)
4226 {
4227         struct mbuf *m0;
4228         struct ip *ip;
4229         int error = -1;
4230
4231         if (m->m_len < sizeof(struct ip) &&
4232             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4233                 goto out;
4234         ip = mtod(m, struct ip *);
4235
4236         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4237                     CSUM_DELAY_IP);
4238         if (error)
4239                 goto out;
4240
4241         /* walk the chain and re-add the Ethernet header */
4242         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4243                 if (error == 0) {
4244                         if (snap) {
4245                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
4246                                 if (m0 == NULL) {
4247                                         error = ENOBUFS;
4248                                         continue;
4249                                 }
4250                                 bcopy(llc, mtod(m0, caddr_t),
4251                                     sizeof(struct llc));
4252                         }
4253                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
4254                         if (m0 == NULL) {
4255                                 error = ENOBUFS;
4256                                 continue;
4257                         }
4258                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4259                 } else 
4260                         m_freem(m);
4261         }
4262
4263         if (error == 0)
4264                 ipstat.ips_fragmented++;
4265
4266         return (error);
4267
4268 out:
4269         if (m != NULL)
4270                 m_freem(m);
4271         return (error);
4272 }
4273
4274 static void
4275 bridge_enqueue_handler(netmsg_t msg)
4276 {
4277         struct netmsg_packet *nmp;
4278         struct ifnet *dst_ifp;
4279         struct mbuf *m;
4280
4281         nmp = &msg->packet;
4282         m = nmp->nm_packet;
4283         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4284         mbuftrackid(m, 71);
4285
4286         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4287 }
4288
4289 static void
4290 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4291                struct mbuf *m, int from_us)
4292 {
4293         struct mbuf *m0;
4294         struct ifnet *bifp;
4295
4296         bifp = sc->sc_ifp;
4297         mbuftrackid(m, 72);
4298
4299         /* We may be sending a fragment so traverse the mbuf */
4300         for (; m; m = m0) {
4301                 struct altq_pktattr pktattr;
4302
4303                 m0 = m->m_nextpkt;
4304                 m->m_nextpkt = NULL;
4305
4306                 /*
4307                  * If being sent from our host override ether_shost
4308                  * with the bridge MAC.  This is mandatory for ARP
4309                  * so things don't get confused.  In particular we
4310                  * don't want ARPs to get associated with link interfaces
4311                  * under the bridge which might or might not stay valid.
4312                  *
4313                  * Also override ether_shost when relaying a packet out
4314                  * the same interface it came in on, due to multi-homed
4315                  * addresses & default routes, otherwise switches will
4316                  * get very confused.
4317                  *
4318                  * Otherwise if we are in transparent mode.
4319                  */
4320                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4321                         m_copyback(m,
4322                                    offsetof(struct ether_header, ether_shost),
4323                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4324                 } else if ((bifp->if_flags & IFF_LINK0) &&
4325                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4326                         m_copyback(m,
4327                                    offsetof(struct ether_header, ether_shost),
4328                                    ETHER_ADDR_LEN,
4329                                    m->m_pkthdr.ether_br_shost);
4330                 } /* else retain shost */
4331
4332                 if (ifq_is_enabled(&dst_ifp->if_snd))
4333                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4334
4335                 ifq_dispatch(dst_ifp, m, &pktattr);
4336         }
4337 }
4338
4339 static void
4340 bridge_control_dispatch(netmsg_t msg)
4341 {
4342         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4343         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4344         int error;
4345
4346         ifnet_serialize_all(bifp);
4347         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4348         ifnet_deserialize_all(bifp);
4349
4350         lwkt_replymsg(&bc_msg->base.lmsg, error);
4351 }
4352
4353 static int
4354 bridge_control(struct bridge_softc *sc, u_long cmd,
4355                bridge_ctl_t bc_func, void *bc_arg)
4356 {
4357         struct ifnet *bifp = sc->sc_ifp;
4358         struct netmsg_brctl bc_msg;
4359         int error;
4360
4361         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4362
4363         bzero(&bc_msg, sizeof(bc_msg));
4364
4365         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4366                     0, bridge_control_dispatch);
4367         bc_msg.bc_func = bc_func;
4368         bc_msg.bc_sc = sc;
4369         bc_msg.bc_arg = bc_arg;
4370
4371         ifnet_deserialize_all(bifp);
4372         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4373         ifnet_serialize_all(bifp);
4374         return error;
4375 }
4376
4377 static void
4378 bridge_add_bif_handler(netmsg_t msg)
4379 {
4380         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4381         struct bridge_softc *sc;
4382         struct bridge_iflist *bif;
4383
4384         sc = amsg->br_softc;
4385
4386         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4387         bif->bif_ifp = amsg->br_bif_ifp;
4388         bif->bif_onlist = 1;
4389         bif->bif_info = amsg->br_bif_info;
4390
4391         /*
4392          * runs through bif_info
4393          */
4394         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4395
4396         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4397
4398         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4399 }
4400
4401 static void
4402 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4403                struct ifnet *ifp)
4404 {
4405         struct netmsg_braddbif amsg;
4406
4407         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4408
4409         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4410                     0, bridge_add_bif_handler);
4411         amsg.br_softc = sc;
4412         amsg.br_bif_info = bif_info;
4413         amsg.br_bif_ifp = ifp;
4414
4415         ifnet_domsg(&amsg.base.lmsg, 0);
4416 }
4417
4418 static void
4419 bridge_del_bif_handler(netmsg_t msg)
4420 {
4421         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4422         struct bridge_softc *sc;
4423         struct bridge_iflist *bif;
4424
4425         sc = dmsg->br_softc;
4426
4427         /*
4428          * Locate the bif associated with the br_bif_info
4429          * on the current CPU
4430          */
4431         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4432         KKASSERT(bif != NULL && bif->bif_onlist);
4433
4434         /* Remove the bif from the current CPU's iflist */
4435         bif->bif_onlist = 0;
4436         TAILQ_REMOVE(dmsg->br_bif_list, bif, bif_next);
4437
4438         /* Save the removed bif for later freeing */
4439         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4440
4441         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4442 }
4443
4444 static void
4445 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4446                struct bridge_iflist_head *saved_bifs)
4447 {
4448         struct netmsg_brdelbif dmsg;
4449
4450         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4451
4452         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4453                     0, bridge_del_bif_handler);
4454         dmsg.br_softc = sc;
4455         dmsg.br_bif_info = bif_info;
4456         dmsg.br_bif_list = saved_bifs;
4457
4458         ifnet_domsg(&dmsg.base.lmsg, 0);
4459 }