Merge branch 'vendor/DIFFUTILS'
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *      - Currently only supports Ethernet-like interfaces (Ethernet,
77  *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *        consider heterogeneous bridges).
80  *
81  *
82  * Bridge route information is duplicated on each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
100  * We choose to put timeout and dst_ifp into the shared part, so updating
101  * them is cheaper than using message forwarding.  There is also no need
102  * for a spinlock to protect the updates: timeout and dst_ifp are
103  * unrelated, and the order in which the two fields are updated does not
104  * matter.  Cache pollution from the shared part should not be heavy: in a
105  * stable setup, dst_ifp will probably not change during the rtnode's
106  * lifetime, while timeout is refreshed once per second; most of the time,
107  * timeout and dst_ifp are only read.
108  *
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                               tcp_thread2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *      ifnet0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |
125  *  alloc rtinfo
126  *  alloc rtnode
127  * install rtnode
128  *        |
129  *        +---------->ifnet1
130  *        : fwd nmsg    |
131  *        : w/ rtinfo   |
132  *        :             |
133  *        :             |
134  *                 alloc rtnode
135  *               (w/ nmsg's rtinfo)
136  *                install rtnode
137  *                      |
138  *                      +---------->ifnet2
139  *                      : fwd nmsg    |
140  *                      : w/ rtinfo   |
141  *                      :             |
142  *                      :         same as ifnet1
143  *                                    |
144  *                                    +---------->ifnet3
145  *                                    : fwd nmsg    |
146  *                                    : w/ rtinfo   |
147  *                                    :             |
148  *                                    :         same as ifnet1
149  *                                               free nmsg
150  *                                                  :
151  *                                                  :
152  *
153  * The netmsgs forwarded between protocol threads and ifnet threads are
154  * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
155  * most cases (route information is too precious not to be installed :).
156  * Since multiple threads may try to install route information for the
157  * same dst eaddr, we look up the route information in ifnet0.  This
158  * lookup only needs to be performed on ifnet0, which is the starting
159  * point of the route information installation process.
160  *
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1             CPU2             CPU3
165  *
166  * netisr0
167  *   |
168  * find suitable rtnodes,
169  * mark their rtinfo dead
170  *   |
171  *   | domsg <------------------------------------------+
172  *   |                                                  | replymsg
173  *   |                                                  |
174  *   V     fwdmsg           fwdmsg           fwdmsg     |
175  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
176  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
177  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
178  *                                                    free dead rtinfos
179  *
180  * All deleting/flushing operations are serialized by netisr0, so each
181  * operation only reaps the route information marked dead by itself.
182  *
183  *
184  * Bridge route information adding/deleting/flushing:
185  * Since all operations are serialized by the fixed message flow between
186  * ifnet threads, it is not possible to create corrupted per-cpu route
187  * information.
188  *
189  *
190  *
191  * Percpu member interface list iteration with blocking operations:
192  * Since a bridge can only delete one member interface at a time and
193  * the deleted member interface is not freed after netmsg_service_sync(),
194  * the following approach is used to make sure that even if a member
195  * interface is ripped from the percpu list during a blocking operation,
196  * the iteration can still keep going:
197  *
198  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
199  *     blocking operation;
200  *     blocking operation;
201  *     ...
202  *     ...
203  *     if (nbif != NULL && !nbif->bif_onlist) {
204  *         KKASSERT(bif->bif_onlist);
205  *         nbif = TAILQ_NEXT(bif, bif_next);
206  *     }
207  * }
208  *
209  * As mentioned above, only one member interface can be unlinked from the
210  * percpu member interface list, so either bif or nbif may be off the list,
211  * but _not_ both.  To keep the iteration going, we don't care about bif,
212  * only nbif.  Since a removed member interface is only freed after we
213  * finish our work, it is safe to access any field in an unlinked bif (here
214  * bif_onlist).  If nbif is no longer on the list, then bif must be on the
215  * list, so we change nbif to the next element of bif and keep going.
216  */
217
218 #include "opt_inet.h"
219 #include "opt_inet6.h"
220
221 #include <sys/param.h>
222 #include <sys/mbuf.h>
223 #include <sys/malloc.h>
224 #include <sys/protosw.h>
225 #include <sys/systm.h>
226 #include <sys/time.h>
227 #include <sys/socket.h> /* for net/if.h */
228 #include <sys/sockio.h>
229 #include <sys/ctype.h>  /* string functions */
230 #include <sys/kernel.h>
231 #include <sys/random.h>
232 #include <sys/sysctl.h>
233 #include <sys/module.h>
234 #include <sys/proc.h>
235 #include <sys/priv.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263
264 #include <net/route.h>
265 #include <sys/in_cksum.h>
266
267 /*
268  * Size of the route hash table.  Must be a power of two.
269  */
270 #ifndef BRIDGE_RTHASH_SIZE
271 #define BRIDGE_RTHASH_SIZE              1024
272 #endif
273
274 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
275
276 /*
277  * Maximum number of addresses to cache.
278  */
279 #ifndef BRIDGE_RTABLE_MAX
280 #define BRIDGE_RTABLE_MAX               100
281 #endif
282
283 /*
284  * Spanning tree defaults.
285  */
286 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
287 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
288 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
289 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
290 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
291 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
292 #define BSTP_DEFAULT_PATH_COST          55
293
294 /*
295  * Timeout (in seconds) for entries learned dynamically.
296  */
297 #ifndef BRIDGE_RTABLE_TIMEOUT
298 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
299 #endif
300
301 /*
302  * Number of seconds between walks of the route list.
303  */
304 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
305 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
306 #endif
307
308 /*
309  * List of capabilities to mask on the member interface.
310  */
311 #define BRIDGE_IFCAPS_MASK              (IFCAP_TXCSUM | IFCAP_TSO)
312
313 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
314
315 struct netmsg_brctl {       /* netmsg bundling a bridge_ctl_t handler with a softc and an opaque argument */
316         struct netmsg_base      base;   /* netmsg header, first member */
317         bridge_ctl_t            bc_func;        /* handler of type int (*)(struct bridge_softc *, void *) */
318         struct bridge_softc     *bc_sc; /* presumably handed to bc_func as its softc -- confirm at the dispatch site */
319         void                    *bc_arg;        /* presumably handed to bc_func as its void * argument -- confirm */
320 };
321
322 struct netmsg_brsaddr {     /* NOTE(review): fields line up with bridge_rtinstall_oncpu()'s parameters; presumably the route-install netmsg -- confirm */
323         struct netmsg_base      base;   /* netmsg header, first member */
324         struct bridge_softc     *br_softc;      /* bridge instance the message applies to */
325         struct ifnet            *br_dst_if;     /* presumably the interface the address is reached through -- confirm */
326         struct bridge_rtinfo    *br_rtinfo;     /* presumably the shared rtinfo forwarded between ifnet threads (see file header diagram) -- confirm */
327         int                     br_setflags;    /* NOTE(review): meaning not visible here; check the handler */
328         uint8_t                 br_dst[ETHER_ADDR_LEN]; /* ethernet address, ETHER_ADDR_LEN bytes */
329         uint8_t                 br_flags;       /* NOTE(review): flag values are defined outside this chunk */
330 };
331
332 struct netmsg_braddbif {    /* NOTE(review): fields match bridge_add_bif()'s parameters; presumably consumed by bridge_add_bif_handler -- confirm */
333         struct netmsg_base      base;   /* netmsg header, first member */
334         struct bridge_softc     *br_softc;      /* bridge instance the message applies to */
335         struct bridge_ifinfo    *br_bif_info;   /* member interface info */
336         struct ifnet            *br_bif_ifp;    /* member interface */
337 };
338
339 struct netmsg_brdelbif {    /* NOTE(review): fields match bridge_del_bif()'s parameters; presumably consumed by bridge_del_bif_handler -- confirm */
340         struct netmsg_base      base;   /* netmsg header, first member */
341         struct bridge_softc     *br_softc;      /* bridge instance the message applies to */
342         struct bridge_ifinfo    *br_bif_info;   /* member interface info */
343         struct bridge_iflist_head *br_bif_list; /* list head; NOTE(review): its role is not visible in this chunk */
344 };
345
346 struct netmsg_brsflags {    /* NOTE(review): presumably carries a member-interface flag update; its handler is outside this chunk -- confirm */
347         struct netmsg_base      base;   /* netmsg header, first member */
348         struct bridge_softc     *br_softc;      /* bridge instance the message applies to */
349         struct bridge_ifinfo    *br_bif_info;   /* member interface info */
350         uint32_t                br_bif_flags;   /* flag word; bit definitions are not visible here */
351 };
352
353 eventhandler_tag        bridge_detach_cookie = NULL;        /* ifnet_detach_event registration; set on MOD_LOAD, used to deregister on MOD_UNLOAD */
354
355 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);      /* set to bridge_input on MOD_LOAD, NULL on MOD_UNLOAD */
356 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);      /* set to bridge_output on MOD_LOAD, NULL on MOD_UNLOAD */
357 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* NOTE(review): not assigned by bridge_modevent in the visible code */
358 extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);       /* set to bridge_interface on MOD_LOAD, NULL on MOD_UNLOAD */
359
360 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
361
362 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
363 static int      bridge_clone_destroy(struct ifnet *);
364
365 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
366 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
367 static void     bridge_ifdetach(void *, struct ifnet *);
368 static void     bridge_init(void *);
369 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
370 static void     bridge_stop(struct ifnet *);
371 static void     bridge_start(struct ifnet *, struct ifaltq_subque *);
372 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
373 static int      bridge_output(struct ifnet *, struct mbuf *);
374 static struct ifnet *bridge_interface(void *if_bridge);
375
376 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
377
378 static void     bridge_timer_handler(netmsg_t);
379 static void     bridge_timer(void *);
380
381 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
382 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
383                     struct mbuf *);
384 static void     bridge_span(struct bridge_softc *, struct mbuf *);
385
386 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
387                     struct ifnet *, uint8_t);
388 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
389 static void     bridge_rtreap(struct bridge_softc *);
390 static void     bridge_rtreap_async(struct bridge_softc *);
391 static void     bridge_rttrim(struct bridge_softc *);
392 static int      bridge_rtage_finddead(struct bridge_softc *);
393 static void     bridge_rtage(struct bridge_softc *);
394 static void     bridge_rtflush(struct bridge_softc *, int);
395 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
396 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
397                     struct ifnet *, uint8_t);
398 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
399 static void     bridge_rtreap_handler(netmsg_t);
400 static void     bridge_rtinstall_handler(netmsg_t);
401 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
402                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
403
404 static void     bridge_rtable_init(struct bridge_softc *);
405 static void     bridge_rtable_fini(struct bridge_softc *);
406
407 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
408 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
409                     const uint8_t *);
410 static void     bridge_rtnode_insert(struct bridge_softc *,
411                     struct bridge_rtnode *);
412 static void     bridge_rtnode_destroy(struct bridge_softc *,
413                     struct bridge_rtnode *);
414
415 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
416                     const char *name);
417 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
418                     struct ifnet *ifp);
419 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
420                     struct bridge_ifinfo *);
421 static void     bridge_delete_member(struct bridge_softc *,
422                     struct bridge_iflist *, int);
423 static void     bridge_delete_span(struct bridge_softc *,
424                     struct bridge_iflist *);
425
426 static int      bridge_control(struct bridge_softc *, u_long,
427                                bridge_ctl_t, void *);
428 static int      bridge_ioctl_init(struct bridge_softc *, void *);
429 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
430 static int      bridge_ioctl_add(struct bridge_softc *, void *);
431 static int      bridge_ioctl_del(struct bridge_softc *, void *);
432 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
433                                 struct bridge_iflist *bif, struct ifbreq *req);
434 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
435 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
436 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
437 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
439 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
440 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
441 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
442 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
443 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
444 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
446 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
447 static int      bridge_ioctl_reinit(struct bridge_softc *, void *);
448 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
449 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
450 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
451 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
452 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
453 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
455 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
456 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
457 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
458 static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
459 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
460                     int);
461 static int      bridge_ip_checkbasic(struct mbuf **mp);
462 #ifdef INET6
463 static int      bridge_ip6_checkbasic(struct mbuf **mp);
464 #endif /* INET6 */
465 static int      bridge_fragment(struct ifnet *, struct mbuf *,
466                     struct ether_header *, int, struct llc *);
467 static void     bridge_enqueue_handler(netmsg_t);
468 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
469                     struct mbuf *, int);
470
471 static void     bridge_del_bif_handler(netmsg_t);
472 static void     bridge_add_bif_handler(netmsg_t);
473 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
474                     struct bridge_iflist_head *);
475 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
476                     struct ifnet *);
477
478 SYSCTL_DECL(_net_link);
479 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");        /* parent node for the bridge tunables below */
480
481 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
482 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
483 static int pfil_member = 1; /* run pfil hooks on the member interface */
484 static int bridge_debug;    /* debug mode switch; zero-initialized (off) by default */
485 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
486     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
487 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
488     &pfil_bridge, 0, "Packet filter on the bridge interface");
489 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
490     &pfil_member, 0, "Packet filter on the member interface");
491 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
492     &bridge_debug, 0, "Bridge debug mode");
493
494 struct bridge_control_arg { /* argument buffer for driver-specific ioctls; bridge_ioctl() keeps one on its stack */
495         union {                 /* one member per argument type named in bridge_control_table */
496                 struct ifbreq ifbreq;
497                 struct ifbifconf ifbifconf;
498                 struct ifbareq ifbareq;
499                 struct ifbaconf ifbaconf;
500                 struct ifbrparam ifbrparam;
501         } bca_u;
502         int     bca_len;        /* NOTE(review): presumably the length of an auxiliary buffer -- confirm in the gifs/rts handlers */
503         void    *bca_uptr;      /* NOTE(review): name suggests a user-space pointer -- confirm */
504         void    *bca_kptr;      /* NOTE(review): name suggests a kernel-space buffer -- confirm */
505 };
506
507 struct bridge_control {     /* one entry of bridge_control_table */
508         bridge_ctl_t    bc_func;        /* handler for the command */
509         int             bc_argsize;     /* sizeof() of the argument structure the command uses */
510         int             bc_flags;       /* BC_F_* bits */
511 };
512
513 #define BC_F_COPYIN             0x01    /* copy arguments in */
514 #define BC_F_COPYOUT            0x02    /* copy arguments out */
515 #define BC_F_SUSER              0x04    /* do super-user check */
516
517 const struct bridge_control bridge_control_table[] = {
518         { bridge_ioctl_add,             sizeof(struct ifbreq),
519           BC_F_COPYIN|BC_F_SUSER },
520         { bridge_ioctl_del,             sizeof(struct ifbreq),
521           BC_F_COPYIN|BC_F_SUSER },
522
523         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
524           BC_F_COPYIN|BC_F_COPYOUT },
525         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
526           BC_F_COPYIN|BC_F_SUSER },
527
528         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
529           BC_F_COPYIN|BC_F_SUSER },
530         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
531           BC_F_COPYOUT },
532
533         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
534           BC_F_COPYIN|BC_F_COPYOUT },
535         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
536           BC_F_COPYIN|BC_F_COPYOUT },
537
538         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
539           BC_F_COPYIN|BC_F_SUSER },
540
541         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
542           BC_F_COPYIN|BC_F_SUSER },
543         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
544           BC_F_COPYOUT },
545
546         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
547           BC_F_COPYIN|BC_F_SUSER },
548
549         { bridge_ioctl_flush,           sizeof(struct ifbreq),
550           BC_F_COPYIN|BC_F_SUSER },
551
552         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
553           BC_F_COPYOUT },
554         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
555           BC_F_COPYIN|BC_F_SUSER },
556
557         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
558           BC_F_COPYOUT },
559         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
560           BC_F_COPYIN|BC_F_SUSER },
561
562         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
563           BC_F_COPYOUT },
564         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
565           BC_F_COPYIN|BC_F_SUSER },
566
567         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
568           BC_F_COPYOUT },
569         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
570           BC_F_COPYIN|BC_F_SUSER },
571
572         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
573           BC_F_COPYIN|BC_F_SUSER },
574
575         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
576           BC_F_COPYIN|BC_F_SUSER },
577
578         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
579           BC_F_COPYIN|BC_F_SUSER },
580         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
581           BC_F_COPYIN|BC_F_SUSER },
582
583         { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
584           BC_F_COPYIN|BC_F_SUSER },
585
586 };
587 static const int bridge_control_table_size = NELEM(bridge_control_table);
588
589 LIST_HEAD(, bridge_softc) bridge_list;      /* all bridge instances; initialized on MOD_LOAD, must be empty for MOD_UNLOAD */
590
591 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",      /* cloner for "bridge" interfaces; attached on MOD_LOAD */
592                                 bridge_clone_create,
593                                 bridge_clone_destroy, 0, IF_MAXUNIT);
594
595 static int
596 bridge_modevent(module_t mod, int type, void *data)
597 {
598         switch (type) {
599         case MOD_LOAD:
600                 LIST_INIT(&bridge_list);
601                 if_clone_attach(&bridge_cloner);
602                 bridge_input_p = bridge_input;
603                 bridge_output_p = bridge_output;
604                 bridge_interface_p = bridge_interface;
605                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
606                     ifnet_detach_event, bridge_ifdetach, NULL,
607                     EVENTHANDLER_PRI_ANY);
608 #if 0 /* notyet */
609                 bstp_linkstate_p = bstp_linkstate;
610 #endif
611                 break;
612         case MOD_UNLOAD:
613                 if (!LIST_EMPTY(&bridge_list))
614                         return (EBUSY);
615                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
616                     bridge_detach_cookie);
617                 if_clone_detach(&bridge_cloner);
618                 bridge_input_p = NULL;
619                 bridge_output_p = NULL;
620                 bridge_interface_p = NULL;
621 #if 0 /* notyet */
622                 bstp_linkstate_p = NULL;
623 #endif
624                 break;
625         default:
626                 return (EOPNOTSUPP);
627         }
628         return (0);
629 }
630
631 static moduledata_t bridge_mod = {  /* module descriptor handed to DECLARE_MODULE below */
632         "if_bridge",
633         bridge_modevent,        /* handles MOD_LOAD / MOD_UNLOAD */
634         0
635 };
636
637 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
638
639
640 /*
641  * bridge_clone_create:
642  *
643  *      Create a new bridge instance.
644  */
645 static int
646 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
647 {
648         struct bridge_softc *sc;
649         struct ifnet *ifp;
650         u_char eaddr[6];
651         int cpu, rnd;
652
653         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
654         ifp = sc->sc_ifp = &sc->sc_if;
655
656         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
657         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
658         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
659         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
660         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
661         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
662         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
663
664         /* Initialize our routing table. */
665         bridge_rtable_init(sc);
666
667         callout_init(&sc->sc_brcallout);
668         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
669                     MSGF_DROPABLE, bridge_timer_handler);
670         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
671
672         callout_init(&sc->sc_bstpcallout);
673         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
674                     MSGF_DROPABLE, bstp_tick_handler);
675         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
676
677         /* Initialize per-cpu member iface lists */
678         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
679                                  M_DEVBUF, M_WAITOK);
680         for (cpu = 0; cpu < ncpus; ++cpu)
681                 TAILQ_INIT(&sc->sc_iflists[cpu]);
682
683         TAILQ_INIT(&sc->sc_spanlist);
684
685         ifp->if_softc = sc;
686         if_initname(ifp, ifc->ifc_name, unit);
687         ifp->if_mtu = ETHERMTU;
688         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
689         ifp->if_ioctl = bridge_ioctl;
690         ifp->if_start = bridge_start;
691         ifp->if_init = bridge_init;
692         ifp->if_type = IFT_ETHER;
693         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
694         ifq_set_ready(&ifp->if_snd);
695         ifp->if_hdrlen = ETHER_HDR_LEN;
696
697         /*
698          * Generate a random ethernet address and use the private AC:DE:48
699          * OUI code.
700          */
701         rnd = karc4random();
702         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
703         rnd = karc4random();
704         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
705
706         eaddr[0] &= ~1; /* clear multicast bit */
707         eaddr[0] |= 2;  /* set the LAA bit */
708
709         ether_ifattach(ifp, eaddr, NULL);
710         /* Now undo some of the damage... */
711         ifp->if_baudrate = 0;
712         /*ifp->if_type = IFT_BRIDGE;*/
713
714         crit_enter();   /* XXX MP */
715         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
716         crit_exit();
717
718         return (0);
719 }
720
721 static void
722 bridge_delete_dispatch(netmsg_t msg)
723 {
724         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
725         struct ifnet *bifp = sc->sc_ifp;
726         struct bridge_iflist *bif;
727
728         ifnet_serialize_all(bifp);
729
730         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
731                 bridge_delete_member(sc, bif, 0);
732
733         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
734                 bridge_delete_span(sc, bif);
735
736         ifnet_deserialize_all(bifp);
737
738         lwkt_replymsg(&msg->lmsg, 0);
739 }
740
741 /*
742  * bridge_clone_destroy:
743  *
744  *      Destroy a bridge instance.
745  */
746 static int
747 bridge_clone_destroy(struct ifnet *ifp)
748 {
749         struct bridge_softc *sc = ifp->if_softc;
750         struct netmsg_base msg;
751
752         ifnet_serialize_all(ifp);
753
754         bridge_stop(ifp);
755         ifp->if_flags &= ~IFF_UP;
756
757         ifnet_deserialize_all(ifp);
758
759         netmsg_init(&msg, NULL, &curthread->td_msgport,
760                     0, bridge_delete_dispatch);
761         msg.lmsg.u.ms_resultp = sc;
762         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
763
764         crit_enter();   /* XXX MP */
765         LIST_REMOVE(sc, sc_list);
766         crit_exit();
767
768         ether_ifdetach(ifp);
769
770         /* Tear down the routing table. */
771         bridge_rtable_fini(sc);
772
773         /* Free per-cpu member iface lists */
774         kfree(sc->sc_iflists, M_DEVBUF);
775
776         kfree(sc, M_DEVBUF);
777
778         return 0;
779 }
780
781 /*
782  * bridge_ioctl:
783  *
784  *      Handle a control request from the operator.
785  */
786 static int
787 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
788 {
789         struct bridge_softc *sc = ifp->if_softc;
790         struct bridge_control_arg args;
791         struct ifdrv *ifd = (struct ifdrv *) data;
792         const struct bridge_control *bc;
793         int error = 0;
794
795         ASSERT_IFNET_SERIALIZED_ALL(ifp);
796
797         switch (cmd) {
798         case SIOCADDMULTI:
799         case SIOCDELMULTI:
800                 break;
801
802         case SIOCGDRVSPEC:
803         case SIOCSDRVSPEC:
804                 if (ifd->ifd_cmd >= bridge_control_table_size) {
805                         error = EINVAL;
806                         break;
807                 }
808                 bc = &bridge_control_table[ifd->ifd_cmd];
809
810                 if (cmd == SIOCGDRVSPEC &&
811                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
812                         error = EINVAL;
813                         break;
814                 } else if (cmd == SIOCSDRVSPEC &&
815                            (bc->bc_flags & BC_F_COPYOUT)) {
816                         error = EINVAL;
817                         break;
818                 }
819
820                 if (bc->bc_flags & BC_F_SUSER) {
821                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
822                         if (error)
823                                 break;
824                 }
825
826                 if (ifd->ifd_len != bc->bc_argsize ||
827                     ifd->ifd_len > sizeof(args.bca_u)) {
828                         error = EINVAL;
829                         break;
830                 }
831
832                 memset(&args, 0, sizeof(args));
833                 if (bc->bc_flags & BC_F_COPYIN) {
834                         error = copyin(ifd->ifd_data, &args.bca_u,
835                                        ifd->ifd_len);
836                         if (error)
837                                 break;
838                 }
839
840                 error = bridge_control(sc, cmd, bc->bc_func, &args);
841                 if (error) {
842                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
843                         break;
844                 }
845
846                 if (bc->bc_flags & BC_F_COPYOUT) {
847                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
848                         if (args.bca_len != 0) {
849                                 KKASSERT(args.bca_kptr != NULL);
850                                 if (!error) {
851                                         error = copyout(args.bca_kptr,
852                                                 args.bca_uptr, args.bca_len);
853                                 }
854                                 kfree(args.bca_kptr, M_TEMP);
855                         } else {
856                                 KKASSERT(args.bca_kptr == NULL);
857                         }
858                 } else {
859                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
860                 }
861                 break;
862
863         case SIOCSIFFLAGS:
864                 if (!(ifp->if_flags & IFF_UP) &&
865                     (ifp->if_flags & IFF_RUNNING)) {
866                         /*
867                          * If interface is marked down and it is running,
868                          * then stop it.
869                          */
870                         bridge_stop(ifp);
871                 } else if ((ifp->if_flags & IFF_UP) &&
872                     !(ifp->if_flags & IFF_RUNNING)) {
873                         /*
874                          * If interface is marked up and it is stopped, then
875                          * start it.
876                          */
877                         ifp->if_init(sc);
878                 }
879
880                 /*
881                  * If running and link flag state change we have to
882                  * reinitialize as well.
883                  */
884                 if ((ifp->if_flags & IFF_RUNNING) &&
885                     (ifp->if_flags & (IFF_LINK0|IFF_LINK1|IFF_LINK2)) !=
886                     sc->sc_copy_flags) {
887                         sc->sc_copy_flags = ifp->if_flags &
888                                         (IFF_LINK0|IFF_LINK1|IFF_LINK2);
889                         bridge_control(sc, 0, bridge_ioctl_reinit, NULL);
890                 }
891
892                 break;
893
894         case SIOCSIFMTU:
895                 /* Do not allow the MTU to be changed on the bridge */
896                 error = EINVAL;
897                 break;
898
899         default:
900                 error = ether_ioctl(ifp, cmd, data);
901                 break;
902         }
903         return (error);
904 }
905
906 /*
907  * bridge_mutecaps:
908  *
909  *      Clear or restore unwanted capabilities on the member interface
910  */
911 static void
912 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
913 {
914         struct ifreq ifr;
915
916         if (ifp->if_ioctl == NULL)
917                 return;
918
919         bzero(&ifr, sizeof(ifr));
920         ifr.ifr_reqcap = ifp->if_capenable;
921
922         if (mute) {
923                 /* mask off and save capabilities */
924                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
925                 if (bif_info->bifi_mutecap != 0)
926                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
927         } else {
928                 /* restore muted capabilities */
929                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
930         }
931
932         if (bif_info->bifi_mutecap != 0) {
933                 ifnet_serialize_all(ifp);
934                 ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
935                 ifnet_deserialize_all(ifp);
936         }
937 }
938
939 /*
940  * bridge_lookup_member:
941  *
942  *      Lookup a bridge member interface.
943  */
944 static struct bridge_iflist *
945 bridge_lookup_member(struct bridge_softc *sc, const char *name)
946 {
947         struct bridge_iflist *bif;
948
949         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
950                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
951                         return (bif);
952         }
953         return (NULL);
954 }
955
956 /*
957  * bridge_lookup_member_if:
958  *
959  *      Lookup a bridge member interface by ifnet*.
960  */
961 static struct bridge_iflist *
962 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
963 {
964         struct bridge_iflist *bif;
965
966         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
967                 if (bif->bif_ifp == member_ifp)
968                         return (bif);
969         }
970         return (NULL);
971 }
972
973 /*
974  * bridge_lookup_member_ifinfo:
975  *
976  *      Lookup a bridge member interface by bridge_ifinfo.
977  */
978 static struct bridge_iflist *
979 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
980                             struct bridge_ifinfo *bif_info)
981 {
982         struct bridge_iflist *bif;
983
984         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
985                 if (bif->bif_info == bif_info)
986                         return (bif);
987         }
988         return (NULL);
989 }
990
991 /*
992  * bridge_delete_member:
993  *
994  *      Delete the specified member interface.
995  */
996 static void
997 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
998     int gone)
999 {
1000         struct ifnet *ifs = bif->bif_ifp;
1001         struct ifnet *bifp = sc->sc_ifp;
1002         struct bridge_ifinfo *bif_info = bif->bif_info;
1003         struct bridge_iflist_head saved_bifs;
1004
1005         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1006         KKASSERT(bif_info != NULL);
1007
1008         ifs->if_bridge = NULL;
1009
1010         /*
1011          * Release bridge interface's serializer:
1012          * - To avoid possible dead lock.
1013          * - Various sync operation will block the current thread.
1014          */
1015         ifnet_deserialize_all(bifp);
1016
1017         if (!gone) {
1018                 switch (ifs->if_type) {
1019                 case IFT_ETHER:
1020                 case IFT_L2VLAN:
1021                         /*
1022                          * Take the interface out of promiscuous mode.
1023                          */
1024                         ifpromisc(ifs, 0);
1025                         bridge_mutecaps(bif_info, ifs, 0);
1026                         break;
1027
1028                 case IFT_GIF:
1029                         break;
1030
1031                 default:
1032                         panic("bridge_delete_member: impossible");
1033                         break;
1034                 }
1035         }
1036
1037         /*
1038          * Remove bifs from percpu linked list.
1039          *
1040          * Removed bifs are not freed immediately, instead,
1041          * they are saved in saved_bifs.  They will be freed
1042          * after we make sure that no one is accessing them,
1043          * i.e. after following netmsg_service_sync()
1044          */
1045         TAILQ_INIT(&saved_bifs);
1046         bridge_del_bif(sc, bif_info, &saved_bifs);
1047
1048         /*
1049          * Make sure that all protocol threads:
1050          * o  see 'ifs' if_bridge is changed
1051          * o  know that bif is removed from the percpu linked list
1052          */
1053         netmsg_service_sync();
1054
1055         /*
1056          * Free the removed bifs
1057          */
1058         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1059         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1060                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1061                 kfree(bif, M_DEVBUF);
1062         }
1063
1064         /* See the comment in bridge_ioctl_stop() */
1065         bridge_rtmsg_sync(sc);
1066         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1067
1068         ifnet_serialize_all(bifp);
1069
1070         if (bifp->if_flags & IFF_RUNNING)
1071                 bstp_initialization(sc);
1072
1073         /*
1074          * Free the bif_info after bstp_initialization(), so that
1075          * bridge_softc.sc_root_port will not reference a dangling
1076          * pointer.
1077          */
1078         kfree(bif_info, M_DEVBUF);
1079 }
1080
1081 /*
1082  * bridge_delete_span:
1083  *
1084  *      Delete the specified span interface.
1085  */
1086 static void
1087 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1088 {
1089         KASSERT(bif->bif_ifp->if_bridge == NULL,
1090             ("%s: not a span interface", __func__));
1091
1092         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1093         kfree(bif, M_DEVBUF);
1094 }
1095
1096 static int
1097 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1098 {
1099         struct ifnet *ifp = sc->sc_ifp;
1100
1101         if (ifp->if_flags & IFF_RUNNING)
1102                 return 0;
1103
1104         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1105             bridge_timer, sc);
1106
1107         ifp->if_flags |= IFF_RUNNING;
1108         bstp_initialization(sc);
1109         return 0;
1110 }
1111
1112 static int
1113 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1114 {
1115         struct ifnet *ifp = sc->sc_ifp;
1116
1117         if ((ifp->if_flags & IFF_RUNNING) == 0)
1118                 return 0;
1119
1120         callout_stop(&sc->sc_brcallout);
1121
1122         crit_enter();
1123         lwkt_dropmsg(&sc->sc_brtimemsg.lmsg);
1124         crit_exit();
1125
1126         bstp_stop(sc);
1127
1128         ifp->if_flags &= ~IFF_RUNNING;
1129
1130         ifnet_deserialize_all(ifp);
1131
1132         /* Let everyone know that we are stopped */
1133         netmsg_service_sync();
1134
1135         /*
1136          * Sync ifnetX msgports in the order we forward rtnode
1137          * installation message.  This is used to make sure that
1138          * all rtnode installation messages sent by bridge_rtupdate()
1139          * during above netmsg_service_sync() are flushed.
1140          */
1141         bridge_rtmsg_sync(sc);
1142         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1143
1144         ifnet_serialize_all(ifp);
1145         return 0;
1146 }
1147
1148 static int
1149 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1150 {
1151         struct ifbreq *req = arg;
1152         struct bridge_iflist *bif;
1153         struct bridge_ifinfo *bif_info;
1154         struct ifnet *ifs, *bifp;
1155         int error = 0;
1156
1157         bifp = sc->sc_ifp;
1158         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1159
1160         ifs = ifunit(req->ifbr_ifsname);
1161         if (ifs == NULL)
1162                 return (ENOENT);
1163
1164         /* If it's in the span list, it can't be a member. */
1165         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1166                 if (ifs == bif->bif_ifp)
1167                         return (EBUSY);
1168
1169         /* Allow the first Ethernet member to define the MTU */
1170         if (ifs->if_type != IFT_GIF) {
1171                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1172                         bifp->if_mtu = ifs->if_mtu;
1173                 } else if (bifp->if_mtu != ifs->if_mtu) {
1174                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1175                         return (EINVAL);
1176                 }
1177         }
1178
1179         if (ifs->if_bridge == sc)
1180                 return (EEXIST);
1181
1182         if (ifs->if_bridge != NULL)
1183                 return (EBUSY);
1184
1185         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1186         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1187         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1188         bif_info->bifi_ifp = ifs;
1189         bif_info->bifi_bond_weight = 1;
1190
1191         /*
1192          * Release bridge interface's serializer:
1193          * - To avoid possible dead lock.
1194          * - Various sync operation will block the current thread.
1195          */
1196         ifnet_deserialize_all(bifp);
1197
1198         switch (ifs->if_type) {
1199         case IFT_ETHER:
1200         case IFT_L2VLAN:
1201                 /*
1202                  * Place the interface into promiscuous mode.
1203                  */
1204                 error = ifpromisc(ifs, 1);
1205                 if (error) {
1206                         ifnet_serialize_all(bifp);
1207                         goto out;
1208                 }
1209                 bridge_mutecaps(bif_info, ifs, 1);
1210                 break;
1211
1212         case IFT_GIF: /* :^) */
1213                 break;
1214
1215         default:
1216                 error = EINVAL;
1217                 ifnet_serialize_all(bifp);
1218                 goto out;
1219         }
1220
1221         /*
1222          * Add bifs to percpu linked lists
1223          */
1224         bridge_add_bif(sc, bif_info, ifs);
1225
1226         ifnet_serialize_all(bifp);
1227
1228         if (bifp->if_flags & IFF_RUNNING)
1229                 bstp_initialization(sc);
1230         else
1231                 bstp_stop(sc);
1232
1233         /*
1234          * Everything has been setup, so let the member interface
1235          * deliver packets to this bridge on its input/output path.
1236          */
1237         ifs->if_bridge = sc;
1238 out:
1239         if (error) {
1240                 if (bif_info != NULL)
1241                         kfree(bif_info, M_DEVBUF);
1242         }
1243         return (error);
1244 }
1245
1246 static int
1247 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1248 {
1249         struct ifbreq *req = arg;
1250         struct bridge_iflist *bif;
1251
1252         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1253         if (bif == NULL)
1254                 return (ENOENT);
1255
1256         bridge_delete_member(sc, bif, 0);
1257
1258         return (0);
1259 }
1260
1261 static int
1262 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1263 {
1264         struct ifbreq *req = arg;
1265         struct bridge_iflist *bif;
1266
1267         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1268         if (bif == NULL)
1269                 return (ENOENT);
1270         bridge_ioctl_fillflags(sc, bif, req);
1271         return (0);
1272 }
1273
1274 static void
1275 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1276                        struct ifbreq *req)
1277 {
1278         req->ifbr_ifsflags = bif->bif_flags;
1279         req->ifbr_state = bif->bif_state;
1280         req->ifbr_priority = bif->bif_priority;
1281         req->ifbr_path_cost = bif->bif_path_cost;
1282         req->ifbr_bond_weight = bif->bif_bond_weight;
1283         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1284         if (bif->bif_flags & IFBIF_STP) {
1285                 req->ifbr_peer_root = bif->bif_peer_root;
1286                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1287                 req->ifbr_peer_cost = bif->bif_peer_cost;
1288                 req->ifbr_peer_port = bif->bif_peer_port;
1289                 if (bstp_supersedes_port_info(sc, bif)) {
1290                         req->ifbr_designated_root = bif->bif_peer_root;
1291                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1292                         req->ifbr_designated_cost = bif->bif_peer_cost;
1293                         req->ifbr_designated_port = bif->bif_peer_port;
1294                 } else {
1295                         req->ifbr_designated_root = sc->sc_bridge_id;
1296                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1297                         req->ifbr_designated_cost = bif->bif_path_cost +
1298                                                     bif->bif_peer_cost;
1299                         req->ifbr_designated_port = bif->bif_port_id;
1300                 }
1301         } else {
1302                 req->ifbr_peer_root = 0;
1303                 req->ifbr_peer_bridge = 0;
1304                 req->ifbr_peer_cost = 0;
1305                 req->ifbr_peer_port = 0;
1306                 req->ifbr_designated_root = 0;
1307                 req->ifbr_designated_bridge = 0;
1308                 req->ifbr_designated_cost = 0;
1309                 req->ifbr_designated_port = 0;
1310         }
1311 }
1312
1313 static int
1314 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1315 {
1316         struct ifbreq *req = arg;
1317         struct bridge_iflist *bif;
1318         struct ifnet *bifp = sc->sc_ifp;
1319
1320         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1321         if (bif == NULL)
1322                 return (ENOENT);
1323
1324         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1325                 /* SPAN is readonly */
1326                 return (EINVAL);
1327         }
1328
1329         if (req->ifbr_ifsflags & IFBIF_STP) {
1330                 switch (bif->bif_ifp->if_type) {
1331                 case IFT_ETHER:
1332                         /* These can do spanning tree. */
1333                         break;
1334
1335                 default:
1336                         /* Nothing else can. */
1337                         return (EINVAL);
1338                 }
1339         }
1340
1341         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1342                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1343         if (bifp->if_flags & IFF_RUNNING)
1344                 bstp_initialization(sc);
1345
1346         return (0);
1347 }
1348
1349 static int
1350 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1351 {
1352         struct ifbrparam *param = arg;
1353         struct ifnet *ifp = sc->sc_ifp;
1354
1355         sc->sc_brtmax = param->ifbrp_csize;
1356
1357         ifnet_deserialize_all(ifp);
1358         bridge_rttrim(sc);
1359         ifnet_serialize_all(ifp);
1360
1361         return (0);
1362 }
1363
1364 static int
1365 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1366 {
1367         struct ifbrparam *param = arg;
1368
1369         param->ifbrp_csize = sc->sc_brtmax;
1370
1371         return (0);
1372 }
1373
1374 static int
1375 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1376 {
1377         struct bridge_control_arg *bc_arg = arg;
1378         struct ifbifconf *bifc = arg;
1379         struct bridge_iflist *bif;
1380         struct ifbreq *breq;
1381         int count, len;
1382
1383         count = 0;
1384         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1385                 count++;
1386         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1387                 count++;
1388
1389         if (bifc->ifbic_len == 0) {
1390                 bifc->ifbic_len = sizeof(*breq) * count;
1391                 return 0;
1392         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1393                 bifc->ifbic_len = 0;
1394                 return 0;
1395         }
1396
1397         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1398         KKASSERT(len >= sizeof(*breq));
1399
1400         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1401         if (breq == NULL) {
1402                 bifc->ifbic_len = 0;
1403                 return ENOMEM;
1404         }
1405         bc_arg->bca_kptr = breq;
1406
1407         count = 0;
1408         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1409                 if (len < sizeof(*breq))
1410                         break;
1411
1412                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1413                         sizeof(breq->ifbr_ifsname));
1414                 bridge_ioctl_fillflags(sc, bif, breq);
1415                 breq++;
1416                 count++;
1417                 len -= sizeof(*breq);
1418         }
1419         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1420                 if (len < sizeof(*breq))
1421                         break;
1422
1423                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1424                         sizeof(breq->ifbr_ifsname));
1425                 breq->ifbr_ifsflags = bif->bif_flags;
1426                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1427                 breq++;
1428                 count++;
1429                 len -= sizeof(*breq);
1430         }
1431
1432         bifc->ifbic_len = sizeof(*breq) * count;
1433         KKASSERT(bifc->ifbic_len > 0);
1434
1435         bc_arg->bca_len = bifc->ifbic_len;
1436         bc_arg->bca_uptr = bifc->ifbic_req;
1437         return 0;
1438 }
1439
1440 static int
1441 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1442 {
1443         struct bridge_control_arg *bc_arg = arg;
1444         struct ifbaconf *bac = arg;
1445         struct bridge_rtnode *brt;
1446         struct ifbareq *bareq;
1447         int count, len;
1448
1449         count = 0;
1450         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1451                 count++;
1452
1453         if (bac->ifbac_len == 0) {
1454                 bac->ifbac_len = sizeof(*bareq) * count;
1455                 return 0;
1456         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1457                 bac->ifbac_len = 0;
1458                 return 0;
1459         }
1460
1461         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1462         KKASSERT(len >= sizeof(*bareq));
1463
1464         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1465         if (bareq == NULL) {
1466                 bac->ifbac_len = 0;
1467                 return ENOMEM;
1468         }
1469         bc_arg->bca_kptr = bareq;
1470
1471         count = 0;
1472         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1473                 struct bridge_rtinfo *bri = brt->brt_info;
1474                 unsigned long expire;
1475
1476                 if (len < sizeof(*bareq))
1477                         break;
1478
1479                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1480                         sizeof(bareq->ifba_ifsname));
1481                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1482                 expire = bri->bri_expire;
1483                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1484                     time_second < expire)
1485                         bareq->ifba_expire = expire - time_second;
1486                 else
1487                         bareq->ifba_expire = 0;
1488                 bareq->ifba_flags = bri->bri_flags;
1489                 bareq++;
1490                 count++;
1491                 len -= sizeof(*bareq);
1492         }
1493
1494         bac->ifbac_len = sizeof(*bareq) * count;
1495         KKASSERT(bac->ifbac_len > 0);
1496
1497         bc_arg->bca_len = bac->ifbac_len;
1498         bc_arg->bca_uptr = bac->ifbac_req;
1499         return 0;
1500 }
1501
1502 static int
1503 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1504 {
1505         struct ifbareq *req = arg;
1506         struct bridge_iflist *bif;
1507         struct ifnet *ifp = sc->sc_ifp;
1508         int error;
1509
1510         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1511
1512         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1513         if (bif == NULL)
1514                 return (ENOENT);
1515
1516         ifnet_deserialize_all(ifp);
1517         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1518                                req->ifba_flags);
1519         ifnet_serialize_all(ifp);
1520         return (error);
1521 }
1522
1523 static int
1524 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1525 {
1526         struct ifbrparam *param = arg;
1527
1528         sc->sc_brttimeout = param->ifbrp_ctime;
1529
1530         return (0);
1531 }
1532
1533 static int
1534 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1535 {
1536         struct ifbrparam *param = arg;
1537
1538         param->ifbrp_ctime = sc->sc_brttimeout;
1539
1540         return (0);
1541 }
1542
1543 static int
1544 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1545 {
1546         struct ifbareq *req = arg;
1547         struct ifnet *ifp = sc->sc_ifp;
1548         int error;
1549
1550         ifnet_deserialize_all(ifp);
1551         error = bridge_rtdaddr(sc, req->ifba_dst);
1552         ifnet_serialize_all(ifp);
1553         return error;
1554 }
1555
1556 static int
1557 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1558 {
1559         struct ifbreq *req = arg;
1560         struct ifnet *ifp = sc->sc_ifp;
1561
1562         ifnet_deserialize_all(ifp);
1563         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1564         ifnet_serialize_all(ifp);
1565
1566         return (0);
1567 }
1568
1569 static int
1570 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1571 {
1572         struct ifbrparam *param = arg;
1573
1574         param->ifbrp_prio = sc->sc_bridge_priority;
1575
1576         return (0);
1577 }
1578
1579 static int
1580 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1581 {
1582         struct ifbrparam *param = arg;
1583
1584         sc->sc_bridge_priority = param->ifbrp_prio;
1585
1586         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1587                 bstp_initialization(sc);
1588
1589         return (0);
1590 }
1591
1592 static int
1593 bridge_ioctl_reinit(struct bridge_softc *sc, void *arg __unused)
1594 {
1595         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1596                 bstp_initialization(sc);
1597         return (0);
1598 }
1599
1600 static int
1601 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1602 {
1603         struct ifbrparam *param = arg;
1604
1605         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1606
1607         return (0);
1608 }
1609
1610 static int
1611 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1612 {
1613         struct ifbrparam *param = arg;
1614
1615         if (param->ifbrp_hellotime == 0)
1616                 return (EINVAL);
1617         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1618
1619         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1620                 bstp_initialization(sc);
1621
1622         return (0);
1623 }
1624
1625 static int
1626 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1627 {
1628         struct ifbrparam *param = arg;
1629
1630         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1631
1632         return (0);
1633 }
1634
1635 static int
1636 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1637 {
1638         struct ifbrparam *param = arg;
1639
1640         if (param->ifbrp_fwddelay == 0)
1641                 return (EINVAL);
1642         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1643
1644         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1645                 bstp_initialization(sc);
1646
1647         return (0);
1648 }
1649
1650 static int
1651 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1652 {
1653         struct ifbrparam *param = arg;
1654
1655         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1656
1657         return (0);
1658 }
1659
1660 static int
1661 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1662 {
1663         struct ifbrparam *param = arg;
1664
1665         if (param->ifbrp_maxage == 0)
1666                 return (EINVAL);
1667         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1668
1669         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1670                 bstp_initialization(sc);
1671
1672         return (0);
1673 }
1674
1675 static int
1676 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1677 {
1678         struct ifbreq *req = arg;
1679         struct bridge_iflist *bif;
1680
1681         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1682         if (bif == NULL)
1683                 return (ENOENT);
1684
1685         bif->bif_priority = req->ifbr_priority;
1686
1687         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1688                 bstp_initialization(sc);
1689
1690         return (0);
1691 }
1692
1693 static int
1694 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1695 {
1696         struct ifbreq *req = arg;
1697         struct bridge_iflist *bif;
1698
1699         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1700         if (bif == NULL)
1701                 return (ENOENT);
1702
1703         bif->bif_path_cost = req->ifbr_path_cost;
1704
1705         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1706                 bstp_initialization(sc);
1707
1708         return (0);
1709 }
1710
1711 static int
1712 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1713 {
1714         struct ifbreq *req = arg;
1715         struct bridge_iflist *bif;
1716
1717         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1718         if (bif == NULL)
1719                 return (ENOENT);
1720
1721         bif->bif_bond_weight = req->ifbr_bond_weight;
1722
1723         /* no reinit needed */
1724
1725         return (0);
1726 }
1727
1728 static int
1729 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1730 {
1731         struct ifbreq *req = arg;
1732         struct bridge_iflist *bif;
1733         struct ifnet *ifs;
1734         struct bridge_ifinfo *bif_info;
1735
1736         ifs = ifunit(req->ifbr_ifsname);
1737         if (ifs == NULL)
1738                 return (ENOENT);
1739
1740         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1741                 if (ifs == bif->bif_ifp)
1742                         return (EBUSY);
1743
1744         if (ifs->if_bridge != NULL)
1745                 return (EBUSY);
1746
1747         switch (ifs->if_type) {
1748         case IFT_ETHER:
1749         case IFT_GIF:
1750         case IFT_L2VLAN:
1751                 break;
1752
1753         default:
1754                 return (EINVAL);
1755         }
1756
1757         /*
1758          * bif_info is needed for bif_flags
1759          */
1760         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1761         bif_info->bifi_ifp = ifs;
1762
1763         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1764         bif->bif_ifp = ifs;
1765         bif->bif_info = bif_info;
1766         bif->bif_flags = IFBIF_SPAN;
1767         /* NOTE: span bif does not need bridge_ifinfo */
1768
1769         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1770
1771         sc->sc_span = 1;
1772
1773         return (0);
1774 }
1775
1776 static int
1777 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1778 {
1779         struct ifbreq *req = arg;
1780         struct bridge_iflist *bif;
1781         struct ifnet *ifs;
1782
1783         ifs = ifunit(req->ifbr_ifsname);
1784         if (ifs == NULL)
1785                 return (ENOENT);
1786
1787         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1788                 if (ifs == bif->bif_ifp)
1789                         break;
1790
1791         if (bif == NULL)
1792                 return (ENOENT);
1793
1794         bridge_delete_span(sc, bif);
1795
1796         if (TAILQ_EMPTY(&sc->sc_spanlist))
1797                 sc->sc_span = 0;
1798
1799         return (0);
1800 }
1801
1802 static void
1803 bridge_ifdetach_dispatch(netmsg_t msg)
1804 {
1805         struct ifnet *ifp, *bifp;
1806         struct bridge_softc *sc;
1807         struct bridge_iflist *bif;
1808
1809         ifp = msg->lmsg.u.ms_resultp;
1810         sc = ifp->if_bridge;
1811
1812         /* Check if the interface is a bridge member */
1813         if (sc != NULL) {
1814                 bifp = sc->sc_ifp;
1815
1816                 ifnet_serialize_all(bifp);
1817
1818                 bif = bridge_lookup_member_if(sc, ifp);
1819                 if (bif != NULL) {
1820                         bridge_delete_member(sc, bif, 1);
1821                 } else {
1822                         /* XXX Why bif will be NULL? */
1823                 }
1824
1825                 ifnet_deserialize_all(bifp);
1826                 goto reply;
1827         }
1828
1829         crit_enter();   /* XXX MP */
1830
1831         /* Check if the interface is a span port */
1832         LIST_FOREACH(sc, &bridge_list, sc_list) {
1833                 bifp = sc->sc_ifp;
1834
1835                 ifnet_serialize_all(bifp);
1836
1837                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1838                         if (ifp == bif->bif_ifp) {
1839                                 bridge_delete_span(sc, bif);
1840                                 break;
1841                         }
1842
1843                 ifnet_deserialize_all(bifp);
1844         }
1845
1846         crit_exit();
1847
1848 reply:
1849         lwkt_replymsg(&msg->lmsg, 0);
1850 }
1851
1852 /*
1853  * bridge_ifdetach:
1854  *
1855  *      Detach an interface from a bridge.  Called when a member
1856  *      interface is detaching.
1857  */
1858 static void
1859 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1860 {
1861         struct netmsg_base msg;
1862
1863         netmsg_init(&msg, NULL, &curthread->td_msgport,
1864                     0, bridge_ifdetach_dispatch);
1865         msg.lmsg.u.ms_resultp = ifp;
1866
1867         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1868 }
1869
1870 /*
1871  * bridge_init:
1872  *
1873  *      Initialize a bridge interface.
1874  */
1875 static void
1876 bridge_init(void *xsc)
1877 {
1878         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1879 }
1880
1881 /*
1882  * bridge_stop:
1883  *
1884  *      Stop the bridge interface.
1885  */
1886 static void
1887 bridge_stop(struct ifnet *ifp)
1888 {
1889         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1890 }
1891
1892 /*
1893  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1894  * interface or from any member of our bridge interface.  This is used
1895  * later on to force the MAC to be the MAC of our bridge interface.
1896  */
1897 static int
1898 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1899 {
1900         struct bridge_iflist *bif;
1901
1902         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1903                 return (1);
1904
1905         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1906                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1907                            ETHER_ADDR_LEN) == 0) {
1908                         return (1);
1909                 }
1910         }
1911         return (0);
1912 }
1913
1914 /*
1915  * bridge_enqueue:
1916  *
1917  *      Enqueue a packet on a bridge member interface.
1918  *
1919  */
1920 void
1921 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1922 {
1923         struct netmsg_packet *nmp;
1924
1925         mbuftrackid(m, 64);
1926
1927         nmp = &m->m_hdr.mh_netmsg;
1928         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1929                     0, bridge_enqueue_handler);
1930         nmp->nm_packet = m;
1931         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1932
1933         lwkt_sendmsg(netisr_portfn(mycpu->gd_cpuid), &nmp->base.lmsg);
1934 }
1935
1936 /*
1937  * After looking up dst_if in our forwarding table we still have to
1938  * deal with channel bonding.  Find the best interface in the bonding set.
1939  */
1940 static struct ifnet *
1941 bridge_select_unicast(struct bridge_softc *sc, struct ifnet *dst_if,
1942                       int from_blocking, struct mbuf *m)
1943 {
1944         struct bridge_iflist *bif, *nbif;
1945         struct ifnet *alt_if;
1946         int alt_priority;
1947         int priority;
1948
1949         /*
1950          * Unicast, kinda replicates the output side of bridge_output().
1951          *
1952          * Even though this is a uni-cast packet we may have to select
1953          * an interface from a bonding set.
1954          */
1955         bif = bridge_lookup_member_if(sc, dst_if);
1956         if (bif == NULL) {
1957                 /* Not a member of the bridge (anymore?) */
1958                 return NULL;
1959         }
1960
1961         /*
1962          * If STP is enabled on the target we are an equal opportunity
1963          * employer and do not necessarily output to dst_if.  Instead
1964          * scan available links with the same MAC as the current dst_if
1965          * and choose the best one.
1966          *
1967          * We also need to do this because arp entries tag onto a particular
1968          * interface and if it happens to be dead then the packets will
1969          * go into a bit bucket.
1970          *
1971          * If LINK2 is set the matching links are bonded and we-round robin.
1972          * (the MAC address must be the same for the participating links).
1973          * In this case links in a STP FORWARDING or BONDED state are
1974          * allowed for unicast packets.
1975          */
1976         if (bif->bif_flags & IFBIF_STP) {
1977                 alt_if = NULL;
1978                 alt_priority = 0;
1979                 priority = 0;
1980
1981                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1982                                      bif_next, nbif) {
1983                         /*
1984                          * dst_if may imply a bonding set so we must compare
1985                          * MAC addresses.
1986                          */
1987                         if (memcmp(IF_LLADDR(bif->bif_ifp),
1988                                    IF_LLADDR(dst_if),
1989                                    ETHER_ADDR_LEN) != 0) {
1990                                 continue;
1991                         }
1992
1993                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
1994                                 continue;
1995
1996                         /*
1997                          * NOTE: We allow tranmissions through a BLOCKING
1998                          *       or LEARNING interface only as a last resort.
1999                          *       We DISALLOW both cases if the receiving
2000                          *
2001                          * NOTE: If we send a packet through a learning
2002                          *       interface the receiving end (if also in
2003                          *       LEARNING) will throw it away, so this is
2004                          *       the ultimate last resort.
2005                          */
2006                         switch(bif->bif_state) {
2007                         case BSTP_IFSTATE_BLOCKING:
2008                                 if (from_blocking == 0 &&
2009                                     bif->bif_priority + 256 > alt_priority) {
2010                                         alt_priority = bif->bif_priority + 256;
2011                                         alt_if = bif->bif_ifp;
2012                                 }
2013                                 continue;
2014                         case BSTP_IFSTATE_LEARNING:
2015                                 if (from_blocking == 0 &&
2016                                     bif->bif_priority > alt_priority) {
2017                                         alt_priority = bif->bif_priority;
2018                                         alt_if = bif->bif_ifp;
2019                                 }
2020                                 continue;
2021                         case BSTP_IFSTATE_L1BLOCKING:
2022                         case BSTP_IFSTATE_LISTENING:
2023                         case BSTP_IFSTATE_DISABLED:
2024                                 continue;
2025                         default:
2026                                 /* FORWARDING, BONDED */
2027                                 break;
2028                         }
2029
2030                         /*
2031                          * XXX we need to use the toepliz hash or
2032                          *     something like that instead of
2033                          *     round-robining.
2034                          */
2035                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2036                                 dst_if = bif->bif_ifp;
2037                                 if (++bif->bif_bond_count >=
2038                                     bif->bif_bond_weight) {
2039                                         bif->bif_bond_count = 0;
2040                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2041                                                      bif, bif_next);
2042                                         TAILQ_INSERT_TAIL(
2043                                                      &sc->sc_iflists[mycpuid],
2044                                                      bif, bif_next);
2045                                 }
2046                                 priority = 1;
2047                                 break;
2048                         }
2049
2050                         /*
2051                          * Select best interface in the FORWARDING or
2052                          * BONDED set.  Well, there shouldn't be any
2053                          * in a BONDED state if LINK2 is not set (they
2054                          * will all be in a BLOCKING) state, but there
2055                          * could be a transitory condition here.
2056                          */
2057                         if (bif->bif_priority > priority) {
2058                                 priority = bif->bif_priority;
2059                                 dst_if = bif->bif_ifp;
2060                         }
2061                 }
2062
2063                 /*
2064                  * If no suitable interfaces were found but a suitable
2065                  * alternative interface was found, use the alternative
2066                  * interface.
2067                  */
2068                 if (priority == 0 && alt_if)
2069                         dst_if = alt_if;
2070         }
2071
2072         /*
2073          * At this point, we're dealing with a unicast frame
2074          * going to a different interface.
2075          */
2076         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2077                 dst_if = NULL;
2078         return (dst_if);
2079 }
2080
2081
2082 /*
2083  * bridge_output:
2084  *
2085  *      Send output from a bridge member interface.  This
2086  *      performs the bridging function for locally originated
2087  *      packets.
2088  *
2089  *      The mbuf has the Ethernet header already attached.  We must
2090  *      enqueue or free the mbuf before returning.
2091  */
2092 static int
2093 bridge_output(struct ifnet *ifp, struct mbuf *m)
2094 {
2095         struct bridge_softc *sc = ifp->if_bridge;
2096         struct bridge_iflist *bif, *nbif;
2097         struct ether_header *eh;
2098         struct ifnet *dst_if, *alt_if, *bifp;
2099         int from_us;
2100         int alt_priority;
2101
2102         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2103         mbuftrackid(m, 65);
2104
2105         /*
2106          * Make sure that we are still a member of a bridge interface.
2107          */
2108         if (sc == NULL) {
2109                 m_freem(m);
2110                 return (0);
2111         }
2112         bifp = sc->sc_ifp;
2113
2114         /*
2115          * Acquire header
2116          */
2117         if (m->m_len < ETHER_HDR_LEN) {
2118                 m = m_pullup(m, ETHER_HDR_LEN);
2119                 if (m == NULL) {
2120                         IFNET_STAT_INC(bifp, oerrors, 1);
2121                         return (0);
2122                 }
2123         }
2124         eh = mtod(m, struct ether_header *);
2125         from_us = bridge_from_us(sc, eh);
2126
2127         /*
2128          * If bridge is down, but the original output interface is up,
2129          * go ahead and send out that interface.  Otherwise, the packet
2130          * is dropped below.
2131          */
2132         if ((bifp->if_flags & IFF_RUNNING) == 0) {
2133                 dst_if = ifp;
2134                 goto sendunicast;
2135         }
2136
2137         /*
2138          * If the packet is a multicast, or we don't know a better way to
2139          * get there, send to all interfaces.
2140          */
2141         if (ETHER_IS_MULTICAST(eh->ether_dhost))
2142                 dst_if = NULL;
2143         else
2144                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2145
2146         if (dst_if == NULL) {
2147                 struct mbuf *mc;
2148                 int used = 0;
2149                 int found = 0;
2150
2151                 if (sc->sc_span)
2152                         bridge_span(sc, m);
2153
2154                 alt_if = NULL;
2155                 alt_priority = 0;
2156                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2157                                      bif_next, nbif) {
2158                         dst_if = bif->bif_ifp;
2159
2160                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2161                                 continue;
2162
2163                         /*
2164                          * If this is not the original output interface,
2165                          * and the interface is participating in spanning
2166                          * tree, make sure the port is in a state that
2167                          * allows forwarding.
2168                          *
2169                          * We keep track of a possible backup IF if we are
2170                          * unable to find any interfaces to forward through.
2171                          *
2172                          * NOTE: Currently round-robining is not implemented
2173                          *       across bonded interface groups (needs an
2174                          *       algorithm to track each group somehow).
2175                          *
2176                          *       Similarly we track only one alternative
2177                          *       interface if no suitable interfaces are
2178                          *       found.
2179                          */
2180                         if (dst_if != ifp &&
2181                             (bif->bif_flags & IFBIF_STP) != 0) {
2182                                 switch (bif->bif_state) {
2183                                 case BSTP_IFSTATE_BONDED:
2184                                         if (bif->bif_priority + 512 >
2185                                             alt_priority) {
2186                                                 alt_priority =
2187                                                     bif->bif_priority + 512;
2188                                                 alt_if = bif->bif_ifp;
2189                                         }
2190                                         continue;
2191                                 case BSTP_IFSTATE_BLOCKING:
2192                                         if (bif->bif_priority + 256 >
2193                                             alt_priority) {
2194                                                 alt_priority =
2195                                                     bif->bif_priority + 256;
2196                                                 alt_if = bif->bif_ifp;
2197                                         }
2198                                         continue;
2199                                 case BSTP_IFSTATE_LEARNING:
2200                                         if (bif->bif_priority > alt_priority) {
2201                                                 alt_priority =
2202                                                     bif->bif_priority;
2203                                                 alt_if = bif->bif_ifp;
2204                                         }
2205                                         continue;
2206                                 case BSTP_IFSTATE_L1BLOCKING:
2207                                 case BSTP_IFSTATE_LISTENING:
2208                                 case BSTP_IFSTATE_DISABLED:
2209                                         continue;
2210                                 default:
2211                                         /* FORWARDING */
2212                                         break;
2213                                 }
2214                         }
2215
2216                         KKASSERT(used == 0);
2217                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2218                                 used = 1;
2219                                 mc = m;
2220                         } else {
2221                                 mc = m_copypacket(m, MB_DONTWAIT);
2222                                 if (mc == NULL) {
2223                                         IFNET_STAT_INC(bifp, oerrors, 1);
2224                                         continue;
2225                                 }
2226                         }
2227
2228                         /*
2229                          * If the packet is 'from' us override ether_shost.
2230                          */
2231                         bridge_handoff(sc, dst_if, mc, from_us);
2232                         found = 1;
2233
2234                         if (nbif != NULL && !nbif->bif_onlist) {
2235                                 KKASSERT(bif->bif_onlist);
2236                                 nbif = TAILQ_NEXT(bif, bif_next);
2237                         }
2238                 }
2239
2240                 /*
2241                  * If we couldn't find anything use the backup interface
2242                  * if we have one.
2243                  */
2244                 if (found == 0 && alt_if) {
2245                         KKASSERT(used == 0);
2246                         mc = m;
2247                         used = 1;
2248                         bridge_handoff(sc, alt_if, mc, from_us);
2249                 }
2250
2251                 if (used == 0)
2252                         m_freem(m);
2253                 return (0);
2254         }
2255
2256         /*
2257          * Unicast
2258          */
2259 sendunicast:
2260         dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2261
2262         if (sc->sc_span)
2263                 bridge_span(sc, m);
2264         if (dst_if == NULL)
2265                 m_freem(m);
2266         else
2267                 bridge_handoff(sc, dst_if, m, from_us);
2268         return (0);
2269 }
2270
2271 /*
2272  * Returns the bridge interface associated with an ifc.
2273  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2274  * code to supply the bridge for the is-at info, making
2275  * the bridge responsible for matching local addresses.
2276  *
2277  * Without this the ARP code will supply bridge member interfaces
2278  * for the is-at which makes it difficult the bridge to fail-over
2279  * interfaces (amoung other things).
2280  */
2281 static struct ifnet *
2282 bridge_interface(void *if_bridge)
2283 {
2284         struct bridge_softc *sc = if_bridge;
2285         return (sc->sc_ifp);
2286 }
2287
2288 /*
2289  * bridge_start:
2290  *
2291  *      Start output on a bridge.
2292  */
2293 static void
2294 bridge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
2295 {
2296         struct bridge_softc *sc = ifp->if_softc;
2297
2298         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
2299         ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
2300
2301         ifsq_set_oactive(ifsq);
2302         for (;;) {
2303                 struct ifnet *dst_if = NULL;
2304                 struct ether_header *eh;
2305                 struct mbuf *m;
2306
2307                 m = ifsq_dequeue(ifsq, NULL);
2308                 if (m == NULL)
2309                         break;
2310                 mbuftrackid(m, 75);
2311
2312                 if (m->m_len < sizeof(*eh)) {
2313                         m = m_pullup(m, sizeof(*eh));
2314                         if (m == NULL) {
2315                                 IFNET_STAT_INC(ifp, oerrors, 1);
2316                                 continue;
2317                         }
2318                 }
2319                 eh = mtod(m, struct ether_header *);
2320
2321                 BPF_MTAP(ifp, m);
2322                 IFNET_STAT_INC(ifp, opackets, 1);
2323
2324                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2325                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2326
2327                 /*
2328                  * Multicast or broadcast
2329                  */
2330                 if (dst_if == NULL) {
2331                         bridge_start_bcast(sc, m);
2332                         continue;
2333                 }
2334
2335                 /*
2336                  * Unicast
2337                  */
2338                 dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2339
2340                 if (dst_if == NULL)
2341                         m_freem(m);
2342                 else
2343                         bridge_enqueue(dst_if, m);
2344         }
2345         ifsq_clr_oactive(ifsq);
2346 }
2347
2348 /*
2349  * bridge_forward:
2350  *
2351  *      Forward packets received on a bridge interface via the input
2352  *      path.
2353  *
2354  *      This implements the forwarding function of the bridge.
2355  */
2356 static void
2357 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2358 {
2359         struct bridge_iflist *bif;
2360         struct ifnet *src_if, *dst_if, *ifp;
2361         struct ether_header *eh;
2362         int from_blocking;
2363
2364         mbuftrackid(m, 66);
2365         src_if = m->m_pkthdr.rcvif;
2366         ifp = sc->sc_ifp;
2367
2368         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2369
2370         IFNET_STAT_INC(ifp, ipackets, 1);
2371         IFNET_STAT_INC(ifp, ibytes, m->m_pkthdr.len);
2372
2373         /*
2374          * Look up the bridge_iflist.
2375          */
2376         bif = bridge_lookup_member_if(sc, src_if);
2377         if (bif == NULL) {
2378                 /* Interface is not a bridge member (anymore?) */
2379                 m_freem(m);
2380                 return;
2381         }
2382
2383         /*
2384          * In spanning tree mode receiving a packet from an interface
2385          * in a BLOCKING state is allowed, it could be a member of last
2386          * resort from the sender's point of view, but forwarding it is
2387          * not allowed.
2388          *
2389          * The sender's spanning tree will eventually sync up and the
2390          * sender will go into a BLOCKING state too (but this still may be
2391          * an interface of last resort during state changes).
2392          */
2393         if (bif->bif_flags & IFBIF_STP) {
2394                 switch (bif->bif_state) {
2395                 case BSTP_IFSTATE_L1BLOCKING:
2396                 case BSTP_IFSTATE_LISTENING:
2397                 case BSTP_IFSTATE_DISABLED:
2398                         m_freem(m);
2399                         return;
2400                 default:
2401                         /* learning, blocking, bonded, forwarding */
2402                         break;
2403                 }
2404                 from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2405         } else {
2406                 from_blocking = 0;
2407         }
2408
2409         eh = mtod(m, struct ether_header *);
2410
2411         /*
2412          * If the interface is learning, and the source
2413          * address is valid and not multicast, record
2414          * the address.
2415          */
2416         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2417             from_blocking == 0 &&
2418             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2419             (eh->ether_shost[0] == 0 &&
2420              eh->ether_shost[1] == 0 &&
2421              eh->ether_shost[2] == 0 &&
2422              eh->ether_shost[3] == 0 &&
2423              eh->ether_shost[4] == 0 &&
2424              eh->ether_shost[5] == 0) == 0) {
2425                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2426         }
2427
2428         /*
2429          * Don't forward from an interface in the listening or learning
2430          * state.  That is, in the learning state we learn information
2431          * but we throw away the packets.
2432          *
2433          * We let through packets on interfaces in the blocking state.
2434          * The blocking state is applicable to the send side, not the
2435          * receive side.
2436          */
2437         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2438             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2439              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2440                 m_freem(m);
2441                 return;
2442         }
2443
2444         /*
2445          * At this point, the port either doesn't participate
2446          * in spanning tree or it is in the forwarding state.
2447          */
2448
2449         /*
2450          * If the packet is unicast, destined for someone on
2451          * "this" side of the bridge, drop it.
2452          *
2453          * src_if implies the entire bonding set so we have to compare MAC
2454          * addresses and not just if pointers.
2455          */
2456         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2457                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2458                 if (dst_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
2459                                      ETHER_ADDR_LEN) == 0) {
2460                         m_freem(m);
2461                         return;
2462                 }
2463         } else {
2464                 /* ...forward it to all interfaces. */
2465                 IFNET_STAT_INC(ifp, imcasts, 1);
2466                 dst_if = NULL;
2467         }
2468
2469         /*
2470          * Brodcast if we do not have forwarding information.  However, if
2471          * we received the packet on a blocking interface we do not do this
2472          * (unless you really want to blow up your network).
2473          */
2474         if (dst_if == NULL) {
2475                 if (from_blocking)
2476                         m_freem(m);
2477                 else
2478                         bridge_broadcast(sc, src_if, m);
2479                 return;
2480         }
2481
2482         dst_if = bridge_select_unicast(sc, dst_if, from_blocking, m);
2483
2484         if (dst_if == NULL) {
2485                 m_freem(m);
2486                 return;
2487         }
2488
2489         if (inet_pfil_hook.ph_hashooks > 0
2490 #ifdef INET6
2491             || inet6_pfil_hook.ph_hashooks > 0
2492 #endif
2493             ) {
2494                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2495                         return;
2496                 if (m == NULL)
2497                         return;
2498
2499                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2500                         return;
2501                 if (m == NULL)
2502                         return;
2503         }
2504         bridge_handoff(sc, dst_if, m, 0);
2505 }
2506
2507 /*
2508  * bridge_input:
2509  *
2510  *      Receive input from a member interface.  Queue the packet for
2511  *      bridging if it is not for us.
2512  */
2513 static struct mbuf *
2514 bridge_input(struct ifnet *ifp, struct mbuf *m)
2515 {
2516         struct bridge_softc *sc = ifp->if_bridge;
2517         struct bridge_iflist *bif;
2518         struct ifnet *bifp, *new_ifp;
2519         struct ether_header *eh;
2520         struct mbuf *mc, *mc2;
2521
2522         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2523         mbuftrackid(m, 67);
2524
2525         /*
2526          * Make sure that we are still a member of a bridge interface.
2527          */
2528         if (sc == NULL)
2529                 return m;
2530
2531         new_ifp = NULL;
2532         bifp = sc->sc_ifp;
2533
2534         if ((bifp->if_flags & IFF_RUNNING) == 0)
2535                 goto out;
2536
2537         /*
2538          * Implement support for bridge monitoring.  If this flag has been
2539          * set on this interface, discard the packet once we push it through
2540          * the bpf(4) machinery, but before we do, increment various counters
2541          * associated with this bridge.
2542          */
2543         if (bifp->if_flags & IFF_MONITOR) {
2544                 /* Change input interface to this bridge */
2545                 m->m_pkthdr.rcvif = bifp;
2546
2547                 BPF_MTAP(bifp, m);
2548
2549                 /* Update bridge's ifnet statistics */
2550                 IFNET_STAT_INC(bifp, ipackets, 1);
2551                 IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2552                 if (m->m_flags & (M_MCAST | M_BCAST))
2553                         IFNET_STAT_INC(bifp, imcasts, 1);
2554
2555                 m_freem(m);
2556                 m = NULL;
2557                 goto out;
2558         }
2559
2560         /*
2561          * Handle the ether_header
2562          *
2563          * In all cases if the packet is destined for us via our MAC
2564          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2565          * repeat the source MAC out the same interface.
2566          *
2567          * This first test against our bridge MAC is the fast-path.
2568          *
2569          * NOTE!  The bridge interface can serve as an endpoint for
2570          *        communication but normally there are no IPs associated
2571          *        with it so you cannot route through it.  Instead what
2572          *        you do is point your default route *THROUGH* the bridge
2573          *        to the actual default router for one of the bridged spaces.
2574          *
2575          *        Another possibility is to put all your IP specifications
2576          *        on the bridge instead of on the individual interfaces.  If
2577          *        you do this it should be possible to use the bridge as an
2578          *        end point and route (rather than switch) through it using
2579          *        the default route or ipfw forwarding rules.
2580          */
2581
2582         /*
2583          * Acquire header
2584          */
2585         if (m->m_len < ETHER_HDR_LEN) {
2586                 m = m_pullup(m, ETHER_HDR_LEN);
2587                 if (m == NULL)
2588                         goto out;
2589         }
2590         eh = mtod(m, struct ether_header *);
2591         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2592         bcopy(eh, &m->m_pkthdr.br.ether, sizeof(*eh));
2593
2594         if ((bridge_debug & 1) &&
2595             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2596             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2597                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2598                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2599                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2600                         eh->ether_dhost[0],
2601                         eh->ether_dhost[1],
2602                         eh->ether_dhost[2],
2603                         eh->ether_dhost[3],
2604                         eh->ether_dhost[4],
2605                         eh->ether_dhost[5],
2606                         eh->ether_shost[0],
2607                         eh->ether_shost[1],
2608                         eh->ether_shost[2],
2609                         eh->ether_shost[3],
2610                         eh->ether_shost[4],
2611                         eh->ether_shost[5],
2612                         eh->ether_type,
2613                         ((u_char *)IF_LLADDR(bifp))[0],
2614                         ((u_char *)IF_LLADDR(bifp))[1],
2615                         ((u_char *)IF_LLADDR(bifp))[2],
2616                         ((u_char *)IF_LLADDR(bifp))[3],
2617                         ((u_char *)IF_LLADDR(bifp))[4],
2618                         ((u_char *)IF_LLADDR(bifp))[5]
2619                 );
2620         }
2621
2622         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2623                 /*
2624                  * If the packet is for us, set the packets source as the
2625                  * bridge, and return the packet back to ifnet.if_input for
2626                  * local processing.
2627                  */
2628                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2629                 KASSERT(bifp->if_bridge == NULL,
2630                         ("loop created in bridge_input"));
2631                 if (pfil_member != 0) {
2632                         if (inet_pfil_hook.ph_hashooks > 0
2633 #ifdef INET6
2634                             || inet6_pfil_hook.ph_hashooks > 0
2635 #endif
2636                         ) {
2637                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2638                                         goto out;
2639                                 if (m == NULL)
2640                                         goto out;
2641                         }
2642                 }
2643                 new_ifp = bifp;
2644                 goto out;
2645         }
2646
2647         /*
2648          * Tap all packets arriving on the bridge, no matter if
2649          * they are local destinations or not.  In is in.
2650          */
2651         BPF_MTAP(bifp, m);
2652
2653         bif = bridge_lookup_member_if(sc, ifp);
2654         if (bif == NULL)
2655                 goto out;
2656
2657         if (sc->sc_span)
2658                 bridge_span(sc, m);
2659
2660         if (m->m_flags & (M_BCAST | M_MCAST)) {
2661                 /*
2662                  * Tap off 802.1D packets; they do not get forwarded.
2663                  */
2664                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2665                             ETHER_ADDR_LEN) == 0) {
2666                         ifnet_serialize_all(bifp);
2667                         bstp_input(sc, bif, m);
2668                         ifnet_deserialize_all(bifp);
2669
2670                         /* m is freed by bstp_input */
2671                         m = NULL;
2672                         goto out;
2673                 }
2674
2675                 /*
2676                  * Other than 802.11d packets, ignore packets if the
2677                  * interface is not in a good state.
2678                  *
2679                  * NOTE: Broadcast/mcast packets received on a blocking or
2680                  *       learning interface are allowed for local processing.
2681                  *
2682                  *       The sending side of a blocked port will stop
2683                  *       transmitting when a better alternative is found.
2684                  *       However, later on we will disallow the forwarding
2685                  *       of bcast/mcsat packets over a blocking interface.
2686                  */
2687                 if (bif->bif_flags & IFBIF_STP) {
2688                         switch (bif->bif_state) {
2689                         case BSTP_IFSTATE_L1BLOCKING:
2690                         case BSTP_IFSTATE_LISTENING:
2691                         case BSTP_IFSTATE_DISABLED:
2692                                 goto out;
2693                         default:
2694                                 /* blocking, learning, bonded, forwarding */
2695                                 break;
2696                         }
2697                 }
2698
2699                 /*
2700                  * Make a deep copy of the packet and enqueue the copy
2701                  * for bridge processing; return the original packet for
2702                  * local processing.
2703                  */
2704                 mc = m_dup(m, MB_DONTWAIT);
2705                 if (mc == NULL)
2706                         goto out;
2707
2708                 /*
2709                  * It's just too dangerous to allow bcast/mcast over a
2710                  * blocked interface, eventually the network will sort
2711                  * itself out and a better path will be found.
2712                  */
2713                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2714                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2715                         bridge_forward(sc, mc);
2716                 }
2717
2718                 /*
2719                  * Reinject the mbuf as arriving on the bridge so we have a
2720                  * chance at claiming multicast packets. We can not loop back
2721                  * here from ether_input as a bridge is never a member of a
2722                  * bridge.
2723                  */
2724                 KASSERT(bifp->if_bridge == NULL,
2725                         ("loop created in bridge_input"));
2726                 mc2 = m_dup(m, MB_DONTWAIT);
2727 #ifdef notyet
2728                 if (mc2 != NULL) {
2729                         /* Keep the layer3 header aligned */
2730                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2731                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2732                 }
2733 #endif
2734                 if (mc2 != NULL) {
2735                         /*
2736                          * Don't tap to bpf(4) again; we have already done
2737                          * the tapping.
2738                          *
2739                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2740                          * processed as coming in on the correct interface.
2741                          *
2742                          * Clear the bridge flag for local processing in
2743                          * case the packet gets routed.
2744                          */
2745                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2746                         ether_reinput_oncpu(bifp, mc2, 0);
2747                 }
2748
2749                 /* Return the original packet for local processing. */
2750                 goto out;
2751         }
2752
2753         /*
2754          * Input of a unicast packet.  We have to allow unicast packets
2755          * input from links in the BLOCKING state as this might be an
2756          * interface of last resort.
2757          *
2758          * NOTE: We explicitly ignore normal packets received on a link
2759          *       in the BLOCKING state.  The point of being in that state
2760          *       is to avoid getting duplicate packets.
2761          *
2762          *       HOWEVER, if LINK2 is set the normal spanning tree code
2763          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2764          *       loops.  Unicast packets CAN still loop if we allow the
2765          *       case (hence we only do it in LINK2), but it isn't quite as
2766          *       bad as a broadcast packet looping.
2767          */
2768         if (bif->bif_flags & IFBIF_STP) {
2769                 switch (bif->bif_state) {
2770                 case BSTP_IFSTATE_L1BLOCKING:
2771                 case BSTP_IFSTATE_LISTENING:
2772                 case BSTP_IFSTATE_DISABLED:
2773                         goto out;
2774                 default:
2775                         /* blocking, bonded, forwarding, learning */
2776                         break;
2777                 }
2778         }
2779
2780         /*
2781          * Unicast.  Make sure it's not for us.
2782          *
2783          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2784          * is followed by breaking out of the loop.
2785          */
2786         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2787                 if (bif->bif_ifp->if_type != IFT_ETHER)
2788                         continue;
2789
2790                 /*
2791                  * It is destined for an interface linked to the bridge.
2792                  * We want the bridge itself to take care of link level
2793                  * forwarding to member interfaces so reinput on the bridge.
2794                  * i.e. if you ping an IP on a target interface associated
2795                  * with the bridge, the arp is-at response should indicate
2796                  * the bridge MAC.
2797                  *
2798                  * Only update our addr list when learning if the port
2799                  * is not in a blocking state.  If it is we still allow
2800                  * the packet but we do not try to learn from it.
2801                  */
2802                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2803                            ETHER_ADDR_LEN) == 0) {
2804                         if (bif->bif_ifp != ifp) {
2805                                 /* XXX loop prevention */
2806                                 m->m_flags |= M_ETHER_BRIDGED;
2807                         }
2808                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2809                             ((bif->bif_flags & IFBIF_STP) == 0 ||
2810                              bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2811                                 bridge_rtupdate(sc, eh->ether_shost,
2812                                                 ifp, IFBAF_DYNAMIC);
2813                         }
2814                         new_ifp = bifp; /* not bif->bif_ifp */
2815                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2816                         goto out;
2817                 }
2818
2819                 /*
2820                  * Ignore received packets that were sent by us.
2821                  */
2822                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2823                            ETHER_ADDR_LEN) == 0) {
2824                         m_freem(m);
2825                         m = NULL;
2826                         goto out;
2827                 }
2828         }
2829
2830         /*
2831          * It isn't for us.
2832          *
2833          * Perform the bridge forwarding function, but disallow bridging
2834          * to interfaces in the blocking state if the packet came in on
2835          * an interface in the blocking state.
2836          */
2837         bridge_forward(sc, m);
2838         m = NULL;
2839
2840         /*
2841          * ether_reinput_oncpu() will reprocess rcvif as
2842          * coming from new_ifp (since we do not specify
2843          * REINPUT_KEEPRCVIF).
2844          */
2845 out:
2846         if (new_ifp != NULL) {
2847                 /*
2848                  * Clear the bridge flag for local processing in
2849                  * case the packet gets routed.
2850                  */
2851                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2852                 m = NULL;
2853         }
2854         return (m);
2855 }
2856
2857 /*
2858  * bridge_start_bcast:
2859  *
2860  *      Broadcast the packet sent from bridge to all member
2861  *      interfaces.
2862  *      This is a simplified version of bridge_broadcast(), however,
2863  *      this function expects caller to hold bridge's serializer.
2864  */
2865 static void
2866 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2867 {
2868         struct bridge_iflist *bif;
2869         struct mbuf *mc;
2870         struct ifnet *dst_if, *alt_if, *bifp;
2871         int used = 0;
2872         int found = 0;
2873         int alt_priority;
2874
2875         mbuftrackid(m, 68);
2876         bifp = sc->sc_ifp;
2877         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2878
2879         /*
2880          * Following loop is MPSAFE; nothing is blocking
2881          * in the loop body.
2882          *
2883          * NOTE: We transmit through an member in the BLOCKING state only
2884          *       as a last resort.
2885          */
2886         alt_if = NULL;
2887         alt_priority = 0;
2888
2889         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2890                 dst_if = bif->bif_ifp;
2891
2892                 if (bif->bif_flags & IFBIF_STP) {
2893                         switch (bif->bif_state) {
2894                         case BSTP_IFSTATE_BLOCKING:
2895                                 if (bif->bif_priority > alt_priority) {
2896                                         alt_priority = bif->bif_priority;
2897                                         alt_if = bif->bif_ifp;
2898                                 }
2899                                 /* fall through */
2900                         case BSTP_IFSTATE_L1BLOCKING:
2901                         case BSTP_IFSTATE_DISABLED:
2902                                 continue;
2903                         default:
2904                                 /* listening, learning, bonded, forwarding */
2905                                 break;
2906                         }
2907                 }
2908
2909                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2910                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2911                         continue;
2912
2913                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2914                         continue;
2915
2916                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2917                         mc = m;
2918                         used = 1;
2919                 } else {
2920                         mc = m_copypacket(m, MB_DONTWAIT);
2921                         if (mc == NULL) {
2922                                 IFNET_STAT_INC(bifp, oerrors, 1);
2923                                 continue;
2924                         }
2925                 }
2926                 found = 1;
2927                 bridge_enqueue(dst_if, mc);
2928         }
2929
2930         if (found == 0 && alt_if) {
2931                 KKASSERT(used == 0);
2932                 mc = m;
2933                 used = 1;
2934                 bridge_enqueue(alt_if, mc);
2935         }
2936
2937         if (used == 0)
2938                 m_freem(m);
2939 }
2940
2941 /*
2942  * bridge_broadcast:
2943  *
2944  *      Send a frame to all interfaces that are members of
2945  *      the bridge, except for the one on which the packet
2946  *      arrived.
2947  */
2948 static void
2949 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2950                  struct mbuf *m)
2951 {
2952         struct bridge_iflist *bif, *nbif;
2953         struct ether_header *eh;
2954         struct mbuf *mc;
2955         struct ifnet *dst_if, *alt_if, *bifp;
2956         int used;
2957         int found;
2958         int alt_priority;
2959         int from_us;
2960
2961         mbuftrackid(m, 69);
2962         bifp = sc->sc_ifp;
2963         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
2964
2965         eh = mtod(m, struct ether_header *);
2966         from_us = bridge_from_us(sc, eh);
2967
2968         if (inet_pfil_hook.ph_hashooks > 0
2969 #ifdef INET6
2970             || inet6_pfil_hook.ph_hashooks > 0
2971 #endif
2972             ) {
2973                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2974                         return;
2975                 if (m == NULL)
2976                         return;
2977
2978                 /* Filter on the bridge interface before broadcasting */
2979                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2980                         return;
2981                 if (m == NULL)
2982                         return;
2983         }
2984
2985         alt_if = NULL;
2986         alt_priority = 0;
2987         found = 0;
2988         used = 0;
2989
2990         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
2991                 dst_if = bif->bif_ifp;
2992
2993                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2994                         continue;
2995
2996                 /*
2997                  * Don't bounce the packet out the same interface it came
2998                  * in on.  We have to test MAC addresses because a packet
2999                  * can come in a bonded interface and we don't want it to
3000                  * be echod out the forwarding interface for the same bonding
3001                  * set.
3002                  */
3003                 if (src_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
3004                                      ETHER_ADDR_LEN) == 0) {
3005                         continue;
3006                 }
3007
3008                 /*
3009                  * Generally speaking we only broadcast through forwarding
3010                  * interfaces.  If no interfaces are available we select
3011                  * a BONDED, BLOCKING, or LEARNING interface to forward
3012                  * through.
3013                  */
3014                 if (bif->bif_flags & IFBIF_STP) {
3015                         switch (bif->bif_state) {
3016                         case BSTP_IFSTATE_BONDED:
3017                                 if (bif->bif_priority + 512 > alt_priority) {
3018                                         alt_priority = bif->bif_priority + 512;
3019                                         alt_if = bif->bif_ifp;
3020                                 }
3021                                 continue;
3022                         case BSTP_IFSTATE_BLOCKING:
3023                                 if (bif->bif_priority + 256 > alt_priority) {
3024                                         alt_priority = bif->bif_priority + 256;
3025                                         alt_if = bif->bif_ifp;
3026                                 }
3027                                 continue;
3028                         case BSTP_IFSTATE_LEARNING:
3029                                 if (bif->bif_priority > alt_priority) {
3030                                         alt_priority = bif->bif_priority;
3031                                         alt_if = bif->bif_ifp;
3032                                 }
3033                                 continue;
3034                         case BSTP_IFSTATE_L1BLOCKING:
3035                         case BSTP_IFSTATE_DISABLED:
3036                         case BSTP_IFSTATE_LISTENING:
3037                                 continue;
3038                         default:
3039                                 /* forwarding */
3040                                 break;
3041                         }
3042                 }
3043
3044                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3045                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3046                         continue;
3047                 }
3048
3049                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3050                         mc = m;
3051                         used = 1;
3052                 } else {
3053                         mc = m_copypacket(m, MB_DONTWAIT);
3054                         if (mc == NULL) {
3055                                 IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3056                                 continue;
3057                         }
3058                 }
3059                 found = 1;
3060
3061                 /*
3062                  * Filter on the output interface.  Pass a NULL bridge
3063                  * interface pointer so we do not redundantly filter on
3064                  * the bridge for each interface we broadcast on.
3065                  */
3066                 if (inet_pfil_hook.ph_hashooks > 0
3067 #ifdef INET6
3068                     || inet6_pfil_hook.ph_hashooks > 0
3069 #endif
3070                     ) {
3071                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3072                                 continue;
3073                         if (mc == NULL)
3074                                 continue;
3075                 }
3076                 bridge_handoff(sc, dst_if, mc, from_us);
3077
3078                 if (nbif != NULL && !nbif->bif_onlist) {
3079                         KKASSERT(bif->bif_onlist);
3080                         nbif = TAILQ_NEXT(bif, bif_next);
3081                 }
3082         }
3083
3084         if (found == 0 && alt_if) {
3085                 KKASSERT(used == 0);
3086                 mc = m;
3087                 used = 1;
3088                 bridge_enqueue(alt_if, mc);
3089         }
3090
3091         if (used == 0)
3092                 m_freem(m);
3093 }
3094
3095 /*
3096  * bridge_span:
3097  *
3098  *      Duplicate a packet out one or more interfaces that are in span mode,
3099  *      the original mbuf is unmodified.
3100  */
3101 static void
3102 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3103 {
3104         struct bridge_iflist *bif;
3105         struct ifnet *dst_if, *bifp;
3106         struct mbuf *mc;
3107
3108         mbuftrackid(m, 70);
3109         bifp = sc->sc_ifp;
3110         ifnet_serialize_all(bifp);
3111
3112         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3113                 dst_if = bif->bif_ifp;
3114
3115                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3116                         continue;
3117
3118                 mc = m_copypacket(m, MB_DONTWAIT);
3119                 if (mc == NULL) {
3120                         IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3121                         continue;
3122                 }
3123                 bridge_enqueue(dst_if, mc);
3124         }
3125
3126         ifnet_deserialize_all(bifp);
3127 }
3128
3129 static void
3130 bridge_rtmsg_sync_handler(netmsg_t msg)
3131 {
3132         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3133 }
3134
3135 static void
3136 bridge_rtmsg_sync(struct bridge_softc *sc)
3137 {
3138         struct netmsg_base msg;
3139
3140         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3141
3142         netmsg_init(&msg, NULL, &curthread->td_msgport,
3143                     0, bridge_rtmsg_sync_handler);
3144         ifnet_domsg(&msg.lmsg, 0);
3145 }
3146
3147 static __inline void
3148 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3149                      int setflags, uint8_t flags, uint32_t timeo)
3150 {
3151         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3152             bri->bri_ifp != dst_if)
3153                 bri->bri_ifp = dst_if;
3154         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3155             bri->bri_expire != time_second + timeo)
3156                 bri->bri_expire = time_second + timeo;
3157         if (setflags)
3158                 bri->bri_flags = flags;
3159 }
3160
3161 static int
3162 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3163                        struct ifnet *dst_if, int setflags, uint8_t flags,
3164                        struct bridge_rtinfo **bri0)
3165 {
3166         struct bridge_rtnode *brt;
3167         struct bridge_rtinfo *bri;
3168
3169         if (mycpuid == 0) {
3170                 brt = bridge_rtnode_lookup(sc, dst);
3171                 if (brt != NULL) {
3172                         /*
3173                          * rtnode for 'dst' already exists.  We inform the
3174                          * caller about this by leaving bri0 as NULL.  The
3175                          * caller will terminate the intallation upon getting
3176                          * NULL bri0.  However, we still need to update the
3177                          * rtinfo.
3178                          */
3179                         KKASSERT(*bri0 == NULL);
3180
3181                         /* Update rtinfo */
3182                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3183                                              flags, sc->sc_brttimeout);
3184                         return 0;
3185                 }
3186
3187                 /*
3188                  * We only need to check brtcnt on CPU0, since if limit
3189                  * is to be exceeded, ENOSPC is returned.  Caller knows
3190                  * this and will terminate the installation.
3191                  */
3192                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3193                         return ENOSPC;
3194
3195                 KKASSERT(*bri0 == NULL);
3196                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3197                                   M_WAITOK | M_ZERO);
3198                 *bri0 = bri;
3199
3200                 /* Setup rtinfo */
3201                 bri->bri_flags = IFBAF_DYNAMIC;
3202                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3203                                      sc->sc_brttimeout);
3204         } else {
3205                 bri = *bri0;
3206                 KKASSERT(bri != NULL);
3207         }
3208
3209         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3210                       M_WAITOK | M_ZERO);
3211         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3212         brt->brt_info = bri;
3213
3214         bridge_rtnode_insert(sc, brt);
3215         return 0;
3216 }
3217
3218 static void
3219 bridge_rtinstall_handler(netmsg_t msg)
3220 {
3221         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3222         int error;
3223
3224         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3225                                        brmsg->br_dst, brmsg->br_dst_if,
3226                                        brmsg->br_setflags, brmsg->br_flags,
3227                                        &brmsg->br_rtinfo);
3228         if (error) {
3229                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3230                 lwkt_replymsg(&brmsg->base.lmsg, error);
3231                 return;
3232         } else if (brmsg->br_rtinfo == NULL) {
3233                 /* rtnode already exists for 'dst' */
3234                 KKASSERT(mycpuid == 0);
3235                 lwkt_replymsg(&brmsg->base.lmsg, 0);
3236                 return;
3237         }
3238         ifnet_forwardmsg(&brmsg->base.lmsg, mycpuid + 1);
3239 }
3240
3241 /*
3242  * bridge_rtupdate:
3243  *
3244  *      Add/Update a bridge routing entry.
3245  */
3246 static int
3247 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3248                 struct ifnet *dst_if, uint8_t flags)
3249 {
3250         struct bridge_rtnode *brt;
3251
3252         /*
3253          * A route for this destination might already exist.  If so,
3254          * update it, otherwise create a new one.
3255          */
3256         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3257                 struct netmsg_brsaddr *brmsg;
3258
3259                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3260                         return ENOSPC;
3261
3262                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3263                 if (brmsg == NULL)
3264                         return ENOMEM;
3265
3266                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3267                             0, bridge_rtinstall_handler);
3268                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3269                 brmsg->br_dst_if = dst_if;
3270                 brmsg->br_flags = flags;
3271                 brmsg->br_setflags = 0;
3272                 brmsg->br_softc = sc;
3273                 brmsg->br_rtinfo = NULL;
3274
3275                 ifnet_sendmsg(&brmsg->base.lmsg, 0);
3276                 return 0;
3277         }
3278         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3279                              sc->sc_brttimeout);
3280         return 0;
3281 }
3282
3283 static int
3284 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3285                struct ifnet *dst_if, uint8_t flags)
3286 {
3287         struct netmsg_brsaddr brmsg;
3288
3289         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3290
3291         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3292                     0, bridge_rtinstall_handler);
3293         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3294         brmsg.br_dst_if = dst_if;
3295         brmsg.br_flags = flags;
3296         brmsg.br_setflags = 1;
3297         brmsg.br_softc = sc;
3298         brmsg.br_rtinfo = NULL;
3299
3300         return ifnet_domsg(&brmsg.base.lmsg, 0);
3301 }
3302
3303 /*
3304  * bridge_rtlookup:
3305  *
3306  *      Lookup the destination interface for an address.
3307  */
3308 static struct ifnet *
3309 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3310 {
3311         struct bridge_rtnode *brt;
3312
3313         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3314                 return NULL;
3315         return brt->brt_info->bri_ifp;
3316 }
3317
3318 static void
3319 bridge_rtreap_handler(netmsg_t msg)
3320 {
3321         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3322         struct bridge_rtnode *brt, *nbrt;
3323
3324         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3325                 if (brt->brt_info->bri_dead)
3326                         bridge_rtnode_destroy(sc, brt);
3327         }
3328         ifnet_forwardmsg(&msg->lmsg, mycpuid + 1);
3329 }
3330
3331 static void
3332 bridge_rtreap(struct bridge_softc *sc)
3333 {
3334         struct netmsg_base msg;
3335
3336         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3337
3338         netmsg_init(&msg, NULL, &curthread->td_msgport,
3339                     0, bridge_rtreap_handler);
3340         msg.lmsg.u.ms_resultp = sc;
3341
3342         ifnet_domsg(&msg.lmsg, 0);
3343 }
3344
3345 static void
3346 bridge_rtreap_async(struct bridge_softc *sc)
3347 {
3348         struct netmsg_base *msg;
3349
3350         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3351
3352         netmsg_init(msg, NULL, &netisr_afree_rport,
3353                     0, bridge_rtreap_handler);
3354         msg->lmsg.u.ms_resultp = sc;
3355
3356         ifnet_sendmsg(&msg->lmsg, 0);
3357 }
3358
3359 /*
3360  * bridge_rttrim:
3361  *
3362  *      Trim the routine table so that we have a number
3363  *      of routing entries less than or equal to the
3364  *      maximum number.
3365  */
3366 static void
3367 bridge_rttrim(struct bridge_softc *sc)
3368 {
3369         struct bridge_rtnode *brt;
3370         int dead;
3371
3372         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3373
3374         /* Make sure we actually need to do this. */
3375         if (sc->sc_brtcnt <= sc->sc_brtmax)
3376                 return;
3377
3378         /*
3379          * Find out how many rtnodes are dead
3380          */
3381         dead = bridge_rtage_finddead(sc);
3382         KKASSERT(dead <= sc->sc_brtcnt);
3383
3384         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3385                 /* Enough dead rtnodes are found */
3386                 bridge_rtreap(sc);
3387                 return;
3388         }
3389
3390         /*
3391          * Kill some dynamic rtnodes to meet the brtmax
3392          */
3393         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3394                 struct bridge_rtinfo *bri = brt->brt_info;
3395
3396                 if (bri->bri_dead) {
3397                         /*
3398                          * We have counted this rtnode in
3399                          * bridge_rtage_finddead()
3400                          */
3401                         continue;
3402                 }
3403
3404                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3405                         bri->bri_dead = 1;
3406                         ++dead;
3407                         KKASSERT(dead <= sc->sc_brtcnt);
3408
3409                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3410                                 /* Enough rtnodes are collected */
3411                                 break;
3412                         }
3413                 }
3414         }
3415         if (dead)
3416                 bridge_rtreap(sc);
3417 }
3418
3419 /*
3420  * bridge_timer:
3421  *
3422  *      Aging timer for the bridge.
3423  */
3424 static void
3425 bridge_timer(void *arg)
3426 {
3427         struct bridge_softc *sc = arg;
3428         struct netmsg_base *msg;
3429
3430         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3431
3432         crit_enter();
3433
3434         if (callout_pending(&sc->sc_brcallout) ||
3435             !callout_active(&sc->sc_brcallout)) {
3436                 crit_exit();
3437                 return;
3438         }
3439         callout_deactivate(&sc->sc_brcallout);
3440
3441         msg = &sc->sc_brtimemsg;
3442         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3443         lwkt_sendmsg(BRIDGE_CFGPORT, &msg->lmsg);
3444
3445         crit_exit();
3446 }
3447
3448 static void
3449 bridge_timer_handler(netmsg_t msg)
3450 {
3451         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3452
3453         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3454
3455         crit_enter();
3456         /* Reply ASAP */
3457         lwkt_replymsg(&msg->lmsg, 0);
3458         crit_exit();
3459
3460         bridge_rtage(sc);
3461         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3462                 callout_reset(&sc->sc_brcallout,
3463                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3464         }
3465 }
3466
3467 static int
3468 bridge_rtage_finddead(struct bridge_softc *sc)
3469 {
3470         struct bridge_rtnode *brt;
3471         int dead = 0;
3472
3473         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3474                 struct bridge_rtinfo *bri = brt->brt_info;
3475
3476                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3477                     time_second >= bri->bri_expire) {
3478                         bri->bri_dead = 1;
3479                         ++dead;
3480                         KKASSERT(dead <= sc->sc_brtcnt);
3481                 }
3482         }
3483         return dead;
3484 }
3485
3486 /*
3487  * bridge_rtage:
3488  *
3489  *      Perform an aging cycle.
3490  */
3491 static void
3492 bridge_rtage(struct bridge_softc *sc)
3493 {
3494         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3495
3496         if (bridge_rtage_finddead(sc))
3497                 bridge_rtreap(sc);
3498 }
3499
3500 /*
3501  * bridge_rtflush:
3502  *
3503  *      Remove all dynamic addresses from the bridge.
3504  */
3505 static void
3506 bridge_rtflush(struct bridge_softc *sc, int bf)
3507 {
3508         struct bridge_rtnode *brt;
3509         int reap;
3510
3511         reap = 0;
3512         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3513                 struct bridge_rtinfo *bri = brt->brt_info;
3514
3515                 if ((bf & IFBF_FLUSHALL) ||
3516                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3517                         bri->bri_dead = 1;
3518                         reap = 1;
3519                 }
3520         }
3521         if (reap) {
3522                 if (bf & IFBF_FLUSHSYNC)
3523                         bridge_rtreap(sc);
3524                 else
3525                         bridge_rtreap_async(sc);
3526         }
3527 }
3528
3529 /*
3530  * bridge_rtdaddr:
3531  *
3532  *      Remove an address from the table.
3533  */
3534 static int
3535 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3536 {
3537         struct bridge_rtnode *brt;
3538
3539         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3540
3541         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3542                 return (ENOENT);
3543
3544         /* TODO: add a cheaper delete operation */
3545         brt->brt_info->bri_dead = 1;
3546         bridge_rtreap(sc);
3547         return (0);
3548 }
3549
3550 /*
3551  * bridge_rtdelete:
3552  *
3553  *      Delete routes to a speicifc member interface.
3554  */
3555 void
3556 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3557 {
3558         struct bridge_rtnode *brt;
3559         int reap;
3560
3561         reap = 0;
3562         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3563                 struct bridge_rtinfo *bri = brt->brt_info;
3564
3565                 if (bri->bri_ifp == ifp &&
3566                     ((bf & IFBF_FLUSHALL) ||
3567                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3568                         bri->bri_dead = 1;
3569                         reap = 1;
3570                 }
3571         }
3572         if (reap) {
3573                 if (bf & IFBF_FLUSHSYNC)
3574                         bridge_rtreap(sc);
3575                 else
3576                         bridge_rtreap_async(sc);
3577         }
3578 }
3579
3580 /*
3581  * bridge_rtable_init:
3582  *
3583  *      Initialize the route table for this bridge.
3584  */
3585 static void
3586 bridge_rtable_init(struct bridge_softc *sc)
3587 {
3588         int cpu;
3589
3590         /*
3591          * Initialize per-cpu hash tables
3592          */
3593         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3594                                  M_DEVBUF, M_WAITOK);
3595         for (cpu = 0; cpu < ncpus; ++cpu) {
3596                 int i;
3597
3598                 sc->sc_rthashs[cpu] =
3599                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3600                         M_DEVBUF, M_WAITOK);
3601
3602                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3603                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3604         }
3605         sc->sc_rthash_key = karc4random();
3606
3607         /*
3608          * Initialize per-cpu lists
3609          */
3610         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3611                                  M_DEVBUF, M_WAITOK);
3612         for (cpu = 0; cpu < ncpus; ++cpu)
3613                 LIST_INIT(&sc->sc_rtlists[cpu]);
3614 }
3615
3616 /*
3617  * bridge_rtable_fini:
3618  *
3619  *      Deconstruct the route table for this bridge.
3620  */
3621 static void
3622 bridge_rtable_fini(struct bridge_softc *sc)
3623 {
3624         int cpu;
3625
3626         /*
3627          * Free per-cpu hash tables
3628          */
3629         for (cpu = 0; cpu < ncpus; ++cpu)
3630                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3631         kfree(sc->sc_rthashs, M_DEVBUF);
3632
3633         /*
3634          * Free per-cpu lists
3635          */
3636         kfree(sc->sc_rtlists, M_DEVBUF);
3637 }
3638
3639 /*
3640  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3641  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3642  */
3643 #define mix(a, b, c)                                                    \
3644 do {                                                                    \
3645         a -= b; a -= c; a ^= (c >> 13);                                 \
3646         b -= c; b -= a; b ^= (a << 8);                                  \
3647         c -= a; c -= b; c ^= (b >> 13);                                 \
3648         a -= b; a -= c; a ^= (c >> 12);                                 \
3649         b -= c; b -= a; b ^= (a << 16);                                 \
3650         c -= a; c -= b; c ^= (b >> 5);                                  \
3651         a -= b; a -= c; a ^= (c >> 3);                                  \
3652         b -= c; b -= a; b ^= (a << 10);                                 \
3653         c -= a; c -= b; c ^= (b >> 15);                                 \
3654 } while (/*CONSTCOND*/0)
3655
3656 static __inline uint32_t
3657 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3658 {
3659         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3660
3661         b += addr[5] << 8;
3662         b += addr[4];
3663         a += addr[3] << 24;
3664         a += addr[2] << 16;
3665         a += addr[1] << 8;
3666         a += addr[0];
3667
3668         mix(a, b, c);
3669
3670         return (c & BRIDGE_RTHASH_MASK);
3671 }
3672
3673 #undef mix
3674
/*
 * Compare two ETHER_ADDR_LEN-byte addresses byte by byte.
 *
 * Returns 0 when they are equal; otherwise the difference between the
 * first pair of bytes that differ (positive when `a' is larger,
 * negative when `b' is larger).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; ++idx) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0)
			return (delta);
	}

	return (0);
}
3686
3687 /*
3688  * bridge_rtnode_lookup:
3689  *
3690  *      Look up a bridge route node for the specified destination.
3691  */
3692 static struct bridge_rtnode *
3693 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3694 {
3695         struct bridge_rtnode *brt;
3696         uint32_t hash;
3697         int dir;
3698
3699         hash = bridge_rthash(sc, addr);
3700         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3701                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3702                 if (dir == 0)
3703                         return (brt);
3704                 if (dir > 0)
3705                         return (NULL);
3706         }
3707
3708         return (NULL);
3709 }
3710
3711 /*
3712  * bridge_rtnode_insert:
3713  *
3714  *      Insert the specified bridge node into the route table.
3715  *      Caller has to make sure that rtnode does not exist.
3716  */
3717 static void
3718 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3719 {
3720         struct bridge_rtnode *lbrt;
3721         uint32_t hash;
3722         int dir;
3723
3724         hash = bridge_rthash(sc, brt->brt_addr);
3725
3726         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3727         if (lbrt == NULL) {
3728                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3729                                   brt, brt_hash);
3730                 goto out;
3731         }
3732
3733         do {
3734                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3735                 KASSERT(dir != 0, ("rtnode already exist"));
3736
3737                 if (dir > 0) {
3738                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3739                         goto out;
3740                 }
3741                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3742                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3743                         goto out;
3744                 }
3745                 lbrt = LIST_NEXT(lbrt, brt_hash);
3746         } while (lbrt != NULL);
3747
3748         panic("no suitable position found for rtnode");
3749 out:
3750         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3751         if (mycpuid == 0) {
3752                 /*
3753                  * Update the brtcnt.
3754                  * We only need to do it once and we do it on CPU0.
3755                  */
3756                 sc->sc_brtcnt++;
3757         }
3758 }
3759
3760 /*
3761  * bridge_rtnode_destroy:
3762  *
3763  *      Destroy a bridge rtnode.
3764  */
3765 static void
3766 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3767 {
3768         LIST_REMOVE(brt, brt_hash);
3769         LIST_REMOVE(brt, brt_list);
3770
3771         if (mycpuid + 1 == ncpus) {
3772                 /* Free rtinfo associated with rtnode on the last cpu */
3773                 kfree(brt->brt_info, M_DEVBUF);
3774         }
3775         kfree(brt, M_DEVBUF);
3776
3777         if (mycpuid == 0) {
3778                 /* Update brtcnt only on CPU0 */
3779                 sc->sc_brtcnt--;
3780         }
3781 }
3782
3783 static __inline int
3784 bridge_post_pfil(struct mbuf *m)
3785 {
3786         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3787                 return EOPNOTSUPP;
3788
3789         /* Not yet */
3790         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3791                 return EOPNOTSUPP;
3792
3793         return 0;
3794 }
3795
3796 /*
3797  * Send bridge packets through pfil if they are one of the types pfil can deal
3798  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3799  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3800  * that interface.
3801  */
3802 static int
3803 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3804 {
3805         int snap, error, i, hlen;
3806         struct ether_header *eh1, eh2;
3807         struct ip *ip;
3808         struct llc llc1;
3809         u_int16_t ether_type;
3810
3811         snap = 0;
3812         error = -1;     /* Default error if not error == 0 */
3813
3814         if (pfil_bridge == 0 && pfil_member == 0)
3815                 return (0); /* filtering is disabled */
3816
3817         i = min((*mp)->m_pkthdr.len, max_protohdr);
3818         if ((*mp)->m_len < i) {
3819                 *mp = m_pullup(*mp, i);
3820                 if (*mp == NULL) {
3821                         kprintf("%s: m_pullup failed\n", __func__);
3822                         return (-1);
3823                 }
3824         }
3825
3826         eh1 = mtod(*mp, struct ether_header *);
3827         ether_type = ntohs(eh1->ether_type);
3828
3829         /*
3830          * Check for SNAP/LLC.
3831          */
3832         if (ether_type < ETHERMTU) {
3833                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3834
3835                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3836                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3837                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3838                     llc2->llc_control == LLC_UI) {
3839                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3840                         snap = 1;
3841                 }
3842         }
3843
3844         /*
3845          * If we're trying to filter bridge traffic, don't look at anything
3846          * other than IP and ARP traffic.  If the filter doesn't understand
3847          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3848          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3849          * but of course we don't have an AppleTalk filter to begin with.
3850          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3851          * ARP traffic.)
3852          */
3853         switch (ether_type) {
3854         case ETHERTYPE_ARP:
3855         case ETHERTYPE_REVARP:
3856                 return (0); /* Automatically pass */
3857
3858         case ETHERTYPE_IP:
3859 #ifdef INET6
3860         case ETHERTYPE_IPV6:
3861 #endif /* INET6 */
3862                 break;
3863
3864         default:
3865                 /*
3866                  * Check to see if the user wants to pass non-ip
3867                  * packets, these will not be checked by pfil(9)
3868                  * and passed unconditionally so the default is to drop.
3869                  */
3870                 if (pfil_onlyip)
3871                         goto bad;
3872         }
3873
3874         /* Strip off the Ethernet header and keep a copy. */
3875         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3876         m_adj(*mp, ETHER_HDR_LEN);
3877
3878         /* Strip off snap header, if present */
3879         if (snap) {
3880                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3881                 m_adj(*mp, sizeof(struct llc));
3882         }
3883
3884         /*
3885          * Check the IP header for alignment and errors
3886          */
3887         if (dir == PFIL_IN) {
3888                 switch (ether_type) {
3889                 case ETHERTYPE_IP:
3890                         error = bridge_ip_checkbasic(mp);
3891                         break;
3892 #ifdef INET6
3893                 case ETHERTYPE_IPV6:
3894                         error = bridge_ip6_checkbasic(mp);
3895                         break;
3896 #endif /* INET6 */
3897                 default:
3898                         error = 0;
3899                 }
3900                 if (error)
3901                         goto bad;
3902         }
3903
3904         error = 0;
3905
3906         /*
3907          * Run the packet through pfil
3908          */
3909         switch (ether_type) {
3910         case ETHERTYPE_IP:
3911                 /*
3912                  * before calling the firewall, swap fields the same as
3913                  * IP does. here we assume the header is contiguous
3914                  */
3915                 ip = mtod(*mp, struct ip *);
3916
3917                 ip->ip_len = ntohs(ip->ip_len);
3918                 ip->ip_off = ntohs(ip->ip_off);
3919
3920                 /*
3921                  * Run pfil on the member interface and the bridge, both can
3922                  * be skipped by clearing pfil_member or pfil_bridge.
3923                  *
3924                  * Keep the order:
3925                  *   in_if -> bridge_if -> out_if
3926                  */
3927                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3928                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3929                         if (*mp == NULL || error != 0) /* filter may consume */
3930                                 break;
3931                         error = bridge_post_pfil(*mp);
3932                         if (error)
3933                                 break;
3934                 }
3935
3936                 if (pfil_member && ifp != NULL) {
3937                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3938                         if (*mp == NULL || error != 0) /* filter may consume */
3939                                 break;
3940                         error = bridge_post_pfil(*mp);
3941                         if (error)
3942                                 break;
3943                 }
3944
3945                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3946                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3947                         if (*mp == NULL || error != 0) /* filter may consume */
3948                                 break;
3949                         error = bridge_post_pfil(*mp);
3950                         if (error)
3951                                 break;
3952                 }
3953
3954                 /* check if we need to fragment the packet */
3955                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3956                         i = (*mp)->m_pkthdr.len;
3957                         if (i > ifp->if_mtu) {
3958                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3959                                             &llc1);
3960                                 return (error);
3961                         }
3962                 }
3963
3964                 /* Recalculate the ip checksum and restore byte ordering */
3965                 ip = mtod(*mp, struct ip *);
3966                 hlen = ip->ip_hl << 2;
3967                 if (hlen < sizeof(struct ip))
3968                         goto bad;
3969                 if (hlen > (*mp)->m_len) {
3970                         if ((*mp = m_pullup(*mp, hlen)) == NULL)
3971                                 goto bad;
3972                         ip = mtod(*mp, struct ip *);
3973                         if (ip == NULL)
3974                                 goto bad;
3975                 }
3976                 ip->ip_len = htons(ip->ip_len);
3977                 ip->ip_off = htons(ip->ip_off);
3978                 ip->ip_sum = 0;
3979                 if (hlen == sizeof(struct ip))
3980                         ip->ip_sum = in_cksum_hdr(ip);
3981                 else
3982                         ip->ip_sum = in_cksum(*mp, hlen);
3983
3984                 break;
3985 #ifdef INET6
3986         case ETHERTYPE_IPV6:
3987                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
3988                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3989                                         dir);
3990
3991                 if (*mp == NULL || error != 0) /* filter may consume */
3992                         break;
3993
3994                 if (pfil_member && ifp != NULL)
3995                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
3996                                         dir);
3997
3998                 if (*mp == NULL || error != 0) /* filter may consume */
3999                         break;
4000
4001                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4002                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4003                                         dir);
4004                 break;
4005 #endif
4006         default:
4007                 error = 0;
4008                 break;
4009         }
4010
4011         if (*mp == NULL)
4012                 return (error);
4013         if (error != 0)
4014                 goto bad;
4015
4016         error = -1;
4017
4018         /*
4019          * Finally, put everything back the way it was and return
4020          */
4021         if (snap) {
4022                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
4023                 if (*mp == NULL)
4024                         return (error);
4025                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4026         }
4027
4028         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
4029         if (*mp == NULL)
4030                 return (error);
4031         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4032
4033         return (0);
4034
4035 bad:
4036         m_freem(*mp);
4037         *mp = NULL;
4038         return (error);
4039 }
4040
4041 /*
4042  * Perform basic checks on header size since
4043  * pfil assumes ip_input has already processed
4044  * it for it.  Cut-and-pasted from ip_input.c.
4045  * Given how simple the IPv6 version is,
4046  * does the IPv4 version really need to be
4047  * this complicated?
4048  *
4049  * XXX Should we update ipstat here, or not?
4050  * XXX Right now we update ipstat but not
4051  * XXX csum_counter.
4052  */
4053 static int
4054 bridge_ip_checkbasic(struct mbuf **mp)
4055 {
4056         struct mbuf *m = *mp;
4057         struct ip *ip;
4058         int len, hlen;
4059         u_short sum;
4060
4061         if (*mp == NULL)
4062                 return (-1);
4063 #if 0 /* notyet */
4064         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4065                 if ((m = m_copyup(m, sizeof(struct ip),
4066                         (max_linkhdr + 3) & ~3)) == NULL) {
4067                         /* XXXJRT new stat, please */
4068                         ipstat.ips_toosmall++;
4069                         goto bad;
4070                 }
4071         } else
4072 #endif
4073 #ifndef __predict_false
4074 #define __predict_false(x) x
4075 #endif
4076          if (__predict_false(m->m_len < sizeof (struct ip))) {
4077                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4078                         ipstat.ips_toosmall++;
4079                         goto bad;
4080                 }
4081         }
4082         ip = mtod(m, struct ip *);
4083         if (ip == NULL) goto bad;
4084
4085         if (ip->ip_v != IPVERSION) {
4086                 ipstat.ips_badvers++;
4087                 goto bad;
4088         }
4089         hlen = ip->ip_hl << 2;
4090         if (hlen < sizeof(struct ip)) { /* minimum header length */
4091                 ipstat.ips_badhlen++;
4092                 goto bad;
4093         }
4094         if (hlen > m->m_len) {
4095                 if ((m = m_pullup(m, hlen)) == NULL) {
4096                         ipstat.ips_badhlen++;
4097                         goto bad;
4098                 }
4099                 ip = mtod(m, struct ip *);
4100                 if (ip == NULL) goto bad;
4101         }
4102
4103         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4104                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4105         } else {
4106                 if (hlen == sizeof(struct ip)) {
4107                         sum = in_cksum_hdr(ip);
4108                 } else {
4109                         sum = in_cksum(m, hlen);
4110                 }
4111         }
4112         if (sum) {
4113                 ipstat.ips_badsum++;
4114                 goto bad;
4115         }
4116
4117         /* Retrieve the packet length. */
4118         len = ntohs(ip->ip_len);
4119
4120         /*
4121          * Check for additional length bogosity
4122          */
4123         if (len < hlen) {
4124                 ipstat.ips_badlen++;
4125                 goto bad;
4126         }
4127
4128         /*
4129          * Check that the amount of data in the buffers
4130          * is as at least much as the IP header would have us expect.
4131          * Drop packet if shorter than we expect.
4132          */
4133         if (m->m_pkthdr.len < len) {
4134                 ipstat.ips_tooshort++;
4135                 goto bad;
4136         }
4137
4138         /* Checks out, proceed */
4139         *mp = m;
4140         return (0);
4141
4142 bad:
4143         *mp = m;
4144         return (-1);
4145 }
4146
#ifdef INET6
/*
 * Same as above, but for IPv6.
 * Cut-and-pasted from ip6_input.c.
 *
 * Makes sure the base IPv6 header is contiguous in the first mbuf and
 * carries the right version.  Returns 0 when the packet checks out and
 * -1 otherwise.  In both cases *mp is updated to the (possibly
 * pulled-up, possibly NULL) mbuf; the packet is not freed here.
 *
 * XXX Should we update ip6stat, or not?
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;
	int rc = -1;

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
#if 0 /* notyet */
	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
		struct ifnet *inifp = m->m_pkthdr.rcvif;
		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
			    (max_linkhdr + 3) & ~3)) == NULL) {
			/* XXXJRT new stat, please */
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto done;
		}
	} else
#endif
	if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
		/* Remember the receive interface before m is replaced. */
		struct ifnet *inifp = m->m_pkthdr.rcvif;

		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto done;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto done;
	}

	/* Checks out, proceed */
	rc = 0;

done:
	*mp = m;
	return (rc);
}
#endif /* INET6 */
4203
4204 /*
4205  * bridge_fragment:
4206  *
4207  *      Return a fragmented mbuf chain.
4208  */
4209 static int
4210 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4211     int snap, struct llc *llc)
4212 {
4213         struct mbuf *m0;
4214         struct ip *ip;
4215         int error = -1;
4216
4217         if (m->m_len < sizeof(struct ip) &&
4218             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4219                 goto out;
4220         ip = mtod(m, struct ip *);
4221
4222         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4223                     CSUM_DELAY_IP);
4224         if (error)
4225                 goto out;
4226
4227         /* walk the chain and re-add the Ethernet header */
4228         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4229                 if (error == 0) {
4230                         if (snap) {
4231                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
4232                                 if (m0 == NULL) {
4233                                         error = ENOBUFS;
4234                                         continue;
4235                                 }
4236                                 bcopy(llc, mtod(m0, caddr_t),
4237                                     sizeof(struct llc));
4238                         }
4239                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
4240                         if (m0 == NULL) {
4241                                 error = ENOBUFS;
4242                                 continue;
4243                         }
4244                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4245                 } else 
4246                         m_freem(m);
4247         }
4248
4249         if (error == 0)
4250                 ipstat.ips_fragmented++;
4251
4252         return (error);
4253
4254 out:
4255         if (m != NULL)
4256                 m_freem(m);
4257         return (error);
4258 }
4259
4260 static void
4261 bridge_enqueue_handler(netmsg_t msg)
4262 {
4263         struct netmsg_packet *nmp;
4264         struct ifnet *dst_ifp;
4265         struct mbuf *m;
4266
4267         nmp = &msg->packet;
4268         m = nmp->nm_packet;
4269         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4270         mbuftrackid(m, 71);
4271
4272         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4273 }
4274
4275 static void
4276 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4277                struct mbuf *m, int from_us)
4278 {
4279         struct mbuf *m0;
4280         struct ifnet *bifp;
4281
4282         bifp = sc->sc_ifp;
4283         mbuftrackid(m, 72);
4284
4285         /* We may be sending a fragment so traverse the mbuf */
4286         for (; m; m = m0) {
4287                 struct altq_pktattr pktattr;
4288
4289                 m0 = m->m_nextpkt;
4290                 m->m_nextpkt = NULL;
4291
4292                 /*
4293                  * If being sent from our host override ether_shost
4294                  * with the bridge MAC.  This is mandatory for ARP
4295                  * so things don't get confused.  In particular we
4296                  * don't want ARPs to get associated with link interfaces
4297                  * under the bridge which might or might not stay valid.
4298                  *
4299                  * Also override ether_shost when relaying a packet out
4300                  * the same interface it came in on, due to multi-homed
4301                  * addresses & default routes, otherwise switches will
4302                  * get very confused.
4303                  *
4304                  * Otherwise if we are in transparent mode.
4305                  */
4306                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4307                         m_copyback(m,
4308                                    offsetof(struct ether_header, ether_shost),
4309                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4310                 } else if ((bifp->if_flags & IFF_LINK0) &&
4311                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4312                         m_copyback(m,
4313                                    offsetof(struct ether_header, ether_shost),
4314                                    ETHER_ADDR_LEN,
4315                                    m->m_pkthdr.br.ether.ether_shost);
4316                 } /* else retain shost */
4317
4318                 if (ifq_is_enabled(&dst_ifp->if_snd))
4319                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4320
4321                 ifq_dispatch(dst_ifp, m, &pktattr);
4322         }
4323 }
4324
4325 static void
4326 bridge_control_dispatch(netmsg_t msg)
4327 {
4328         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4329         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4330         int error;
4331
4332         ifnet_serialize_all(bifp);
4333         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4334         ifnet_deserialize_all(bifp);
4335
4336         lwkt_replymsg(&bc_msg->base.lmsg, error);
4337 }
4338
4339 static int
4340 bridge_control(struct bridge_softc *sc, u_long cmd,
4341                bridge_ctl_t bc_func, void *bc_arg)
4342 {
4343         struct ifnet *bifp = sc->sc_ifp;
4344         struct netmsg_brctl bc_msg;
4345         int error;
4346
4347         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4348
4349         bzero(&bc_msg, sizeof(bc_msg));
4350
4351         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4352                     0, bridge_control_dispatch);
4353         bc_msg.bc_func = bc_func;
4354         bc_msg.bc_sc = sc;
4355         bc_msg.bc_arg = bc_arg;
4356
4357         ifnet_deserialize_all(bifp);
4358         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4359         ifnet_serialize_all(bifp);
4360         return error;
4361 }
4362
4363 static void
4364 bridge_add_bif_handler(netmsg_t msg)
4365 {
4366         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4367         struct bridge_softc *sc;
4368         struct bridge_iflist *bif;
4369
4370         sc = amsg->br_softc;
4371
4372         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4373         bif->bif_ifp = amsg->br_bif_ifp;
4374         bif->bif_onlist = 1;
4375         bif->bif_info = amsg->br_bif_info;
4376
4377         /*
4378          * runs through bif_info
4379          */
4380         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4381
4382         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4383
4384         ifnet_forwardmsg(&amsg->base.lmsg, mycpuid + 1);
4385 }
4386
4387 static void
4388 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4389                struct ifnet *ifp)
4390 {
4391         struct netmsg_braddbif amsg;
4392
4393         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4394
4395         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4396                     0, bridge_add_bif_handler);
4397         amsg.br_softc = sc;
4398         amsg.br_bif_info = bif_info;
4399         amsg.br_bif_ifp = ifp;
4400
4401         ifnet_domsg(&amsg.base.lmsg, 0);
4402 }
4403
4404 static void
4405 bridge_del_bif_handler(netmsg_t msg)
4406 {
4407         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4408         struct bridge_softc *sc;
4409         struct bridge_iflist *bif;
4410
4411         sc = dmsg->br_softc;
4412
4413         /*
4414          * Locate the bif associated with the br_bif_info
4415          * on the current CPU
4416          */
4417         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4418         KKASSERT(bif != NULL && bif->bif_onlist);
4419
4420         /* Remove the bif from the current CPU's iflist */
4421         bif->bif_onlist = 0;
4422         TAILQ_REMOVE(dmsg->br_bif_list, bif, bif_next);
4423
4424         /* Save the removed bif for later freeing */
4425         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4426
4427         ifnet_forwardmsg(&dmsg->base.lmsg, mycpuid + 1);
4428 }
4429
4430 static void
4431 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4432                struct bridge_iflist_head *saved_bifs)
4433 {
4434         struct netmsg_brdelbif dmsg;
4435
4436         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4437
4438         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4439                     0, bridge_del_bif_handler);
4440         dmsg.br_softc = sc;
4441         dmsg.br_bif_info = bif_info;
4442         dmsg.br_bif_list = saved_bifs;
4443
4444         ifnet_domsg(&dmsg.base.lmsg, 0);
4445 }