19284243369f870b0afa2b246969b4ea83d92005
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  * $DragonFly: src/sys/net/bridge/if_bridge.c,v 1.59 2008/11/23 02:58:26 sephe Exp $
70  */
71
72 /*
73  * Network interface bridge support.
74  *
75  * TODO:
76  *
77  *      - Currently only supports Ethernet-like interfaces (Ethernet,
78  *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
79  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
80  *        consider heterogeneous bridges).
81  *
82  *
83  * The bridge's route information is duplicated on each CPU:
84  *
85  *      CPU0          CPU1          CPU2          CPU3
86  * +-----------+ +-----------+ +-----------+ +-----------+
87  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
88  * |           | |           | |           | |           |
89  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
90  * +-----------+ +-----------+ +-----------+ +-----------+
91  *       |         |                     |         |
92  *       |         |                     |         |
93  *       |         |     +----------+    |         |
94  *       |         |     |  rtinfo  |    |         |
95  *       |         +---->|          |<---+         |
96  *       |               |  flags   |              |
97  *       +-------------->|  timeout |<-------------+
98  *                       |  dst_ifp |
99  *                       +----------+
100  *
101  * We choose to put timeout and dst_ifp into the shared part, so updating
102  * them is cheaper than using message forwarding.  There is also no
103  * need for a spinlock to protect the updates: timeout and dst_ifp are
104  * unrelated, and the order in which each field is updated does not matter.
105  * The cache pollution caused by the shared part should not be heavy: in a
106  * stable setup, dst_ifp will probably not change during the rtnode's
107  * lifetime, while timeout is refreshed once per second; most of the time,
108  * timeout and dst_ifp are only read.
109  *
110  *
111  * Bridge route information installation on bridge_input path:
112  *
113  *      CPU0           CPU1         CPU2          CPU3
114  *
115  *                               tcp_thread2
116  *                                    |
117  *                                alloc nmsg
118  *                    snd nmsg        |
119  *                    w/o rtinfo      |
120  *      ifnet0<-----------------------+
121  *        |                           :
122  *    lookup dst                      :
123  *   rtnode exists?(Y)free nmsg       :
124  *        |(N)                        :
125  *        |
126  *  alloc rtinfo
127  *  alloc rtnode
128  * install rtnode
129  *        |
130  *        +---------->ifnet1
131  *        : fwd nmsg    |
132  *        : w/ rtinfo   |
133  *        :             |
134  *        :             |
135  *                 alloc rtnode
136  *               (w/ nmsg's rtinfo)
137  *                install rtnode
138  *                      |
139  *                      +---------->ifnet2
140  *                      : fwd nmsg    |
141  *                      : w/ rtinfo   |
142  *                      :             |
143  *                      :         same as ifnet1
144  *                                    |
145  *                                    +---------->ifnet3
146  *                                    : fwd nmsg    |
147  *                                    : w/ rtinfo   |
148  *                                    :             |
149  *                                    :         same as ifnet1
150  *                                               free nmsg
151  *                                                  :
152  *                                                  :
153  *
154  * The netmsgs forwarded between protocol threads and ifnet threads are
155  * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
156  * most cases (route information is too precious not to be installed :).
157  * Since multiple threads may try to install route information for the
158  * same dst eaddr, we look up the route information in ifnet0.  This
159  * lookup only needs to be performed on ifnet0, which is the starting
160  * point of the route information installation process.
161  *
162  *
163  * Bridge route information deleting/flushing:
164  *
165  *  CPU0            CPU1             CPU2             CPU3
166  *
167  * netisr0
168  *   |
169  * find suitable rtnodes,
170  * mark their rtinfo dead
171  *   |
172  *   | domsg <------------------------------------------+
173  *   |                                                  | replymsg
174  *   |                                                  |
175  *   V     fwdmsg           fwdmsg           fwdmsg     |
176  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
177  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
178  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
179  *                                                    free dead rtinfos
180  *
181  * All deleting/flushing operations are serialized by netisr0, so each
182  * operation only reaps the route information marked dead by itself.
183  *
184  *
185  * Bridge route information adding/deleting/flushing:
186  * Since all operations are serialized by the fixed message flow between
187  * ifnet threads, it is not possible to create corrupted per-cpu route
188  * information.
189  *
190  *
191  *
192  * Percpu member interface list iteration with blocking operation:
193  * Since a bridge can only delete one member interface at a time, and the
194  * deleted member interface is not freed until after netmsg_service_sync(),
195  * the following pattern is used to make sure that even if a member
196  * interface is ripped from the percpu list during a blocking operation,
197  * the iteration can still keep going:
198  *
199  * LIST_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
200  *     blocking operation;
201  *     blocking operation;
202  *     ...
203  *     ...
204  *     if (nbif != NULL && !nbif->bif_onlist) {
205  *         KKASSERT(bif->bif_onlist);
206  *         nbif = LIST_NEXT(bif, bif_next);
207  *     }
208  * }
209  *
210  * As mentioned above, only one member interface can be unlinked from the
211  * percpu member interface list at a time, so either bif or nbif may be off
212  * the list, but _not_ both.  To keep the iteration going, we don't care
213  * about bif, only nbif.  Since a removed member interface is only freed
214  * after we finish our work, it is safe to access any field of an unlinked
215  * bif (here bif_onlist).  If nbif is no longer on the list, then bif must be
216  * on the list, so we change nbif to the next element of bif and keep going.
217  */
218
219 #include "opt_inet.h"
220 #include "opt_inet6.h"
221
222 #include <sys/param.h>
223 #include <sys/mbuf.h>
224 #include <sys/malloc.h>
225 #include <sys/protosw.h>
226 #include <sys/systm.h>
227 #include <sys/time.h>
228 #include <sys/socket.h> /* for net/if.h */
229 #include <sys/sockio.h>
230 #include <sys/ctype.h>  /* string functions */
231 #include <sys/kernel.h>
232 #include <sys/random.h>
233 #include <sys/sysctl.h>
234 #include <sys/module.h>
235 #include <sys/proc.h>
236 #include <sys/lock.h>
237 #include <sys/thread.h>
238 #include <sys/thread2.h>
239 #include <sys/mpipe.h>
240
241 #include <net/bpf.h>
242 #include <net/if.h>
243 #include <net/if_dl.h>
244 #include <net/if_types.h>
245 #include <net/if_var.h>
246 #include <net/pfil.h>
247 #include <net/ifq_var.h>
248 #include <net/if_clone.h>
249
250 #include <netinet/in.h> /* for struct arpcom */
251 #include <netinet/in_systm.h>
252 #include <netinet/in_var.h>
253 #include <netinet/ip.h>
254 #include <netinet/ip_var.h>
255 #ifdef INET6
256 #include <netinet/ip6.h>
257 #include <netinet6/ip6_var.h>
258 #endif
259 #include <netinet/if_ether.h> /* for struct arpcom */
260 #include <net/bridge/if_bridgevar.h>
261 #include <net/if_llc.h>
262 #include <net/netmsg2.h>
263
264 #include <net/route.h>
265 #include <sys/in_cksum.h>
266
/*
 * Number of buckets in the route hash table.  This has to be a power of
 * two, because BRIDGE_RTHASH_MASK below is computed as (size - 1).
 */
#if !defined(BRIDGE_RTHASH_SIZE)
#define BRIDGE_RTHASH_SIZE		1024
#endif

#define BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)

/*
 * Upper bound on the number of addresses kept in the cache.
 */
#if !defined(BRIDGE_RTABLE_MAX)
#define BRIDGE_RTABLE_MAX		100
#endif

/*
 * Default values for the spanning tree parameters.
 */
#define BSTP_DEFAULT_MAX_AGE		(20 * 256)
#define BSTP_DEFAULT_HELLO_TIME		(2 * 256)
#define BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
#define BSTP_DEFAULT_HOLD_TIME		(1 * 256)
#define BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
#define BSTP_DEFAULT_PORT_PRIORITY	0x80
#define BSTP_DEFAULT_PATH_COST		55

/*
 * Lifetime, in seconds, of dynamically learned entries.
 */
#if !defined(BRIDGE_RTABLE_TIMEOUT)
#define BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
#endif

/*
 * Interval, in seconds, between two walks of the route list.
 */
#if !defined(BRIDGE_RTABLE_PRUNE_PERIOD)
#define BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
#endif

/*
 * Capabilities that get masked on member interfaces.
 */
#define BRIDGE_IFCAPS_MASK		IFCAP_TXCSUM
312
313 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
314
315 struct netmsg_brctl {
316         struct netmsg           bc_nmsg;
317         bridge_ctl_t            bc_func;
318         struct bridge_softc     *bc_sc;
319         void                    *bc_arg;
320 };
321
322 struct netmsg_brsaddr {
323         struct netmsg           br_nmsg;
324         struct bridge_softc     *br_softc;
325         struct ifnet            *br_dst_if;
326         struct bridge_rtinfo    *br_rtinfo;
327         int                     br_setflags;
328         uint8_t                 br_dst[ETHER_ADDR_LEN];
329         uint8_t                 br_flags;
330 };
331
332 struct netmsg_braddbif {
333         struct netmsg           br_nmsg;
334         struct bridge_softc     *br_softc;
335         struct bridge_ifinfo    *br_bif_info;
336         struct ifnet            *br_bif_ifp;
337 };
338
339 struct netmsg_brdelbif {
340         struct netmsg           br_nmsg;
341         struct bridge_softc     *br_softc;
342         struct bridge_ifinfo    *br_bif_info;
343         struct bridge_iflist_head *br_bif_list;
344 };
345
346 struct netmsg_brsflags {
347         struct netmsg           br_nmsg;
348         struct bridge_softc     *br_softc;
349         struct bridge_ifinfo    *br_bif_info;
350         uint32_t                br_bif_flags;
351 };
352
353 eventhandler_tag        bridge_detach_cookie = NULL;
354
355 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
356 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
357 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
358
359 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
360
361 static int      bridge_clone_create(struct if_clone *, int);
362 static void     bridge_clone_destroy(struct ifnet *);
363
364 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
365 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
366 static void     bridge_ifdetach(void *, struct ifnet *);
367 static void     bridge_init(void *);
368 static void     bridge_stop(struct ifnet *);
369 static void     bridge_start(struct ifnet *);
370 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
371 static int      bridge_output(struct ifnet *, struct mbuf *);
372
373 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
374
375 static void     bridge_timer_handler(struct netmsg *);
376 static void     bridge_timer(void *);
377
378 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
379 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
380                     struct mbuf *);
381 static void     bridge_span(struct bridge_softc *, struct mbuf *);
382
383 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
384                     struct ifnet *, uint8_t);
385 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
386 static void     bridge_rtreap(struct bridge_softc *);
387 static void     bridge_rttrim(struct bridge_softc *);
388 static int      bridge_rtage_finddead(struct bridge_softc *);
389 static void     bridge_rtage(struct bridge_softc *);
390 static void     bridge_rtflush(struct bridge_softc *, int);
391 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
392 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
393                     struct ifnet *, uint8_t);
394 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
395 static void     bridge_rtreap_handler(struct netmsg *);
396 static void     bridge_rtinstall_handler(struct netmsg *);
397 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
398                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
399
400 static void     bridge_rtable_init(struct bridge_softc *);
401 static void     bridge_rtable_fini(struct bridge_softc *);
402
403 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
404 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
405                     const uint8_t *);
406 static void     bridge_rtnode_insert(struct bridge_softc *,
407                     struct bridge_rtnode *);
408 static void     bridge_rtnode_destroy(struct bridge_softc *,
409                     struct bridge_rtnode *);
410
411 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
412                     const char *name);
413 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
414                     struct ifnet *ifp);
415 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
416                     struct bridge_ifinfo *);
417 static void     bridge_delete_member(struct bridge_softc *,
418                     struct bridge_iflist *, int);
419 static void     bridge_delete_span(struct bridge_softc *,
420                     struct bridge_iflist *);
421
422 static int      bridge_control(struct bridge_softc *, u_long,
423                                bridge_ctl_t, void *);
424 static int      bridge_ioctl_init(struct bridge_softc *, void *);
425 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
426 static int      bridge_ioctl_add(struct bridge_softc *, void *);
427 static int      bridge_ioctl_del(struct bridge_softc *, void *);
428 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
429 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
430 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
431 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
432 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
433 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
434 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
435 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
436 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
437 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
438 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
439 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
440 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
441 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
442 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
443 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
444 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
446 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
447 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
448 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
449 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
450 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
451 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
452                     int);
453 static int      bridge_ip_checkbasic(struct mbuf **mp);
454 #ifdef INET6
455 static int      bridge_ip6_checkbasic(struct mbuf **mp);
456 #endif /* INET6 */
457 static int      bridge_fragment(struct ifnet *, struct mbuf *,
458                     struct ether_header *, int, struct llc *);
459 static void     bridge_enqueue_handler(struct netmsg *);
460 static void     bridge_handoff(struct ifnet *, struct mbuf *);
461
462 static void     bridge_del_bif_handler(struct netmsg *);
463 static void     bridge_add_bif_handler(struct netmsg *);
464 static void     bridge_set_bifflags_handler(struct netmsg *);
465 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
466                     struct bridge_iflist_head *);
467 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
468                     struct ifnet *);
469 static void     bridge_set_bifflags(struct bridge_softc *,
470                     struct bridge_ifinfo *, uint32_t);
471
472 SYSCTL_DECL(_net_link);
473 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
474
475 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
476 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
477 static int pfil_member = 1; /* run pfil hooks on the member interface */
478 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
479     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
480 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
481     &pfil_bridge, 0, "Packet filter on the bridge interface");
482 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
483     &pfil_member, 0, "Packet filter on the member interface");
484
485 struct bridge_control_arg {
486         union {
487                 struct ifbreq ifbreq;
488                 struct ifbifconf ifbifconf;
489                 struct ifbareq ifbareq;
490                 struct ifbaconf ifbaconf;
491                 struct ifbrparam ifbrparam;
492         } bca_u;
493         int     bca_len;
494         void    *bca_uptr;
495         void    *bca_kptr;
496 };
497
498 struct bridge_control {
499         bridge_ctl_t    bc_func;
500         int             bc_argsize;
501         int             bc_flags;
502 };
503
504 #define BC_F_COPYIN             0x01    /* copy arguments in */
505 #define BC_F_COPYOUT            0x02    /* copy arguments out */
506 #define BC_F_SUSER              0x04    /* do super-user check */
507
508 const struct bridge_control bridge_control_table[] = {
509         { bridge_ioctl_add,             sizeof(struct ifbreq),
510           BC_F_COPYIN|BC_F_SUSER },
511         { bridge_ioctl_del,             sizeof(struct ifbreq),
512           BC_F_COPYIN|BC_F_SUSER },
513
514         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
515           BC_F_COPYIN|BC_F_COPYOUT },
516         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
517           BC_F_COPYIN|BC_F_SUSER },
518
519         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
520           BC_F_COPYIN|BC_F_SUSER },
521         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
522           BC_F_COPYOUT },
523
524         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
525           BC_F_COPYIN|BC_F_COPYOUT },
526         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
527           BC_F_COPYIN|BC_F_COPYOUT },
528
529         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
530           BC_F_COPYIN|BC_F_SUSER },
531
532         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
533           BC_F_COPYIN|BC_F_SUSER },
534         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
535           BC_F_COPYOUT },
536
537         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
538           BC_F_COPYIN|BC_F_SUSER },
539
540         { bridge_ioctl_flush,           sizeof(struct ifbreq),
541           BC_F_COPYIN|BC_F_SUSER },
542
543         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
544           BC_F_COPYOUT },
545         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
546           BC_F_COPYIN|BC_F_SUSER },
547
548         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
549           BC_F_COPYOUT },
550         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
551           BC_F_COPYIN|BC_F_SUSER },
552
553         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
554           BC_F_COPYOUT },
555         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
556           BC_F_COPYIN|BC_F_SUSER },
557
558         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
559           BC_F_COPYOUT },
560         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
561           BC_F_COPYIN|BC_F_SUSER },
562
563         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
564           BC_F_COPYIN|BC_F_SUSER },
565
566         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
567           BC_F_COPYIN|BC_F_SUSER },
568
569         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
570           BC_F_COPYIN|BC_F_SUSER },
571         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
572           BC_F_COPYIN|BC_F_SUSER },
573 };
574 static const int bridge_control_table_size =
575     sizeof(bridge_control_table) / sizeof(bridge_control_table[0]);
576
577 LIST_HEAD(, bridge_softc) bridge_list;
578
579 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
580                                 bridge_clone_create,
581                                 bridge_clone_destroy, 0, IF_MAXUNIT);
582
583 static int
584 bridge_modevent(module_t mod, int type, void *data)
585 {
586         switch (type) {
587         case MOD_LOAD:
588                 LIST_INIT(&bridge_list);
589                 if_clone_attach(&bridge_cloner);
590                 bridge_input_p = bridge_input;
591                 bridge_output_p = bridge_output;
592                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
593                     ifnet_detach_event, bridge_ifdetach, NULL,
594                     EVENTHANDLER_PRI_ANY);
595 #if notyet
596                 bstp_linkstate_p = bstp_linkstate;
597 #endif
598                 break;
599         case MOD_UNLOAD:
600                 if (!LIST_EMPTY(&bridge_list))
601                         return (EBUSY);
602                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
603                     bridge_detach_cookie);
604                 if_clone_detach(&bridge_cloner);
605                 bridge_input_p = NULL;
606                 bridge_output_p = NULL;
607 #if notyet
608                 bstp_linkstate_p = NULL;
609 #endif
610                 break;
611         default:
612                 return (EOPNOTSUPP);
613         }
614         return (0);
615 }
616
617 static moduledata_t bridge_mod = {
618         "if_bridge",
619         bridge_modevent,
620         0
621 };
622
623 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
624
625
626 /*
627  * bridge_clone_create:
628  *
629  *      Create a new bridge instance.
630  */
631 static int
632 bridge_clone_create(struct if_clone *ifc, int unit)
633 {
634         struct bridge_softc *sc;
635         struct ifnet *ifp;
636         u_char eaddr[6];
637         int cpu, rnd;
638
639         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
640         ifp = sc->sc_ifp = &sc->sc_if;
641
642         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
643         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
644         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
645         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
646         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
647         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
648         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
649
650         /* Initialize our routing table. */
651         bridge_rtable_init(sc);
652
653         callout_init(&sc->sc_brcallout);
654         netmsg_init(&sc->sc_brtimemsg, &netisr_adone_rport,
655                     MSGF_DROPABLE, bridge_timer_handler);
656         sc->sc_brtimemsg.nm_lmsg.u.ms_resultp = sc;
657
658         callout_init(&sc->sc_bstpcallout);
659         netmsg_init(&sc->sc_bstptimemsg, &netisr_adone_rport,
660                     MSGF_DROPABLE, bstp_tick_handler);
661         sc->sc_bstptimemsg.nm_lmsg.u.ms_resultp = sc;
662
663         /* Initialize per-cpu member iface lists */
664         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
665                                  M_DEVBUF, M_WAITOK);
666         for (cpu = 0; cpu < ncpus; ++cpu)
667                 LIST_INIT(&sc->sc_iflists[cpu]);
668
669         LIST_INIT(&sc->sc_spanlist);
670
671         ifp->if_softc = sc;
672         if_initname(ifp, ifc->ifc_name, unit);
673         ifp->if_mtu = ETHERMTU;
674         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
675         ifp->if_ioctl = bridge_ioctl;
676         ifp->if_start = bridge_start;
677         ifp->if_init = bridge_init;
678         ifp->if_type = IFT_BRIDGE;
679         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
680         ifp->if_snd.ifq_maxlen = ifqmaxlen;
681         ifq_set_ready(&ifp->if_snd);
682         ifp->if_hdrlen = ETHER_HDR_LEN;
683
684         /*
685          * Generate a random ethernet address and use the private AC:DE:48
686          * OUI code.
687          */
688         rnd = karc4random();
689         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
690         rnd = karc4random();
691         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
692
693         eaddr[0] &= ~1; /* clear multicast bit */
694         eaddr[0] |= 2;  /* set the LAA bit */
695
696         ether_ifattach(ifp, eaddr, NULL);
697         /* Now undo some of the damage... */
698         ifp->if_baudrate = 0;
699         ifp->if_type = IFT_BRIDGE;
700
701         crit_enter();   /* XXX MP */
702         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
703         crit_exit();
704
705         return (0);
706 }
707
708 static void
709 bridge_delete_dispatch(struct netmsg *nmsg)
710 {
711         struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
712         struct bridge_softc *sc = lmsg->u.ms_resultp;
713         struct ifnet *bifp = sc->sc_ifp;
714         struct bridge_iflist *bif;
715
716         lwkt_serialize_enter(bifp->if_serializer);
717
718         while ((bif = LIST_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
719                 bridge_delete_member(sc, bif, 0);
720
721         while ((bif = LIST_FIRST(&sc->sc_spanlist)) != NULL)
722                 bridge_delete_span(sc, bif);
723
724         lwkt_serialize_exit(bifp->if_serializer);
725
726         lwkt_replymsg(lmsg, 0);
727 }
728
729 /*
730  * bridge_clone_destroy:
731  *
732  *      Destroy a bridge instance.
733  */
734 static void
735 bridge_clone_destroy(struct ifnet *ifp)
736 {
737         struct bridge_softc *sc = ifp->if_softc;
738         struct lwkt_msg *lmsg;
739         struct netmsg nmsg;
740
741         lwkt_serialize_enter(ifp->if_serializer);
742
743         bridge_stop(ifp);
744         ifp->if_flags &= ~IFF_UP;
745
746         lwkt_serialize_exit(ifp->if_serializer);
747
748         netmsg_init(&nmsg, &curthread->td_msgport, 0, bridge_delete_dispatch);
749         lmsg = &nmsg.nm_lmsg;
750         lmsg->u.ms_resultp = sc;
751         lwkt_domsg(BRIDGE_CFGPORT, lmsg, 0);
752
753         crit_enter();   /* XXX MP */
754         LIST_REMOVE(sc, sc_list);
755         crit_exit();
756
757         ether_ifdetach(ifp);
758
759         /* Tear down the routing table. */
760         bridge_rtable_fini(sc);
761
762         /* Free per-cpu member iface lists */
763         kfree(sc->sc_iflists, M_DEVBUF);
764
765         kfree(sc, M_DEVBUF);
766 }
767
768 /*
769  * bridge_ioctl:
770  *
771  *      Handle a control request from the operator.
772  */
773 static int
774 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
775 {
776         struct bridge_softc *sc = ifp->if_softc;
777         struct bridge_control_arg args;
778         struct ifdrv *ifd = (struct ifdrv *) data;
779         const struct bridge_control *bc;
780         int error = 0;
781
782         ASSERT_SERIALIZED(ifp->if_serializer);
783
784         switch (cmd) {
785         case SIOCADDMULTI:
786         case SIOCDELMULTI:
787                 break;
788
789         case SIOCGDRVSPEC:
790         case SIOCSDRVSPEC:
791                 if (ifd->ifd_cmd >= bridge_control_table_size) {
792                         error = EINVAL;
793                         break;
794                 }
795                 bc = &bridge_control_table[ifd->ifd_cmd];
796
797                 if (cmd == SIOCGDRVSPEC &&
798                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
799                         error = EINVAL;
800                         break;
801                 } else if (cmd == SIOCSDRVSPEC &&
802                            (bc->bc_flags & BC_F_COPYOUT)) {
803                         error = EINVAL;
804                         break;
805                 }
806
807                 if (bc->bc_flags & BC_F_SUSER) {
808                         error = suser_cred(cr, NULL_CRED_OKAY);
809                         if (error)
810                                 break;
811                 }
812
813                 if (ifd->ifd_len != bc->bc_argsize ||
814                     ifd->ifd_len > sizeof(args.bca_u)) {
815                         error = EINVAL;
816                         break;
817                 }
818
819                 memset(&args, 0, sizeof(args));
820                 if (bc->bc_flags & BC_F_COPYIN) {
821                         error = copyin(ifd->ifd_data, &args.bca_u,
822                                        ifd->ifd_len);
823                         if (error)
824                                 break;
825                 }
826
827                 error = bridge_control(sc, cmd, bc->bc_func, &args);
828                 if (error) {
829                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
830                         break;
831                 }
832
833                 if (bc->bc_flags & BC_F_COPYOUT) {
834                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
835                         if (args.bca_len != 0) {
836                                 KKASSERT(args.bca_kptr != NULL);
837                                 if (!error) {
838                                         error = copyout(args.bca_kptr,
839                                                 args.bca_uptr, args.bca_len);
840                                 }
841                                 kfree(args.bca_kptr, M_TEMP);
842                         } else {
843                                 KKASSERT(args.bca_kptr == NULL);
844                         }
845                 } else {
846                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
847                 }
848                 break;
849
850         case SIOCSIFFLAGS:
851                 if (!(ifp->if_flags & IFF_UP) &&
852                     (ifp->if_flags & IFF_RUNNING)) {
853                         /*
854                          * If interface is marked down and it is running,
855                          * then stop it.
856                          */
857                         bridge_stop(ifp);
858                 } else if ((ifp->if_flags & IFF_UP) &&
859                     !(ifp->if_flags & IFF_RUNNING)) {
860                         /*
861                          * If interface is marked up and it is stopped, then
862                          * start it.
863                          */
864                         ifp->if_init(sc);
865                 }
866                 break;
867
868         case SIOCSIFMTU:
869                 /* Do not allow the MTU to be changed on the bridge */
870                 error = EINVAL;
871                 break;
872
873         default:
874                 error = ether_ioctl(ifp, cmd, data);
875                 break;
876         }
877         return (error);
878 }
879
880 /*
881  * bridge_mutecaps:
882  *
883  *      Clear or restore unwanted capabilities on the member interface
884  */
885 static void
886 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
887 {
888         struct ifreq ifr;
889         int error;
890
891         if (ifp->if_ioctl == NULL)
892                 return;
893
894         bzero(&ifr, sizeof(ifr));
895         ifr.ifr_reqcap = ifp->if_capenable;
896
897         if (mute) {
898                 /* mask off and save capabilities */
899                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
900                 if (bif_info->bifi_mutecap != 0)
901                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
902         } else {
903                 /* restore muted capabilities */
904                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
905         }
906
907         if (bif_info->bifi_mutecap != 0) {
908                 lwkt_serialize_enter(ifp->if_serializer);
909                 error = ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
910                 lwkt_serialize_exit(ifp->if_serializer);
911         }
912 }
913
914 /*
915  * bridge_lookup_member:
916  *
917  *      Lookup a bridge member interface.
918  */
919 static struct bridge_iflist *
920 bridge_lookup_member(struct bridge_softc *sc, const char *name)
921 {
922         struct bridge_iflist *bif;
923
924         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
925                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
926                         return (bif);
927         }
928         return (NULL);
929 }
930
931 /*
932  * bridge_lookup_member_if:
933  *
934  *      Lookup a bridge member interface by ifnet*.
935  */
936 static struct bridge_iflist *
937 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
938 {
939         struct bridge_iflist *bif;
940
941         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
942                 if (bif->bif_ifp == member_ifp)
943                         return (bif);
944         }
945         return (NULL);
946 }
947
948 /*
949  * bridge_lookup_member_ifinfo:
950  *
951  *      Lookup a bridge member interface by bridge_ifinfo.
952  */
953 static struct bridge_iflist *
954 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
955                             struct bridge_ifinfo *bif_info)
956 {
957         struct bridge_iflist *bif;
958
959         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
960                 if (bif->bif_info == bif_info)
961                         return (bif);
962         }
963         return (NULL);
964 }
965
966 /*
967  * bridge_delete_member:
968  *
969  *      Delete the specified member interface.
970  */
971 static void
972 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
973     int gone)
974 {
975         struct ifnet *ifs = bif->bif_ifp;
976         struct ifnet *bifp = sc->sc_ifp;
977         struct bridge_ifinfo *bif_info = bif->bif_info;
978         struct bridge_iflist_head saved_bifs;
979
980         ASSERT_SERIALIZED(bifp->if_serializer);
981         KKASSERT(bif_info != NULL);
982
983         ifs->if_bridge = NULL;
984
985         /*
986          * Release bridge interface's serializer:
987          * - To avoid possible dead lock.
988          * - Various sync operation will block the current thread.
989          */
990         lwkt_serialize_exit(bifp->if_serializer);
991
992         if (!gone) {
993                 switch (ifs->if_type) {
994                 case IFT_ETHER:
995                 case IFT_L2VLAN:
996                         /*
997                          * Take the interface out of promiscuous mode.
998                          */
999                         ifpromisc(ifs, 0);
1000                         bridge_mutecaps(bif_info, ifs, 0);
1001                         break;
1002
1003                 case IFT_GIF:
1004                         break;
1005
1006                 default:
1007                         panic("bridge_delete_member: impossible");
1008                         break;
1009                 }
1010         }
1011
1012         /*
1013          * Remove bifs from percpu linked list.
1014          *
1015          * Removed bifs are not freed immediately, instead,
1016          * they are saved in saved_bifs.  They will be freed
1017          * after we make sure that no one is accessing them,
1018          * i.e. after following netmsg_service_sync()
1019          */
1020         LIST_INIT(&saved_bifs);
1021         bridge_del_bif(sc, bif_info, &saved_bifs);
1022
1023         /*
1024          * Make sure that all protocol threads:
1025          * o  see 'ifs' if_bridge is changed
1026          * o  know that bif is removed from the percpu linked list
1027          */
1028         netmsg_service_sync();
1029
1030         /*
1031          * Free the removed bifs
1032          */
1033         KKASSERT(!LIST_EMPTY(&saved_bifs));
1034         while ((bif = LIST_FIRST(&saved_bifs)) != NULL) {
1035                 LIST_REMOVE(bif, bif_next);
1036                 kfree(bif, M_DEVBUF);
1037         }
1038
1039         /* See the comment in bridge_ioctl_stop() */
1040         bridge_rtmsg_sync(sc);
1041         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
1042
1043         lwkt_serialize_enter(bifp->if_serializer);
1044
1045         if (bifp->if_flags & IFF_RUNNING)
1046                 bstp_initialization(sc);
1047
1048         /*
1049          * Free the bif_info after bstp_initialization(), so that
1050          * bridge_softc.sc_root_port will not reference a dangling
1051          * pointer.
1052          */
1053         kfree(bif_info, M_DEVBUF);
1054 }
1055
1056 /*
1057  * bridge_delete_span:
1058  *
1059  *      Delete the specified span interface.
1060  */
1061 static void
1062 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1063 {
1064         KASSERT(bif->bif_ifp->if_bridge == NULL,
1065             ("%s: not a span interface", __func__));
1066
1067         LIST_REMOVE(bif, bif_next);
1068         kfree(bif, M_DEVBUF);
1069 }
1070
1071 static int
1072 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1073 {
1074         struct ifnet *ifp = sc->sc_ifp;
1075
1076         if (ifp->if_flags & IFF_RUNNING)
1077                 return 0;
1078
1079         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1080             bridge_timer, sc);
1081
1082         ifp->if_flags |= IFF_RUNNING;
1083         bstp_initialization(sc);
1084         return 0;
1085 }
1086
1087 static int
1088 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1089 {
1090         struct ifnet *ifp = sc->sc_ifp;
1091         struct lwkt_msg *lmsg;
1092
1093         if ((ifp->if_flags & IFF_RUNNING) == 0)
1094                 return 0;
1095
1096         callout_stop(&sc->sc_brcallout);
1097
1098         crit_enter();
1099         lmsg = &sc->sc_brtimemsg.nm_lmsg;
1100         if ((lmsg->ms_flags & MSGF_DONE) == 0) {
1101                 /* Pending to be processed; drop it */
1102                 lwkt_dropmsg(lmsg);
1103         }
1104         crit_exit();
1105
1106         bstp_stop(sc);
1107
1108         ifp->if_flags &= ~IFF_RUNNING;
1109
1110         lwkt_serialize_exit(ifp->if_serializer);
1111
1112         /* Let everyone know that we are stopped */
1113         netmsg_service_sync();
1114
1115         /*
1116          * Sync ifnetX msgports in the order we forward rtnode
1117          * installation message.  This is used to make sure that
1118          * all rtnode installation messages sent by bridge_rtupdate()
1119          * during above netmsg_service_sync() are flushed.
1120          */
1121         bridge_rtmsg_sync(sc);
1122         bridge_rtflush(sc, IFBF_FLUSHDYN);
1123
1124         lwkt_serialize_enter(ifp->if_serializer);
1125         return 0;
1126 }
1127
1128 static int
1129 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1130 {
1131         struct ifbreq *req = arg;
1132         struct bridge_iflist *bif;
1133         struct bridge_ifinfo *bif_info;
1134         struct ifnet *ifs, *bifp;
1135         int error = 0;
1136
1137         bifp = sc->sc_ifp;
1138         ASSERT_SERIALIZED(bifp->if_serializer);
1139
1140         ifs = ifunit(req->ifbr_ifsname);
1141         if (ifs == NULL)
1142                 return (ENOENT);
1143
1144         /* If it's in the span list, it can't be a member. */
1145         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1146                 if (ifs == bif->bif_ifp)
1147                         return (EBUSY);
1148
1149         /* Allow the first Ethernet member to define the MTU */
1150         if (ifs->if_type != IFT_GIF) {
1151                 if (LIST_EMPTY(&sc->sc_iflists[mycpuid])) {
1152                         bifp->if_mtu = ifs->if_mtu;
1153                 } else if (bifp->if_mtu != ifs->if_mtu) {
1154                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1155                         return (EINVAL);
1156                 }
1157         }
1158
1159         if (ifs->if_bridge == sc)
1160                 return (EEXIST);
1161
1162         if (ifs->if_bridge != NULL)
1163                 return (EBUSY);
1164
1165         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1166         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1167         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1168         bif_info->bifi_ifp = ifs;
1169
1170         /*
1171          * Release bridge interface's serializer:
1172          * - To avoid possible dead lock.
1173          * - Various sync operation will block the current thread.
1174          */
1175         lwkt_serialize_exit(bifp->if_serializer);
1176
1177         switch (ifs->if_type) {
1178         case IFT_ETHER:
1179         case IFT_L2VLAN:
1180                 /*
1181                  * Place the interface into promiscuous mode.
1182                  */
1183                 error = ifpromisc(ifs, 1);
1184                 if (error) {
1185                         lwkt_serialize_enter(bifp->if_serializer);
1186                         goto out;
1187                 }
1188                 bridge_mutecaps(bif_info, ifs, 1);
1189                 break;
1190
1191         case IFT_GIF: /* :^) */
1192                 break;
1193
1194         default:
1195                 error = EINVAL;
1196                 lwkt_serialize_enter(bifp->if_serializer);
1197                 goto out;
1198         }
1199
1200         /*
1201          * Add bifs to percpu linked lists
1202          */
1203         bridge_add_bif(sc, bif_info, ifs);
1204
1205         lwkt_serialize_enter(bifp->if_serializer);
1206
1207         if (bifp->if_flags & IFF_RUNNING)
1208                 bstp_initialization(sc);
1209         else
1210                 bstp_stop(sc);
1211
1212         /*
1213          * Everything has been setup, so let the member interface
1214          * deliver packets to this bridge on its input/output path.
1215          */
1216         ifs->if_bridge = sc;
1217 out:
1218         if (error) {
1219                 if (bif_info != NULL)
1220                         kfree(bif_info, M_DEVBUF);
1221         }
1222         return (error);
1223 }
1224
1225 static int
1226 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1227 {
1228         struct ifbreq *req = arg;
1229         struct bridge_iflist *bif;
1230
1231         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1232         if (bif == NULL)
1233                 return (ENOENT);
1234
1235         bridge_delete_member(sc, bif, 0);
1236
1237         return (0);
1238 }
1239
1240 static int
1241 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1242 {
1243         struct ifbreq *req = arg;
1244         struct bridge_iflist *bif;
1245
1246         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1247         if (bif == NULL)
1248                 return (ENOENT);
1249
1250         req->ifbr_ifsflags = bif->bif_flags;
1251         req->ifbr_state = bif->bif_state;
1252         req->ifbr_priority = bif->bif_priority;
1253         req->ifbr_path_cost = bif->bif_path_cost;
1254         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1255
1256         return (0);
1257 }
1258
1259 static int
1260 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1261 {
1262         struct ifbreq *req = arg;
1263         struct bridge_iflist *bif;
1264         struct ifnet *bifp = sc->sc_ifp;
1265
1266         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1267         if (bif == NULL)
1268                 return (ENOENT);
1269
1270         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1271                 /* SPAN is readonly */
1272                 return (EINVAL);
1273         }
1274
1275         if (req->ifbr_ifsflags & IFBIF_STP) {
1276                 switch (bif->bif_ifp->if_type) {
1277                 case IFT_ETHER:
1278                         /* These can do spanning tree. */
1279                         break;
1280
1281                 default:
1282                         /* Nothing else can. */
1283                         return (EINVAL);
1284                 }
1285         }
1286
1287         lwkt_serialize_exit(bifp->if_serializer);
1288         bridge_set_bifflags(sc, bif->bif_info, req->ifbr_ifsflags);
1289         lwkt_serialize_enter(bifp->if_serializer);
1290
1291         if (bifp->if_flags & IFF_RUNNING)
1292                 bstp_initialization(sc);
1293
1294         return (0);
1295 }
1296
1297 static int
1298 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1299 {
1300         struct ifbrparam *param = arg;
1301         struct ifnet *ifp = sc->sc_ifp;
1302
1303         sc->sc_brtmax = param->ifbrp_csize;
1304
1305         lwkt_serialize_exit(ifp->if_serializer);
1306         bridge_rttrim(sc);
1307         lwkt_serialize_enter(ifp->if_serializer);
1308
1309         return (0);
1310 }
1311
1312 static int
1313 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1314 {
1315         struct ifbrparam *param = arg;
1316
1317         param->ifbrp_csize = sc->sc_brtmax;
1318
1319         return (0);
1320 }
1321
1322 static int
1323 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1324 {
1325         struct bridge_control_arg *bc_arg = arg;
1326         struct ifbifconf *bifc = arg;
1327         struct bridge_iflist *bif;
1328         struct ifbreq *breq;
1329         int count, len;
1330
1331         count = 0;
1332         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1333                 count++;
1334         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1335                 count++;
1336
1337         if (bifc->ifbic_len == 0) {
1338                 bifc->ifbic_len = sizeof(*breq) * count;
1339                 return 0;
1340         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1341                 bifc->ifbic_len = 0;
1342                 return 0;
1343         }
1344
1345         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1346         KKASSERT(len >= sizeof(*breq));
1347
1348         breq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
1349         if (breq == NULL) {
1350                 bifc->ifbic_len = 0;
1351                 return ENOMEM;
1352         }
1353         bc_arg->bca_kptr = breq;
1354
1355         count = 0;
1356         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1357                 if (len < sizeof(*breq))
1358                         break;
1359
1360                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1361                         sizeof(breq->ifbr_ifsname));
1362                 breq->ifbr_ifsflags = bif->bif_flags;
1363                 breq->ifbr_state = bif->bif_state;
1364                 breq->ifbr_priority = bif->bif_priority;
1365                 breq->ifbr_path_cost = bif->bif_path_cost;
1366                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1367                 breq++;
1368                 count++;
1369                 len -= sizeof(*breq);
1370         }
1371         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1372                 if (len < sizeof(*breq))
1373                         break;
1374
1375                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1376                         sizeof(breq->ifbr_ifsname));
1377                 breq->ifbr_ifsflags = bif->bif_flags;
1378                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1379                 breq++;
1380                 count++;
1381                 len -= sizeof(*breq);
1382         }
1383
1384         bifc->ifbic_len = sizeof(*breq) * count;
1385         KKASSERT(bifc->ifbic_len > 0);
1386
1387         bc_arg->bca_len = bifc->ifbic_len;
1388         bc_arg->bca_uptr = bifc->ifbic_req;
1389         return 0;
1390 }
1391
1392 static int
1393 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1394 {
1395         struct bridge_control_arg *bc_arg = arg;
1396         struct ifbaconf *bac = arg;
1397         struct bridge_rtnode *brt;
1398         struct ifbareq *bareq;
1399         int count, len;
1400
1401         count = 0;
1402         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1403                 count++;
1404
1405         if (bac->ifbac_len == 0) {
1406                 bac->ifbac_len = sizeof(*bareq) * count;
1407                 return 0;
1408         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1409                 bac->ifbac_len = 0;
1410                 return 0;
1411         }
1412
1413         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1414         KKASSERT(len >= sizeof(*bareq));
1415
1416         bareq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
1417         if (bareq == NULL) {
1418                 bac->ifbac_len = 0;
1419                 return ENOMEM;
1420         }
1421         bc_arg->bca_kptr = bareq;
1422
1423         count = 0;
1424         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1425                 struct bridge_rtinfo *bri = brt->brt_info;
1426                 unsigned long expire;
1427
1428                 if (len < sizeof(*bareq))
1429                         break;
1430
1431                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1432                         sizeof(bareq->ifba_ifsname));
1433                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1434                 expire = bri->bri_expire;
1435                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1436                     time_second < expire)
1437                         bareq->ifba_expire = expire - time_second;
1438                 else
1439                         bareq->ifba_expire = 0;
1440                 bareq->ifba_flags = bri->bri_flags;
1441                 bareq++;
1442                 count++;
1443                 len -= sizeof(*bareq);
1444         }
1445
1446         bac->ifbac_len = sizeof(*bareq) * count;
1447         KKASSERT(bac->ifbac_len > 0);
1448
1449         bc_arg->bca_len = bac->ifbac_len;
1450         bc_arg->bca_uptr = bac->ifbac_req;
1451         return 0;
1452 }
1453
1454 static int
1455 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1456 {
1457         struct ifbareq *req = arg;
1458         struct bridge_iflist *bif;
1459         struct ifnet *ifp = sc->sc_ifp;
1460         int error;
1461
1462         ASSERT_SERIALIZED(ifp->if_serializer);
1463
1464         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1465         if (bif == NULL)
1466                 return (ENOENT);
1467
1468         lwkt_serialize_exit(ifp->if_serializer);
1469         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1470                                req->ifba_flags);
1471         lwkt_serialize_enter(ifp->if_serializer);
1472         return (error);
1473 }
1474
1475 static int
1476 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1477 {
1478         struct ifbrparam *param = arg;
1479
1480         sc->sc_brttimeout = param->ifbrp_ctime;
1481
1482         return (0);
1483 }
1484
1485 static int
1486 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1487 {
1488         struct ifbrparam *param = arg;
1489
1490         param->ifbrp_ctime = sc->sc_brttimeout;
1491
1492         return (0);
1493 }
1494
1495 static int
1496 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1497 {
1498         struct ifbareq *req = arg;
1499         struct ifnet *ifp = sc->sc_ifp;
1500         int error;
1501
1502         lwkt_serialize_exit(ifp->if_serializer);
1503         error = bridge_rtdaddr(sc, req->ifba_dst);
1504         lwkt_serialize_enter(ifp->if_serializer);
1505         return error;
1506 }
1507
1508 static int
1509 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1510 {
1511         struct ifbreq *req = arg;
1512         struct ifnet *ifp = sc->sc_ifp;
1513
1514         lwkt_serialize_exit(ifp->if_serializer);
1515         bridge_rtflush(sc, req->ifbr_ifsflags);
1516         lwkt_serialize_enter(ifp->if_serializer);
1517
1518         return (0);
1519 }
1520
1521 static int
1522 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1523 {
1524         struct ifbrparam *param = arg;
1525
1526         param->ifbrp_prio = sc->sc_bridge_priority;
1527
1528         return (0);
1529 }
1530
1531 static int
1532 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1533 {
1534         struct ifbrparam *param = arg;
1535
1536         sc->sc_bridge_priority = param->ifbrp_prio;
1537
1538         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1539                 bstp_initialization(sc);
1540
1541         return (0);
1542 }
1543
1544 static int
1545 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1546 {
1547         struct ifbrparam *param = arg;
1548
1549         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1550
1551         return (0);
1552 }
1553
1554 static int
1555 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1556 {
1557         struct ifbrparam *param = arg;
1558
1559         if (param->ifbrp_hellotime == 0)
1560                 return (EINVAL);
1561         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1562
1563         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1564                 bstp_initialization(sc);
1565
1566         return (0);
1567 }
1568
1569 static int
1570 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1571 {
1572         struct ifbrparam *param = arg;
1573
1574         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1575
1576         return (0);
1577 }
1578
1579 static int
1580 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1581 {
1582         struct ifbrparam *param = arg;
1583
1584         if (param->ifbrp_fwddelay == 0)
1585                 return (EINVAL);
1586         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1587
1588         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1589                 bstp_initialization(sc);
1590
1591         return (0);
1592 }
1593
1594 static int
1595 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1596 {
1597         struct ifbrparam *param = arg;
1598
1599         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1600
1601         return (0);
1602 }
1603
1604 static int
1605 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1606 {
1607         struct ifbrparam *param = arg;
1608
1609         if (param->ifbrp_maxage == 0)
1610                 return (EINVAL);
1611         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1612
1613         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1614                 bstp_initialization(sc);
1615
1616         return (0);
1617 }
1618
1619 static int
1620 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1621 {
1622         struct ifbreq *req = arg;
1623         struct bridge_iflist *bif;
1624
1625         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1626         if (bif == NULL)
1627                 return (ENOENT);
1628
1629         bif->bif_priority = req->ifbr_priority;
1630
1631         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1632                 bstp_initialization(sc);
1633
1634         return (0);
1635 }
1636
1637 static int
1638 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1639 {
1640         struct ifbreq *req = arg;
1641         struct bridge_iflist *bif;
1642
1643         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1644         if (bif == NULL)
1645                 return (ENOENT);
1646
1647         bif->bif_path_cost = req->ifbr_path_cost;
1648
1649         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1650                 bstp_initialization(sc);
1651
1652         return (0);
1653 }
1654
1655 static int
1656 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1657 {
1658         struct ifbreq *req = arg;
1659         struct bridge_iflist *bif;
1660         struct ifnet *ifs;
1661
1662         ifs = ifunit(req->ifbr_ifsname);
1663         if (ifs == NULL)
1664                 return (ENOENT);
1665
1666         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1667                 if (ifs == bif->bif_ifp)
1668                         return (EBUSY);
1669
1670         if (ifs->if_bridge != NULL)
1671                 return (EBUSY);
1672
1673         switch (ifs->if_type) {
1674         case IFT_ETHER:
1675         case IFT_GIF:
1676         case IFT_L2VLAN:
1677                 break;
1678
1679         default:
1680                 return (EINVAL);
1681         }
1682
1683         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1684         bif->bif_ifp = ifs;
1685         bif->bif_flags = IFBIF_SPAN;
1686         /* NOTE: span bif does not need bridge_ifinfo */
1687
1688         LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1689
1690         sc->sc_span = 1;
1691
1692         return (0);
1693 }
1694
1695 static int
1696 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1697 {
1698         struct ifbreq *req = arg;
1699         struct bridge_iflist *bif;
1700         struct ifnet *ifs;
1701
1702         ifs = ifunit(req->ifbr_ifsname);
1703         if (ifs == NULL)
1704                 return (ENOENT);
1705
1706         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1707                 if (ifs == bif->bif_ifp)
1708                         break;
1709
1710         if (bif == NULL)
1711                 return (ENOENT);
1712
1713         bridge_delete_span(sc, bif);
1714
1715         if (LIST_EMPTY(&sc->sc_spanlist))
1716                 sc->sc_span = 0;
1717
1718         return (0);
1719 }
1720
1721 static void
1722 bridge_ifdetach_dispatch(struct netmsg *nmsg)
1723 {
1724         struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
1725         struct ifnet *ifp, *bifp;
1726         struct bridge_softc *sc;
1727         struct bridge_iflist *bif;
1728
1729         ifp = lmsg->u.ms_resultp;
1730         sc = ifp->if_bridge;
1731
1732         /* Check if the interface is a bridge member */
1733         if (sc != NULL) {
1734                 bifp = sc->sc_ifp;
1735
1736                 lwkt_serialize_enter(bifp->if_serializer);
1737
1738                 bif = bridge_lookup_member_if(sc, ifp);
1739                 if (bif != NULL) {
1740                         bridge_delete_member(sc, bif, 1);
1741                 } else {
1742                         /* XXX Why bif will be NULL? */
1743                 }
1744
1745                 lwkt_serialize_exit(bifp->if_serializer);
1746                 goto reply;
1747         }
1748
1749         crit_enter();   /* XXX MP */
1750
1751         /* Check if the interface is a span port */
1752         LIST_FOREACH(sc, &bridge_list, sc_list) {
1753                 bifp = sc->sc_ifp;
1754
1755                 lwkt_serialize_enter(bifp->if_serializer);
1756
1757                 LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1758                         if (ifp == bif->bif_ifp) {
1759                                 bridge_delete_span(sc, bif);
1760                                 break;
1761                         }
1762
1763                 lwkt_serialize_exit(bifp->if_serializer);
1764         }
1765
1766         crit_exit();
1767
1768 reply:
1769         lwkt_replymsg(lmsg, 0);
1770 }
1771
1772 /*
1773  * bridge_ifdetach:
1774  *
1775  *      Detach an interface from a bridge.  Called when a member
1776  *      interface is detaching.
1777  */
1778 static void
1779 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1780 {
1781         struct lwkt_msg *lmsg;
1782         struct netmsg nmsg;
1783
1784         netmsg_init(&nmsg, &curthread->td_msgport, 0, bridge_ifdetach_dispatch);
1785         lmsg = &nmsg.nm_lmsg;
1786         lmsg->u.ms_resultp = ifp;
1787
1788         lwkt_domsg(BRIDGE_CFGPORT, lmsg, 0);
1789 }
1790
1791 /*
1792  * bridge_init:
1793  *
1794  *      Initialize a bridge interface.
1795  */
1796 static void
1797 bridge_init(void *xsc)
1798 {
1799         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1800 }
1801
1802 /*
1803  * bridge_stop:
1804  *
1805  *      Stop the bridge interface.
1806  */
1807 static void
1808 bridge_stop(struct ifnet *ifp)
1809 {
1810         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1811 }
1812
1813 /*
1814  * bridge_enqueue:
1815  *
1816  *      Enqueue a packet on a bridge member interface.
1817  *
1818  */
1819 void
1820 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1821 {
1822         struct netmsg_packet *nmp;
1823         lwkt_port_t port;
1824
1825         nmp = &m->m_hdr.mh_netmsg;
1826         netmsg_init(&nmp->nm_netmsg, &netisr_apanic_rport, 0,
1827                     bridge_enqueue_handler);
1828         nmp->nm_packet = m;
1829         nmp->nm_netmsg.nm_lmsg.u.ms_resultp = dst_ifp;
1830
1831         if (curthread->td_flags & TDF_NETWORK)
1832                 port = &curthread->td_msgport;
1833         else
1834                 port = cpu_portfn(mycpuid);
1835         lwkt_sendmsg(port, &nmp->nm_netmsg.nm_lmsg);
1836 }
1837
1838 /*
1839  * bridge_output:
1840  *
1841  *      Send output from a bridge member interface.  This
1842  *      performs the bridging function for locally originated
1843  *      packets.
1844  *
1845  *      The mbuf has the Ethernet header already attached.  We must
1846  *      enqueue or free the mbuf before returning.
1847  */
1848 static int
1849 bridge_output(struct ifnet *ifp, struct mbuf *m)
1850 {
1851         struct bridge_softc *sc = ifp->if_bridge;
1852         struct ether_header *eh;
1853         struct ifnet *dst_if, *bifp;
1854
1855         ASSERT_NOT_SERIALIZED(ifp->if_serializer);
1856
1857         /*
1858          * Make sure that we are still a member of a bridge interface.
1859          */
1860         if (sc == NULL) {
1861                 m_freem(m);
1862                 return (0);
1863         }
1864         bifp = sc->sc_ifp;
1865
1866         if (m->m_len < ETHER_HDR_LEN) {
1867                 m = m_pullup(m, ETHER_HDR_LEN);
1868                 if (m == NULL)
1869                         return (0);
1870         }
1871         eh = mtod(m, struct ether_header *);
1872
1873         /*
1874          * If bridge is down, but the original output interface is up,
1875          * go ahead and send out that interface.  Otherwise, the packet
1876          * is dropped below.
1877          */
1878         if ((bifp->if_flags & IFF_RUNNING) == 0) {
1879                 dst_if = ifp;
1880                 goto sendunicast;
1881         }
1882
1883         /*
1884          * If the packet is a multicast, or we don't know a better way to
1885          * get there, send to all interfaces.
1886          */
1887         if (ETHER_IS_MULTICAST(eh->ether_dhost))
1888                 dst_if = NULL;
1889         else
1890                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1891         if (dst_if == NULL) {
1892                 struct bridge_iflist *bif, *nbif;
1893                 struct mbuf *mc;
1894                 int used = 0;
1895
1896                 if (sc->sc_span)
1897                         bridge_span(sc, m);
1898
1899                 LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1900                                      bif_next, nbif) {
1901                         dst_if = bif->bif_ifp;
1902                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
1903                                 continue;
1904
1905                         /*
1906                          * If this is not the original output interface,
1907                          * and the interface is participating in spanning
1908                          * tree, make sure the port is in a state that
1909                          * allows forwarding.
1910                          */
1911                         if (dst_if != ifp &&
1912                             (bif->bif_flags & IFBIF_STP) != 0) {
1913                                 switch (bif->bif_state) {
1914                                 case BSTP_IFSTATE_BLOCKING:
1915                                 case BSTP_IFSTATE_LISTENING:
1916                                 case BSTP_IFSTATE_DISABLED:
1917                                         continue;
1918                                 }
1919                         }
1920
1921                         if (LIST_NEXT(bif, bif_next) == NULL) {
1922                                 used = 1;
1923                                 mc = m;
1924                         } else {
1925                                 mc = m_copypacket(m, MB_DONTWAIT);
1926                                 if (mc == NULL) {
1927                                         bifp->if_oerrors++;
1928                                         continue;
1929                                 }
1930                         }
1931                         bridge_handoff(dst_if, mc);
1932
1933                         if (nbif != NULL && !nbif->bif_onlist) {
1934                                 KKASSERT(bif->bif_onlist);
1935                                 nbif = LIST_NEXT(bif, bif_next);
1936                         }
1937                 }
1938                 if (used == 0)
1939                         m_freem(m);
1940                 return (0);
1941         }
1942
1943 sendunicast:
1944         /*
1945          * XXX Spanning tree consideration here?
1946          */
1947         if (sc->sc_span)
1948                 bridge_span(sc, m);
1949         if ((dst_if->if_flags & IFF_RUNNING) == 0)
1950                 m_freem(m);
1951         else
1952                 bridge_handoff(dst_if, m);
1953         return (0);
1954 }
1955
1956 /*
1957  * bridge_start:
1958  *
1959  *      Start output on a bridge.
1960  *
1961  */
1962 static void
1963 bridge_start(struct ifnet *ifp)
1964 {
1965         struct bridge_softc *sc = ifp->if_softc;
1966
1967         ASSERT_SERIALIZED(ifp->if_serializer);
1968
1969         ifp->if_flags |= IFF_OACTIVE;
1970         for (;;) {
1971                 struct ifnet *dst_if = NULL;
1972                 struct ether_header *eh;
1973                 struct mbuf *m;
1974
1975                 m = ifq_dequeue(&ifp->if_snd, NULL);
1976                 if (m == NULL)
1977                         break;
1978
1979                 if (m->m_len < sizeof(*eh)) {
1980                         m = m_pullup(m, sizeof(*eh));
1981                         if (m == NULL) {
1982                                 ifp->if_oerrors++;
1983                                 continue;
1984                         }
1985                 }
1986                 eh = mtod(m, struct ether_header *);
1987
1988                 BPF_MTAP(ifp, m);
1989                 ifp->if_opackets++;
1990
1991                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
1992                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1993
1994                 if (dst_if == NULL)
1995                         bridge_start_bcast(sc, m);
1996                 else
1997                         bridge_enqueue(dst_if, m);
1998         }
1999         ifp->if_flags &= ~IFF_OACTIVE;
2000 }
2001
2002 /*
2003  * bridge_forward:
2004  *
2005  *      The forwarding function of the bridge.
2006  */
2007 static void
2008 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2009 {
2010         struct bridge_iflist *bif;
2011         struct ifnet *src_if, *dst_if, *ifp;
2012         struct ether_header *eh;
2013
2014         src_if = m->m_pkthdr.rcvif;
2015         ifp = sc->sc_ifp;
2016
2017         ASSERT_NOT_SERIALIZED(ifp->if_serializer);
2018
2019         ifp->if_ipackets++;
2020         ifp->if_ibytes += m->m_pkthdr.len;
2021
2022         /*
2023          * Look up the bridge_iflist.
2024          */
2025         bif = bridge_lookup_member_if(sc, src_if);
2026         if (bif == NULL) {
2027                 /* Interface is not a bridge member (anymore?) */
2028                 m_freem(m);
2029                 return;
2030         }
2031
2032         if (bif->bif_flags & IFBIF_STP) {
2033                 switch (bif->bif_state) {
2034                 case BSTP_IFSTATE_BLOCKING:
2035                 case BSTP_IFSTATE_LISTENING:
2036                 case BSTP_IFSTATE_DISABLED:
2037                         m_freem(m);
2038                         return;
2039                 }
2040         }
2041
2042         eh = mtod(m, struct ether_header *);
2043
2044         /*
2045          * If the interface is learning, and the source
2046          * address is valid and not multicast, record
2047          * the address.
2048          */
2049         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2050             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2051             (eh->ether_shost[0] == 0 &&
2052              eh->ether_shost[1] == 0 &&
2053              eh->ether_shost[2] == 0 &&
2054              eh->ether_shost[3] == 0 &&
2055              eh->ether_shost[4] == 0 &&
2056              eh->ether_shost[5] == 0) == 0)
2057                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2058
2059         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2060             bif->bif_state == BSTP_IFSTATE_LEARNING) {
2061                 m_freem(m);
2062                 return;
2063         }
2064
2065         /*
2066          * At this point, the port either doesn't participate
2067          * in spanning tree or it is in the forwarding state.
2068          */
2069
2070         /*
2071          * If the packet is unicast, destined for someone on
2072          * "this" side of the bridge, drop it.
2073          */
2074         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2075                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2076                 if (src_if == dst_if) {
2077                         m_freem(m);
2078                         return;
2079                 }
2080         } else {
2081                 /* ...forward it to all interfaces. */
2082                 ifp->if_imcasts++;
2083                 dst_if = NULL;
2084         }
2085
2086         if (dst_if == NULL) {
2087                 bridge_broadcast(sc, src_if, m);
2088                 return;
2089         }
2090
2091         /*
2092          * At this point, we're dealing with a unicast frame
2093          * going to a different interface.
2094          */
2095         if ((dst_if->if_flags & IFF_RUNNING) == 0) {
2096                 m_freem(m);
2097                 return;
2098         }
2099         bif = bridge_lookup_member_if(sc, dst_if);
2100         if (bif == NULL) {
2101                 /* Not a member of the bridge (anymore?) */
2102                 m_freem(m);
2103                 return;
2104         }
2105
2106         if (bif->bif_flags & IFBIF_STP) {
2107                 switch (bif->bif_state) {
2108                 case BSTP_IFSTATE_DISABLED:
2109                 case BSTP_IFSTATE_BLOCKING:
2110                         m_freem(m);
2111                         return;
2112                 }
2113         }
2114
2115         if (inet_pfil_hook.ph_hashooks > 0
2116 #ifdef INET6
2117             || inet6_pfil_hook.ph_hashooks > 0
2118 #endif
2119             ) {
2120                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2121                         return;
2122                 if (m == NULL)
2123                         return;
2124
2125                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2126                         return;
2127                 if (m == NULL)
2128                         return;
2129         }
2130         bridge_handoff(dst_if, m);
2131 }
2132
2133 /*
2134  * bridge_input:
2135  *
2136  *      Receive input from a member interface.  Queue the packet for
2137  *      bridging if it is not for us.
2138  */
2139 static struct mbuf *
2140 bridge_input(struct ifnet *ifp, struct mbuf *m)
2141 {
2142         struct bridge_softc *sc = ifp->if_bridge;
2143         struct bridge_iflist *bif;
2144         struct ifnet *bifp, *new_ifp;
2145         struct ether_header *eh;
2146         struct mbuf *mc, *mc2;
2147
2148         ASSERT_NOT_SERIALIZED(ifp->if_serializer);
2149
2150         /*
2151          * Make sure that we are still a member of a bridge interface.
2152          */
2153         if (sc == NULL)
2154                 return m;
2155
2156         new_ifp = NULL;
2157         bifp = sc->sc_ifp;
2158
2159         if ((bifp->if_flags & IFF_RUNNING) == 0)
2160                 goto out;
2161
2162         /*
2163          * Implement support for bridge monitoring.  If this flag has been
2164          * set on this interface, discard the packet once we push it through
2165          * the bpf(4) machinery, but before we do, increment various counters
2166          * associated with this bridge.
2167          */
2168         if (bifp->if_flags & IFF_MONITOR) {
2169                 /* Change input interface to this bridge */
2170                 m->m_pkthdr.rcvif = bifp;
2171
2172                 BPF_MTAP(bifp, m);
2173
2174                 /* Update bridge's ifnet statistics */
2175                 bifp->if_ipackets++;
2176                 bifp->if_ibytes += m->m_pkthdr.len;
2177                 if (m->m_flags & (M_MCAST | M_BCAST))
2178                         bifp->if_imcasts++;
2179
2180                 m_freem(m);
2181                 m = NULL;
2182                 goto out;
2183         }
2184
2185         eh = mtod(m, struct ether_header *);
2186
2187         m->m_flags &= ~M_PROTO1; /* XXX Hack - loop prevention */
2188
2189         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2190                 /*
2191                  * If the packet is for us, set the packets source as the
2192                  * bridge, and return the packet back to ifnet.if_input for
2193                  * local processing.
2194                  */
2195                 KASSERT(bifp->if_bridge == NULL,
2196                         ("loop created in bridge_input"));
2197                 new_ifp = bifp;
2198                 goto out;
2199         }
2200
2201         /*
2202          * Tap all packets arriving on the bridge, no matter if
2203          * they are local destinations or not.  In is in.
2204          */
2205         BPF_MTAP(bifp, m);
2206
2207         bif = bridge_lookup_member_if(sc, ifp);
2208         if (bif == NULL)
2209                 goto out;
2210
2211         if (sc->sc_span)
2212                 bridge_span(sc, m);
2213
2214         if (m->m_flags & (M_BCAST | M_MCAST)) {
2215                 /* Tap off 802.1D packets; they do not get forwarded. */
2216                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2217                     ETHER_ADDR_LEN) == 0) {
2218                         lwkt_serialize_enter(bifp->if_serializer);
2219                         bstp_input(sc, bif, m);
2220                         lwkt_serialize_exit(bifp->if_serializer);
2221
2222                         /* m is freed by bstp_input */
2223                         m = NULL;
2224                         goto out;
2225                 }
2226
2227                 if (bif->bif_flags & IFBIF_STP) {
2228                         switch (bif->bif_state) {
2229                         case BSTP_IFSTATE_BLOCKING:
2230                         case BSTP_IFSTATE_LISTENING:
2231                         case BSTP_IFSTATE_DISABLED:
2232                                 goto out;
2233                         }
2234                 }
2235
2236                 /*
2237                  * Make a deep copy of the packet and enqueue the copy
2238                  * for bridge processing; return the original packet for
2239                  * local processing.
2240                  */
2241                 mc = m_dup(m, MB_DONTWAIT);
2242                 if (mc == NULL)
2243                         goto out;
2244
2245                 bridge_forward(sc, mc);
2246
2247                 /*
2248                  * Reinject the mbuf as arriving on the bridge so we have a
2249                  * chance at claiming multicast packets. We can not loop back
2250                  * here from ether_input as a bridge is never a member of a
2251                  * bridge.
2252                  */
2253                 KASSERT(bifp->if_bridge == NULL,
2254                         ("loop created in bridge_input"));
2255                 mc2 = m_dup(m, MB_DONTWAIT);
2256 #ifdef notyet
2257                 if (mc2 != NULL) {
2258                         /* Keep the layer3 header aligned */
2259                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2260                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2261                 }
2262 #endif
2263                 if (mc2 != NULL) {
2264                         /*
2265                          * Don't tap to bpf(4) again; we have
2266                          * already done the tapping.
2267                          */
2268                         ether_reinput_oncpu(bifp, mc2, 0);
2269                 }
2270
2271                 /* Return the original packet for local processing. */
2272                 goto out;
2273         }
2274
2275         if (bif->bif_flags & IFBIF_STP) {
2276                 switch (bif->bif_state) {
2277                 case BSTP_IFSTATE_BLOCKING:
2278                 case BSTP_IFSTATE_LISTENING:
2279                 case BSTP_IFSTATE_DISABLED:
2280                         goto out;
2281                 }
2282         }
2283
2284         /*
2285          * Unicast.  Make sure it's not for us.
2286          *
2287          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2288          * is followed by breaking out of the loop.
2289          */
2290         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2291                 if (bif->bif_ifp->if_type != IFT_ETHER)
2292                         continue;
2293
2294                 /* It is destined for us. */
2295                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2296                     ETHER_ADDR_LEN) == 0) {
2297                         if (bif->bif_ifp != ifp) {
2298                                 /* XXX loop prevention */
2299                                 m->m_flags |= M_PROTO1;
2300                                 new_ifp = bif->bif_ifp;
2301                         }
2302                         if (bif->bif_flags & IFBIF_LEARNING) {
2303                                 bridge_rtupdate(sc, eh->ether_shost,
2304                                                 ifp, IFBAF_DYNAMIC);
2305                         }
2306                         goto out;
2307                 }
2308
2309                 /* We just received a packet that we sent out. */
2310                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2311                     ETHER_ADDR_LEN) == 0) {
2312                         m_freem(m);
2313                         m = NULL;
2314                         goto out;
2315                 }
2316         }
2317
2318         /* Perform the bridge forwarding function. */
2319         bridge_forward(sc, m);
2320         m = NULL;
2321 out:
2322         if (new_ifp != NULL) {
2323                 ether_reinput_oncpu(new_ifp, m, 1);
2324                 m = NULL;
2325         }
2326         return (m);
2327 }
2328
2329 /*
2330  * bridge_start_bcast:
2331  *
2332  *      Broadcast the packet sent from bridge to all member
2333  *      interfaces.
2334  *      This is a simplified version of bridge_broadcast(), however,
2335  *      this function expects caller to hold bridge's serializer.
2336  */
2337 static void
2338 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2339 {
2340         struct bridge_iflist *bif;
2341         struct mbuf *mc;
2342         struct ifnet *dst_if, *bifp;
2343         int used = 0;
2344
2345         bifp = sc->sc_ifp;
2346         ASSERT_SERIALIZED(bifp->if_serializer);
2347
2348         /*
2349          * Following loop is MPSAFE; nothing is blocking
2350          * in the loop body.
2351          */
2352         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2353                 dst_if = bif->bif_ifp;
2354
2355                 if (bif->bif_flags & IFBIF_STP) {
2356                         switch (bif->bif_state) {
2357                         case BSTP_IFSTATE_BLOCKING:
2358                         case BSTP_IFSTATE_DISABLED:
2359                                 continue;
2360                         }
2361                 }
2362
2363                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2364                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2365                         continue;
2366
2367                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2368                         continue;
2369
2370                 if (LIST_NEXT(bif, bif_next) == NULL) {
2371                         mc = m;
2372                         used = 1;
2373                 } else {
2374                         mc = m_copypacket(m, MB_DONTWAIT);
2375                         if (mc == NULL) {
2376                                 bifp->if_oerrors++;
2377                                 continue;
2378                         }
2379                 }
2380                 bridge_enqueue(dst_if, mc);
2381         }
2382         if (used == 0)
2383                 m_freem(m);
2384 }
2385
2386 /*
2387  * bridge_broadcast:
2388  *
2389  *      Send a frame to all interfaces that are members of
2390  *      the bridge, except for the one on which the packet
2391  *      arrived.
2392  */
2393 static void
2394 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2395     struct mbuf *m)
2396 {
2397         struct bridge_iflist *bif, *nbif;
2398         struct mbuf *mc;
2399         struct ifnet *dst_if, *bifp;
2400         int used = 0;
2401
2402         bifp = sc->sc_ifp;
2403         ASSERT_NOT_SERIALIZED(bifp->if_serializer);
2404
2405         if (inet_pfil_hook.ph_hashooks > 0
2406 #ifdef INET6
2407             || inet6_pfil_hook.ph_hashooks > 0
2408 #endif
2409             ) {
2410                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2411                         return;
2412                 if (m == NULL)
2413                         return;
2414
2415                 /* Filter on the bridge interface before broadcasting */
2416                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2417                         return;
2418                 if (m == NULL)
2419                         return;
2420         }
2421
2422         LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
2423                 dst_if = bif->bif_ifp;
2424                 if (dst_if == src_if)
2425                         continue;
2426
2427                 if (bif->bif_flags & IFBIF_STP) {
2428                         switch (bif->bif_state) {
2429                         case BSTP_IFSTATE_BLOCKING:
2430                         case BSTP_IFSTATE_DISABLED:
2431                                 continue;
2432                         }
2433                 }
2434
2435                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2436                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2437                         continue;
2438
2439                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2440                         continue;
2441
2442                 if (LIST_NEXT(bif, bif_next) == NULL) {
2443                         mc = m;
2444                         used = 1;
2445                 } else {
2446                         mc = m_copypacket(m, MB_DONTWAIT);
2447                         if (mc == NULL) {
2448                                 sc->sc_ifp->if_oerrors++;
2449                                 continue;
2450                         }
2451                 }
2452
2453                 /*
2454                  * Filter on the output interface.  Pass a NULL bridge
2455                  * interface pointer so we do not redundantly filter on
2456                  * the bridge for each interface we broadcast on.
2457                  */
2458                 if (inet_pfil_hook.ph_hashooks > 0
2459 #ifdef INET6
2460                     || inet6_pfil_hook.ph_hashooks > 0
2461 #endif
2462                     ) {
2463                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
2464                                 continue;
2465                         if (mc == NULL)
2466                                 continue;
2467                 }
2468                 bridge_handoff(dst_if, mc);
2469
2470                 if (nbif != NULL && !nbif->bif_onlist) {
2471                         KKASSERT(bif->bif_onlist);
2472                         nbif = LIST_NEXT(bif, bif_next);
2473                 }
2474         }
2475         if (used == 0)
2476                 m_freem(m);
2477 }
2478
2479 /*
2480  * bridge_span:
2481  *
2482  *      Duplicate a packet out one or more interfaces that are in span mode,
2483  *      the original mbuf is unmodified.
2484  */
2485 static void
2486 bridge_span(struct bridge_softc *sc, struct mbuf *m)
2487 {
2488         struct bridge_iflist *bif;
2489         struct ifnet *dst_if, *bifp;
2490         struct mbuf *mc;
2491
2492         bifp = sc->sc_ifp;
2493         lwkt_serialize_enter(bifp->if_serializer);
2494
2495         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2496                 dst_if = bif->bif_ifp;
2497
2498                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2499                         continue;
2500
2501                 mc = m_copypacket(m, MB_DONTWAIT);
2502                 if (mc == NULL) {
2503                         sc->sc_ifp->if_oerrors++;
2504                         continue;
2505                 }
2506                 bridge_enqueue(dst_if, mc);
2507         }
2508
2509         lwkt_serialize_exit(bifp->if_serializer);
2510 }
2511
2512 static void
2513 bridge_rtmsg_sync_handler(struct netmsg *nmsg)
2514 {
2515         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2516 }
2517
2518 static void
2519 bridge_rtmsg_sync(struct bridge_softc *sc)
2520 {
2521         struct netmsg nmsg;
2522
2523         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2524
2525         netmsg_init(&nmsg, &curthread->td_msgport, 0,
2526                     bridge_rtmsg_sync_handler);
2527         ifnet_domsg(&nmsg.nm_lmsg, 0);
2528 }
2529
2530 static __inline void
2531 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
2532                      int setflags, uint8_t flags, uint32_t timeo)
2533 {
2534         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2535             bri->bri_ifp != dst_if)
2536                 bri->bri_ifp = dst_if;
2537         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2538             bri->bri_expire != time_second + timeo)
2539                 bri->bri_expire = time_second + timeo;
2540         if (setflags)
2541                 bri->bri_flags = flags;
2542 }
2543
2544 static int
2545 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
2546                        struct ifnet *dst_if, int setflags, uint8_t flags,
2547                        struct bridge_rtinfo **bri0)
2548 {
2549         struct bridge_rtnode *brt;
2550         struct bridge_rtinfo *bri;
2551
2552         if (mycpuid == 0) {
2553                 brt = bridge_rtnode_lookup(sc, dst);
2554                 if (brt != NULL) {
2555                         /*
2556                          * rtnode for 'dst' already exists.  We inform the
2557                          * caller about this by leaving bri0 as NULL.  The
2558                          * caller will terminate the intallation upon getting
2559                          * NULL bri0.  However, we still need to update the
2560                          * rtinfo.
2561                          */
2562                         KKASSERT(*bri0 == NULL);
2563
2564                         /* Update rtinfo */
2565                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
2566                                              flags, sc->sc_brttimeout);
2567                         return 0;
2568                 }
2569
2570                 /*
2571                  * We only need to check brtcnt on CPU0, since if limit
2572                  * is to be exceeded, ENOSPC is returned.  Caller knows
2573                  * this and will terminate the installation.
2574                  */
2575                 if (sc->sc_brtcnt >= sc->sc_brtmax)
2576                         return ENOSPC;
2577
2578                 KKASSERT(*bri0 == NULL);
2579                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
2580                                   M_WAITOK | M_ZERO);
2581                 *bri0 = bri;
2582
2583                 /* Setup rtinfo */
2584                 bri->bri_flags = IFBAF_DYNAMIC;
2585                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
2586                                      sc->sc_brttimeout);
2587         } else {
2588                 bri = *bri0;
2589                 KKASSERT(bri != NULL);
2590         }
2591
2592         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
2593                       M_WAITOK | M_ZERO);
2594         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2595         brt->brt_info = bri;
2596
2597         bridge_rtnode_insert(sc, brt);
2598         return 0;
2599 }
2600
2601 static void
2602 bridge_rtinstall_handler(struct netmsg *nmsg)
2603 {
2604         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)nmsg;
2605         int error;
2606
2607         error = bridge_rtinstall_oncpu(brmsg->br_softc,
2608                                        brmsg->br_dst, brmsg->br_dst_if,
2609                                        brmsg->br_setflags, brmsg->br_flags,
2610                                        &brmsg->br_rtinfo);
2611         if (error) {
2612                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
2613                 lwkt_replymsg(&nmsg->nm_lmsg, error);
2614                 return;
2615         } else if (brmsg->br_rtinfo == NULL) {
2616                 /* rtnode already exists for 'dst' */
2617                 KKASSERT(mycpuid == 0);
2618                 lwkt_replymsg(&nmsg->nm_lmsg, 0);
2619                 return;
2620         }
2621         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2622 }
2623
2624 /*
2625  * bridge_rtupdate:
2626  *
2627  *      Add/Update a bridge routing entry.
2628  */
2629 static int
2630 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2631                 struct ifnet *dst_if, uint8_t flags)
2632 {
2633         struct bridge_rtnode *brt;
2634
2635         /*
2636          * A route for this destination might already exist.  If so,
2637          * update it, otherwise create a new one.
2638          */
2639         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
2640                 struct netmsg_brsaddr *brmsg;
2641
2642                 if (sc->sc_brtcnt >= sc->sc_brtmax)
2643                         return ENOSPC;
2644
2645                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
2646                 if (brmsg == NULL)
2647                         return ENOMEM;
2648
2649                 netmsg_init(&brmsg->br_nmsg, &netisr_afree_rport, 0,
2650                             bridge_rtinstall_handler);
2651                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
2652                 brmsg->br_dst_if = dst_if;
2653                 brmsg->br_flags = flags;
2654                 brmsg->br_setflags = 0;
2655                 brmsg->br_softc = sc;
2656                 brmsg->br_rtinfo = NULL;
2657
2658                 ifnet_sendmsg(&brmsg->br_nmsg.nm_lmsg, 0);
2659                 return 0;
2660         }
2661         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
2662                              sc->sc_brttimeout);
2663         return 0;
2664 }
2665
2666 static int
2667 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
2668                struct ifnet *dst_if, uint8_t flags)
2669 {
2670         struct netmsg_brsaddr brmsg;
2671
2672         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2673
2674         netmsg_init(&brmsg.br_nmsg, &curthread->td_msgport, 0,
2675                     bridge_rtinstall_handler);
2676         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
2677         brmsg.br_dst_if = dst_if;
2678         brmsg.br_flags = flags;
2679         brmsg.br_setflags = 1;
2680         brmsg.br_softc = sc;
2681         brmsg.br_rtinfo = NULL;
2682
2683         return ifnet_domsg(&brmsg.br_nmsg.nm_lmsg, 0);
2684 }
2685
2686 /*
2687  * bridge_rtlookup:
2688  *
2689  *      Lookup the destination interface for an address.
2690  */
2691 static struct ifnet *
2692 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2693 {
2694         struct bridge_rtnode *brt;
2695
2696         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
2697                 return NULL;
2698         return brt->brt_info->bri_ifp;
2699 }
2700
2701 static void
2702 bridge_rtreap_handler(struct netmsg *nmsg)
2703 {
2704         struct bridge_softc *sc = nmsg->nm_lmsg.u.ms_resultp;
2705         struct bridge_rtnode *brt, *nbrt;
2706
2707         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
2708                 if (brt->brt_info->bri_dead)
2709                         bridge_rtnode_destroy(sc, brt);
2710         }
2711         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2712 }
2713
2714 static void
2715 bridge_rtreap(struct bridge_softc *sc)
2716 {
2717         struct netmsg nmsg;
2718
2719         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2720
2721         netmsg_init(&nmsg, &curthread->td_msgport, 0, bridge_rtreap_handler);
2722         nmsg.nm_lmsg.u.ms_resultp = sc;
2723
2724         ifnet_domsg(&nmsg.nm_lmsg, 0);
2725 }
2726
2727 /*
2728  * bridge_rttrim:
2729  *
2730  *      Trim the routine table so that we have a number
2731  *      of routing entries less than or equal to the
2732  *      maximum number.
2733  */
2734 static void
2735 bridge_rttrim(struct bridge_softc *sc)
2736 {
2737         struct bridge_rtnode *brt;
2738         int dead;
2739
2740         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2741
2742         /* Make sure we actually need to do this. */
2743         if (sc->sc_brtcnt <= sc->sc_brtmax)
2744                 return;
2745
2746         /*
2747          * Find out how many rtnodes are dead
2748          */
2749         dead = bridge_rtage_finddead(sc);
2750         KKASSERT(dead <= sc->sc_brtcnt);
2751
2752         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
2753                 /* Enough dead rtnodes are found */
2754                 bridge_rtreap(sc);
2755                 return;
2756         }
2757
2758         /*
2759          * Kill some dynamic rtnodes to meet the brtmax
2760          */
2761         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2762                 struct bridge_rtinfo *bri = brt->brt_info;
2763
2764                 if (bri->bri_dead) {
2765                         /*
2766                          * We have counted this rtnode in
2767                          * bridge_rtage_finddead()
2768                          */
2769                         continue;
2770                 }
2771
2772                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2773                         bri->bri_dead = 1;
2774                         ++dead;
2775                         KKASSERT(dead <= sc->sc_brtcnt);
2776
2777                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
2778                                 /* Enough rtnodes are collected */
2779                                 break;
2780                         }
2781                 }
2782         }
2783         if (dead)
2784                 bridge_rtreap(sc);
2785 }
2786
2787 /*
2788  * bridge_timer:
2789  *
2790  *      Aging timer for the bridge.
2791  */
2792 static void
2793 bridge_timer(void *arg)
2794 {
2795         struct bridge_softc *sc = arg;
2796         struct lwkt_msg *lmsg;
2797
2798         KKASSERT(mycpuid == BRIDGE_CFGCPU);
2799
2800         crit_enter();
2801
2802         if (callout_pending(&sc->sc_brcallout) ||
2803             !callout_active(&sc->sc_brcallout)) {
2804                 crit_exit();
2805                 return;
2806         }
2807         callout_deactivate(&sc->sc_brcallout);
2808
2809         lmsg = &sc->sc_brtimemsg.nm_lmsg;
2810         KKASSERT(lmsg->ms_flags & MSGF_DONE);
2811         lwkt_sendmsg(BRIDGE_CFGPORT, lmsg);
2812
2813         crit_exit();
2814 }
2815
2816 static void
2817 bridge_timer_handler(struct netmsg *nmsg)
2818 {
2819         struct bridge_softc *sc = nmsg->nm_lmsg.u.ms_resultp;
2820
2821         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
2822
2823         crit_enter();
2824         /* Reply ASAP */
2825         lwkt_replymsg(&nmsg->nm_lmsg, 0);
2826         crit_exit();
2827
2828         bridge_rtage(sc);
2829         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
2830                 callout_reset(&sc->sc_brcallout,
2831                     bridge_rtable_prune_period * hz, bridge_timer, sc);
2832         }
2833 }
2834
2835 static int
2836 bridge_rtage_finddead(struct bridge_softc *sc)
2837 {
2838         struct bridge_rtnode *brt;
2839         int dead = 0;
2840
2841         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2842                 struct bridge_rtinfo *bri = brt->brt_info;
2843
2844                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2845                     time_second >= bri->bri_expire) {
2846                         bri->bri_dead = 1;
2847                         ++dead;
2848                         KKASSERT(dead <= sc->sc_brtcnt);
2849                 }
2850         }
2851         return dead;
2852 }
2853
2854 /*
2855  * bridge_rtage:
2856  *
2857  *      Perform an aging cycle.
2858  */
2859 static void
2860 bridge_rtage(struct bridge_softc *sc)
2861 {
2862         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2863
2864         if (bridge_rtage_finddead(sc))
2865                 bridge_rtreap(sc);
2866 }
2867
2868 /*
2869  * bridge_rtflush:
2870  *
2871  *      Remove all dynamic addresses from the bridge.
2872  */
2873 static void
2874 bridge_rtflush(struct bridge_softc *sc, int full)
2875 {
2876         struct bridge_rtnode *brt;
2877         int reap;
2878
2879         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2880
2881         reap = 0;
2882         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2883                 struct bridge_rtinfo *bri = brt->brt_info;
2884
2885                 if (full ||
2886                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2887                         bri->bri_dead = 1;
2888                         reap = 1;
2889                 }
2890         }
2891         if (reap)
2892                 bridge_rtreap(sc);
2893 }
2894
2895 /*
2896  * bridge_rtdaddr:
2897  *
2898  *      Remove an address from the table.
2899  */
2900 static int
2901 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2902 {
2903         struct bridge_rtnode *brt;
2904
2905         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2906
2907         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
2908                 return (ENOENT);
2909
2910         /* TODO: add a cheaper delete operation */
2911         brt->brt_info->bri_dead = 1;
2912         bridge_rtreap(sc);
2913         return (0);
2914 }
2915
2916 /*
2917  * bridge_rtdelete:
2918  *
2919  *      Delete routes to a speicifc member interface.
2920  */
2921 void
2922 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
2923 {
2924         struct bridge_rtnode *brt;
2925         int reap;
2926
2927         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
2928
2929         reap = 0;
2930         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2931                 struct bridge_rtinfo *bri = brt->brt_info;
2932
2933                 if (bri->bri_ifp == ifp &&
2934                     (full ||
2935                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
2936                         bri->bri_dead = 1;
2937                         reap = 1;
2938                 }
2939         }
2940         if (reap)
2941                 bridge_rtreap(sc);
2942 }
2943
2944 /*
2945  * bridge_rtable_init:
2946  *
2947  *      Initialize the route table for this bridge.
2948  */
2949 static void
2950 bridge_rtable_init(struct bridge_softc *sc)
2951 {
2952         int cpu;
2953
2954         /*
2955          * Initialize per-cpu hash tables
2956          */
2957         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
2958                                  M_DEVBUF, M_WAITOK);
2959         for (cpu = 0; cpu < ncpus; ++cpu) {
2960                 int i;
2961
2962                 sc->sc_rthashs[cpu] =
2963                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
2964                         M_DEVBUF, M_WAITOK);
2965
2966                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2967                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
2968         }
2969         sc->sc_rthash_key = karc4random();
2970
2971         /*
2972          * Initialize per-cpu lists
2973          */
2974         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
2975                                  M_DEVBUF, M_WAITOK);
2976         for (cpu = 0; cpu < ncpus; ++cpu)
2977                 LIST_INIT(&sc->sc_rtlists[cpu]);
2978 }
2979
2980 /*
2981  * bridge_rtable_fini:
2982  *
2983  *      Deconstruct the route table for this bridge.
2984  */
2985 static void
2986 bridge_rtable_fini(struct bridge_softc *sc)
2987 {
2988         int cpu;
2989
2990         /*
2991          * Free per-cpu hash tables
2992          */
2993         for (cpu = 0; cpu < ncpus; ++cpu)
2994                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
2995         kfree(sc->sc_rthashs, M_DEVBUF);
2996
2997         /*
2998          * Free per-cpu lists
2999          */
3000         kfree(sc->sc_rtlists, M_DEVBUF);
3001 }
3002
3003 /*
3004  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3005  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3006  */
3007 #define mix(a, b, c)                                                    \
3008 do {                                                                    \
3009         a -= b; a -= c; a ^= (c >> 13);                                 \
3010         b -= c; b -= a; b ^= (a << 8);                                  \
3011         c -= a; c -= b; c ^= (b >> 13);                                 \
3012         a -= b; a -= c; a ^= (c >> 12);                                 \
3013         b -= c; b -= a; b ^= (a << 16);                                 \
3014         c -= a; c -= b; c ^= (b >> 5);                                  \
3015         a -= b; a -= c; a ^= (c >> 3);                                  \
3016         b -= c; b -= a; b ^= (a << 10);                                 \
3017         c -= a; c -= b; c ^= (b >> 15);                                 \
3018 } while (/*CONSTCOND*/0)
3019
3020 static __inline uint32_t
3021 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3022 {
3023         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3024
3025         b += addr[5] << 8;
3026         b += addr[4];
3027         a += addr[3] << 24;
3028         a += addr[2] << 16;
3029         a += addr[1] << 8;
3030         a += addr[0];
3031
3032         mix(a, b, c);
3033
3034         return (c & BRIDGE_RTHASH_MASK);
3035 }
3036
3037 #undef mix
3038
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.
 *	Returns 0 when all ETHER_ADDR_LEN bytes are equal, otherwise the
 *	difference (a[i] - b[i]) of the first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0)
			return (diff);
	}

	return (0);
}
3050
3051 /*
3052  * bridge_rtnode_lookup:
3053  *
3054  *      Look up a bridge route node for the specified destination.
3055  */
3056 static struct bridge_rtnode *
3057 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3058 {
3059         struct bridge_rtnode *brt;
3060         uint32_t hash;
3061         int dir;
3062
3063         hash = bridge_rthash(sc, addr);
3064         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3065                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3066                 if (dir == 0)
3067                         return (brt);
3068                 if (dir > 0)
3069                         return (NULL);
3070         }
3071
3072         return (NULL);
3073 }
3074
3075 /*
3076  * bridge_rtnode_insert:
3077  *
3078  *      Insert the specified bridge node into the route table.
3079  *      Caller has to make sure that rtnode does not exist.
3080  */
3081 static void
3082 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3083 {
3084         struct bridge_rtnode *lbrt;
3085         uint32_t hash;
3086         int dir;
3087
3088         hash = bridge_rthash(sc, brt->brt_addr);
3089
3090         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3091         if (lbrt == NULL) {
3092                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash], brt, brt_hash);
3093                 goto out;
3094         }
3095
3096         do {
3097                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3098                 KASSERT(dir != 0, ("rtnode already exist\n"));
3099
3100                 if (dir > 0) {
3101                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3102                         goto out;
3103                 }
3104                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3105                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3106                         goto out;
3107                 }
3108                 lbrt = LIST_NEXT(lbrt, brt_hash);
3109         } while (lbrt != NULL);
3110
3111         panic("no suitable position found for rtnode\n");
3112 out:
3113         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3114         if (mycpuid == 0) {
3115                 /*
3116                  * Update the brtcnt.
3117                  * We only need to do it once and we do it on CPU0.
3118                  */
3119                 sc->sc_brtcnt++;
3120         }
3121 }
3122
3123 /*
3124  * bridge_rtnode_destroy:
3125  *
3126  *      Destroy a bridge rtnode.
3127  */
3128 static void
3129 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3130 {
3131         LIST_REMOVE(brt, brt_hash);
3132         LIST_REMOVE(brt, brt_list);
3133
3134         if (mycpuid + 1 == ncpus) {
3135                 /* Free rtinfo associated with rtnode on the last cpu */
3136                 kfree(brt->brt_info, M_DEVBUF);
3137         }
3138         kfree(brt, M_DEVBUF);
3139
3140         if (mycpuid == 0) {
3141                 /* Update brtcnt only on CPU0 */
3142                 sc->sc_brtcnt--;
3143         }
3144 }
3145
3146 static __inline int
3147 bridge_post_pfil(struct mbuf *m)
3148 {
3149         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3150                 return EOPNOTSUPP;
3151
3152         /* Not yet */
3153         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3154                 return EOPNOTSUPP;
3155
3156         return 0;
3157 }
3158
3159 /*
3160  * Send bridge packets through pfil if they are one of the types pfil can deal
3161  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3162  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3163  * that interface.
3164  */
3165 static int
3166 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3167 {
3168         int snap, error, i, hlen;
3169         struct ether_header *eh1, eh2;
3170         struct ip *ip;
3171         struct llc llc1;
3172         u_int16_t ether_type;
3173
3174         snap = 0;
3175         error = -1;     /* Default error if not error == 0 */
3176
3177         if (pfil_bridge == 0 && pfil_member == 0)
3178                 return (0); /* filtering is disabled */
3179
3180         i = min((*mp)->m_pkthdr.len, max_protohdr);
3181         if ((*mp)->m_len < i) {
3182                 *mp = m_pullup(*mp, i);
3183                 if (*mp == NULL) {
3184                         kprintf("%s: m_pullup failed\n", __func__);
3185                         return (-1);
3186                 }
3187         }
3188
3189         eh1 = mtod(*mp, struct ether_header *);
3190         ether_type = ntohs(eh1->ether_type);
3191
3192         /*
3193          * Check for SNAP/LLC.
3194          */
3195         if (ether_type < ETHERMTU) {
3196                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3197
3198                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3199                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3200                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3201                     llc2->llc_control == LLC_UI) {
3202                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3203                         snap = 1;
3204                 }
3205         }
3206
3207         /*
3208          * If we're trying to filter bridge traffic, don't look at anything
3209          * other than IP and ARP traffic.  If the filter doesn't understand
3210          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3211          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3212          * but of course we don't have an AppleTalk filter to begin with.
3213          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3214          * ARP traffic.)
3215          */
3216         switch (ether_type) {
3217         case ETHERTYPE_ARP:
3218         case ETHERTYPE_REVARP:
3219                 return (0); /* Automatically pass */
3220
3221         case ETHERTYPE_IP:
3222 #ifdef INET6
3223         case ETHERTYPE_IPV6:
3224 #endif /* INET6 */
3225                 break;
3226
3227         default:
3228                 /*
3229                  * Check to see if the user wants to pass non-ip
3230                  * packets, these will not be checked by pfil(9)
3231                  * and passed unconditionally so the default is to drop.
3232                  */
3233                 if (pfil_onlyip)
3234                         goto bad;
3235         }
3236
3237         /* Strip off the Ethernet header and keep a copy. */
3238         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3239         m_adj(*mp, ETHER_HDR_LEN);
3240
3241         /* Strip off snap header, if present */
3242         if (snap) {
3243                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3244                 m_adj(*mp, sizeof(struct llc));
3245         }
3246
3247         /*
3248          * Check the IP header for alignment and errors
3249          */
3250         if (dir == PFIL_IN) {
3251                 switch (ether_type) {
3252                 case ETHERTYPE_IP:
3253                         error = bridge_ip_checkbasic(mp);
3254                         break;
3255 #ifdef INET6
3256                 case ETHERTYPE_IPV6:
3257                         error = bridge_ip6_checkbasic(mp);
3258                         break;
3259 #endif /* INET6 */
3260                 default:
3261                         error = 0;
3262                 }
3263                 if (error)
3264                         goto bad;
3265         }
3266
3267         error = 0;
3268
3269         /*
3270          * Run the packet through pfil
3271          */
3272         switch (ether_type) {
3273         case ETHERTYPE_IP:
3274                 /*
3275                  * before calling the firewall, swap fields the same as
3276                  * IP does. here we assume the header is contiguous
3277                  */
3278                 ip = mtod(*mp, struct ip *);
3279
3280                 ip->ip_len = ntohs(ip->ip_len);
3281                 ip->ip_off = ntohs(ip->ip_off);
3282
3283                 /*
3284                  * Run pfil on the member interface and the bridge, both can
3285                  * be skipped by clearing pfil_member or pfil_bridge.
3286                  *
3287                  * Keep the order:
3288                  *   in_if -> bridge_if -> out_if
3289                  */
3290                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3291                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3292                         if (*mp == NULL || error != 0) /* filter may consume */
3293                                 break;
3294                         error = bridge_post_pfil(*mp);
3295                         if (error)
3296                                 break;
3297                 }
3298
3299                 if (pfil_member && ifp != NULL) {
3300                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3301                         if (*mp == NULL || error != 0) /* filter may consume */
3302                                 break;
3303                         error = bridge_post_pfil(*mp);
3304                         if (error)
3305                                 break;
3306                 }
3307
3308                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3309                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3310                         if (*mp == NULL || error != 0) /* filter may consume */
3311                                 break;
3312                         error = bridge_post_pfil(*mp);
3313                         if (error)
3314                                 break;
3315                 }
3316
3317                 /* check if we need to fragment the packet */
3318                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3319                         i = (*mp)->m_pkthdr.len;
3320                         if (i > ifp->if_mtu) {
3321                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3322                                             &llc1);
3323                                 return (error);
3324                         }
3325                 }
3326
3327                 /* Recalculate the ip checksum and restore byte ordering */
3328                 ip = mtod(*mp, struct ip *);
3329                 hlen = ip->ip_hl << 2;
3330                 if (hlen < sizeof(struct ip))
3331                         goto bad;
3332                 if (hlen > (*mp)->m_len) {
3333                         if ((*mp = m_pullup(*mp, hlen)) == 0)
3334                                 goto bad;
3335                         ip = mtod(*mp, struct ip *);
3336                         if (ip == NULL)
3337                                 goto bad;
3338                 }
3339                 ip->ip_len = htons(ip->ip_len);
3340                 ip->ip_off = htons(ip->ip_off);
3341                 ip->ip_sum = 0;
3342                 if (hlen == sizeof(struct ip))
3343                         ip->ip_sum = in_cksum_hdr(ip);
3344                 else
3345                         ip->ip_sum = in_cksum(*mp, hlen);
3346
3347                 break;
3348 #ifdef INET6
3349         case ETHERTYPE_IPV6:
3350                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
3351                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3352                                         dir);
3353
3354                 if (*mp == NULL || error != 0) /* filter may consume */
3355                         break;
3356
3357                 if (pfil_member && ifp != NULL)
3358                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
3359                                         dir);
3360
3361                 if (*mp == NULL || error != 0) /* filter may consume */
3362                         break;
3363
3364                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
3365                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3366                                         dir);
3367                 break;
3368 #endif
3369         default:
3370                 error = 0;
3371                 break;
3372         }
3373
3374         if (*mp == NULL)
3375                 return (error);
3376         if (error != 0)
3377                 goto bad;
3378
3379         error = -1;
3380
3381         /*
3382          * Finally, put everything back the way it was and return
3383          */
3384         if (snap) {
3385                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
3386                 if (*mp == NULL)
3387                         return (error);
3388                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
3389         }
3390
3391         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
3392         if (*mp == NULL)
3393                 return (error);
3394         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
3395
3396         return (0);
3397
3398 bad:
3399         m_freem(*mp);
3400         *mp = NULL;
3401         return (error);
3402 }
3403
3404 /*
3405  * Perform basic checks on header size since
3406  * pfil assumes ip_input has already processed
3407  * it for it.  Cut-and-pasted from ip_input.c.
3408  * Given how simple the IPv6 version is,
3409  * does the IPv4 version really need to be
3410  * this complicated?
3411  *
3412  * XXX Should we update ipstat here, or not?
3413  * XXX Right now we update ipstat but not
3414  * XXX csum_counter.
3415  */
3416 static int
3417 bridge_ip_checkbasic(struct mbuf **mp)
3418 {
3419         struct mbuf *m = *mp;
3420         struct ip *ip;
3421         int len, hlen;
3422         u_short sum;
3423
3424         if (*mp == NULL)
3425                 return (-1);
3426 #if notyet
3427         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3428                 if ((m = m_copyup(m, sizeof(struct ip),
3429                         (max_linkhdr + 3) & ~3)) == NULL) {
3430                         /* XXXJRT new stat, please */
3431                         ipstat.ips_toosmall++;
3432                         goto bad;
3433                 }
3434         } else
3435 #endif
3436 #ifndef __predict_false
3437 #define __predict_false(x) x
3438 #endif
3439          if (__predict_false(m->m_len < sizeof (struct ip))) {
3440                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
3441                         ipstat.ips_toosmall++;
3442                         goto bad;
3443                 }
3444         }
3445         ip = mtod(m, struct ip *);
3446         if (ip == NULL) goto bad;
3447
3448         if (ip->ip_v != IPVERSION) {
3449                 ipstat.ips_badvers++;
3450                 goto bad;
3451         }
3452         hlen = ip->ip_hl << 2;
3453         if (hlen < sizeof(struct ip)) { /* minimum header length */
3454                 ipstat.ips_badhlen++;
3455                 goto bad;
3456         }
3457         if (hlen > m->m_len) {
3458                 if ((m = m_pullup(m, hlen)) == 0) {
3459                         ipstat.ips_badhlen++;
3460                         goto bad;
3461                 }
3462                 ip = mtod(m, struct ip *);
3463                 if (ip == NULL) goto bad;
3464         }
3465
3466         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
3467                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
3468         } else {
3469                 if (hlen == sizeof(struct ip)) {
3470                         sum = in_cksum_hdr(ip);
3471                 } else {
3472                         sum = in_cksum(m, hlen);
3473                 }
3474         }
3475         if (sum) {
3476                 ipstat.ips_badsum++;
3477                 goto bad;
3478         }
3479
3480         /* Retrieve the packet length. */
3481         len = ntohs(ip->ip_len);
3482
3483         /*
3484          * Check for additional length bogosity
3485          */
3486         if (len < hlen) {
3487                 ipstat.ips_badlen++;
3488                 goto bad;
3489         }
3490
3491         /*
3492          * Check that the amount of data in the buffers
3493          * is as at least much as the IP header would have us expect.
3494          * Drop packet if shorter than we expect.
3495          */
3496         if (m->m_pkthdr.len < len) {
3497                 ipstat.ips_tooshort++;
3498                 goto bad;
3499         }
3500
3501         /* Checks out, proceed */
3502         *mp = m;
3503         return (0);
3504
3505 bad:
3506         *mp = m;
3507         return (-1);
3508 }
3509
3510 #ifdef INET6
3511 /*
3512  * Same as above, but for IPv6.
3513  * Cut-and-pasted from ip6_input.c.
3514  * XXX Should we update ip6stat, or not?
3515  */
3516 static int
3517 bridge_ip6_checkbasic(struct mbuf **mp)
3518 {
3519         struct mbuf *m = *mp;
3520         struct ip6_hdr *ip6;
3521
3522         /*
3523          * If the IPv6 header is not aligned, slurp it up into a new
3524          * mbuf with space for link headers, in the event we forward
3525          * it.  Otherwise, if it is aligned, make sure the entire base
3526          * IPv6 header is in the first mbuf of the chain.
3527          */
3528 #if notyet
3529         if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3530                 struct ifnet *inifp = m->m_pkthdr.rcvif;
3531                 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
3532                             (max_linkhdr + 3) & ~3)) == NULL) {
3533                         /* XXXJRT new stat, please */
3534                         ip6stat.ip6s_toosmall++;
3535                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
3536                         goto bad;
3537                 }
3538         } else
3539 #endif
3540         if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
3541                 struct ifnet *inifp = m->m_pkthdr.rcvif;
3542                 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
3543                         ip6stat.ip6s_toosmall++;
3544                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
3545                         goto bad;
3546                 }
3547         }
3548
3549         ip6 = mtod(m, struct ip6_hdr *);
3550
3551         if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
3552                 ip6stat.ip6s_badvers++;
3553                 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
3554                 goto bad;
3555         }
3556
3557         /* Checks out, proceed */
3558         *mp = m;
3559         return (0);
3560
3561 bad:
3562         *mp = m;
3563         return (-1);
3564 }
3565 #endif /* INET6 */
3566
3567 /*
3568  * bridge_fragment:
3569  *
3570  *      Return a fragmented mbuf chain.
3571  */
3572 static int
3573 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
3574     int snap, struct llc *llc)
3575 {
3576         struct mbuf *m0;
3577         struct ip *ip;
3578         int error = -1;
3579
3580         if (m->m_len < sizeof(struct ip) &&
3581             (m = m_pullup(m, sizeof(struct ip))) == NULL)
3582                 goto out;
3583         ip = mtod(m, struct ip *);
3584
3585         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
3586                     CSUM_DELAY_IP);
3587         if (error)
3588                 goto out;
3589
3590         /* walk the chain and re-add the Ethernet header */
3591         for (m0 = m; m0; m0 = m0->m_nextpkt) {
3592                 if (error == 0) {
3593                         if (snap) {
3594                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
3595                                 if (m0 == NULL) {
3596                                         error = ENOBUFS;
3597                                         continue;
3598                                 }
3599                                 bcopy(llc, mtod(m0, caddr_t),
3600                                     sizeof(struct llc));
3601                         }
3602                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
3603                         if (m0 == NULL) {
3604                                 error = ENOBUFS;
3605                                 continue;
3606                         }
3607                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
3608                 } else 
3609                         m_freem(m);
3610         }
3611
3612         if (error == 0)
3613                 ipstat.ips_fragmented++;
3614
3615         return (error);
3616
3617 out:
3618         if (m != NULL)
3619                 m_freem(m);
3620         return (error);
3621 }
3622
3623 static void
3624 bridge_enqueue_handler(struct netmsg *nmsg)
3625 {
3626         struct netmsg_packet *nmp;
3627         struct ifnet *dst_ifp;
3628         struct mbuf *m;
3629
3630         nmp = (struct netmsg_packet *)nmsg;
3631         m = nmp->nm_packet;
3632         dst_ifp = nmp->nm_netmsg.nm_lmsg.u.ms_resultp;
3633
3634         bridge_handoff(dst_ifp, m);
3635 }
3636
3637 static void
3638 bridge_handoff(struct ifnet *dst_ifp, struct mbuf *m)
3639 {
3640         struct mbuf *m0;
3641
3642         /* We may be sending a fragment so traverse the mbuf */
3643         for (; m; m = m0) {
3644                 struct altq_pktattr pktattr;
3645
3646                 m0 = m->m_nextpkt;
3647                 m->m_nextpkt = NULL;
3648
3649                 if (ifq_is_enabled(&dst_ifp->if_snd))
3650                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
3651
3652                 ifq_dispatch(dst_ifp, m, &pktattr);
3653         }
3654 }
3655
3656 static void
3657 bridge_control_dispatch(struct netmsg *nmsg)
3658 {
3659         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)nmsg;
3660         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
3661         int error;
3662
3663         lwkt_serialize_enter(bifp->if_serializer);
3664         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
3665         lwkt_serialize_exit(bifp->if_serializer);
3666
3667         lwkt_replymsg(&nmsg->nm_lmsg, error);
3668 }
3669
3670 static int
3671 bridge_control(struct bridge_softc *sc, u_long cmd,
3672                bridge_ctl_t bc_func, void *bc_arg)
3673 {
3674         struct ifnet *bifp = sc->sc_ifp;
3675         struct netmsg_brctl bc_msg;
3676         struct netmsg *nmsg;
3677         int error;
3678
3679         ASSERT_SERIALIZED(bifp->if_serializer);
3680
3681         bzero(&bc_msg, sizeof(bc_msg));
3682         nmsg = &bc_msg.bc_nmsg;
3683
3684         netmsg_init(nmsg, &curthread->td_msgport, 0, bridge_control_dispatch);
3685         bc_msg.bc_func = bc_func;
3686         bc_msg.bc_sc = sc;
3687         bc_msg.bc_arg = bc_arg;
3688
3689         lwkt_serialize_exit(bifp->if_serializer);
3690         error = lwkt_domsg(BRIDGE_CFGPORT, &nmsg->nm_lmsg, 0);
3691         lwkt_serialize_enter(bifp->if_serializer);
3692         return error;
3693 }
3694
3695 static void
3696 bridge_add_bif_handler(struct netmsg *nmsg)
3697 {
3698         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)nmsg;
3699         struct bridge_softc *sc;
3700         struct bridge_iflist *bif;
3701
3702         sc = amsg->br_softc;
3703
3704         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
3705         bif->bif_ifp = amsg->br_bif_ifp;
3706         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
3707         bif->bif_onlist = 1;
3708         bif->bif_info = amsg->br_bif_info;
3709
3710         LIST_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
3711
3712         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
3713 }
3714
3715 static void
3716 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
3717                struct ifnet *ifp)
3718 {
3719         struct netmsg_braddbif amsg;
3720
3721         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
3722
3723         netmsg_init(&amsg.br_nmsg, &curthread->td_msgport, 0,
3724                     bridge_add_bif_handler);
3725         amsg.br_softc = sc;
3726         amsg.br_bif_info = bif_info;
3727         amsg.br_bif_ifp = ifp;
3728
3729         ifnet_domsg(&amsg.br_nmsg.nm_lmsg, 0);
3730 }
3731
3732 static void
3733 bridge_del_bif_handler(struct netmsg *nmsg)
3734 {
3735         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)nmsg;
3736         struct bridge_softc *sc;
3737         struct bridge_iflist *bif;
3738
3739         sc = dmsg->br_softc;
3740
3741         /*
3742          * Locate the bif associated with the br_bif_info
3743          * on the current CPU
3744          */
3745         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
3746         KKASSERT(bif != NULL && bif->bif_onlist);
3747
3748         /* Remove the bif from the current CPU's iflist */
3749         bif->bif_onlist = 0;
3750         LIST_REMOVE(bif, bif_next);
3751
3752         /* Save the removed bif for later freeing */
3753         LIST_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
3754
3755         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
3756 }
3757
3758 static void
3759 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
3760                struct bridge_iflist_head *saved_bifs)
3761 {
3762         struct netmsg_brdelbif dmsg;
3763
3764         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
3765
3766         netmsg_init(&dmsg.br_nmsg, &curthread->td_msgport, 0,
3767                     bridge_del_bif_handler);
3768         dmsg.br_softc = sc;
3769         dmsg.br_bif_info = bif_info;
3770         dmsg.br_bif_list = saved_bifs;
3771
3772         ifnet_domsg(&dmsg.br_nmsg.nm_lmsg, 0);
3773 }
3774
3775 static void
3776 bridge_set_bifflags_handler(struct netmsg *nmsg)
3777 {
3778         struct netmsg_brsflags *smsg = (struct netmsg_brsflags *)nmsg;
3779         struct bridge_softc *sc;
3780         struct bridge_iflist *bif;
3781
3782         sc = smsg->br_softc;
3783
3784         /*
3785          * Locate the bif associated with the br_bif_info
3786          * on the current CPU
3787          */
3788         bif = bridge_lookup_member_ifinfo(sc, smsg->br_bif_info);
3789         KKASSERT(bif != NULL && bif->bif_onlist);
3790
3791         bif->bif_flags = smsg->br_bif_flags;
3792
3793         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
3794 }
3795
3796 static void
3797 bridge_set_bifflags(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
3798                     uint32_t bif_flags)
3799 {
3800         struct netmsg_brsflags smsg;
3801
3802         ASSERT_NOT_SERIALIZED(sc->sc_ifp->if_serializer);
3803
3804         netmsg_init(&smsg.br_nmsg, &curthread->td_msgport, 0,
3805                     bridge_set_bifflags_handler);
3806         smsg.br_softc = sc;
3807         smsg.br_bif_info = bif_info;
3808         smsg.br_bif_flags = bif_flags;
3809
3810         ifnet_domsg(&smsg.br_nmsg.nm_lmsg, 0);
3811 }