b67771582fa519bda0e6f621d4491689b319bc79
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  * $DragonFly: src/sys/net/bridge/if_bridge.c,v 1.60 2008/11/26 12:49:43 sephe Exp $
70  */
71
72 /*
73  * Network interface bridge support.
74  *
75  * TODO:
76  *
77  *      - Currently only supports Ethernet-like interfaces (Ethernet,
78  *        802.11, VLANs on Ethernet, etc.)  Figure out a nice way
79  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
80  *        consider heterogeneous bridges).
81  *
82  *
83  * The bridge's route information is duplicated on each CPU:
84  *
85  *      CPU0          CPU1          CPU2          CPU3
86  * +-----------+ +-----------+ +-----------+ +-----------+
87  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
88  * |           | |           | |           | |           |
89  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
90  * +-----------+ +-----------+ +-----------+ +-----------+
91  *       |         |                     |         |
92  *       |         |                     |         |
93  *       |         |     +----------+    |         |
94  *       |         |     |  rtinfo  |    |         |
95  *       |         +---->|          |<---+         |
96  *       |               |  flags   |              |
97  *       +-------------->|  timeout |<-------------+
98  *                       |  dst_ifp |
99  *                       +----------+
100  *
101  * We choose to put timeout and dst_ifp into the shared part, so updating
102  * them is cheaper than using message forwarding.  Also, there is no
103  * need to use a spinlock to protect the updates: timeout and dst_ifp
104  * are unrelated, and the order in which the fields are updated does not
105  * matter.  The cache pollution caused by the shared part should not be
106  * heavy: in a stable setup, dst_ifp will probably not change during the
107  * rtnode's lifetime, while timeout is refreshed once per second; most of
108  * the time, timeout and dst_ifp are only read.
109  *
110  *
111  * Bridge route information installation on bridge_input path:
112  *
113  *      CPU0           CPU1         CPU2          CPU3
114  *
115  *                               tcp_thread2
116  *                                    |
117  *                                alloc nmsg
118  *                    snd nmsg        |
119  *                    w/o rtinfo      |
120  *      ifnet0<-----------------------+
121  *        |                           :
122  *    lookup dst                      :
123  *   rtnode exists?(Y)free nmsg       :
124  *        |(N)                        :
125  *        |
126  *  alloc rtinfo
127  *  alloc rtnode
128  * install rtnode
129  *        |
130  *        +---------->ifnet1
131  *        : fwd nmsg    |
132  *        : w/ rtinfo   |
133  *        :             |
134  *        :             |
135  *                 alloc rtnode
136  *               (w/ nmsg's rtinfo)
137  *                install rtnode
138  *                      |
139  *                      +---------->ifnet2
140  *                      : fwd nmsg    |
141  *                      : w/ rtinfo   |
142  *                      :             |
143  *                      :         same as ifnet1
144  *                                    |
145  *                                    +---------->ifnet3
146  *                                    : fwd nmsg    |
147  *                                    : w/ rtinfo   |
148  *                                    :             |
149  *                                    :         same as ifnet1
150  *                                               free nmsg
151  *                                                  :
152  *                                                  :
153  *
154  * The netmsgs forwarded between protocol threads and ifnet threads are
155  * allocated with (M_WAITOK|M_NULLOK), so the allocation will not fail in
156  * most cases (route information is too precious not to be installed :).
157  * Since multiple threads may try to install route information for the
158  * same dst eaddr, we look up route information in ifnet0.  However, this
159  * lookup only needs to be performed on ifnet0, which is the starting
160  * point of the route information installation process.
161  *
162  *
163  * Bridge route information deleting/flushing:
164  *
165  *  CPU0            CPU1             CPU2             CPU3
166  *
167  * netisr0
168  *   |
169  * find suitable rtnodes,
170  * mark their rtinfo dead
171  *   |
172  *   | domsg <------------------------------------------+
173  *   |                                                  | replymsg
174  *   |                                                  |
175  *   V     fwdmsg           fwdmsg           fwdmsg     |
176  * ifnet0 --------> ifnet1 --------> ifnet2 --------> ifnet3
177  * delete rtnodes   delete rtnodes   delete rtnodes   delete rtnodes
178  * w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo   w/ dead rtinfo
179  *                                                    free dead rtinfos
180  *
181  * All deleting/flushing operations are serialized by netisr0, so each
182  * operation only reaps the route information marked dead by itself.
183  *
184  *
185  * Bridge route information adding/deleting/flushing:
186  * Since all operations are serialized by the fixed message flow between
187  * ifnet threads, it is not possible to create corrupted per-cpu route
188  * information.
189  *
190  *
191  *
192  * Percpu member interface list iteration with blocking operations:
193  * Since one bridge can only delete one member interface at a time and
194  * the deleted member interface is not freed after netmsg_service_sync(),
195  * the following approach is used to make sure that even if a member
196  * interface is removed from the percpu list during a blocking operation,
197  * the iteration can still keep going:
198  *
199  * LIST_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
200  *     blocking operation;
201  *     blocking operation;
202  *     ...
203  *     ...
204  *     if (nbif != NULL && !nbif->bif_onlist) {
205  *         KKASSERT(bif->bif_onlist);
206  *         nbif = LIST_NEXT(bif, bif_next);
207  *     }
208  * }
209  *
210  * As mentioned above, only one member interface can be unlinked from the
211  * percpu member interface list at a time, so either bif or nbif may be off
212  * the list, but _not_ both.  To keep the iteration going, we only care
213  * about nbif, not bif.  Since a removed member interface will only be freed
214  * after we finish our work, it is safe to access any field in an unlinked
215  * bif (here bif_onlist).  If nbif is no longer on the list, then bif must be
216  * on the list, so we change nbif to the next element of bif and keep going.
217  */
218
219 #include "opt_inet.h"
220 #include "opt_inet6.h"
221
222 #include <sys/param.h>
223 #include <sys/mbuf.h>
224 #include <sys/malloc.h>
225 #include <sys/protosw.h>
226 #include <sys/systm.h>
227 #include <sys/time.h>
228 #include <sys/socket.h> /* for net/if.h */
229 #include <sys/sockio.h>
230 #include <sys/ctype.h>  /* string functions */
231 #include <sys/kernel.h>
232 #include <sys/random.h>
233 #include <sys/sysctl.h>
234 #include <sys/module.h>
235 #include <sys/proc.h>
236 #include <sys/priv.h>
237 #include <sys/lock.h>
238 #include <sys/thread.h>
239 #include <sys/thread2.h>
240 #include <sys/mpipe.h>
241
242 #include <net/bpf.h>
243 #include <net/if.h>
244 #include <net/if_dl.h>
245 #include <net/if_types.h>
246 #include <net/if_var.h>
247 #include <net/pfil.h>
248 #include <net/ifq_var.h>
249 #include <net/if_clone.h>
250
251 #include <netinet/in.h> /* for struct arpcom */
252 #include <netinet/in_systm.h>
253 #include <netinet/in_var.h>
254 #include <netinet/ip.h>
255 #include <netinet/ip_var.h>
256 #ifdef INET6
257 #include <netinet/ip6.h>
258 #include <netinet6/ip6_var.h>
259 #endif
260 #include <netinet/if_ether.h> /* for struct arpcom */
261 #include <net/bridge/if_bridgevar.h>
262 #include <net/if_llc.h>
263 #include <net/netmsg2.h>
264
265 #include <net/route.h>
266 #include <sys/in_cksum.h>
267
268 /*
269  * Size of the route hash table.  Must be a power of two.
     * May be overridden at build time by predefining BRIDGE_RTHASH_SIZE.
270  */
271 #ifndef BRIDGE_RTHASH_SIZE
272 #define BRIDGE_RTHASH_SIZE              1024
273 #endif
274
/*
 * Table size minus one.  NOTE(review): presumably ANDed with a hash value
 * to select a bucket (hence the power-of-two requirement); the use is not
 * visible in this part of the file -- confirm.
 */
275 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
276
277 /*
278  * Maximum number of addresses to cache.
     * This is the default that bridge_clone_create() stores in sc_brtmax;
     * it may be overridden at build time by predefining BRIDGE_RTABLE_MAX.
279  */
280 #ifndef BRIDGE_RTABLE_MAX
281 #define BRIDGE_RTABLE_MAX               100
282 #endif
283
284 /*
285  * Spanning tree defaults.
     *
     * bridge_clone_create() copies the max age, hello time, forward delay,
     * hold time and bridge priority values into every new bridge_softc.
     * The port priority and path cost defaults are not referenced in the
     * part of the file that creates the bridge itself.
     *
     * NOTE(review): the time values are written as (seconds * 256), which
     * suggests 1/256-second units -- confirm against the spanning tree code.
286  */
287 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
288 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
289 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
290 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
291 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
292 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
293 #define BSTP_DEFAULT_PATH_COST          55
294
295 /*
296  * Timeout (in seconds) for entries learned dynamically.
     * Default value of sc_brttimeout, set by bridge_clone_create().
297  */
298 #ifndef BRIDGE_RTABLE_TIMEOUT
299 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
300 #endif
301
302 /*
303  * Number of seconds between walks of the route list.
     * Initial value of the file-scope bridge_rtable_prune_period variable.
304  */
305 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
306 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
307 #endif
308
309 /*
310  * List of capabilities to mask on the member interface.
     * Currently only transmit checksum offload (IFCAP_TXCSUM).
311  */
312 #define BRIDGE_IFCAPS_MASK              IFCAP_TXCSUM
313
/*
 * Signature shared by every bridge_ioctl_*() control handler: it receives
 * the bridge softc and an opaque argument pointer and returns an int
 * (NOTE(review): presumably an errno value -- confirm in the handlers).
 */
314 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
315
/*
 * Netmsg that carries one control request: the handler to call together
 * with the softc and argument to pass to it.
 * NOTE(review): presumably built by bridge_control(); its body is not
 * visible in this part of the file -- confirm.
 */
316 struct netmsg_brctl {
317         struct netmsg           bc_nmsg;
318         bridge_ctl_t            bc_func;
319         struct bridge_softc     *bc_sc;
320         void                    *bc_arg;
321 };
322
/*
 * Netmsg describing a route entry to install.  The members line up with
 * the parameters of bridge_rtinstall_oncpu() declared below: destination
 * ethernet address, destination interface, an int "setflags", the uint8_t
 * flags and the bridge_rtinfo.
 * NOTE(review): per the design comment at the top of the file, the message
 * starts without an rtinfo and is forwarded from cpu to cpu with the rtinfo
 * filled in; presumably br_rtinfo is that field -- confirm in
 * bridge_rtinstall_handler().
 */
323 struct netmsg_brsaddr {
324         struct netmsg           br_nmsg;
325         struct bridge_softc     *br_softc;
326         struct ifnet            *br_dst_if;
327         struct bridge_rtinfo    *br_rtinfo;
328         int                     br_setflags;
329         uint8_t                 br_dst[ETHER_ADDR_LEN];
330         uint8_t                 br_flags;
331 };
332
/*
 * Netmsg for adding a member interface.  The members mirror the parameters
 * of bridge_add_bif() declared below (softc, bridge_ifinfo, ifnet).
 * NOTE(review): presumably consumed by bridge_add_bif_handler(); the
 * handler body is not visible in this part of the file -- confirm.
 */
333 struct netmsg_braddbif {
334         struct netmsg           br_nmsg;
335         struct bridge_softc     *br_softc;
336         struct bridge_ifinfo    *br_bif_info;
337         struct ifnet            *br_bif_ifp;
338 };
339
/*
 * Netmsg for deleting a member interface.  The members mirror the
 * parameters of bridge_del_bif() declared below (softc, bridge_ifinfo,
 * bridge_iflist_head).
 * NOTE(review): presumably consumed by bridge_del_bif_handler(); the
 * handler body is not visible in this part of the file -- confirm.
 */
340 struct netmsg_brdelbif {
341         struct netmsg           br_nmsg;
342         struct bridge_softc     *br_softc;
343         struct bridge_ifinfo    *br_bif_info;
344         struct bridge_iflist_head *br_bif_list;
345 };
346
/*
 * Netmsg for changing a member interface's flags.  The members mirror the
 * parameters of bridge_set_bifflags() declared below (softc, bridge_ifinfo,
 * 32-bit flags).
 * NOTE(review): presumably consumed by bridge_set_bifflags_handler(); the
 * handler body is not visible in this part of the file -- confirm.
 */
347 struct netmsg_brsflags {
348         struct netmsg           br_nmsg;
349         struct bridge_softc     *br_softc;
350         struct bridge_ifinfo    *br_bif_info;
351         uint32_t                br_bif_flags;
352 };
353
/*
 * Tag returned by EVENTHANDLER_REGISTER(ifnet_detach_event, ...) in
 * bridge_modevent() on MOD_LOAD; handed back to EVENTHANDLER_DEREGISTER()
 * on MOD_UNLOAD.
 */
354 eventhandler_tag        bridge_detach_cookie = NULL;
355
/*
 * Hook pointers defined outside this file.  bridge_modevent() points
 * bridge_input_p/bridge_output_p at bridge_input()/bridge_output() on load
 * and resets them to NULL on unload.  bridge_dn_p is not assigned in the
 * part of the file visible here.
 */
356 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
357 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
358 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
359
/* Seconds between route list walks; starts at the compile-time default. */
360 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
361
/*
 * Forward declarations: the cloner callbacks, the ifnet entry points that
 * bridge_clone_create() installs (bridge_ioctl, bridge_start, bridge_init),
 * the input/output hooks that bridge_modevent() publishes, the interface
 * detach event callback, and the forwarding, timer, broadcast and span
 * helpers.
 */
362 static int      bridge_clone_create(struct if_clone *, int);
363 static void     bridge_clone_destroy(struct ifnet *);
364
365 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
366 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
367 static void     bridge_ifdetach(void *, struct ifnet *);
368 static void     bridge_init(void *);
369 static void     bridge_stop(struct ifnet *);
370 static void     bridge_start(struct ifnet *);
371 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
372 static int      bridge_output(struct ifnet *, struct mbuf *);
373
374 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
375
376 static void     bridge_timer_handler(struct netmsg *);
377 static void     bridge_timer(void *);
378
379 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
380 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
381                     struct mbuf *);
382 static void     bridge_span(struct bridge_softc *, struct mbuf *);
383
/*
 * Forward declarations for the route table code: update/lookup,
 * reaping/trimming/aging/flushing, static address add/delete, the netmsg
 * handlers used for per-cpu installation and reaping, table set-up and
 * tear-down, and the rtnode primitives.
 */
384 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
385                     struct ifnet *, uint8_t);
386 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
387 static void     bridge_rtreap(struct bridge_softc *);
388 static void     bridge_rtreap_async(struct bridge_softc *);
389 static void     bridge_rttrim(struct bridge_softc *);
390 static int      bridge_rtage_finddead(struct bridge_softc *);
391 static void     bridge_rtage(struct bridge_softc *);
392 static void     bridge_rtflush(struct bridge_softc *, int);
393 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
394 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
395                     struct ifnet *, uint8_t);
396 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
397 static void     bridge_rtreap_handler(struct netmsg *);
398 static void     bridge_rtinstall_handler(struct netmsg *);
399 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
400                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
401
402 static void     bridge_rtable_init(struct bridge_softc *);
403 static void     bridge_rtable_fini(struct bridge_softc *);
404
405 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
406 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
407                     const uint8_t *);
408 static void     bridge_rtnode_insert(struct bridge_softc *,
409                     struct bridge_rtnode *);
410 static void     bridge_rtnode_destroy(struct bridge_softc *,
411                     struct bridge_rtnode *);
412
/*
 * Forward declarations: member/span lookup and removal, the control
 * dispatcher and the bridge_ioctl_*() handlers referenced from
 * bridge_control_table, packet filter and IP sanity-check helpers, the
 * enqueue/handoff path, and the netmsg handlers plus wrappers that add,
 * delete or re-flag member interfaces.
 */
413 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
414                     const char *name);
415 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
416                     struct ifnet *ifp);
417 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
418                     struct bridge_ifinfo *);
419 static void     bridge_delete_member(struct bridge_softc *,
420                     struct bridge_iflist *, int);
421 static void     bridge_delete_span(struct bridge_softc *,
422                     struct bridge_iflist *);
423
424 static int      bridge_control(struct bridge_softc *, u_long,
425                                bridge_ctl_t, void *);
426 static int      bridge_ioctl_init(struct bridge_softc *, void *);
427 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
428 static int      bridge_ioctl_add(struct bridge_softc *, void *);
429 static int      bridge_ioctl_del(struct bridge_softc *, void *);
430 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
431 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
432 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
433 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
434 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
435 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
436 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
437 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
438 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
439 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
440 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
441 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
442 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
443 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
444 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
446 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
447 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
448 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
449 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
450 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
451 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
452 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
453 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
454                     int);
455 static int      bridge_ip_checkbasic(struct mbuf **mp);
456 #ifdef INET6
457 static int      bridge_ip6_checkbasic(struct mbuf **mp);
458 #endif /* INET6 */
459 static int      bridge_fragment(struct ifnet *, struct mbuf *,
460                     struct ether_header *, int, struct llc *);
461 static void     bridge_enqueue_handler(struct netmsg *);
462 static void     bridge_handoff(struct ifnet *, struct mbuf *);
463
464 static void     bridge_del_bif_handler(struct netmsg *);
465 static void     bridge_add_bif_handler(struct netmsg *);
466 static void     bridge_set_bifflags_handler(struct netmsg *);
467 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
468                     struct bridge_iflist_head *);
469 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
470                     struct ifnet *);
471 static void     bridge_set_bifflags(struct bridge_softc *,
472                     struct bridge_ifinfo *, uint32_t);
473
/*
 * Sysctl node "bridge" under _net_link, plus three read-write integer
 * knobs beneath it that control packet filter (pfil) behaviour.  All three
 * default to 1.
 */
474 SYSCTL_DECL(_net_link);
475 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
476
477 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
478 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
479 static int pfil_member = 1; /* run pfil hooks on the member interface */
480 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
481     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
482 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
483     &pfil_bridge, 0, "Packet filter on the bridge interface");
484 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
485     &pfil_member, 0, "Packet filter on the member interface");
486
/*
 * Kernel-side argument buffer for a driver-specific bridge ioctl.
 *
 * bca_u is a union of every request structure a control handler can take;
 * bridge_ioctl() rejects any request whose ifd_len exceeds sizeof(bca_u).
 *
 * NOTE(review): bca_len, bca_uptr and bca_kptr are not used in the part of
 * the file visible here; presumably they describe an extra user/kernel
 * buffer pair for the list-returning requests -- confirm in the handlers.
 */
487 struct bridge_control_arg {
488         union {
489                 struct ifbreq ifbreq;
490                 struct ifbifconf ifbifconf;
491                 struct ifbareq ifbareq;
492                 struct ifbaconf ifbaconf;
493                 struct ifbrparam ifbrparam;
494         } bca_u;
495         int     bca_len;
496         void    *bca_uptr;
497         void    *bca_kptr;
498 };
499
/*
 * One entry of bridge_control_table: the handler to run, the exact size
 * the user-supplied argument must have (bridge_ioctl() compares it with
 * ifd_len), and BC_F_* flags.
 */
500 struct bridge_control {
501         bridge_ctl_t    bc_func;
502         int             bc_argsize;
503         int             bc_flags;
504 };
505
/*
 * bc_flags bits, as enforced by bridge_ioctl():
 *  - BC_F_COPYOUT: required for SIOCGDRVSPEC requests and refused for
 *    SIOCSDRVSPEC requests.
 *  - BC_F_SUSER: the caller must pass priv_check_cred(PRIV_ROOT).
 *  - BC_F_COPYIN: NOTE(review): handling is past the visible part of
 *    bridge_ioctl() -- confirm there.
 */
506 #define BC_F_COPYIN             0x01    /* copy arguments in */
507 #define BC_F_COPYOUT            0x02    /* copy arguments out */
508 #define BC_F_SUSER              0x04    /* do super-user check */
509
/*
 * Dispatch table for SIOCGDRVSPEC/SIOCSDRVSPEC.  bridge_ioctl() indexes it
 * directly with the caller-supplied ifd_cmd, so the position of each entry
 * is the command number: entries must never be reordered or removed, and
 * new ones must only be appended.
 * NOTE(review): the matching command constants presumably live in
 * if_bridgevar.h -- confirm they agree with this order.
 */
510 const struct bridge_control bridge_control_table[] = {
511         { bridge_ioctl_add,             sizeof(struct ifbreq),
512           BC_F_COPYIN|BC_F_SUSER },
513         { bridge_ioctl_del,             sizeof(struct ifbreq),
514           BC_F_COPYIN|BC_F_SUSER },
515
516         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
517           BC_F_COPYIN|BC_F_COPYOUT },
518         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
519           BC_F_COPYIN|BC_F_SUSER },
520
521         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
522           BC_F_COPYIN|BC_F_SUSER },
523         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
524           BC_F_COPYOUT },
525
526         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
527           BC_F_COPYIN|BC_F_COPYOUT },
528         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
529           BC_F_COPYIN|BC_F_COPYOUT },
530
531         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
532           BC_F_COPYIN|BC_F_SUSER },
533
534         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
535           BC_F_COPYIN|BC_F_SUSER },
536         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
537           BC_F_COPYOUT },
538
539         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
540           BC_F_COPYIN|BC_F_SUSER },
541
542         { bridge_ioctl_flush,           sizeof(struct ifbreq),
543           BC_F_COPYIN|BC_F_SUSER },
544
545         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
546           BC_F_COPYOUT },
547         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
548           BC_F_COPYIN|BC_F_SUSER },
549
550         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
551           BC_F_COPYOUT },
552         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
553           BC_F_COPYIN|BC_F_SUSER },
554
555         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
556           BC_F_COPYOUT },
557         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
558           BC_F_COPYIN|BC_F_SUSER },
559
560         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
561           BC_F_COPYOUT },
562         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
563           BC_F_COPYIN|BC_F_SUSER },
564
565         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
566           BC_F_COPYIN|BC_F_SUSER },
567
568         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
569           BC_F_COPYIN|BC_F_SUSER },
570
571         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
572           BC_F_COPYIN|BC_F_SUSER },
573         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
574           BC_F_COPYIN|BC_F_SUSER },
575 };
/* Number of entries; bridge_ioctl() rejects ifd_cmd values >= this. */
576 static const int bridge_control_table_size =
577     sizeof(bridge_control_table) / sizeof(bridge_control_table[0]);
578
/*
 * All existing bridges.  Initialised on MOD_LOAD; bridge_clone_create()
 * inserts into it and bridge_clone_destroy() removes from it.  The module
 * refuses to unload while it is non-empty.
 */
579 LIST_HEAD(, bridge_softc) bridge_list;
580
/*
 * Cloner for interfaces named "bridge", attached on MOD_LOAD and detached
 * on MOD_UNLOAD.
 */
581 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
582                                 bridge_clone_create,
583                                 bridge_clone_destroy, 0, IF_MAXUNIT);
584
585 static int
586 bridge_modevent(module_t mod, int type, void *data)
587 {
588         switch (type) {
589         case MOD_LOAD:
590                 LIST_INIT(&bridge_list);
591                 if_clone_attach(&bridge_cloner);
592                 bridge_input_p = bridge_input;
593                 bridge_output_p = bridge_output;
594                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
595                     ifnet_detach_event, bridge_ifdetach, NULL,
596                     EVENTHANDLER_PRI_ANY);
597 #if notyet
598                 bstp_linkstate_p = bstp_linkstate;
599 #endif
600                 break;
601         case MOD_UNLOAD:
602                 if (!LIST_EMPTY(&bridge_list))
603                         return (EBUSY);
604                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
605                     bridge_detach_cookie);
606                 if_clone_detach(&bridge_cloner);
607                 bridge_input_p = NULL;
608                 bridge_output_p = NULL;
609 #if notyet
610                 bstp_linkstate_p = NULL;
611 #endif
612                 break;
613         default:
614                 return (EOPNOTSUPP);
615         }
616         return (0);
617 }
618
/*
 * Module description: name "if_bridge" and bridge_modevent() as the event
 * handler.  NOTE(review): the trailing 0 is presumably the unused private
 * data slot of moduledata_t -- confirm against the type's definition.
 */
619 static moduledata_t bridge_mod = {
620         "if_bridge",
621         bridge_modevent,
622         0
623 };
624
/* Register the module at SI_SUB_PSEUDO with SI_ORDER_ANY. */
625 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
626
627
628 /*
629  * bridge_clone_create:
630  *
631  *      Create a new bridge instance.
     *
     *      ifc supplies the interface name and unit the unit number.
     *      Allocates a zeroed softc, loads the route table and spanning
     *      tree defaults, prepares the two timer callouts and their
     *      netmsgs, allocates one (empty) member list per cpu and the span
     *      list, fills in the ifnet, attaches it as an ethernet interface
     *      with a random address and finally links the softc onto
     *      bridge_list.  Always returns 0.
632  */
633 static int
634 bridge_clone_create(struct if_clone *ifc, int unit)
635 {
636         struct bridge_softc *sc;
637         struct ifnet *ifp;
638         u_char eaddr[6];
639         int cpu, rnd;
640
641         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
642         ifp = sc->sc_ifp = &sc->sc_if;
643
644         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
645         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
646         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
647         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
648         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
649         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
650         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
651
652         /* Initialize our routing table. */
653         bridge_rtable_init(sc);
654
            /*
             * Route table timer: the callout plus the netmsg that runs
             * bridge_timer_handler().  The softc rides in ms_resultp.
             */
655         callout_init(&sc->sc_brcallout);
656         netmsg_init(&sc->sc_brtimemsg, &netisr_adone_rport,
657                     MSGF_DROPABLE, bridge_timer_handler);
658         sc->sc_brtimemsg.nm_lmsg.u.ms_resultp = sc;
659
            /* Spanning tree timer, set up the same way. */
660         callout_init(&sc->sc_bstpcallout);
661         netmsg_init(&sc->sc_bstptimemsg, &netisr_adone_rport,
662                     MSGF_DROPABLE, bstp_tick_handler);
663         sc->sc_bstptimemsg.nm_lmsg.u.ms_resultp = sc;
664
665         /* Initialize per-cpu member iface lists */
            /* Not M_ZERO: every list head is set by LIST_INIT() below. */
666         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
667                                  M_DEVBUF, M_WAITOK);
668         for (cpu = 0; cpu < ncpus; ++cpu)
669                 LIST_INIT(&sc->sc_iflists[cpu]);
670
671         LIST_INIT(&sc->sc_spanlist);
672
673         ifp->if_softc = sc;
674         if_initname(ifp, ifc->ifc_name, unit);
675         ifp->if_mtu = ETHERMTU;
676         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
677         ifp->if_ioctl = bridge_ioctl;
678         ifp->if_start = bridge_start;
679         ifp->if_init = bridge_init;
680         ifp->if_type = IFT_BRIDGE;
681         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
682         ifq_set_ready(&ifp->if_snd);
683         ifp->if_hdrlen = ETHER_HDR_LEN;
684
685         /*
686          * Generate a random, locally administered unicast ethernet
687          * address.  All six bytes come from karc4random(); no fixed OUI
             * (such as the private AC:DE:48 code that earlier wording of
             * this comment mentioned) is applied.  The two 4-byte copies
             * overlap on eaddr[2..3] so that together they fill exactly
             * the 6 bytes of eaddr.
688          */
689         rnd = karc4random();
690         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
691         rnd = karc4random();
692         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
693
694         eaddr[0] &= ~1; /* clear multicast bit */
695         eaddr[0] |= 2;  /* set the LAA bit */
696
697         ether_ifattach(ifp, eaddr, NULL);
698         /* Now undo some of the damage... */
            /*
             * NOTE(review): presumably ether_ifattach() overwrites
             * if_baudrate and if_type, which is why they are set again
             * here -- confirm.
             */
699         ifp->if_baudrate = 0;
700         ifp->if_type = IFT_BRIDGE;
701
702         crit_enter();   /* XXX MP */
703         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
704         crit_exit();
705
706         return (0);
707 }
708
709 static void
710 bridge_delete_dispatch(struct netmsg *nmsg)
711 {
712         struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
713         struct bridge_softc *sc = lmsg->u.ms_resultp;
714         struct ifnet *bifp = sc->sc_ifp;
715         struct bridge_iflist *bif;
716
717         ifnet_serialize_all(bifp);
718
719         while ((bif = LIST_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
720                 bridge_delete_member(sc, bif, 0);
721
722         while ((bif = LIST_FIRST(&sc->sc_spanlist)) != NULL)
723                 bridge_delete_span(sc, bif);
724
725         ifnet_deserialize_all(bifp);
726
727         lwkt_replymsg(lmsg, 0);
728 }
729
730 /*
731  * bridge_clone_destroy:
732  *
733  *      Destroy a bridge instance.
     *
     *      Stops the bridge and clears IFF_UP under the interface
     *      serializers, has bridge_delete_dispatch() remove all member and
     *      span interfaces via a message to BRIDGE_CFGPORT, unlinks the
     *      softc from bridge_list, detaches the ethernet interface, tears
     *      down the route table and frees the per-cpu member lists and the
     *      softc itself.
734  */
735 static void
736 bridge_clone_destroy(struct ifnet *ifp)
737 {
738         struct bridge_softc *sc = ifp->if_softc;
739         struct lwkt_msg *lmsg;
740         struct netmsg nmsg;
741
742         ifnet_serialize_all(ifp);
743
744         bridge_stop(ifp);
745         ifp->if_flags &= ~IFF_UP;
746
747         ifnet_deserialize_all(ifp);
748
            /*
             * The message lives on this stack frame and its reply port is
             * the current thread's port.  NOTE(review): this relies on
             * lwkt_domsg() not returning until bridge_delete_dispatch()
             * has replied -- confirm against the msgport documentation.
             */
749         netmsg_init(&nmsg, &curthread->td_msgport, 0, bridge_delete_dispatch);
750         lmsg = &nmsg.nm_lmsg;
751         lmsg->u.ms_resultp = sc;
752         lwkt_domsg(BRIDGE_CFGPORT, lmsg, 0);
753
754         crit_enter();   /* XXX MP */
755         LIST_REMOVE(sc, sc_list);
756         crit_exit();
757
758         ether_ifdetach(ifp);
759
760         /* Tear down the routing table. */
761         bridge_rtable_fini(sc);
762
763         /* Free per-cpu member iface lists */
764         kfree(sc->sc_iflists, M_DEVBUF);
765
766         kfree(sc, M_DEVBUF);
767 }
768
769 /*
770  * bridge_ioctl:
771  *
772  *      Handle a control request from the operator.
773  */
774 static int
775 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
776 {
777         struct bridge_softc *sc = ifp->if_softc;
778         struct bridge_control_arg args;
779         struct ifdrv *ifd = (struct ifdrv *) data;
780         const struct bridge_control *bc;
781         int error = 0;
782
783         ASSERT_IFNET_SERIALIZED_ALL(ifp);
784
785         switch (cmd) {
786         case SIOCADDMULTI:
787         case SIOCDELMULTI:
788                 break;
789
790         case SIOCGDRVSPEC:
791         case SIOCSDRVSPEC:
792                 if (ifd->ifd_cmd >= bridge_control_table_size) {
793                         error = EINVAL;
794                         break;
795                 }
796                 bc = &bridge_control_table[ifd->ifd_cmd];
797
798                 if (cmd == SIOCGDRVSPEC &&
799                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
800                         error = EINVAL;
801                         break;
802                 } else if (cmd == SIOCSDRVSPEC &&
803                            (bc->bc_flags & BC_F_COPYOUT)) {
804                         error = EINVAL;
805                         break;
806                 }
807
808                 if (bc->bc_flags & BC_F_SUSER) {
809                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
810                         if (error)
811                                 break;
812                 }
813
814                 if (ifd->ifd_len != bc->bc_argsize ||
815                     ifd->ifd_len > sizeof(args.bca_u)) {
816                         error = EINVAL;
817                         break;
818                 }
819
820                 memset(&args, 0, sizeof(args));
821                 if (bc->bc_flags & BC_F_COPYIN) {
822                         error = copyin(ifd->ifd_data, &args.bca_u,
823                                        ifd->ifd_len);
824                         if (error)
825                                 break;
826                 }
827
828                 error = bridge_control(sc, cmd, bc->bc_func, &args);
829                 if (error) {
830                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
831                         break;
832                 }
833
834                 if (bc->bc_flags & BC_F_COPYOUT) {
835                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
836                         if (args.bca_len != 0) {
837                                 KKASSERT(args.bca_kptr != NULL);
838                                 if (!error) {
839                                         error = copyout(args.bca_kptr,
840                                                 args.bca_uptr, args.bca_len);
841                                 }
842                                 kfree(args.bca_kptr, M_TEMP);
843                         } else {
844                                 KKASSERT(args.bca_kptr == NULL);
845                         }
846                 } else {
847                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
848                 }
849                 break;
850
851         case SIOCSIFFLAGS:
852                 if (!(ifp->if_flags & IFF_UP) &&
853                     (ifp->if_flags & IFF_RUNNING)) {
854                         /*
855                          * If interface is marked down and it is running,
856                          * then stop it.
857                          */
858                         bridge_stop(ifp);
859                 } else if ((ifp->if_flags & IFF_UP) &&
860                     !(ifp->if_flags & IFF_RUNNING)) {
861                         /*
862                          * If interface is marked up and it is stopped, then
863                          * start it.
864                          */
865                         ifp->if_init(sc);
866                 }
867                 break;
868
869         case SIOCSIFMTU:
870                 /* Do not allow the MTU to be changed on the bridge */
871                 error = EINVAL;
872                 break;
873
874         default:
875                 error = ether_ioctl(ifp, cmd, data);
876                 break;
877         }
878         return (error);
879 }
880
881 /*
882  * bridge_mutecaps:
883  *
884  *      Clear or restore unwanted capabilities on the member interface
885  */
886 static void
887 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
888 {
889         struct ifreq ifr;
890         int error;
891
892         if (ifp->if_ioctl == NULL)
893                 return;
894
895         bzero(&ifr, sizeof(ifr));
896         ifr.ifr_reqcap = ifp->if_capenable;
897
898         if (mute) {
899                 /* mask off and save capabilities */
900                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
901                 if (bif_info->bifi_mutecap != 0)
902                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
903         } else {
904                 /* restore muted capabilities */
905                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
906         }
907
908         if (bif_info->bifi_mutecap != 0) {
909                 ifnet_serialize_all(ifp);
910                 error = ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
911                 ifnet_deserialize_all(ifp);
912         }
913 }
914
915 /*
916  * bridge_lookup_member:
917  *
918  *      Lookup a bridge member interface.
919  */
920 static struct bridge_iflist *
921 bridge_lookup_member(struct bridge_softc *sc, const char *name)
922 {
923         struct bridge_iflist *bif;
924
925         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
926                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
927                         return (bif);
928         }
929         return (NULL);
930 }
931
932 /*
933  * bridge_lookup_member_if:
934  *
935  *      Lookup a bridge member interface by ifnet*.
936  */
937 static struct bridge_iflist *
938 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
939 {
940         struct bridge_iflist *bif;
941
942         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
943                 if (bif->bif_ifp == member_ifp)
944                         return (bif);
945         }
946         return (NULL);
947 }
948
949 /*
950  * bridge_lookup_member_ifinfo:
951  *
952  *      Lookup a bridge member interface by bridge_ifinfo.
953  */
954 static struct bridge_iflist *
955 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
956                             struct bridge_ifinfo *bif_info)
957 {
958         struct bridge_iflist *bif;
959
960         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
961                 if (bif->bif_info == bif_info)
962                         return (bif);
963         }
964         return (NULL);
965 }
966
967 /*
968  * bridge_delete_member:
969  *
970  *      Delete the specified member interface.
971  */
972 static void
973 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
974     int gone)
975 {
976         struct ifnet *ifs = bif->bif_ifp;
977         struct ifnet *bifp = sc->sc_ifp;
978         struct bridge_ifinfo *bif_info = bif->bif_info;
979         struct bridge_iflist_head saved_bifs;
980
981         ASSERT_IFNET_SERIALIZED_ALL(bifp);
982         KKASSERT(bif_info != NULL);
983
984         ifs->if_bridge = NULL;
985
986         /*
987          * Release bridge interface's serializer:
988          * - To avoid possible dead lock.
989          * - Various sync operation will block the current thread.
990          */
991         ifnet_deserialize_all(bifp);
992
993         if (!gone) {
994                 switch (ifs->if_type) {
995                 case IFT_ETHER:
996                 case IFT_L2VLAN:
997                         /*
998                          * Take the interface out of promiscuous mode.
999                          */
1000                         ifpromisc(ifs, 0);
1001                         bridge_mutecaps(bif_info, ifs, 0);
1002                         break;
1003
1004                 case IFT_GIF:
1005                         break;
1006
1007                 default:
1008                         panic("bridge_delete_member: impossible");
1009                         break;
1010                 }
1011         }
1012
1013         /*
1014          * Remove bifs from percpu linked list.
1015          *
1016          * Removed bifs are not freed immediately, instead,
1017          * they are saved in saved_bifs.  They will be freed
1018          * after we make sure that no one is accessing them,
1019          * i.e. after following netmsg_service_sync()
1020          */
1021         LIST_INIT(&saved_bifs);
1022         bridge_del_bif(sc, bif_info, &saved_bifs);
1023
1024         /*
1025          * Make sure that all protocol threads:
1026          * o  see 'ifs' if_bridge is changed
1027          * o  know that bif is removed from the percpu linked list
1028          */
1029         netmsg_service_sync();
1030
1031         /*
1032          * Free the removed bifs
1033          */
1034         KKASSERT(!LIST_EMPTY(&saved_bifs));
1035         while ((bif = LIST_FIRST(&saved_bifs)) != NULL) {
1036                 LIST_REMOVE(bif, bif_next);
1037                 kfree(bif, M_DEVBUF);
1038         }
1039
1040         /* See the comment in bridge_ioctl_stop() */
1041         bridge_rtmsg_sync(sc);
1042         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1043
1044         ifnet_serialize_all(bifp);
1045
1046         if (bifp->if_flags & IFF_RUNNING)
1047                 bstp_initialization(sc);
1048
1049         /*
1050          * Free the bif_info after bstp_initialization(), so that
1051          * bridge_softc.sc_root_port will not reference a dangling
1052          * pointer.
1053          */
1054         kfree(bif_info, M_DEVBUF);
1055 }
1056
1057 /*
1058  * bridge_delete_span:
1059  *
1060  *      Delete the specified span interface.
1061  */
1062 static void
1063 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1064 {
1065         KASSERT(bif->bif_ifp->if_bridge == NULL,
1066             ("%s: not a span interface", __func__));
1067
1068         LIST_REMOVE(bif, bif_next);
1069         kfree(bif, M_DEVBUF);
1070 }
1071
1072 static int
1073 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1074 {
1075         struct ifnet *ifp = sc->sc_ifp;
1076
1077         if (ifp->if_flags & IFF_RUNNING)
1078                 return 0;
1079
1080         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1081             bridge_timer, sc);
1082
1083         ifp->if_flags |= IFF_RUNNING;
1084         bstp_initialization(sc);
1085         return 0;
1086 }
1087
1088 static int
1089 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1090 {
1091         struct ifnet *ifp = sc->sc_ifp;
1092         struct lwkt_msg *lmsg;
1093
1094         if ((ifp->if_flags & IFF_RUNNING) == 0)
1095                 return 0;
1096
1097         callout_stop(&sc->sc_brcallout);
1098
1099         crit_enter();
1100         lmsg = &sc->sc_brtimemsg.nm_lmsg;
1101         if ((lmsg->ms_flags & MSGF_DONE) == 0) {
1102                 /* Pending to be processed; drop it */
1103                 lwkt_dropmsg(lmsg);
1104         }
1105         crit_exit();
1106
1107         bstp_stop(sc);
1108
1109         ifp->if_flags &= ~IFF_RUNNING;
1110
1111         ifnet_deserialize_all(ifp);
1112
1113         /* Let everyone know that we are stopped */
1114         netmsg_service_sync();
1115
1116         /*
1117          * Sync ifnetX msgports in the order we forward rtnode
1118          * installation message.  This is used to make sure that
1119          * all rtnode installation messages sent by bridge_rtupdate()
1120          * during above netmsg_service_sync() are flushed.
1121          */
1122         bridge_rtmsg_sync(sc);
1123         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1124
1125         ifnet_serialize_all(ifp);
1126         return 0;
1127 }
1128
1129 static int
1130 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1131 {
1132         struct ifbreq *req = arg;
1133         struct bridge_iflist *bif;
1134         struct bridge_ifinfo *bif_info;
1135         struct ifnet *ifs, *bifp;
1136         int error = 0;
1137
1138         bifp = sc->sc_ifp;
1139         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1140
1141         ifs = ifunit(req->ifbr_ifsname);
1142         if (ifs == NULL)
1143                 return (ENOENT);
1144
1145         /* If it's in the span list, it can't be a member. */
1146         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1147                 if (ifs == bif->bif_ifp)
1148                         return (EBUSY);
1149
1150         /* Allow the first Ethernet member to define the MTU */
1151         if (ifs->if_type != IFT_GIF) {
1152                 if (LIST_EMPTY(&sc->sc_iflists[mycpuid])) {
1153                         bifp->if_mtu = ifs->if_mtu;
1154                 } else if (bifp->if_mtu != ifs->if_mtu) {
1155                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1156                         return (EINVAL);
1157                 }
1158         }
1159
1160         if (ifs->if_bridge == sc)
1161                 return (EEXIST);
1162
1163         if (ifs->if_bridge != NULL)
1164                 return (EBUSY);
1165
1166         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1167         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1168         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1169         bif_info->bifi_ifp = ifs;
1170
1171         /*
1172          * Release bridge interface's serializer:
1173          * - To avoid possible dead lock.
1174          * - Various sync operation will block the current thread.
1175          */
1176         ifnet_deserialize_all(bifp);
1177
1178         switch (ifs->if_type) {
1179         case IFT_ETHER:
1180         case IFT_L2VLAN:
1181                 /*
1182                  * Place the interface into promiscuous mode.
1183                  */
1184                 error = ifpromisc(ifs, 1);
1185                 if (error) {
1186                         ifnet_serialize_all(bifp);
1187                         goto out;
1188                 }
1189                 bridge_mutecaps(bif_info, ifs, 1);
1190                 break;
1191
1192         case IFT_GIF: /* :^) */
1193                 break;
1194
1195         default:
1196                 error = EINVAL;
1197                 ifnet_serialize_all(bifp);
1198                 goto out;
1199         }
1200
1201         /*
1202          * Add bifs to percpu linked lists
1203          */
1204         bridge_add_bif(sc, bif_info, ifs);
1205
1206         ifnet_serialize_all(bifp);
1207
1208         if (bifp->if_flags & IFF_RUNNING)
1209                 bstp_initialization(sc);
1210         else
1211                 bstp_stop(sc);
1212
1213         /*
1214          * Everything has been setup, so let the member interface
1215          * deliver packets to this bridge on its input/output path.
1216          */
1217         ifs->if_bridge = sc;
1218 out:
1219         if (error) {
1220                 if (bif_info != NULL)
1221                         kfree(bif_info, M_DEVBUF);
1222         }
1223         return (error);
1224 }
1225
1226 static int
1227 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1228 {
1229         struct ifbreq *req = arg;
1230         struct bridge_iflist *bif;
1231
1232         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1233         if (bif == NULL)
1234                 return (ENOENT);
1235
1236         bridge_delete_member(sc, bif, 0);
1237
1238         return (0);
1239 }
1240
1241 static int
1242 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1243 {
1244         struct ifbreq *req = arg;
1245         struct bridge_iflist *bif;
1246
1247         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1248         if (bif == NULL)
1249                 return (ENOENT);
1250
1251         req->ifbr_ifsflags = bif->bif_flags;
1252         req->ifbr_state = bif->bif_state;
1253         req->ifbr_priority = bif->bif_priority;
1254         req->ifbr_path_cost = bif->bif_path_cost;
1255         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1256
1257         return (0);
1258 }
1259
1260 static int
1261 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1262 {
1263         struct ifbreq *req = arg;
1264         struct bridge_iflist *bif;
1265         struct ifnet *bifp = sc->sc_ifp;
1266
1267         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1268         if (bif == NULL)
1269                 return (ENOENT);
1270
1271         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1272                 /* SPAN is readonly */
1273                 return (EINVAL);
1274         }
1275
1276         if (req->ifbr_ifsflags & IFBIF_STP) {
1277                 switch (bif->bif_ifp->if_type) {
1278                 case IFT_ETHER:
1279                         /* These can do spanning tree. */
1280                         break;
1281
1282                 default:
1283                         /* Nothing else can. */
1284                         return (EINVAL);
1285                 }
1286         }
1287
1288         ifnet_deserialize_all(bifp);
1289         bridge_set_bifflags(sc, bif->bif_info, req->ifbr_ifsflags);
1290         ifnet_serialize_all(bifp);
1291
1292         if (bifp->if_flags & IFF_RUNNING)
1293                 bstp_initialization(sc);
1294
1295         return (0);
1296 }
1297
1298 static int
1299 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1300 {
1301         struct ifbrparam *param = arg;
1302         struct ifnet *ifp = sc->sc_ifp;
1303
1304         sc->sc_brtmax = param->ifbrp_csize;
1305
1306         ifnet_deserialize_all(ifp);
1307         bridge_rttrim(sc);
1308         ifnet_serialize_all(ifp);
1309
1310         return (0);
1311 }
1312
1313 static int
1314 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1315 {
1316         struct ifbrparam *param = arg;
1317
1318         param->ifbrp_csize = sc->sc_brtmax;
1319
1320         return (0);
1321 }
1322
1323 static int
1324 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1325 {
1326         struct bridge_control_arg *bc_arg = arg;
1327         struct ifbifconf *bifc = arg;
1328         struct bridge_iflist *bif;
1329         struct ifbreq *breq;
1330         int count, len;
1331
1332         count = 0;
1333         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1334                 count++;
1335         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1336                 count++;
1337
1338         if (bifc->ifbic_len == 0) {
1339                 bifc->ifbic_len = sizeof(*breq) * count;
1340                 return 0;
1341         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1342                 bifc->ifbic_len = 0;
1343                 return 0;
1344         }
1345
1346         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1347         KKASSERT(len >= sizeof(*breq));
1348
1349         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1350         if (breq == NULL) {
1351                 bifc->ifbic_len = 0;
1352                 return ENOMEM;
1353         }
1354         bc_arg->bca_kptr = breq;
1355
1356         count = 0;
1357         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1358                 if (len < sizeof(*breq))
1359                         break;
1360
1361                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1362                         sizeof(breq->ifbr_ifsname));
1363                 breq->ifbr_ifsflags = bif->bif_flags;
1364                 breq->ifbr_state = bif->bif_state;
1365                 breq->ifbr_priority = bif->bif_priority;
1366                 breq->ifbr_path_cost = bif->bif_path_cost;
1367                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1368                 breq++;
1369                 count++;
1370                 len -= sizeof(*breq);
1371         }
1372         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1373                 if (len < sizeof(*breq))
1374                         break;
1375
1376                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1377                         sizeof(breq->ifbr_ifsname));
1378                 breq->ifbr_ifsflags = bif->bif_flags;
1379                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1380                 breq++;
1381                 count++;
1382                 len -= sizeof(*breq);
1383         }
1384
1385         bifc->ifbic_len = sizeof(*breq) * count;
1386         KKASSERT(bifc->ifbic_len > 0);
1387
1388         bc_arg->bca_len = bifc->ifbic_len;
1389         bc_arg->bca_uptr = bifc->ifbic_req;
1390         return 0;
1391 }
1392
1393 static int
1394 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1395 {
1396         struct bridge_control_arg *bc_arg = arg;
1397         struct ifbaconf *bac = arg;
1398         struct bridge_rtnode *brt;
1399         struct ifbareq *bareq;
1400         int count, len;
1401
1402         count = 0;
1403         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1404                 count++;
1405
1406         if (bac->ifbac_len == 0) {
1407                 bac->ifbac_len = sizeof(*bareq) * count;
1408                 return 0;
1409         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1410                 bac->ifbac_len = 0;
1411                 return 0;
1412         }
1413
1414         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1415         KKASSERT(len >= sizeof(*bareq));
1416
1417         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1418         if (bareq == NULL) {
1419                 bac->ifbac_len = 0;
1420                 return ENOMEM;
1421         }
1422         bc_arg->bca_kptr = bareq;
1423
1424         count = 0;
1425         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1426                 struct bridge_rtinfo *bri = brt->brt_info;
1427                 unsigned long expire;
1428
1429                 if (len < sizeof(*bareq))
1430                         break;
1431
1432                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1433                         sizeof(bareq->ifba_ifsname));
1434                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1435                 expire = bri->bri_expire;
1436                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1437                     time_second < expire)
1438                         bareq->ifba_expire = expire - time_second;
1439                 else
1440                         bareq->ifba_expire = 0;
1441                 bareq->ifba_flags = bri->bri_flags;
1442                 bareq++;
1443                 count++;
1444                 len -= sizeof(*bareq);
1445         }
1446
1447         bac->ifbac_len = sizeof(*bareq) * count;
1448         KKASSERT(bac->ifbac_len > 0);
1449
1450         bc_arg->bca_len = bac->ifbac_len;
1451         bc_arg->bca_uptr = bac->ifbac_req;
1452         return 0;
1453 }
1454
1455 static int
1456 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1457 {
1458         struct ifbareq *req = arg;
1459         struct bridge_iflist *bif;
1460         struct ifnet *ifp = sc->sc_ifp;
1461         int error;
1462
1463         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1464
1465         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1466         if (bif == NULL)
1467                 return (ENOENT);
1468
1469         ifnet_deserialize_all(ifp);
1470         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1471                                req->ifba_flags);
1472         ifnet_serialize_all(ifp);
1473         return (error);
1474 }
1475
1476 static int
1477 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1478 {
1479         struct ifbrparam *param = arg;
1480
1481         sc->sc_brttimeout = param->ifbrp_ctime;
1482
1483         return (0);
1484 }
1485
1486 static int
1487 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1488 {
1489         struct ifbrparam *param = arg;
1490
1491         param->ifbrp_ctime = sc->sc_brttimeout;
1492
1493         return (0);
1494 }
1495
1496 static int
1497 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1498 {
1499         struct ifbareq *req = arg;
1500         struct ifnet *ifp = sc->sc_ifp;
1501         int error;
1502
1503         ifnet_deserialize_all(ifp);
1504         error = bridge_rtdaddr(sc, req->ifba_dst);
1505         ifnet_serialize_all(ifp);
1506         return error;
1507 }
1508
1509 static int
1510 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1511 {
1512         struct ifbreq *req = arg;
1513         struct ifnet *ifp = sc->sc_ifp;
1514
1515         ifnet_deserialize_all(ifp);
1516         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1517         ifnet_serialize_all(ifp);
1518
1519         return (0);
1520 }
1521
1522 static int
1523 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1524 {
1525         struct ifbrparam *param = arg;
1526
1527         param->ifbrp_prio = sc->sc_bridge_priority;
1528
1529         return (0);
1530 }
1531
1532 static int
1533 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1534 {
1535         struct ifbrparam *param = arg;
1536
1537         sc->sc_bridge_priority = param->ifbrp_prio;
1538
1539         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1540                 bstp_initialization(sc);
1541
1542         return (0);
1543 }
1544
1545 static int
1546 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1547 {
1548         struct ifbrparam *param = arg;
1549
1550         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1551
1552         return (0);
1553 }
1554
1555 static int
1556 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1557 {
1558         struct ifbrparam *param = arg;
1559
1560         if (param->ifbrp_hellotime == 0)
1561                 return (EINVAL);
1562         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1563
1564         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1565                 bstp_initialization(sc);
1566
1567         return (0);
1568 }
1569
1570 static int
1571 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1572 {
1573         struct ifbrparam *param = arg;
1574
1575         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1576
1577         return (0);
1578 }
1579
1580 static int
1581 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1582 {
1583         struct ifbrparam *param = arg;
1584
1585         if (param->ifbrp_fwddelay == 0)
1586                 return (EINVAL);
1587         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1588
1589         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1590                 bstp_initialization(sc);
1591
1592         return (0);
1593 }
1594
1595 static int
1596 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1597 {
1598         struct ifbrparam *param = arg;
1599
1600         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1601
1602         return (0);
1603 }
1604
1605 static int
1606 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1607 {
1608         struct ifbrparam *param = arg;
1609
1610         if (param->ifbrp_maxage == 0)
1611                 return (EINVAL);
1612         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1613
1614         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1615                 bstp_initialization(sc);
1616
1617         return (0);
1618 }
1619
1620 static int
1621 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1622 {
1623         struct ifbreq *req = arg;
1624         struct bridge_iflist *bif;
1625
1626         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1627         if (bif == NULL)
1628                 return (ENOENT);
1629
1630         bif->bif_priority = req->ifbr_priority;
1631
1632         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1633                 bstp_initialization(sc);
1634
1635         return (0);
1636 }
1637
1638 static int
1639 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1640 {
1641         struct ifbreq *req = arg;
1642         struct bridge_iflist *bif;
1643
1644         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1645         if (bif == NULL)
1646                 return (ENOENT);
1647
1648         bif->bif_path_cost = req->ifbr_path_cost;
1649
1650         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1651                 bstp_initialization(sc);
1652
1653         return (0);
1654 }
1655
1656 static int
1657 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1658 {
1659         struct ifbreq *req = arg;
1660         struct bridge_iflist *bif;
1661         struct ifnet *ifs;
1662
1663         ifs = ifunit(req->ifbr_ifsname);
1664         if (ifs == NULL)
1665                 return (ENOENT);
1666
1667         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1668                 if (ifs == bif->bif_ifp)
1669                         return (EBUSY);
1670
1671         if (ifs->if_bridge != NULL)
1672                 return (EBUSY);
1673
1674         switch (ifs->if_type) {
1675         case IFT_ETHER:
1676         case IFT_GIF:
1677         case IFT_L2VLAN:
1678                 break;
1679
1680         default:
1681                 return (EINVAL);
1682         }
1683
1684         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1685         bif->bif_ifp = ifs;
1686         bif->bif_flags = IFBIF_SPAN;
1687         /* NOTE: span bif does not need bridge_ifinfo */
1688
1689         LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1690
1691         sc->sc_span = 1;
1692
1693         return (0);
1694 }
1695
1696 static int
1697 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1698 {
1699         struct ifbreq *req = arg;
1700         struct bridge_iflist *bif;
1701         struct ifnet *ifs;
1702
1703         ifs = ifunit(req->ifbr_ifsname);
1704         if (ifs == NULL)
1705                 return (ENOENT);
1706
1707         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1708                 if (ifs == bif->bif_ifp)
1709                         break;
1710
1711         if (bif == NULL)
1712                 return (ENOENT);
1713
1714         bridge_delete_span(sc, bif);
1715
1716         if (LIST_EMPTY(&sc->sc_spanlist))
1717                 sc->sc_span = 0;
1718
1719         return (0);
1720 }
1721
1722 static void
1723 bridge_ifdetach_dispatch(struct netmsg *nmsg)
1724 {
1725         struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
1726         struct ifnet *ifp, *bifp;
1727         struct bridge_softc *sc;
1728         struct bridge_iflist *bif;
1729
1730         ifp = lmsg->u.ms_resultp;
1731         sc = ifp->if_bridge;
1732
1733         /* Check if the interface is a bridge member */
1734         if (sc != NULL) {
1735                 bifp = sc->sc_ifp;
1736
1737                 ifnet_serialize_all(bifp);
1738
1739                 bif = bridge_lookup_member_if(sc, ifp);
1740                 if (bif != NULL) {
1741                         bridge_delete_member(sc, bif, 1);
1742                 } else {
1743                         /* XXX Why bif will be NULL? */
1744                 }
1745
1746                 ifnet_deserialize_all(bifp);
1747                 goto reply;
1748         }
1749
1750         crit_enter();   /* XXX MP */
1751
1752         /* Check if the interface is a span port */
1753         LIST_FOREACH(sc, &bridge_list, sc_list) {
1754                 bifp = sc->sc_ifp;
1755
1756                 ifnet_serialize_all(bifp);
1757
1758                 LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1759                         if (ifp == bif->bif_ifp) {
1760                                 bridge_delete_span(sc, bif);
1761                                 break;
1762                         }
1763
1764                 ifnet_deserialize_all(bifp);
1765         }
1766
1767         crit_exit();
1768
1769 reply:
1770         lwkt_replymsg(lmsg, 0);
1771 }
1772
1773 /*
1774  * bridge_ifdetach:
1775  *
1776  *      Detach an interface from a bridge.  Called when a member
1777  *      interface is detaching.
1778  */
1779 static void
1780 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1781 {
1782         struct lwkt_msg *lmsg;
1783         struct netmsg nmsg;
1784
1785         netmsg_init(&nmsg, &curthread->td_msgport, 0, bridge_ifdetach_dispatch);
1786         lmsg = &nmsg.nm_lmsg;
1787         lmsg->u.ms_resultp = ifp;
1788
1789         lwkt_domsg(BRIDGE_CFGPORT, lmsg, 0);
1790 }
1791
1792 /*
1793  * bridge_init:
1794  *
1795  *      Initialize a bridge interface.
1796  */
1797 static void
1798 bridge_init(void *xsc)
1799 {
1800         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1801 }
1802
1803 /*
1804  * bridge_stop:
1805  *
1806  *      Stop the bridge interface.
1807  */
1808 static void
1809 bridge_stop(struct ifnet *ifp)
1810 {
1811         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1812 }
1813
1814 /*
1815  * bridge_enqueue:
1816  *
1817  *      Enqueue a packet on a bridge member interface.
1818  *
1819  */
1820 void
1821 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1822 {
1823         struct netmsg_packet *nmp;
1824
1825         nmp = &m->m_hdr.mh_netmsg;
1826         netmsg_init(&nmp->nm_netmsg, &netisr_apanic_rport, 0,
1827                     bridge_enqueue_handler);
1828         nmp->nm_packet = m;
1829         nmp->nm_netmsg.nm_lmsg.u.ms_resultp = dst_ifp;
1830
1831         lwkt_sendmsg(curnetport, &nmp->nm_netmsg.nm_lmsg);
1832 }
1833
1834 /*
1835  * bridge_output:
1836  *
1837  *      Send output from a bridge member interface.  This
1838  *      performs the bridging function for locally originated
1839  *      packets.
1840  *
1841  *      The mbuf has the Ethernet header already attached.  We must
1842  *      enqueue or free the mbuf before returning.
1843  */
1844 static int
1845 bridge_output(struct ifnet *ifp, struct mbuf *m)
1846 {
1847         struct bridge_softc *sc = ifp->if_bridge;
1848         struct ether_header *eh;
1849         struct ifnet *dst_if, *bifp;
1850
1851         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
1852
1853         /*
1854          * Make sure that we are still a member of a bridge interface.
1855          */
1856         if (sc == NULL) {
1857                 m_freem(m);
1858                 return (0);
1859         }
1860         bifp = sc->sc_ifp;
1861
1862         if (m->m_len < ETHER_HDR_LEN) {
1863                 m = m_pullup(m, ETHER_HDR_LEN);
1864                 if (m == NULL)
1865                         return (0);
1866         }
1867         eh = mtod(m, struct ether_header *);
1868
1869         /*
1870          * If bridge is down, but the original output interface is up,
1871          * go ahead and send out that interface.  Otherwise, the packet
1872          * is dropped below.
1873          */
1874         if ((bifp->if_flags & IFF_RUNNING) == 0) {
1875                 dst_if = ifp;
1876                 goto sendunicast;
1877         }
1878
1879         /*
1880          * If the packet is a multicast, or we don't know a better way to
1881          * get there, send to all interfaces.
1882          */
1883         if (ETHER_IS_MULTICAST(eh->ether_dhost))
1884                 dst_if = NULL;
1885         else
1886                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1887         if (dst_if == NULL) {
1888                 struct bridge_iflist *bif, *nbif;
1889                 struct mbuf *mc;
1890                 int used = 0;
1891
1892                 if (sc->sc_span)
1893                         bridge_span(sc, m);
1894
1895                 LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1896                                      bif_next, nbif) {
1897                         dst_if = bif->bif_ifp;
1898                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
1899                                 continue;
1900
1901                         /*
1902                          * If this is not the original output interface,
1903                          * and the interface is participating in spanning
1904                          * tree, make sure the port is in a state that
1905                          * allows forwarding.
1906                          */
1907                         if (dst_if != ifp &&
1908                             (bif->bif_flags & IFBIF_STP) != 0) {
1909                                 switch (bif->bif_state) {
1910                                 case BSTP_IFSTATE_BLOCKING:
1911                                 case BSTP_IFSTATE_LISTENING:
1912                                 case BSTP_IFSTATE_DISABLED:
1913                                         continue;
1914                                 }
1915                         }
1916
1917                         if (LIST_NEXT(bif, bif_next) == NULL) {
1918                                 used = 1;
1919                                 mc = m;
1920                         } else {
1921                                 mc = m_copypacket(m, MB_DONTWAIT);
1922                                 if (mc == NULL) {
1923                                         bifp->if_oerrors++;
1924                                         continue;
1925                                 }
1926                         }
1927                         bridge_handoff(dst_if, mc);
1928
1929                         if (nbif != NULL && !nbif->bif_onlist) {
1930                                 KKASSERT(bif->bif_onlist);
1931                                 nbif = LIST_NEXT(bif, bif_next);
1932                         }
1933                 }
1934                 if (used == 0)
1935                         m_freem(m);
1936                 return (0);
1937         }
1938
1939 sendunicast:
1940         /*
1941          * XXX Spanning tree consideration here?
1942          */
1943         if (sc->sc_span)
1944                 bridge_span(sc, m);
1945         if ((dst_if->if_flags & IFF_RUNNING) == 0)
1946                 m_freem(m);
1947         else
1948                 bridge_handoff(dst_if, m);
1949         return (0);
1950 }
1951
1952 /*
1953  * bridge_start:
1954  *
1955  *      Start output on a bridge.
1956  *
1957  */
1958 static void
1959 bridge_start(struct ifnet *ifp)
1960 {
1961         struct bridge_softc *sc = ifp->if_softc;
1962
1963         ASSERT_IFNET_SERIALIZED_TX(ifp);
1964
1965         ifp->if_flags |= IFF_OACTIVE;
1966         for (;;) {
1967                 struct ifnet *dst_if = NULL;
1968                 struct ether_header *eh;
1969                 struct mbuf *m;
1970
1971                 m = ifq_dequeue(&ifp->if_snd, NULL);
1972                 if (m == NULL)
1973                         break;
1974
1975                 if (m->m_len < sizeof(*eh)) {
1976                         m = m_pullup(m, sizeof(*eh));
1977                         if (m == NULL) {
1978                                 ifp->if_oerrors++;
1979                                 continue;
1980                         }
1981                 }
1982                 eh = mtod(m, struct ether_header *);
1983
1984                 BPF_MTAP(ifp, m);
1985                 ifp->if_opackets++;
1986
1987                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
1988                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1989
1990                 if (dst_if == NULL)
1991                         bridge_start_bcast(sc, m);
1992                 else
1993                         bridge_enqueue(dst_if, m);
1994         }
1995         ifp->if_flags &= ~IFF_OACTIVE;
1996 }
1997
1998 /*
1999  * bridge_forward:
2000  *
2001  *      The forwarding function of the bridge.
2002  */
2003 static void
2004 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2005 {
2006         struct bridge_iflist *bif;
2007         struct ifnet *src_if, *dst_if, *ifp;
2008         struct ether_header *eh;
2009
2010         src_if = m->m_pkthdr.rcvif;
2011         ifp = sc->sc_ifp;
2012
2013         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2014
2015         ifp->if_ipackets++;
2016         ifp->if_ibytes += m->m_pkthdr.len;
2017
2018         /*
2019          * Look up the bridge_iflist.
2020          */
2021         bif = bridge_lookup_member_if(sc, src_if);
2022         if (bif == NULL) {
2023                 /* Interface is not a bridge member (anymore?) */
2024                 m_freem(m);
2025                 return;
2026         }
2027
2028         if (bif->bif_flags & IFBIF_STP) {
2029                 switch (bif->bif_state) {
2030                 case BSTP_IFSTATE_BLOCKING:
2031                 case BSTP_IFSTATE_LISTENING:
2032                 case BSTP_IFSTATE_DISABLED:
2033                         m_freem(m);
2034                         return;
2035                 }
2036         }
2037
2038         eh = mtod(m, struct ether_header *);
2039
2040         /*
2041          * If the interface is learning, and the source
2042          * address is valid and not multicast, record
2043          * the address.
2044          */
2045         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2046             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2047             (eh->ether_shost[0] == 0 &&
2048              eh->ether_shost[1] == 0 &&
2049              eh->ether_shost[2] == 0 &&
2050              eh->ether_shost[3] == 0 &&
2051              eh->ether_shost[4] == 0 &&
2052              eh->ether_shost[5] == 0) == 0)
2053                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2054
2055         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2056             bif->bif_state == BSTP_IFSTATE_LEARNING) {
2057                 m_freem(m);
2058                 return;
2059         }
2060
2061         /*
2062          * At this point, the port either doesn't participate
2063          * in spanning tree or it is in the forwarding state.
2064          */
2065
2066         /*
2067          * If the packet is unicast, destined for someone on
2068          * "this" side of the bridge, drop it.
2069          */
2070         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2071                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2072                 if (src_if == dst_if) {
2073                         m_freem(m);
2074                         return;
2075                 }
2076         } else {
2077                 /* ...forward it to all interfaces. */
2078                 ifp->if_imcasts++;
2079                 dst_if = NULL;
2080         }
2081
2082         if (dst_if == NULL) {
2083                 bridge_broadcast(sc, src_if, m);
2084                 return;
2085         }
2086
2087         /*
2088          * At this point, we're dealing with a unicast frame
2089          * going to a different interface.
2090          */
2091         if ((dst_if->if_flags & IFF_RUNNING) == 0) {
2092                 m_freem(m);
2093                 return;
2094         }
2095         bif = bridge_lookup_member_if(sc, dst_if);
2096         if (bif == NULL) {
2097                 /* Not a member of the bridge (anymore?) */
2098                 m_freem(m);
2099                 return;
2100         }
2101
2102         if (bif->bif_flags & IFBIF_STP) {
2103                 switch (bif->bif_state) {
2104                 case BSTP_IFSTATE_DISABLED:
2105                 case BSTP_IFSTATE_BLOCKING:
2106                         m_freem(m);
2107                         return;
2108                 }
2109         }
2110
2111         if (inet_pfil_hook.ph_hashooks > 0
2112 #ifdef INET6
2113             || inet6_pfil_hook.ph_hashooks > 0
2114 #endif
2115             ) {
2116                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2117                         return;
2118                 if (m == NULL)
2119                         return;
2120
2121                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2122                         return;
2123                 if (m == NULL)
2124                         return;
2125         }
2126         bridge_handoff(dst_if, m);
2127 }
2128
2129 /*
2130  * bridge_input:
2131  *
2132  *      Receive input from a member interface.  Queue the packet for
2133  *      bridging if it is not for us.
2134  */
2135 static struct mbuf *
2136 bridge_input(struct ifnet *ifp, struct mbuf *m)
2137 {
2138         struct bridge_softc *sc = ifp->if_bridge;
2139         struct bridge_iflist *bif;
2140         struct ifnet *bifp, *new_ifp;
2141         struct ether_header *eh;
2142         struct mbuf *mc, *mc2;
2143
2144         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2145
2146         /*
2147          * Make sure that we are still a member of a bridge interface.
2148          */
2149         if (sc == NULL)
2150                 return m;
2151
2152         new_ifp = NULL;
2153         bifp = sc->sc_ifp;
2154
2155         if ((bifp->if_flags & IFF_RUNNING) == 0)
2156                 goto out;
2157
2158         /*
2159          * Implement support for bridge monitoring.  If this flag has been
2160          * set on this interface, discard the packet once we push it through
2161          * the bpf(4) machinery, but before we do, increment various counters
2162          * associated with this bridge.
2163          */
2164         if (bifp->if_flags & IFF_MONITOR) {
2165                 /* Change input interface to this bridge */
2166                 m->m_pkthdr.rcvif = bifp;
2167
2168                 BPF_MTAP(bifp, m);
2169
2170                 /* Update bridge's ifnet statistics */
2171                 bifp->if_ipackets++;
2172                 bifp->if_ibytes += m->m_pkthdr.len;
2173                 if (m->m_flags & (M_MCAST | M_BCAST))
2174                         bifp->if_imcasts++;
2175
2176                 m_freem(m);
2177                 m = NULL;
2178                 goto out;
2179         }
2180
2181         eh = mtod(m, struct ether_header *);
2182
2183         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2184                 /*
2185                  * If the packet is for us, set the packets source as the
2186                  * bridge, and return the packet back to ifnet.if_input for
2187                  * local processing.
2188                  */
2189                 KASSERT(bifp->if_bridge == NULL,
2190                         ("loop created in bridge_input"));
2191                 new_ifp = bifp;
2192                 goto out;
2193         }
2194
2195         /*
2196          * Tap all packets arriving on the bridge, no matter if
2197          * they are local destinations or not.  In is in.
2198          */
2199         BPF_MTAP(bifp, m);
2200
2201         bif = bridge_lookup_member_if(sc, ifp);
2202         if (bif == NULL)
2203                 goto out;
2204
2205         if (sc->sc_span)
2206                 bridge_span(sc, m);
2207
2208         if (m->m_flags & (M_BCAST | M_MCAST)) {
2209                 /* Tap off 802.1D packets; they do not get forwarded. */
2210                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2211                     ETHER_ADDR_LEN) == 0) {
2212                         ifnet_serialize_all(bifp);
2213                         bstp_input(sc, bif, m);
2214                         ifnet_deserialize_all(bifp);
2215
2216                         /* m is freed by bstp_input */
2217                         m = NULL;
2218                         goto out;
2219                 }
2220
2221                 if (bif->bif_flags & IFBIF_STP) {
2222                         switch (bif->bif_state) {
2223                         case BSTP_IFSTATE_BLOCKING:
2224                         case BSTP_IFSTATE_LISTENING:
2225                         case BSTP_IFSTATE_DISABLED:
2226                                 goto out;
2227                         }
2228                 }
2229
2230                 /*
2231                  * Make a deep copy of the packet and enqueue the copy
2232                  * for bridge processing; return the original packet for
2233                  * local processing.
2234                  */
2235                 mc = m_dup(m, MB_DONTWAIT);
2236                 if (mc == NULL)
2237                         goto out;
2238
2239                 bridge_forward(sc, mc);
2240
2241                 /*
2242                  * Reinject the mbuf as arriving on the bridge so we have a
2243                  * chance at claiming multicast packets. We can not loop back
2244                  * here from ether_input as a bridge is never a member of a
2245                  * bridge.
2246                  */
2247                 KASSERT(bifp->if_bridge == NULL,
2248                         ("loop created in bridge_input"));
2249                 mc2 = m_dup(m, MB_DONTWAIT);
2250 #ifdef notyet
2251                 if (mc2 != NULL) {
2252                         /* Keep the layer3 header aligned */
2253                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2254                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2255                 }
2256 #endif
2257                 if (mc2 != NULL) {
2258                         /*
2259                          * Don't tap to bpf(4) again; we have
2260                          * already done the tapping.
2261                          */
2262                         ether_reinput_oncpu(bifp, mc2, 0);
2263                 }
2264
2265                 /* Return the original packet for local processing. */
2266                 goto out;
2267         }
2268
2269         if (bif->bif_flags & IFBIF_STP) {
2270                 switch (bif->bif_state) {
2271                 case BSTP_IFSTATE_BLOCKING:
2272                 case BSTP_IFSTATE_LISTENING:
2273                 case BSTP_IFSTATE_DISABLED:
2274                         goto out;
2275                 }
2276         }
2277
2278         /*
2279          * Unicast.  Make sure it's not for us.
2280          *
2281          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2282          * is followed by breaking out of the loop.
2283          */
2284         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2285                 if (bif->bif_ifp->if_type != IFT_ETHER)
2286                         continue;
2287
2288                 /* It is destined for us. */
2289                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2290                     ETHER_ADDR_LEN) == 0) {
2291                         if (bif->bif_ifp != ifp) {
2292                                 /* XXX loop prevention */
2293                                 m->m_flags |= M_ETHER_BRIDGED;
2294                                 new_ifp = bif->bif_ifp;
2295                         }
2296                         if (bif->bif_flags & IFBIF_LEARNING) {
2297                                 bridge_rtupdate(sc, eh->ether_shost,
2298                                                 ifp, IFBAF_DYNAMIC);
2299                         }
2300                         goto out;
2301                 }
2302
2303                 /* We just received a packet that we sent out. */
2304                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2305                     ETHER_ADDR_LEN) == 0) {
2306                         m_freem(m);
2307                         m = NULL;
2308                         goto out;
2309                 }
2310         }
2311
2312         /* Perform the bridge forwarding function. */
2313         bridge_forward(sc, m);
2314         m = NULL;
2315 out:
2316         if (new_ifp != NULL) {
2317                 ether_reinput_oncpu(new_ifp, m, 1);
2318                 m = NULL;
2319         }
2320         return (m);
2321 }
2322
2323 /*
2324  * bridge_start_bcast:
2325  *
2326  *      Broadcast the packet sent from bridge to all member
2327  *      interfaces.
2328  *      This is a simplified version of bridge_broadcast(), however,
2329  *      this function expects caller to hold bridge's serializer.
2330  */
2331 static void
2332 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2333 {
2334         struct bridge_iflist *bif;
2335         struct mbuf *mc;
2336         struct ifnet *dst_if, *bifp;
2337         int used = 0;
2338
2339         bifp = sc->sc_ifp;
2340         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2341
2342         /*
2343          * Following loop is MPSAFE; nothing is blocking
2344          * in the loop body.
2345          */
2346         LIST_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2347                 dst_if = bif->bif_ifp;
2348
2349                 if (bif->bif_flags & IFBIF_STP) {
2350                         switch (bif->bif_state) {
2351                         case BSTP_IFSTATE_BLOCKING:
2352                         case BSTP_IFSTATE_DISABLED:
2353                                 continue;
2354                         }
2355                 }
2356
2357                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2358                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2359                         continue;
2360
2361                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2362                         continue;
2363
2364                 if (LIST_NEXT(bif, bif_next) == NULL) {
2365                         mc = m;
2366                         used = 1;
2367                 } else {
2368                         mc = m_copypacket(m, MB_DONTWAIT);
2369                         if (mc == NULL) {
2370                                 bifp->if_oerrors++;
2371                                 continue;
2372                         }
2373                 }
2374                 bridge_enqueue(dst_if, mc);
2375         }
2376         if (used == 0)
2377                 m_freem(m);
2378 }
2379
2380 /*
2381  * bridge_broadcast:
2382  *
2383  *      Send a frame to all interfaces that are members of
2384  *      the bridge, except for the one on which the packet
2385  *      arrived.
2386  */
2387 static void
2388 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2389     struct mbuf *m)
2390 {
2391         struct bridge_iflist *bif, *nbif;
2392         struct mbuf *mc;
2393         struct ifnet *dst_if, *bifp;
2394         int used = 0;
2395
2396         bifp = sc->sc_ifp;
2397         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
2398
2399         if (inet_pfil_hook.ph_hashooks > 0
2400 #ifdef INET6
2401             || inet6_pfil_hook.ph_hashooks > 0
2402 #endif
2403             ) {
2404                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
2405                         return;
2406                 if (m == NULL)
2407                         return;
2408
2409                 /* Filter on the bridge interface before broadcasting */
2410                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
2411                         return;
2412                 if (m == NULL)
2413                         return;
2414         }
2415
2416         LIST_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
2417                 dst_if = bif->bif_ifp;
2418                 if (dst_if == src_if)
2419                         continue;
2420
2421                 if (bif->bif_flags & IFBIF_STP) {
2422                         switch (bif->bif_state) {
2423                         case BSTP_IFSTATE_BLOCKING:
2424                         case BSTP_IFSTATE_DISABLED:
2425                                 continue;
2426                         }
2427                 }
2428
2429                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2430                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2431                         continue;
2432
2433                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2434                         continue;
2435
2436                 if (LIST_NEXT(bif, bif_next) == NULL) {
2437                         mc = m;
2438                         used = 1;
2439                 } else {
2440                         mc = m_copypacket(m, MB_DONTWAIT);
2441                         if (mc == NULL) {
2442                                 sc->sc_ifp->if_oerrors++;
2443                                 continue;
2444                         }
2445                 }
2446
2447                 /*
2448                  * Filter on the output interface.  Pass a NULL bridge
2449                  * interface pointer so we do not redundantly filter on
2450                  * the bridge for each interface we broadcast on.
2451                  */
2452                 if (inet_pfil_hook.ph_hashooks > 0
2453 #ifdef INET6
2454                     || inet6_pfil_hook.ph_hashooks > 0
2455 #endif
2456                     ) {
2457                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
2458                                 continue;
2459                         if (mc == NULL)
2460                                 continue;
2461                 }
2462                 bridge_handoff(dst_if, mc);
2463
2464                 if (nbif != NULL && !nbif->bif_onlist) {
2465                         KKASSERT(bif->bif_onlist);
2466                         nbif = LIST_NEXT(bif, bif_next);
2467                 }
2468         }
2469         if (used == 0)
2470                 m_freem(m);
2471 }
2472
2473 /*
2474  * bridge_span:
2475  *
2476  *      Duplicate a packet out one or more interfaces that are in span mode,
2477  *      the original mbuf is unmodified.
2478  */
2479 static void
2480 bridge_span(struct bridge_softc *sc, struct mbuf *m)
2481 {
2482         struct bridge_iflist *bif;
2483         struct ifnet *dst_if, *bifp;
2484         struct mbuf *mc;
2485
2486         bifp = sc->sc_ifp;
2487         ifnet_serialize_all(bifp);
2488
2489         LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2490                 dst_if = bif->bif_ifp;
2491
2492                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2493                         continue;
2494
2495                 mc = m_copypacket(m, MB_DONTWAIT);
2496                 if (mc == NULL) {
2497                         sc->sc_ifp->if_oerrors++;
2498                         continue;
2499                 }
2500                 bridge_enqueue(dst_if, mc);
2501         }
2502
2503         ifnet_deserialize_all(bifp);
2504 }
2505
2506 static void
2507 bridge_rtmsg_sync_handler(struct netmsg *nmsg)
2508 {
2509         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2510 }
2511
2512 static void
2513 bridge_rtmsg_sync(struct bridge_softc *sc)
2514 {
2515         struct netmsg nmsg;
2516
2517         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2518
2519         netmsg_init(&nmsg, &curthread->td_msgport, 0,
2520                     bridge_rtmsg_sync_handler);
2521         ifnet_domsg(&nmsg.nm_lmsg, 0);
2522 }
2523
2524 static __inline void
2525 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
2526                      int setflags, uint8_t flags, uint32_t timeo)
2527 {
2528         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2529             bri->bri_ifp != dst_if)
2530                 bri->bri_ifp = dst_if;
2531         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2532             bri->bri_expire != time_second + timeo)
2533                 bri->bri_expire = time_second + timeo;
2534         if (setflags)
2535                 bri->bri_flags = flags;
2536 }
2537
2538 static int
2539 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
2540                        struct ifnet *dst_if, int setflags, uint8_t flags,
2541                        struct bridge_rtinfo **bri0)
2542 {
2543         struct bridge_rtnode *brt;
2544         struct bridge_rtinfo *bri;
2545
2546         if (mycpuid == 0) {
2547                 brt = bridge_rtnode_lookup(sc, dst);
2548                 if (brt != NULL) {
2549                         /*
2550                          * rtnode for 'dst' already exists.  We inform the
2551                          * caller about this by leaving bri0 as NULL.  The
2552                          * caller will terminate the intallation upon getting
2553                          * NULL bri0.  However, we still need to update the
2554                          * rtinfo.
2555                          */
2556                         KKASSERT(*bri0 == NULL);
2557
2558                         /* Update rtinfo */
2559                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
2560                                              flags, sc->sc_brttimeout);
2561                         return 0;
2562                 }
2563
2564                 /*
2565                  * We only need to check brtcnt on CPU0, since if limit
2566                  * is to be exceeded, ENOSPC is returned.  Caller knows
2567                  * this and will terminate the installation.
2568                  */
2569                 if (sc->sc_brtcnt >= sc->sc_brtmax)
2570                         return ENOSPC;
2571
2572                 KKASSERT(*bri0 == NULL);
2573                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
2574                                   M_WAITOK | M_ZERO);
2575                 *bri0 = bri;
2576
2577                 /* Setup rtinfo */
2578                 bri->bri_flags = IFBAF_DYNAMIC;
2579                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
2580                                      sc->sc_brttimeout);
2581         } else {
2582                 bri = *bri0;
2583                 KKASSERT(bri != NULL);
2584         }
2585
2586         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
2587                       M_WAITOK | M_ZERO);
2588         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2589         brt->brt_info = bri;
2590
2591         bridge_rtnode_insert(sc, brt);
2592         return 0;
2593 }
2594
2595 static void
2596 bridge_rtinstall_handler(struct netmsg *nmsg)
2597 {
2598         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)nmsg;
2599         int error;
2600
2601         error = bridge_rtinstall_oncpu(brmsg->br_softc,
2602                                        brmsg->br_dst, brmsg->br_dst_if,
2603                                        brmsg->br_setflags, brmsg->br_flags,
2604                                        &brmsg->br_rtinfo);
2605         if (error) {
2606                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
2607                 lwkt_replymsg(&nmsg->nm_lmsg, error);
2608                 return;
2609         } else if (brmsg->br_rtinfo == NULL) {
2610                 /* rtnode already exists for 'dst' */
2611                 KKASSERT(mycpuid == 0);
2612                 lwkt_replymsg(&nmsg->nm_lmsg, 0);
2613                 return;
2614         }
2615         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2616 }
2617
2618 /*
2619  * bridge_rtupdate:
2620  *
2621  *      Add/Update a bridge routing entry.
2622  */
2623 static int
2624 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2625                 struct ifnet *dst_if, uint8_t flags)
2626 {
2627         struct bridge_rtnode *brt;
2628
2629         /*
2630          * A route for this destination might already exist.  If so,
2631          * update it, otherwise create a new one.
2632          */
2633         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
2634                 struct netmsg_brsaddr *brmsg;
2635
2636                 if (sc->sc_brtcnt >= sc->sc_brtmax)
2637                         return ENOSPC;
2638
2639                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
2640                 if (brmsg == NULL)
2641                         return ENOMEM;
2642
2643                 netmsg_init(&brmsg->br_nmsg, &netisr_afree_rport, 0,
2644                             bridge_rtinstall_handler);
2645                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
2646                 brmsg->br_dst_if = dst_if;
2647                 brmsg->br_flags = flags;
2648                 brmsg->br_setflags = 0;
2649                 brmsg->br_softc = sc;
2650                 brmsg->br_rtinfo = NULL;
2651
2652                 ifnet_sendmsg(&brmsg->br_nmsg.nm_lmsg, 0);
2653                 return 0;
2654         }
2655         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
2656                              sc->sc_brttimeout);
2657         return 0;
2658 }
2659
2660 static int
2661 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
2662                struct ifnet *dst_if, uint8_t flags)
2663 {
2664         struct netmsg_brsaddr brmsg;
2665
2666         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2667
2668         netmsg_init(&brmsg.br_nmsg, &curthread->td_msgport, 0,
2669                     bridge_rtinstall_handler);
2670         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
2671         brmsg.br_dst_if = dst_if;
2672         brmsg.br_flags = flags;
2673         brmsg.br_setflags = 1;
2674         brmsg.br_softc = sc;
2675         brmsg.br_rtinfo = NULL;
2676
2677         return ifnet_domsg(&brmsg.br_nmsg.nm_lmsg, 0);
2678 }
2679
2680 /*
2681  * bridge_rtlookup:
2682  *
2683  *      Lookup the destination interface for an address.
2684  */
2685 static struct ifnet *
2686 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2687 {
2688         struct bridge_rtnode *brt;
2689
2690         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
2691                 return NULL;
2692         return brt->brt_info->bri_ifp;
2693 }
2694
2695 static void
2696 bridge_rtreap_handler(struct netmsg *nmsg)
2697 {
2698         struct bridge_softc *sc = nmsg->nm_lmsg.u.ms_resultp;
2699         struct bridge_rtnode *brt, *nbrt;
2700
2701         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
2702                 if (brt->brt_info->bri_dead)
2703                         bridge_rtnode_destroy(sc, brt);
2704         }
2705         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
2706 }
2707
2708 static void
2709 bridge_rtreap(struct bridge_softc *sc)
2710 {
2711         struct netmsg nmsg;
2712
2713         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2714
2715         netmsg_init(&nmsg, &curthread->td_msgport, 0, bridge_rtreap_handler);
2716         nmsg.nm_lmsg.u.ms_resultp = sc;
2717
2718         ifnet_domsg(&nmsg.nm_lmsg, 0);
2719 }
2720
2721 static void
2722 bridge_rtreap_async(struct bridge_softc *sc)
2723 {
2724         struct netmsg *nmsg;
2725
2726         nmsg = kmalloc(sizeof(*nmsg), M_LWKTMSG, M_WAITOK);
2727
2728         netmsg_init(nmsg, &netisr_afree_rport, 0, bridge_rtreap_handler);
2729         nmsg->nm_lmsg.u.ms_resultp = sc;
2730
2731         ifnet_sendmsg(&nmsg->nm_lmsg, 0);
2732 }
2733
2734 /*
2735  * bridge_rttrim:
2736  *
2737  *      Trim the routine table so that we have a number
2738  *      of routing entries less than or equal to the
2739  *      maximum number.
2740  */
2741 static void
2742 bridge_rttrim(struct bridge_softc *sc)
2743 {
2744         struct bridge_rtnode *brt;
2745         int dead;
2746
2747         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2748
2749         /* Make sure we actually need to do this. */
2750         if (sc->sc_brtcnt <= sc->sc_brtmax)
2751                 return;
2752
2753         /*
2754          * Find out how many rtnodes are dead
2755          */
2756         dead = bridge_rtage_finddead(sc);
2757         KKASSERT(dead <= sc->sc_brtcnt);
2758
2759         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
2760                 /* Enough dead rtnodes are found */
2761                 bridge_rtreap(sc);
2762                 return;
2763         }
2764
2765         /*
2766          * Kill some dynamic rtnodes to meet the brtmax
2767          */
2768         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2769                 struct bridge_rtinfo *bri = brt->brt_info;
2770
2771                 if (bri->bri_dead) {
2772                         /*
2773                          * We have counted this rtnode in
2774                          * bridge_rtage_finddead()
2775                          */
2776                         continue;
2777                 }
2778
2779                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2780                         bri->bri_dead = 1;
2781                         ++dead;
2782                         KKASSERT(dead <= sc->sc_brtcnt);
2783
2784                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
2785                                 /* Enough rtnodes are collected */
2786                                 break;
2787                         }
2788                 }
2789         }
2790         if (dead)
2791                 bridge_rtreap(sc);
2792 }
2793
2794 /*
2795  * bridge_timer:
2796  *
2797  *      Aging timer for the bridge.
2798  */
2799 static void
2800 bridge_timer(void *arg)
2801 {
2802         struct bridge_softc *sc = arg;
2803         struct lwkt_msg *lmsg;
2804
2805         KKASSERT(mycpuid == BRIDGE_CFGCPU);
2806
2807         crit_enter();
2808
2809         if (callout_pending(&sc->sc_brcallout) ||
2810             !callout_active(&sc->sc_brcallout)) {
2811                 crit_exit();
2812                 return;
2813         }
2814         callout_deactivate(&sc->sc_brcallout);
2815
2816         lmsg = &sc->sc_brtimemsg.nm_lmsg;
2817         KKASSERT(lmsg->ms_flags & MSGF_DONE);
2818         lwkt_sendmsg(BRIDGE_CFGPORT, lmsg);
2819
2820         crit_exit();
2821 }
2822
2823 static void
2824 bridge_timer_handler(struct netmsg *nmsg)
2825 {
2826         struct bridge_softc *sc = nmsg->nm_lmsg.u.ms_resultp;
2827
2828         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
2829
2830         crit_enter();
2831         /* Reply ASAP */
2832         lwkt_replymsg(&nmsg->nm_lmsg, 0);
2833         crit_exit();
2834
2835         bridge_rtage(sc);
2836         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
2837                 callout_reset(&sc->sc_brcallout,
2838                     bridge_rtable_prune_period * hz, bridge_timer, sc);
2839         }
2840 }
2841
2842 static int
2843 bridge_rtage_finddead(struct bridge_softc *sc)
2844 {
2845         struct bridge_rtnode *brt;
2846         int dead = 0;
2847
2848         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2849                 struct bridge_rtinfo *bri = brt->brt_info;
2850
2851                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2852                     time_second >= bri->bri_expire) {
2853                         bri->bri_dead = 1;
2854                         ++dead;
2855                         KKASSERT(dead <= sc->sc_brtcnt);
2856                 }
2857         }
2858         return dead;
2859 }
2860
2861 /*
2862  * bridge_rtage:
2863  *
2864  *      Perform an aging cycle.
2865  */
2866 static void
2867 bridge_rtage(struct bridge_softc *sc)
2868 {
2869         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2870
2871         if (bridge_rtage_finddead(sc))
2872                 bridge_rtreap(sc);
2873 }
2874
2875 /*
2876  * bridge_rtflush:
2877  *
2878  *      Remove all dynamic addresses from the bridge.
2879  */
2880 static void
2881 bridge_rtflush(struct bridge_softc *sc, int bf)
2882 {
2883         struct bridge_rtnode *brt;
2884         int reap;
2885
2886         reap = 0;
2887         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2888                 struct bridge_rtinfo *bri = brt->brt_info;
2889
2890                 if ((bf & IFBF_FLUSHALL) ||
2891                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2892                         bri->bri_dead = 1;
2893                         reap = 1;
2894                 }
2895         }
2896         if (reap) {
2897                 if (bf & IFBF_FLUSHSYNC)
2898                         bridge_rtreap(sc);
2899                 else
2900                         bridge_rtreap_async(sc);
2901         }
2902 }
2903
2904 /*
2905  * bridge_rtdaddr:
2906  *
2907  *      Remove an address from the table.
2908  */
2909 static int
2910 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2911 {
2912         struct bridge_rtnode *brt;
2913
2914         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
2915
2916         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
2917                 return (ENOENT);
2918
2919         /* TODO: add a cheaper delete operation */
2920         brt->brt_info->bri_dead = 1;
2921         bridge_rtreap(sc);
2922         return (0);
2923 }
2924
2925 /*
2926  * bridge_rtdelete:
2927  *
2928  *      Delete routes to a speicifc member interface.
2929  */
2930 void
2931 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
2932 {
2933         struct bridge_rtnode *brt;
2934         int reap;
2935
2936         reap = 0;
2937         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
2938                 struct bridge_rtinfo *bri = brt->brt_info;
2939
2940                 if (bri->bri_ifp == ifp &&
2941                     ((bf & IFBF_FLUSHALL) ||
2942                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
2943                         bri->bri_dead = 1;
2944                         reap = 1;
2945                 }
2946         }
2947         if (reap) {
2948                 if (bf & IFBF_FLUSHSYNC)
2949                         bridge_rtreap(sc);
2950                 else
2951                         bridge_rtreap_async(sc);
2952         }
2953 }
2954
2955 /*
2956  * bridge_rtable_init:
2957  *
2958  *      Initialize the route table for this bridge.
2959  */
2960 static void
2961 bridge_rtable_init(struct bridge_softc *sc)
2962 {
2963         int cpu;
2964
2965         /*
2966          * Initialize per-cpu hash tables
2967          */
2968         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
2969                                  M_DEVBUF, M_WAITOK);
2970         for (cpu = 0; cpu < ncpus; ++cpu) {
2971                 int i;
2972
2973                 sc->sc_rthashs[cpu] =
2974                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
2975                         M_DEVBUF, M_WAITOK);
2976
2977                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2978                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
2979         }
2980         sc->sc_rthash_key = karc4random();
2981
2982         /*
2983          * Initialize per-cpu lists
2984          */
2985         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
2986                                  M_DEVBUF, M_WAITOK);
2987         for (cpu = 0; cpu < ncpus; ++cpu)
2988                 LIST_INIT(&sc->sc_rtlists[cpu]);
2989 }
2990
2991 /*
2992  * bridge_rtable_fini:
2993  *
2994  *      Deconstruct the route table for this bridge.
2995  */
2996 static void
2997 bridge_rtable_fini(struct bridge_softc *sc)
2998 {
2999         int cpu;
3000
3001         /*
3002          * Free per-cpu hash tables
3003          */
3004         for (cpu = 0; cpu < ncpus; ++cpu)
3005                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3006         kfree(sc->sc_rthashs, M_DEVBUF);
3007
3008         /*
3009          * Free per-cpu lists
3010          */
3011         kfree(sc->sc_rtlists, M_DEVBUF);
3012 }
3013
3014 /*
3015  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3016  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3017  */
3018 #define mix(a, b, c)                                                    \
3019 do {                                                                    \
3020         a -= b; a -= c; a ^= (c >> 13);                                 \
3021         b -= c; b -= a; b ^= (a << 8);                                  \
3022         c -= a; c -= b; c ^= (b >> 13);                                 \
3023         a -= b; a -= c; a ^= (c >> 12);                                 \
3024         b -= c; b -= a; b ^= (a << 16);                                 \
3025         c -= a; c -= b; c ^= (b >> 5);                                  \
3026         a -= b; a -= c; a ^= (c >> 3);                                  \
3027         b -= c; b -= a; b ^= (a << 10);                                 \
3028         c -= a; c -= b; c ^= (b >> 15);                                 \
3029 } while (/*CONSTCOND*/0)
3030
3031 static __inline uint32_t
3032 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3033 {
3034         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3035
3036         b += addr[5] << 8;
3037         b += addr[4];
3038         a += addr[3] << 24;
3039         a += addr[2] << 16;
3040         a += addr[1] << 8;
3041         a += addr[0];
3042
3043         mix(a, b, c);
3044
3045         return (c & BRIDGE_RTHASH_MASK);
3046 }
3047
3048 #undef mix
3049
/*
 * Compare two ETHER_ADDR_LEN byte addresses byte by byte.
 * Returns 0 if they are equal, otherwise the difference (a[i] - b[i])
 * of the first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0)
			return (delta);
	}

	return (0);
}
3061
3062 /*
3063  * bridge_rtnode_lookup:
3064  *
3065  *      Look up a bridge route node for the specified destination.
3066  */
3067 static struct bridge_rtnode *
3068 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3069 {
3070         struct bridge_rtnode *brt;
3071         uint32_t hash;
3072         int dir;
3073
3074         hash = bridge_rthash(sc, addr);
3075         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3076                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3077                 if (dir == 0)
3078                         return (brt);
3079                 if (dir > 0)
3080                         return (NULL);
3081         }
3082
3083         return (NULL);
3084 }
3085
3086 /*
3087  * bridge_rtnode_insert:
3088  *
3089  *      Insert the specified bridge node into the route table.
3090  *      Caller has to make sure that rtnode does not exist.
3091  */
3092 static void
3093 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3094 {
3095         struct bridge_rtnode *lbrt;
3096         uint32_t hash;
3097         int dir;
3098
3099         hash = bridge_rthash(sc, brt->brt_addr);
3100
3101         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3102         if (lbrt == NULL) {
3103                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash], brt, brt_hash);
3104                 goto out;
3105         }
3106
3107         do {
3108                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3109                 KASSERT(dir != 0, ("rtnode already exist\n"));
3110
3111                 if (dir > 0) {
3112                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3113                         goto out;
3114                 }
3115                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3116                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3117                         goto out;
3118                 }
3119                 lbrt = LIST_NEXT(lbrt, brt_hash);
3120         } while (lbrt != NULL);
3121
3122         panic("no suitable position found for rtnode\n");
3123 out:
3124         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3125         if (mycpuid == 0) {
3126                 /*
3127                  * Update the brtcnt.
3128                  * We only need to do it once and we do it on CPU0.
3129                  */
3130                 sc->sc_brtcnt++;
3131         }
3132 }
3133
3134 /*
3135  * bridge_rtnode_destroy:
3136  *
3137  *      Destroy a bridge rtnode.
3138  */
3139 static void
3140 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3141 {
3142         LIST_REMOVE(brt, brt_hash);
3143         LIST_REMOVE(brt, brt_list);
3144
3145         if (mycpuid + 1 == ncpus) {
3146                 /* Free rtinfo associated with rtnode on the last cpu */
3147                 kfree(brt->brt_info, M_DEVBUF);
3148         }
3149         kfree(brt, M_DEVBUF);
3150
3151         if (mycpuid == 0) {
3152                 /* Update brtcnt only on CPU0 */
3153                 sc->sc_brtcnt--;
3154         }
3155 }
3156
3157 static __inline int
3158 bridge_post_pfil(struct mbuf *m)
3159 {
3160         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3161                 return EOPNOTSUPP;
3162
3163         /* Not yet */
3164         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3165                 return EOPNOTSUPP;
3166
3167         return 0;
3168 }
3169
3170 /*
3171  * Send bridge packets through pfil if they are one of the types pfil can deal
3172  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3173  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3174  * that interface.
3175  */
3176 static int
3177 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3178 {
3179         int snap, error, i, hlen;
3180         struct ether_header *eh1, eh2;
3181         struct ip *ip;
3182         struct llc llc1;
3183         u_int16_t ether_type;
3184
3185         snap = 0;
3186         error = -1;     /* Default error if not error == 0 */
3187
3188         if (pfil_bridge == 0 && pfil_member == 0)
3189                 return (0); /* filtering is disabled */
3190
3191         i = min((*mp)->m_pkthdr.len, max_protohdr);
3192         if ((*mp)->m_len < i) {
3193                 *mp = m_pullup(*mp, i);
3194                 if (*mp == NULL) {
3195                         kprintf("%s: m_pullup failed\n", __func__);
3196                         return (-1);
3197                 }
3198         }
3199
3200         eh1 = mtod(*mp, struct ether_header *);
3201         ether_type = ntohs(eh1->ether_type);
3202
3203         /*
3204          * Check for SNAP/LLC.
3205          */
3206         if (ether_type < ETHERMTU) {
3207                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3208
3209                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3210                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3211                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3212                     llc2->llc_control == LLC_UI) {
3213                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3214                         snap = 1;
3215                 }
3216         }
3217
3218         /*
3219          * If we're trying to filter bridge traffic, don't look at anything
3220          * other than IP and ARP traffic.  If the filter doesn't understand
3221          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3222          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3223          * but of course we don't have an AppleTalk filter to begin with.
3224          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3225          * ARP traffic.)
3226          */
3227         switch (ether_type) {
3228         case ETHERTYPE_ARP:
3229         case ETHERTYPE_REVARP:
3230                 return (0); /* Automatically pass */
3231
3232         case ETHERTYPE_IP:
3233 #ifdef INET6
3234         case ETHERTYPE_IPV6:
3235 #endif /* INET6 */
3236                 break;
3237
3238         default:
3239                 /*
3240                  * Check to see if the user wants to pass non-ip
3241                  * packets, these will not be checked by pfil(9)
3242                  * and passed unconditionally so the default is to drop.
3243                  */
3244                 if (pfil_onlyip)
3245                         goto bad;
3246         }
3247
3248         /* Strip off the Ethernet header and keep a copy. */
3249         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3250         m_adj(*mp, ETHER_HDR_LEN);
3251
3252         /* Strip off snap header, if present */
3253         if (snap) {
3254                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3255                 m_adj(*mp, sizeof(struct llc));
3256         }
3257
3258         /*
3259          * Check the IP header for alignment and errors
3260          */
3261         if (dir == PFIL_IN) {
3262                 switch (ether_type) {
3263                 case ETHERTYPE_IP:
3264                         error = bridge_ip_checkbasic(mp);
3265                         break;
3266 #ifdef INET6
3267                 case ETHERTYPE_IPV6:
3268                         error = bridge_ip6_checkbasic(mp);
3269                         break;
3270 #endif /* INET6 */
3271                 default:
3272                         error = 0;
3273                 }
3274                 if (error)
3275                         goto bad;
3276         }
3277
3278         error = 0;
3279
3280         /*
3281          * Run the packet through pfil
3282          */
3283         switch (ether_type) {
3284         case ETHERTYPE_IP:
3285                 /*
3286                  * before calling the firewall, swap fields the same as
3287                  * IP does. here we assume the header is contiguous
3288                  */
3289                 ip = mtod(*mp, struct ip *);
3290
3291                 ip->ip_len = ntohs(ip->ip_len);
3292                 ip->ip_off = ntohs(ip->ip_off);
3293
3294                 /*
3295                  * Run pfil on the member interface and the bridge, both can
3296                  * be skipped by clearing pfil_member or pfil_bridge.
3297                  *
3298                  * Keep the order:
3299                  *   in_if -> bridge_if -> out_if
3300                  */
3301                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3302                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3303                         if (*mp == NULL || error != 0) /* filter may consume */
3304                                 break;
3305                         error = bridge_post_pfil(*mp);
3306                         if (error)
3307                                 break;
3308                 }
3309
3310                 if (pfil_member && ifp != NULL) {
3311                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3312                         if (*mp == NULL || error != 0) /* filter may consume */
3313                                 break;
3314                         error = bridge_post_pfil(*mp);
3315                         if (error)
3316                                 break;
3317                 }
3318
3319                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3320                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3321                         if (*mp == NULL || error != 0) /* filter may consume */
3322                                 break;
3323                         error = bridge_post_pfil(*mp);
3324                         if (error)
3325                                 break;
3326                 }
3327
3328                 /* check if we need to fragment the packet */
3329                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3330                         i = (*mp)->m_pkthdr.len;
3331                         if (i > ifp->if_mtu) {
3332                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3333                                             &llc1);
3334                                 return (error);
3335                         }
3336                 }
3337
3338                 /* Recalculate the ip checksum and restore byte ordering */
3339                 ip = mtod(*mp, struct ip *);
3340                 hlen = ip->ip_hl << 2;
3341                 if (hlen < sizeof(struct ip))
3342                         goto bad;
3343                 if (hlen > (*mp)->m_len) {
3344                         if ((*mp = m_pullup(*mp, hlen)) == 0)
3345                                 goto bad;
3346                         ip = mtod(*mp, struct ip *);
3347                         if (ip == NULL)
3348                                 goto bad;
3349                 }
3350                 ip->ip_len = htons(ip->ip_len);
3351                 ip->ip_off = htons(ip->ip_off);
3352                 ip->ip_sum = 0;
3353                 if (hlen == sizeof(struct ip))
3354                         ip->ip_sum = in_cksum_hdr(ip);
3355                 else
3356                         ip->ip_sum = in_cksum(*mp, hlen);
3357
3358                 break;
3359 #ifdef INET6
3360         case ETHERTYPE_IPV6:
3361                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
3362                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3363                                         dir);
3364
3365                 if (*mp == NULL || error != 0) /* filter may consume */
3366                         break;
3367
3368                 if (pfil_member && ifp != NULL)
3369                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
3370                                         dir);
3371
3372                 if (*mp == NULL || error != 0) /* filter may consume */
3373                         break;
3374
3375                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
3376                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
3377                                         dir);
3378                 break;
3379 #endif
3380         default:
3381                 error = 0;
3382                 break;
3383         }
3384
3385         if (*mp == NULL)
3386                 return (error);
3387         if (error != 0)
3388                 goto bad;
3389
3390         error = -1;
3391
3392         /*
3393          * Finally, put everything back the way it was and return
3394          */
3395         if (snap) {
3396                 M_PREPEND(*mp, sizeof(struct llc), MB_DONTWAIT);
3397                 if (*mp == NULL)
3398                         return (error);
3399                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
3400         }
3401
3402         M_PREPEND(*mp, ETHER_HDR_LEN, MB_DONTWAIT);
3403         if (*mp == NULL)
3404                 return (error);
3405         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
3406
3407         return (0);
3408
3409 bad:
3410         m_freem(*mp);
3411         *mp = NULL;
3412         return (error);
3413 }
3414
3415 /*
3416  * Perform basic checks on header size since
3417  * pfil assumes ip_input has already processed
3418  * it for it.  Cut-and-pasted from ip_input.c.
3419  * Given how simple the IPv6 version is,
3420  * does the IPv4 version really need to be
3421  * this complicated?
3422  *
3423  * XXX Should we update ipstat here, or not?
3424  * XXX Right now we update ipstat but not
3425  * XXX csum_counter.
3426  */
3427 static int
3428 bridge_ip_checkbasic(struct mbuf **mp)
3429 {
3430         struct mbuf *m = *mp;
3431         struct ip *ip;
3432         int len, hlen;
3433         u_short sum;
3434
3435         if (*mp == NULL)
3436                 return (-1);
3437 #if notyet
3438         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3439                 if ((m = m_copyup(m, sizeof(struct ip),
3440                         (max_linkhdr + 3) & ~3)) == NULL) {
3441                         /* XXXJRT new stat, please */
3442                         ipstat.ips_toosmall++;
3443                         goto bad;
3444                 }
3445         } else
3446 #endif
3447 #ifndef __predict_false
3448 #define __predict_false(x) x
3449 #endif
3450          if (__predict_false(m->m_len < sizeof (struct ip))) {
3451                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
3452                         ipstat.ips_toosmall++;
3453                         goto bad;
3454                 }
3455         }
3456         ip = mtod(m, struct ip *);
3457         if (ip == NULL) goto bad;
3458
3459         if (ip->ip_v != IPVERSION) {
3460                 ipstat.ips_badvers++;
3461                 goto bad;
3462         }
3463         hlen = ip->ip_hl << 2;
3464         if (hlen < sizeof(struct ip)) { /* minimum header length */
3465                 ipstat.ips_badhlen++;
3466                 goto bad;
3467         }
3468         if (hlen > m->m_len) {
3469                 if ((m = m_pullup(m, hlen)) == 0) {
3470                         ipstat.ips_badhlen++;
3471                         goto bad;
3472                 }
3473                 ip = mtod(m, struct ip *);
3474                 if (ip == NULL) goto bad;
3475         }
3476
3477         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
3478                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
3479         } else {
3480                 if (hlen == sizeof(struct ip)) {
3481                         sum = in_cksum_hdr(ip);
3482                 } else {
3483                         sum = in_cksum(m, hlen);
3484                 }
3485         }
3486         if (sum) {
3487                 ipstat.ips_badsum++;
3488                 goto bad;
3489         }
3490
3491         /* Retrieve the packet length. */
3492         len = ntohs(ip->ip_len);
3493
3494         /*
3495          * Check for additional length bogosity
3496          */
3497         if (len < hlen) {
3498                 ipstat.ips_badlen++;
3499                 goto bad;
3500         }
3501
3502         /*
3503          * Check that the amount of data in the buffers
3504          * is as at least much as the IP header would have us expect.
3505          * Drop packet if shorter than we expect.
3506          */
3507         if (m->m_pkthdr.len < len) {
3508                 ipstat.ips_tooshort++;
3509                 goto bad;
3510         }
3511
3512         /* Checks out, proceed */
3513         *mp = m;
3514         return (0);
3515
3516 bad:
3517         *mp = m;
3518         return (-1);
3519 }
3520
#ifdef INET6
/*
 * Basic IPv6 header checks, the counterpart of the IPv4 checks
 * done for pfil.
 * Cut-and-pasted from ip6_input.c.
 * XXX Should we update ip6stat, or not?
 *
 * Makes sure the base IPv6 header is in the first mbuf and that the
 * version field is IPV6_VERSION.  Returns 0 on success and -1 on
 * failure; *mp is updated to the current mbuf in both cases (NULL
 * after a failed m_pullup()) and the mbuf is not freed here.
 */
static int
bridge_ip6_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6;
	int error = -1;

	/*
	 * If the IPv6 header is not aligned, slurp it up into a new
	 * mbuf with space for link headers, in the event we forward
	 * it.  Otherwise, if it is aligned, make sure the entire base
	 * IPv6 header is in the first mbuf of the chain.
	 */
#if notyet
	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
		struct ifnet *inifp = m->m_pkthdr.rcvif;
		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
			    (max_linkhdr + 3) & ~3)) == NULL) {
			/* XXXJRT new stat, please */
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto done;
		}
	} else
#endif
	if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
		/* Remember rcvif, 'm' is NULL if the pullup fails */
		struct ifnet *inifp = m->m_pkthdr.rcvif;

		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto done;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);
	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto done;
	}

	/* Checks out, proceed */
	error = 0;

done:
	*mp = m;
	return (error);
}
#endif /* INET6 */
3577
3578 /*
3579  * bridge_fragment:
3580  *
3581  *      Return a fragmented mbuf chain.
3582  */
3583 static int
3584 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
3585     int snap, struct llc *llc)
3586 {
3587         struct mbuf *m0;
3588         struct ip *ip;
3589         int error = -1;
3590
3591         if (m->m_len < sizeof(struct ip) &&
3592             (m = m_pullup(m, sizeof(struct ip))) == NULL)
3593                 goto out;
3594         ip = mtod(m, struct ip *);
3595
3596         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
3597                     CSUM_DELAY_IP);
3598         if (error)
3599                 goto out;
3600
3601         /* walk the chain and re-add the Ethernet header */
3602         for (m0 = m; m0; m0 = m0->m_nextpkt) {
3603                 if (error == 0) {
3604                         if (snap) {
3605                                 M_PREPEND(m0, sizeof(struct llc), MB_DONTWAIT);
3606                                 if (m0 == NULL) {
3607                                         error = ENOBUFS;
3608                                         continue;
3609                                 }
3610                                 bcopy(llc, mtod(m0, caddr_t),
3611                                     sizeof(struct llc));
3612                         }
3613                         M_PREPEND(m0, ETHER_HDR_LEN, MB_DONTWAIT);
3614                         if (m0 == NULL) {
3615                                 error = ENOBUFS;
3616                                 continue;
3617                         }
3618                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
3619                 } else 
3620                         m_freem(m);
3621         }
3622
3623         if (error == 0)
3624                 ipstat.ips_fragmented++;
3625
3626         return (error);
3627
3628 out:
3629         if (m != NULL)
3630                 m_freem(m);
3631         return (error);
3632 }
3633
3634 static void
3635 bridge_enqueue_handler(struct netmsg *nmsg)
3636 {
3637         struct netmsg_packet *nmp;
3638         struct ifnet *dst_ifp;
3639         struct mbuf *m;
3640
3641         nmp = (struct netmsg_packet *)nmsg;
3642         m = nmp->nm_packet;
3643         dst_ifp = nmp->nm_netmsg.nm_lmsg.u.ms_resultp;
3644
3645         bridge_handoff(dst_ifp, m);
3646 }
3647
3648 static void
3649 bridge_handoff(struct ifnet *dst_ifp, struct mbuf *m)
3650 {
3651         struct mbuf *m0;
3652
3653         /* We may be sending a fragment so traverse the mbuf */
3654         for (; m; m = m0) {
3655                 struct altq_pktattr pktattr;
3656
3657                 m0 = m->m_nextpkt;
3658                 m->m_nextpkt = NULL;
3659
3660                 if (ifq_is_enabled(&dst_ifp->if_snd))
3661                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
3662
3663                 ifq_dispatch(dst_ifp, m, &pktattr);
3664         }
3665 }
3666
3667 static void
3668 bridge_control_dispatch(struct netmsg *nmsg)
3669 {
3670         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)nmsg;
3671         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
3672         int error;
3673
3674         ifnet_serialize_all(bifp);
3675         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
3676         ifnet_deserialize_all(bifp);
3677
3678         lwkt_replymsg(&nmsg->nm_lmsg, error);
3679 }
3680
3681 static int
3682 bridge_control(struct bridge_softc *sc, u_long cmd,
3683                bridge_ctl_t bc_func, void *bc_arg)
3684 {
3685         struct ifnet *bifp = sc->sc_ifp;
3686         struct netmsg_brctl bc_msg;
3687         struct netmsg *nmsg;
3688         int error;
3689
3690         ASSERT_IFNET_SERIALIZED_ALL(bifp);
3691
3692         bzero(&bc_msg, sizeof(bc_msg));
3693         nmsg = &bc_msg.bc_nmsg;
3694
3695         netmsg_init(nmsg, &curthread->td_msgport, 0, bridge_control_dispatch);
3696         bc_msg.bc_func = bc_func;
3697         bc_msg.bc_sc = sc;
3698         bc_msg.bc_arg = bc_arg;
3699
3700         ifnet_deserialize_all(bifp);
3701         error = lwkt_domsg(BRIDGE_CFGPORT, &nmsg->nm_lmsg, 0);
3702         ifnet_serialize_all(bifp);
3703         return error;
3704 }
3705
3706 static void
3707 bridge_add_bif_handler(struct netmsg *nmsg)
3708 {
3709         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)nmsg;
3710         struct bridge_softc *sc;
3711         struct bridge_iflist *bif;
3712
3713         sc = amsg->br_softc;
3714
3715         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
3716         bif->bif_ifp = amsg->br_bif_ifp;
3717         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
3718         bif->bif_onlist = 1;
3719         bif->bif_info = amsg->br_bif_info;
3720
3721         LIST_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
3722
3723         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
3724 }
3725
3726 static void
3727 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
3728                struct ifnet *ifp)
3729 {
3730         struct netmsg_braddbif amsg;
3731
3732         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3733
3734         netmsg_init(&amsg.br_nmsg, &curthread->td_msgport, 0,
3735                     bridge_add_bif_handler);
3736         amsg.br_softc = sc;
3737         amsg.br_bif_info = bif_info;
3738         amsg.br_bif_ifp = ifp;
3739
3740         ifnet_domsg(&amsg.br_nmsg.nm_lmsg, 0);
3741 }
3742
3743 static void
3744 bridge_del_bif_handler(struct netmsg *nmsg)
3745 {
3746         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)nmsg;
3747         struct bridge_softc *sc;
3748         struct bridge_iflist *bif;
3749
3750         sc = dmsg->br_softc;
3751
3752         /*
3753          * Locate the bif associated with the br_bif_info
3754          * on the current CPU
3755          */
3756         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
3757         KKASSERT(bif != NULL && bif->bif_onlist);
3758
3759         /* Remove the bif from the current CPU's iflist */
3760         bif->bif_onlist = 0;
3761         LIST_REMOVE(bif, bif_next);
3762
3763         /* Save the removed bif for later freeing */
3764         LIST_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
3765
3766         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
3767 }
3768
3769 static void
3770 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
3771                struct bridge_iflist_head *saved_bifs)
3772 {
3773         struct netmsg_brdelbif dmsg;
3774
3775         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3776
3777         netmsg_init(&dmsg.br_nmsg, &curthread->td_msgport, 0,
3778                     bridge_del_bif_handler);
3779         dmsg.br_softc = sc;
3780         dmsg.br_bif_info = bif_info;
3781         dmsg.br_bif_list = saved_bifs;
3782
3783         ifnet_domsg(&dmsg.br_nmsg.nm_lmsg, 0);
3784 }
3785
3786 static void
3787 bridge_set_bifflags_handler(struct netmsg *nmsg)
3788 {
3789         struct netmsg_brsflags *smsg = (struct netmsg_brsflags *)nmsg;
3790         struct bridge_softc *sc;
3791         struct bridge_iflist *bif;
3792
3793         sc = smsg->br_softc;
3794
3795         /*
3796          * Locate the bif associated with the br_bif_info
3797          * on the current CPU
3798          */
3799         bif = bridge_lookup_member_ifinfo(sc, smsg->br_bif_info);
3800         KKASSERT(bif != NULL && bif->bif_onlist);
3801
3802         bif->bif_flags = smsg->br_bif_flags;
3803
3804         ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
3805 }
3806
3807 static void
3808 bridge_set_bifflags(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
3809                     uint32_t bif_flags)
3810 {
3811         struct netmsg_brsflags smsg;
3812
3813         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3814
3815         netmsg_init(&smsg.br_nmsg, &curthread->td_msgport, 0,
3816                     bridge_set_bifflags_handler);
3817         smsg.br_softc = sc;
3818         smsg.br_bif_info = bif_info;
3819         smsg.br_bif_flags = bif_flags;
3820
3821         ifnet_domsg(&smsg.br_nmsg.nm_lmsg, 0);
3822 }