bridge: Fix comment.
[dragonfly.git] / sys / net / bridge / if_bridge.c
1 /*
2  * Copyright 2001 Wasabi Systems, Inc.
3  * All rights reserved.
4  *
5  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed for the NetBSD Project by
18  *      Wasabi Systems, Inc.
19  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
20  *    or promote products derived from this software without specific prior
21  *    written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /*
37  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
38  * All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *      This product includes software developed by Jason L. Wright
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
57  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
58  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
63  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64  * POSSIBILITY OF SUCH DAMAGE.
65  *
66  * $OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp $
67  * $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $
68  * $FreeBSD: src/sys/net/if_bridge.c,v 1.26 2005/10/13 23:05:55 thompsa Exp $
69  */
70
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *      - Currently only supports Ethernet-like interfaces (Ethernet,
77  *        802.11, VLANs on Ethernet, etc.).  Figure out a nice way
78  *        to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *        consider heterogeneous bridges).
80  *
81  *
82  * The bridge's route information is duplicated to each CPU:
83  *
84  *      CPU0          CPU1          CPU2          CPU3
85  * +-----------+ +-----------+ +-----------+ +-----------+
86  * |  rtnode   | |  rtnode   | |  rtnode   | |  rtnode   |
87  * |           | |           | |           | |           |
88  * | dst eaddr | | dst eaddr | | dst eaddr | | dst eaddr |
89  * +-----------+ +-----------+ +-----------+ +-----------+
90  *       |         |                     |         |
91  *       |         |                     |         |
92  *       |         |     +----------+    |         |
93  *       |         |     |  rtinfo  |    |         |
94  *       |         +---->|          |<---+         |
95  *       |               |  flags   |              |
96  *       +-------------->|  timeout |<-------------+
97  *                       |  dst_ifp |
98  *                       +----------+
99  *
100  * We choose to put timeout and dst_ifp into the shared part, so updating
101  * them is cheaper than using message forwarding.  Also there is no need
102  * to use a spinlock to protect the updates: timeout and dst_ifp are not
103  * related, and the update order of these fields does not matter.  The
104  * cache pollution caused by the shared part should not be heavy: in a
105  * stable setup, dst_ifp will probably not change during the rtnode's
106  * lifetime, while timeout is refreshed once per second; most of the
107  * time, timeout and dst_ifp are only read.
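 *
 * A minimal sketch of that split (an assumption for illustration only; the
 * real structures are struct bridge_rtnode and struct bridge_rtinfo in
 * net/bridge/if_bridgevar.h, with different field names):
 *
 * struct rtinfo_sketch {              // shared part, one per dst eaddr
 *     struct ifnet *dst_ifp;          // destination interface
 *     unsigned long timeout;          // refreshed about once per second
 *     int flags;
 * };
 * struct rtnode_sketch {              // per-cpu part, ncpus copies
 *     uint8_t dst_eaddr[6];           // destination MAC address
 *     struct rtinfo_sketch *rtinfo;   // -> the shared part above
 * };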
108  *
109  *
110  * Bridge route information installation on bridge_input path:
111  *
112  *      CPU0           CPU1         CPU2          CPU3
113  *
114  *                                 netisr2
115  *                                    |
116  *                                alloc nmsg
117  *                    snd nmsg        |
118  *                    w/o rtinfo      |
119  *     netisr0<-----------------------+
120  *        |                           :
121  *    lookup dst                      :
122  *   rtnode exists?(Y)free nmsg       :
123  *        |(N)                        :
124  *        |                           :
125  *  alloc rtinfo                      :
126  *  alloc rtnode                      :
127  * install rtnode                     :
128  *        |                           :
129  *        +---------->netisr1         :
130  *        : fwd nmsg     |            :
131  *        : w/ rtinfo    |            :
132  *        :              |            :
133  *        :              |            :
134  *                  alloc rtnode      :
135  *                (w/ nmsg's rtinfo)  :
136  *                 install rtnode     :
137  *                       |            :
138  *                       +----------->|
139  *                       : fwd nmsg   |
140  *                       : w/ rtinfo  |
141  *                       :            |
142  *                       :     same as netisr1
143  *                                    |
144  *                                    +---------->netisr3
145  *                                    : fwd nmsg     |
146  *                                    : w/ rtinfo    |
147  *                                    :              |
148  *                                    :       same as netisr1
149  *                                               free nmsg
150  *                                                   :
151  *                                                   :
152  *
153  * The netmsgs forwarded between the netisrs are allocated with
154  * (M_WAITOK|M_NULLOK), so the allocation will not fail in most cases
155  * (route information is too precious not to be installed :).  Since
156  * multiple netisrs may try to install route information for the same
157  * dst eaddr, we look up the route information in netisr0.  However,
158  * this lookup only needs to be performed on netisr0, which is the
159  * starting point of the route information installation process.
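 *
 * A minimal sketch of the per-cpu install-and-forward step (an assumption
 * for illustration; the real handler is bridge_rtinstall_handler() below
 * and carries more state in its netmsg):
 *
 * static void
 * rtinstall_sketch(netmsg_t nmsg)
 * {
 *     int next_cpu = mycpuid + 1;
 *
 *     // ... allocate and insert this cpu's rtnode here ...
 *     if (next_cpu < ncpus) {
 *         // pass the message on to the next netisr
 *         lwkt_forwardmsg(netisr_cpuport(next_cpu), &nmsg->lmsg);
 *     } else {
 *         // last cpu: free the netmsg, ending the chain
 *     }
 * }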
160  *
161  *
162  * Bridge route information deleting/flushing:
163  *
164  *  CPU0            CPU1              CPU2             CPU3
165  *
166  * netisr0
167  *    |
168  *  find suitable rtnodes,
169  *  mark their rtinfo dead
170  *    |
171  *    | domsg <-------------------------------------------+
172  *    : delete rtnodes                                    | replymsg
173  *    : w/ dead rtinfo                                    |
174  *    :                                                   |
175  *    :  fwdmsg             fwdmsg            fwdmsg      |
176  *    :----------> netisr1 --------> netisr2 --------> netisr3
177  *              delete rtnodes    delete rtnodes    delete rtnodes
178  *              w/ dead rtinfo    w/ dead rtinfo    w/ dead rtinfo
179  *                                                 free dead rtinfos
180  *
181  * All deleting/flushing operations are serialized by netisr0, so each
182  * operation only reaps the route information marked dead by itself.
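 *
 * A minimal sketch of the per-cpu reap step (an assumption for
 * illustration; the real code is bridge_rtreap_handler() below, and the
 * "dead" flag stands in for however the rtinfo is actually marked):
 *
 * struct bridge_rtnode *brt, *nbrt;
 *
 * // on each cpu: drop every rtnode whose shared rtinfo was marked dead
 * LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
 *     if (brt->brt_info->dead)
 *         bridge_rtnode_destroy(sc, brt);
 * }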
183  *
184  *
185  * Bridge route information adding/deleting/flushing:
186  * Since all operations are serialized by the fixed message flow between
187  * netisrs, it is not possible to create corrupted per-cpu route
188  * information.
189  *
190  *
191  *
192  * XXX This no longer applies.
193  * Percpu member interface list iteration with blocking operation:
194  * Since a bridge can only delete one member interface at a time, and
195  * the deleted member interface is not freed until after
196  * netmsg_service_sync(), the following pattern is used to make sure
197  * that even if a member interface is ripped from the percpu list during
198  * a blocking operation, the iteration can still keep going:
199  *
200  * TAILQ_FOREACH_MUTABLE(bif, sc->sc_iflists[mycpuid], bif_next, nbif) {
201  *     blocking operation;
202  *     blocking operation;
203  *     ...
204  *     ...
205  *     if (nbif != NULL && !nbif->bif_onlist) {
206  *         KKASSERT(bif->bif_onlist);
207  *         nbif = TAILQ_NEXT(bif, bif_next);
208  *     }
209  * }
210  *
211  * As mentioned above, only one member interface can be unlinked from the
212  * percpu member interface list at a time, so either bif or nbif may be off
213  * the list, but _not_ both.  To keep the iteration going, we don't care
214  * about bif, only nbif.  Since a removed member interface will only be
215  * freed after we finish our work, it is safe to access any field of an
216  * unlinked bif (here bif_onlist).  If nbif is no longer on the list, then
217  * bif must be on the list, so we advance nbif past bif and keep going.
218  */
219
220 #include "opt_inet.h"
221 #include "opt_inet6.h"
222
223 #include <sys/param.h>
224 #include <sys/mbuf.h>
225 #include <sys/malloc.h>
226 #include <sys/protosw.h>
227 #include <sys/systm.h>
228 #include <sys/time.h>
229 #include <sys/socket.h> /* for net/if.h */
230 #include <sys/sockio.h>
231 #include <sys/ctype.h>  /* string functions */
232 #include <sys/kernel.h>
233 #include <sys/random.h>
234 #include <sys/sysctl.h>
235 #include <sys/module.h>
236 #include <sys/proc.h>
237 #include <sys/priv.h>
238 #include <sys/lock.h>
239 #include <sys/thread.h>
240 #include <sys/thread2.h>
241 #include <sys/mpipe.h>
242
243 #include <net/bpf.h>
244 #include <net/if.h>
245 #include <net/if_dl.h>
246 #include <net/if_types.h>
247 #include <net/if_var.h>
248 #include <net/pfil.h>
249 #include <net/ifq_var.h>
250 #include <net/if_clone.h>
251
252 #include <netinet/in.h> /* for struct arpcom */
253 #include <netinet/in_systm.h>
254 #include <netinet/in_var.h>
255 #include <netinet/ip.h>
256 #include <netinet/ip_var.h>
257 #ifdef INET6
258 #include <netinet/ip6.h>
259 #include <netinet6/ip6_var.h>
260 #endif
261 #include <netinet/if_ether.h> /* for struct arpcom */
262 #include <net/bridge/if_bridgevar.h>
263 #include <net/if_llc.h>
264 #include <net/netmsg2.h>
265 #include <net/netisr2.h>
266
267 #include <net/route.h>
268 #include <sys/in_cksum.h>
269
270 /*
271  * Size of the route hash table.  Must be a power of two.
272  */
273 #ifndef BRIDGE_RTHASH_SIZE
274 #define BRIDGE_RTHASH_SIZE              1024
275 #endif
276
277 #define BRIDGE_RTHASH_MASK              (BRIDGE_RTHASH_SIZE - 1)
278
279 /*
280  * Maximum number of addresses to cache.
281  */
282 #ifndef BRIDGE_RTABLE_MAX
283 #define BRIDGE_RTABLE_MAX               4096
284 #endif
285
286 /*
287  * Spanning tree defaults.
288  */
289 #define BSTP_DEFAULT_MAX_AGE            (20 * 256)
290 #define BSTP_DEFAULT_HELLO_TIME         (2 * 256)
291 #define BSTP_DEFAULT_FORWARD_DELAY      (15 * 256)
292 #define BSTP_DEFAULT_HOLD_TIME          (1 * 256)
293 #define BSTP_DEFAULT_BRIDGE_PRIORITY    0x8000
294 #define BSTP_DEFAULT_PORT_PRIORITY      0x80
295 #define BSTP_DEFAULT_PATH_COST          55
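
/*
 * Note: the BSTP age/time/delay values above appear to be expressed in
 * 1/256-second units, e.g. (20 * 256) is a max age of 20 seconds; the
 * priorities and the path cost are plain 802.1D values.
 */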
296
297 /*
298  * Timeout (in seconds) for entries learned dynamically.
299  */
300 #ifndef BRIDGE_RTABLE_TIMEOUT
301 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
302 #endif
303
304 /*
305  * Number of seconds between walks of the route list.
306  */
307 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
308 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
309 #endif
310
311 /*
312  * List of capabilities to mask on the member interface.
313  */
314 #define BRIDGE_IFCAPS_MASK              (IFCAP_TXCSUM | IFCAP_TSO)
315
316 typedef int     (*bridge_ctl_t)(struct bridge_softc *, void *);
317
318 struct netmsg_brctl {
319         struct netmsg_base      base;
320         bridge_ctl_t            bc_func;
321         struct bridge_softc     *bc_sc;
322         void                    *bc_arg;
323 };
324
325 struct netmsg_brsaddr {
326         struct netmsg_base      base;
327         struct bridge_softc     *br_softc;
328         struct ifnet            *br_dst_if;
329         struct bridge_rtinfo    *br_rtinfo;
330         int                     br_setflags;
331         uint8_t                 br_dst[ETHER_ADDR_LEN];
332         uint8_t                 br_flags;
333 };
334
335 struct netmsg_braddbif {
336         struct netmsg_base      base;
337         struct bridge_softc     *br_softc;
338         struct bridge_ifinfo    *br_bif_info;
339         struct ifnet            *br_bif_ifp;
340 };
341
342 struct netmsg_brdelbif {
343         struct netmsg_base      base;
344         struct bridge_softc     *br_softc;
345         struct bridge_ifinfo    *br_bif_info;
346         struct bridge_iflist_head *br_bif_list;
347 };
348
349 struct netmsg_brsflags {
350         struct netmsg_base      base;
351         struct bridge_softc     *br_softc;
352         struct bridge_ifinfo    *br_bif_info;
353         uint32_t                br_bif_flags;
354 };
355
356 eventhandler_tag        bridge_detach_cookie = NULL;
357
358 extern  struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *);
359 extern  int (*bridge_output_p)(struct ifnet *, struct mbuf *);
360 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
361 extern  struct ifnet *(*bridge_interface_p)(void *if_bridge);
362
363 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
364
365 static int      bridge_clone_create(struct if_clone *, int, caddr_t);
366 static int      bridge_clone_destroy(struct ifnet *);
367
368 static int      bridge_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
369 static void     bridge_mutecaps(struct bridge_ifinfo *, struct ifnet *, int);
370 static void     bridge_ifdetach(void *, struct ifnet *);
371 static void     bridge_init(void *);
372 static int      bridge_from_us(struct bridge_softc *, struct ether_header *);
373 static void     bridge_stop(struct ifnet *);
374 static void     bridge_start(struct ifnet *, struct ifaltq_subque *);
375 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
376 static int      bridge_output(struct ifnet *, struct mbuf *);
377 static struct ifnet *bridge_interface(void *if_bridge);
378
379 static void     bridge_forward(struct bridge_softc *, struct mbuf *m);
380
381 static void     bridge_timer_handler(netmsg_t);
382 static void     bridge_timer(void *);
383
384 static void     bridge_start_bcast(struct bridge_softc *, struct mbuf *);
385 static void     bridge_broadcast(struct bridge_softc *, struct ifnet *,
386                     struct mbuf *);
387 static void     bridge_span(struct bridge_softc *, struct mbuf *);
388
389 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
390                     struct ifnet *, uint8_t);
391 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
392 static void     bridge_rtreap(struct bridge_softc *);
393 static void     bridge_rtreap_async(struct bridge_softc *);
394 static void     bridge_rttrim(struct bridge_softc *);
395 static int      bridge_rtage_finddead(struct bridge_softc *);
396 static void     bridge_rtage(struct bridge_softc *);
397 static void     bridge_rtflush(struct bridge_softc *, int);
398 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
399 static int      bridge_rtsaddr(struct bridge_softc *, const uint8_t *,
400                     struct ifnet *, uint8_t);
401 static void     bridge_rtmsg_sync(struct bridge_softc *sc);
402 static void     bridge_rtreap_handler(netmsg_t);
403 static void     bridge_rtinstall_handler(netmsg_t);
404 static int      bridge_rtinstall_oncpu(struct bridge_softc *, const uint8_t *,
405                     struct ifnet *, int, uint8_t, struct bridge_rtinfo **);
406
407 static void     bridge_rtable_init(struct bridge_softc *);
408 static void     bridge_rtable_fini(struct bridge_softc *);
409
410 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
411 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
412                     const uint8_t *);
413 static void     bridge_rtnode_insert(struct bridge_softc *,
414                     struct bridge_rtnode *);
415 static void     bridge_rtnode_destroy(struct bridge_softc *,
416                     struct bridge_rtnode *);
417
418 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
419                     const char *name);
420 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
421                     struct ifnet *ifp);
422 static struct bridge_iflist *bridge_lookup_member_ifinfo(struct bridge_softc *,
423                     struct bridge_ifinfo *);
424 static void     bridge_delete_member(struct bridge_softc *,
425                     struct bridge_iflist *, int);
426 static void     bridge_delete_span(struct bridge_softc *,
427                     struct bridge_iflist *);
428
429 static int      bridge_control(struct bridge_softc *, u_long,
430                                bridge_ctl_t, void *);
431 static int      bridge_ioctl_init(struct bridge_softc *, void *);
432 static int      bridge_ioctl_stop(struct bridge_softc *, void *);
433 static int      bridge_ioctl_add(struct bridge_softc *, void *);
434 static int      bridge_ioctl_del(struct bridge_softc *, void *);
435 static void     bridge_ioctl_fillflags(struct bridge_softc *sc,
436                                 struct bridge_iflist *bif, struct ifbreq *req);
437 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
438 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
439 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
440 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
441 static int      bridge_ioctl_gifs(struct bridge_softc *, void *);
442 static int      bridge_ioctl_rts(struct bridge_softc *, void *);
443 static int      bridge_ioctl_saddr(struct bridge_softc *, void *);
444 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
445 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
446 static int      bridge_ioctl_daddr(struct bridge_softc *, void *);
447 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
448 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
449 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
450 static int      bridge_ioctl_reinit(struct bridge_softc *, void *);
451 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
452 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
453 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
454 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
455 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
456 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
457 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
458 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
459 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
460 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
461 static int      bridge_ioctl_sifbondwght(struct bridge_softc *, void *);
462 static int      bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
463                     int);
464 static int      bridge_ip_checkbasic(struct mbuf **mp);
465 #ifdef INET6
466 static int      bridge_ip6_checkbasic(struct mbuf **mp);
467 #endif /* INET6 */
468 static int      bridge_fragment(struct ifnet *, struct mbuf *,
469                     struct ether_header *, int, struct llc *);
470 static void     bridge_enqueue_handler(netmsg_t);
471 static void     bridge_handoff(struct bridge_softc *, struct ifnet *,
472                     struct mbuf *, int);
473
474 static void     bridge_del_bif_handler(netmsg_t);
475 static void     bridge_add_bif_handler(netmsg_t);
476 static void     bridge_del_bif(struct bridge_softc *, struct bridge_ifinfo *,
477                     struct bridge_iflist_head *);
478 static void     bridge_add_bif(struct bridge_softc *, struct bridge_ifinfo *,
479                     struct ifnet *);
480
481 SYSCTL_DECL(_net_link);
482 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
483
484 static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
485 static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
486 static int pfil_member = 1; /* run pfil hooks on the member interface */
487 static int bridge_debug;
488 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
489     &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
490 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
491     &pfil_bridge, 0, "Packet filter on the bridge interface");
492 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
493     &pfil_member, 0, "Packet filter on the member interface");
494 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
495     &bridge_debug, 0, "Bridge debug mode");
496
497 struct bridge_control_arg {
498         union {
499                 struct ifbreq ifbreq;
500                 struct ifbifconf ifbifconf;
501                 struct ifbareq ifbareq;
502                 struct ifbaconf ifbaconf;
503                 struct ifbrparam ifbrparam;
504         } bca_u;
505         int     bca_len;
506         void    *bca_uptr;
507         void    *bca_kptr;
508 };
509
510 struct bridge_control {
511         bridge_ctl_t    bc_func;
512         int             bc_argsize;
513         int             bc_flags;
514 };
515
516 #define BC_F_COPYIN             0x01    /* copy arguments in */
517 #define BC_F_COPYOUT            0x02    /* copy arguments out */
518 #define BC_F_SUSER              0x04    /* do super-user check */
519
520 const struct bridge_control bridge_control_table[] = {
521         { bridge_ioctl_add,             sizeof(struct ifbreq),
522           BC_F_COPYIN|BC_F_SUSER },
523         { bridge_ioctl_del,             sizeof(struct ifbreq),
524           BC_F_COPYIN|BC_F_SUSER },
525
526         { bridge_ioctl_gifflags,        sizeof(struct ifbreq),
527           BC_F_COPYIN|BC_F_COPYOUT },
528         { bridge_ioctl_sifflags,        sizeof(struct ifbreq),
529           BC_F_COPYIN|BC_F_SUSER },
530
531         { bridge_ioctl_scache,          sizeof(struct ifbrparam),
532           BC_F_COPYIN|BC_F_SUSER },
533         { bridge_ioctl_gcache,          sizeof(struct ifbrparam),
534           BC_F_COPYOUT },
535
536         { bridge_ioctl_gifs,            sizeof(struct ifbifconf),
537           BC_F_COPYIN|BC_F_COPYOUT },
538         { bridge_ioctl_rts,             sizeof(struct ifbaconf),
539           BC_F_COPYIN|BC_F_COPYOUT },
540
541         { bridge_ioctl_saddr,           sizeof(struct ifbareq),
542           BC_F_COPYIN|BC_F_SUSER },
543
544         { bridge_ioctl_sto,             sizeof(struct ifbrparam),
545           BC_F_COPYIN|BC_F_SUSER },
546         { bridge_ioctl_gto,             sizeof(struct ifbrparam),
547           BC_F_COPYOUT },
548
549         { bridge_ioctl_daddr,           sizeof(struct ifbareq),
550           BC_F_COPYIN|BC_F_SUSER },
551
552         { bridge_ioctl_flush,           sizeof(struct ifbreq),
553           BC_F_COPYIN|BC_F_SUSER },
554
555         { bridge_ioctl_gpri,            sizeof(struct ifbrparam),
556           BC_F_COPYOUT },
557         { bridge_ioctl_spri,            sizeof(struct ifbrparam),
558           BC_F_COPYIN|BC_F_SUSER },
559
560         { bridge_ioctl_ght,             sizeof(struct ifbrparam),
561           BC_F_COPYOUT },
562         { bridge_ioctl_sht,             sizeof(struct ifbrparam),
563           BC_F_COPYIN|BC_F_SUSER },
564
565         { bridge_ioctl_gfd,             sizeof(struct ifbrparam),
566           BC_F_COPYOUT },
567         { bridge_ioctl_sfd,             sizeof(struct ifbrparam),
568           BC_F_COPYIN|BC_F_SUSER },
569
570         { bridge_ioctl_gma,             sizeof(struct ifbrparam),
571           BC_F_COPYOUT },
572         { bridge_ioctl_sma,             sizeof(struct ifbrparam),
573           BC_F_COPYIN|BC_F_SUSER },
574
575         { bridge_ioctl_sifprio,         sizeof(struct ifbreq),
576           BC_F_COPYIN|BC_F_SUSER },
577
578         { bridge_ioctl_sifcost,         sizeof(struct ifbreq),
579           BC_F_COPYIN|BC_F_SUSER },
580
581         { bridge_ioctl_addspan,         sizeof(struct ifbreq),
582           BC_F_COPYIN|BC_F_SUSER },
583         { bridge_ioctl_delspan,         sizeof(struct ifbreq),
584           BC_F_COPYIN|BC_F_SUSER },
585
586         { bridge_ioctl_sifbondwght,     sizeof(struct ifbreq),
587           BC_F_COPYIN|BC_F_SUSER },
588
589 };
590 static const int bridge_control_table_size = NELEM(bridge_control_table);
591
592 LIST_HEAD(, bridge_softc) bridge_list;
593
594 struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge",
595                                 bridge_clone_create,
596                                 bridge_clone_destroy, 0, IF_MAXUNIT);
597
598 static int
599 bridge_modevent(module_t mod, int type, void *data)
600 {
601         switch (type) {
602         case MOD_LOAD:
603                 LIST_INIT(&bridge_list);
604                 if_clone_attach(&bridge_cloner);
605                 bridge_input_p = bridge_input;
606                 bridge_output_p = bridge_output;
607                 bridge_interface_p = bridge_interface;
608                 bridge_detach_cookie = EVENTHANDLER_REGISTER(
609                     ifnet_detach_event, bridge_ifdetach, NULL,
610                     EVENTHANDLER_PRI_ANY);
611 #if 0 /* notyet */
612                 bstp_linkstate_p = bstp_linkstate;
613 #endif
614                 break;
615         case MOD_UNLOAD:
616                 if (!LIST_EMPTY(&bridge_list))
617                         return (EBUSY);
618                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
619                     bridge_detach_cookie);
620                 if_clone_detach(&bridge_cloner);
621                 bridge_input_p = NULL;
622                 bridge_output_p = NULL;
623                 bridge_interface_p = NULL;
624 #if 0 /* notyet */
625                 bstp_linkstate_p = NULL;
626 #endif
627                 break;
628         default:
629                 return (EOPNOTSUPP);
630         }
631         return (0);
632 }
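
/*
 * Rough sketch of how the hooks registered above are consumed on the
 * Ethernet input path (an assumption for illustration; see the real
 * dispatch in net/if_ethersubr.c):
 *
 *     if (ifp->if_bridge != NULL && bridge_input_p != NULL) {
 *         m = bridge_input_p(ifp, m);
 *         if (m == NULL)
 *             return;                // consumed by the bridge
 *     }
 */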
633
634 static moduledata_t bridge_mod = {
635         "if_bridge",
636         bridge_modevent,
637         0
638 };
639
640 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
641
642
643 /*
644  * bridge_clone_create:
645  *
646  *      Create a new bridge instance.
647  */
648 static int
649 bridge_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
650 {
651         struct bridge_softc *sc;
652         struct ifnet *ifp;
653         u_char eaddr[6];
654         int cpu, rnd;
655
656         sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
657         ifp = sc->sc_ifp = &sc->sc_if;
658
659         sc->sc_brtmax = BRIDGE_RTABLE_MAX;
660         sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
661         sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
662         sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
663         sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
664         sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
665         sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
666
667         /* Initialize our routing table. */
668         bridge_rtable_init(sc);
669
670         callout_init_mp(&sc->sc_brcallout);
671         netmsg_init(&sc->sc_brtimemsg, NULL, &netisr_adone_rport,
672                     MSGF_DROPABLE, bridge_timer_handler);
673         sc->sc_brtimemsg.lmsg.u.ms_resultp = sc;
674
675         callout_init_mp(&sc->sc_bstpcallout);
676         netmsg_init(&sc->sc_bstptimemsg, NULL, &netisr_adone_rport,
677                     MSGF_DROPABLE, bstp_tick_handler);
678         sc->sc_bstptimemsg.lmsg.u.ms_resultp = sc;
679
680         /* Initialize per-cpu member iface lists */
681         sc->sc_iflists = kmalloc(sizeof(*sc->sc_iflists) * ncpus,
682                                  M_DEVBUF, M_WAITOK);
683         for (cpu = 0; cpu < ncpus; ++cpu)
684                 TAILQ_INIT(&sc->sc_iflists[cpu]);
685
686         TAILQ_INIT(&sc->sc_spanlist);
687
688         ifp->if_softc = sc;
689         if_initname(ifp, ifc->ifc_name, unit);
690         ifp->if_mtu = ETHERMTU;
691         ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
692         ifp->if_ioctl = bridge_ioctl;
693         ifp->if_start = bridge_start;
694         ifp->if_init = bridge_init;
695         ifp->if_type = IFT_ETHER;
696         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
697         ifq_set_ready(&ifp->if_snd);
698         ifp->if_hdrlen = ETHER_HDR_LEN;
699
700         /*
701          * Generate a random ethernet address and mark it as a locally
702          * administered address.
703          */
704         rnd = karc4random();
705         bcopy(&rnd, &eaddr[0], 4); /* ETHER_ADDR_LEN == 6 */
706         rnd = karc4random();
707         bcopy(&rnd, &eaddr[2], 4); /* ETHER_ADDR_LEN == 6 */
708
709         eaddr[0] &= ~1; /* clear multicast bit */
710         eaddr[0] |= 2;  /* set the LAA bit */
711
712         ether_ifattach(ifp, eaddr, NULL);
713         /* Now undo some of the damage... */
714         ifp->if_baudrate = 0;
715         /*ifp->if_type = IFT_BRIDGE;*/
716
717         crit_enter();   /* XXX MP */
718         LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
719         crit_exit();
720
721         return (0);
722 }
723
724 static void
725 bridge_delete_dispatch(netmsg_t msg)
726 {
727         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
728         struct ifnet *bifp = sc->sc_ifp;
729         struct bridge_iflist *bif;
730
731         ifnet_serialize_all(bifp);
732
733         while ((bif = TAILQ_FIRST(&sc->sc_iflists[mycpuid])) != NULL)
734                 bridge_delete_member(sc, bif, 0);
735
736         while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL)
737                 bridge_delete_span(sc, bif);
738
739         ifnet_deserialize_all(bifp);
740
741         lwkt_replymsg(&msg->lmsg, 0);
742 }
743
744 /*
745  * bridge_clone_destroy:
746  *
747  *      Destroy a bridge instance.
748  */
749 static int
750 bridge_clone_destroy(struct ifnet *ifp)
751 {
752         struct bridge_softc *sc = ifp->if_softc;
753         struct netmsg_base msg;
754
755         ifnet_serialize_all(ifp);
756
757         bridge_stop(ifp);
758         ifp->if_flags &= ~IFF_UP;
759
760         ifnet_deserialize_all(ifp);
761
762         netmsg_init(&msg, NULL, &curthread->td_msgport,
763                     0, bridge_delete_dispatch);
764         msg.lmsg.u.ms_resultp = sc;
765         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
766
767         crit_enter();   /* XXX MP */
768         LIST_REMOVE(sc, sc_list);
769         crit_exit();
770
771         ether_ifdetach(ifp);
772
773         /* Tear down the routing table. */
774         bridge_rtable_fini(sc);
775
776         /* Free per-cpu member iface lists */
777         kfree(sc->sc_iflists, M_DEVBUF);
778
779         kfree(sc, M_DEVBUF);
780
781         return 0;
782 }
783
784 /*
785  * bridge_ioctl:
786  *
787  *      Handle a control request from the operator.
788  */
789 static int
790 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
791 {
792         struct bridge_softc *sc = ifp->if_softc;
793         struct bridge_control_arg args;
794         struct ifdrv *ifd = (struct ifdrv *) data;
795         const struct bridge_control *bc;
796         int error = 0;
797
798         ASSERT_IFNET_SERIALIZED_ALL(ifp);
799
800         switch (cmd) {
801         case SIOCADDMULTI:
802         case SIOCDELMULTI:
803                 break;
804
805         case SIOCGDRVSPEC:
806         case SIOCSDRVSPEC:
807                 if (ifd->ifd_cmd >= bridge_control_table_size) {
808                         error = EINVAL;
809                         break;
810                 }
811                 bc = &bridge_control_table[ifd->ifd_cmd];
812
813                 if (cmd == SIOCGDRVSPEC &&
814                     (bc->bc_flags & BC_F_COPYOUT) == 0) {
815                         error = EINVAL;
816                         break;
817                 } else if (cmd == SIOCSDRVSPEC &&
818                            (bc->bc_flags & BC_F_COPYOUT)) {
819                         error = EINVAL;
820                         break;
821                 }
822
823                 if (bc->bc_flags & BC_F_SUSER) {
824                         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
825                         if (error)
826                                 break;
827                 }
828
829                 if (ifd->ifd_len != bc->bc_argsize ||
830                     ifd->ifd_len > sizeof(args.bca_u)) {
831                         error = EINVAL;
832                         break;
833                 }
834
835                 memset(&args, 0, sizeof(args));
836                 if (bc->bc_flags & BC_F_COPYIN) {
837                         error = copyin(ifd->ifd_data, &args.bca_u,
838                                        ifd->ifd_len);
839                         if (error)
840                                 break;
841                 }
842
843                 error = bridge_control(sc, cmd, bc->bc_func, &args);
844                 if (error) {
845                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
846                         break;
847                 }
848
849                 if (bc->bc_flags & BC_F_COPYOUT) {
850                         error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
851                         if (args.bca_len != 0) {
852                                 KKASSERT(args.bca_kptr != NULL);
853                                 if (!error) {
854                                         error = copyout(args.bca_kptr,
855                                                 args.bca_uptr, args.bca_len);
856                                 }
857                                 kfree(args.bca_kptr, M_TEMP);
858                         } else {
859                                 KKASSERT(args.bca_kptr == NULL);
860                         }
861                 } else {
862                         KKASSERT(args.bca_len == 0 && args.bca_kptr == NULL);
863                 }
864                 break;
865
866         case SIOCSIFFLAGS:
867                 if (!(ifp->if_flags & IFF_UP) &&
868                     (ifp->if_flags & IFF_RUNNING)) {
869                         /*
870                          * If the interface is marked down and it is
871                          * running, then stop it.
872                          */
873                         bridge_stop(ifp);
874                 } else if ((ifp->if_flags & IFF_UP) &&
875                     !(ifp->if_flags & IFF_RUNNING)) {
876                         /*
877                          * If the interface is marked up and it is
878                          * stopped, then start it.
879                          */
880                         ifp->if_init(sc);
881                 }
882
883                 /*
884                  * If the interface is running and the link flag state
885                  * has changed, we have to reinitialize as well.
886                  */
887                 if ((ifp->if_flags & IFF_RUNNING) &&
888                     (ifp->if_flags & (IFF_LINK0|IFF_LINK1|IFF_LINK2)) !=
889                     sc->sc_copy_flags) {
890                         sc->sc_copy_flags = ifp->if_flags &
891                                         (IFF_LINK0|IFF_LINK1|IFF_LINK2);
892                         bridge_control(sc, 0, bridge_ioctl_reinit, NULL);
893                 }
894
895                 break;
896
897         case SIOCSIFMTU:
898                 /* Do not allow the MTU to be changed on the bridge */
899                 error = EINVAL;
900                 break;
901
902         default:
903                 error = ether_ioctl(ifp, cmd, data);
904                 break;
905         }
906         return (error);
907 }
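
/*
 * A minimal userland sketch of how the SIOCSDRVSPEC path above is driven
 * (an assumption for illustration; ifconfig(8) does the real work, sock is
 * any datagram socket, and the BRDG* command indices come from
 * net/bridge/if_bridgevar.h):
 *
 *     struct ifbreq req;
 *     struct ifdrv ifd;
 *
 *     memset(&req, 0, sizeof(req));
 *     strlcpy(req.ifbr_ifsname, "em0", sizeof(req.ifbr_ifsname));
 *
 *     memset(&ifd, 0, sizeof(ifd));
 *     strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
 *     ifd.ifd_cmd = BRDGADD;        // index into bridge_control_table
 *     ifd.ifd_len = sizeof(req);    // must equal bc_argsize
 *     ifd.ifd_data = &req;
 *
 *     if (ioctl(sock, SIOCSDRVSPEC, &ifd) < 0)
 *         err(1, "BRDGADD %s", req.ifbr_ifsname);
 */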
908
909 /*
910  * bridge_mutecaps:
911  *
912  *      Clear or restore unwanted capabilities on the member interface.
913  */
914 static void
915 bridge_mutecaps(struct bridge_ifinfo *bif_info, struct ifnet *ifp, int mute)
916 {
917         struct ifreq ifr;
918
919         if (ifp->if_ioctl == NULL)
920                 return;
921
922         bzero(&ifr, sizeof(ifr));
923         ifr.ifr_reqcap = ifp->if_capenable;
924
925         if (mute) {
926                 /* mask off and save capabilities */
927                 bif_info->bifi_mutecap = ifr.ifr_reqcap & BRIDGE_IFCAPS_MASK;
928                 if (bif_info->bifi_mutecap != 0)
929                         ifr.ifr_reqcap &= ~BRIDGE_IFCAPS_MASK;
930         } else {
931                 /* restore muted capabilities */
932                 ifr.ifr_reqcap |= bif_info->bifi_mutecap;
933         }
934
935         if (bif_info->bifi_mutecap != 0) {
936                 ifnet_serialize_all(ifp);
937                 ifp->if_ioctl(ifp, SIOCSIFCAP, (caddr_t)&ifr, NULL);
938                 ifnet_deserialize_all(ifp);
939         }
940 }
941
942 /*
943  * bridge_lookup_member:
944  *
945  *      Lookup a bridge member interface.
946  */
947 static struct bridge_iflist *
948 bridge_lookup_member(struct bridge_softc *sc, const char *name)
949 {
950         struct bridge_iflist *bif;
951
952         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
953                 if (strcmp(bif->bif_ifp->if_xname, name) == 0)
954                         return (bif);
955         }
956         return (NULL);
957 }
958
959 /*
960  * bridge_lookup_member_if:
961  *
962  *      Lookup a bridge member interface by ifnet*.
963  */
964 static struct bridge_iflist *
965 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
966 {
967         struct bridge_iflist *bif;
968
969         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
970                 if (bif->bif_ifp == member_ifp)
971                         return (bif);
972         }
973         return (NULL);
974 }
975
976 /*
977  * bridge_lookup_member_ifinfo:
978  *
979  *      Lookup a bridge member interface by bridge_ifinfo.
980  */
981 static struct bridge_iflist *
982 bridge_lookup_member_ifinfo(struct bridge_softc *sc,
983                             struct bridge_ifinfo *bif_info)
984 {
985         struct bridge_iflist *bif;
986
987         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
988                 if (bif->bif_info == bif_info)
989                         return (bif);
990         }
991         return (NULL);
992 }
993
994 /*
995  * bridge_delete_member:
996  *
997  *      Delete the specified member interface.
998  */
999 static void
1000 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
1001     int gone)
1002 {
1003         struct ifnet *ifs = bif->bif_ifp;
1004         struct ifnet *bifp = sc->sc_ifp;
1005         struct bridge_ifinfo *bif_info = bif->bif_info;
1006         struct bridge_iflist_head saved_bifs;
1007
1008         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1009         KKASSERT(bif_info != NULL);
1010
1011         ifs->if_bridge = NULL;
1012
1013         /*
1014          * Release the bridge interface's serializer:
1015          * - To avoid a possible deadlock.
1016          * - Various sync operations will block the current thread.
1017          */
1018         ifnet_deserialize_all(bifp);
1019
1020         if (!gone) {
1021                 switch (ifs->if_type) {
1022                 case IFT_ETHER:
1023                 case IFT_L2VLAN:
1024                         /*
1025                          * Take the interface out of promiscuous mode.
1026                          */
1027                         ifpromisc(ifs, 0);
1028                         bridge_mutecaps(bif_info, ifs, 0);
1029                         break;
1030
1031                 case IFT_GIF:
1032                         break;
1033
1034                 default:
1035                         panic("bridge_delete_member: impossible");
1036                         break;
1037                 }
1038         }
1039
1040         /*
1041          * Remove the bifs from the percpu linked lists.
1042          *
1043          * Removed bifs are not freed immediately; instead,
1044          * they are saved in saved_bifs.  They will be freed
1045          * after we make sure that no one is accessing them,
1046          * i.e. after the following netmsg_service_sync().
1047          */
1048         TAILQ_INIT(&saved_bifs);
1049         bridge_del_bif(sc, bif_info, &saved_bifs);
1050
1051         /*
1052          * Make sure that all protocol threads:
1053          * o  see that 'ifs' if_bridge has been changed
1054          * o  know that bif has been removed from the percpu linked list
1055          */
1056         netmsg_service_sync();
1057
1058         /*
1059          * Free the removed bifs
1060          */
1061         KKASSERT(!TAILQ_EMPTY(&saved_bifs));
1062         while ((bif = TAILQ_FIRST(&saved_bifs)) != NULL) {
1063                 TAILQ_REMOVE(&saved_bifs, bif, bif_next);
1064                 kfree(bif, M_DEVBUF);
1065         }
1066
1067         /* See the comment in bridge_ioctl_stop() */
1068         bridge_rtmsg_sync(sc);
1069         bridge_rtdelete(sc, ifs, IFBF_FLUSHALL | IFBF_FLUSHSYNC);
1070
1071         ifnet_serialize_all(bifp);
1072
1073         if (bifp->if_flags & IFF_RUNNING)
1074                 bstp_initialization(sc);
1075
1076         /*
1077          * Free the bif_info after bstp_initialization(), so that
1078          * bridge_softc.sc_root_port will not reference a dangling
1079          * pointer.
1080          */
1081         kfree(bif_info, M_DEVBUF);
1082 }
1083
1084 /*
1085  * bridge_delete_span:
1086  *
1087  *      Delete the specified span interface.
1088  */
1089 static void
1090 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1091 {
1092         KASSERT(bif->bif_ifp->if_bridge == NULL,
1093             ("%s: not a span interface", __func__));
1094
1095         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
1096         kfree(bif, M_DEVBUF);
1097 }
1098
1099 static int
1100 bridge_ioctl_init(struct bridge_softc *sc, void *arg __unused)
1101 {
1102         struct ifnet *ifp = sc->sc_ifp;
1103
1104         if (ifp->if_flags & IFF_RUNNING)
1105                 return 0;
1106
1107         callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1108             bridge_timer, sc);
1109
1110         ifp->if_flags |= IFF_RUNNING;
1111         bstp_initialization(sc);
1112         return 0;
1113 }
1114
1115 static int
1116 bridge_ioctl_stop(struct bridge_softc *sc, void *arg __unused)
1117 {
1118         struct ifnet *ifp = sc->sc_ifp;
1119
1120         if ((ifp->if_flags & IFF_RUNNING) == 0)
1121                 return 0;
1122
1123         callout_stop(&sc->sc_brcallout);
1124
1125         crit_enter();
1126         lwkt_dropmsg(&sc->sc_brtimemsg.lmsg);
1127         crit_exit();
1128
1129         bstp_stop(sc);
1130
1131         ifp->if_flags &= ~IFF_RUNNING;
1132
1133         ifnet_deserialize_all(ifp);
1134
1135         /* Let everyone know that we are stopped */
1136         netmsg_service_sync();
1137
1138         /*
1139          * Sync the ifnetX msgports in the order we forward rtnode
1140          * installation messages.  This is used to make sure that
1141          * all rtnode installation messages sent by bridge_rtupdate()
1142          * during the above netmsg_service_sync() are flushed.
1143          */
1144         bridge_rtmsg_sync(sc);
1145         bridge_rtflush(sc, IFBF_FLUSHDYN | IFBF_FLUSHSYNC);
1146
1147         ifnet_serialize_all(ifp);
1148         return 0;
1149 }
1150
1151 static int
1152 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1153 {
1154         struct ifbreq *req = arg;
1155         struct bridge_iflist *bif;
1156         struct bridge_ifinfo *bif_info;
1157         struct ifnet *ifs, *bifp;
1158         int error = 0;
1159
1160         bifp = sc->sc_ifp;
1161         ASSERT_IFNET_SERIALIZED_ALL(bifp);
1162
1163         ifs = ifunit_netisr(req->ifbr_ifsname);
1164         if (ifs == NULL)
1165                 return (ENOENT);
1166
1167         /* If it's in the span list, it can't be a member. */
1168         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1169                 if (ifs == bif->bif_ifp)
1170                         return (EBUSY);
1171
1172         /* Allow the first Ethernet member to define the MTU */
1173         if (ifs->if_type != IFT_GIF) {
1174                 if (TAILQ_EMPTY(&sc->sc_iflists[mycpuid])) {
1175                         bifp->if_mtu = ifs->if_mtu;
1176                 } else if (bifp->if_mtu != ifs->if_mtu) {
1177                         if_printf(bifp, "invalid MTU for %s\n", ifs->if_xname);
1178                         return (EINVAL);
1179                 }
1180         }
1181
1182         if (ifs->if_bridge == sc)
1183                 return (EEXIST);
1184
1185         if (ifs->if_bridge != NULL)
1186                 return (EBUSY);
1187
1188         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1189         bif_info->bifi_priority = BSTP_DEFAULT_PORT_PRIORITY;
1190         bif_info->bifi_path_cost = BSTP_DEFAULT_PATH_COST;
1191         bif_info->bifi_ifp = ifs;
1192         bif_info->bifi_bond_weight = 1;
1193
1194         /*
1195          * Release the bridge interface's serializer:
1196          * - To avoid a possible deadlock.
1197          * - Various sync operations will block the current thread.
1198          */
1199         ifnet_deserialize_all(bifp);
1200
1201         switch (ifs->if_type) {
1202         case IFT_ETHER:
1203         case IFT_L2VLAN:
1204                 /*
1205                  * Place the interface into promiscuous mode.
1206                  */
1207                 error = ifpromisc(ifs, 1);
1208                 if (error) {
1209                         ifnet_serialize_all(bifp);
1210                         goto out;
1211                 }
1212                 bridge_mutecaps(bif_info, ifs, 1);
1213                 break;
1214
1215         case IFT_GIF: /* :^) */
1216                 break;
1217
1218         default:
1219                 error = EINVAL;
1220                 ifnet_serialize_all(bifp);
1221                 goto out;
1222         }
1223
1224         /*
1225          * Add bifs to percpu linked lists
1226          */
1227         bridge_add_bif(sc, bif_info, ifs);
1228
1229         ifnet_serialize_all(bifp);
1230
1231         if (bifp->if_flags & IFF_RUNNING)
1232                 bstp_initialization(sc);
1233         else
1234                 bstp_stop(sc);
1235
1236         /*
1237          * Everything has been set up, so let the member interface
1238          * deliver packets to this bridge on its input/output path.
1239          */
1240         ifs->if_bridge = sc;
1241 out:
1242         if (error) {
1243                 if (bif_info != NULL)
1244                         kfree(bif_info, M_DEVBUF);
1245         }
1246         return (error);
1247 }
1248
1249 static int
1250 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1251 {
1252         struct ifbreq *req = arg;
1253         struct bridge_iflist *bif;
1254
1255         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1256         if (bif == NULL)
1257                 return (ENOENT);
1258
1259         bridge_delete_member(sc, bif, 0);
1260
1261         return (0);
1262 }
1263
1264 static int
1265 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1266 {
1267         struct ifbreq *req = arg;
1268         struct bridge_iflist *bif;
1269
1270         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1271         if (bif == NULL)
1272                 return (ENOENT);
1273         bridge_ioctl_fillflags(sc, bif, req);
1274         return (0);
1275 }
1276
1277 static void
1278 bridge_ioctl_fillflags(struct bridge_softc *sc, struct bridge_iflist *bif,
1279                        struct ifbreq *req)
1280 {
1281         req->ifbr_ifsflags = bif->bif_flags;
1282         req->ifbr_state = bif->bif_state;
1283         req->ifbr_priority = bif->bif_priority;
1284         req->ifbr_path_cost = bif->bif_path_cost;
1285         req->ifbr_bond_weight = bif->bif_bond_weight;
1286         req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1287         if (bif->bif_flags & IFBIF_STP) {
1288                 req->ifbr_peer_root = bif->bif_peer_root;
1289                 req->ifbr_peer_bridge = bif->bif_peer_bridge;
1290                 req->ifbr_peer_cost = bif->bif_peer_cost;
1291                 req->ifbr_peer_port = bif->bif_peer_port;
1292                 if (bstp_supersedes_port_info(sc, bif)) {
1293                         req->ifbr_designated_root = bif->bif_peer_root;
1294                         req->ifbr_designated_bridge = bif->bif_peer_bridge;
1295                         req->ifbr_designated_cost = bif->bif_peer_cost;
1296                         req->ifbr_designated_port = bif->bif_peer_port;
1297                 } else {
1298                         req->ifbr_designated_root = sc->sc_bridge_id;
1299                         req->ifbr_designated_bridge = sc->sc_bridge_id;
1300                         req->ifbr_designated_cost = bif->bif_path_cost +
1301                                                     bif->bif_peer_cost;
1302                         req->ifbr_designated_port = bif->bif_port_id;
1303                 }
1304         } else {
1305                 req->ifbr_peer_root = 0;
1306                 req->ifbr_peer_bridge = 0;
1307                 req->ifbr_peer_cost = 0;
1308                 req->ifbr_peer_port = 0;
1309                 req->ifbr_designated_root = 0;
1310                 req->ifbr_designated_bridge = 0;
1311                 req->ifbr_designated_cost = 0;
1312                 req->ifbr_designated_port = 0;
1313         }
1314 }
1315
1316 static int
1317 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1318 {
1319         struct ifbreq *req = arg;
1320         struct bridge_iflist *bif;
1321         struct ifnet *bifp = sc->sc_ifp;
1322
1323         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1324         if (bif == NULL)
1325                 return (ENOENT);
1326
1327         if (req->ifbr_ifsflags & IFBIF_SPAN) {
1328                 /* SPAN is readonly */
1329                 return (EINVAL);
1330         }
1331
1332         if (req->ifbr_ifsflags & IFBIF_STP) {
1333                 switch (bif->bif_ifp->if_type) {
1334                 case IFT_ETHER:
1335                         /* These can do spanning tree. */
1336                         break;
1337
1338                 default:
1339                         /* Nothing else can. */
1340                         return (EINVAL);
1341                 }
1342         }
1343
1344         bif->bif_flags = (bif->bif_flags & IFBIF_KEEPMASK) |
1345                          (req->ifbr_ifsflags & ~IFBIF_KEEPMASK);
1346         if (bifp->if_flags & IFF_RUNNING)
1347                 bstp_initialization(sc);
1348
1349         return (0);
1350 }
1351
1352 static int
1353 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1354 {
1355         struct ifbrparam *param = arg;
1356         struct ifnet *ifp = sc->sc_ifp;
1357
1358         sc->sc_brtmax = param->ifbrp_csize;
1359
1360         ifnet_deserialize_all(ifp);
1361         bridge_rttrim(sc);
1362         ifnet_serialize_all(ifp);
1363
1364         return (0);
1365 }
1366
1367 static int
1368 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1369 {
1370         struct ifbrparam *param = arg;
1371
1372         param->ifbrp_csize = sc->sc_brtmax;
1373
1374         return (0);
1375 }
1376
1377 static int
1378 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1379 {
1380         struct bridge_control_arg *bc_arg = arg;
1381         struct ifbifconf *bifc = arg;
1382         struct bridge_iflist *bif;
1383         struct ifbreq *breq;
1384         int count, len;
1385
1386         count = 0;
1387         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next)
1388                 count++;
1389         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1390                 count++;
1391
1392         if (bifc->ifbic_len == 0) {
1393                 bifc->ifbic_len = sizeof(*breq) * count;
1394                 return 0;
1395         } else if (count == 0 || bifc->ifbic_len < sizeof(*breq)) {
1396                 bifc->ifbic_len = 0;
1397                 return 0;
1398         }
1399
1400         len = min(bifc->ifbic_len, sizeof(*breq) * count);
1401         KKASSERT(len >= sizeof(*breq));
1402
1403         breq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1404         if (breq == NULL) {
1405                 bifc->ifbic_len = 0;
1406                 return ENOMEM;
1407         }
1408         bc_arg->bca_kptr = breq;
1409
1410         count = 0;
1411         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1412                 if (len < sizeof(*breq))
1413                         break;
1414
1415                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1416                         sizeof(breq->ifbr_ifsname));
1417                 bridge_ioctl_fillflags(sc, bif, breq);
1418                 breq++;
1419                 count++;
1420                 len -= sizeof(*breq);
1421         }
1422         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1423                 if (len < sizeof(*breq))
1424                         break;
1425
1426                 strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1427                         sizeof(breq->ifbr_ifsname));
1428                 breq->ifbr_ifsflags = bif->bif_flags;
1429                 breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1430                 breq++;
1431                 count++;
1432                 len -= sizeof(*breq);
1433         }
1434
1435         bifc->ifbic_len = sizeof(*breq) * count;
1436         KKASSERT(bifc->ifbic_len > 0);
1437
1438         bc_arg->bca_len = bifc->ifbic_len;
1439         bc_arg->bca_uptr = bifc->ifbic_req;
1440         return 0;
1441 }
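
/*
 * Userland sketch of the two-pass sizing protocol implemented above (an
 * assumption for illustration; bridge_cmd() is a hypothetical wrapper
 * around the SIOCGDRVSPEC/ifdrv dance shown earlier, and BRDGGIFS is the
 * matching command index from net/bridge/if_bridgevar.h):
 *
 *     struct ifbifconf bifc;
 *
 *     memset(&bifc, 0, sizeof(bifc));
 *     bridge_cmd(sock, "bridge0", BRDGGIFS, &bifc);
 *     // 1st pass: ifbic_len is 0 on entry, the required size on return
 *     bifc.ifbic_req = malloc(bifc.ifbic_len);
 *     bridge_cmd(sock, "bridge0", BRDGGIFS, &bifc);
 *     // 2nd pass: ifbic_req holds ifbic_len / sizeof(struct ifbreq) entries
 */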
1442
1443 static int
1444 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1445 {
1446         struct bridge_control_arg *bc_arg = arg;
1447         struct ifbaconf *bac = arg;
1448         struct bridge_rtnode *brt;
1449         struct ifbareq *bareq;
1450         int count, len;
1451
1452         count = 0;
1453         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list)
1454                 count++;
1455
1456         if (bac->ifbac_len == 0) {
1457                 bac->ifbac_len = sizeof(*bareq) * count;
1458                 return 0;
1459         } else if (count == 0 || bac->ifbac_len < sizeof(*bareq)) {
1460                 bac->ifbac_len = 0;
1461                 return 0;
1462         }
1463
1464         len = min(bac->ifbac_len, sizeof(*bareq) * count);
1465         KKASSERT(len >= sizeof(*bareq));
1466
1467         bareq = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1468         if (bareq == NULL) {
1469                 bac->ifbac_len = 0;
1470                 return ENOMEM;
1471         }
1472         bc_arg->bca_kptr = bareq;
1473
1474         count = 0;
1475         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
1476                 struct bridge_rtinfo *bri = brt->brt_info;
1477                 time_t expire;
1478
1479                 if (len < sizeof(*bareq))
1480                         break;
1481
1482                 strlcpy(bareq->ifba_ifsname, bri->bri_ifp->if_xname,
1483                         sizeof(bareq->ifba_ifsname));
1484                 memcpy(bareq->ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1485                 expire = bri->bri_expire;
1486                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1487                     time_uptime < expire)
1488                         bareq->ifba_expire = expire - time_uptime;
1489                 else
1490                         bareq->ifba_expire = 0;
1491                 bareq->ifba_flags = bri->bri_flags;
1492                 bareq++;
1493                 count++;
1494                 len -= sizeof(*bareq);
1495         }
1496
1497         bac->ifbac_len = sizeof(*bareq) * count;
1498         KKASSERT(bac->ifbac_len > 0);
1499
1500         bc_arg->bca_len = bac->ifbac_len;
1501         bc_arg->bca_uptr = bac->ifbac_req;
1502         return 0;
1503 }
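
/*
 * Worked example for the expiry math above (explanatory only, not driver
 * code): ifba_expire reports the remaining lifetime of a DYNAMIC entry in
 * seconds of uptime, so with time_uptime == 1000 and bri_expire == 1240
 *
 *      bareq->ifba_expire = 1240 - 1000 = 240
 *
 * Static entries, and dynamic entries whose bri_expire has already passed,
 * are reported with ifba_expire == 0.
 */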
1504
1505 static int
1506 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1507 {
1508         struct ifbareq *req = arg;
1509         struct bridge_iflist *bif;
1510         struct ifnet *ifp = sc->sc_ifp;
1511         int error;
1512
1513         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1514
1515         bif = bridge_lookup_member(sc, req->ifba_ifsname);
1516         if (bif == NULL)
1517                 return (ENOENT);
1518
1519         ifnet_deserialize_all(ifp);
1520         error = bridge_rtsaddr(sc, req->ifba_dst, bif->bif_ifp,
1521                                req->ifba_flags);
1522         ifnet_serialize_all(ifp);
1523         return (error);
1524 }
1525
1526 static int
1527 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1528 {
1529         struct ifbrparam *param = arg;
1530
1531         sc->sc_brttimeout = param->ifbrp_ctime;
1532
1533         return (0);
1534 }
1535
1536 static int
1537 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1538 {
1539         struct ifbrparam *param = arg;
1540
1541         param->ifbrp_ctime = sc->sc_brttimeout;
1542
1543         return (0);
1544 }
1545
1546 static int
1547 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1548 {
1549         struct ifbareq *req = arg;
1550         struct ifnet *ifp = sc->sc_ifp;
1551         int error;
1552
1553         ifnet_deserialize_all(ifp);
1554         error = bridge_rtdaddr(sc, req->ifba_dst);
1555         ifnet_serialize_all(ifp);
1556         return error;
1557 }
1558
1559 static int
1560 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1561 {
1562         struct ifbreq *req = arg;
1563         struct ifnet *ifp = sc->sc_ifp;
1564
1565         ifnet_deserialize_all(ifp);
1566         bridge_rtflush(sc, req->ifbr_ifsflags | IFBF_FLUSHSYNC);
1567         ifnet_serialize_all(ifp);
1568
1569         return (0);
1570 }
1571
1572 static int
1573 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1574 {
1575         struct ifbrparam *param = arg;
1576
1577         param->ifbrp_prio = sc->sc_bridge_priority;
1578
1579         return (0);
1580 }
1581
1582 static int
1583 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1584 {
1585         struct ifbrparam *param = arg;
1586
1587         sc->sc_bridge_priority = param->ifbrp_prio;
1588
1589         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1590                 bstp_initialization(sc);
1591
1592         return (0);
1593 }
1594
1595 static int
1596 bridge_ioctl_reinit(struct bridge_softc *sc, void *arg __unused)
1597 {
1598         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1599                 bstp_initialization(sc);
1600         return (0);
1601 }
1602
1603 static int
1604 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1605 {
1606         struct ifbrparam *param = arg;
1607
1608         param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1609
1610         return (0);
1611 }
1612
1613 static int
1614 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1615 {
1616         struct ifbrparam *param = arg;
1617
1618         if (param->ifbrp_hellotime == 0)
1619                 return (EINVAL);
1620         sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1621
1622         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1623                 bstp_initialization(sc);
1624
1625         return (0);
1626 }
1627
1628 static int
1629 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1630 {
1631         struct ifbrparam *param = arg;
1632
1633         param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1634
1635         return (0);
1636 }
1637
1638 static int
1639 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1640 {
1641         struct ifbrparam *param = arg;
1642
1643         if (param->ifbrp_fwddelay == 0)
1644                 return (EINVAL);
1645         sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1646
1647         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1648                 bstp_initialization(sc);
1649
1650         return (0);
1651 }
1652
1653 static int
1654 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1655 {
1656         struct ifbrparam *param = arg;
1657
1658         param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1659
1660         return (0);
1661 }
1662
1663 static int
1664 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1665 {
1666         struct ifbrparam *param = arg;
1667
1668         if (param->ifbrp_maxage == 0)
1669                 return (EINVAL);
1670         sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1671
1672         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1673                 bstp_initialization(sc);
1674
1675         return (0);
1676 }
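
/*
 * Note on the shifts in the STP timer handlers above (explanatory only):
 * the hello time, forward delay and max age are exchanged with userland in
 * whole seconds but kept internally in the 1/256-second fixed point used
 * by the BSTP code, which is what the << 8 / >> 8 conversions implement.
 * For example, a requested 15 second max age is stored as
 *
 *      sc->sc_bridge_max_age = 15 << 8;        (3840 ticks of 1/256 s)
 *
 * and is reported back by bridge_ioctl_gma() as 3840 >> 8 == 15.  A value
 * of 0 is rejected with EINVAL by all three setters.
 */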
1677
1678 static int
1679 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1680 {
1681         struct ifbreq *req = arg;
1682         struct bridge_iflist *bif;
1683
1684         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1685         if (bif == NULL)
1686                 return (ENOENT);
1687
1688         bif->bif_priority = req->ifbr_priority;
1689
1690         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1691                 bstp_initialization(sc);
1692
1693         return (0);
1694 }
1695
1696 static int
1697 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1698 {
1699         struct ifbreq *req = arg;
1700         struct bridge_iflist *bif;
1701
1702         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1703         if (bif == NULL)
1704                 return (ENOENT);
1705
1706         bif->bif_path_cost = req->ifbr_path_cost;
1707
1708         if (sc->sc_ifp->if_flags & IFF_RUNNING)
1709                 bstp_initialization(sc);
1710
1711         return (0);
1712 }
1713
1714 static int
1715 bridge_ioctl_sifbondwght(struct bridge_softc *sc, void *arg)
1716 {
1717         struct ifbreq *req = arg;
1718         struct bridge_iflist *bif;
1719
1720         bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1721         if (bif == NULL)
1722                 return (ENOENT);
1723
1724         bif->bif_bond_weight = req->ifbr_bond_weight;
1725
1726         /* no reinit needed */
1727
1728         return (0);
1729 }
1730
1731 static int
1732 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1733 {
1734         struct ifbreq *req = arg;
1735         struct bridge_iflist *bif;
1736         struct ifnet *ifs;
1737         struct bridge_ifinfo *bif_info;
1738
1739         ifs = ifunit_netisr(req->ifbr_ifsname);
1740         if (ifs == NULL)
1741                 return (ENOENT);
1742
1743         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1744                 if (ifs == bif->bif_ifp)
1745                         return (EBUSY);
1746
1747         if (ifs->if_bridge != NULL)
1748                 return (EBUSY);
1749
1750         switch (ifs->if_type) {
1751         case IFT_ETHER:
1752         case IFT_GIF:
1753         case IFT_L2VLAN:
1754                 break;
1755
1756         default:
1757                 return (EINVAL);
1758         }
1759
1760         /*
1761          * bif_info is needed for bif_flags
1762          */
1763         bif_info = kmalloc(sizeof(*bif_info), M_DEVBUF, M_WAITOK | M_ZERO);
1764         bif_info->bifi_ifp = ifs;
1765
1766         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
1767         bif->bif_ifp = ifs;
1768         bif->bif_info = bif_info;
1769         bif->bif_flags = IFBIF_SPAN;
1770         /* NOTE: bridge_ifinfo is not otherwise used by the span bif */
1771
1772         TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1773
1774         sc->sc_span = 1;
1775
1776         return (0);
1777 }
1778
1779 static int
1780 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1781 {
1782         struct ifbreq *req = arg;
1783         struct bridge_iflist *bif;
1784         struct ifnet *ifs;
1785
1786         ifs = ifunit_netisr(req->ifbr_ifsname);
1787         if (ifs == NULL)
1788                 return (ENOENT);
1789
1790         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1791                 if (ifs == bif->bif_ifp)
1792                         break;
1793
1794         if (bif == NULL)
1795                 return (ENOENT);
1796
1797         bridge_delete_span(sc, bif);
1798
1799         if (TAILQ_EMPTY(&sc->sc_spanlist))
1800                 sc->sc_span = 0;
1801
1802         return (0);
1803 }
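
/*
 * Usage note (an illustration, not driver code): a span member gets a copy
 * of frames handled by the bridge via the bridge_span() calls in
 * bridge_output() and bridge_input() below, but it never appears on the
 * per-cpu member lists and is never chosen as a forwarding destination,
 * which makes it suitable for feeding a monitoring or IDS port.  Span
 * members are normally managed with ifconfig(8), e.g. (interface names
 * are examples only):
 *
 *      ifconfig bridge0 span em1       # mirror bridge traffic to em1
 *      ifconfig bridge0 -span em1      # stop mirroring
 */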
1804
1805 static void
1806 bridge_ifdetach_dispatch(netmsg_t msg)
1807 {
1808         struct ifnet *ifp, *bifp;
1809         struct bridge_softc *sc;
1810         struct bridge_iflist *bif;
1811
1812         ifp = msg->lmsg.u.ms_resultp;
1813         sc = ifp->if_bridge;
1814
1815         /* Check if the interface is a bridge member */
1816         if (sc != NULL) {
1817                 bifp = sc->sc_ifp;
1818
1819                 ifnet_serialize_all(bifp);
1820
1821                 bif = bridge_lookup_member_if(sc, ifp);
1822                 if (bif != NULL) {
1823                         bridge_delete_member(sc, bif, 1);
1824                 } else {
1825                         /* XXX Why would bif be NULL here? */
1826                 }
1827
1828                 ifnet_deserialize_all(bifp);
1829                 goto reply;
1830         }
1831
1832         crit_enter();   /* XXX MP */
1833
1834         /* Check if the interface is a span port */
1835         LIST_FOREACH(sc, &bridge_list, sc_list) {
1836                 bifp = sc->sc_ifp;
1837
1838                 ifnet_serialize_all(bifp);
1839
1840                 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
1841                         if (ifp == bif->bif_ifp) {
1842                                 bridge_delete_span(sc, bif);
1843                                 break;
1844                         }
1845
1846                 ifnet_deserialize_all(bifp);
1847         }
1848
1849         crit_exit();
1850
1851 reply:
1852         lwkt_replymsg(&msg->lmsg, 0);
1853 }
1854
1855 /*
1856  * bridge_ifdetach:
1857  *
1858  *      Detach an interface from a bridge.  Called when a member
1859  *      interface is detaching.
1860  */
1861 static void
1862 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
1863 {
1864         struct netmsg_base msg;
1865
1866         netmsg_init(&msg, NULL, &curthread->td_msgport,
1867                     0, bridge_ifdetach_dispatch);
1868         msg.lmsg.u.ms_resultp = ifp;
1869
1870         lwkt_domsg(BRIDGE_CFGPORT, &msg.lmsg, 0);
1871 }
1872
1873 /*
1874  * bridge_init:
1875  *
1876  *      Initialize a bridge interface.
1877  */
1878 static void
1879 bridge_init(void *xsc)
1880 {
1881         bridge_control(xsc, SIOCSIFFLAGS, bridge_ioctl_init, NULL);
1882 }
1883
1884 /*
1885  * bridge_stop:
1886  *
1887  *      Stop the bridge interface.
1888  */
1889 static void
1890 bridge_stop(struct ifnet *ifp)
1891 {
1892         bridge_control(ifp->if_softc, SIOCSIFFLAGS, bridge_ioctl_stop, NULL);
1893 }
1894
1895 /*
1896  * Returns TRUE if the packet is being sent 'from us'... from our bridge
1897  * interface or from any member of our bridge interface.  This is used
1898  * later on to force the source MAC to be that of our bridge interface.
1899  */
1900 static int
1901 bridge_from_us(struct bridge_softc *sc, struct ether_header *eh)
1902 {
1903         struct bridge_iflist *bif;
1904
1905         if (memcmp(eh->ether_shost, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN) == 0)
1906                 return (1);
1907
1908         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
1909                 if (memcmp(eh->ether_shost, IF_LLADDR(bif->bif_ifp),
1910                            ETHER_ADDR_LEN) == 0) {
1911                         return (1);
1912                 }
1913         }
1914         return (0);
1915 }
1916
1917 /*
1918  * bridge_enqueue:
1919  *
1920  *      Enqueue a packet on a bridge member interface.
1921  *
1922  */
1923 void
1924 bridge_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
1925 {
1926         struct netmsg_packet *nmp;
1927
1928         mbuftrackid(m, 64);
1929
1930         nmp = &m->m_hdr.mh_netmsg;
1931         netmsg_init(&nmp->base, NULL, &netisr_apanic_rport,
1932                     0, bridge_enqueue_handler);
1933         nmp->nm_packet = m;
1934         nmp->base.lmsg.u.ms_resultp = dst_ifp;
1935
1936         lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), &nmp->base.lmsg);
1937 }
1938
1939 /*
1940  * After looking up dst_if in our forwarding table we still have to
1941  * deal with channel bonding.  Find the best interface in the bonding set.
1942  */
1943 static struct ifnet *
1944 bridge_select_unicast(struct bridge_softc *sc, struct ifnet *dst_if,
1945                       int from_blocking, struct mbuf *m)
1946 {
1947         struct bridge_iflist *bif, *nbif;
1948         struct ifnet *alt_if;
1949         int alt_priority;
1950         int priority;
1951
1952         /*
1953          * Unicast, kinda replicates the output side of bridge_output().
1954          *
1955          * Even though this is a unicast packet we may have to select
1956          * an interface from a bonding set.
1957          */
1958         bif = bridge_lookup_member_if(sc, dst_if);
1959         if (bif == NULL) {
1960                 /* Not a member of the bridge (anymore?) */
1961                 return NULL;
1962         }
1963
1964         /*
1965          * If STP is enabled on the target we are an equal opportunity
1966          * employer and do not necessarily output to dst_if.  Instead
1967          * scan available links with the same MAC as the current dst_if
1968          * and choose the best one.
1969          *
1970          * We also need to do this because arp entries tag onto a particular
1971          * interface and if it happens to be dead then the packets will
1972          * go into a bit bucket.
1973          *
1974          * If LINK2 is set the matching links are bonded and we round-robin.
1975          * (the MAC address must be the same for the participating links).
1976          * In this case links in a STP FORWARDING or BONDED state are
1977          * allowed for unicast packets.
1978          */
1979         if (bif->bif_flags & IFBIF_STP) {
1980                 alt_if = NULL;
1981                 alt_priority = 0;
1982                 priority = 0;
1983
1984                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
1985                                      bif_next, nbif) {
1986                         /*
1987                          * dst_if may imply a bonding set so we must compare
1988                          * MAC addresses.
1989                          */
1990                         if (memcmp(IF_LLADDR(bif->bif_ifp),
1991                                    IF_LLADDR(dst_if),
1992                                    ETHER_ADDR_LEN) != 0) {
1993                                 continue;
1994                         }
1995
1996                         if ((bif->bif_ifp->if_flags & IFF_RUNNING) == 0)
1997                                 continue;
1998
1999                         /*
2000                          * NOTE: We allow transmissions through a BLOCKING
2001                          *       or LEARNING interface only as a last resort,
2002                          *       and DISALLOW both when from_blocking is set.
2003                          *
2004                          * NOTE: If we send a packet through a learning
2005                          *       interface the receiving end (if also in
2006                          *       LEARNING) will throw it away, so this is
2007                          *       the ultimate last resort.
2008                          */
2009                         switch(bif->bif_state) {
2010                         case BSTP_IFSTATE_BLOCKING:
2011                                 if (from_blocking == 0 &&
2012                                     bif->bif_priority + 256 > alt_priority) {
2013                                         alt_priority = bif->bif_priority + 256;
2014                                         alt_if = bif->bif_ifp;
2015                                 }
2016                                 continue;
2017                         case BSTP_IFSTATE_LEARNING:
2018                                 if (from_blocking == 0 &&
2019                                     bif->bif_priority > alt_priority) {
2020                                         alt_priority = bif->bif_priority;
2021                                         alt_if = bif->bif_ifp;
2022                                 }
2023                                 continue;
2024                         case BSTP_IFSTATE_L1BLOCKING:
2025                         case BSTP_IFSTATE_LISTENING:
2026                         case BSTP_IFSTATE_DISABLED:
2027                                 continue;
2028                         default:
2029                                 /* FORWARDING, BONDED */
2030                                 break;
2031                         }
2032
2033                         /*
2034                          * XXX we need to use the Toeplitz hash or
2035                          *     something like that instead of
2036                          *     round-robining.
2037                          */
2038                         if (sc->sc_ifp->if_flags & IFF_LINK2) {
2039                                 dst_if = bif->bif_ifp;
2040                                 if (++bif->bif_bond_count >=
2041                                     bif->bif_bond_weight) {
2042                                         bif->bif_bond_count = 0;
2043                                         TAILQ_REMOVE(&sc->sc_iflists[mycpuid],
2044                                                      bif, bif_next);
2045                                         TAILQ_INSERT_TAIL(
2046                                                      &sc->sc_iflists[mycpuid],
2047                                                      bif, bif_next);
2048                                 }
2049                                 priority = 1;
2050                                 break;
2051                         }
2052
2053                         /*
2054                          * Select best interface in the FORWARDING or
2055                          * BONDED set.  Well, there shouldn't be any
2056                          * in a BONDED state if LINK2 is not set (they
2057                          * will all be in a BLOCKING state), but there
2058                          * could be a transitory condition here.
2059                          */
2060                         if (bif->bif_priority > priority) {
2061                                 priority = bif->bif_priority;
2062                                 dst_if = bif->bif_ifp;
2063                         }
2064                 }
2065
2066                 /*
2067                  * If no suitable interfaces were found but a suitable
2068                  * alternative interface was found, use the alternative
2069                  * interface.
2070                  */
2071                 if (priority == 0 && alt_if)
2072                         dst_if = alt_if;
2073         }
2074
2075         /*
2076          * At this point, we're dealing with a unicast frame
2077          * going to a different interface.
2078          */
2079         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2080                 dst_if = NULL;
2081         return (dst_if);
2082 }
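
/*
 * Worked example of the LINK2 round-robin above (explanatory only, not
 * driver code): assume two bonded members A and B sharing a MAC, each with
 * bif_bond_weight == 2 (set via bridge_ioctl_sifbondwght()).  The member at
 * the head of the per-cpu list is used until its bif_bond_count reaches the
 * weight, at which point the count is reset and the member is rotated to
 * the tail:
 *
 *      pkt 1 -> A (bif_bond_count 1)
 *      pkt 2 -> A (count reaches 2, reset, A moved to the tail)
 *      pkt 3 -> B (bif_bond_count 1)
 *      pkt 4 -> B (count reaches 2, reset, B moved to the tail), ...
 *
 * so each member transmits bif_bond_weight packets per turn.
 */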
2083
2084
2085 /*
2086  * bridge_output:
2087  *
2088  *      Send output from a bridge member interface.  This
2089  *      performs the bridging function for locally originated
2090  *      packets.
2091  *
2092  *      The mbuf has the Ethernet header already attached.  We must
2093  *      enqueue or free the mbuf before returning.
2094  */
2095 static int
2096 bridge_output(struct ifnet *ifp, struct mbuf *m)
2097 {
2098         struct bridge_softc *sc = ifp->if_bridge;
2099         struct bridge_iflist *bif, *nbif;
2100         struct ether_header *eh;
2101         struct ifnet *dst_if, *alt_if, *bifp;
2102         int from_us;
2103         int alt_priority;
2104
2105         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2106         mbuftrackid(m, 65);
2107
2108         /*
2109          * Make sure that we are still a member of a bridge interface.
2110          */
2111         if (sc == NULL) {
2112                 m_freem(m);
2113                 return (0);
2114         }
2115         bifp = sc->sc_ifp;
2116
2117         /*
2118          * Acquire header
2119          */
2120         if (m->m_len < ETHER_HDR_LEN) {
2121                 m = m_pullup(m, ETHER_HDR_LEN);
2122                 if (m == NULL) {
2123                         IFNET_STAT_INC(bifp, oerrors, 1);
2124                         return (0);
2125                 }
2126         }
2127         eh = mtod(m, struct ether_header *);
2128         from_us = bridge_from_us(sc, eh);
2129
2130         /*
2131          * If bridge is down, but the original output interface is up,
2132          * go ahead and send out that interface.  Otherwise, the packet
2133          * is dropped below.
2134          */
2135         if ((bifp->if_flags & IFF_RUNNING) == 0) {
2136                 dst_if = ifp;
2137                 goto sendunicast;
2138         }
2139
2140         /*
2141          * If the packet is a multicast, or we don't know a better way to
2142          * get there, send to all interfaces.
2143          */
2144         if (ETHER_IS_MULTICAST(eh->ether_dhost))
2145                 dst_if = NULL;
2146         else
2147                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2148
2149         if (dst_if == NULL) {
2150                 struct mbuf *mc;
2151                 int used = 0;
2152                 int found = 0;
2153
2154                 if (sc->sc_span)
2155                         bridge_span(sc, m);
2156
2157                 alt_if = NULL;
2158                 alt_priority = 0;
2159                 TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid],
2160                                      bif_next, nbif) {
2161                         dst_if = bif->bif_ifp;
2162
2163                         if ((dst_if->if_flags & IFF_RUNNING) == 0)
2164                                 continue;
2165
2166                         /*
2167                          * If this is not the original output interface,
2168                          * and the interface is participating in spanning
2169                          * tree, make sure the port is in a state that
2170                          * allows forwarding.
2171                          *
2172                          * We keep track of a possible backup IF if we are
2173                          * unable to find any interfaces to forward through.
2174                          *
2175                          * NOTE: Currently round-robining is not implemented
2176                          *       across bonded interface groups (needs an
2177                          *       algorithm to track each group somehow).
2178                          *
2179                          *       Similarly we track only one alternative
2180                          *       interface if no suitable interfaces are
2181                          *       found.
2182                          */
2183                         if (dst_if != ifp &&
2184                             (bif->bif_flags & IFBIF_STP) != 0) {
2185                                 switch (bif->bif_state) {
2186                                 case BSTP_IFSTATE_BONDED:
2187                                         if (bif->bif_priority + 512 >
2188                                             alt_priority) {
2189                                                 alt_priority =
2190                                                     bif->bif_priority + 512;
2191                                                 alt_if = bif->bif_ifp;
2192                                         }
2193                                         continue;
2194                                 case BSTP_IFSTATE_BLOCKING:
2195                                         if (bif->bif_priority + 256 >
2196                                             alt_priority) {
2197                                                 alt_priority =
2198                                                     bif->bif_priority + 256;
2199                                                 alt_if = bif->bif_ifp;
2200                                         }
2201                                         continue;
2202                                 case BSTP_IFSTATE_LEARNING:
2203                                         if (bif->bif_priority > alt_priority) {
2204                                                 alt_priority =
2205                                                     bif->bif_priority;
2206                                                 alt_if = bif->bif_ifp;
2207                                         }
2208                                         continue;
2209                                 case BSTP_IFSTATE_L1BLOCKING:
2210                                 case BSTP_IFSTATE_LISTENING:
2211                                 case BSTP_IFSTATE_DISABLED:
2212                                         continue;
2213                                 default:
2214                                         /* FORWARDING */
2215                                         break;
2216                                 }
2217                         }
2218
2219                         KKASSERT(used == 0);
2220                         if (TAILQ_NEXT(bif, bif_next) == NULL) {
2221                                 used = 1;
2222                                 mc = m;
2223                         } else {
2224                                 mc = m_copypacket(m, M_NOWAIT);
2225                                 if (mc == NULL) {
2226                                         IFNET_STAT_INC(bifp, oerrors, 1);
2227                                         continue;
2228                                 }
2229                         }
2230
2231                         /*
2232                          * If the packet is 'from' us override ether_shost.
2233                          */
2234                         bridge_handoff(sc, dst_if, mc, from_us);
2235                         found = 1;
2236
2237                         if (nbif != NULL && !nbif->bif_onlist) {
2238                                 KKASSERT(bif->bif_onlist);
2239                                 nbif = TAILQ_NEXT(bif, bif_next);
2240                         }
2241                 }
2242
2243                 /*
2244                  * If we couldn't find anything use the backup interface
2245                  * if we have one.
2246                  */
2247                 if (found == 0 && alt_if) {
2248                         KKASSERT(used == 0);
2249                         mc = m;
2250                         used = 1;
2251                         bridge_handoff(sc, alt_if, mc, from_us);
2252                 }
2253
2254                 if (used == 0)
2255                         m_freem(m);
2256                 return (0);
2257         }
2258
2259         /*
2260          * Unicast
2261          */
2262 sendunicast:
2263         dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2264
2265         if (sc->sc_span)
2266                 bridge_span(sc, m);
2267         if (dst_if == NULL)
2268                 m_freem(m);
2269         else
2270                 bridge_handoff(sc, dst_if, m, from_us);
2271         return (0);
2272 }
2273
2274 /*
2275  * Returns the bridge interface associated with a member interface.
2276  * Pass ifp->if_bridge (must not be NULL).  Used by the ARP
2277  * code to supply the bridge for the is-at info, making
2278  * the bridge responsible for matching local addresses.
2279  *
2280  * Without this the ARP code will supply bridge member interfaces
2281  * for the is-at, which makes it difficult for the bridge to fail
2282  * over interfaces (among other things).
2283  */
2284 static struct ifnet *
2285 bridge_interface(void *if_bridge)
2286 {
2287         struct bridge_softc *sc = if_bridge;
2288         return (sc->sc_ifp);
2289 }
2290
2291 /*
2292  * bridge_start:
2293  *
2294  *      Start output on a bridge.
2295  */
2296 static void
2297 bridge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
2298 {
2299         struct bridge_softc *sc = ifp->if_softc;
2300
2301         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
2302         ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);
2303
2304         ifsq_set_oactive(ifsq);
2305         for (;;) {
2306                 struct ifnet *dst_if = NULL;
2307                 struct ether_header *eh;
2308                 struct mbuf *m;
2309
2310                 m = ifsq_dequeue(ifsq);
2311                 if (m == NULL)
2312                         break;
2313                 mbuftrackid(m, 75);
2314
2315                 if (m->m_len < sizeof(*eh)) {
2316                         m = m_pullup(m, sizeof(*eh));
2317                         if (m == NULL) {
2318                                 IFNET_STAT_INC(ifp, oerrors, 1);
2319                                 continue;
2320                         }
2321                 }
2322                 eh = mtod(m, struct ether_header *);
2323
2324                 BPF_MTAP(ifp, m);
2325                 IFNET_STAT_INC(ifp, opackets, 1);
2326
2327                 if ((m->m_flags & (M_BCAST|M_MCAST)) == 0)
2328                         dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2329
2330                 /*
2331                  * Multicast or broadcast
2332                  */
2333                 if (dst_if == NULL) {
2334                         bridge_start_bcast(sc, m);
2335                         continue;
2336                 }
2337
2338                 /*
2339                  * Unicast
2340                  */
2341                 dst_if = bridge_select_unicast(sc, dst_if, 0, m);
2342
2343                 if (dst_if == NULL)
2344                         m_freem(m);
2345                 else
2346                         bridge_enqueue(dst_if, m);
2347         }
2348         ifsq_clr_oactive(ifsq);
2349 }
2350
2351 /*
2352  * bridge_forward:
2353  *
2354  *      Forward packets received on a bridge interface via the input
2355  *      path.
2356  *
2357  *      This implements the forwarding function of the bridge.
2358  */
2359 static void
2360 bridge_forward(struct bridge_softc *sc, struct mbuf *m)
2361 {
2362         struct bridge_iflist *bif;
2363         struct ifnet *src_if, *dst_if, *ifp;
2364         struct ether_header *eh;
2365         int from_blocking;
2366
2367         mbuftrackid(m, 66);
2368         src_if = m->m_pkthdr.rcvif;
2369         ifp = sc->sc_ifp;
2370
2371         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2372
2373         /*
2374          * packet coming in on the bridge is also going out on the bridge,
2375          * but ether code won't adjust output stats for the bridge because
2376          * we are changing the interface to something else.
2377          */
2378         IFNET_STAT_INC(ifp, opackets, 1);
2379         IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len);
2380
2381         /*
2382          * Look up the bridge_iflist.
2383          */
2384         bif = bridge_lookup_member_if(sc, src_if);
2385         if (bif == NULL) {
2386                 /* Interface is not a bridge member (anymore?) */
2387                 m_freem(m);
2388                 return;
2389         }
2390
2391         /*
2392          * In spanning tree mode receiving a packet from an interface
2393          * in a BLOCKING state is allowed, it could be a member of last
2394          * resort from the sender's point of view, but forwarding it is
2395          * not allowed.
2396          *
2397          * The sender's spanning tree will eventually sync up and the
2398          * sender will go into a BLOCKING state too (but this still may be
2399          * an interface of last resort during state changes).
2400          */
2401         if (bif->bif_flags & IFBIF_STP) {
2402                 switch (bif->bif_state) {
2403                 case BSTP_IFSTATE_L1BLOCKING:
2404                 case BSTP_IFSTATE_LISTENING:
2405                 case BSTP_IFSTATE_DISABLED:
2406                         m_freem(m);
2407                         return;
2408                 default:
2409                         /* learning, blocking, bonded, forwarding */
2410                         break;
2411                 }
2412                 from_blocking = (bif->bif_state == BSTP_IFSTATE_BLOCKING);
2413         } else {
2414                 from_blocking = 0;
2415         }
2416
2417         eh = mtod(m, struct ether_header *);
2418
2419         /*
2420          * If the interface is learning, and the source
2421          * address is valid and not multicast, record
2422          * the address.
2423          */
2424         if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
2425             from_blocking == 0 &&
2426             ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
2427             (eh->ether_shost[0] == 0 &&
2428              eh->ether_shost[1] == 0 &&
2429              eh->ether_shost[2] == 0 &&
2430              eh->ether_shost[3] == 0 &&
2431              eh->ether_shost[4] == 0 &&
2432              eh->ether_shost[5] == 0) == 0) {
2433                 bridge_rtupdate(sc, eh->ether_shost, src_if, IFBAF_DYNAMIC);
2434         }
2435
2436         /*
2437          * Don't forward from an interface in the listening or learning
2438          * state.  That is, in the learning state we learn information
2439          * but we throw away the packets.
2440          *
2441          * We let through packets on interfaces in the blocking state.
2442          * The blocking state is applicable to the send side, not the
2443          * receive side.
2444          */
2445         if ((bif->bif_flags & IFBIF_STP) != 0 &&
2446             (bif->bif_state == BSTP_IFSTATE_LISTENING ||
2447              bif->bif_state == BSTP_IFSTATE_LEARNING)) {
2448                 m_freem(m);
2449                 return;
2450         }
2451
2452         /*
2453          * At this point, the port either doesn't participate
2454          * in spanning tree or it is in the forwarding state.
2455          */
2456
2457         /*
2458          * If the packet is unicast, destined for someone on
2459          * "this" side of the bridge, drop it.
2460          *
2461          * src_if implies the entire bonding set so we have to compare MAC
2462          * addresses and not just if pointers.
2463          */
2464         if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2465                 dst_if = bridge_rtlookup(sc, eh->ether_dhost);
2466                 if (dst_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
2467                                      ETHER_ADDR_LEN) == 0) {
2468                         m_freem(m);
2469                         return;
2470                 }
2471         } else {
2472                 /* ...forward it to all interfaces. */
2473                 IFNET_STAT_INC(ifp, imcasts, 1);
2474                 dst_if = NULL;
2475         }
2476
2477         /*
2478          * Broadcast if we do not have forwarding information.  However, if
2479          * we received the packet on a blocking interface we do not do this
2480          * (unless you really want to blow up your network).
2481          */
2482         if (dst_if == NULL) {
2483                 if (from_blocking)
2484                         m_freem(m);
2485                 else
2486                         bridge_broadcast(sc, src_if, m);
2487                 return;
2488         }
2489
2490         dst_if = bridge_select_unicast(sc, dst_if, from_blocking, m);
2491
2492         if (dst_if == NULL) {
2493                 m_freem(m);
2494                 return;
2495         }
2496
2497         if (inet_pfil_hook.ph_hashooks > 0
2498 #ifdef INET6
2499             || inet6_pfil_hook.ph_hashooks > 0
2500 #endif
2501             ) {
2502                 if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2503                         return;
2504                 if (m == NULL)
2505                         return;
2506
2507                 if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2508                         return;
2509                 if (m == NULL)
2510                         return;
2511         }
2512         bridge_handoff(sc, dst_if, m, 0);
2513 }
2514
2515 /*
2516  * bridge_input:
2517  *
2518  *      Receive input from a member interface.  Queue the packet for
2519  *      bridging if it is not for us.
2520  */
2521 static struct mbuf *
2522 bridge_input(struct ifnet *ifp, struct mbuf *m)
2523 {
2524         struct bridge_softc *sc = ifp->if_bridge;
2525         struct bridge_iflist *bif;
2526         struct ifnet *bifp, *new_ifp;
2527         struct ether_header *eh;
2528         struct mbuf *mc, *mc2;
2529
2530         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
2531         mbuftrackid(m, 67);
2532
2533         /*
2534          * Make sure that we are still a member of a bridge interface.
2535          */
2536         if (sc == NULL)
2537                 return m;
2538
2539         new_ifp = NULL;
2540         bifp = sc->sc_ifp;
2541
2542         if ((bifp->if_flags & IFF_RUNNING) == 0)
2543                 goto out;
2544
2545         /*
2546          * Implement support for bridge monitoring.  If this flag has been
2547          * set on this interface, discard the packet once we push it through
2548          * the bpf(4) machinery, but before we do, increment various counters
2549          * associated with this bridge.
2550          */
2551         if (bifp->if_flags & IFF_MONITOR) {
2552                 /*
2553                  * Change input interface to this bridge
2554                  *
2555                  * Update bridge's ifnet statistics
2556                  */
2557                 m->m_pkthdr.rcvif = bifp;
2558
2559                 BPF_MTAP(bifp, m);
2560                 IFNET_STAT_INC(bifp, ipackets, 1);
2561                 IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2562                 if (m->m_flags & (M_MCAST | M_BCAST))
2563                         IFNET_STAT_INC(bifp, imcasts, 1);
2564
2565                 m_freem(m);
2566                 m = NULL;
2567                 goto out;
2568         }
2569
2570         /*
2571          * Handle the ether_header
2572          *
2573          * In all cases if the packet is destined for us via our MAC
2574          * we must clear BRIDGE_MBUF_TAGGED to ensure that we don't
2575          * repeat the source MAC out the same interface.
2576          *
2577          * This first test against our bridge MAC is the fast-path.
2578          *
2579          * NOTE!  The bridge interface can serve as an endpoint for
2580          *        communication but normally there are no IPs associated
2581          *        with it so you cannot route through it.  Instead what
2582          *        you do is point your default route *THROUGH* the bridge
2583          *        to the actual default router for one of the bridged spaces.
2584          *
2585          *        Another possibility is to put all your IP specifications
2586          *        on the bridge instead of on the individual interfaces.  If
2587          *        you do this it should be possible to use the bridge as an
2588          *        end point and route (rather than switch) through it using
2589          *        the default route or ipfw forwarding rules.
2590          */
2591
2592         /*
2593          * Acquire header
2594          */
2595         if (m->m_len < ETHER_HDR_LEN) {
2596                 m = m_pullup(m, ETHER_HDR_LEN);
2597                 if (m == NULL)
2598                         goto out;
2599         }
2600         eh = mtod(m, struct ether_header *);
2601         m->m_pkthdr.fw_flags |= BRIDGE_MBUF_TAGGED;
2602         bcopy(eh->ether_shost, m->m_pkthdr.ether_br_shost, ETHER_ADDR_LEN);
2603
2604         if ((bridge_debug & 1) &&
2605             (ntohs(eh->ether_type) == ETHERTYPE_ARP ||
2606             ntohs(eh->ether_type) == ETHERTYPE_REVARP)) {
2607                 kprintf("%02x:%02x:%02x:%02x:%02x:%02x "
2608                         "%02x:%02x:%02x:%02x:%02x:%02x type %04x "
2609                         "lla %02x:%02x:%02x:%02x:%02x:%02x\n",
2610                         eh->ether_dhost[0],
2611                         eh->ether_dhost[1],
2612                         eh->ether_dhost[2],
2613                         eh->ether_dhost[3],
2614                         eh->ether_dhost[4],
2615                         eh->ether_dhost[5],
2616                         eh->ether_shost[0],
2617                         eh->ether_shost[1],
2618                         eh->ether_shost[2],
2619                         eh->ether_shost[3],
2620                         eh->ether_shost[4],
2621                         eh->ether_shost[5],
2622                         eh->ether_type,
2623                         ((u_char *)IF_LLADDR(bifp))[0],
2624                         ((u_char *)IF_LLADDR(bifp))[1],
2625                         ((u_char *)IF_LLADDR(bifp))[2],
2626                         ((u_char *)IF_LLADDR(bifp))[3],
2627                         ((u_char *)IF_LLADDR(bifp))[4],
2628                         ((u_char *)IF_LLADDR(bifp))[5]
2629                 );
2630         }
2631
2632         /*
2633          * If the packet is for us, set the packets source as the
2634          * bridge, and return the packet back to ifnet.if_input for
2635          * local processing.
2636          */
2637         if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), ETHER_ADDR_LEN) == 0) {
2638                 /*
2639                  * We must still record the source interface in our
2640                  * addr cache, otherwise our bridge won't know where
2641                  * to send responses and will broadcast them.
2642                  */
2643                 bif = bridge_lookup_member_if(sc, ifp);
2644                 if ((bif->bif_flags & IFBIF_LEARNING) &&
2645                     ((bif->bif_flags & IFBIF_STP) == 0 ||
2646                      bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2647                         bridge_rtupdate(sc, eh->ether_shost,
2648                                         ifp, IFBAF_DYNAMIC);
2649                 }
2650
2651                 /*
2652                  * Perform pfil hooks.
2653                  */
2654                 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2655                 KASSERT(bifp->if_bridge == NULL,
2656                         ("loop created in bridge_input"));
2657                 if (pfil_member != 0) {
2658                         if (inet_pfil_hook.ph_hashooks > 0
2659 #ifdef INET6
2660                             || inet6_pfil_hook.ph_hashooks > 0
2661 #endif
2662                         ) {
2663                                 if (bridge_pfil(&m, NULL, ifp, PFIL_IN) != 0)
2664                                         goto out;
2665                                 if (m == NULL)
2666                                         goto out;
2667                         }
2668                 }
2669
2670                 /*
2671                  * Set new_ifp and skip to the end.  This will trigger code
2672                  * to reinput the packet and run it into our stack.
2673                  */
2674                 new_ifp = bifp;
2675                 goto out;
2676         }
2677
2678         /*
2679          * Tap all packets arriving on the bridge, no matter if
2680          * they are local destinations or not.  In is in.
2681          *
2682          * Update bridge's ifnet statistics
2683          */
2684         BPF_MTAP(bifp, m);
2685         IFNET_STAT_INC(bifp, ipackets, 1);
2686         IFNET_STAT_INC(bifp, ibytes, m->m_pkthdr.len);
2687         if (m->m_flags & (M_MCAST | M_BCAST))
2688                 IFNET_STAT_INC(bifp, imcasts, 1);
2689
2690         bif = bridge_lookup_member_if(sc, ifp);
2691         if (bif == NULL)
2692                 goto out;
2693
2694         if (sc->sc_span)
2695                 bridge_span(sc, m);
2696
2697         if (m->m_flags & (M_BCAST | M_MCAST)) {
2698                 /*
2699                  * Tap off 802.1D packets; they do not get forwarded.
2700                  */
2701                 if (memcmp(eh->ether_dhost, bstp_etheraddr,
2702                             ETHER_ADDR_LEN) == 0) {
2703                         ifnet_serialize_all(bifp);
2704                         bstp_input(sc, bif, m);
2705                         ifnet_deserialize_all(bifp);
2706
2707                         /* m is freed by bstp_input */
2708                         m = NULL;
2709                         goto out;
2710                 }
2711
2712                 /*
2713                  * Other than 802.1D packets, ignore packets if the
2714                  * interface is not in a good state.
2715                  *
2716                  * NOTE: Broadcast/mcast packets received on a blocking or
2717                  *       learning interface are allowed for local processing.
2718                  *
2719                  *       The sending side of a blocked port will stop
2720                  *       transmitting when a better alternative is found.
2721                  *       However, later on we will disallow the forwarding
2722                  *       of bcast/mcast packets over a blocking interface.
2723                  */
2724                 if (bif->bif_flags & IFBIF_STP) {
2725                         switch (bif->bif_state) {
2726                         case BSTP_IFSTATE_L1BLOCKING:
2727                         case BSTP_IFSTATE_LISTENING:
2728                         case BSTP_IFSTATE_DISABLED:
2729                                 goto out;
2730                         default:
2731                                 /* blocking, learning, bonded, forwarding */
2732                                 break;
2733                         }
2734                 }
2735
2736                 /*
2737                  * Make a deep copy of the packet and enqueue the copy
2738                  * for bridge processing; return the original packet for
2739                  * local processing.
2740                  */
2741                 mc = m_dup(m, M_NOWAIT);
2742                 if (mc == NULL)
2743                         goto out;
2744
2745                 /*
2746                  * It's just too dangerous to allow bcast/mcast over a
2747                  * blocked interface; eventually the network will sort
2748                  * itself out and a better path will be found.
2749                  */
2750                 if ((bif->bif_flags & IFBIF_STP) == 0 ||
2751                     bif->bif_state != BSTP_IFSTATE_BLOCKING) {
2752                         bridge_forward(sc, mc);
2753                 }
2754
2755                 /*
2756                  * Reinject the mbuf as arriving on the bridge so we have a
2757                  * chance at claiming multicast packets. We can not loop back
2758                  * here from ether_input as a bridge is never a member of a
2759                  * bridge.
2760                  */
2761                 KASSERT(bifp->if_bridge == NULL,
2762                         ("loop created in bridge_input"));
2763                 mc2 = m_dup(m, M_NOWAIT);
2764 #ifdef notyet
2765                 if (mc2 != NULL) {
2766                         /* Keep the layer3 header aligned */
2767                         int i = min(mc2->m_pkthdr.len, max_protohdr);
2768                         mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2769                 }
2770 #endif
2771                 if (mc2 != NULL) {
2772                         /*
2773                          * Don't tap to bpf(4) again; we have already done
2774                          * the tapping.
2775                          *
2776                          * Leave m_pkthdr.rcvif alone, so ARP replies are
2777                          * processed as coming in on the correct interface.
2778                          *
2779                          * Clear the bridge flag for local processing in
2780                          * case the packet gets routed.
2781                          */
2782                         mc2->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2783                         ether_reinput_oncpu(bifp, mc2, 0);
2784                 }
2785
2786                 /* Return the original packet for local processing. */
2787                 goto out;
2788         }
2789
2790         /*
2791          * Input of a unicast packet.  We have to allow unicast packets
2792          * input from links in the BLOCKING state as this might be an
2793          * interface of last resort.
2794          *
2795          * NOTE: We explicitly ignore normal packets received on a link
2796          *       in the BLOCKING state.  The point of being in that state
2797          *       is to avoid getting duplicate packets.
2798          *
2799          *       HOWEVER, if LINK2 is set the normal spanning tree code
2800          *       will mark an interface BLOCKING to avoid multi-cast/broadcast
2801          *       loops.  Unicast packets CAN still loop if we allow the
2802          *       case (hence we only do it in LINK2), but it isn't quite as
2803          *       bad as a broadcast packet looping.
2804          */
2805         if (bif->bif_flags & IFBIF_STP) {
2806                 switch (bif->bif_state) {
2807                 case BSTP_IFSTATE_L1BLOCKING:
2808                 case BSTP_IFSTATE_LISTENING:
2809                 case BSTP_IFSTATE_DISABLED:
2810                         goto out;
2811                 default:
2812                         /* blocking, bonded, forwarding, learning */
2813                         break;
2814                 }
2815         }
2816
2817         /*
2818          * Unicast.  Make sure it's not for us.
2819          *
2820          * This loop is MPSAFE; the only blocking operation (bridge_rtupdate)
2821          * is followed by breaking out of the loop.
2822          */
2823         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2824                 if (bif->bif_ifp->if_type != IFT_ETHER)
2825                         continue;
2826
2827                 /*
2828                  * It is destined for an interface linked to the bridge.
2829                  * We want the bridge itself to take care of link level
2830                  * forwarding to member interfaces so reinput on the bridge.
2831                  * i.e. if you ping an IP on a target interface associated
2832                  * with the bridge, the arp is-at response should indicate
2833                  * the bridge MAC.
2834                  *
2835                  * Only update our addr list when learning if the port
2836                  * is not in a blocking state.  If it is we still allow
2837                  * the packet but we do not try to learn from it.
2838                  */
2839                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_dhost,
2840                            ETHER_ADDR_LEN) == 0) {
2841                         if (bif->bif_ifp != ifp) {
2842                                 /* XXX loop prevention */
2843                                 m->m_flags |= M_ETHER_BRIDGED;
2844                         }
2845                         if ((bif->bif_flags & IFBIF_LEARNING) &&
2846                             ((bif->bif_flags & IFBIF_STP) == 0 ||
2847                              bif->bif_state != BSTP_IFSTATE_BLOCKING)) {
2848                                 bridge_rtupdate(sc, eh->ether_shost,
2849                                                 ifp, IFBAF_DYNAMIC);
2850                         }
2851                         new_ifp = bifp; /* not bif->bif_ifp */
2852                         m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
2853                         goto out;
2854                 }
2855
2856                 /*
2857                  * Ignore received packets that were sent by us.
2858                  */
2859                 if (memcmp(IF_LLADDR(bif->bif_ifp), eh->ether_shost,
2860                            ETHER_ADDR_LEN) == 0) {
2861                         m_freem(m);
2862                         m = NULL;
2863                         goto out;
2864                 }
2865         }
2866
2867         /*
2868          * It isn't for us.
2869          *
2870          * Perform the bridge forwarding function, but disallow bridging
2871          * to interfaces in the blocking state if the packet came in on
2872          * an interface in the blocking state.
2873          *
2874          * (bridge_forward also updates the addr cache).
2875          */
2876         bridge_forward(sc, m);
2877         m = NULL;
2878
2879         /*
2880          * ether_reinput_oncpu() will reprocess rcvif as
2881          * coming from new_ifp (since we do not specify
2882          * REINPUT_KEEPRCVIF).
2883          */
2884 out:
2885         if (new_ifp != NULL) {
2886                 /*
2887                  * Clear the bridge flag for local processing in
2888                  * case the packet gets routed.
2889                  */
2890                 ether_reinput_oncpu(new_ifp, m, REINPUT_RUNBPF);
2891                 m = NULL;
2892         }
2893         return (m);
2894 }
2895
2896 /*
2897  * bridge_start_bcast:
2898  *
2899  *      Broadcast the packet sent from bridge to all member
2900  *      interfaces.
2901  *      This is a simplified version of bridge_broadcast(); however,
2902  *      this function expects the caller to hold the bridge's serializer.
2903  */
2904 static void
2905 bridge_start_bcast(struct bridge_softc *sc, struct mbuf *m)
2906 {
2907         struct bridge_iflist *bif;
2908         struct mbuf *mc;
2909         struct ifnet *dst_if, *alt_if, *bifp;
2910         int used = 0;
2911         int found = 0;
2912         int alt_priority;
2913
2914         mbuftrackid(m, 68);
2915         bifp = sc->sc_ifp;
2916         ASSERT_IFNET_SERIALIZED_ALL(bifp);
2917
2918         /*
2919          * Following loop is MPSAFE; nothing is blocking
2920          * in the loop body.
2921          *
2922          * NOTE: We transmit through a member in the BLOCKING state only
2923          *       as a last resort.
2924          */
2925         alt_if = NULL;
2926         alt_priority = 0;
2927
2928         TAILQ_FOREACH(bif, &sc->sc_iflists[mycpuid], bif_next) {
2929                 dst_if = bif->bif_ifp;
2930
2931                 if (bif->bif_flags & IFBIF_STP) {
2932                         switch (bif->bif_state) {
2933                         case BSTP_IFSTATE_BLOCKING:
2934                                 if (bif->bif_priority > alt_priority) {
2935                                         alt_priority = bif->bif_priority;
2936                                         alt_if = bif->bif_ifp;
2937                                 }
2938                                 /* fall through */
2939                         case BSTP_IFSTATE_L1BLOCKING:
2940                         case BSTP_IFSTATE_DISABLED:
2941                                 continue;
2942                         default:
2943                                 /* listening, learning, bonded, forwarding */
2944                                 break;
2945                         }
2946                 }
2947
2948                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
2949                     (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2950                         continue;
2951
2952                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
2953                         continue;
2954
2955                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
2956                         mc = m;
2957                         used = 1;
2958                 } else {
2959                         mc = m_copypacket(m, M_NOWAIT);
2960                         if (mc == NULL) {
2961                                 IFNET_STAT_INC(bifp, oerrors, 1);
2962                                 continue;
2963                         }
2964                 }
2965                 found = 1;
2966                 bridge_enqueue(dst_if, mc);
2967         }
2968
2969         if (found == 0 && alt_if) {
2970                 KKASSERT(used == 0);
2971                 mc = m;
2972                 used = 1;
2973                 bridge_enqueue(alt_if, mc);
2974         }
2975
2976         if (used == 0)
2977                 m_freem(m);
2978 }
2979
2980 /*
2981  * bridge_broadcast:
2982  *
2983  *      Send a frame to all interfaces that are members of
2984  *      the bridge, except for the one on which the packet
2985  *      arrived.
2986  */
2987 static void
2988 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2989                  struct mbuf *m)
2990 {
2991         struct bridge_iflist *bif, *nbif;
2992         struct ether_header *eh;
2993         struct mbuf *mc;
2994         struct ifnet *dst_if, *alt_if, *bifp;
2995         int used;
2996         int found;
2997         int alt_priority;
2998         int from_us;
2999
3000         mbuftrackid(m, 69);
3001         bifp = sc->sc_ifp;
3002         ASSERT_IFNET_NOT_SERIALIZED_ALL(bifp);
3003
3004         eh = mtod(m, struct ether_header *);
3005         from_us = bridge_from_us(sc, eh);
3006
3007         if (inet_pfil_hook.ph_hashooks > 0
3008 #ifdef INET6
3009             || inet6_pfil_hook.ph_hashooks > 0
3010 #endif
3011             ) {
3012                 if (bridge_pfil(&m, bifp, src_if, PFIL_IN) != 0)
3013                         return;
3014                 if (m == NULL)
3015                         return;
3016
3017                 /* Filter on the bridge interface before broadcasting */
3018                 if (bridge_pfil(&m, bifp, NULL, PFIL_OUT) != 0)
3019                         return;
3020                 if (m == NULL)
3021                         return;
3022         }
3023
3024         alt_if = NULL;
3025         alt_priority = 0;
3026         found = 0;
3027         used = 0;
3028
3029         TAILQ_FOREACH_MUTABLE(bif, &sc->sc_iflists[mycpuid], bif_next, nbif) {
3030                 dst_if = bif->bif_ifp;
3031
3032                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3033                         continue;
3034
3035                 /*
3036                  * Don't bounce the packet out the same interface it came
3037                  * in on.  We have to test MAC addresses because a packet
3038                  * can come in on a bonded interface and we don't want it to
3039                  * be echoed out the forwarding interface for the same bonding
3040                  * set.
3041                  */
3042                 if (src_if && memcmp(IF_LLADDR(src_if), IF_LLADDR(dst_if),
3043                                      ETHER_ADDR_LEN) == 0) {
3044                         continue;
3045                 }
3046
3047                 /*
3048                  * Generally speaking we only broadcast through forwarding
3049                  * interfaces.  If no interfaces are available we select
3050                  * a BONDED, BLOCKING, or LEARNING interface to forward
3051                  * through.
3052                  */
3053                 if (bif->bif_flags & IFBIF_STP) {
3054                         switch (bif->bif_state) {
3055                         case BSTP_IFSTATE_BONDED:
3056                                 if (bif->bif_priority + 512 > alt_priority) {
3057                                         alt_priority = bif->bif_priority + 512;
3058                                         alt_if = bif->bif_ifp;
3059                                 }
3060                                 continue;
3061                         case BSTP_IFSTATE_BLOCKING:
3062                                 if (bif->bif_priority + 256 > alt_priority) {
3063                                         alt_priority = bif->bif_priority + 256;
3064                                         alt_if = bif->bif_ifp;
3065                                 }
3066                                 continue;
3067                         case BSTP_IFSTATE_LEARNING:
3068                                 if (bif->bif_priority > alt_priority) {
3069                                         alt_priority = bif->bif_priority;
3070                                         alt_if = bif->bif_ifp;
3071                                 }
3072                                 continue;
3073                         case BSTP_IFSTATE_L1BLOCKING:
3074                         case BSTP_IFSTATE_DISABLED:
3075                         case BSTP_IFSTATE_LISTENING:
3076                                 continue;
3077                         default:
3078                                 /* forwarding */
3079                                 break;
3080                         }
3081                 }
3082
3083                 if ((bif->bif_flags & IFBIF_DISCOVER) == 0 &&
3084                     (m->m_flags & (M_BCAST|M_MCAST)) == 0) {
3085                         continue;
3086                 }
3087
3088                 if (TAILQ_NEXT(bif, bif_next) == NULL) {
3089                         mc = m;
3090                         used = 1;
3091                 } else {
3092                         mc = m_copypacket(m, M_NOWAIT);
3093                         if (mc == NULL) {
3094                                 IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3095                                 continue;
3096                         }
3097                 }
3098                 found = 1;
3099
3100                 /*
3101                  * Filter on the output interface.  Pass a NULL bridge
3102                  * interface pointer so we do not redundantly filter on
3103                  * the bridge for each interface we broadcast on.
3104                  */
3105                 if (inet_pfil_hook.ph_hashooks > 0
3106 #ifdef INET6
3107                     || inet6_pfil_hook.ph_hashooks > 0
3108 #endif
3109                     ) {
3110                         if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
3111                                 continue;
3112                         if (mc == NULL)
3113                                 continue;
3114                 }
3115                 bridge_handoff(sc, dst_if, mc, from_us);
3116
3117                 if (nbif != NULL && !nbif->bif_onlist) {
3118                         KKASSERT(bif->bif_onlist);
3119                         nbif = TAILQ_NEXT(bif, bif_next);
3120                 }
3121         }
3122
3123         if (found == 0 && alt_if) {
3124                 KKASSERT(used == 0);
3125                 mc = m;
3126                 used = 1;
3127                 bridge_enqueue(alt_if, mc);
3128         }
3129
3130         if (used == 0)
3131                 m_freem(m);
3132 }
3133
3134 /*
3135  * bridge_span:
3136  *
3137  *      Duplicate a packet out one or more interfaces that are in span mode;
3138  *      the original mbuf is left unmodified.
3139  */
3140 static void
3141 bridge_span(struct bridge_softc *sc, struct mbuf *m)
3142 {
3143         struct bridge_iflist *bif;
3144         struct ifnet *dst_if, *bifp;
3145         struct mbuf *mc;
3146
3147         mbuftrackid(m, 70);
3148         bifp = sc->sc_ifp;
3149         ifnet_serialize_all(bifp);
3150
3151         TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
3152                 dst_if = bif->bif_ifp;
3153
3154                 if ((dst_if->if_flags & IFF_RUNNING) == 0)
3155                         continue;
3156
3157                 mc = m_copypacket(m, M_NOWAIT);
3158                 if (mc == NULL) {
3159                         IFNET_STAT_INC(sc->sc_ifp, oerrors, 1);
3160                         continue;
3161                 }
3162                 bridge_enqueue(dst_if, mc);
3163         }
3164
3165         ifnet_deserialize_all(bifp);
3166 }
3167
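/*
 * bridge_rtmsg_sync_handler:
 *
 *      Handler for bridge_rtmsg_sync(); it does nothing but forward the
 *      message to the next netisr cpu.  Since each netisr handles its
 *      messages in order, the completed round trip effectively acts as a
 *      barrier: route netmsgs sent earlier have been processed on every
 *      cpu by the time netisr_domsg() returns.
 */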
3168 static void
3169 bridge_rtmsg_sync_handler(netmsg_t msg)
3170 {
3171         netisr_forwardmsg(&msg->base, mycpuid + 1);
3172 }
3173
3174 static void
3175 bridge_rtmsg_sync(struct bridge_softc *sc)
3176 {
3177         struct netmsg_base msg;
3178
3179         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3180
3181         /* XXX use netmsg_service_sync */
3182         netmsg_init(&msg, NULL, &curthread->td_msgport,
3183                     0, bridge_rtmsg_sync_handler);
3184         netisr_domsg(&msg, 0);
3185 }
3186
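/*
 * bridge_rtinfo_update:
 *
 *      Refresh the shared rtinfo for an address: re-point dynamic entries
 *      at dst_if, push out the expiration time when the new flags mark
 *      the entry dynamic, and overwrite the flags when setflags is
 *      requested.  The equality tests avoid redundant writes to the
 *      shared structure.
 */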
3187 static __inline void
3188 bridge_rtinfo_update(struct bridge_rtinfo *bri, struct ifnet *dst_if,
3189                      int setflags, uint8_t flags, uint32_t timeo)
3190 {
3191         if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3192             bri->bri_ifp != dst_if)
3193                 bri->bri_ifp = dst_if;
3194         if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3195             bri->bri_expire != time_uptime + timeo)
3196                 bri->bri_expire = time_uptime + timeo;
3197         if (setflags)
3198                 bri->bri_flags = flags;
3199 }
3200
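/*
 * bridge_rtinstall_oncpu:
 *
 *      Install an rtnode for 'dst' on the current cpu.  CPU0 performs the
 *      duplicate and brtmax checks and allocates the shared rtinfo
 *      (returned through bri0); the remaining cpus reuse that rtinfo and
 *      only allocate their per-cpu rtnode.
 */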
3201 static int
3202 bridge_rtinstall_oncpu(struct bridge_softc *sc, const uint8_t *dst,
3203                        struct ifnet *dst_if, int setflags, uint8_t flags,
3204                        struct bridge_rtinfo **bri0)
3205 {
3206         struct bridge_rtnode *brt;
3207         struct bridge_rtinfo *bri;
3208
3209         if (mycpuid == 0) {
3210                 brt = bridge_rtnode_lookup(sc, dst);
3211                 if (brt != NULL) {
3212                         /*
3213                          * rtnode for 'dst' already exists.  We inform the
3214                          * caller about this by leaving bri0 as NULL.  The
3215                          * caller will terminate the installation upon getting
3216                          * NULL bri0.  However, we still need to update the
3217                          * rtinfo.
3218                          */
3219                         KKASSERT(*bri0 == NULL);
3220
3221                         /* Update rtinfo */
3222                         bridge_rtinfo_update(brt->brt_info, dst_if, setflags,
3223                                              flags, sc->sc_brttimeout);
3224                         return 0;
3225                 }
3226
3227                 /*
3228                  * We only need to check brtcnt on CPU0, since if the limit
3229                  * would be exceeded, ENOSPC is returned and the caller
3230                  * terminates the installation.
3231                  */
3232                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3233                         return ENOSPC;
3234
3235                 KKASSERT(*bri0 == NULL);
3236                 bri = kmalloc(sizeof(struct bridge_rtinfo), M_DEVBUF,
3237                                   M_WAITOK | M_ZERO);
3238                 *bri0 = bri;
3239
3240                 /* Setup rtinfo */
3241                 bri->bri_flags = IFBAF_DYNAMIC;
3242                 bridge_rtinfo_update(bri, dst_if, setflags, flags,
3243                                      sc->sc_brttimeout);
3244         } else {
3245                 bri = *bri0;
3246                 KKASSERT(bri != NULL);
3247         }
3248
3249         brt = kmalloc(sizeof(struct bridge_rtnode), M_DEVBUF,
3250                       M_WAITOK | M_ZERO);
3251         memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3252         brt->brt_info = bri;
3253
3254         bridge_rtnode_insert(sc, brt);
3255         return 0;
3256 }
3257
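/*
 * bridge_rtinstall_handler:
 *
 *      Netmsg handler that runs bridge_rtinstall_oncpu() on each cpu in
 *      turn.  An error or an already existing rtnode is detected on CPU0
 *      and terminates the chain with a reply; otherwise the message is
 *      forwarded to the next cpu.
 */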
3258 static void
3259 bridge_rtinstall_handler(netmsg_t msg)
3260 {
3261         struct netmsg_brsaddr *brmsg = (struct netmsg_brsaddr *)msg;
3262         int error;
3263
3264         error = bridge_rtinstall_oncpu(brmsg->br_softc,
3265                                        brmsg->br_dst, brmsg->br_dst_if,
3266                                        brmsg->br_setflags, brmsg->br_flags,
3267                                        &brmsg->br_rtinfo);
3268         if (error) {
3269                 KKASSERT(mycpuid == 0 && brmsg->br_rtinfo == NULL);
3270                 netisr_replymsg(&brmsg->base, error);
3271                 return;
3272         } else if (brmsg->br_rtinfo == NULL) {
3273                 /* rtnode already exists for 'dst' */
3274                 KKASSERT(mycpuid == 0);
3275                 netisr_replymsg(&brmsg->base, 0);
3276                 return;
3277         }
3278         netisr_forwardmsg(&brmsg->base, mycpuid + 1);
3279 }
3280
3281 /*
3282  * bridge_rtupdate:
3283  *
3284  *      Add/Update a bridge routing entry.
3285  */
3286 static int
3287 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
3288                 struct ifnet *dst_if, uint8_t flags)
3289 {
3290         struct bridge_rtnode *brt;
3291
3292         /*
3293          * A route for this destination might already exist.  If so,
3294          * update it, otherwise create a new one.
3295          */
3296         if ((brt = bridge_rtnode_lookup(sc, dst)) == NULL) {
3297                 struct netmsg_brsaddr *brmsg;
3298
3299                 if (sc->sc_brtcnt >= sc->sc_brtmax)
3300                         return ENOSPC;
3301
3302                 brmsg = kmalloc(sizeof(*brmsg), M_LWKTMSG, M_WAITOK | M_NULLOK);
3303                 if (brmsg == NULL)
3304                         return ENOMEM;
3305
3306                 netmsg_init(&brmsg->base, NULL, &netisr_afree_rport,
3307                             0, bridge_rtinstall_handler);
3308                 memcpy(brmsg->br_dst, dst, ETHER_ADDR_LEN);
3309                 brmsg->br_dst_if = dst_if;
3310                 brmsg->br_flags = flags;
3311                 brmsg->br_setflags = 0;
3312                 brmsg->br_softc = sc;
3313                 brmsg->br_rtinfo = NULL;
3314
3315                 netisr_sendmsg(&brmsg->base, 0);
3316                 return 0;
3317         }
3318         bridge_rtinfo_update(brt->brt_info, dst_if, 0, flags,
3319                              sc->sc_brttimeout);
3320         return 0;
3321 }
3322
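/*
 * bridge_rtsaddr:
 *
 *      Add a routing entry with the specified flags forced (setflags),
 *      e.g. a static entry, and wait until it has been installed on
 *      every cpu.
 */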
3323 static int
3324 bridge_rtsaddr(struct bridge_softc *sc, const uint8_t *dst,
3325                struct ifnet *dst_if, uint8_t flags)
3326 {
3327         struct netmsg_brsaddr brmsg;
3328
3329         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3330
3331         netmsg_init(&brmsg.base, NULL, &curthread->td_msgport,
3332                     0, bridge_rtinstall_handler);
3333         memcpy(brmsg.br_dst, dst, ETHER_ADDR_LEN);
3334         brmsg.br_dst_if = dst_if;
3335         brmsg.br_flags = flags;
3336         brmsg.br_setflags = 1;
3337         brmsg.br_softc = sc;
3338         brmsg.br_rtinfo = NULL;
3339
3340         return netisr_domsg(&brmsg.base, 0);
3341 }
3342
3343 /*
3344  * bridge_rtlookup:
3345  *
3346  *      Look up the destination interface for an address.
3347  */
3348 static struct ifnet *
3349 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
3350 {
3351         struct bridge_rtnode *brt;
3352
3353         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3354                 return NULL;
3355         return brt->brt_info->bri_ifp;
3356 }
3357
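/*
 * bridge_rtreap_handler:
 *
 *      Destroy all rtnodes on this cpu whose shared rtinfo has been
 *      marked dead, then forward the message to the next cpu.
 */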
3358 static void
3359 bridge_rtreap_handler(netmsg_t msg)
3360 {
3361         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3362         struct bridge_rtnode *brt, *nbrt;
3363
3364         LIST_FOREACH_MUTABLE(brt, &sc->sc_rtlists[mycpuid], brt_list, nbrt) {
3365                 if (brt->brt_info->bri_dead)
3366                         bridge_rtnode_destroy(sc, brt);
3367         }
3368         netisr_forwardmsg(&msg->base, mycpuid + 1);
3369 }
3370
3371 static void
3372 bridge_rtreap(struct bridge_softc *sc)
3373 {
3374         struct netmsg_base msg;
3375
3376         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3377
3378         netmsg_init(&msg, NULL, &curthread->td_msgport,
3379                     0, bridge_rtreap_handler);
3380         msg.lmsg.u.ms_resultp = sc;
3381
3382         netisr_domsg(&msg, 0);
3383 }
3384
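/*
 * bridge_rtreap_async:
 *
 *      Asynchronous version of bridge_rtreap().  The netmsg is allocated
 *      from the heap and replied through netisr_afree_rport, so it is
 *      freed automatically once the reap pass completes.
 */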
3385 static void
3386 bridge_rtreap_async(struct bridge_softc *sc)
3387 {
3388         struct netmsg_base *msg;
3389
3390         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_WAITOK);
3391
3392         netmsg_init(msg, NULL, &netisr_afree_rport,
3393                     0, bridge_rtreap_handler);
3394         msg->lmsg.u.ms_resultp = sc;
3395
3396         netisr_sendmsg(msg, 0);
3397 }
3398
3399 /*
3400  * bridge_rttrim:
3401  *
3402  *      Trim the routing table so that the number of
3403  *      routing entries is less than or equal to the
3404  *      configured maximum.
3405  */
3406 static void
3407 bridge_rttrim(struct bridge_softc *sc)
3408 {
3409         struct bridge_rtnode *brt;
3410         int dead;
3411
3412         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3413
3414         /* Make sure we actually need to do this. */
3415         if (sc->sc_brtcnt <= sc->sc_brtmax)
3416                 return;
3417
3418         /*
3419          * Find out how many rtnodes are dead
3420          */
3421         dead = bridge_rtage_finddead(sc);
3422         KKASSERT(dead <= sc->sc_brtcnt);
3423
3424         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3425                 /* Enough dead rtnodes are found */
3426                 bridge_rtreap(sc);
3427                 return;
3428         }
3429
3430         /*
3431          * Kill some dynamic rtnodes to meet the brtmax
3432          */
3433         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3434                 struct bridge_rtinfo *bri = brt->brt_info;
3435
3436                 if (bri->bri_dead) {
3437                         /*
3438                          * We have counted this rtnode in
3439                          * bridge_rtage_finddead()
3440                          */
3441                         continue;
3442                 }
3443
3444                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3445                         bri->bri_dead = 1;
3446                         ++dead;
3447                         KKASSERT(dead <= sc->sc_brtcnt);
3448
3449                         if (sc->sc_brtcnt - dead <= sc->sc_brtmax) {
3450                                 /* Enough rtnodes are collected */
3451                                 break;
3452                         }
3453                 }
3454         }
3455         if (dead)
3456                 bridge_rtreap(sc);
3457 }
3458
3459 /*
3460  * bridge_timer:
3461  *
3462  *      Aging timer for the bridge.
3463  */
3464 static void
3465 bridge_timer(void *arg)
3466 {
3467         struct bridge_softc *sc = arg;
3468         struct netmsg_base *msg;
3469
3470         KKASSERT(mycpuid == BRIDGE_CFGCPU);
3471
3472         crit_enter();
3473
3474         if (callout_pending(&sc->sc_brcallout) ||
3475             !callout_active(&sc->sc_brcallout)) {
3476                 crit_exit();
3477                 return;
3478         }
3479         callout_deactivate(&sc->sc_brcallout);
3480
3481         msg = &sc->sc_brtimemsg;
3482         KKASSERT(msg->lmsg.ms_flags & MSGF_DONE);
3483         lwkt_sendmsg_oncpu(BRIDGE_CFGPORT, &msg->lmsg);
3484
3485         crit_exit();
3486 }
3487
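/*
 * bridge_timer_handler:
 *
 *      Runs on BRIDGE_CFGPORT in response to bridge_timer().  The message
 *      is replied immediately so a later timer tick can reuse it, then
 *      the routing table is aged and the prune callout is rescheduled as
 *      long as the bridge is running.
 */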
3488 static void
3489 bridge_timer_handler(netmsg_t msg)
3490 {
3491         struct bridge_softc *sc = msg->lmsg.u.ms_resultp;
3492
3493         KKASSERT(&curthread->td_msgport == BRIDGE_CFGPORT);
3494
3495         crit_enter();
3496         /* Reply ASAP */
3497         lwkt_replymsg(&msg->lmsg, 0);
3498         crit_exit();
3499
3500         bridge_rtage(sc);
3501         if (sc->sc_ifp->if_flags & IFF_RUNNING) {
3502                 callout_reset(&sc->sc_brcallout,
3503                     bridge_rtable_prune_period * hz, bridge_timer, sc);
3504         }
3505 }
3506
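/*
 * bridge_rtage_finddead:
 *
 *      Mark expired dynamic rtnodes on this cpu dead and return how many
 *      were found.
 */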
3507 static int
3508 bridge_rtage_finddead(struct bridge_softc *sc)
3509 {
3510         struct bridge_rtnode *brt;
3511         int dead = 0;
3512
3513         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3514                 struct bridge_rtinfo *bri = brt->brt_info;
3515
3516                 if ((bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3517                     time_uptime >= bri->bri_expire) {
3518                         bri->bri_dead = 1;
3519                         ++dead;
3520                         KKASSERT(dead <= sc->sc_brtcnt);
3521                 }
3522         }
3523         return dead;
3524 }
3525
3526 /*
3527  * bridge_rtage:
3528  *
3529  *      Perform an aging cycle.
3530  */
3531 static void
3532 bridge_rtage(struct bridge_softc *sc)
3533 {
3534         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3535
3536         if (bridge_rtage_finddead(sc))
3537                 bridge_rtreap(sc);
3538 }
3539
3540 /*
3541  * bridge_rtflush:
3542  *
3543  *      Remove all dynamic addresses from the bridge.
3544  */
3545 static void
3546 bridge_rtflush(struct bridge_softc *sc, int bf)
3547 {
3548         struct bridge_rtnode *brt;
3549         int reap;
3550
3551         reap = 0;
3552         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3553                 struct bridge_rtinfo *bri = brt->brt_info;
3554
3555                 if ((bf & IFBF_FLUSHALL) ||
3556                     (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3557                         bri->bri_dead = 1;
3558                         reap = 1;
3559                 }
3560         }
3561         if (reap) {
3562                 if (bf & IFBF_FLUSHSYNC)
3563                         bridge_rtreap(sc);
3564                 else
3565                         bridge_rtreap_async(sc);
3566         }
3567 }
3568
3569 /*
3570  * bridge_rtdaddr:
3571  *
3572  *      Remove an address from the table.
3573  */
3574 static int
3575 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
3576 {
3577         struct bridge_rtnode *brt;
3578
3579         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
3580
3581         if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL)
3582                 return (ENOENT);
3583
3584         /* TODO: add a cheaper delete operation */
3585         brt->brt_info->bri_dead = 1;
3586         bridge_rtreap(sc);
3587         return (0);
3588 }
3589
3590 /*
3591  * bridge_rtdelete:
3592  *
3593  *      Delete routes to a specific member interface.
3594  */
3595 void
3596 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int bf)
3597 {
3598         struct bridge_rtnode *brt;
3599         int reap;
3600
3601         reap = 0;
3602         LIST_FOREACH(brt, &sc->sc_rtlists[mycpuid], brt_list) {
3603                 struct bridge_rtinfo *bri = brt->brt_info;
3604
3605                 if (bri->bri_ifp == ifp &&
3606                     ((bf & IFBF_FLUSHALL) ||
3607                      (bri->bri_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
3608                         bri->bri_dead = 1;
3609                         reap = 1;
3610                 }
3611         }
3612         if (reap) {
3613                 if (bf & IFBF_FLUSHSYNC)
3614                         bridge_rtreap(sc);
3615                 else
3616                         bridge_rtreap_async(sc);
3617         }
3618 }
3619
3620 /*
3621  * bridge_rtable_init:
3622  *
3623  *      Initialize the route table for this bridge.
3624  */
3625 static void
3626 bridge_rtable_init(struct bridge_softc *sc)
3627 {
3628         int cpu;
3629
3630         /*
3631          * Initialize per-cpu hash tables
3632          */
3633         sc->sc_rthashs = kmalloc(sizeof(*sc->sc_rthashs) * ncpus,
3634                                  M_DEVBUF, M_WAITOK);
3635         for (cpu = 0; cpu < ncpus; ++cpu) {
3636                 int i;
3637
3638                 sc->sc_rthashs[cpu] =
3639                 kmalloc(sizeof(struct bridge_rtnode_head) * BRIDGE_RTHASH_SIZE,
3640                         M_DEVBUF, M_WAITOK);
3641
3642                 for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3643                         LIST_INIT(&sc->sc_rthashs[cpu][i]);
3644         }
3645         sc->sc_rthash_key = karc4random();
3646
3647         /*
3648          * Initialize per-cpu lists
3649          */
3650         sc->sc_rtlists = kmalloc(sizeof(struct bridge_rtnode_head) * ncpus,
3651                                  M_DEVBUF, M_WAITOK);
3652         for (cpu = 0; cpu < ncpus; ++cpu)
3653                 LIST_INIT(&sc->sc_rtlists[cpu]);
3654 }
3655
3656 /*
3657  * bridge_rtable_fini:
3658  *
3659  *      Deconstruct the route table for this bridge.
3660  */
3661 static void
3662 bridge_rtable_fini(struct bridge_softc *sc)
3663 {
3664         int cpu;
3665
3666         /*
3667          * Free per-cpu hash tables
3668          */
3669         for (cpu = 0; cpu < ncpus; ++cpu)
3670                 kfree(sc->sc_rthashs[cpu], M_DEVBUF);
3671         kfree(sc->sc_rthashs, M_DEVBUF);
3672
3673         /*
3674          * Free per-cpu lists
3675          */
3676         kfree(sc->sc_rtlists, M_DEVBUF);
3677 }
3678
3679 /*
3680  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3681  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3682  */
3683 #define mix(a, b, c)                                                    \
3684 do {                                                                    \
3685         a -= b; a -= c; a ^= (c >> 13);                                 \
3686         b -= c; b -= a; b ^= (a << 8);                                  \
3687         c -= a; c -= b; c ^= (b >> 13);                                 \
3688         a -= b; a -= c; a ^= (c >> 12);                                 \
3689         b -= c; b -= a; b ^= (a << 16);                                 \
3690         c -= a; c -= b; c ^= (b >> 5);                                  \
3691         a -= b; a -= c; a ^= (c >> 3);                                  \
3692         b -= c; b -= a; b ^= (a << 10);                                 \
3693         c -= a; c -= b; c ^= (b >> 15);                                 \
3694 } while (/*CONSTCOND*/0)
3695
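/*
 * bridge_rthash:
 *
 *      Hash an Ethernet address into the route table.  The six address
 *      bytes are packed into 'a' (addr[0..3]) and 'b' (addr[4..5]),
 *      mixed with the per-bridge random key in 'c', and the result is
 *      masked down to a bucket index.
 */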
3696 static __inline uint32_t
3697 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3698 {
3699         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3700
3701         b += addr[5] << 8;
3702         b += addr[4];
3703         a += addr[3] << 24;
3704         a += addr[2] << 16;
3705         a += addr[1] << 8;
3706         a += addr[0];
3707
3708         mix(a, b, c);
3709
3710         return (c & BRIDGE_RTHASH_MASK);
3711 }
3712
3713 #undef mix
3714
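/*
 * bridge_rtnode_addr_cmp:
 *
 *      Compare two Ethernet addresses byte by byte, memcmp-style.  The
 *      hash chains are kept sorted in descending address order using
 *      this, which lets bridge_rtnode_lookup() stop as soon as it has
 *      passed the position where the address would be.
 */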
3715 static int
3716 bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
3717 {
3718         int i, d;
3719
3720         for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
3721                 d = ((int)a[i]) - ((int)b[i]);
3722         }
3723
3724         return (d);
3725 }
3726
3727 /*
3728  * bridge_rtnode_lookup:
3729  *
3730  *      Look up a bridge route node for the specified destination.
3731  */
3732 static struct bridge_rtnode *
3733 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
3734 {
3735         struct bridge_rtnode *brt;
3736         uint32_t hash;
3737         int dir;
3738
3739         hash = bridge_rthash(sc, addr);
3740         LIST_FOREACH(brt, &sc->sc_rthashs[mycpuid][hash], brt_hash) {
3741                 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3742                 if (dir == 0)
3743                         return (brt);
3744                 if (dir > 0)
3745                         return (NULL);
3746         }
3747
3748         return (NULL);
3749 }
3750
3751 /*
3752  * bridge_rtnode_insert:
3753  *
3754  *      Insert the specified bridge node into the route table.
3755  *      The caller must ensure that no rtnode already exists for the address.
3756  */
3757 static void
3758 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3759 {
3760         struct bridge_rtnode *lbrt;
3761         uint32_t hash;
3762         int dir;
3763
3764         hash = bridge_rthash(sc, brt->brt_addr);
3765
3766         lbrt = LIST_FIRST(&sc->sc_rthashs[mycpuid][hash]);
3767         if (lbrt == NULL) {
3768                 LIST_INSERT_HEAD(&sc->sc_rthashs[mycpuid][hash],
3769                                   brt, brt_hash);
3770                 goto out;
3771         }
3772
3773         do {
3774                 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3775                 KASSERT(dir != 0, ("rtnode already exists"));
3776
3777                 if (dir > 0) {
3778                         LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3779                         goto out;
3780                 }
3781                 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
3782                         LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3783                         goto out;
3784                 }
3785                 lbrt = LIST_NEXT(lbrt, brt_hash);
3786         } while (lbrt != NULL);
3787
3788         panic("no suitable position found for rtnode");
3789 out:
3790         LIST_INSERT_HEAD(&sc->sc_rtlists[mycpuid], brt, brt_list);
3791         if (mycpuid == 0) {
3792                 /*
3793                  * Update the brtcnt.
3794                  * We only need to do it once and we do it on CPU0.
3795                  */
3796                 sc->sc_brtcnt++;
3797         }
3798 }
3799
3800 /*
3801  * bridge_rtnode_destroy:
3802  *
3803  *      Destroy a bridge rtnode.
3804  */
3805 static void
3806 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3807 {
3808         LIST_REMOVE(brt, brt_hash);
3809         LIST_REMOVE(brt, brt_list);
3810
3811         if (mycpuid + 1 == ncpus) {
3812                 /* Free rtinfo associated with rtnode on the last cpu */
3813                 kfree(brt->brt_info, M_DEVBUF);
3814         }
3815         kfree(brt, M_DEVBUF);
3816
3817         if (mycpuid == 0) {
3818                 /* Update brtcnt only on CPU0 */
3819                 sc->sc_brtcnt--;
3820         }
3821 }
3822
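/*
 * bridge_post_pfil:
 *
 *      Check the result of a pfil pass.  Packets that a hook has tagged
 *      for ipfw forwarding or dummynet diversion are rejected with
 *      EOPNOTSUPP, since the bridge path does not handle those
 *      diversions (yet).
 */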
3823 static __inline int
3824 bridge_post_pfil(struct mbuf *m)
3825 {
3826         if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED)
3827                 return EOPNOTSUPP;
3828
3829         /* Not yet */
3830         if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED)
3831                 return EOPNOTSUPP;
3832
3833         return 0;
3834 }
3835
3836 /*
3837  * Send bridge packets through pfil if they are one of the types pfil can deal
3838  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3839  * question.)  If bifp or ifp is NULL then packet filtering is skipped for
3840  * that interface.
3841  */
3842 static int
3843 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3844 {
3845         int snap, error, i, hlen;
3846         struct ether_header *eh1, eh2;
3847         struct ip *ip;
3848         struct llc llc1;
3849         u_int16_t ether_type;
3850
3851         snap = 0;
3852         error = -1;     /* Default error if not error == 0 */
3853
3854         if (pfil_bridge == 0 && pfil_member == 0)
3855                 return (0); /* filtering is disabled */
3856
3857         i = min((*mp)->m_pkthdr.len, max_protohdr);
3858         if ((*mp)->m_len < i) {
3859                 *mp = m_pullup(*mp, i);
3860                 if (*mp == NULL) {
3861                         kprintf("%s: m_pullup failed\n", __func__);
3862                         return (-1);
3863                 }
3864         }
3865
3866         eh1 = mtod(*mp, struct ether_header *);
3867         ether_type = ntohs(eh1->ether_type);
3868
3869         /*
3870          * Check for SNAP/LLC.
3871          */
3872         if (ether_type < ETHERMTU) {
3873                 struct llc *llc2 = (struct llc *)(eh1 + 1);
3874
3875                 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3876                     llc2->llc_dsap == LLC_SNAP_LSAP &&
3877                     llc2->llc_ssap == LLC_SNAP_LSAP &&
3878                     llc2->llc_control == LLC_UI) {
3879                         ether_type = htons(llc2->llc_un.type_snap.ether_type);
3880                         snap = 1;
3881                 }
3882         }
3883
3884         /*
3885          * If we're trying to filter bridge traffic, don't look at anything
3886          * other than IP and ARP traffic.  If the filter doesn't understand
3887          * IPv6, don't allow IPv6 through the bridge either.  This is lame
3888          * since if we really wanted, say, an AppleTalk filter, we are hosed,
3889          * but of course we don't have an AppleTalk filter to begin with.
3890          * (Note that since pfil doesn't understand ARP it will pass *ALL*
3891          * ARP traffic.)
3892          */
3893         switch (ether_type) {
3894         case ETHERTYPE_ARP:
3895         case ETHERTYPE_REVARP:
3896                 return (0); /* Automatically pass */
3897
3898         case ETHERTYPE_IP:
3899 #ifdef INET6
3900         case ETHERTYPE_IPV6:
3901 #endif /* INET6 */
3902                 break;
3903
3904         default:
3905                 /*
3906                  * Check to see if the user wants to pass non-IP
3907                  * packets; these will not be checked by pfil(9) and
3908                  * would be passed unconditionally, so the default is to drop.
3909                  */
3910                 if (pfil_onlyip)
3911                         goto bad;
3912         }
3913
3914         /* Strip off the Ethernet header and keep a copy. */
3915         m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3916         m_adj(*mp, ETHER_HDR_LEN);
3917
3918         /* Strip off snap header, if present */
3919         if (snap) {
3920                 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3921                 m_adj(*mp, sizeof(struct llc));
3922         }
3923
3924         /*
3925          * Check the IP header for alignment and errors
3926          */
3927         if (dir == PFIL_IN) {
3928                 switch (ether_type) {
3929                 case ETHERTYPE_IP:
3930                         error = bridge_ip_checkbasic(mp);
3931                         break;
3932 #ifdef INET6
3933                 case ETHERTYPE_IPV6:
3934                         error = bridge_ip6_checkbasic(mp);
3935                         break;
3936 #endif /* INET6 */
3937                 default:
3938                         error = 0;
3939                 }
3940                 if (error)
3941                         goto bad;
3942         }
3943
3944         error = 0;
3945
3946         /*
3947          * Run the packet through pfil
3948          */
3949         switch (ether_type) {
3950         case ETHERTYPE_IP:
3951                 /*
3952                  * Before calling the firewall, swap fields to host order the
3953                  * same as IP does.  Here we assume the header is contiguous.
3954                  */
3955                 ip = mtod(*mp, struct ip *);
3956
3957                 ip->ip_len = ntohs(ip->ip_len);
3958                 ip->ip_off = ntohs(ip->ip_off);
3959
3960                 /*
3961                  * Run pfil on the member interface and the bridge, both can
3962                  * be skipped by clearing pfil_member or pfil_bridge.
3963                  *
3964                  * Keep the order:
3965                  *   in_if -> bridge_if -> out_if
3966                  */
3967                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) {
3968                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3969                         if (*mp == NULL || error != 0) /* filter may consume */
3970                                 break;
3971                         error = bridge_post_pfil(*mp);
3972                         if (error)
3973                                 break;
3974                 }
3975
3976                 if (pfil_member && ifp != NULL) {
3977                         error = pfil_run_hooks(&inet_pfil_hook, mp, ifp, dir);
3978                         if (*mp == NULL || error != 0) /* filter may consume */
3979                                 break;
3980                         error = bridge_post_pfil(*mp);
3981                         if (error)
3982                                 break;
3983                 }
3984
3985                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL) {
3986                         error = pfil_run_hooks(&inet_pfil_hook, mp, bifp, dir);
3987                         if (*mp == NULL || error != 0) /* filter may consume */
3988                                 break;
3989                         error = bridge_post_pfil(*mp);
3990                         if (error)
3991                                 break;
3992                 }
3993
3994                 /* check if we need to fragment the packet */
3995                 if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
3996                         i = (*mp)->m_pkthdr.len;
3997                         if (i > ifp->if_mtu) {
3998                                 error = bridge_fragment(ifp, *mp, &eh2, snap,
3999                                             &llc1);
4000                                 return (error);
4001                         }
4002                 }
4003
4004                 /* Recalculate the ip checksum and restore byte ordering */
4005                 ip = mtod(*mp, struct ip *);
4006                 hlen = ip->ip_hl << 2;
4007                 if (hlen < sizeof(struct ip))
4008                         goto bad;
4009                 if (hlen > (*mp)->m_len) {
4010                         if ((*mp = m_pullup(*mp, hlen)) == NULL)
4011                                 goto bad;
4012                         ip = mtod(*mp, struct ip *);
4013                         if (ip == NULL)
4014                                 goto bad;
4015                 }
4016                 ip->ip_len = htons(ip->ip_len);
4017                 ip->ip_off = htons(ip->ip_off);
4018                 ip->ip_sum = 0;
4019                 if (hlen == sizeof(struct ip))
4020                         ip->ip_sum = in_cksum_hdr(ip);
4021                 else
4022                         ip->ip_sum = in_cksum(*mp, hlen);
4023
4024                 break;
4025 #ifdef INET6
4026         case ETHERTYPE_IPV6:
4027                 if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
4028                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4029                                         dir);
4030
4031                 if (*mp == NULL || error != 0) /* filter may consume */
4032                         break;
4033
4034                 if (pfil_member && ifp != NULL)
4035                         error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
4036                                         dir);
4037
4038                 if (*mp == NULL || error != 0) /* filter may consume */
4039                         break;
4040
4041                 if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
4042                         error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
4043                                         dir);
4044                 break;
4045 #endif
4046         default:
4047                 error = 0;
4048                 break;
4049         }
4050
4051         if (*mp == NULL)
4052                 return (error);
4053         if (error != 0)
4054                 goto bad;
4055
4056         error = -1;
4057
4058         /*
4059          * Finally, put everything back the way it was and return
4060          */
4061         if (snap) {
4062                 M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
4063                 if (*mp == NULL)
4064                         return (error);
4065                 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
4066         }
4067
4068         M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
4069         if (*mp == NULL)
4070                 return (error);
4071         bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
4072
4073         return (0);
4074
4075 bad:
4076         m_freem(*mp);
4077         *mp = NULL;
4078         return (error);
4079 }
4080
4081 /*
4082  * Perform basic checks on header size since
4083  * pfil assumes ip_input has already processed
4084  * the packet on its behalf.  Cut-and-pasted from ip_input.c.
4085  * Given how simple the IPv6 version is,
4086  * does the IPv4 version really need to be
4087  * this complicated?
4088  *
4089  * XXX Should we update ipstat here, or not?
4090  * XXX Right now we update ipstat but not
4091  * XXX csum_counter.
4092  */
4093 static int
4094 bridge_ip_checkbasic(struct mbuf **mp)
4095 {
4096         struct mbuf *m = *mp;
4097         struct ip *ip;
4098         int len, hlen;
4099         u_short sum;
4100
4101         if (*mp == NULL)
4102                 return (-1);
4103 #if 0 /* notyet */
4104         if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4105                 if ((m = m_copyup(m, sizeof(struct ip),
4106                         (max_linkhdr + 3) & ~3)) == NULL) {
4107                         /* XXXJRT new stat, please */
4108                         ipstat.ips_toosmall++;
4109                         goto bad;
4110                 }
4111         } else
4112 #endif
4113 #ifndef __predict_false
4114 #define __predict_false(x) x
4115 #endif
4116          if (__predict_false(m->m_len < sizeof (struct ip))) {
4117                 if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
4118                         ipstat.ips_toosmall++;
4119                         goto bad;
4120                 }
4121         }
4122         ip = mtod(m, struct ip *);
4123         if (ip == NULL) goto bad;
4124
4125         if (ip->ip_v != IPVERSION) {
4126                 ipstat.ips_badvers++;
4127                 goto bad;
4128         }
4129         hlen = ip->ip_hl << 2;
4130         if (hlen < sizeof(struct ip)) { /* minimum header length */
4131                 ipstat.ips_badhlen++;
4132                 goto bad;
4133         }
4134         if (hlen > m->m_len) {
4135                 if ((m = m_pullup(m, hlen)) == NULL) {
4136                         ipstat.ips_badhlen++;
4137                         goto bad;
4138                 }
4139                 ip = mtod(m, struct ip *);
4140                 if (ip == NULL) goto bad;
4141         }
4142
4143         if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
4144                 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
4145         } else {
4146                 if (hlen == sizeof(struct ip)) {
4147                         sum = in_cksum_hdr(ip);
4148                 } else {
4149                         sum = in_cksum(m, hlen);
4150                 }
4151         }
4152         if (sum) {
4153                 ipstat.ips_badsum++;
4154                 goto bad;
4155         }
4156
4157         /* Retrieve the packet length. */
4158         len = ntohs(ip->ip_len);
4159
4160         /*
4161          * Check for additional length bogosity
4162          */
4163         if (len < hlen) {
4164                 ipstat.ips_badlen++;
4165                 goto bad;
4166         }
4167
4168         /*
4169          * Check that the amount of data in the buffers
4170          * is at least as much as the IP header would have us expect.
4171          * Drop packet if shorter than we expect.
4172          */
4173         if (m->m_pkthdr.len < len) {
4174                 ipstat.ips_tooshort++;
4175                 goto bad;
4176         }
4177
4178         /* Checks out, proceed */
4179         *mp = m;
4180         return (0);
4181
4182 bad:
4183         *mp = m;
4184         return (-1);
4185 }
4186
4187 #ifdef INET6
4188 /*
4189  * Same as above, but for IPv6.
4190  * Cut-and-pasted from ip6_input.c.
4191  * XXX Should we update ip6stat, or not?
4192  */
4193 static int
4194 bridge_ip6_checkbasic(struct mbuf **mp)
4195 {
4196         struct mbuf *m = *mp;
4197         struct ip6_hdr *ip6;
4198
4199         /*
4200          * If the IPv6 header is not aligned, slurp it up into a new
4201          * mbuf with space for link headers, in the event we forward
4202          * it.  Otherwise, if it is aligned, make sure the entire base
4203          * IPv6 header is in the first mbuf of the chain.
4204          */
4205 #if 0 /* notyet */
4206         if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
4207                 struct ifnet *inifp = m->m_pkthdr.rcvif;
4208                 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
4209                             (max_linkhdr + 3) & ~3)) == NULL) {
4210                         /* XXXJRT new stat, please */
4211                         ip6stat.ip6s_toosmall++;
4212                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
4213                         goto bad;
4214                 }
4215         } else
4216 #endif
4217         if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
4218                 struct ifnet *inifp = m->m_pkthdr.rcvif;
4219                 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
4220                         ip6stat.ip6s_toosmall++;
4221                         in6_ifstat_inc(inifp, ifs6_in_hdrerr);
4222                         goto bad;
4223                 }
4224         }
4225
4226         ip6 = mtod(m, struct ip6_hdr *);
4227
4228         if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4229                 ip6stat.ip6s_badvers++;
4230                 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
4231                 goto bad;
4232         }
4233
4234         /* Checks out, proceed */
4235         *mp = m;
4236         return (0);
4237
4238 bad:
4239         *mp = m;
4240         return (-1);
4241 }
4242 #endif /* INET6 */
4243
4244 /*
4245  * bridge_fragment:
4246  *
4247  *      Return a fragmented mbuf chain.
4248  */
4249 static int
4250 bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
4251     int snap, struct llc *llc)
4252 {
4253         struct mbuf *m0;
4254         struct ip *ip;
4255         int error = -1;
4256
4257         if (m->m_len < sizeof(struct ip) &&
4258             (m = m_pullup(m, sizeof(struct ip))) == NULL)
4259                 goto out;
4260         ip = mtod(m, struct ip *);
4261
4262         error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
4263                     CSUM_DELAY_IP);
4264         if (error)
4265                 goto out;
4266
4267         /* walk the chain and re-add the Ethernet header */
4268         for (m0 = m; m0; m0 = m0->m_nextpkt) {
4269                 if (error == 0) {
4270                         if (snap) {
4271                                 M_PREPEND(m0, sizeof(struct llc), M_NOWAIT);
4272                                 if (m0 == NULL) {
4273                                         error = ENOBUFS;
4274                                         continue;
4275                                 }
4276                                 bcopy(llc, mtod(m0, caddr_t),
4277                                     sizeof(struct llc));
4278                         }
4279                         M_PREPEND(m0, ETHER_HDR_LEN, M_NOWAIT);
4280                         if (m0 == NULL) {
4281                                 error = ENOBUFS;
4282                                 continue;
4283                         }
4284                         bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
4285                 } else 
4286                         m_freem(m);
4287         }
4288
4289         if (error == 0)
4290                 ipstat.ips_fragmented++;
4291
4292         return (error);
4293
4294 out:
4295         if (m != NULL)
4296                 m_freem(m);
4297         return (error);
4298 }
4299
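/*
 * bridge_enqueue_handler:
 *
 *      Netmsg handler backing bridge_enqueue(): pull the destination
 *      ifnet and mbuf out of the message and hand the packet off,
 *      treating it as originating from our host.
 */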
4300 static void
4301 bridge_enqueue_handler(netmsg_t msg)
4302 {
4303         struct netmsg_packet *nmp;
4304         struct ifnet *dst_ifp;
4305         struct mbuf *m;
4306
4307         nmp = &msg->packet;
4308         m = nmp->nm_packet;
4309         dst_ifp = nmp->base.lmsg.u.ms_resultp;
4310         mbuftrackid(m, 71);
4311
4312         bridge_handoff(dst_ifp->if_bridge, dst_ifp, m, 1);
4313 }
4314
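/*
 * bridge_handoff:
 *
 *      Transmit an mbuf, or a chain of fragments linked via m_nextpkt,
 *      on dst_ifp, rewriting ether_shost as described below and running
 *      ALTQ classification when the send queue has it enabled.
 */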
4315 static void
4316 bridge_handoff(struct bridge_softc *sc, struct ifnet *dst_ifp,
4317                struct mbuf *m, int from_us)
4318 {
4319         struct mbuf *m0;
4320         struct ifnet *bifp;
4321
4322         bifp = sc->sc_ifp;
4323         mbuftrackid(m, 72);
4324
4325         /* We may be sending a fragment so traverse the mbuf */
4326         for (; m; m = m0) {
4327                 struct altq_pktattr pktattr;
4328
4329                 m0 = m->m_nextpkt;
4330                 m->m_nextpkt = NULL;
4331
4332                 /*
4333                  * If being sent from our host, override ether_shost
4334                  * with the bridge MAC.  This is mandatory for ARP
4335                  * so things don't get confused.  In particular we
4336                  * don't want ARPs to get associated with link interfaces
4337                  * under the bridge which might or might not stay valid.
4338                  *
4339                  * Also override ether_shost when relaying a packet out
4340                  * the same interface it came in on, due to multi-homed
4341                  * addresses & default routes, otherwise switches will
4342                  * get very confused.
4343                  *
4344                  * Otherwise, in transparent mode (LINK0), restore the shost saved on input.
4345                  */
4346                 if (from_us || m->m_pkthdr.rcvif == dst_ifp) {
4347                         m_copyback(m,
4348                                    offsetof(struct ether_header, ether_shost),
4349                                    ETHER_ADDR_LEN, IF_LLADDR(sc->sc_ifp));
4350                 } else if ((bifp->if_flags & IFF_LINK0) &&
4351                            (m->m_pkthdr.fw_flags & BRIDGE_MBUF_TAGGED)) {
4352                         m_copyback(m,
4353                                    offsetof(struct ether_header, ether_shost),
4354                                    ETHER_ADDR_LEN,
4355                                    m->m_pkthdr.ether_br_shost);
4356                 } /* else retain shost */
4357
4358                 if (ifq_is_enabled(&dst_ifp->if_snd))
4359                         altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
4360
4361                 ifq_dispatch(dst_ifp, m, &pktattr);
4362         }
4363 }
4364
4365 static void
4366 bridge_control_dispatch(netmsg_t msg)
4367 {
4368         struct netmsg_brctl *bc_msg = (struct netmsg_brctl *)msg;
4369         struct ifnet *bifp = bc_msg->bc_sc->sc_ifp;
4370         int error;
4371
4372         ifnet_serialize_all(bifp);
4373         error = bc_msg->bc_func(bc_msg->bc_sc, bc_msg->bc_arg);
4374         ifnet_deserialize_all(bifp);
4375
4376         lwkt_replymsg(&bc_msg->base.lmsg, error);
4377 }
4378
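/*
 * bridge_control:
 *
 *      Run a bridge configuration function in the BRIDGE_CFGPORT thread.
 *      The caller holds the bridge serializer; it is dropped around the
 *      synchronous message and re-acquired afterwards, while the dispatch
 *      handler re-serializes the bridge before calling bc_func.
 */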
4379 static int
4380 bridge_control(struct bridge_softc *sc, u_long cmd,
4381                bridge_ctl_t bc_func, void *bc_arg)
4382 {
4383         struct ifnet *bifp = sc->sc_ifp;
4384         struct netmsg_brctl bc_msg;
4385         int error;
4386
4387         ASSERT_IFNET_SERIALIZED_ALL(bifp);
4388
4389         bzero(&bc_msg, sizeof(bc_msg));
4390
4391         netmsg_init(&bc_msg.base, NULL, &curthread->td_msgport,
4392                     0, bridge_control_dispatch);
4393         bc_msg.bc_func = bc_func;
4394         bc_msg.bc_sc = sc;
4395         bc_msg.bc_arg = bc_arg;
4396
4397         ifnet_deserialize_all(bifp);
4398         error = lwkt_domsg(BRIDGE_CFGPORT, &bc_msg.base.lmsg, 0);
4399         ifnet_serialize_all(bifp);
4400         return error;
4401 }
4402
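/*
 * bridge_add_bif_handler:
 *
 *      Allocate a per-cpu bridge_iflist entry referencing the shared
 *      bif_info, link it onto this cpu's interface list and forward the
 *      message to the next cpu.
 */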
4403 static void
4404 bridge_add_bif_handler(netmsg_t msg)
4405 {
4406         struct netmsg_braddbif *amsg = (struct netmsg_braddbif *)msg;
4407         struct bridge_softc *sc;
4408         struct bridge_iflist *bif;
4409
4410         sc = amsg->br_softc;
4411
4412         bif = kmalloc(sizeof(*bif), M_DEVBUF, M_WAITOK | M_ZERO);
4413         bif->bif_ifp = amsg->br_bif_ifp;
4414         bif->bif_onlist = 1;
4415         bif->bif_info = amsg->br_bif_info;
4416
4417         /*
4418          * bif_flags is accessed through the shared bif_info
4419          */
4420         bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
4421
4422         TAILQ_INSERT_HEAD(&sc->sc_iflists[mycpuid], bif, bif_next);
4423
4424         netisr_forwardmsg(&amsg->base, mycpuid + 1);
4425 }
4426
4427 static void
4428 bridge_add_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4429                struct ifnet *ifp)
4430 {
4431         struct netmsg_braddbif amsg;
4432
4433         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4434
4435         netmsg_init(&amsg.base, NULL, &curthread->td_msgport,
4436                     0, bridge_add_bif_handler);
4437         amsg.br_softc = sc;
4438         amsg.br_bif_info = bif_info;
4439         amsg.br_bif_ifp = ifp;
4440
4441         netisr_domsg(&amsg.base, 0);
4442 }
4443
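/*
 * bridge_del_bif_handler:
 *
 *      Unlink the per-cpu bridge_iflist entry for br_bif_info from this
 *      cpu's interface list, stash it on the caller-supplied list so it
 *      can be freed after all cpus are done, and forward the message to
 *      the next cpu.
 */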
4444 static void
4445 bridge_del_bif_handler(netmsg_t msg)
4446 {
4447         struct netmsg_brdelbif *dmsg = (struct netmsg_brdelbif *)msg;
4448         struct bridge_softc *sc;
4449         struct bridge_iflist *bif;
4450
4451         sc = dmsg->br_softc;
4452
4453         /*
4454          * Locate the bif associated with the br_bif_info
4455          * on the current CPU
4456          */
4457         bif = bridge_lookup_member_ifinfo(sc, dmsg->br_bif_info);
4458         KKASSERT(bif != NULL && bif->bif_onlist);
4459
4460         /* Remove the bif from the current CPU's iflist */
4461         bif->bif_onlist = 0;
4462         TAILQ_REMOVE(&sc->sc_iflists[mycpuid], bif, bif_next);
4463
4464         /* Save the removed bif for later freeing */
4465         TAILQ_INSERT_HEAD(dmsg->br_bif_list, bif, bif_next);
4466
4467         netisr_forwardmsg(&dmsg->base, mycpuid + 1);
4468 }
4469
4470 static void
4471 bridge_del_bif(struct bridge_softc *sc, struct bridge_ifinfo *bif_info,
4472                struct bridge_iflist_head *saved_bifs)
4473 {
4474         struct netmsg_brdelbif dmsg;
4475
4476         ASSERT_IFNET_NOT_SERIALIZED_ALL(sc->sc_ifp);
4477
4478         netmsg_init(&dmsg.base, NULL, &curthread->td_msgport,
4479                     0, bridge_del_bif_handler);
4480         dmsg.br_softc = sc;
4481         dmsg.br_bif_info = bif_info;
4482         dmsg.br_bif_list = saved_bifs;
4483
4484         netisr_domsg(&dmsg.base, 0);
4485 }