carp: Add TSO and TXCSUM support
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82
83 #include <netinet/ip_carp.h>
84
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl, add/delete inet addresses,
91  *   are serialized by netisr0, not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *      
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *      
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - if_carp accessing:
246  *   The accessing code should cache if_carp in a local temporary
247  *   variable and use that temporary variable along the code path
248  *   instead of reading if_carp again later on.
249  */
250
251 #define CARP_IFNAME             "carp"
252 #define CARP_IS_RUNNING(ifp)    \
253         (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
254
255 struct carp_softc;
256
257 struct carp_vhaddr {
258         uint32_t                vha_flags;      /* CARP_VHAF_ */
259         struct in_ifaddr        *vha_ia;        /* carp address */
260         struct in_ifaddr        *vha_iaback;    /* backing address */
261         TAILQ_ENTRY(carp_vhaddr) vha_link;
262 };
263 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264
265 struct netmsg_carp {
266         struct netmsg_base      base;
267         struct ifnet            *nc_carpdev;
268         struct carp_softc       *nc_softc;
269         void                    *nc_data;
270         size_t                  nc_datalen;
271 };
272
273 struct carp_softc {
274         struct arpcom            arpcom;
275         struct ifnet            *sc_carpdev;    /* parent interface */
276         struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */
277
278         const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
279         struct ip_moptions       sc_imo;
280
281 #ifdef INET6
282         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
283         struct ip6_moptions      sc_im6o;
284 #endif /* INET6 */
285
286         enum { INIT = 0, BACKUP, MASTER }
287                                  sc_state;
288         boolean_t                sc_dead;
289
290         int                      sc_suppress;
291
292         int                      sc_sendad_errors;
293 #define CARP_SENDAD_MAX_ERRORS  3
294         int                      sc_sendad_success;
295 #define CARP_SENDAD_MIN_SUCCESS 3
296
297         int                      sc_vhid;
298         int                      sc_advskew;
299         int                      sc_naddrs;     /* actually used IPv4 vha */
300         int                      sc_naddrs6;
301         int                      sc_advbase;    /* seconds */
302         int                      sc_init_counter;
303         uint64_t                 sc_counter;
304
305         /* authentication */
306 #define CARP_HMAC_PAD   64
307         unsigned char            sc_key[CARP_KEY_LEN];
308         unsigned char            sc_pad[CARP_HMAC_PAD];
309         SHA1_CTX                 sc_sha1;
310
311         struct callout           sc_ad_tmo;     /* advertisement timeout */
312         struct netmsg_carp       sc_ad_msg;     /* adv timeout netmsg */
313         struct callout           sc_md_tmo;     /* ip4 master down timeout */
314         struct callout           sc_md6_tmo;    /* ip6 master down timeout */
315         struct netmsg_carp       sc_md_msg;     /* master down timeout netmsg */
316
317         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
318 };
319
320 #define sc_if   arpcom.ac_if
321
322 struct carp_softc_container {
323         TAILQ_ENTRY(carp_softc_container) scc_link;
324         struct carp_softc       *scc_softc;
325 };
326 TAILQ_HEAD(carp_if, carp_softc_container);
327
328 SYSCTL_DECL(_net_inet_carp);
329
330 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
331 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
332     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
333 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
334     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
335 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
336     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
337 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
338     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
339
340 static int carp_suppress_preempt = 0;
341 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
342     &carp_suppress_preempt, 0, "Preemption is suppressed");
343
344 static struct carpstats carpstats;
345 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
346     &carpstats, carpstats,
347     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
348
349 #define CARP_LOG(...)   do {                            \
350         if (carp_opts[CARPCTL_LOG] > 0)                 \
351                 log(LOG_INFO, __VA_ARGS__);             \
352 } while (0)
353
354 #define CARP_DEBUG(...) do {                            \
355         if (carp_opts[CARPCTL_LOG] > 1)                 \
356                 log(LOG_DEBUG, __VA_ARGS__);            \
357 } while (0)
358
359 static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
360
361 static void     carp_hmac_prepare(struct carp_softc *);
362 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
363                     unsigned char *);
364 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
365                     unsigned char *);
366 static void     carp_setroute(struct carp_softc *, int);
367 static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
368                     struct carp_header *, sa_family_t);
369 static int      carp_clone_create(struct if_clone *, int, caddr_t);
370 static int      carp_clone_destroy(struct ifnet *);
371 static void     carp_detach(struct carp_softc *, boolean_t, boolean_t);
372 static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
373 static void     carp_send_ad_all(void);
374 static void     carp_send_ad_timeout(void *);
375 static void     carp_send_ad(struct carp_softc *);
376 static void     carp_send_arp(struct carp_softc *);
377 static void     carp_master_down_timeout(void *);
378 static void     carp_master_down(struct carp_softc *);
379 static void     carp_setrun(struct carp_softc *, sa_family_t);
380 static void     carp_set_state(struct carp_softc *, int);
381 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
382
383 static void     carp_init(void *);
384 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
385 static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
386                     struct rtentry *);
387 static void     carp_start(struct ifnet *, struct ifaltq_subque *);
388
389 static void     carp_multicast_cleanup(struct carp_softc *);
390 static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
391 static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
392 static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
393 static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
394                     struct ifaddr *);
395 static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
396                     struct ifaddr *);
397 static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);
398
399 static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
400                     struct in_ifaddr *);
401 static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
402                     struct ifnet *, struct in_ifaddr *, int);
403 static void     carp_deactivate_vhaddr(struct carp_softc *,
404                     struct carp_vhaddr *, boolean_t);
405 static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
406 static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
407                     boolean_t);
408
409 #ifdef foo
410 static void     carp_sc_state(struct carp_softc *);
411 #endif
412 #ifdef INET6
413 static void     carp_send_na(struct carp_softc *);
414 #ifdef notyet
415 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
416 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
417 #endif
418 static void     carp_multicast6_cleanup(struct carp_softc *);
419 #endif
420 static void     carp_stop(struct carp_softc *, boolean_t);
421 static void     carp_suspend(struct carp_softc *, boolean_t);
422 static void     carp_ioctl_stop(struct carp_softc *);
423 static int      carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
424 static void     carp_ioctl_ifcap(struct carp_softc *, int);
425 static int      carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
426 static int      carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
427 static int      carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
428
429 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
430 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
431 static void     carp_if_free(struct carp_if *);
432
433 static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
434                             struct ifaddr *);
435 static void     carp_ifdetach(void *, struct ifnet *);
436
437 static void     carp_ifdetach_dispatch(netmsg_t);
438 static void     carp_clone_destroy_dispatch(netmsg_t);
439 static void     carp_init_dispatch(netmsg_t);
440 static void     carp_ioctl_stop_dispatch(netmsg_t);
441 static void     carp_ioctl_setvh_dispatch(netmsg_t);
442 static void     carp_ioctl_ifcap_dispatch(netmsg_t);
443 static void     carp_ioctl_getvh_dispatch(netmsg_t);
444 static void     carp_ioctl_getdevname_dispatch(netmsg_t);
445 static void     carp_ioctl_getvhaddr_dispatch(netmsg_t);
446 static void     carp_send_ad_timeout_dispatch(netmsg_t);
447 static void     carp_master_down_timeout_dispatch(netmsg_t);
448
449 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
450
451 static LIST_HEAD(, carp_softc) carpif_list;
452
453 static struct if_clone carp_cloner =
454 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
455                      0, IF_MAXUNIT);
456
457 static uint8_t  carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 };
458
459 static eventhandler_tag carp_ifdetach_event;
460 static eventhandler_tag carp_ifaddr_event;
461
462 static __inline void
463 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
464 {
465         struct carp_vhaddr *vha;
466         u_long new_addr, addr;
467
468         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
469
470         /*
471          * Virtual address list is sorted; smaller one first
472          */
473         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
474
475         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
476                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
477
478                 if (addr > new_addr)
479                         break;
480         }
481         if (vha == NULL)
482                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
483         else
484                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
485         vha_new->vha_flags |= CARP_VHAF_ONLIST;
486 }
487
488 static __inline void
489 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
490 {
491         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
492         vha->vha_flags &= ~CARP_VHAF_ONLIST;
493         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
494 }
495
496 static void
497 carp_hmac_prepare(struct carp_softc *sc)
498 {
499         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
500         uint8_t vhid = sc->sc_vhid & 0xff;
501         int i;
502 #ifdef INET6
503         struct ifaddr_container *ifac;
504         struct in6_addr in6;
505 #endif
506 #ifdef INET
507         struct carp_vhaddr *vha;
508 #endif
509
510         /* XXX: possible race here */
511
512         /* compute ipad from key */
513         bzero(sc->sc_pad, sizeof(sc->sc_pad));
514         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
515         for (i = 0; i < sizeof(sc->sc_pad); i++)
516                 sc->sc_pad[i] ^= 0x36;
517
518         /* precompute first part of inner hash */
519         SHA1Init(&sc->sc_sha1);
520         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
521         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
522         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
523         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
524 #ifdef INET
525         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
526                 SHA1Update(&sc->sc_sha1,
527                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
528                     sizeof(struct in_addr));
529         }
530 #endif /* INET */
531 #ifdef INET6
532         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
533                 struct ifaddr *ifa = ifac->ifa;
534
535                 if (ifa->ifa_addr->sa_family == AF_INET6) {
536                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
537                         in6_clearscope(&in6);
538                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
539                 }
540         }
541 #endif /* INET6 */
542
543         /* convert ipad to opad */
544         for (i = 0; i < sizeof(sc->sc_pad); i++)
545                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
546 }
547
548 static void
549 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
550     unsigned char md[20])
551 {
552         SHA1_CTX sha1ctx;
553
554         /* fetch first half of inner hash */
555         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
556
557         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
558         SHA1Final(md, &sha1ctx);
559
560         /* outer hash */
561         SHA1Init(&sha1ctx);
562         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
563         SHA1Update(&sha1ctx, md, 20);
564         SHA1Final(md, &sha1ctx);
565 }
566
567 static int
568 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
569     unsigned char md[20])
570 {
571         unsigned char md2[20];
572
573         carp_hmac_generate(sc, counter, md2);
574         return (bcmp(md, md2, sizeof(md2)));
575 }
576
577 static void
578 carp_setroute(struct carp_softc *sc, int cmd)
579 {
580 #ifdef INET6
581         struct ifaddr_container *ifac;
582 #endif
583         struct carp_vhaddr *vha;
584
585         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
586
587         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
588                 if (vha->vha_iaback == NULL)
589                         continue;
590                 if (cmd == RTM_DELETE)
591                         carp_delroute_vhaddr(sc, vha, FALSE);
592                 else
593                         carp_addroute_vhaddr(sc, vha);
594         }
595
596 #ifdef INET6
597         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
598                 struct ifaddr *ifa = ifac->ifa;
599
600                 if (ifa->ifa_addr->sa_family == AF_INET6) {
601                         if (cmd == RTM_ADD)
602                                 in6_ifaddloop(ifa);
603                         else
604                                 in6_ifremloop(ifa);
605                 }
606         }
607 #endif /* INET6 */
608 }
609
610 static int
611 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
612 {
613         struct carp_softc *sc;
614         struct ifnet *ifp;
615
616         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
617         ifp = &sc->sc_if;
618
619         sc->sc_suppress = 0;
620         sc->sc_advbase = CARP_DFLTINTV;
621         sc->sc_vhid = -1;       /* required setting */
622         sc->sc_advskew = 0;
623         sc->sc_init_counter = 1;
624         sc->sc_naddrs = 0;
625         sc->sc_naddrs6 = 0;
626
627         TAILQ_INIT(&sc->sc_vha_list);
628
629 #ifdef INET6
630         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
631 #endif
632
633         callout_init_mp(&sc->sc_ad_tmo);
634         netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
635             MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
636         sc->sc_ad_msg.nc_softc = sc;
637
638         callout_init_mp(&sc->sc_md_tmo);
639         callout_init_mp(&sc->sc_md6_tmo);
640         netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
641             MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
642         sc->sc_md_msg.nc_softc = sc;
643
644         if_initname(ifp, CARP_IFNAME, unit);
645         ifp->if_softc = sc;
646         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
647         ifp->if_init = carp_init;
648         ifp->if_ioctl = carp_ioctl;
649         ifp->if_start = carp_start;
650
651         ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
652         ifp->if_capenable = ifp->if_capabilities;
653         /*
654          * Leave if_hwassist as it is; if_hwassist will be
655          * setup when this carp interface has parent.
656          */
657
658         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
659         ifq_set_ready(&ifp->if_snd);
660
661         ether_ifattach(ifp, carp_etheraddr, NULL);
662
663         ifp->if_type = IFT_CARP;
664         ifp->if_output = carp_output;
665
666         lwkt_gettoken(&carp_listtok);
667         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
668         lwkt_reltoken(&carp_listtok);
669
670         return (0);
671 }
672
673 static void
674 carp_clone_destroy_dispatch(netmsg_t msg)
675 {
676         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
677         struct carp_softc *sc = cmsg->nc_softc;
678
679         sc->sc_dead = TRUE;
680         carp_detach(sc, TRUE, FALSE);
681
682         callout_stop_sync(&sc->sc_ad_tmo);
683         callout_stop_sync(&sc->sc_md_tmo);
684         callout_stop_sync(&sc->sc_md6_tmo);
685
686         crit_enter();
687         lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
688         lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
689         crit_exit();
690
691         lwkt_replymsg(&cmsg->base.lmsg, 0);
692 }
693
694 static int
695 carp_clone_destroy(struct ifnet *ifp)
696 {
697         struct carp_softc *sc = ifp->if_softc;
698         struct netmsg_carp cmsg;
699
700         bzero(&cmsg, sizeof(cmsg));
701         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
702             carp_clone_destroy_dispatch);
703         cmsg.nc_softc = sc;
704
705         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
706
707         lwkt_gettoken(&carp_listtok);
708         LIST_REMOVE(sc, sc_next);
709         lwkt_reltoken(&carp_listtok);
710
711         bpfdetach(ifp);
712         if_detach(ifp);
713
714         KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
715         kfree(sc, M_CARP);
716
717         return 0;
718 }
719
720 static struct carp_if *
721 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
722 {
723         struct carp_softc_container *oscc, *scc;
724         struct carp_if *cif;
725         int count = 0;
726 #ifdef INVARIANTS
727         int found = 0;
728 #endif
729
730         TAILQ_FOREACH(oscc, ocif, scc_link) {
731                 ++count;
732 #ifdef INVARIANTS
733                 if (oscc->scc_softc == sc)
734                         found = 1;
735 #endif
736         }
737         KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
738
739         if (count == 1) {
740                 /* Last one is going to be unlinked */
741                 return NULL;
742         }
743
744         cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
745         TAILQ_INIT(cif);
746
747         TAILQ_FOREACH(oscc, ocif, scc_link) {
748                 if (oscc->scc_softc == sc)
749                         continue;
750
751                 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
752                 scc->scc_softc = oscc->scc_softc;
753                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
754         }
755
756         return cif;
757 }
758
759 static struct carp_if *
760 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
761 {
762         struct carp_softc_container *oscc;
763         int onlist;
764
765         onlist = 0;
766         if (ocif != NULL) {
767                 TAILQ_FOREACH(oscc, ocif, scc_link) {
768                         if (oscc->scc_softc == sc)
769                                 onlist = 1;
770                 }
771         }
772
773 #ifdef INVARIANTS
774         if (sc->sc_carpdev != NULL) {
775                 KASSERT(onlist, ("%s is not on %s carp list",
776                     sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
777         } else {
778                 KASSERT(!onlist, ("%s is already on carp list",
779                     sc->sc_if.if_xname));
780         }
781 #endif
782
783         if (!onlist) {
784                 struct carp_if *cif;
785                 struct carp_softc_container *new_scc, *scc;
786                 int inserted = 0;
787
788                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
789                 TAILQ_INIT(cif);
790
791                 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
792                 new_scc->scc_softc = sc;
793
794                 if (ocif != NULL) {
795                         TAILQ_FOREACH(oscc, ocif, scc_link) {
796                                 if (!inserted &&
797                                     oscc->scc_softc->sc_vhid > sc->sc_vhid) {
798                                         TAILQ_INSERT_TAIL(cif, new_scc,
799                                             scc_link);
800                                         inserted = 1;
801                                 }
802
803                                 scc = kmalloc(sizeof(*scc), M_CARP,
804                                     M_WAITOK | M_ZERO);
805                                 scc->scc_softc = oscc->scc_softc;
806                                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
807                         }
808                 }
809                 if (!inserted)
810                         TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
811
812                 return cif;
813         } else {
814                 return ocif;
815         }
816 }
817
818 static void
819 carp_if_free(struct carp_if *cif)
820 {
821         struct carp_softc_container *scc;
822
823         while ((scc = TAILQ_FIRST(cif)) != NULL) {
824                 TAILQ_REMOVE(cif, scc, scc_link);
825                 kfree(scc, M_CARP);
826         }
827         kfree(cif, M_CARP);
828 }
829
830 static void
831 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
832 {
833         carp_suspend(sc, detach);
834
835         carp_multicast_cleanup(sc);
836 #ifdef INET6
837         carp_multicast6_cleanup(sc);
838 #endif
839
840         if (!sc->sc_dead && detach) {
841                 struct carp_vhaddr *vha;
842
843                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
844                         carp_deactivate_vhaddr(sc, vha, del_iaback);
845                 KKASSERT(sc->sc_naddrs == 0);
846         }
847
848         if (sc->sc_carpdev != NULL) {
849                 struct ifnet *ifp = sc->sc_carpdev;
850                 struct carp_if *ocif = ifp->if_carp;
851
852                 ifp->if_carp = carp_if_remove(ocif, sc);
853                 KASSERT(ifp->if_carp != ocif,
854                     ("%s carp_if_remove failed", __func__));
855
856                 sc->sc_carpdev = NULL;
857                 sc->sc_ia = NULL;
858                 sc->arpcom.ac_if.if_hwassist = 0;
859
860                 /*
861                  * Make sure that all protocol threads see the
862                  * sc_carpdev and if_carp changes
863                  */
864                 netmsg_service_sync();
865
866                 if (ifp->if_carp == NULL) {
867                         /*
868                          * No more carp interfaces using
869                          * ifp as the backing interface,
870                          * move it out of promiscous mode.
871                          */
872                         ifpromisc(ifp, 0);
873                 }
874
875                 /*
876                  * The old carp list could be safely free now,
877                  * since no one can access it.
878                  */
879                 carp_if_free(ocif);
880         }
881 }
882
883 static void
884 carp_ifdetach_dispatch(netmsg_t msg)
885 {
886         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
887         struct ifnet *ifp = cmsg->nc_carpdev;
888
889         while (ifp->if_carp) {
890                 struct carp_softc_container *scc;
891
892                 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
893                 carp_detach(scc->scc_softc, TRUE, TRUE);
894         }
895         lwkt_replymsg(&cmsg->base.lmsg, 0);
896 }
897
898 /* Detach an interface from the carp. */
899 static void
900 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
901 {
902         struct netmsg_carp cmsg;
903
904         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
905
906         bzero(&cmsg, sizeof(cmsg));
907         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
908             carp_ifdetach_dispatch);
909         cmsg.nc_carpdev = ifp;
910
911         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
912 }
913
914 /*
915  * process input packet.
916  * we have rearranged checks order compared to the rfc,
917  * but it seems more efficient this way or not possible otherwise.
918  */
919 int
920 carp_proto_input(struct mbuf **mp, int *offp, int proto)
921 {
922         struct mbuf *m = *mp;
923         struct ip *ip = mtod(m, struct ip *);
924         struct ifnet *ifp = m->m_pkthdr.rcvif;
925         struct carp_header *ch;
926         struct carp_softc *sc;
927         int len, iphlen;
928
929         iphlen = *offp;
930         *mp = NULL;
931
932         carpstats.carps_ipackets++;
933
934         if (!carp_opts[CARPCTL_ALLOW]) {
935                 m_freem(m);
936                 goto back;
937         }
938
939         /* Check if received on a valid carp interface */
940         if (ifp->if_type != IFT_CARP) {
941                 carpstats.carps_badif++;
942                 CARP_LOG("carp_proto_input: packet received on non-carp "
943                     "interface: %s\n", ifp->if_xname);
944                 m_freem(m);
945                 goto back;
946         }
947
948         if (!CARP_IS_RUNNING(ifp)) {
949                 carpstats.carps_badif++;
950                 CARP_LOG("carp_proto_input: packet received on stopped carp "
951                     "interface: %s\n", ifp->if_xname);
952                 m_freem(m);
953                 goto back;
954         }
955
956         sc = ifp->if_softc;
957         if (sc->sc_carpdev == NULL) {
958                 carpstats.carps_badif++;
959                 CARP_LOG("carp_proto_input: packet received on defunc carp "
960                     "interface: %s\n", ifp->if_xname);
961                 m_freem(m);
962                 goto back;
963         }
964
965         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
966                 carpstats.carps_badif++;
967                 CARP_LOG("carp_proto_input: non-mcast packet on "
968                     "interface: %s\n", ifp->if_xname);
969                 m_freem(m);
970                 goto back;
971         }
972
973         /* Verify that the IP TTL is CARP_DFLTTL. */
974         if (ip->ip_ttl != CARP_DFLTTL) {
975                 carpstats.carps_badttl++;
976                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
977                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
978                 m_freem(m);
979                 goto back;
980         }
981
982         /* Minimal CARP packet size */
983         len = iphlen + sizeof(*ch);
984
985         /*
986          * Verify that the received packet length is
987          * not less than the CARP header
988          */
989         if (m->m_pkthdr.len < len) {
990                 carpstats.carps_badlen++;
991                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
992                     ifp->if_xname);
993                 m_freem(m);
994                 goto back;
995         }
996
997         /* Make sure that CARP header is contiguous */
998         if (len > m->m_len) {
999                 m = m_pullup(m, len);
1000                 if (m == NULL) {
1001                         carpstats.carps_hdrops++;
1002                         CARP_LOG("carp_proto_input: m_pullup failed\n");
1003                         goto back;
1004                 }
1005                 ip = mtod(m, struct ip *);
1006         }
1007         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1008
1009         /* Verify the CARP checksum */
1010         if (in_cksum_skip(m, len, iphlen)) {
1011                 carpstats.carps_badsum++;
1012                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
1013                     ifp->if_xname);
1014                 m_freem(m);
1015                 goto back;
1016         }
1017         carp_proto_input_c(sc, m, ch, AF_INET);
1018 back:
1019         return(IPPROTO_DONE);
1020 }
1021
1022 #ifdef INET6
1023 int
1024 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
1025 {
1026         struct mbuf *m = *mp;
1027         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1028         struct ifnet *ifp = m->m_pkthdr.rcvif;
1029         struct carp_header *ch;
1030         struct carp_softc *sc;
1031         u_int len;
1032
1033         carpstats.carps_ipackets6++;
1034
1035         if (!carp_opts[CARPCTL_ALLOW]) {
1036                 m_freem(m);
1037                 goto back;
1038         }
1039
1040         /* check if received on a valid carp interface */
1041         if (ifp->if_type != IFT_CARP) {
1042                 carpstats.carps_badif++;
1043                 CARP_LOG("carp6_proto_input: packet received on non-carp "
1044                     "interface: %s\n", ifp->if_xname);
1045                 m_freem(m);
1046                 goto back;
1047         }
1048
1049         if (!CARP_IS_RUNNING(ifp)) {
1050                 carpstats.carps_badif++;
1051                 CARP_LOG("carp_proto_input: packet received on stopped carp "
1052                     "interface: %s\n", ifp->if_xname);
1053                 m_freem(m);
1054                 goto back;
1055         }
1056
1057         sc = ifp->if_softc;
1058         if (sc->sc_carpdev == NULL) {
1059                 carpstats.carps_badif++;
1060                 CARP_LOG("carp6_proto_input: packet received on defunc-carp "
1061                     "interface: %s\n", ifp->if_xname);
1062                 m_freem(m);
1063                 goto back;
1064         }
1065
1066         /* verify that the IP TTL is 255 */
1067         if (ip6->ip6_hlim != CARP_DFLTTL) {
1068                 carpstats.carps_badttl++;
1069                 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
1070                     ip6->ip6_hlim, ifp->if_xname);
1071                 m_freem(m);
1072                 goto back;
1073         }
1074
1075         /* verify that we have a complete carp packet */
1076         len = m->m_len;
1077         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
1078         if (ch == NULL) {
1079                 carpstats.carps_badlen++;
1080                 CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
1081                 goto back;
1082         }
1083
1084         /* verify the CARP checksum */
1085         if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
1086                 carpstats.carps_badsum++;
1087                 CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
1088                     ifp->if_xname);
1089                 m_freem(m);
1090                 goto back;
1091         }
1092
1093         carp_proto_input_c(sc, m, ch, AF_INET6);
1094 back:
1095         return (IPPROTO_DONE);
1096 }
1097 #endif /* INET6 */
1098
1099 static void
1100 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1101     struct carp_header *ch, sa_family_t af)
1102 {
1103         struct ifnet *cifp;
1104         uint64_t tmp_counter;
1105         struct timeval sc_tv, ch_tv;
1106
1107         if (sc->sc_vhid != ch->carp_vhid) {
1108                 /*
1109                  * CARP uses multicast, however, multicast packets
1110                  * are tapped to all CARP interfaces on the physical
1111                  * interface receiving the CARP packets, so we don't
1112                  * update any stats here.
1113                  */
1114                 m_freem(m);
1115                 return;
1116         }
1117         cifp = &sc->sc_if;
1118
1119         /* verify the CARP version. */
1120         if (ch->carp_version != CARP_VERSION) {
1121                 carpstats.carps_badver++;
1122                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1123                          ch->carp_version);
1124                 m_freem(m);
1125                 return;
1126         }
1127
1128         /* verify the hash */
1129         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1130                 carpstats.carps_badauth++;
1131                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1132                 m_freem(m);
1133                 return;
1134         }
1135
1136         tmp_counter = ntohl(ch->carp_counter[0]);
1137         tmp_counter = tmp_counter<<32;
1138         tmp_counter += ntohl(ch->carp_counter[1]);
1139
1140         /* XXX Replay protection goes here */
1141
1142         sc->sc_init_counter = 0;
1143         sc->sc_counter = tmp_counter;
1144
1145         sc_tv.tv_sec = sc->sc_advbase;
1146         if (carp_suppress_preempt && sc->sc_advskew <  240)
1147                 sc_tv.tv_usec = 240 * 1000000 / 256;
1148         else
1149                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1150         ch_tv.tv_sec = ch->carp_advbase;
1151         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1152
1153         switch (sc->sc_state) {
1154         case INIT:
1155                 break;
1156
1157         case MASTER:
1158                 /*
1159                  * If we receive an advertisement from a master who's going to
1160                  * be more frequent than us, go into BACKUP state.
1161                  */
1162                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
1163                     timevalcmp(&sc_tv, &ch_tv, ==)) {
1164                         callout_stop(&sc->sc_ad_tmo);
1165                         CARP_DEBUG("%s: MASTER -> BACKUP "
1166                            "(more frequent advertisement received)\n",
1167                            cifp->if_xname);
1168                         carp_set_state(sc, BACKUP);
1169                         carp_setrun(sc, 0);
1170                         carp_setroute(sc, RTM_DELETE);
1171                 }
1172                 break;
1173
1174         case BACKUP:
1175                 /*
1176                  * If we're pre-empting masters who advertise slower than us,
1177                  * and this one claims to be slower, treat him as down.
1178                  */
1179                 if (carp_opts[CARPCTL_PREEMPT] &&
1180                     timevalcmp(&sc_tv, &ch_tv, <)) {
1181                         CARP_DEBUG("%s: BACKUP -> MASTER "
1182                             "(preempting a slower master)\n", cifp->if_xname);
1183                         carp_master_down(sc);
1184                         break;
1185                 }
1186
1187                 /*
1188                  *  If the master is going to advertise at such a low frequency
1189                  *  that he's guaranteed to time out, we'd might as well just
1190                  *  treat him as timed out now.
1191                  */
1192                 sc_tv.tv_sec = sc->sc_advbase * 3;
1193                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
1194                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1195                                    cifp->if_xname);
1196                         carp_master_down(sc);
1197                         break;
1198                 }
1199
1200                 /*
1201                  * Otherwise, we reset the counter and wait for the next
1202                  * advertisement.
1203                  */
1204                 carp_setrun(sc, af);
1205                 break;
1206         }
1207         m_freem(m);
1208 }
1209
1210 struct mbuf *
1211 carp_input(void *v, struct mbuf *m)
1212 {
1213         struct carp_if *cif = v;
1214         struct ether_header *eh;
1215         struct carp_softc_container *scc;
1216         struct ifnet *ifp;
1217
1218         eh = mtod(m, struct ether_header *);
1219
1220         ifp = carp_forus(cif, eh->ether_dhost);
1221         if (ifp != NULL) {
1222                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1223                 return NULL;
1224         }
1225
1226         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1227                 return m;
1228
1229         /*
1230          * XXX Should really check the list of multicast addresses
1231          * for each CARP interface _before_ copying.
1232          */
1233         TAILQ_FOREACH(scc, cif, scc_link) {
1234                 struct carp_softc *sc = scc->scc_softc;
1235                 struct mbuf *m0;
1236
1237                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
1238                         continue;
1239
1240                 m0 = m_dup(m, MB_DONTWAIT);
1241                 if (m0 == NULL)
1242                         continue;
1243
1244                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1245         }
1246         return m;
1247 }
1248
1249 static void
1250 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1251 {
1252         if (sc->sc_init_counter) {
1253                 /* this could also be seconds since unix epoch */
1254                 sc->sc_counter = karc4random();
1255                 sc->sc_counter = sc->sc_counter << 32;
1256                 sc->sc_counter += karc4random();
1257         } else {
1258                 sc->sc_counter++;
1259         }
1260
1261         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1262         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1263
1264         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1265 }
1266
1267 static void
1268 carp_send_ad_all(void)
1269 {
1270         struct carp_softc *sc;
1271
1272         LIST_FOREACH(sc, &carpif_list, sc_next) {
1273                 if (sc->sc_carpdev == NULL)
1274                         continue;
1275
1276                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1277                         carp_send_ad(sc);
1278         }
1279 }
1280
1281 static void
1282 carp_send_ad_timeout(void *xsc)
1283 {
1284         struct carp_softc *sc = xsc;
1285         struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1286
1287         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1288             __func__, mycpuid));
1289
1290         crit_enter();
1291         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1292                 lwkt_sendmsg(netisr_cpuport(0), &cmsg->base.lmsg);
1293         crit_exit();
1294 }
1295
1296 static void
1297 carp_send_ad_timeout_dispatch(netmsg_t msg)
1298 {
1299         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1300         struct carp_softc *sc = cmsg->nc_softc;
1301
1302         /* Reply ASAP */
1303         crit_enter();
1304         lwkt_replymsg(&cmsg->base.lmsg, 0);
1305         crit_exit();
1306
1307         carp_send_ad(sc);
1308 }
1309
1310 static void
1311 carp_send_ad(struct carp_softc *sc)
1312 {
1313         struct ifnet *cifp = &sc->sc_if;
1314         struct carp_header ch;
1315         struct timeval tv;
1316         struct carp_header *ch_ptr;
1317         struct mbuf *m;
1318         int len, advbase, advskew;
1319
1320         if (!CARP_IS_RUNNING(cifp)) {
1321                 /* Bow out */
1322                 advbase = 255;
1323                 advskew = 255;
1324         } else {
1325                 advbase = sc->sc_advbase;
1326                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1327                         advskew = sc->sc_advskew;
1328                 else
1329                         advskew = 240;
1330                 tv.tv_sec = advbase;
1331                 tv.tv_usec = advskew * 1000000 / 256;
1332         }
1333
1334         ch.carp_version = CARP_VERSION;
1335         ch.carp_type = CARP_ADVERTISEMENT;
1336         ch.carp_vhid = sc->sc_vhid;
1337         ch.carp_advbase = advbase;
1338         ch.carp_advskew = advskew;
1339         ch.carp_authlen = 7;    /* XXX DEFINE */
1340         ch.carp_pad1 = 0;       /* must be zero */
1341         ch.carp_cksum = 0;
1342
1343 #ifdef INET
1344         if (sc->sc_ia != NULL) {
1345                 struct ip *ip;
1346
1347                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1348                 if (m == NULL) {
1349                         IFNET_STAT_INC(cifp, oerrors, 1);
1350                         carpstats.carps_onomem++;
1351                         /* XXX maybe less ? */
1352                         if (advbase != 255 || advskew != 255)
1353                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1354                                     carp_send_ad_timeout, sc);
1355                         return;
1356                 }
1357                 len = sizeof(*ip) + sizeof(ch);
1358                 m->m_pkthdr.len = len;
1359                 m->m_pkthdr.rcvif = NULL;
1360                 m->m_len = len;
1361                 MH_ALIGN(m, m->m_len);
1362                 m->m_flags |= M_MCAST;
1363                 ip = mtod(m, struct ip *);
1364                 ip->ip_v = IPVERSION;
1365                 ip->ip_hl = sizeof(*ip) >> 2;
1366                 ip->ip_tos = IPTOS_LOWDELAY;
1367                 ip->ip_len = len;
1368                 ip->ip_id = ip_newid();
1369                 ip->ip_off = IP_DF;
1370                 ip->ip_ttl = CARP_DFLTTL;
1371                 ip->ip_p = IPPROTO_CARP;
1372                 ip->ip_sum = 0;
1373                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1374                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1375
1376                 ch_ptr = (struct carp_header *)(&ip[1]);
1377                 bcopy(&ch, ch_ptr, sizeof(ch));
1378                 carp_prepare_ad(sc, ch_ptr);
1379                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1380
1381                 getmicrotime(&cifp->if_lastchange);
1382                 IFNET_STAT_INC(cifp, opackets, 1);
1383                 IFNET_STAT_INC(cifp, obytes, len);
1384                 carpstats.carps_opackets++;
1385
1386                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1387                         IFNET_STAT_INC(cifp, oerrors, 1);
1388                         if (sc->sc_sendad_errors < INT_MAX)
1389                                 sc->sc_sendad_errors++;
1390                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1391                                 carp_suppress_preempt++;
1392                                 if (carp_suppress_preempt == 1) {
1393                                         carp_send_ad_all();
1394                                 }
1395                         }
1396                         sc->sc_sendad_success = 0;
1397                 } else {
1398                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1399                                 if (++sc->sc_sendad_success >=
1400                                     CARP_SENDAD_MIN_SUCCESS) {
1401                                         carp_suppress_preempt--;
1402                                         sc->sc_sendad_errors = 0;
1403                                 }
1404                         } else {
1405                                 sc->sc_sendad_errors = 0;
1406                         }
1407                 }
1408         }
1409 #endif /* INET */
1410 #ifdef INET6
1411         if (sc->sc_ia6) {
1412                 struct ip6_hdr *ip6;
1413
1414                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1415                 if (m == NULL) {
1416                         IFNET_STAT_INC(cifp, oerrors, 1);
1417                         carpstats.carps_onomem++;
1418                         /* XXX maybe less ? */
1419                         if (advbase != 255 || advskew != 255)
1420                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1421                                     carp_send_ad_timeout, sc);
1422                         return;
1423                 }
1424                 len = sizeof(*ip6) + sizeof(ch);
1425                 m->m_pkthdr.len = len;
1426                 m->m_pkthdr.rcvif = NULL;
1427                 m->m_len = len;
1428                 MH_ALIGN(m, m->m_len);
1429                 m->m_flags |= M_MCAST;
1430                 ip6 = mtod(m, struct ip6_hdr *);
1431                 bzero(ip6, sizeof(*ip6));
1432                 ip6->ip6_vfc |= IPV6_VERSION;
1433                 ip6->ip6_hlim = CARP_DFLTTL;
1434                 ip6->ip6_nxt = IPPROTO_CARP;
1435                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1436                     sizeof(struct in6_addr));
1437                 /* set the multicast destination */
1438
1439                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1440                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1441                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1442                         IFNET_STAT_INC(cifp, oerrors, 1);
1443                         m_freem(m);
1444                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1445                         return;
1446                 }
1447
1448                 ch_ptr = (struct carp_header *)(&ip6[1]);
1449                 bcopy(&ch, ch_ptr, sizeof(ch));
1450                 carp_prepare_ad(sc, ch_ptr);
1451                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1452
1453                 getmicrotime(&cifp->if_lastchange);
1454                 IFNET_STAT_INC(cifp, opackets, 1);
1455                 IFNET_STAT_INC(cifp, obytes, len);
1456                 carpstats.carps_opackets6++;
1457
1458                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1459                         IFNET_STAT_INC(cifp, oerrors, 1);
1460                         if (sc->sc_sendad_errors < INT_MAX)
1461                                 sc->sc_sendad_errors++;
1462                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1463                                 carp_suppress_preempt++;
1464                                 if (carp_suppress_preempt == 1) {
1465                                         carp_send_ad_all();
1466                                 }
1467                         }
1468                         sc->sc_sendad_success = 0;
1469                 } else {
1470                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1471                                 if (++sc->sc_sendad_success >=
1472                                     CARP_SENDAD_MIN_SUCCESS) {
1473                                         carp_suppress_preempt--;
1474                                         sc->sc_sendad_errors = 0;
1475                                 }
1476                         } else {
1477                                 sc->sc_sendad_errors = 0;
1478                         }
1479                 }
1480         }
1481 #endif /* INET6 */
1482
1483         if (advbase != 255 || advskew != 255)
1484                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1485                     carp_send_ad_timeout, sc);
1486 }
1487
1488 /*
1489  * Broadcast a gratuitous ARP request containing
1490  * the virtual router MAC address for each IP address
1491  * associated with the virtual router.
1492  */
1493 static void
1494 carp_send_arp(struct carp_softc *sc)
1495 {
1496         const struct carp_vhaddr *vha;
1497
1498         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1499                 if (vha->vha_iaback == NULL)
1500                         continue;
1501                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1502         }
1503 }
1504
1505 #ifdef INET6
1506 static void
1507 carp_send_na(struct carp_softc *sc)
1508 {
1509         struct ifaddr_container *ifac;
1510         struct in6_addr *in6;
1511         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1512
1513         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1514                 struct ifaddr *ifa = ifac->ifa;
1515
1516                 if (ifa->ifa_addr->sa_family != AF_INET6)
1517                         continue;
1518
1519                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1520                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1521                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1522                 DELAY(1000);    /* XXX */
1523         }
1524 }
1525 #endif /* INET6 */
1526
1527 static __inline const struct carp_vhaddr *
1528 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1529 {
1530         struct carp_vhaddr *vha;
1531
1532         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1533                 if (vha->vha_iaback == NULL)
1534                         continue;
1535
1536                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1537                         return vha;
1538         }
1539         return NULL;
1540 }
1541
1542 #ifdef notyet
1543 static int
1544 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1545                      const struct in_addr *isaddr, uint8_t **enaddr)
1546 {
1547         const struct carp_softc *vh;
1548         int index, count = 0;
1549
1550         /*
1551          * XXX proof of concept implementation.
1552          * We use the source ip to decide which virtual host should
1553          * handle the request. If we're master of that virtual host,
1554          * then we respond, otherwise, just drop the arp packet on
1555          * the floor.
1556          */
1557
1558         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1559                 if (!CARP_IS_RUNNING(&vh->sc_if))
1560                         continue;
1561
1562                 if (carp_find_addr(vh, itaddr) != NULL)
1563                         count++;
1564         }
1565         if (count == 0)
1566                 return 0;
1567
1568         /* this should be a hash, like pf_hash() */
1569         index = ntohl(isaddr->s_addr) % count;
1570         count = 0;
1571
1572         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1573                 if (!CARP_IS_RUNNING(&vh->sc_if))
1574                         continue;
1575
1576                 if (carp_find_addr(vh, itaddr) == NULL)
1577                         continue;
1578
1579                 if (count == index) {
1580                         if (vh->sc_state == MASTER) {
1581                                 *enaddr = IF_LLADDR(&vh->sc_if);
1582                                 return 1;
1583                         } else {
1584                                 return 0;
1585                         }
1586                 }
1587                 count++;
1588         }
1589         return 0;
1590 }
1591 #endif
1592
1593 int
1594 carp_iamatch(const struct in_ifaddr *ia)
1595 {
1596         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1597
1598         KASSERT(&curthread->td_msgport == netisr_cpuport(0),
1599             ("not in netisr0"));
1600
1601 #ifdef notyet
1602         if (carp_opts[CARPCTL_ARPBALANCE])
1603                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1604 #endif
1605
1606         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1607                 return 0;
1608
1609         return 1;
1610 }
1611
1612 #ifdef INET6
1613 struct ifaddr *
1614 carp_iamatch6(void *v, struct in6_addr *taddr)
1615 {
1616 #ifdef foo
1617         struct carp_if *cif = v;
1618         struct carp_softc *vh;
1619
1620         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1621                 struct ifaddr_container *ifac;
1622
1623                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1624                               ifa_link) {
1625                         struct ifaddr *ifa = ifac->ifa;
1626
1627                         if (IN6_ARE_ADDR_EQUAL(taddr,
1628                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1629                             CARP_IS_RUNNING(&vh->sc_if) &&
1630                             vh->sc_state == MASTER) {
1631                                 return (ifa);
1632                         }
1633                 }
1634         }
1635 #endif
1636         return (NULL);
1637 }
1638
1639 void *
1640 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1641 {
1642 #ifdef foo
1643         struct m_tag *mtag;
1644         struct carp_if *cif = v;
1645         struct carp_softc *sc;
1646
1647         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1648                 struct ifaddr_container *ifac;
1649
1650                 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1651                               ifa_link) {
1652                         struct ifaddr *ifa = ifac->ifa;
1653
1654                         if (IN6_ARE_ADDR_EQUAL(taddr,
1655                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1656                             CARP_IS_RUNNING(&sc->sc_if)) {
1657                                 struct ifnet *ifp = &sc->sc_if;
1658
1659                                 mtag = m_tag_get(PACKET_TAG_CARP,
1660                                     sizeof(struct ifnet *), MB_DONTWAIT);
1661                                 if (mtag == NULL) {
1662                                         /* better a bit than nothing */
1663                                         return (IF_LLADDR(ifp));
1664                                 }
1665                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1666                                     sizeof(struct ifnet *));
1667                                 m_tag_prepend(m, mtag);
1668
1669                                 return (IF_LLADDR(ifp));
1670                         }
1671                 }
1672         }
1673 #endif
1674         return (NULL);
1675 }
1676 #endif
1677
1678 static struct ifnet *
1679 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1680 {
1681         struct carp_softc_container *scc;
1682
1683         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1684                 return NULL;
1685
1686         TAILQ_FOREACH(scc, cif, scc_link) {
1687                 struct carp_softc *sc = scc->scc_softc;
1688                 struct ifnet *ifp = &sc->sc_if;
1689
1690                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1691                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1692                         return ifp;
1693         }
1694         return NULL;
1695 }
1696
1697 static void
1698 carp_master_down_timeout(void *xsc)
1699 {
1700         struct carp_softc *sc = xsc;
1701         struct netmsg_carp *cmsg = &sc->sc_md_msg;
1702
1703         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1704             __func__, mycpuid));
1705
1706         crit_enter();
1707         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1708                 lwkt_sendmsg(netisr_cpuport(0), &cmsg->base.lmsg);
1709         crit_exit();
1710 }
1711
1712 static void
1713 carp_master_down_timeout_dispatch(netmsg_t msg)
1714 {
1715         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1716         struct carp_softc *sc = cmsg->nc_softc;
1717
1718         /* Reply ASAP */
1719         crit_enter();
1720         lwkt_replymsg(&cmsg->base.lmsg, 0);
1721         crit_exit();
1722
1723         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1724                    sc->sc_if.if_xname);
1725         carp_master_down(sc);
1726 }
1727
1728 static void
1729 carp_master_down(struct carp_softc *sc)
1730 {
1731         switch (sc->sc_state) {
1732         case INIT:
1733                 kprintf("%s: master_down event in INIT state\n",
1734                         sc->sc_if.if_xname);
1735                 break;
1736
1737         case MASTER:
1738                 break;
1739
1740         case BACKUP:
1741                 carp_set_state(sc, MASTER);
1742                 carp_send_ad(sc);
1743                 carp_send_arp(sc);
1744 #ifdef INET6
1745                 carp_send_na(sc);
1746 #endif /* INET6 */
1747                 carp_setrun(sc, 0);
1748                 carp_setroute(sc, RTM_ADD);
1749                 break;
1750         }
1751 }
1752
1753 /*
1754  * When in backup state, af indicates whether to reset the master down timer
1755  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1756  */
1757 static void
1758 carp_setrun(struct carp_softc *sc, sa_family_t af)
1759 {
1760         struct ifnet *cifp = &sc->sc_if;
1761         struct timeval tv;
1762
1763         if (sc->sc_carpdev == NULL) {
1764                 carp_set_state(sc, INIT);
1765                 return;
1766         }
1767
1768         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1769             (sc->sc_naddrs || sc->sc_naddrs6)) {
1770                 /* Nothing */
1771         } else {
1772                 carp_setroute(sc, RTM_DELETE);
1773                 return;
1774         }
1775
1776         switch (sc->sc_state) {
1777         case INIT:
1778                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1779                         carp_send_ad(sc);
1780                         carp_send_arp(sc);
1781 #ifdef INET6
1782                         carp_send_na(sc);
1783 #endif /* INET6 */
1784                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1785                                    cifp->if_xname);
1786                         carp_set_state(sc, MASTER);
1787                         carp_setroute(sc, RTM_ADD);
1788                 } else {
1789                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1790                         carp_set_state(sc, BACKUP);
1791                         carp_setroute(sc, RTM_DELETE);
1792                         carp_setrun(sc, 0);
1793                 }
1794                 break;
1795
1796         case BACKUP:
1797                 callout_stop(&sc->sc_ad_tmo);
1798                 tv.tv_sec = 3 * sc->sc_advbase;
1799                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1800                 switch (af) {
1801 #ifdef INET
1802                 case AF_INET:
1803                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1804                             carp_master_down_timeout, sc);
1805                         break;
1806 #endif /* INET */
1807 #ifdef INET6
1808                 case AF_INET6:
1809                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1810                             carp_master_down_timeout, sc);
1811                         break;
1812 #endif /* INET6 */
1813                 default:
1814                         if (sc->sc_naddrs)
1815                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1816                                     carp_master_down_timeout, sc);
1817                         if (sc->sc_naddrs6)
1818                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1819                                     carp_master_down_timeout, sc);
1820                         break;
1821                 }
1822                 break;
1823
1824         case MASTER:
1825                 tv.tv_sec = sc->sc_advbase;
1826                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1827                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1828                     carp_send_ad_timeout, sc);
1829                 break;
1830         }
1831 }
1832
1833 static void
1834 carp_multicast_cleanup(struct carp_softc *sc)
1835 {
1836         struct ip_moptions *imo = &sc->sc_imo;
1837
1838         if (imo->imo_num_memberships == 0)
1839                 return;
1840         KKASSERT(imo->imo_num_memberships == 1);
1841
1842         in_delmulti(imo->imo_membership[0]);
1843         imo->imo_membership[0] = NULL;
1844         imo->imo_num_memberships = 0;
1845         imo->imo_multicast_ifp = NULL;
1846 }
1847
#ifdef INET6
/*
 * Leave every IPv6 multicast group recorded in sc_im6o and forget the
 * multicast interface.
 */
static void
carp_multicast6_cleanup(struct carp_softc *sc)
{
        struct ip6_moptions *mopts6 = &sc->sc_im6o;
        struct in6_multi_mship *mship;

        while ((mship = LIST_FIRST(&mopts6->im6o_memberships)) != NULL) {
                LIST_REMOVE(mship, i6mm_chain);
                in6_leavegroup(mship);
        }
        mopts6->im6o_multicast_ifp = NULL;
}
#endif
1864
1865 static void
1866 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1867 {
1868         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1869         struct carp_softc *sc = cmsg->nc_softc;
1870         const struct carp_vhaddr *vha;
1871         struct ifcarpvhaddr *carpa, *carpa0;
1872         int count, len, error = 0;
1873
1874         count = 0;
1875         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1876                 ++count;
1877
1878         if (cmsg->nc_datalen == 0) {
1879                 cmsg->nc_datalen = count * sizeof(*carpa);
1880                 goto back;
1881         } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1882                 cmsg->nc_datalen = 0;
1883                 goto back;
1884         }
1885         len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1886         KKASSERT(len >= sizeof(*carpa));
1887
1888         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1889         if (carpa == NULL) {
1890                 error = ENOMEM; 
1891                 goto back;
1892         }
1893
1894         count = 0;
1895         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1896                 if (len < sizeof(*carpa))
1897                         break;
1898
1899                 carpa->carpa_flags = vha->vha_flags;
1900                 carpa->carpa_addr.sin_family = AF_INET;
1901                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1902
1903                 carpa->carpa_baddr.sin_family = AF_INET;
1904                 if (vha->vha_iaback == NULL) {
1905                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1906                 } else {
1907                         carpa->carpa_baddr.sin_addr =
1908                         vha->vha_iaback->ia_addr.sin_addr;
1909                 }
1910
1911                 ++carpa;
1912                 ++count;
1913                 len -= sizeof(*carpa);
1914         }
1915         cmsg->nc_datalen = sizeof(*carpa) * count;
1916         KKASSERT(cmsg->nc_datalen > 0);
1917
1918         cmsg->nc_data = carpa0;
1919
1920 back:
1921         lwkt_replymsg(&cmsg->base.lmsg, error);
1922 }
1923
1924 static int
1925 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1926 {
1927         struct ifnet *ifp = &sc->arpcom.ac_if;
1928         struct netmsg_carp cmsg;
1929         int error;
1930
1931         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1932         ifnet_deserialize_all(ifp);
1933
1934         bzero(&cmsg, sizeof(cmsg));
1935         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1936             carp_ioctl_getvhaddr_dispatch);
1937         cmsg.nc_softc = sc;
1938         cmsg.nc_datalen = ifd->ifd_len;
1939
1940         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1941
1942         if (!error) {
1943                 if (cmsg.nc_data != NULL) {
1944                         error = copyout(cmsg.nc_data, ifd->ifd_data,
1945                             cmsg.nc_datalen);
1946                         kfree(cmsg.nc_data, M_TEMP);
1947                 }
1948                 ifd->ifd_len = cmsg.nc_datalen;
1949         } else {
1950                 KASSERT(cmsg.nc_data == NULL,
1951                     ("%s temp vhaddr is alloc upon error", __func__));
1952         }
1953
1954         ifnet_serialize_all(ifp);
1955         return error;
1956 }
1957
1958 static int
1959 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1960     struct in_ifaddr *ia_del)
1961 {
1962         struct ifnet *ifp;
1963         struct in_ifaddr *ia_if;
1964         const struct in_ifaddr *ia_vha;
1965         struct in_ifaddr_container *iac;
1966         int own, ia_match_carpdev;
1967
1968         KKASSERT(vha->vha_ia != NULL);
1969         ia_vha = vha->vha_ia;
1970
1971         ia_if = NULL;
1972         own = 0;
1973         ia_match_carpdev = 0;
1974         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1975                 struct in_ifaddr *ia = iac->ia;
1976
1977                 if (ia == ia_del)
1978                         continue;
1979
1980                 if (ia->ia_ifp->if_type == IFT_CARP)
1981                         continue;
1982
1983                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1984                         continue;
1985
1986                 /* and, yeah, we need a multicast-capable iface too */
1987                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1988                         continue;
1989
1990                 if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
1991                     ia_vha->ia_subnet == ia->ia_subnet) {
1992                         if (ia_vha->ia_addr.sin_addr.s_addr ==
1993                             ia->ia_addr.sin_addr.s_addr)
1994                                 own = 1;
1995                         if (ia_if == NULL) {
1996                                 ia_if = ia;
1997                         } else if (sc->sc_carpdev != NULL &&
1998                             sc->sc_carpdev == ia->ia_ifp) {
1999                                 ia_if = ia;
2000                                 if (ia_if->ia_flags & IFA_ROUTE) {
2001                                         /*
2002                                          * Address with prefix route
2003                                          * is prefered
2004                                          */
2005                                         break;
2006                                 }
2007                                 ia_match_carpdev = 1;
2008                         } else if (!ia_match_carpdev) {
2009                                 if (ia->ia_flags & IFA_ROUTE) {
2010                                         /*
2011                                          * Address with prefix route
2012                                          * is prefered over others.
2013                                          */
2014                                         ia_if = ia;
2015                                 }
2016                         }
2017                 }
2018         }
2019
2020         carp_deactivate_vhaddr(sc, vha, FALSE);
2021         if (!ia_if)
2022                 return ENOENT;
2023
2024         ifp = ia_if->ia_ifp;
2025
2026         /* XXX Don't allow parent iface to be changed */
2027         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2028                 return EEXIST;
2029
2030         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2031 }
2032
2033 static void
2034 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2035 {
2036         struct carp_vhaddr *vha_new;
2037         struct in_ifaddr *carp_ia;
2038 #ifdef INVARIANTS
2039         struct carp_vhaddr *vha;
2040 #endif
2041
2042         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2043         carp_ia = ifatoia(carp_ifa);
2044
2045 #ifdef INVARIANTS
2046         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2047                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2048 #endif
2049
2050         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2051         vha_new->vha_ia = carp_ia;
2052         carp_insert_vhaddr(sc, vha_new);
2053
2054         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2055                 /*
2056                  * If the above configuration fails, it may only mean
2057                  * that the new address is problematic.  However, the
2058                  * carp(4) interface may already have several working
2059                  * addresses.  Since the expected behaviour of
2060                  * SIOC[AS]IFADDR is to put the NIC into working state,
2061                  * we try starting the state machine manually here with
2062                  * the hope that the carp(4)'s previously working
2063                  * addresses still could be brought up.
2064                  */
2065                 carp_hmac_prepare(sc);
2066                 carp_set_state(sc, INIT);
2067                 carp_setrun(sc, 0);
2068         }
2069 }
2070
2071 static void
2072 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2073 {
2074         struct carp_vhaddr *vha;
2075         struct in_ifaddr *carp_ia;
2076
2077         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2078         carp_ia = ifatoia(carp_ifa);
2079
2080         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2081                 KKASSERT(vha->vha_ia != NULL);
2082                 if (vha->vha_ia == carp_ia)
2083                         break;
2084         }
2085         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2086
2087         /*
2088          * Remove the vhaddr from the list before deactivating
2089          * the vhaddr, so that the HMAC could be correctly
2090          * updated in carp_deactivate_vhaddr()
2091          */
2092         carp_remove_vhaddr(sc, vha);
2093
2094         carp_deactivate_vhaddr(sc, vha, FALSE);
2095         kfree(vha, M_CARP);
2096 }
2097
2098 static void
2099 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2100 {
2101         struct carp_vhaddr *vha;
2102         struct in_ifaddr *carp_ia;
2103
2104         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2105         carp_ia = ifatoia(carp_ifa);
2106
2107         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2108                 KKASSERT(vha->vha_ia != NULL);
2109                 if (vha->vha_ia == carp_ia)
2110                         break;
2111         }
2112         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2113
2114         /* Remove then reinsert, to keep the vhaddr list sorted */
2115         carp_remove_vhaddr(sc, vha);
2116         carp_insert_vhaddr(sc, vha);
2117
2118         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2119                 /* See the comment in carp_add_addr() */
2120                 carp_hmac_prepare(sc);
2121                 carp_set_state(sc, INIT);
2122                 carp_setrun(sc, 0);
2123         }
2124 }
2125
2126 #ifdef notyet
2127
2128 #ifdef INET6
2129 static int
2130 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2131 {
2132         struct ifnet *ifp;
2133         struct carp_if *cif;
2134         struct in6_ifaddr *ia, *ia_if;
2135         struct ip6_moptions *im6o = &sc->sc_im6o;
2136         struct in6_multi_mship *imm;
2137         struct in6_addr in6;
2138         int own, error;
2139
2140         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2141                 carp_setrun(sc, 0);
2142                 return (0);
2143         }
2144
2145         /* we have to do it by hands to check we won't match on us */
2146         ia_if = NULL; own = 0;
2147         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2148                 int i;
2149
2150                 for (i = 0; i < 4; i++) {
2151                         if ((sin6->sin6_addr.s6_addr32[i] &
2152                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2153                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
2154                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2155                                 break;
2156                 }
2157                 /* and, yeah, we need a multicast-capable iface too */
2158                 if (ia->ia_ifp != &sc->sc_if &&
2159                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2160                     (i == 4)) {
2161                         if (!ia_if)
2162                                 ia_if = ia;
2163                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2164                             &ia->ia_addr.sin6_addr))
2165                                 own++;
2166                 }
2167         }
2168
2169         if (!ia_if)
2170                 return (EADDRNOTAVAIL);
2171         ia = ia_if;
2172         ifp = ia->ia_ifp;
2173
2174         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2175             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2176                 return (EADDRNOTAVAIL);
2177
2178         if (!sc->sc_naddrs6) {
2179                 im6o->im6o_multicast_ifp = ifp;
2180
2181                 /* join CARP multicast address */
2182                 bzero(&in6, sizeof(in6));
2183                 in6.s6_addr16[0] = htons(0xff02);
2184                 in6.s6_addr8[15] = 0x12;
2185                 if (in6_setscope(&in6, ifp, NULL) != 0)
2186                         goto cleanup;
2187                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2188                         goto cleanup;
2189                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2190
2191                 /* join solicited multicast address */
2192                 bzero(&in6, sizeof(in6));
2193                 in6.s6_addr16[0] = htons(0xff02);
2194                 in6.s6_addr32[1] = 0;
2195                 in6.s6_addr32[2] = htonl(1);
2196                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2197                 in6.s6_addr8[12] = 0xff;
2198                 if (in6_setscope(&in6, ifp, NULL) != 0)
2199                         goto cleanup;
2200                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2201                         goto cleanup;
2202                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2203         }
2204
2205 #ifdef foo
2206         if (!ifp->if_carp) {
2207                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2208
2209                 if ((error = ifpromisc(ifp, 1))) {
2210                         kfree(cif, M_CARP);
2211                         goto cleanup;
2212                 }
2213
2214                 TAILQ_INIT(&cif->vhif_vrs);
2215                 ifp->if_carp = cif;
2216         } else {
2217                 struct carp_softc *vr;
2218
2219                 cif = ifp->if_carp;
2220                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2221                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2222                                 error = EINVAL;
2223                                 goto cleanup;
2224                         }
2225                 }
2226         }
2227 #endif
2228         sc->sc_ia6 = ia;
2229         sc->sc_carpdev = ifp;
2230
2231 #ifdef foo
2232         { /* XXX prevent endless loop if already in queue */
2233         struct carp_softc *vr, *after = NULL;
2234         int myself = 0;
2235         cif = ifp->if_carp;
2236
2237         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2238                 if (vr == sc)
2239                         myself = 1;
2240                 if (vr->sc_vhid < sc->sc_vhid)
2241                         after = vr;
2242         }
2243
2244         if (!myself) {
2245                 /* We're trying to keep things in order */
2246                 if (after == NULL)
2247                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2248                 else
2249                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2250         }
2251         }
2252 #endif
2253
2254         sc->sc_naddrs6++;
2255         if (own)
2256                 sc->sc_advskew = 0;
2257         carp_sc_state(sc);
2258         carp_setrun(sc, 0);
2259
2260         return (0);
2261
2262 cleanup:
2263         /* clean up multicast memberships */
2264         if (!sc->sc_naddrs6) {
2265                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2266                         imm = LIST_FIRST(&im6o->im6o_memberships);
2267                         LIST_REMOVE(imm, i6mm_chain);
2268                         in6_leavegroup(imm);
2269                 }
2270         }
2271         return (error);
2272 }
2273
2274 static int
2275 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2276 {
2277         int error = 0;
2278
2279         if (!--sc->sc_naddrs6) {
2280                 struct carp_if *cif = sc->sc_carpdev->if_carp;
2281                 struct ip6_moptions *im6o = &sc->sc_im6o;
2282
2283                 callout_stop(&sc->sc_ad_tmo);
2284                 sc->sc_vhid = -1;
2285                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2286                         struct in6_multi_mship *imm =
2287                             LIST_FIRST(&im6o->im6o_memberships);
2288
2289                         LIST_REMOVE(imm, i6mm_chain);
2290                         in6_leavegroup(imm);
2291                 }
2292                 im6o->im6o_multicast_ifp = NULL;
2293 #ifdef foo
2294                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2295                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2296                         sc->sc_carpdev->if_carp = NULL;
2297                         kfree(cif, M_IFADDR);
2298                 }
2299 #endif
2300         }
2301         return (error);
2302 }
2303 #endif /* INET6 */
2304
2305 #endif
2306
2307 static int
2308 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2309 {
2310         struct carp_softc *sc = ifp->if_softc;
2311         struct ifreq *ifr = (struct ifreq *)addr;
2312         struct ifdrv *ifd = (struct ifdrv *)addr;
2313         int error = 0;
2314
2315         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2316
2317         switch (cmd) {
2318         case SIOCSIFFLAGS:
2319                 if (ifp->if_flags & IFF_UP) {
2320                         if ((ifp->if_flags & IFF_RUNNING) == 0)
2321                                 carp_init(sc);
2322                 } else if (ifp->if_flags & IFF_RUNNING) {
2323                         carp_ioctl_stop(sc);
2324                 }
2325                 break;
2326
2327         case SIOCSIFCAP:
2328                 carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2329                 break;
2330
2331         case SIOCSVH:
2332                 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2333                 break;
2334
2335         case SIOCGVH:
2336                 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2337                 break;
2338
2339         case SIOCGDRVSPEC:
2340                 switch (ifd->ifd_cmd) {
2341                 case CARPGDEVNAME:
2342                         error = carp_ioctl_getdevname(sc, ifd);
2343                         break;
2344
2345                 case CARPGVHADDR:
2346                         error = carp_ioctl_getvhaddr(sc, ifd);
2347                         break;
2348
2349                 default:
2350                         error = EINVAL;
2351                         break;
2352                 }
2353                 break;
2354
2355         default:
2356                 error = ether_ioctl(ifp, cmd, addr);
2357                 break;
2358         }
2359
2360         return error;
2361 }
2362
2363 static void
2364 carp_ioctl_stop_dispatch(netmsg_t msg)
2365 {
2366         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2367         struct carp_softc *sc = cmsg->nc_softc;
2368
2369         carp_stop(sc, FALSE);
2370         lwkt_replymsg(&cmsg->base.lmsg, 0);
2371 }
2372
2373 static void
2374 carp_ioctl_stop(struct carp_softc *sc)
2375 {
2376         struct ifnet *ifp = &sc->arpcom.ac_if;
2377         struct netmsg_carp cmsg;
2378
2379         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2380
2381         ifnet_deserialize_all(ifp);
2382
2383         bzero(&cmsg, sizeof(cmsg));
2384         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2385             carp_ioctl_stop_dispatch);
2386         cmsg.nc_softc = sc;
2387
2388         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2389
2390         ifnet_serialize_all(ifp);
2391 }
2392
2393 static void
2394 carp_ioctl_setvh_dispatch(netmsg_t msg)
2395 {
2396         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2397         struct carp_softc *sc = cmsg->nc_softc;
2398         struct ifnet *ifp = &sc->arpcom.ac_if;
2399         const struct carpreq *carpr = cmsg->nc_data;
2400         int error;
2401
2402         error = 1;
2403         if ((ifp->if_flags & IFF_RUNNING) &&
2404             sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2405                 switch (carpr->carpr_state) {
2406                 case BACKUP:
2407                         callout_stop(&sc->sc_ad_tmo);
2408                         carp_set_state(sc, BACKUP);
2409                         carp_setrun(sc, 0);
2410                         carp_setroute(sc, RTM_DELETE);
2411                         break;
2412
2413                 case MASTER:
2414                         carp_master_down(sc);
2415                         break;
2416
2417                 default:
2418                         break;
2419                 }
2420         }
2421         if (carpr->carpr_vhid > 0) {
2422                 if (carpr->carpr_vhid > 255) {
2423                         error = EINVAL;
2424                         goto back;
2425                 }
2426                 if (sc->sc_carpdev) {
2427                         struct carp_if *cif = sc->sc_carpdev->if_carp;
2428                         struct carp_softc_container *scc;
2429
2430                         TAILQ_FOREACH(scc, cif, scc_link) {
2431                                 struct carp_softc *vr = scc->scc_softc;
2432
2433                                 if (vr != sc &&
2434                                     vr->sc_vhid == carpr->carpr_vhid) {
2435                                         error = EEXIST;
2436                                         goto back;
2437                                 }
2438                         }
2439                 }
2440                 sc->sc_vhid = carpr->carpr_vhid;
2441
2442                 IF_LLADDR(ifp)[5] = sc->sc_vhid;
2443                 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2444                     ETHER_ADDR_LEN);
2445
2446                 error--;
2447         }
2448         if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2449                 if (carpr->carpr_advskew >= 255) {
2450                         error = EINVAL;
2451                         goto back;
2452                 }
2453                 if (carpr->carpr_advbase > 255) {
2454                         error = EINVAL;
2455                         goto back;
2456                 }
2457                 sc->sc_advbase = carpr->carpr_advbase;
2458                 sc->sc_advskew = carpr->carpr_advskew;
2459                 error--;
2460         }
2461         bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2462         if (error > 0) {
2463                 error = EINVAL;
2464         } else {
2465                 error = 0;
2466                 carp_setrun(sc, 0);
2467         }
2468 back:
2469         carp_hmac_prepare(sc);
2470
2471         lwkt_replymsg(&cmsg->base.lmsg, error);
2472 }
2473
2474 static int
2475 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2476 {
2477         struct ifnet *ifp = &sc->arpcom.ac_if;
2478         struct netmsg_carp cmsg;
2479         struct carpreq carpr;
2480         int error;
2481
2482         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2483         ifnet_deserialize_all(ifp);
2484
2485         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2486         if (error)
2487                 goto back;
2488
2489         error = copyin(udata, &carpr, sizeof(carpr));
2490         if (error)
2491                 goto back;
2492
2493         bzero(&cmsg, sizeof(cmsg));
2494         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2495             carp_ioctl_setvh_dispatch);
2496         cmsg.nc_softc = sc;
2497         cmsg.nc_data = &carpr;
2498
2499         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2500
2501 back:
2502         ifnet_serialize_all(ifp);
2503         return error;
2504 }
2505
2506 static void
2507 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2508 {
2509         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2510         struct carp_softc *sc = cmsg->nc_softc;
2511         struct ifnet *ifp = &sc->arpcom.ac_if;
2512         int reqcap = *((const int *)(cmsg->nc_data));
2513         int mask;
2514
2515         mask = reqcap ^ ifp->if_capenable;
2516         if (mask & IFCAP_TXCSUM) {
2517                 ifp->if_capenable ^= IFCAP_TXCSUM;
2518                 if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2519                     sc->sc_carpdev != NULL) {
2520                         ifp->if_hwassist |=
2521                             (sc->sc_carpdev->if_hwassist &
2522                              (CSUM_IP | CSUM_UDP | CSUM_TCP));
2523                 } else {
2524                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2525                 }
2526         }
2527         if (mask & IFCAP_TSO) {
2528                 ifp->if_capenable ^= IFCAP_TSO;
2529                 if ((ifp->if_capenable & IFCAP_TSO) &&
2530                     sc->sc_carpdev != NULL) {
2531                         ifp->if_hwassist |=
2532                             (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2533                 } else {
2534                         ifp->if_hwassist &= ~CSUM_TSO;
2535                 }
2536         }
2537
2538         lwkt_replymsg(&cmsg->base.lmsg, 0);
2539 }
2540
2541 static void
2542 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2543 {
2544         struct ifnet *ifp = &sc->arpcom.ac_if;
2545         struct netmsg_carp cmsg;
2546
2547         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2548         ifnet_deserialize_all(ifp);
2549
2550         bzero(&cmsg, sizeof(cmsg));
2551         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2552             carp_ioctl_ifcap_dispatch);
2553         cmsg.nc_softc = sc;
2554         cmsg.nc_data = &reqcap;
2555
2556         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2557
2558         ifnet_serialize_all(ifp);
2559 }
2560
2561 static void
2562 carp_ioctl_getvh_dispatch(netmsg_t msg)
2563 {
2564         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2565         struct carp_softc *sc = cmsg->nc_softc;
2566         struct carpreq *carpr = cmsg->nc_data;
2567
2568         carpr->carpr_state = sc->sc_state;
2569         carpr->carpr_vhid = sc->sc_vhid;
2570         carpr->carpr_advbase = sc->sc_advbase;
2571         carpr->carpr_advskew = sc->sc_advskew;
2572         bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2573
2574         lwkt_replymsg(&cmsg->base.lmsg, 0);
2575 }
2576
2577 static int
2578 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2579 {
2580         struct ifnet *ifp = &sc->arpcom.ac_if;
2581         struct netmsg_carp cmsg;
2582         struct carpreq carpr;
2583         int error;
2584
2585         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2586         ifnet_deserialize_all(ifp);
2587
2588         bzero(&cmsg, sizeof(cmsg));
2589         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2590             carp_ioctl_getvh_dispatch);
2591         cmsg.nc_softc = sc;
2592         cmsg.nc_data = &carpr;
2593
2594         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2595
2596         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2597         if (error)
2598                 bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2599
2600         error = copyout(&carpr, udata, sizeof(carpr));
2601
2602         ifnet_serialize_all(ifp);
2603         return error;
2604 }
2605
2606 static void
2607 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2608 {
2609         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2610         struct carp_softc *sc = cmsg->nc_softc;
2611         char *devname = cmsg->nc_data;
2612
2613         bzero(devname, IFNAMSIZ);
2614         if (sc->sc_carpdev != NULL)
2615                 strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2616
2617         lwkt_replymsg(&cmsg->base.lmsg, 0);
2618 }
2619
2620 static int
2621 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2622 {
2623         struct ifnet *ifp = &sc->arpcom.ac_if;
2624         struct netmsg_carp cmsg;
2625         char devname[IFNAMSIZ];
2626         int error;
2627
2628         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2629
2630         if (ifd->ifd_len != sizeof(devname))
2631                 return EINVAL;
2632
2633         ifnet_deserialize_all(ifp);
2634
2635         bzero(&cmsg, sizeof(cmsg));
2636         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2637             carp_ioctl_getdevname_dispatch);
2638         cmsg.nc_softc = sc;
2639         cmsg.nc_data = devname;
2640
2641         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2642
2643         error = copyout(devname, ifd->ifd_data, sizeof(devname));
2644
2645         ifnet_serialize_all(ifp);
2646         return error;
2647 }
2648
2649 static void
2650 carp_init_dispatch(netmsg_t msg)
2651 {
2652         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2653         struct carp_softc *sc = cmsg->nc_softc;
2654
2655         sc->sc_if.if_flags |= IFF_RUNNING;
2656         carp_hmac_prepare(sc);
2657         carp_set_state(sc, INIT);
2658         carp_setrun(sc, 0);
2659
2660         lwkt_replymsg(&cmsg->base.lmsg, 0);
2661 }
2662
2663 static void
2664 carp_init(void *xsc)
2665 {
2666         struct carp_softc *sc = xsc;
2667         struct ifnet *ifp = &sc->arpcom.ac_if;
2668         struct netmsg_carp cmsg;
2669
2670         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2671
2672         ifnet_deserialize_all(ifp);
2673
2674         bzero(&cmsg, sizeof(cmsg));
2675         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2676             carp_init_dispatch);
2677         cmsg.nc_softc = sc;
2678
2679         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2680
2681         ifnet_serialize_all(ifp);
2682 }
2683
2684 static int
2685 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2686     struct rtentry *rt)
2687 {
2688         struct carp_softc *sc = ifp->if_softc;
2689         struct ifnet *carpdev;
2690         int error = 0;
2691
2692         carpdev = sc->sc_carpdev;
2693         if (carpdev != NULL) {
2694                 /*
2695                  * NOTE:
2696                  * CARP's ifp is passed to backing device's
2697                  * if_output method.
2698                  */
2699                 carpdev->if_output(ifp, m, dst, rt);
2700         } else {
2701                 m_freem(m);
2702                 error = ENETUNREACH;
2703         }
2704         return error;
2705 }
2706
2707 /*
2708  * Start output on carp interface. This function should never be called.
2709  */
2710 static void
2711 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2712 {
2713         panic("%s: start called", ifp->if_xname);
2714 }
2715
2716 static void
2717 carp_set_state(struct carp_softc *sc, int state)
2718 {
2719         struct ifnet *cifp = &sc->sc_if;
2720
2721         if (sc->sc_state == state)
2722                 return;
2723         sc->sc_state = state;
2724
2725         switch (sc->sc_state) {
2726         case BACKUP:
2727                 cifp->if_link_state = LINK_STATE_DOWN;
2728                 break;
2729
2730         case MASTER:
2731                 cifp->if_link_state = LINK_STATE_UP;
2732                 break;
2733
2734         default:
2735                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2736                 break;
2737         }
2738         rt_ifmsg(cifp);
2739 }
2740
2741 void
2742 carp_group_demote_adj(struct ifnet *ifp, int adj)
2743 {
2744         struct ifg_list *ifgl;
2745         int *dm;
2746
2747         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2748                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2749                         continue;
2750                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2751
2752                 if (*dm + adj >= 0)
2753                         *dm += adj;
2754                 else
2755                         *dm = 0;
2756
2757                 if (adj > 0 && *dm == 1)
2758                         carp_send_ad_all();
2759                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2760                     ifgl->ifgl_group->ifg_group, *dm);
2761         }
2762 }
2763
2764 #ifdef foo
2765 void
2766 carp_carpdev_state(void *v)
2767 {
2768         struct carp_if *cif = v;
2769         struct carp_softc *sc;
2770
2771         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2772                 carp_sc_state(sc);
2773 }
2774
2775 static void
2776 carp_sc_state(struct carp_softc *sc)
2777 {
2778         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2779                 callout_stop(&sc->sc_ad_tmo);
2780                 callout_stop(&sc->sc_md_tmo);
2781                 callout_stop(&sc->sc_md6_tmo);
2782                 carp_set_state(sc, INIT);
2783                 carp_setrun(sc, 0);
2784                 if (!sc->sc_suppress) {
2785                         carp_suppress_preempt++;
2786                         if (carp_suppress_preempt == 1)
2787                                 carp_send_ad_all();
2788                 }
2789                 sc->sc_suppress = 1;
2790         } else {
2791                 carp_set_state(sc, INIT);
2792                 carp_setrun(sc, 0);
2793                 if (sc->sc_suppress)
2794                         carp_suppress_preempt--;
2795                 sc->sc_suppress = 0;
2796         }
2797 }
2798 #endif
2799
2800 static void
2801 carp_stop(struct carp_softc *sc, boolean_t detach)
2802 {
2803         sc->sc_if.if_flags &= ~IFF_RUNNING;
2804
2805         callout_stop(&sc->sc_ad_tmo);
2806         callout_stop(&sc->sc_md_tmo);
2807         callout_stop(&sc->sc_md6_tmo);
2808
2809         if (!detach && sc->sc_state == MASTER)
2810                 carp_send_ad(sc);
2811
2812         if (sc->sc_suppress)
2813                 carp_suppress_preempt--;
2814         sc->sc_suppress = 0;
2815
2816         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2817                 carp_suppress_preempt--;
2818         sc->sc_sendad_errors = 0;
2819         sc->sc_sendad_success = 0;
2820
2821         carp_set_state(sc, INIT);
2822         carp_setrun(sc, 0);
2823 }
2824
2825 static void
2826 carp_suspend(struct carp_softc *sc, boolean_t detach)
2827 {
2828         struct ifnet *cifp = &sc->sc_if;
2829
2830         carp_stop(sc, detach);
2831
2832         /* Retain the running state, if we are not dead yet */
2833         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2834                 cifp->if_flags |= IFF_RUNNING;
2835 }
2836
2837 static int
2838 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2839     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2840 {
2841         struct ip_moptions *imo = &sc->sc_imo;
2842         struct carp_if *ocif = ifp->if_carp;
2843         int error;
2844
2845         KKASSERT(vha->vha_ia != NULL);
2846
2847         KASSERT(ia_if != NULL, ("NULL backing address"));
2848         KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2849         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2850                 ("inactive vhaddr %p is the address owner", vha));
2851
2852         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2853                 ("%s is already on %s", sc->sc_if.if_xname,
2854                  sc->sc_carpdev->if_xname));
2855
2856         if (ocif == NULL) {
2857                 KASSERT(sc->sc_carpdev == NULL,
2858                         ("%s is already on %s", sc->sc_if.if_xname,
2859                          sc->sc_carpdev->if_xname));
2860
2861                 error = ifpromisc(ifp, 1);
2862                 if (error)
2863                         return error;
2864         } else {
2865                 struct carp_softc_container *scc;
2866
2867                 TAILQ_FOREACH(scc, ocif, scc_link) {
2868                         struct carp_softc *vr = scc->scc_softc;
2869
2870                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2871                                 return EINVAL;
2872                 }
2873         }
2874
2875         ifp->if_carp = carp_if_insert(ocif, sc);
2876         KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2877
2878         sc->sc_ia = ia_if;
2879         sc->sc_carpdev = ifp;
2880         sc->arpcom.ac_if.if_hwassist = 0;
2881         if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2882                 sc->arpcom.ac_if.if_hwassist |=
2883                     (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2884         }
2885         if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2886                 sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2887
2888         /*
2889          * Make sure that all protocol threads see the sc_carpdev and
2890          * if_carp changes
2891          */
2892         netmsg_service_sync();
2893
2894         if (ocif != NULL && ifp->if_carp != ocif) {
2895                 /*
2896                  * The old carp list could be safely free now,
2897                  * since no one can access it.
2898                  */
2899                 carp_if_free(ocif);
2900         }
2901
2902         vha->vha_iaback = ia_if;
2903         sc->sc_naddrs++;
2904
2905         if (own) {
2906                 vha->vha_flags |= CARP_VHAF_OWNER;
2907
2908                 /* XXX save user configured advskew? */
2909                 sc->sc_advskew = 0;
2910         }
2911
2912         carp_addroute_vhaddr(sc, vha);
2913
2914         /*
2915          * Join the multicast group only after the backing interface
2916          * has been hooked with the CARP interface.
2917          */
2918         KASSERT(imo->imo_multicast_ifp == NULL ||
2919                 imo->imo_multicast_ifp == &sc->sc_if,
2920                 ("%s didn't leave mcast group on %s",
2921                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2922
2923         if (imo->imo_num_memberships == 0) {
2924                 struct in_addr addr;
2925
2926                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2927                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2928                 if (imo->imo_membership[0] == NULL) {
2929                         carp_deactivate_vhaddr(sc, vha, FALSE);
2930                         return ENOBUFS;
2931                 }
2932
2933                 imo->imo_num_memberships++;
2934                 imo->imo_multicast_ifp = &sc->sc_if;
2935                 imo->imo_multicast_ttl = CARP_DFLTTL;
2936                 imo->imo_multicast_loop = 0;
2937         }
2938
2939         carp_hmac_prepare(sc);
2940         carp_set_state(sc, INIT);
2941         carp_setrun(sc, 0);
2942         return 0;
2943 }
2944
2945 static void
2946 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2947     boolean_t del_iaback)
2948 {
2949         KKASSERT(vha->vha_ia != NULL);
2950
2951         carp_hmac_prepare(sc);
2952
2953         if (vha->vha_iaback == NULL) {
2954                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2955                         ("inactive vhaddr %p is the address owner", vha));
2956                 return;
2957         }
2958
2959         vha->vha_flags &= ~CARP_VHAF_OWNER;
2960         carp_delroute_vhaddr(sc, vha, del_iaback);
2961
2962         KKASSERT(sc->sc_naddrs > 0);
2963         vha->vha_iaback = NULL;
2964         sc->sc_naddrs--;
2965         if (!sc->sc_naddrs) {
2966                 if (sc->sc_naddrs6) {
2967                         carp_multicast_cleanup(sc);
2968                         sc->sc_ia = NULL;
2969                 } else {
2970                         carp_detach(sc, FALSE, del_iaback);
2971                 }
2972         }
2973 }
2974
2975 static void
2976 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2977 {
2978         struct carp_vhaddr *vha;
2979         struct in_ifaddr *ia_if;
2980
2981         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2982         ia_if = ifatoia(ifa_if);
2983
2984         /*
2985          * Test each inactive vhaddr against the newly added address.
2986          * If the newly added address could be the backing address,
2987          * then activate the matching vhaddr.
2988          */
2989         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2990                 const struct in_ifaddr *ia;
2991                 int own;
2992
2993                 if (vha->vha_iaback != NULL)
2994                         continue;
2995
2996                 ia = vha->vha_ia;
2997                 if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
2998                     ia->ia_subnet != ia_if->ia_subnet)
2999                         continue;
3000
3001                 own = 0;
3002                 if (ia->ia_addr.sin_addr.s_addr ==
3003                     ia_if->ia_addr.sin_addr.s_addr)
3004                         own = 1;
3005
3006                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3007         }
3008 }
3009
3010 static void
3011 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3012                   struct ifaddr *ifa_if)
3013 {
3014         struct carp_vhaddr *vha;
3015         struct in_ifaddr *ia_if;
3016
3017         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3018         ia_if = ifatoia(ifa_if);
3019
3020         /*
3021          * Ad src address is deleted; set it to NULL.
3022          * Following loop will try pick up a new ad src address
3023          * if one of the vhaddr could retain its backing address.
3024          */
3025         if (sc->sc_ia == ia_if)
3026                 sc->sc_ia = NULL;
3027
3028         /*
3029          * Test each active vhaddr against the deleted address.
3030          * If the deleted address is vhaddr address's backing
3031          * address, then deactivate the vhaddr.
3032          */
3033         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3034                 if (vha->vha_iaback == NULL)
3035                         continue;
3036
3037                 if (vha->vha_iaback == ia_if)
3038                         carp_deactivate_vhaddr(sc, vha, TRUE);
3039                 else if (sc->sc_ia == NULL)
3040                         sc->sc_ia = vha->vha_iaback;
3041         }
3042 }
3043
3044 static void
3045 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3046 {
3047         struct carp_vhaddr *vha;
3048
3049         KKASSERT(sc->sc_carpdev == NULL);
3050
3051         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3052                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3053 }
3054
3055 static void
3056 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3057             enum ifaddr_event event, struct ifaddr *ifa)
3058 {
3059         struct carp_softc *sc;
3060
3061         if (ifa->ifa_addr->sa_family != AF_INET)
3062                 return;
3063
3064         KASSERT(&curthread->td_msgport == netisr_cpuport(0),
3065             ("not in netisr0"));
3066
3067         if (ifp->if_type == IFT_CARP) {
3068                 /*
3069                  * Address is changed on carp(4) interface
3070                  */
3071                 switch (event) {
3072                 case IFADDR_EVENT_ADD:
3073                         carp_add_addr(ifp->if_softc, ifa);
3074                         break;
3075
3076                 case IFADDR_EVENT_CHANGE:
3077                         carp_config_addr(ifp->if_softc, ifa);
3078                         break;
3079
3080                 case IFADDR_EVENT_DELETE:
3081                         carp_del_addr(ifp->if_softc, ifa);
3082                         break;
3083                 }
3084                 return;
3085         }
3086
3087         /*
3088          * Address is changed on non-carp(4) interface
3089          */
3090         if ((ifp->if_flags & IFF_MULTICAST) == 0)
3091                 return;
3092
3093         LIST_FOREACH(sc, &carpif_list, sc_next) {
3094                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3095                         /* Not the parent iface; skip */
3096                         continue;
3097                 }
3098
3099                 switch (event) {
3100                 case IFADDR_EVENT_ADD:
3101                         carp_link_addrs(sc, ifp, ifa);
3102                         break;
3103
3104                 case IFADDR_EVENT_DELETE:
3105                         if (sc->sc_carpdev != NULL) {
3106                                 carp_unlink_addrs(sc, ifp, ifa);
3107                                 if (sc->sc_carpdev == NULL) {
3108                                         /*
3109                                          * We no longer have the parent
3110                                          * interface, however, certain
3111                                          * virtual addresses, which are
3112                                          * not used because they can't
3113                                          * match the previous parent
3114                                          * interface's addresses, may now
3115                                          * match different interface's
3116                                          * addresses.
3117                                          */
3118                                         carp_update_addrs(sc, ifa);
3119                                 }
3120                         } else {
3121                                 /*
3122                                  * The carp(4) interface didn't have a
3123                                  * parent iface, so it is not possible
3124                                  * that it will contain any address to
3125                                  * be unlinked.
3126                                  */
3127                         }
3128                         break;
3129
3130                 case IFADDR_EVENT_CHANGE:
3131                         if (sc->sc_carpdev == NULL) {
3132                                 /*
3133                                  * The carp(4) interface didn't have a
3134                                  * parent iface, so it is not possible
3135                                  * that it will contain any address to
3136                                  * be updated.
3137                                  */
3138                                 carp_link_addrs(sc, ifp, ifa);
3139                         } else {
3140                                 /*
3141                                  * First try breaking tie with the old
3142                                  * address.  Then see whether we could
3143                                  * link certain vhaddr to the new address.
3144                                  * If that fails, i.e. carpdev is NULL,
3145                                  * we try a global update.
3146                                  *
3147                                  * NOTE: The above order is critical.
3148                                  */
3149                                 carp_unlink_addrs(sc, ifp, ifa);
3150                                 carp_link_addrs(sc, ifp, ifa);
3151                                 if (sc->sc_carpdev == NULL) {
3152                                         /*
3153                                          * See the comment in the above
3154                                          * IFADDR_EVENT_DELETE block.
3155                                          */
3156                                         carp_update_addrs(sc, NULL);
3157                                 }
3158                         }
3159                         break;
3160                 }
3161         }
3162 }
3163
3164 void
3165 carp_proto_ctlinput(netmsg_t msg)
3166 {
3167         int cmd = msg->ctlinput.nm_cmd;
3168         struct sockaddr *sa = msg->ctlinput.nm_arg;
3169         struct in_ifaddr_container *iac;
3170
3171         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3172                 struct in_ifaddr *ia = iac->ia;
3173                 struct ifnet *ifp = ia->ia_ifp;
3174
3175                 if (ifp->if_type == IFT_CARP)
3176                         continue;
3177
3178                 if (ia->ia_ifa.ifa_addr == sa) {
3179                         if (cmd == PRC_IFDOWN) {
3180                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3181                                     &ia->ia_ifa);
3182                         } else if (cmd == PRC_IFUP) {
3183                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3184                                     &ia->ia_ifa);
3185                         }
3186                         break;
3187                 }
3188         }
3189
3190         lwkt_replymsg(&msg->lmsg, 0);
3191 }
3192
3193 struct ifnet *
3194 carp_parent(struct ifnet *cifp)
3195 {
3196         struct carp_softc *sc;
3197
3198         KKASSERT(cifp->if_type == IFT_CARP);
3199         sc = cifp->if_softc;
3200
3201         return sc->sc_carpdev;
3202 }
3203
/*
 * Flags for rtinit() on the address 'x': RTF_HOST when its interface is
 * a loopback or point-to-point interface, 0 otherwise.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		 ? RTF_HOST : 0)
3207
3208 static int
3209 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3210 {
3211         struct in_ifaddr *ia, *iaback;
3212
3213         if (sc->sc_state != MASTER)
3214                 return 0;
3215
3216         ia = vha->vha_ia;
3217         KKASSERT(ia != NULL);
3218
3219         iaback = vha->vha_iaback;
3220         KKASSERT(iaback != NULL);
3221
3222         return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3223 }
3224
3225 static void
3226 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3227     boolean_t del_iaback)
3228 {
3229         struct in_ifaddr *ia, *iaback;
3230
3231         ia = vha->vha_ia;
3232         KKASSERT(ia != NULL);
3233
3234         iaback = vha->vha_iaback;
3235         KKASSERT(iaback != NULL);
3236
3237         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3238                 rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3239                 return;
3240         }
3241
3242         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3243         in_ifadown_force(&ia->ia_ifa, 1);
3244         ia->ia_flags &= ~IFA_ROUTE;
3245 }
3246
3247 static int
3248 carp_modevent(module_t mod, int type, void *data)
3249 {
3250         switch (type) {
3251         case MOD_LOAD:
3252                 LIST_INIT(&carpif_list);
3253                 carp_ifdetach_event =
3254                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3255                                       EVENTHANDLER_PRI_ANY);
3256                 carp_ifaddr_event =
3257                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3258                                       EVENTHANDLER_PRI_FIRST);
3259                 if_clone_attach(&carp_cloner);
3260                 break;
3261
3262         case MOD_UNLOAD:
3263                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3264                                         carp_ifdetach_event);
3265                 EVENTHANDLER_DEREGISTER(ifaddr_event,
3266                                         carp_ifaddr_event);
3267                 if_clone_detach(&carp_cloner);
3268                 break;
3269
3270         default:
3271                 return (EINVAL);
3272         }
3273         return (0);
3274 }
3275
3276 static moduledata_t carp_mod = {
3277         "carp",
3278         carp_modevent,
3279         0
3280 };
3281 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);