nrelease - fix/improve livecd
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/caps.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82
83 #include <netinet/ip_carp.h>
84
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl, add/delete inet addresses,
91  *   are serialized by netisr0; not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *      
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *      
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - if_carp accessing:
246  *   The accessing code should cache the if_carp in a local temporary
247  *   variable and access that temporary variable along the code path
248  *   instead of accessing if_carp later on.
249  */
250
/* Base name of cloned carp interfaces; used by the cloner and if_initname(). */
#define CARP_IFNAME             "carp"
/* True only when both IFF_UP and IFF_RUNNING are set in (ifp)->if_flags. */
#define CARP_IS_RUNNING(ifp)    \
        (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))

/* Forward declaration; the full definition follows struct netmsg_carp. */
struct carp_softc;
256
/*
 * One IPv4 virtual address of a carp interface.
 *
 * Entries are linked on carp_softc.sc_vha_list through vha_link;
 * carp_insert_vhaddr() keeps that list sorted by address and sets
 * CARP_VHAF_ONLIST in vha_flags while the entry is linked.
 */
struct carp_vhaddr {
        uint32_t                vha_flags;      /* CARP_VHAF_ */
        struct in_ifaddr        *vha_ia;        /* carp address */
        struct in_ifaddr        *vha_iaback;    /* backing address */
        TAILQ_ENTRY(carp_vhaddr) vha_link;
};
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264
/*
 * Netmsg used to run carp operations in a netisr thread.
 *
 * 'base' comes first so the message can be cast to/from netmsg_t by the
 * *_dispatch() handlers.  nc_softc names the carp interface operated on.
 * NOTE(review): nc_carpdev, nc_data and nc_datalen are filled in by
 * callers outside this part of the file -- confirm their exact meaning
 * there.
 */
struct netmsg_carp {
        struct netmsg_base      base;
        struct ifnet            *nc_carpdev;
        struct carp_softc       *nc_softc;
        void                    *nc_data;
        size_t                  nc_datalen;
};
272
/*
 * Per-interface state of one carp pseudo interface.
 *
 * Allocated (zeroed) in carp_clone_create() and freed in
 * carp_clone_destroy().  The interface's ifnet lives inside 'arpcom' and
 * is reached through the sc_if shorthand defined below.
 */
struct carp_softc {
        struct arpcom            arpcom;
        struct ifnet            *sc_carpdev;    /* parent interface */
        struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */

        const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
        struct ip_moptions       sc_imo;

#ifdef INET6
        struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
        struct ip6_moptions      sc_im6o;
#endif /* INET6 */

        enum { INIT = 0, BACKUP, MASTER }
                                 sc_state;
        /* Set to TRUE by carp_clone_destroy_dispatch() before teardown */
        boolean_t                sc_dead;

        int                      sc_suppress;

        int                      sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS  3
        int                      sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3

        /* Starts out as -1 in carp_clone_create(); low 8 bits feed the HMAC */
        int                      sc_vhid;
        int                      sc_advskew;
        int                      sc_naddrs;     /* actually used IPv4 vha */
        int                      sc_naddrs6;
        int                      sc_advbase;    /* seconds */
        int                      sc_init_counter;
        uint64_t                 sc_counter;

        /* authentication */
#define CARP_HMAC_PAD   64
        unsigned char            sc_key[CARP_KEY_LEN];
        /* Key pad: ipad while hashing in carp_hmac_prepare(), opad after */
        unsigned char            sc_pad[CARP_HMAC_PAD];
        /* Partially computed inner hash, copied by carp_hmac_generate() */
        SHA1_CTX                 sc_sha1;

        struct callout           sc_ad_tmo;     /* advertisement timeout */
        struct netmsg_carp       sc_ad_msg;     /* adv timeout netmsg */
        struct callout           sc_md_tmo;     /* ip4 master down timeout */
        struct callout           sc_md6_tmo;    /* ip6 master down timeout */
        struct netmsg_carp       sc_md_msg;     /* master down timeout netmsg */

        LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
};

/* Shorthand for the ifnet embedded in the softc's arpcom */
#define sc_if   arpcom.ac_if
321
/*
 * List node that points at a carp_softc without owning it.
 *
 * A carp_if is a tail queue of these containers.  carp_if_insert() and
 * carp_if_remove() build a complete new carp_if (with new containers)
 * instead of editing an existing one, and carp_if_free() releases only
 * the carp_if and its containers, never the softcs they point to.
 */
struct carp_softc_container {
        TAILQ_ENTRY(carp_softc_container) scc_link;
        struct carp_softc       *scc_softc;
};
TAILQ_HEAD(carp_if, carp_softc_container);
327
/*
 * net.inet.carp sysctl knobs.
 */
SYSCTL_DECL(_net_inet_carp);

/*
 * Option values indexed by CARPCTL_* id; the initializer supplies the
 * defaults.  The CARPCTL_* ids are defined outside this file.
 */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0, 1 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
/* The log level gates CARP_LOG() (> 0) and CARP_DEBUG() (> 1) */
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
SYSCTL_INT(_net_inet_carp, CARPCTL_SETROUTE, setroute, CTLFLAG_RW,
    &carp_opts[CARPCTL_SETROUTE], 0, "set route");

/* Exported read-only (CTLFLAG_RD) */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Exported read-only (CTLFLAG_RD) */
static int carp_prio_ad = 1;
SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
    &carp_prio_ad, 0, "Prioritize advertisement packet");

/* Global counters, exported read-write as an opaque struct */
static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");
354
/*
 * Log at LOG_INFO when the net.inet.carp.log value is greater than 0.
 * Arguments are a printf-style format followed by its values.
 */
#define CARP_LOG(...)   do {                            \
        if (carp_opts[CARPCTL_LOG] > 0)                 \
                log(LOG_INFO, __VA_ARGS__);             \
} while (0)

/*
 * Log at LOG_DEBUG when the net.inet.carp.log value is greater than 1.
 * Arguments are a printf-style format followed by its values.
 */
#define CARP_DEBUG(...) do {                            \
        if (carp_opts[CARPCTL_LOG] > 1)                 \
                log(LOG_DEBUG, __VA_ARGS__);            \
} while (0)
364
/* Token held while inserting into or removing from carpif_list */
static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);

/*
 * Forward declarations of the file-local functions.
 */

/* HMAC, routing, input, cloning and protocol state machine */
static void     carp_hmac_prepare(struct carp_softc *);
static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
                    unsigned char *);
static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
                    unsigned char *);
static void     carp_setroute(struct carp_softc *, int);
static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
                    struct carp_header *, sa_family_t);
static int      carp_clone_create(struct if_clone *, int, caddr_t, caddr_t);
static int      carp_clone_destroy(struct ifnet *);
static void     carp_detach(struct carp_softc *, boolean_t, boolean_t);
static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
static void     carp_send_ad_all(void);
static void     carp_send_ad_timeout(void *);
static void     carp_send_ad(struct carp_softc *);
static void     carp_send_arp(struct carp_softc *);
static void     carp_master_down_timeout(void *);
static void     carp_master_down(struct carp_softc *);
static void     carp_setrun(struct carp_softc *, sa_family_t);
static void     carp_set_state(struct carp_softc *, int);
static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);

/* ifnet methods installed by carp_clone_create() */
static void     carp_init(void *);
static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
                    struct rtentry *);
static void     carp_start(struct ifnet *, struct ifaltq_subque *);

/* Address bookkeeping */
static void     carp_multicast_cleanup(struct carp_softc *);
static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
                    struct ifaddr *);
static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
                    struct ifaddr *);
static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);

/* Virtual address (carp_vhaddr) handling */
static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
                    struct in_ifaddr *);
static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
                    struct ifnet *, struct in_ifaddr *, int);
static void     carp_deactivate_vhaddr(struct carp_softc *,
                    struct carp_vhaddr *, boolean_t);
static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
                    boolean_t);

#ifdef foo
static void     carp_sc_state(struct carp_softc *);
#endif
#ifdef INET6
static void     carp_send_na(struct carp_softc *);
#ifdef notyet
static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
#endif
static void     carp_multicast6_cleanup(struct carp_softc *);
#endif
/* Stop/suspend and ioctl back ends */
static void     carp_stop(struct carp_softc *, boolean_t);
static void     carp_suspend(struct carp_softc *, boolean_t);
static void     carp_ioctl_stop(struct carp_softc *);
static int      carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
static void     carp_ioctl_ifcap(struct carp_softc *, int);
static int      carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
static int      carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
static int      carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);

/* carp_if (per backing interface softc list) helpers */
static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
static void     carp_if_free(struct carp_if *);

/* Event handlers */
static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
                            struct ifaddr *);
static void     carp_ifdetach(void *, struct ifnet *);

/* Netmsg handlers */
static void     carp_ifdetach_dispatch(netmsg_t);
static void     carp_clone_destroy_dispatch(netmsg_t);
static void     carp_init_dispatch(netmsg_t);
static void     carp_ioctl_stop_dispatch(netmsg_t);
static void     carp_ioctl_setvh_dispatch(netmsg_t);
static void     carp_ioctl_ifcap_dispatch(netmsg_t);
static void     carp_ioctl_getvh_dispatch(netmsg_t);
static void     carp_ioctl_getdevname_dispatch(netmsg_t);
static void     carp_ioctl_getvhaddr_dispatch(netmsg_t);
static void     carp_send_ad_timeout_dispatch(netmsg_t);
static void     carp_master_down_timeout_dispatch(netmsg_t);
454
/* Malloc type used for every allocation made in this file */
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");

/* All carp softcs; modified with carp_listtok held */
static LIST_HEAD(, carp_softc) carpif_list;

/* Cloner for "carp" interfaces */
static struct if_clone carp_cloner =
IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
                     0, IF_MAXUNIT);

/* Link-level address handed to ether_ifattach() in carp_clone_create() */
static const uint8_t    carp_etheraddr[ETHER_ADDR_LEN] =
        { 0, 0, 0x5e, 0, 1, 0 };

/* Event handler registration tags */
static eventhandler_tag carp_ifdetach_event;
static eventhandler_tag carp_ifaddr_event;
468
469 static __inline void
470 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
471 {
472         struct carp_vhaddr *vha;
473         u_long new_addr, addr;
474
475         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
476
477         /*
478          * Virtual address list is sorted; smaller one first
479          */
480         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
481
482         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
483                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
484
485                 if (addr > new_addr)
486                         break;
487         }
488         if (vha == NULL)
489                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
490         else
491                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
492         vha_new->vha_flags |= CARP_VHAF_ONLIST;
493 }
494
495 static __inline void
496 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
497 {
498         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
499         vha->vha_flags &= ~CARP_VHAF_ONLIST;
500         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
501 }
502
503 static void
504 carp_hmac_prepare(struct carp_softc *sc)
505 {
506         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
507         uint8_t vhid = sc->sc_vhid & 0xff;
508         int i;
509 #ifdef INET6
510         struct ifaddr_container *ifac;
511         struct in6_addr in6;
512 #endif
513 #ifdef INET
514         struct carp_vhaddr *vha;
515 #endif
516
517         /* XXX: possible race here */
518
519         /* compute ipad from key */
520         bzero(sc->sc_pad, sizeof(sc->sc_pad));
521         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
522         for (i = 0; i < sizeof(sc->sc_pad); i++)
523                 sc->sc_pad[i] ^= 0x36;
524
525         /* precompute first part of inner hash */
526         SHA1Init(&sc->sc_sha1);
527         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
528         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
529         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
530         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
531 #ifdef INET
532         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
533                 SHA1Update(&sc->sc_sha1,
534                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
535                     sizeof(struct in_addr));
536         }
537 #endif /* INET */
538 #ifdef INET6
539         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
540                 struct ifaddr *ifa = ifac->ifa;
541
542                 if (ifa->ifa_addr->sa_family == AF_INET6) {
543                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
544                         in6_clearscope(&in6);
545                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
546                 }
547         }
548 #endif /* INET6 */
549
550         /* convert ipad to opad */
551         for (i = 0; i < sizeof(sc->sc_pad); i++)
552                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
553 }
554
555 static void
556 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
557     unsigned char md[20])
558 {
559         SHA1_CTX sha1ctx;
560
561         /* fetch first half of inner hash */
562         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
563
564         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
565         SHA1Final(md, &sha1ctx);
566
567         /* outer hash */
568         SHA1Init(&sha1ctx);
569         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
570         SHA1Update(&sha1ctx, md, 20);
571         SHA1Final(md, &sha1ctx);
572 }
573
/*
 * Check a received HMAC against the one computed locally.
 *
 *   counter  replay counter taken from the received advertisement
 *   md       20 byte digest taken from the received advertisement
 *
 * Returns 0 when the digests match and non-zero otherwise, the same
 * convention as bcmp().
 *
 * All 20 bytes are always compared so that the running time does not
 * depend on where the first mismatch is; an early-exit comparison would
 * leak how much of a forged digest was correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
        unsigned char md2[20];
        unsigned char diff = 0;
        size_t i;

        carp_hmac_generate(sc, counter, md2);

        /* Accumulate differences instead of stopping at the first one */
        for (i = 0; i < sizeof(md2); i++)
                diff |= md[i] ^ md2[i];

        return (diff != 0);
}
583
584 static void
585 carp_setroute(struct carp_softc *sc, int cmd)
586 {
587 #ifdef INET6
588         struct ifaddr_container *ifac;
589 #endif
590         struct carp_vhaddr *vha;
591
592         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
593
594         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
595                 if (vha->vha_iaback == NULL)
596                         continue;
597                 if (cmd == RTM_DELETE)
598                         carp_delroute_vhaddr(sc, vha, FALSE);
599                 else
600                         carp_addroute_vhaddr(sc, vha);
601         }
602
603 #ifdef INET6
604         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
605                 struct ifaddr *ifa = ifac->ifa;
606
607                 if (ifa->ifa_addr->sa_family == AF_INET6) {
608                         if (cmd == RTM_ADD)
609                                 in6_ifaddloop(ifa);
610                         else
611                                 in6_ifremloop(ifa);
612                 }
613         }
614 #endif /* INET6 */
615 }
616
617 static int
618 carp_clone_create(struct if_clone *ifc, int unit,
619                   caddr_t params __unused, caddr_t data __unused)
620 {
621         struct carp_softc *sc;
622         struct ifnet *ifp;
623
624         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
625         ifp = &sc->sc_if;
626
627         sc->sc_suppress = 0;
628         sc->sc_advbase = CARP_DFLTINTV;
629         sc->sc_vhid = -1;       /* required setting */
630         sc->sc_advskew = 0;
631         sc->sc_init_counter = 1;
632         sc->sc_naddrs = 0;
633         sc->sc_naddrs6 = 0;
634
635         TAILQ_INIT(&sc->sc_vha_list);
636
637 #ifdef INET6
638         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
639 #endif
640
641         callout_init_mp(&sc->sc_ad_tmo);
642         netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
643             MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
644         sc->sc_ad_msg.nc_softc = sc;
645
646         callout_init_mp(&sc->sc_md_tmo);
647         callout_init_mp(&sc->sc_md6_tmo);
648         netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
649             MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
650         sc->sc_md_msg.nc_softc = sc;
651
652         if_initname(ifp, CARP_IFNAME, unit);
653         ifp->if_softc = sc;
654         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
655         ifp->if_init = carp_init;
656         ifp->if_ioctl = carp_ioctl;
657         ifp->if_start = carp_start;
658
659         ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
660         ifp->if_capenable = ifp->if_capabilities;
661         /*
662          * Leave if_hwassist as it is; if_hwassist will be
663          * setup when this carp interface has parent.
664          */
665
666         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
667         ifq_set_ready(&ifp->if_snd);
668
669         ether_ifattach(ifp, carp_etheraddr, NULL);
670
671         ifp->if_type = IFT_CARP;
672         ifp->if_output = carp_output;
673
674         lwkt_gettoken(&carp_listtok);
675         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
676         lwkt_reltoken(&carp_listtok);
677
678         return (0);
679 }
680
681 static void
682 carp_clone_destroy_dispatch(netmsg_t msg)
683 {
684         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
685         struct carp_softc *sc = cmsg->nc_softc;
686
687         sc->sc_dead = TRUE;
688         carp_detach(sc, TRUE, FALSE);
689
690         callout_cancel(&sc->sc_ad_tmo);
691         callout_cancel(&sc->sc_md_tmo);
692         callout_cancel(&sc->sc_md6_tmo);
693
694         crit_enter();
695         lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
696         lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
697         crit_exit();
698
699         lwkt_replymsg(&cmsg->base.lmsg, 0);
700 }
701
702 static int
703 carp_clone_destroy(struct ifnet *ifp)
704 {
705         struct carp_softc *sc = ifp->if_softc;
706         struct netmsg_carp cmsg;
707
708         bzero(&cmsg, sizeof(cmsg));
709         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
710             carp_clone_destroy_dispatch);
711         cmsg.nc_softc = sc;
712
713         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
714
715         lwkt_gettoken(&carp_listtok);
716         LIST_REMOVE(sc, sc_next);
717         lwkt_reltoken(&carp_listtok);
718
719         bpfdetach(ifp);
720         if_detach(ifp);
721
722         KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
723         kfree(sc, M_CARP);
724
725         return 0;
726 }
727
728 static struct carp_if *
729 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
730 {
731         struct carp_softc_container *oscc, *scc;
732         struct carp_if *cif;
733         int count = 0;
734 #ifdef INVARIANTS
735         int found = 0;
736 #endif
737
738         TAILQ_FOREACH(oscc, ocif, scc_link) {
739                 ++count;
740 #ifdef INVARIANTS
741                 if (oscc->scc_softc == sc)
742                         found = 1;
743 #endif
744         }
745         KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
746
747         if (count == 1) {
748                 /* Last one is going to be unlinked */
749                 return NULL;
750         }
751
752         cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
753         TAILQ_INIT(cif);
754
755         TAILQ_FOREACH(oscc, ocif, scc_link) {
756                 if (oscc->scc_softc == sc)
757                         continue;
758
759                 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
760                 scc->scc_softc = oscc->scc_softc;
761                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
762         }
763
764         return cif;
765 }
766
767 static struct carp_if *
768 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
769 {
770         struct carp_softc_container *oscc;
771         int onlist;
772
773         onlist = 0;
774         if (ocif != NULL) {
775                 TAILQ_FOREACH(oscc, ocif, scc_link) {
776                         if (oscc->scc_softc == sc)
777                                 onlist = 1;
778                 }
779         }
780
781 #ifdef INVARIANTS
782         if (sc->sc_carpdev != NULL) {
783                 KASSERT(onlist, ("%s is not on %s carp list",
784                     sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
785         } else {
786                 KASSERT(!onlist, ("%s is already on carp list",
787                     sc->sc_if.if_xname));
788         }
789 #endif
790
791         if (!onlist) {
792                 struct carp_if *cif;
793                 struct carp_softc_container *new_scc, *scc;
794                 int inserted = 0;
795
796                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
797                 TAILQ_INIT(cif);
798
799                 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
800                 new_scc->scc_softc = sc;
801
802                 if (ocif != NULL) {
803                         TAILQ_FOREACH(oscc, ocif, scc_link) {
804                                 if (!inserted &&
805                                     oscc->scc_softc->sc_vhid > sc->sc_vhid) {
806                                         TAILQ_INSERT_TAIL(cif, new_scc,
807                                             scc_link);
808                                         inserted = 1;
809                                 }
810
811                                 scc = kmalloc(sizeof(*scc), M_CARP,
812                                     M_WAITOK | M_ZERO);
813                                 scc->scc_softc = oscc->scc_softc;
814                                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
815                         }
816                 }
817                 if (!inserted)
818                         TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
819
820                 return cif;
821         } else {
822                 return ocif;
823         }
824 }
825
826 static void
827 carp_if_free(struct carp_if *cif)
828 {
829         struct carp_softc_container *scc;
830
831         while ((scc = TAILQ_FIRST(cif)) != NULL) {
832                 TAILQ_REMOVE(cif, scc, scc_link);
833                 kfree(scc, M_CARP);
834         }
835         kfree(cif, M_CARP);
836 }
837
838 static void
839 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
840 {
841         carp_suspend(sc, detach);
842
843         carp_multicast_cleanup(sc);
844 #ifdef INET6
845         carp_multicast6_cleanup(sc);
846 #endif
847
848         if (!sc->sc_dead && detach) {
849                 struct carp_vhaddr *vha;
850
851                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
852                         carp_deactivate_vhaddr(sc, vha, del_iaback);
853                 KKASSERT(sc->sc_naddrs == 0);
854         }
855
856         if (sc->sc_carpdev != NULL) {
857                 struct ifnet *ifp = sc->sc_carpdev;
858                 struct carp_if *ocif = ifp->if_carp;
859
860                 ifp->if_carp = carp_if_remove(ocif, sc);
861                 KASSERT(ifp->if_carp != ocif,
862                     ("%s carp_if_remove failed", __func__));
863
864                 sc->sc_carpdev = NULL;
865                 sc->sc_ia = NULL;
866                 sc->arpcom.ac_if.if_hwassist = 0;
867
868                 /*
869                  * Make sure that all protocol threads see the
870                  * sc_carpdev and if_carp changes
871                  */
872                 netmsg_service_sync();
873
874                 if (ifp->if_carp == NULL) {
875                         /*
876                          * No more carp interfaces using
877                          * ifp as the backing interface,
878                          * move it out of promiscous mode.
879                          */
880                         ifpromisc(ifp, 0);
881                 }
882
883                 /*
884                  * The old carp list could be safely free now,
885                  * since no one can access it.
886                  */
887                 carp_if_free(ocif);
888         }
889 }
890
891 static void
892 carp_ifdetach_dispatch(netmsg_t msg)
893 {
894         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
895         struct ifnet *ifp = cmsg->nc_carpdev;
896
897         while (ifp->if_carp) {
898                 struct carp_softc_container *scc;
899
900                 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
901                 carp_detach(scc->scc_softc, TRUE, TRUE);
902         }
903         lwkt_replymsg(&cmsg->base.lmsg, 0);
904 }
905
906 /* Detach an interface from the carp. */
907 static void
908 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
909 {
910         struct netmsg_carp cmsg;
911
912         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
913
914         bzero(&cmsg, sizeof(cmsg));
915         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
916             carp_ifdetach_dispatch);
917         cmsg.nc_carpdev = ifp;
918
919         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
920 }
921
922 /*
923  * process input packet.
924  * we have rearranged checks order compared to the rfc,
925  * but it seems more efficient this way or not possible otherwise.
926  */
927 int
928 carp_proto_input(struct mbuf **mp, int *offp, int proto)
929 {
930         struct mbuf *m = *mp;
931         struct ip *ip = mtod(m, struct ip *);
932         struct ifnet *ifp = m->m_pkthdr.rcvif;
933         struct carp_header *ch;
934         struct carp_softc *sc;
935         int len, iphlen;
936
937         iphlen = *offp;
938         *mp = NULL;
939
940         carpstats.carps_ipackets++;
941
942         if (!carp_opts[CARPCTL_ALLOW]) {
943                 m_freem(m);
944                 goto back;
945         }
946
947         /* Check if received on a valid carp interface */
948         if (ifp->if_type != IFT_CARP) {
949                 carpstats.carps_badif++;
950                 CARP_LOG("carp_proto_input: packet received on non-carp "
951                     "interface: %s\n", ifp->if_xname);
952                 m_freem(m);
953                 goto back;
954         }
955
956         if (!CARP_IS_RUNNING(ifp)) {
957                 carpstats.carps_badif++;
958                 CARP_LOG("carp_proto_input: packet received on stopped carp "
959                     "interface: %s\n", ifp->if_xname);
960                 m_freem(m);
961                 goto back;
962         }
963
964         sc = ifp->if_softc;
965         if (sc->sc_carpdev == NULL) {
966                 carpstats.carps_badif++;
967                 CARP_LOG("carp_proto_input: packet received on defunc carp "
968                     "interface: %s\n", ifp->if_xname);
969                 m_freem(m);
970                 goto back;
971         }
972
973         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
974                 carpstats.carps_badif++;
975                 CARP_LOG("carp_proto_input: non-mcast packet on "
976                     "interface: %s\n", ifp->if_xname);
977                 m_freem(m);
978                 goto back;
979         }
980
981         /* Verify that the IP TTL is CARP_DFLTTL. */
982         if (ip->ip_ttl != CARP_DFLTTL) {
983                 carpstats.carps_badttl++;
984                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
985                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
986                 m_freem(m);
987                 goto back;
988         }
989
990         /* Minimal CARP packet size */
991         len = iphlen + sizeof(*ch);
992
993         /*
994          * Verify that the received packet length is
995          * not less than the CARP header
996          */
997         if (m->m_pkthdr.len < len) {
998                 carpstats.carps_badlen++;
999                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
1000                     ifp->if_xname);
1001                 m_freem(m);
1002                 goto back;
1003         }
1004
1005         /* Make sure that CARP header is contiguous */
1006         if (len > m->m_len) {
1007                 m = m_pullup(m, len);
1008                 if (m == NULL) {
1009                         carpstats.carps_hdrops++;
1010                         CARP_LOG("carp_proto_input: m_pullup failed\n");
1011                         goto back;
1012                 }
1013                 ip = mtod(m, struct ip *);
1014         }
1015         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1016
1017         /* Verify the CARP checksum */
1018         if (in_cksum_skip(m, len, iphlen)) {
1019                 carpstats.carps_badsum++;
1020                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
1021                     ifp->if_xname);
1022                 m_freem(m);
1023                 goto back;
1024         }
1025         carp_proto_input_c(sc, m, ch, AF_INET);
1026 back:
1027         return(IPPROTO_DONE);
1028 }
1029
#ifdef INET6
/*
 * Process an incoming IPv6 CARP packet.
 *
 * The packet is validated (carp allowed, received on a running carp
 * interface that has a backing interface, hop limit CARP_DFLTTL,
 * complete CARP header, correct checksum) and then handed to
 * carp_proto_input_c().  IPPROTO_DONE is always returned.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct carp_header *ch;
	struct carp_softc *sc;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		goto back;
	}

	/* check if received on a valid carp interface */
	if (ifp->if_type != IFT_CARP) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on non-carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	if (!CARP_IS_RUNNING(ifp)) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on stopped carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	sc = ifp->if_softc;
	if (sc->sc_carpdev == NULL) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on defunc-carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/* verify that the hop limit is CARP_DFLTTL */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim, ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/*
	 * verify that we have a complete carp packet
	 *
	 * m_len is sampled first so that it can still be logged if
	 * the header cannot be obtained.
	 * NOTE(review): the mbuf is not freed here when ch comes back
	 * NULL; presumably IP6_EXTHDR_GET disposes of it -- confirm.
	 */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
		goto back;
	}

	/* verify the CARP checksum */
	if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
		    ifp->if_xname);
		m_freem(m);
		goto back;
	}

	carp_proto_input_c(sc, m, ch, AF_INET6);
back:
	return (IPPROTO_DONE);
}
#endif /* INET6 */
1106
1107 static void
1108 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1109     struct carp_header *ch, sa_family_t af)
1110 {
1111         struct ifnet *cifp;
1112         uint64_t tmp_counter;
1113         struct timeval sc_tv, ch_tv;
1114
1115         if (sc->sc_vhid != ch->carp_vhid) {
1116                 /*
1117                  * CARP uses multicast, however, multicast packets
1118                  * are tapped to all CARP interfaces on the physical
1119                  * interface receiving the CARP packets, so we don't
1120                  * update any stats here.
1121                  */
1122                 m_freem(m);
1123                 return;
1124         }
1125         cifp = &sc->sc_if;
1126
1127         /* verify the CARP version. */
1128         if (ch->carp_version != CARP_VERSION) {
1129                 carpstats.carps_badver++;
1130                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1131                          ch->carp_version);
1132                 m_freem(m);
1133                 return;
1134         }
1135
1136         /* verify the hash */
1137         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1138                 carpstats.carps_badauth++;
1139                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1140                 m_freem(m);
1141                 return;
1142         }
1143
1144         tmp_counter = ntohl(ch->carp_counter[0]);
1145         tmp_counter = tmp_counter<<32;
1146         tmp_counter += ntohl(ch->carp_counter[1]);
1147
1148         /* XXX Replay protection goes here */
1149
1150         sc->sc_init_counter = 0;
1151         sc->sc_counter = tmp_counter;
1152
1153         sc_tv.tv_sec = sc->sc_advbase;
1154         if (carp_suppress_preempt && sc->sc_advskew <  240)
1155                 sc_tv.tv_usec = 240 * 1000000 / 256;
1156         else
1157                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1158         ch_tv.tv_sec = ch->carp_advbase;
1159         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1160
1161         switch (sc->sc_state) {
1162         case INIT:
1163                 break;
1164
1165         case MASTER:
1166                 /*
1167                  * If we receive an advertisement from a master who's going to
1168                  * be more frequent than us, go into BACKUP state.
1169                  */
1170                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
1171                     timevalcmp(&sc_tv, &ch_tv, ==)) {
1172                         callout_stop(&sc->sc_ad_tmo);
1173                         CARP_DEBUG("%s: MASTER -> BACKUP "
1174                            "(more frequent advertisement received)\n",
1175                            cifp->if_xname);
1176                         carp_set_state(sc, BACKUP);
1177                         carp_setrun(sc, 0);
1178                         if (carp_opts[CARPCTL_SETROUTE])
1179                                 carp_setroute(sc, RTM_DELETE);
1180                 }
1181                 break;
1182
1183         case BACKUP:
1184                 /*
1185                  * If we're pre-empting masters who advertise slower than us,
1186                  * and this one claims to be slower, treat him as down.
1187                  */
1188                 if (carp_opts[CARPCTL_PREEMPT] &&
1189                     timevalcmp(&sc_tv, &ch_tv, <)) {
1190                         CARP_DEBUG("%s: BACKUP -> MASTER "
1191                             "(preempting a slower master)\n", cifp->if_xname);
1192                         carp_master_down(sc);
1193                         break;
1194                 }
1195
1196                 /*
1197                  *  If the master is going to advertise at such a low frequency
1198                  *  that he's guaranteed to time out, we'd might as well just
1199                  *  treat him as timed out now.
1200                  */
1201                 sc_tv.tv_sec = sc->sc_advbase * 3;
1202                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
1203                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1204                                    cifp->if_xname);
1205                         carp_master_down(sc);
1206                         break;
1207                 }
1208
1209                 /*
1210                  * Otherwise, we reset the counter and wait for the next
1211                  * advertisement.
1212                  */
1213                 carp_setrun(sc, af);
1214                 break;
1215         }
1216         m_freem(m);
1217 }
1218
1219 struct mbuf *
1220 carp_input(void *v, struct mbuf *m)
1221 {
1222         struct carp_if *cif = v;
1223         struct ether_header *eh;
1224         struct carp_softc_container *scc;
1225         struct ifnet *ifp;
1226
1227         eh = mtod(m, struct ether_header *);
1228
1229         ifp = carp_forus(cif, eh->ether_dhost);
1230         if (ifp != NULL) {
1231                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1232                 return NULL;
1233         }
1234
1235         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1236                 return m;
1237
1238         /*
1239          * XXX Should really check the list of multicast addresses
1240          * for each CARP interface _before_ copying.
1241          */
1242         TAILQ_FOREACH(scc, cif, scc_link) {
1243                 struct carp_softc *sc = scc->scc_softc;
1244                 struct mbuf *m0;
1245
1246                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
1247                         continue;
1248
1249                 m0 = m_dup(m, M_NOWAIT);
1250                 if (m0 == NULL)
1251                         continue;
1252
1253                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1254         }
1255         return m;
1256 }
1257
1258 static void
1259 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1260 {
1261         if (sc->sc_init_counter) {
1262                 /* this could also be seconds since unix epoch */
1263                 sc->sc_counter = karc4random();
1264                 sc->sc_counter = sc->sc_counter << 32;
1265                 sc->sc_counter += karc4random();
1266         } else {
1267                 sc->sc_counter++;
1268         }
1269
1270         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1271         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1272
1273         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1274 }
1275
1276 static void
1277 carp_send_ad_all(void)
1278 {
1279         struct carp_softc *sc;
1280
1281         LIST_FOREACH(sc, &carpif_list, sc_next) {
1282                 if (sc->sc_carpdev == NULL)
1283                         continue;
1284
1285                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1286                         carp_send_ad(sc);
1287         }
1288 }
1289
1290 static void
1291 carp_send_ad_timeout(void *xsc)
1292 {
1293         struct carp_softc *sc = xsc;
1294         struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1295
1296         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1297             __func__, mycpuid));
1298
1299         crit_enter();
1300         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1301                 lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1302         crit_exit();
1303 }
1304
1305 static void
1306 carp_send_ad_timeout_dispatch(netmsg_t msg)
1307 {
1308         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1309         struct carp_softc *sc = cmsg->nc_softc;
1310
1311         /* Reply ASAP */
1312         crit_enter();
1313         lwkt_replymsg(&cmsg->base.lmsg, 0);
1314         crit_exit();
1315
1316         carp_send_ad(sc);
1317 }
1318
1319 static void
1320 carp_send_ad(struct carp_softc *sc)
1321 {
1322         struct ifnet *cifp = &sc->sc_if;
1323         struct carp_header ch;
1324         struct timeval tv;
1325         struct carp_header *ch_ptr;
1326         struct mbuf *m;
1327         int len, advbase, advskew;
1328
1329         if (!CARP_IS_RUNNING(cifp)) {
1330                 /* Bow out */
1331                 advbase = 255;
1332                 advskew = 255;
1333         } else {
1334                 advbase = sc->sc_advbase;
1335                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1336                         advskew = sc->sc_advskew;
1337                 else
1338                         advskew = 240;
1339                 tv.tv_sec = advbase;
1340                 tv.tv_usec = advskew * 1000000 / 256;
1341         }
1342
1343         ch.carp_version = CARP_VERSION;
1344         ch.carp_type = CARP_ADVERTISEMENT;
1345         ch.carp_vhid = sc->sc_vhid;
1346         ch.carp_advbase = advbase;
1347         ch.carp_advskew = advskew;
1348         ch.carp_authlen = 7;    /* XXX DEFINE */
1349         ch.carp_pad1 = 0;       /* must be zero */
1350         ch.carp_cksum = 0;
1351
1352 #ifdef INET
1353         if (sc->sc_ia != NULL) {
1354                 struct ip *ip;
1355
1356                 MGETHDR(m, M_NOWAIT, MT_HEADER);
1357                 if (m == NULL) {
1358                         IFNET_STAT_INC(cifp, oerrors, 1);
1359                         carpstats.carps_onomem++;
1360                         /* XXX maybe less ? */
1361                         if (advbase != 255 || advskew != 255)
1362                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1363                                     carp_send_ad_timeout, sc);
1364                         return;
1365                 }
1366                 len = sizeof(*ip) + sizeof(ch);
1367                 m->m_pkthdr.len = len;
1368                 m->m_pkthdr.rcvif = NULL;
1369                 m->m_len = len;
1370                 MH_ALIGN(m, m->m_len);
1371                 m->m_flags |= M_MCAST;
1372                 if (carp_prio_ad)
1373                         m->m_flags |= M_PRIO;
1374                 ip = mtod(m, struct ip *);
1375                 ip->ip_v = IPVERSION;
1376                 ip->ip_hl = sizeof(*ip) >> 2;
1377                 ip->ip_tos = IPTOS_LOWDELAY;
1378                 ip->ip_len = htons(len);
1379                 ip->ip_id = ip_newid();
1380                 ip->ip_off = htons(IP_DF);
1381                 ip->ip_ttl = CARP_DFLTTL;
1382                 ip->ip_p = IPPROTO_CARP;
1383                 ip->ip_sum = 0;
1384                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1385                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1386
1387                 ch_ptr = (struct carp_header *)(&ip[1]);
1388                 bcopy(&ch, ch_ptr, sizeof(ch));
1389                 carp_prepare_ad(sc, ch_ptr);
1390                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1391
1392                 getmicrotime(&cifp->if_lastchange);
1393                 IFNET_STAT_INC(cifp, opackets, 1);
1394                 IFNET_STAT_INC(cifp, obytes, len);
1395                 carpstats.carps_opackets++;
1396
1397                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1398                         IFNET_STAT_INC(cifp, oerrors, 1);
1399                         if (sc->sc_sendad_errors < INT_MAX)
1400                                 sc->sc_sendad_errors++;
1401                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1402                                 carp_suppress_preempt++;
1403                                 if (carp_suppress_preempt == 1) {
1404                                         carp_send_ad_all();
1405                                 }
1406                         }
1407                         sc->sc_sendad_success = 0;
1408                 } else {
1409                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1410                                 if (++sc->sc_sendad_success >=
1411                                     CARP_SENDAD_MIN_SUCCESS) {
1412                                         carp_suppress_preempt--;
1413                                         sc->sc_sendad_errors = 0;
1414                                 }
1415                         } else {
1416                                 sc->sc_sendad_errors = 0;
1417                         }
1418                 }
1419         }
1420 #endif /* INET */
1421 #ifdef INET6
1422         if (sc->sc_ia6) {
1423                 struct ip6_hdr *ip6;
1424
1425                 MGETHDR(m, M_NOWAIT, MT_HEADER);
1426                 if (m == NULL) {
1427                         IFNET_STAT_INC(cifp, oerrors, 1);
1428                         carpstats.carps_onomem++;
1429                         /* XXX maybe less ? */
1430                         if (advbase != 255 || advskew != 255)
1431                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1432                                     carp_send_ad_timeout, sc);
1433                         return;
1434                 }
1435                 len = sizeof(*ip6) + sizeof(ch);
1436                 m->m_pkthdr.len = len;
1437                 m->m_pkthdr.rcvif = NULL;
1438                 m->m_len = len;
1439                 MH_ALIGN(m, m->m_len);
1440                 m->m_flags |= M_MCAST;
1441                 ip6 = mtod(m, struct ip6_hdr *);
1442                 bzero(ip6, sizeof(*ip6));
1443                 ip6->ip6_vfc |= IPV6_VERSION;
1444                 ip6->ip6_hlim = CARP_DFLTTL;
1445                 ip6->ip6_nxt = IPPROTO_CARP;
1446                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1447                     sizeof(struct in6_addr));
1448                 /* set the multicast destination */
1449
1450                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1451                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1452                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1453                         IFNET_STAT_INC(cifp, oerrors, 1);
1454                         m_freem(m);
1455                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1456                         return;
1457                 }
1458
1459                 ch_ptr = (struct carp_header *)(&ip6[1]);
1460                 bcopy(&ch, ch_ptr, sizeof(ch));
1461                 carp_prepare_ad(sc, ch_ptr);
1462                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1463
1464                 getmicrotime(&cifp->if_lastchange);
1465                 IFNET_STAT_INC(cifp, opackets, 1);
1466                 IFNET_STAT_INC(cifp, obytes, len);
1467                 carpstats.carps_opackets6++;
1468
1469                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1470                         IFNET_STAT_INC(cifp, oerrors, 1);
1471                         if (sc->sc_sendad_errors < INT_MAX)
1472                                 sc->sc_sendad_errors++;
1473                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1474                                 carp_suppress_preempt++;
1475                                 if (carp_suppress_preempt == 1) {
1476                                         carp_send_ad_all();
1477                                 }
1478                         }
1479                         sc->sc_sendad_success = 0;
1480                 } else {
1481                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1482                                 if (++sc->sc_sendad_success >=
1483                                     CARP_SENDAD_MIN_SUCCESS) {
1484                                         carp_suppress_preempt--;
1485                                         sc->sc_sendad_errors = 0;
1486                                 }
1487                         } else {
1488                                 sc->sc_sendad_errors = 0;
1489                         }
1490                 }
1491         }
1492 #endif /* INET6 */
1493
1494         if (advbase != 255 || advskew != 255)
1495                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1496                     carp_send_ad_timeout, sc);
1497 }
1498
1499 /*
1500  * Broadcast a gratuitous ARP request containing
1501  * the virtual router MAC address for each IP address
1502  * associated with the virtual router.
1503  */
1504 static void
1505 carp_send_arp(struct carp_softc *sc)
1506 {
1507         const struct carp_vhaddr *vha;
1508
1509         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1510                 if (vha->vha_iaback == NULL)
1511                         continue;
1512                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1513         }
1514 }
1515
#ifdef INET6
/*
 * Send a neighbor advertisement with the override flag set, via the
 * backing interface to the link-local all-nodes address, for every
 * IPv6 address on the carp interface's current-cpu address list.
 * Each transmission is followed by a DELAY(1000).
 */
static void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr_container *ifac;

	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family == AF_INET6) {
			nd6_na_output(sc->sc_carpdev, &mcast,
			    &ifatoia6(ifa)->ia_addr.sin6_addr,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
			DELAY(1000);	/* XXX */
		}
	}
}
#endif /* INET6 */
1537
1538 #ifdef notyet
1539 static __inline const struct carp_vhaddr *
1540 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1541 {
1542         struct carp_vhaddr *vha;
1543
1544         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1545                 if (vha->vha_iaback == NULL)
1546                         continue;
1547
1548                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1549                         return vha;
1550         }
1551         return NULL;
1552 }
1553
1554 static int
1555 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1556                      const struct in_addr *isaddr, uint8_t **enaddr)
1557 {
1558         const struct carp_softc *vh;
1559         int index, count = 0;
1560
1561         /*
1562          * XXX proof of concept implementation.
1563          * We use the source ip to decide which virtual host should
1564          * handle the request. If we're master of that virtual host,
1565          * then we respond, otherwise, just drop the arp packet on
1566          * the floor.
1567          */
1568
1569         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1570                 if (!CARP_IS_RUNNING(&vh->sc_if))
1571                         continue;
1572
1573                 if (carp_find_addr(vh, itaddr) != NULL)
1574                         count++;
1575         }
1576         if (count == 0)
1577                 return 0;
1578
1579         /* this should be a hash, like pf_hash() */
1580         index = ntohl(isaddr->s_addr) % count;
1581         count = 0;
1582
1583         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1584                 if (!CARP_IS_RUNNING(&vh->sc_if))
1585                         continue;
1586
1587                 if (carp_find_addr(vh, itaddr) == NULL)
1588                         continue;
1589
1590                 if (count == index) {
1591                         if (vh->sc_state == MASTER) {
1592                                 *enaddr = IF_LLADDR(&vh->sc_if);
1593                                 return 1;
1594                         } else {
1595                                 return 0;
1596                         }
1597                 }
1598                 count++;
1599         }
1600         return 0;
1601 }
1602 #endif
1603
1604 int
1605 carp_iamatch(const struct in_ifaddr *ia)
1606 {
1607         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1608
1609         ASSERT_NETISR0;
1610
1611 #ifdef notyet
1612         if (carp_opts[CARPCTL_ARPBALANCE])
1613                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1614 #endif
1615
1616         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1617                 return 0;
1618
1619         return 1;
1620 }
1621
1622 #ifdef INET6
1623 struct ifaddr *
1624 carp_iamatch6(void *v, struct in6_addr *taddr)
1625 {
1626 #ifdef foo
1627         struct carp_if *cif = v;
1628         struct carp_softc *vh;
1629
1630         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1631                 struct ifaddr_container *ifac;
1632
1633                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1634                               ifa_link) {
1635                         struct ifaddr *ifa = ifac->ifa;
1636
1637                         if (IN6_ARE_ADDR_EQUAL(taddr,
1638                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1639                             CARP_IS_RUNNING(&vh->sc_if) &&
1640                             vh->sc_state == MASTER) {
1641                                 return (ifa);
1642                         }
1643                 }
1644         }
1645 #endif
1646         return (NULL);
1647 }
1648
/*
 * Return the link-layer address of the running carp(4) interface that
 * owns the IPv6 address taddr, tagging the mbuf m with that interface.
 *
 * The lookup is only compiled when "foo" is defined.  In that
 * configuration a PACKET_TAG_CARP tag carrying the ifnet pointer is
 * prepended to m when the tag can be allocated; the link-layer address
 * is returned whether or not the tag allocation succeeded.  Without
 * "foo" all arguments are ignored and NULL is always returned.
 */
void *
carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
{
#ifdef foo
        struct m_tag *mtag;
        struct carp_if *cif = v;
        struct carp_softc *sc;

        TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
                struct ifaddr_container *ifac;

                TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
                              ifa_link) {
                        struct ifaddr *ifa = ifac->ifa;
                        struct ifnet *ifp;

                        if (!IN6_ARE_ADDR_EQUAL(taddr,
                            &ifatoia6(ifa)->ia_addr.sin6_addr))
                                continue;
                        if (!CARP_IS_RUNNING(&sc->sc_if))
                                continue;

                        ifp = &sc->sc_if;
                        mtag = m_tag_get(PACKET_TAG_CARP,
                            sizeof(struct ifnet *), M_NOWAIT);
                        if (mtag != NULL) {
                                /* Payload of the tag is the ifnet pointer. */
                                bcopy(&ifp, (caddr_t)(mtag + 1),
                                    sizeof(struct ifnet *));
                                m_tag_prepend(m, mtag);
                        }
                        /* An untagged answer is better than no answer. */
                        return (IF_LLADDR(ifp));
                }
        }
#endif
        return (NULL);
}
1686 #endif
1687
1688 static struct ifnet *
1689 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1690 {
1691         struct carp_softc_container *scc;
1692
1693         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1694                 return NULL;
1695
1696         TAILQ_FOREACH(scc, cif, scc_link) {
1697                 struct carp_softc *sc = scc->scc_softc;
1698                 struct ifnet *ifp = &sc->sc_if;
1699
1700                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1701                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1702                         return ifp;
1703         }
1704         return NULL;
1705 }
1706
1707 static void
1708 carp_master_down_timeout(void *xsc)
1709 {
1710         struct carp_softc *sc = xsc;
1711         struct netmsg_carp *cmsg = &sc->sc_md_msg;
1712
1713         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1714             __func__, mycpuid));
1715
1716         crit_enter();
1717         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1718                 lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1719         crit_exit();
1720 }
1721
1722 static void
1723 carp_master_down_timeout_dispatch(netmsg_t msg)
1724 {
1725         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1726         struct carp_softc *sc = cmsg->nc_softc;
1727
1728         /* Reply ASAP */
1729         crit_enter();
1730         lwkt_replymsg(&cmsg->base.lmsg, 0);
1731         crit_exit();
1732
1733         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1734                    sc->sc_if.if_xname);
1735         carp_master_down(sc);
1736 }
1737
1738 static void
1739 carp_master_down(struct carp_softc *sc)
1740 {
1741         switch (sc->sc_state) {
1742         case INIT:
1743                 kprintf("%s: master_down event in INIT state\n",
1744                         sc->sc_if.if_xname);
1745                 break;
1746
1747         case MASTER:
1748                 break;
1749
1750         case BACKUP:
1751                 carp_set_state(sc, MASTER);
1752                 carp_send_ad(sc);
1753                 carp_send_arp(sc);
1754 #ifdef INET6
1755                 carp_send_na(sc);
1756 #endif /* INET6 */
1757                 carp_setrun(sc, 0);
1758                 if (carp_opts[CARPCTL_SETROUTE])
1759                         carp_setroute(sc, RTM_ADD);
1760                 break;
1761         }
1762 }
1763
1764 /*
1765  * When in backup state, af indicates whether to reset the master down timer
1766  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1767  */
1768 static void
1769 carp_setrun(struct carp_softc *sc, sa_family_t af)
1770 {
1771         struct ifnet *cifp = &sc->sc_if;
1772         struct timeval tv;
1773
1774         if (sc->sc_carpdev == NULL) {
1775                 carp_set_state(sc, INIT);
1776                 return;
1777         }
1778
1779         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1780             (sc->sc_naddrs || sc->sc_naddrs6)) {
1781                 /* Nothing */
1782         } else {
1783                 if (carp_opts[CARPCTL_SETROUTE])
1784                         carp_setroute(sc, RTM_DELETE);
1785                 return;
1786         }
1787
1788         switch (sc->sc_state) {
1789         case INIT:
1790                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1791                         carp_send_ad(sc);
1792                         carp_send_arp(sc);
1793 #ifdef INET6
1794                         carp_send_na(sc);
1795 #endif /* INET6 */
1796                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1797                                    cifp->if_xname);
1798                         carp_set_state(sc, MASTER);
1799                         if (carp_opts[CARPCTL_SETROUTE])
1800                                 carp_setroute(sc, RTM_ADD);
1801                 } else {
1802                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1803                         carp_set_state(sc, BACKUP);
1804                         if (carp_opts[CARPCTL_SETROUTE])
1805                                 carp_setroute(sc, RTM_DELETE);
1806                         carp_setrun(sc, 0);
1807                 }
1808                 break;
1809
1810         case BACKUP:
1811                 callout_stop(&sc->sc_ad_tmo);
1812                 tv.tv_sec = 3 * sc->sc_advbase;
1813                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1814                 switch (af) {
1815 #ifdef INET
1816                 case AF_INET:
1817                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1818                             carp_master_down_timeout, sc);
1819                         break;
1820 #endif /* INET */
1821 #ifdef INET6
1822                 case AF_INET6:
1823                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1824                             carp_master_down_timeout, sc);
1825                         break;
1826 #endif /* INET6 */
1827                 default:
1828                         if (sc->sc_naddrs)
1829                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1830                                     carp_master_down_timeout, sc);
1831                         if (sc->sc_naddrs6)
1832                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1833                                     carp_master_down_timeout, sc);
1834                         break;
1835                 }
1836                 break;
1837
1838         case MASTER:
1839                 tv.tv_sec = sc->sc_advbase;
1840                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1841                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1842                     carp_send_ad_timeout, sc);
1843                 break;
1844         }
1845 }
1846
1847 static void
1848 carp_multicast_cleanup(struct carp_softc *sc)
1849 {
1850         struct ip_moptions *imo = &sc->sc_imo;
1851
1852         if (imo->imo_num_memberships == 0)
1853                 return;
1854         KKASSERT(imo->imo_num_memberships == 1);
1855
1856         in_delmulti(imo->imo_membership[0]);
1857         imo->imo_membership[0] = NULL;
1858         imo->imo_num_memberships = 0;
1859         imo->imo_multicast_ifp = NULL;
1860 }
1861
1862 #ifdef INET6
1863 static void
1864 carp_multicast6_cleanup(struct carp_softc *sc)
1865 {
1866         struct ip6_moptions *im6o = &sc->sc_im6o;
1867
1868         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1869                 struct in6_multi_mship *imm =
1870                     LIST_FIRST(&im6o->im6o_memberships);
1871
1872                 LIST_REMOVE(imm, i6mm_chain);
1873                 in6_leavegroup(imm);
1874         }
1875         im6o->im6o_multicast_ifp = NULL;
1876 }
1877 #endif
1878
1879 static void
1880 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1881 {
1882         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1883         struct carp_softc *sc = cmsg->nc_softc;
1884         const struct carp_vhaddr *vha;
1885         struct ifcarpvhaddr *carpa, *carpa0;
1886         int count, len, error = 0;
1887
1888         count = 0;
1889         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1890                 ++count;
1891
1892         if (cmsg->nc_datalen == 0) {
1893                 cmsg->nc_datalen = count * sizeof(*carpa);
1894                 goto back;
1895         } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1896                 cmsg->nc_datalen = 0;
1897                 goto back;
1898         }
1899         len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1900         KKASSERT(len >= sizeof(*carpa));
1901
1902         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1903         if (carpa == NULL) {
1904                 error = ENOMEM; 
1905                 goto back;
1906         }
1907
1908         count = 0;
1909         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1910                 if (len < sizeof(*carpa))
1911                         break;
1912
1913                 carpa->carpa_flags = vha->vha_flags;
1914                 carpa->carpa_addr.sin_family = AF_INET;
1915                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1916
1917                 carpa->carpa_baddr.sin_family = AF_INET;
1918                 if (vha->vha_iaback == NULL) {
1919                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1920                 } else {
1921                         carpa->carpa_baddr.sin_addr =
1922                         vha->vha_iaback->ia_addr.sin_addr;
1923                 }
1924
1925                 ++carpa;
1926                 ++count;
1927                 len -= sizeof(*carpa);
1928         }
1929         cmsg->nc_datalen = sizeof(*carpa) * count;
1930         KKASSERT(cmsg->nc_datalen > 0);
1931
1932         cmsg->nc_data = carpa0;
1933
1934 back:
1935         lwkt_replymsg(&cmsg->base.lmsg, error);
1936 }
1937
1938 static int
1939 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1940 {
1941         struct ifnet *ifp = &sc->arpcom.ac_if;
1942         struct netmsg_carp cmsg;
1943         int error;
1944
1945         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1946         ifnet_deserialize_all(ifp);
1947
1948         bzero(&cmsg, sizeof(cmsg));
1949         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1950             carp_ioctl_getvhaddr_dispatch);
1951         cmsg.nc_softc = sc;
1952         cmsg.nc_datalen = ifd->ifd_len;
1953
1954         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1955
1956         if (!error) {
1957                 if (cmsg.nc_data != NULL) {
1958                         error = copyout(cmsg.nc_data, ifd->ifd_data,
1959                             cmsg.nc_datalen);
1960                         kfree(cmsg.nc_data, M_TEMP);
1961                 }
1962                 ifd->ifd_len = cmsg.nc_datalen;
1963         } else {
1964                 KASSERT(cmsg.nc_data == NULL,
1965                     ("%s temp vhaddr is alloc upon error", __func__));
1966         }
1967
1968         ifnet_serialize_all(ifp);
1969         return error;
1970 }
1971
1972 static int
1973 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1974     struct in_ifaddr *ia_del)
1975 {
1976         struct ifnet *ifp;
1977         struct in_ifaddr *ia_if;
1978         const struct in_ifaddr *ia_vha;
1979         struct in_ifaddr_container *iac;
1980         int own, ia_match_carpdev;
1981
1982         KKASSERT(vha->vha_ia != NULL);
1983         ia_vha = vha->vha_ia;
1984
1985         ia_if = NULL;
1986         own = 0;
1987         ia_match_carpdev = 0;
1988         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1989                 struct in_ifaddr *ia = iac->ia;
1990
1991                 if (ia == ia_del)
1992                         continue;
1993
1994                 if (ia->ia_ifp->if_type == IFT_CARP)
1995                         continue;
1996
1997                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1998                         continue;
1999
2000                 /* and, yeah, we need a multicast-capable iface too */
2001                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
2002                         continue;
2003
2004                 if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
2005                     ia_vha->ia_subnet == ia->ia_subnet) {
2006                         if (ia_vha->ia_addr.sin_addr.s_addr ==
2007                             ia->ia_addr.sin_addr.s_addr)
2008                                 own = 1;
2009                         if (ia_if == NULL) {
2010                                 ia_if = ia;
2011                         } else if (sc->sc_carpdev != NULL &&
2012                             sc->sc_carpdev == ia->ia_ifp) {
2013                                 ia_if = ia;
2014                                 if (ia_if->ia_flags & IFA_ROUTE) {
2015                                         /*
2016                                          * Address with prefix route
2017                                          * is prefered
2018                                          */
2019                                         break;
2020                                 }
2021                                 ia_match_carpdev = 1;
2022                         } else if (!ia_match_carpdev) {
2023                                 if (ia->ia_flags & IFA_ROUTE) {
2024                                         /*
2025                                          * Address with prefix route
2026                                          * is prefered over others.
2027                                          */
2028                                         ia_if = ia;
2029                                 }
2030                         }
2031                 }
2032         }
2033
2034         carp_deactivate_vhaddr(sc, vha, FALSE);
2035         if (!ia_if)
2036                 return ENOENT;
2037
2038         ifp = ia_if->ia_ifp;
2039
2040         /* XXX Don't allow parent iface to be changed */
2041         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2042                 return EEXIST;
2043
2044         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2045 }
2046
2047 static void
2048 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2049 {
2050         struct carp_vhaddr *vha_new;
2051         struct in_ifaddr *carp_ia;
2052 #ifdef INVARIANTS
2053         struct carp_vhaddr *vha;
2054 #endif
2055
2056         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2057         carp_ia = ifatoia(carp_ifa);
2058
2059 #ifdef INVARIANTS
2060         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2061                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2062 #endif
2063
2064         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2065         vha_new->vha_ia = carp_ia;
2066         carp_insert_vhaddr(sc, vha_new);
2067
2068         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2069                 /*
2070                  * If the above configuration fails, it may only mean
2071                  * that the new address is problematic.  However, the
2072                  * carp(4) interface may already have several working
2073                  * addresses.  Since the expected behaviour of
2074                  * SIOC[AS]IFADDR is to put the NIC into working state,
2075                  * we try starting the state machine manually here with
2076                  * the hope that the carp(4)'s previously working
2077                  * addresses still could be brought up.
2078                  */
2079                 carp_hmac_prepare(sc);
2080                 carp_set_state(sc, INIT);
2081                 carp_setrun(sc, 0);
2082         }
2083 }
2084
2085 static void
2086 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2087 {
2088         struct carp_vhaddr *vha;
2089         struct in_ifaddr *carp_ia;
2090
2091         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2092         carp_ia = ifatoia(carp_ifa);
2093
2094         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2095                 KKASSERT(vha->vha_ia != NULL);
2096                 if (vha->vha_ia == carp_ia)
2097                         break;
2098         }
2099         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2100
2101         /*
2102          * Remove the vhaddr from the list before deactivating
2103          * the vhaddr, so that the HMAC could be correctly
2104          * updated in carp_deactivate_vhaddr()
2105          */
2106         carp_remove_vhaddr(sc, vha);
2107
2108         carp_deactivate_vhaddr(sc, vha, FALSE);
2109         kfree(vha, M_CARP);
2110 }
2111
2112 static void
2113 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2114 {
2115         struct carp_vhaddr *vha;
2116         struct in_ifaddr *carp_ia;
2117
2118         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2119         carp_ia = ifatoia(carp_ifa);
2120
2121         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2122                 KKASSERT(vha->vha_ia != NULL);
2123                 if (vha->vha_ia == carp_ia)
2124                         break;
2125         }
2126         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2127
2128         /* Remove then reinsert, to keep the vhaddr list sorted */
2129         carp_remove_vhaddr(sc, vha);
2130         carp_insert_vhaddr(sc, vha);
2131
2132         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2133                 /* See the comment in carp_add_addr() */
2134                 carp_hmac_prepare(sc);
2135                 carp_set_state(sc, INIT);
2136                 carp_setrun(sc, 0);
2137         }
2138 }
2139
2140 #ifdef notyet
2141
2142 #ifdef INET6
2143 static int
2144 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2145 {
2146         struct ifnet *ifp;
2147         struct carp_if *cif;
2148         struct in6_ifaddr *ia, *ia_if;
2149         struct ip6_moptions *im6o = &sc->sc_im6o;
2150         struct in6_multi_mship *imm;
2151         struct in6_addr in6;
2152         int own, error;
2153
2154         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2155                 carp_setrun(sc, 0);
2156                 return (0);
2157         }
2158
2159         /* we have to do it by hands to check we won't match on us */
2160         ia_if = NULL; own = 0;
2161         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2162                 int i;
2163
2164                 for (i = 0; i < 4; i++) {
2165                         if ((sin6->sin6_addr.s6_addr32[i] &
2166                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2167                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
2168                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2169                                 break;
2170                 }
2171                 /* and, yeah, we need a multicast-capable iface too */
2172                 if (ia->ia_ifp != &sc->sc_if &&
2173                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2174                     (i == 4)) {
2175                         if (!ia_if)
2176                                 ia_if = ia;
2177                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2178                             &ia->ia_addr.sin6_addr))
2179                                 own++;
2180                 }
2181         }
2182
2183         if (!ia_if)
2184                 return (EADDRNOTAVAIL);
2185         ia = ia_if;
2186         ifp = ia->ia_ifp;
2187
2188         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2189             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2190                 return (EADDRNOTAVAIL);
2191
2192         if (!sc->sc_naddrs6) {
2193                 im6o->im6o_multicast_ifp = ifp;
2194
2195                 /* join CARP multicast address */
2196                 bzero(&in6, sizeof(in6));
2197                 in6.s6_addr16[0] = htons(0xff02);
2198                 in6.s6_addr8[15] = 0x12;
2199                 if (in6_setscope(&in6, ifp, NULL) != 0)
2200                         goto cleanup;
2201                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2202                         goto cleanup;
2203                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2204
2205                 /* join solicited multicast address */
2206                 bzero(&in6, sizeof(in6));
2207                 in6.s6_addr16[0] = htons(0xff02);
2208                 in6.s6_addr32[1] = 0;
2209                 in6.s6_addr32[2] = htonl(1);
2210                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2211                 in6.s6_addr8[12] = 0xff;
2212                 if (in6_setscope(&in6, ifp, NULL) != 0)
2213                         goto cleanup;
2214                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2215                         goto cleanup;
2216                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2217         }
2218
2219 #ifdef foo
2220         if (!ifp->if_carp) {
2221                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2222
2223                 if ((error = ifpromisc(ifp, 1))) {
2224                         kfree(cif, M_CARP);
2225                         goto cleanup;
2226                 }
2227
2228                 TAILQ_INIT(&cif->vhif_vrs);
2229                 ifp->if_carp = cif;
2230         } else {
2231                 struct carp_softc *vr;
2232
2233                 cif = ifp->if_carp;
2234                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2235                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2236                                 error = EINVAL;
2237                                 goto cleanup;
2238                         }
2239                 }
2240         }
2241 #endif
2242         sc->sc_ia6 = ia;
2243         sc->sc_carpdev = ifp;
2244
2245 #ifdef foo
2246         { /* XXX prevent endless loop if already in queue */
2247         struct carp_softc *vr, *after = NULL;
2248         int myself = 0;
2249         cif = ifp->if_carp;
2250
2251         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2252                 if (vr == sc)
2253                         myself = 1;
2254                 if (vr->sc_vhid < sc->sc_vhid)
2255                         after = vr;
2256         }
2257
2258         if (!myself) {
2259                 /* We're trying to keep things in order */
2260                 if (after == NULL)
2261                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2262                 else
2263                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2264         }
2265         }
2266 #endif
2267
2268         sc->sc_naddrs6++;
2269         if (own)
2270                 sc->sc_advskew = 0;
2271         carp_sc_state(sc);
2272         carp_setrun(sc, 0);
2273
2274         return (0);
2275
2276 cleanup:
2277         /* clean up multicast memberships */
2278         if (!sc->sc_naddrs6) {
2279                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2280                         imm = LIST_FIRST(&im6o->im6o_memberships);
2281                         LIST_REMOVE(imm, i6mm_chain);
2282                         in6_leavegroup(imm);
2283                 }
2284         }
2285         return (error);
2286 }
2287
2288 static int
2289 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2290 {
2291         int error = 0;
2292
2293         if (!--sc->sc_naddrs6) {
2294                 struct carp_if *cif = sc->sc_carpdev->if_carp;
2295                 struct ip6_moptions *im6o = &sc->sc_im6o;
2296
2297                 callout_stop(&sc->sc_ad_tmo);
2298                 sc->sc_vhid = -1;
2299                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2300                         struct in6_multi_mship *imm =
2301                             LIST_FIRST(&im6o->im6o_memberships);
2302
2303                         LIST_REMOVE(imm, i6mm_chain);
2304                         in6_leavegroup(imm);
2305                 }
2306                 im6o->im6o_multicast_ifp = NULL;
2307 #ifdef foo
2308                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2309                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2310                         sc->sc_carpdev->if_carp = NULL;
2311                         kfree(cif, M_IFADDR);
2312                 }
2313 #endif
2314         }
2315         return (error);
2316 }
2317 #endif /* INET6 */
2318
2319 #endif
2320
2321 static int
2322 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2323 {
2324         struct carp_softc *sc = ifp->if_softc;
2325         struct ifreq *ifr = (struct ifreq *)addr;
2326         struct ifdrv *ifd = (struct ifdrv *)addr;
2327         int error = 0;
2328
2329         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2330
2331         switch (cmd) {
2332         case SIOCSIFFLAGS:
2333                 if (ifp->if_flags & IFF_UP) {
2334                         if ((ifp->if_flags & IFF_RUNNING) == 0)
2335                                 carp_init(sc);
2336                 } else if (ifp->if_flags & IFF_RUNNING) {
2337                         carp_ioctl_stop(sc);
2338                 }
2339                 break;
2340
2341         case SIOCSIFCAP:
2342                 carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2343                 break;
2344
2345         case SIOCSVH:
2346                 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2347                 break;
2348
2349         case SIOCGVH:
2350                 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2351                 break;
2352
2353         case SIOCGDRVSPEC:
2354                 switch (ifd->ifd_cmd) {
2355                 case CARPGDEVNAME:
2356                         error = carp_ioctl_getdevname(sc, ifd);
2357                         break;
2358
2359                 case CARPGVHADDR:
2360                         error = carp_ioctl_getvhaddr(sc, ifd);
2361                         break;
2362
2363                 default:
2364                         error = EINVAL;
2365                         break;
2366                 }
2367                 break;
2368
2369         default:
2370                 error = ether_ioctl(ifp, cmd, addr);
2371                 break;
2372         }
2373
2374         return error;
2375 }
2376
2377 static void
2378 carp_ioctl_stop_dispatch(netmsg_t msg)
2379 {
2380         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2381         struct carp_softc *sc = cmsg->nc_softc;
2382
2383         carp_stop(sc, FALSE);
2384         lwkt_replymsg(&cmsg->base.lmsg, 0);
2385 }
2386
2387 static void
2388 carp_ioctl_stop(struct carp_softc *sc)
2389 {
2390         struct ifnet *ifp = &sc->arpcom.ac_if;
2391         struct netmsg_carp cmsg;
2392
2393         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2394
2395         ifnet_deserialize_all(ifp);
2396
2397         bzero(&cmsg, sizeof(cmsg));
2398         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2399             carp_ioctl_stop_dispatch);
2400         cmsg.nc_softc = sc;
2401
2402         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2403
2404         ifnet_serialize_all(ifp);
2405 }
2406
2407 static void
2408 carp_ioctl_setvh_dispatch(netmsg_t msg)
2409 {
2410         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2411         struct carp_softc *sc = cmsg->nc_softc;
2412         struct ifnet *ifp = &sc->arpcom.ac_if;
2413         const struct carpreq *carpr = cmsg->nc_data;
2414         int error;
2415
2416         error = 1;
2417         if ((ifp->if_flags & IFF_RUNNING) &&
2418             sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2419                 switch (carpr->carpr_state) {
2420                 case BACKUP:
2421                         callout_stop(&sc->sc_ad_tmo);
2422                         carp_set_state(sc, BACKUP);
2423                         carp_setrun(sc, 0);
2424                         if (carp_opts[CARPCTL_SETROUTE])
2425                                 carp_setroute(sc, RTM_DELETE);
2426                         break;
2427
2428                 case MASTER:
2429                         carp_master_down(sc);
2430                         break;
2431
2432                 default:
2433                         break;
2434                 }
2435         }
2436         if (carpr->carpr_vhid > 0) {
2437                 if (carpr->carpr_vhid > 255) {
2438                         error = EINVAL;
2439                         goto back;
2440                 }
2441                 if (sc->sc_carpdev) {
2442                         struct carp_if *cif = sc->sc_carpdev->if_carp;
2443                         struct carp_softc_container *scc;
2444
2445                         TAILQ_FOREACH(scc, cif, scc_link) {
2446                                 struct carp_softc *vr = scc->scc_softc;
2447
2448                                 if (vr != sc &&
2449                                     vr->sc_vhid == carpr->carpr_vhid) {
2450                                         error = EEXIST;
2451                                         goto back;
2452                                 }
2453                         }
2454                 }
2455                 sc->sc_vhid = carpr->carpr_vhid;
2456
2457                 IF_LLADDR(ifp)[5] = sc->sc_vhid;
2458                 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2459                     ETHER_ADDR_LEN);
2460
2461                 error--;
2462         }
2463         if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2464                 if (carpr->carpr_advskew >= 255) {
2465                         error = EINVAL;
2466                         goto back;
2467                 }
2468                 if (carpr->carpr_advbase > 255) {
2469                         error = EINVAL;
2470                         goto back;
2471                 }
2472                 sc->sc_advbase = carpr->carpr_advbase;
2473                 sc->sc_advskew = carpr->carpr_advskew;
2474                 error--;
2475         }
2476         bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2477         if (error > 0) {
2478                 error = EINVAL;
2479         } else {
2480                 error = 0;
2481                 carp_setrun(sc, 0);
2482         }
2483 back:
2484         carp_hmac_prepare(sc);
2485
2486         lwkt_replymsg(&cmsg->base.lmsg, error);
2487 }
2488
2489 static int
2490 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2491 {
2492         struct ifnet *ifp = &sc->arpcom.ac_if;
2493         struct netmsg_carp cmsg;
2494         struct carpreq carpr;
2495         int error;
2496
2497         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2498         ifnet_deserialize_all(ifp);
2499
2500         error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT |
2501                                     __SYSCAP_NULLCRED);
2502         if (error)
2503                 goto back;
2504
2505         error = copyin(udata, &carpr, sizeof(carpr));
2506         if (error)
2507                 goto back;
2508
2509         bzero(&cmsg, sizeof(cmsg));
2510         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2511             carp_ioctl_setvh_dispatch);
2512         cmsg.nc_softc = sc;
2513         cmsg.nc_data = &carpr;
2514
2515         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2516
2517 back:
2518         ifnet_serialize_all(ifp);
2519         return error;
2520 }
2521
2522 static void
2523 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2524 {
2525         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2526         struct carp_softc *sc = cmsg->nc_softc;
2527         struct ifnet *ifp = &sc->arpcom.ac_if;
2528         int reqcap = *((const int *)(cmsg->nc_data));
2529         int mask;
2530
2531         mask = reqcap ^ ifp->if_capenable;
2532         if (mask & IFCAP_TXCSUM) {
2533                 ifp->if_capenable ^= IFCAP_TXCSUM;
2534                 if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2535                     sc->sc_carpdev != NULL) {
2536                         ifp->if_hwassist |=
2537                             (sc->sc_carpdev->if_hwassist &
2538                              (CSUM_IP | CSUM_UDP | CSUM_TCP));
2539                 } else {
2540                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2541                 }
2542         }
2543         if (mask & IFCAP_TSO) {
2544                 ifp->if_capenable ^= IFCAP_TSO;
2545                 if ((ifp->if_capenable & IFCAP_TSO) &&
2546                     sc->sc_carpdev != NULL) {
2547                         ifp->if_hwassist |=
2548                             (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2549                 } else {
2550                         ifp->if_hwassist &= ~CSUM_TSO;
2551                 }
2552         }
2553
2554         lwkt_replymsg(&cmsg->base.lmsg, 0);
2555 }
2556
2557 static void
2558 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2559 {
2560         struct ifnet *ifp = &sc->arpcom.ac_if;
2561         struct netmsg_carp cmsg;
2562
2563         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2564         ifnet_deserialize_all(ifp);
2565
2566         bzero(&cmsg, sizeof(cmsg));
2567         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2568             carp_ioctl_ifcap_dispatch);
2569         cmsg.nc_softc = sc;
2570         cmsg.nc_data = &reqcap;
2571
2572         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2573
2574         ifnet_serialize_all(ifp);
2575 }
2576
2577 static void
2578 carp_ioctl_getvh_dispatch(netmsg_t msg)
2579 {
2580         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2581         struct carp_softc *sc = cmsg->nc_softc;
2582         struct carpreq *carpr = cmsg->nc_data;
2583
2584         carpr->carpr_state = sc->sc_state;
2585         carpr->carpr_vhid = sc->sc_vhid;
2586         carpr->carpr_advbase = sc->sc_advbase;
2587         carpr->carpr_advskew = sc->sc_advskew;
2588         bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2589
2590         lwkt_replymsg(&cmsg->base.lmsg, 0);
2591 }
2592
2593 static int
2594 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2595 {
2596         struct ifnet *ifp = &sc->arpcom.ac_if;
2597         struct netmsg_carp cmsg;
2598         struct carpreq carpr;
2599         int error;
2600
2601         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2602         ifnet_deserialize_all(ifp);
2603
2604         bzero(&cmsg, sizeof(cmsg));
2605         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2606             carp_ioctl_getvh_dispatch);
2607         cmsg.nc_softc = sc;
2608         cmsg.nc_data = &carpr;
2609
2610         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2611
2612         error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT |
2613                                     __SYSCAP_NULLCRED);
2614         if (error)
2615                 bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2616
2617         error = copyout(&carpr, udata, sizeof(carpr));
2618
2619         ifnet_serialize_all(ifp);
2620         return error;
2621 }
2622
2623 static void
2624 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2625 {
2626         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2627         struct carp_softc *sc = cmsg->nc_softc;
2628         char *devname = cmsg->nc_data;
2629
2630         bzero(devname, IFNAMSIZ);
2631         if (sc->sc_carpdev != NULL)
2632                 strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2633
2634         lwkt_replymsg(&cmsg->base.lmsg, 0);
2635 }
2636
2637 static int
2638 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2639 {
2640         struct ifnet *ifp = &sc->arpcom.ac_if;
2641         struct netmsg_carp cmsg;
2642         char devname[IFNAMSIZ];
2643         int error;
2644
2645         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2646
2647         if (ifd->ifd_len != sizeof(devname))
2648                 return EINVAL;
2649
2650         ifnet_deserialize_all(ifp);
2651
2652         bzero(&cmsg, sizeof(cmsg));
2653         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2654             carp_ioctl_getdevname_dispatch);
2655         cmsg.nc_softc = sc;
2656         cmsg.nc_data = devname;
2657
2658         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2659
2660         error = copyout(devname, ifd->ifd_data, sizeof(devname));
2661
2662         ifnet_serialize_all(ifp);
2663         return error;
2664 }
2665
2666 static void
2667 carp_init_dispatch(netmsg_t msg)
2668 {
2669         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2670         struct carp_softc *sc = cmsg->nc_softc;
2671
2672         sc->sc_if.if_flags |= IFF_RUNNING;
2673         carp_hmac_prepare(sc);
2674         carp_set_state(sc, INIT);
2675         carp_setrun(sc, 0);
2676
2677         lwkt_replymsg(&cmsg->base.lmsg, 0);
2678 }
2679
2680 static void
2681 carp_init(void *xsc)
2682 {
2683         struct carp_softc *sc = xsc;
2684         struct ifnet *ifp = &sc->arpcom.ac_if;
2685         struct netmsg_carp cmsg;
2686
2687         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2688
2689         ifnet_deserialize_all(ifp);
2690
2691         bzero(&cmsg, sizeof(cmsg));
2692         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2693             carp_init_dispatch);
2694         cmsg.nc_softc = sc;
2695
2696         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2697
2698         ifnet_serialize_all(ifp);
2699 }
2700
2701 static int
2702 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2703     struct rtentry *rt)
2704 {
2705         struct carp_softc *sc = ifp->if_softc;
2706         struct ifnet *carpdev;
2707         int error = 0;
2708
2709         carpdev = sc->sc_carpdev;
2710         if (carpdev != NULL) {
2711                 if (m->m_flags & M_MCAST)
2712                         IFNET_STAT_INC(ifp, omcasts, 1);
2713                 IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2714                 IFNET_STAT_INC(ifp, opackets, 1);
2715
2716                 /*
2717                  * NOTE:
2718                  * CARP's ifp is passed to backing device's
2719                  * if_output method.
2720                  */
2721                 carpdev->if_output(ifp, m, dst, rt);
2722         } else {
2723                 IFNET_STAT_INC(ifp, oerrors, 1);
2724                 m_freem(m);
2725                 error = ENETUNREACH;
2726         }
2727         return error;
2728 }
2729
2730 /*
2731  * Start output on carp interface. This function should never be called.
2732  */
2733 static void
2734 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2735 {
2736         panic("%s: start called", ifp->if_xname);
2737 }
2738
2739 static void
2740 carp_set_state(struct carp_softc *sc, int state)
2741 {
2742         struct ifnet *cifp = &sc->sc_if;
2743
2744         if (sc->sc_state == state)
2745                 return;
2746         sc->sc_state = state;
2747
2748         switch (sc->sc_state) {
2749         case BACKUP:
2750                 cifp->if_link_state = LINK_STATE_DOWN;
2751                 break;
2752
2753         case MASTER:
2754                 cifp->if_link_state = LINK_STATE_UP;
2755                 break;
2756
2757         default:
2758                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2759                 break;
2760         }
2761         rt_ifmsg(cifp);
2762 }
2763
2764 void
2765 carp_group_demote_adj(struct ifnet *ifp, int adj)
2766 {
2767         struct ifg_list *ifgl;
2768         int *dm;
2769
2770         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2771                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2772                         continue;
2773                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2774
2775                 if (*dm + adj >= 0)
2776                         *dm += adj;
2777                 else
2778                         *dm = 0;
2779
2780                 if (adj > 0 && *dm == 1)
2781                         carp_send_ad_all();
2782                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2783                     ifgl->ifgl_group->ifg_group, *dm);
2784         }
2785 }
2786
#ifdef foo
/*
 * Compiled only when "foo" is defined.
 *
 * Re-evaluate every softc on the carp_if list v after the state of the
 * backing device changed.
 */
void
carp_carpdev_state(void *v)
{
	struct carp_if *cif = v;
	struct carp_softc *vr;

	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
		carp_sc_state(vr);
}

/*
 * Restart one softc from INIT according to whether its backing device
 * is IFF_UP.
 *
 * Device down: the three timers are stopped first, and the softc
 * starts suppressing preemption (bumping carp_suppress_preempt, and
 * calling carp_send_ad_all() when that makes the counter 1) unless it
 * was already doing so.  Device up: any suppression held by this softc
 * is released.
 */
static void
carp_sc_state(struct carp_softc *sc)
{
	const int dev_down = !(sc->sc_carpdev->if_flags & IFF_UP);

	if (dev_down) {
		callout_stop(&sc->sc_ad_tmo);
		callout_stop(&sc->sc_md_tmo);
		callout_stop(&sc->sc_md6_tmo);
	}

	carp_set_state(sc, INIT);
	carp_setrun(sc, 0);

	if (dev_down) {
		if (!sc->sc_suppress) {
			carp_suppress_preempt++;
			if (carp_suppress_preempt == 1)
				carp_send_ad_all();
		}
		sc->sc_suppress = 1;
	} else {
		if (sc->sc_suppress)
			carp_suppress_preempt--;
		sc->sc_suppress = 0;
	}
}
#endif
2822
2823 static void
2824 carp_stop(struct carp_softc *sc, boolean_t detach)
2825 {
2826         sc->sc_if.if_flags &= ~IFF_RUNNING;
2827
2828         callout_stop(&sc->sc_ad_tmo);
2829         callout_stop(&sc->sc_md_tmo);
2830         callout_stop(&sc->sc_md6_tmo);
2831
2832         if (!detach && sc->sc_state == MASTER)
2833                 carp_send_ad(sc);
2834
2835         if (sc->sc_suppress)
2836                 carp_suppress_preempt--;
2837         sc->sc_suppress = 0;
2838
2839         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2840                 carp_suppress_preempt--;
2841         sc->sc_sendad_errors = 0;
2842         sc->sc_sendad_success = 0;
2843
2844         carp_set_state(sc, INIT);
2845         carp_setrun(sc, 0);
2846 }
2847
2848 static void
2849 carp_suspend(struct carp_softc *sc, boolean_t detach)
2850 {
2851         struct ifnet *cifp = &sc->sc_if;
2852
2853         carp_stop(sc, detach);
2854
2855         /* Retain the running state, if we are not dead yet */
2856         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2857                 cifp->if_flags |= IFF_RUNNING;
2858 }
2859
2860 static int
2861 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2862     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2863 {
2864         struct ip_moptions *imo = &sc->sc_imo;
2865         struct carp_if *ocif = ifp->if_carp;
2866         int error;
2867
2868         KKASSERT(vha->vha_ia != NULL);
2869
2870         KASSERT(ia_if != NULL, ("NULL backing address"));
2871         KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2872         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2873                 ("inactive vhaddr %p is the address owner", vha));
2874
2875         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2876                 ("%s is already on %s", sc->sc_if.if_xname,
2877                  sc->sc_carpdev->if_xname));
2878
2879         if (ocif == NULL) {
2880                 KASSERT(sc->sc_carpdev == NULL,
2881                         ("%s is already on %s", sc->sc_if.if_xname,
2882                          sc->sc_carpdev->if_xname));
2883
2884                 error = ifpromisc(ifp, 1);
2885                 if (error)
2886                         return error;
2887         } else {
2888                 struct carp_softc_container *scc;
2889
2890                 TAILQ_FOREACH(scc, ocif, scc_link) {
2891                         struct carp_softc *vr = scc->scc_softc;
2892
2893                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2894                                 return EINVAL;
2895                 }
2896         }
2897
2898         ifp->if_carp = carp_if_insert(ocif, sc);
2899         KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2900
2901         sc->sc_ia = ia_if;
2902         sc->sc_carpdev = ifp;
2903         sc->arpcom.ac_if.if_hwassist = 0;
2904         if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2905                 sc->arpcom.ac_if.if_hwassist |=
2906                     (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2907         }
2908         if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2909                 sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2910
2911         /*
2912          * Make sure that all protocol threads see the sc_carpdev and
2913          * if_carp changes
2914          */
2915         netmsg_service_sync();
2916
2917         if (ocif != NULL && ifp->if_carp != ocif) {
2918                 /*
2919                  * The old carp list could be safely free now,
2920                  * since no one can access it.
2921                  */
2922                 carp_if_free(ocif);
2923         }
2924
2925         vha->vha_iaback = ia_if;
2926         sc->sc_naddrs++;
2927
2928         if (own) {
2929                 vha->vha_flags |= CARP_VHAF_OWNER;
2930
2931                 /* XXX save user configured advskew? */
2932                 sc->sc_advskew = 0;
2933         }
2934
2935         carp_addroute_vhaddr(sc, vha);
2936
2937         /*
2938          * Join the multicast group only after the backing interface
2939          * has been hooked with the CARP interface.
2940          */
2941         KASSERT(imo->imo_multicast_ifp == NULL ||
2942                 imo->imo_multicast_ifp == &sc->sc_if,
2943                 ("%s didn't leave mcast group on %s",
2944                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2945
2946         if (imo->imo_num_memberships == 0) {
2947                 struct in_addr addr;
2948
2949                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2950                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2951                 if (imo->imo_membership[0] == NULL) {
2952                         carp_deactivate_vhaddr(sc, vha, FALSE);
2953                         return ENOBUFS;
2954                 }
2955
2956                 imo->imo_num_memberships++;
2957                 imo->imo_multicast_ifp = &sc->sc_if;
2958                 imo->imo_multicast_ttl = CARP_DFLTTL;
2959                 imo->imo_multicast_loop = 0;
2960         }
2961
2962         carp_hmac_prepare(sc);
2963         carp_set_state(sc, INIT);
2964         carp_setrun(sc, 0);
2965         return 0;
2966 }
2967
2968 static void
2969 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2970     boolean_t del_iaback)
2971 {
2972         KKASSERT(vha->vha_ia != NULL);
2973
2974         carp_hmac_prepare(sc);
2975
2976         if (vha->vha_iaback == NULL) {
2977                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2978                         ("inactive vhaddr %p is the address owner", vha));
2979                 return;
2980         }
2981
2982         vha->vha_flags &= ~CARP_VHAF_OWNER;
2983         carp_delroute_vhaddr(sc, vha, del_iaback);
2984
2985         KKASSERT(sc->sc_naddrs > 0);
2986         vha->vha_iaback = NULL;
2987         sc->sc_naddrs--;
2988         if (!sc->sc_naddrs) {
2989                 if (sc->sc_naddrs6) {
2990                         carp_multicast_cleanup(sc);
2991                         sc->sc_ia = NULL;
2992                 } else {
2993                         carp_detach(sc, FALSE, del_iaback);
2994                 }
2995         }
2996 }
2997
2998 static void
2999 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
3000 {
3001         struct carp_vhaddr *vha;
3002         struct in_ifaddr *ia_if;
3003
3004         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3005         ia_if = ifatoia(ifa_if);
3006
3007         /*
3008          * Test each inactive vhaddr against the newly added address.
3009          * If the newly added address could be the backing address,
3010          * then activate the matching vhaddr.
3011          */
3012         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3013                 const struct in_ifaddr *ia;
3014                 int own;
3015
3016                 if (vha->vha_iaback != NULL)
3017                         continue;
3018
3019                 ia = vha->vha_ia;
3020                 if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3021                     ia->ia_subnet != ia_if->ia_subnet)
3022                         continue;
3023
3024                 own = 0;
3025                 if (ia->ia_addr.sin_addr.s_addr ==
3026                     ia_if->ia_addr.sin_addr.s_addr)
3027                         own = 1;
3028
3029                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3030         }
3031 }
3032
3033 static void
3034 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3035                   struct ifaddr *ifa_if)
3036 {
3037         struct carp_vhaddr *vha;
3038         struct in_ifaddr *ia_if;
3039
3040         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3041         ia_if = ifatoia(ifa_if);
3042
3043         /*
3044          * Ad src address is deleted; set it to NULL.
3045          * Following loop will try pick up a new ad src address
3046          * if one of the vhaddr could retain its backing address.
3047          */
3048         if (sc->sc_ia == ia_if)
3049                 sc->sc_ia = NULL;
3050
3051         /*
3052          * Test each active vhaddr against the deleted address.
3053          * If the deleted address is vhaddr address's backing
3054          * address, then deactivate the vhaddr.
3055          */
3056         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3057                 if (vha->vha_iaback == NULL)
3058                         continue;
3059
3060                 if (vha->vha_iaback == ia_if)
3061                         carp_deactivate_vhaddr(sc, vha, TRUE);
3062                 else if (sc->sc_ia == NULL)
3063                         sc->sc_ia = vha->vha_iaback;
3064         }
3065 }
3066
3067 static void
3068 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3069 {
3070         struct carp_vhaddr *vha;
3071
3072         KKASSERT(sc->sc_carpdev == NULL);
3073
3074         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3075                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3076 }
3077
3078 static void
3079 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3080             enum ifaddr_event event, struct ifaddr *ifa)
3081 {
3082         struct carp_softc *sc;
3083
3084         if (ifa->ifa_addr->sa_family != AF_INET)
3085                 return;
3086
3087         ASSERT_NETISR0;
3088
3089         if (ifp->if_type == IFT_CARP) {
3090                 /*
3091                  * Address is changed on carp(4) interface
3092                  */
3093                 switch (event) {
3094                 case IFADDR_EVENT_ADD:
3095                         carp_add_addr(ifp->if_softc, ifa);
3096                         break;
3097
3098                 case IFADDR_EVENT_CHANGE:
3099                         carp_config_addr(ifp->if_softc, ifa);
3100                         break;
3101
3102                 case IFADDR_EVENT_DELETE:
3103                         carp_del_addr(ifp->if_softc, ifa);
3104                         break;
3105                 }
3106                 return;
3107         }
3108
3109         /*
3110          * Address is changed on non-carp(4) interface
3111          */
3112         if ((ifp->if_flags & IFF_MULTICAST) == 0)
3113                 return;
3114
3115         LIST_FOREACH(sc, &carpif_list, sc_next) {
3116                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3117                         /* Not the parent iface; skip */
3118                         continue;
3119                 }
3120
3121                 switch (event) {
3122                 case IFADDR_EVENT_ADD:
3123                         carp_link_addrs(sc, ifp, ifa);
3124                         break;
3125
3126                 case IFADDR_EVENT_DELETE:
3127                         if (sc->sc_carpdev != NULL) {
3128                                 carp_unlink_addrs(sc, ifp, ifa);
3129                                 if (sc->sc_carpdev == NULL) {
3130                                         /*
3131                                          * We no longer have the parent
3132                                          * interface, however, certain
3133                                          * virtual addresses, which are
3134                                          * not used because they can't
3135                                          * match the previous parent
3136                                          * interface's addresses, may now
3137                                          * match different interface's
3138                                          * addresses.
3139                                          */
3140                                         carp_update_addrs(sc, ifa);
3141                                 }
3142                         } else {
3143                                 /*
3144                                  * The carp(4) interface didn't have a
3145                                  * parent iface, so it is not possible
3146                                  * that it will contain any address to
3147                                  * be unlinked.
3148                                  */
3149                         }
3150                         break;
3151
3152                 case IFADDR_EVENT_CHANGE:
3153                         if (sc->sc_carpdev == NULL) {
3154                                 /*
3155                                  * The carp(4) interface didn't have a
3156                                  * parent iface, so it is not possible
3157                                  * that it will contain any address to
3158                                  * be updated.
3159                                  */
3160                                 carp_link_addrs(sc, ifp, ifa);
3161                         } else {
3162                                 /*
3163                                  * First try breaking tie with the old
3164                                  * address.  Then see whether we could
3165                                  * link certain vhaddr to the new address.
3166                                  * If that fails, i.e. carpdev is NULL,
3167                                  * we try a global update.
3168                                  *
3169                                  * NOTE: The above order is critical.
3170                                  */
3171                                 carp_unlink_addrs(sc, ifp, ifa);
3172                                 carp_link_addrs(sc, ifp, ifa);
3173                                 if (sc->sc_carpdev == NULL) {
3174                                         /*
3175                                          * See the comment in the above
3176                                          * IFADDR_EVENT_DELETE block.
3177                                          */
3178                                         carp_update_addrs(sc, NULL);
3179                                 }
3180                         }
3181                         break;
3182                 }
3183         }
3184 }
3185
3186 void
3187 carp_proto_ctlinput(netmsg_t msg)
3188 {
3189         int cmd = msg->ctlinput.nm_cmd;
3190         struct sockaddr *sa = msg->ctlinput.nm_arg;
3191         struct in_ifaddr_container *iac;
3192
3193         /* We only process PRC_IFDOWN and PRC_IFUP commands */
3194         if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3195                 goto done;
3196
3197         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3198                 struct in_ifaddr *ia = iac->ia;
3199                 struct ifnet *ifp = ia->ia_ifp;
3200
3201                 if (ifp->if_type == IFT_CARP)
3202                         continue;
3203
3204                 if (ia->ia_ifa.ifa_addr == sa) {
3205                         if (cmd == PRC_IFDOWN) {
3206                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3207                                     &ia->ia_ifa);
3208                         } else if (cmd == PRC_IFUP) {
3209                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3210                                     &ia->ia_ifa);
3211                         }
3212                         break;
3213                 }
3214         }
3215 done:
3216         lwkt_replymsg(&msg->lmsg, 0);
3217 }
3218
3219 struct ifnet *
3220 carp_parent(struct ifnet *cifp)
3221 {
3222         struct carp_softc *sc;
3223
3224         KKASSERT(cifp->if_type == IFT_CARP);
3225         sc = cifp->if_softc;
3226
3227         return sc->sc_carpdev;
3228 }
3229
/*
 * Flags for rtinit() on in_ifaddr x: RTF_HOST when x's interface is a
 * loopback or point-to-point interface, otherwise 0.
 */
#define rtinitflags(x)							\
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
	    ? RTF_HOST : 0)
3233
3234 static int
3235 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3236 {
3237         struct in_ifaddr *ia, *iaback;
3238
3239         if (sc->sc_state != MASTER)
3240                 return 0;
3241
3242         ia = vha->vha_ia;
3243         KKASSERT(ia != NULL);
3244
3245         iaback = vha->vha_iaback;
3246         KKASSERT(iaback != NULL);
3247
3248         return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3249 }
3250
3251 static void
3252 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3253     boolean_t del_iaback)
3254 {
3255         struct in_ifaddr *ia, *iaback;
3256
3257         ia = vha->vha_ia;
3258         KKASSERT(ia != NULL);
3259
3260         iaback = vha->vha_iaback;
3261         KKASSERT(iaback != NULL);
3262
3263         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3264                 rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3265                 return;
3266         }
3267
3268         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3269         in_ifadown_force(&ia->ia_ifa, 1);
3270         ia->ia_flags &= ~IFA_ROUTE;
3271 }
3272
3273 static int
3274 carp_modevent(module_t mod, int type, void *data)
3275 {
3276         switch (type) {
3277         case MOD_LOAD:
3278                 LIST_INIT(&carpif_list);
3279                 carp_ifdetach_event =
3280                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3281                                       EVENTHANDLER_PRI_ANY);
3282                 carp_ifaddr_event =
3283                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3284                                       EVENTHANDLER_PRI_FIRST);
3285                 if_clone_attach(&carp_cloner);
3286                 break;
3287
3288         case MOD_UNLOAD:
3289                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3290                                         carp_ifdetach_event);
3291                 EVENTHANDLER_DEREGISTER(ifaddr_event,
3292                                         carp_ifaddr_event);
3293                 if_clone_detach(&carp_cloner);
3294                 break;
3295
3296         default:
3297                 return (EINVAL);
3298         }
3299         return (0);
3300 }
3301
/*
 * Module description handed to DECLARE_MODULE() below; it ties the
 * "carp" module to carp_modevent().
 * NOTE(review): the members are given positionally -- presumably name,
 * event handler and private data; confirm against moduledata_t.
 */
static moduledata_t carp_mod = {
        "carp",
        carp_modevent,
        0
};
DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);