Merge branch 'vendor/LESS'
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82
83 #include <netinet/ip_carp.h>
84
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl, add/delete inet addresses,
91  *   are serialized by netisr0; not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *      
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *      
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - if_carp accessing:
246  *   The accessing code should cache the if_carp in a local temporary
247  *   variable and access that temporary variable along the code path
248  *   instead of reading if_carp again later on.
249  */
250
/* Driver name handed to the interface cloner and to if_initname(). */
#define CARP_IFNAME             "carp"
/* True only when both IFF_UP and IFF_RUNNING are set in (ifp)->if_flags. */
#define CARP_IS_RUNNING(ifp)    \
        (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))

/* Forward declaration; struct netmsg_carp refers to it by pointer. */
struct carp_softc;
256
/*
 * One IPv4 virtual address of a carp interface.  Entries are linked on
 * carp_softc's sc_vha_list, which carp_insert_vhaddr() keeps sorted by
 * address.
 */
struct carp_vhaddr {
        uint32_t                vha_flags;      /* CARP_VHAF_ */
        struct in_ifaddr        *vha_ia;        /* carp address */
        struct in_ifaddr        *vha_iaback;    /* backing address */
        TAILQ_ENTRY(carp_vhaddr) vha_link;      /* sc_vha_list linkage */
};
/* List head type used for carp_softc's sc_vha_list. */
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264
/*
 * Message used to hand carp work to a netisr thread; the dispatch
 * handlers (e.g. carp_clone_destroy_dispatch()) cast their netmsg_t
 * argument back to this structure.
 */
struct netmsg_carp {
        struct netmsg_base      base;           /* must stay first (cast target) */
        struct ifnet            *nc_carpdev;
        struct carp_softc       *nc_softc;      /* carp interface operated on */
        /*
         * NOTE(review): nc_data/nc_datalen presumably carry the request
         * buffer of ioctl-style messages -- confirm at the users.
         */
        void                    *nc_data;
        size_t                  nc_datalen;
};
272
/*
 * Per-interface state of one carp pseudo interface.  Allocated (zeroed)
 * in carp_clone_create() and freed in carp_clone_destroy().
 */
struct carp_softc {
        /* Embedded ethernet common part; its ac_if is this carp's ifnet (sc_if) */
        struct arpcom            arpcom;
        struct ifnet            *sc_carpdev;    /* parent interface */
        struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */

        const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
        struct ip_moptions       sc_imo;

#ifdef INET6
        struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
        struct ip6_moptions      sc_im6o;
#endif /* INET6 */

        /* INIT is 0, i.e. the state of a freshly zero-allocated softc */
        enum { INIT = 0, BACKUP, MASTER }
                                 sc_state;
        /* Set to TRUE by carp_clone_destroy_dispatch() before detaching */
        boolean_t                sc_dead;

        int                      sc_suppress;

        int                      sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS  3
        int                      sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3

        /* -1 after creation; only the low 8 bits go into the HMAC */
        int                      sc_vhid;
        int                      sc_advskew;
        int                      sc_naddrs;     /* actually used IPv4 vha */
        int                      sc_naddrs6;
        int                      sc_advbase;    /* seconds */
        int                      sc_init_counter;
        uint64_t                 sc_counter;

        /* authentication */
#define CARP_HMAC_PAD   64
        unsigned char            sc_key[CARP_KEY_LEN];
        /* HMAC pad derived from sc_key; holds the opad after carp_hmac_prepare() */
        unsigned char            sc_pad[CARP_HMAC_PAD];
        /* Precomputed inner-hash context, copied by carp_hmac_generate() */
        SHA1_CTX                 sc_sha1;

        struct callout           sc_ad_tmo;     /* advertisement timeout */
        struct netmsg_carp       sc_ad_msg;     /* adv timeout netmsg */
        struct callout           sc_md_tmo;     /* ip4 master down timeout */
        struct callout           sc_md6_tmo;    /* ip6 master down timeout */
        struct netmsg_carp       sc_md_msg;     /* master down timeout netmsg */

        LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
};

/* Shorthand for the ifnet embedded in the softc's arpcom. */
#define sc_if   arpcom.ac_if
321
/*
 * Node of a backing interface's carp_if list; it only points at a
 * carp_softc.  When the list is replaced, containers are allocated by
 * carp_if_insert()/carp_if_remove() and released by carp_if_free();
 * the softcs they point at are not touched.
 */
struct carp_softc_container {
        TAILQ_ENTRY(carp_softc_container) scc_link;
        struct carp_softc       *scc_softc;
};
/* struct carp_if is the tail queue head of carp_softc_container nodes. */
TAILQ_HEAD(carp_if, carp_softc_container);
327
/* sysctl knobs and statistics exported under the _net_inet_carp node. */
SYSCTL_DECL(_net_inet_carp);

/*
 * Option values, indexed by the CARPCTL_* identifiers.
 * NOTE(review): which initializer belongs to which CARPCTL_* index
 * depends on the values in netinet/ip_carp.h -- not visible here.
 */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
/* Also gates CARP_LOG() (value > 0) and CARP_DEBUG() (value > 1) */
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/* Exported read-only (CTLFLAG_RD) */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Exported read-only (CTLFLAG_RD); defaults to 1 */
static int carp_prio_ad = 1;
SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
    &carp_prio_ad, 0, "Prioritize advertisement packet");

static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");
352
/* Log at LOG_INFO when carp_opts[CARPCTL_LOG] is greater than 0. */
#define CARP_LOG(...)   do {                            \
        if (carp_opts[CARPCTL_LOG] > 0)                 \
                log(LOG_INFO, __VA_ARGS__);             \
} while (0)

/* Log at LOG_DEBUG when carp_opts[CARPCTL_LOG] is greater than 1. */
#define CARP_DEBUG(...) do {                            \
        if (carp_opts[CARPCTL_LOG] > 1)                 \
                log(LOG_DEBUG, __VA_ARGS__);            \
} while (0)
362
/* Held around carpif_list updates in carp_clone_create()/carp_clone_destroy(). */
static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
364
/*
 * Forward declarations of the file-local functions.
 */

/* HMAC, routing, clone handlers, advertisement and master-down processing */
static void     carp_hmac_prepare(struct carp_softc *);
static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
                    unsigned char *);
static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
                    unsigned char *);
static void     carp_setroute(struct carp_softc *, int);
static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
                    struct carp_header *, sa_family_t);
static int      carp_clone_create(struct if_clone *, int, caddr_t);
static int      carp_clone_destroy(struct ifnet *);
static void     carp_detach(struct carp_softc *, boolean_t, boolean_t);
static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
static void     carp_send_ad_all(void);
static void     carp_send_ad_timeout(void *);
static void     carp_send_ad(struct carp_softc *);
static void     carp_send_arp(struct carp_softc *);
static void     carp_master_down_timeout(void *);
static void     carp_master_down(struct carp_softc *);
static void     carp_setrun(struct carp_softc *, sa_family_t);
static void     carp_set_state(struct carp_softc *, int);
static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);

/* ifnet methods; installed on the interface by carp_clone_create() */
static void     carp_init(void *);
static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
                    struct rtentry *);
static void     carp_start(struct ifnet *, struct ifaltq_subque *);

/* Multicast cleanup and interface address bookkeeping */
static void     carp_multicast_cleanup(struct carp_softc *);
static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
                    struct ifaddr *);
static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
                    struct ifaddr *);
static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);

/* Virtual address (struct carp_vhaddr) handling */
static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
                    struct in_ifaddr *);
static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
                    struct ifnet *, struct in_ifaddr *, int);
static void     carp_deactivate_vhaddr(struct carp_softc *,
                    struct carp_vhaddr *, boolean_t);
static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
                    boolean_t);

/* "foo" and "notyet" guard declarations that are currently compiled out */
#ifdef foo
static void     carp_sc_state(struct carp_softc *);
#endif
#ifdef INET6
static void     carp_send_na(struct carp_softc *);
#ifdef notyet
static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
#endif
static void     carp_multicast6_cleanup(struct carp_softc *);
#endif
/* Stop/suspend and the per-request ioctl workers */
static void     carp_stop(struct carp_softc *, boolean_t);
static void     carp_suspend(struct carp_softc *, boolean_t);
static void     carp_ioctl_stop(struct carp_softc *);
static int      carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
static void     carp_ioctl_ifcap(struct carp_softc *, int);
static int      carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
static int      carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
static int      carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);

/* carp_if list helpers: build a replacement list / free an old one */
static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
static void     carp_if_free(struct carp_if *);

/*
 * NOTE(review): presumably the callbacks behind the carp_ifaddr_event and
 * carp_ifdetach_event tags -- the registration is not visible in this chunk.
 */
static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
                            struct ifaddr *);
static void     carp_ifdetach(void *, struct ifnet *);

/* netmsg handlers; each one receives the netmsg_t it was dispatched with */
static void     carp_ifdetach_dispatch(netmsg_t);
static void     carp_clone_destroy_dispatch(netmsg_t);
static void     carp_init_dispatch(netmsg_t);
static void     carp_ioctl_stop_dispatch(netmsg_t);
static void     carp_ioctl_setvh_dispatch(netmsg_t);
static void     carp_ioctl_ifcap_dispatch(netmsg_t);
static void     carp_ioctl_getvh_dispatch(netmsg_t);
static void     carp_ioctl_getdevname_dispatch(netmsg_t);
static void     carp_ioctl_getvhaddr_dispatch(netmsg_t);
static void     carp_send_ad_timeout_dispatch(netmsg_t);
static void     carp_master_down_timeout_dispatch(netmsg_t);
452
/* malloc type used for carp_softc, carp_if and carp_softc_container */
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");

/* All carp interfaces; modified with carp_listtok held */
static LIST_HEAD(, carp_softc) carpif_list;

/* Cloner for "carp" interfaces, units 0 .. IF_MAXUNIT */
static struct if_clone carp_cloner =
IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
                     0, IF_MAXUNIT);

/*
 * Link-level address handed to ether_ifattach() in carp_clone_create().
 * NOTE(review): looks like the 00:00:5e:00:01:xx virtual router prefix
 * with the last octet left 0 -- confirm where the vhid is filled in.
 */
static const uint8_t    carp_etheraddr[ETHER_ADDR_LEN] =
        { 0, 0, 0x5e, 0, 1, 0 };

/* Event handler registration tags */
static eventhandler_tag carp_ifdetach_event;
static eventhandler_tag carp_ifaddr_event;
466
467 static __inline void
468 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
469 {
470         struct carp_vhaddr *vha;
471         u_long new_addr, addr;
472
473         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
474
475         /*
476          * Virtual address list is sorted; smaller one first
477          */
478         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
479
480         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
481                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
482
483                 if (addr > new_addr)
484                         break;
485         }
486         if (vha == NULL)
487                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
488         else
489                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
490         vha_new->vha_flags |= CARP_VHAF_ONLIST;
491 }
492
493 static __inline void
494 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
495 {
496         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
497         vha->vha_flags &= ~CARP_VHAF_ONLIST;
498         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
499 }
500
501 static void
502 carp_hmac_prepare(struct carp_softc *sc)
503 {
504         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
505         uint8_t vhid = sc->sc_vhid & 0xff;
506         int i;
507 #ifdef INET6
508         struct ifaddr_container *ifac;
509         struct in6_addr in6;
510 #endif
511 #ifdef INET
512         struct carp_vhaddr *vha;
513 #endif
514
515         /* XXX: possible race here */
516
517         /* compute ipad from key */
518         bzero(sc->sc_pad, sizeof(sc->sc_pad));
519         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
520         for (i = 0; i < sizeof(sc->sc_pad); i++)
521                 sc->sc_pad[i] ^= 0x36;
522
523         /* precompute first part of inner hash */
524         SHA1Init(&sc->sc_sha1);
525         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
526         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
527         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
528         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
529 #ifdef INET
530         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
531                 SHA1Update(&sc->sc_sha1,
532                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
533                     sizeof(struct in_addr));
534         }
535 #endif /* INET */
536 #ifdef INET6
537         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
538                 struct ifaddr *ifa = ifac->ifa;
539
540                 if (ifa->ifa_addr->sa_family == AF_INET6) {
541                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
542                         in6_clearscope(&in6);
543                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
544                 }
545         }
546 #endif /* INET6 */
547
548         /* convert ipad to opad */
549         for (i = 0; i < sizeof(sc->sc_pad); i++)
550                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
551 }
552
553 static void
554 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
555     unsigned char md[20])
556 {
557         SHA1_CTX sha1ctx;
558
559         /* fetch first half of inner hash */
560         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
561
562         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
563         SHA1Final(md, &sha1ctx);
564
565         /* outer hash */
566         SHA1Init(&sha1ctx);
567         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
568         SHA1Update(&sha1ctx, md, 20);
569         SHA1Final(md, &sha1ctx);
570 }
571
572 static int
573 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
574     unsigned char md[20])
575 {
576         unsigned char md2[20];
577
578         carp_hmac_generate(sc, counter, md2);
579         return (bcmp(md, md2, sizeof(md2)));
580 }
581
582 static void
583 carp_setroute(struct carp_softc *sc, int cmd)
584 {
585 #ifdef INET6
586         struct ifaddr_container *ifac;
587 #endif
588         struct carp_vhaddr *vha;
589
590         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
591
592         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
593                 if (vha->vha_iaback == NULL)
594                         continue;
595                 if (cmd == RTM_DELETE)
596                         carp_delroute_vhaddr(sc, vha, FALSE);
597                 else
598                         carp_addroute_vhaddr(sc, vha);
599         }
600
601 #ifdef INET6
602         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
603                 struct ifaddr *ifa = ifac->ifa;
604
605                 if (ifa->ifa_addr->sa_family == AF_INET6) {
606                         if (cmd == RTM_ADD)
607                                 in6_ifaddloop(ifa);
608                         else
609                                 in6_ifremloop(ifa);
610                 }
611         }
612 #endif /* INET6 */
613 }
614
/*
 * Interface cloner "create" handler: allocate and set up the softc of
 * carp<unit>, attach it as an ethernet interface and put it on
 * carpif_list.  The new interface has no vhid (-1) and no parent
 * interface yet.
 *
 * Always returns 0.
 */
static int
carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
        struct carp_softc *sc;
        struct ifnet *ifp;

        /* M_ZERO: every field not set below starts out as 0/NULL */
        sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
        ifp = &sc->sc_if;

        sc->sc_suppress = 0;
        sc->sc_advbase = CARP_DFLTINTV;
        sc->sc_vhid = -1;       /* required setting */
        sc->sc_advskew = 0;
        sc->sc_init_counter = 1;
        sc->sc_naddrs = 0;
        sc->sc_naddrs6 = 0;

        TAILQ_INIT(&sc->sc_vha_list);

#ifdef INET6
        sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
#endif

        /*
         * Advertisement timeout: callout plus the droppable, prioritized
         * netmsg handled by carp_send_ad_timeout_dispatch().
         */
        callout_init_mp(&sc->sc_ad_tmo);
        netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
            MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
        sc->sc_ad_msg.nc_softc = sc;

        /*
         * Master down timeouts: one callout each for IPv4 and IPv6,
         * sharing a single netmsg handled by
         * carp_master_down_timeout_dispatch().
         */
        callout_init_mp(&sc->sc_md_tmo);
        callout_init_mp(&sc->sc_md6_tmo);
        netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
            MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
        sc->sc_md_msg.nc_softc = sc;

        if_initname(ifp, CARP_IFNAME, unit);
        ifp->if_softc = sc;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_init = carp_init;
        ifp->if_ioctl = carp_ioctl;
        ifp->if_start = carp_start;

        ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
        ifp->if_capenable = ifp->if_capabilities;
        /*
         * Leave if_hwassist as it is; if_hwassist will be
         * setup when this carp interface has parent.
         */

        ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
        ifq_set_ready(&ifp->if_snd);

        ether_ifattach(ifp, carp_etheraddr, NULL);

        /*
         * NOTE(review): if_type and if_output are assigned only after
         * ether_ifattach(), presumably because the attach installs its
         * own ethernet defaults -- keep this order.
         */
        ifp->if_type = IFT_CARP;
        ifp->if_output = carp_output;

        lwkt_gettoken(&carp_listtok);
        LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
        lwkt_reltoken(&carp_listtok);

        return (0);
}
677
/*
 * netmsg handler for the first half of interface destruction; sent to
 * netisr0 by carp_clone_destroy().  Marks the softc dead, detaches it
 * from its parent interface, stops the timers and discards any timeout
 * message that is still pending, then replies to the sender.
 */
static void
carp_clone_destroy_dispatch(netmsg_t msg)
{
        struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
        struct carp_softc *sc = cmsg->nc_softc;

        /*
         * Set before carp_detach(): with sc_dead set, carp_detach()
         * does not walk sc_vha_list to deactivate the virtual addresses.
         */
        sc->sc_dead = TRUE;
        carp_detach(sc, TRUE, FALSE);

        callout_stop_sync(&sc->sc_ad_tmo);
        callout_stop_sync(&sc->sc_md_tmo);
        callout_stop_sync(&sc->sc_md6_tmo);

        /* Drop timeout messages that were queued before the callouts stopped */
        crit_enter();
        lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
        lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
        crit_exit();

        lwkt_replymsg(&cmsg->base.lmsg, 0);
}
698
/*
 * Interface cloner "destroy" handler.
 *
 * Runs carp_clone_destroy_dispatch() in netisr0 and waits for it to
 * finish, then removes the softc from carpif_list, detaches the
 * interface and frees the softc.
 *
 * Always returns 0.
 */
static int
carp_clone_destroy(struct ifnet *ifp)
{
        struct carp_softc *sc = ifp->if_softc;
        struct netmsg_carp cmsg;

        bzero(&cmsg, sizeof(cmsg));
        netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
            carp_clone_destroy_dispatch);
        cmsg.nc_softc = sc;

        /* Synchronous: returns after the handler has replied */
        lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);

        lwkt_gettoken(&carp_listtok);
        LIST_REMOVE(sc, sc_next);
        lwkt_reltoken(&carp_listtok);

        bpfdetach(ifp);
        if_detach(ifp);

        /* No IPv4 virtual address may be left in use at this point */
        KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
        kfree(sc, M_CARP);

        return 0;
}
724
725 static struct carp_if *
726 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
727 {
728         struct carp_softc_container *oscc, *scc;
729         struct carp_if *cif;
730         int count = 0;
731 #ifdef INVARIANTS
732         int found = 0;
733 #endif
734
735         TAILQ_FOREACH(oscc, ocif, scc_link) {
736                 ++count;
737 #ifdef INVARIANTS
738                 if (oscc->scc_softc == sc)
739                         found = 1;
740 #endif
741         }
742         KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
743
744         if (count == 1) {
745                 /* Last one is going to be unlinked */
746                 return NULL;
747         }
748
749         cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
750         TAILQ_INIT(cif);
751
752         TAILQ_FOREACH(oscc, ocif, scc_link) {
753                 if (oscc->scc_softc == sc)
754                         continue;
755
756                 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
757                 scc->scc_softc = oscc->scc_softc;
758                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
759         }
760
761         return cif;
762 }
763
764 static struct carp_if *
765 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
766 {
767         struct carp_softc_container *oscc;
768         int onlist;
769
770         onlist = 0;
771         if (ocif != NULL) {
772                 TAILQ_FOREACH(oscc, ocif, scc_link) {
773                         if (oscc->scc_softc == sc)
774                                 onlist = 1;
775                 }
776         }
777
778 #ifdef INVARIANTS
779         if (sc->sc_carpdev != NULL) {
780                 KASSERT(onlist, ("%s is not on %s carp list",
781                     sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
782         } else {
783                 KASSERT(!onlist, ("%s is already on carp list",
784                     sc->sc_if.if_xname));
785         }
786 #endif
787
788         if (!onlist) {
789                 struct carp_if *cif;
790                 struct carp_softc_container *new_scc, *scc;
791                 int inserted = 0;
792
793                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
794                 TAILQ_INIT(cif);
795
796                 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
797                 new_scc->scc_softc = sc;
798
799                 if (ocif != NULL) {
800                         TAILQ_FOREACH(oscc, ocif, scc_link) {
801                                 if (!inserted &&
802                                     oscc->scc_softc->sc_vhid > sc->sc_vhid) {
803                                         TAILQ_INSERT_TAIL(cif, new_scc,
804                                             scc_link);
805                                         inserted = 1;
806                                 }
807
808                                 scc = kmalloc(sizeof(*scc), M_CARP,
809                                     M_WAITOK | M_ZERO);
810                                 scc->scc_softc = oscc->scc_softc;
811                                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
812                         }
813                 }
814                 if (!inserted)
815                         TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
816
817                 return cif;
818         } else {
819                 return ocif;
820         }
821 }
822
823 static void
824 carp_if_free(struct carp_if *cif)
825 {
826         struct carp_softc_container *scc;
827
828         while ((scc = TAILQ_FIRST(cif)) != NULL) {
829                 TAILQ_REMOVE(cif, scc, scc_link);
830                 kfree(scc, M_CARP);
831         }
832         kfree(cif, M_CARP);
833 }
834
835 static void
836 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
837 {
838         carp_suspend(sc, detach);
839
840         carp_multicast_cleanup(sc);
841 #ifdef INET6
842         carp_multicast6_cleanup(sc);
843 #endif
844
845         if (!sc->sc_dead && detach) {
846                 struct carp_vhaddr *vha;
847
848                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
849                         carp_deactivate_vhaddr(sc, vha, del_iaback);
850                 KKASSERT(sc->sc_naddrs == 0);
851         }
852
853         if (sc->sc_carpdev != NULL) {
854                 struct ifnet *ifp = sc->sc_carpdev;
855                 struct carp_if *ocif = ifp->if_carp;
856
857                 ifp->if_carp = carp_if_remove(ocif, sc);
858                 KASSERT(ifp->if_carp != ocif,
859                     ("%s carp_if_remove failed", __func__));
860
861                 sc->sc_carpdev = NULL;
862                 sc->sc_ia = NULL;
863                 sc->arpcom.ac_if.if_hwassist = 0;
864
865                 /*
866                  * Make sure that all protocol threads see the
867                  * sc_carpdev and if_carp changes
868                  */
869                 netmsg_service_sync();
870
871                 if (ifp->if_carp == NULL) {
872                         /*
873                          * No more carp interfaces using
874                          * ifp as the backing interface,
875                          * move it out of promiscous mode.
876                          */
877                         ifpromisc(ifp, 0);
878                 }
879
880                 /*
881                  * The old carp list could be safely free now,
882                  * since no one can access it.
883                  */
884                 carp_if_free(ocif);
885         }
886 }
887
888 static void
889 carp_ifdetach_dispatch(netmsg_t msg)
890 {
891         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
892         struct ifnet *ifp = cmsg->nc_carpdev;
893
894         while (ifp->if_carp) {
895                 struct carp_softc_container *scc;
896
897                 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
898                 carp_detach(scc->scc_softc, TRUE, TRUE);
899         }
900         lwkt_replymsg(&cmsg->base.lmsg, 0);
901 }
902
903 /* Detach an interface from the carp. */
904 static void
905 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
906 {
907         struct netmsg_carp cmsg;
908
909         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
910
911         bzero(&cmsg, sizeof(cmsg));
912         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
913             carp_ifdetach_dispatch);
914         cmsg.nc_carpdev = ifp;
915
916         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
917 }
918
919 /*
920  * process input packet.
921  * we have rearranged checks order compared to the rfc,
922  * but it seems more efficient this way or not possible otherwise.
923  */
924 int
925 carp_proto_input(struct mbuf **mp, int *offp, int proto)
926 {
927         struct mbuf *m = *mp;
928         struct ip *ip = mtod(m, struct ip *);
929         struct ifnet *ifp = m->m_pkthdr.rcvif;
930         struct carp_header *ch;
931         struct carp_softc *sc;
932         int len, iphlen;
933
934         iphlen = *offp;
935         *mp = NULL;
936
937         carpstats.carps_ipackets++;
938
939         if (!carp_opts[CARPCTL_ALLOW]) {
940                 m_freem(m);
941                 goto back;
942         }
943
944         /* Check if received on a valid carp interface */
945         if (ifp->if_type != IFT_CARP) {
946                 carpstats.carps_badif++;
947                 CARP_LOG("carp_proto_input: packet received on non-carp "
948                     "interface: %s\n", ifp->if_xname);
949                 m_freem(m);
950                 goto back;
951         }
952
953         if (!CARP_IS_RUNNING(ifp)) {
954                 carpstats.carps_badif++;
955                 CARP_LOG("carp_proto_input: packet received on stopped carp "
956                     "interface: %s\n", ifp->if_xname);
957                 m_freem(m);
958                 goto back;
959         }
960
961         sc = ifp->if_softc;
962         if (sc->sc_carpdev == NULL) {
963                 carpstats.carps_badif++;
964                 CARP_LOG("carp_proto_input: packet received on defunc carp "
965                     "interface: %s\n", ifp->if_xname);
966                 m_freem(m);
967                 goto back;
968         }
969
970         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
971                 carpstats.carps_badif++;
972                 CARP_LOG("carp_proto_input: non-mcast packet on "
973                     "interface: %s\n", ifp->if_xname);
974                 m_freem(m);
975                 goto back;
976         }
977
978         /* Verify that the IP TTL is CARP_DFLTTL. */
979         if (ip->ip_ttl != CARP_DFLTTL) {
980                 carpstats.carps_badttl++;
981                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
982                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
983                 m_freem(m);
984                 goto back;
985         }
986
987         /* Minimal CARP packet size */
988         len = iphlen + sizeof(*ch);
989
990         /*
991          * Verify that the received packet length is
992          * not less than the CARP header
993          */
994         if (m->m_pkthdr.len < len) {
995                 carpstats.carps_badlen++;
996                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
997                     ifp->if_xname);
998                 m_freem(m);
999                 goto back;
1000         }
1001
1002         /* Make sure that CARP header is contiguous */
1003         if (len > m->m_len) {
1004                 m = m_pullup(m, len);
1005                 if (m == NULL) {
1006                         carpstats.carps_hdrops++;
1007                         CARP_LOG("carp_proto_input: m_pullup failed\n");
1008                         goto back;
1009                 }
1010                 ip = mtod(m, struct ip *);
1011         }
1012         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1013
1014         /* Verify the CARP checksum */
1015         if (in_cksum_skip(m, len, iphlen)) {
1016                 carpstats.carps_badsum++;
1017                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
1018                     ifp->if_xname);
1019                 m_freem(m);
1020                 goto back;
1021         }
1022         carp_proto_input_c(sc, m, ch, AF_INET);
1023 back:
1024         return(IPPROTO_DONE);
1025 }
1026
1027 #ifdef INET6
1028 int
1029 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
1030 {
1031         struct mbuf *m = *mp;
1032         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1033         struct ifnet *ifp = m->m_pkthdr.rcvif;
1034         struct carp_header *ch;
1035         struct carp_softc *sc;
1036         u_int len;
1037
1038         carpstats.carps_ipackets6++;
1039
1040         if (!carp_opts[CARPCTL_ALLOW]) {
1041                 m_freem(m);
1042                 goto back;
1043         }
1044
1045         /* check if received on a valid carp interface */
1046         if (ifp->if_type != IFT_CARP) {
1047                 carpstats.carps_badif++;
1048                 CARP_LOG("carp6_proto_input: packet received on non-carp "
1049                     "interface: %s\n", ifp->if_xname);
1050                 m_freem(m);
1051                 goto back;
1052         }
1053
1054         if (!CARP_IS_RUNNING(ifp)) {
1055                 carpstats.carps_badif++;
1056                 CARP_LOG("carp_proto_input: packet received on stopped carp "
1057                     "interface: %s\n", ifp->if_xname);
1058                 m_freem(m);
1059                 goto back;
1060         }
1061
1062         sc = ifp->if_softc;
1063         if (sc->sc_carpdev == NULL) {
1064                 carpstats.carps_badif++;
1065                 CARP_LOG("carp6_proto_input: packet received on defunc-carp "
1066                     "interface: %s\n", ifp->if_xname);
1067                 m_freem(m);
1068                 goto back;
1069         }
1070
1071         /* verify that the IP TTL is 255 */
1072         if (ip6->ip6_hlim != CARP_DFLTTL) {
1073                 carpstats.carps_badttl++;
1074                 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
1075                     ip6->ip6_hlim, ifp->if_xname);
1076                 m_freem(m);
1077                 goto back;
1078         }
1079
1080         /* verify that we have a complete carp packet */
1081         len = m->m_len;
1082         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
1083         if (ch == NULL) {
1084                 carpstats.carps_badlen++;
1085                 CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
1086                 goto back;
1087         }
1088
1089         /* verify the CARP checksum */
1090         if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
1091                 carpstats.carps_badsum++;
1092                 CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
1093                     ifp->if_xname);
1094                 m_freem(m);
1095                 goto back;
1096         }
1097
1098         carp_proto_input_c(sc, m, ch, AF_INET6);
1099 back:
1100         return (IPPROTO_DONE);
1101 }
1102 #endif /* INET6 */
1103
1104 static void
1105 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1106     struct carp_header *ch, sa_family_t af)
1107 {
1108         struct ifnet *cifp;
1109         uint64_t tmp_counter;
1110         struct timeval sc_tv, ch_tv;
1111
1112         if (sc->sc_vhid != ch->carp_vhid) {
1113                 /*
1114                  * CARP uses multicast, however, multicast packets
1115                  * are tapped to all CARP interfaces on the physical
1116                  * interface receiving the CARP packets, so we don't
1117                  * update any stats here.
1118                  */
1119                 m_freem(m);
1120                 return;
1121         }
1122         cifp = &sc->sc_if;
1123
1124         /* verify the CARP version. */
1125         if (ch->carp_version != CARP_VERSION) {
1126                 carpstats.carps_badver++;
1127                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1128                          ch->carp_version);
1129                 m_freem(m);
1130                 return;
1131         }
1132
1133         /* verify the hash */
1134         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1135                 carpstats.carps_badauth++;
1136                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1137                 m_freem(m);
1138                 return;
1139         }
1140
1141         tmp_counter = ntohl(ch->carp_counter[0]);
1142         tmp_counter = tmp_counter<<32;
1143         tmp_counter += ntohl(ch->carp_counter[1]);
1144
1145         /* XXX Replay protection goes here */
1146
1147         sc->sc_init_counter = 0;
1148         sc->sc_counter = tmp_counter;
1149
1150         sc_tv.tv_sec = sc->sc_advbase;
1151         if (carp_suppress_preempt && sc->sc_advskew <  240)
1152                 sc_tv.tv_usec = 240 * 1000000 / 256;
1153         else
1154                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1155         ch_tv.tv_sec = ch->carp_advbase;
1156         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1157
1158         switch (sc->sc_state) {
1159         case INIT:
1160                 break;
1161
1162         case MASTER:
1163                 /*
1164                  * If we receive an advertisement from a master who's going to
1165                  * be more frequent than us, go into BACKUP state.
1166                  */
1167                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
1168                     timevalcmp(&sc_tv, &ch_tv, ==)) {
1169                         callout_stop(&sc->sc_ad_tmo);
1170                         CARP_DEBUG("%s: MASTER -> BACKUP "
1171                            "(more frequent advertisement received)\n",
1172                            cifp->if_xname);
1173                         carp_set_state(sc, BACKUP);
1174                         carp_setrun(sc, 0);
1175                         carp_setroute(sc, RTM_DELETE);
1176                 }
1177                 break;
1178
1179         case BACKUP:
1180                 /*
1181                  * If we're pre-empting masters who advertise slower than us,
1182                  * and this one claims to be slower, treat him as down.
1183                  */
1184                 if (carp_opts[CARPCTL_PREEMPT] &&
1185                     timevalcmp(&sc_tv, &ch_tv, <)) {
1186                         CARP_DEBUG("%s: BACKUP -> MASTER "
1187                             "(preempting a slower master)\n", cifp->if_xname);
1188                         carp_master_down(sc);
1189                         break;
1190                 }
1191
1192                 /*
1193                  *  If the master is going to advertise at such a low frequency
1194                  *  that he's guaranteed to time out, we'd might as well just
1195                  *  treat him as timed out now.
1196                  */
1197                 sc_tv.tv_sec = sc->sc_advbase * 3;
1198                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
1199                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1200                                    cifp->if_xname);
1201                         carp_master_down(sc);
1202                         break;
1203                 }
1204
1205                 /*
1206                  * Otherwise, we reset the counter and wait for the next
1207                  * advertisement.
1208                  */
1209                 carp_setrun(sc, af);
1210                 break;
1211         }
1212         m_freem(m);
1213 }
1214
1215 struct mbuf *
1216 carp_input(void *v, struct mbuf *m)
1217 {
1218         struct carp_if *cif = v;
1219         struct ether_header *eh;
1220         struct carp_softc_container *scc;
1221         struct ifnet *ifp;
1222
1223         eh = mtod(m, struct ether_header *);
1224
1225         ifp = carp_forus(cif, eh->ether_dhost);
1226         if (ifp != NULL) {
1227                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1228                 return NULL;
1229         }
1230
1231         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1232                 return m;
1233
1234         /*
1235          * XXX Should really check the list of multicast addresses
1236          * for each CARP interface _before_ copying.
1237          */
1238         TAILQ_FOREACH(scc, cif, scc_link) {
1239                 struct carp_softc *sc = scc->scc_softc;
1240                 struct mbuf *m0;
1241
1242                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
1243                         continue;
1244
1245                 m0 = m_dup(m, M_NOWAIT);
1246                 if (m0 == NULL)
1247                         continue;
1248
1249                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1250         }
1251         return m;
1252 }
1253
1254 static void
1255 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1256 {
1257         if (sc->sc_init_counter) {
1258                 /* this could also be seconds since unix epoch */
1259                 sc->sc_counter = karc4random();
1260                 sc->sc_counter = sc->sc_counter << 32;
1261                 sc->sc_counter += karc4random();
1262         } else {
1263                 sc->sc_counter++;
1264         }
1265
1266         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1267         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1268
1269         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1270 }
1271
1272 static void
1273 carp_send_ad_all(void)
1274 {
1275         struct carp_softc *sc;
1276
1277         LIST_FOREACH(sc, &carpif_list, sc_next) {
1278                 if (sc->sc_carpdev == NULL)
1279                         continue;
1280
1281                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1282                         carp_send_ad(sc);
1283         }
1284 }
1285
1286 static void
1287 carp_send_ad_timeout(void *xsc)
1288 {
1289         struct carp_softc *sc = xsc;
1290         struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1291
1292         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1293             __func__, mycpuid));
1294
1295         crit_enter();
1296         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1297                 lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1298         crit_exit();
1299 }
1300
1301 static void
1302 carp_send_ad_timeout_dispatch(netmsg_t msg)
1303 {
1304         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1305         struct carp_softc *sc = cmsg->nc_softc;
1306
1307         /* Reply ASAP */
1308         crit_enter();
1309         lwkt_replymsg(&cmsg->base.lmsg, 0);
1310         crit_exit();
1311
1312         carp_send_ad(sc);
1313 }
1314
1315 static void
1316 carp_send_ad(struct carp_softc *sc)
1317 {
1318         struct ifnet *cifp = &sc->sc_if;
1319         struct carp_header ch;
1320         struct timeval tv;
1321         struct carp_header *ch_ptr;
1322         struct mbuf *m;
1323         int len, advbase, advskew;
1324
1325         if (!CARP_IS_RUNNING(cifp)) {
1326                 /* Bow out */
1327                 advbase = 255;
1328                 advskew = 255;
1329         } else {
1330                 advbase = sc->sc_advbase;
1331                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1332                         advskew = sc->sc_advskew;
1333                 else
1334                         advskew = 240;
1335                 tv.tv_sec = advbase;
1336                 tv.tv_usec = advskew * 1000000 / 256;
1337         }
1338
1339         ch.carp_version = CARP_VERSION;
1340         ch.carp_type = CARP_ADVERTISEMENT;
1341         ch.carp_vhid = sc->sc_vhid;
1342         ch.carp_advbase = advbase;
1343         ch.carp_advskew = advskew;
1344         ch.carp_authlen = 7;    /* XXX DEFINE */
1345         ch.carp_pad1 = 0;       /* must be zero */
1346         ch.carp_cksum = 0;
1347
1348 #ifdef INET
1349         if (sc->sc_ia != NULL) {
1350                 struct ip *ip;
1351
1352                 MGETHDR(m, M_NOWAIT, MT_HEADER);
1353                 if (m == NULL) {
1354                         IFNET_STAT_INC(cifp, oerrors, 1);
1355                         carpstats.carps_onomem++;
1356                         /* XXX maybe less ? */
1357                         if (advbase != 255 || advskew != 255)
1358                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1359                                     carp_send_ad_timeout, sc);
1360                         return;
1361                 }
1362                 len = sizeof(*ip) + sizeof(ch);
1363                 m->m_pkthdr.len = len;
1364                 m->m_pkthdr.rcvif = NULL;
1365                 m->m_len = len;
1366                 MH_ALIGN(m, m->m_len);
1367                 m->m_flags |= M_MCAST;
1368                 if (carp_prio_ad)
1369                         m->m_flags |= M_PRIO;
1370                 ip = mtod(m, struct ip *);
1371                 ip->ip_v = IPVERSION;
1372                 ip->ip_hl = sizeof(*ip) >> 2;
1373                 ip->ip_tos = IPTOS_LOWDELAY;
1374                 ip->ip_len = len;
1375                 ip->ip_id = ip_newid();
1376                 ip->ip_off = IP_DF;
1377                 ip->ip_ttl = CARP_DFLTTL;
1378                 ip->ip_p = IPPROTO_CARP;
1379                 ip->ip_sum = 0;
1380                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1381                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1382
1383                 ch_ptr = (struct carp_header *)(&ip[1]);
1384                 bcopy(&ch, ch_ptr, sizeof(ch));
1385                 carp_prepare_ad(sc, ch_ptr);
1386                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1387
1388                 getmicrotime(&cifp->if_lastchange);
1389                 IFNET_STAT_INC(cifp, opackets, 1);
1390                 IFNET_STAT_INC(cifp, obytes, len);
1391                 carpstats.carps_opackets++;
1392
1393                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1394                         IFNET_STAT_INC(cifp, oerrors, 1);
1395                         if (sc->sc_sendad_errors < INT_MAX)
1396                                 sc->sc_sendad_errors++;
1397                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1398                                 carp_suppress_preempt++;
1399                                 if (carp_suppress_preempt == 1) {
1400                                         carp_send_ad_all();
1401                                 }
1402                         }
1403                         sc->sc_sendad_success = 0;
1404                 } else {
1405                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1406                                 if (++sc->sc_sendad_success >=
1407                                     CARP_SENDAD_MIN_SUCCESS) {
1408                                         carp_suppress_preempt--;
1409                                         sc->sc_sendad_errors = 0;
1410                                 }
1411                         } else {
1412                                 sc->sc_sendad_errors = 0;
1413                         }
1414                 }
1415         }
1416 #endif /* INET */
1417 #ifdef INET6
1418         if (sc->sc_ia6) {
1419                 struct ip6_hdr *ip6;
1420
1421                 MGETHDR(m, M_NOWAIT, MT_HEADER);
1422                 if (m == NULL) {
1423                         IFNET_STAT_INC(cifp, oerrors, 1);
1424                         carpstats.carps_onomem++;
1425                         /* XXX maybe less ? */
1426                         if (advbase != 255 || advskew != 255)
1427                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1428                                     carp_send_ad_timeout, sc);
1429                         return;
1430                 }
1431                 len = sizeof(*ip6) + sizeof(ch);
1432                 m->m_pkthdr.len = len;
1433                 m->m_pkthdr.rcvif = NULL;
1434                 m->m_len = len;
1435                 MH_ALIGN(m, m->m_len);
1436                 m->m_flags |= M_MCAST;
1437                 ip6 = mtod(m, struct ip6_hdr *);
1438                 bzero(ip6, sizeof(*ip6));
1439                 ip6->ip6_vfc |= IPV6_VERSION;
1440                 ip6->ip6_hlim = CARP_DFLTTL;
1441                 ip6->ip6_nxt = IPPROTO_CARP;
1442                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1443                     sizeof(struct in6_addr));
1444                 /* set the multicast destination */
1445
1446                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1447                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1448                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1449                         IFNET_STAT_INC(cifp, oerrors, 1);
1450                         m_freem(m);
1451                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1452                         return;
1453                 }
1454
1455                 ch_ptr = (struct carp_header *)(&ip6[1]);
1456                 bcopy(&ch, ch_ptr, sizeof(ch));
1457                 carp_prepare_ad(sc, ch_ptr);
1458                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1459
1460                 getmicrotime(&cifp->if_lastchange);
1461                 IFNET_STAT_INC(cifp, opackets, 1);
1462                 IFNET_STAT_INC(cifp, obytes, len);
1463                 carpstats.carps_opackets6++;
1464
1465                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1466                         IFNET_STAT_INC(cifp, oerrors, 1);
1467                         if (sc->sc_sendad_errors < INT_MAX)
1468                                 sc->sc_sendad_errors++;
1469                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1470                                 carp_suppress_preempt++;
1471                                 if (carp_suppress_preempt == 1) {
1472                                         carp_send_ad_all();
1473                                 }
1474                         }
1475                         sc->sc_sendad_success = 0;
1476                 } else {
1477                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1478                                 if (++sc->sc_sendad_success >=
1479                                     CARP_SENDAD_MIN_SUCCESS) {
1480                                         carp_suppress_preempt--;
1481                                         sc->sc_sendad_errors = 0;
1482                                 }
1483                         } else {
1484                                 sc->sc_sendad_errors = 0;
1485                         }
1486                 }
1487         }
1488 #endif /* INET6 */
1489
1490         if (advbase != 255 || advskew != 255)
1491                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1492                     carp_send_ad_timeout, sc);
1493 }
1494
1495 /*
1496  * Broadcast a gratuitous ARP request containing
1497  * the virtual router MAC address for each IP address
1498  * associated with the virtual router.
1499  */
1500 static void
1501 carp_send_arp(struct carp_softc *sc)
1502 {
1503         const struct carp_vhaddr *vha;
1504
1505         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1506                 if (vha->vha_iaback == NULL)
1507                         continue;
1508                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1509         }
1510 }
1511
1512 #ifdef INET6
1513 static void
1514 carp_send_na(struct carp_softc *sc)
1515 {
1516         struct ifaddr_container *ifac;
1517         struct in6_addr *in6;
1518         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1519
1520         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1521                 struct ifaddr *ifa = ifac->ifa;
1522
1523                 if (ifa->ifa_addr->sa_family != AF_INET6)
1524                         continue;
1525
1526                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1527                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1528                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1529                 DELAY(1000);    /* XXX */
1530         }
1531 }
1532 #endif /* INET6 */
1533
1534 static __inline const struct carp_vhaddr *
1535 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1536 {
1537         struct carp_vhaddr *vha;
1538
1539         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1540                 if (vha->vha_iaback == NULL)
1541                         continue;
1542
1543                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1544                         return vha;
1545         }
1546         return NULL;
1547 }
1548
1549 #ifdef notyet
1550 static int
1551 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1552                      const struct in_addr *isaddr, uint8_t **enaddr)
1553 {
1554         const struct carp_softc *vh;
1555         int index, count = 0;
1556
1557         /*
1558          * XXX proof of concept implementation.
1559          * We use the source ip to decide which virtual host should
1560          * handle the request. If we're master of that virtual host,
1561          * then we respond, otherwise, just drop the arp packet on
1562          * the floor.
1563          */
1564
1565         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1566                 if (!CARP_IS_RUNNING(&vh->sc_if))
1567                         continue;
1568
1569                 if (carp_find_addr(vh, itaddr) != NULL)
1570                         count++;
1571         }
1572         if (count == 0)
1573                 return 0;
1574
1575         /* this should be a hash, like pf_hash() */
1576         index = ntohl(isaddr->s_addr) % count;
1577         count = 0;
1578
1579         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1580                 if (!CARP_IS_RUNNING(&vh->sc_if))
1581                         continue;
1582
1583                 if (carp_find_addr(vh, itaddr) == NULL)
1584                         continue;
1585
1586                 if (count == index) {
1587                         if (vh->sc_state == MASTER) {
1588                                 *enaddr = IF_LLADDR(&vh->sc_if);
1589                                 return 1;
1590                         } else {
1591                                 return 0;
1592                         }
1593                 }
1594                 count++;
1595         }
1596         return 0;
1597 }
1598 #endif
1599
1600 int
1601 carp_iamatch(const struct in_ifaddr *ia)
1602 {
1603         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1604
1605         ASSERT_IN_NETISR(0);
1606
1607 #ifdef notyet
1608         if (carp_opts[CARPCTL_ARPBALANCE])
1609                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1610 #endif
1611
1612         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1613                 return 0;
1614
1615         return 1;
1616 }
1617
1618 #ifdef INET6
1619 struct ifaddr *
1620 carp_iamatch6(void *v, struct in6_addr *taddr)
1621 {
1622 #ifdef foo
1623         struct carp_if *cif = v;
1624         struct carp_softc *vh;
1625
1626         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1627                 struct ifaddr_container *ifac;
1628
1629                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1630                               ifa_link) {
1631                         struct ifaddr *ifa = ifac->ifa;
1632
1633                         if (IN6_ARE_ADDR_EQUAL(taddr,
1634                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1635                             CARP_IS_RUNNING(&vh->sc_if) &&
1636                             vh->sc_state == MASTER) {
1637                                 return (ifa);
1638                         }
1639                 }
1640         }
1641 #endif
1642         return (NULL);
1643 }
1644
1645 void *
1646 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1647 {
1648 #ifdef foo
1649         struct m_tag *mtag;
1650         struct carp_if *cif = v;
1651         struct carp_softc *sc;
1652
1653         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1654                 struct ifaddr_container *ifac;
1655
1656                 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1657                               ifa_link) {
1658                         struct ifaddr *ifa = ifac->ifa;
1659
1660                         if (IN6_ARE_ADDR_EQUAL(taddr,
1661                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1662                             CARP_IS_RUNNING(&sc->sc_if)) {
1663                                 struct ifnet *ifp = &sc->sc_if;
1664
1665                                 mtag = m_tag_get(PACKET_TAG_CARP,
1666                                     sizeof(struct ifnet *), M_NOWAIT);
1667                                 if (mtag == NULL) {
1668                                         /* better a bit than nothing */
1669                                         return (IF_LLADDR(ifp));
1670                                 }
1671                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1672                                     sizeof(struct ifnet *));
1673                                 m_tag_prepend(m, mtag);
1674
1675                                 return (IF_LLADDR(ifp));
1676                         }
1677                 }
1678         }
1679 #endif
1680         return (NULL);
1681 }
1682 #endif
1683
1684 static struct ifnet *
1685 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1686 {
1687         struct carp_softc_container *scc;
1688
1689         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1690                 return NULL;
1691
1692         TAILQ_FOREACH(scc, cif, scc_link) {
1693                 struct carp_softc *sc = scc->scc_softc;
1694                 struct ifnet *ifp = &sc->sc_if;
1695
1696                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1697                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1698                         return ifp;
1699         }
1700         return NULL;
1701 }
1702
1703 static void
1704 carp_master_down_timeout(void *xsc)
1705 {
1706         struct carp_softc *sc = xsc;
1707         struct netmsg_carp *cmsg = &sc->sc_md_msg;
1708
1709         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1710             __func__, mycpuid));
1711
1712         crit_enter();
1713         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1714                 lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1715         crit_exit();
1716 }
1717
1718 static void
1719 carp_master_down_timeout_dispatch(netmsg_t msg)
1720 {
1721         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1722         struct carp_softc *sc = cmsg->nc_softc;
1723
1724         /* Reply ASAP */
1725         crit_enter();
1726         lwkt_replymsg(&cmsg->base.lmsg, 0);
1727         crit_exit();
1728
1729         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1730                    sc->sc_if.if_xname);
1731         carp_master_down(sc);
1732 }
1733
1734 static void
1735 carp_master_down(struct carp_softc *sc)
1736 {
1737         switch (sc->sc_state) {
1738         case INIT:
1739                 kprintf("%s: master_down event in INIT state\n",
1740                         sc->sc_if.if_xname);
1741                 break;
1742
1743         case MASTER:
1744                 break;
1745
1746         case BACKUP:
1747                 carp_set_state(sc, MASTER);
1748                 carp_send_ad(sc);
1749                 carp_send_arp(sc);
1750 #ifdef INET6
1751                 carp_send_na(sc);
1752 #endif /* INET6 */
1753                 carp_setrun(sc, 0);
1754                 carp_setroute(sc, RTM_ADD);
1755                 break;
1756         }
1757 }
1758
1759 /*
1760  * When in backup state, af indicates whether to reset the master down timer
1761  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1762  */
1763 static void
1764 carp_setrun(struct carp_softc *sc, sa_family_t af)
1765 {
1766         struct ifnet *cifp = &sc->sc_if;
1767         struct timeval tv;
1768
1769         if (sc->sc_carpdev == NULL) {
1770                 carp_set_state(sc, INIT);
1771                 return;
1772         }
1773
1774         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1775             (sc->sc_naddrs || sc->sc_naddrs6)) {
1776                 /* Nothing */
1777         } else {
1778                 carp_setroute(sc, RTM_DELETE);
1779                 return;
1780         }
1781
1782         switch (sc->sc_state) {
1783         case INIT:
1784                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1785                         carp_send_ad(sc);
1786                         carp_send_arp(sc);
1787 #ifdef INET6
1788                         carp_send_na(sc);
1789 #endif /* INET6 */
1790                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1791                                    cifp->if_xname);
1792                         carp_set_state(sc, MASTER);
1793                         carp_setroute(sc, RTM_ADD);
1794                 } else {
1795                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1796                         carp_set_state(sc, BACKUP);
1797                         carp_setroute(sc, RTM_DELETE);
1798                         carp_setrun(sc, 0);
1799                 }
1800                 break;
1801
1802         case BACKUP:
1803                 callout_stop(&sc->sc_ad_tmo);
1804                 tv.tv_sec = 3 * sc->sc_advbase;
1805                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1806                 switch (af) {
1807 #ifdef INET
1808                 case AF_INET:
1809                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1810                             carp_master_down_timeout, sc);
1811                         break;
1812 #endif /* INET */
1813 #ifdef INET6
1814                 case AF_INET6:
1815                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1816                             carp_master_down_timeout, sc);
1817                         break;
1818 #endif /* INET6 */
1819                 default:
1820                         if (sc->sc_naddrs)
1821                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1822                                     carp_master_down_timeout, sc);
1823                         if (sc->sc_naddrs6)
1824                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1825                                     carp_master_down_timeout, sc);
1826                         break;
1827                 }
1828                 break;
1829
1830         case MASTER:
1831                 tv.tv_sec = sc->sc_advbase;
1832                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1833                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1834                     carp_send_ad_timeout, sc);
1835                 break;
1836         }
1837 }
1838
1839 static void
1840 carp_multicast_cleanup(struct carp_softc *sc)
1841 {
1842         struct ip_moptions *imo = &sc->sc_imo;
1843
1844         if (imo->imo_num_memberships == 0)
1845                 return;
1846         KKASSERT(imo->imo_num_memberships == 1);
1847
1848         in_delmulti(imo->imo_membership[0]);
1849         imo->imo_membership[0] = NULL;
1850         imo->imo_num_memberships = 0;
1851         imo->imo_multicast_ifp = NULL;
1852 }
1853
1854 #ifdef INET6
1855 static void
1856 carp_multicast6_cleanup(struct carp_softc *sc)
1857 {
1858         struct ip6_moptions *im6o = &sc->sc_im6o;
1859
1860         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1861                 struct in6_multi_mship *imm =
1862                     LIST_FIRST(&im6o->im6o_memberships);
1863
1864                 LIST_REMOVE(imm, i6mm_chain);
1865                 in6_leavegroup(imm);
1866         }
1867         im6o->im6o_multicast_ifp = NULL;
1868 }
1869 #endif
1870
1871 static void
1872 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1873 {
1874         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1875         struct carp_softc *sc = cmsg->nc_softc;
1876         const struct carp_vhaddr *vha;
1877         struct ifcarpvhaddr *carpa, *carpa0;
1878         int count, len, error = 0;
1879
1880         count = 0;
1881         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1882                 ++count;
1883
1884         if (cmsg->nc_datalen == 0) {
1885                 cmsg->nc_datalen = count * sizeof(*carpa);
1886                 goto back;
1887         } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1888                 cmsg->nc_datalen = 0;
1889                 goto back;
1890         }
1891         len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1892         KKASSERT(len >= sizeof(*carpa));
1893
1894         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1895         if (carpa == NULL) {
1896                 error = ENOMEM; 
1897                 goto back;
1898         }
1899
1900         count = 0;
1901         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1902                 if (len < sizeof(*carpa))
1903                         break;
1904
1905                 carpa->carpa_flags = vha->vha_flags;
1906                 carpa->carpa_addr.sin_family = AF_INET;
1907                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1908
1909                 carpa->carpa_baddr.sin_family = AF_INET;
1910                 if (vha->vha_iaback == NULL) {
1911                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1912                 } else {
1913                         carpa->carpa_baddr.sin_addr =
1914                         vha->vha_iaback->ia_addr.sin_addr;
1915                 }
1916
1917                 ++carpa;
1918                 ++count;
1919                 len -= sizeof(*carpa);
1920         }
1921         cmsg->nc_datalen = sizeof(*carpa) * count;
1922         KKASSERT(cmsg->nc_datalen > 0);
1923
1924         cmsg->nc_data = carpa0;
1925
1926 back:
1927         lwkt_replymsg(&cmsg->base.lmsg, error);
1928 }
1929
1930 static int
1931 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1932 {
1933         struct ifnet *ifp = &sc->arpcom.ac_if;
1934         struct netmsg_carp cmsg;
1935         int error;
1936
1937         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1938         ifnet_deserialize_all(ifp);
1939
1940         bzero(&cmsg, sizeof(cmsg));
1941         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1942             carp_ioctl_getvhaddr_dispatch);
1943         cmsg.nc_softc = sc;
1944         cmsg.nc_datalen = ifd->ifd_len;
1945
1946         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1947
1948         if (!error) {
1949                 if (cmsg.nc_data != NULL) {
1950                         error = copyout(cmsg.nc_data, ifd->ifd_data,
1951                             cmsg.nc_datalen);
1952                         kfree(cmsg.nc_data, M_TEMP);
1953                 }
1954                 ifd->ifd_len = cmsg.nc_datalen;
1955         } else {
1956                 KASSERT(cmsg.nc_data == NULL,
1957                     ("%s temp vhaddr is alloc upon error", __func__));
1958         }
1959
1960         ifnet_serialize_all(ifp);
1961         return error;
1962 }
1963
1964 static int
1965 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1966     struct in_ifaddr *ia_del)
1967 {
1968         struct ifnet *ifp;
1969         struct in_ifaddr *ia_if;
1970         const struct in_ifaddr *ia_vha;
1971         struct in_ifaddr_container *iac;
1972         int own, ia_match_carpdev;
1973
1974         KKASSERT(vha->vha_ia != NULL);
1975         ia_vha = vha->vha_ia;
1976
1977         ia_if = NULL;
1978         own = 0;
1979         ia_match_carpdev = 0;
1980         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1981                 struct in_ifaddr *ia = iac->ia;
1982
1983                 if (ia == ia_del)
1984                         continue;
1985
1986                 if (ia->ia_ifp->if_type == IFT_CARP)
1987                         continue;
1988
1989                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1990                         continue;
1991
1992                 /* and, yeah, we need a multicast-capable iface too */
1993                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1994                         continue;
1995
1996                 if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
1997                     ia_vha->ia_subnet == ia->ia_subnet) {
1998                         if (ia_vha->ia_addr.sin_addr.s_addr ==
1999                             ia->ia_addr.sin_addr.s_addr)
2000                                 own = 1;
2001                         if (ia_if == NULL) {
2002                                 ia_if = ia;
2003                         } else if (sc->sc_carpdev != NULL &&
2004                             sc->sc_carpdev == ia->ia_ifp) {
2005                                 ia_if = ia;
2006                                 if (ia_if->ia_flags & IFA_ROUTE) {
2007                                         /*
2008                                          * Address with prefix route
2009                                          * is prefered
2010                                          */
2011                                         break;
2012                                 }
2013                                 ia_match_carpdev = 1;
2014                         } else if (!ia_match_carpdev) {
2015                                 if (ia->ia_flags & IFA_ROUTE) {
2016                                         /*
2017                                          * Address with prefix route
2018                                          * is prefered over others.
2019                                          */
2020                                         ia_if = ia;
2021                                 }
2022                         }
2023                 }
2024         }
2025
2026         carp_deactivate_vhaddr(sc, vha, FALSE);
2027         if (!ia_if)
2028                 return ENOENT;
2029
2030         ifp = ia_if->ia_ifp;
2031
2032         /* XXX Don't allow parent iface to be changed */
2033         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2034                 return EEXIST;
2035
2036         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2037 }
2038
2039 static void
2040 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2041 {
2042         struct carp_vhaddr *vha_new;
2043         struct in_ifaddr *carp_ia;
2044 #ifdef INVARIANTS
2045         struct carp_vhaddr *vha;
2046 #endif
2047
2048         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2049         carp_ia = ifatoia(carp_ifa);
2050
2051 #ifdef INVARIANTS
2052         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2053                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2054 #endif
2055
2056         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2057         vha_new->vha_ia = carp_ia;
2058         carp_insert_vhaddr(sc, vha_new);
2059
2060         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2061                 /*
2062                  * If the above configuration fails, it may only mean
2063                  * that the new address is problematic.  However, the
2064                  * carp(4) interface may already have several working
2065                  * addresses.  Since the expected behaviour of
2066                  * SIOC[AS]IFADDR is to put the NIC into working state,
2067                  * we try starting the state machine manually here with
2068                  * the hope that the carp(4)'s previously working
2069                  * addresses still could be brought up.
2070                  */
2071                 carp_hmac_prepare(sc);
2072                 carp_set_state(sc, INIT);
2073                 carp_setrun(sc, 0);
2074         }
2075 }
2076
2077 static void
2078 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2079 {
2080         struct carp_vhaddr *vha;
2081         struct in_ifaddr *carp_ia;
2082
2083         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2084         carp_ia = ifatoia(carp_ifa);
2085
2086         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2087                 KKASSERT(vha->vha_ia != NULL);
2088                 if (vha->vha_ia == carp_ia)
2089                         break;
2090         }
2091         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2092
2093         /*
2094          * Remove the vhaddr from the list before deactivating
2095          * the vhaddr, so that the HMAC could be correctly
2096          * updated in carp_deactivate_vhaddr()
2097          */
2098         carp_remove_vhaddr(sc, vha);
2099
2100         carp_deactivate_vhaddr(sc, vha, FALSE);
2101         kfree(vha, M_CARP);
2102 }
2103
2104 static void
2105 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2106 {
2107         struct carp_vhaddr *vha;
2108         struct in_ifaddr *carp_ia;
2109
2110         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2111         carp_ia = ifatoia(carp_ifa);
2112
2113         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2114                 KKASSERT(vha->vha_ia != NULL);
2115                 if (vha->vha_ia == carp_ia)
2116                         break;
2117         }
2118         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2119
2120         /* Remove then reinsert, to keep the vhaddr list sorted */
2121         carp_remove_vhaddr(sc, vha);
2122         carp_insert_vhaddr(sc, vha);
2123
2124         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2125                 /* See the comment in carp_add_addr() */
2126                 carp_hmac_prepare(sc);
2127                 carp_set_state(sc, INIT);
2128                 carp_setrun(sc, 0);
2129         }
2130 }
2131
2132 #ifdef notyet
2133
2134 #ifdef INET6
2135 static int
2136 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2137 {
2138         struct ifnet *ifp;
2139         struct carp_if *cif;
2140         struct in6_ifaddr *ia, *ia_if;
2141         struct ip6_moptions *im6o = &sc->sc_im6o;
2142         struct in6_multi_mship *imm;
2143         struct in6_addr in6;
2144         int own, error;
2145
2146         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2147                 carp_setrun(sc, 0);
2148                 return (0);
2149         }
2150
2151         /* we have to do it by hands to check we won't match on us */
2152         ia_if = NULL; own = 0;
2153         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2154                 int i;
2155
2156                 for (i = 0; i < 4; i++) {
2157                         if ((sin6->sin6_addr.s6_addr32[i] &
2158                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2159                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
2160                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2161                                 break;
2162                 }
2163                 /* and, yeah, we need a multicast-capable iface too */
2164                 if (ia->ia_ifp != &sc->sc_if &&
2165                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2166                     (i == 4)) {
2167                         if (!ia_if)
2168                                 ia_if = ia;
2169                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2170                             &ia->ia_addr.sin6_addr))
2171                                 own++;
2172                 }
2173         }
2174
2175         if (!ia_if)
2176                 return (EADDRNOTAVAIL);
2177         ia = ia_if;
2178         ifp = ia->ia_ifp;
2179
2180         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2181             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2182                 return (EADDRNOTAVAIL);
2183
2184         if (!sc->sc_naddrs6) {
2185                 im6o->im6o_multicast_ifp = ifp;
2186
2187                 /* join CARP multicast address */
2188                 bzero(&in6, sizeof(in6));
2189                 in6.s6_addr16[0] = htons(0xff02);
2190                 in6.s6_addr8[15] = 0x12;
2191                 if (in6_setscope(&in6, ifp, NULL) != 0)
2192                         goto cleanup;
2193                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2194                         goto cleanup;
2195                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2196
2197                 /* join solicited multicast address */
2198                 bzero(&in6, sizeof(in6));
2199                 in6.s6_addr16[0] = htons(0xff02);
2200                 in6.s6_addr32[1] = 0;
2201                 in6.s6_addr32[2] = htonl(1);
2202                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2203                 in6.s6_addr8[12] = 0xff;
2204                 if (in6_setscope(&in6, ifp, NULL) != 0)
2205                         goto cleanup;
2206                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2207                         goto cleanup;
2208                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2209         }
2210
2211 #ifdef foo
2212         if (!ifp->if_carp) {
2213                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2214
2215                 if ((error = ifpromisc(ifp, 1))) {
2216                         kfree(cif, M_CARP);
2217                         goto cleanup;
2218                 }
2219
2220                 TAILQ_INIT(&cif->vhif_vrs);
2221                 ifp->if_carp = cif;
2222         } else {
2223                 struct carp_softc *vr;
2224
2225                 cif = ifp->if_carp;
2226                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2227                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2228                                 error = EINVAL;
2229                                 goto cleanup;
2230                         }
2231                 }
2232         }
2233 #endif
2234         sc->sc_ia6 = ia;
2235         sc->sc_carpdev = ifp;
2236
2237 #ifdef foo
2238         { /* XXX prevent endless loop if already in queue */
2239         struct carp_softc *vr, *after = NULL;
2240         int myself = 0;
2241         cif = ifp->if_carp;
2242
2243         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2244                 if (vr == sc)
2245                         myself = 1;
2246                 if (vr->sc_vhid < sc->sc_vhid)
2247                         after = vr;
2248         }
2249
2250         if (!myself) {
2251                 /* We're trying to keep things in order */
2252                 if (after == NULL)
2253                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2254                 else
2255                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2256         }
2257         }
2258 #endif
2259
2260         sc->sc_naddrs6++;
2261         if (own)
2262                 sc->sc_advskew = 0;
2263         carp_sc_state(sc);
2264         carp_setrun(sc, 0);
2265
2266         return (0);
2267
2268 cleanup:
2269         /* clean up multicast memberships */
2270         if (!sc->sc_naddrs6) {
2271                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2272                         imm = LIST_FIRST(&im6o->im6o_memberships);
2273                         LIST_REMOVE(imm, i6mm_chain);
2274                         in6_leavegroup(imm);
2275                 }
2276         }
2277         return (error);
2278 }
2279
2280 static int
2281 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2282 {
2283         int error = 0;
2284
2285         if (!--sc->sc_naddrs6) {
2286                 struct carp_if *cif = sc->sc_carpdev->if_carp;
2287                 struct ip6_moptions *im6o = &sc->sc_im6o;
2288
2289                 callout_stop(&sc->sc_ad_tmo);
2290                 sc->sc_vhid = -1;
2291                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2292                         struct in6_multi_mship *imm =
2293                             LIST_FIRST(&im6o->im6o_memberships);
2294
2295                         LIST_REMOVE(imm, i6mm_chain);
2296                         in6_leavegroup(imm);
2297                 }
2298                 im6o->im6o_multicast_ifp = NULL;
2299 #ifdef foo
2300                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2301                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2302                         sc->sc_carpdev->if_carp = NULL;
2303                         kfree(cif, M_IFADDR);
2304                 }
2305 #endif
2306         }
2307         return (error);
2308 }
2309 #endif /* INET6 */
2310
2311 #endif
2312
2313 static int
2314 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2315 {
2316         struct carp_softc *sc = ifp->if_softc;
2317         struct ifreq *ifr = (struct ifreq *)addr;
2318         struct ifdrv *ifd = (struct ifdrv *)addr;
2319         int error = 0;
2320
2321         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2322
2323         switch (cmd) {
2324         case SIOCSIFFLAGS:
2325                 if (ifp->if_flags & IFF_UP) {
2326                         if ((ifp->if_flags & IFF_RUNNING) == 0)
2327                                 carp_init(sc);
2328                 } else if (ifp->if_flags & IFF_RUNNING) {
2329                         carp_ioctl_stop(sc);
2330                 }
2331                 break;
2332
2333         case SIOCSIFCAP:
2334                 carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2335                 break;
2336
2337         case SIOCSVH:
2338                 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2339                 break;
2340
2341         case SIOCGVH:
2342                 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2343                 break;
2344
2345         case SIOCGDRVSPEC:
2346                 switch (ifd->ifd_cmd) {
2347                 case CARPGDEVNAME:
2348                         error = carp_ioctl_getdevname(sc, ifd);
2349                         break;
2350
2351                 case CARPGVHADDR:
2352                         error = carp_ioctl_getvhaddr(sc, ifd);
2353                         break;
2354
2355                 default:
2356                         error = EINVAL;
2357                         break;
2358                 }
2359                 break;
2360
2361         default:
2362                 error = ether_ioctl(ifp, cmd, addr);
2363                 break;
2364         }
2365
2366         return error;
2367 }
2368
2369 static void
2370 carp_ioctl_stop_dispatch(netmsg_t msg)
2371 {
2372         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2373         struct carp_softc *sc = cmsg->nc_softc;
2374
2375         carp_stop(sc, FALSE);
2376         lwkt_replymsg(&cmsg->base.lmsg, 0);
2377 }
2378
2379 static void
2380 carp_ioctl_stop(struct carp_softc *sc)
2381 {
2382         struct ifnet *ifp = &sc->arpcom.ac_if;
2383         struct netmsg_carp cmsg;
2384
2385         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2386
2387         ifnet_deserialize_all(ifp);
2388
2389         bzero(&cmsg, sizeof(cmsg));
2390         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2391             carp_ioctl_stop_dispatch);
2392         cmsg.nc_softc = sc;
2393
2394         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2395
2396         ifnet_serialize_all(ifp);
2397 }
2398
2399 static void
2400 carp_ioctl_setvh_dispatch(netmsg_t msg)
2401 {
2402         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2403         struct carp_softc *sc = cmsg->nc_softc;
2404         struct ifnet *ifp = &sc->arpcom.ac_if;
2405         const struct carpreq *carpr = cmsg->nc_data;
2406         int error;
2407
2408         error = 1;
2409         if ((ifp->if_flags & IFF_RUNNING) &&
2410             sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2411                 switch (carpr->carpr_state) {
2412                 case BACKUP:
2413                         callout_stop(&sc->sc_ad_tmo);
2414                         carp_set_state(sc, BACKUP);
2415                         carp_setrun(sc, 0);
2416                         carp_setroute(sc, RTM_DELETE);
2417                         break;
2418
2419                 case MASTER:
2420                         carp_master_down(sc);
2421                         break;
2422
2423                 default:
2424                         break;
2425                 }
2426         }
2427         if (carpr->carpr_vhid > 0) {
2428                 if (carpr->carpr_vhid > 255) {
2429                         error = EINVAL;
2430                         goto back;
2431                 }
2432                 if (sc->sc_carpdev) {
2433                         struct carp_if *cif = sc->sc_carpdev->if_carp;
2434                         struct carp_softc_container *scc;
2435
2436                         TAILQ_FOREACH(scc, cif, scc_link) {
2437                                 struct carp_softc *vr = scc->scc_softc;
2438
2439                                 if (vr != sc &&
2440                                     vr->sc_vhid == carpr->carpr_vhid) {
2441                                         error = EEXIST;
2442                                         goto back;
2443                                 }
2444                         }
2445                 }
2446                 sc->sc_vhid = carpr->carpr_vhid;
2447
2448                 IF_LLADDR(ifp)[5] = sc->sc_vhid;
2449                 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2450                     ETHER_ADDR_LEN);
2451
2452                 error--;
2453         }
2454         if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2455                 if (carpr->carpr_advskew >= 255) {
2456                         error = EINVAL;
2457                         goto back;
2458                 }
2459                 if (carpr->carpr_advbase > 255) {
2460                         error = EINVAL;
2461                         goto back;
2462                 }
2463                 sc->sc_advbase = carpr->carpr_advbase;
2464                 sc->sc_advskew = carpr->carpr_advskew;
2465                 error--;
2466         }
2467         bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2468         if (error > 0) {
2469                 error = EINVAL;
2470         } else {
2471                 error = 0;
2472                 carp_setrun(sc, 0);
2473         }
2474 back:
2475         carp_hmac_prepare(sc);
2476
2477         lwkt_replymsg(&cmsg->base.lmsg, error);
2478 }
2479
2480 static int
2481 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2482 {
2483         struct ifnet *ifp = &sc->arpcom.ac_if;
2484         struct netmsg_carp cmsg;
2485         struct carpreq carpr;
2486         int error;
2487
2488         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2489         ifnet_deserialize_all(ifp);
2490
2491         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2492         if (error)
2493                 goto back;
2494
2495         error = copyin(udata, &carpr, sizeof(carpr));
2496         if (error)
2497                 goto back;
2498
2499         bzero(&cmsg, sizeof(cmsg));
2500         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2501             carp_ioctl_setvh_dispatch);
2502         cmsg.nc_softc = sc;
2503         cmsg.nc_data = &carpr;
2504
2505         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2506
2507 back:
2508         ifnet_serialize_all(ifp);
2509         return error;
2510 }
2511
2512 static void
2513 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2514 {
2515         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2516         struct carp_softc *sc = cmsg->nc_softc;
2517         struct ifnet *ifp = &sc->arpcom.ac_if;
2518         int reqcap = *((const int *)(cmsg->nc_data));
2519         int mask;
2520
2521         mask = reqcap ^ ifp->if_capenable;
2522         if (mask & IFCAP_TXCSUM) {
2523                 ifp->if_capenable ^= IFCAP_TXCSUM;
2524                 if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2525                     sc->sc_carpdev != NULL) {
2526                         ifp->if_hwassist |=
2527                             (sc->sc_carpdev->if_hwassist &
2528                              (CSUM_IP | CSUM_UDP | CSUM_TCP));
2529                 } else {
2530                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2531                 }
2532         }
2533         if (mask & IFCAP_TSO) {
2534                 ifp->if_capenable ^= IFCAP_TSO;
2535                 if ((ifp->if_capenable & IFCAP_TSO) &&
2536                     sc->sc_carpdev != NULL) {
2537                         ifp->if_hwassist |=
2538                             (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2539                 } else {
2540                         ifp->if_hwassist &= ~CSUM_TSO;
2541                 }
2542         }
2543
2544         lwkt_replymsg(&cmsg->base.lmsg, 0);
2545 }
2546
2547 static void
2548 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2549 {
2550         struct ifnet *ifp = &sc->arpcom.ac_if;
2551         struct netmsg_carp cmsg;
2552
2553         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2554         ifnet_deserialize_all(ifp);
2555
2556         bzero(&cmsg, sizeof(cmsg));
2557         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2558             carp_ioctl_ifcap_dispatch);
2559         cmsg.nc_softc = sc;
2560         cmsg.nc_data = &reqcap;
2561
2562         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2563
2564         ifnet_serialize_all(ifp);
2565 }
2566
2567 static void
2568 carp_ioctl_getvh_dispatch(netmsg_t msg)
2569 {
2570         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2571         struct carp_softc *sc = cmsg->nc_softc;
2572         struct carpreq *carpr = cmsg->nc_data;
2573
2574         carpr->carpr_state = sc->sc_state;
2575         carpr->carpr_vhid = sc->sc_vhid;
2576         carpr->carpr_advbase = sc->sc_advbase;
2577         carpr->carpr_advskew = sc->sc_advskew;
2578         bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2579
2580         lwkt_replymsg(&cmsg->base.lmsg, 0);
2581 }
2582
2583 static int
2584 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2585 {
2586         struct ifnet *ifp = &sc->arpcom.ac_if;
2587         struct netmsg_carp cmsg;
2588         struct carpreq carpr;
2589         int error;
2590
2591         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2592         ifnet_deserialize_all(ifp);
2593
2594         bzero(&cmsg, sizeof(cmsg));
2595         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2596             carp_ioctl_getvh_dispatch);
2597         cmsg.nc_softc = sc;
2598         cmsg.nc_data = &carpr;
2599
2600         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2601
2602         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2603         if (error)
2604                 bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2605
2606         error = copyout(&carpr, udata, sizeof(carpr));
2607
2608         ifnet_serialize_all(ifp);
2609         return error;
2610 }
2611
2612 static void
2613 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2614 {
2615         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2616         struct carp_softc *sc = cmsg->nc_softc;
2617         char *devname = cmsg->nc_data;
2618
2619         bzero(devname, IFNAMSIZ);
2620         if (sc->sc_carpdev != NULL)
2621                 strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2622
2623         lwkt_replymsg(&cmsg->base.lmsg, 0);
2624 }
2625
2626 static int
2627 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2628 {
2629         struct ifnet *ifp = &sc->arpcom.ac_if;
2630         struct netmsg_carp cmsg;
2631         char devname[IFNAMSIZ];
2632         int error;
2633
2634         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2635
2636         if (ifd->ifd_len != sizeof(devname))
2637                 return EINVAL;
2638
2639         ifnet_deserialize_all(ifp);
2640
2641         bzero(&cmsg, sizeof(cmsg));
2642         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2643             carp_ioctl_getdevname_dispatch);
2644         cmsg.nc_softc = sc;
2645         cmsg.nc_data = devname;
2646
2647         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2648
2649         error = copyout(devname, ifd->ifd_data, sizeof(devname));
2650
2651         ifnet_serialize_all(ifp);
2652         return error;
2653 }
2654
2655 static void
2656 carp_init_dispatch(netmsg_t msg)
2657 {
2658         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2659         struct carp_softc *sc = cmsg->nc_softc;
2660
2661         sc->sc_if.if_flags |= IFF_RUNNING;
2662         carp_hmac_prepare(sc);
2663         carp_set_state(sc, INIT);
2664         carp_setrun(sc, 0);
2665
2666         lwkt_replymsg(&cmsg->base.lmsg, 0);
2667 }
2668
2669 static void
2670 carp_init(void *xsc)
2671 {
2672         struct carp_softc *sc = xsc;
2673         struct ifnet *ifp = &sc->arpcom.ac_if;
2674         struct netmsg_carp cmsg;
2675
2676         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2677
2678         ifnet_deserialize_all(ifp);
2679
2680         bzero(&cmsg, sizeof(cmsg));
2681         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2682             carp_init_dispatch);
2683         cmsg.nc_softc = sc;
2684
2685         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2686
2687         ifnet_serialize_all(ifp);
2688 }
2689
2690 static int
2691 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2692     struct rtentry *rt)
2693 {
2694         struct carp_softc *sc = ifp->if_softc;
2695         struct ifnet *carpdev;
2696         int error = 0;
2697
2698         carpdev = sc->sc_carpdev;
2699         if (carpdev != NULL) {
2700                 if (m->m_flags & M_MCAST)
2701                         IFNET_STAT_INC(ifp, omcasts, 1);
2702                 IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2703                 IFNET_STAT_INC(ifp, opackets, 1);
2704
2705                 /*
2706                  * NOTE:
2707                  * CARP's ifp is passed to backing device's
2708                  * if_output method.
2709                  */
2710                 carpdev->if_output(ifp, m, dst, rt);
2711         } else {
2712                 IFNET_STAT_INC(ifp, oerrors, 1);
2713                 m_freem(m);
2714                 error = ENETUNREACH;
2715         }
2716         return error;
2717 }
2718
2719 /*
2720  * Start output on carp interface. This function should never be called.
2721  */
2722 static void
2723 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2724 {
2725         panic("%s: start called", ifp->if_xname);
2726 }
2727
2728 static void
2729 carp_set_state(struct carp_softc *sc, int state)
2730 {
2731         struct ifnet *cifp = &sc->sc_if;
2732
2733         if (sc->sc_state == state)
2734                 return;
2735         sc->sc_state = state;
2736
2737         switch (sc->sc_state) {
2738         case BACKUP:
2739                 cifp->if_link_state = LINK_STATE_DOWN;
2740                 break;
2741
2742         case MASTER:
2743                 cifp->if_link_state = LINK_STATE_UP;
2744                 break;
2745
2746         default:
2747                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2748                 break;
2749         }
2750         rt_ifmsg(cifp);
2751 }
2752
2753 void
2754 carp_group_demote_adj(struct ifnet *ifp, int adj)
2755 {
2756         struct ifg_list *ifgl;
2757         int *dm;
2758
2759         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2760                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2761                         continue;
2762                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2763
2764                 if (*dm + adj >= 0)
2765                         *dm += adj;
2766                 else
2767                         *dm = 0;
2768
2769                 if (adj > 0 && *dm == 1)
2770                         carp_send_ad_all();
2771                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2772                     ifgl->ifgl_group->ifg_group, *dm);
2773         }
2774 }
2775
#ifdef foo
/*
 * NOTE: everything up to the matching #endif is compiled only when the
 * preprocessor symbol 'foo' is defined.
 */

/*
 * Re-evaluate the state of every virtual host on the carp_if 'v'.
 */
void
carp_carpdev_state(void *v)
{
	struct carp_if *cif = v;
	struct carp_softc *sc;

	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
		carp_sc_state(sc);
	}
}

/*
 * Restart 'sc' from the INIT state according to whether its backing
 * device is up, and keep the global preemption-suppress count in sync
 * with sc_suppress.
 */
static void
carp_sc_state(struct carp_softc *sc)
{
	if (sc->sc_carpdev->if_flags & IFF_UP) {
		/* Backing device is up: restart and stop suppressing. */
		carp_set_state(sc, INIT);
		carp_setrun(sc, 0);
		if (sc->sc_suppress)
			carp_suppress_preempt--;
		sc->sc_suppress = 0;
		return;
	}

	/* Backing device is down: stop the timers and start suppressing. */
	callout_stop(&sc->sc_ad_tmo);
	callout_stop(&sc->sc_md_tmo);
	callout_stop(&sc->sc_md6_tmo);
	carp_set_state(sc, INIT);
	carp_setrun(sc, 0);
	if (!sc->sc_suppress) {
		carp_suppress_preempt++;
		if (carp_suppress_preempt == 1)
			carp_send_ad_all();
	}
	sc->sc_suppress = 1;
}
#endif
2811
2812 static void
2813 carp_stop(struct carp_softc *sc, boolean_t detach)
2814 {
2815         sc->sc_if.if_flags &= ~IFF_RUNNING;
2816
2817         callout_stop(&sc->sc_ad_tmo);
2818         callout_stop(&sc->sc_md_tmo);
2819         callout_stop(&sc->sc_md6_tmo);
2820
2821         if (!detach && sc->sc_state == MASTER)
2822                 carp_send_ad(sc);
2823
2824         if (sc->sc_suppress)
2825                 carp_suppress_preempt--;
2826         sc->sc_suppress = 0;
2827
2828         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2829                 carp_suppress_preempt--;
2830         sc->sc_sendad_errors = 0;
2831         sc->sc_sendad_success = 0;
2832
2833         carp_set_state(sc, INIT);
2834         carp_setrun(sc, 0);
2835 }
2836
2837 static void
2838 carp_suspend(struct carp_softc *sc, boolean_t detach)
2839 {
2840         struct ifnet *cifp = &sc->sc_if;
2841
2842         carp_stop(sc, detach);
2843
2844         /* Retain the running state, if we are not dead yet */
2845         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2846                 cifp->if_flags |= IFF_RUNNING;
2847 }
2848
2849 static int
2850 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2851     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2852 {
2853         struct ip_moptions *imo = &sc->sc_imo;
2854         struct carp_if *ocif = ifp->if_carp;
2855         int error;
2856
2857         KKASSERT(vha->vha_ia != NULL);
2858
2859         KASSERT(ia_if != NULL, ("NULL backing address"));
2860         KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2861         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2862                 ("inactive vhaddr %p is the address owner", vha));
2863
2864         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2865                 ("%s is already on %s", sc->sc_if.if_xname,
2866                  sc->sc_carpdev->if_xname));
2867
2868         if (ocif == NULL) {
2869                 KASSERT(sc->sc_carpdev == NULL,
2870                         ("%s is already on %s", sc->sc_if.if_xname,
2871                          sc->sc_carpdev->if_xname));
2872
2873                 error = ifpromisc(ifp, 1);
2874                 if (error)
2875                         return error;
2876         } else {
2877                 struct carp_softc_container *scc;
2878
2879                 TAILQ_FOREACH(scc, ocif, scc_link) {
2880                         struct carp_softc *vr = scc->scc_softc;
2881
2882                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2883                                 return EINVAL;
2884                 }
2885         }
2886
2887         ifp->if_carp = carp_if_insert(ocif, sc);
2888         KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2889
2890         sc->sc_ia = ia_if;
2891         sc->sc_carpdev = ifp;
2892         sc->arpcom.ac_if.if_hwassist = 0;
2893         if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2894                 sc->arpcom.ac_if.if_hwassist |=
2895                     (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2896         }
2897         if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2898                 sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2899
2900         /*
2901          * Make sure that all protocol threads see the sc_carpdev and
2902          * if_carp changes
2903          */
2904         netmsg_service_sync();
2905
2906         if (ocif != NULL && ifp->if_carp != ocif) {
2907                 /*
2908                  * The old carp list could be safely free now,
2909                  * since no one can access it.
2910                  */
2911                 carp_if_free(ocif);
2912         }
2913
2914         vha->vha_iaback = ia_if;
2915         sc->sc_naddrs++;
2916
2917         if (own) {
2918                 vha->vha_flags |= CARP_VHAF_OWNER;
2919
2920                 /* XXX save user configured advskew? */
2921                 sc->sc_advskew = 0;
2922         }
2923
2924         carp_addroute_vhaddr(sc, vha);
2925
2926         /*
2927          * Join the multicast group only after the backing interface
2928          * has been hooked with the CARP interface.
2929          */
2930         KASSERT(imo->imo_multicast_ifp == NULL ||
2931                 imo->imo_multicast_ifp == &sc->sc_if,
2932                 ("%s didn't leave mcast group on %s",
2933                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2934
2935         if (imo->imo_num_memberships == 0) {
2936                 struct in_addr addr;
2937
2938                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2939                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2940                 if (imo->imo_membership[0] == NULL) {
2941                         carp_deactivate_vhaddr(sc, vha, FALSE);
2942                         return ENOBUFS;
2943                 }
2944
2945                 imo->imo_num_memberships++;
2946                 imo->imo_multicast_ifp = &sc->sc_if;
2947                 imo->imo_multicast_ttl = CARP_DFLTTL;
2948                 imo->imo_multicast_loop = 0;
2949         }
2950
2951         carp_hmac_prepare(sc);
2952         carp_set_state(sc, INIT);
2953         carp_setrun(sc, 0);
2954         return 0;
2955 }
2956
2957 static void
2958 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2959     boolean_t del_iaback)
2960 {
2961         KKASSERT(vha->vha_ia != NULL);
2962
2963         carp_hmac_prepare(sc);
2964
2965         if (vha->vha_iaback == NULL) {
2966                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2967                         ("inactive vhaddr %p is the address owner", vha));
2968                 return;
2969         }
2970
2971         vha->vha_flags &= ~CARP_VHAF_OWNER;
2972         carp_delroute_vhaddr(sc, vha, del_iaback);
2973
2974         KKASSERT(sc->sc_naddrs > 0);
2975         vha->vha_iaback = NULL;
2976         sc->sc_naddrs--;
2977         if (!sc->sc_naddrs) {
2978                 if (sc->sc_naddrs6) {
2979                         carp_multicast_cleanup(sc);
2980                         sc->sc_ia = NULL;
2981                 } else {
2982                         carp_detach(sc, FALSE, del_iaback);
2983                 }
2984         }
2985 }
2986
2987 static void
2988 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2989 {
2990         struct carp_vhaddr *vha;
2991         struct in_ifaddr *ia_if;
2992
2993         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2994         ia_if = ifatoia(ifa_if);
2995
2996         /*
2997          * Test each inactive vhaddr against the newly added address.
2998          * If the newly added address could be the backing address,
2999          * then activate the matching vhaddr.
3000          */
3001         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3002                 const struct in_ifaddr *ia;
3003                 int own;
3004
3005                 if (vha->vha_iaback != NULL)
3006                         continue;
3007
3008                 ia = vha->vha_ia;
3009                 if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3010                     ia->ia_subnet != ia_if->ia_subnet)
3011                         continue;
3012
3013                 own = 0;
3014                 if (ia->ia_addr.sin_addr.s_addr ==
3015                     ia_if->ia_addr.sin_addr.s_addr)
3016                         own = 1;
3017
3018                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3019         }
3020 }
3021
3022 static void
3023 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3024                   struct ifaddr *ifa_if)
3025 {
3026         struct carp_vhaddr *vha;
3027         struct in_ifaddr *ia_if;
3028
3029         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3030         ia_if = ifatoia(ifa_if);
3031
3032         /*
3033          * Ad src address is deleted; set it to NULL.
3034          * Following loop will try pick up a new ad src address
3035          * if one of the vhaddr could retain its backing address.
3036          */
3037         if (sc->sc_ia == ia_if)
3038                 sc->sc_ia = NULL;
3039
3040         /*
3041          * Test each active vhaddr against the deleted address.
3042          * If the deleted address is vhaddr address's backing
3043          * address, then deactivate the vhaddr.
3044          */
3045         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3046                 if (vha->vha_iaback == NULL)
3047                         continue;
3048
3049                 if (vha->vha_iaback == ia_if)
3050                         carp_deactivate_vhaddr(sc, vha, TRUE);
3051                 else if (sc->sc_ia == NULL)
3052                         sc->sc_ia = vha->vha_iaback;
3053         }
3054 }
3055
3056 static void
3057 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3058 {
3059         struct carp_vhaddr *vha;
3060
3061         KKASSERT(sc->sc_carpdev == NULL);
3062
3063         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3064                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3065 }
3066
3067 static void
3068 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3069             enum ifaddr_event event, struct ifaddr *ifa)
3070 {
3071         struct carp_softc *sc;
3072
3073         if (ifa->ifa_addr->sa_family != AF_INET)
3074                 return;
3075
3076         ASSERT_IN_NETISR(0);
3077
3078         if (ifp->if_type == IFT_CARP) {
3079                 /*
3080                  * Address is changed on carp(4) interface
3081                  */
3082                 switch (event) {
3083                 case IFADDR_EVENT_ADD:
3084                         carp_add_addr(ifp->if_softc, ifa);
3085                         break;
3086
3087                 case IFADDR_EVENT_CHANGE:
3088                         carp_config_addr(ifp->if_softc, ifa);
3089                         break;
3090
3091                 case IFADDR_EVENT_DELETE:
3092                         carp_del_addr(ifp->if_softc, ifa);
3093                         break;
3094                 }
3095                 return;
3096         }
3097
3098         /*
3099          * Address is changed on non-carp(4) interface
3100          */
3101         if ((ifp->if_flags & IFF_MULTICAST) == 0)
3102                 return;
3103
3104         LIST_FOREACH(sc, &carpif_list, sc_next) {
3105                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3106                         /* Not the parent iface; skip */
3107                         continue;
3108                 }
3109
3110                 switch (event) {
3111                 case IFADDR_EVENT_ADD:
3112                         carp_link_addrs(sc, ifp, ifa);
3113                         break;
3114
3115                 case IFADDR_EVENT_DELETE:
3116                         if (sc->sc_carpdev != NULL) {
3117                                 carp_unlink_addrs(sc, ifp, ifa);
3118                                 if (sc->sc_carpdev == NULL) {
3119                                         /*
3120                                          * We no longer have the parent
3121                                          * interface, however, certain
3122                                          * virtual addresses, which are
3123                                          * not used because they can't
3124                                          * match the previous parent
3125                                          * interface's addresses, may now
3126                                          * match different interface's
3127                                          * addresses.
3128                                          */
3129                                         carp_update_addrs(sc, ifa);
3130                                 }
3131                         } else {
3132                                 /*
3133                                  * The carp(4) interface didn't have a
3134                                  * parent iface, so it is not possible
3135                                  * that it will contain any address to
3136                                  * be unlinked.
3137                                  */
3138                         }
3139                         break;
3140
3141                 case IFADDR_EVENT_CHANGE:
3142                         if (sc->sc_carpdev == NULL) {
3143                                 /*
3144                                  * The carp(4) interface didn't have a
3145                                  * parent iface, so it is not possible
3146                                  * that it will contain any address to
3147                                  * be updated.
3148                                  */
3149                                 carp_link_addrs(sc, ifp, ifa);
3150                         } else {
3151                                 /*
3152                                  * First try breaking tie with the old
3153                                  * address.  Then see whether we could
3154                                  * link certain vhaddr to the new address.
3155                                  * If that fails, i.e. carpdev is NULL,
3156                                  * we try a global update.
3157                                  *
3158                                  * NOTE: The above order is critical.
3159                                  */
3160                                 carp_unlink_addrs(sc, ifp, ifa);
3161                                 carp_link_addrs(sc, ifp, ifa);
3162                                 if (sc->sc_carpdev == NULL) {
3163                                         /*
3164                                          * See the comment in the above
3165                                          * IFADDR_EVENT_DELETE block.
3166                                          */
3167                                         carp_update_addrs(sc, NULL);
3168                                 }
3169                         }
3170                         break;
3171                 }
3172         }
3173 }
3174
3175 void
3176 carp_proto_ctlinput(netmsg_t msg)
3177 {
3178         int cmd = msg->ctlinput.nm_cmd;
3179         struct sockaddr *sa = msg->ctlinput.nm_arg;
3180         struct in_ifaddr_container *iac;
3181
3182         /* We only process PRC_IFDOWN and PRC_IFUP commands */
3183         if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3184                 goto done;
3185
3186         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3187                 struct in_ifaddr *ia = iac->ia;
3188                 struct ifnet *ifp = ia->ia_ifp;
3189
3190                 if (ifp->if_type == IFT_CARP)
3191                         continue;
3192
3193                 if (ia->ia_ifa.ifa_addr == sa) {
3194                         if (cmd == PRC_IFDOWN) {
3195                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3196                                     &ia->ia_ifa);
3197                         } else if (cmd == PRC_IFUP) {
3198                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3199                                     &ia->ia_ifa);
3200                         }
3201                         break;
3202                 }
3203         }
3204 done:
3205         lwkt_replymsg(&msg->lmsg, 0);
3206 }
3207
3208 struct ifnet *
3209 carp_parent(struct ifnet *cifp)
3210 {
3211         struct carp_softc *sc;
3212
3213         KKASSERT(cifp->if_type == IFT_CARP);
3214         sc = cifp->if_softc;
3215
3216         return sc->sc_carpdev;
3217 }
3218
/*
 * Flags handed to rtinit() for in_ifaddr 'x': RTF_HOST when the
 * address's interface is a loopback or point-to-point interface,
 * otherwise 0.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		 ? RTF_HOST : 0)
3222
3223 static int
3224 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3225 {
3226         struct in_ifaddr *ia, *iaback;
3227
3228         if (sc->sc_state != MASTER)
3229                 return 0;
3230
3231         ia = vha->vha_ia;
3232         KKASSERT(ia != NULL);
3233
3234         iaback = vha->vha_iaback;
3235         KKASSERT(iaback != NULL);
3236
3237         return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3238 }
3239
3240 static void
3241 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3242     boolean_t del_iaback)
3243 {
3244         struct in_ifaddr *ia, *iaback;
3245
3246         ia = vha->vha_ia;
3247         KKASSERT(ia != NULL);
3248
3249         iaback = vha->vha_iaback;
3250         KKASSERT(iaback != NULL);
3251
3252         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3253                 rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3254                 return;
3255         }
3256
3257         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3258         in_ifadown_force(&ia->ia_ifa, 1);
3259         ia->ia_flags &= ~IFA_ROUTE;
3260 }
3261
3262 static int
3263 carp_modevent(module_t mod, int type, void *data)
3264 {
3265         switch (type) {
3266         case MOD_LOAD:
3267                 LIST_INIT(&carpif_list);
3268                 carp_ifdetach_event =
3269                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3270                                       EVENTHANDLER_PRI_ANY);
3271                 carp_ifaddr_event =
3272                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3273                                       EVENTHANDLER_PRI_FIRST);
3274                 if_clone_attach(&carp_cloner);
3275                 break;
3276
3277         case MOD_UNLOAD:
3278                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3279                                         carp_ifdetach_event);
3280                 EVENTHANDLER_DEREGISTER(ifaddr_event,
3281                                         carp_ifaddr_event);
3282                 if_clone_detach(&carp_cloner);
3283                 break;
3284
3285         default:
3286                 return (EINVAL);
3287         }
3288         return (0);
3289 }
3290
3291 static moduledata_t carp_mod = {
3292         "carp",
3293         carp_modevent,
3294         0
3295 };
3296 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);