openssl: Adjust manual pages for 1.0.1l.
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82
83 #include <netinet/ip_carp.h>
84
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl, add/delete inet addresses,
91  *   are serialized by netisr0; not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *      
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *      
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - if_carp accessing:
246  *   The accessing code should cache the if_carp in a local temporary
247  *   variable and access the temporary variable along the code path
248  *   instead of accessing if_carp later on.
249  */
250
251 #define CARP_IFNAME             "carp"
252 #define CARP_IS_RUNNING(ifp)    \
253         (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
254
255 struct carp_softc;
256
/*
 * One IPv4 virtual address of a carp interface.  Entries live on
 * carp_softc.sc_vha_list, which carp_insert_vhaddr() keeps sorted by
 * address (smaller one first).
 */
257 struct carp_vhaddr {
258         uint32_t                vha_flags;      /* CARP_VHAF_ */
259         struct in_ifaddr        *vha_ia;        /* carp address */
260         struct in_ifaddr        *vha_iaback;    /* backing address */
261         TAILQ_ENTRY(carp_vhaddr) vha_link;      /* sc_vha_list linkage */
262 };
263 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264
/*
 * Message used to run carp work in a netisr thread.  'base' must stay
 * the first member: the dispatch handlers cast the incoming netmsg_t
 * straight to (struct netmsg_carp *).
 *
 * NOTE(review): nc_carpdev, nc_data and nc_datalen are presumably
 * per-request arguments for the ioctl dispatchers; their users are not
 * in this part of the file -- confirm against the callers.
 */
265 struct netmsg_carp {
266         struct netmsg_base      base;
267         struct ifnet            *nc_carpdev;
268         struct carp_softc       *nc_softc;      /* target carp interface */
269         void                    *nc_data;
270         size_t                  nc_datalen;
271 };
272
/*
 * Per-interface state of one carp pseudo interface.  Allocated (zeroed)
 * in carp_clone_create() and freed in carp_clone_destroy().
 */
273 struct carp_softc {
274         struct arpcom            arpcom;        /* embeds the ifnet; see sc_if */
275         struct ifnet            *sc_carpdev;    /* parent interface */
276         struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */
277
278         const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
279         struct ip_moptions       sc_imo;
280
281 #ifdef INET6
282         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
283         struct ip6_moptions      sc_im6o;
284 #endif /* INET6 */
285
286         enum { INIT = 0, BACKUP, MASTER }
287                                  sc_state;
288         boolean_t                sc_dead;       /* set by carp_clone_destroy_dispatch() */
289
290         int                      sc_suppress;
291
292         int                      sc_sendad_errors;
293 #define CARP_SENDAD_MAX_ERRORS  3
294         int                      sc_sendad_success;
295 #define CARP_SENDAD_MIN_SUCCESS 3
296
297         int                      sc_vhid;       /* -1 until configured */
298         int                      sc_advskew;
299         int                      sc_naddrs;     /* actually used IPv4 vha */
300         int                      sc_naddrs6;
301         int                      sc_advbase;    /* seconds */
302         int                      sc_init_counter;
303         uint64_t                 sc_counter;
304
305         /* authentication */
306 #define CARP_HMAC_PAD   64
307         unsigned char            sc_key[CARP_KEY_LEN];
308         unsigned char            sc_pad[CARP_HMAC_PAD]; /* holds opad after carp_hmac_prepare() */
309         SHA1_CTX                 sc_sha1;       /* precomputed start of the inner hash */
310
311         struct callout           sc_ad_tmo;     /* advertisement timeout */
312         struct netmsg_carp       sc_ad_msg;     /* adv timeout netmsg */
313         struct callout           sc_md_tmo;     /* ip4 master down timeout */
314         struct callout           sc_md6_tmo;    /* ip6 master down timeout */
315         struct netmsg_carp       sc_md_msg;     /* master down timeout netmsg */
316
317         LIST_ENTRY(carp_softc)   sc_next;       /* carpif_list linkage */
318 };
319
/* Shorthand for the ifnet embedded in the softc's arpcom. */
320 #define sc_if   arpcom.ac_if
321
/*
 * List node that points at a carp_softc without owning it.  A carp_if is
 * a tail queue of these containers; carp_if_insert()/carp_if_remove()
 * build a fresh queue of containers that refer to the same softcs, and
 * carp_if_free() releases only the queue head and its containers.
 */
322 struct carp_softc_container {
323         TAILQ_ENTRY(carp_softc_container) scc_link;
324         struct carp_softc       *scc_softc;     /* referenced, not owned */
325 };
326 TAILQ_HEAD(carp_if, carp_softc_container);
327
/*
 * net.inet.carp sysctl tree.  carp_opts[] is indexed by the CARPCTL_*
 * identifiers; each SYSCTL_INT below exposes one slot read/write.
 * NOTE(review): the meaning of each initializer position depends on the
 * CARPCTL_* values in netinet/ip_carp.h -- verify there before editing.
 */
328 SYSCTL_DECL(_net_inet_carp);
329
330 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
331 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
332     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
333 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
334     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
335 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
336     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
337 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
338     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
339
/* Exported read-only (CTLFLAG_RD). */
340 static int carp_suppress_preempt = 0;
341 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
342     &carp_suppress_preempt, 0, "Preemption is suppressed");
343
/* Exported read-only (CTLFLAG_RD); defaults to 1. */
344 static int carp_prio_ad = 1;
345 SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
346     &carp_prio_ad, 0, "Prioritize advertisement packet");
347
348 static struct carpstats carpstats;
349 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
350     &carpstats, carpstats,
351     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
352
/*
 * Logging helpers gated by the net.inet.carp.log value:
 *   CARP_LOG   logs at LOG_INFO  when the value is > 0
 *   CARP_DEBUG logs at LOG_DEBUG when the value is > 1
 * Arguments are passed to log() unchanged (printf-style).
 */
353 #define CARP_LOG(...)   do {                            \
354         if (carp_opts[CARPCTL_LOG] > 0)                 \
355                 log(LOG_INFO, __VA_ARGS__);             \
356 } while (0)
357
358 #define CARP_DEBUG(...) do {                            \
359         if (carp_opts[CARPCTL_LOG] > 1)                 \
360                 log(LOG_DEBUG, __VA_ARGS__);            \
361 } while (0)
362
/* Token held around every carpif_list insertion/removal in this file. */
363 static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
364
/* Forward declarations: HMAC, routing, packet input, cloning, timers. */
365 static void     carp_hmac_prepare(struct carp_softc *);
366 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
367                     unsigned char *);
368 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
369                     unsigned char *);
370 static void     carp_setroute(struct carp_softc *, int);
371 static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
372                     struct carp_header *, sa_family_t);
373 static int      carp_clone_create(struct if_clone *, int, caddr_t);
374 static int      carp_clone_destroy(struct ifnet *);
375 static void     carp_detach(struct carp_softc *, boolean_t, boolean_t);
376 static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
377 static void     carp_send_ad_all(void);
378 static void     carp_send_ad_timeout(void *);
379 static void     carp_send_ad(struct carp_softc *);
380 static void     carp_send_arp(struct carp_softc *);
381 static void     carp_master_down_timeout(void *);
382 static void     carp_master_down(struct carp_softc *);
383 static void     carp_setrun(struct carp_softc *, sa_family_t);
384 static void     carp_set_state(struct carp_softc *, int);
385 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
386
/* ifnet methods installed by carp_clone_create(). */
387 static void     carp_init(void *);
388 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
389 static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
390                     struct rtentry *);
391 static void     carp_start(struct ifnet *, struct ifaltq_subque *);
392
/* Address management. */
393 static void     carp_multicast_cleanup(struct carp_softc *);
394 static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
395 static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
396 static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
397 static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
398                     struct ifaddr *);
399 static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
400                     struct ifaddr *);
401 static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);
402
/* Virtual address (carp_vhaddr) handling. */
403 static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
404                     struct in_ifaddr *);
405 static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
406                     struct ifnet *, struct in_ifaddr *, int);
407 static void     carp_deactivate_vhaddr(struct carp_softc *,
408                     struct carp_vhaddr *, boolean_t);
409 static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
410 static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
411                     boolean_t);
412
/* NOTE(review): 'foo' is not defined in the visible part of the file, so
 * this prototype looks compiled out -- confirm before relying on it. */
413 #ifdef foo
414 static void     carp_sc_state(struct carp_softc *);
415 #endif
416 #ifdef INET6
417 static void     carp_send_na(struct carp_softc *);
418 #ifdef notyet
419 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
420 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
421 #endif
422 static void     carp_multicast6_cleanup(struct carp_softc *);
423 #endif
/* Stop/suspend and ioctl back ends. */
424 static void     carp_stop(struct carp_softc *, boolean_t);
425 static void     carp_suspend(struct carp_softc *, boolean_t);
426 static void     carp_ioctl_stop(struct carp_softc *);
427 static int      carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
428 static void     carp_ioctl_ifcap(struct carp_softc *, int);
429 static int      carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
430 static int      carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
431 static int      carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
432
/* carp_if (per backing interface softc list) copy/insert/remove/free. */
433 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
434 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
435 static void     carp_if_free(struct carp_if *);
436
/* Event handler callbacks. */
437 static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
438                             struct ifaddr *);
439 static void     carp_ifdetach(void *, struct ifnet *);
440
/* netmsg handlers; each takes the message as a netmsg_t. */
441 static void     carp_ifdetach_dispatch(netmsg_t);
442 static void     carp_clone_destroy_dispatch(netmsg_t);
443 static void     carp_init_dispatch(netmsg_t);
444 static void     carp_ioctl_stop_dispatch(netmsg_t);
445 static void     carp_ioctl_setvh_dispatch(netmsg_t);
446 static void     carp_ioctl_ifcap_dispatch(netmsg_t);
447 static void     carp_ioctl_getvh_dispatch(netmsg_t);
448 static void     carp_ioctl_getdevname_dispatch(netmsg_t);
449 static void     carp_ioctl_getvhaddr_dispatch(netmsg_t);
450 static void     carp_send_ad_timeout_dispatch(netmsg_t);
451 static void     carp_master_down_timeout_dispatch(netmsg_t);
452
/* Malloc type used for every kmalloc()/kfree() in this file. */
453 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
454
/* All carp softcs; linked/unlinked in carp_clone_create()/carp_clone_destroy()
 * while holding carp_listtok. */
455 static LIST_HEAD(, carp_softc) carpif_list;
456
/* Cloner for "carp" interfaces, units 0 .. IF_MAXUNIT. */
457 static struct if_clone carp_cloner =
458 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
459                      0, IF_MAXUNIT);
460
/*
 * Link-level address passed to ether_ifattach() when a carp interface is
 * created (00:00:5e:00:01:00).
 * NOTE(review): the last octet is presumably replaced with the vhid once
 * one is configured -- confirm in the vhid setup code.
 */
461 static const uint8_t    carp_etheraddr[ETHER_ADDR_LEN] =
462         { 0, 0, 0x5e, 0, 1, 0 };
463
/* Tags for the ifdetach/ifaddr event handlers (carp_ifdetach/carp_ifaddr);
 * NOTE(review): registration is not in this part of the file. */
464 static eventhandler_tag carp_ifdetach_event;
465 static eventhandler_tag carp_ifaddr_event;
466
467 static __inline void
468 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
469 {
470         struct carp_vhaddr *vha;
471         u_long new_addr, addr;
472
473         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
474
475         /*
476          * Virtual address list is sorted; smaller one first
477          */
478         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
479
480         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
481                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
482
483                 if (addr > new_addr)
484                         break;
485         }
486         if (vha == NULL)
487                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
488         else
489                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
490         vha_new->vha_flags |= CARP_VHAF_ONLIST;
491 }
492
493 static __inline void
494 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
495 {
496         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
497         vha->vha_flags &= ~CARP_VHAF_ONLIST;
498         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
499 }
500
501 static void
502 carp_hmac_prepare(struct carp_softc *sc)
503 {
504         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
505         uint8_t vhid = sc->sc_vhid & 0xff;
506         int i;
507 #ifdef INET6
508         struct ifaddr_container *ifac;
509         struct in6_addr in6;
510 #endif
511 #ifdef INET
512         struct carp_vhaddr *vha;
513 #endif
514
515         /* XXX: possible race here */
516
517         /* compute ipad from key */
518         bzero(sc->sc_pad, sizeof(sc->sc_pad));
519         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
520         for (i = 0; i < sizeof(sc->sc_pad); i++)
521                 sc->sc_pad[i] ^= 0x36;
522
523         /* precompute first part of inner hash */
524         SHA1Init(&sc->sc_sha1);
525         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
526         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
527         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
528         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
529 #ifdef INET
530         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
531                 SHA1Update(&sc->sc_sha1,
532                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
533                     sizeof(struct in_addr));
534         }
535 #endif /* INET */
536 #ifdef INET6
537         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
538                 struct ifaddr *ifa = ifac->ifa;
539
540                 if (ifa->ifa_addr->sa_family == AF_INET6) {
541                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
542                         in6_clearscope(&in6);
543                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
544                 }
545         }
546 #endif /* INET6 */
547
548         /* convert ipad to opad */
549         for (i = 0; i < sizeof(sc->sc_pad); i++)
550                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
551 }
552
553 static void
554 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
555     unsigned char md[20])
556 {
557         SHA1_CTX sha1ctx;
558
559         /* fetch first half of inner hash */
560         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
561
562         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
563         SHA1Final(md, &sha1ctx);
564
565         /* outer hash */
566         SHA1Init(&sha1ctx);
567         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
568         SHA1Update(&sha1ctx, md, 20);
569         SHA1Final(md, &sha1ctx);
570 }
571
572 static int
573 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
574     unsigned char md[20])
575 {
576         unsigned char md2[20];
577
578         carp_hmac_generate(sc, counter, md2);
579         return (bcmp(md, md2, sizeof(md2)));
580 }
581
582 static void
583 carp_setroute(struct carp_softc *sc, int cmd)
584 {
585 #ifdef INET6
586         struct ifaddr_container *ifac;
587 #endif
588         struct carp_vhaddr *vha;
589
590         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
591
592         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
593                 if (vha->vha_iaback == NULL)
594                         continue;
595                 if (cmd == RTM_DELETE)
596                         carp_delroute_vhaddr(sc, vha, FALSE);
597                 else
598                         carp_addroute_vhaddr(sc, vha);
599         }
600
601 #ifdef INET6
602         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
603                 struct ifaddr *ifa = ifac->ifa;
604
605                 if (ifa->ifa_addr->sa_family == AF_INET6) {
606                         if (cmd == RTM_ADD)
607                                 in6_ifaddloop(ifa);
608                         else
609                                 in6_ifremloop(ifa);
610                 }
611         }
612 #endif /* INET6 */
613 }
614
615 static int
616 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
617 {
618         struct carp_softc *sc;
619         struct ifnet *ifp;
620
621         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
622         ifp = &sc->sc_if;
623
624         sc->sc_suppress = 0;
625         sc->sc_advbase = CARP_DFLTINTV;
626         sc->sc_vhid = -1;       /* required setting */
627         sc->sc_advskew = 0;
628         sc->sc_init_counter = 1;
629         sc->sc_naddrs = 0;
630         sc->sc_naddrs6 = 0;
631
632         TAILQ_INIT(&sc->sc_vha_list);
633
634 #ifdef INET6
635         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
636 #endif
637
638         callout_init_mp(&sc->sc_ad_tmo);
639         netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
640             MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
641         sc->sc_ad_msg.nc_softc = sc;
642
643         callout_init_mp(&sc->sc_md_tmo);
644         callout_init_mp(&sc->sc_md6_tmo);
645         netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
646             MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
647         sc->sc_md_msg.nc_softc = sc;
648
649         if_initname(ifp, CARP_IFNAME, unit);
650         ifp->if_softc = sc;
651         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
652         ifp->if_init = carp_init;
653         ifp->if_ioctl = carp_ioctl;
654         ifp->if_start = carp_start;
655
656         ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
657         ifp->if_capenable = ifp->if_capabilities;
658         /*
659          * Leave if_hwassist as it is; if_hwassist will be
660          * setup when this carp interface has parent.
661          */
662
663         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
664         ifq_set_ready(&ifp->if_snd);
665
666         ether_ifattach(ifp, carp_etheraddr, NULL);
667
668         ifp->if_type = IFT_CARP;
669         ifp->if_output = carp_output;
670
671         lwkt_gettoken(&carp_listtok);
672         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
673         lwkt_reltoken(&carp_listtok);
674
675         return (0);
676 }
677
678 static void
679 carp_clone_destroy_dispatch(netmsg_t msg)
680 {
681         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
682         struct carp_softc *sc = cmsg->nc_softc;
683
684         sc->sc_dead = TRUE;
685         carp_detach(sc, TRUE, FALSE);
686
687         callout_stop_sync(&sc->sc_ad_tmo);
688         callout_stop_sync(&sc->sc_md_tmo);
689         callout_stop_sync(&sc->sc_md6_tmo);
690
691         crit_enter();
692         lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
693         lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
694         crit_exit();
695
696         lwkt_replymsg(&cmsg->base.lmsg, 0);
697 }
698
699 static int
700 carp_clone_destroy(struct ifnet *ifp)
701 {
702         struct carp_softc *sc = ifp->if_softc;
703         struct netmsg_carp cmsg;
704
705         bzero(&cmsg, sizeof(cmsg));
706         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
707             carp_clone_destroy_dispatch);
708         cmsg.nc_softc = sc;
709
710         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
711
712         lwkt_gettoken(&carp_listtok);
713         LIST_REMOVE(sc, sc_next);
714         lwkt_reltoken(&carp_listtok);
715
716         bpfdetach(ifp);
717         if_detach(ifp);
718
719         KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
720         kfree(sc, M_CARP);
721
722         return 0;
723 }
724
725 static struct carp_if *
726 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
727 {
728         struct carp_softc_container *oscc, *scc;
729         struct carp_if *cif;
730         int count = 0;
731 #ifdef INVARIANTS
732         int found = 0;
733 #endif
734
735         TAILQ_FOREACH(oscc, ocif, scc_link) {
736                 ++count;
737 #ifdef INVARIANTS
738                 if (oscc->scc_softc == sc)
739                         found = 1;
740 #endif
741         }
742         KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
743
744         if (count == 1) {
745                 /* Last one is going to be unlinked */
746                 return NULL;
747         }
748
749         cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
750         TAILQ_INIT(cif);
751
752         TAILQ_FOREACH(oscc, ocif, scc_link) {
753                 if (oscc->scc_softc == sc)
754                         continue;
755
756                 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
757                 scc->scc_softc = oscc->scc_softc;
758                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
759         }
760
761         return cif;
762 }
763
764 static struct carp_if *
765 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
766 {
767         struct carp_softc_container *oscc;
768         int onlist;
769
770         onlist = 0;
771         if (ocif != NULL) {
772                 TAILQ_FOREACH(oscc, ocif, scc_link) {
773                         if (oscc->scc_softc == sc)
774                                 onlist = 1;
775                 }
776         }
777
778 #ifdef INVARIANTS
779         if (sc->sc_carpdev != NULL) {
780                 KASSERT(onlist, ("%s is not on %s carp list",
781                     sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
782         } else {
783                 KASSERT(!onlist, ("%s is already on carp list",
784                     sc->sc_if.if_xname));
785         }
786 #endif
787
788         if (!onlist) {
789                 struct carp_if *cif;
790                 struct carp_softc_container *new_scc, *scc;
791                 int inserted = 0;
792
793                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
794                 TAILQ_INIT(cif);
795
796                 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
797                 new_scc->scc_softc = sc;
798
799                 if (ocif != NULL) {
800                         TAILQ_FOREACH(oscc, ocif, scc_link) {
801                                 if (!inserted &&
802                                     oscc->scc_softc->sc_vhid > sc->sc_vhid) {
803                                         TAILQ_INSERT_TAIL(cif, new_scc,
804                                             scc_link);
805                                         inserted = 1;
806                                 }
807
808                                 scc = kmalloc(sizeof(*scc), M_CARP,
809                                     M_WAITOK | M_ZERO);
810                                 scc->scc_softc = oscc->scc_softc;
811                                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
812                         }
813                 }
814                 if (!inserted)
815                         TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
816
817                 return cif;
818         } else {
819                 return ocif;
820         }
821 }
822
823 static void
824 carp_if_free(struct carp_if *cif)
825 {
826         struct carp_softc_container *scc;
827
828         while ((scc = TAILQ_FIRST(cif)) != NULL) {
829                 TAILQ_REMOVE(cif, scc, scc_link);
830                 kfree(scc, M_CARP);
831         }
832         kfree(cif, M_CARP);
833 }
834
835 static void
836 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
837 {
838         carp_suspend(sc, detach);
839
840         carp_multicast_cleanup(sc);
841 #ifdef INET6
842         carp_multicast6_cleanup(sc);
843 #endif
844
845         if (!sc->sc_dead && detach) {
846                 struct carp_vhaddr *vha;
847
848                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
849                         carp_deactivate_vhaddr(sc, vha, del_iaback);
850                 KKASSERT(sc->sc_naddrs == 0);
851         }
852
853         if (sc->sc_carpdev != NULL) {
854                 struct ifnet *ifp = sc->sc_carpdev;
855                 struct carp_if *ocif = ifp->if_carp;
856
857                 ifp->if_carp = carp_if_remove(ocif, sc);
858                 KASSERT(ifp->if_carp != ocif,
859                     ("%s carp_if_remove failed", __func__));
860
861                 sc->sc_carpdev = NULL;
862                 sc->sc_ia = NULL;
863                 sc->arpcom.ac_if.if_hwassist = 0;
864
865                 /*
866                  * Make sure that all protocol threads see the
867                  * sc_carpdev and if_carp changes
868                  */
869                 netmsg_service_sync();
870
871                 if (ifp->if_carp == NULL) {
872                         /*
873                          * No more carp interfaces using
874                          * ifp as the backing interface,
875                          * move it out of promiscous mode.
876                          */
877                         ifpromisc(ifp, 0);
878                 }
879
880                 /*
881                  * The old carp list could be safely free now,
882                  * since no one can access it.
883                  */
884                 carp_if_free(ocif);
885         }
886 }
887
888 static void
889 carp_ifdetach_dispatch(netmsg_t msg)
890 {
891         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
892         struct ifnet *ifp = cmsg->nc_carpdev;
893
894         while (ifp->if_carp) {
895                 struct carp_softc_container *scc;
896
897                 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
898                 carp_detach(scc->scc_softc, TRUE, TRUE);
899         }
900         lwkt_replymsg(&cmsg->base.lmsg, 0);
901 }
902
903 /* Detach an interface from the carp. */
904 static void
905 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
906 {
907         struct netmsg_carp cmsg;
908
909         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
910
911         bzero(&cmsg, sizeof(cmsg));
912         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
913             carp_ifdetach_dispatch);
914         cmsg.nc_carpdev = ifp;
915
916         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
917 }
918
919 /*
920  * process input packet.
921  * we have rearranged checks order compared to the rfc,
922  * but it seems more efficient this way or not possible otherwise.
923  */
924 int
925 carp_proto_input(struct mbuf **mp, int *offp, int proto)
926 {
927         struct mbuf *m = *mp;
928         struct ip *ip = mtod(m, struct ip *);
929         struct ifnet *ifp = m->m_pkthdr.rcvif;
930         struct carp_header *ch;
931         struct carp_softc *sc;
932         int len, iphlen;
933
934         iphlen = *offp;
935         *mp = NULL;
936
937         carpstats.carps_ipackets++;
938
939         if (!carp_opts[CARPCTL_ALLOW]) {
940                 m_freem(m);
941                 goto back;
942         }
943
944         /* Check if received on a valid carp interface */
945         if (ifp->if_type != IFT_CARP) {
946                 carpstats.carps_badif++;
947                 CARP_LOG("carp_proto_input: packet received on non-carp "
948                     "interface: %s\n", ifp->if_xname);
949                 m_freem(m);
950                 goto back;
951         }
952
953         if (!CARP_IS_RUNNING(ifp)) {
954                 carpstats.carps_badif++;
955                 CARP_LOG("carp_proto_input: packet received on stopped carp "
956                     "interface: %s\n", ifp->if_xname);
957                 m_freem(m);
958                 goto back;
959         }
960
961         sc = ifp->if_softc;
962         if (sc->sc_carpdev == NULL) {
963                 carpstats.carps_badif++;
964                 CARP_LOG("carp_proto_input: packet received on defunc carp "
965                     "interface: %s\n", ifp->if_xname);
966                 m_freem(m);
967                 goto back;
968         }
969
970         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
971                 carpstats.carps_badif++;
972                 CARP_LOG("carp_proto_input: non-mcast packet on "
973                     "interface: %s\n", ifp->if_xname);
974                 m_freem(m);
975                 goto back;
976         }
977
978         /* Verify that the IP TTL is CARP_DFLTTL. */
979         if (ip->ip_ttl != CARP_DFLTTL) {
980                 carpstats.carps_badttl++;
981                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
982                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
983                 m_freem(m);
984                 goto back;
985         }
986
987         /* Minimal CARP packet size */
988         len = iphlen + sizeof(*ch);
989
990         /*
991          * Verify that the received packet length is
992          * not less than the CARP header
993          */
994         if (m->m_pkthdr.len < len) {
995                 carpstats.carps_badlen++;
996                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
997                     ifp->if_xname);
998                 m_freem(m);
999                 goto back;
1000         }
1001
1002         /* Make sure that CARP header is contiguous */
1003         if (len > m->m_len) {
1004                 m = m_pullup(m, len);
1005                 if (m == NULL) {
1006                         carpstats.carps_hdrops++;
1007                         CARP_LOG("carp_proto_input: m_pullup failed\n");
1008                         goto back;
1009                 }
1010                 ip = mtod(m, struct ip *);
1011         }
1012         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1013
1014         /* Verify the CARP checksum */
1015         if (in_cksum_skip(m, len, iphlen)) {
1016                 carpstats.carps_badsum++;
1017                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
1018                     ifp->if_xname);
1019                 m_freem(m);
1020                 goto back;
1021         }
1022         carp_proto_input_c(sc, m, ch, AF_INET);
1023 back:
1024         return(IPPROTO_DONE);
1025 }
1026
1027 #ifdef INET6
1028 int
1029 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
1030 {
1031         struct mbuf *m = *mp;
1032         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1033         struct ifnet *ifp = m->m_pkthdr.rcvif;
1034         struct carp_header *ch;
1035         struct carp_softc *sc;
1036         u_int len;
1037
1038         carpstats.carps_ipackets6++;
1039
1040         if (!carp_opts[CARPCTL_ALLOW]) {
1041                 m_freem(m);
1042                 goto back;
1043         }
1044
1045         /* check if received on a valid carp interface */
1046         if (ifp->if_type != IFT_CARP) {
1047                 carpstats.carps_badif++;
1048                 CARP_LOG("carp6_proto_input: packet received on non-carp "
1049                     "interface: %s\n", ifp->if_xname);
1050                 m_freem(m);
1051                 goto back;
1052         }
1053
1054         if (!CARP_IS_RUNNING(ifp)) {
1055                 carpstats.carps_badif++;
1056                 CARP_LOG("carp_proto_input: packet received on stopped carp "
1057                     "interface: %s\n", ifp->if_xname);
1058                 m_freem(m);
1059                 goto back;
1060         }
1061
1062         sc = ifp->if_softc;
1063         if (sc->sc_carpdev == NULL) {
1064                 carpstats.carps_badif++;
1065                 CARP_LOG("carp6_proto_input: packet received on defunc-carp "
1066                     "interface: %s\n", ifp->if_xname);
1067                 m_freem(m);
1068                 goto back;
1069         }
1070
1071         /* verify that the IP TTL is 255 */
1072         if (ip6->ip6_hlim != CARP_DFLTTL) {
1073                 carpstats.carps_badttl++;
1074                 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
1075                     ip6->ip6_hlim, ifp->if_xname);
1076                 m_freem(m);
1077                 goto back;
1078         }
1079
1080         /* verify that we have a complete carp packet */
1081         len = m->m_len;
1082         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
1083         if (ch == NULL) {
1084                 carpstats.carps_badlen++;
1085                 CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
1086                 goto back;
1087         }
1088
1089         /* verify the CARP checksum */
1090         if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
1091                 carpstats.carps_badsum++;
1092                 CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
1093                     ifp->if_xname);
1094                 m_freem(m);
1095                 goto back;
1096         }
1097
1098         carp_proto_input_c(sc, m, ch, AF_INET6);
1099 back:
1100         return (IPPROTO_DONE);
1101 }
1102 #endif /* INET6 */
1103
1104 static void
1105 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1106     struct carp_header *ch, sa_family_t af)
1107 {
1108         struct ifnet *cifp;
1109         uint64_t tmp_counter;
1110         struct timeval sc_tv, ch_tv;
1111
1112         if (sc->sc_vhid != ch->carp_vhid) {
1113                 /*
1114                  * CARP uses multicast, however, multicast packets
1115                  * are tapped to all CARP interfaces on the physical
1116                  * interface receiving the CARP packets, so we don't
1117                  * update any stats here.
1118                  */
1119                 m_freem(m);
1120                 return;
1121         }
1122         cifp = &sc->sc_if;
1123
1124         /* verify the CARP version. */
1125         if (ch->carp_version != CARP_VERSION) {
1126                 carpstats.carps_badver++;
1127                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1128                          ch->carp_version);
1129                 m_freem(m);
1130                 return;
1131         }
1132
1133         /* verify the hash */
1134         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1135                 carpstats.carps_badauth++;
1136                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1137                 m_freem(m);
1138                 return;
1139         }
1140
1141         tmp_counter = ntohl(ch->carp_counter[0]);
1142         tmp_counter = tmp_counter<<32;
1143         tmp_counter += ntohl(ch->carp_counter[1]);
1144
1145         /* XXX Replay protection goes here */
1146
1147         sc->sc_init_counter = 0;
1148         sc->sc_counter = tmp_counter;
1149
1150         sc_tv.tv_sec = sc->sc_advbase;
1151         if (carp_suppress_preempt && sc->sc_advskew <  240)
1152                 sc_tv.tv_usec = 240 * 1000000 / 256;
1153         else
1154                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1155         ch_tv.tv_sec = ch->carp_advbase;
1156         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1157
1158         switch (sc->sc_state) {
1159         case INIT:
1160                 break;
1161
1162         case MASTER:
1163                 /*
1164                  * If we receive an advertisement from a master who's going to
1165                  * be more frequent than us, go into BACKUP state.
1166                  */
1167                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
1168                     timevalcmp(&sc_tv, &ch_tv, ==)) {
1169                         callout_stop(&sc->sc_ad_tmo);
1170                         CARP_DEBUG("%s: MASTER -> BACKUP "
1171                            "(more frequent advertisement received)\n",
1172                            cifp->if_xname);
1173                         carp_set_state(sc, BACKUP);
1174                         carp_setrun(sc, 0);
1175                         carp_setroute(sc, RTM_DELETE);
1176                 }
1177                 break;
1178
1179         case BACKUP:
1180                 /*
1181                  * If we're pre-empting masters who advertise slower than us,
1182                  * and this one claims to be slower, treat him as down.
1183                  */
1184                 if (carp_opts[CARPCTL_PREEMPT] &&
1185                     timevalcmp(&sc_tv, &ch_tv, <)) {
1186                         CARP_DEBUG("%s: BACKUP -> MASTER "
1187                             "(preempting a slower master)\n", cifp->if_xname);
1188                         carp_master_down(sc);
1189                         break;
1190                 }
1191
1192                 /*
1193                  *  If the master is going to advertise at such a low frequency
1194                  *  that he's guaranteed to time out, we'd might as well just
1195                  *  treat him as timed out now.
1196                  */
1197                 sc_tv.tv_sec = sc->sc_advbase * 3;
1198                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
1199                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1200                                    cifp->if_xname);
1201                         carp_master_down(sc);
1202                         break;
1203                 }
1204
1205                 /*
1206                  * Otherwise, we reset the counter and wait for the next
1207                  * advertisement.
1208                  */
1209                 carp_setrun(sc, af);
1210                 break;
1211         }
1212         m_freem(m);
1213 }
1214
1215 struct mbuf *
1216 carp_input(void *v, struct mbuf *m)
1217 {
1218         struct carp_if *cif = v;
1219         struct ether_header *eh;
1220         struct carp_softc_container *scc;
1221         struct ifnet *ifp;
1222
1223         eh = mtod(m, struct ether_header *);
1224
1225         ifp = carp_forus(cif, eh->ether_dhost);
1226         if (ifp != NULL) {
1227                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1228                 return NULL;
1229         }
1230
1231         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1232                 return m;
1233
1234         /*
1235          * XXX Should really check the list of multicast addresses
1236          * for each CARP interface _before_ copying.
1237          */
1238         TAILQ_FOREACH(scc, cif, scc_link) {
1239                 struct carp_softc *sc = scc->scc_softc;
1240                 struct mbuf *m0;
1241
1242                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
1243                         continue;
1244
1245                 m0 = m_dup(m, MB_DONTWAIT);
1246                 if (m0 == NULL)
1247                         continue;
1248
1249                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1250         }
1251         return m;
1252 }
1253
1254 static void
1255 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1256 {
1257         if (sc->sc_init_counter) {
1258                 /* this could also be seconds since unix epoch */
1259                 sc->sc_counter = karc4random();
1260                 sc->sc_counter = sc->sc_counter << 32;
1261                 sc->sc_counter += karc4random();
1262         } else {
1263                 sc->sc_counter++;
1264         }
1265
1266         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1267         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1268
1269         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1270 }
1271
1272 static void
1273 carp_send_ad_all(void)
1274 {
1275         struct carp_softc *sc;
1276
1277         LIST_FOREACH(sc, &carpif_list, sc_next) {
1278                 if (sc->sc_carpdev == NULL)
1279                         continue;
1280
1281                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1282                         carp_send_ad(sc);
1283         }
1284 }
1285
1286 static void
1287 carp_send_ad_timeout(void *xsc)
1288 {
1289         struct carp_softc *sc = xsc;
1290         struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1291
1292         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1293             __func__, mycpuid));
1294
1295         crit_enter();
1296         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1297                 lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1298         crit_exit();
1299 }
1300
1301 static void
1302 carp_send_ad_timeout_dispatch(netmsg_t msg)
1303 {
1304         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1305         struct carp_softc *sc = cmsg->nc_softc;
1306
1307         /* Reply ASAP */
1308         crit_enter();
1309         lwkt_replymsg(&cmsg->base.lmsg, 0);
1310         crit_exit();
1311
1312         carp_send_ad(sc);
1313 }
1314
1315 static void
1316 carp_send_ad(struct carp_softc *sc)
1317 {
1318         struct ifnet *cifp = &sc->sc_if;
1319         struct carp_header ch;
1320         struct timeval tv;
1321         struct carp_header *ch_ptr;
1322         struct mbuf *m;
1323         int len, advbase, advskew;
1324
1325         if (!CARP_IS_RUNNING(cifp)) {
1326                 /* Bow out */
1327                 advbase = 255;
1328                 advskew = 255;
1329         } else {
1330                 advbase = sc->sc_advbase;
1331                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1332                         advskew = sc->sc_advskew;
1333                 else
1334                         advskew = 240;
1335                 tv.tv_sec = advbase;
1336                 tv.tv_usec = advskew * 1000000 / 256;
1337         }
1338
1339         ch.carp_version = CARP_VERSION;
1340         ch.carp_type = CARP_ADVERTISEMENT;
1341         ch.carp_vhid = sc->sc_vhid;
1342         ch.carp_advbase = advbase;
1343         ch.carp_advskew = advskew;
1344         ch.carp_authlen = 7;    /* XXX DEFINE */
1345         ch.carp_pad1 = 0;       /* must be zero */
1346         ch.carp_cksum = 0;
1347
1348 #ifdef INET
1349         if (sc->sc_ia != NULL) {
1350                 struct ip *ip;
1351
1352                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1353                 if (m == NULL) {
1354                         IFNET_STAT_INC(cifp, oerrors, 1);
1355                         carpstats.carps_onomem++;
1356                         /* XXX maybe less ? */
1357                         if (advbase != 255 || advskew != 255)
1358                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1359                                     carp_send_ad_timeout, sc);
1360                         return;
1361                 }
1362                 len = sizeof(*ip) + sizeof(ch);
1363                 m->m_pkthdr.len = len;
1364                 m->m_pkthdr.rcvif = NULL;
1365                 m->m_len = len;
1366                 MH_ALIGN(m, m->m_len);
1367                 m->m_flags |= M_MCAST;
1368                 if (carp_prio_ad)
1369                         m->m_flags |= M_PRIO;
1370                 ip = mtod(m, struct ip *);
1371                 ip->ip_v = IPVERSION;
1372                 ip->ip_hl = sizeof(*ip) >> 2;
1373                 ip->ip_tos = IPTOS_LOWDELAY;
1374                 ip->ip_len = len;
1375                 ip->ip_id = ip_newid();
1376                 ip->ip_off = IP_DF;
1377                 ip->ip_ttl = CARP_DFLTTL;
1378                 ip->ip_p = IPPROTO_CARP;
1379                 ip->ip_sum = 0;
1380                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1381                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1382
1383                 ch_ptr = (struct carp_header *)(&ip[1]);
1384                 bcopy(&ch, ch_ptr, sizeof(ch));
1385                 carp_prepare_ad(sc, ch_ptr);
1386                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1387
1388                 getmicrotime(&cifp->if_lastchange);
1389                 IFNET_STAT_INC(cifp, opackets, 1);
1390                 IFNET_STAT_INC(cifp, obytes, len);
1391                 carpstats.carps_opackets++;
1392
1393                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1394                         IFNET_STAT_INC(cifp, oerrors, 1);
1395                         if (sc->sc_sendad_errors < INT_MAX)
1396                                 sc->sc_sendad_errors++;
1397                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1398                                 carp_suppress_preempt++;
1399                                 if (carp_suppress_preempt == 1) {
1400                                         carp_send_ad_all();
1401                                 }
1402                         }
1403                         sc->sc_sendad_success = 0;
1404                 } else {
1405                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1406                                 if (++sc->sc_sendad_success >=
1407                                     CARP_SENDAD_MIN_SUCCESS) {
1408                                         carp_suppress_preempt--;
1409                                         sc->sc_sendad_errors = 0;
1410                                 }
1411                         } else {
1412                                 sc->sc_sendad_errors = 0;
1413                         }
1414                 }
1415         }
1416 #endif /* INET */
1417 #ifdef INET6
1418         if (sc->sc_ia6) {
1419                 struct ip6_hdr *ip6;
1420
1421                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1422                 if (m == NULL) {
1423                         IFNET_STAT_INC(cifp, oerrors, 1);
1424                         carpstats.carps_onomem++;
1425                         /* XXX maybe less ? */
1426                         if (advbase != 255 || advskew != 255)
1427                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1428                                     carp_send_ad_timeout, sc);
1429                         return;
1430                 }
1431                 len = sizeof(*ip6) + sizeof(ch);
1432                 m->m_pkthdr.len = len;
1433                 m->m_pkthdr.rcvif = NULL;
1434                 m->m_len = len;
1435                 MH_ALIGN(m, m->m_len);
1436                 m->m_flags |= M_MCAST;
1437                 ip6 = mtod(m, struct ip6_hdr *);
1438                 bzero(ip6, sizeof(*ip6));
1439                 ip6->ip6_vfc |= IPV6_VERSION;
1440                 ip6->ip6_hlim = CARP_DFLTTL;
1441                 ip6->ip6_nxt = IPPROTO_CARP;
1442                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1443                     sizeof(struct in6_addr));
1444                 /* set the multicast destination */
1445
1446                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1447                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1448                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1449                         IFNET_STAT_INC(cifp, oerrors, 1);
1450                         m_freem(m);
1451                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1452                         return;
1453                 }
1454
1455                 ch_ptr = (struct carp_header *)(&ip6[1]);
1456                 bcopy(&ch, ch_ptr, sizeof(ch));
1457                 carp_prepare_ad(sc, ch_ptr);
1458                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1459
1460                 getmicrotime(&cifp->if_lastchange);
1461                 IFNET_STAT_INC(cifp, opackets, 1);
1462                 IFNET_STAT_INC(cifp, obytes, len);
1463                 carpstats.carps_opackets6++;
1464
1465                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1466                         IFNET_STAT_INC(cifp, oerrors, 1);
1467                         if (sc->sc_sendad_errors < INT_MAX)
1468                                 sc->sc_sendad_errors++;
1469                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1470                                 carp_suppress_preempt++;
1471                                 if (carp_suppress_preempt == 1) {
1472                                         carp_send_ad_all();
1473                                 }
1474                         }
1475                         sc->sc_sendad_success = 0;
1476                 } else {
1477                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1478                                 if (++sc->sc_sendad_success >=
1479                                     CARP_SENDAD_MIN_SUCCESS) {
1480                                         carp_suppress_preempt--;
1481                                         sc->sc_sendad_errors = 0;
1482                                 }
1483                         } else {
1484                                 sc->sc_sendad_errors = 0;
1485                         }
1486                 }
1487         }
1488 #endif /* INET6 */
1489
1490         if (advbase != 255 || advskew != 255)
1491                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1492                     carp_send_ad_timeout, sc);
1493 }
1494
1495 /*
1496  * Broadcast a gratuitous ARP request containing
1497  * the virtual router MAC address for each IP address
1498  * associated with the virtual router.
1499  */
1500 static void
1501 carp_send_arp(struct carp_softc *sc)
1502 {
1503         const struct carp_vhaddr *vha;
1504
1505         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1506                 if (vha->vha_iaback == NULL)
1507                         continue;
1508                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1509         }
1510 }
1511
1512 #ifdef INET6
1513 static void
1514 carp_send_na(struct carp_softc *sc)
1515 {
1516         struct ifaddr_container *ifac;
1517         struct in6_addr *in6;
1518         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1519
1520         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1521                 struct ifaddr *ifa = ifac->ifa;
1522
1523                 if (ifa->ifa_addr->sa_family != AF_INET6)
1524                         continue;
1525
1526                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1527                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1528                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1529                 DELAY(1000);    /* XXX */
1530         }
1531 }
1532 #endif /* INET6 */
1533
1534 static __inline const struct carp_vhaddr *
1535 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1536 {
1537         struct carp_vhaddr *vha;
1538
1539         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1540                 if (vha->vha_iaback == NULL)
1541                         continue;
1542
1543                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1544                         return vha;
1545         }
1546         return NULL;
1547 }
1548
1549 #ifdef notyet
1550 static int
1551 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1552                      const struct in_addr *isaddr, uint8_t **enaddr)
1553 {
1554         const struct carp_softc *vh;
1555         int index, count = 0;
1556
1557         /*
1558          * XXX proof of concept implementation.
1559          * We use the source ip to decide which virtual host should
1560          * handle the request. If we're master of that virtual host,
1561          * then we respond, otherwise, just drop the arp packet on
1562          * the floor.
1563          */
1564
1565         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1566                 if (!CARP_IS_RUNNING(&vh->sc_if))
1567                         continue;
1568
1569                 if (carp_find_addr(vh, itaddr) != NULL)
1570                         count++;
1571         }
1572         if (count == 0)
1573                 return 0;
1574
1575         /* this should be a hash, like pf_hash() */
1576         index = ntohl(isaddr->s_addr) % count;
1577         count = 0;
1578
1579         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1580                 if (!CARP_IS_RUNNING(&vh->sc_if))
1581                         continue;
1582
1583                 if (carp_find_addr(vh, itaddr) == NULL)
1584                         continue;
1585
1586                 if (count == index) {
1587                         if (vh->sc_state == MASTER) {
1588                                 *enaddr = IF_LLADDR(&vh->sc_if);
1589                                 return 1;
1590                         } else {
1591                                 return 0;
1592                         }
1593                 }
1594                 count++;
1595         }
1596         return 0;
1597 }
1598 #endif
1599
1600 int
1601 carp_iamatch(const struct in_ifaddr *ia)
1602 {
1603         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1604
1605         KASSERT(&curthread->td_msgport == netisr_cpuport(0),
1606             ("not in netisr0"));
1607
1608 #ifdef notyet
1609         if (carp_opts[CARPCTL_ARPBALANCE])
1610                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1611 #endif
1612
1613         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1614                 return 0;
1615
1616         return 1;
1617 }
1618
1619 #ifdef INET6
1620 struct ifaddr *
1621 carp_iamatch6(void *v, struct in6_addr *taddr)
1622 {
1623 #ifdef foo
1624         struct carp_if *cif = v;
1625         struct carp_softc *vh;
1626
1627         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1628                 struct ifaddr_container *ifac;
1629
1630                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1631                               ifa_link) {
1632                         struct ifaddr *ifa = ifac->ifa;
1633
1634                         if (IN6_ARE_ADDR_EQUAL(taddr,
1635                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1636                             CARP_IS_RUNNING(&vh->sc_if) &&
1637                             vh->sc_state == MASTER) {
1638                                 return (ifa);
1639                         }
1640                 }
1641         }
1642 #endif
1643         return (NULL);
1644 }
1645
1646 void *
1647 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1648 {
1649 #ifdef foo
1650         struct m_tag *mtag;
1651         struct carp_if *cif = v;
1652         struct carp_softc *sc;
1653
1654         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1655                 struct ifaddr_container *ifac;
1656
1657                 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1658                               ifa_link) {
1659                         struct ifaddr *ifa = ifac->ifa;
1660
1661                         if (IN6_ARE_ADDR_EQUAL(taddr,
1662                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1663                             CARP_IS_RUNNING(&sc->sc_if)) {
1664                                 struct ifnet *ifp = &sc->sc_if;
1665
1666                                 mtag = m_tag_get(PACKET_TAG_CARP,
1667                                     sizeof(struct ifnet *), MB_DONTWAIT);
1668                                 if (mtag == NULL) {
1669                                         /* better a bit than nothing */
1670                                         return (IF_LLADDR(ifp));
1671                                 }
1672                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1673                                     sizeof(struct ifnet *));
1674                                 m_tag_prepend(m, mtag);
1675
1676                                 return (IF_LLADDR(ifp));
1677                         }
1678                 }
1679         }
1680 #endif
1681         return (NULL);
1682 }
1683 #endif
1684
1685 static struct ifnet *
1686 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1687 {
1688         struct carp_softc_container *scc;
1689
1690         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1691                 return NULL;
1692
1693         TAILQ_FOREACH(scc, cif, scc_link) {
1694                 struct carp_softc *sc = scc->scc_softc;
1695                 struct ifnet *ifp = &sc->sc_if;
1696
1697                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1698                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1699                         return ifp;
1700         }
1701         return NULL;
1702 }
1703
1704 static void
1705 carp_master_down_timeout(void *xsc)
1706 {
1707         struct carp_softc *sc = xsc;
1708         struct netmsg_carp *cmsg = &sc->sc_md_msg;
1709
1710         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1711             __func__, mycpuid));
1712
1713         crit_enter();
1714         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1715                 lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1716         crit_exit();
1717 }
1718
1719 static void
1720 carp_master_down_timeout_dispatch(netmsg_t msg)
1721 {
1722         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1723         struct carp_softc *sc = cmsg->nc_softc;
1724
1725         /* Reply ASAP */
1726         crit_enter();
1727         lwkt_replymsg(&cmsg->base.lmsg, 0);
1728         crit_exit();
1729
1730         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1731                    sc->sc_if.if_xname);
1732         carp_master_down(sc);
1733 }
1734
1735 static void
1736 carp_master_down(struct carp_softc *sc)
1737 {
1738         switch (sc->sc_state) {
1739         case INIT:
1740                 kprintf("%s: master_down event in INIT state\n",
1741                         sc->sc_if.if_xname);
1742                 break;
1743
1744         case MASTER:
1745                 break;
1746
1747         case BACKUP:
1748                 carp_set_state(sc, MASTER);
1749                 carp_send_ad(sc);
1750                 carp_send_arp(sc);
1751 #ifdef INET6
1752                 carp_send_na(sc);
1753 #endif /* INET6 */
1754                 carp_setrun(sc, 0);
1755                 carp_setroute(sc, RTM_ADD);
1756                 break;
1757         }
1758 }
1759
1760 /*
1761  * When in backup state, af indicates whether to reset the master down timer
1762  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1763  */
1764 static void
1765 carp_setrun(struct carp_softc *sc, sa_family_t af)
1766 {
1767         struct ifnet *cifp = &sc->sc_if;
1768         struct timeval tv;
1769
1770         if (sc->sc_carpdev == NULL) {
1771                 carp_set_state(sc, INIT);
1772                 return;
1773         }
1774
1775         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1776             (sc->sc_naddrs || sc->sc_naddrs6)) {
1777                 /* Nothing */
1778         } else {
1779                 carp_setroute(sc, RTM_DELETE);
1780                 return;
1781         }
1782
1783         switch (sc->sc_state) {
1784         case INIT:
1785                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1786                         carp_send_ad(sc);
1787                         carp_send_arp(sc);
1788 #ifdef INET6
1789                         carp_send_na(sc);
1790 #endif /* INET6 */
1791                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1792                                    cifp->if_xname);
1793                         carp_set_state(sc, MASTER);
1794                         carp_setroute(sc, RTM_ADD);
1795                 } else {
1796                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1797                         carp_set_state(sc, BACKUP);
1798                         carp_setroute(sc, RTM_DELETE);
1799                         carp_setrun(sc, 0);
1800                 }
1801                 break;
1802
1803         case BACKUP:
1804                 callout_stop(&sc->sc_ad_tmo);
1805                 tv.tv_sec = 3 * sc->sc_advbase;
1806                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1807                 switch (af) {
1808 #ifdef INET
1809                 case AF_INET:
1810                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1811                             carp_master_down_timeout, sc);
1812                         break;
1813 #endif /* INET */
1814 #ifdef INET6
1815                 case AF_INET6:
1816                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1817                             carp_master_down_timeout, sc);
1818                         break;
1819 #endif /* INET6 */
1820                 default:
1821                         if (sc->sc_naddrs)
1822                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1823                                     carp_master_down_timeout, sc);
1824                         if (sc->sc_naddrs6)
1825                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1826                                     carp_master_down_timeout, sc);
1827                         break;
1828                 }
1829                 break;
1830
1831         case MASTER:
1832                 tv.tv_sec = sc->sc_advbase;
1833                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1834                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1835                     carp_send_ad_timeout, sc);
1836                 break;
1837         }
1838 }
1839
1840 static void
1841 carp_multicast_cleanup(struct carp_softc *sc)
1842 {
1843         struct ip_moptions *imo = &sc->sc_imo;
1844
1845         if (imo->imo_num_memberships == 0)
1846                 return;
1847         KKASSERT(imo->imo_num_memberships == 1);
1848
1849         in_delmulti(imo->imo_membership[0]);
1850         imo->imo_membership[0] = NULL;
1851         imo->imo_num_memberships = 0;
1852         imo->imo_multicast_ifp = NULL;
1853 }
1854
1855 #ifdef INET6
1856 static void
1857 carp_multicast6_cleanup(struct carp_softc *sc)
1858 {
1859         struct ip6_moptions *im6o = &sc->sc_im6o;
1860
1861         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1862                 struct in6_multi_mship *imm =
1863                     LIST_FIRST(&im6o->im6o_memberships);
1864
1865                 LIST_REMOVE(imm, i6mm_chain);
1866                 in6_leavegroup(imm);
1867         }
1868         im6o->im6o_multicast_ifp = NULL;
1869 }
1870 #endif
1871
1872 static void
1873 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1874 {
1875         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1876         struct carp_softc *sc = cmsg->nc_softc;
1877         const struct carp_vhaddr *vha;
1878         struct ifcarpvhaddr *carpa, *carpa0;
1879         int count, len, error = 0;
1880
1881         count = 0;
1882         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1883                 ++count;
1884
1885         if (cmsg->nc_datalen == 0) {
1886                 cmsg->nc_datalen = count * sizeof(*carpa);
1887                 goto back;
1888         } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1889                 cmsg->nc_datalen = 0;
1890                 goto back;
1891         }
1892         len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1893         KKASSERT(len >= sizeof(*carpa));
1894
1895         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1896         if (carpa == NULL) {
1897                 error = ENOMEM; 
1898                 goto back;
1899         }
1900
1901         count = 0;
1902         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1903                 if (len < sizeof(*carpa))
1904                         break;
1905
1906                 carpa->carpa_flags = vha->vha_flags;
1907                 carpa->carpa_addr.sin_family = AF_INET;
1908                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1909
1910                 carpa->carpa_baddr.sin_family = AF_INET;
1911                 if (vha->vha_iaback == NULL) {
1912                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1913                 } else {
1914                         carpa->carpa_baddr.sin_addr =
1915                         vha->vha_iaback->ia_addr.sin_addr;
1916                 }
1917
1918                 ++carpa;
1919                 ++count;
1920                 len -= sizeof(*carpa);
1921         }
1922         cmsg->nc_datalen = sizeof(*carpa) * count;
1923         KKASSERT(cmsg->nc_datalen > 0);
1924
1925         cmsg->nc_data = carpa0;
1926
1927 back:
1928         lwkt_replymsg(&cmsg->base.lmsg, error);
1929 }
1930
1931 static int
1932 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1933 {
1934         struct ifnet *ifp = &sc->arpcom.ac_if;
1935         struct netmsg_carp cmsg;
1936         int error;
1937
1938         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1939         ifnet_deserialize_all(ifp);
1940
1941         bzero(&cmsg, sizeof(cmsg));
1942         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1943             carp_ioctl_getvhaddr_dispatch);
1944         cmsg.nc_softc = sc;
1945         cmsg.nc_datalen = ifd->ifd_len;
1946
1947         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1948
1949         if (!error) {
1950                 if (cmsg.nc_data != NULL) {
1951                         error = copyout(cmsg.nc_data, ifd->ifd_data,
1952                             cmsg.nc_datalen);
1953                         kfree(cmsg.nc_data, M_TEMP);
1954                 }
1955                 ifd->ifd_len = cmsg.nc_datalen;
1956         } else {
1957                 KASSERT(cmsg.nc_data == NULL,
1958                     ("%s temp vhaddr is alloc upon error", __func__));
1959         }
1960
1961         ifnet_serialize_all(ifp);
1962         return error;
1963 }
1964
1965 static int
1966 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1967     struct in_ifaddr *ia_del)
1968 {
1969         struct ifnet *ifp;
1970         struct in_ifaddr *ia_if;
1971         const struct in_ifaddr *ia_vha;
1972         struct in_ifaddr_container *iac;
1973         int own, ia_match_carpdev;
1974
1975         KKASSERT(vha->vha_ia != NULL);
1976         ia_vha = vha->vha_ia;
1977
1978         ia_if = NULL;
1979         own = 0;
1980         ia_match_carpdev = 0;
1981         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1982                 struct in_ifaddr *ia = iac->ia;
1983
1984                 if (ia == ia_del)
1985                         continue;
1986
1987                 if (ia->ia_ifp->if_type == IFT_CARP)
1988                         continue;
1989
1990                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1991                         continue;
1992
1993                 /* and, yeah, we need a multicast-capable iface too */
1994                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1995                         continue;
1996
1997                 if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
1998                     ia_vha->ia_subnet == ia->ia_subnet) {
1999                         if (ia_vha->ia_addr.sin_addr.s_addr ==
2000                             ia->ia_addr.sin_addr.s_addr)
2001                                 own = 1;
2002                         if (ia_if == NULL) {
2003                                 ia_if = ia;
2004                         } else if (sc->sc_carpdev != NULL &&
2005                             sc->sc_carpdev == ia->ia_ifp) {
2006                                 ia_if = ia;
2007                                 if (ia_if->ia_flags & IFA_ROUTE) {
2008                                         /*
2009                                          * Address with prefix route
2010                                          * is prefered
2011                                          */
2012                                         break;
2013                                 }
2014                                 ia_match_carpdev = 1;
2015                         } else if (!ia_match_carpdev) {
2016                                 if (ia->ia_flags & IFA_ROUTE) {
2017                                         /*
2018                                          * Address with prefix route
2019                                          * is prefered over others.
2020                                          */
2021                                         ia_if = ia;
2022                                 }
2023                         }
2024                 }
2025         }
2026
2027         carp_deactivate_vhaddr(sc, vha, FALSE);
2028         if (!ia_if)
2029                 return ENOENT;
2030
2031         ifp = ia_if->ia_ifp;
2032
2033         /* XXX Don't allow parent iface to be changed */
2034         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2035                 return EEXIST;
2036
2037         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2038 }
2039
2040 static void
2041 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2042 {
2043         struct carp_vhaddr *vha_new;
2044         struct in_ifaddr *carp_ia;
2045 #ifdef INVARIANTS
2046         struct carp_vhaddr *vha;
2047 #endif
2048
2049         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2050         carp_ia = ifatoia(carp_ifa);
2051
2052 #ifdef INVARIANTS
2053         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2054                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2055 #endif
2056
2057         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2058         vha_new->vha_ia = carp_ia;
2059         carp_insert_vhaddr(sc, vha_new);
2060
2061         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2062                 /*
2063                  * If the above configuration fails, it may only mean
2064                  * that the new address is problematic.  However, the
2065                  * carp(4) interface may already have several working
2066                  * addresses.  Since the expected behaviour of
2067                  * SIOC[AS]IFADDR is to put the NIC into working state,
2068                  * we try starting the state machine manually here with
2069                  * the hope that the carp(4)'s previously working
2070                  * addresses still could be brought up.
2071                  */
2072                 carp_hmac_prepare(sc);
2073                 carp_set_state(sc, INIT);
2074                 carp_setrun(sc, 0);
2075         }
2076 }
2077
2078 static void
2079 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2080 {
2081         struct carp_vhaddr *vha;
2082         struct in_ifaddr *carp_ia;
2083
2084         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2085         carp_ia = ifatoia(carp_ifa);
2086
2087         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2088                 KKASSERT(vha->vha_ia != NULL);
2089                 if (vha->vha_ia == carp_ia)
2090                         break;
2091         }
2092         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2093
2094         /*
2095          * Remove the vhaddr from the list before deactivating
2096          * the vhaddr, so that the HMAC could be correctly
2097          * updated in carp_deactivate_vhaddr()
2098          */
2099         carp_remove_vhaddr(sc, vha);
2100
2101         carp_deactivate_vhaddr(sc, vha, FALSE);
2102         kfree(vha, M_CARP);
2103 }
2104
2105 static void
2106 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2107 {
2108         struct carp_vhaddr *vha;
2109         struct in_ifaddr *carp_ia;
2110
2111         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2112         carp_ia = ifatoia(carp_ifa);
2113
2114         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2115                 KKASSERT(vha->vha_ia != NULL);
2116                 if (vha->vha_ia == carp_ia)
2117                         break;
2118         }
2119         KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2120
2121         /* Remove then reinsert, to keep the vhaddr list sorted */
2122         carp_remove_vhaddr(sc, vha);
2123         carp_insert_vhaddr(sc, vha);
2124
2125         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2126                 /* See the comment in carp_add_addr() */
2127                 carp_hmac_prepare(sc);
2128                 carp_set_state(sc, INIT);
2129                 carp_setrun(sc, 0);
2130         }
2131 }
2132
2133 #ifdef notyet
2134
2135 #ifdef INET6
2136 static int
2137 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2138 {
2139         struct ifnet *ifp;
2140         struct carp_if *cif;
2141         struct in6_ifaddr *ia, *ia_if;
2142         struct ip6_moptions *im6o = &sc->sc_im6o;
2143         struct in6_multi_mship *imm;
2144         struct in6_addr in6;
2145         int own, error;
2146
2147         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2148                 carp_setrun(sc, 0);
2149                 return (0);
2150         }
2151
2152         /* we have to do it by hands to check we won't match on us */
2153         ia_if = NULL; own = 0;
2154         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2155                 int i;
2156
2157                 for (i = 0; i < 4; i++) {
2158                         if ((sin6->sin6_addr.s6_addr32[i] &
2159                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2160                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
2161                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2162                                 break;
2163                 }
2164                 /* and, yeah, we need a multicast-capable iface too */
2165                 if (ia->ia_ifp != &sc->sc_if &&
2166                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2167                     (i == 4)) {
2168                         if (!ia_if)
2169                                 ia_if = ia;
2170                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2171                             &ia->ia_addr.sin6_addr))
2172                                 own++;
2173                 }
2174         }
2175
2176         if (!ia_if)
2177                 return (EADDRNOTAVAIL);
2178         ia = ia_if;
2179         ifp = ia->ia_ifp;
2180
2181         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2182             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2183                 return (EADDRNOTAVAIL);
2184
2185         if (!sc->sc_naddrs6) {
2186                 im6o->im6o_multicast_ifp = ifp;
2187
2188                 /* join CARP multicast address */
2189                 bzero(&in6, sizeof(in6));
2190                 in6.s6_addr16[0] = htons(0xff02);
2191                 in6.s6_addr8[15] = 0x12;
2192                 if (in6_setscope(&in6, ifp, NULL) != 0)
2193                         goto cleanup;
2194                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2195                         goto cleanup;
2196                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2197
2198                 /* join solicited multicast address */
2199                 bzero(&in6, sizeof(in6));
2200                 in6.s6_addr16[0] = htons(0xff02);
2201                 in6.s6_addr32[1] = 0;
2202                 in6.s6_addr32[2] = htonl(1);
2203                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2204                 in6.s6_addr8[12] = 0xff;
2205                 if (in6_setscope(&in6, ifp, NULL) != 0)
2206                         goto cleanup;
2207                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2208                         goto cleanup;
2209                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2210         }
2211
2212 #ifdef foo
2213         if (!ifp->if_carp) {
2214                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2215
2216                 if ((error = ifpromisc(ifp, 1))) {
2217                         kfree(cif, M_CARP);
2218                         goto cleanup;
2219                 }
2220
2221                 TAILQ_INIT(&cif->vhif_vrs);
2222                 ifp->if_carp = cif;
2223         } else {
2224                 struct carp_softc *vr;
2225
2226                 cif = ifp->if_carp;
2227                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2228                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2229                                 error = EINVAL;
2230                                 goto cleanup;
2231                         }
2232                 }
2233         }
2234 #endif
2235         sc->sc_ia6 = ia;
2236         sc->sc_carpdev = ifp;
2237
2238 #ifdef foo
2239         { /* XXX prevent endless loop if already in queue */
2240         struct carp_softc *vr, *after = NULL;
2241         int myself = 0;
2242         cif = ifp->if_carp;
2243
2244         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2245                 if (vr == sc)
2246                         myself = 1;
2247                 if (vr->sc_vhid < sc->sc_vhid)
2248                         after = vr;
2249         }
2250
2251         if (!myself) {
2252                 /* We're trying to keep things in order */
2253                 if (after == NULL)
2254                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2255                 else
2256                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2257         }
2258         }
2259 #endif
2260
2261         sc->sc_naddrs6++;
2262         if (own)
2263                 sc->sc_advskew = 0;
2264         carp_sc_state(sc);
2265         carp_setrun(sc, 0);
2266
2267         return (0);
2268
2269 cleanup:
2270         /* clean up multicast memberships */
2271         if (!sc->sc_naddrs6) {
2272                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2273                         imm = LIST_FIRST(&im6o->im6o_memberships);
2274                         LIST_REMOVE(imm, i6mm_chain);
2275                         in6_leavegroup(imm);
2276                 }
2277         }
2278         return (error);
2279 }
2280
2281 static int
2282 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2283 {
2284         int error = 0;
2285
2286         if (!--sc->sc_naddrs6) {
2287                 struct carp_if *cif = sc->sc_carpdev->if_carp;
2288                 struct ip6_moptions *im6o = &sc->sc_im6o;
2289
2290                 callout_stop(&sc->sc_ad_tmo);
2291                 sc->sc_vhid = -1;
2292                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2293                         struct in6_multi_mship *imm =
2294                             LIST_FIRST(&im6o->im6o_memberships);
2295
2296                         LIST_REMOVE(imm, i6mm_chain);
2297                         in6_leavegroup(imm);
2298                 }
2299                 im6o->im6o_multicast_ifp = NULL;
2300 #ifdef foo
2301                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2302                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2303                         sc->sc_carpdev->if_carp = NULL;
2304                         kfree(cif, M_IFADDR);
2305                 }
2306 #endif
2307         }
2308         return (error);
2309 }
2310 #endif /* INET6 */
2311
2312 #endif
2313
2314 static int
2315 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2316 {
2317         struct carp_softc *sc = ifp->if_softc;
2318         struct ifreq *ifr = (struct ifreq *)addr;
2319         struct ifdrv *ifd = (struct ifdrv *)addr;
2320         int error = 0;
2321
2322         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2323
2324         switch (cmd) {
2325         case SIOCSIFFLAGS:
2326                 if (ifp->if_flags & IFF_UP) {
2327                         if ((ifp->if_flags & IFF_RUNNING) == 0)
2328                                 carp_init(sc);
2329                 } else if (ifp->if_flags & IFF_RUNNING) {
2330                         carp_ioctl_stop(sc);
2331                 }
2332                 break;
2333
2334         case SIOCSIFCAP:
2335                 carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2336                 break;
2337
2338         case SIOCSVH:
2339                 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2340                 break;
2341
2342         case SIOCGVH:
2343                 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2344                 break;
2345
2346         case SIOCGDRVSPEC:
2347                 switch (ifd->ifd_cmd) {
2348                 case CARPGDEVNAME:
2349                         error = carp_ioctl_getdevname(sc, ifd);
2350                         break;
2351
2352                 case CARPGVHADDR:
2353                         error = carp_ioctl_getvhaddr(sc, ifd);
2354                         break;
2355
2356                 default:
2357                         error = EINVAL;
2358                         break;
2359                 }
2360                 break;
2361
2362         default:
2363                 error = ether_ioctl(ifp, cmd, addr);
2364                 break;
2365         }
2366
2367         return error;
2368 }
2369
2370 static void
2371 carp_ioctl_stop_dispatch(netmsg_t msg)
2372 {
2373         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2374         struct carp_softc *sc = cmsg->nc_softc;
2375
2376         carp_stop(sc, FALSE);
2377         lwkt_replymsg(&cmsg->base.lmsg, 0);
2378 }
2379
2380 static void
2381 carp_ioctl_stop(struct carp_softc *sc)
2382 {
2383         struct ifnet *ifp = &sc->arpcom.ac_if;
2384         struct netmsg_carp cmsg;
2385
2386         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2387
2388         ifnet_deserialize_all(ifp);
2389
2390         bzero(&cmsg, sizeof(cmsg));
2391         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2392             carp_ioctl_stop_dispatch);
2393         cmsg.nc_softc = sc;
2394
2395         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2396
2397         ifnet_serialize_all(ifp);
2398 }
2399
2400 static void
2401 carp_ioctl_setvh_dispatch(netmsg_t msg)
2402 {
2403         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2404         struct carp_softc *sc = cmsg->nc_softc;
2405         struct ifnet *ifp = &sc->arpcom.ac_if;
2406         const struct carpreq *carpr = cmsg->nc_data;
2407         int error;
2408
2409         error = 1;
2410         if ((ifp->if_flags & IFF_RUNNING) &&
2411             sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2412                 switch (carpr->carpr_state) {
2413                 case BACKUP:
2414                         callout_stop(&sc->sc_ad_tmo);
2415                         carp_set_state(sc, BACKUP);
2416                         carp_setrun(sc, 0);
2417                         carp_setroute(sc, RTM_DELETE);
2418                         break;
2419
2420                 case MASTER:
2421                         carp_master_down(sc);
2422                         break;
2423
2424                 default:
2425                         break;
2426                 }
2427         }
2428         if (carpr->carpr_vhid > 0) {
2429                 if (carpr->carpr_vhid > 255) {
2430                         error = EINVAL;
2431                         goto back;
2432                 }
2433                 if (sc->sc_carpdev) {
2434                         struct carp_if *cif = sc->sc_carpdev->if_carp;
2435                         struct carp_softc_container *scc;
2436
2437                         TAILQ_FOREACH(scc, cif, scc_link) {
2438                                 struct carp_softc *vr = scc->scc_softc;
2439
2440                                 if (vr != sc &&
2441                                     vr->sc_vhid == carpr->carpr_vhid) {
2442                                         error = EEXIST;
2443                                         goto back;
2444                                 }
2445                         }
2446                 }
2447                 sc->sc_vhid = carpr->carpr_vhid;
2448
2449                 IF_LLADDR(ifp)[5] = sc->sc_vhid;
2450                 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2451                     ETHER_ADDR_LEN);
2452
2453                 error--;
2454         }
2455         if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2456                 if (carpr->carpr_advskew >= 255) {
2457                         error = EINVAL;
2458                         goto back;
2459                 }
2460                 if (carpr->carpr_advbase > 255) {
2461                         error = EINVAL;
2462                         goto back;
2463                 }
2464                 sc->sc_advbase = carpr->carpr_advbase;
2465                 sc->sc_advskew = carpr->carpr_advskew;
2466                 error--;
2467         }
2468         bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2469         if (error > 0) {
2470                 error = EINVAL;
2471         } else {
2472                 error = 0;
2473                 carp_setrun(sc, 0);
2474         }
2475 back:
2476         carp_hmac_prepare(sc);
2477
2478         lwkt_replymsg(&cmsg->base.lmsg, error);
2479 }
2480
2481 static int
2482 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2483 {
2484         struct ifnet *ifp = &sc->arpcom.ac_if;
2485         struct netmsg_carp cmsg;
2486         struct carpreq carpr;
2487         int error;
2488
2489         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2490         ifnet_deserialize_all(ifp);
2491
2492         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2493         if (error)
2494                 goto back;
2495
2496         error = copyin(udata, &carpr, sizeof(carpr));
2497         if (error)
2498                 goto back;
2499
2500         bzero(&cmsg, sizeof(cmsg));
2501         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2502             carp_ioctl_setvh_dispatch);
2503         cmsg.nc_softc = sc;
2504         cmsg.nc_data = &carpr;
2505
2506         error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2507
2508 back:
2509         ifnet_serialize_all(ifp);
2510         return error;
2511 }
2512
2513 static void
2514 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2515 {
2516         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2517         struct carp_softc *sc = cmsg->nc_softc;
2518         struct ifnet *ifp = &sc->arpcom.ac_if;
2519         int reqcap = *((const int *)(cmsg->nc_data));
2520         int mask;
2521
2522         mask = reqcap ^ ifp->if_capenable;
2523         if (mask & IFCAP_TXCSUM) {
2524                 ifp->if_capenable ^= IFCAP_TXCSUM;
2525                 if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2526                     sc->sc_carpdev != NULL) {
2527                         ifp->if_hwassist |=
2528                             (sc->sc_carpdev->if_hwassist &
2529                              (CSUM_IP | CSUM_UDP | CSUM_TCP));
2530                 } else {
2531                         ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2532                 }
2533         }
2534         if (mask & IFCAP_TSO) {
2535                 ifp->if_capenable ^= IFCAP_TSO;
2536                 if ((ifp->if_capenable & IFCAP_TSO) &&
2537                     sc->sc_carpdev != NULL) {
2538                         ifp->if_hwassist |=
2539                             (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2540                 } else {
2541                         ifp->if_hwassist &= ~CSUM_TSO;
2542                 }
2543         }
2544
2545         lwkt_replymsg(&cmsg->base.lmsg, 0);
2546 }
2547
2548 static void
2549 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2550 {
2551         struct ifnet *ifp = &sc->arpcom.ac_if;
2552         struct netmsg_carp cmsg;
2553
2554         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2555         ifnet_deserialize_all(ifp);
2556
2557         bzero(&cmsg, sizeof(cmsg));
2558         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2559             carp_ioctl_ifcap_dispatch);
2560         cmsg.nc_softc = sc;
2561         cmsg.nc_data = &reqcap;
2562
2563         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2564
2565         ifnet_serialize_all(ifp);
2566 }
2567
2568 static void
2569 carp_ioctl_getvh_dispatch(netmsg_t msg)
2570 {
2571         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2572         struct carp_softc *sc = cmsg->nc_softc;
2573         struct carpreq *carpr = cmsg->nc_data;
2574
2575         carpr->carpr_state = sc->sc_state;
2576         carpr->carpr_vhid = sc->sc_vhid;
2577         carpr->carpr_advbase = sc->sc_advbase;
2578         carpr->carpr_advskew = sc->sc_advskew;
2579         bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2580
2581         lwkt_replymsg(&cmsg->base.lmsg, 0);
2582 }
2583
2584 static int
2585 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2586 {
2587         struct ifnet *ifp = &sc->arpcom.ac_if;
2588         struct netmsg_carp cmsg;
2589         struct carpreq carpr;
2590         int error;
2591
2592         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2593         ifnet_deserialize_all(ifp);
2594
2595         bzero(&cmsg, sizeof(cmsg));
2596         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2597             carp_ioctl_getvh_dispatch);
2598         cmsg.nc_softc = sc;
2599         cmsg.nc_data = &carpr;
2600
2601         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2602
2603         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2604         if (error)
2605                 bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2606
2607         error = copyout(&carpr, udata, sizeof(carpr));
2608
2609         ifnet_serialize_all(ifp);
2610         return error;
2611 }
2612
2613 static void
2614 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2615 {
2616         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2617         struct carp_softc *sc = cmsg->nc_softc;
2618         char *devname = cmsg->nc_data;
2619
2620         bzero(devname, IFNAMSIZ);
2621         if (sc->sc_carpdev != NULL)
2622                 strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2623
2624         lwkt_replymsg(&cmsg->base.lmsg, 0);
2625 }
2626
2627 static int
2628 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2629 {
2630         struct ifnet *ifp = &sc->arpcom.ac_if;
2631         struct netmsg_carp cmsg;
2632         char devname[IFNAMSIZ];
2633         int error;
2634
2635         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2636
2637         if (ifd->ifd_len != sizeof(devname))
2638                 return EINVAL;
2639
2640         ifnet_deserialize_all(ifp);
2641
2642         bzero(&cmsg, sizeof(cmsg));
2643         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2644             carp_ioctl_getdevname_dispatch);
2645         cmsg.nc_softc = sc;
2646         cmsg.nc_data = devname;
2647
2648         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2649
2650         error = copyout(devname, ifd->ifd_data, sizeof(devname));
2651
2652         ifnet_serialize_all(ifp);
2653         return error;
2654 }
2655
2656 static void
2657 carp_init_dispatch(netmsg_t msg)
2658 {
2659         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2660         struct carp_softc *sc = cmsg->nc_softc;
2661
2662         sc->sc_if.if_flags |= IFF_RUNNING;
2663         carp_hmac_prepare(sc);
2664         carp_set_state(sc, INIT);
2665         carp_setrun(sc, 0);
2666
2667         lwkt_replymsg(&cmsg->base.lmsg, 0);
2668 }
2669
2670 static void
2671 carp_init(void *xsc)
2672 {
2673         struct carp_softc *sc = xsc;
2674         struct ifnet *ifp = &sc->arpcom.ac_if;
2675         struct netmsg_carp cmsg;
2676
2677         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2678
2679         ifnet_deserialize_all(ifp);
2680
2681         bzero(&cmsg, sizeof(cmsg));
2682         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2683             carp_init_dispatch);
2684         cmsg.nc_softc = sc;
2685
2686         lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2687
2688         ifnet_serialize_all(ifp);
2689 }
2690
2691 static int
2692 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2693     struct rtentry *rt)
2694 {
2695         struct carp_softc *sc = ifp->if_softc;
2696         struct ifnet *carpdev;
2697         int error = 0;
2698
2699         carpdev = sc->sc_carpdev;
2700         if (carpdev != NULL) {
2701                 if (m->m_flags & M_MCAST)
2702                         IFNET_STAT_INC(ifp, omcasts, 1);
2703                 IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2704                 IFNET_STAT_INC(ifp, opackets, 1);
2705
2706                 /*
2707                  * NOTE:
2708                  * CARP's ifp is passed to backing device's
2709                  * if_output method.
2710                  */
2711                 carpdev->if_output(ifp, m, dst, rt);
2712         } else {
2713                 IFNET_STAT_INC(ifp, oerrors, 1);
2714                 m_freem(m);
2715                 error = ENETUNREACH;
2716         }
2717         return error;
2718 }
2719
2720 /*
2721  * Start output on carp interface. This function should never be called.
2722  */
2723 static void
2724 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2725 {
2726         panic("%s: start called", ifp->if_xname);
2727 }
2728
2729 static void
2730 carp_set_state(struct carp_softc *sc, int state)
2731 {
2732         struct ifnet *cifp = &sc->sc_if;
2733
2734         if (sc->sc_state == state)
2735                 return;
2736         sc->sc_state = state;
2737
2738         switch (sc->sc_state) {
2739         case BACKUP:
2740                 cifp->if_link_state = LINK_STATE_DOWN;
2741                 break;
2742
2743         case MASTER:
2744                 cifp->if_link_state = LINK_STATE_UP;
2745                 break;
2746
2747         default:
2748                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2749                 break;
2750         }
2751         rt_ifmsg(cifp);
2752 }
2753
2754 void
2755 carp_group_demote_adj(struct ifnet *ifp, int adj)
2756 {
2757         struct ifg_list *ifgl;
2758         int *dm;
2759
2760         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2761                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2762                         continue;
2763                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2764
2765                 if (*dm + adj >= 0)
2766                         *dm += adj;
2767                 else
2768                         *dm = 0;
2769
2770                 if (adj > 0 && *dm == 1)
2771                         carp_send_ad_all();
2772                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2773                     ifgl->ifgl_group->ifg_group, *dm);
2774         }
2775 }
2776
#ifdef foo
/*
 * Compiled only when 'foo' is defined.
 *
 * Run carp_sc_state() on every softc linked on the carp_if 'v'.
 */
void
carp_carpdev_state(void *v)
{
	struct carp_if *cif = v;
	struct carp_softc *sc;

	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
		carp_sc_state(sc);
	}
}

/*
 * Re-evaluate one softc against the IFF_UP flag of its backing device.
 *
 * In both cases the softc is put back to INIT and carp_setrun() is
 * called.  With the backing device down the timers are stopped first and
 * the softc starts contributing to carp_suppress_preempt (sending
 * advertisements on all vhosts when the counter becomes 1); with the
 * backing device up that contribution is withdrawn.
 */
static void
carp_sc_state(struct carp_softc *sc)
{
	if (sc->sc_carpdev->if_flags & IFF_UP) {
		carp_set_state(sc, INIT);
		carp_setrun(sc, 0);
		if (sc->sc_suppress)
			carp_suppress_preempt--;
		sc->sc_suppress = 0;
		return;
	}

	callout_stop(&sc->sc_ad_tmo);
	callout_stop(&sc->sc_md_tmo);
	callout_stop(&sc->sc_md6_tmo);
	carp_set_state(sc, INIT);
	carp_setrun(sc, 0);
	if (!sc->sc_suppress) {
		if (++carp_suppress_preempt == 1)
			carp_send_ad_all();
	}
	sc->sc_suppress = 1;
}
#endif
2812
2813 static void
2814 carp_stop(struct carp_softc *sc, boolean_t detach)
2815 {
2816         sc->sc_if.if_flags &= ~IFF_RUNNING;
2817
2818         callout_stop(&sc->sc_ad_tmo);
2819         callout_stop(&sc->sc_md_tmo);
2820         callout_stop(&sc->sc_md6_tmo);
2821
2822         if (!detach && sc->sc_state == MASTER)
2823                 carp_send_ad(sc);
2824
2825         if (sc->sc_suppress)
2826                 carp_suppress_preempt--;
2827         sc->sc_suppress = 0;
2828
2829         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2830                 carp_suppress_preempt--;
2831         sc->sc_sendad_errors = 0;
2832         sc->sc_sendad_success = 0;
2833
2834         carp_set_state(sc, INIT);
2835         carp_setrun(sc, 0);
2836 }
2837
2838 static void
2839 carp_suspend(struct carp_softc *sc, boolean_t detach)
2840 {
2841         struct ifnet *cifp = &sc->sc_if;
2842
2843         carp_stop(sc, detach);
2844
2845         /* Retain the running state, if we are not dead yet */
2846         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2847                 cifp->if_flags |= IFF_RUNNING;
2848 }
2849
2850 static int
2851 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2852     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2853 {
2854         struct ip_moptions *imo = &sc->sc_imo;
2855         struct carp_if *ocif = ifp->if_carp;
2856         int error;
2857
2858         KKASSERT(vha->vha_ia != NULL);
2859
2860         KASSERT(ia_if != NULL, ("NULL backing address"));
2861         KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2862         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2863                 ("inactive vhaddr %p is the address owner", vha));
2864
2865         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2866                 ("%s is already on %s", sc->sc_if.if_xname,
2867                  sc->sc_carpdev->if_xname));
2868
2869         if (ocif == NULL) {
2870                 KASSERT(sc->sc_carpdev == NULL,
2871                         ("%s is already on %s", sc->sc_if.if_xname,
2872                          sc->sc_carpdev->if_xname));
2873
2874                 error = ifpromisc(ifp, 1);
2875                 if (error)
2876                         return error;
2877         } else {
2878                 struct carp_softc_container *scc;
2879
2880                 TAILQ_FOREACH(scc, ocif, scc_link) {
2881                         struct carp_softc *vr = scc->scc_softc;
2882
2883                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2884                                 return EINVAL;
2885                 }
2886         }
2887
2888         ifp->if_carp = carp_if_insert(ocif, sc);
2889         KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2890
2891         sc->sc_ia = ia_if;
2892         sc->sc_carpdev = ifp;
2893         sc->arpcom.ac_if.if_hwassist = 0;
2894         if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2895                 sc->arpcom.ac_if.if_hwassist |=
2896                     (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2897         }
2898         if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2899                 sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2900
2901         /*
2902          * Make sure that all protocol threads see the sc_carpdev and
2903          * if_carp changes
2904          */
2905         netmsg_service_sync();
2906
2907         if (ocif != NULL && ifp->if_carp != ocif) {
2908                 /*
2909                  * The old carp list could be safely free now,
2910                  * since no one can access it.
2911                  */
2912                 carp_if_free(ocif);
2913         }
2914
2915         vha->vha_iaback = ia_if;
2916         sc->sc_naddrs++;
2917
2918         if (own) {
2919                 vha->vha_flags |= CARP_VHAF_OWNER;
2920
2921                 /* XXX save user configured advskew? */
2922                 sc->sc_advskew = 0;
2923         }
2924
2925         carp_addroute_vhaddr(sc, vha);
2926
2927         /*
2928          * Join the multicast group only after the backing interface
2929          * has been hooked with the CARP interface.
2930          */
2931         KASSERT(imo->imo_multicast_ifp == NULL ||
2932                 imo->imo_multicast_ifp == &sc->sc_if,
2933                 ("%s didn't leave mcast group on %s",
2934                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2935
2936         if (imo->imo_num_memberships == 0) {
2937                 struct in_addr addr;
2938
2939                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2940                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2941                 if (imo->imo_membership[0] == NULL) {
2942                         carp_deactivate_vhaddr(sc, vha, FALSE);
2943                         return ENOBUFS;
2944                 }
2945
2946                 imo->imo_num_memberships++;
2947                 imo->imo_multicast_ifp = &sc->sc_if;
2948                 imo->imo_multicast_ttl = CARP_DFLTTL;
2949                 imo->imo_multicast_loop = 0;
2950         }
2951
2952         carp_hmac_prepare(sc);
2953         carp_set_state(sc, INIT);
2954         carp_setrun(sc, 0);
2955         return 0;
2956 }
2957
2958 static void
2959 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2960     boolean_t del_iaback)
2961 {
2962         KKASSERT(vha->vha_ia != NULL);
2963
2964         carp_hmac_prepare(sc);
2965
2966         if (vha->vha_iaback == NULL) {
2967                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2968                         ("inactive vhaddr %p is the address owner", vha));
2969                 return;
2970         }
2971
2972         vha->vha_flags &= ~CARP_VHAF_OWNER;
2973         carp_delroute_vhaddr(sc, vha, del_iaback);
2974
2975         KKASSERT(sc->sc_naddrs > 0);
2976         vha->vha_iaback = NULL;
2977         sc->sc_naddrs--;
2978         if (!sc->sc_naddrs) {
2979                 if (sc->sc_naddrs6) {
2980                         carp_multicast_cleanup(sc);
2981                         sc->sc_ia = NULL;
2982                 } else {
2983                         carp_detach(sc, FALSE, del_iaback);
2984                 }
2985         }
2986 }
2987
2988 static void
2989 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2990 {
2991         struct carp_vhaddr *vha;
2992         struct in_ifaddr *ia_if;
2993
2994         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2995         ia_if = ifatoia(ifa_if);
2996
2997         /*
2998          * Test each inactive vhaddr against the newly added address.
2999          * If the newly added address could be the backing address,
3000          * then activate the matching vhaddr.
3001          */
3002         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3003                 const struct in_ifaddr *ia;
3004                 int own;
3005
3006                 if (vha->vha_iaback != NULL)
3007                         continue;
3008
3009                 ia = vha->vha_ia;
3010                 if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3011                     ia->ia_subnet != ia_if->ia_subnet)
3012                         continue;
3013
3014                 own = 0;
3015                 if (ia->ia_addr.sin_addr.s_addr ==
3016                     ia_if->ia_addr.sin_addr.s_addr)
3017                         own = 1;
3018
3019                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3020         }
3021 }
3022
3023 static void
3024 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3025                   struct ifaddr *ifa_if)
3026 {
3027         struct carp_vhaddr *vha;
3028         struct in_ifaddr *ia_if;
3029
3030         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3031         ia_if = ifatoia(ifa_if);
3032
3033         /*
3034          * Ad src address is deleted; set it to NULL.
3035          * Following loop will try pick up a new ad src address
3036          * if one of the vhaddr could retain its backing address.
3037          */
3038         if (sc->sc_ia == ia_if)
3039                 sc->sc_ia = NULL;
3040
3041         /*
3042          * Test each active vhaddr against the deleted address.
3043          * If the deleted address is vhaddr address's backing
3044          * address, then deactivate the vhaddr.
3045          */
3046         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3047                 if (vha->vha_iaback == NULL)
3048                         continue;
3049
3050                 if (vha->vha_iaback == ia_if)
3051                         carp_deactivate_vhaddr(sc, vha, TRUE);
3052                 else if (sc->sc_ia == NULL)
3053                         sc->sc_ia = vha->vha_iaback;
3054         }
3055 }
3056
3057 static void
3058 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3059 {
3060         struct carp_vhaddr *vha;
3061
3062         KKASSERT(sc->sc_carpdev == NULL);
3063
3064         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3065                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3066 }
3067
3068 static void
3069 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3070             enum ifaddr_event event, struct ifaddr *ifa)
3071 {
3072         struct carp_softc *sc;
3073
3074         if (ifa->ifa_addr->sa_family != AF_INET)
3075                 return;
3076
3077         KASSERT(&curthread->td_msgport == netisr_cpuport(0),
3078             ("not in netisr0"));
3079
3080         if (ifp->if_type == IFT_CARP) {
3081                 /*
3082                  * Address is changed on carp(4) interface
3083                  */
3084                 switch (event) {
3085                 case IFADDR_EVENT_ADD:
3086                         carp_add_addr(ifp->if_softc, ifa);
3087                         break;
3088
3089                 case IFADDR_EVENT_CHANGE:
3090                         carp_config_addr(ifp->if_softc, ifa);
3091                         break;
3092
3093                 case IFADDR_EVENT_DELETE:
3094                         carp_del_addr(ifp->if_softc, ifa);
3095                         break;
3096                 }
3097                 return;
3098         }
3099
3100         /*
3101          * Address is changed on non-carp(4) interface
3102          */
3103         if ((ifp->if_flags & IFF_MULTICAST) == 0)
3104                 return;
3105
3106         LIST_FOREACH(sc, &carpif_list, sc_next) {
3107                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3108                         /* Not the parent iface; skip */
3109                         continue;
3110                 }
3111
3112                 switch (event) {
3113                 case IFADDR_EVENT_ADD:
3114                         carp_link_addrs(sc, ifp, ifa);
3115                         break;
3116
3117                 case IFADDR_EVENT_DELETE:
3118                         if (sc->sc_carpdev != NULL) {
3119                                 carp_unlink_addrs(sc, ifp, ifa);
3120                                 if (sc->sc_carpdev == NULL) {
3121                                         /*
3122                                          * We no longer have the parent
3123                                          * interface, however, certain
3124                                          * virtual addresses, which are
3125                                          * not used because they can't
3126                                          * match the previous parent
3127                                          * interface's addresses, may now
3128                                          * match different interface's
3129                                          * addresses.
3130                                          */
3131                                         carp_update_addrs(sc, ifa);
3132                                 }
3133                         } else {
3134                                 /*
3135                                  * The carp(4) interface didn't have a
3136                                  * parent iface, so it is not possible
3137                                  * that it will contain any address to
3138                                  * be unlinked.
3139                                  */
3140                         }
3141                         break;
3142
3143                 case IFADDR_EVENT_CHANGE:
3144                         if (sc->sc_carpdev == NULL) {
3145                                 /*
3146                                  * The carp(4) interface didn't have a
3147                                  * parent iface, so it is not possible
3148                                  * that it will contain any address to
3149                                  * be updated.
3150                                  */
3151                                 carp_link_addrs(sc, ifp, ifa);
3152                         } else {
3153                                 /*
3154                                  * First try breaking tie with the old
3155                                  * address.  Then see whether we could
3156                                  * link certain vhaddr to the new address.
3157                                  * If that fails, i.e. carpdev is NULL,
3158                                  * we try a global update.
3159                                  *
3160                                  * NOTE: The above order is critical.
3161                                  */
3162                                 carp_unlink_addrs(sc, ifp, ifa);
3163                                 carp_link_addrs(sc, ifp, ifa);
3164                                 if (sc->sc_carpdev == NULL) {
3165                                         /*
3166                                          * See the comment in the above
3167                                          * IFADDR_EVENT_DELETE block.
3168                                          */
3169                                         carp_update_addrs(sc, NULL);
3170                                 }
3171                         }
3172                         break;
3173                 }
3174         }
3175 }
3176
3177 void
3178 carp_proto_ctlinput(netmsg_t msg)
3179 {
3180         int cmd = msg->ctlinput.nm_cmd;
3181         struct sockaddr *sa = msg->ctlinput.nm_arg;
3182         struct in_ifaddr_container *iac;
3183
3184         /* We only process PRC_IFDOWN and PRC_IFUP commands */
3185         if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3186                 goto done;
3187
3188         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3189                 struct in_ifaddr *ia = iac->ia;
3190                 struct ifnet *ifp = ia->ia_ifp;
3191
3192                 if (ifp->if_type == IFT_CARP)
3193                         continue;
3194
3195                 if (ia->ia_ifa.ifa_addr == sa) {
3196                         if (cmd == PRC_IFDOWN) {
3197                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3198                                     &ia->ia_ifa);
3199                         } else if (cmd == PRC_IFUP) {
3200                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3201                                     &ia->ia_ifa);
3202                         }
3203                         break;
3204                 }
3205         }
3206 done:
3207         lwkt_replymsg(&msg->lmsg, 0);
3208 }
3209
3210 struct ifnet *
3211 carp_parent(struct ifnet *cifp)
3212 {
3213         struct carp_softc *sc;
3214
3215         KKASSERT(cifp->if_type == IFT_CARP);
3216         sc = cifp->if_softc;
3217
3218         return sc->sc_carpdev;
3219 }
3220
/*
 * Flags to pass to rtinit() for address 'x': RTF_HOST when the address
 * sits on a loopback or point-to-point interface, 0 otherwise.
 */
#define rtinitflags(x)							\
	((((x)->ia_ifp->if_flags &					\
	   (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) ? RTF_HOST : 0)
3224
3225 static int
3226 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3227 {
3228         struct in_ifaddr *ia, *iaback;
3229
3230         if (sc->sc_state != MASTER)
3231                 return 0;
3232
3233         ia = vha->vha_ia;
3234         KKASSERT(ia != NULL);
3235
3236         iaback = vha->vha_iaback;
3237         KKASSERT(iaback != NULL);
3238
3239         return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3240 }
3241
3242 static void
3243 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3244     boolean_t del_iaback)
3245 {
3246         struct in_ifaddr *ia, *iaback;
3247
3248         ia = vha->vha_ia;
3249         KKASSERT(ia != NULL);
3250
3251         iaback = vha->vha_iaback;
3252         KKASSERT(iaback != NULL);
3253
3254         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3255                 rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3256                 return;
3257         }
3258
3259         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3260         in_ifadown_force(&ia->ia_ifa, 1);
3261         ia->ia_flags &= ~IFA_ROUTE;
3262 }
3263
3264 static int
3265 carp_modevent(module_t mod, int type, void *data)
3266 {
3267         switch (type) {
3268         case MOD_LOAD:
3269                 LIST_INIT(&carpif_list);
3270                 carp_ifdetach_event =
3271                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3272                                       EVENTHANDLER_PRI_ANY);
3273                 carp_ifaddr_event =
3274                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3275                                       EVENTHANDLER_PRI_FIRST);
3276                 if_clone_attach(&carp_cloner);
3277                 break;
3278
3279         case MOD_UNLOAD:
3280                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3281                                         carp_ifdetach_event);
3282                 EVENTHANDLER_DEREGISTER(ifaddr_event,
3283                                         carp_ifaddr_event);
3284                 if_clone_detach(&carp_cloner);
3285                 break;
3286
3287         default:
3288                 return (EINVAL);
3289         }
3290         return (0);
3291 }
3292
3293 static moduledata_t carp_mod = {
3294         "carp",
3295         carp_modevent,
3296         0
3297 };
3298 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);