ff41dc972662e794c42ef04be43fa387b1bf0227
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64
65 #ifdef INET
66 #include <netinet/in.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/if_ether.h>
72 #endif
73
74 #ifdef INET6
75 #include <netinet/icmp6.h>
76 #include <netinet/ip6.h>
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79 #include <netinet6/nd6.h>
80 #endif
81
82 #include <netinet/ip_carp.h>
83
/* Base name handed to the interface cloner and to if_initname(). */
#define CARP_IFNAME             "carp"
/* True only when the interface has both IFF_UP and IFF_RUNNING set. */
#define CARP_IS_RUNNING(ifp)    \
        (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
87
/*
 * One IPv4 virtual address of a carp interface.  Entries are linked on
 * the owning softc's sc_vha_list, which carp_insert_vhaddr() keeps
 * sorted by address (smallest first).
 */
struct carp_vhaddr {
        uint32_t                vha_flags;      /* CARP_VHAF_ */
        struct in_ifaddr        *vha_ia;        /* carp address */
        struct in_ifaddr        *vha_iaback;    /* backing address */
        TAILQ_ENTRY(carp_vhaddr) vha_link;      /* sc_vha_list linkage */
};
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
95
/*
 * Per-interface CARP state.  One instance is allocated (zeroed) by
 * carp_clone_create() for every cloned carp interface and released by
 * carp_clone_destroy().
 */
struct carp_softc {
        struct arpcom            arpcom;
        struct ifnet            *sc_carpdev;    /* parent interface */
        struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */

        const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
        struct ip_moptions       sc_imo;

#ifdef INET6
        struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
        struct ip6_moptions      sc_im6o;
#endif /* INET6 */
        TAILQ_ENTRY(carp_softc)  sc_list;       /* parent's carp_if.vhif_vrs linkage */

        enum { INIT = 0, BACKUP, MASTER }
                                 sc_state;
        int                      sc_dead;       /* set by carp_clone_destroy_dispatch() */

        int                      sc_suppress;

        int                      sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS  3
        int                      sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3

        int                      sc_vhid;       /* -1 until configured */
        int                      sc_advskew;
        int                      sc_naddrs;     /* actually used IPv4 vha */
        int                      sc_naddrs6;
        int                      sc_advbase;    /* seconds */
        int                      sc_init_counter; /* 1 at creation; cleared on accepted advertisement */
        uint64_t                 sc_counter;    /* updated from accepted advertisements */

        /* authentication */
#define CARP_HMAC_PAD   64
        unsigned char            sc_key[CARP_KEY_LEN];
        unsigned char            sc_pad[CARP_HMAC_PAD]; /* holds opad after carp_hmac_prepare() */
        SHA1_CTX                 sc_sha1;       /* precomputed inner-hash prefix */

        struct callout           sc_ad_tmo;     /* advertisement timeout */
        struct callout           sc_md_tmo;     /* master down timeout */
        struct callout           sc_md6_tmo;    /* master down timeout */

        LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
};

/* Shorthand for the ifnet embedded in the softc's arpcom. */
#define sc_if   arpcom.ac_if
143
/*
 * State hung off a parent interface's if_carp pointer: the list of carp
 * softcs attached to that parent.  carp_detach() frees it once the list
 * becomes empty.
 */
struct carp_if {
        TAILQ_HEAD(, carp_softc) vhif_vrs;
};
147
/*
 * Message used to hand a CARP operation to the message port returned by
 * cpu_portfn(0) via lwkt_domsg().  Each sender fills in only the field
 * its dispatch handler reads.
 */
struct netmsg_carp {
        struct netmsg_base      base;
        struct ifnet            *nc_carpdev;    /* set by carp_ifdetach() */
        struct carp_softc       *nc_softc;      /* set by carp_clone_destroy() */
};
153
SYSCTL_DECL(_net_inet_carp);

/*
 * Run-time tunables, indexed by CARPCTL_* and exported read-write
 * through the sysctl entries below.
 */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/*
 * Read-only from userland.  While non-zero, carp_proto_input_c() treats
 * a local advskew below 240 as 240 when comparing advertisement timing.
 */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Global packet counters, bumped on the input paths. */
static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");
174
/* Log at LOG_INFO when the CARPCTL_LOG tunable is at least 1. */
#define CARP_LOG(...)   do {                            \
        if (carp_opts[CARPCTL_LOG] > 0)                 \
                log(LOG_INFO, __VA_ARGS__);             \
} while (0)

/* Log at LOG_DEBUG when the CARPCTL_LOG tunable is at least 2. */
#define CARP_DEBUG(...) do {                            \
        if (carp_opts[CARPCTL_LOG] > 1)                 \
                log(LOG_DEBUG, __VA_ARGS__);            \
} while (0)

/*
 * Token asserted held by carp_input(); presumably the one taken by
 * carp_gettok()/carp_reltok(), which are defined elsewhere -- confirm.
 */
static struct lwkt_token carp_tok = LWKT_TOKEN_INITIALIZER(carp_token);
186
187 static void     carp_hmac_prepare(struct carp_softc *);
188 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
189                     unsigned char *);
190 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
191                     unsigned char *);
192 static void     carp_setroute(struct carp_softc *, int);
193 static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
194                     struct carp_header *, sa_family_t);
195 static int      carp_clone_create(struct if_clone *, int, caddr_t);
196 static int      carp_clone_destroy(struct ifnet *);
197 static void     carp_detach(struct carp_softc *, int, boolean_t);
198 static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
199 static void     carp_send_ad_all(void);
200 static void     carp_send_ad_timeout(void *);
201 static void     carp_send_ad(struct carp_softc *);
202 static void     carp_send_arp(struct carp_softc *);
203 static void     carp_master_down_timeout(void *);
204 static void     carp_master_down(struct carp_softc *);
205 static void     carp_setrun(struct carp_softc *, sa_family_t);
206 static void     carp_set_state(struct carp_softc *, int);
207 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
208
209 static void     carp_init(void *);
210 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
211 static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
212                     struct rtentry *);
213 static void     carp_start(struct ifnet *);
214 static void     carp_serialize(struct ifnet *, enum ifnet_serialize);
215 static void     carp_deserialize(struct ifnet *, enum ifnet_serialize);
216 static int      carp_tryserialize(struct ifnet *, enum ifnet_serialize);
217 #ifdef INVARIANTS
218 static void     carp_serialize_assert(struct ifnet *, enum ifnet_serialize,
219                     boolean_t);
220 #endif
221
222 static void     carp_multicast_cleanup(struct carp_softc *);
223 static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
224 static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
225 static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
226 static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
227                     struct ifaddr *);
228 static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
229                     struct ifaddr *);
230 static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);
231
232 static int      carp_get_vhaddr(struct carp_softc *, struct ifdrv *);
233 static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
234                     struct in_ifaddr *);
235 static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
236                     struct ifnet *, struct in_ifaddr *, int);
237 static void     carp_deactivate_vhaddr(struct carp_softc *,
238                     struct carp_vhaddr *, boolean_t);
239 static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
240 static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
241                     boolean_t);
242
243 static void     carp_sc_state(struct carp_softc *);
244 #ifdef INET6
245 static void     carp_send_na(struct carp_softc *);
246 #ifdef notyet
247 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
248 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
249 #endif
250 static void     carp_multicast6_cleanup(struct carp_softc *);
251 #endif
252 static void     carp_stop(struct carp_softc *, int);
253 static void     carp_suspend(struct carp_softc *, int);
254 static void     carp_ioctl_stop(struct carp_softc *);
255
256 static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
257                             struct ifaddr *);
258 static void     carp_ifdetach(void *, struct ifnet *);
259
260 static void     carp_ifdetach_dispatch(netmsg_t);
261 static void     carp_clone_destroy_dispatch(netmsg_t);
262 static void     carp_init_dispatch(netmsg_t);
263 static void     carp_ioctl_stop_dispatch(netmsg_t);
264
/* Malloc type used for carp softcs and per-parent carp_if structures. */
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");

/* Every carp softc, linked through sc_next (see carp_clone_create()). */
static LIST_HEAD(, carp_softc) carpif_list;

/* Cloner that creates/destroys "carpN" interfaces. */
static struct if_clone carp_cloner =
IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
                     0, IF_MAXUNIT);

/*
 * Link-level address 00:00:5e:00:01:00 passed to ether_ifattach() when
 * an interface is created.  NOTE(review): the last byte is presumably
 * replaced by the vhid later -- confirm against the code that sets it.
 */
static uint8_t  carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 };

/* Event handler registrations; their setup is not visible in this part of the file. */
static eventhandler_tag carp_ifdetach_event;
static eventhandler_tag carp_ifaddr_event;
277
278 static __inline void
279 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
280 {
281         struct carp_vhaddr *vha;
282         u_long new_addr, addr;
283
284         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
285
286         /*
287          * Virtual address list is sorted; smaller one first
288          */
289         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
290
291         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
292                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
293
294                 if (addr > new_addr)
295                         break;
296         }
297         if (vha == NULL)
298                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
299         else
300                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
301         vha_new->vha_flags |= CARP_VHAF_ONLIST;
302 }
303
304 static __inline void
305 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
306 {
307         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
308         vha->vha_flags &= ~CARP_VHAF_ONLIST;
309         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
310 }
311
312 static void
313 carp_hmac_prepare(struct carp_softc *sc)
314 {
315         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
316         uint8_t vhid = sc->sc_vhid & 0xff;
317         int i;
318 #ifdef INET6
319         struct ifaddr_container *ifac;
320         struct in6_addr in6;
321 #endif
322 #ifdef INET
323         struct carp_vhaddr *vha;
324 #endif
325
326         /* XXX: possible race here */
327
328         /* compute ipad from key */
329         bzero(sc->sc_pad, sizeof(sc->sc_pad));
330         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
331         for (i = 0; i < sizeof(sc->sc_pad); i++)
332                 sc->sc_pad[i] ^= 0x36;
333
334         /* precompute first part of inner hash */
335         SHA1Init(&sc->sc_sha1);
336         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
337         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
338         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
339         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
340 #ifdef INET
341         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
342                 SHA1Update(&sc->sc_sha1,
343                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
344                     sizeof(struct in_addr));
345         }
346 #endif /* INET */
347 #ifdef INET6
348         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
349                 struct ifaddr *ifa = ifac->ifa;
350
351                 if (ifa->ifa_addr->sa_family == AF_INET6) {
352                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
353                         in6_clearscope(&in6);
354                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
355                 }
356         }
357 #endif /* INET6 */
358
359         /* convert ipad to opad */
360         for (i = 0; i < sizeof(sc->sc_pad); i++)
361                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
362 }
363
364 static void
365 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
366     unsigned char md[20])
367 {
368         SHA1_CTX sha1ctx;
369
370         /* fetch first half of inner hash */
371         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
372
373         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
374         SHA1Final(md, &sha1ctx);
375
376         /* outer hash */
377         SHA1Init(&sha1ctx);
378         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
379         SHA1Update(&sha1ctx, md, 20);
380         SHA1Final(md, &sha1ctx);
381 }
382
/*
 * Check the HMAC carried in a received advertisement.
 *
 * Recomputes the digest for `counter` with this softc's key material
 * and compares it with the received digest `md`.
 *
 * Returns 0 when the digests match and non-zero when they differ (the
 * same convention as bcmp(), which callers already rely on).
 *
 * The comparison always walks all 20 bytes and does not branch on the
 * data, so its run time does not reveal how many leading bytes of a
 * forged digest were correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
        unsigned char md2[20];
        unsigned char diff = 0;
        size_t i;

        carp_hmac_generate(sc, counter, md2);

        /* accumulate every differing bit; no early exit */
        for (i = 0; i < sizeof(md2); i++)
                diff |= md[i] ^ md2[i];

        return (diff != 0);
}
392
393 static void
394 carp_setroute(struct carp_softc *sc, int cmd)
395 {
396 #ifdef INET6
397         struct ifaddr_container *ifac;
398 #endif
399         struct carp_vhaddr *vha;
400
401         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
402
403         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
404                 if (vha->vha_iaback == NULL)
405                         continue;
406                 if (cmd == RTM_DELETE)
407                         carp_delroute_vhaddr(sc, vha, FALSE);
408                 else
409                         carp_addroute_vhaddr(sc, vha);
410         }
411
412 #ifdef INET6
413         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
414                 struct ifaddr *ifa = ifac->ifa;
415
416                 if (ifa->ifa_addr->sa_family == AF_INET6) {
417                         if (cmd == RTM_ADD)
418                                 in6_ifaddloop(ifa);
419                         else
420                                 in6_ifremloop(ifa);
421                 }
422         }
423 #endif /* INET6 */
424 }
425
426 static int
427 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
428 {
429         struct carp_softc *sc;
430         struct ifnet *ifp;
431
432         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
433         ifp = &sc->sc_if;
434
435         sc->sc_suppress = 0;
436         sc->sc_advbase = CARP_DFLTINTV;
437         sc->sc_vhid = -1;       /* required setting */
438         sc->sc_advskew = 0;
439         sc->sc_init_counter = 1;
440         sc->sc_naddrs = 0;
441         sc->sc_naddrs6 = 0;
442
443         TAILQ_INIT(&sc->sc_vha_list);
444
445 #ifdef INET6
446         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
447 #endif
448
449         callout_init_mp(&sc->sc_ad_tmo);
450         callout_init_mp(&sc->sc_md_tmo);
451         callout_init_mp(&sc->sc_md6_tmo);
452
453         if_initname(ifp, CARP_IFNAME, unit);
454         ifp->if_softc = sc;
455         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
456         ifp->if_init = carp_init;
457         ifp->if_ioctl = carp_ioctl;
458         ifp->if_start = carp_start;
459         ifp->if_serialize = carp_serialize;
460         ifp->if_deserialize = carp_deserialize;
461         ifp->if_tryserialize = carp_tryserialize;
462 #ifdef INVARIANTS
463         ifp->if_serialize_assert = carp_serialize_assert;
464 #endif
465         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
466         ifq_set_ready(&ifp->if_snd);
467
468         ether_ifattach(ifp, carp_etheraddr, NULL);
469
470         ifp->if_type = IFT_CARP;
471         ifp->if_output = carp_output;
472
473         carp_gettok();
474         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
475         carp_reltok();
476
477         return (0);
478 }
479
480 static void
481 carp_clone_destroy_dispatch(netmsg_t msg)
482 {
483         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
484         struct carp_softc *sc = cmsg->nc_softc;
485
486         carp_gettok();
487
488         sc->sc_dead = 1;
489         carp_detach(sc, 1, FALSE);
490
491         carp_reltok();
492
493         lwkt_replymsg(&cmsg->base.lmsg, 0);
494 }
495
496 static int
497 carp_clone_destroy(struct ifnet *ifp)
498 {
499         struct carp_softc *sc = ifp->if_softc;
500         struct netmsg_carp cmsg;
501
502         bzero(&cmsg, sizeof(cmsg));
503         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
504             carp_clone_destroy_dispatch);
505         cmsg.nc_softc = sc;
506
507         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
508
509         carp_gettok();
510         LIST_REMOVE(sc, sc_next);
511         carp_reltok();
512
513         bpfdetach(ifp);
514         if_detach(ifp);
515
516         KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n"));
517         kfree(sc, M_CARP);
518
519         return 0;
520 }
521
522 static void
523 carp_detach(struct carp_softc *sc, int detach, boolean_t del_iaback)
524 {
525         struct carp_if *cif;
526
527         carp_suspend(sc, detach);
528
529         carp_multicast_cleanup(sc);
530 #ifdef INET6
531         carp_multicast6_cleanup(sc);
532 #endif
533
534         if (!sc->sc_dead && detach) {
535                 struct carp_vhaddr *vha;
536
537                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
538                         carp_deactivate_vhaddr(sc, vha, del_iaback);
539                 KKASSERT(sc->sc_naddrs == 0);
540         }
541
542         if (sc->sc_carpdev != NULL) {
543                 cif = sc->sc_carpdev->if_carp;
544                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
545                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
546                         ifpromisc(sc->sc_carpdev, 0);
547                         sc->sc_carpdev->if_carp = NULL;
548                         kfree(cif, M_CARP);
549                 }
550                 sc->sc_carpdev = NULL;
551                 sc->sc_ia = NULL;
552         }
553 }
554
555 static void
556 carp_ifdetach_dispatch(netmsg_t msg)
557 {
558         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
559         struct ifnet *ifp = cmsg->nc_carpdev;
560         struct carp_if *cif = ifp->if_carp;
561         struct carp_softc *sc;
562
563         carp_gettok();
564
565         while (ifp->if_carp &&
566                (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL)
567                 carp_detach(sc, 1, TRUE);
568
569         carp_reltok();
570
571         lwkt_replymsg(&cmsg->base.lmsg, 0);
572 }
573
574 /* Detach an interface from the carp. */
575 static void
576 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
577 {
578         struct netmsg_carp cmsg;
579
580         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
581
582         bzero(&cmsg, sizeof(cmsg));
583         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
584             carp_ifdetach_dispatch);
585         cmsg.nc_carpdev = ifp;
586
587         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
588 }
589
590 /*
591  * process input packet.
592  * we have rearranged checks order compared to the rfc,
593  * but it seems more efficient this way or not possible otherwise.
594  */
595 int
596 carp_proto_input(struct mbuf **mp, int *offp, int proto)
597 {
598         struct mbuf *m = *mp;
599         struct ip *ip = mtod(m, struct ip *);
600         struct ifnet *ifp = m->m_pkthdr.rcvif;
601         struct carp_header *ch;
602         struct carp_softc *sc;
603         int len, iphlen;
604
605         carp_gettok();
606
607         iphlen = *offp;
608         *mp = NULL;
609
610         carpstats.carps_ipackets++;
611
612         if (!carp_opts[CARPCTL_ALLOW]) {
613                 m_freem(m);
614                 goto back;
615         }
616
617         /* Check if received on a valid carp interface */
618         if (ifp->if_type != IFT_CARP) {
619                 carpstats.carps_badif++;
620                 CARP_LOG("carp_proto_input: packet received on non-carp "
621                     "interface: %s\n", ifp->if_xname);
622                 m_freem(m);
623                 goto back;
624         }
625
626         if (!CARP_IS_RUNNING(ifp)) {
627                 carpstats.carps_badif++;
628                 CARP_LOG("carp_proto_input: packet received on stopped carp "
629                     "interface: %s\n", ifp->if_xname);
630                 m_freem(m);
631                 goto back;
632         }
633
634         sc = ifp->if_softc;
635         if (sc->sc_carpdev == NULL) {
636                 carpstats.carps_badif++;
637                 CARP_LOG("carp_proto_input: packet received on defunc carp "
638                     "interface: %s\n", ifp->if_xname);
639                 m_freem(m);
640                 goto back;
641         }
642
643         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
644                 carpstats.carps_badif++;
645                 CARP_LOG("carp_proto_input: non-mcast packet on "
646                     "interface: %s\n", ifp->if_xname);
647                 m_freem(m);
648                 goto back;
649         }
650
651         /* Verify that the IP TTL is CARP_DFLTTL. */
652         if (ip->ip_ttl != CARP_DFLTTL) {
653                 carpstats.carps_badttl++;
654                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
655                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
656                 m_freem(m);
657                 goto back;
658         }
659
660         /* Minimal CARP packet size */
661         len = iphlen + sizeof(*ch);
662
663         /*
664          * Verify that the received packet length is
665          * not less than the CARP header
666          */
667         if (m->m_pkthdr.len < len) {
668                 carpstats.carps_badlen++;
669                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
670                     ifp->if_xname);
671                 m_freem(m);
672                 goto back;
673         }
674
675         /* Make sure that CARP header is contiguous */
676         if (len > m->m_len) {
677                 m = m_pullup(m, len);
678                 if (m == NULL) {
679                         carpstats.carps_hdrops++;
680                         CARP_LOG("carp_proto_input: m_pullup failed\n");
681                         goto back;
682                 }
683                 ip = mtod(m, struct ip *);
684         }
685         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
686
687         /* Verify the CARP checksum */
688         if (in_cksum_skip(m, len, iphlen)) {
689                 carpstats.carps_badsum++;
690                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
691                     ifp->if_xname);
692                 m_freem(m);
693                 goto back;
694         }
695         carp_proto_input_c(sc, m, ch, AF_INET);
696 back:
697         carp_reltok();
698         return(IPPROTO_DONE);
699 }
700
#ifdef INET6
/*
 * IPv6 protocol input for CARP.
 *
 * Always returns IPPROTO_DONE.  The packet is dropped unless CARP input
 * is allowed, it arrived on a running carp interface that has a parent,
 * its hop limit is CARP_DFLTTL, a complete CARP header can be obtained
 * at offset *offp and the CARP checksum verifies.  Surviving packets
 * are handed to carp_proto_input_c().
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
        struct ifnet *ifp = m->m_pkthdr.rcvif;
        struct carp_header *ch;
        struct carp_softc *sc;
        u_int len;

        carp_gettok();

        carpstats.carps_ipackets6++;

        if (!carp_opts[CARPCTL_ALLOW]) {
                m_freem(m);
                goto back;
        }

        /* check if received on a valid carp interface */
        if (ifp->if_type != IFT_CARP) {
                carpstats.carps_badif++;
                CARP_LOG("carp6_proto_input: packet received on non-carp "
                    "interface: %s\n", ifp->if_xname);
                m_freem(m);
                goto back;
        }

        if (!CARP_IS_RUNNING(ifp)) {
                carpstats.carps_badif++;
                /* message used to name carp_proto_input (the IPv4 path) */
                CARP_LOG("carp6_proto_input: packet received on stopped carp "
                    "interface: %s\n", ifp->if_xname);
                m_freem(m);
                goto back;
        }

        sc = ifp->if_softc;
        if (sc->sc_carpdev == NULL) {
                carpstats.carps_badif++;
                CARP_LOG("carp6_proto_input: packet received on defunc-carp "
                    "interface: %s\n", ifp->if_xname);
                m_freem(m);
                goto back;
        }

        /* verify that the hop limit is CARP_DFLTTL */
        if (ip6->ip6_hlim != CARP_DFLTTL) {
                carpstats.carps_badttl++;
                CARP_LOG("carp6_proto_input: received ttl %d != %d on %s\n",
                    ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname);
                m_freem(m);
                goto back;
        }

        /*
         * verify that we have a complete carp packet; the length is
         * saved first because it is only used for the failure log, by
         * which time m must no longer be dereferenced here.
         * NOTE(review): the mbuf is not freed on this failure path --
         * presumably IP6_EXTHDR_GET has already released it; confirm.
         */
        len = m->m_len;
        IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
        if (ch == NULL) {
                carpstats.carps_badlen++;
                CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
                goto back;
        }

        /* verify the CARP checksum */
        if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
                carpstats.carps_badsum++;
                CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
                    ifp->if_xname);
                m_freem(m);
                goto back;
        }

        carp_proto_input_c(sc, m, ch, AF_INET6);
back:
        carp_reltok();
        return (IPPROTO_DONE);
}
#endif /* INET6 */
780
781 static void
782 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
783     struct carp_header *ch, sa_family_t af)
784 {
785         struct ifnet *cifp;
786         uint64_t tmp_counter;
787         struct timeval sc_tv, ch_tv;
788
789         if (sc->sc_vhid != ch->carp_vhid) {
790                 /*
791                  * CARP uses multicast, however, multicast packets
792                  * are tapped to all CARP interfaces on the physical
793                  * interface receiving the CARP packets, so we don't
794                  * update any stats here.
795                  */
796                 m_freem(m);
797                 return;
798         }
799         cifp = &sc->sc_if;
800
801         /* verify the CARP version. */
802         if (ch->carp_version != CARP_VERSION) {
803                 carpstats.carps_badver++;
804                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
805                          ch->carp_version);
806                 m_freem(m);
807                 return;
808         }
809
810         /* verify the hash */
811         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
812                 carpstats.carps_badauth++;
813                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
814                 m_freem(m);
815                 return;
816         }
817
818         tmp_counter = ntohl(ch->carp_counter[0]);
819         tmp_counter = tmp_counter<<32;
820         tmp_counter += ntohl(ch->carp_counter[1]);
821
822         /* XXX Replay protection goes here */
823
824         sc->sc_init_counter = 0;
825         sc->sc_counter = tmp_counter;
826
827         sc_tv.tv_sec = sc->sc_advbase;
828         if (carp_suppress_preempt && sc->sc_advskew <  240)
829                 sc_tv.tv_usec = 240 * 1000000 / 256;
830         else
831                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
832         ch_tv.tv_sec = ch->carp_advbase;
833         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
834
835         switch (sc->sc_state) {
836         case INIT:
837                 break;
838
839         case MASTER:
840                 /*
841                  * If we receive an advertisement from a master who's going to
842                  * be more frequent than us, go into BACKUP state.
843                  */
844                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
845                     timevalcmp(&sc_tv, &ch_tv, ==)) {
846                         callout_stop(&sc->sc_ad_tmo);
847                         CARP_DEBUG("%s: MASTER -> BACKUP "
848                            "(more frequent advertisement received)\n",
849                            cifp->if_xname);
850                         carp_set_state(sc, BACKUP);
851                         carp_setrun(sc, 0);
852                         carp_setroute(sc, RTM_DELETE);
853                 }
854                 break;
855
856         case BACKUP:
857                 /*
858                  * If we're pre-empting masters who advertise slower than us,
859                  * and this one claims to be slower, treat him as down.
860                  */
861                 if (carp_opts[CARPCTL_PREEMPT] &&
862                     timevalcmp(&sc_tv, &ch_tv, <)) {
863                         CARP_DEBUG("%s: BACKUP -> MASTER "
864                             "(preempting a slower master)\n", cifp->if_xname);
865                         carp_master_down(sc);
866                         break;
867                 }
868
869                 /*
870                  *  If the master is going to advertise at such a low frequency
871                  *  that he's guaranteed to time out, we'd might as well just
872                  *  treat him as timed out now.
873                  */
874                 sc_tv.tv_sec = sc->sc_advbase * 3;
875                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
876                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
877                                    cifp->if_xname);
878                         carp_master_down(sc);
879                         break;
880                 }
881
882                 /*
883                  * Otherwise, we reset the counter and wait for the next
884                  * advertisement.
885                  */
886                 carp_setrun(sc, af);
887                 break;
888         }
889         m_freem(m);
890 }
891
892 struct mbuf *
893 carp_input(void *v, struct mbuf *m)
894 {
895         struct carp_if *cif = v;
896         struct ether_header *eh;
897         struct carp_softc *sc;
898         struct ifnet *ifp;
899
900         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
901
902         eh = mtod(m, struct ether_header *);
903
904         ifp = carp_forus(cif, eh->ether_dhost);
905         if (ifp != NULL) {
906                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
907                 return NULL;
908         }
909
910         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
911                 return m;
912
913         /*
914          * XXX Should really check the list of multicast addresses
915          * for each CARP interface _before_ copying.
916          */
917         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
918                 struct mbuf *m0;
919
920                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
921                         continue;
922
923                 m0 = m_dup(m, MB_DONTWAIT);
924                 if (m0 == NULL)
925                         continue;
926
927                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
928         }
929         return m;
930 }
931
932 static void
933 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
934 {
935         if (sc->sc_init_counter) {
936                 /* this could also be seconds since unix epoch */
937                 sc->sc_counter = karc4random();
938                 sc->sc_counter = sc->sc_counter << 32;
939                 sc->sc_counter += karc4random();
940         } else {
941                 sc->sc_counter++;
942         }
943
944         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
945         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
946
947         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
948 }
949
950 static void
951 carp_send_ad_all(void)
952 {
953         struct carp_softc *sc;
954
955         LIST_FOREACH(sc, &carpif_list, sc_next) {
956                 if (sc->sc_carpdev == NULL)
957                         continue;
958
959                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
960                         carp_send_ad(sc);
961         }
962 }
963
/*
 * Callout handler for the advertisement timer: sends an advertisement
 * for the softc passed as xsc while holding the carp token.
 */
static void
carp_send_ad_timeout(void *xsc)
{
	struct carp_softc *sc = xsc;

	carp_gettok();
	carp_send_ad(sc);
	carp_reltok();
}
971
972 static void
973 carp_send_ad(struct carp_softc *sc)
974 {
975         struct ifnet *cifp = &sc->sc_if;
976         struct carp_header ch;
977         struct timeval tv;
978         struct carp_header *ch_ptr;
979         struct mbuf *m;
980         int len, advbase, advskew;
981
982         if (!CARP_IS_RUNNING(cifp)) {
983                 /* Bow out */
984                 advbase = 255;
985                 advskew = 255;
986         } else {
987                 advbase = sc->sc_advbase;
988                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
989                         advskew = sc->sc_advskew;
990                 else
991                         advskew = 240;
992                 tv.tv_sec = advbase;
993                 tv.tv_usec = advskew * 1000000 / 256;
994         }
995
996         ch.carp_version = CARP_VERSION;
997         ch.carp_type = CARP_ADVERTISEMENT;
998         ch.carp_vhid = sc->sc_vhid;
999         ch.carp_advbase = advbase;
1000         ch.carp_advskew = advskew;
1001         ch.carp_authlen = 7;    /* XXX DEFINE */
1002         ch.carp_pad1 = 0;       /* must be zero */
1003         ch.carp_cksum = 0;
1004
1005 #ifdef INET
1006         if (sc->sc_ia != NULL) {
1007                 struct ip *ip;
1008
1009                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1010                 if (m == NULL) {
1011                         cifp->if_oerrors++;
1012                         carpstats.carps_onomem++;
1013                         /* XXX maybe less ? */
1014                         if (advbase != 255 || advskew != 255)
1015                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1016                                     carp_send_ad_timeout, sc);
1017                         return;
1018                 }
1019                 len = sizeof(*ip) + sizeof(ch);
1020                 m->m_pkthdr.len = len;
1021                 m->m_pkthdr.rcvif = NULL;
1022                 m->m_len = len;
1023                 MH_ALIGN(m, m->m_len);
1024                 m->m_flags |= M_MCAST;
1025                 ip = mtod(m, struct ip *);
1026                 ip->ip_v = IPVERSION;
1027                 ip->ip_hl = sizeof(*ip) >> 2;
1028                 ip->ip_tos = IPTOS_LOWDELAY;
1029                 ip->ip_len = len;
1030                 ip->ip_id = ip_newid();
1031                 ip->ip_off = IP_DF;
1032                 ip->ip_ttl = CARP_DFLTTL;
1033                 ip->ip_p = IPPROTO_CARP;
1034                 ip->ip_sum = 0;
1035                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1036                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1037
1038                 ch_ptr = (struct carp_header *)(&ip[1]);
1039                 bcopy(&ch, ch_ptr, sizeof(ch));
1040                 carp_prepare_ad(sc, ch_ptr);
1041                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1042
1043                 getmicrotime(&cifp->if_lastchange);
1044                 cifp->if_opackets++;
1045                 cifp->if_obytes += len;
1046                 carpstats.carps_opackets++;
1047
1048                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1049                         cifp->if_oerrors++;
1050                         if (sc->sc_sendad_errors < INT_MAX)
1051                                 sc->sc_sendad_errors++;
1052                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1053                                 carp_suppress_preempt++;
1054                                 if (carp_suppress_preempt == 1) {
1055                                         carp_send_ad_all();
1056                                 }
1057                         }
1058                         sc->sc_sendad_success = 0;
1059                 } else {
1060                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1061                                 if (++sc->sc_sendad_success >=
1062                                     CARP_SENDAD_MIN_SUCCESS) {
1063                                         carp_suppress_preempt--;
1064                                         sc->sc_sendad_errors = 0;
1065                                 }
1066                         } else {
1067                                 sc->sc_sendad_errors = 0;
1068                         }
1069                 }
1070         }
1071 #endif /* INET */
1072 #ifdef INET6
1073         if (sc->sc_ia6) {
1074                 struct ip6_hdr *ip6;
1075
1076                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1077                 if (m == NULL) {
1078                         cifp->if_oerrors++;
1079                         carpstats.carps_onomem++;
1080                         /* XXX maybe less ? */
1081                         if (advbase != 255 || advskew != 255)
1082                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1083                                     carp_send_ad_timeout, sc);
1084                         return;
1085                 }
1086                 len = sizeof(*ip6) + sizeof(ch);
1087                 m->m_pkthdr.len = len;
1088                 m->m_pkthdr.rcvif = NULL;
1089                 m->m_len = len;
1090                 MH_ALIGN(m, m->m_len);
1091                 m->m_flags |= M_MCAST;
1092                 ip6 = mtod(m, struct ip6_hdr *);
1093                 bzero(ip6, sizeof(*ip6));
1094                 ip6->ip6_vfc |= IPV6_VERSION;
1095                 ip6->ip6_hlim = CARP_DFLTTL;
1096                 ip6->ip6_nxt = IPPROTO_CARP;
1097                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1098                     sizeof(struct in6_addr));
1099                 /* set the multicast destination */
1100
1101                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1102                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1103                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1104                         cifp->if_oerrors++;
1105                         m_freem(m);
1106                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1107                         return;
1108                 }
1109
1110                 ch_ptr = (struct carp_header *)(&ip6[1]);
1111                 bcopy(&ch, ch_ptr, sizeof(ch));
1112                 carp_prepare_ad(sc, ch_ptr);
1113                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1114
1115                 getmicrotime(&cifp->if_lastchange);
1116                 cifp->if_opackets++;
1117                 cifp->if_obytes += len;
1118                 carpstats.carps_opackets6++;
1119
1120                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1121                         cifp->if_oerrors++;
1122                         if (sc->sc_sendad_errors < INT_MAX)
1123                                 sc->sc_sendad_errors++;
1124                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1125                                 carp_suppress_preempt++;
1126                                 if (carp_suppress_preempt == 1) {
1127                                         carp_send_ad_all();
1128                                 }
1129                         }
1130                         sc->sc_sendad_success = 0;
1131                 } else {
1132                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1133                                 if (++sc->sc_sendad_success >=
1134                                     CARP_SENDAD_MIN_SUCCESS) {
1135                                         carp_suppress_preempt--;
1136                                         sc->sc_sendad_errors = 0;
1137                                 }
1138                         } else {
1139                                 sc->sc_sendad_errors = 0;
1140                         }
1141                 }
1142         }
1143 #endif /* INET6 */
1144
1145         if (advbase != 255 || advskew != 255)
1146                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1147                     carp_send_ad_timeout, sc);
1148 }
1149
1150 /*
1151  * Broadcast a gratuitous ARP request containing
1152  * the virtual router MAC address for each IP address
1153  * associated with the virtual router.
1154  */
1155 static void
1156 carp_send_arp(struct carp_softc *sc)
1157 {
1158         const struct carp_vhaddr *vha;
1159
1160         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1161                 if (vha->vha_iaback == NULL)
1162                         continue;
1163                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1164         }
1165 }
1166
1167 #ifdef INET6
1168 static void
1169 carp_send_na(struct carp_softc *sc)
1170 {
1171         struct ifaddr_container *ifac;
1172         struct in6_addr *in6;
1173         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1174
1175         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1176                 struct ifaddr *ifa = ifac->ifa;
1177
1178                 if (ifa->ifa_addr->sa_family != AF_INET6)
1179                         continue;
1180
1181                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1182                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1183                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1184                 DELAY(1000);    /* XXX */
1185         }
1186 }
1187 #endif /* INET6 */
1188
1189 static __inline const struct carp_vhaddr *
1190 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1191 {
1192         struct carp_vhaddr *vha;
1193
1194         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1195                 if (vha->vha_iaback == NULL)
1196                         continue;
1197
1198                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1199                         return vha;
1200         }
1201         return NULL;
1202 }
1203
1204 #ifdef notyet
1205 static int
1206 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1207                      const struct in_addr *isaddr, uint8_t **enaddr)
1208 {
1209         const struct carp_softc *vh;
1210         int index, count = 0;
1211
1212         /*
1213          * XXX proof of concept implementation.
1214          * We use the source ip to decide which virtual host should
1215          * handle the request. If we're master of that virtual host,
1216          * then we respond, otherwise, just drop the arp packet on
1217          * the floor.
1218          */
1219
1220         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1221                 if (!CARP_IS_RUNNING(&vh->sc_if))
1222                         continue;
1223
1224                 if (carp_find_addr(vh, itaddr) != NULL)
1225                         count++;
1226         }
1227         if (count == 0)
1228                 return 0;
1229
1230         /* this should be a hash, like pf_hash() */
1231         index = ntohl(isaddr->s_addr) % count;
1232         count = 0;
1233
1234         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1235                 if (!CARP_IS_RUNNING(&vh->sc_if))
1236                         continue;
1237
1238                 if (carp_find_addr(vh, itaddr) == NULL)
1239                         continue;
1240
1241                 if (count == index) {
1242                         if (vh->sc_state == MASTER) {
1243                                 *enaddr = IF_LLADDR(&vh->sc_if);
1244                                 return 1;
1245                         } else {
1246                                 return 0;
1247                         }
1248                 }
1249                 count++;
1250         }
1251         return 0;
1252 }
1253 #endif
1254
1255 int
1256 carp_iamatch(const struct in_ifaddr *ia)
1257 {
1258         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1259
1260         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1261
1262 #ifdef notyet
1263         if (carp_opts[CARPCTL_ARPBALANCE])
1264                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1265 #endif
1266
1267         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1268                 return 0;
1269
1270         return 1;
1271 }
1272
1273 #ifdef INET6
1274 struct ifaddr *
1275 carp_iamatch6(void *v, struct in6_addr *taddr)
1276 {
1277         struct carp_if *cif = v;
1278         struct carp_softc *vh;
1279
1280         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1281
1282         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1283                 struct ifaddr_container *ifac;
1284
1285                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1286                               ifa_link) {
1287                         struct ifaddr *ifa = ifac->ifa;
1288
1289                         if (IN6_ARE_ADDR_EQUAL(taddr,
1290                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1291                             CARP_IS_RUNNING(&vh->sc_if) &&
1292                             vh->sc_state == MASTER) {
1293                                 return (ifa);
1294                         }
1295                 }
1296         }
1297         return (NULL);
1298 }
1299
1300 void *
1301 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1302 {
1303         struct m_tag *mtag;
1304         struct carp_if *cif = v;
1305         struct carp_softc *sc;
1306
1307         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1308
1309         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1310                 struct ifaddr_container *ifac;
1311
1312                 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1313                               ifa_link) {
1314                         struct ifaddr *ifa = ifac->ifa;
1315
1316                         if (IN6_ARE_ADDR_EQUAL(taddr,
1317                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1318                             CARP_IS_RUNNING(&sc->sc_if)) {
1319                                 struct ifnet *ifp = &sc->sc_if;
1320
1321                                 mtag = m_tag_get(PACKET_TAG_CARP,
1322                                     sizeof(struct ifnet *), MB_DONTWAIT);
1323                                 if (mtag == NULL) {
1324                                         /* better a bit than nothing */
1325                                         return (IF_LLADDR(ifp));
1326                                 }
1327                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1328                                     sizeof(struct ifnet *));
1329                                 m_tag_prepend(m, mtag);
1330
1331                                 return (IF_LLADDR(ifp));
1332                         }
1333                 }
1334         }
1335         return (NULL);
1336 }
1337 #endif
1338
1339 static struct ifnet *
1340 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1341 {
1342         struct carp_softc *sc;
1343
1344         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1345
1346         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1347                 return NULL;
1348
1349         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1350                 struct ifnet *ifp = &sc->sc_if;
1351
1352                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1353                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1354                         return ifp;
1355         }
1356         return NULL;
1357 }
1358
1359 static void
1360 carp_master_down_timeout(void *xsc)
1361 {
1362         struct carp_softc *sc = xsc;
1363
1364         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1365                    sc->sc_if.if_xname);
1366         carp_gettok();
1367         carp_master_down(sc);
1368         carp_reltok();
1369 }
1370
1371 static void
1372 carp_master_down(struct carp_softc *sc)
1373 {
1374         switch (sc->sc_state) {
1375         case INIT:
1376                 kprintf("%s: master_down event in INIT state\n",
1377                         sc->sc_if.if_xname);
1378                 break;
1379
1380         case MASTER:
1381                 break;
1382
1383         case BACKUP:
1384                 carp_set_state(sc, MASTER);
1385                 carp_send_ad(sc);
1386                 carp_send_arp(sc);
1387 #ifdef INET6
1388                 carp_send_na(sc);
1389 #endif /* INET6 */
1390                 carp_setrun(sc, 0);
1391                 carp_setroute(sc, RTM_ADD);
1392                 break;
1393         }
1394 }
1395
1396 /*
1397  * When in backup state, af indicates whether to reset the master down timer
1398  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1399  */
1400 static void
1401 carp_setrun(struct carp_softc *sc, sa_family_t af)
1402 {
1403         struct ifnet *cifp = &sc->sc_if;
1404         struct timeval tv;
1405
1406         if (sc->sc_carpdev == NULL) {
1407                 carp_set_state(sc, INIT);
1408                 return;
1409         }
1410
1411         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1412             (sc->sc_naddrs || sc->sc_naddrs6)) {
1413                 /* Nothing */
1414         } else {
1415                 carp_setroute(sc, RTM_DELETE);
1416                 return;
1417         }
1418
1419         switch (sc->sc_state) {
1420         case INIT:
1421                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1422                         carp_send_ad(sc);
1423                         carp_send_arp(sc);
1424 #ifdef INET6
1425                         carp_send_na(sc);
1426 #endif /* INET6 */
1427                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1428                                    cifp->if_xname);
1429                         carp_set_state(sc, MASTER);
1430                         carp_setroute(sc, RTM_ADD);
1431                 } else {
1432                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1433                         carp_set_state(sc, BACKUP);
1434                         carp_setroute(sc, RTM_DELETE);
1435                         carp_setrun(sc, 0);
1436                 }
1437                 break;
1438
1439         case BACKUP:
1440                 callout_stop(&sc->sc_ad_tmo);
1441                 tv.tv_sec = 3 * sc->sc_advbase;
1442                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1443                 switch (af) {
1444 #ifdef INET
1445                 case AF_INET:
1446                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1447                             carp_master_down_timeout, sc);
1448                         break;
1449 #endif /* INET */
1450 #ifdef INET6
1451                 case AF_INET6:
1452                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1453                             carp_master_down_timeout, sc);
1454                         break;
1455 #endif /* INET6 */
1456                 default:
1457                         if (sc->sc_naddrs)
1458                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1459                                     carp_master_down_timeout, sc);
1460                         if (sc->sc_naddrs6)
1461                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1462                                     carp_master_down_timeout, sc);
1463                         break;
1464                 }
1465                 break;
1466
1467         case MASTER:
1468                 tv.tv_sec = sc->sc_advbase;
1469                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1470                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1471                     carp_send_ad_timeout, sc);
1472                 break;
1473         }
1474 }
1475
1476 static void
1477 carp_multicast_cleanup(struct carp_softc *sc)
1478 {
1479         struct ip_moptions *imo = &sc->sc_imo;
1480
1481         if (imo->imo_num_memberships == 0)
1482                 return;
1483         KKASSERT(imo->imo_num_memberships == 1);
1484
1485         in_delmulti(imo->imo_membership[0]);
1486         imo->imo_membership[0] = NULL;
1487         imo->imo_num_memberships = 0;
1488         imo->imo_multicast_ifp = NULL;
1489 }
1490
1491 #ifdef INET6
1492 static void
1493 carp_multicast6_cleanup(struct carp_softc *sc)
1494 {
1495         struct ip6_moptions *im6o = &sc->sc_im6o;
1496
1497         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1498                 struct in6_multi_mship *imm =
1499                     LIST_FIRST(&im6o->im6o_memberships);
1500
1501                 LIST_REMOVE(imm, i6mm_chain);
1502                 in6_leavegroup(imm);
1503         }
1504         im6o->im6o_multicast_ifp = NULL;
1505 }
1506 #endif
1507
1508 static int
1509 carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1510 {
1511         const struct carp_vhaddr *vha;
1512         struct ifcarpvhaddr *carpa, *carpa0;
1513         int count, len, error;
1514
1515         count = 0;
1516         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1517                 ++count;
1518
1519         if (ifd->ifd_len == 0) {
1520                 ifd->ifd_len = count * sizeof(*carpa);
1521                 return 0;
1522         } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) {
1523                 ifd->ifd_len = 0;
1524                 return 0;
1525         }
1526         len = min(ifd->ifd_len, sizeof(*carpa) * count);
1527         KKASSERT(len >= sizeof(*carpa));
1528
1529         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1530         if (carpa == NULL)
1531                 return ENOMEM;
1532
1533         count = 0;
1534         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1535                 if (len < sizeof(*carpa))
1536                         break;
1537
1538                 carpa->carpa_flags = vha->vha_flags;
1539                 carpa->carpa_addr.sin_family = AF_INET;
1540                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1541
1542                 carpa->carpa_baddr.sin_family = AF_INET;
1543                 if (vha->vha_iaback == NULL) {
1544                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1545                 } else {
1546                         carpa->carpa_baddr.sin_addr =
1547                         vha->vha_iaback->ia_addr.sin_addr;
1548                 }
1549
1550                 ++carpa;
1551                 ++count;
1552                 len -= sizeof(*carpa);
1553         }
1554         ifd->ifd_len = sizeof(*carpa) * count;
1555         KKASSERT(ifd->ifd_len > 0);
1556
1557         error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len);
1558         kfree(carpa0, M_TEMP);
1559         return error;
1560 }
1561
1562 static int
1563 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1564     struct in_ifaddr *ia_del)
1565 {
1566         struct ifnet *ifp;
1567         struct in_ifaddr *ia_if;
1568         struct in_ifaddr_container *iac;
1569         const struct sockaddr_in *sin;
1570         u_long iaddr;
1571         int own;
1572
1573         KKASSERT(vha->vha_ia != NULL);
1574
1575         sin = &vha->vha_ia->ia_addr;
1576         iaddr = ntohl(sin->sin_addr.s_addr);
1577
1578         ia_if = NULL;
1579         own = 0;
1580         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1581                 struct in_ifaddr *ia = iac->ia;
1582
1583                 if (ia == ia_del)
1584                         continue;
1585
1586                 if (ia->ia_ifp->if_type == IFT_CARP)
1587                         continue;
1588
1589                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1590                         continue;
1591
1592                 /* and, yeah, we need a multicast-capable iface too */
1593                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1594                         continue;
1595
1596                 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1597                         if (sin->sin_addr.s_addr ==
1598                             ia->ia_addr.sin_addr.s_addr)
1599                                 own = 1;
1600                         if (ia_if == NULL)
1601                                 ia_if = ia;
1602                         else if (sc->sc_carpdev != NULL &&
1603                                  sc->sc_carpdev == ia->ia_ifp)
1604                                 ia_if = ia;
1605                 }
1606         }
1607
1608         carp_deactivate_vhaddr(sc, vha, FALSE);
1609         if (!ia_if)
1610                 return ENOENT;
1611
1612         ifp = ia_if->ia_ifp;
1613
1614         /* XXX Don't allow parent iface to be changed */
1615         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
1616                 return EEXIST;
1617
1618         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
1619 }
1620
1621 static void
1622 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1623 {
1624         struct carp_vhaddr *vha_new;
1625         struct in_ifaddr *carp_ia;
1626 #ifdef INVARIANTS
1627         struct carp_vhaddr *vha;
1628 #endif
1629
1630         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1631         carp_ia = ifatoia(carp_ifa);
1632
1633 #ifdef INVARIANTS
1634         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1635                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
1636 #endif
1637
1638         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
1639         vha_new->vha_ia = carp_ia;
1640         carp_insert_vhaddr(sc, vha_new);
1641
1642         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
1643                 /*
1644                  * If the above configuration fails, it may only mean
1645                  * that the new address is problematic.  However, the
1646                  * carp(4) interface may already have several working
1647                  * addresses.  Since the expected behaviour of
1648                  * SIOC[AS]IFADDR is to put the NIC into working state,
1649                  * we try starting the state machine manually here with
1650                  * the hope that the carp(4)'s previously working
1651                  * addresses still could be brought up.
1652                  */
1653                 carp_hmac_prepare(sc);
1654                 carp_set_state(sc, INIT);
1655                 carp_setrun(sc, 0);
1656         }
1657 }
1658
1659 static void
1660 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1661 {
1662         struct carp_vhaddr *vha;
1663         struct in_ifaddr *carp_ia;
1664
1665         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1666         carp_ia = ifatoia(carp_ifa);
1667
1668         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1669                 KKASSERT(vha->vha_ia != NULL);
1670                 if (vha->vha_ia == carp_ia)
1671                         break;
1672         }
1673         KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa));
1674
1675         /*
1676          * Remove the vhaddr from the list before deactivating
1677          * the vhaddr, so that the HMAC could be correctly
1678          * updated in carp_deactivate_vhaddr()
1679          */
1680         carp_remove_vhaddr(sc, vha);
1681
1682         carp_deactivate_vhaddr(sc, vha, FALSE);
1683         kfree(vha, M_CARP);
1684 }
1685
1686 static void
1687 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1688 {
1689         struct carp_vhaddr *vha;
1690         struct in_ifaddr *carp_ia;
1691
1692         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1693         carp_ia = ifatoia(carp_ifa);
1694
1695         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1696                 KKASSERT(vha->vha_ia != NULL);
1697                 if (vha->vha_ia == carp_ia)
1698                         break;
1699         }
1700         KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa));
1701
1702         /* Remove then reinsert, to keep the vhaddr list sorted */
1703         carp_remove_vhaddr(sc, vha);
1704         carp_insert_vhaddr(sc, vha);
1705
1706         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
1707                 /* See the comment in carp_add_addr() */
1708                 carp_hmac_prepare(sc);
1709                 carp_set_state(sc, INIT);
1710                 carp_setrun(sc, 0);
1711         }
1712 }
1713
1714 #ifdef notyet
1715
1716 #ifdef INET6
1717 static int
1718 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1719 {
1720         struct ifnet *ifp;
1721         struct carp_if *cif;
1722         struct in6_ifaddr *ia, *ia_if;
1723         struct ip6_moptions *im6o = &sc->sc_im6o;
1724         struct in6_multi_mship *imm;
1725         struct in6_addr in6;
1726         int own, error;
1727
1728         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1729                 carp_setrun(sc, 0);
1730                 return (0);
1731         }
1732
1733         /* we have to do it by hands to check we won't match on us */
1734         ia_if = NULL; own = 0;
1735         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1736                 int i;
1737
1738                 for (i = 0; i < 4; i++) {
1739                         if ((sin6->sin6_addr.s6_addr32[i] &
1740                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1741                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1742                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1743                                 break;
1744                 }
1745                 /* and, yeah, we need a multicast-capable iface too */
1746                 if (ia->ia_ifp != &sc->sc_if &&
1747                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1748                     (i == 4)) {
1749                         if (!ia_if)
1750                                 ia_if = ia;
1751                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1752                             &ia->ia_addr.sin6_addr))
1753                                 own++;
1754                 }
1755         }
1756
1757         if (!ia_if)
1758                 return (EADDRNOTAVAIL);
1759         ia = ia_if;
1760         ifp = ia->ia_ifp;
1761
1762         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1763             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
1764                 return (EADDRNOTAVAIL);
1765
1766         if (!sc->sc_naddrs6) {
1767                 im6o->im6o_multicast_ifp = ifp;
1768
1769                 /* join CARP multicast address */
1770                 bzero(&in6, sizeof(in6));
1771                 in6.s6_addr16[0] = htons(0xff02);
1772                 in6.s6_addr8[15] = 0x12;
1773                 if (in6_setscope(&in6, ifp, NULL) != 0)
1774                         goto cleanup;
1775                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1776                         goto cleanup;
1777                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1778
1779                 /* join solicited multicast address */
1780                 bzero(&in6, sizeof(in6));
1781                 in6.s6_addr16[0] = htons(0xff02);
1782                 in6.s6_addr32[1] = 0;
1783                 in6.s6_addr32[2] = htonl(1);
1784                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
1785                 in6.s6_addr8[12] = 0xff;
1786                 if (in6_setscope(&in6, ifp, NULL) != 0)
1787                         goto cleanup;
1788                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1789                         goto cleanup;
1790                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1791         }
1792
1793         if (!ifp->if_carp) {
1794                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
1795
1796                 if ((error = ifpromisc(ifp, 1))) {
1797                         kfree(cif, M_CARP);
1798                         goto cleanup;
1799                 }
1800
1801                 TAILQ_INIT(&cif->vhif_vrs);
1802                 ifp->if_carp = cif;
1803         } else {
1804                 struct carp_softc *vr;
1805
1806                 cif = ifp->if_carp;
1807                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1808                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1809                                 error = EINVAL;
1810                                 goto cleanup;
1811                         }
1812                 }
1813         }
1814         sc->sc_ia6 = ia;
1815         sc->sc_carpdev = ifp;
1816
1817         { /* XXX prevent endless loop if already in queue */
1818         struct carp_softc *vr, *after = NULL;
1819         int myself = 0;
1820         cif = ifp->if_carp;
1821
1822         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1823                 if (vr == sc)
1824                         myself = 1;
1825                 if (vr->sc_vhid < sc->sc_vhid)
1826                         after = vr;
1827         }
1828
1829         if (!myself) {
1830                 /* We're trying to keep things in order */
1831                 if (after == NULL)
1832                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1833                 else
1834                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1835         }
1836         }
1837
1838         sc->sc_naddrs6++;
1839         if (own)
1840                 sc->sc_advskew = 0;
1841         carp_sc_state(sc);
1842         carp_setrun(sc, 0);
1843
1844         return (0);
1845
1846 cleanup:
1847         /* clean up multicast memberships */
1848         if (!sc->sc_naddrs6) {
1849                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1850                         imm = LIST_FIRST(&im6o->im6o_memberships);
1851                         LIST_REMOVE(imm, i6mm_chain);
1852                         in6_leavegroup(imm);
1853                 }
1854         }
1855         return (error);
1856 }
1857
1858 static int
1859 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1860 {
1861         int error = 0;
1862
1863         if (!--sc->sc_naddrs6) {
1864                 struct carp_if *cif = sc->sc_carpdev->if_carp;
1865                 struct ip6_moptions *im6o = &sc->sc_im6o;
1866
1867                 callout_stop(&sc->sc_ad_tmo);
1868                 sc->sc_vhid = -1;
1869                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1870                         struct in6_multi_mship *imm =
1871                             LIST_FIRST(&im6o->im6o_memberships);
1872
1873                         LIST_REMOVE(imm, i6mm_chain);
1874                         in6_leavegroup(imm);
1875                 }
1876                 im6o->im6o_multicast_ifp = NULL;
1877                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1878                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
1879                         sc->sc_carpdev->if_carp = NULL;
1880                         kfree(cif, M_IFADDR);
1881                 }
1882         }
1883         return (error);
1884 }
1885 #endif /* INET6 */
1886
1887 #endif
1888
1889 static int
1890 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
1891 {
1892         struct carp_softc *sc = ifp->if_softc, *vr;
1893         struct carpreq carpr;
1894         struct ifaddr *ifa;
1895         struct ifreq *ifr;
1896         struct ifaliasreq *ifra;
1897         struct ifdrv *ifd;
1898         char devname[IFNAMSIZ];
1899         int error = 0;
1900
1901         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1902
1903         carp_gettok();
1904
1905         ifa = (struct ifaddr *)addr;
1906         ifra = (struct ifaliasreq *)addr;
1907         ifr = (struct ifreq *)addr;
1908         ifd = (struct ifdrv *)addr;
1909
1910         switch (cmd) {
1911         case SIOCSIFFLAGS:
1912                 if (ifp->if_flags & IFF_UP) {
1913                         if ((ifp->if_flags & IFF_RUNNING) == 0)
1914                                 carp_init(sc);
1915                 } else if (ifp->if_flags & IFF_RUNNING) {
1916                         carp_ioctl_stop(sc);
1917                 }
1918                 break;
1919
1920         case SIOCSVH:
1921                 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
1922                 if (error)
1923                         break;
1924                 error = copyin(ifr->ifr_data, &carpr, sizeof(carpr));
1925                 if (error)
1926                         break;
1927
1928                 error = 1;
1929                 if ((ifp->if_flags & IFF_RUNNING) &&
1930                     sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1931                         switch (carpr.carpr_state) {
1932                         case BACKUP:
1933                                 callout_stop(&sc->sc_ad_tmo);
1934                                 carp_set_state(sc, BACKUP);
1935                                 carp_setrun(sc, 0);
1936                                 carp_setroute(sc, RTM_DELETE);
1937                                 break;
1938
1939                         case MASTER:
1940                                 carp_master_down(sc);
1941                                 break;
1942
1943                         default:
1944                                 break;
1945                         }
1946                 }
1947                 if (carpr.carpr_vhid > 0) {
1948                         if (carpr.carpr_vhid > 255) {
1949                                 error = EINVAL;
1950                                 break;
1951                         }
1952                         if (sc->sc_carpdev) {
1953                                 struct carp_if *cif = sc->sc_carpdev->if_carp;
1954
1955                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1956                                         if (vr != sc &&
1957                                             vr->sc_vhid == carpr.carpr_vhid) {
1958                                                 carp_reltok();
1959                                                 return EEXIST;
1960                                         }
1961                                 }
1962                         }
1963                         sc->sc_vhid = carpr.carpr_vhid;
1964
1965                         IF_LLADDR(ifp)[5] = sc->sc_vhid;
1966                         bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
1967                             ETHER_ADDR_LEN);
1968
1969                         error--;
1970                 }
1971                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
1972                         if (carpr.carpr_advskew >= 255) {
1973                                 error = EINVAL;
1974                                 break;
1975                         }
1976                         if (carpr.carpr_advbase > 255) {
1977                                 error = EINVAL;
1978                                 break;
1979                         }
1980                         sc->sc_advbase = carpr.carpr_advbase;
1981                         sc->sc_advskew = carpr.carpr_advskew;
1982                         error--;
1983                 }
1984                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1985                 if (error > 0) {
1986                         error = EINVAL;
1987                 } else {
1988                         error = 0;
1989                         carp_setrun(sc, 0);
1990                 }
1991                 break;
1992
1993         case SIOCGVH:
1994                 bzero(&carpr, sizeof(carpr));
1995                 carpr.carpr_state = sc->sc_state;
1996                 carpr.carpr_vhid = sc->sc_vhid;
1997                 carpr.carpr_advbase = sc->sc_advbase;
1998                 carpr.carpr_advskew = sc->sc_advskew;
1999                 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2000                 if (error == 0) {
2001                         bcopy(sc->sc_key, carpr.carpr_key,
2002                               sizeof(carpr.carpr_key));
2003                 }
2004
2005                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2006                 break;
2007
2008         case SIOCGDRVSPEC:
2009                 switch (ifd->ifd_cmd) {
2010                 case CARPGDEVNAME:
2011                         if (ifd->ifd_len != sizeof(devname))
2012                                 error = EINVAL;
2013                         break;
2014
2015                 case CARPGVHADDR:
2016                         break;
2017
2018                 default:
2019                         error = EINVAL;
2020                         break;
2021                 }
2022                 if (error)
2023                         break;
2024
2025                 switch (ifd->ifd_cmd) {
2026                 case CARPGVHADDR:
2027                         error = carp_get_vhaddr(sc, ifd);
2028                         break;
2029
2030                 case CARPGDEVNAME:
2031                         bzero(devname, sizeof(devname));
2032                         if (sc->sc_carpdev != NULL) {
2033                                 strlcpy(devname, sc->sc_carpdev->if_xname,
2034                                         sizeof(devname));
2035                         }
2036                         error = copyout(devname, ifd->ifd_data,
2037                                         sizeof(devname));
2038                         break;
2039                 }
2040                 break;
2041
2042         default:
2043                 error = ether_ioctl(ifp, cmd, addr);
2044                 break;
2045         }
2046         carp_hmac_prepare(sc);
2047
2048         carp_reltok();
2049         return error;
2050 }
2051
2052 static void
2053 carp_ioctl_stop_dispatch(netmsg_t msg)
2054 {
2055         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2056         struct carp_softc *sc = cmsg->nc_softc;
2057
2058         carp_gettok();
2059         carp_stop(sc, 0);
2060         carp_reltok();
2061
2062         lwkt_replymsg(&cmsg->base.lmsg, 0);
2063 }
2064
2065 static void
2066 carp_ioctl_stop(struct carp_softc *sc)
2067 {
2068         struct ifnet *ifp = &sc->arpcom.ac_if;
2069         struct netmsg_carp cmsg;
2070
2071         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2072
2073         ifnet_deserialize_all(ifp);
2074
2075         bzero(&cmsg, sizeof(cmsg));
2076         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2077             carp_ioctl_stop_dispatch);
2078         cmsg.nc_softc = sc;
2079
2080         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2081
2082         ifnet_serialize_all(ifp);
2083 }
2084
2085 static void
2086 carp_init_dispatch(netmsg_t msg)
2087 {
2088         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2089         struct carp_softc *sc = cmsg->nc_softc;
2090
2091         carp_gettok();
2092
2093         sc->sc_if.if_flags |= IFF_RUNNING;
2094         carp_set_state(sc, INIT);
2095         carp_setrun(sc, 0);
2096
2097         carp_reltok();
2098
2099         lwkt_replymsg(&cmsg->base.lmsg, 0);
2100 }
2101
2102 static void
2103 carp_init(void *xsc)
2104 {
2105         struct carp_softc *sc = xsc;
2106         struct ifnet *ifp = &sc->arpcom.ac_if;
2107         struct netmsg_carp cmsg;
2108
2109         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2110
2111         ifnet_deserialize_all(ifp);
2112
2113         bzero(&cmsg, sizeof(cmsg));
2114         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2115             carp_init_dispatch);
2116         cmsg.nc_softc = sc;
2117
2118         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2119
2120         ifnet_serialize_all(ifp);
2121 }
2122
2123 static int
2124 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2125     struct rtentry *rt)
2126 {
2127         struct carp_softc *sc = ifp->if_softc;
2128         int error = 0;
2129
2130         carp_gettok();
2131         if (sc->sc_carpdev) {
2132                 /*
2133                  * NOTE:
2134                  * CARP's ifp is passed to backing device's
2135                  * if_output method.
2136                  */
2137                 sc->sc_carpdev->if_output(ifp, m, dst, rt);
2138         } else {
2139                 m_freem(m);
2140                 error = ENETUNREACH;
2141         }
2142         carp_reltok();
2143
2144         return error;
2145 }
2146
/*
 * Start output on carp interface.  This function should never be
 * called; if it is, the kernel panics with the interface's name.
 */
static void
carp_start(struct ifnet *ifp)
{
        panic("%s: start called\n", ifp->if_xname);
}
2155
/*
 * Serialize hook; intentionally does nothing and ignores both arguments.
 * NOTE(review): presumably installed as the interface's serialize
 * method -- confirm at the attach site.
 */
static void
carp_serialize(struct ifnet *ifp __unused,
    enum ifnet_serialize slz __unused)
{
}
2161
/*
 * Deserialize hook; intentionally does nothing and ignores both
 * arguments.
 * NOTE(review): presumably installed as the interface's deserialize
 * method -- confirm at the attach site.
 */
static void
carp_deserialize(struct ifnet *ifp __unused,
    enum ifnet_serialize slz __unused)
{
}
2167
/*
 * Try-serialize hook; ignores both arguments and unconditionally
 * returns 1.
 * NOTE(review): 1 presumably means "serializer acquired" -- confirm
 * against the callers of the tryserialize method.
 */
static int
carp_tryserialize(struct ifnet *ifp __unused,
    enum ifnet_serialize slz __unused)
{
        return 1;
}
2174
2175 #ifdef INVARIANTS
2176
/*
 * Serializer assertion hook (INVARIANTS kernels only); intentionally
 * checks nothing and ignores all arguments.
 */
static void
carp_serialize_assert(struct ifnet *ifp __unused,
    enum ifnet_serialize slz __unused, boolean_t serialized __unused)
{
}
2182
2183 #endif  /* INVARIANTS */
2184
2185 static void
2186 carp_set_state(struct carp_softc *sc, int state)
2187 {
2188         struct ifnet *cifp = &sc->sc_if;
2189
2190         if (sc->sc_state == state)
2191                 return;
2192         sc->sc_state = state;
2193
2194         switch (sc->sc_state) {
2195         case BACKUP:
2196                 cifp->if_link_state = LINK_STATE_DOWN;
2197                 break;
2198
2199         case MASTER:
2200                 cifp->if_link_state = LINK_STATE_UP;
2201                 break;
2202
2203         default:
2204                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2205                 break;
2206         }
2207         rt_ifmsg(cifp);
2208 }
2209
2210 void
2211 carp_group_demote_adj(struct ifnet *ifp, int adj)
2212 {
2213         struct ifg_list *ifgl;
2214         int *dm;
2215
2216         carp_gettok();
2217
2218         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2219                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2220                         continue;
2221                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2222
2223                 if (*dm + adj >= 0)
2224                         *dm += adj;
2225                 else
2226                         *dm = 0;
2227
2228                 if (adj > 0 && *dm == 1)
2229                         carp_send_ad_all();
2230                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2231                     ifgl->ifgl_group->ifg_group, *dm);
2232         }
2233
2234         carp_reltok();
2235 }
2236
2237 void
2238 carp_carpdev_state(void *v)
2239 {
2240         struct carp_if *cif = v;
2241         struct carp_softc *sc;
2242
2243         carp_gettok();
2244
2245         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2246                 carp_sc_state(sc);
2247
2248         carp_reltok();
2249 }
2250
2251 static void
2252 carp_sc_state(struct carp_softc *sc)
2253 {
2254         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2255                 callout_stop(&sc->sc_ad_tmo);
2256                 callout_stop(&sc->sc_md_tmo);
2257                 callout_stop(&sc->sc_md6_tmo);
2258                 carp_set_state(sc, INIT);
2259                 carp_setrun(sc, 0);
2260                 if (!sc->sc_suppress) {
2261                         carp_suppress_preempt++;
2262                         if (carp_suppress_preempt == 1)
2263                                 carp_send_ad_all();
2264                 }
2265                 sc->sc_suppress = 1;
2266         } else {
2267                 carp_set_state(sc, INIT);
2268                 carp_setrun(sc, 0);
2269                 if (sc->sc_suppress)
2270                         carp_suppress_preempt--;
2271                 sc->sc_suppress = 0;
2272         }
2273 }
2274
2275 static void
2276 carp_stop(struct carp_softc *sc, int detach)
2277 {
2278         sc->sc_if.if_flags &= ~IFF_RUNNING;
2279
2280         callout_stop(&sc->sc_ad_tmo);
2281         callout_stop(&sc->sc_md_tmo);
2282         callout_stop(&sc->sc_md6_tmo);
2283
2284         if (!detach && sc->sc_state == MASTER)
2285                 carp_send_ad(sc);
2286
2287         if (sc->sc_suppress)
2288                 carp_suppress_preempt--;
2289         sc->sc_suppress = 0;
2290
2291         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2292                 carp_suppress_preempt--;
2293         sc->sc_sendad_errors = 0;
2294         sc->sc_sendad_success = 0;
2295
2296         carp_set_state(sc, INIT);
2297         carp_setrun(sc, 0);
2298 }
2299
2300 static void
2301 carp_suspend(struct carp_softc *sc, int detach)
2302 {
2303         struct ifnet *cifp = &sc->sc_if;
2304
2305         carp_stop(sc, detach);
2306
2307         /* Retain the running state, if we are not dead yet */
2308         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2309                 cifp->if_flags |= IFF_RUNNING;
2310 }
2311
2312 static int
2313 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2314     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2315 {
2316         struct ip_moptions *imo = &sc->sc_imo;
2317         struct carp_if *cif;
2318         struct carp_softc *vr, *after = NULL;
2319         int onlist, error;
2320 #ifdef INVARIANTS
2321         int assert_onlist;
2322 #endif
2323
2324         KKASSERT(vha->vha_ia != NULL);
2325
2326         KASSERT(ia_if != NULL, ("NULL backing address\n"));
2327         KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha));
2328         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2329                 ("inactive vhaddr %p is the address owner\n", vha));
2330
2331         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2332                 ("%s is already on %s\n", sc->sc_if.if_xname,
2333                  sc->sc_carpdev->if_xname));
2334
2335         if (!ifp->if_carp) {
2336                 KASSERT(sc->sc_carpdev == NULL,
2337                         ("%s is already on %s\n", sc->sc_if.if_xname,
2338                          sc->sc_carpdev->if_xname));
2339
2340                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2341
2342                 error = ifpromisc(ifp, 1);
2343                 if (error) {
2344                         kfree(cif, M_CARP);
2345                         return error;
2346                 }
2347
2348                 TAILQ_INIT(&cif->vhif_vrs);
2349                 ifp->if_carp = cif;
2350         } else {
2351                 cif = ifp->if_carp;
2352                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2353                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2354                                 return EINVAL;
2355                 }
2356         }
2357
2358 #ifdef INVARIANTS
2359         if (sc->sc_carpdev != NULL)
2360                 assert_onlist = 1;
2361         else
2362                 assert_onlist = 0;
2363 #endif
2364         sc->sc_ia = ia_if;
2365         sc->sc_carpdev = ifp;
2366
2367         cif = ifp->if_carp;
2368         onlist = 0;
2369         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2370                 if (vr == sc)
2371                         onlist = 1;
2372                 if (vr->sc_vhid < sc->sc_vhid)
2373                         after = vr;
2374         }
2375
2376 #ifdef INVARIANTS
2377         if (assert_onlist) {
2378                 KASSERT(onlist, ("%s is not on %s carp list\n",
2379                         sc->sc_if.if_xname, ifp->if_xname));
2380         } else {
2381                 KASSERT(!onlist, ("%s is already on %s carp list\n",
2382                         sc->sc_if.if_xname, ifp->if_xname));
2383         }
2384 #endif
2385
2386         if (!onlist) {
2387                 /* We're trying to keep things in order */
2388                 if (after == NULL)
2389                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2390                 else
2391                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2392         }
2393
2394         vha->vha_iaback = ia_if;
2395         sc->sc_naddrs++;
2396
2397         if (own) {
2398                 vha->vha_flags |= CARP_VHAF_OWNER;
2399
2400                 /* XXX save user configured advskew? */
2401                 sc->sc_advskew = 0;
2402         }
2403
2404         carp_addroute_vhaddr(sc, vha);
2405
2406         /*
2407          * Join the multicast group only after the backing interface
2408          * has been hooked with the CARP interface.
2409          */
2410         KASSERT(imo->imo_multicast_ifp == NULL ||
2411                 imo->imo_multicast_ifp == &sc->sc_if,
2412                 ("%s didn't leave mcast group on %s\n",
2413                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2414
2415         if (imo->imo_num_memberships == 0) {
2416                 struct in_addr addr;
2417
2418                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2419                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2420                 if (imo->imo_membership[0] == NULL) {
2421                         carp_deactivate_vhaddr(sc, vha, FALSE);
2422                         return ENOBUFS;
2423                 }
2424
2425                 imo->imo_num_memberships++;
2426                 imo->imo_multicast_ifp = &sc->sc_if;
2427                 imo->imo_multicast_ttl = CARP_DFLTTL;
2428                 imo->imo_multicast_loop = 0;
2429         }
2430
2431         carp_hmac_prepare(sc);
2432         carp_set_state(sc, INIT);
2433         carp_setrun(sc, 0);
2434         return 0;
2435 }
2436
2437 static void
2438 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2439     boolean_t del_iaback)
2440 {
2441         KKASSERT(vha->vha_ia != NULL);
2442
2443         carp_hmac_prepare(sc);
2444
2445         if (vha->vha_iaback == NULL) {
2446                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2447                         ("inactive vhaddr %p is the address owner\n", vha));
2448                 return;
2449         }
2450
2451         vha->vha_flags &= ~CARP_VHAF_OWNER;
2452         carp_delroute_vhaddr(sc, vha, del_iaback);
2453
2454         KKASSERT(sc->sc_naddrs > 0);
2455         vha->vha_iaback = NULL;
2456         sc->sc_naddrs--;
2457         if (!sc->sc_naddrs) {
2458                 if (sc->sc_naddrs6) {
2459                         carp_multicast_cleanup(sc);
2460                         sc->sc_ia = NULL;
2461                 } else {
2462                         carp_detach(sc, 0, del_iaback);
2463                 }
2464         }
2465 }
2466
2467 static void
2468 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2469 {
2470         struct carp_vhaddr *vha;
2471         struct in_ifaddr *ia_if;
2472
2473         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2474         ia_if = ifatoia(ifa_if);
2475
2476         /*
2477          * Test each inactive vhaddr against the newly added address.
2478          * If the newly added address could be the backing address,
2479          * then activate the matching vhaddr.
2480          */
2481         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2482                 const struct in_ifaddr *ia;
2483                 u_long iaddr;
2484                 int own;
2485
2486                 if (vha->vha_iaback != NULL)
2487                         continue;
2488
2489                 ia = vha->vha_ia;
2490                 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr);
2491
2492                 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet)
2493                         continue;
2494
2495                 own = 0;
2496                 if (ia->ia_addr.sin_addr.s_addr ==
2497                     ia_if->ia_addr.sin_addr.s_addr)
2498                         own = 1;
2499
2500                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2501         }
2502 }
2503
2504 static void
2505 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
2506                   struct ifaddr *ifa_if)
2507 {
2508         struct carp_vhaddr *vha;
2509         struct in_ifaddr *ia_if;
2510
2511         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2512         ia_if = ifatoia(ifa_if);
2513
2514         /*
2515          * Ad src address is deleted; set it to NULL.
2516          * Following loop will try pick up a new ad src address
2517          * if one of the vhaddr could retain its backing address.
2518          */
2519         if (sc->sc_ia == ia_if)
2520                 sc->sc_ia = NULL;
2521
2522         /*
2523          * Test each active vhaddr against the deleted address.
2524          * If the deleted address is vhaddr address's backing
2525          * address, then deactivate the vhaddr.
2526          */
2527         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2528                 if (vha->vha_iaback == NULL)
2529                         continue;
2530
2531                 if (vha->vha_iaback == ia_if)
2532                         carp_deactivate_vhaddr(sc, vha, TRUE);
2533                 else if (sc->sc_ia == NULL)
2534                         sc->sc_ia = vha->vha_iaback;
2535         }
2536 }
2537
2538 static void
2539 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
2540 {
2541         struct carp_vhaddr *vha;
2542
2543         KKASSERT(sc->sc_carpdev == NULL);
2544
2545         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2546                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
2547 }
2548
2549 static void
2550 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
2551             enum ifaddr_event event, struct ifaddr *ifa)
2552 {
2553         struct carp_softc *sc;
2554
2555         carp_gettok();
2556
2557         if (ifa->ifa_addr->sa_family != AF_INET)
2558                 goto back;
2559
2560         KASSERT(&curthread->td_msgport == cpu_portfn(0),
2561             ("not in netisr0"));
2562
2563         if (ifp->if_type == IFT_CARP) {
2564                 /*
2565                  * Address is changed on carp(4) interface
2566                  */
2567                 switch (event) {
2568                 case IFADDR_EVENT_ADD:
2569                         carp_add_addr(ifp->if_softc, ifa);
2570                         break;
2571
2572                 case IFADDR_EVENT_CHANGE:
2573                         carp_config_addr(ifp->if_softc, ifa);
2574                         break;
2575
2576                 case IFADDR_EVENT_DELETE:
2577                         carp_del_addr(ifp->if_softc, ifa);
2578                         break;
2579                 }
2580                 goto back;
2581         }
2582
2583         /*
2584          * Address is changed on non-carp(4) interface
2585          */
2586         if ((ifp->if_flags & IFF_MULTICAST) == 0)
2587                 goto back;
2588
2589         LIST_FOREACH(sc, &carpif_list, sc_next) {
2590                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
2591                         /* Not the parent iface; skip */
2592                         continue;
2593                 }
2594
2595                 switch (event) {
2596                 case IFADDR_EVENT_ADD:
2597                         carp_link_addrs(sc, ifp, ifa);
2598                         break;
2599
2600                 case IFADDR_EVENT_DELETE:
2601                         if (sc->sc_carpdev != NULL) {
2602                                 carp_unlink_addrs(sc, ifp, ifa);
2603                                 if (sc->sc_carpdev == NULL) {
2604                                         /*
2605                                          * We no longer have the parent
2606                                          * interface, however, certain
2607                                          * virtual addresses, which are
2608                                          * not used because they can't
2609                                          * match the previous parent
2610                                          * interface's addresses, may now
2611                                          * match different interface's
2612                                          * addresses.
2613                                          */
2614                                         carp_update_addrs(sc, ifa);
2615                                 }
2616                         } else {
2617                                 /*
2618                                  * The carp(4) interface didn't have a
2619                                  * parent iface, so it is not possible
2620                                  * that it will contain any address to
2621                                  * be unlinked.
2622                                  */
2623                         }
2624                         break;
2625
2626                 case IFADDR_EVENT_CHANGE:
2627                         if (sc->sc_carpdev == NULL) {
2628                                 /*
2629                                  * The carp(4) interface didn't have a
2630                                  * parent iface, so it is not possible
2631                                  * that it will contain any address to
2632                                  * be updated.
2633                                  */
2634                                 carp_link_addrs(sc, ifp, ifa);
2635                         } else {
2636                                 /*
2637                                  * First try breaking tie with the old
2638                                  * address.  Then see whether we could
2639                                  * link certain vhaddr to the new address.
2640                                  * If that fails, i.e. carpdev is NULL,
2641                                  * we try a global update.
2642                                  *
2643                                  * NOTE: The above order is critical.
2644                                  */
2645                                 carp_unlink_addrs(sc, ifp, ifa);
2646                                 carp_link_addrs(sc, ifp, ifa);
2647                                 if (sc->sc_carpdev == NULL) {
2648                                         /*
2649                                          * See the comment in the above
2650                                          * IFADDR_EVENT_DELETE block.
2651                                          */
2652                                         carp_update_addrs(sc, NULL);
2653                                 }
2654                         }
2655                         break;
2656                 }
2657         }
2658
2659 back:
2660         carp_reltok();
2661 }
2662
2663 void
2664 carp_proto_ctlinput(netmsg_t msg)
2665 {
2666         int cmd = msg->ctlinput.nm_cmd;
2667         struct sockaddr *sa = msg->ctlinput.nm_arg;
2668         struct in_ifaddr_container *iac;
2669
2670         carp_gettok();
2671
2672         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
2673                 struct in_ifaddr *ia = iac->ia;
2674                 struct ifnet *ifp = ia->ia_ifp;
2675
2676                 if (ifp->if_type == IFT_CARP)
2677                         continue;
2678
2679                 if (ia->ia_ifa.ifa_addr == sa) {
2680                         if (cmd == PRC_IFDOWN) {
2681                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
2682                                     &ia->ia_ifa);
2683                         } else if (cmd == PRC_IFUP) {
2684                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
2685                                     &ia->ia_ifa);
2686                         }
2687                         break;
2688                 }
2689         }
2690
2691         carp_reltok();
2692         lwkt_replymsg(&msg->lmsg, 0);
2693 }
2694
2695 void
2696 carp_gettok(void)
2697 {
2698         lwkt_gettoken(&carp_tok);
2699 }
2700
2701 void
2702 carp_reltok(void)
2703 {
2704         lwkt_reltoken(&carp_tok);
2705 }
2706
2707 struct ifnet *
2708 carp_parent(struct ifnet *cifp)
2709 {
2710         struct carp_softc *sc;
2711
2712         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
2713
2714         KKASSERT(cifp->if_type == IFT_CARP);
2715         sc = cifp->if_softc;
2716
2717         return sc->sc_carpdev;
2718 }
2719
/*
 * Extra flags handed to rtinit() for the in_ifaddr 'x': RTF_HOST when
 * the address's interface has IFF_LOOPBACK or IFF_POINTOPOINT set,
 * otherwise 0.
 */
#define rtinitflags(x)							\
	((((x)->ia_ifp->if_flags &					\
	   (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) ? RTF_HOST : 0)
2723
2724 static int
2725 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
2726 {
2727         struct in_ifaddr *ia, *iaback;
2728         int error;
2729
2730         if (sc->sc_state != MASTER)
2731                 return 0;
2732
2733         ia = vha->vha_ia;
2734         KKASSERT(ia != NULL);
2735
2736         iaback = vha->vha_iaback;
2737         KKASSERT(iaback != NULL);
2738
2739         rtinit(&iaback->ia_ifa, RTM_DELETE, rtinitflags(iaback));
2740         in_ifadown(&iaback->ia_ifa, 1);
2741         iaback->ia_flags &= ~IFA_ROUTE;
2742
2743         error = rtinit(&ia->ia_ifa, RTM_ADD, rtinitflags(ia) | RTF_UP);
2744         if (!error)
2745                 ia->ia_flags |= IFA_ROUTE;
2746         return error;
2747 }
2748
2749 static void
2750 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2751     boolean_t del_iaback)
2752 {
2753         struct in_ifaddr *ia, *iaback;
2754
2755         ia = vha->vha_ia;
2756         KKASSERT(ia != NULL);
2757
2758         iaback = vha->vha_iaback;
2759         KKASSERT(iaback != NULL);
2760
2761         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
2762         in_ifadown(&ia->ia_ifa, 1);
2763         ia->ia_flags &= ~IFA_ROUTE;
2764
2765         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
2766                 int error;
2767
2768                 error = rtinit(&iaback->ia_ifa, RTM_ADD,
2769                     rtinitflags(iaback) | RTF_UP);
2770                 if (!error)
2771                         iaback->ia_flags |= IFA_ROUTE;
2772         }
2773 }
2774
2775 static int
2776 carp_modevent(module_t mod, int type, void *data)
2777 {
2778         switch (type) {
2779         case MOD_LOAD:
2780                 LIST_INIT(&carpif_list);
2781                 carp_ifdetach_event =
2782                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
2783                                       EVENTHANDLER_PRI_ANY);
2784                 carp_ifaddr_event =
2785                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
2786                                       EVENTHANDLER_PRI_FIRST);
2787                 if_clone_attach(&carp_cloner);
2788                 break;
2789
2790         case MOD_UNLOAD:
2791                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
2792                                         carp_ifdetach_event);
2793                 EVENTHANDLER_DEREGISTER(ifaddr_event,
2794                                         carp_ifaddr_event);
2795                 if_clone_detach(&carp_cloner);
2796                 break;
2797
2798         default:
2799                 return (EINVAL);
2800         }
2801         return (0);
2802 }
2803
2804 static moduledata_t carp_mod = {
2805         "carp",
2806         carp_modevent,
2807         0
2808 };
2809 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);