45cf573e46063fff4cbb92987806b29b5b3c3f39
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64
65 #ifdef INET
66 #include <netinet/in.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/if_ether.h>
72 #endif
73
74 #ifdef INET6
75 #include <netinet/icmp6.h>
76 #include <netinet/ip6.h>
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79 #include <netinet6/nd6.h>
80 #endif
81
82 #include <netinet/ip_carp.h>
83
84 #define CARP_IFNAME             "carp"  /* name given to the cloner and to if_initname() */
/* True only when both IFF_UP and IFF_RUNNING are set in (ifp)->if_flags. */
85 #define CARP_IS_RUNNING(ifp)    \
86         (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
87
88 struct carp_softc;
89
/* One IPv4 virtual address entry, linked on a carp_softc's sc_vha_list. */
90 struct carp_vhaddr {
91         uint32_t                vha_flags;      /* CARP_VHAF_ bits, e.g. CARP_VHAF_ONLIST */
92         struct in_ifaddr        *vha_ia;        /* carp address; sc_vha_list is sorted by it */
93         struct in_ifaddr        *vha_iaback;    /* backing address; NULL entries are skipped by carp_setroute() */
94         TAILQ_ENTRY(carp_vhaddr) vha_link;      /* sc_vha_list linkage */
95 };
96 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);      /* head type of sc_vha_list */
97
/* netmsg carrying a carp request; the timeout and dispatch handlers cast their netmsg_t to this. */
98 struct netmsg_carp {
99         struct netmsg_base      base;           /* initialised with netmsg_init() */
100         struct ifnet            *nc_carpdev;    /* set by carp_ifdetach() to the interface going away */
101         struct carp_softc       *nc_softc;      /* carp instance the message is about */
102         void                    *nc_data;       /* NOTE(review): presumably an ioctl payload - not used in this part of the file */
103         size_t                  nc_datalen;     /* NOTE(review): presumably the size of nc_data - confirm */
104 };
105
/* Per-interface state of one carp instance; allocated zeroed by carp_clone_create(). */
106 struct carp_softc {
107         struct arpcom            arpcom;        /* its ac_if member is aliased as sc_if below */
108         struct ifnet            *sc_carpdev;    /* parent interface */
109         struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */
110
111         const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
112         struct ip_moptions       sc_imo;
113
114 #ifdef INET6
115         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
116         struct ip6_moptions      sc_im6o;
117 #endif /* INET6 */
118
119         enum { INIT = 0, BACKUP, MASTER }
120                                  sc_state;      /* INIT is 0, so a zeroed softc starts in INIT */
121         int                      sc_dead;       /* set to 1 by carp_clone_destroy_dispatch() */
122
123         int                      sc_suppress;
124
125         int                      sc_sendad_errors;
126 #define CARP_SENDAD_MAX_ERRORS  3
127         int                      sc_sendad_success;
128 #define CARP_SENDAD_MIN_SUCCESS 3
129
130         int                      sc_vhid;       /* -1 until configured; only the low 8 bits are hashed */
131         int                      sc_advskew;
132         int                      sc_naddrs;     /* actually used IPv4 vha */
133         int                      sc_naddrs6;
134         int                      sc_advbase;    /* seconds */
135         int                      sc_init_counter;
136         uint64_t                 sc_counter;    /* its sizeof() is the counter length fed to the HMAC */
137
138         /* authentication */
139 #define CARP_HMAC_PAD   64
140         unsigned char            sc_key[CARP_KEY_LEN];  /* copied into sc_pad by carp_hmac_prepare() */
141         unsigned char            sc_pad[CARP_HMAC_PAD]; /* key ^ 0x5c once carp_hmac_prepare() returns */
142         SHA1_CTX                 sc_sha1;       /* inner-hash state precomputed by carp_hmac_prepare() */
143
144         struct callout           sc_ad_tmo;     /* advertisement timeout */
145         struct netmsg_carp       sc_ad_msg;     /* adv timeout netmsg */
146         struct callout           sc_md_tmo;     /* ip4 master down timeout */
147         struct callout           sc_md6_tmo;    /* ip6 master down timeout */
148         struct netmsg_carp       sc_md_msg;     /* master down timeout netmsg */
149
150         LIST_ENTRY(carp_softc)   sc_next;       /* carpif_list linkage */
151 };
152
153 #define sc_if   arpcom.ac_if    /* shorthand for the embedded struct ifnet */
154
/*
 * Entry of a parent interface's carp list (struct carp_if, stored in
 * ifp->if_carp).  carp_if_insert() and carp_if_remove() never edit a list
 * in place; they build a fresh list of containers instead.
 */
155 struct carp_softc_container {
156         TAILQ_ENTRY(carp_softc_container) scc_link;     /* carp_if linkage */
157         struct carp_softc       *scc_softc;             /* carp instance this entry refers to */
158 };
159 TAILQ_HEAD(carp_if, carp_softc_container);
160
161 SYSCTL_DECL(_net_inet_carp);
162
/* Tunables exported below, indexed by the CARPCTL_ constants. */
163 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
/* When zero, carp_proto_input() frees every incoming packet right away. */
164 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
165     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
166 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
167     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
/* CARP_LOG() output needs a value > 0, CARP_DEBUG() output a value > 1. */
168 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
169     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
170 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
171     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
172
/* Exported read-only (CTLFLAG_RD). */
173 static int carp_suppress_preempt = 0;
174 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
175     &carp_suppress_preempt, 0, "Preemption is suppressed");
176
/* Packet counters; the input path bumps carps_ipackets, carps_badif, etc. */
177 static struct carpstats carpstats;
178 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
179     &carpstats, carpstats,
180     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
181
/* printf-style logging at LOG_INFO, emitted only when carp_opts[CARPCTL_LOG] > 0. */
182 #define CARP_LOG(...)   do {                            \
183         if (carp_opts[CARPCTL_LOG] > 0)                 \
184                 log(LOG_INFO, __VA_ARGS__);             \
185 } while (0)
186
/* printf-style logging at LOG_DEBUG, emitted only when carp_opts[CARPCTL_LOG] > 1. */
187 #define CARP_DEBUG(...) do {                            \
188         if (carp_opts[CARPCTL_LOG] > 1)                 \
189                 log(LOG_DEBUG, __VA_ARGS__);            \
190 } while (0)
191
/* NOTE(review): presumably the token taken by carp_gettok()/carp_reltok(), which are defined outside this file section - confirm. */
192 static struct lwkt_token carp_tok = LWKT_TOKEN_INITIALIZER(carp_token);
193
194 static void     carp_hmac_prepare(struct carp_softc *);
195 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
196                     unsigned char *);
197 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
198                     unsigned char *);
199 static void     carp_setroute(struct carp_softc *, int);
200 static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
201                     struct carp_header *, sa_family_t);
202 static int      carp_clone_create(struct if_clone *, int, caddr_t);
203 static int      carp_clone_destroy(struct ifnet *);
204 static void     carp_detach(struct carp_softc *, int, boolean_t);
205 static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
206 static void     carp_send_ad_all(void);
207 static void     carp_send_ad_timeout(void *);
208 static void     carp_send_ad(struct carp_softc *);
209 static void     carp_send_arp(struct carp_softc *);
210 static void     carp_master_down_timeout(void *);
211 static void     carp_master_down(struct carp_softc *);
212 static void     carp_setrun(struct carp_softc *, sa_family_t);
213 static void     carp_set_state(struct carp_softc *, int);
214 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
215
216 static void     carp_init(void *);
217 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
218 static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
219                     struct rtentry *);
220 static void     carp_start(struct ifnet *);
221 static void     carp_serialize(struct ifnet *, enum ifnet_serialize);
222 static void     carp_deserialize(struct ifnet *, enum ifnet_serialize);
223 static int      carp_tryserialize(struct ifnet *, enum ifnet_serialize);
224 #ifdef INVARIANTS
225 static void     carp_serialize_assert(struct ifnet *, enum ifnet_serialize,
226                     boolean_t);
227 #endif
228
229 static void     carp_multicast_cleanup(struct carp_softc *);
230 static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
231 static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
232 static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
233 static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
234                     struct ifaddr *);
235 static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
236                     struct ifaddr *);
237 static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);
238
239 static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
240                     struct in_ifaddr *);
241 static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
242                     struct ifnet *, struct in_ifaddr *, int);
243 static void     carp_deactivate_vhaddr(struct carp_softc *,
244                     struct carp_vhaddr *, boolean_t);
245 static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
246 static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
247                     boolean_t);
248
249 #ifdef foo
250 static void     carp_sc_state(struct carp_softc *);
251 #endif
252 #ifdef INET6
253 static void     carp_send_na(struct carp_softc *);
254 #ifdef notyet
255 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
256 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
257 #endif
258 static void     carp_multicast6_cleanup(struct carp_softc *);
259 #endif
260 static void     carp_stop(struct carp_softc *, int);
261 static void     carp_suspend(struct carp_softc *, int);
262 static void     carp_ioctl_stop(struct carp_softc *);
263 static int      carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
264 static int      carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
265 static int      carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
266 static int      carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
267
268 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
269 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
270 static void     carp_if_free(struct carp_if *);
271
272 static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
273                             struct ifaddr *);
274 static void     carp_ifdetach(void *, struct ifnet *);
275
276 static void     carp_ifdetach_dispatch(netmsg_t);
277 static void     carp_clone_destroy_dispatch(netmsg_t);
278 static void     carp_init_dispatch(netmsg_t);
279 static void     carp_ioctl_stop_dispatch(netmsg_t);
280 static void     carp_ioctl_setvh_dispatch(netmsg_t);
281 static void     carp_ioctl_getvh_dispatch(netmsg_t);
282 static void     carp_ioctl_getdevname_dispatch(netmsg_t);
283 static void     carp_ioctl_getvhaddr_dispatch(netmsg_t);
284 static void     carp_send_ad_timeout_dispatch(netmsg_t);
285 static void     carp_master_down_timeout_dispatch(netmsg_t);
286
/* malloc type used for softcs, carp_if lists and their containers. */
287 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
288
/* Every carp softc; linked in carp_clone_create(), unlinked in carp_clone_destroy(). */
289 static LIST_HEAD(, carp_softc) carpif_list;
290
/* Cloner for "carp" interfaces, units 0 through IF_MAXUNIT. */
291 static struct if_clone carp_cloner =
292 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
293                      0, IF_MAXUNIT);
294
/* Link-level address handed to ether_ifattach() for every new carp interface. */
295 static uint8_t  carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 };
296
/* NOTE(review): presumably the registration tags for carp_ifdetach()/carp_ifaddr() - the registration is not in this part of the file. */
297 static eventhandler_tag carp_ifdetach_event;
298 static eventhandler_tag carp_ifaddr_event;
299
300 static __inline void
301 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
302 {
303         struct carp_vhaddr *vha;
304         u_long new_addr, addr;
305
306         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
307
308         /*
309          * Virtual address list is sorted; smaller one first
310          */
311         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
312
313         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
314                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
315
316                 if (addr > new_addr)
317                         break;
318         }
319         if (vha == NULL)
320                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
321         else
322                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
323         vha_new->vha_flags |= CARP_VHAF_ONLIST;
324 }
325
326 static __inline void
327 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
328 {
329         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
330         vha->vha_flags &= ~CARP_VHAF_ONLIST;
331         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
332 }
333
334 static void
335 carp_hmac_prepare(struct carp_softc *sc)
336 {
337         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
338         uint8_t vhid = sc->sc_vhid & 0xff;
339         int i;
340 #ifdef INET6
341         struct ifaddr_container *ifac;
342         struct in6_addr in6;
343 #endif
344 #ifdef INET
345         struct carp_vhaddr *vha;
346 #endif
347
348         /* XXX: possible race here */
349
350         /* compute ipad from key */
351         bzero(sc->sc_pad, sizeof(sc->sc_pad));
352         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
353         for (i = 0; i < sizeof(sc->sc_pad); i++)
354                 sc->sc_pad[i] ^= 0x36;
355
356         /* precompute first part of inner hash */
357         SHA1Init(&sc->sc_sha1);
358         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
359         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
360         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
361         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
362 #ifdef INET
363         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
364                 SHA1Update(&sc->sc_sha1,
365                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
366                     sizeof(struct in_addr));
367         }
368 #endif /* INET */
369 #ifdef INET6
370         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
371                 struct ifaddr *ifa = ifac->ifa;
372
373                 if (ifa->ifa_addr->sa_family == AF_INET6) {
374                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
375                         in6_clearscope(&in6);
376                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
377                 }
378         }
379 #endif /* INET6 */
380
381         /* convert ipad to opad */
382         for (i = 0; i < sizeof(sc->sc_pad); i++)
383                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
384 }
385
386 static void
387 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
388     unsigned char md[20])
389 {
390         SHA1_CTX sha1ctx;
391
392         /* fetch first half of inner hash */
393         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
394
395         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
396         SHA1Final(md, &sha1ctx);
397
398         /* outer hash */
399         SHA1Init(&sha1ctx);
400         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
401         SHA1Update(&sha1ctx, md, 20);
402         SHA1Final(md, &sha1ctx);
403 }
404
405 static int
406 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
407     unsigned char md[20])
408 {
409         unsigned char md2[20];
410
411         carp_hmac_generate(sc, counter, md2);
412         return (bcmp(md, md2, sizeof(md2)));
413 }
414
415 static void
416 carp_setroute(struct carp_softc *sc, int cmd)
417 {
418 #ifdef INET6
419         struct ifaddr_container *ifac;
420 #endif
421         struct carp_vhaddr *vha;
422
423         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
424
425         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
426                 if (vha->vha_iaback == NULL)
427                         continue;
428                 if (cmd == RTM_DELETE)
429                         carp_delroute_vhaddr(sc, vha, FALSE);
430                 else
431                         carp_addroute_vhaddr(sc, vha);
432         }
433
434 #ifdef INET6
435         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
436                 struct ifaddr *ifa = ifac->ifa;
437
438                 if (ifa->ifa_addr->sa_family == AF_INET6) {
439                         if (cmd == RTM_ADD)
440                                 in6_ifaddloop(ifa);
441                         else
442                                 in6_ifremloop(ifa);
443                 }
444         }
445 #endif /* INET6 */
446 }
447
448 static int
449 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
450 {
451         struct carp_softc *sc;
452         struct ifnet *ifp;
453
454         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
455         ifp = &sc->sc_if;
456
457         sc->sc_suppress = 0;
458         sc->sc_advbase = CARP_DFLTINTV;
459         sc->sc_vhid = -1;       /* required setting */
460         sc->sc_advskew = 0;
461         sc->sc_init_counter = 1;
462         sc->sc_naddrs = 0;
463         sc->sc_naddrs6 = 0;
464
465         TAILQ_INIT(&sc->sc_vha_list);
466
467 #ifdef INET6
468         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
469 #endif
470
471         callout_init_mp(&sc->sc_ad_tmo);
472         netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
473             MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
474         sc->sc_ad_msg.nc_softc = sc;
475
476         callout_init_mp(&sc->sc_md_tmo);
477         callout_init_mp(&sc->sc_md6_tmo);
478         netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
479             MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
480         sc->sc_md_msg.nc_softc = sc;
481
482         if_initname(ifp, CARP_IFNAME, unit);
483         ifp->if_softc = sc;
484         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
485         ifp->if_init = carp_init;
486         ifp->if_ioctl = carp_ioctl;
487         ifp->if_start = carp_start;
488         ifp->if_serialize = carp_serialize;
489         ifp->if_deserialize = carp_deserialize;
490         ifp->if_tryserialize = carp_tryserialize;
491 #ifdef INVARIANTS
492         ifp->if_serialize_assert = carp_serialize_assert;
493 #endif
494         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
495         ifq_set_ready(&ifp->if_snd);
496
497         ether_ifattach(ifp, carp_etheraddr, NULL);
498
499         ifp->if_type = IFT_CARP;
500         ifp->if_output = carp_output;
501
502         carp_gettok();
503         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
504         carp_reltok();
505
506         return (0);
507 }
508
509 static void
510 carp_clone_destroy_dispatch(netmsg_t msg)
511 {
512         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
513         struct carp_softc *sc = cmsg->nc_softc;
514
515         carp_gettok();
516
517         sc->sc_dead = 1;
518         carp_detach(sc, 1, FALSE);
519
520         carp_reltok();
521
522         callout_stop_sync(&sc->sc_ad_tmo);
523         callout_stop_sync(&sc->sc_md_tmo);
524         callout_stop_sync(&sc->sc_md6_tmo);
525
526         crit_enter();
527         if ((sc->sc_ad_msg.base.lmsg.ms_flags & MSGF_DONE) == 0)
528                 lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
529         if ((sc->sc_md_msg.base.lmsg.ms_flags & MSGF_DONE) == 0)
530                 lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
531         crit_exit();
532
533         lwkt_replymsg(&cmsg->base.lmsg, 0);
534 }
535
536 static int
537 carp_clone_destroy(struct ifnet *ifp)
538 {
539         struct carp_softc *sc = ifp->if_softc;
540         struct netmsg_carp cmsg;
541
542         bzero(&cmsg, sizeof(cmsg));
543         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
544             carp_clone_destroy_dispatch);
545         cmsg.nc_softc = sc;
546
547         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
548
549         carp_gettok();
550         LIST_REMOVE(sc, sc_next);
551         carp_reltok();
552
553         bpfdetach(ifp);
554         if_detach(ifp);
555
556         KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n"));
557         kfree(sc, M_CARP);
558
559         return 0;
560 }
561
562 static struct carp_if *
563 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
564 {
565         struct carp_softc_container *oscc, *scc;
566         struct carp_if *cif;
567         int count = 0;
568 #ifdef INVARIANTS
569         int found = 0;
570 #endif
571
572         TAILQ_FOREACH(oscc, ocif, scc_link) {
573                 ++count;
574 #ifdef INVARIANTS
575                 if (oscc->scc_softc == sc)
576                         found = 1;
577 #endif
578         }
579         KASSERT(found, ("%s carp_softc is not on carp_if\n", __func__));
580
581         if (count == 1) {
582                 /* Last one is going to be unlinked */
583                 return NULL;
584         }
585
586         cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
587         TAILQ_INIT(cif);
588
589         TAILQ_FOREACH(oscc, ocif, scc_link) {
590                 if (oscc->scc_softc == sc)
591                         continue;
592
593                 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
594                 scc->scc_softc = oscc->scc_softc;
595                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
596         }
597
598         return cif;
599 }
600
601 static struct carp_if *
602 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
603 {
604         struct carp_softc_container *oscc;
605         int onlist;
606
607         onlist = 0;
608         if (ocif != NULL) {
609                 TAILQ_FOREACH(oscc, ocif, scc_link) {
610                         if (oscc->scc_softc == sc)
611                                 onlist = 1;
612                 }
613         }
614
615 #ifdef INVARIANTS
616         if (sc->sc_carpdev != NULL) {
617                 KASSERT(onlist, ("%s is not on %s carp list\n",
618                     sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
619         } else {
620                 KASSERT(!onlist, ("%s is already on carp list\n",
621                     sc->sc_if.if_xname));
622         }
623 #endif
624
625         if (!onlist) {
626                 struct carp_if *cif;
627                 struct carp_softc_container *new_scc, *scc;
628                 int inserted = 0;
629
630                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
631                 TAILQ_INIT(cif);
632
633                 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
634                 new_scc->scc_softc = sc;
635
636                 if (ocif != NULL) {
637                         TAILQ_FOREACH(oscc, ocif, scc_link) {
638                                 if (!inserted &&
639                                     oscc->scc_softc->sc_vhid > sc->sc_vhid) {
640                                         TAILQ_INSERT_TAIL(cif, new_scc,
641                                             scc_link);
642                                         inserted = 1;
643                                 }
644
645                                 scc = kmalloc(sizeof(*scc), M_CARP,
646                                     M_WAITOK | M_ZERO);
647                                 scc->scc_softc = oscc->scc_softc;
648                                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
649                         }
650                 }
651                 if (!inserted)
652                         TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
653
654                 return cif;
655         } else {
656                 return ocif;
657         }
658 }
659
660 static void
661 carp_if_free(struct carp_if *cif)
662 {
663         struct carp_softc_container *scc;
664
665         while ((scc = TAILQ_FIRST(cif)) != NULL) {
666                 TAILQ_REMOVE(cif, scc, scc_link);
667                 kfree(scc, M_CARP);
668         }
669         kfree(cif, M_CARP);
670 }
671
672 static void
673 carp_detach(struct carp_softc *sc, int detach, boolean_t del_iaback)
674 {
675         carp_suspend(sc, detach);
676
677         carp_multicast_cleanup(sc);
678 #ifdef INET6
679         carp_multicast6_cleanup(sc);
680 #endif
681
682         if (!sc->sc_dead && detach) {
683                 struct carp_vhaddr *vha;
684
685                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
686                         carp_deactivate_vhaddr(sc, vha, del_iaback);
687                 KKASSERT(sc->sc_naddrs == 0);
688         }
689
690         if (sc->sc_carpdev != NULL) {
691                 struct ifnet *ifp = sc->sc_carpdev;
692                 struct carp_if *ocif = ifp->if_carp;
693
694                 ifp->if_carp = carp_if_remove(ocif, sc);
695                 KASSERT(ifp->if_carp != ocif,
696                     ("%s carp_if_remove failed\n", __func__));
697
698                 sc->sc_carpdev = NULL;
699                 sc->sc_ia = NULL;
700
701                 /*
702                  * Make sure that all protocol threads see the
703                  * sc_carpdev and if_carp changes
704                  */
705                 netmsg_service_sync();
706
707                 if (ifp->if_carp == NULL) {
708                         /*
709                          * No more carp interfaces using
710                          * ifp as the backing interface,
711                          * move it out of promiscous mode.
712                          */
713                         ifpromisc(ifp, 0);
714                 }
715
716                 /*
717                  * The old carp list could be safely free now,
718                  * since no one can access it.
719                  */
720                 carp_if_free(ocif);
721         }
722 }
723
724 static void
725 carp_ifdetach_dispatch(netmsg_t msg)
726 {
727         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
728         struct ifnet *ifp = cmsg->nc_carpdev;
729
730         carp_gettok();
731
732         while (ifp->if_carp) {
733                 struct carp_softc_container *scc;
734
735                 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
736                 carp_detach(scc->scc_softc, 1, TRUE);
737         }
738
739         carp_reltok();
740
741         lwkt_replymsg(&cmsg->base.lmsg, 0);
742 }
743
744 /* Detach an interface from the carp. */
745 static void
746 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
747 {
748         struct netmsg_carp cmsg;
749
750         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
751
752         bzero(&cmsg, sizeof(cmsg));
753         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
754             carp_ifdetach_dispatch);
755         cmsg.nc_carpdev = ifp;
756
757         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
758 }
759
760 /*
761  * process input packet.
762  * we have rearranged checks order compared to the rfc,
763  * but it seems more efficient this way or not possible otherwise.
764  */
765 int
766 carp_proto_input(struct mbuf **mp, int *offp, int proto)
767 {
768         struct mbuf *m = *mp;
769         struct ip *ip = mtod(m, struct ip *);
770         struct ifnet *ifp = m->m_pkthdr.rcvif;
771         struct carp_header *ch;
772         struct carp_softc *sc;
773         int len, iphlen;
774
775         carp_gettok();
776
777         iphlen = *offp;
778         *mp = NULL;
779
780         carpstats.carps_ipackets++;
781
782         if (!carp_opts[CARPCTL_ALLOW]) {
783                 m_freem(m);
784                 goto back;
785         }
786
787         /* Check if received on a valid carp interface */
788         if (ifp->if_type != IFT_CARP) {
789                 carpstats.carps_badif++;
790                 CARP_LOG("carp_proto_input: packet received on non-carp "
791                     "interface: %s\n", ifp->if_xname);
792                 m_freem(m);
793                 goto back;
794         }
795
796         if (!CARP_IS_RUNNING(ifp)) {
797                 carpstats.carps_badif++;
798                 CARP_LOG("carp_proto_input: packet received on stopped carp "
799                     "interface: %s\n", ifp->if_xname);
800                 m_freem(m);
801                 goto back;
802         }
803
804         sc = ifp->if_softc;
805         if (sc->sc_carpdev == NULL) {
806                 carpstats.carps_badif++;
807                 CARP_LOG("carp_proto_input: packet received on defunc carp "
808                     "interface: %s\n", ifp->if_xname);
809                 m_freem(m);
810                 goto back;
811         }
812
813         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
814                 carpstats.carps_badif++;
815                 CARP_LOG("carp_proto_input: non-mcast packet on "
816                     "interface: %s\n", ifp->if_xname);
817                 m_freem(m);
818                 goto back;
819         }
820
821         /* Verify that the IP TTL is CARP_DFLTTL. */
822         if (ip->ip_ttl != CARP_DFLTTL) {
823                 carpstats.carps_badttl++;
824                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
825                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
826                 m_freem(m);
827                 goto back;
828         }
829
830         /* Minimal CARP packet size */
831         len = iphlen + sizeof(*ch);
832
833         /*
834          * Verify that the received packet length is
835          * not less than the CARP header
836          */
837         if (m->m_pkthdr.len < len) {
838                 carpstats.carps_badlen++;
839                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
840                     ifp->if_xname);
841                 m_freem(m);
842                 goto back;
843         }
844
845         /* Make sure that CARP header is contiguous */
846         if (len > m->m_len) {
847                 m = m_pullup(m, len);
848                 if (m == NULL) {
849                         carpstats.carps_hdrops++;
850                         CARP_LOG("carp_proto_input: m_pullup failed\n");
851                         goto back;
852                 }
853                 ip = mtod(m, struct ip *);
854         }
855         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
856
857         /* Verify the CARP checksum */
858         if (in_cksum_skip(m, len, iphlen)) {
859                 carpstats.carps_badsum++;
860                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
861                     ifp->if_xname);
862                 m_freem(m);
863                 goto back;
864         }
865         carp_proto_input_c(sc, m, ch, AF_INET);
866 back:
867         carp_reltok();
868         return(IPPROTO_DONE);
869 }
870
871 #ifdef INET6
872 int
873 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
874 {
875         struct mbuf *m = *mp;
876         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
877         struct ifnet *ifp = m->m_pkthdr.rcvif;
878         struct carp_header *ch;
879         struct carp_softc *sc;
880         u_int len;
881
882         carp_gettok();
883
884         carpstats.carps_ipackets6++;
885
886         if (!carp_opts[CARPCTL_ALLOW]) {
887                 m_freem(m);
888                 goto back;
889         }
890
891         /* check if received on a valid carp interface */
892         if (ifp->if_type != IFT_CARP) {
893                 carpstats.carps_badif++;
894                 CARP_LOG("carp6_proto_input: packet received on non-carp "
895                     "interface: %s\n", ifp->if_xname);
896                 m_freem(m);
897                 goto back;
898         }
899
900         if (!CARP_IS_RUNNING(ifp)) {
901                 carpstats.carps_badif++;
902                 CARP_LOG("carp_proto_input: packet received on stopped carp "
903                     "interface: %s\n", ifp->if_xname);
904                 m_freem(m);
905                 goto back;
906         }
907
908         sc = ifp->if_softc;
909         if (sc->sc_carpdev == NULL) {
910                 carpstats.carps_badif++;
911                 CARP_LOG("carp6_proto_input: packet received on defunc-carp "
912                     "interface: %s\n", ifp->if_xname);
913                 m_freem(m);
914                 goto back;
915         }
916
917         /* verify that the IP TTL is 255 */
918         if (ip6->ip6_hlim != CARP_DFLTTL) {
919                 carpstats.carps_badttl++;
920                 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
921                     ip6->ip6_hlim, ifp->if_xname);
922                 m_freem(m);
923                 goto back;
924         }
925
926         /* verify that we have a complete carp packet */
927         len = m->m_len;
928         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
929         if (ch == NULL) {
930                 carpstats.carps_badlen++;
931                 CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
932                 goto back;
933         }
934
935         /* verify the CARP checksum */
936         if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
937                 carpstats.carps_badsum++;
938                 CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
939                     ifp->if_xname);
940                 m_freem(m);
941                 goto back;
942         }
943
944         carp_proto_input_c(sc, m, ch, AF_INET6);
945 back:
946         carp_reltok();
947         return (IPPROTO_DONE);
948 }
949 #endif /* INET6 */
950
951 static void
952 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
953     struct carp_header *ch, sa_family_t af)
954 {
955         struct ifnet *cifp;
956         uint64_t tmp_counter;
957         struct timeval sc_tv, ch_tv;
958
959         if (sc->sc_vhid != ch->carp_vhid) {
960                 /*
961                  * CARP uses multicast, however, multicast packets
962                  * are tapped to all CARP interfaces on the physical
963                  * interface receiving the CARP packets, so we don't
964                  * update any stats here.
965                  */
966                 m_freem(m);
967                 return;
968         }
969         cifp = &sc->sc_if;
970
971         /* verify the CARP version. */
972         if (ch->carp_version != CARP_VERSION) {
973                 carpstats.carps_badver++;
974                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
975                          ch->carp_version);
976                 m_freem(m);
977                 return;
978         }
979
980         /* verify the hash */
981         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
982                 carpstats.carps_badauth++;
983                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
984                 m_freem(m);
985                 return;
986         }
987
988         tmp_counter = ntohl(ch->carp_counter[0]);
989         tmp_counter = tmp_counter<<32;
990         tmp_counter += ntohl(ch->carp_counter[1]);
991
992         /* XXX Replay protection goes here */
993
994         sc->sc_init_counter = 0;
995         sc->sc_counter = tmp_counter;
996
997         sc_tv.tv_sec = sc->sc_advbase;
998         if (carp_suppress_preempt && sc->sc_advskew <  240)
999                 sc_tv.tv_usec = 240 * 1000000 / 256;
1000         else
1001                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1002         ch_tv.tv_sec = ch->carp_advbase;
1003         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1004
1005         switch (sc->sc_state) {
1006         case INIT:
1007                 break;
1008
1009         case MASTER:
1010                 /*
1011                  * If we receive an advertisement from a master who's going to
1012                  * be more frequent than us, go into BACKUP state.
1013                  */
1014                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
1015                     timevalcmp(&sc_tv, &ch_tv, ==)) {
1016                         callout_stop(&sc->sc_ad_tmo);
1017                         CARP_DEBUG("%s: MASTER -> BACKUP "
1018                            "(more frequent advertisement received)\n",
1019                            cifp->if_xname);
1020                         carp_set_state(sc, BACKUP);
1021                         carp_setrun(sc, 0);
1022                         carp_setroute(sc, RTM_DELETE);
1023                 }
1024                 break;
1025
1026         case BACKUP:
1027                 /*
1028                  * If we're pre-empting masters who advertise slower than us,
1029                  * and this one claims to be slower, treat him as down.
1030                  */
1031                 if (carp_opts[CARPCTL_PREEMPT] &&
1032                     timevalcmp(&sc_tv, &ch_tv, <)) {
1033                         CARP_DEBUG("%s: BACKUP -> MASTER "
1034                             "(preempting a slower master)\n", cifp->if_xname);
1035                         carp_master_down(sc);
1036                         break;
1037                 }
1038
1039                 /*
1040                  *  If the master is going to advertise at such a low frequency
1041                  *  that he's guaranteed to time out, we'd might as well just
1042                  *  treat him as timed out now.
1043                  */
1044                 sc_tv.tv_sec = sc->sc_advbase * 3;
1045                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
1046                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1047                                    cifp->if_xname);
1048                         carp_master_down(sc);
1049                         break;
1050                 }
1051
1052                 /*
1053                  * Otherwise, we reset the counter and wait for the next
1054                  * advertisement.
1055                  */
1056                 carp_setrun(sc, af);
1057                 break;
1058         }
1059         m_freem(m);
1060 }
1061
1062 struct mbuf *
1063 carp_input(void *v, struct mbuf *m)
1064 {
1065         struct carp_if *cif = v;
1066         struct ether_header *eh;
1067         struct carp_softc_container *scc;
1068         struct ifnet *ifp;
1069
1070         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1071
1072         eh = mtod(m, struct ether_header *);
1073
1074         ifp = carp_forus(cif, eh->ether_dhost);
1075         if (ifp != NULL) {
1076                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1077                 return NULL;
1078         }
1079
1080         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1081                 return m;
1082
1083         /*
1084          * XXX Should really check the list of multicast addresses
1085          * for each CARP interface _before_ copying.
1086          */
1087         TAILQ_FOREACH(scc, cif, scc_link) {
1088                 struct carp_softc *sc = scc->scc_softc;
1089                 struct mbuf *m0;
1090
1091                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
1092                         continue;
1093
1094                 m0 = m_dup(m, MB_DONTWAIT);
1095                 if (m0 == NULL)
1096                         continue;
1097
1098                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1099         }
1100         return m;
1101 }
1102
1103 static void
1104 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1105 {
1106         if (sc->sc_init_counter) {
1107                 /* this could also be seconds since unix epoch */
1108                 sc->sc_counter = karc4random();
1109                 sc->sc_counter = sc->sc_counter << 32;
1110                 sc->sc_counter += karc4random();
1111         } else {
1112                 sc->sc_counter++;
1113         }
1114
1115         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1116         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1117
1118         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1119 }
1120
1121 static void
1122 carp_send_ad_all(void)
1123 {
1124         struct carp_softc *sc;
1125
1126         LIST_FOREACH(sc, &carpif_list, sc_next) {
1127                 if (sc->sc_carpdev == NULL)
1128                         continue;
1129
1130                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1131                         carp_send_ad(sc);
1132         }
1133 }
1134
1135 static void
1136 carp_send_ad_timeout(void *xsc)
1137 {
1138         struct carp_softc *sc = xsc;
1139         struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1140
1141         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d\n",
1142             __func__, mycpuid));
1143
1144         crit_enter();
1145         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1146                 lwkt_sendmsg(cpu_portfn(0), &cmsg->base.lmsg);
1147         crit_exit();
1148 }
1149
1150 static void
1151 carp_send_ad_timeout_dispatch(netmsg_t msg)
1152 {
1153         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1154         struct carp_softc *sc = cmsg->nc_softc;
1155
1156         /* Reply ASAP */
1157         crit_enter();
1158         lwkt_replymsg(&cmsg->base.lmsg, 0);
1159         crit_exit();
1160
1161         carp_gettok();
1162         carp_send_ad(sc);
1163         carp_reltok();
1164 }
1165
1166 static void
1167 carp_send_ad(struct carp_softc *sc)
1168 {
1169         struct ifnet *cifp = &sc->sc_if;
1170         struct carp_header ch;
1171         struct timeval tv;
1172         struct carp_header *ch_ptr;
1173         struct mbuf *m;
1174         int len, advbase, advskew;
1175
1176         if (!CARP_IS_RUNNING(cifp)) {
1177                 /* Bow out */
1178                 advbase = 255;
1179                 advskew = 255;
1180         } else {
1181                 advbase = sc->sc_advbase;
1182                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1183                         advskew = sc->sc_advskew;
1184                 else
1185                         advskew = 240;
1186                 tv.tv_sec = advbase;
1187                 tv.tv_usec = advskew * 1000000 / 256;
1188         }
1189
1190         ch.carp_version = CARP_VERSION;
1191         ch.carp_type = CARP_ADVERTISEMENT;
1192         ch.carp_vhid = sc->sc_vhid;
1193         ch.carp_advbase = advbase;
1194         ch.carp_advskew = advskew;
1195         ch.carp_authlen = 7;    /* XXX DEFINE */
1196         ch.carp_pad1 = 0;       /* must be zero */
1197         ch.carp_cksum = 0;
1198
1199 #ifdef INET
1200         if (sc->sc_ia != NULL) {
1201                 struct ip *ip;
1202
1203                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1204                 if (m == NULL) {
1205                         cifp->if_oerrors++;
1206                         carpstats.carps_onomem++;
1207                         /* XXX maybe less ? */
1208                         if (advbase != 255 || advskew != 255)
1209                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1210                                     carp_send_ad_timeout, sc);
1211                         return;
1212                 }
1213                 len = sizeof(*ip) + sizeof(ch);
1214                 m->m_pkthdr.len = len;
1215                 m->m_pkthdr.rcvif = NULL;
1216                 m->m_len = len;
1217                 MH_ALIGN(m, m->m_len);
1218                 m->m_flags |= M_MCAST;
1219                 ip = mtod(m, struct ip *);
1220                 ip->ip_v = IPVERSION;
1221                 ip->ip_hl = sizeof(*ip) >> 2;
1222                 ip->ip_tos = IPTOS_LOWDELAY;
1223                 ip->ip_len = len;
1224                 ip->ip_id = ip_newid();
1225                 ip->ip_off = IP_DF;
1226                 ip->ip_ttl = CARP_DFLTTL;
1227                 ip->ip_p = IPPROTO_CARP;
1228                 ip->ip_sum = 0;
1229                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1230                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1231
1232                 ch_ptr = (struct carp_header *)(&ip[1]);
1233                 bcopy(&ch, ch_ptr, sizeof(ch));
1234                 carp_prepare_ad(sc, ch_ptr);
1235                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1236
1237                 getmicrotime(&cifp->if_lastchange);
1238                 cifp->if_opackets++;
1239                 cifp->if_obytes += len;
1240                 carpstats.carps_opackets++;
1241
1242                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1243                         cifp->if_oerrors++;
1244                         if (sc->sc_sendad_errors < INT_MAX)
1245                                 sc->sc_sendad_errors++;
1246                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1247                                 carp_suppress_preempt++;
1248                                 if (carp_suppress_preempt == 1) {
1249                                         carp_send_ad_all();
1250                                 }
1251                         }
1252                         sc->sc_sendad_success = 0;
1253                 } else {
1254                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1255                                 if (++sc->sc_sendad_success >=
1256                                     CARP_SENDAD_MIN_SUCCESS) {
1257                                         carp_suppress_preempt--;
1258                                         sc->sc_sendad_errors = 0;
1259                                 }
1260                         } else {
1261                                 sc->sc_sendad_errors = 0;
1262                         }
1263                 }
1264         }
1265 #endif /* INET */
1266 #ifdef INET6
1267         if (sc->sc_ia6) {
1268                 struct ip6_hdr *ip6;
1269
1270                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1271                 if (m == NULL) {
1272                         cifp->if_oerrors++;
1273                         carpstats.carps_onomem++;
1274                         /* XXX maybe less ? */
1275                         if (advbase != 255 || advskew != 255)
1276                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1277                                     carp_send_ad_timeout, sc);
1278                         return;
1279                 }
1280                 len = sizeof(*ip6) + sizeof(ch);
1281                 m->m_pkthdr.len = len;
1282                 m->m_pkthdr.rcvif = NULL;
1283                 m->m_len = len;
1284                 MH_ALIGN(m, m->m_len);
1285                 m->m_flags |= M_MCAST;
1286                 ip6 = mtod(m, struct ip6_hdr *);
1287                 bzero(ip6, sizeof(*ip6));
1288                 ip6->ip6_vfc |= IPV6_VERSION;
1289                 ip6->ip6_hlim = CARP_DFLTTL;
1290                 ip6->ip6_nxt = IPPROTO_CARP;
1291                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1292                     sizeof(struct in6_addr));
1293                 /* set the multicast destination */
1294
1295                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1296                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1297                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1298                         cifp->if_oerrors++;
1299                         m_freem(m);
1300                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1301                         return;
1302                 }
1303
1304                 ch_ptr = (struct carp_header *)(&ip6[1]);
1305                 bcopy(&ch, ch_ptr, sizeof(ch));
1306                 carp_prepare_ad(sc, ch_ptr);
1307                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1308
1309                 getmicrotime(&cifp->if_lastchange);
1310                 cifp->if_opackets++;
1311                 cifp->if_obytes += len;
1312                 carpstats.carps_opackets6++;
1313
1314                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1315                         cifp->if_oerrors++;
1316                         if (sc->sc_sendad_errors < INT_MAX)
1317                                 sc->sc_sendad_errors++;
1318                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1319                                 carp_suppress_preempt++;
1320                                 if (carp_suppress_preempt == 1) {
1321                                         carp_send_ad_all();
1322                                 }
1323                         }
1324                         sc->sc_sendad_success = 0;
1325                 } else {
1326                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1327                                 if (++sc->sc_sendad_success >=
1328                                     CARP_SENDAD_MIN_SUCCESS) {
1329                                         carp_suppress_preempt--;
1330                                         sc->sc_sendad_errors = 0;
1331                                 }
1332                         } else {
1333                                 sc->sc_sendad_errors = 0;
1334                         }
1335                 }
1336         }
1337 #endif /* INET6 */
1338
1339         if (advbase != 255 || advskew != 255)
1340                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1341                     carp_send_ad_timeout, sc);
1342 }
1343
1344 /*
1345  * Broadcast a gratuitous ARP request containing
1346  * the virtual router MAC address for each IP address
1347  * associated with the virtual router.
1348  */
1349 static void
1350 carp_send_arp(struct carp_softc *sc)
1351 {
1352         const struct carp_vhaddr *vha;
1353
1354         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1355                 if (vha->vha_iaback == NULL)
1356                         continue;
1357                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1358         }
1359 }
1360
1361 #ifdef INET6
1362 static void
1363 carp_send_na(struct carp_softc *sc)
1364 {
1365         struct ifaddr_container *ifac;
1366         struct in6_addr *in6;
1367         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1368
1369         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1370                 struct ifaddr *ifa = ifac->ifa;
1371
1372                 if (ifa->ifa_addr->sa_family != AF_INET6)
1373                         continue;
1374
1375                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1376                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1377                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1378                 DELAY(1000);    /* XXX */
1379         }
1380 }
1381 #endif /* INET6 */
1382
1383 static __inline const struct carp_vhaddr *
1384 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1385 {
1386         struct carp_vhaddr *vha;
1387
1388         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1389                 if (vha->vha_iaback == NULL)
1390                         continue;
1391
1392                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1393                         return vha;
1394         }
1395         return NULL;
1396 }
1397
1398 #ifdef notyet
1399 static int
1400 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1401                      const struct in_addr *isaddr, uint8_t **enaddr)
1402 {
1403         const struct carp_softc *vh;
1404         int index, count = 0;
1405
1406         /*
1407          * XXX proof of concept implementation.
1408          * We use the source ip to decide which virtual host should
1409          * handle the request. If we're master of that virtual host,
1410          * then we respond, otherwise, just drop the arp packet on
1411          * the floor.
1412          */
1413
1414         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1415                 if (!CARP_IS_RUNNING(&vh->sc_if))
1416                         continue;
1417
1418                 if (carp_find_addr(vh, itaddr) != NULL)
1419                         count++;
1420         }
1421         if (count == 0)
1422                 return 0;
1423
1424         /* this should be a hash, like pf_hash() */
1425         index = ntohl(isaddr->s_addr) % count;
1426         count = 0;
1427
1428         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1429                 if (!CARP_IS_RUNNING(&vh->sc_if))
1430                         continue;
1431
1432                 if (carp_find_addr(vh, itaddr) == NULL)
1433                         continue;
1434
1435                 if (count == index) {
1436                         if (vh->sc_state == MASTER) {
1437                                 *enaddr = IF_LLADDR(&vh->sc_if);
1438                                 return 1;
1439                         } else {
1440                                 return 0;
1441                         }
1442                 }
1443                 count++;
1444         }
1445         return 0;
1446 }
1447 #endif
1448
1449 int
1450 carp_iamatch(const struct in_ifaddr *ia)
1451 {
1452         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1453
1454         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1455
1456         KASSERT(&curthread->td_msgport == cpu_portfn(0),
1457             ("not in netisr0"));
1458
1459 #ifdef notyet
1460         if (carp_opts[CARPCTL_ARPBALANCE])
1461                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1462 #endif
1463
1464         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1465                 return 0;
1466
1467         return 1;
1468 }
1469
1470 #ifdef INET6
1471 struct ifaddr *
1472 carp_iamatch6(void *v, struct in6_addr *taddr)
1473 {
1474 #ifdef foo
1475         struct carp_if *cif = v;
1476         struct carp_softc *vh;
1477
1478         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1479
1480         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1481                 struct ifaddr_container *ifac;
1482
1483                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1484                               ifa_link) {
1485                         struct ifaddr *ifa = ifac->ifa;
1486
1487                         if (IN6_ARE_ADDR_EQUAL(taddr,
1488                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1489                             CARP_IS_RUNNING(&vh->sc_if) &&
1490                             vh->sc_state == MASTER) {
1491                                 return (ifa);
1492                         }
1493                 }
1494         }
1495 #endif
1496         return (NULL);
1497 }
1498
1499 void *
1500 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1501 {
1502 #ifdef foo
1503         struct m_tag *mtag;
1504         struct carp_if *cif = v;
1505         struct carp_softc *sc;
1506
1507         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1508
1509         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1510                 struct ifaddr_container *ifac;
1511
1512                 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1513                               ifa_link) {
1514                         struct ifaddr *ifa = ifac->ifa;
1515
1516                         if (IN6_ARE_ADDR_EQUAL(taddr,
1517                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1518                             CARP_IS_RUNNING(&sc->sc_if)) {
1519                                 struct ifnet *ifp = &sc->sc_if;
1520
1521                                 mtag = m_tag_get(PACKET_TAG_CARP,
1522                                     sizeof(struct ifnet *), MB_DONTWAIT);
1523                                 if (mtag == NULL) {
1524                                         /* better a bit than nothing */
1525                                         return (IF_LLADDR(ifp));
1526                                 }
1527                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1528                                     sizeof(struct ifnet *));
1529                                 m_tag_prepend(m, mtag);
1530
1531                                 return (IF_LLADDR(ifp));
1532                         }
1533                 }
1534         }
1535 #endif
1536         return (NULL);
1537 }
1538 #endif
1539
1540 static struct ifnet *
1541 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1542 {
1543         struct carp_softc_container *scc;
1544
1545         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
1546
1547         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1548                 return NULL;
1549
1550         TAILQ_FOREACH(scc, cif, scc_link) {
1551                 struct carp_softc *sc = scc->scc_softc;
1552                 struct ifnet *ifp = &sc->sc_if;
1553
1554                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1555                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1556                         return ifp;
1557         }
1558         return NULL;
1559 }
1560
1561 static void
1562 carp_master_down_timeout(void *xsc)
1563 {
1564         struct carp_softc *sc = xsc;
1565         struct netmsg_carp *cmsg = &sc->sc_md_msg;
1566
1567         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d\n",
1568             __func__, mycpuid));
1569
1570         crit_enter();
1571         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1572                 lwkt_sendmsg(cpu_portfn(0), &cmsg->base.lmsg);
1573         crit_exit();
1574 }
1575
1576 static void
1577 carp_master_down_timeout_dispatch(netmsg_t msg)
1578 {
1579         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1580         struct carp_softc *sc = cmsg->nc_softc;
1581
1582         /* Reply ASAP */
1583         crit_enter();
1584         lwkt_replymsg(&cmsg->base.lmsg, 0);
1585         crit_exit();
1586
1587         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1588                    sc->sc_if.if_xname);
1589         carp_gettok();
1590         carp_master_down(sc);
1591         carp_reltok();
1592 }
1593
1594 static void
1595 carp_master_down(struct carp_softc *sc)
1596 {
1597         switch (sc->sc_state) {
1598         case INIT:
1599                 kprintf("%s: master_down event in INIT state\n",
1600                         sc->sc_if.if_xname);
1601                 break;
1602
1603         case MASTER:
1604                 break;
1605
1606         case BACKUP:
1607                 carp_set_state(sc, MASTER);
1608                 carp_send_ad(sc);
1609                 carp_send_arp(sc);
1610 #ifdef INET6
1611                 carp_send_na(sc);
1612 #endif /* INET6 */
1613                 carp_setrun(sc, 0);
1614                 carp_setroute(sc, RTM_ADD);
1615                 break;
1616         }
1617 }
1618
1619 /*
1620  * When in backup state, af indicates whether to reset the master down timer
1621  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1622  */
1623 static void
1624 carp_setrun(struct carp_softc *sc, sa_family_t af)
1625 {
1626         struct ifnet *cifp = &sc->sc_if;
1627         struct timeval tv;
1628
1629         if (sc->sc_carpdev == NULL) {
1630                 carp_set_state(sc, INIT);
1631                 return;
1632         }
1633
1634         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1635             (sc->sc_naddrs || sc->sc_naddrs6)) {
1636                 /* Nothing */
1637         } else {
1638                 carp_setroute(sc, RTM_DELETE);
1639                 return;
1640         }
1641
1642         switch (sc->sc_state) {
1643         case INIT:
1644                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1645                         carp_send_ad(sc);
1646                         carp_send_arp(sc);
1647 #ifdef INET6
1648                         carp_send_na(sc);
1649 #endif /* INET6 */
1650                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1651                                    cifp->if_xname);
1652                         carp_set_state(sc, MASTER);
1653                         carp_setroute(sc, RTM_ADD);
1654                 } else {
1655                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1656                         carp_set_state(sc, BACKUP);
1657                         carp_setroute(sc, RTM_DELETE);
1658                         carp_setrun(sc, 0);
1659                 }
1660                 break;
1661
1662         case BACKUP:
1663                 callout_stop(&sc->sc_ad_tmo);
1664                 tv.tv_sec = 3 * sc->sc_advbase;
1665                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1666                 switch (af) {
1667 #ifdef INET
1668                 case AF_INET:
1669                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1670                             carp_master_down_timeout, sc);
1671                         break;
1672 #endif /* INET */
1673 #ifdef INET6
1674                 case AF_INET6:
1675                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1676                             carp_master_down_timeout, sc);
1677                         break;
1678 #endif /* INET6 */
1679                 default:
1680                         if (sc->sc_naddrs)
1681                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1682                                     carp_master_down_timeout, sc);
1683                         if (sc->sc_naddrs6)
1684                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1685                                     carp_master_down_timeout, sc);
1686                         break;
1687                 }
1688                 break;
1689
1690         case MASTER:
1691                 tv.tv_sec = sc->sc_advbase;
1692                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1693                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1694                     carp_send_ad_timeout, sc);
1695                 break;
1696         }
1697 }
1698
1699 static void
1700 carp_multicast_cleanup(struct carp_softc *sc)
1701 {
1702         struct ip_moptions *imo = &sc->sc_imo;
1703
1704         if (imo->imo_num_memberships == 0)
1705                 return;
1706         KKASSERT(imo->imo_num_memberships == 1);
1707
1708         in_delmulti(imo->imo_membership[0]);
1709         imo->imo_membership[0] = NULL;
1710         imo->imo_num_memberships = 0;
1711         imo->imo_multicast_ifp = NULL;
1712 }
1713
1714 #ifdef INET6
1715 static void
1716 carp_multicast6_cleanup(struct carp_softc *sc)
1717 {
1718         struct ip6_moptions *im6o = &sc->sc_im6o;
1719
1720         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1721                 struct in6_multi_mship *imm =
1722                     LIST_FIRST(&im6o->im6o_memberships);
1723
1724                 LIST_REMOVE(imm, i6mm_chain);
1725                 in6_leavegroup(imm);
1726         }
1727         im6o->im6o_multicast_ifp = NULL;
1728 }
1729 #endif
1730
1731 static void
1732 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1733 {
1734         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1735         struct carp_softc *sc = cmsg->nc_softc;
1736         const struct carp_vhaddr *vha;
1737         struct ifcarpvhaddr *carpa, *carpa0;
1738         int count, len, error = 0;
1739
1740         carp_gettok();
1741
1742         count = 0;
1743         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1744                 ++count;
1745
1746         if (cmsg->nc_datalen == 0) {
1747                 cmsg->nc_datalen = count * sizeof(*carpa);
1748                 goto back;
1749         } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1750                 cmsg->nc_datalen = 0;
1751                 goto back;
1752         }
1753         len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1754         KKASSERT(len >= sizeof(*carpa));
1755
1756         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1757         if (carpa == NULL) {
1758                 error = ENOMEM; 
1759                 goto back;
1760         }
1761
1762         count = 0;
1763         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1764                 if (len < sizeof(*carpa))
1765                         break;
1766
1767                 carpa->carpa_flags = vha->vha_flags;
1768                 carpa->carpa_addr.sin_family = AF_INET;
1769                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1770
1771                 carpa->carpa_baddr.sin_family = AF_INET;
1772                 if (vha->vha_iaback == NULL) {
1773                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1774                 } else {
1775                         carpa->carpa_baddr.sin_addr =
1776                         vha->vha_iaback->ia_addr.sin_addr;
1777                 }
1778
1779                 ++carpa;
1780                 ++count;
1781                 len -= sizeof(*carpa);
1782         }
1783         cmsg->nc_datalen = sizeof(*carpa) * count;
1784         KKASSERT(cmsg->nc_datalen > 0);
1785
1786         cmsg->nc_data = carpa0;
1787
1788 back:
1789         carp_reltok();
1790         lwkt_replymsg(&cmsg->base.lmsg, error);
1791 }
1792
1793 static int
1794 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1795 {
1796         struct ifnet *ifp = &sc->arpcom.ac_if;
1797         struct netmsg_carp cmsg;
1798         int error;
1799
1800         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1801         ifnet_deserialize_all(ifp);
1802
1803         bzero(&cmsg, sizeof(cmsg));
1804         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1805             carp_ioctl_getvhaddr_dispatch);
1806         cmsg.nc_softc = sc;
1807         cmsg.nc_datalen = ifd->ifd_len;
1808
1809         error = lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
1810
1811         if (!error) {
1812                 if (cmsg.nc_data != NULL) {
1813                         error = copyout(cmsg.nc_data, ifd->ifd_data,
1814                             cmsg.nc_datalen);
1815                         kfree(cmsg.nc_data, M_TEMP);
1816                 }
1817                 ifd->ifd_len = cmsg.nc_datalen;
1818         } else {
1819                 KASSERT(cmsg.nc_data == NULL,
1820                     ("%s temp vhaddr is alloc upon error\n", __func__));
1821         }
1822
1823         ifnet_serialize_all(ifp);
1824         return error;
1825 }
1826
1827 static int
1828 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1829     struct in_ifaddr *ia_del)
1830 {
1831         struct ifnet *ifp;
1832         struct in_ifaddr *ia_if;
1833         struct in_ifaddr_container *iac;
1834         const struct sockaddr_in *sin;
1835         u_long iaddr;
1836         int own;
1837
1838         KKASSERT(vha->vha_ia != NULL);
1839
1840         sin = &vha->vha_ia->ia_addr;
1841         iaddr = ntohl(sin->sin_addr.s_addr);
1842
1843         ia_if = NULL;
1844         own = 0;
1845         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1846                 struct in_ifaddr *ia = iac->ia;
1847
1848                 if (ia == ia_del)
1849                         continue;
1850
1851                 if (ia->ia_ifp->if_type == IFT_CARP)
1852                         continue;
1853
1854                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1855                         continue;
1856
1857                 /* and, yeah, we need a multicast-capable iface too */
1858                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1859                         continue;
1860
1861                 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1862                         if (sin->sin_addr.s_addr ==
1863                             ia->ia_addr.sin_addr.s_addr)
1864                                 own = 1;
1865                         if (ia_if == NULL)
1866                                 ia_if = ia;
1867                         else if (sc->sc_carpdev != NULL &&
1868                                  sc->sc_carpdev == ia->ia_ifp)
1869                                 ia_if = ia;
1870                 }
1871         }
1872
1873         carp_deactivate_vhaddr(sc, vha, FALSE);
1874         if (!ia_if)
1875                 return ENOENT;
1876
1877         ifp = ia_if->ia_ifp;
1878
1879         /* XXX Don't allow parent iface to be changed */
1880         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
1881                 return EEXIST;
1882
1883         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
1884 }
1885
1886 static void
1887 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1888 {
1889         struct carp_vhaddr *vha_new;
1890         struct in_ifaddr *carp_ia;
1891 #ifdef INVARIANTS
1892         struct carp_vhaddr *vha;
1893 #endif
1894
1895         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1896         carp_ia = ifatoia(carp_ifa);
1897
1898 #ifdef INVARIANTS
1899         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1900                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
1901 #endif
1902
1903         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
1904         vha_new->vha_ia = carp_ia;
1905         carp_insert_vhaddr(sc, vha_new);
1906
1907         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
1908                 /*
1909                  * If the above configuration fails, it may only mean
1910                  * that the new address is problematic.  However, the
1911                  * carp(4) interface may already have several working
1912                  * addresses.  Since the expected behaviour of
1913                  * SIOC[AS]IFADDR is to put the NIC into working state,
1914                  * we try starting the state machine manually here with
1915                  * the hope that the carp(4)'s previously working
1916                  * addresses still could be brought up.
1917                  */
1918                 carp_hmac_prepare(sc);
1919                 carp_set_state(sc, INIT);
1920                 carp_setrun(sc, 0);
1921         }
1922 }
1923
1924 static void
1925 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1926 {
1927         struct carp_vhaddr *vha;
1928         struct in_ifaddr *carp_ia;
1929
1930         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1931         carp_ia = ifatoia(carp_ifa);
1932
1933         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1934                 KKASSERT(vha->vha_ia != NULL);
1935                 if (vha->vha_ia == carp_ia)
1936                         break;
1937         }
1938         KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa));
1939
1940         /*
1941          * Remove the vhaddr from the list before deactivating
1942          * the vhaddr, so that the HMAC could be correctly
1943          * updated in carp_deactivate_vhaddr()
1944          */
1945         carp_remove_vhaddr(sc, vha);
1946
1947         carp_deactivate_vhaddr(sc, vha, FALSE);
1948         kfree(vha, M_CARP);
1949 }
1950
1951 static void
1952 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1953 {
1954         struct carp_vhaddr *vha;
1955         struct in_ifaddr *carp_ia;
1956
1957         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1958         carp_ia = ifatoia(carp_ifa);
1959
1960         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1961                 KKASSERT(vha->vha_ia != NULL);
1962                 if (vha->vha_ia == carp_ia)
1963                         break;
1964         }
1965         KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa));
1966
1967         /* Remove then reinsert, to keep the vhaddr list sorted */
1968         carp_remove_vhaddr(sc, vha);
1969         carp_insert_vhaddr(sc, vha);
1970
1971         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
1972                 /* See the comment in carp_add_addr() */
1973                 carp_hmac_prepare(sc);
1974                 carp_set_state(sc, INIT);
1975                 carp_setrun(sc, 0);
1976         }
1977 }
1978
1979 #ifdef notyet
1980
1981 #ifdef INET6
1982 static int
1983 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1984 {
1985         struct ifnet *ifp;
1986         struct carp_if *cif;
1987         struct in6_ifaddr *ia, *ia_if;
1988         struct ip6_moptions *im6o = &sc->sc_im6o;
1989         struct in6_multi_mship *imm;
1990         struct in6_addr in6;
1991         int own, error;
1992
1993         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1994                 carp_setrun(sc, 0);
1995                 return (0);
1996         }
1997
1998         /* we have to do it by hands to check we won't match on us */
1999         ia_if = NULL; own = 0;
2000         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2001                 int i;
2002
2003                 for (i = 0; i < 4; i++) {
2004                         if ((sin6->sin6_addr.s6_addr32[i] &
2005                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2006                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
2007                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2008                                 break;
2009                 }
2010                 /* and, yeah, we need a multicast-capable iface too */
2011                 if (ia->ia_ifp != &sc->sc_if &&
2012                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2013                     (i == 4)) {
2014                         if (!ia_if)
2015                                 ia_if = ia;
2016                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2017                             &ia->ia_addr.sin6_addr))
2018                                 own++;
2019                 }
2020         }
2021
2022         if (!ia_if)
2023                 return (EADDRNOTAVAIL);
2024         ia = ia_if;
2025         ifp = ia->ia_ifp;
2026
2027         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2028             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2029                 return (EADDRNOTAVAIL);
2030
2031         if (!sc->sc_naddrs6) {
2032                 im6o->im6o_multicast_ifp = ifp;
2033
2034                 /* join CARP multicast address */
2035                 bzero(&in6, sizeof(in6));
2036                 in6.s6_addr16[0] = htons(0xff02);
2037                 in6.s6_addr8[15] = 0x12;
2038                 if (in6_setscope(&in6, ifp, NULL) != 0)
2039                         goto cleanup;
2040                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2041                         goto cleanup;
2042                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2043
2044                 /* join solicited multicast address */
2045                 bzero(&in6, sizeof(in6));
2046                 in6.s6_addr16[0] = htons(0xff02);
2047                 in6.s6_addr32[1] = 0;
2048                 in6.s6_addr32[2] = htonl(1);
2049                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2050                 in6.s6_addr8[12] = 0xff;
2051                 if (in6_setscope(&in6, ifp, NULL) != 0)
2052                         goto cleanup;
2053                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2054                         goto cleanup;
2055                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2056         }
2057
2058 #ifdef foo
2059         if (!ifp->if_carp) {
2060                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2061
2062                 if ((error = ifpromisc(ifp, 1))) {
2063                         kfree(cif, M_CARP);
2064                         goto cleanup;
2065                 }
2066
2067                 TAILQ_INIT(&cif->vhif_vrs);
2068                 ifp->if_carp = cif;
2069         } else {
2070                 struct carp_softc *vr;
2071
2072                 cif = ifp->if_carp;
2073                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2074                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2075                                 error = EINVAL;
2076                                 goto cleanup;
2077                         }
2078                 }
2079         }
2080 #endif
2081         sc->sc_ia6 = ia;
2082         sc->sc_carpdev = ifp;
2083
2084 #ifdef foo
2085         { /* XXX prevent endless loop if already in queue */
2086         struct carp_softc *vr, *after = NULL;
2087         int myself = 0;
2088         cif = ifp->if_carp;
2089
2090         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2091                 if (vr == sc)
2092                         myself = 1;
2093                 if (vr->sc_vhid < sc->sc_vhid)
2094                         after = vr;
2095         }
2096
2097         if (!myself) {
2098                 /* We're trying to keep things in order */
2099                 if (after == NULL)
2100                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2101                 else
2102                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2103         }
2104         }
2105 #endif
2106
2107         sc->sc_naddrs6++;
2108         if (own)
2109                 sc->sc_advskew = 0;
2110         carp_sc_state(sc);
2111         carp_setrun(sc, 0);
2112
2113         return (0);
2114
2115 cleanup:
2116         /* clean up multicast memberships */
2117         if (!sc->sc_naddrs6) {
2118                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2119                         imm = LIST_FIRST(&im6o->im6o_memberships);
2120                         LIST_REMOVE(imm, i6mm_chain);
2121                         in6_leavegroup(imm);
2122                 }
2123         }
2124         return (error);
2125 }
2126
2127 static int
2128 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2129 {
2130         int error = 0;
2131
2132         if (!--sc->sc_naddrs6) {
2133                 struct carp_if *cif = sc->sc_carpdev->if_carp;
2134                 struct ip6_moptions *im6o = &sc->sc_im6o;
2135
2136                 callout_stop(&sc->sc_ad_tmo);
2137                 sc->sc_vhid = -1;
2138                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2139                         struct in6_multi_mship *imm =
2140                             LIST_FIRST(&im6o->im6o_memberships);
2141
2142                         LIST_REMOVE(imm, i6mm_chain);
2143                         in6_leavegroup(imm);
2144                 }
2145                 im6o->im6o_multicast_ifp = NULL;
2146 #ifdef foo
2147                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2148                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2149                         sc->sc_carpdev->if_carp = NULL;
2150                         kfree(cif, M_IFADDR);
2151                 }
2152 #endif
2153         }
2154         return (error);
2155 }
2156 #endif /* INET6 */
2157
2158 #endif
2159
2160 static int
2161 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2162 {
2163         struct carp_softc *sc = ifp->if_softc;
2164         struct ifreq *ifr = (struct ifreq *)addr;
2165         struct ifdrv *ifd = (struct ifdrv *)addr;
2166         int error = 0;
2167
2168         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2169
2170         carp_gettok();
2171
2172         switch (cmd) {
2173         case SIOCSIFFLAGS:
2174                 if (ifp->if_flags & IFF_UP) {
2175                         if ((ifp->if_flags & IFF_RUNNING) == 0)
2176                                 carp_init(sc);
2177                 } else if (ifp->if_flags & IFF_RUNNING) {
2178                         carp_ioctl_stop(sc);
2179                 }
2180                 break;
2181
2182         case SIOCSVH:
2183                 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2184                 break;
2185
2186         case SIOCGVH:
2187                 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2188                 break;
2189
2190         case SIOCGDRVSPEC:
2191                 switch (ifd->ifd_cmd) {
2192                 case CARPGDEVNAME:
2193                         error = carp_ioctl_getdevname(sc, ifd);
2194                         break;
2195
2196                 case CARPGVHADDR:
2197                         error = carp_ioctl_getvhaddr(sc, ifd);
2198                         break;
2199
2200                 default:
2201                         error = EINVAL;
2202                         break;
2203                 }
2204                 break;
2205
2206         default:
2207                 error = ether_ioctl(ifp, cmd, addr);
2208                 break;
2209         }
2210
2211         carp_reltok();
2212         return error;
2213 }
2214
2215 static void
2216 carp_ioctl_stop_dispatch(netmsg_t msg)
2217 {
2218         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2219         struct carp_softc *sc = cmsg->nc_softc;
2220
2221         carp_gettok();
2222         carp_stop(sc, 0);
2223         carp_reltok();
2224
2225         lwkt_replymsg(&cmsg->base.lmsg, 0);
2226 }
2227
2228 static void
2229 carp_ioctl_stop(struct carp_softc *sc)
2230 {
2231         struct ifnet *ifp = &sc->arpcom.ac_if;
2232         struct netmsg_carp cmsg;
2233
2234         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2235
2236         ifnet_deserialize_all(ifp);
2237
2238         bzero(&cmsg, sizeof(cmsg));
2239         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2240             carp_ioctl_stop_dispatch);
2241         cmsg.nc_softc = sc;
2242
2243         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2244
2245         ifnet_serialize_all(ifp);
2246 }
2247
2248 static void
2249 carp_ioctl_setvh_dispatch(netmsg_t msg)
2250 {
2251         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2252         struct carp_softc *sc = cmsg->nc_softc;
2253         struct ifnet *ifp = &sc->arpcom.ac_if;
2254         const struct carpreq *carpr = cmsg->nc_data;
2255         int error;
2256
2257         carp_gettok();
2258
2259         error = 1;
2260         if ((ifp->if_flags & IFF_RUNNING) &&
2261             sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2262                 switch (carpr->carpr_state) {
2263                 case BACKUP:
2264                         callout_stop(&sc->sc_ad_tmo);
2265                         carp_set_state(sc, BACKUP);
2266                         carp_setrun(sc, 0);
2267                         carp_setroute(sc, RTM_DELETE);
2268                         break;
2269
2270                 case MASTER:
2271                         carp_master_down(sc);
2272                         break;
2273
2274                 default:
2275                         break;
2276                 }
2277         }
2278         if (carpr->carpr_vhid > 0) {
2279                 if (carpr->carpr_vhid > 255) {
2280                         error = EINVAL;
2281                         goto back;
2282                 }
2283                 if (sc->sc_carpdev) {
2284                         struct carp_if *cif = sc->sc_carpdev->if_carp;
2285                         struct carp_softc_container *scc;
2286
2287                         TAILQ_FOREACH(scc, cif, scc_link) {
2288                                 struct carp_softc *vr = scc->scc_softc;
2289
2290                                 if (vr != sc &&
2291                                     vr->sc_vhid == carpr->carpr_vhid) {
2292                                         error = EEXIST;
2293                                         goto back;
2294                                 }
2295                         }
2296                 }
2297                 sc->sc_vhid = carpr->carpr_vhid;
2298
2299                 IF_LLADDR(ifp)[5] = sc->sc_vhid;
2300                 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2301                     ETHER_ADDR_LEN);
2302
2303                 error--;
2304         }
2305         if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2306                 if (carpr->carpr_advskew >= 255) {
2307                         error = EINVAL;
2308                         goto back;
2309                 }
2310                 if (carpr->carpr_advbase > 255) {
2311                         error = EINVAL;
2312                         goto back;
2313                 }
2314                 sc->sc_advbase = carpr->carpr_advbase;
2315                 sc->sc_advskew = carpr->carpr_advskew;
2316                 error--;
2317         }
2318         bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2319         if (error > 0) {
2320                 error = EINVAL;
2321         } else {
2322                 error = 0;
2323                 carp_setrun(sc, 0);
2324         }
2325 back:
2326         carp_hmac_prepare(sc);
2327         carp_gettok();
2328
2329         lwkt_replymsg(&cmsg->base.lmsg, error);
2330 }
2331
2332 static int
2333 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2334 {
2335         struct ifnet *ifp = &sc->arpcom.ac_if;
2336         struct netmsg_carp cmsg;
2337         struct carpreq carpr;
2338         int error;
2339
2340         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2341         ifnet_deserialize_all(ifp);
2342
2343         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2344         if (error)
2345                 goto back;
2346
2347         error = copyin(udata, &carpr, sizeof(carpr));
2348         if (error)
2349                 goto back;
2350
2351         bzero(&cmsg, sizeof(cmsg));
2352         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2353             carp_ioctl_setvh_dispatch);
2354         cmsg.nc_softc = sc;
2355         cmsg.nc_data = &carpr;
2356
2357         error = lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2358
2359 back:
2360         ifnet_serialize_all(ifp);
2361         return error;
2362 }
2363
2364 static void
2365 carp_ioctl_getvh_dispatch(netmsg_t msg)
2366 {
2367         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2368         struct carp_softc *sc = cmsg->nc_softc;
2369         struct carpreq *carpr = cmsg->nc_data;
2370
2371         carp_gettok();
2372
2373         carpr->carpr_state = sc->sc_state;
2374         carpr->carpr_vhid = sc->sc_vhid;
2375         carpr->carpr_advbase = sc->sc_advbase;
2376         carpr->carpr_advskew = sc->sc_advskew;
2377         bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2378
2379         carp_reltok();
2380
2381         lwkt_replymsg(&cmsg->base.lmsg, 0);
2382 }
2383
2384 static int
2385 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2386 {
2387         struct ifnet *ifp = &sc->arpcom.ac_if;
2388         struct netmsg_carp cmsg;
2389         struct carpreq carpr;
2390         int error;
2391
2392         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2393         ifnet_deserialize_all(ifp);
2394
2395         bzero(&cmsg, sizeof(cmsg));
2396         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2397             carp_ioctl_getvh_dispatch);
2398         cmsg.nc_softc = sc;
2399         cmsg.nc_data = &carpr;
2400
2401         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2402
2403         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2404         if (error)
2405                 bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2406
2407         error = copyout(&carpr, udata, sizeof(carpr));
2408
2409         ifnet_serialize_all(ifp);
2410         return error;
2411 }
2412
2413 static void
2414 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2415 {
2416         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2417         struct carp_softc *sc = cmsg->nc_softc;
2418         char *devname = cmsg->nc_data;
2419
2420         bzero(devname, sizeof(devname));
2421
2422         carp_gettok();
2423         if (sc->sc_carpdev != NULL)
2424                 strlcpy(devname, sc->sc_carpdev->if_xname, sizeof(devname));
2425         carp_reltok();
2426
2427         lwkt_replymsg(&cmsg->base.lmsg, 0);
2428 }
2429
2430 static int
2431 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2432 {
2433         struct ifnet *ifp = &sc->arpcom.ac_if;
2434         struct netmsg_carp cmsg;
2435         char devname[IFNAMSIZ];
2436         int error;
2437
2438         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2439
2440         if (ifd->ifd_len != sizeof(devname))
2441                 return EINVAL;
2442
2443         ifnet_deserialize_all(ifp);
2444
2445         bzero(&cmsg, sizeof(cmsg));
2446         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2447             carp_ioctl_getdevname_dispatch);
2448         cmsg.nc_softc = sc;
2449         cmsg.nc_data = devname;
2450
2451         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2452
2453         error = copyout(devname, ifd->ifd_data, sizeof(devname));
2454
2455         ifnet_serialize_all(ifp);
2456         return error;
2457 }
2458
2459 static void
2460 carp_init_dispatch(netmsg_t msg)
2461 {
2462         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2463         struct carp_softc *sc = cmsg->nc_softc;
2464
2465         carp_gettok();
2466
2467         sc->sc_if.if_flags |= IFF_RUNNING;
2468         carp_hmac_prepare(sc);
2469         carp_set_state(sc, INIT);
2470         carp_setrun(sc, 0);
2471
2472         carp_reltok();
2473
2474         lwkt_replymsg(&cmsg->base.lmsg, 0);
2475 }
2476
2477 static void
2478 carp_init(void *xsc)
2479 {
2480         struct carp_softc *sc = xsc;
2481         struct ifnet *ifp = &sc->arpcom.ac_if;
2482         struct netmsg_carp cmsg;
2483
2484         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2485
2486         ifnet_deserialize_all(ifp);
2487
2488         bzero(&cmsg, sizeof(cmsg));
2489         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2490             carp_init_dispatch);
2491         cmsg.nc_softc = sc;
2492
2493         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2494
2495         ifnet_serialize_all(ifp);
2496 }
2497
2498 static int
2499 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2500     struct rtentry *rt)
2501 {
2502         struct carp_softc *sc = ifp->if_softc;
2503         struct ifnet *carpdev;
2504         int error = 0;
2505
2506         carp_gettok();
2507         carpdev = sc->sc_carpdev;
2508         if (carpdev != NULL) {
2509                 /*
2510                  * NOTE:
2511                  * CARP's ifp is passed to backing device's
2512                  * if_output method.
2513                  */
2514                 carpdev->if_output(ifp, m, dst, rt);
2515         } else {
2516                 m_freem(m);
2517                 error = ENETUNREACH;
2518         }
2519         carp_reltok();
2520
2521         return error;
2522 }
2523
2524 /*
2525  * Start output on carp interface. This function should never be called.
2526  */
2527 static void
2528 carp_start(struct ifnet *ifp)
2529 {
2530         panic("%s: start called\n", ifp->if_xname);
2531 }
2532
2533 static void
2534 carp_serialize(struct ifnet *ifp __unused,
2535     enum ifnet_serialize slz __unused)
2536 {
2537 }
2538
2539 static void
2540 carp_deserialize(struct ifnet *ifp __unused,
2541     enum ifnet_serialize slz __unused)
2542 {
2543 }
2544
2545 static int
2546 carp_tryserialize(struct ifnet *ifp __unused,
2547     enum ifnet_serialize slz __unused)
2548 {
2549         return 1;
2550 }
2551
2552 #ifdef INVARIANTS
2553
2554 static void
2555 carp_serialize_assert(struct ifnet *ifp __unused,
2556     enum ifnet_serialize slz __unused, boolean_t serialized __unused)
2557 {
2558 }
2559
2560 #endif  /* INVARIANTS */
2561
2562 static void
2563 carp_set_state(struct carp_softc *sc, int state)
2564 {
2565         struct ifnet *cifp = &sc->sc_if;
2566
2567         if (sc->sc_state == state)
2568                 return;
2569         sc->sc_state = state;
2570
2571         switch (sc->sc_state) {
2572         case BACKUP:
2573                 cifp->if_link_state = LINK_STATE_DOWN;
2574                 break;
2575
2576         case MASTER:
2577                 cifp->if_link_state = LINK_STATE_UP;
2578                 break;
2579
2580         default:
2581                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2582                 break;
2583         }
2584         rt_ifmsg(cifp);
2585 }
2586
2587 void
2588 carp_group_demote_adj(struct ifnet *ifp, int adj)
2589 {
2590         struct ifg_list *ifgl;
2591         int *dm;
2592
2593         carp_gettok();
2594
2595         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2596                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2597                         continue;
2598                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2599
2600                 if (*dm + adj >= 0)
2601                         *dm += adj;
2602                 else
2603                         *dm = 0;
2604
2605                 if (adj > 0 && *dm == 1)
2606                         carp_send_ad_all();
2607                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2608                     ifgl->ifgl_group->ifg_group, *dm);
2609         }
2610
2611         carp_reltok();
2612 }
2613
2614 #ifdef foo
2615 void
2616 carp_carpdev_state(void *v)
2617 {
2618         struct carp_if *cif = v;
2619         struct carp_softc *sc;
2620
2621         carp_gettok();
2622
2623         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2624                 carp_sc_state(sc);
2625
2626         carp_reltok();
2627 }
2628
2629 static void
2630 carp_sc_state(struct carp_softc *sc)
2631 {
2632         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2633                 callout_stop(&sc->sc_ad_tmo);
2634                 callout_stop(&sc->sc_md_tmo);
2635                 callout_stop(&sc->sc_md6_tmo);
2636                 carp_set_state(sc, INIT);
2637                 carp_setrun(sc, 0);
2638                 if (!sc->sc_suppress) {
2639                         carp_suppress_preempt++;
2640                         if (carp_suppress_preempt == 1)
2641                                 carp_send_ad_all();
2642                 }
2643                 sc->sc_suppress = 1;
2644         } else {
2645                 carp_set_state(sc, INIT);
2646                 carp_setrun(sc, 0);
2647                 if (sc->sc_suppress)
2648                         carp_suppress_preempt--;
2649                 sc->sc_suppress = 0;
2650         }
2651 }
2652 #endif
2653
2654 static void
2655 carp_stop(struct carp_softc *sc, int detach)
2656 {
2657         sc->sc_if.if_flags &= ~IFF_RUNNING;
2658
2659         callout_stop(&sc->sc_ad_tmo);
2660         callout_stop(&sc->sc_md_tmo);
2661         callout_stop(&sc->sc_md6_tmo);
2662
2663         if (!detach && sc->sc_state == MASTER)
2664                 carp_send_ad(sc);
2665
2666         if (sc->sc_suppress)
2667                 carp_suppress_preempt--;
2668         sc->sc_suppress = 0;
2669
2670         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2671                 carp_suppress_preempt--;
2672         sc->sc_sendad_errors = 0;
2673         sc->sc_sendad_success = 0;
2674
2675         carp_set_state(sc, INIT);
2676         carp_setrun(sc, 0);
2677 }
2678
2679 static void
2680 carp_suspend(struct carp_softc *sc, int detach)
2681 {
2682         struct ifnet *cifp = &sc->sc_if;
2683
2684         carp_stop(sc, detach);
2685
2686         /* Retain the running state, if we are not dead yet */
2687         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2688                 cifp->if_flags |= IFF_RUNNING;
2689 }
2690
2691 static int
2692 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2693     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2694 {
2695         struct ip_moptions *imo = &sc->sc_imo;
2696         struct carp_if *ocif = ifp->if_carp;
2697         int error;
2698
2699         KKASSERT(vha->vha_ia != NULL);
2700
2701         KASSERT(ia_if != NULL, ("NULL backing address\n"));
2702         KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha));
2703         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2704                 ("inactive vhaddr %p is the address owner\n", vha));
2705
2706         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2707                 ("%s is already on %s\n", sc->sc_if.if_xname,
2708                  sc->sc_carpdev->if_xname));
2709
2710         if (ocif == NULL) {
2711                 KASSERT(sc->sc_carpdev == NULL,
2712                         ("%s is already on %s\n", sc->sc_if.if_xname,
2713                          sc->sc_carpdev->if_xname));
2714
2715                 error = ifpromisc(ifp, 1);
2716                 if (error)
2717                         return error;
2718         } else {
2719                 struct carp_softc_container *scc;
2720
2721                 TAILQ_FOREACH(scc, ocif, scc_link) {
2722                         struct carp_softc *vr = scc->scc_softc;
2723
2724                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2725                                 return EINVAL;
2726                 }
2727         }
2728
2729         ifp->if_carp = carp_if_insert(ocif, sc);
2730         KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed\n", __func__));
2731
2732         sc->sc_ia = ia_if;
2733         sc->sc_carpdev = ifp;
2734
2735         /*
2736          * Make sure that all protocol threads see the sc_carpdev and
2737          * if_carp changes
2738          */
2739         netmsg_service_sync();
2740
2741         if (ocif != NULL && ifp->if_carp != ocif) {
2742                 /*
2743                  * The old carp list could be safely free now,
2744                  * since no one can access it.
2745                  */
2746                 carp_if_free(ocif);
2747         }
2748
2749         vha->vha_iaback = ia_if;
2750         sc->sc_naddrs++;
2751
2752         if (own) {
2753                 vha->vha_flags |= CARP_VHAF_OWNER;
2754
2755                 /* XXX save user configured advskew? */
2756                 sc->sc_advskew = 0;
2757         }
2758
2759         carp_addroute_vhaddr(sc, vha);
2760
2761         /*
2762          * Join the multicast group only after the backing interface
2763          * has been hooked with the CARP interface.
2764          */
2765         KASSERT(imo->imo_multicast_ifp == NULL ||
2766                 imo->imo_multicast_ifp == &sc->sc_if,
2767                 ("%s didn't leave mcast group on %s\n",
2768                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2769
2770         if (imo->imo_num_memberships == 0) {
2771                 struct in_addr addr;
2772
2773                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2774                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2775                 if (imo->imo_membership[0] == NULL) {
2776                         carp_deactivate_vhaddr(sc, vha, FALSE);
2777                         return ENOBUFS;
2778                 }
2779
2780                 imo->imo_num_memberships++;
2781                 imo->imo_multicast_ifp = &sc->sc_if;
2782                 imo->imo_multicast_ttl = CARP_DFLTTL;
2783                 imo->imo_multicast_loop = 0;
2784         }
2785
2786         carp_hmac_prepare(sc);
2787         carp_set_state(sc, INIT);
2788         carp_setrun(sc, 0);
2789         return 0;
2790 }
2791
2792 static void
2793 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2794     boolean_t del_iaback)
2795 {
2796         KKASSERT(vha->vha_ia != NULL);
2797
2798         carp_hmac_prepare(sc);
2799
2800         if (vha->vha_iaback == NULL) {
2801                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2802                         ("inactive vhaddr %p is the address owner\n", vha));
2803                 return;
2804         }
2805
2806         vha->vha_flags &= ~CARP_VHAF_OWNER;
2807         carp_delroute_vhaddr(sc, vha, del_iaback);
2808
2809         KKASSERT(sc->sc_naddrs > 0);
2810         vha->vha_iaback = NULL;
2811         sc->sc_naddrs--;
2812         if (!sc->sc_naddrs) {
2813                 if (sc->sc_naddrs6) {
2814                         carp_multicast_cleanup(sc);
2815                         sc->sc_ia = NULL;
2816                 } else {
2817                         carp_detach(sc, 0, del_iaback);
2818                 }
2819         }
2820 }
2821
2822 static void
2823 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2824 {
2825         struct carp_vhaddr *vha;
2826         struct in_ifaddr *ia_if;
2827
2828         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2829         ia_if = ifatoia(ifa_if);
2830
2831         /*
2832          * Test each inactive vhaddr against the newly added address.
2833          * If the newly added address could be the backing address,
2834          * then activate the matching vhaddr.
2835          */
2836         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2837                 const struct in_ifaddr *ia;
2838                 u_long iaddr;
2839                 int own;
2840
2841                 if (vha->vha_iaback != NULL)
2842                         continue;
2843
2844                 ia = vha->vha_ia;
2845                 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr);
2846
2847                 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet)
2848                         continue;
2849
2850                 own = 0;
2851                 if (ia->ia_addr.sin_addr.s_addr ==
2852                     ia_if->ia_addr.sin_addr.s_addr)
2853                         own = 1;
2854
2855                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2856         }
2857 }
2858
2859 static void
2860 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
2861                   struct ifaddr *ifa_if)
2862 {
2863         struct carp_vhaddr *vha;
2864         struct in_ifaddr *ia_if;
2865
2866         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2867         ia_if = ifatoia(ifa_if);
2868
2869         /*
2870          * Ad src address is deleted; set it to NULL.
2871          * Following loop will try pick up a new ad src address
2872          * if one of the vhaddr could retain its backing address.
2873          */
2874         if (sc->sc_ia == ia_if)
2875                 sc->sc_ia = NULL;
2876
2877         /*
2878          * Test each active vhaddr against the deleted address.
2879          * If the deleted address is vhaddr address's backing
2880          * address, then deactivate the vhaddr.
2881          */
2882         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2883                 if (vha->vha_iaback == NULL)
2884                         continue;
2885
2886                 if (vha->vha_iaback == ia_if)
2887                         carp_deactivate_vhaddr(sc, vha, TRUE);
2888                 else if (sc->sc_ia == NULL)
2889                         sc->sc_ia = vha->vha_iaback;
2890         }
2891 }
2892
2893 static void
2894 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
2895 {
2896         struct carp_vhaddr *vha;
2897
2898         KKASSERT(sc->sc_carpdev == NULL);
2899
2900         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2901                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
2902 }
2903
2904 static void
2905 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
2906             enum ifaddr_event event, struct ifaddr *ifa)
2907 {
2908         struct carp_softc *sc;
2909
2910         carp_gettok();
2911
2912         if (ifa->ifa_addr->sa_family != AF_INET)
2913                 goto back;
2914
2915         KASSERT(&curthread->td_msgport == cpu_portfn(0),
2916             ("not in netisr0"));
2917
2918         if (ifp->if_type == IFT_CARP) {
2919                 /*
2920                  * Address is changed on carp(4) interface
2921                  */
2922                 switch (event) {
2923                 case IFADDR_EVENT_ADD:
2924                         carp_add_addr(ifp->if_softc, ifa);
2925                         break;
2926
2927                 case IFADDR_EVENT_CHANGE:
2928                         carp_config_addr(ifp->if_softc, ifa);
2929                         break;
2930
2931                 case IFADDR_EVENT_DELETE:
2932                         carp_del_addr(ifp->if_softc, ifa);
2933                         break;
2934                 }
2935                 goto back;
2936         }
2937
2938         /*
2939          * Address is changed on non-carp(4) interface
2940          */
2941         if ((ifp->if_flags & IFF_MULTICAST) == 0)
2942                 goto back;
2943
2944         LIST_FOREACH(sc, &carpif_list, sc_next) {
2945                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
2946                         /* Not the parent iface; skip */
2947                         continue;
2948                 }
2949
2950                 switch (event) {
2951                 case IFADDR_EVENT_ADD:
2952                         carp_link_addrs(sc, ifp, ifa);
2953                         break;
2954
2955                 case IFADDR_EVENT_DELETE:
2956                         if (sc->sc_carpdev != NULL) {
2957                                 carp_unlink_addrs(sc, ifp, ifa);
2958                                 if (sc->sc_carpdev == NULL) {
2959                                         /*
2960                                          * We no longer have the parent
2961                                          * interface, however, certain
2962                                          * virtual addresses, which are
2963                                          * not used because they can't
2964                                          * match the previous parent
2965                                          * interface's addresses, may now
2966                                          * match different interface's
2967                                          * addresses.
2968                                          */
2969                                         carp_update_addrs(sc, ifa);
2970                                 }
2971                         } else {
2972                                 /*
2973                                  * The carp(4) interface didn't have a
2974                                  * parent iface, so it is not possible
2975                                  * that it will contain any address to
2976                                  * be unlinked.
2977                                  */
2978                         }
2979                         break;
2980
2981                 case IFADDR_EVENT_CHANGE:
2982                         if (sc->sc_carpdev == NULL) {
2983                                 /*
2984                                  * The carp(4) interface didn't have a
2985                                  * parent iface, so it is not possible
2986                                  * that it will contain any address to
2987                                  * be updated.
2988                                  */
2989                                 carp_link_addrs(sc, ifp, ifa);
2990                         } else {
2991                                 /*
2992                                  * First try breaking tie with the old
2993                                  * address.  Then see whether we could
2994                                  * link certain vhaddr to the new address.
2995                                  * If that fails, i.e. carpdev is NULL,
2996                                  * we try a global update.
2997                                  *
2998                                  * NOTE: The above order is critical.
2999                                  */
3000                                 carp_unlink_addrs(sc, ifp, ifa);
3001                                 carp_link_addrs(sc, ifp, ifa);
3002                                 if (sc->sc_carpdev == NULL) {
3003                                         /*
3004                                          * See the comment in the above
3005                                          * IFADDR_EVENT_DELETE block.
3006                                          */
3007                                         carp_update_addrs(sc, NULL);
3008                                 }
3009                         }
3010                         break;
3011                 }
3012         }
3013
3014 back:
3015         carp_reltok();
3016 }
3017
3018 void
3019 carp_proto_ctlinput(netmsg_t msg)
3020 {
3021         int cmd = msg->ctlinput.nm_cmd;
3022         struct sockaddr *sa = msg->ctlinput.nm_arg;
3023         struct in_ifaddr_container *iac;
3024
3025         carp_gettok();
3026
3027         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3028                 struct in_ifaddr *ia = iac->ia;
3029                 struct ifnet *ifp = ia->ia_ifp;
3030
3031                 if (ifp->if_type == IFT_CARP)
3032                         continue;
3033
3034                 if (ia->ia_ifa.ifa_addr == sa) {
3035                         if (cmd == PRC_IFDOWN) {
3036                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3037                                     &ia->ia_ifa);
3038                         } else if (cmd == PRC_IFUP) {
3039                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3040                                     &ia->ia_ifa);
3041                         }
3042                         break;
3043                 }
3044         }
3045
3046         carp_reltok();
3047         lwkt_replymsg(&msg->lmsg, 0);
3048 }
3049
3050 void
3051 carp_gettok(void)
3052 {
3053         lwkt_gettoken(&carp_tok);
3054 }
3055
3056 void
3057 carp_reltok(void)
3058 {
3059         lwkt_reltoken(&carp_tok);
3060 }
3061
3062 struct ifnet *
3063 carp_parent(struct ifnet *cifp)
3064 {
3065         struct carp_softc *sc;
3066
3067         ASSERT_LWKT_TOKEN_HELD(&carp_tok);
3068
3069         KKASSERT(cifp->if_type == IFT_CARP);
3070         sc = cifp->if_softc;
3071
3072         return sc->sc_carpdev;
3073 }
3074
/*
 * Route flags to use with rtinit() for in_ifaddr 'x': RTF_HOST when
 * the address sits on a loopback or point-to-point interface,
 * otherwise 0.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		 ? RTF_HOST : 0)
3078
3079 static int
3080 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3081 {
3082         struct in_ifaddr *ia, *iaback;
3083         int error;
3084
3085         if (sc->sc_state != MASTER)
3086                 return 0;
3087
3088         ia = vha->vha_ia;
3089         KKASSERT(ia != NULL);
3090
3091         iaback = vha->vha_iaback;
3092         KKASSERT(iaback != NULL);
3093
3094         rtinit(&iaback->ia_ifa, RTM_DELETE, rtinitflags(iaback));
3095         in_ifadown(&iaback->ia_ifa, 1);
3096         iaback->ia_flags &= ~IFA_ROUTE;
3097
3098         error = rtinit(&ia->ia_ifa, RTM_ADD, rtinitflags(ia) | RTF_UP);
3099         if (!error)
3100                 ia->ia_flags |= IFA_ROUTE;
3101         return error;
3102 }
3103
3104 static void
3105 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3106     boolean_t del_iaback)
3107 {
3108         struct in_ifaddr *ia, *iaback;
3109
3110         ia = vha->vha_ia;
3111         KKASSERT(ia != NULL);
3112
3113         iaback = vha->vha_iaback;
3114         KKASSERT(iaback != NULL);
3115
3116         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3117         in_ifadown(&ia->ia_ifa, 1);
3118         ia->ia_flags &= ~IFA_ROUTE;
3119
3120         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3121                 int error;
3122
3123                 error = rtinit(&iaback->ia_ifa, RTM_ADD,
3124                     rtinitflags(iaback) | RTF_UP);
3125                 if (!error)
3126                         iaback->ia_flags |= IFA_ROUTE;
3127         }
3128 }
3129
3130 static int
3131 carp_modevent(module_t mod, int type, void *data)
3132 {
3133         switch (type) {
3134         case MOD_LOAD:
3135                 LIST_INIT(&carpif_list);
3136                 carp_ifdetach_event =
3137                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3138                                       EVENTHANDLER_PRI_ANY);
3139                 carp_ifaddr_event =
3140                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3141                                       EVENTHANDLER_PRI_FIRST);
3142                 if_clone_attach(&carp_cloner);
3143                 break;
3144
3145         case MOD_UNLOAD:
3146                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3147                                         carp_ifdetach_event);
3148                 EVENTHANDLER_DEREGISTER(ifaddr_event,
3149                                         carp_ifaddr_event);
3150                 if_clone_detach(&carp_cloner);
3151                 break;
3152
3153         default:
3154                 return (EINVAL);
3155         }
3156         return (0);
3157 }
3158
3159 static moduledata_t carp_mod = {
3160         "carp",
3161         carp_modevent,
3162         0
3163 };
3164 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);