carp: Change 1/0 usage of int into boolean_t, which is more expressive
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64
65 #ifdef INET
66 #include <netinet/in.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/if_ether.h>
72 #endif
73
74 #ifdef INET6
75 #include <netinet/icmp6.h>
76 #include <netinet/ip6.h>
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79 #include <netinet6/nd6.h>
80 #endif
81
82 #include <netinet/ip_carp.h>
83
/* Base name handed to if_initname() and the interface cloner. */
#define CARP_IFNAME		"carp"

/*
 * Non-zero when both IFF_UP and IFF_RUNNING are set in if_flags,
 * i.e. when neither of the two bits is missing.
 */
#define CARP_IS_RUNNING(ifp)	\
	((~(ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == 0)
87
88 struct carp_softc;
89
90 struct carp_vhaddr {
91         uint32_t                vha_flags;      /* CARP_VHAF_ */
92         struct in_ifaddr        *vha_ia;        /* carp address */
93         struct in_ifaddr        *vha_iaback;    /* backing address */
94         TAILQ_ENTRY(carp_vhaddr) vha_link;
95 };
96 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
97
98 struct netmsg_carp {
99         struct netmsg_base      base;
100         struct ifnet            *nc_carpdev;
101         struct carp_softc       *nc_softc;
102         void                    *nc_data;
103         size_t                  nc_datalen;
104 };
105
106 struct carp_softc {
107         struct arpcom            arpcom;
108         struct ifnet            *sc_carpdev;    /* parent interface */
109         struct carp_vhaddr_list  sc_vha_list;   /* virtual addr list */
110
111         const struct in_ifaddr  *sc_ia;         /* primary iface address v4 */
112         struct ip_moptions       sc_imo;
113
114 #ifdef INET6
115         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
116         struct ip6_moptions      sc_im6o;
117 #endif /* INET6 */
118
119         enum { INIT = 0, BACKUP, MASTER }
120                                  sc_state;
121         boolean_t                sc_dead;
122
123         int                      sc_suppress;
124
125         int                      sc_sendad_errors;
126 #define CARP_SENDAD_MAX_ERRORS  3
127         int                      sc_sendad_success;
128 #define CARP_SENDAD_MIN_SUCCESS 3
129
130         int                      sc_vhid;
131         int                      sc_advskew;
132         int                      sc_naddrs;     /* actually used IPv4 vha */
133         int                      sc_naddrs6;
134         int                      sc_advbase;    /* seconds */
135         int                      sc_init_counter;
136         uint64_t                 sc_counter;
137
138         /* authentication */
139 #define CARP_HMAC_PAD   64
140         unsigned char            sc_key[CARP_KEY_LEN];
141         unsigned char            sc_pad[CARP_HMAC_PAD];
142         SHA1_CTX                 sc_sha1;
143
144         struct callout           sc_ad_tmo;     /* advertisement timeout */
145         struct netmsg_carp       sc_ad_msg;     /* adv timeout netmsg */
146         struct callout           sc_md_tmo;     /* ip4 master down timeout */
147         struct callout           sc_md6_tmo;    /* ip6 master down timeout */
148         struct netmsg_carp       sc_md_msg;     /* master down timeout netmsg */
149
150         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
151 };
152
153 #define sc_if   arpcom.ac_if
154
155 struct carp_softc_container {
156         TAILQ_ENTRY(carp_softc_container) scc_link;
157         struct carp_softc       *scc_softc;
158 };
159 TAILQ_HEAD(carp_if, carp_softc_container);
160
161 SYSCTL_DECL(_net_inet_carp);
162
163 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
164 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
165     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
166 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
167     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
168 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
169     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
170 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
171     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
172
173 static int carp_suppress_preempt = 0;
174 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
175     &carp_suppress_preempt, 0, "Preemption is suppressed");
176
177 static struct carpstats carpstats;
178 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
179     &carpstats, carpstats,
180     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
181
/* Emit a LOG_INFO message when the "log" knob is 1 or higher. */
#define CARP_LOG(...)							\
do {									\
	if (carp_opts[CARPCTL_LOG] >= 1)				\
		log(LOG_INFO, __VA_ARGS__);				\
} while (0)

/* Emit a LOG_DEBUG message when the "log" knob is 2 or higher. */
#define CARP_DEBUG(...)							\
do {									\
	if (carp_opts[CARPCTL_LOG] >= 2)				\
		log(LOG_DEBUG, __VA_ARGS__);				\
} while (0)
191
192 static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
193
194 static void     carp_hmac_prepare(struct carp_softc *);
195 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
196                     unsigned char *);
197 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
198                     unsigned char *);
199 static void     carp_setroute(struct carp_softc *, int);
200 static void     carp_proto_input_c(struct carp_softc *, struct mbuf *,
201                     struct carp_header *, sa_family_t);
202 static int      carp_clone_create(struct if_clone *, int, caddr_t);
203 static int      carp_clone_destroy(struct ifnet *);
204 static void     carp_detach(struct carp_softc *, boolean_t, boolean_t);
205 static void     carp_prepare_ad(struct carp_softc *, struct carp_header *);
206 static void     carp_send_ad_all(void);
207 static void     carp_send_ad_timeout(void *);
208 static void     carp_send_ad(struct carp_softc *);
209 static void     carp_send_arp(struct carp_softc *);
210 static void     carp_master_down_timeout(void *);
211 static void     carp_master_down(struct carp_softc *);
212 static void     carp_setrun(struct carp_softc *, sa_family_t);
213 static void     carp_set_state(struct carp_softc *, int);
214 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
215
216 static void     carp_init(void *);
217 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
218 static int      carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
219                     struct rtentry *);
220 static void     carp_start(struct ifnet *);
221 static void     carp_serialize(struct ifnet *, enum ifnet_serialize);
222 static void     carp_deserialize(struct ifnet *, enum ifnet_serialize);
223 static int      carp_tryserialize(struct ifnet *, enum ifnet_serialize);
224 #ifdef INVARIANTS
225 static void     carp_serialize_assert(struct ifnet *, enum ifnet_serialize,
226                     boolean_t);
227 #endif
228
229 static void     carp_multicast_cleanup(struct carp_softc *);
230 static void     carp_add_addr(struct carp_softc *, struct ifaddr *);
231 static void     carp_del_addr(struct carp_softc *, struct ifaddr *);
232 static void     carp_config_addr(struct carp_softc *, struct ifaddr *);
233 static void     carp_link_addrs(struct carp_softc *, struct ifnet *,
234                     struct ifaddr *);
235 static void     carp_unlink_addrs(struct carp_softc *, struct ifnet *,
236                     struct ifaddr *);
237 static void     carp_update_addrs(struct carp_softc *, struct ifaddr *);
238
239 static int      carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
240                     struct in_ifaddr *);
241 static int      carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
242                     struct ifnet *, struct in_ifaddr *, int);
243 static void     carp_deactivate_vhaddr(struct carp_softc *,
244                     struct carp_vhaddr *, boolean_t);
245 static int      carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
246 static void     carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
247                     boolean_t);
248
249 #ifdef foo
250 static void     carp_sc_state(struct carp_softc *);
251 #endif
252 #ifdef INET6
253 static void     carp_send_na(struct carp_softc *);
254 #ifdef notyet
255 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
256 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
257 #endif
258 static void     carp_multicast6_cleanup(struct carp_softc *);
259 #endif
260 static void     carp_stop(struct carp_softc *, boolean_t);
261 static void     carp_suspend(struct carp_softc *, boolean_t);
262 static void     carp_ioctl_stop(struct carp_softc *);
263 static int      carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
264 static int      carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
265 static int      carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
266 static int      carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
267
268 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
269 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
270 static void     carp_if_free(struct carp_if *);
271
272 static void     carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
273                             struct ifaddr *);
274 static void     carp_ifdetach(void *, struct ifnet *);
275
276 static void     carp_ifdetach_dispatch(netmsg_t);
277 static void     carp_clone_destroy_dispatch(netmsg_t);
278 static void     carp_init_dispatch(netmsg_t);
279 static void     carp_ioctl_stop_dispatch(netmsg_t);
280 static void     carp_ioctl_setvh_dispatch(netmsg_t);
281 static void     carp_ioctl_getvh_dispatch(netmsg_t);
282 static void     carp_ioctl_getdevname_dispatch(netmsg_t);
283 static void     carp_ioctl_getvhaddr_dispatch(netmsg_t);
284 static void     carp_send_ad_timeout_dispatch(netmsg_t);
285 static void     carp_master_down_timeout_dispatch(netmsg_t);
286
287 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
288
289 static LIST_HEAD(, carp_softc) carpif_list;
290
291 static struct if_clone carp_cloner =
292 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
293                      0, IF_MAXUNIT);
294
295 static uint8_t  carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 };
296
297 static eventhandler_tag carp_ifdetach_event;
298 static eventhandler_tag carp_ifaddr_event;
299
300 static __inline void
301 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
302 {
303         struct carp_vhaddr *vha;
304         u_long new_addr, addr;
305
306         KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
307
308         /*
309          * Virtual address list is sorted; smaller one first
310          */
311         new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
312
313         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
314                 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
315
316                 if (addr > new_addr)
317                         break;
318         }
319         if (vha == NULL)
320                 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
321         else
322                 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
323         vha_new->vha_flags |= CARP_VHAF_ONLIST;
324 }
325
326 static __inline void
327 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
328 {
329         KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
330         vha->vha_flags &= ~CARP_VHAF_ONLIST;
331         TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
332 }
333
334 static void
335 carp_hmac_prepare(struct carp_softc *sc)
336 {
337         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
338         uint8_t vhid = sc->sc_vhid & 0xff;
339         int i;
340 #ifdef INET6
341         struct ifaddr_container *ifac;
342         struct in6_addr in6;
343 #endif
344 #ifdef INET
345         struct carp_vhaddr *vha;
346 #endif
347
348         /* XXX: possible race here */
349
350         /* compute ipad from key */
351         bzero(sc->sc_pad, sizeof(sc->sc_pad));
352         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
353         for (i = 0; i < sizeof(sc->sc_pad); i++)
354                 sc->sc_pad[i] ^= 0x36;
355
356         /* precompute first part of inner hash */
357         SHA1Init(&sc->sc_sha1);
358         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
359         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
360         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
361         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
362 #ifdef INET
363         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
364                 SHA1Update(&sc->sc_sha1,
365                     (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
366                     sizeof(struct in_addr));
367         }
368 #endif /* INET */
369 #ifdef INET6
370         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
371                 struct ifaddr *ifa = ifac->ifa;
372
373                 if (ifa->ifa_addr->sa_family == AF_INET6) {
374                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
375                         in6_clearscope(&in6);
376                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
377                 }
378         }
379 #endif /* INET6 */
380
381         /* convert ipad to opad */
382         for (i = 0; i < sizeof(sc->sc_pad); i++)
383                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
384 }
385
386 static void
387 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
388     unsigned char md[20])
389 {
390         SHA1_CTX sha1ctx;
391
392         /* fetch first half of inner hash */
393         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
394
395         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
396         SHA1Final(md, &sha1ctx);
397
398         /* outer hash */
399         SHA1Init(&sha1ctx);
400         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
401         SHA1Update(&sha1ctx, md, 20);
402         SHA1Final(md, &sha1ctx);
403 }
404
/*
 * Check a received HMAC against the one computed locally.
 *
 * Returns 0 when md matches the digest generated for counter, and a
 * non-zero value otherwise (same contract as the former bcmp() based
 * implementation).
 *
 * The digest comes from the network, so the comparison always touches
 * all bytes instead of stopping at the first mismatch; this avoids
 * leaking, through timing, how many leading bytes were correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned int diff = 0;
	size_t i;

	carp_hmac_generate(sc, counter, md2);

	for (i = 0; i < sizeof(md2); i++)
		diff |= (unsigned int)(md[i] ^ md2[i]);

	return ((int)diff);
}
414
415 static void
416 carp_setroute(struct carp_softc *sc, int cmd)
417 {
418 #ifdef INET6
419         struct ifaddr_container *ifac;
420 #endif
421         struct carp_vhaddr *vha;
422
423         KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
424
425         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
426                 if (vha->vha_iaback == NULL)
427                         continue;
428                 if (cmd == RTM_DELETE)
429                         carp_delroute_vhaddr(sc, vha, FALSE);
430                 else
431                         carp_addroute_vhaddr(sc, vha);
432         }
433
434 #ifdef INET6
435         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
436                 struct ifaddr *ifa = ifac->ifa;
437
438                 if (ifa->ifa_addr->sa_family == AF_INET6) {
439                         if (cmd == RTM_ADD)
440                                 in6_ifaddloop(ifa);
441                         else
442                                 in6_ifremloop(ifa);
443                 }
444         }
445 #endif /* INET6 */
446 }
447
448 static int
449 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
450 {
451         struct carp_softc *sc;
452         struct ifnet *ifp;
453
454         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
455         ifp = &sc->sc_if;
456
457         sc->sc_suppress = 0;
458         sc->sc_advbase = CARP_DFLTINTV;
459         sc->sc_vhid = -1;       /* required setting */
460         sc->sc_advskew = 0;
461         sc->sc_init_counter = 1;
462         sc->sc_naddrs = 0;
463         sc->sc_naddrs6 = 0;
464
465         TAILQ_INIT(&sc->sc_vha_list);
466
467 #ifdef INET6
468         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
469 #endif
470
471         callout_init_mp(&sc->sc_ad_tmo);
472         netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
473             MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
474         sc->sc_ad_msg.nc_softc = sc;
475
476         callout_init_mp(&sc->sc_md_tmo);
477         callout_init_mp(&sc->sc_md6_tmo);
478         netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
479             MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
480         sc->sc_md_msg.nc_softc = sc;
481
482         if_initname(ifp, CARP_IFNAME, unit);
483         ifp->if_softc = sc;
484         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
485         ifp->if_init = carp_init;
486         ifp->if_ioctl = carp_ioctl;
487         ifp->if_start = carp_start;
488         ifp->if_serialize = carp_serialize;
489         ifp->if_deserialize = carp_deserialize;
490         ifp->if_tryserialize = carp_tryserialize;
491 #ifdef INVARIANTS
492         ifp->if_serialize_assert = carp_serialize_assert;
493 #endif
494         ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
495         ifq_set_ready(&ifp->if_snd);
496
497         ether_ifattach(ifp, carp_etheraddr, NULL);
498
499         ifp->if_type = IFT_CARP;
500         ifp->if_output = carp_output;
501
502         lwkt_gettoken(&carp_listtok);
503         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
504         lwkt_reltoken(&carp_listtok);
505
506         return (0);
507 }
508
509 static void
510 carp_clone_destroy_dispatch(netmsg_t msg)
511 {
512         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
513         struct carp_softc *sc = cmsg->nc_softc;
514
515         sc->sc_dead = TRUE;
516         carp_detach(sc, TRUE, FALSE);
517
518         callout_stop_sync(&sc->sc_ad_tmo);
519         callout_stop_sync(&sc->sc_md_tmo);
520         callout_stop_sync(&sc->sc_md6_tmo);
521
522         crit_enter();
523         if ((sc->sc_ad_msg.base.lmsg.ms_flags & MSGF_DONE) == 0)
524                 lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
525         if ((sc->sc_md_msg.base.lmsg.ms_flags & MSGF_DONE) == 0)
526                 lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
527         crit_exit();
528
529         lwkt_replymsg(&cmsg->base.lmsg, 0);
530 }
531
532 static int
533 carp_clone_destroy(struct ifnet *ifp)
534 {
535         struct carp_softc *sc = ifp->if_softc;
536         struct netmsg_carp cmsg;
537
538         bzero(&cmsg, sizeof(cmsg));
539         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
540             carp_clone_destroy_dispatch);
541         cmsg.nc_softc = sc;
542
543         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
544
545         lwkt_gettoken(&carp_listtok);
546         LIST_REMOVE(sc, sc_next);
547         lwkt_reltoken(&carp_listtok);
548
549         bpfdetach(ifp);
550         if_detach(ifp);
551
552         KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n"));
553         kfree(sc, M_CARP);
554
555         return 0;
556 }
557
558 static struct carp_if *
559 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
560 {
561         struct carp_softc_container *oscc, *scc;
562         struct carp_if *cif;
563         int count = 0;
564 #ifdef INVARIANTS
565         int found = 0;
566 #endif
567
568         TAILQ_FOREACH(oscc, ocif, scc_link) {
569                 ++count;
570 #ifdef INVARIANTS
571                 if (oscc->scc_softc == sc)
572                         found = 1;
573 #endif
574         }
575         KASSERT(found, ("%s carp_softc is not on carp_if\n", __func__));
576
577         if (count == 1) {
578                 /* Last one is going to be unlinked */
579                 return NULL;
580         }
581
582         cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
583         TAILQ_INIT(cif);
584
585         TAILQ_FOREACH(oscc, ocif, scc_link) {
586                 if (oscc->scc_softc == sc)
587                         continue;
588
589                 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
590                 scc->scc_softc = oscc->scc_softc;
591                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
592         }
593
594         return cif;
595 }
596
597 static struct carp_if *
598 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
599 {
600         struct carp_softc_container *oscc;
601         int onlist;
602
603         onlist = 0;
604         if (ocif != NULL) {
605                 TAILQ_FOREACH(oscc, ocif, scc_link) {
606                         if (oscc->scc_softc == sc)
607                                 onlist = 1;
608                 }
609         }
610
611 #ifdef INVARIANTS
612         if (sc->sc_carpdev != NULL) {
613                 KASSERT(onlist, ("%s is not on %s carp list\n",
614                     sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
615         } else {
616                 KASSERT(!onlist, ("%s is already on carp list\n",
617                     sc->sc_if.if_xname));
618         }
619 #endif
620
621         if (!onlist) {
622                 struct carp_if *cif;
623                 struct carp_softc_container *new_scc, *scc;
624                 int inserted = 0;
625
626                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
627                 TAILQ_INIT(cif);
628
629                 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
630                 new_scc->scc_softc = sc;
631
632                 if (ocif != NULL) {
633                         TAILQ_FOREACH(oscc, ocif, scc_link) {
634                                 if (!inserted &&
635                                     oscc->scc_softc->sc_vhid > sc->sc_vhid) {
636                                         TAILQ_INSERT_TAIL(cif, new_scc,
637                                             scc_link);
638                                         inserted = 1;
639                                 }
640
641                                 scc = kmalloc(sizeof(*scc), M_CARP,
642                                     M_WAITOK | M_ZERO);
643                                 scc->scc_softc = oscc->scc_softc;
644                                 TAILQ_INSERT_TAIL(cif, scc, scc_link);
645                         }
646                 }
647                 if (!inserted)
648                         TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
649
650                 return cif;
651         } else {
652                 return ocif;
653         }
654 }
655
656 static void
657 carp_if_free(struct carp_if *cif)
658 {
659         struct carp_softc_container *scc;
660
661         while ((scc = TAILQ_FIRST(cif)) != NULL) {
662                 TAILQ_REMOVE(cif, scc, scc_link);
663                 kfree(scc, M_CARP);
664         }
665         kfree(cif, M_CARP);
666 }
667
668 static void
669 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
670 {
671         carp_suspend(sc, detach);
672
673         carp_multicast_cleanup(sc);
674 #ifdef INET6
675         carp_multicast6_cleanup(sc);
676 #endif
677
678         if (!sc->sc_dead && detach) {
679                 struct carp_vhaddr *vha;
680
681                 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
682                         carp_deactivate_vhaddr(sc, vha, del_iaback);
683                 KKASSERT(sc->sc_naddrs == 0);
684         }
685
686         if (sc->sc_carpdev != NULL) {
687                 struct ifnet *ifp = sc->sc_carpdev;
688                 struct carp_if *ocif = ifp->if_carp;
689
690                 ifp->if_carp = carp_if_remove(ocif, sc);
691                 KASSERT(ifp->if_carp != ocif,
692                     ("%s carp_if_remove failed\n", __func__));
693
694                 sc->sc_carpdev = NULL;
695                 sc->sc_ia = NULL;
696
697                 /*
698                  * Make sure that all protocol threads see the
699                  * sc_carpdev and if_carp changes
700                  */
701                 netmsg_service_sync();
702
703                 if (ifp->if_carp == NULL) {
704                         /*
705                          * No more carp interfaces using
706                          * ifp as the backing interface,
707                          * move it out of promiscous mode.
708                          */
709                         ifpromisc(ifp, 0);
710                 }
711
712                 /*
713                  * The old carp list could be safely free now,
714                  * since no one can access it.
715                  */
716                 carp_if_free(ocif);
717         }
718 }
719
720 static void
721 carp_ifdetach_dispatch(netmsg_t msg)
722 {
723         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
724         struct ifnet *ifp = cmsg->nc_carpdev;
725
726         while (ifp->if_carp) {
727                 struct carp_softc_container *scc;
728
729                 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
730                 carp_detach(scc->scc_softc, TRUE, TRUE);
731         }
732         lwkt_replymsg(&cmsg->base.lmsg, 0);
733 }
734
735 /* Detach an interface from the carp. */
736 static void
737 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
738 {
739         struct netmsg_carp cmsg;
740
741         ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
742
743         bzero(&cmsg, sizeof(cmsg));
744         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
745             carp_ifdetach_dispatch);
746         cmsg.nc_carpdev = ifp;
747
748         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
749 }
750
751 /*
752  * process input packet.
753  * we have rearranged checks order compared to the rfc,
754  * but it seems more efficient this way or not possible otherwise.
755  */
756 int
757 carp_proto_input(struct mbuf **mp, int *offp, int proto)
758 {
759         struct mbuf *m = *mp;
760         struct ip *ip = mtod(m, struct ip *);
761         struct ifnet *ifp = m->m_pkthdr.rcvif;
762         struct carp_header *ch;
763         struct carp_softc *sc;
764         int len, iphlen;
765
766         iphlen = *offp;
767         *mp = NULL;
768
769         carpstats.carps_ipackets++;
770
771         if (!carp_opts[CARPCTL_ALLOW]) {
772                 m_freem(m);
773                 goto back;
774         }
775
776         /* Check if received on a valid carp interface */
777         if (ifp->if_type != IFT_CARP) {
778                 carpstats.carps_badif++;
779                 CARP_LOG("carp_proto_input: packet received on non-carp "
780                     "interface: %s\n", ifp->if_xname);
781                 m_freem(m);
782                 goto back;
783         }
784
785         if (!CARP_IS_RUNNING(ifp)) {
786                 carpstats.carps_badif++;
787                 CARP_LOG("carp_proto_input: packet received on stopped carp "
788                     "interface: %s\n", ifp->if_xname);
789                 m_freem(m);
790                 goto back;
791         }
792
793         sc = ifp->if_softc;
794         if (sc->sc_carpdev == NULL) {
795                 carpstats.carps_badif++;
796                 CARP_LOG("carp_proto_input: packet received on defunc carp "
797                     "interface: %s\n", ifp->if_xname);
798                 m_freem(m);
799                 goto back;
800         }
801
802         if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
803                 carpstats.carps_badif++;
804                 CARP_LOG("carp_proto_input: non-mcast packet on "
805                     "interface: %s\n", ifp->if_xname);
806                 m_freem(m);
807                 goto back;
808         }
809
810         /* Verify that the IP TTL is CARP_DFLTTL. */
811         if (ip->ip_ttl != CARP_DFLTTL) {
812                 carpstats.carps_badttl++;
813                 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
814                     ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
815                 m_freem(m);
816                 goto back;
817         }
818
819         /* Minimal CARP packet size */
820         len = iphlen + sizeof(*ch);
821
822         /*
823          * Verify that the received packet length is
824          * not less than the CARP header
825          */
826         if (m->m_pkthdr.len < len) {
827                 carpstats.carps_badlen++;
828                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
829                     ifp->if_xname);
830                 m_freem(m);
831                 goto back;
832         }
833
834         /* Make sure that CARP header is contiguous */
835         if (len > m->m_len) {
836                 m = m_pullup(m, len);
837                 if (m == NULL) {
838                         carpstats.carps_hdrops++;
839                         CARP_LOG("carp_proto_input: m_pullup failed\n");
840                         goto back;
841                 }
842                 ip = mtod(m, struct ip *);
843         }
844         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
845
846         /* Verify the CARP checksum */
847         if (in_cksum_skip(m, len, iphlen)) {
848                 carpstats.carps_badsum++;
849                 CARP_LOG("carp_proto_input: checksum failed on %s\n",
850                     ifp->if_xname);
851                 m_freem(m);
852                 goto back;
853         }
854         carp_proto_input_c(sc, m, ch, AF_INET);
855 back:
856         return(IPPROTO_DONE);
857 }
858
859 #ifdef INET6
860 int
861 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
862 {
863         struct mbuf *m = *mp;
864         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
865         struct ifnet *ifp = m->m_pkthdr.rcvif;
866         struct carp_header *ch;
867         struct carp_softc *sc;
868         u_int len;
869
870         carpstats.carps_ipackets6++;
871
872         if (!carp_opts[CARPCTL_ALLOW]) {
873                 m_freem(m);
874                 goto back;
875         }
876
877         /* check if received on a valid carp interface */
878         if (ifp->if_type != IFT_CARP) {
879                 carpstats.carps_badif++;
880                 CARP_LOG("carp6_proto_input: packet received on non-carp "
881                     "interface: %s\n", ifp->if_xname);
882                 m_freem(m);
883                 goto back;
884         }
885
886         if (!CARP_IS_RUNNING(ifp)) {
887                 carpstats.carps_badif++;
888                 CARP_LOG("carp_proto_input: packet received on stopped carp "
889                     "interface: %s\n", ifp->if_xname);
890                 m_freem(m);
891                 goto back;
892         }
893
894         sc = ifp->if_softc;
895         if (sc->sc_carpdev == NULL) {
896                 carpstats.carps_badif++;
897                 CARP_LOG("carp6_proto_input: packet received on defunc-carp "
898                     "interface: %s\n", ifp->if_xname);
899                 m_freem(m);
900                 goto back;
901         }
902
903         /* verify that the IP TTL is 255 */
904         if (ip6->ip6_hlim != CARP_DFLTTL) {
905                 carpstats.carps_badttl++;
906                 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
907                     ip6->ip6_hlim, ifp->if_xname);
908                 m_freem(m);
909                 goto back;
910         }
911
912         /* verify that we have a complete carp packet */
913         len = m->m_len;
914         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
915         if (ch == NULL) {
916                 carpstats.carps_badlen++;
917                 CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
918                 goto back;
919         }
920
921         /* verify the CARP checksum */
922         if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
923                 carpstats.carps_badsum++;
924                 CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
925                     ifp->if_xname);
926                 m_freem(m);
927                 goto back;
928         }
929
930         carp_proto_input_c(sc, m, ch, AF_INET6);
931 back:
932         return (IPPROTO_DONE);
933 }
934 #endif /* INET6 */
935
936 static void
937 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
938     struct carp_header *ch, sa_family_t af)
939 {
940         struct ifnet *cifp;
941         uint64_t tmp_counter;
942         struct timeval sc_tv, ch_tv;
943
944         if (sc->sc_vhid != ch->carp_vhid) {
945                 /*
946                  * CARP uses multicast, however, multicast packets
947                  * are tapped to all CARP interfaces on the physical
948                  * interface receiving the CARP packets, so we don't
949                  * update any stats here.
950                  */
951                 m_freem(m);
952                 return;
953         }
954         cifp = &sc->sc_if;
955
956         /* verify the CARP version. */
957         if (ch->carp_version != CARP_VERSION) {
958                 carpstats.carps_badver++;
959                 CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
960                          ch->carp_version);
961                 m_freem(m);
962                 return;
963         }
964
965         /* verify the hash */
966         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
967                 carpstats.carps_badauth++;
968                 CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
969                 m_freem(m);
970                 return;
971         }
972
973         tmp_counter = ntohl(ch->carp_counter[0]);
974         tmp_counter = tmp_counter<<32;
975         tmp_counter += ntohl(ch->carp_counter[1]);
976
977         /* XXX Replay protection goes here */
978
979         sc->sc_init_counter = 0;
980         sc->sc_counter = tmp_counter;
981
982         sc_tv.tv_sec = sc->sc_advbase;
983         if (carp_suppress_preempt && sc->sc_advskew <  240)
984                 sc_tv.tv_usec = 240 * 1000000 / 256;
985         else
986                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
987         ch_tv.tv_sec = ch->carp_advbase;
988         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
989
990         switch (sc->sc_state) {
991         case INIT:
992                 break;
993
994         case MASTER:
995                 /*
996                  * If we receive an advertisement from a master who's going to
997                  * be more frequent than us, go into BACKUP state.
998                  */
999                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
1000                     timevalcmp(&sc_tv, &ch_tv, ==)) {
1001                         callout_stop(&sc->sc_ad_tmo);
1002                         CARP_DEBUG("%s: MASTER -> BACKUP "
1003                            "(more frequent advertisement received)\n",
1004                            cifp->if_xname);
1005                         carp_set_state(sc, BACKUP);
1006                         carp_setrun(sc, 0);
1007                         carp_setroute(sc, RTM_DELETE);
1008                 }
1009                 break;
1010
1011         case BACKUP:
1012                 /*
1013                  * If we're pre-empting masters who advertise slower than us,
1014                  * and this one claims to be slower, treat him as down.
1015                  */
1016                 if (carp_opts[CARPCTL_PREEMPT] &&
1017                     timevalcmp(&sc_tv, &ch_tv, <)) {
1018                         CARP_DEBUG("%s: BACKUP -> MASTER "
1019                             "(preempting a slower master)\n", cifp->if_xname);
1020                         carp_master_down(sc);
1021                         break;
1022                 }
1023
1024                 /*
1025                  *  If the master is going to advertise at such a low frequency
1026                  *  that he's guaranteed to time out, we'd might as well just
1027                  *  treat him as timed out now.
1028                  */
1029                 sc_tv.tv_sec = sc->sc_advbase * 3;
1030                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
1031                         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1032                                    cifp->if_xname);
1033                         carp_master_down(sc);
1034                         break;
1035                 }
1036
1037                 /*
1038                  * Otherwise, we reset the counter and wait for the next
1039                  * advertisement.
1040                  */
1041                 carp_setrun(sc, af);
1042                 break;
1043         }
1044         m_freem(m);
1045 }
1046
1047 struct mbuf *
1048 carp_input(void *v, struct mbuf *m)
1049 {
1050         struct carp_if *cif = v;
1051         struct ether_header *eh;
1052         struct carp_softc_container *scc;
1053         struct ifnet *ifp;
1054
1055         eh = mtod(m, struct ether_header *);
1056
1057         ifp = carp_forus(cif, eh->ether_dhost);
1058         if (ifp != NULL) {
1059                 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1060                 return NULL;
1061         }
1062
1063         if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1064                 return m;
1065
1066         /*
1067          * XXX Should really check the list of multicast addresses
1068          * for each CARP interface _before_ copying.
1069          */
1070         TAILQ_FOREACH(scc, cif, scc_link) {
1071                 struct carp_softc *sc = scc->scc_softc;
1072                 struct mbuf *m0;
1073
1074                 if ((sc->sc_if.if_flags & IFF_UP) == 0)
1075                         continue;
1076
1077                 m0 = m_dup(m, MB_DONTWAIT);
1078                 if (m0 == NULL)
1079                         continue;
1080
1081                 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1082         }
1083         return m;
1084 }
1085
1086 static void
1087 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1088 {
1089         if (sc->sc_init_counter) {
1090                 /* this could also be seconds since unix epoch */
1091                 sc->sc_counter = karc4random();
1092                 sc->sc_counter = sc->sc_counter << 32;
1093                 sc->sc_counter += karc4random();
1094         } else {
1095                 sc->sc_counter++;
1096         }
1097
1098         ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1099         ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1100
1101         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1102 }
1103
1104 static void
1105 carp_send_ad_all(void)
1106 {
1107         struct carp_softc *sc;
1108
1109         LIST_FOREACH(sc, &carpif_list, sc_next) {
1110                 if (sc->sc_carpdev == NULL)
1111                         continue;
1112
1113                 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1114                         carp_send_ad(sc);
1115         }
1116 }
1117
1118 static void
1119 carp_send_ad_timeout(void *xsc)
1120 {
1121         struct carp_softc *sc = xsc;
1122         struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1123
1124         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d\n",
1125             __func__, mycpuid));
1126
1127         crit_enter();
1128         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1129                 lwkt_sendmsg(cpu_portfn(0), &cmsg->base.lmsg);
1130         crit_exit();
1131 }
1132
1133 static void
1134 carp_send_ad_timeout_dispatch(netmsg_t msg)
1135 {
1136         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1137         struct carp_softc *sc = cmsg->nc_softc;
1138
1139         /* Reply ASAP */
1140         crit_enter();
1141         lwkt_replymsg(&cmsg->base.lmsg, 0);
1142         crit_exit();
1143
1144         carp_send_ad(sc);
1145 }
1146
1147 static void
1148 carp_send_ad(struct carp_softc *sc)
1149 {
1150         struct ifnet *cifp = &sc->sc_if;
1151         struct carp_header ch;
1152         struct timeval tv;
1153         struct carp_header *ch_ptr;
1154         struct mbuf *m;
1155         int len, advbase, advskew;
1156
1157         if (!CARP_IS_RUNNING(cifp)) {
1158                 /* Bow out */
1159                 advbase = 255;
1160                 advskew = 255;
1161         } else {
1162                 advbase = sc->sc_advbase;
1163                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
1164                         advskew = sc->sc_advskew;
1165                 else
1166                         advskew = 240;
1167                 tv.tv_sec = advbase;
1168                 tv.tv_usec = advskew * 1000000 / 256;
1169         }
1170
1171         ch.carp_version = CARP_VERSION;
1172         ch.carp_type = CARP_ADVERTISEMENT;
1173         ch.carp_vhid = sc->sc_vhid;
1174         ch.carp_advbase = advbase;
1175         ch.carp_advskew = advskew;
1176         ch.carp_authlen = 7;    /* XXX DEFINE */
1177         ch.carp_pad1 = 0;       /* must be zero */
1178         ch.carp_cksum = 0;
1179
1180 #ifdef INET
1181         if (sc->sc_ia != NULL) {
1182                 struct ip *ip;
1183
1184                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1185                 if (m == NULL) {
1186                         cifp->if_oerrors++;
1187                         carpstats.carps_onomem++;
1188                         /* XXX maybe less ? */
1189                         if (advbase != 255 || advskew != 255)
1190                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1191                                     carp_send_ad_timeout, sc);
1192                         return;
1193                 }
1194                 len = sizeof(*ip) + sizeof(ch);
1195                 m->m_pkthdr.len = len;
1196                 m->m_pkthdr.rcvif = NULL;
1197                 m->m_len = len;
1198                 MH_ALIGN(m, m->m_len);
1199                 m->m_flags |= M_MCAST;
1200                 ip = mtod(m, struct ip *);
1201                 ip->ip_v = IPVERSION;
1202                 ip->ip_hl = sizeof(*ip) >> 2;
1203                 ip->ip_tos = IPTOS_LOWDELAY;
1204                 ip->ip_len = len;
1205                 ip->ip_id = ip_newid();
1206                 ip->ip_off = IP_DF;
1207                 ip->ip_ttl = CARP_DFLTTL;
1208                 ip->ip_p = IPPROTO_CARP;
1209                 ip->ip_sum = 0;
1210                 ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1211                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1212
1213                 ch_ptr = (struct carp_header *)(&ip[1]);
1214                 bcopy(&ch, ch_ptr, sizeof(ch));
1215                 carp_prepare_ad(sc, ch_ptr);
1216                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1217
1218                 getmicrotime(&cifp->if_lastchange);
1219                 cifp->if_opackets++;
1220                 cifp->if_obytes += len;
1221                 carpstats.carps_opackets++;
1222
1223                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1224                         cifp->if_oerrors++;
1225                         if (sc->sc_sendad_errors < INT_MAX)
1226                                 sc->sc_sendad_errors++;
1227                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1228                                 carp_suppress_preempt++;
1229                                 if (carp_suppress_preempt == 1) {
1230                                         carp_send_ad_all();
1231                                 }
1232                         }
1233                         sc->sc_sendad_success = 0;
1234                 } else {
1235                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1236                                 if (++sc->sc_sendad_success >=
1237                                     CARP_SENDAD_MIN_SUCCESS) {
1238                                         carp_suppress_preempt--;
1239                                         sc->sc_sendad_errors = 0;
1240                                 }
1241                         } else {
1242                                 sc->sc_sendad_errors = 0;
1243                         }
1244                 }
1245         }
1246 #endif /* INET */
1247 #ifdef INET6
1248         if (sc->sc_ia6) {
1249                 struct ip6_hdr *ip6;
1250
1251                 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1252                 if (m == NULL) {
1253                         cifp->if_oerrors++;
1254                         carpstats.carps_onomem++;
1255                         /* XXX maybe less ? */
1256                         if (advbase != 255 || advskew != 255)
1257                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1258                                     carp_send_ad_timeout, sc);
1259                         return;
1260                 }
1261                 len = sizeof(*ip6) + sizeof(ch);
1262                 m->m_pkthdr.len = len;
1263                 m->m_pkthdr.rcvif = NULL;
1264                 m->m_len = len;
1265                 MH_ALIGN(m, m->m_len);
1266                 m->m_flags |= M_MCAST;
1267                 ip6 = mtod(m, struct ip6_hdr *);
1268                 bzero(ip6, sizeof(*ip6));
1269                 ip6->ip6_vfc |= IPV6_VERSION;
1270                 ip6->ip6_hlim = CARP_DFLTTL;
1271                 ip6->ip6_nxt = IPPROTO_CARP;
1272                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1273                     sizeof(struct in6_addr));
1274                 /* set the multicast destination */
1275
1276                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1277                 ip6->ip6_dst.s6_addr8[15] = 0x12;
1278                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1279                         cifp->if_oerrors++;
1280                         m_freem(m);
1281                         CARP_LOG("%s: in6_setscope failed\n", __func__);
1282                         return;
1283                 }
1284
1285                 ch_ptr = (struct carp_header *)(&ip6[1]);
1286                 bcopy(&ch, ch_ptr, sizeof(ch));
1287                 carp_prepare_ad(sc, ch_ptr);
1288                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1289
1290                 getmicrotime(&cifp->if_lastchange);
1291                 cifp->if_opackets++;
1292                 cifp->if_obytes += len;
1293                 carpstats.carps_opackets6++;
1294
1295                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1296                         cifp->if_oerrors++;
1297                         if (sc->sc_sendad_errors < INT_MAX)
1298                                 sc->sc_sendad_errors++;
1299                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1300                                 carp_suppress_preempt++;
1301                                 if (carp_suppress_preempt == 1) {
1302                                         carp_send_ad_all();
1303                                 }
1304                         }
1305                         sc->sc_sendad_success = 0;
1306                 } else {
1307                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1308                                 if (++sc->sc_sendad_success >=
1309                                     CARP_SENDAD_MIN_SUCCESS) {
1310                                         carp_suppress_preempt--;
1311                                         sc->sc_sendad_errors = 0;
1312                                 }
1313                         } else {
1314                                 sc->sc_sendad_errors = 0;
1315                         }
1316                 }
1317         }
1318 #endif /* INET6 */
1319
1320         if (advbase != 255 || advskew != 255)
1321                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1322                     carp_send_ad_timeout, sc);
1323 }
1324
1325 /*
1326  * Broadcast a gratuitous ARP request containing
1327  * the virtual router MAC address for each IP address
1328  * associated with the virtual router.
1329  */
1330 static void
1331 carp_send_arp(struct carp_softc *sc)
1332 {
1333         const struct carp_vhaddr *vha;
1334
1335         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1336                 if (vha->vha_iaback == NULL)
1337                         continue;
1338                 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1339         }
1340 }
1341
1342 #ifdef INET6
1343 static void
1344 carp_send_na(struct carp_softc *sc)
1345 {
1346         struct ifaddr_container *ifac;
1347         struct in6_addr *in6;
1348         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1349
1350         TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1351                 struct ifaddr *ifa = ifac->ifa;
1352
1353                 if (ifa->ifa_addr->sa_family != AF_INET6)
1354                         continue;
1355
1356                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1357                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1358                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1359                 DELAY(1000);    /* XXX */
1360         }
1361 }
1362 #endif /* INET6 */
1363
1364 static __inline const struct carp_vhaddr *
1365 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1366 {
1367         struct carp_vhaddr *vha;
1368
1369         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1370                 if (vha->vha_iaback == NULL)
1371                         continue;
1372
1373                 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1374                         return vha;
1375         }
1376         return NULL;
1377 }
1378
1379 #ifdef notyet
1380 static int
1381 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1382                      const struct in_addr *isaddr, uint8_t **enaddr)
1383 {
1384         const struct carp_softc *vh;
1385         int index, count = 0;
1386
1387         /*
1388          * XXX proof of concept implementation.
1389          * We use the source ip to decide which virtual host should
1390          * handle the request. If we're master of that virtual host,
1391          * then we respond, otherwise, just drop the arp packet on
1392          * the floor.
1393          */
1394
1395         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1396                 if (!CARP_IS_RUNNING(&vh->sc_if))
1397                         continue;
1398
1399                 if (carp_find_addr(vh, itaddr) != NULL)
1400                         count++;
1401         }
1402         if (count == 0)
1403                 return 0;
1404
1405         /* this should be a hash, like pf_hash() */
1406         index = ntohl(isaddr->s_addr) % count;
1407         count = 0;
1408
1409         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1410                 if (!CARP_IS_RUNNING(&vh->sc_if))
1411                         continue;
1412
1413                 if (carp_find_addr(vh, itaddr) == NULL)
1414                         continue;
1415
1416                 if (count == index) {
1417                         if (vh->sc_state == MASTER) {
1418                                 *enaddr = IF_LLADDR(&vh->sc_if);
1419                                 return 1;
1420                         } else {
1421                                 return 0;
1422                         }
1423                 }
1424                 count++;
1425         }
1426         return 0;
1427 }
1428 #endif
1429
1430 int
1431 carp_iamatch(const struct in_ifaddr *ia)
1432 {
1433         const struct carp_softc *sc = ia->ia_ifp->if_softc;
1434
1435         KASSERT(&curthread->td_msgport == cpu_portfn(0),
1436             ("not in netisr0"));
1437
1438 #ifdef notyet
1439         if (carp_opts[CARPCTL_ARPBALANCE])
1440                 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1441 #endif
1442
1443         if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1444                 return 0;
1445
1446         return 1;
1447 }
1448
1449 #ifdef INET6
1450 struct ifaddr *
1451 carp_iamatch6(void *v, struct in6_addr *taddr)
1452 {
1453 #ifdef foo
1454         struct carp_if *cif = v;
1455         struct carp_softc *vh;
1456
1457         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1458                 struct ifaddr_container *ifac;
1459
1460                 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1461                               ifa_link) {
1462                         struct ifaddr *ifa = ifac->ifa;
1463
1464                         if (IN6_ARE_ADDR_EQUAL(taddr,
1465                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1466                             CARP_IS_RUNNING(&vh->sc_if) &&
1467                             vh->sc_state == MASTER) {
1468                                 return (ifa);
1469                         }
1470                 }
1471         }
1472 #endif
1473         return (NULL);
1474 }
1475
1476 void *
1477 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1478 {
1479 #ifdef foo
1480         struct m_tag *mtag;
1481         struct carp_if *cif = v;
1482         struct carp_softc *sc;
1483
1484         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1485                 struct ifaddr_container *ifac;
1486
1487                 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1488                               ifa_link) {
1489                         struct ifaddr *ifa = ifac->ifa;
1490
1491                         if (IN6_ARE_ADDR_EQUAL(taddr,
1492                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1493                             CARP_IS_RUNNING(&sc->sc_if)) {
1494                                 struct ifnet *ifp = &sc->sc_if;
1495
1496                                 mtag = m_tag_get(PACKET_TAG_CARP,
1497                                     sizeof(struct ifnet *), MB_DONTWAIT);
1498                                 if (mtag == NULL) {
1499                                         /* better a bit than nothing */
1500                                         return (IF_LLADDR(ifp));
1501                                 }
1502                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1503                                     sizeof(struct ifnet *));
1504                                 m_tag_prepend(m, mtag);
1505
1506                                 return (IF_LLADDR(ifp));
1507                         }
1508                 }
1509         }
1510 #endif
1511         return (NULL);
1512 }
1513 #endif
1514
1515 static struct ifnet *
1516 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1517 {
1518         struct carp_softc_container *scc;
1519
1520         if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1521                 return NULL;
1522
1523         TAILQ_FOREACH(scc, cif, scc_link) {
1524                 struct carp_softc *sc = scc->scc_softc;
1525                 struct ifnet *ifp = &sc->sc_if;
1526
1527                 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1528                     !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1529                         return ifp;
1530         }
1531         return NULL;
1532 }
1533
1534 static void
1535 carp_master_down_timeout(void *xsc)
1536 {
1537         struct carp_softc *sc = xsc;
1538         struct netmsg_carp *cmsg = &sc->sc_md_msg;
1539
1540         KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d\n",
1541             __func__, mycpuid));
1542
1543         crit_enter();
1544         if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1545                 lwkt_sendmsg(cpu_portfn(0), &cmsg->base.lmsg);
1546         crit_exit();
1547 }
1548
1549 static void
1550 carp_master_down_timeout_dispatch(netmsg_t msg)
1551 {
1552         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1553         struct carp_softc *sc = cmsg->nc_softc;
1554
1555         /* Reply ASAP */
1556         crit_enter();
1557         lwkt_replymsg(&cmsg->base.lmsg, 0);
1558         crit_exit();
1559
1560         CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1561                    sc->sc_if.if_xname);
1562         carp_master_down(sc);
1563 }
1564
1565 static void
1566 carp_master_down(struct carp_softc *sc)
1567 {
1568         switch (sc->sc_state) {
1569         case INIT:
1570                 kprintf("%s: master_down event in INIT state\n",
1571                         sc->sc_if.if_xname);
1572                 break;
1573
1574         case MASTER:
1575                 break;
1576
1577         case BACKUP:
1578                 carp_set_state(sc, MASTER);
1579                 carp_send_ad(sc);
1580                 carp_send_arp(sc);
1581 #ifdef INET6
1582                 carp_send_na(sc);
1583 #endif /* INET6 */
1584                 carp_setrun(sc, 0);
1585                 carp_setroute(sc, RTM_ADD);
1586                 break;
1587         }
1588 }
1589
1590 /*
1591  * When in backup state, af indicates whether to reset the master down timer
1592  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1593  */
1594 static void
1595 carp_setrun(struct carp_softc *sc, sa_family_t af)
1596 {
1597         struct ifnet *cifp = &sc->sc_if;
1598         struct timeval tv;
1599
1600         if (sc->sc_carpdev == NULL) {
1601                 carp_set_state(sc, INIT);
1602                 return;
1603         }
1604
1605         if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1606             (sc->sc_naddrs || sc->sc_naddrs6)) {
1607                 /* Nothing */
1608         } else {
1609                 carp_setroute(sc, RTM_DELETE);
1610                 return;
1611         }
1612
1613         switch (sc->sc_state) {
1614         case INIT:
1615                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1616                         carp_send_ad(sc);
1617                         carp_send_arp(sc);
1618 #ifdef INET6
1619                         carp_send_na(sc);
1620 #endif /* INET6 */
1621                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1622                                    cifp->if_xname);
1623                         carp_set_state(sc, MASTER);
1624                         carp_setroute(sc, RTM_ADD);
1625                 } else {
1626                         CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1627                         carp_set_state(sc, BACKUP);
1628                         carp_setroute(sc, RTM_DELETE);
1629                         carp_setrun(sc, 0);
1630                 }
1631                 break;
1632
1633         case BACKUP:
1634                 callout_stop(&sc->sc_ad_tmo);
1635                 tv.tv_sec = 3 * sc->sc_advbase;
1636                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1637                 switch (af) {
1638 #ifdef INET
1639                 case AF_INET:
1640                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1641                             carp_master_down_timeout, sc);
1642                         break;
1643 #endif /* INET */
1644 #ifdef INET6
1645                 case AF_INET6:
1646                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1647                             carp_master_down_timeout, sc);
1648                         break;
1649 #endif /* INET6 */
1650                 default:
1651                         if (sc->sc_naddrs)
1652                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1653                                     carp_master_down_timeout, sc);
1654                         if (sc->sc_naddrs6)
1655                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1656                                     carp_master_down_timeout, sc);
1657                         break;
1658                 }
1659                 break;
1660
1661         case MASTER:
1662                 tv.tv_sec = sc->sc_advbase;
1663                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1664                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1665                     carp_send_ad_timeout, sc);
1666                 break;
1667         }
1668 }
1669
1670 static void
1671 carp_multicast_cleanup(struct carp_softc *sc)
1672 {
1673         struct ip_moptions *imo = &sc->sc_imo;
1674
1675         if (imo->imo_num_memberships == 0)
1676                 return;
1677         KKASSERT(imo->imo_num_memberships == 1);
1678
1679         in_delmulti(imo->imo_membership[0]);
1680         imo->imo_membership[0] = NULL;
1681         imo->imo_num_memberships = 0;
1682         imo->imo_multicast_ifp = NULL;
1683 }
1684
1685 #ifdef INET6
1686 static void
1687 carp_multicast6_cleanup(struct carp_softc *sc)
1688 {
1689         struct ip6_moptions *im6o = &sc->sc_im6o;
1690
1691         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1692                 struct in6_multi_mship *imm =
1693                     LIST_FIRST(&im6o->im6o_memberships);
1694
1695                 LIST_REMOVE(imm, i6mm_chain);
1696                 in6_leavegroup(imm);
1697         }
1698         im6o->im6o_multicast_ifp = NULL;
1699 }
1700 #endif
1701
1702 static void
1703 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1704 {
1705         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1706         struct carp_softc *sc = cmsg->nc_softc;
1707         const struct carp_vhaddr *vha;
1708         struct ifcarpvhaddr *carpa, *carpa0;
1709         int count, len, error = 0;
1710
1711         count = 0;
1712         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1713                 ++count;
1714
1715         if (cmsg->nc_datalen == 0) {
1716                 cmsg->nc_datalen = count * sizeof(*carpa);
1717                 goto back;
1718         } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1719                 cmsg->nc_datalen = 0;
1720                 goto back;
1721         }
1722         len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1723         KKASSERT(len >= sizeof(*carpa));
1724
1725         carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1726         if (carpa == NULL) {
1727                 error = ENOMEM; 
1728                 goto back;
1729         }
1730
1731         count = 0;
1732         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1733                 if (len < sizeof(*carpa))
1734                         break;
1735
1736                 carpa->carpa_flags = vha->vha_flags;
1737                 carpa->carpa_addr.sin_family = AF_INET;
1738                 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1739
1740                 carpa->carpa_baddr.sin_family = AF_INET;
1741                 if (vha->vha_iaback == NULL) {
1742                         carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1743                 } else {
1744                         carpa->carpa_baddr.sin_addr =
1745                         vha->vha_iaback->ia_addr.sin_addr;
1746                 }
1747
1748                 ++carpa;
1749                 ++count;
1750                 len -= sizeof(*carpa);
1751         }
1752         cmsg->nc_datalen = sizeof(*carpa) * count;
1753         KKASSERT(cmsg->nc_datalen > 0);
1754
1755         cmsg->nc_data = carpa0;
1756
1757 back:
1758         lwkt_replymsg(&cmsg->base.lmsg, error);
1759 }
1760
1761 static int
1762 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1763 {
1764         struct ifnet *ifp = &sc->arpcom.ac_if;
1765         struct netmsg_carp cmsg;
1766         int error;
1767
1768         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1769         ifnet_deserialize_all(ifp);
1770
1771         bzero(&cmsg, sizeof(cmsg));
1772         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1773             carp_ioctl_getvhaddr_dispatch);
1774         cmsg.nc_softc = sc;
1775         cmsg.nc_datalen = ifd->ifd_len;
1776
1777         error = lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
1778
1779         if (!error) {
1780                 if (cmsg.nc_data != NULL) {
1781                         error = copyout(cmsg.nc_data, ifd->ifd_data,
1782                             cmsg.nc_datalen);
1783                         kfree(cmsg.nc_data, M_TEMP);
1784                 }
1785                 ifd->ifd_len = cmsg.nc_datalen;
1786         } else {
1787                 KASSERT(cmsg.nc_data == NULL,
1788                     ("%s temp vhaddr is alloc upon error\n", __func__));
1789         }
1790
1791         ifnet_serialize_all(ifp);
1792         return error;
1793 }
1794
1795 static int
1796 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1797     struct in_ifaddr *ia_del)
1798 {
1799         struct ifnet *ifp;
1800         struct in_ifaddr *ia_if;
1801         struct in_ifaddr_container *iac;
1802         const struct sockaddr_in *sin;
1803         u_long iaddr;
1804         int own;
1805
1806         KKASSERT(vha->vha_ia != NULL);
1807
1808         sin = &vha->vha_ia->ia_addr;
1809         iaddr = ntohl(sin->sin_addr.s_addr);
1810
1811         ia_if = NULL;
1812         own = 0;
1813         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1814                 struct in_ifaddr *ia = iac->ia;
1815
1816                 if (ia == ia_del)
1817                         continue;
1818
1819                 if (ia->ia_ifp->if_type == IFT_CARP)
1820                         continue;
1821
1822                 if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1823                         continue;
1824
1825                 /* and, yeah, we need a multicast-capable iface too */
1826                 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1827                         continue;
1828
1829                 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1830                         if (sin->sin_addr.s_addr ==
1831                             ia->ia_addr.sin_addr.s_addr)
1832                                 own = 1;
1833                         if (ia_if == NULL)
1834                                 ia_if = ia;
1835                         else if (sc->sc_carpdev != NULL &&
1836                                  sc->sc_carpdev == ia->ia_ifp)
1837                                 ia_if = ia;
1838                 }
1839         }
1840
1841         carp_deactivate_vhaddr(sc, vha, FALSE);
1842         if (!ia_if)
1843                 return ENOENT;
1844
1845         ifp = ia_if->ia_ifp;
1846
1847         /* XXX Don't allow parent iface to be changed */
1848         if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
1849                 return EEXIST;
1850
1851         return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
1852 }
1853
1854 static void
1855 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1856 {
1857         struct carp_vhaddr *vha_new;
1858         struct in_ifaddr *carp_ia;
1859 #ifdef INVARIANTS
1860         struct carp_vhaddr *vha;
1861 #endif
1862
1863         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1864         carp_ia = ifatoia(carp_ifa);
1865
1866 #ifdef INVARIANTS
1867         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1868                 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
1869 #endif
1870
1871         vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
1872         vha_new->vha_ia = carp_ia;
1873         carp_insert_vhaddr(sc, vha_new);
1874
1875         if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
1876                 /*
1877                  * If the above configuration fails, it may only mean
1878                  * that the new address is problematic.  However, the
1879                  * carp(4) interface may already have several working
1880                  * addresses.  Since the expected behaviour of
1881                  * SIOC[AS]IFADDR is to put the NIC into working state,
1882                  * we try starting the state machine manually here with
1883                  * the hope that the carp(4)'s previously working
1884                  * addresses still could be brought up.
1885                  */
1886                 carp_hmac_prepare(sc);
1887                 carp_set_state(sc, INIT);
1888                 carp_setrun(sc, 0);
1889         }
1890 }
1891
1892 static void
1893 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1894 {
1895         struct carp_vhaddr *vha;
1896         struct in_ifaddr *carp_ia;
1897
1898         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1899         carp_ia = ifatoia(carp_ifa);
1900
1901         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1902                 KKASSERT(vha->vha_ia != NULL);
1903                 if (vha->vha_ia == carp_ia)
1904                         break;
1905         }
1906         KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa));
1907
1908         /*
1909          * Remove the vhaddr from the list before deactivating
1910          * the vhaddr, so that the HMAC could be correctly
1911          * updated in carp_deactivate_vhaddr()
1912          */
1913         carp_remove_vhaddr(sc, vha);
1914
1915         carp_deactivate_vhaddr(sc, vha, FALSE);
1916         kfree(vha, M_CARP);
1917 }
1918
1919 static void
1920 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
1921 {
1922         struct carp_vhaddr *vha;
1923         struct in_ifaddr *carp_ia;
1924
1925         KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
1926         carp_ia = ifatoia(carp_ifa);
1927
1928         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1929                 KKASSERT(vha->vha_ia != NULL);
1930                 if (vha->vha_ia == carp_ia)
1931                         break;
1932         }
1933         KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa));
1934
1935         /* Remove then reinsert, to keep the vhaddr list sorted */
1936         carp_remove_vhaddr(sc, vha);
1937         carp_insert_vhaddr(sc, vha);
1938
1939         if (carp_config_vhaddr(sc, vha, NULL) != 0) {
1940                 /* See the comment in carp_add_addr() */
1941                 carp_hmac_prepare(sc);
1942                 carp_set_state(sc, INIT);
1943                 carp_setrun(sc, 0);
1944         }
1945 }
1946
1947 #ifdef notyet
1948
1949 #ifdef INET6
1950 static int
1951 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1952 {
1953         struct ifnet *ifp;
1954         struct carp_if *cif;
1955         struct in6_ifaddr *ia, *ia_if;
1956         struct ip6_moptions *im6o = &sc->sc_im6o;
1957         struct in6_multi_mship *imm;
1958         struct in6_addr in6;
1959         int own, error;
1960
1961         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1962                 carp_setrun(sc, 0);
1963                 return (0);
1964         }
1965
1966         /* we have to do it by hands to check we won't match on us */
1967         ia_if = NULL; own = 0;
1968         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1969                 int i;
1970
1971                 for (i = 0; i < 4; i++) {
1972                         if ((sin6->sin6_addr.s6_addr32[i] &
1973                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1974                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1975                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1976                                 break;
1977                 }
1978                 /* and, yeah, we need a multicast-capable iface too */
1979                 if (ia->ia_ifp != &sc->sc_if &&
1980                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1981                     (i == 4)) {
1982                         if (!ia_if)
1983                                 ia_if = ia;
1984                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1985                             &ia->ia_addr.sin6_addr))
1986                                 own++;
1987                 }
1988         }
1989
1990         if (!ia_if)
1991                 return (EADDRNOTAVAIL);
1992         ia = ia_if;
1993         ifp = ia->ia_ifp;
1994
1995         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1996             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
1997                 return (EADDRNOTAVAIL);
1998
1999         if (!sc->sc_naddrs6) {
2000                 im6o->im6o_multicast_ifp = ifp;
2001
2002                 /* join CARP multicast address */
2003                 bzero(&in6, sizeof(in6));
2004                 in6.s6_addr16[0] = htons(0xff02);
2005                 in6.s6_addr8[15] = 0x12;
2006                 if (in6_setscope(&in6, ifp, NULL) != 0)
2007                         goto cleanup;
2008                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2009                         goto cleanup;
2010                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2011
2012                 /* join solicited multicast address */
2013                 bzero(&in6, sizeof(in6));
2014                 in6.s6_addr16[0] = htons(0xff02);
2015                 in6.s6_addr32[1] = 0;
2016                 in6.s6_addr32[2] = htonl(1);
2017                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2018                 in6.s6_addr8[12] = 0xff;
2019                 if (in6_setscope(&in6, ifp, NULL) != 0)
2020                         goto cleanup;
2021                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2022                         goto cleanup;
2023                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2024         }
2025
2026 #ifdef foo
2027         if (!ifp->if_carp) {
2028                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2029
2030                 if ((error = ifpromisc(ifp, 1))) {
2031                         kfree(cif, M_CARP);
2032                         goto cleanup;
2033                 }
2034
2035                 TAILQ_INIT(&cif->vhif_vrs);
2036                 ifp->if_carp = cif;
2037         } else {
2038                 struct carp_softc *vr;
2039
2040                 cif = ifp->if_carp;
2041                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2042                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2043                                 error = EINVAL;
2044                                 goto cleanup;
2045                         }
2046                 }
2047         }
2048 #endif
2049         sc->sc_ia6 = ia;
2050         sc->sc_carpdev = ifp;
2051
2052 #ifdef foo
2053         { /* XXX prevent endless loop if already in queue */
2054         struct carp_softc *vr, *after = NULL;
2055         int myself = 0;
2056         cif = ifp->if_carp;
2057
2058         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2059                 if (vr == sc)
2060                         myself = 1;
2061                 if (vr->sc_vhid < sc->sc_vhid)
2062                         after = vr;
2063         }
2064
2065         if (!myself) {
2066                 /* We're trying to keep things in order */
2067                 if (after == NULL)
2068                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2069                 else
2070                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2071         }
2072         }
2073 #endif
2074
2075         sc->sc_naddrs6++;
2076         if (own)
2077                 sc->sc_advskew = 0;
2078         carp_sc_state(sc);
2079         carp_setrun(sc, 0);
2080
2081         return (0);
2082
2083 cleanup:
2084         /* clean up multicast memberships */
2085         if (!sc->sc_naddrs6) {
2086                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2087                         imm = LIST_FIRST(&im6o->im6o_memberships);
2088                         LIST_REMOVE(imm, i6mm_chain);
2089                         in6_leavegroup(imm);
2090                 }
2091         }
2092         return (error);
2093 }
2094
2095 static int
2096 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2097 {
2098         int error = 0;
2099
2100         if (!--sc->sc_naddrs6) {
2101                 struct carp_if *cif = sc->sc_carpdev->if_carp;
2102                 struct ip6_moptions *im6o = &sc->sc_im6o;
2103
2104                 callout_stop(&sc->sc_ad_tmo);
2105                 sc->sc_vhid = -1;
2106                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2107                         struct in6_multi_mship *imm =
2108                             LIST_FIRST(&im6o->im6o_memberships);
2109
2110                         LIST_REMOVE(imm, i6mm_chain);
2111                         in6_leavegroup(imm);
2112                 }
2113                 im6o->im6o_multicast_ifp = NULL;
2114 #ifdef foo
2115                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2116                 if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2117                         sc->sc_carpdev->if_carp = NULL;
2118                         kfree(cif, M_IFADDR);
2119                 }
2120 #endif
2121         }
2122         return (error);
2123 }
2124 #endif /* INET6 */
2125
2126 #endif
2127
2128 static int
2129 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2130 {
2131         struct carp_softc *sc = ifp->if_softc;
2132         struct ifreq *ifr = (struct ifreq *)addr;
2133         struct ifdrv *ifd = (struct ifdrv *)addr;
2134         int error = 0;
2135
2136         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2137
2138         switch (cmd) {
2139         case SIOCSIFFLAGS:
2140                 if (ifp->if_flags & IFF_UP) {
2141                         if ((ifp->if_flags & IFF_RUNNING) == 0)
2142                                 carp_init(sc);
2143                 } else if (ifp->if_flags & IFF_RUNNING) {
2144                         carp_ioctl_stop(sc);
2145                 }
2146                 break;
2147
2148         case SIOCSVH:
2149                 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2150                 break;
2151
2152         case SIOCGVH:
2153                 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2154                 break;
2155
2156         case SIOCGDRVSPEC:
2157                 switch (ifd->ifd_cmd) {
2158                 case CARPGDEVNAME:
2159                         error = carp_ioctl_getdevname(sc, ifd);
2160                         break;
2161
2162                 case CARPGVHADDR:
2163                         error = carp_ioctl_getvhaddr(sc, ifd);
2164                         break;
2165
2166                 default:
2167                         error = EINVAL;
2168                         break;
2169                 }
2170                 break;
2171
2172         default:
2173                 error = ether_ioctl(ifp, cmd, addr);
2174                 break;
2175         }
2176
2177         return error;
2178 }
2179
2180 static void
2181 carp_ioctl_stop_dispatch(netmsg_t msg)
2182 {
2183         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2184         struct carp_softc *sc = cmsg->nc_softc;
2185
2186         carp_stop(sc, FALSE);
2187         lwkt_replymsg(&cmsg->base.lmsg, 0);
2188 }
2189
2190 static void
2191 carp_ioctl_stop(struct carp_softc *sc)
2192 {
2193         struct ifnet *ifp = &sc->arpcom.ac_if;
2194         struct netmsg_carp cmsg;
2195
2196         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2197
2198         ifnet_deserialize_all(ifp);
2199
2200         bzero(&cmsg, sizeof(cmsg));
2201         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2202             carp_ioctl_stop_dispatch);
2203         cmsg.nc_softc = sc;
2204
2205         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2206
2207         ifnet_serialize_all(ifp);
2208 }
2209
2210 static void
2211 carp_ioctl_setvh_dispatch(netmsg_t msg)
2212 {
2213         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2214         struct carp_softc *sc = cmsg->nc_softc;
2215         struct ifnet *ifp = &sc->arpcom.ac_if;
2216         const struct carpreq *carpr = cmsg->nc_data;
2217         int error;
2218
2219         error = 1;
2220         if ((ifp->if_flags & IFF_RUNNING) &&
2221             sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2222                 switch (carpr->carpr_state) {
2223                 case BACKUP:
2224                         callout_stop(&sc->sc_ad_tmo);
2225                         carp_set_state(sc, BACKUP);
2226                         carp_setrun(sc, 0);
2227                         carp_setroute(sc, RTM_DELETE);
2228                         break;
2229
2230                 case MASTER:
2231                         carp_master_down(sc);
2232                         break;
2233
2234                 default:
2235                         break;
2236                 }
2237         }
2238         if (carpr->carpr_vhid > 0) {
2239                 if (carpr->carpr_vhid > 255) {
2240                         error = EINVAL;
2241                         goto back;
2242                 }
2243                 if (sc->sc_carpdev) {
2244                         struct carp_if *cif = sc->sc_carpdev->if_carp;
2245                         struct carp_softc_container *scc;
2246
2247                         TAILQ_FOREACH(scc, cif, scc_link) {
2248                                 struct carp_softc *vr = scc->scc_softc;
2249
2250                                 if (vr != sc &&
2251                                     vr->sc_vhid == carpr->carpr_vhid) {
2252                                         error = EEXIST;
2253                                         goto back;
2254                                 }
2255                         }
2256                 }
2257                 sc->sc_vhid = carpr->carpr_vhid;
2258
2259                 IF_LLADDR(ifp)[5] = sc->sc_vhid;
2260                 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2261                     ETHER_ADDR_LEN);
2262
2263                 error--;
2264         }
2265         if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2266                 if (carpr->carpr_advskew >= 255) {
2267                         error = EINVAL;
2268                         goto back;
2269                 }
2270                 if (carpr->carpr_advbase > 255) {
2271                         error = EINVAL;
2272                         goto back;
2273                 }
2274                 sc->sc_advbase = carpr->carpr_advbase;
2275                 sc->sc_advskew = carpr->carpr_advskew;
2276                 error--;
2277         }
2278         bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2279         if (error > 0) {
2280                 error = EINVAL;
2281         } else {
2282                 error = 0;
2283                 carp_setrun(sc, 0);
2284         }
2285 back:
2286         carp_hmac_prepare(sc);
2287
2288         lwkt_replymsg(&cmsg->base.lmsg, error);
2289 }
2290
2291 static int
2292 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2293 {
2294         struct ifnet *ifp = &sc->arpcom.ac_if;
2295         struct netmsg_carp cmsg;
2296         struct carpreq carpr;
2297         int error;
2298
2299         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2300         ifnet_deserialize_all(ifp);
2301
2302         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2303         if (error)
2304                 goto back;
2305
2306         error = copyin(udata, &carpr, sizeof(carpr));
2307         if (error)
2308                 goto back;
2309
2310         bzero(&cmsg, sizeof(cmsg));
2311         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2312             carp_ioctl_setvh_dispatch);
2313         cmsg.nc_softc = sc;
2314         cmsg.nc_data = &carpr;
2315
2316         error = lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2317
2318 back:
2319         ifnet_serialize_all(ifp);
2320         return error;
2321 }
2322
2323 static void
2324 carp_ioctl_getvh_dispatch(netmsg_t msg)
2325 {
2326         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2327         struct carp_softc *sc = cmsg->nc_softc;
2328         struct carpreq *carpr = cmsg->nc_data;
2329
2330         carpr->carpr_state = sc->sc_state;
2331         carpr->carpr_vhid = sc->sc_vhid;
2332         carpr->carpr_advbase = sc->sc_advbase;
2333         carpr->carpr_advskew = sc->sc_advskew;
2334         bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2335
2336         lwkt_replymsg(&cmsg->base.lmsg, 0);
2337 }
2338
2339 static int
2340 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2341 {
2342         struct ifnet *ifp = &sc->arpcom.ac_if;
2343         struct netmsg_carp cmsg;
2344         struct carpreq carpr;
2345         int error;
2346
2347         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2348         ifnet_deserialize_all(ifp);
2349
2350         bzero(&cmsg, sizeof(cmsg));
2351         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2352             carp_ioctl_getvh_dispatch);
2353         cmsg.nc_softc = sc;
2354         cmsg.nc_data = &carpr;
2355
2356         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2357
2358         error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2359         if (error)
2360                 bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2361
2362         error = copyout(&carpr, udata, sizeof(carpr));
2363
2364         ifnet_serialize_all(ifp);
2365         return error;
2366 }
2367
2368 static void
2369 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2370 {
2371         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2372         struct carp_softc *sc = cmsg->nc_softc;
2373         char *devname = cmsg->nc_data;
2374
2375         bzero(devname, sizeof(devname));
2376         if (sc->sc_carpdev != NULL)
2377                 strlcpy(devname, sc->sc_carpdev->if_xname, sizeof(devname));
2378
2379         lwkt_replymsg(&cmsg->base.lmsg, 0);
2380 }
2381
2382 static int
2383 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2384 {
2385         struct ifnet *ifp = &sc->arpcom.ac_if;
2386         struct netmsg_carp cmsg;
2387         char devname[IFNAMSIZ];
2388         int error;
2389
2390         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2391
2392         if (ifd->ifd_len != sizeof(devname))
2393                 return EINVAL;
2394
2395         ifnet_deserialize_all(ifp);
2396
2397         bzero(&cmsg, sizeof(cmsg));
2398         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2399             carp_ioctl_getdevname_dispatch);
2400         cmsg.nc_softc = sc;
2401         cmsg.nc_data = devname;
2402
2403         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2404
2405         error = copyout(devname, ifd->ifd_data, sizeof(devname));
2406
2407         ifnet_serialize_all(ifp);
2408         return error;
2409 }
2410
2411 static void
2412 carp_init_dispatch(netmsg_t msg)
2413 {
2414         struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2415         struct carp_softc *sc = cmsg->nc_softc;
2416
2417         sc->sc_if.if_flags |= IFF_RUNNING;
2418         carp_hmac_prepare(sc);
2419         carp_set_state(sc, INIT);
2420         carp_setrun(sc, 0);
2421
2422         lwkt_replymsg(&cmsg->base.lmsg, 0);
2423 }
2424
2425 static void
2426 carp_init(void *xsc)
2427 {
2428         struct carp_softc *sc = xsc;
2429         struct ifnet *ifp = &sc->arpcom.ac_if;
2430         struct netmsg_carp cmsg;
2431
2432         ASSERT_IFNET_SERIALIZED_ALL(ifp);
2433
2434         ifnet_deserialize_all(ifp);
2435
2436         bzero(&cmsg, sizeof(cmsg));
2437         netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2438             carp_init_dispatch);
2439         cmsg.nc_softc = sc;
2440
2441         lwkt_domsg(cpu_portfn(0), &cmsg.base.lmsg, 0);
2442
2443         ifnet_serialize_all(ifp);
2444 }
2445
2446 static int
2447 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2448     struct rtentry *rt)
2449 {
2450         struct carp_softc *sc = ifp->if_softc;
2451         struct ifnet *carpdev;
2452         int error = 0;
2453
2454         carpdev = sc->sc_carpdev;
2455         if (carpdev != NULL) {
2456                 /*
2457                  * NOTE:
2458                  * CARP's ifp is passed to backing device's
2459                  * if_output method.
2460                  */
2461                 carpdev->if_output(ifp, m, dst, rt);
2462         } else {
2463                 m_freem(m);
2464                 error = ENETUNREACH;
2465         }
2466         return error;
2467 }
2468
2469 /*
2470  * Start output on carp interface. This function should never be called.
2471  */
2472 static void
2473 carp_start(struct ifnet *ifp)
2474 {
2475         panic("%s: start called\n", ifp->if_xname);
2476 }
2477
2478 static void
2479 carp_serialize(struct ifnet *ifp __unused,
2480     enum ifnet_serialize slz __unused)
2481 {
2482 }
2483
2484 static void
2485 carp_deserialize(struct ifnet *ifp __unused,
2486     enum ifnet_serialize slz __unused)
2487 {
2488 }
2489
2490 static int
2491 carp_tryserialize(struct ifnet *ifp __unused,
2492     enum ifnet_serialize slz __unused)
2493 {
2494         return 1;
2495 }
2496
2497 #ifdef INVARIANTS
2498
2499 static void
2500 carp_serialize_assert(struct ifnet *ifp __unused,
2501     enum ifnet_serialize slz __unused, boolean_t serialized __unused)
2502 {
2503 }
2504
2505 #endif  /* INVARIANTS */
2506
2507 static void
2508 carp_set_state(struct carp_softc *sc, int state)
2509 {
2510         struct ifnet *cifp = &sc->sc_if;
2511
2512         if (sc->sc_state == state)
2513                 return;
2514         sc->sc_state = state;
2515
2516         switch (sc->sc_state) {
2517         case BACKUP:
2518                 cifp->if_link_state = LINK_STATE_DOWN;
2519                 break;
2520
2521         case MASTER:
2522                 cifp->if_link_state = LINK_STATE_UP;
2523                 break;
2524
2525         default:
2526                 cifp->if_link_state = LINK_STATE_UNKNOWN;
2527                 break;
2528         }
2529         rt_ifmsg(cifp);
2530 }
2531
2532 void
2533 carp_group_demote_adj(struct ifnet *ifp, int adj)
2534 {
2535         struct ifg_list *ifgl;
2536         int *dm;
2537
2538         TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2539                 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2540                         continue;
2541                 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2542
2543                 if (*dm + adj >= 0)
2544                         *dm += adj;
2545                 else
2546                         *dm = 0;
2547
2548                 if (adj > 0 && *dm == 1)
2549                         carp_send_ad_all();
2550                 CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2551                     ifgl->ifgl_group->ifg_group, *dm);
2552         }
2553 }
2554
2555 #ifdef foo
2556 void
2557 carp_carpdev_state(void *v)
2558 {
2559         struct carp_if *cif = v;
2560         struct carp_softc *sc;
2561
2562         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2563                 carp_sc_state(sc);
2564 }
2565
2566 static void
2567 carp_sc_state(struct carp_softc *sc)
2568 {
2569         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2570                 callout_stop(&sc->sc_ad_tmo);
2571                 callout_stop(&sc->sc_md_tmo);
2572                 callout_stop(&sc->sc_md6_tmo);
2573                 carp_set_state(sc, INIT);
2574                 carp_setrun(sc, 0);
2575                 if (!sc->sc_suppress) {
2576                         carp_suppress_preempt++;
2577                         if (carp_suppress_preempt == 1)
2578                                 carp_send_ad_all();
2579                 }
2580                 sc->sc_suppress = 1;
2581         } else {
2582                 carp_set_state(sc, INIT);
2583                 carp_setrun(sc, 0);
2584                 if (sc->sc_suppress)
2585                         carp_suppress_preempt--;
2586                 sc->sc_suppress = 0;
2587         }
2588 }
2589 #endif
2590
2591 static void
2592 carp_stop(struct carp_softc *sc, boolean_t detach)
2593 {
2594         sc->sc_if.if_flags &= ~IFF_RUNNING;
2595
2596         callout_stop(&sc->sc_ad_tmo);
2597         callout_stop(&sc->sc_md_tmo);
2598         callout_stop(&sc->sc_md6_tmo);
2599
2600         if (!detach && sc->sc_state == MASTER)
2601                 carp_send_ad(sc);
2602
2603         if (sc->sc_suppress)
2604                 carp_suppress_preempt--;
2605         sc->sc_suppress = 0;
2606
2607         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2608                 carp_suppress_preempt--;
2609         sc->sc_sendad_errors = 0;
2610         sc->sc_sendad_success = 0;
2611
2612         carp_set_state(sc, INIT);
2613         carp_setrun(sc, 0);
2614 }
2615
2616 static void
2617 carp_suspend(struct carp_softc *sc, boolean_t detach)
2618 {
2619         struct ifnet *cifp = &sc->sc_if;
2620
2621         carp_stop(sc, detach);
2622
2623         /* Retain the running state, if we are not dead yet */
2624         if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2625                 cifp->if_flags |= IFF_RUNNING;
2626 }
2627
2628 static int
2629 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2630     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2631 {
2632         struct ip_moptions *imo = &sc->sc_imo;
2633         struct carp_if *ocif = ifp->if_carp;
2634         int error;
2635
2636         KKASSERT(vha->vha_ia != NULL);
2637
2638         KASSERT(ia_if != NULL, ("NULL backing address\n"));
2639         KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha));
2640         KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2641                 ("inactive vhaddr %p is the address owner\n", vha));
2642
2643         KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2644                 ("%s is already on %s\n", sc->sc_if.if_xname,
2645                  sc->sc_carpdev->if_xname));
2646
2647         if (ocif == NULL) {
2648                 KASSERT(sc->sc_carpdev == NULL,
2649                         ("%s is already on %s\n", sc->sc_if.if_xname,
2650                          sc->sc_carpdev->if_xname));
2651
2652                 error = ifpromisc(ifp, 1);
2653                 if (error)
2654                         return error;
2655         } else {
2656                 struct carp_softc_container *scc;
2657
2658                 TAILQ_FOREACH(scc, ocif, scc_link) {
2659                         struct carp_softc *vr = scc->scc_softc;
2660
2661                         if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2662                                 return EINVAL;
2663                 }
2664         }
2665
2666         ifp->if_carp = carp_if_insert(ocif, sc);
2667         KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed\n", __func__));
2668
2669         sc->sc_ia = ia_if;
2670         sc->sc_carpdev = ifp;
2671
2672         /*
2673          * Make sure that all protocol threads see the sc_carpdev and
2674          * if_carp changes
2675          */
2676         netmsg_service_sync();
2677
2678         if (ocif != NULL && ifp->if_carp != ocif) {
2679                 /*
2680                  * The old carp list could be safely free now,
2681                  * since no one can access it.
2682                  */
2683                 carp_if_free(ocif);
2684         }
2685
2686         vha->vha_iaback = ia_if;
2687         sc->sc_naddrs++;
2688
2689         if (own) {
2690                 vha->vha_flags |= CARP_VHAF_OWNER;
2691
2692                 /* XXX save user configured advskew? */
2693                 sc->sc_advskew = 0;
2694         }
2695
2696         carp_addroute_vhaddr(sc, vha);
2697
2698         /*
2699          * Join the multicast group only after the backing interface
2700          * has been hooked with the CARP interface.
2701          */
2702         KASSERT(imo->imo_multicast_ifp == NULL ||
2703                 imo->imo_multicast_ifp == &sc->sc_if,
2704                 ("%s didn't leave mcast group on %s\n",
2705                  sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2706
2707         if (imo->imo_num_memberships == 0) {
2708                 struct in_addr addr;
2709
2710                 addr.s_addr = htonl(INADDR_CARP_GROUP);
2711                 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2712                 if (imo->imo_membership[0] == NULL) {
2713                         carp_deactivate_vhaddr(sc, vha, FALSE);
2714                         return ENOBUFS;
2715                 }
2716
2717                 imo->imo_num_memberships++;
2718                 imo->imo_multicast_ifp = &sc->sc_if;
2719                 imo->imo_multicast_ttl = CARP_DFLTTL;
2720                 imo->imo_multicast_loop = 0;
2721         }
2722
2723         carp_hmac_prepare(sc);
2724         carp_set_state(sc, INIT);
2725         carp_setrun(sc, 0);
2726         return 0;
2727 }
2728
2729 static void
2730 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2731     boolean_t del_iaback)
2732 {
2733         KKASSERT(vha->vha_ia != NULL);
2734
2735         carp_hmac_prepare(sc);
2736
2737         if (vha->vha_iaback == NULL) {
2738                 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2739                         ("inactive vhaddr %p is the address owner\n", vha));
2740                 return;
2741         }
2742
2743         vha->vha_flags &= ~CARP_VHAF_OWNER;
2744         carp_delroute_vhaddr(sc, vha, del_iaback);
2745
2746         KKASSERT(sc->sc_naddrs > 0);
2747         vha->vha_iaback = NULL;
2748         sc->sc_naddrs--;
2749         if (!sc->sc_naddrs) {
2750                 if (sc->sc_naddrs6) {
2751                         carp_multicast_cleanup(sc);
2752                         sc->sc_ia = NULL;
2753                 } else {
2754                         carp_detach(sc, FALSE, del_iaback);
2755                 }
2756         }
2757 }
2758
2759 static void
2760 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2761 {
2762         struct carp_vhaddr *vha;
2763         struct in_ifaddr *ia_if;
2764
2765         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2766         ia_if = ifatoia(ifa_if);
2767
2768         /*
2769          * Test each inactive vhaddr against the newly added address.
2770          * If the newly added address could be the backing address,
2771          * then activate the matching vhaddr.
2772          */
2773         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2774                 const struct in_ifaddr *ia;
2775                 u_long iaddr;
2776                 int own;
2777
2778                 if (vha->vha_iaback != NULL)
2779                         continue;
2780
2781                 ia = vha->vha_ia;
2782                 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr);
2783
2784                 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet)
2785                         continue;
2786
2787                 own = 0;
2788                 if (ia->ia_addr.sin_addr.s_addr ==
2789                     ia_if->ia_addr.sin_addr.s_addr)
2790                         own = 1;
2791
2792                 carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2793         }
2794 }
2795
2796 static void
2797 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
2798                   struct ifaddr *ifa_if)
2799 {
2800         struct carp_vhaddr *vha;
2801         struct in_ifaddr *ia_if;
2802
2803         KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2804         ia_if = ifatoia(ifa_if);
2805
2806         /*
2807          * Ad src address is deleted; set it to NULL.
2808          * Following loop will try pick up a new ad src address
2809          * if one of the vhaddr could retain its backing address.
2810          */
2811         if (sc->sc_ia == ia_if)
2812                 sc->sc_ia = NULL;
2813
2814         /*
2815          * Test each active vhaddr against the deleted address.
2816          * If the deleted address is vhaddr address's backing
2817          * address, then deactivate the vhaddr.
2818          */
2819         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2820                 if (vha->vha_iaback == NULL)
2821                         continue;
2822
2823                 if (vha->vha_iaback == ia_if)
2824                         carp_deactivate_vhaddr(sc, vha, TRUE);
2825                 else if (sc->sc_ia == NULL)
2826                         sc->sc_ia = vha->vha_iaback;
2827         }
2828 }
2829
2830 static void
2831 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
2832 {
2833         struct carp_vhaddr *vha;
2834
2835         KKASSERT(sc->sc_carpdev == NULL);
2836
2837         TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2838                 carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
2839 }
2840
2841 static void
2842 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
2843             enum ifaddr_event event, struct ifaddr *ifa)
2844 {
2845         struct carp_softc *sc;
2846
2847         if (ifa->ifa_addr->sa_family != AF_INET)
2848                 return;
2849
2850         KASSERT(&curthread->td_msgport == cpu_portfn(0),
2851             ("not in netisr0"));
2852
2853         if (ifp->if_type == IFT_CARP) {
2854                 /*
2855                  * Address is changed on carp(4) interface
2856                  */
2857                 switch (event) {
2858                 case IFADDR_EVENT_ADD:
2859                         carp_add_addr(ifp->if_softc, ifa);
2860                         break;
2861
2862                 case IFADDR_EVENT_CHANGE:
2863                         carp_config_addr(ifp->if_softc, ifa);
2864                         break;
2865
2866                 case IFADDR_EVENT_DELETE:
2867                         carp_del_addr(ifp->if_softc, ifa);
2868                         break;
2869                 }
2870                 return;
2871         }
2872
2873         /*
2874          * Address is changed on non-carp(4) interface
2875          */
2876         if ((ifp->if_flags & IFF_MULTICAST) == 0)
2877                 return;
2878
2879         LIST_FOREACH(sc, &carpif_list, sc_next) {
2880                 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
2881                         /* Not the parent iface; skip */
2882                         continue;
2883                 }
2884
2885                 switch (event) {
2886                 case IFADDR_EVENT_ADD:
2887                         carp_link_addrs(sc, ifp, ifa);
2888                         break;
2889
2890                 case IFADDR_EVENT_DELETE:
2891                         if (sc->sc_carpdev != NULL) {
2892                                 carp_unlink_addrs(sc, ifp, ifa);
2893                                 if (sc->sc_carpdev == NULL) {
2894                                         /*
2895                                          * We no longer have the parent
2896                                          * interface, however, certain
2897                                          * virtual addresses, which are
2898                                          * not used because they can't
2899                                          * match the previous parent
2900                                          * interface's addresses, may now
2901                                          * match different interface's
2902                                          * addresses.
2903                                          */
2904                                         carp_update_addrs(sc, ifa);
2905                                 }
2906                         } else {
2907                                 /*
2908                                  * The carp(4) interface didn't have a
2909                                  * parent iface, so it is not possible
2910                                  * that it will contain any address to
2911                                  * be unlinked.
2912                                  */
2913                         }
2914                         break;
2915
2916                 case IFADDR_EVENT_CHANGE:
2917                         if (sc->sc_carpdev == NULL) {
2918                                 /*
2919                                  * The carp(4) interface didn't have a
2920                                  * parent iface, so it is not possible
2921                                  * that it will contain any address to
2922                                  * be updated.
2923                                  */
2924                                 carp_link_addrs(sc, ifp, ifa);
2925                         } else {
2926                                 /*
2927                                  * First try breaking tie with the old
2928                                  * address.  Then see whether we could
2929                                  * link certain vhaddr to the new address.
2930                                  * If that fails, i.e. carpdev is NULL,
2931                                  * we try a global update.
2932                                  *
2933                                  * NOTE: The above order is critical.
2934                                  */
2935                                 carp_unlink_addrs(sc, ifp, ifa);
2936                                 carp_link_addrs(sc, ifp, ifa);
2937                                 if (sc->sc_carpdev == NULL) {
2938                                         /*
2939                                          * See the comment in the above
2940                                          * IFADDR_EVENT_DELETE block.
2941                                          */
2942                                         carp_update_addrs(sc, NULL);
2943                                 }
2944                         }
2945                         break;
2946                 }
2947         }
2948 }
2949
2950 void
2951 carp_proto_ctlinput(netmsg_t msg)
2952 {
2953         int cmd = msg->ctlinput.nm_cmd;
2954         struct sockaddr *sa = msg->ctlinput.nm_arg;
2955         struct in_ifaddr_container *iac;
2956
2957         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
2958                 struct in_ifaddr *ia = iac->ia;
2959                 struct ifnet *ifp = ia->ia_ifp;
2960
2961                 if (ifp->if_type == IFT_CARP)
2962                         continue;
2963
2964                 if (ia->ia_ifa.ifa_addr == sa) {
2965                         if (cmd == PRC_IFDOWN) {
2966                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
2967                                     &ia->ia_ifa);
2968                         } else if (cmd == PRC_IFUP) {
2969                                 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
2970                                     &ia->ia_ifa);
2971                         }
2972                         break;
2973                 }
2974         }
2975
2976         lwkt_replymsg(&msg->lmsg, 0);
2977 }
2978
2979 struct ifnet *
2980 carp_parent(struct ifnet *cifp)
2981 {
2982         struct carp_softc *sc;
2983
2984         KKASSERT(cifp->if_type == IFT_CARP);
2985         sc = cifp->if_softc;
2986
2987         return sc->sc_carpdev;
2988 }
2989
/*
 * Extra rtinit() flags for an interface address: RTF_HOST if the owning
 * interface is a loopback or point-to-point interface, otherwise 0.
 */
#define rtinitflags(x)							\
	((((x)->ia_ifp->if_flags &					\
	   (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) ? RTF_HOST : 0)
2993
2994 static int
2995 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
2996 {
2997         struct in_ifaddr *ia, *iaback;
2998         int error;
2999
3000         if (sc->sc_state != MASTER)
3001                 return 0;
3002
3003         ia = vha->vha_ia;
3004         KKASSERT(ia != NULL);
3005
3006         iaback = vha->vha_iaback;
3007         KKASSERT(iaback != NULL);
3008
3009         rtinit(&iaback->ia_ifa, RTM_DELETE, rtinitflags(iaback));
3010         in_ifadown(&iaback->ia_ifa, 1);
3011         iaback->ia_flags &= ~IFA_ROUTE;
3012
3013         error = rtinit(&ia->ia_ifa, RTM_ADD, rtinitflags(ia) | RTF_UP);
3014         if (!error)
3015                 ia->ia_flags |= IFA_ROUTE;
3016         return error;
3017 }
3018
3019 static void
3020 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3021     boolean_t del_iaback)
3022 {
3023         struct in_ifaddr *ia, *iaback;
3024
3025         ia = vha->vha_ia;
3026         KKASSERT(ia != NULL);
3027
3028         iaback = vha->vha_iaback;
3029         KKASSERT(iaback != NULL);
3030
3031         rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3032         in_ifadown(&ia->ia_ifa, 1);
3033         ia->ia_flags &= ~IFA_ROUTE;
3034
3035         if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3036                 int error;
3037
3038                 error = rtinit(&iaback->ia_ifa, RTM_ADD,
3039                     rtinitflags(iaback) | RTF_UP);
3040                 if (!error)
3041                         iaback->ia_flags |= IFA_ROUTE;
3042         }
3043 }
3044
3045 static int
3046 carp_modevent(module_t mod, int type, void *data)
3047 {
3048         switch (type) {
3049         case MOD_LOAD:
3050                 LIST_INIT(&carpif_list);
3051                 carp_ifdetach_event =
3052                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3053                                       EVENTHANDLER_PRI_ANY);
3054                 carp_ifaddr_event =
3055                 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3056                                       EVENTHANDLER_PRI_FIRST);
3057                 if_clone_attach(&carp_cloner);
3058                 break;
3059
3060         case MOD_UNLOAD:
3061                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3062                                         carp_ifdetach_event);
3063                 EVENTHANDLER_DEREGISTER(ifaddr_event,
3064                                         carp_ifaddr_event);
3065                 if_clone_detach(&carp_cloner);
3066                 break;
3067
3068         default:
3069                 return (EINVAL);
3070         }
3071         return (0);
3072 }
3073
3074 static moduledata_t carp_mod = {
3075         "carp",
3076         carp_modevent,
3077         0
3078 };
3079 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);