Remove unused macro
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $
29  */
30
31 #include "opt_carp.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/in_cksum.h>
39 #include <sys/limits.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/sockio.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48
49 #include <machine/stdarg.h>
50 #include <crypto/sha1.h>
51
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_dl.h>
56 #include <net/if_types.h>
57 #include <net/route.h>
58 #include <net/if_clone.h>
59
60 #ifdef INET
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/if_ether.h>
67 #endif
68
69 #ifdef INET6
70 #include <netinet/icmp6.h>
71 #include <netinet/ip6.h>
72 #include <netinet6/ip6_var.h>
73 #include <netinet6/scope6_var.h>
74 #include <netinet6/nd6.h>
75 #endif
76
77 #include <netinet/ip_carp.h>
78
79 #define CARP_IFNAME     "carp"
80
/*
 * Per-virtual-host state for one carp pseudo-interface.  One instance is
 * allocated by carp_clone_create() and released by carp_clone_destroy().
 */
81 struct carp_softc {
82         struct ifnet             sc_if;         /* ifnet of the carp interface itself */
83         struct ifnet            *sc_ifp;        /* compat shim; set to &sc_if at creation */
84         struct ifnet            *sc_carpdev;    /* parent interface */
85         struct in_ifaddr        *sc_ia;         /* primary iface address */
86         struct ip_moptions       sc_imo;
87 #ifdef INET6
88         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
89         struct ip6_moptions      sc_im6o;
90 #endif /* INET6 */
91         TAILQ_ENTRY(carp_softc)  sc_list;       /* link in the parent's carp_if vhif_vrs queue */
92
93         enum { INIT = 0, BACKUP, MASTER }
94                                  sc_state;
95
96         int                      sc_flags_backup;
97         int                      sc_suppress;   /* when set, carpdetach() decrements carp_suppress_preempt */
98
99         int                      sc_sendad_errors;      /* bumped when ip_output() of an advertisement fails */
100 #define CARP_SENDAD_MAX_ERRORS  3
101         int                      sc_sendad_success;     /* successful sends counted once the error limit was hit */
102 #define CARP_SENDAD_MIN_SUCCESS 3
103
104         int                      sc_vhid;       /* -1 at creation; required setting */
105         int                      sc_advskew;
106         int                      sc_naddrs;
107         int                      sc_naddrs6;
108         int                      sc_advbase;    /* seconds */
109         int                      sc_init_counter;       /* nonzero: sc_counter is seeded randomly on next send */
110         uint64_t                 sc_counter;    /* 64-bit advertisement counter */
111
112         /* authentication */
113 #define CARP_HMAC_PAD   64
114         unsigned char            sc_key[CARP_KEY_LEN];
115         unsigned char            sc_pad[CARP_HMAC_PAD]; /* HMAC pad derived from sc_key */
116         SHA1_CTX                 sc_sha1;       /* precomputed first part of the inner hash */
117
118         struct callout           sc_ad_tmo;     /* advertisement timeout */
119         struct callout           sc_md_tmo;     /* master down timeout */
120         struct callout           sc_md6_tmo;    /* master down timeout */
121
122         LIST_ENTRY(carp_softc)   sc_next;       /* link in the global carpif_list */
123 };
/* Map a softc to the ifnet pointer of its carp interface. */
124 #define SC2IFP(sc)      ((sc)->sc_ifp)
125
/*
 * CARP state hung off a parent interface through its if_carp pointer.
 * carpdetach() frees it once the last carp_softc has been removed.
 */
126 struct carp_if {
127         TAILQ_HEAD(, carp_softc) vhif_vrs;      /* carp_softc entries, linked through sc_list */
128         int             vhif_nvrs;              /* entry count; decremented by carpdetach() */
129
130         struct ifnet    *vhif_ifp;
131 };
132
133 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
134
135 SYSCTL_DECL(_net_inet_carp);
136
137 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
138 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
139     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
140 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
141     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
142 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
143     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
144 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
145     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
146
147 static int carp_suppress_preempt = 0;
148 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
149     &carp_suppress_preempt, 0, "Preemption is suppressed");
150
151 static struct carpstats carpstats;
152 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
153     &carpstats, carpstats,
154     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
155
/*
 * Logging helpers gated by carp_opts[CARPCTL_LOG] (the net.inet.carp.log
 * sysctl).  CARP_LOG emits at LOG_INFO when the setting is greater than 0.
 */
156 #define CARP_LOG(...)   do {                            \
157         if (carp_opts[CARPCTL_LOG] > 0)                 \
158                 log(LOG_INFO, __VA_ARGS__);             \
159 } while (0)
160
/* CARP_DEBUG emits at LOG_DEBUG, and only when the setting is greater than 1. */
161 #define CARP_DEBUG(...) do {                            \
162         if (carp_opts[CARPCTL_LOG] > 1)                 \
163                 log(LOG_DEBUG, __VA_ARGS__);            \
164 } while (0)
165
166 static void     carp_hmac_prepare(struct carp_softc *);
167 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
168                     unsigned char *);
169 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
170                     unsigned char *);
171 static void     carp_setroute(struct carp_softc *, int);
172 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
173 static int      carp_clone_create(struct if_clone *, int);
174 static void     carp_clone_destroy(struct ifnet *);
175 static void     carpdetach(struct carp_softc *, int);
176 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
177                     struct carp_header *);
178 static void     carp_send_ad_all(void);
179 static void     carp_send_ad(void *);
180 static void     carp_send_ad_locked(struct carp_softc *);
181 static void     carp_send_arp(struct carp_softc *);
182 static void     carp_master_down(void *);
183 static void     carp_master_down_locked(struct carp_softc *);
184 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
185 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
186                     struct rtentry *);
187 static void     carp_start(struct ifnet *);
188 static void     carp_setrun(struct carp_softc *, sa_family_t);
189 static void     carp_set_state(struct carp_softc *, int);
190 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
191
192 static void     carp_multicast_cleanup(struct carp_softc *);
193 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
194 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
195 static void     carp_carpdev_state_locked(struct carp_if *);
196 static void     carp_sc_state_locked(struct carp_softc *);
197 #ifdef INET6
198 static void     carp_send_na(struct carp_softc *);
199 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
200 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
201 static void     carp_multicast6_cleanup(struct carp_softc *);
202 #endif
203
204 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
205
206 static LIST_HEAD(, carp_softc) carpif_list;
207
208 static struct if_clone carp_cloner =
209 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
210                      0, IF_MAXUNIT);
211
212 static eventhandler_tag carp_ifdetach_event;
213
214 static void
215 carp_hmac_prepare(struct carp_softc *sc)
216 {
217         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
218         uint8_t vhid = sc->sc_vhid & 0xff;
219         struct ifaddr_container *ifac;
220         int i;
221 #ifdef INET6
222         struct in6_addr in6;
223 #endif
224
225         /* XXX: possible race here */
226
227         /* compute ipad from key */
228         bzero(sc->sc_pad, sizeof(sc->sc_pad));
229         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
230         for (i = 0; i < sizeof(sc->sc_pad); i++)
231                 sc->sc_pad[i] ^= 0x36;
232
233         /* precompute first part of inner hash */
234         SHA1Init(&sc->sc_sha1);
235         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
236         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
237         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
238         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
239 #ifdef INET
240         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
241                 struct ifaddr *ifa = ifac->ifa;
242
243                 if (ifa->ifa_addr->sa_family == AF_INET)
244                         SHA1Update(&sc->sc_sha1,
245                             (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
246                             sizeof(struct in_addr));
247         }
248 #endif /* INET */
249 #ifdef INET6
250         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
251                 struct ifaddr *ifa = ifac->ifa;
252
253                 if (ifa->ifa_addr->sa_family == AF_INET6) {
254                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
255                         in6_clearscope(&in6);
256                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
257                 }
258         }
259 #endif /* INET6 */
260
261         /* convert ipad to opad */
262         for (i = 0; i < sizeof(sc->sc_pad); i++)
263                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
264 }
265
266 static void
267 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
268     unsigned char md[20])
269 {
270         SHA1_CTX sha1ctx;
271
272         /* fetch first half of inner hash */
273         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
274
275         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
276         SHA1Final(md, &sha1ctx);
277
278         /* outer hash */
279         SHA1Init(&sha1ctx);
280         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
281         SHA1Update(&sha1ctx, md, 20);
282         SHA1Final(md, &sha1ctx);
283 }
284
285 static int
286 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
287     unsigned char md[20])
288 {
289         unsigned char md2[20];
290
291         carp_hmac_generate(sc, counter, md2);
292         return (bcmp(md, md2, sizeof(md2)));
293 }
294
295 static void
296 carp_setroute(struct carp_softc *sc, int cmd)
297 {
298         struct ifaddr_container *ifac;
299
300         crit_enter();
301         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
302                 struct ifaddr *ifa = ifac->ifa;
303
304                 if (ifa->ifa_addr->sa_family == AF_INET &&
305                     sc->sc_carpdev != NULL) {
306                         int count = carp_addrcount(
307                             (struct carp_if *)sc->sc_carpdev->if_carp,
308                             ifatoia(ifa), CARP_COUNT_MASTER);
309
310                         if ((cmd == RTM_ADD && count == 1) ||
311                             (cmd == RTM_DELETE && count == 0))
312                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
313                 }
314 #ifdef INET6
315                 if (ifa->ifa_addr->sa_family == AF_INET6) {
316                         if (cmd == RTM_ADD)
317                                 in6_ifaddloop(ifa);
318                         else
319                                 in6_ifremloop(ifa);
320                 }
321 #endif /* INET6 */
322         }
323         crit_exit();
324 }
325
326 static int
327 carp_clone_create(struct if_clone *ifc, int unit)
328 {
329         struct carp_softc *sc;
330         struct ifnet *ifp;
331
332         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
333         ifp = sc->sc_ifp = &sc->sc_if;
334
335         sc->sc_flags_backup = 0;
336         sc->sc_suppress = 0;
337         sc->sc_advbase = CARP_DFLTINTV;
338         sc->sc_vhid = -1;       /* required setting */
339         sc->sc_advskew = 0;
340         sc->sc_init_counter = 1;
341         sc->sc_naddrs = sc->sc_naddrs6 = 0;
342
343 #ifdef INET6
344         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
345 #endif
346
347         callout_init(&sc->sc_ad_tmo);
348         callout_init(&sc->sc_md_tmo);
349         callout_init(&sc->sc_md6_tmo);
350
351         ifp->if_softc = sc;
352         if_initname(ifp, CARP_IFNAME, unit);    
353         ifp->if_mtu = ETHERMTU;
354         ifp->if_flags = IFF_LOOPBACK;
355         ifp->if_ioctl = carp_ioctl;
356         ifp->if_output = carp_looutput;
357         ifp->if_start = carp_start;
358         ifp->if_type = IFT_CARP;
359         ifp->if_snd.ifq_maxlen = ifqmaxlen;
360         ifp->if_hdrlen = 0;
361         if_attach(ifp, NULL);
362         bpfattach(ifp, DLT_NULL, sizeof(u_int));
363
364         crit_enter();
365         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
366         crit_exit();
367
368         return (0);
369 }
370
371 static void
372 carp_clone_destroy(struct ifnet *ifp)
373 {
374         struct carp_softc *sc = ifp->if_softc;
375
376         carpdetach(sc, 1);
377
378         crit_enter();
379         LIST_REMOVE(sc, sc_next);
380         crit_exit();
381         bpfdetach(ifp);
382         if_detach(ifp);
383         kfree(sc, M_CARP);
384 }
385
386 /*
387  * This function can be called on CARP interface destroy path,
388  * and in case of the removal of the underlying interface as
389  * well. We differentiate these two cases. In the latter case
390  * we do not cleanup our multicast memberships, since they
391  * are already freed.
392  */
393 static void
394 carpdetach(struct carp_softc *sc, int unlock)
395 {
396         struct carp_if *cif;
397
398         callout_stop(&sc->sc_ad_tmo);
399         callout_stop(&sc->sc_md_tmo);
400         callout_stop(&sc->sc_md6_tmo);
401
402         if (sc->sc_suppress)
403                 carp_suppress_preempt--;
404         sc->sc_suppress = 0;
405
406         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
407                 carp_suppress_preempt--;
408         sc->sc_sendad_errors = 0;
409
410         carp_set_state(sc, INIT);
411         SC2IFP(sc)->if_flags &= ~IFF_UP;
412         carp_setrun(sc, 0);
413         if (unlock)
414                 carp_multicast_cleanup(sc);
415 #ifdef INET6
416         carp_multicast6_cleanup(sc);
417 #endif
418
419         if (sc->sc_carpdev != NULL) {
420                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
421                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
422                 if (!--cif->vhif_nvrs) {
423                         ifpromisc(sc->sc_carpdev, 0);
424                         sc->sc_carpdev->if_carp = NULL;
425                         FREE(cif, M_IFADDR);
426                 }
427                 sc->sc_carpdev = NULL;
428         }
429 }
430
431 /* Detach an interface from the carp. */
432 static void
433 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
434 {
435         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
436         struct carp_softc *sc, *nextsc;
437
438         if (cif == NULL)
439                 return;
440
441         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
442                 nextsc = TAILQ_NEXT(sc, sc_list);
443                 carpdetach(sc, 0);
444         }
445 }
446
447 /*
448  * process input packet.
449  * we have rearranged checks order compared to the rfc,
450  * but it seems more efficient this way or not possible otherwise.
451  */
452 void
453 carp_input(struct mbuf *m, ...)
454 {
455         struct ip *ip = mtod(m, struct ip *);
456         struct carp_header *ch;
457         int len, iphlen;
458         __va_list ap;
459
460         __va_start(ap, m);
461         iphlen = __va_arg(ap, int);
462         __va_end(ap);
463
464         carpstats.carps_ipackets++;
465
466         if (!carp_opts[CARPCTL_ALLOW]) {
467                 m_freem(m);
468                 return;
469         }
470
471         /* Check if received on a valid carp interface */
472         if (m->m_pkthdr.rcvif->if_carp == NULL) {
473                 carpstats.carps_badif++;
474                 CARP_LOG("carp_input: packet received on non-carp "
475                     "interface: %s\n",
476                     m->m_pkthdr.rcvif->if_xname);
477                 m_freem(m);
478                 return;
479         }
480
481         /* Verify that the IP TTL is CARP_DFLTTL. */
482         if (ip->ip_ttl != CARP_DFLTTL) {
483                 carpstats.carps_badttl++;
484                 CARP_LOG("carp_input: received ttl %d != %d on %s\n",
485                     ip->ip_ttl, CARP_DFLTTL,
486                     m->m_pkthdr.rcvif->if_xname);
487                 m_freem(m);
488                 return;
489         }
490
491         /* Minimal CARP packet size */
492         len = iphlen + sizeof(*ch);
493
494         /*
495          * Verify that the received packet length is
496          * not less than the CARP header
497          */
498         if (m->m_pkthdr.len < len) {
499                 carpstats.carps_badlen++;
500                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
501                          m->m_pkthdr.rcvif->if_xname);
502                 m_freem(m);
503                 return;
504         }
505
506         /* Make sure that CARP header is contiguous */
507         if (len < m->m_len) {
508                 m = m_pullup(m, len);
509                 if (m == NULL) {
510                         carpstats.carps_hdrops++;
511                         CARP_LOG("carp_input: m_pullup failed\n");
512                         return;
513                 }
514                 ip = mtod(m, struct ip *);
515         }
516         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
517
518         /* Verify the CARP checksum */
519         if (in_cksum_skip(m, len, iphlen)) {
520                 carpstats.carps_badsum++;
521                 CARP_LOG("carp_input: checksum failed on %s\n",
522                     m->m_pkthdr.rcvif->if_xname);
523                 m_freem(m);
524                 return;
525         }
526         carp_input_c(m, ch, AF_INET);
527 }
528
#ifdef INET6
/*
 * Process an incoming IPv6 CARP packet.
 *
 * The packet is dropped when CARP input is disabled, the receiving
 * interface has no CARP state, the hop limit is not CARP_DFLTTL, the CARP
 * header cannot be obtained at *offp, or the checksum over the header
 * fails.  Otherwise it is passed to carp_input_c().  Always returns
 * IPPROTO_DONE.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
        struct mbuf *m = *mp;
        struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
        struct ifnet *rcvif;
        struct carp_header *ch;
        u_int first_len;

        carpstats.carps_ipackets6++;

        if (!carp_opts[CARPCTL_ALLOW]) {
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* Only interfaces carrying CARP state may receive CARP. */
        rcvif = m->m_pkthdr.rcvif;
        if (rcvif->if_carp == NULL) {
                carpstats.carps_badif++;
                CARP_LOG("carp6_input: packet received on non-carp "
                    "interface: %s\n",
                    rcvif->if_xname);
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /* The hop limit must still be CARP_DFLTTL. */
        if (ip6->ip6_hlim != CARP_DFLTTL) {
                carpstats.carps_badttl++;
                CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
                    ip6->ip6_hlim,
                    rcvif->if_xname);
                m_freem(m);
                return (IPPROTO_DONE);
        }

        /*
         * Fetch the CARP header.  The first mbuf's length is sampled
         * before IP6_EXTHDR_GET() and is used only for the log message.
         */
        first_len = m->m_len;
        IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
        if (ch == NULL) {
                carpstats.carps_badlen++;
                CARP_LOG("carp6_input: packet size %u too small\n", first_len);
                return (IPPROTO_DONE);
        }

        /* Checksum covers the CARP header starting at *offp. */
        if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
                carpstats.carps_badsum++;
                CARP_LOG("carp6_input: checksum failed, on %s\n",
                    m->m_pkthdr.rcvif->if_xname);
                m_freem(m);
                return (IPPROTO_DONE);
        }

        carp_input_c(m, ch, AF_INET6);
        return (IPPROTO_DONE);
}
#endif /* INET6 */
587
588 static void
589 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
590 {
591         struct ifnet *ifp = m->m_pkthdr.rcvif;
592         struct carp_softc *sc;
593         uint64_t tmp_counter;
594         struct timeval sc_tv, ch_tv;
595
596         /* verify that the VHID is valid on the receiving interface */
597         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
598                 if (sc->sc_vhid == ch->carp_vhid)
599                         break;
600
601         if (!sc ||
602             !((SC2IFP(sc)->if_flags & IFF_UP) &&
603               (SC2IFP(sc)->if_flags & IFF_RUNNING))) {
604                 carpstats.carps_badvhid++;
605                 m_freem(m);
606                 return;
607         }
608
609         getmicrotime(&SC2IFP(sc)->if_lastchange);
610         SC2IFP(sc)->if_ipackets++;
611         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
612
613         if (SC2IFP(sc)->if_bpf) {
614                 struct ip *ip = mtod(m, struct ip *);
615
616                 /* BPF wants net byte order */
617                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
618                 ip->ip_off = htons(ip->ip_off);
619                 bpf_mtap(SC2IFP(sc)->if_bpf, m);
620         }
621
622         /* verify the CARP version. */
623         if (ch->carp_version != CARP_VERSION) {
624                 carpstats.carps_badver++;
625                 SC2IFP(sc)->if_ierrors++;
626                 CARP_LOG("%s; invalid version %d\n",
627                     SC2IFP(sc)->if_xname,
628                     ch->carp_version);
629                 m_freem(m);
630                 return;
631         }
632
633         /* verify the hash */
634         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
635                 carpstats.carps_badauth++;
636                 SC2IFP(sc)->if_ierrors++;
637                 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
638                 m_freem(m);
639                 return;
640         }
641
642         tmp_counter = ntohl(ch->carp_counter[0]);
643         tmp_counter = tmp_counter<<32;
644         tmp_counter += ntohl(ch->carp_counter[1]);
645
646         /* XXX Replay protection goes here */
647
648         sc->sc_init_counter = 0;
649         sc->sc_counter = tmp_counter;
650
651         sc_tv.tv_sec = sc->sc_advbase;
652         if (carp_suppress_preempt && sc->sc_advskew <  240)
653                 sc_tv.tv_usec = 240 * 1000000 / 256;
654         else
655                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
656         ch_tv.tv_sec = ch->carp_advbase;
657         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
658
659         switch (sc->sc_state) {
660         case INIT:
661                 break;
662
663         case MASTER:
664                 /*
665                  * If we receive an advertisement from a master who's going to
666                  * be more frequent than us, go into BACKUP state.
667                  */
668                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
669                     timevalcmp(&sc_tv, &ch_tv, ==)) {
670                         callout_stop(&sc->sc_ad_tmo);
671                         CARP_DEBUG("%s: MASTER -> BACKUP "
672                            "(more frequent advertisement received)\n",
673                            SC2IFP(sc)->if_xname);
674                         carp_set_state(sc, BACKUP);
675                         carp_setrun(sc, 0);
676                         carp_setroute(sc, RTM_DELETE);
677                 }
678                 break;
679
680         case BACKUP:
681                 /*
682                  * If we're pre-empting masters who advertise slower than us,
683                  * and this one claims to be slower, treat him as down.
684                  */
685                 if (carp_opts[CARPCTL_PREEMPT] &&
686                     timevalcmp(&sc_tv, &ch_tv, <)) {
687                         CARP_DEBUG("%s: BACKUP -> MASTER "
688                             "(preempting a slower master)\n",
689                             SC2IFP(sc)->if_xname);
690                         carp_master_down_locked(sc);
691                         break;
692                 }
693
694                 /*
695                  *  If the master is going to advertise at such a low frequency
696                  *  that he's guaranteed to time out, we'd might as well just
697                  *  treat him as timed out now.
698                  */
699                 sc_tv.tv_sec = sc->sc_advbase * 3;
700                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
701                         CARP_DEBUG("%s: BACKUP -> MASTER "
702                             "(master timed out)\n",
703                             SC2IFP(sc)->if_xname);
704                         carp_master_down_locked(sc);
705                         break;
706                 }
707
708                 /*
709                  * Otherwise, we reset the counter and wait for the next
710                  * advertisement.
711                  */
712                 carp_setrun(sc, af);
713                 break;
714         }
715         m_freem(m);
716 }
717
718 static int
719 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
720 {
721         struct m_tag *mtag;
722         struct ifnet *ifp = SC2IFP(sc);
723
724         if (sc->sc_init_counter) {
725                 /* this could also be seconds since unix epoch */
726                 sc->sc_counter = karc4random();
727                 sc->sc_counter = sc->sc_counter << 32;
728                 sc->sc_counter += karc4random();
729         } else {
730                 sc->sc_counter++;
731         }
732
733         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
734         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
735
736         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
737
738         /* Tag packet for carp_output */
739         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT);
740         if (mtag == NULL) {
741                 m_freem(m);
742                 SC2IFP(sc)->if_oerrors++;
743                 return (ENOMEM);
744         }
745         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
746         m_tag_prepend(m, mtag);
747
748         return (0);
749 }
750
751 static void
752 carp_send_ad_all(void)
753 {
754         struct carp_softc *sc;
755
756         LIST_FOREACH(sc, &carpif_list, sc_next) {
757                 if (sc->sc_carpdev == NULL)
758                         continue;
759
760                 if ((SC2IFP(sc)->if_flags & IFF_UP) &&
761                     (SC2IFP(sc)->if_flags & IFF_RUNNING) &&
762                     sc->sc_state == MASTER)
763                         carp_send_ad_locked(sc);
764         }
765 }
766
/*
 * Callout entry point for the advertisement timer; v is the carp_softc
 * that was passed to callout_reset().
 */
static void
carp_send_ad(void *v)
{
        carp_send_ad_locked((struct carp_softc *)v);
}
774
775 static void
776 carp_send_ad_locked(struct carp_softc *sc)
777 {
778         struct carp_header ch;
779         struct timeval tv;
780         struct carp_header *ch_ptr;
781         struct mbuf *m;
782         int len, advbase, advskew;
783
784         /* bow out if we've lost our UPness or RUNNINGuiness */
785         if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
786               (SC2IFP(sc)->if_flags & IFF_RUNNING))) {
787                 advbase = 255;
788                 advskew = 255;
789         } else {
790                 advbase = sc->sc_advbase;
791                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
792                         advskew = sc->sc_advskew;
793                 else
794                         advskew = 240;
795                 tv.tv_sec = advbase;
796                 tv.tv_usec = advskew * 1000000 / 256;
797         }
798
799         ch.carp_version = CARP_VERSION;
800         ch.carp_type = CARP_ADVERTISEMENT;
801         ch.carp_vhid = sc->sc_vhid;
802         ch.carp_advbase = advbase;
803         ch.carp_advskew = advskew;
804         ch.carp_authlen = 7;    /* XXX DEFINE */
805         ch.carp_pad1 = 0;       /* must be zero */
806         ch.carp_cksum = 0;
807
808 #ifdef INET
809         if (sc->sc_ia) {
810                 struct ip *ip;
811
812                 MGETHDR(m, M_NOWAIT, MT_HEADER);
813                 if (m == NULL) {
814                         SC2IFP(sc)->if_oerrors++;
815                         carpstats.carps_onomem++;
816                         /* XXX maybe less ? */
817                         if (advbase != 255 || advskew != 255)
818                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
819                                     carp_send_ad, sc);
820                         return;
821                 }
822                 len = sizeof(*ip) + sizeof(ch);
823                 m->m_pkthdr.len = len;
824                 m->m_pkthdr.rcvif = NULL;
825                 m->m_len = len;
826                 MH_ALIGN(m, m->m_len);
827                 m->m_flags |= M_MCAST;
828                 ip = mtod(m, struct ip *);
829                 ip->ip_v = IPVERSION;
830                 ip->ip_hl = sizeof(*ip) >> 2;
831                 ip->ip_tos = IPTOS_LOWDELAY;
832                 ip->ip_len = len;
833                 ip->ip_id = ip_newid();
834                 ip->ip_off = IP_DF;
835                 ip->ip_ttl = CARP_DFLTTL;
836                 ip->ip_p = IPPROTO_CARP;
837                 ip->ip_sum = 0;
838                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
839                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
840
841                 ch_ptr = (struct carp_header *)(&ip[1]);
842                 bcopy(&ch, ch_ptr, sizeof(ch));
843                 if (carp_prepare_ad(m, sc, ch_ptr))
844                         return;
845                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
846
847                 getmicrotime(&SC2IFP(sc)->if_lastchange);
848                 SC2IFP(sc)->if_opackets++;
849                 SC2IFP(sc)->if_obytes += len;
850                 carpstats.carps_opackets++;
851
852                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
853                         SC2IFP(sc)->if_oerrors++;
854                         if (sc->sc_sendad_errors < INT_MAX)
855                                 sc->sc_sendad_errors++;
856                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
857                                 carp_suppress_preempt++;
858                                 if (carp_suppress_preempt == 1) {
859                                         carp_send_ad_all();
860                                 }
861                         }
862                         sc->sc_sendad_success = 0;
863                 } else {
864                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
865                                 if (++sc->sc_sendad_success >=
866                                     CARP_SENDAD_MIN_SUCCESS) {
867                                         carp_suppress_preempt--;
868                                         sc->sc_sendad_errors = 0;
869                                 }
870                         } else {
871                                 sc->sc_sendad_errors = 0;
872                         }
873                 }
874         }
875 #endif /* INET */
876 #ifdef INET6
877         if (sc->sc_ia6) {
878                 struct ip6_hdr *ip6;
879
880                 MGETHDR(m, M_NOWAIT, MT_HEADER);
881                 if (m == NULL) {
882                         SC2IFP(sc)->if_oerrors++;
883                         carpstats.carps_onomem++;
884                         /* XXX maybe less ? */
885                         if (advbase != 255 || advskew != 255)
886                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
887                                     carp_send_ad, sc);
888                         return;
889                 }
890                 len = sizeof(*ip6) + sizeof(ch);
891                 m->m_pkthdr.len = len;
892                 m->m_pkthdr.rcvif = NULL;
893                 m->m_len = len;
894                 MH_ALIGN(m, m->m_len);
895                 m->m_flags |= M_MCAST;
896                 ip6 = mtod(m, struct ip6_hdr *);
897                 bzero(ip6, sizeof(*ip6));
898                 ip6->ip6_vfc |= IPV6_VERSION;
899                 ip6->ip6_hlim = CARP_DFLTTL;
900                 ip6->ip6_nxt = IPPROTO_CARP;
901                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
902                     sizeof(struct in6_addr));
903                 /* set the multicast destination */
904
905                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
906                 ip6->ip6_dst.s6_addr8[15] = 0x12;
907                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
908                         SC2IFP(sc)->if_oerrors++;
909                         m_freem(m);
910                         CARP_LOG("%s: in6_setscope failed\n", __func__);
911                         return;
912                 }
913
914                 ch_ptr = (struct carp_header *)(&ip6[1]);
915                 bcopy(&ch, ch_ptr, sizeof(ch));
916                 if (carp_prepare_ad(m, sc, ch_ptr))
917                         return;
918                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
919
920                 getmicrotime(&SC2IFP(sc)->if_lastchange);
921                 SC2IFP(sc)->if_opackets++;
922                 SC2IFP(sc)->if_obytes += len;
923                 carpstats.carps_opackets6++;
924
925                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
926                         SC2IFP(sc)->if_oerrors++;
927                         if (sc->sc_sendad_errors < INT_MAX)
928                                 sc->sc_sendad_errors++;
929                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
930                                 carp_suppress_preempt++;
931                                 if (carp_suppress_preempt == 1) {
932                                         carp_send_ad_all();
933                                 }
934                         }
935                         sc->sc_sendad_success = 0;
936                 } else {
937                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
938                                 if (++sc->sc_sendad_success >=
939                                     CARP_SENDAD_MIN_SUCCESS) {
940                                         carp_suppress_preempt--;
941                                         sc->sc_sendad_errors = 0;
942                                 }
943                         } else {
944                                 sc->sc_sendad_errors = 0;
945                         }
946                 }
947         }
948 #endif /* INET6 */
949
950         if (advbase != 255 || advskew != 255)
951                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
952                     carp_send_ad, sc);
953 }
954
955 /*
956  * Broadcast a gratuitous ARP request containing
957  * the virtual router MAC address for each IP address
958  * associated with the virtual router.
959  */
960 static void
961 carp_send_arp(struct carp_softc *sc)
962 {
963         struct ifaddr_container *ifac;
964
965         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
966                 struct ifaddr *ifa = ifac->ifa;
967
968                 if (ifa->ifa_addr->sa_family != AF_INET)
969                         continue;
970                 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));        
971
972                 DELAY(1000);    /* XXX */
973         }
974 }
975
976 #ifdef INET6
977 static void
978 carp_send_na(struct carp_softc *sc)
979 {
980         struct ifaddr_container *ifac;
981         struct in6_addr *in6;
982         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
983
984         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
985                 struct ifaddr *ifa = ifac->ifa;
986
987                 if (ifa->ifa_addr->sa_family != AF_INET6)
988                         continue;
989
990                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
991                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
992                     ND_NA_FLAG_OVERRIDE, 1, NULL);
993                 DELAY(1000);    /* XXX */
994         }
995 }
996 #endif /* INET6 */
997
998 static int
999 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1000 {
1001         struct carp_softc *vh;
1002         int count = 0;
1003
1004         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1005                 if ((type == CARP_COUNT_RUNNING &&
1006                      (SC2IFP(vh)->if_flags & IFF_UP) &&
1007                      (SC2IFP(vh)->if_flags & IFF_RUNNING)) ||
1008                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1009                         struct ifaddr_container *ifac;
1010
1011                         TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid],
1012                                       ifa_link) {
1013                                 struct ifaddr *ifa = ifac->ifa;
1014
1015                                 if (ifa->ifa_addr->sa_family == AF_INET &&
1016                                     ia->ia_addr.sin_addr.s_addr ==
1017                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1018                                         count++;
1019                         }
1020                 }
1021         }
1022         return (count);
1023 }
1024
1025 int
1026 carp_iamatch(void *v, struct in_ifaddr *ia,
1027     struct in_addr *isaddr, uint8_t **enaddr)
1028 {
1029         struct carp_if *cif = v;
1030         struct carp_softc *vh;
1031         int index, count = 0;
1032
1033         if (carp_opts[CARPCTL_ARPBALANCE]) {
1034                 /*
1035                  * XXX proof of concept implementation.
1036                  * We use the source ip to decide which virtual host should
1037                  * handle the request. If we're master of that virtual host,
1038                  * then we respond, otherwise, just drop the arp packet on
1039                  * the floor.
1040                  */
1041                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
1042                 if (count == 0) {
1043                         /* should never reach this */
1044                         return (0);
1045                 }
1046
1047                 /* this should be a hash, like pf_hash() */
1048                 index = ntohl(isaddr->s_addr) % count;
1049                 count = 0;
1050
1051                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1052                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
1053                             (SC2IFP(vh)->if_flags & IFF_RUNNING)) {
1054                                 struct ifaddr_container *ifac;
1055
1056                                 TAILQ_FOREACH(ifac,
1057                                 &SC2IFP(vh)->if_addrheads[mycpuid], ifa_link) {
1058                                         struct ifaddr *ifa = ifac->ifa;
1059
1060                                         if (ifa->ifa_addr->sa_family ==
1061                                             AF_INET &&
1062                                             ia->ia_addr.sin_addr.s_addr ==
1063                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
1064                                                 if (count == index) {
1065                                                         if (vh->sc_state == MASTER) {
1066                                                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1067                                                                 return (1);
1068                                                         } else {
1069                                                                 return (0);
1070                                                         }
1071                                                 }
1072                                                 count++;
1073                                         }
1074                                 }
1075                         }
1076                 }
1077         } else {
1078                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1079                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
1080                             (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
1081                             vh->sc_state == MASTER) {
1082                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1083                                 return (1);
1084                         }
1085                 }
1086         }
1087         return(0);
1088 }
1089
1090 #ifdef INET6
1091 struct ifaddr *
1092 carp_iamatch6(void *v, struct in6_addr *taddr)
1093 {
1094         struct carp_if *cif = v;
1095         struct carp_softc *vh;
1096
1097         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1098                 struct ifaddr_container *ifac;
1099
1100                 TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid],
1101                               ifa_link) {
1102                         struct ifaddr *ifa = ifac->ifa;
1103
1104                         if (IN6_ARE_ADDR_EQUAL(taddr,
1105                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1106                             (SC2IFP(vh)->if_flags & IFF_UP) &&
1107                             (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
1108                             vh->sc_state == MASTER) {
1109                                 return (ifa);
1110                         }
1111                 }
1112         }
1113         return (NULL);
1114 }
1115
1116 void *
1117 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1118 {
1119         struct m_tag *mtag;
1120         struct carp_if *cif = v;
1121         struct carp_softc *sc;
1122
1123         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1124                 struct ifaddr_container *ifac;
1125
1126                 TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid],
1127                               ifa_link) {
1128                         struct ifaddr *ifa = ifac->ifa;
1129
1130                         if (IN6_ARE_ADDR_EQUAL(taddr,
1131                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1132                             (SC2IFP(sc)->if_flags & IFF_UP) &&
1133                             (SC2IFP(sc)->if_flags & IFF_RUNNING)) {
1134                                 struct ifnet *ifp = SC2IFP(sc);
1135                                 mtag = m_tag_get(PACKET_TAG_CARP,
1136                                     sizeof(struct ifnet *), MB_DONTWAIT);
1137                                 if (mtag == NULL) {
1138                                         /* better a bit than nothing */
1139                                         return (IF_LLADDR(sc->sc_ifp));
1140                                 }
1141                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1142                                     sizeof(struct ifnet *));
1143                                 m_tag_prepend(m, mtag);
1144
1145                                 return (IF_LLADDR(sc->sc_ifp));
1146                         }
1147                 }
1148         }
1149         return (NULL);
1150 }
1151 #endif
1152
1153 struct ifnet *
1154 carp_forus(void *v, void *dhost)
1155 {
1156         struct carp_if *cif = v;
1157         struct carp_softc *vh;
1158         uint8_t *ena = dhost;
1159         
1160         /**
1161          * XXX: See here for check on MAC adr is not for virtual use
1162          *
1163          **/
1164
1165         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1166                 return (NULL);
1167
1168         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1169                 if ((SC2IFP(vh)->if_flags & IFF_UP) &&
1170                     (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
1171                     vh->sc_state == MASTER &&
1172                     !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
1173                         return (SC2IFP(vh));
1174                 }
1175         }
1176         return (NULL);
1177 }
1178
1179 static void
1180 carp_master_down(void *v)
1181 {
1182         struct carp_softc *sc = v;
1183
1184         lwkt_serialize_enter(sc->sc_ifp->if_serializer);
1185         carp_master_down_locked(sc);
1186         lwkt_serialize_exit(sc->sc_ifp->if_serializer);
1187 }
1188
1189 static void
1190 carp_master_down_locked(struct carp_softc *sc)
1191 {
1192         switch (sc->sc_state) {
1193         case INIT:
1194                 kprintf("%s: master_down event in INIT state\n",
1195                     SC2IFP(sc)->if_xname);
1196                 break;
1197
1198         case MASTER:
1199                 break;
1200
1201         case BACKUP:
1202                 carp_set_state(sc, MASTER);
1203                 carp_send_ad_locked(sc);
1204                 carp_send_arp(sc);
1205 #ifdef INET6
1206                 carp_send_na(sc);
1207 #endif /* INET6 */
1208                 carp_setrun(sc, 0);
1209                 carp_setroute(sc, RTM_ADD);
1210                 break;
1211         }
1212 }
1213
1214 /*
1215  * When in backup state, af indicates whether to reset the master down timer
1216  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1217  */
1218 static void
1219 carp_setrun(struct carp_softc *sc, sa_family_t af)
1220 {
1221         struct timeval tv;
1222
1223         if (sc->sc_carpdev == NULL) {
1224                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1225                 carp_set_state(sc, INIT);
1226                 return;
1227         }
1228
1229         if (SC2IFP(sc)->if_flags & IFF_UP &&
1230             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) {
1231                 SC2IFP(sc)->if_flags |= IFF_RUNNING;
1232         } else {
1233                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1234                 carp_setroute(sc, RTM_DELETE);
1235                 return;
1236         }
1237
1238         switch (sc->sc_state) {
1239         case INIT:
1240                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1241                         carp_send_ad_locked(sc);
1242                         carp_send_arp(sc);
1243 #ifdef INET6
1244                         carp_send_na(sc);
1245 #endif /* INET6 */
1246                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1247                                    SC2IFP(sc)->if_xname);
1248                         carp_set_state(sc, MASTER);
1249                         carp_setroute(sc, RTM_ADD);
1250                 } else {
1251                         CARP_DEBUG("%s: INIT -> BACKUP\n",
1252                                    SC2IFP(sc)->if_xname);
1253                         carp_set_state(sc, BACKUP);
1254                         carp_setroute(sc, RTM_DELETE);
1255                         carp_setrun(sc, 0);
1256                 }
1257                 break;
1258
1259         case BACKUP:
1260                 callout_stop(&sc->sc_ad_tmo);
1261                 tv.tv_sec = 3 * sc->sc_advbase;
1262                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1263                 switch (af) {
1264 #ifdef INET
1265                 case AF_INET:
1266                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1267                             carp_master_down, sc);
1268                         break;
1269 #endif /* INET */
1270 #ifdef INET6
1271                 case AF_INET6:
1272                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1273                             carp_master_down, sc);
1274                         break;
1275 #endif /* INET6 */
1276                 default:
1277                         if (sc->sc_naddrs)
1278                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1279                                     carp_master_down, sc);
1280                         if (sc->sc_naddrs6)
1281                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1282                                     carp_master_down, sc);
1283                         break;
1284                 }
1285                 break;
1286
1287         case MASTER:
1288                 tv.tv_sec = sc->sc_advbase;
1289                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1290                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1291                     carp_send_ad, sc);
1292                 break;
1293         }
1294 }
1295
1296 static void
1297 carp_multicast_cleanup(struct carp_softc *sc)
1298 {
1299         struct ip_moptions *imo = &sc->sc_imo;
1300         uint16_t n = imo->imo_num_memberships;
1301
1302         /* Clean up our own multicast memberships */
1303         while (n-- > 0) {
1304                 if (imo->imo_membership[n] != NULL) {
1305                         in_delmulti(imo->imo_membership[n]);
1306                         imo->imo_membership[n] = NULL;
1307                 }
1308         }
1309         imo->imo_num_memberships = 0;
1310         imo->imo_multicast_ifp = NULL;
1311 }
1312
1313 #ifdef INET6
1314 static void
1315 carp_multicast6_cleanup(struct carp_softc *sc)
1316 {
1317         struct ip6_moptions *im6o = &sc->sc_im6o;
1318
1319         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1320                 struct in6_multi_mship *imm =
1321                     LIST_FIRST(&im6o->im6o_memberships);
1322
1323                 LIST_REMOVE(imm, i6mm_chain);
1324                 in6_leavegroup(imm);
1325         }
1326         im6o->im6o_multicast_ifp = NULL;
1327 }
1328 #endif
1329
1330 static int
1331 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1332 {
1333         struct ifnet *ifp;
1334         struct carp_if *cif;
1335         struct in_ifaddr *ia, *ia_if;
1336         struct in_ifaddr_container *iac;
1337         struct ip_moptions *imo = &sc->sc_imo;
1338         struct in_addr addr;
1339         u_long iaddr = htonl(sin->sin_addr.s_addr);
1340         int own, error;
1341         
1342         if (sin->sin_addr.s_addr == 0) {
1343                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1344                         carp_set_state(sc, INIT);
1345                 if (sc->sc_naddrs)
1346                         SC2IFP(sc)->if_flags |= IFF_UP;
1347                 carp_setrun(sc, 0);
1348                 return (0);
1349         }
1350         /* we have to do it by hands to check we won't match on us */
1351         ia_if = NULL; own = 0;
1352         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1353                 ia = iac->ia;
1354
1355                 /* and, yeah, we need a multicast-capable iface too */
1356                 if (ia->ia_ifp != SC2IFP(sc) &&
1357                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1358                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1359                         if (!ia_if)
1360                                 ia_if = ia;
1361                         if (sin->sin_addr.s_addr ==
1362                             ia->ia_addr.sin_addr.s_addr)
1363                                 own++;
1364                 }
1365         }
1366
1367         if (!ia_if)
1368                 return (EADDRNOTAVAIL);
1369
1370         ia = ia_if;
1371         ifp = ia->ia_ifp;
1372
1373         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1374             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
1375                 return (EADDRNOTAVAIL);
1376
1377         if (imo->imo_num_memberships == 0) {
1378                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1379                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
1380                         return (ENOBUFS);
1381                 imo->imo_num_memberships++;
1382                 imo->imo_multicast_ifp = ifp;
1383                 imo->imo_multicast_ttl = CARP_DFLTTL;
1384                 imo->imo_multicast_loop = 0;
1385         }
1386
1387         if (!ifp->if_carp) {
1388                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
1389                     M_WAITOK|M_ZERO);
1390                 if ((error = ifpromisc(ifp, 1))) {
1391                         FREE(cif, M_CARP);
1392                         goto cleanup;
1393                 }
1394                 
1395                 cif->vhif_ifp = ifp;
1396                 TAILQ_INIT(&cif->vhif_vrs);
1397                 ifp->if_carp = cif;
1398         } else {
1399                 struct carp_softc *vr;
1400
1401                 cif = (struct carp_if *)ifp->if_carp;
1402                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1403                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1404                                 error = EINVAL;
1405                                 goto cleanup;
1406                         }
1407                 }
1408         }
1409         sc->sc_ia = ia;
1410         sc->sc_carpdev = ifp;
1411
1412         { /* XXX prevent endless loop if already in queue */
1413         struct carp_softc *vr, *after = NULL;
1414         int myself = 0;
1415         cif = (struct carp_if *)ifp->if_carp;
1416
1417         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1418                 if (vr == sc)
1419                         myself = 1;
1420                 if (vr->sc_vhid < sc->sc_vhid)
1421                         after = vr;
1422         }
1423
1424         if (!myself) {
1425                 /* We're trying to keep things in order */
1426                 if (after == NULL)
1427                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1428                 else
1429                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1430                 cif->vhif_nvrs++;
1431         }
1432         }
1433
1434         sc->sc_naddrs++;
1435         SC2IFP(sc)->if_flags |= IFF_UP;
1436         if (own)
1437                 sc->sc_advskew = 0;
1438
1439         carp_sc_state_locked(sc);
1440         carp_setrun(sc, 0);
1441
1442         return (0);
1443
1444 cleanup:
1445         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1446         return (error);
1447 }
1448
1449 static int
1450 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1451 {
1452         int error = 0;
1453
1454         if (!--sc->sc_naddrs) {
1455                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1456                 struct ip_moptions *imo = &sc->sc_imo;
1457
1458                 callout_stop(&sc->sc_ad_tmo);
1459                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1460                 sc->sc_vhid = -1;
1461                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1462                 imo->imo_multicast_ifp = NULL;
1463                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1464                 if (!--cif->vhif_nvrs) {
1465                         sc->sc_carpdev->if_carp = NULL;
1466                         FREE(cif, M_IFADDR);
1467                 }
1468         }
1469         return (error);
1470 }
1471
1472 #ifdef INET6
1473 static int
1474 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1475 {
1476         struct ifnet *ifp;
1477         struct carp_if *cif;
1478         struct in6_ifaddr *ia, *ia_if;
1479         struct ip6_moptions *im6o = &sc->sc_im6o;
1480         struct in6_multi_mship *imm;
1481         struct in6_addr in6;
1482         int own, error;
1483
1484         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1485                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1486                         carp_set_state(sc, INIT);
1487                 if (sc->sc_naddrs6)
1488                         SC2IFP(sc)->if_flags |= IFF_UP;
1489                 carp_setrun(sc, 0);
1490                 return (0);
1491         }
1492
1493         /* we have to do it by hands to check we won't match on us */
1494         ia_if = NULL; own = 0;
1495         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1496                 int i;
1497
1498                 for (i = 0; i < 4; i++) {
1499                         if ((sin6->sin6_addr.s6_addr32[i] &
1500                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1501                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1502                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1503                                 break;
1504                 }
1505                 /* and, yeah, we need a multicast-capable iface too */
1506                 if (ia->ia_ifp != SC2IFP(sc) &&
1507                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1508                     (i == 4)) {
1509                         if (!ia_if)
1510                                 ia_if = ia;
1511                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1512                             &ia->ia_addr.sin6_addr))
1513                                 own++;
1514                 }
1515         }
1516
1517         if (!ia_if)
1518                 return (EADDRNOTAVAIL);
1519         ia = ia_if;
1520         ifp = ia->ia_ifp;
1521
1522         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1523             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
1524                 return (EADDRNOTAVAIL);
1525
1526         if (!sc->sc_naddrs6) {
1527                 im6o->im6o_multicast_ifp = ifp;
1528
1529                 /* join CARP multicast address */
1530                 bzero(&in6, sizeof(in6));
1531                 in6.s6_addr16[0] = htons(0xff02);
1532                 in6.s6_addr8[15] = 0x12;
1533                 if (in6_setscope(&in6, ifp, NULL) != 0)
1534                         goto cleanup;
1535                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1536                         goto cleanup;
1537                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1538
1539                 /* join solicited multicast address */
1540                 bzero(&in6, sizeof(in6));
1541                 in6.s6_addr16[0] = htons(0xff02);
1542                 in6.s6_addr32[1] = 0;
1543                 in6.s6_addr32[2] = htonl(1);
1544                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
1545                 in6.s6_addr8[12] = 0xff;
1546                 if (in6_setscope(&in6, ifp, NULL) != 0)
1547                         goto cleanup;
1548                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1549                         goto cleanup;
1550                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1551         }
1552
1553         if (!ifp->if_carp) {
1554                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
1555                     M_WAITOK|M_ZERO);
1556                 if ((error = ifpromisc(ifp, 1))) {
1557                         FREE(cif, M_CARP);
1558                         goto cleanup;
1559                 }
1560
1561                 cif->vhif_ifp = ifp;
1562                 TAILQ_INIT(&cif->vhif_vrs);
1563                 ifp->if_carp = cif;
1564         } else {
1565                 struct carp_softc *vr;
1566
1567                 cif = (struct carp_if *)ifp->if_carp;
1568                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1569                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1570                                 error = EINVAL;
1571                                 goto cleanup;
1572                         }
1573                 }
1574         }
1575         sc->sc_ia6 = ia;
1576         sc->sc_carpdev = ifp;
1577
1578         { /* XXX prevent endless loop if already in queue */
1579         struct carp_softc *vr, *after = NULL;
1580         int myself = 0;
1581         cif = (struct carp_if *)ifp->if_carp;
1582
1583         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1584                 if (vr == sc)
1585                         myself = 1;
1586                 if (vr->sc_vhid < sc->sc_vhid)
1587                         after = vr;
1588         }
1589
1590         if (!myself) {
1591                 /* We're trying to keep things in order */
1592                 if (after == NULL)
1593                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1594                 else
1595                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1596                 cif->vhif_nvrs++;
1597         }
1598         }
1599
1600         sc->sc_naddrs6++;
1601         SC2IFP(sc)->if_flags |= IFF_UP;
1602         if (own)
1603                 sc->sc_advskew = 0;
1604         carp_sc_state_locked(sc);
1605         carp_setrun(sc, 0);
1606
1607         return (0);
1608
1609 cleanup:
1610         /* clean up multicast memberships */
1611         if (!sc->sc_naddrs6) {
1612                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1613                         imm = LIST_FIRST(&im6o->im6o_memberships);
1614                         LIST_REMOVE(imm, i6mm_chain);
1615                         in6_leavegroup(imm);
1616                 }
1617         }
1618         return (error);
1619 }
1620
1621 static int
1622 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1623 {
1624         int error = 0;
1625
1626         if (!--sc->sc_naddrs6) {
1627                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1628                 struct ip6_moptions *im6o = &sc->sc_im6o;
1629
1630                 callout_stop(&sc->sc_ad_tmo);
1631                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1632                 sc->sc_vhid = -1;
1633                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1634                         struct in6_multi_mship *imm =
1635                             LIST_FIRST(&im6o->im6o_memberships);
1636
1637                         LIST_REMOVE(imm, i6mm_chain);
1638                         in6_leavegroup(imm);
1639                 }
1640                 im6o->im6o_multicast_ifp = NULL;
1641                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1642                 if (!--cif->vhif_nvrs) {
1643                         sc->sc_carpdev->if_carp = NULL;
1644                         FREE(cif, M_IFADDR);
1645                 }
1646         }
1647         return (error);
1648 }
1649 #endif /* INET6 */
1650
1651 static int
1652 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
1653 {
1654         struct carp_softc *sc = ifp->if_softc, *vr;
1655         struct carpreq carpr;
1656         struct ifaddr *ifa;
1657         struct ifreq *ifr;
1658         struct ifaliasreq *ifra;
1659         int error = 0;
1660
1661         ifa = (struct ifaddr *)addr;
1662         ifra = (struct ifaliasreq *)addr;
1663         ifr = (struct ifreq *)addr;
1664
1665         switch (cmd) {
1666         case SIOCSIFADDR:
1667                 switch (ifa->ifa_addr->sa_family) {
1668 #ifdef INET
1669                 case AF_INET:
1670                         SC2IFP(sc)->if_flags |= IFF_UP;
1671                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1672                             sizeof(struct sockaddr));
1673                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1674                         break;
1675 #endif /* INET */
1676 #ifdef INET6
1677                 case AF_INET6:
1678                         SC2IFP(sc)->if_flags |= IFF_UP;
1679                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1680                         break;
1681 #endif /* INET6 */
1682                 default:
1683                         error = EAFNOSUPPORT;
1684                         break;
1685                 }
1686                 break;
1687
1688         case SIOCAIFADDR:
1689                 switch (ifa->ifa_addr->sa_family) {
1690 #ifdef INET
1691                 case AF_INET:
1692                         SC2IFP(sc)->if_flags |= IFF_UP;
1693                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1694                             sizeof(struct sockaddr));
1695                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
1696                         break;
1697 #endif /* INET */
1698 #ifdef INET6
1699                 case AF_INET6:
1700                         SC2IFP(sc)->if_flags |= IFF_UP;
1701                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
1702                         break;
1703 #endif /* INET6 */
1704                 default:
1705                         error = EAFNOSUPPORT;
1706                         break;
1707                 }
1708                 break;
1709
1710         case SIOCDIFADDR:
1711                 switch (ifa->ifa_addr->sa_family) {
1712 #ifdef INET
1713                 case AF_INET:
1714                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
1715                         break;
1716 #endif /* INET */
1717 #ifdef INET6
1718                 case AF_INET6:
1719                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
1720                         break;
1721 #endif /* INET6 */
1722                 default:
1723                         error = EAFNOSUPPORT;
1724                         break;
1725                 }
1726                 break;
1727
1728         case SIOCSIFFLAGS:
1729                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1730                         callout_stop(&sc->sc_ad_tmo);
1731                         callout_stop(&sc->sc_md_tmo);
1732                         callout_stop(&sc->sc_md6_tmo);
1733                         if (sc->sc_state == MASTER)
1734                                 carp_send_ad_locked(sc);
1735                         carp_set_state(sc, INIT);
1736                         carp_setrun(sc, 0);
1737                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
1738                         SC2IFP(sc)->if_flags |= IFF_UP;
1739                         carp_setrun(sc, 0);
1740                 }
1741                 break;
1742
1743         case SIOCSVH:
1744                 error = suser_cred(cr, NULL_CRED_OKAY);
1745                 if (error)
1746                         break;
1747                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1748                         break;
1749                 error = 1;
1750                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1751                         switch (carpr.carpr_state) {
1752                         case BACKUP:
1753                                 callout_stop(&sc->sc_ad_tmo);
1754                                 carp_set_state(sc, BACKUP);
1755                                 carp_setrun(sc, 0);
1756                                 carp_setroute(sc, RTM_DELETE);
1757                                 break;
1758
1759                         case MASTER:
1760                                 carp_master_down_locked(sc);
1761                                 break;
1762
1763                         default:
1764                                 break;
1765                         }
1766                 }
1767                 if (carpr.carpr_vhid > 0) {
1768                         if (carpr.carpr_vhid > 255) {
1769                                 error = EINVAL;
1770                                 break;
1771                         }
1772                         if (sc->sc_carpdev) {
1773                                 struct carp_if *cif;
1774                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1775                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1776                                         if (vr != sc &&
1777                                             vr->sc_vhid == carpr.carpr_vhid)
1778                                                 return EEXIST;
1779                                 }
1780                         }
1781                         sc->sc_vhid = carpr.carpr_vhid;
1782                         IF_LLADDR(sc->sc_ifp)[0] = 0;
1783                         IF_LLADDR(sc->sc_ifp)[1] = 0;
1784                         IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
1785                         IF_LLADDR(sc->sc_ifp)[3] = 0;
1786                         IF_LLADDR(sc->sc_ifp)[4] = 1;
1787                         IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
1788                         error--;
1789                 }
1790                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
1791                         if (carpr.carpr_advskew >= 255) {
1792                                 error = EINVAL;
1793                                 break;
1794                         }
1795                         if (carpr.carpr_advbase > 255) {
1796                                 error = EINVAL;
1797                                 break;
1798                         }
1799                         sc->sc_advbase = carpr.carpr_advbase;
1800                         sc->sc_advskew = carpr.carpr_advskew;
1801                         error--;
1802                 }
1803                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1804                 if (error > 0) {
1805                         error = EINVAL;
1806                 } else {
1807                         error = 0;
1808                         carp_setrun(sc, 0);
1809                 }
1810                 break;
1811
1812         case SIOCGVH:
1813                 bzero(&carpr, sizeof(carpr));
1814                 carpr.carpr_state = sc->sc_state;
1815                 carpr.carpr_vhid = sc->sc_vhid;
1816                 carpr.carpr_advbase = sc->sc_advbase;
1817                 carpr.carpr_advskew = sc->sc_advskew;
1818                 error = suser_cred(cr, NULL_CRED_OKAY);
1819                 if (error == 0) {
1820                         bcopy(sc->sc_key, carpr.carpr_key,
1821                             sizeof(carpr.carpr_key));
1822                 }
1823                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1824                 break;
1825
1826         default:
1827                 error = EINVAL;
1828         }
1829         carp_hmac_prepare(sc);
1830         return (error);
1831 }
1832
1833 /*
1834  * XXX: this is looutput. We should eventually use it from there.
1835  */
1836 static int
1837 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1838     struct rtentry *rt)
1839 {
1840         uint32_t af;
1841
1842         M_ASSERTPKTHDR(m); /* check if we have the packet header */
1843
1844         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
1845                 m_freem(m);
1846                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
1847                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1848         }
1849
1850         ifp->if_opackets++;
1851         ifp->if_obytes += m->m_pkthdr.len;
1852
1853         /* BPF writes need to be handled specially. */
1854         if (dst->sa_family == AF_UNSPEC) {
1855                 bcopy(dst->sa_data, &af, sizeof(af));
1856                 dst->sa_family = af;
1857         }
1858
1859 #if 1   /* XXX */
1860         switch (dst->sa_family) {
1861         case AF_INET:
1862         case AF_INET6:
1863         case AF_IPX:
1864         case AF_APPLETALK:
1865                 break;
1866
1867         default:
1868                 m_freem(m);
1869                 return (EAFNOSUPPORT);
1870         }
1871 #endif
1872         return (if_simloop(ifp, m, dst->sa_family, 0));
1873 }
1874
/*
 * Output start routine for the carp interface.  It is not expected to be
 * invoked; when DEBUG is defined a note is printed if it ever is.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
        kprintf("%s: start called\n", ifp->if_xname);
#endif
}
1885
1886 int
1887 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
1888     struct rtentry *rt)
1889 {
1890         struct m_tag *mtag;
1891         struct carp_softc *sc;
1892         struct ifnet *carp_ifp;
1893         struct ether_header *eh;
1894
1895         if (!sa)
1896                 return (0);
1897
1898         switch (sa->sa_family) {
1899 #ifdef INET
1900         case AF_INET:
1901                 break;
1902 #endif /* INET */
1903 #ifdef INET6
1904         case AF_INET6:
1905                 break;
1906 #endif /* INET6 */
1907         default:
1908                 return (0);
1909         }
1910
1911         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1912         if (mtag == NULL)
1913                 return (0);
1914
1915         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
1916         sc = carp_ifp->if_softc;
1917
1918         /* Set the source MAC address to Virtual Router MAC Address */
1919         switch (ifp->if_type) {
1920         case IFT_ETHER:
1921         case IFT_L2VLAN:
1922                 eh = mtod(m, struct ether_header *);
1923                 eh->ether_shost[0] = 0;
1924                 eh->ether_shost[1] = 0;
1925                 eh->ether_shost[2] = 0x5e;
1926                 eh->ether_shost[3] = 0;
1927                 eh->ether_shost[4] = 1;
1928                 eh->ether_shost[5] = sc->sc_vhid;
1929                 break;
1930
1931         default:
1932                 if_printf(ifp, "carp is not supported for this "
1933                           "interface type\n");
1934                 return (EOPNOTSUPP);
1935         }
1936         return (0);
1937 }
1938
1939 static void
1940 carp_set_state(struct carp_softc *sc, int state)
1941 {
1942         if (sc->sc_state == state)
1943                 return;
1944
1945         sc->sc_state = state;
1946         switch (state) {
1947         case BACKUP:
1948                 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN;
1949                 break;
1950
1951         case MASTER:
1952                 SC2IFP(sc)->if_link_state = LINK_STATE_UP;
1953                 break;
1954
1955         default:
1956                 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN;
1957                 break;
1958         }
1959         rt_ifmsg(SC2IFP(sc));
1960 }
1961
/*
 * Public entry point taking the carp_if as an opaque pointer; it simply
 * forwards to carp_carpdev_state_locked().
 */
void
carp_carpdev_state(void *v)
{
        carp_carpdev_state_locked((struct carp_if *)v);
}
1969
1970 static void
1971 carp_carpdev_state_locked(struct carp_if *cif)
1972 {
1973         struct carp_softc *sc;
1974
1975         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
1976                 carp_sc_state_locked(sc);
1977 }
1978
1979 static void
1980 carp_sc_state_locked(struct carp_softc *sc)
1981 {
1982         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
1983                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
1984                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1985                 callout_stop(&sc->sc_ad_tmo);
1986                 callout_stop(&sc->sc_md_tmo);
1987                 callout_stop(&sc->sc_md6_tmo);
1988                 carp_set_state(sc, INIT);
1989                 carp_setrun(sc, 0);
1990                 if (!sc->sc_suppress) {
1991                         carp_suppress_preempt++;
1992                         if (carp_suppress_preempt == 1)
1993                                 carp_send_ad_all();
1994                 }
1995                 sc->sc_suppress = 1;
1996         } else {
1997                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
1998                 carp_set_state(sc, INIT);
1999                 carp_setrun(sc, 0);
2000                 if (sc->sc_suppress)
2001                         carp_suppress_preempt--;
2002                 sc->sc_suppress = 0;
2003         }
2004 }
2005
2006 static int
2007 carp_modevent(module_t mod, int type, void *data)
2008 {
2009         switch (type) {
2010         case MOD_LOAD:
2011                 LIST_INIT(&carpif_list);
2012                 carp_ifdetach_event =
2013                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
2014                                       EVENTHANDLER_PRI_ANY);
2015                 if_clone_attach(&carp_cloner);
2016                 break;
2017
2018         case MOD_UNLOAD:
2019                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
2020                                         carp_ifdetach_event);
2021                 if_clone_detach(&carp_cloner);
2022                 break;
2023
2024         default:
2025                 return (EINVAL);
2026         }
2027         return (0);
2028 }
2029
2030 static moduledata_t carp_mod = {
2031         "carp",
2032         carp_modevent,
2033         0
2034 };
2035 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);