suser_* to priv_* conversion
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $
29  */
30
31 #include "opt_carp.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/in_cksum.h>
39 #include <sys/limits.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49
50 #include <machine/stdarg.h>
51 #include <crypto/sha1.h>
52
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/if_dl.h>
57 #include <net/if_types.h>
58 #include <net/route.h>
59 #include <net/if_clone.h>
60
61 #ifdef INET
62 #include <netinet/in.h>
63 #include <netinet/in_var.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip_var.h>
67 #include <netinet/if_ether.h>
68 #endif
69
70 #ifdef INET6
71 #include <netinet/icmp6.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet6/scope6_var.h>
75 #include <netinet6/nd6.h>
76 #endif
77
78 #include <netinet/ip_carp.h>
79
/* Base name handed to if_initname() and the cloner for carp interfaces. */
80 #define CARP_IFNAME             "carp"
/* True only when both IFF_UP and IFF_RUNNING are set on the interface. */
81 #define CARP_IS_RUNNING(ifp)    \
82         (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
83
/*
 * Per-virtual-host state.  One instance is allocated (zeroed) by
 * carp_clone_create() for every cloned carp interface and released by
 * carp_clone_destroy().
 */
84 struct carp_softc {
85         struct ifnet             sc_if;         /* embedded ifnet attached in carp_clone_create() */
86         struct ifnet            *sc_ifp;        /* compat shim; set to &sc_if, read via SC2IFP() */
87         struct ifnet            *sc_carpdev;    /* parent interface */
88         struct in_ifaddr        *sc_ia;         /* primary iface address */
89         struct ip_moptions       sc_imo;
90 #ifdef INET6
91         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
92         struct ip6_moptions      sc_im6o;
93 #endif /* INET6 */
94         TAILQ_ENTRY(carp_softc)  sc_list;       /* linkage on the parent's carp_if vhif_vrs list */
95
96         enum { INIT = 0, BACKUP, MASTER }
97                                  sc_state;
98
99         int                      sc_flags_backup;
100         int                      sc_suppress;   /* nonzero: this softc holds a carp_suppress_preempt reference */
101
102         int                      sc_sendad_errors;      /* failed advertisement sends; reset on success */
103 #define CARP_SENDAD_MAX_ERRORS  3
104         int                      sc_sendad_success;     /* successful sends counted while recovering */
105 #define CARP_SENDAD_MIN_SUCCESS 3
106
107         int                      sc_vhid;       /* -1 until configured; compared with carp_vhid on input */
108         int                      sc_advskew;    /* contributes advskew/256 of a second to the interval */
109         int                      sc_naddrs;
110         int                      sc_naddrs6;
111         int                      sc_advbase;    /* seconds */
112         int                      sc_init_counter;       /* nonzero: sc_counter is re-seeded randomly on send */
113         uint64_t                 sc_counter;    /* 64-bit advertisement counter carried in carp_counter[] */
114
115         /* authentication */
116 #define CARP_HMAC_PAD   64
117         unsigned char            sc_key[CARP_KEY_LEN];
118         unsigned char            sc_pad[CARP_HMAC_PAD]; /* holds the outer pad after carp_hmac_prepare() */
119         SHA1_CTX                 sc_sha1;       /* precomputed first part of the inner hash */
120
121         struct callout           sc_ad_tmo;     /* advertisement timeout */
122         struct callout           sc_md_tmo;     /* master down timeout */
123         struct callout           sc_md6_tmo;    /* master down timeout */
124
125         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue; linkage on carpif_list */
126 };
127 #define SC2IFP(sc)      ((sc)->sc_ifp)
128
/*
 * State attached to a parent interface through its if_carp pointer.
 * carpdetach() frees it when the last virtual host is removed.
 */
129 struct carp_if {
130         TAILQ_HEAD(, carp_softc) vhif_vrs;      /* virtual hosts on this parent, linked by sc_list */
131         int             vhif_nvrs;              /* number of entries on vhif_vrs */
132
133         struct ifnet    *vhif_ifp;              /* NOTE(review): presumably the parent ifnet -- confirm */
134 };
135
136 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
137
138 SYSCTL_DECL(_net_inet_carp);
139
/*
 * Tunables exported under the net.inet.carp sysctl node.  carp_opts[] is
 * indexed by the CARPCTL_* identifiers; the initializer supplies the
 * boot-time defaults.
 */
140 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
141 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
142     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
143 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
144     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
145 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
146     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
147 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
148     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
149
/*
 * Read-only counter of conditions that currently suppress preemption.
 * While it is nonzero, advertisement skews below 240 are treated as 240.
 */
150 static int carp_suppress_preempt = 0;
151 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
152     &carp_suppress_preempt, 0, "Preemption is suppressed");
153
/* Global packet and error counters, updated on the input and output paths. */
154 static struct carpstats carpstats;
155 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
156     &carpstats, carpstats,
157     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
158
/* Log at LOG_INFO when the net.inet.carp.log setting is greater than 0. */
159 #define CARP_LOG(...)   do {                            \
160         if (carp_opts[CARPCTL_LOG] > 0)                 \
161                 log(LOG_INFO, __VA_ARGS__);             \
162 } while (0)
163
/* Log at LOG_DEBUG when the net.inet.carp.log setting is greater than 1. */
164 #define CARP_DEBUG(...) do {                            \
165         if (carp_opts[CARPCTL_LOG] > 1)                 \
166                 log(LOG_DEBUG, __VA_ARGS__);            \
167 } while (0)
168
169 static void     carp_hmac_prepare(struct carp_softc *);
170 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
171                     unsigned char *);
172 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
173                     unsigned char *);
174 static void     carp_setroute(struct carp_softc *, int);
175 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
176 static int      carp_clone_create(struct if_clone *, int);
177 static void     carp_clone_destroy(struct ifnet *);
178 static void     carpdetach(struct carp_softc *, int);
179 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
180                     struct carp_header *);
181 static void     carp_send_ad_all(void);
182 static void     carp_send_ad(void *);
183 static void     carp_send_ad_locked(struct carp_softc *);
184 static void     carp_send_arp(struct carp_softc *);
185 static void     carp_master_down(void *);
186 static void     carp_master_down_locked(struct carp_softc *);
187 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
188 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
189                     struct rtentry *);
190 static void     carp_start(struct ifnet *);
191 static void     carp_setrun(struct carp_softc *, sa_family_t);
192 static void     carp_set_state(struct carp_softc *, int);
193 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
194
195 static void     carp_multicast_cleanup(struct carp_softc *);
196 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
197 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
198 static void     carp_carpdev_state_locked(struct carp_if *);
199 static void     carp_sc_state_locked(struct carp_softc *);
200 #ifdef INET6
201 static void     carp_send_na(struct carp_softc *);
202 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
203 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
204 static void     carp_multicast6_cleanup(struct carp_softc *);
205 #endif
206
/* Malloc type used for struct carp_softc allocations. */
207 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
208
/* Every carp_softc in the system, linked through sc_next. */
209 static LIST_HEAD(, carp_softc) carpif_list;
210
/* Cloner that creates and destroys "carpN" interfaces. */
211 static struct if_clone carp_cloner =
212 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
213                      0, IF_MAXUNIT);
214
/* NOTE(review): presumably the registration tag for carp_ifdetach() -- confirm. */
215 static eventhandler_tag carp_ifdetach_event;
216
217 static void
218 carp_hmac_prepare(struct carp_softc *sc)
219 {
220         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
221         uint8_t vhid = sc->sc_vhid & 0xff;
222         struct ifaddr_container *ifac;
223         int i;
224 #ifdef INET6
225         struct in6_addr in6;
226 #endif
227
228         /* XXX: possible race here */
229
230         /* compute ipad from key */
231         bzero(sc->sc_pad, sizeof(sc->sc_pad));
232         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
233         for (i = 0; i < sizeof(sc->sc_pad); i++)
234                 sc->sc_pad[i] ^= 0x36;
235
236         /* precompute first part of inner hash */
237         SHA1Init(&sc->sc_sha1);
238         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
239         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
240         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
241         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
242 #ifdef INET
243         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
244                 struct ifaddr *ifa = ifac->ifa;
245
246                 if (ifa->ifa_addr->sa_family == AF_INET)
247                         SHA1Update(&sc->sc_sha1,
248                             (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
249                             sizeof(struct in_addr));
250         }
251 #endif /* INET */
252 #ifdef INET6
253         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
254                 struct ifaddr *ifa = ifac->ifa;
255
256                 if (ifa->ifa_addr->sa_family == AF_INET6) {
257                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
258                         in6_clearscope(&in6);
259                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
260                 }
261         }
262 #endif /* INET6 */
263
264         /* convert ipad to opad */
265         for (i = 0; i < sizeof(sc->sc_pad); i++)
266                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
267 }
268
269 static void
270 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
271     unsigned char md[20])
272 {
273         SHA1_CTX sha1ctx;
274
275         /* fetch first half of inner hash */
276         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
277
278         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
279         SHA1Final(md, &sha1ctx);
280
281         /* outer hash */
282         SHA1Init(&sha1ctx);
283         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
284         SHA1Update(&sha1ctx, md, 20);
285         SHA1Final(md, &sha1ctx);
286 }
287
/*
 * Check the HMAC carried in a received advertisement.
 *
 * sc      - virtual host the packet was matched to
 * counter - the two 32-bit counter words from the packet
 * md      - the 20-byte digest from the packet
 *
 * Returns 0 when the digest matches the locally computed one and a
 * nonzero value otherwise.
 *
 * The comparison always walks the whole digest and accumulates the
 * differences, so its duration does not reveal how many leading bytes
 * of a forged digest were correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char expected[20];
	unsigned char diff = 0;
	size_t i;

	carp_hmac_generate(sc, counter, expected);
	for (i = 0; i < sizeof(expected); i++)
		diff |= md[i] ^ expected[i];

	return (diff);
}
297
298 static void
299 carp_setroute(struct carp_softc *sc, int cmd)
300 {
301         struct ifaddr_container *ifac;
302
303         crit_enter();
304         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
305                 struct ifaddr *ifa = ifac->ifa;
306
307                 if (ifa->ifa_addr->sa_family == AF_INET &&
308                     sc->sc_carpdev != NULL) {
309                         int count = carp_addrcount(
310                             (struct carp_if *)sc->sc_carpdev->if_carp,
311                             ifatoia(ifa), CARP_COUNT_MASTER);
312
313                         if ((cmd == RTM_ADD && count == 1) ||
314                             (cmd == RTM_DELETE && count == 0))
315                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
316                 }
317 #ifdef INET6
318                 if (ifa->ifa_addr->sa_family == AF_INET6) {
319                         if (cmd == RTM_ADD)
320                                 in6_ifaddloop(ifa);
321                         else
322                                 in6_ifremloop(ifa);
323                 }
324 #endif /* INET6 */
325         }
326         crit_exit();
327 }
328
329 static int
330 carp_clone_create(struct if_clone *ifc, int unit)
331 {
332         struct carp_softc *sc;
333         struct ifnet *ifp;
334
335         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
336         ifp = sc->sc_ifp = &sc->sc_if;
337
338         sc->sc_flags_backup = 0;
339         sc->sc_suppress = 0;
340         sc->sc_advbase = CARP_DFLTINTV;
341         sc->sc_vhid = -1;       /* required setting */
342         sc->sc_advskew = 0;
343         sc->sc_init_counter = 1;
344         sc->sc_naddrs = sc->sc_naddrs6 = 0;
345
346 #ifdef INET6
347         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
348 #endif
349
350         callout_init(&sc->sc_ad_tmo);
351         callout_init(&sc->sc_md_tmo);
352         callout_init(&sc->sc_md6_tmo);
353
354         ifp->if_softc = sc;
355         if_initname(ifp, CARP_IFNAME, unit);    
356         ifp->if_mtu = ETHERMTU;
357         ifp->if_flags = IFF_LOOPBACK;
358         ifp->if_ioctl = carp_ioctl;
359         ifp->if_output = carp_looutput;
360         ifp->if_start = carp_start;
361         ifp->if_type = IFT_CARP;
362         ifp->if_snd.ifq_maxlen = ifqmaxlen;
363         ifp->if_hdrlen = 0;
364         if_attach(ifp, NULL);
365         bpfattach(ifp, DLT_NULL, sizeof(u_int));
366
367         crit_enter();
368         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
369         crit_exit();
370
371         return (0);
372 }
373
374 static void
375 carp_clone_destroy(struct ifnet *ifp)
376 {
377         struct carp_softc *sc = ifp->if_softc;
378
379         carpdetach(sc, 1);
380
381         crit_enter();
382         LIST_REMOVE(sc, sc_next);
383         crit_exit();
384         bpfdetach(ifp);
385         if_detach(ifp);
386         kfree(sc, M_CARP);
387 }
388
389 /*
390  * This function can be called on CARP interface destroy path,
391  * and in case of the removal of the underlying interface as
392  * well. We differentiate these two cases. In the latter case
393  * we do not cleanup our multicast memberships, since they
394  * are already freed.
395  */
396 static void
397 carpdetach(struct carp_softc *sc, int unlock)
398 {
399         struct carp_if *cif;
400
401         callout_stop(&sc->sc_ad_tmo);
402         callout_stop(&sc->sc_md_tmo);
403         callout_stop(&sc->sc_md6_tmo);
404
405         if (sc->sc_suppress)
406                 carp_suppress_preempt--;
407         sc->sc_suppress = 0;
408
409         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
410                 carp_suppress_preempt--;
411         sc->sc_sendad_errors = 0;
412
413         carp_set_state(sc, INIT);
414         SC2IFP(sc)->if_flags &= ~IFF_UP;
415         carp_setrun(sc, 0);
416         if (unlock)
417                 carp_multicast_cleanup(sc);
418 #ifdef INET6
419         carp_multicast6_cleanup(sc);
420 #endif
421
422         if (sc->sc_carpdev != NULL) {
423                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
424                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
425                 if (!--cif->vhif_nvrs) {
426                         ifpromisc(sc->sc_carpdev, 0);
427                         sc->sc_carpdev->if_carp = NULL;
428                         kfree(cif, M_IFADDR);
429                 }
430                 sc->sc_carpdev = NULL;
431         }
432 }
433
434 /* Detach an interface from the carp. */
435 static void
436 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
437 {
438         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
439         struct carp_softc *sc, *nextsc;
440
441         if (cif == NULL)
442                 return;
443
444         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
445                 nextsc = TAILQ_NEXT(sc, sc_list);
446                 carpdetach(sc, 0);
447         }
448 }
449
450 /*
451  * process input packet.
452  * we have rearranged checks order compared to the rfc,
453  * but it seems more efficient this way or not possible otherwise.
454  */
455 void
456 carp_input(struct mbuf *m, ...)
457 {
458         struct ip *ip = mtod(m, struct ip *);
459         struct carp_header *ch;
460         int len, iphlen;
461         __va_list ap;
462
463         __va_start(ap, m);
464         iphlen = __va_arg(ap, int);
465         __va_end(ap);
466
467         carpstats.carps_ipackets++;
468
469         if (!carp_opts[CARPCTL_ALLOW]) {
470                 m_freem(m);
471                 return;
472         }
473
474         /* Check if received on a valid carp interface */
475         if (m->m_pkthdr.rcvif->if_carp == NULL) {
476                 carpstats.carps_badif++;
477                 CARP_LOG("carp_input: packet received on non-carp "
478                     "interface: %s\n",
479                     m->m_pkthdr.rcvif->if_xname);
480                 m_freem(m);
481                 return;
482         }
483
484         /* Verify that the IP TTL is CARP_DFLTTL. */
485         if (ip->ip_ttl != CARP_DFLTTL) {
486                 carpstats.carps_badttl++;
487                 CARP_LOG("carp_input: received ttl %d != %d on %s\n",
488                     ip->ip_ttl, CARP_DFLTTL,
489                     m->m_pkthdr.rcvif->if_xname);
490                 m_freem(m);
491                 return;
492         }
493
494         /* Minimal CARP packet size */
495         len = iphlen + sizeof(*ch);
496
497         /*
498          * Verify that the received packet length is
499          * not less than the CARP header
500          */
501         if (m->m_pkthdr.len < len) {
502                 carpstats.carps_badlen++;
503                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
504                          m->m_pkthdr.rcvif->if_xname);
505                 m_freem(m);
506                 return;
507         }
508
509         /* Make sure that CARP header is contiguous */
510         if (len > m->m_len) {
511                 m = m_pullup(m, len);
512                 if (m == NULL) {
513                         carpstats.carps_hdrops++;
514                         CARP_LOG("carp_input: m_pullup failed\n");
515                         return;
516                 }
517                 ip = mtod(m, struct ip *);
518         }
519         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
520
521         /* Verify the CARP checksum */
522         if (in_cksum_skip(m, len, iphlen)) {
523                 carpstats.carps_badsum++;
524                 CARP_LOG("carp_input: checksum failed on %s\n",
525                     m->m_pkthdr.rcvif->if_xname);
526                 m_freem(m);
527                 return;
528         }
529         carp_input_c(m, ch, AF_INET);
530 }
531
532 #ifdef INET6
533 int
534 carp6_input(struct mbuf **mp, int *offp, int proto)
535 {
536         struct mbuf *m = *mp;
537         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
538         struct carp_header *ch;
539         u_int len;
540
541         carpstats.carps_ipackets6++;
542
543         if (!carp_opts[CARPCTL_ALLOW]) {
544                 m_freem(m);
545                 return (IPPROTO_DONE);
546         }
547
548         /* check if received on a valid carp interface */
549         if (m->m_pkthdr.rcvif->if_carp == NULL) {
550                 carpstats.carps_badif++;
551                 CARP_LOG("carp6_input: packet received on non-carp "
552                     "interface: %s\n",
553                     m->m_pkthdr.rcvif->if_xname);
554                 m_freem(m);
555                 return (IPPROTO_DONE);
556         }
557
558         /* verify that the IP TTL is 255 */
559         if (ip6->ip6_hlim != CARP_DFLTTL) {
560                 carpstats.carps_badttl++;
561                 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
562                     ip6->ip6_hlim,
563                     m->m_pkthdr.rcvif->if_xname);
564                 m_freem(m);
565                 return (IPPROTO_DONE);
566         }
567
568         /* verify that we have a complete carp packet */
569         len = m->m_len;
570         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
571         if (ch == NULL) {
572                 carpstats.carps_badlen++;
573                 CARP_LOG("carp6_input: packet size %u too small\n", len);
574                 return (IPPROTO_DONE);
575         }
576
577         /* verify the CARP checksum */
578         if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
579                 carpstats.carps_badsum++;
580                 CARP_LOG("carp6_input: checksum failed, on %s\n",
581                     m->m_pkthdr.rcvif->if_xname);
582                 m_freem(m);
583                 return (IPPROTO_DONE);
584         }
585
586         carp_input_c(m, ch, AF_INET6);
587         return (IPPROTO_DONE);
588 }
589 #endif /* INET6 */
590
591 static void
592 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
593 {
594         struct ifnet *ifp = m->m_pkthdr.rcvif;
595         struct carp_softc *sc;
596         uint64_t tmp_counter;
597         struct timeval sc_tv, ch_tv;
598
599         /* verify that the VHID is valid on the receiving interface */
600         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
601                 if (sc->sc_vhid == ch->carp_vhid)
602                         break;
603
604         if (!sc || !CARP_IS_RUNNING(SC2IFP(sc))) {
605                 carpstats.carps_badvhid++;
606                 m_freem(m);
607                 return;
608         }
609
610         getmicrotime(&SC2IFP(sc)->if_lastchange);
611         SC2IFP(sc)->if_ipackets++;
612         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
613
614         if (SC2IFP(sc)->if_bpf) {
615                 struct ip *ip = mtod(m, struct ip *);
616
617                 /* BPF wants net byte order */
618                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
619                 ip->ip_off = htons(ip->ip_off);
620                 bpf_mtap(SC2IFP(sc)->if_bpf, m);
621         }
622
623         /* verify the CARP version. */
624         if (ch->carp_version != CARP_VERSION) {
625                 carpstats.carps_badver++;
626                 SC2IFP(sc)->if_ierrors++;
627                 CARP_LOG("%s; invalid version %d\n",
628                     SC2IFP(sc)->if_xname,
629                     ch->carp_version);
630                 m_freem(m);
631                 return;
632         }
633
634         /* verify the hash */
635         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
636                 carpstats.carps_badauth++;
637                 SC2IFP(sc)->if_ierrors++;
638                 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
639                 m_freem(m);
640                 return;
641         }
642
643         tmp_counter = ntohl(ch->carp_counter[0]);
644         tmp_counter = tmp_counter<<32;
645         tmp_counter += ntohl(ch->carp_counter[1]);
646
647         /* XXX Replay protection goes here */
648
649         sc->sc_init_counter = 0;
650         sc->sc_counter = tmp_counter;
651
652         sc_tv.tv_sec = sc->sc_advbase;
653         if (carp_suppress_preempt && sc->sc_advskew <  240)
654                 sc_tv.tv_usec = 240 * 1000000 / 256;
655         else
656                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
657         ch_tv.tv_sec = ch->carp_advbase;
658         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
659
660         switch (sc->sc_state) {
661         case INIT:
662                 break;
663
664         case MASTER:
665                 /*
666                  * If we receive an advertisement from a master who's going to
667                  * be more frequent than us, go into BACKUP state.
668                  */
669                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
670                     timevalcmp(&sc_tv, &ch_tv, ==)) {
671                         callout_stop(&sc->sc_ad_tmo);
672                         CARP_DEBUG("%s: MASTER -> BACKUP "
673                            "(more frequent advertisement received)\n",
674                            SC2IFP(sc)->if_xname);
675                         carp_set_state(sc, BACKUP);
676                         carp_setrun(sc, 0);
677                         carp_setroute(sc, RTM_DELETE);
678                 }
679                 break;
680
681         case BACKUP:
682                 /*
683                  * If we're pre-empting masters who advertise slower than us,
684                  * and this one claims to be slower, treat him as down.
685                  */
686                 if (carp_opts[CARPCTL_PREEMPT] &&
687                     timevalcmp(&sc_tv, &ch_tv, <)) {
688                         CARP_DEBUG("%s: BACKUP -> MASTER "
689                             "(preempting a slower master)\n",
690                             SC2IFP(sc)->if_xname);
691                         carp_master_down_locked(sc);
692                         break;
693                 }
694
695                 /*
696                  *  If the master is going to advertise at such a low frequency
697                  *  that he's guaranteed to time out, we'd might as well just
698                  *  treat him as timed out now.
699                  */
700                 sc_tv.tv_sec = sc->sc_advbase * 3;
701                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
702                         CARP_DEBUG("%s: BACKUP -> MASTER "
703                             "(master timed out)\n",
704                             SC2IFP(sc)->if_xname);
705                         carp_master_down_locked(sc);
706                         break;
707                 }
708
709                 /*
710                  * Otherwise, we reset the counter and wait for the next
711                  * advertisement.
712                  */
713                 carp_setrun(sc, af);
714                 break;
715         }
716         m_freem(m);
717 }
718
719 static int
720 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
721 {
722         struct m_tag *mtag;
723         struct ifnet *ifp = SC2IFP(sc);
724
725         if (sc->sc_init_counter) {
726                 /* this could also be seconds since unix epoch */
727                 sc->sc_counter = karc4random();
728                 sc->sc_counter = sc->sc_counter << 32;
729                 sc->sc_counter += karc4random();
730         } else {
731                 sc->sc_counter++;
732         }
733
734         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
735         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
736
737         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
738
739         /* Tag packet for carp_output */
740         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT);
741         if (mtag == NULL) {
742                 m_freem(m);
743                 SC2IFP(sc)->if_oerrors++;
744                 return (ENOMEM);
745         }
746         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
747         m_tag_prepend(m, mtag);
748
749         return (0);
750 }
751
752 static void
753 carp_send_ad_all(void)
754 {
755         struct carp_softc *sc;
756
757         LIST_FOREACH(sc, &carpif_list, sc_next) {
758                 if (sc->sc_carpdev == NULL)
759                         continue;
760
761                 if (CARP_IS_RUNNING(SC2IFP(sc)) && sc->sc_state == MASTER)
762                         carp_send_ad_locked(sc);
763         }
764 }
765
/*
 * Callout handler for the advertisement timer; `v' is the carp_softc
 * that was registered with callout_reset().
 */
static void
carp_send_ad(void *v)
{
	carp_send_ad_locked((struct carp_softc *)v);
}
773
774 static void
775 carp_send_ad_locked(struct carp_softc *sc)
776 {
777         struct carp_header ch;
778         struct timeval tv;
779         struct carp_header *ch_ptr;
780         struct mbuf *m;
781         int len, advbase, advskew;
782
783         /* bow out if we've lost our UPness or RUNNINGuiness */
784         if (!CARP_IS_RUNNING(SC2IFP(sc))) {
785                 advbase = 255;
786                 advskew = 255;
787         } else {
788                 advbase = sc->sc_advbase;
789                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
790                         advskew = sc->sc_advskew;
791                 else
792                         advskew = 240;
793                 tv.tv_sec = advbase;
794                 tv.tv_usec = advskew * 1000000 / 256;
795         }
796
797         ch.carp_version = CARP_VERSION;
798         ch.carp_type = CARP_ADVERTISEMENT;
799         ch.carp_vhid = sc->sc_vhid;
800         ch.carp_advbase = advbase;
801         ch.carp_advskew = advskew;
802         ch.carp_authlen = 7;    /* XXX DEFINE */
803         ch.carp_pad1 = 0;       /* must be zero */
804         ch.carp_cksum = 0;
805
806 #ifdef INET
807         if (sc->sc_ia) {
808                 struct ip *ip;
809
810                 MGETHDR(m, M_NOWAIT, MT_HEADER);
811                 if (m == NULL) {
812                         SC2IFP(sc)->if_oerrors++;
813                         carpstats.carps_onomem++;
814                         /* XXX maybe less ? */
815                         if (advbase != 255 || advskew != 255)
816                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
817                                     carp_send_ad, sc);
818                         return;
819                 }
820                 len = sizeof(*ip) + sizeof(ch);
821                 m->m_pkthdr.len = len;
822                 m->m_pkthdr.rcvif = NULL;
823                 m->m_len = len;
824                 MH_ALIGN(m, m->m_len);
825                 m->m_flags |= M_MCAST;
826                 ip = mtod(m, struct ip *);
827                 ip->ip_v = IPVERSION;
828                 ip->ip_hl = sizeof(*ip) >> 2;
829                 ip->ip_tos = IPTOS_LOWDELAY;
830                 ip->ip_len = len;
831                 ip->ip_id = ip_newid();
832                 ip->ip_off = IP_DF;
833                 ip->ip_ttl = CARP_DFLTTL;
834                 ip->ip_p = IPPROTO_CARP;
835                 ip->ip_sum = 0;
836                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
837                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
838
839                 ch_ptr = (struct carp_header *)(&ip[1]);
840                 bcopy(&ch, ch_ptr, sizeof(ch));
841                 if (carp_prepare_ad(m, sc, ch_ptr))
842                         return;
843                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
844
845                 getmicrotime(&SC2IFP(sc)->if_lastchange);
846                 SC2IFP(sc)->if_opackets++;
847                 SC2IFP(sc)->if_obytes += len;
848                 carpstats.carps_opackets++;
849
850                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
851                         SC2IFP(sc)->if_oerrors++;
852                         if (sc->sc_sendad_errors < INT_MAX)
853                                 sc->sc_sendad_errors++;
854                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
855                                 carp_suppress_preempt++;
856                                 if (carp_suppress_preempt == 1) {
857                                         carp_send_ad_all();
858                                 }
859                         }
860                         sc->sc_sendad_success = 0;
861                 } else {
862                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
863                                 if (++sc->sc_sendad_success >=
864                                     CARP_SENDAD_MIN_SUCCESS) {
865                                         carp_suppress_preempt--;
866                                         sc->sc_sendad_errors = 0;
867                                 }
868                         } else {
869                                 sc->sc_sendad_errors = 0;
870                         }
871                 }
872         }
873 #endif /* INET */
874 #ifdef INET6
875         if (sc->sc_ia6) {
876                 struct ip6_hdr *ip6;
877
878                 MGETHDR(m, M_NOWAIT, MT_HEADER);
879                 if (m == NULL) {
880                         SC2IFP(sc)->if_oerrors++;
881                         carpstats.carps_onomem++;
882                         /* XXX maybe less ? */
883                         if (advbase != 255 || advskew != 255)
884                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
885                                     carp_send_ad, sc);
886                         return;
887                 }
888                 len = sizeof(*ip6) + sizeof(ch);
889                 m->m_pkthdr.len = len;
890                 m->m_pkthdr.rcvif = NULL;
891                 m->m_len = len;
892                 MH_ALIGN(m, m->m_len);
893                 m->m_flags |= M_MCAST;
894                 ip6 = mtod(m, struct ip6_hdr *);
895                 bzero(ip6, sizeof(*ip6));
896                 ip6->ip6_vfc |= IPV6_VERSION;
897                 ip6->ip6_hlim = CARP_DFLTTL;
898                 ip6->ip6_nxt = IPPROTO_CARP;
899                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
900                     sizeof(struct in6_addr));
901                 /* set the multicast destination */
902
903                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
904                 ip6->ip6_dst.s6_addr8[15] = 0x12;
905                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
906                         SC2IFP(sc)->if_oerrors++;
907                         m_freem(m);
908                         CARP_LOG("%s: in6_setscope failed\n", __func__);
909                         return;
910                 }
911
912                 ch_ptr = (struct carp_header *)(&ip6[1]);
913                 bcopy(&ch, ch_ptr, sizeof(ch));
914                 if (carp_prepare_ad(m, sc, ch_ptr))
915                         return;
916                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
917
918                 getmicrotime(&SC2IFP(sc)->if_lastchange);
919                 SC2IFP(sc)->if_opackets++;
920                 SC2IFP(sc)->if_obytes += len;
921                 carpstats.carps_opackets6++;
922
923                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
924                         SC2IFP(sc)->if_oerrors++;
925                         if (sc->sc_sendad_errors < INT_MAX)
926                                 sc->sc_sendad_errors++;
927                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
928                                 carp_suppress_preempt++;
929                                 if (carp_suppress_preempt == 1) {
930                                         carp_send_ad_all();
931                                 }
932                         }
933                         sc->sc_sendad_success = 0;
934                 } else {
935                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
936                                 if (++sc->sc_sendad_success >=
937                                     CARP_SENDAD_MIN_SUCCESS) {
938                                         carp_suppress_preempt--;
939                                         sc->sc_sendad_errors = 0;
940                                 }
941                         } else {
942                                 sc->sc_sendad_errors = 0;
943                         }
944                 }
945         }
946 #endif /* INET6 */
947
948         if (advbase != 255 || advskew != 255)
949                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
950                     carp_send_ad, sc);
951 }
952
953 /*
954  * Broadcast a gratuitous ARP request containing
955  * the virtual router MAC address for each IP address
956  * associated with the virtual router.
957  */
958 static void
959 carp_send_arp(struct carp_softc *sc)
960 {
961         struct ifaddr_container *ifac;
962
963         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
964                 struct ifaddr *ifa = ifac->ifa;
965
966                 if (ifa->ifa_addr->sa_family != AF_INET)
967                         continue;
968                 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));        
969
970                 DELAY(1000);    /* XXX */
971         }
972 }
973
974 #ifdef INET6
975 static void
976 carp_send_na(struct carp_softc *sc)
977 {
978         struct ifaddr_container *ifac;
979         struct in6_addr *in6;
980         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
981
982         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
983                 struct ifaddr *ifa = ifac->ifa;
984
985                 if (ifa->ifa_addr->sa_family != AF_INET6)
986                         continue;
987
988                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
989                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
990                     ND_NA_FLAG_OVERRIDE, 1, NULL);
991                 DELAY(1000);    /* XXX */
992         }
993 }
994 #endif /* INET6 */
995
996 static int
997 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
998 {
999         struct carp_softc *vh;
1000         int count = 0;
1001
1002         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1003                 if ((type == CARP_COUNT_RUNNING &&
1004                      CARP_IS_RUNNING(SC2IFP(vh))) ||
1005                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1006                         struct ifaddr_container *ifac;
1007
1008                         TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid],
1009                                       ifa_link) {
1010                                 struct ifaddr *ifa = ifac->ifa;
1011
1012                                 if (ifa->ifa_addr->sa_family == AF_INET &&
1013                                     ia->ia_addr.sin_addr.s_addr ==
1014                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1015                                         count++;
1016                         }
1017                 }
1018         }
1019         return (count);
1020 }
1021
1022 int
1023 carp_iamatch(void *v, struct in_ifaddr *ia,
1024     struct in_addr *isaddr, uint8_t **enaddr)
1025 {
1026         struct carp_if *cif = v;
1027         struct carp_softc *vh;
1028         int index, count = 0;
1029
1030         if (carp_opts[CARPCTL_ARPBALANCE]) {
1031                 /*
1032                  * XXX proof of concept implementation.
1033                  * We use the source ip to decide which virtual host should
1034                  * handle the request. If we're master of that virtual host,
1035                  * then we respond, otherwise, just drop the arp packet on
1036                  * the floor.
1037                  */
1038                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
1039                 if (count == 0) {
1040                         /* should never reach this */
1041                         return (0);
1042                 }
1043
1044                 /* this should be a hash, like pf_hash() */
1045                 index = ntohl(isaddr->s_addr) % count;
1046                 count = 0;
1047
1048                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1049                         if (CARP_IS_RUNNING(SC2IFP(vh))) {
1050                                 struct ifaddr_container *ifac;
1051
1052                                 TAILQ_FOREACH(ifac,
1053                                 &SC2IFP(vh)->if_addrheads[mycpuid], ifa_link) {
1054                                         struct ifaddr *ifa = ifac->ifa;
1055
1056                                         if (ifa->ifa_addr->sa_family ==
1057                                             AF_INET &&
1058                                             ia->ia_addr.sin_addr.s_addr ==
1059                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
1060                                                 if (count == index) {
1061                                                         if (vh->sc_state == MASTER) {
1062                                                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1063                                                                 return (1);
1064                                                         } else {
1065                                                                 return (0);
1066                                                         }
1067                                                 }
1068                                                 count++;
1069                                         }
1070                                 }
1071                         }
1072                 }
1073         } else {
1074                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1075                         if (CARP_IS_RUNNING(SC2IFP(vh)) &&
1076                             vh->sc_state == MASTER) {
1077                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1078                                 return (1);
1079                         }
1080                 }
1081         }
1082         return(0);
1083 }
1084
1085 #ifdef INET6
1086 struct ifaddr *
1087 carp_iamatch6(void *v, struct in6_addr *taddr)
1088 {
1089         struct carp_if *cif = v;
1090         struct carp_softc *vh;
1091
1092         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1093                 struct ifaddr_container *ifac;
1094
1095                 TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid],
1096                               ifa_link) {
1097                         struct ifaddr *ifa = ifac->ifa;
1098
1099                         if (IN6_ARE_ADDR_EQUAL(taddr,
1100                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1101                             CARP_IS_RUNNING(SC2IFP(vh)) &&
1102                             vh->sc_state == MASTER) {
1103                                 return (ifa);
1104                         }
1105                 }
1106         }
1107         return (NULL);
1108 }
1109
1110 void *
1111 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1112 {
1113         struct m_tag *mtag;
1114         struct carp_if *cif = v;
1115         struct carp_softc *sc;
1116
1117         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1118                 struct ifaddr_container *ifac;
1119
1120                 TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid],
1121                               ifa_link) {
1122                         struct ifaddr *ifa = ifac->ifa;
1123
1124                         if (IN6_ARE_ADDR_EQUAL(taddr,
1125                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1126                             CARP_IS_RUNNING(SC2IFP(sc))) {
1127                                 struct ifnet *ifp = SC2IFP(sc);
1128                                 mtag = m_tag_get(PACKET_TAG_CARP,
1129                                     sizeof(struct ifnet *), MB_DONTWAIT);
1130                                 if (mtag == NULL) {
1131                                         /* better a bit than nothing */
1132                                         return (IF_LLADDR(sc->sc_ifp));
1133                                 }
1134                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1135                                     sizeof(struct ifnet *));
1136                                 m_tag_prepend(m, mtag);
1137
1138                                 return (IF_LLADDR(sc->sc_ifp));
1139                         }
1140                 }
1141         }
1142         return (NULL);
1143 }
1144 #endif
1145
1146 struct ifnet *
1147 carp_forus(void *v, void *dhost)
1148 {
1149         struct carp_if *cif = v;
1150         struct carp_softc *vh;
1151         uint8_t *ena = dhost;
1152         
1153         /**
1154          * XXX: See here for check on MAC adr is not for virtual use
1155          *
1156          **/
1157
1158         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1159                 return (NULL);
1160
1161         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1162                 if (CARP_IS_RUNNING(SC2IFP(vh)) && vh->sc_state == MASTER &&
1163                     !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
1164                         return (SC2IFP(vh));
1165                 }
1166         }
1167         return (NULL);
1168 }
1169
1170 static void
1171 carp_master_down(void *v)
1172 {
1173         struct carp_softc *sc = v;
1174
1175         lwkt_serialize_enter(sc->sc_ifp->if_serializer);
1176         carp_master_down_locked(sc);
1177         lwkt_serialize_exit(sc->sc_ifp->if_serializer);
1178 }
1179
1180 static void
1181 carp_master_down_locked(struct carp_softc *sc)
1182 {
1183         switch (sc->sc_state) {
1184         case INIT:
1185                 kprintf("%s: master_down event in INIT state\n",
1186                     SC2IFP(sc)->if_xname);
1187                 break;
1188
1189         case MASTER:
1190                 break;
1191
1192         case BACKUP:
1193                 carp_set_state(sc, MASTER);
1194                 carp_send_ad_locked(sc);
1195                 carp_send_arp(sc);
1196 #ifdef INET6
1197                 carp_send_na(sc);
1198 #endif /* INET6 */
1199                 carp_setrun(sc, 0);
1200                 carp_setroute(sc, RTM_ADD);
1201                 break;
1202         }
1203 }
1204
1205 /*
1206  * When in backup state, af indicates whether to reset the master down timer
1207  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1208  */
1209 static void
1210 carp_setrun(struct carp_softc *sc, sa_family_t af)
1211 {
1212         struct timeval tv;
1213
1214         if (sc->sc_carpdev == NULL) {
1215                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1216                 carp_set_state(sc, INIT);
1217                 return;
1218         }
1219
1220         if (SC2IFP(sc)->if_flags & IFF_UP &&
1221             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) {
1222                 SC2IFP(sc)->if_flags |= IFF_RUNNING;
1223         } else {
1224                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1225                 carp_setroute(sc, RTM_DELETE);
1226                 return;
1227         }
1228
1229         switch (sc->sc_state) {
1230         case INIT:
1231                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1232                         carp_send_ad_locked(sc);
1233                         carp_send_arp(sc);
1234 #ifdef INET6
1235                         carp_send_na(sc);
1236 #endif /* INET6 */
1237                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1238                                    SC2IFP(sc)->if_xname);
1239                         carp_set_state(sc, MASTER);
1240                         carp_setroute(sc, RTM_ADD);
1241                 } else {
1242                         CARP_DEBUG("%s: INIT -> BACKUP\n",
1243                                    SC2IFP(sc)->if_xname);
1244                         carp_set_state(sc, BACKUP);
1245                         carp_setroute(sc, RTM_DELETE);
1246                         carp_setrun(sc, 0);
1247                 }
1248                 break;
1249
1250         case BACKUP:
1251                 callout_stop(&sc->sc_ad_tmo);
1252                 tv.tv_sec = 3 * sc->sc_advbase;
1253                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1254                 switch (af) {
1255 #ifdef INET
1256                 case AF_INET:
1257                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1258                             carp_master_down, sc);
1259                         break;
1260 #endif /* INET */
1261 #ifdef INET6
1262                 case AF_INET6:
1263                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1264                             carp_master_down, sc);
1265                         break;
1266 #endif /* INET6 */
1267                 default:
1268                         if (sc->sc_naddrs)
1269                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1270                                     carp_master_down, sc);
1271                         if (sc->sc_naddrs6)
1272                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1273                                     carp_master_down, sc);
1274                         break;
1275                 }
1276                 break;
1277
1278         case MASTER:
1279                 tv.tv_sec = sc->sc_advbase;
1280                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1281                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1282                     carp_send_ad, sc);
1283                 break;
1284         }
1285 }
1286
1287 static void
1288 carp_multicast_cleanup(struct carp_softc *sc)
1289 {
1290         struct ip_moptions *imo = &sc->sc_imo;
1291         uint16_t n = imo->imo_num_memberships;
1292
1293         /* Clean up our own multicast memberships */
1294         while (n-- > 0) {
1295                 if (imo->imo_membership[n] != NULL) {
1296                         in_delmulti(imo->imo_membership[n]);
1297                         imo->imo_membership[n] = NULL;
1298                 }
1299         }
1300         imo->imo_num_memberships = 0;
1301         imo->imo_multicast_ifp = NULL;
1302 }
1303
1304 #ifdef INET6
1305 static void
1306 carp_multicast6_cleanup(struct carp_softc *sc)
1307 {
1308         struct ip6_moptions *im6o = &sc->sc_im6o;
1309
1310         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1311                 struct in6_multi_mship *imm =
1312                     LIST_FIRST(&im6o->im6o_memberships);
1313
1314                 LIST_REMOVE(imm, i6mm_chain);
1315                 in6_leavegroup(imm);
1316         }
1317         im6o->im6o_multicast_ifp = NULL;
1318 }
1319 #endif
1320
1321 static int
1322 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1323 {
1324         struct ifnet *ifp;
1325         struct carp_if *cif;
1326         struct in_ifaddr *ia, *ia_if;
1327         struct in_ifaddr_container *iac;
1328         struct ip_moptions *imo = &sc->sc_imo;
1329         struct in_addr addr;
1330         u_long iaddr = htonl(sin->sin_addr.s_addr);
1331         int own, error;
1332         
1333         if (sin->sin_addr.s_addr == 0) {
1334                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1335                         carp_set_state(sc, INIT);
1336                 if (sc->sc_naddrs)
1337                         SC2IFP(sc)->if_flags |= IFF_UP;
1338                 carp_setrun(sc, 0);
1339                 return (0);
1340         }
1341         /* we have to do it by hands to check we won't match on us */
1342         ia_if = NULL; own = 0;
1343         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1344                 ia = iac->ia;
1345
1346                 /* and, yeah, we need a multicast-capable iface too */
1347                 if (ia->ia_ifp != SC2IFP(sc) &&
1348                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1349                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1350                         if (!ia_if)
1351                                 ia_if = ia;
1352                         if (sin->sin_addr.s_addr ==
1353                             ia->ia_addr.sin_addr.s_addr)
1354                                 own++;
1355                 }
1356         }
1357
1358         if (!ia_if)
1359                 return (EADDRNOTAVAIL);
1360
1361         ia = ia_if;
1362         ifp = ia->ia_ifp;
1363
1364         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1365             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
1366                 return (EADDRNOTAVAIL);
1367
1368         if (imo->imo_num_memberships == 0) {
1369                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1370                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
1371                         return (ENOBUFS);
1372                 imo->imo_num_memberships++;
1373                 imo->imo_multicast_ifp = ifp;
1374                 imo->imo_multicast_ttl = CARP_DFLTTL;
1375                 imo->imo_multicast_loop = 0;
1376         }
1377
1378         if (!ifp->if_carp) {
1379                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
1380                 if ((error = ifpromisc(ifp, 1))) {
1381                         kfree(cif, M_CARP);
1382                         goto cleanup;
1383                 }
1384                 
1385                 cif->vhif_ifp = ifp;
1386                 TAILQ_INIT(&cif->vhif_vrs);
1387                 ifp->if_carp = cif;
1388         } else {
1389                 struct carp_softc *vr;
1390
1391                 cif = (struct carp_if *)ifp->if_carp;
1392                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1393                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1394                                 error = EINVAL;
1395                                 goto cleanup;
1396                         }
1397                 }
1398         }
1399         sc->sc_ia = ia;
1400         sc->sc_carpdev = ifp;
1401
1402         { /* XXX prevent endless loop if already in queue */
1403         struct carp_softc *vr, *after = NULL;
1404         int myself = 0;
1405         cif = (struct carp_if *)ifp->if_carp;
1406
1407         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1408                 if (vr == sc)
1409                         myself = 1;
1410                 if (vr->sc_vhid < sc->sc_vhid)
1411                         after = vr;
1412         }
1413
1414         if (!myself) {
1415                 /* We're trying to keep things in order */
1416                 if (after == NULL)
1417                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1418                 else
1419                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1420                 cif->vhif_nvrs++;
1421         }
1422         }
1423
1424         sc->sc_naddrs++;
1425         SC2IFP(sc)->if_flags |= IFF_UP;
1426         if (own)
1427                 sc->sc_advskew = 0;
1428
1429         carp_sc_state_locked(sc);
1430         carp_setrun(sc, 0);
1431
1432         return (0);
1433
1434 cleanup:
1435         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1436         return (error);
1437 }
1438
1439 static int
1440 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1441 {
1442         int error = 0;
1443
1444         if (!--sc->sc_naddrs) {
1445                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1446                 struct ip_moptions *imo = &sc->sc_imo;
1447
1448                 callout_stop(&sc->sc_ad_tmo);
1449                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1450                 sc->sc_vhid = -1;
1451                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1452                 imo->imo_multicast_ifp = NULL;
1453                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1454                 if (!--cif->vhif_nvrs) {
1455                         sc->sc_carpdev->if_carp = NULL;
1456                         kfree(cif, M_IFADDR);
1457                 }
1458         }
1459         return (error);
1460 }
1461
1462 #ifdef INET6
1463 static int
1464 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1465 {
1466         struct ifnet *ifp;
1467         struct carp_if *cif;
1468         struct in6_ifaddr *ia, *ia_if;
1469         struct ip6_moptions *im6o = &sc->sc_im6o;
1470         struct in6_multi_mship *imm;
1471         struct in6_addr in6;
1472         int own, error;
1473
1474         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1475                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1476                         carp_set_state(sc, INIT);
1477                 if (sc->sc_naddrs6)
1478                         SC2IFP(sc)->if_flags |= IFF_UP;
1479                 carp_setrun(sc, 0);
1480                 return (0);
1481         }
1482
1483         /* we have to do it by hands to check we won't match on us */
1484         ia_if = NULL; own = 0;
1485         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1486                 int i;
1487
1488                 for (i = 0; i < 4; i++) {
1489                         if ((sin6->sin6_addr.s6_addr32[i] &
1490                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1491                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1492                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1493                                 break;
1494                 }
1495                 /* and, yeah, we need a multicast-capable iface too */
1496                 if (ia->ia_ifp != SC2IFP(sc) &&
1497                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1498                     (i == 4)) {
1499                         if (!ia_if)
1500                                 ia_if = ia;
1501                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1502                             &ia->ia_addr.sin6_addr))
1503                                 own++;
1504                 }
1505         }
1506
1507         if (!ia_if)
1508                 return (EADDRNOTAVAIL);
1509         ia = ia_if;
1510         ifp = ia->ia_ifp;
1511
1512         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1513             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
1514                 return (EADDRNOTAVAIL);
1515
1516         if (!sc->sc_naddrs6) {
1517                 im6o->im6o_multicast_ifp = ifp;
1518
1519                 /* join CARP multicast address */
1520                 bzero(&in6, sizeof(in6));
1521                 in6.s6_addr16[0] = htons(0xff02);
1522                 in6.s6_addr8[15] = 0x12;
1523                 if (in6_setscope(&in6, ifp, NULL) != 0)
1524                         goto cleanup;
1525                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1526                         goto cleanup;
1527                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1528
1529                 /* join solicited multicast address */
1530                 bzero(&in6, sizeof(in6));
1531                 in6.s6_addr16[0] = htons(0xff02);
1532                 in6.s6_addr32[1] = 0;
1533                 in6.s6_addr32[2] = htonl(1);
1534                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
1535                 in6.s6_addr8[12] = 0xff;
1536                 if (in6_setscope(&in6, ifp, NULL) != 0)
1537                         goto cleanup;
1538                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1539                         goto cleanup;
1540                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1541         }
1542
1543         if (!ifp->if_carp) {
1544                 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
1545                 if ((error = ifpromisc(ifp, 1))) {
1546                         kfree(cif, M_CARP);
1547                         goto cleanup;
1548                 }
1549
1550                 cif->vhif_ifp = ifp;
1551                 TAILQ_INIT(&cif->vhif_vrs);
1552                 ifp->if_carp = cif;
1553         } else {
1554                 struct carp_softc *vr;
1555
1556                 cif = (struct carp_if *)ifp->if_carp;
1557                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1558                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1559                                 error = EINVAL;
1560                                 goto cleanup;
1561                         }
1562                 }
1563         }
1564         sc->sc_ia6 = ia;
1565         sc->sc_carpdev = ifp;
1566
1567         { /* XXX prevent endless loop if already in queue */
1568         struct carp_softc *vr, *after = NULL;
1569         int myself = 0;
1570         cif = (struct carp_if *)ifp->if_carp;
1571
1572         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1573                 if (vr == sc)
1574                         myself = 1;
1575                 if (vr->sc_vhid < sc->sc_vhid)
1576                         after = vr;
1577         }
1578
1579         if (!myself) {
1580                 /* We're trying to keep things in order */
1581                 if (after == NULL)
1582                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1583                 else
1584                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1585                 cif->vhif_nvrs++;
1586         }
1587         }
1588
1589         sc->sc_naddrs6++;
1590         SC2IFP(sc)->if_flags |= IFF_UP;
1591         if (own)
1592                 sc->sc_advskew = 0;
1593         carp_sc_state_locked(sc);
1594         carp_setrun(sc, 0);
1595
1596         return (0);
1597
1598 cleanup:
1599         /* clean up multicast memberships */
1600         if (!sc->sc_naddrs6) {
1601                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1602                         imm = LIST_FIRST(&im6o->im6o_memberships);
1603                         LIST_REMOVE(imm, i6mm_chain);
1604                         in6_leavegroup(imm);
1605                 }
1606         }
1607         return (error);
1608 }
1609
1610 static int
1611 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1612 {
1613         int error = 0;
1614
1615         if (!--sc->sc_naddrs6) {
1616                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1617                 struct ip6_moptions *im6o = &sc->sc_im6o;
1618
1619                 callout_stop(&sc->sc_ad_tmo);
1620                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1621                 sc->sc_vhid = -1;
1622                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1623                         struct in6_multi_mship *imm =
1624                             LIST_FIRST(&im6o->im6o_memberships);
1625
1626                         LIST_REMOVE(imm, i6mm_chain);
1627                         in6_leavegroup(imm);
1628                 }
1629                 im6o->im6o_multicast_ifp = NULL;
1630                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1631                 if (!--cif->vhif_nvrs) {
1632                         sc->sc_carpdev->if_carp = NULL;
1633                         kfree(cif, M_IFADDR);
1634                 }
1635         }
1636         return (error);
1637 }
1638 #endif /* INET6 */
1639
1640 static int
1641 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
1642 {
1643         struct carp_softc *sc = ifp->if_softc, *vr;
1644         struct carpreq carpr;
1645         struct ifaddr *ifa;
1646         struct ifreq *ifr;
1647         struct ifaliasreq *ifra;
1648         int error = 0;
1649
1650         ifa = (struct ifaddr *)addr;
1651         ifra = (struct ifaliasreq *)addr;
1652         ifr = (struct ifreq *)addr;
1653
1654         switch (cmd) {
1655         case SIOCSIFADDR:
1656                 switch (ifa->ifa_addr->sa_family) {
1657 #ifdef INET
1658                 case AF_INET:
1659                         SC2IFP(sc)->if_flags |= IFF_UP;
1660                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1661                             sizeof(struct sockaddr));
1662                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1663                         break;
1664 #endif /* INET */
1665 #ifdef INET6
1666                 case AF_INET6:
1667                         SC2IFP(sc)->if_flags |= IFF_UP;
1668                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1669                         break;
1670 #endif /* INET6 */
1671                 default:
1672                         error = EAFNOSUPPORT;
1673                         break;
1674                 }
1675                 break;
1676
1677         case SIOCAIFADDR:
1678                 switch (ifa->ifa_addr->sa_family) {
1679 #ifdef INET
1680                 case AF_INET:
1681                         SC2IFP(sc)->if_flags |= IFF_UP;
1682                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1683                             sizeof(struct sockaddr));
1684                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
1685                         break;
1686 #endif /* INET */
1687 #ifdef INET6
1688                 case AF_INET6:
1689                         SC2IFP(sc)->if_flags |= IFF_UP;
1690                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
1691                         break;
1692 #endif /* INET6 */
1693                 default:
1694                         error = EAFNOSUPPORT;
1695                         break;
1696                 }
1697                 break;
1698
1699         case SIOCDIFADDR:
1700                 switch (ifa->ifa_addr->sa_family) {
1701 #ifdef INET
1702                 case AF_INET:
1703                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
1704                         break;
1705 #endif /* INET */
1706 #ifdef INET6
1707                 case AF_INET6:
1708                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
1709                         break;
1710 #endif /* INET6 */
1711                 default:
1712                         error = EAFNOSUPPORT;
1713                         break;
1714                 }
1715                 break;
1716
1717         case SIOCSIFFLAGS:
1718                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1719                         callout_stop(&sc->sc_ad_tmo);
1720                         callout_stop(&sc->sc_md_tmo);
1721                         callout_stop(&sc->sc_md6_tmo);
1722                         if (sc->sc_state == MASTER)
1723                                 carp_send_ad_locked(sc);
1724                         carp_set_state(sc, INIT);
1725                         carp_setrun(sc, 0);
1726                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
1727                         SC2IFP(sc)->if_flags |= IFF_UP;
1728                         carp_setrun(sc, 0);
1729                 }
1730                 break;
1731
1732         case SIOCSVH:
1733                 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
1734                 if (error)
1735                         break;
1736                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1737                         break;
1738                 error = 1;
1739                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1740                         switch (carpr.carpr_state) {
1741                         case BACKUP:
1742                                 callout_stop(&sc->sc_ad_tmo);
1743                                 carp_set_state(sc, BACKUP);
1744                                 carp_setrun(sc, 0);
1745                                 carp_setroute(sc, RTM_DELETE);
1746                                 break;
1747
1748                         case MASTER:
1749                                 carp_master_down_locked(sc);
1750                                 break;
1751
1752                         default:
1753                                 break;
1754                         }
1755                 }
1756                 if (carpr.carpr_vhid > 0) {
1757                         if (carpr.carpr_vhid > 255) {
1758                                 error = EINVAL;
1759                                 break;
1760                         }
1761                         if (sc->sc_carpdev) {
1762                                 struct carp_if *cif;
1763                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1764                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1765                                         if (vr != sc &&
1766                                             vr->sc_vhid == carpr.carpr_vhid)
1767                                                 return EEXIST;
1768                                 }
1769                         }
1770                         sc->sc_vhid = carpr.carpr_vhid;
1771                         IF_LLADDR(sc->sc_ifp)[0] = 0;
1772                         IF_LLADDR(sc->sc_ifp)[1] = 0;
1773                         IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
1774                         IF_LLADDR(sc->sc_ifp)[3] = 0;
1775                         IF_LLADDR(sc->sc_ifp)[4] = 1;
1776                         IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
1777                         error--;
1778                 }
1779                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
1780                         if (carpr.carpr_advskew >= 255) {
1781                                 error = EINVAL;
1782                                 break;
1783                         }
1784                         if (carpr.carpr_advbase > 255) {
1785                                 error = EINVAL;
1786                                 break;
1787                         }
1788                         sc->sc_advbase = carpr.carpr_advbase;
1789                         sc->sc_advskew = carpr.carpr_advskew;
1790                         error--;
1791                 }
1792                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1793                 if (error > 0) {
1794                         error = EINVAL;
1795                 } else {
1796                         error = 0;
1797                         carp_setrun(sc, 0);
1798                 }
1799                 break;
1800
1801         case SIOCGVH:
1802                 bzero(&carpr, sizeof(carpr));
1803                 carpr.carpr_state = sc->sc_state;
1804                 carpr.carpr_vhid = sc->sc_vhid;
1805                 carpr.carpr_advbase = sc->sc_advbase;
1806                 carpr.carpr_advskew = sc->sc_advskew;
1807                 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
1808                 if (error == 0) {
1809                         bcopy(sc->sc_key, carpr.carpr_key,
1810                             sizeof(carpr.carpr_key));
1811                 }
1812                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1813                 break;
1814
1815         default:
1816                 error = EINVAL;
1817         }
1818         carp_hmac_prepare(sc);
1819         return (error);
1820 }
1821
1822 /*
1823  * XXX: this is looutput. We should eventually use it from there.
1824  */
1825 static int
1826 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1827     struct rtentry *rt)
1828 {
1829         uint32_t af;
1830
1831         M_ASSERTPKTHDR(m); /* check if we have the packet header */
1832
1833         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
1834                 m_freem(m);
1835                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
1836                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1837         }
1838
1839         ifp->if_opackets++;
1840         ifp->if_obytes += m->m_pkthdr.len;
1841
1842         /* BPF writes need to be handled specially. */
1843         if (dst->sa_family == AF_UNSPEC) {
1844                 bcopy(dst->sa_data, &af, sizeof(af));
1845                 dst->sa_family = af;
1846         }
1847
1848 #if 1   /* XXX */
1849         switch (dst->sa_family) {
1850         case AF_INET:
1851         case AF_INET6:
1852         case AF_IPX:
1853         case AF_APPLETALK:
1854                 break;
1855
1856         default:
1857                 m_freem(m);
1858                 return (EAFNOSUPPORT);
1859         }
1860 #endif
1861         return (if_simloop(ifp, m, dst->sa_family, 0));
1862 }
1863
/*
 * Start-output routine of the carp interface.  It is never expected to
 * be called and does nothing, apart from logging the call when the
 * kernel is built with DEBUG.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	kprintf("%s: start called\n", ifp->if_xname);
#endif
}
1874
1875 int
1876 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
1877     struct rtentry *rt)
1878 {
1879         struct m_tag *mtag;
1880         struct carp_softc *sc;
1881         struct ifnet *carp_ifp;
1882         struct ether_header *eh;
1883
1884         if (!sa)
1885                 return (0);
1886
1887         switch (sa->sa_family) {
1888 #ifdef INET
1889         case AF_INET:
1890                 break;
1891 #endif /* INET */
1892 #ifdef INET6
1893         case AF_INET6:
1894                 break;
1895 #endif /* INET6 */
1896         default:
1897                 return (0);
1898         }
1899
1900         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1901         if (mtag == NULL)
1902                 return (0);
1903
1904         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
1905         sc = carp_ifp->if_softc;
1906
1907         /* Set the source MAC address to Virtual Router MAC Address */
1908         switch (ifp->if_type) {
1909         case IFT_ETHER:
1910         case IFT_L2VLAN:
1911                 eh = mtod(m, struct ether_header *);
1912                 eh->ether_shost[0] = 0;
1913                 eh->ether_shost[1] = 0;
1914                 eh->ether_shost[2] = 0x5e;
1915                 eh->ether_shost[3] = 0;
1916                 eh->ether_shost[4] = 1;
1917                 eh->ether_shost[5] = sc->sc_vhid;
1918                 break;
1919
1920         default:
1921                 if_printf(ifp, "carp is not supported for this "
1922                           "interface type\n");
1923                 return (EOPNOTSUPP);
1924         }
1925         return (0);
1926 }
1927
1928 static void
1929 carp_set_state(struct carp_softc *sc, int state)
1930 {
1931         if (sc->sc_state == state)
1932                 return;
1933
1934         sc->sc_state = state;
1935         switch (state) {
1936         case BACKUP:
1937                 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN;
1938                 break;
1939
1940         case MASTER:
1941                 SC2IFP(sc)->if_link_state = LINK_STATE_UP;
1942                 break;
1943
1944         default:
1945                 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN;
1946                 break;
1947         }
1948         rt_ifmsg(SC2IFP(sc));
1949 }
1950
/*
 * Public entry point taking the carp_if as an opaque pointer; forwards
 * to carp_carpdev_state_locked().
 */
void
carp_carpdev_state(void *v)
{
	carp_carpdev_state_locked((struct carp_if *)v);
}
1958
1959 static void
1960 carp_carpdev_state_locked(struct carp_if *cif)
1961 {
1962         struct carp_softc *sc;
1963
1964         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
1965                 carp_sc_state_locked(sc);
1966 }
1967
1968 static void
1969 carp_sc_state_locked(struct carp_softc *sc)
1970 {
1971         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
1972                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
1973                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1974                 callout_stop(&sc->sc_ad_tmo);
1975                 callout_stop(&sc->sc_md_tmo);
1976                 callout_stop(&sc->sc_md6_tmo);
1977                 carp_set_state(sc, INIT);
1978                 carp_setrun(sc, 0);
1979                 if (!sc->sc_suppress) {
1980                         carp_suppress_preempt++;
1981                         if (carp_suppress_preempt == 1)
1982                                 carp_send_ad_all();
1983                 }
1984                 sc->sc_suppress = 1;
1985         } else {
1986                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
1987                 carp_set_state(sc, INIT);
1988                 carp_setrun(sc, 0);
1989                 if (sc->sc_suppress)
1990                         carp_suppress_preempt--;
1991                 sc->sc_suppress = 0;
1992         }
1993 }
1994
1995 static int
1996 carp_modevent(module_t mod, int type, void *data)
1997 {
1998         switch (type) {
1999         case MOD_LOAD:
2000                 LIST_INIT(&carpif_list);
2001                 carp_ifdetach_event =
2002                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
2003                                       EVENTHANDLER_PRI_ANY);
2004                 if_clone_attach(&carp_cloner);
2005                 break;
2006
2007         case MOD_UNLOAD:
2008                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
2009                                         carp_ifdetach_event);
2010                 if_clone_detach(&carp_cloner);
2011                 break;
2012
2013         default:
2014                 return (EINVAL);
2015         }
2016         return (0);
2017 }
2018
/*
 * Module glue: describes the "carp" module, with carp_modevent() as its
 * event handler, and declares it to the module system.
 */
static moduledata_t carp_mod = {
        "carp",                 /* module name */
        carp_modevent,          /* event handler (load/unload) */
        0                       /* presumably extra handler data; unused */
};
DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);