Correct the reversed comparison logic
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $
29  */
30
31 #include "opt_carp.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/in_cksum.h>
39 #include <sys/limits.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/sockio.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48
49 #include <machine/stdarg.h>
50 #include <crypto/sha1.h>
51
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_dl.h>
56 #include <net/if_types.h>
57 #include <net/route.h>
58 #include <net/if_clone.h>
59
60 #ifdef INET
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/if_ether.h>
67 #endif
68
69 #ifdef INET6
70 #include <netinet/icmp6.h>
71 #include <netinet/ip6.h>
72 #include <netinet6/ip6_var.h>
73 #include <netinet6/scope6_var.h>
74 #include <netinet6/nd6.h>
75 #endif
76
77 #include <netinet/ip_carp.h>
78
79 #define CARP_IFNAME             "carp"
80 #define CARP_IS_RUNNING(ifp)    \
81         (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
82
83 struct carp_softc {
84         struct ifnet             sc_if;         /* embedded ifnet; sc_ifp is pointed at it in carp_clone_create() */
85         struct ifnet            *sc_ifp;        /* compat shim */
86         struct ifnet            *sc_carpdev;    /* parent interface */
87         struct in_ifaddr        *sc_ia;         /* primary iface address */
88         struct ip_moptions       sc_imo;        /* multicast options handed to ip_output() for advertisements */
89 #ifdef INET6
90         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
91         struct ip6_moptions      sc_im6o;       /* hop limit preset to CARP_DFLTTL at creation */
92 #endif /* INET6 */
93         TAILQ_ENTRY(carp_softc)  sc_list;       /* linkage on the parent's carp_if vhif_vrs queue */
94
95         enum { INIT = 0, BACKUP, MASTER }
96                                  sc_state;      /* drives the switch in carp_input_c() */
97
98         int                      sc_flags_backup;
99         int                      sc_suppress;   /* when set, carpdetach() decrements carp_suppress_preempt */
100
101         int                      sc_sendad_errors;      /* ip_output() failures seen while sending advertisements */
102 #define CARP_SENDAD_MAX_ERRORS  3       /* error count at which carp_suppress_preempt is raised */
103         int                      sc_sendad_success;     /* successes counted while recovering from the error state */
104 #define CARP_SENDAD_MIN_SUCCESS 3       /* successes needed to drop carp_suppress_preempt again */
105
106         int                      sc_vhid;       /* -1 after creation ("required setting") */
107         int                      sc_advskew;    /* scaled as advskew * 1000000 / 256 usec when building timevals */
108         int                      sc_naddrs;     /* presumably the IPv4 address count -- confirm against users */
109         int                      sc_naddrs6;    /* presumably the IPv6 address count -- confirm against users */
110         int                      sc_advbase;    /* seconds */
111         int                      sc_init_counter;       /* nonzero: carp_prepare_ad() seeds sc_counter randomly */
112         uint64_t                 sc_counter;    /* advertisement counter covered by the HMAC */
113
114         /* authentication */
115 #define CARP_HMAC_PAD   64
116         unsigned char            sc_key[CARP_KEY_LEN];  /* copied into sc_pad by carp_hmac_prepare() */
117         unsigned char            sc_pad[CARP_HMAC_PAD]; /* holds the 0x5c (outer) pad once carp_hmac_prepare() returns */
118         SHA1_CTX                 sc_sha1;       /* precomputed first part of the inner hash */
119
120         struct callout           sc_ad_tmo;     /* advertisement timeout */
121         struct callout           sc_md_tmo;     /* master down timeout */
122         struct callout           sc_md6_tmo;    /* master down timeout */
123
124         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
125 };
126 #define SC2IFP(sc)      ((sc)->sc_ifp)
127
128 struct carp_if {
129         TAILQ_HEAD(, carp_softc) vhif_vrs;      /* carp softcs on this parent, linked through sc_list */
130         int             vhif_nvrs;              /* carpdetach() frees this carp_if when it drops to zero */
131
132         struct ifnet    *vhif_ifp;              /* presumably the parent ifnet -- confirm against the attach path */
133 };
134
135 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
136
137 SYSCTL_DECL(_net_inet_carp);
138
139 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
140 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
141     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
142 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
143     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
144 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
145     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
146 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
147     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
148
149 static int carp_suppress_preempt = 0;
150 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
151     &carp_suppress_preempt, 0, "Preemption is suppressed");
152
153 static struct carpstats carpstats;
154 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
155     &carpstats, carpstats,
156     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
157
158 #define CARP_LOG(...)   do {                            \
159         if (carp_opts[CARPCTL_LOG] > 0)                 \
160                 log(LOG_INFO, __VA_ARGS__);             \
161 } while (0)
162
163 #define CARP_DEBUG(...) do {                            \
164         if (carp_opts[CARPCTL_LOG] > 1)                 \
165                 log(LOG_DEBUG, __VA_ARGS__);            \
166 } while (0)
167
168 static void     carp_hmac_prepare(struct carp_softc *);
169 static void     carp_hmac_generate(struct carp_softc *, uint32_t *,
170                     unsigned char *);
171 static int      carp_hmac_verify(struct carp_softc *, uint32_t *,
172                     unsigned char *);
173 static void     carp_setroute(struct carp_softc *, int);
174 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
175 static int      carp_clone_create(struct if_clone *, int);
176 static void     carp_clone_destroy(struct ifnet *);
177 static void     carpdetach(struct carp_softc *, int);
178 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
179                     struct carp_header *);
180 static void     carp_send_ad_all(void);
181 static void     carp_send_ad(void *);
182 static void     carp_send_ad_locked(struct carp_softc *);
183 static void     carp_send_arp(struct carp_softc *);
184 static void     carp_master_down(void *);
185 static void     carp_master_down_locked(struct carp_softc *);
186 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
187 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
188                     struct rtentry *);
189 static void     carp_start(struct ifnet *);
190 static void     carp_setrun(struct carp_softc *, sa_family_t);
191 static void     carp_set_state(struct carp_softc *, int);
192 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
193
194 static void     carp_multicast_cleanup(struct carp_softc *);
195 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
196 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
197 static void     carp_carpdev_state_locked(struct carp_if *);
198 static void     carp_sc_state_locked(struct carp_softc *);
199 #ifdef INET6
200 static void     carp_send_na(struct carp_softc *);
201 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
202 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
203 static void     carp_multicast6_cleanup(struct carp_softc *);
204 #endif
205
206 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
207
208 static LIST_HEAD(, carp_softc) carpif_list;
209
210 static struct if_clone carp_cloner =
211 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
212                      0, IF_MAXUNIT);
213
214 static eventhandler_tag carp_ifdetach_event;
215
216 static void
217 carp_hmac_prepare(struct carp_softc *sc)
218 {
219         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
220         uint8_t vhid = sc->sc_vhid & 0xff;
221         struct ifaddr_container *ifac;
222         int i;
223 #ifdef INET6
224         struct in6_addr in6;
225 #endif
226
227         /* XXX: possible race here */
228
229         /* compute ipad from key */
230         bzero(sc->sc_pad, sizeof(sc->sc_pad));
231         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
232         for (i = 0; i < sizeof(sc->sc_pad); i++)
233                 sc->sc_pad[i] ^= 0x36;
234
235         /* precompute first part of inner hash */
236         SHA1Init(&sc->sc_sha1);
237         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
238         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
239         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
240         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
241 #ifdef INET
242         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
243                 struct ifaddr *ifa = ifac->ifa;
244
245                 if (ifa->ifa_addr->sa_family == AF_INET)
246                         SHA1Update(&sc->sc_sha1,
247                             (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
248                             sizeof(struct in_addr));
249         }
250 #endif /* INET */
251 #ifdef INET6
252         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
253                 struct ifaddr *ifa = ifac->ifa;
254
255                 if (ifa->ifa_addr->sa_family == AF_INET6) {
256                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
257                         in6_clearscope(&in6);
258                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
259                 }
260         }
261 #endif /* INET6 */
262
263         /* convert ipad to opad */
264         for (i = 0; i < sizeof(sc->sc_pad); i++)
265                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
266 }
267
268 static void
269 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
270     unsigned char md[20])
271 {
272         SHA1_CTX sha1ctx;
273
274         /* fetch first half of inner hash */
275         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
276
277         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
278         SHA1Final(md, &sha1ctx);
279
280         /* outer hash */
281         SHA1Init(&sha1ctx);
282         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
283         SHA1Update(&sha1ctx, md, 20);
284         SHA1Final(md, &sha1ctx);
285 }
286
287 static int
288 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
289     unsigned char md[20])
290 {
291         unsigned char md2[20];
292
293         carp_hmac_generate(sc, counter, md2);
294         return (bcmp(md, md2, sizeof(md2)));
295 }
296
297 static void
298 carp_setroute(struct carp_softc *sc, int cmd)
299 {
300         struct ifaddr_container *ifac;
301
302         crit_enter();
303         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
304                 struct ifaddr *ifa = ifac->ifa;
305
306                 if (ifa->ifa_addr->sa_family == AF_INET &&
307                     sc->sc_carpdev != NULL) {
308                         int count = carp_addrcount(
309                             (struct carp_if *)sc->sc_carpdev->if_carp,
310                             ifatoia(ifa), CARP_COUNT_MASTER);
311
312                         if ((cmd == RTM_ADD && count == 1) ||
313                             (cmd == RTM_DELETE && count == 0))
314                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
315                 }
316 #ifdef INET6
317                 if (ifa->ifa_addr->sa_family == AF_INET6) {
318                         if (cmd == RTM_ADD)
319                                 in6_ifaddloop(ifa);
320                         else
321                                 in6_ifremloop(ifa);
322                 }
323 #endif /* INET6 */
324         }
325         crit_exit();
326 }
327
328 static int
329 carp_clone_create(struct if_clone *ifc, int unit)
330 {
331         struct carp_softc *sc;
332         struct ifnet *ifp;
333
334         sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
335         ifp = sc->sc_ifp = &sc->sc_if;
336
337         sc->sc_flags_backup = 0;
338         sc->sc_suppress = 0;
339         sc->sc_advbase = CARP_DFLTINTV;
340         sc->sc_vhid = -1;       /* required setting */
341         sc->sc_advskew = 0;
342         sc->sc_init_counter = 1;
343         sc->sc_naddrs = sc->sc_naddrs6 = 0;
344
345 #ifdef INET6
346         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
347 #endif
348
349         callout_init(&sc->sc_ad_tmo);
350         callout_init(&sc->sc_md_tmo);
351         callout_init(&sc->sc_md6_tmo);
352
353         ifp->if_softc = sc;
354         if_initname(ifp, CARP_IFNAME, unit);    
355         ifp->if_mtu = ETHERMTU;
356         ifp->if_flags = IFF_LOOPBACK;
357         ifp->if_ioctl = carp_ioctl;
358         ifp->if_output = carp_looutput;
359         ifp->if_start = carp_start;
360         ifp->if_type = IFT_CARP;
361         ifp->if_snd.ifq_maxlen = ifqmaxlen;
362         ifp->if_hdrlen = 0;
363         if_attach(ifp, NULL);
364         bpfattach(ifp, DLT_NULL, sizeof(u_int));
365
366         crit_enter();
367         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
368         crit_exit();
369
370         return (0);
371 }
372
373 static void
374 carp_clone_destroy(struct ifnet *ifp)
375 {
376         struct carp_softc *sc = ifp->if_softc;
377
378         carpdetach(sc, 1);
379
380         crit_enter();
381         LIST_REMOVE(sc, sc_next);
382         crit_exit();
383         bpfdetach(ifp);
384         if_detach(ifp);
385         kfree(sc, M_CARP);
386 }
387
388 /*
389  * This function can be called on CARP interface destroy path,
390  * and in case of the removal of the underlying interface as
391  * well. We differentiate these two cases. In the latter case
392  * we do not cleanup our multicast memberships, since they
393  * are already freed.
394  */
395 static void
396 carpdetach(struct carp_softc *sc, int unlock)
397 {
398         struct carp_if *cif;
399
400         callout_stop(&sc->sc_ad_tmo);
401         callout_stop(&sc->sc_md_tmo);
402         callout_stop(&sc->sc_md6_tmo);
403
404         if (sc->sc_suppress)
405                 carp_suppress_preempt--;
406         sc->sc_suppress = 0;
407
408         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
409                 carp_suppress_preempt--;
410         sc->sc_sendad_errors = 0;
411
412         carp_set_state(sc, INIT);
413         SC2IFP(sc)->if_flags &= ~IFF_UP;
414         carp_setrun(sc, 0);
415         if (unlock)
416                 carp_multicast_cleanup(sc);
417 #ifdef INET6
418         carp_multicast6_cleanup(sc);
419 #endif
420
421         if (sc->sc_carpdev != NULL) {
422                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
423                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
424                 if (!--cif->vhif_nvrs) {
425                         ifpromisc(sc->sc_carpdev, 0);
426                         sc->sc_carpdev->if_carp = NULL;
427                         FREE(cif, M_IFADDR);
428                 }
429                 sc->sc_carpdev = NULL;
430         }
431 }
432
433 /* Detach an interface from the carp. */
434 static void
435 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
436 {
437         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
438         struct carp_softc *sc, *nextsc;
439
440         if (cif == NULL)
441                 return;
442
443         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
444                 nextsc = TAILQ_NEXT(sc, sc_list);
445                 carpdetach(sc, 0);
446         }
447 }
448
449 /*
450  * process input packet.
451  * we have rearranged checks order compared to the rfc,
452  * but it seems more efficient this way or not possible otherwise.
453  */
454 void
455 carp_input(struct mbuf *m, ...)
456 {
457         struct ip *ip = mtod(m, struct ip *);
458         struct carp_header *ch;
459         int len, iphlen;
460         __va_list ap;
461
462         __va_start(ap, m);
463         iphlen = __va_arg(ap, int);
464         __va_end(ap);
465
466         carpstats.carps_ipackets++;
467
468         if (!carp_opts[CARPCTL_ALLOW]) {
469                 m_freem(m);
470                 return;
471         }
472
473         /* Check if received on a valid carp interface */
474         if (m->m_pkthdr.rcvif->if_carp == NULL) {
475                 carpstats.carps_badif++;
476                 CARP_LOG("carp_input: packet received on non-carp "
477                     "interface: %s\n",
478                     m->m_pkthdr.rcvif->if_xname);
479                 m_freem(m);
480                 return;
481         }
482
483         /* Verify that the IP TTL is CARP_DFLTTL. */
484         if (ip->ip_ttl != CARP_DFLTTL) {
485                 carpstats.carps_badttl++;
486                 CARP_LOG("carp_input: received ttl %d != %d on %s\n",
487                     ip->ip_ttl, CARP_DFLTTL,
488                     m->m_pkthdr.rcvif->if_xname);
489                 m_freem(m);
490                 return;
491         }
492
493         /* Minimal CARP packet size */
494         len = iphlen + sizeof(*ch);
495
496         /*
497          * Verify that the received packet length is
498          * not less than the CARP header
499          */
500         if (m->m_pkthdr.len < len) {
501                 carpstats.carps_badlen++;
502                 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
503                          m->m_pkthdr.rcvif->if_xname);
504                 m_freem(m);
505                 return;
506         }
507
508         /* Make sure that CARP header is contiguous */
509         if (len > m->m_len) {
510                 m = m_pullup(m, len);
511                 if (m == NULL) {
512                         carpstats.carps_hdrops++;
513                         CARP_LOG("carp_input: m_pullup failed\n");
514                         return;
515                 }
516                 ip = mtod(m, struct ip *);
517         }
518         ch = (struct carp_header *)((uint8_t *)ip + iphlen);
519
520         /* Verify the CARP checksum */
521         if (in_cksum_skip(m, len, iphlen)) {
522                 carpstats.carps_badsum++;
523                 CARP_LOG("carp_input: checksum failed on %s\n",
524                     m->m_pkthdr.rcvif->if_xname);
525                 m_freem(m);
526                 return;
527         }
528         carp_input_c(m, ch, AF_INET);
529 }
530
#ifdef INET6
/*
 * IPv6 input routine for CARP packets.
 *
 * Performs the same screening as the IPv4 path (allow knob, carp state
 * on the receiving interface, hop limit, header presence, checksum) and
 * passes surviving packets to carp_input_c().  Always returns
 * IPPROTO_DONE; the mbuf is consumed on every path.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int first_len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW])
		goto drop;

	/* The receiving interface must have carp state attached. */
	if (m->m_pkthdr.rcvif->if_carp == NULL) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_input: packet received on non-carp "
		    "interface: %s\n",
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	/* The hop limit has to be exactly CARP_DFLTTL. */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim,
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	/*
	 * Fetch the CARP header.  The length is sampled first because
	 * it is only reported in the failure message below, where m is
	 * no longer touched.
	 */
	first_len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG("carp6_input: packet size %u too small\n", first_len);
		return (IPPROTO_DONE);
	}

	/* Checksum over the CARP header only. */
	if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG("carp6_input: checksum failed, on %s\n",
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	carp_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
589
590 static void
591 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
592 {
593         struct ifnet *ifp = m->m_pkthdr.rcvif;
594         struct carp_softc *sc;
595         uint64_t tmp_counter;
596         struct timeval sc_tv, ch_tv;
597
598         /* verify that the VHID is valid on the receiving interface */
599         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
600                 if (sc->sc_vhid == ch->carp_vhid)
601                         break;
602
603         if (!sc || !CARP_IS_RUNNING(SC2IFP(sc))) {
604                 carpstats.carps_badvhid++;
605                 m_freem(m);
606                 return;
607         }
608
609         getmicrotime(&SC2IFP(sc)->if_lastchange);
610         SC2IFP(sc)->if_ipackets++;
611         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
612
613         if (SC2IFP(sc)->if_bpf) {
614                 struct ip *ip = mtod(m, struct ip *);
615
616                 /* BPF wants net byte order */
617                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
618                 ip->ip_off = htons(ip->ip_off);
619                 bpf_mtap(SC2IFP(sc)->if_bpf, m);
620         }
621
622         /* verify the CARP version. */
623         if (ch->carp_version != CARP_VERSION) {
624                 carpstats.carps_badver++;
625                 SC2IFP(sc)->if_ierrors++;
626                 CARP_LOG("%s; invalid version %d\n",
627                     SC2IFP(sc)->if_xname,
628                     ch->carp_version);
629                 m_freem(m);
630                 return;
631         }
632
633         /* verify the hash */
634         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
635                 carpstats.carps_badauth++;
636                 SC2IFP(sc)->if_ierrors++;
637                 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
638                 m_freem(m);
639                 return;
640         }
641
642         tmp_counter = ntohl(ch->carp_counter[0]);
643         tmp_counter = tmp_counter<<32;
644         tmp_counter += ntohl(ch->carp_counter[1]);
645
646         /* XXX Replay protection goes here */
647
648         sc->sc_init_counter = 0;
649         sc->sc_counter = tmp_counter;
650
651         sc_tv.tv_sec = sc->sc_advbase;
652         if (carp_suppress_preempt && sc->sc_advskew <  240)
653                 sc_tv.tv_usec = 240 * 1000000 / 256;
654         else
655                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
656         ch_tv.tv_sec = ch->carp_advbase;
657         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
658
659         switch (sc->sc_state) {
660         case INIT:
661                 break;
662
663         case MASTER:
664                 /*
665                  * If we receive an advertisement from a master who's going to
666                  * be more frequent than us, go into BACKUP state.
667                  */
668                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
669                     timevalcmp(&sc_tv, &ch_tv, ==)) {
670                         callout_stop(&sc->sc_ad_tmo);
671                         CARP_DEBUG("%s: MASTER -> BACKUP "
672                            "(more frequent advertisement received)\n",
673                            SC2IFP(sc)->if_xname);
674                         carp_set_state(sc, BACKUP);
675                         carp_setrun(sc, 0);
676                         carp_setroute(sc, RTM_DELETE);
677                 }
678                 break;
679
680         case BACKUP:
681                 /*
682                  * If we're pre-empting masters who advertise slower than us,
683                  * and this one claims to be slower, treat him as down.
684                  */
685                 if (carp_opts[CARPCTL_PREEMPT] &&
686                     timevalcmp(&sc_tv, &ch_tv, <)) {
687                         CARP_DEBUG("%s: BACKUP -> MASTER "
688                             "(preempting a slower master)\n",
689                             SC2IFP(sc)->if_xname);
690                         carp_master_down_locked(sc);
691                         break;
692                 }
693
694                 /*
695                  *  If the master is going to advertise at such a low frequency
696                  *  that he's guaranteed to time out, we'd might as well just
697                  *  treat him as timed out now.
698                  */
699                 sc_tv.tv_sec = sc->sc_advbase * 3;
700                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
701                         CARP_DEBUG("%s: BACKUP -> MASTER "
702                             "(master timed out)\n",
703                             SC2IFP(sc)->if_xname);
704                         carp_master_down_locked(sc);
705                         break;
706                 }
707
708                 /*
709                  * Otherwise, we reset the counter and wait for the next
710                  * advertisement.
711                  */
712                 carp_setrun(sc, af);
713                 break;
714         }
715         m_freem(m);
716 }
717
718 static int
719 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
720 {
721         struct m_tag *mtag;
722         struct ifnet *ifp = SC2IFP(sc);
723
724         if (sc->sc_init_counter) {
725                 /* this could also be seconds since unix epoch */
726                 sc->sc_counter = karc4random();
727                 sc->sc_counter = sc->sc_counter << 32;
728                 sc->sc_counter += karc4random();
729         } else {
730                 sc->sc_counter++;
731         }
732
733         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
734         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
735
736         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
737
738         /* Tag packet for carp_output */
739         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT);
740         if (mtag == NULL) {
741                 m_freem(m);
742                 SC2IFP(sc)->if_oerrors++;
743                 return (ENOMEM);
744         }
745         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
746         m_tag_prepend(m, mtag);
747
748         return (0);
749 }
750
751 static void
752 carp_send_ad_all(void)
753 {
754         struct carp_softc *sc;
755
756         LIST_FOREACH(sc, &carpif_list, sc_next) {
757                 if (sc->sc_carpdev == NULL)
758                         continue;
759
760                 if (CARP_IS_RUNNING(SC2IFP(sc)) && sc->sc_state == MASTER)
761                         carp_send_ad_locked(sc);
762         }
763 }
764
/*
 * Callout entry point for the advertisement timer; v is the carp softc
 * the timer belongs to.
 */
static void
carp_send_ad(void *v)
{
	carp_send_ad_locked((struct carp_softc *)v);
}
772
773 static void
774 carp_send_ad_locked(struct carp_softc *sc)
775 {
776         struct carp_header ch;
777         struct timeval tv;
778         struct carp_header *ch_ptr;
779         struct mbuf *m;
780         int len, advbase, advskew;
781
782         /* bow out if we've lost our UPness or RUNNINGuiness */
783         if (!CARP_IS_RUNNING(SC2IFP(sc))) {
784                 advbase = 255;
785                 advskew = 255;
786         } else {
787                 advbase = sc->sc_advbase;
788                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
789                         advskew = sc->sc_advskew;
790                 else
791                         advskew = 240;
792                 tv.tv_sec = advbase;
793                 tv.tv_usec = advskew * 1000000 / 256;
794         }
795
796         ch.carp_version = CARP_VERSION;
797         ch.carp_type = CARP_ADVERTISEMENT;
798         ch.carp_vhid = sc->sc_vhid;
799         ch.carp_advbase = advbase;
800         ch.carp_advskew = advskew;
801         ch.carp_authlen = 7;    /* XXX DEFINE */
802         ch.carp_pad1 = 0;       /* must be zero */
803         ch.carp_cksum = 0;
804
805 #ifdef INET
806         if (sc->sc_ia) {
807                 struct ip *ip;
808
809                 MGETHDR(m, M_NOWAIT, MT_HEADER);
810                 if (m == NULL) {
811                         SC2IFP(sc)->if_oerrors++;
812                         carpstats.carps_onomem++;
813                         /* XXX maybe less ? */
814                         if (advbase != 255 || advskew != 255)
815                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
816                                     carp_send_ad, sc);
817                         return;
818                 }
819                 len = sizeof(*ip) + sizeof(ch);
820                 m->m_pkthdr.len = len;
821                 m->m_pkthdr.rcvif = NULL;
822                 m->m_len = len;
823                 MH_ALIGN(m, m->m_len);
824                 m->m_flags |= M_MCAST;
825                 ip = mtod(m, struct ip *);
826                 ip->ip_v = IPVERSION;
827                 ip->ip_hl = sizeof(*ip) >> 2;
828                 ip->ip_tos = IPTOS_LOWDELAY;
829                 ip->ip_len = len;
830                 ip->ip_id = ip_newid();
831                 ip->ip_off = IP_DF;
832                 ip->ip_ttl = CARP_DFLTTL;
833                 ip->ip_p = IPPROTO_CARP;
834                 ip->ip_sum = 0;
835                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
836                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
837
838                 ch_ptr = (struct carp_header *)(&ip[1]);
839                 bcopy(&ch, ch_ptr, sizeof(ch));
840                 if (carp_prepare_ad(m, sc, ch_ptr))
841                         return;
842                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
843
844                 getmicrotime(&SC2IFP(sc)->if_lastchange);
845                 SC2IFP(sc)->if_opackets++;
846                 SC2IFP(sc)->if_obytes += len;
847                 carpstats.carps_opackets++;
848
849                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
850                         SC2IFP(sc)->if_oerrors++;
851                         if (sc->sc_sendad_errors < INT_MAX)
852                                 sc->sc_sendad_errors++;
853                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
854                                 carp_suppress_preempt++;
855                                 if (carp_suppress_preempt == 1) {
856                                         carp_send_ad_all();
857                                 }
858                         }
859                         sc->sc_sendad_success = 0;
860                 } else {
861                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
862                                 if (++sc->sc_sendad_success >=
863                                     CARP_SENDAD_MIN_SUCCESS) {
864                                         carp_suppress_preempt--;
865                                         sc->sc_sendad_errors = 0;
866                                 }
867                         } else {
868                                 sc->sc_sendad_errors = 0;
869                         }
870                 }
871         }
872 #endif /* INET */
873 #ifdef INET6
874         if (sc->sc_ia6) {
875                 struct ip6_hdr *ip6;
876
877                 MGETHDR(m, M_NOWAIT, MT_HEADER);
878                 if (m == NULL) {
879                         SC2IFP(sc)->if_oerrors++;
880                         carpstats.carps_onomem++;
881                         /* XXX maybe less ? */
882                         if (advbase != 255 || advskew != 255)
883                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
884                                     carp_send_ad, sc);
885                         return;
886                 }
887                 len = sizeof(*ip6) + sizeof(ch);
888                 m->m_pkthdr.len = len;
889                 m->m_pkthdr.rcvif = NULL;
890                 m->m_len = len;
891                 MH_ALIGN(m, m->m_len);
892                 m->m_flags |= M_MCAST;
893                 ip6 = mtod(m, struct ip6_hdr *);
894                 bzero(ip6, sizeof(*ip6));
895                 ip6->ip6_vfc |= IPV6_VERSION;
896                 ip6->ip6_hlim = CARP_DFLTTL;
897                 ip6->ip6_nxt = IPPROTO_CARP;
898                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
899                     sizeof(struct in6_addr));
900                 /* set the multicast destination */
901
902                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
903                 ip6->ip6_dst.s6_addr8[15] = 0x12;
904                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
905                         SC2IFP(sc)->if_oerrors++;
906                         m_freem(m);
907                         CARP_LOG("%s: in6_setscope failed\n", __func__);
908                         return;
909                 }
910
911                 ch_ptr = (struct carp_header *)(&ip6[1]);
912                 bcopy(&ch, ch_ptr, sizeof(ch));
913                 if (carp_prepare_ad(m, sc, ch_ptr))
914                         return;
915                 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
916
917                 getmicrotime(&SC2IFP(sc)->if_lastchange);
918                 SC2IFP(sc)->if_opackets++;
919                 SC2IFP(sc)->if_obytes += len;
920                 carpstats.carps_opackets6++;
921
922                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
923                         SC2IFP(sc)->if_oerrors++;
924                         if (sc->sc_sendad_errors < INT_MAX)
925                                 sc->sc_sendad_errors++;
926                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
927                                 carp_suppress_preempt++;
928                                 if (carp_suppress_preempt == 1) {
929                                         carp_send_ad_all();
930                                 }
931                         }
932                         sc->sc_sendad_success = 0;
933                 } else {
934                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
935                                 if (++sc->sc_sendad_success >=
936                                     CARP_SENDAD_MIN_SUCCESS) {
937                                         carp_suppress_preempt--;
938                                         sc->sc_sendad_errors = 0;
939                                 }
940                         } else {
941                                 sc->sc_sendad_errors = 0;
942                         }
943                 }
944         }
945 #endif /* INET6 */
946
947         if (advbase != 255 || advskew != 255)
948                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
949                     carp_send_ad, sc);
950 }
951
952 /*
953  * Broadcast a gratuitous ARP request containing
954  * the virtual router MAC address for each IP address
955  * associated with the virtual router.
956  */
957 static void
958 carp_send_arp(struct carp_softc *sc)
959 {
960         struct ifaddr_container *ifac;
961
962         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
963                 struct ifaddr *ifa = ifac->ifa;
964
965                 if (ifa->ifa_addr->sa_family != AF_INET)
966                         continue;
967                 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));        
968
969                 DELAY(1000);    /* XXX */
970         }
971 }
972
973 #ifdef INET6
974 static void
975 carp_send_na(struct carp_softc *sc)
976 {
977         struct ifaddr_container *ifac;
978         struct in6_addr *in6;
979         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
980
981         TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid], ifa_link) {
982                 struct ifaddr *ifa = ifac->ifa;
983
984                 if (ifa->ifa_addr->sa_family != AF_INET6)
985                         continue;
986
987                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
988                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
989                     ND_NA_FLAG_OVERRIDE, 1, NULL);
990                 DELAY(1000);    /* XXX */
991         }
992 }
993 #endif /* INET6 */
994
995 static int
996 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
997 {
998         struct carp_softc *vh;
999         int count = 0;
1000
1001         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1002                 if ((type == CARP_COUNT_RUNNING &&
1003                      CARP_IS_RUNNING(SC2IFP(vh))) ||
1004                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1005                         struct ifaddr_container *ifac;
1006
1007                         TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid],
1008                                       ifa_link) {
1009                                 struct ifaddr *ifa = ifac->ifa;
1010
1011                                 if (ifa->ifa_addr->sa_family == AF_INET &&
1012                                     ia->ia_addr.sin_addr.s_addr ==
1013                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1014                                         count++;
1015                         }
1016                 }
1017         }
1018         return (count);
1019 }
1020
1021 int
1022 carp_iamatch(void *v, struct in_ifaddr *ia,
1023     struct in_addr *isaddr, uint8_t **enaddr)
1024 {
1025         struct carp_if *cif = v;
1026         struct carp_softc *vh;
1027         int index, count = 0;
1028
1029         if (carp_opts[CARPCTL_ARPBALANCE]) {
1030                 /*
1031                  * XXX proof of concept implementation.
1032                  * We use the source ip to decide which virtual host should
1033                  * handle the request. If we're master of that virtual host,
1034                  * then we respond, otherwise, just drop the arp packet on
1035                  * the floor.
1036                  */
1037                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
1038                 if (count == 0) {
1039                         /* should never reach this */
1040                         return (0);
1041                 }
1042
1043                 /* this should be a hash, like pf_hash() */
1044                 index = ntohl(isaddr->s_addr) % count;
1045                 count = 0;
1046
1047                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1048                         if (CARP_IS_RUNNING(SC2IFP(vh))) {
1049                                 struct ifaddr_container *ifac;
1050
1051                                 TAILQ_FOREACH(ifac,
1052                                 &SC2IFP(vh)->if_addrheads[mycpuid], ifa_link) {
1053                                         struct ifaddr *ifa = ifac->ifa;
1054
1055                                         if (ifa->ifa_addr->sa_family ==
1056                                             AF_INET &&
1057                                             ia->ia_addr.sin_addr.s_addr ==
1058                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
1059                                                 if (count == index) {
1060                                                         if (vh->sc_state == MASTER) {
1061                                                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1062                                                                 return (1);
1063                                                         } else {
1064                                                                 return (0);
1065                                                         }
1066                                                 }
1067                                                 count++;
1068                                         }
1069                                 }
1070                         }
1071                 }
1072         } else {
1073                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1074                         if (CARP_IS_RUNNING(SC2IFP(vh)) &&
1075                             vh->sc_state == MASTER) {
1076                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1077                                 return (1);
1078                         }
1079                 }
1080         }
1081         return(0);
1082 }
1083
1084 #ifdef INET6
1085 struct ifaddr *
1086 carp_iamatch6(void *v, struct in6_addr *taddr)
1087 {
1088         struct carp_if *cif = v;
1089         struct carp_softc *vh;
1090
1091         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1092                 struct ifaddr_container *ifac;
1093
1094                 TAILQ_FOREACH(ifac, &SC2IFP(vh)->if_addrheads[mycpuid],
1095                               ifa_link) {
1096                         struct ifaddr *ifa = ifac->ifa;
1097
1098                         if (IN6_ARE_ADDR_EQUAL(taddr,
1099                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1100                             CARP_IS_RUNNING(SC2IFP(vh)) &&
1101                             vh->sc_state == MASTER) {
1102                                 return (ifa);
1103                         }
1104                 }
1105         }
1106         return (NULL);
1107 }
1108
1109 void *
1110 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1111 {
1112         struct m_tag *mtag;
1113         struct carp_if *cif = v;
1114         struct carp_softc *sc;
1115
1116         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1117                 struct ifaddr_container *ifac;
1118
1119                 TAILQ_FOREACH(ifac, &SC2IFP(sc)->if_addrheads[mycpuid],
1120                               ifa_link) {
1121                         struct ifaddr *ifa = ifac->ifa;
1122
1123                         if (IN6_ARE_ADDR_EQUAL(taddr,
1124                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1125                             CARP_IS_RUNNING(SC2IFP(sc))) {
1126                                 struct ifnet *ifp = SC2IFP(sc);
1127                                 mtag = m_tag_get(PACKET_TAG_CARP,
1128                                     sizeof(struct ifnet *), MB_DONTWAIT);
1129                                 if (mtag == NULL) {
1130                                         /* better a bit than nothing */
1131                                         return (IF_LLADDR(sc->sc_ifp));
1132                                 }
1133                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1134                                     sizeof(struct ifnet *));
1135                                 m_tag_prepend(m, mtag);
1136
1137                                 return (IF_LLADDR(sc->sc_ifp));
1138                         }
1139                 }
1140         }
1141         return (NULL);
1142 }
1143 #endif
1144
1145 struct ifnet *
1146 carp_forus(void *v, void *dhost)
1147 {
1148         struct carp_if *cif = v;
1149         struct carp_softc *vh;
1150         uint8_t *ena = dhost;
1151         
1152         /**
1153          * XXX: See here for check on MAC adr is not for virtual use
1154          *
1155          **/
1156
1157         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1158                 return (NULL);
1159
1160         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1161                 if (CARP_IS_RUNNING(SC2IFP(vh)) && vh->sc_state == MASTER &&
1162                     !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
1163                         return (SC2IFP(vh));
1164                 }
1165         }
1166         return (NULL);
1167 }
1168
1169 static void
1170 carp_master_down(void *v)
1171 {
1172         struct carp_softc *sc = v;
1173
1174         lwkt_serialize_enter(sc->sc_ifp->if_serializer);
1175         carp_master_down_locked(sc);
1176         lwkt_serialize_exit(sc->sc_ifp->if_serializer);
1177 }
1178
1179 static void
1180 carp_master_down_locked(struct carp_softc *sc)
1181 {
1182         switch (sc->sc_state) {
1183         case INIT:
1184                 kprintf("%s: master_down event in INIT state\n",
1185                     SC2IFP(sc)->if_xname);
1186                 break;
1187
1188         case MASTER:
1189                 break;
1190
1191         case BACKUP:
1192                 carp_set_state(sc, MASTER);
1193                 carp_send_ad_locked(sc);
1194                 carp_send_arp(sc);
1195 #ifdef INET6
1196                 carp_send_na(sc);
1197 #endif /* INET6 */
1198                 carp_setrun(sc, 0);
1199                 carp_setroute(sc, RTM_ADD);
1200                 break;
1201         }
1202 }
1203
1204 /*
1205  * When in backup state, af indicates whether to reset the master down timer
1206  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1207  */
1208 static void
1209 carp_setrun(struct carp_softc *sc, sa_family_t af)
1210 {
1211         struct timeval tv;
1212
1213         if (sc->sc_carpdev == NULL) {
1214                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1215                 carp_set_state(sc, INIT);
1216                 return;
1217         }
1218
1219         if (SC2IFP(sc)->if_flags & IFF_UP &&
1220             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6)) {
1221                 SC2IFP(sc)->if_flags |= IFF_RUNNING;
1222         } else {
1223                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1224                 carp_setroute(sc, RTM_DELETE);
1225                 return;
1226         }
1227
1228         switch (sc->sc_state) {
1229         case INIT:
1230                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1231                         carp_send_ad_locked(sc);
1232                         carp_send_arp(sc);
1233 #ifdef INET6
1234                         carp_send_na(sc);
1235 #endif /* INET6 */
1236                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1237                                    SC2IFP(sc)->if_xname);
1238                         carp_set_state(sc, MASTER);
1239                         carp_setroute(sc, RTM_ADD);
1240                 } else {
1241                         CARP_DEBUG("%s: INIT -> BACKUP\n",
1242                                    SC2IFP(sc)->if_xname);
1243                         carp_set_state(sc, BACKUP);
1244                         carp_setroute(sc, RTM_DELETE);
1245                         carp_setrun(sc, 0);
1246                 }
1247                 break;
1248
1249         case BACKUP:
1250                 callout_stop(&sc->sc_ad_tmo);
1251                 tv.tv_sec = 3 * sc->sc_advbase;
1252                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1253                 switch (af) {
1254 #ifdef INET
1255                 case AF_INET:
1256                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1257                             carp_master_down, sc);
1258                         break;
1259 #endif /* INET */
1260 #ifdef INET6
1261                 case AF_INET6:
1262                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1263                             carp_master_down, sc);
1264                         break;
1265 #endif /* INET6 */
1266                 default:
1267                         if (sc->sc_naddrs)
1268                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1269                                     carp_master_down, sc);
1270                         if (sc->sc_naddrs6)
1271                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1272                                     carp_master_down, sc);
1273                         break;
1274                 }
1275                 break;
1276
1277         case MASTER:
1278                 tv.tv_sec = sc->sc_advbase;
1279                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1280                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1281                     carp_send_ad, sc);
1282                 break;
1283         }
1284 }
1285
1286 static void
1287 carp_multicast_cleanup(struct carp_softc *sc)
1288 {
1289         struct ip_moptions *imo = &sc->sc_imo;
1290         uint16_t n = imo->imo_num_memberships;
1291
1292         /* Clean up our own multicast memberships */
1293         while (n-- > 0) {
1294                 if (imo->imo_membership[n] != NULL) {
1295                         in_delmulti(imo->imo_membership[n]);
1296                         imo->imo_membership[n] = NULL;
1297                 }
1298         }
1299         imo->imo_num_memberships = 0;
1300         imo->imo_multicast_ifp = NULL;
1301 }
1302
1303 #ifdef INET6
1304 static void
1305 carp_multicast6_cleanup(struct carp_softc *sc)
1306 {
1307         struct ip6_moptions *im6o = &sc->sc_im6o;
1308
1309         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1310                 struct in6_multi_mship *imm =
1311                     LIST_FIRST(&im6o->im6o_memberships);
1312
1313                 LIST_REMOVE(imm, i6mm_chain);
1314                 in6_leavegroup(imm);
1315         }
1316         im6o->im6o_multicast_ifp = NULL;
1317 }
1318 #endif
1319
1320 static int
1321 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1322 {
1323         struct ifnet *ifp;
1324         struct carp_if *cif;
1325         struct in_ifaddr *ia, *ia_if;
1326         struct in_ifaddr_container *iac;
1327         struct ip_moptions *imo = &sc->sc_imo;
1328         struct in_addr addr;
1329         u_long iaddr = htonl(sin->sin_addr.s_addr);
1330         int own, error;
1331         
1332         if (sin->sin_addr.s_addr == 0) {
1333                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1334                         carp_set_state(sc, INIT);
1335                 if (sc->sc_naddrs)
1336                         SC2IFP(sc)->if_flags |= IFF_UP;
1337                 carp_setrun(sc, 0);
1338                 return (0);
1339         }
1340         /* we have to do it by hands to check we won't match on us */
1341         ia_if = NULL; own = 0;
1342         TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1343                 ia = iac->ia;
1344
1345                 /* and, yeah, we need a multicast-capable iface too */
1346                 if (ia->ia_ifp != SC2IFP(sc) &&
1347                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1348                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1349                         if (!ia_if)
1350                                 ia_if = ia;
1351                         if (sin->sin_addr.s_addr ==
1352                             ia->ia_addr.sin_addr.s_addr)
1353                                 own++;
1354                 }
1355         }
1356
1357         if (!ia_if)
1358                 return (EADDRNOTAVAIL);
1359
1360         ia = ia_if;
1361         ifp = ia->ia_ifp;
1362
1363         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1364             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
1365                 return (EADDRNOTAVAIL);
1366
1367         if (imo->imo_num_memberships == 0) {
1368                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1369                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
1370                         return (ENOBUFS);
1371                 imo->imo_num_memberships++;
1372                 imo->imo_multicast_ifp = ifp;
1373                 imo->imo_multicast_ttl = CARP_DFLTTL;
1374                 imo->imo_multicast_loop = 0;
1375         }
1376
1377         if (!ifp->if_carp) {
1378                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
1379                     M_WAITOK|M_ZERO);
1380                 if ((error = ifpromisc(ifp, 1))) {
1381                         FREE(cif, M_CARP);
1382                         goto cleanup;
1383                 }
1384                 
1385                 cif->vhif_ifp = ifp;
1386                 TAILQ_INIT(&cif->vhif_vrs);
1387                 ifp->if_carp = cif;
1388         } else {
1389                 struct carp_softc *vr;
1390
1391                 cif = (struct carp_if *)ifp->if_carp;
1392                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1393                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1394                                 error = EINVAL;
1395                                 goto cleanup;
1396                         }
1397                 }
1398         }
1399         sc->sc_ia = ia;
1400         sc->sc_carpdev = ifp;
1401
1402         { /* XXX prevent endless loop if already in queue */
1403         struct carp_softc *vr, *after = NULL;
1404         int myself = 0;
1405         cif = (struct carp_if *)ifp->if_carp;
1406
1407         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1408                 if (vr == sc)
1409                         myself = 1;
1410                 if (vr->sc_vhid < sc->sc_vhid)
1411                         after = vr;
1412         }
1413
1414         if (!myself) {
1415                 /* We're trying to keep things in order */
1416                 if (after == NULL)
1417                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1418                 else
1419                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1420                 cif->vhif_nvrs++;
1421         }
1422         }
1423
1424         sc->sc_naddrs++;
1425         SC2IFP(sc)->if_flags |= IFF_UP;
1426         if (own)
1427                 sc->sc_advskew = 0;
1428
1429         carp_sc_state_locked(sc);
1430         carp_setrun(sc, 0);
1431
1432         return (0);
1433
1434 cleanup:
1435         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1436         return (error);
1437 }
1438
1439 static int
1440 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1441 {
1442         int error = 0;
1443
1444         if (!--sc->sc_naddrs) {
1445                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1446                 struct ip_moptions *imo = &sc->sc_imo;
1447
1448                 callout_stop(&sc->sc_ad_tmo);
1449                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1450                 sc->sc_vhid = -1;
1451                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1452                 imo->imo_multicast_ifp = NULL;
1453                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1454                 if (!--cif->vhif_nvrs) {
1455                         sc->sc_carpdev->if_carp = NULL;
1456                         FREE(cif, M_IFADDR);
1457                 }
1458         }
1459         return (error);
1460 }
1461
1462 #ifdef INET6
1463 static int
1464 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1465 {
1466         struct ifnet *ifp;
1467         struct carp_if *cif;
1468         struct in6_ifaddr *ia, *ia_if;
1469         struct ip6_moptions *im6o = &sc->sc_im6o;
1470         struct in6_multi_mship *imm;
1471         struct in6_addr in6;
1472         int own, error;
1473
1474         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1475                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1476                         carp_set_state(sc, INIT);
1477                 if (sc->sc_naddrs6)
1478                         SC2IFP(sc)->if_flags |= IFF_UP;
1479                 carp_setrun(sc, 0);
1480                 return (0);
1481         }
1482
1483         /* we have to do it by hands to check we won't match on us */
1484         ia_if = NULL; own = 0;
1485         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1486                 int i;
1487
1488                 for (i = 0; i < 4; i++) {
1489                         if ((sin6->sin6_addr.s6_addr32[i] &
1490                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1491                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1492                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1493                                 break;
1494                 }
1495                 /* and, yeah, we need a multicast-capable iface too */
1496                 if (ia->ia_ifp != SC2IFP(sc) &&
1497                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1498                     (i == 4)) {
1499                         if (!ia_if)
1500                                 ia_if = ia;
1501                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1502                             &ia->ia_addr.sin6_addr))
1503                                 own++;
1504                 }
1505         }
1506
1507         if (!ia_if)
1508                 return (EADDRNOTAVAIL);
1509         ia = ia_if;
1510         ifp = ia->ia_ifp;
1511
1512         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1513             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
1514                 return (EADDRNOTAVAIL);
1515
1516         if (!sc->sc_naddrs6) {
1517                 im6o->im6o_multicast_ifp = ifp;
1518
1519                 /* join CARP multicast address */
1520                 bzero(&in6, sizeof(in6));
1521                 in6.s6_addr16[0] = htons(0xff02);
1522                 in6.s6_addr8[15] = 0x12;
1523                 if (in6_setscope(&in6, ifp, NULL) != 0)
1524                         goto cleanup;
1525                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1526                         goto cleanup;
1527                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1528
1529                 /* join solicited multicast address */
1530                 bzero(&in6, sizeof(in6));
1531                 in6.s6_addr16[0] = htons(0xff02);
1532                 in6.s6_addr32[1] = 0;
1533                 in6.s6_addr32[2] = htonl(1);
1534                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
1535                 in6.s6_addr8[12] = 0xff;
1536                 if (in6_setscope(&in6, ifp, NULL) != 0)
1537                         goto cleanup;
1538                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1539                         goto cleanup;
1540                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1541         }
1542
1543         if (!ifp->if_carp) {
1544                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
1545                     M_WAITOK|M_ZERO);
1546                 if ((error = ifpromisc(ifp, 1))) {
1547                         FREE(cif, M_CARP);
1548                         goto cleanup;
1549                 }
1550
1551                 cif->vhif_ifp = ifp;
1552                 TAILQ_INIT(&cif->vhif_vrs);
1553                 ifp->if_carp = cif;
1554         } else {
1555                 struct carp_softc *vr;
1556
1557                 cif = (struct carp_if *)ifp->if_carp;
1558                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1559                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1560                                 error = EINVAL;
1561                                 goto cleanup;
1562                         }
1563                 }
1564         }
1565         sc->sc_ia6 = ia;
1566         sc->sc_carpdev = ifp;
1567
1568         { /* XXX prevent endless loop if already in queue */
1569         struct carp_softc *vr, *after = NULL;
1570         int myself = 0;
1571         cif = (struct carp_if *)ifp->if_carp;
1572
1573         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1574                 if (vr == sc)
1575                         myself = 1;
1576                 if (vr->sc_vhid < sc->sc_vhid)
1577                         after = vr;
1578         }
1579
1580         if (!myself) {
1581                 /* We're trying to keep things in order */
1582                 if (after == NULL)
1583                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1584                 else
1585                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1586                 cif->vhif_nvrs++;
1587         }
1588         }
1589
1590         sc->sc_naddrs6++;
1591         SC2IFP(sc)->if_flags |= IFF_UP;
1592         if (own)
1593                 sc->sc_advskew = 0;
1594         carp_sc_state_locked(sc);
1595         carp_setrun(sc, 0);
1596
1597         return (0);
1598
1599 cleanup:
1600         /* clean up multicast memberships */
1601         if (!sc->sc_naddrs6) {
1602                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1603                         imm = LIST_FIRST(&im6o->im6o_memberships);
1604                         LIST_REMOVE(imm, i6mm_chain);
1605                         in6_leavegroup(imm);
1606                 }
1607         }
1608         return (error);
1609 }
1610
1611 static int
1612 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1613 {
1614         int error = 0;
1615
1616         if (!--sc->sc_naddrs6) {
1617                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1618                 struct ip6_moptions *im6o = &sc->sc_im6o;
1619
1620                 callout_stop(&sc->sc_ad_tmo);
1621                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1622                 sc->sc_vhid = -1;
1623                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1624                         struct in6_multi_mship *imm =
1625                             LIST_FIRST(&im6o->im6o_memberships);
1626
1627                         LIST_REMOVE(imm, i6mm_chain);
1628                         in6_leavegroup(imm);
1629                 }
1630                 im6o->im6o_multicast_ifp = NULL;
1631                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1632                 if (!--cif->vhif_nvrs) {
1633                         sc->sc_carpdev->if_carp = NULL;
1634                         FREE(cif, M_IFADDR);
1635                 }
1636         }
1637         return (error);
1638 }
1639 #endif /* INET6 */
1640
1641 static int
1642 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
1643 {
1644         struct carp_softc *sc = ifp->if_softc, *vr;
1645         struct carpreq carpr;
1646         struct ifaddr *ifa;
1647         struct ifreq *ifr;
1648         struct ifaliasreq *ifra;
1649         int error = 0;
1650
1651         ifa = (struct ifaddr *)addr;
1652         ifra = (struct ifaliasreq *)addr;
1653         ifr = (struct ifreq *)addr;
1654
1655         switch (cmd) {
1656         case SIOCSIFADDR:
1657                 switch (ifa->ifa_addr->sa_family) {
1658 #ifdef INET
1659                 case AF_INET:
1660                         SC2IFP(sc)->if_flags |= IFF_UP;
1661                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1662                             sizeof(struct sockaddr));
1663                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1664                         break;
1665 #endif /* INET */
1666 #ifdef INET6
1667                 case AF_INET6:
1668                         SC2IFP(sc)->if_flags |= IFF_UP;
1669                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1670                         break;
1671 #endif /* INET6 */
1672                 default:
1673                         error = EAFNOSUPPORT;
1674                         break;
1675                 }
1676                 break;
1677
1678         case SIOCAIFADDR:
1679                 switch (ifa->ifa_addr->sa_family) {
1680 #ifdef INET
1681                 case AF_INET:
1682                         SC2IFP(sc)->if_flags |= IFF_UP;
1683                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1684                             sizeof(struct sockaddr));
1685                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
1686                         break;
1687 #endif /* INET */
1688 #ifdef INET6
1689                 case AF_INET6:
1690                         SC2IFP(sc)->if_flags |= IFF_UP;
1691                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
1692                         break;
1693 #endif /* INET6 */
1694                 default:
1695                         error = EAFNOSUPPORT;
1696                         break;
1697                 }
1698                 break;
1699
1700         case SIOCDIFADDR:
1701                 switch (ifa->ifa_addr->sa_family) {
1702 #ifdef INET
1703                 case AF_INET:
1704                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
1705                         break;
1706 #endif /* INET */
1707 #ifdef INET6
1708                 case AF_INET6:
1709                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
1710                         break;
1711 #endif /* INET6 */
1712                 default:
1713                         error = EAFNOSUPPORT;
1714                         break;
1715                 }
1716                 break;
1717
1718         case SIOCSIFFLAGS:
1719                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1720                         callout_stop(&sc->sc_ad_tmo);
1721                         callout_stop(&sc->sc_md_tmo);
1722                         callout_stop(&sc->sc_md6_tmo);
1723                         if (sc->sc_state == MASTER)
1724                                 carp_send_ad_locked(sc);
1725                         carp_set_state(sc, INIT);
1726                         carp_setrun(sc, 0);
1727                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
1728                         SC2IFP(sc)->if_flags |= IFF_UP;
1729                         carp_setrun(sc, 0);
1730                 }
1731                 break;
1732
1733         case SIOCSVH:
1734                 error = suser_cred(cr, NULL_CRED_OKAY);
1735                 if (error)
1736                         break;
1737                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1738                         break;
1739                 error = 1;
1740                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1741                         switch (carpr.carpr_state) {
1742                         case BACKUP:
1743                                 callout_stop(&sc->sc_ad_tmo);
1744                                 carp_set_state(sc, BACKUP);
1745                                 carp_setrun(sc, 0);
1746                                 carp_setroute(sc, RTM_DELETE);
1747                                 break;
1748
1749                         case MASTER:
1750                                 carp_master_down_locked(sc);
1751                                 break;
1752
1753                         default:
1754                                 break;
1755                         }
1756                 }
1757                 if (carpr.carpr_vhid > 0) {
1758                         if (carpr.carpr_vhid > 255) {
1759                                 error = EINVAL;
1760                                 break;
1761                         }
1762                         if (sc->sc_carpdev) {
1763                                 struct carp_if *cif;
1764                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1765                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1766                                         if (vr != sc &&
1767                                             vr->sc_vhid == carpr.carpr_vhid)
1768                                                 return EEXIST;
1769                                 }
1770                         }
1771                         sc->sc_vhid = carpr.carpr_vhid;
1772                         IF_LLADDR(sc->sc_ifp)[0] = 0;
1773                         IF_LLADDR(sc->sc_ifp)[1] = 0;
1774                         IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
1775                         IF_LLADDR(sc->sc_ifp)[3] = 0;
1776                         IF_LLADDR(sc->sc_ifp)[4] = 1;
1777                         IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
1778                         error--;
1779                 }
1780                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
1781                         if (carpr.carpr_advskew >= 255) {
1782                                 error = EINVAL;
1783                                 break;
1784                         }
1785                         if (carpr.carpr_advbase > 255) {
1786                                 error = EINVAL;
1787                                 break;
1788                         }
1789                         sc->sc_advbase = carpr.carpr_advbase;
1790                         sc->sc_advskew = carpr.carpr_advskew;
1791                         error--;
1792                 }
1793                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1794                 if (error > 0) {
1795                         error = EINVAL;
1796                 } else {
1797                         error = 0;
1798                         carp_setrun(sc, 0);
1799                 }
1800                 break;
1801
1802         case SIOCGVH:
1803                 bzero(&carpr, sizeof(carpr));
1804                 carpr.carpr_state = sc->sc_state;
1805                 carpr.carpr_vhid = sc->sc_vhid;
1806                 carpr.carpr_advbase = sc->sc_advbase;
1807                 carpr.carpr_advskew = sc->sc_advskew;
1808                 error = suser_cred(cr, NULL_CRED_OKAY);
1809                 if (error == 0) {
1810                         bcopy(sc->sc_key, carpr.carpr_key,
1811                             sizeof(carpr.carpr_key));
1812                 }
1813                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1814                 break;
1815
1816         default:
1817                 error = EINVAL;
1818         }
1819         carp_hmac_prepare(sc);
1820         return (error);
1821 }
1822
1823 /*
1824  * XXX: this is looutput. We should eventually use it from there.
1825  */
1826 static int
1827 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1828     struct rtentry *rt)
1829 {
1830         uint32_t af;
1831
1832         M_ASSERTPKTHDR(m); /* check if we have the packet header */
1833
1834         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
1835                 m_freem(m);
1836                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
1837                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1838         }
1839
1840         ifp->if_opackets++;
1841         ifp->if_obytes += m->m_pkthdr.len;
1842
1843         /* BPF writes need to be handled specially. */
1844         if (dst->sa_family == AF_UNSPEC) {
1845                 bcopy(dst->sa_data, &af, sizeof(af));
1846                 dst->sa_family = af;
1847         }
1848
1849 #if 1   /* XXX */
1850         switch (dst->sa_family) {
1851         case AF_INET:
1852         case AF_INET6:
1853         case AF_IPX:
1854         case AF_APPLETALK:
1855                 break;
1856
1857         default:
1858                 m_freem(m);
1859                 return (EAFNOSUPPORT);
1860         }
1861 #endif
1862         return (if_simloop(ifp, m, dst->sa_family, 0));
1863 }
1864
/*
 * Start output on carp interface. This function should never be called;
 * a kernel built with DEBUG logs the unexpected invocation, otherwise
 * the call is a no-op.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
        kprintf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
1875
1876 int
1877 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
1878     struct rtentry *rt)
1879 {
1880         struct m_tag *mtag;
1881         struct carp_softc *sc;
1882         struct ifnet *carp_ifp;
1883         struct ether_header *eh;
1884
1885         if (!sa)
1886                 return (0);
1887
1888         switch (sa->sa_family) {
1889 #ifdef INET
1890         case AF_INET:
1891                 break;
1892 #endif /* INET */
1893 #ifdef INET6
1894         case AF_INET6:
1895                 break;
1896 #endif /* INET6 */
1897         default:
1898                 return (0);
1899         }
1900
1901         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1902         if (mtag == NULL)
1903                 return (0);
1904
1905         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
1906         sc = carp_ifp->if_softc;
1907
1908         /* Set the source MAC address to Virtual Router MAC Address */
1909         switch (ifp->if_type) {
1910         case IFT_ETHER:
1911         case IFT_L2VLAN:
1912                 eh = mtod(m, struct ether_header *);
1913                 eh->ether_shost[0] = 0;
1914                 eh->ether_shost[1] = 0;
1915                 eh->ether_shost[2] = 0x5e;
1916                 eh->ether_shost[3] = 0;
1917                 eh->ether_shost[4] = 1;
1918                 eh->ether_shost[5] = sc->sc_vhid;
1919                 break;
1920
1921         default:
1922                 if_printf(ifp, "carp is not supported for this "
1923                           "interface type\n");
1924                 return (EOPNOTSUPP);
1925         }
1926         return (0);
1927 }
1928
1929 static void
1930 carp_set_state(struct carp_softc *sc, int state)
1931 {
1932         if (sc->sc_state == state)
1933                 return;
1934
1935         sc->sc_state = state;
1936         switch (state) {
1937         case BACKUP:
1938                 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN;
1939                 break;
1940
1941         case MASTER:
1942                 SC2IFP(sc)->if_link_state = LINK_STATE_UP;
1943                 break;
1944
1945         default:
1946                 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN;
1947                 break;
1948         }
1949         rt_ifmsg(SC2IFP(sc));
1950 }
1951
/*
 * Public entry point taking the carp_if as an untyped pointer; it simply
 * forwards to carp_carpdev_state_locked().
 */
void
carp_carpdev_state(void *v)
{
        carp_carpdev_state_locked(v);
}
1959
1960 static void
1961 carp_carpdev_state_locked(struct carp_if *cif)
1962 {
1963         struct carp_softc *sc;
1964
1965         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
1966                 carp_sc_state_locked(sc);
1967 }
1968
1969 static void
1970 carp_sc_state_locked(struct carp_softc *sc)
1971 {
1972         if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
1973                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
1974                 SC2IFP(sc)->if_flags &= ~(IFF_UP | IFF_RUNNING);
1975                 callout_stop(&sc->sc_ad_tmo);
1976                 callout_stop(&sc->sc_md_tmo);
1977                 callout_stop(&sc->sc_md6_tmo);
1978                 carp_set_state(sc, INIT);
1979                 carp_setrun(sc, 0);
1980                 if (!sc->sc_suppress) {
1981                         carp_suppress_preempt++;
1982                         if (carp_suppress_preempt == 1)
1983                                 carp_send_ad_all();
1984                 }
1985                 sc->sc_suppress = 1;
1986         } else {
1987                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
1988                 carp_set_state(sc, INIT);
1989                 carp_setrun(sc, 0);
1990                 if (sc->sc_suppress)
1991                         carp_suppress_preempt--;
1992                 sc->sc_suppress = 0;
1993         }
1994 }
1995
1996 static int
1997 carp_modevent(module_t mod, int type, void *data)
1998 {
1999         switch (type) {
2000         case MOD_LOAD:
2001                 LIST_INIT(&carpif_list);
2002                 carp_ifdetach_event =
2003                 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
2004                                       EVENTHANDLER_PRI_ANY);
2005                 if_clone_attach(&carp_cloner);
2006                 break;
2007
2008         case MOD_UNLOAD:
2009                 EVENTHANDLER_DEREGISTER(ifnet_detach_event,
2010                                         carp_ifdetach_event);
2011                 if_clone_detach(&carp_cloner);
2012                 break;
2013
2014         default:
2015                 return (EINVAL);
2016         }
2017         return (0);
2018 }
2019
2020 static moduledata_t carp_mod = {
2021         "carp",
2022         carp_modevent,
2023         0
2024 };
2025 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);