- Split if_clone.c out of if.c, license in if.c is attached to if_clone.c
[dragonfly.git] / sys / netinet / ip_carp.c
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  * $DragonFly: src/sys/netinet/ip_carp.c,v 1.6 2008/01/11 11:59:40 sephe Exp $
29  */
30
31 #include "opt_carp.h"
32 /*#include "opt_bpf.h"*/
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <machine/limits.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52 #include <sys/in_cksum.h>
53 #include <sys/socket.h>
54 #include <sys/vnode.h>
55
56 #include <machine/stdarg.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/route.h>
64 #include <net/if_clone.h>
65
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82
83 #include <crypto/sha1.h>
84 #include <netinet/ip_carp.h>
85 #include <sys/lock.h>
86
87 #define CARP_IFNAME     "carp"
88 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
89 static MALLOC_DEFINE(M_IFNET, "IFNET", "IFNET CARP?");
90 SYSCTL_DECL(_net_inet_carp);
91
92 struct carp_softc {
93         struct ifnet            *sc_ifp;        /* Interface clue */
94         struct ifnet            *sc_carpdev;    /* Pointer to parent interface */
95         struct in_ifaddr        *sc_ia;         /* primary iface address */
96         struct ip_moptions       sc_imo;
97 #ifdef INET6
98         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
99         struct ip6_moptions      sc_im6o;
100 #endif /* INET6 */
101         TAILQ_ENTRY(carp_softc)  sc_list;
102
103         enum { INIT = 0, BACKUP, MASTER }       sc_state;
104
105         int                      sc_flags_backup;
106         int                      sc_suppress;
107
108         int                      sc_sendad_errors;
109 #define CARP_SENDAD_MAX_ERRORS  3
110         int                      sc_sendad_success;
111 #define CARP_SENDAD_MIN_SUCCESS 3
112
113         int                      sc_vhid;
114         int                      sc_advskew;
115         int                      sc_naddrs;
116         int                      sc_naddrs6;
117         int                      sc_advbase;    /* seconds */
118         int                      sc_init_counter;
119         u_int64_t                sc_counter;
120
121         /* authentication */
122 #define CARP_HMAC_PAD   64
123         unsigned char sc_key[CARP_KEY_LEN];
124         unsigned char sc_pad[CARP_HMAC_PAD];
125         SHA1_CTX sc_sha1;
126
127         struct callout           sc_ad_tmo;     /* advertisement timeout */
128         struct callout           sc_md_tmo;     /* master down timeout */
129         struct callout           sc_md6_tmo;    /* master down timeout */
130         
131         LIST_ENTRY(carp_softc)   sc_next;       /* Interface clue */
132 };
133 #define SC2IFP(sc)      ((sc)->sc_ifp)
134
135 int carp_suppress_preempt = 0;
136 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };    /* XXX for now */
137 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
138     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
139 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
140     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
141 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
142     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
143 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
144     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
145 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
146     &carp_suppress_preempt, 0, "Preemption is suppressed");
147
148 struct carpstats carpstats;
149 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
150     &carpstats, carpstats,
151     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
152
153 struct carp_if {
154         TAILQ_HEAD(, carp_softc) vhif_vrs;
155         int vhif_nvrs;
156
157         struct ifnet    *vhif_ifp;
158         struct lock     vhif_lock;
159 };
160
/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
#define SC2CIF(sc)		((struct carp_if *)(sc)->sc_carpdev->if_carp)

/*
 * Locking wrappers around the per-interface lockmgr lock.
 *
 * NOTE: each of these expands to a complete statement, i.e. the
 * expansion already carries its own trailing ';'.  The DESTROY and
 * ASSERT variants expand to an empty statement.
 */
#define CARP_LOCK_INIT(cif)	lockinit(&(cif)->vhif_lock, "carp_if", 0,  LK_NOWAIT);
#define CARP_LOCK_DESTROY(cif)	;
#define CARP_LOCK_ASSERT(cif)	;
#define CARP_LOCK(cif)		lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE);
#define CARP_UNLOCK(cif)	lockmgr(&(cif)->vhif_lock, LK_RELEASE);

/* Same lock, reached from a softc; requires sc->sc_carpdev != NULL. */
#define CARP_SCLOCK(sc)		lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE);
#define CARP_SCUNLOCK(sc)	lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE);
#define CARP_SCLOCK_ASSERT(sc)	;

/* Emit a LOG_INFO message when net.inet.carp.log is at least 1. */
#define CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 0)			\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

/* Emit a LOG_DEBUG message when net.inet.carp.log is at least 2. */
#define CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 1)			\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)
183
184 static void     carp_hmac_prepare(struct carp_softc *);
185 static void     carp_hmac_generate(struct carp_softc *, u_int32_t *,
186                     unsigned char *);
187 static int      carp_hmac_verify(struct carp_softc *, u_int32_t *,
188                     unsigned char *);
189 static void     carp_setroute(struct carp_softc *, int);
190 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
191 static int      carp_clone_create(struct if_clone *, int);
192 static void     carp_clone_destroy(struct ifnet *);
193 static void     carpdetach(struct carp_softc *, int);
194 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
195                     struct carp_header *);
196 static void     carp_send_ad_all(void);
197 static void     carp_send_ad(void *);
198 static void     carp_send_ad_locked(struct carp_softc *);
199 static void     carp_send_arp(struct carp_softc *);
200 static void     carp_master_down(void *);
201 static void     carp_master_down_locked(struct carp_softc *);
202 static int      carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
203 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
204                     struct rtentry *);
205 static void     carp_start(struct ifnet *);
206 static void     carp_setrun(struct carp_softc *, sa_family_t);
207 static void     carp_set_state(struct carp_softc *, int);
208 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
209 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
210
211 static void     carp_multicast_cleanup(struct carp_softc *);
212 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
213 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
214 static void     carp_carpdev_state_locked(struct carp_if *);
215 static void     carp_sc_state_locked(struct carp_softc *);
216 #ifdef INET6
217 static void     carp_send_na(struct carp_softc *);
218 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
219 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
220 static void     carp_multicast6_cleanup(struct carp_softc *);
221 #endif
222
223 static LIST_HEAD(, carp_softc) carpif_list;
224
225 struct if_clone carp_cloner = IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 0, IF_MAXUNIT);
226
227 static eventhandler_tag if_detach_event_tag;
228
229 static __inline u_int16_t
230 carp_cksum(struct mbuf *m, int len)
231 {
232         return (in_cksum(m, len));
233 }
234
235 static void
236 carp_hmac_prepare(struct carp_softc *sc)
237 {
238         u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
239         u_int8_t vhid = sc->sc_vhid & 0xff;
240         struct ifaddr *ifa;
241         int i;
242 #ifdef INET6
243         struct in6_addr in6;
244 #endif
245
246         if (sc->sc_carpdev)
247                 CARP_SCLOCK(sc);
248
249         /* XXX: possible race here */
250
251         /* compute ipad from key */
252         bzero(sc->sc_pad, sizeof(sc->sc_pad));
253         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
254         for (i = 0; i < sizeof(sc->sc_pad); i++)
255                 sc->sc_pad[i] ^= 0x36;
256
257         /* precompute first part of inner hash */
258         SHA1Init(&sc->sc_sha1);
259         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
260         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
261         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
262         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
263 #ifdef INET
264         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
265                 if (ifa->ifa_addr->sa_family == AF_INET)
266                         SHA1Update(&sc->sc_sha1,
267                             (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
268                             sizeof(struct in_addr));
269         }
270 #endif /* INET */
271 #ifdef INET6
272         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
273                 if (ifa->ifa_addr->sa_family == AF_INET6) {
274                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
275                         in6_clearscope(&in6);
276                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
277                 }
278         }
279 #endif /* INET6 */
280
281         /* convert ipad to opad */
282         for (i = 0; i < sizeof(sc->sc_pad); i++)
283                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
284
285         if (sc->sc_carpdev)
286                 CARP_SCUNLOCK(sc);
287 }
288
289 static void
290 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
291     unsigned char md[20])
292 {
293         SHA1_CTX sha1ctx;
294
295         /* fetch first half of inner hash */
296         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
297
298         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
299         SHA1Final(md, &sha1ctx);
300
301         /* outer hash */
302         SHA1Init(&sha1ctx);
303         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
304         SHA1Update(&sha1ctx, md, 20);
305         SHA1Final(md, &sha1ctx);
306 }
307
308 static int
309 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
310     unsigned char md[20])
311 {
312         unsigned char md2[20];
313
314         CARP_SCLOCK_ASSERT(sc);
315
316         carp_hmac_generate(sc, counter, md2);
317
318         return (bcmp(md, md2, sizeof(md2)));
319 }
320
321 static void
322 carp_setroute(struct carp_softc *sc, int cmd)
323 {
324         struct ifaddr *ifa;
325
326         if (sc->sc_carpdev)
327                 CARP_SCLOCK_ASSERT(sc);
328
329         crit_enter();
330         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
331                 if (ifa->ifa_addr->sa_family == AF_INET &&
332                     sc->sc_carpdev != NULL) {
333                         int count = carp_addrcount(
334                             (struct carp_if *)sc->sc_carpdev->if_carp,
335                             ifatoia(ifa), CARP_COUNT_MASTER);
336
337                         if ((cmd == RTM_ADD && count == 1) ||
338                             (cmd == RTM_DELETE && count == 0))
339                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
340                 }
341 #ifdef INET6
342                 if (ifa->ifa_addr->sa_family == AF_INET6) {
343                         if (cmd == RTM_ADD)
344                                 in6_ifaddloop(ifa);
345                         else
346                                 in6_ifremloop(ifa);
347                 }
348 #endif /* INET6 */
349         }
350         crit_exit();
351
352 }
353
354 static int
355 carp_clone_create(struct if_clone *ifc, int unit)
356 {
357
358         struct carp_softc *sc;
359         struct ifnet *ifp;
360         
361         MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); 
362         ifp = SC2IFP(sc) = kmalloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO); 
363
364         sc->sc_flags_backup = 0;
365         sc->sc_suppress = 0;
366         sc->sc_advbase = CARP_DFLTINTV;
367         sc->sc_vhid = -1;       /* required setting */
368         sc->sc_advskew = 0;
369         sc->sc_init_counter = 1;
370         sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
371
372 #ifdef INET6
373         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
374 #endif
375
376 /*      sc->sc_imo.imo_membership = kmalloc((sizeof(struct in_multi) * IP_MAX_MEMBERSHIPS), M_CARP,M_WAITOK);*/
377 /*
378         sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
379         sc->sc_imo.imo_multicast_vif = -1;
380 */
381         callout_init(&sc->sc_ad_tmo);
382         callout_init(&sc->sc_md_tmo);
383         callout_init(&sc->sc_md6_tmo);
384
385         ifp->if_softc = sc;
386         if_initname(ifp, CARP_IFNAME, unit);    
387         ifp->if_mtu = ETHERMTU;
388         ifp->if_flags = IFF_LOOPBACK;
389         ifp->if_ioctl = carp_ioctl;
390         ifp->if_output = carp_looutput;
391         ifp->if_start = carp_start;
392         ifp->if_type = IFT_CARP;
393         ifp->if_snd.ifq_maxlen = ifqmaxlen;
394         ifp->if_hdrlen = 0;
395         if_attach(ifp, NULL);
396         bpfattach(ifp, DLT_NULL, sizeof(u_int));
397
398         crit_enter();
399         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
400         crit_exit();
401
402         return (0);
403 }
404
405 static void
406 carp_clone_destroy(struct ifnet *ifp)
407 {
408         struct carp_softc *sc = ifp->if_softc;
409
410         if (sc->sc_carpdev)
411                 CARP_SCLOCK(sc);
412         carpdetach(sc, 1);      /* Returns unlocked. */
413
414         crit_enter();
415         LIST_REMOVE(sc, sc_next);
416         crit_exit();
417         bpfdetach(ifp);
418         if_detach(ifp);
419 /*      if_free_type(ifp, IFT_ETHER);*/
420 /*      kfree(sc->sc_imo.imo_membership, M_CARP); */
421         kfree(sc, M_CARP);
422 }
423
424 /*
425  * This function can be called on CARP interface destroy path,
426  * and in case of the removal of the underlying interface as
427  * well. We differentiate these two cases. In the latter case
428  * we do not cleanup our multicast memberships, since they
429  * are already freed. Also, in the latter case we do not
430  * release the lock on return, because the function will be
431  * called once more, for another CARP instance on the same
432  * interface.
433  */
434 static void
435 carpdetach(struct carp_softc *sc, int unlock)
436 {
437         struct carp_if *cif;
438
439         callout_stop(&sc->sc_ad_tmo);
440         callout_stop(&sc->sc_md_tmo);
441         callout_stop(&sc->sc_md6_tmo);
442
443         if (sc->sc_suppress)
444                 carp_suppress_preempt--;
445         sc->sc_suppress = 0;
446
447         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
448                 carp_suppress_preempt--;
449         sc->sc_sendad_errors = 0;
450
451         carp_set_state(sc, INIT);
452         SC2IFP(sc)->if_flags &= ~IFF_UP;
453         carp_setrun(sc, 0);
454         if (unlock)
455                 carp_multicast_cleanup(sc);
456 #ifdef INET6
457         carp_multicast6_cleanup(sc);
458 #endif
459
460         if (sc->sc_carpdev != NULL) {
461                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
462                 CARP_LOCK_ASSERT(cif);
463                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
464                 if (!--cif->vhif_nvrs) {
465                         ifpromisc(sc->sc_carpdev, 0);
466                         sc->sc_carpdev->if_carp = NULL;
467                         CARP_LOCK_DESTROY(cif);
468                         FREE(cif, M_IFADDR);
469                 } else if (unlock)
470                         CARP_UNLOCK(cif);
471                 sc->sc_carpdev = NULL;
472         }
473 }
474
475 /* Detach an interface from the carp. */
476 static void
477 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
478 {
479         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
480         struct carp_softc *sc, *nextsc;
481
482         if (cif == NULL)
483                 return;
484
485         /*
486          * XXX: At the end of for() cycle the lock will be destroyed.
487          */
488         CARP_LOCK(cif);
489         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
490                 nextsc = TAILQ_NEXT(sc, sc_list);
491                 carpdetach(sc, 0);
492         }
493         CARP_UNLOCK(cif);
494 }
495
496 /*
497  * process input packet.
498  * we have rearranged checks order compared to the rfc,
499  * but it seems more efficient this way or not possible otherwise.
500  */
501 void
502 carp_input(struct mbuf *m, int hlen)
503 {
504         struct ip *ip = mtod(m, struct ip *);
505         struct carp_header *ch;
506         int iplen, len;
507
508         carpstats.carps_ipackets++;
509
510         if (!carp_opts[CARPCTL_ALLOW]) {
511                 m_freem(m);
512                 return;
513         }
514
515         /* check if received on a valid carp interface */
516         if (m->m_pkthdr.rcvif->if_carp == NULL) {
517                 carpstats.carps_badif++;
518                 CARP_LOG("carp_input: packet received on non-carp "
519                     "interface: %s\n",
520                     m->m_pkthdr.rcvif->if_xname);
521                 m_freem(m);
522                 return;
523         }
524
525         /* verify that the IP TTL is 255.  */
526         if (ip->ip_ttl != CARP_DFLTTL) {
527                 carpstats.carps_badttl++;
528                 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
529                     ip->ip_ttl,
530                     m->m_pkthdr.rcvif->if_xname);
531                 m_freem(m);
532                 return;
533         }
534
535         iplen = ip->ip_hl << 2;
536
537         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
538                 carpstats.carps_badlen++;
539                 CARP_LOG("carp_input: received len %zd < "
540                     "sizeof(struct carp_header)\n",
541                     m->m_len - sizeof(struct ip));
542                 m_freem(m);
543                 return;
544         }
545
546         if (iplen + sizeof(*ch) < m->m_len) {
547                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
548                         carpstats.carps_hdrops++;
549                         CARP_LOG("carp_input: pullup failed\n");
550                         return;
551                 }
552                 ip = mtod(m, struct ip *);
553         }
554         ch = (struct carp_header *)((char *)ip + iplen);
555
556         /*
557          * verify that the received packet length is
558          * equal to the CARP header
559          */
560         len = iplen + sizeof(*ch);
561         if (len > m->m_pkthdr.len) {
562                 carpstats.carps_badlen++;
563                 CARP_LOG("carp_input: packet too short %d on %s\n",
564                     m->m_pkthdr.len,
565                     m->m_pkthdr.rcvif->if_xname);
566                 m_freem(m);
567                 return;
568         }
569
570         if ((m = m_pullup(m, len)) == NULL) {
571                 carpstats.carps_hdrops++;
572                 return;
573         }
574         ip = mtod(m, struct ip *);
575         ch = (struct carp_header *)((char *)ip + iplen);
576
577         /* verify the CARP checksum */
578         m->m_data += iplen;
579         if (carp_cksum(m, len - iplen)) {
580                 carpstats.carps_badsum++;
581                 CARP_LOG("carp_input: checksum failed on %s\n",
582                     m->m_pkthdr.rcvif->if_xname);
583                 m_freem(m);
584                 return;
585         }
586         m->m_data -= iplen;
587
588         carp_input_c(m, ch, AF_INET);
589 }
590
591 #ifdef INET6
592 int
593 carp6_input(struct mbuf **mp, int *offp, int proto)
594 {
595         struct mbuf *m = *mp;
596         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
597         struct carp_header *ch;
598         u_int len;
599
600         carpstats.carps_ipackets6++;
601
602         if (!carp_opts[CARPCTL_ALLOW]) {
603                 m_freem(m);
604                 return (IPPROTO_DONE);
605         }
606
607         /* check if received on a valid carp interface */
608         if (m->m_pkthdr.rcvif->if_carp == NULL) {
609                 carpstats.carps_badif++;
610                 CARP_LOG("carp6_input: packet received on non-carp "
611                     "interface: %s\n",
612                     m->m_pkthdr.rcvif->if_xname);
613                 m_freem(m);
614                 return (IPPROTO_DONE);
615         }
616
617         /* verify that the IP TTL is 255 */
618         if (ip6->ip6_hlim != CARP_DFLTTL) {
619                 carpstats.carps_badttl++;
620                 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
621                     ip6->ip6_hlim,
622                     m->m_pkthdr.rcvif->if_xname);
623                 m_freem(m);
624                 return (IPPROTO_DONE);
625         }
626
627         /* verify that we have a complete carp packet */
628         len = m->m_len;
629         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
630         if (ch == NULL) {
631                 carpstats.carps_badlen++;
632                 CARP_LOG("carp6_input: packet size %u too small\n", len);
633                 return (IPPROTO_DONE);
634         }
635
636
637         /* verify the CARP checksum */
638         m->m_data += *offp;
639         if (carp_cksum(m, sizeof(*ch))) {
640                 carpstats.carps_badsum++;
641                 CARP_LOG("carp6_input: checksum failed, on %s\n",
642                     m->m_pkthdr.rcvif->if_xname);
643                 m_freem(m);
644                 return (IPPROTO_DONE);
645         }
646         m->m_data -= *offp;
647
648         carp_input_c(m, ch, AF_INET6);
649         return (IPPROTO_DONE);
650 }
651 #endif /* INET6 */
652
653 static void
654 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
655 {
656         struct ifnet *ifp = m->m_pkthdr.rcvif;
657         struct carp_softc *sc;
658         u_int64_t tmp_counter;
659         struct timeval sc_tv, ch_tv;
660
661         /* verify that the VHID is valid on the receiving interface */
662         CARP_LOCK(ifp->if_carp);
663         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
664                 if (sc->sc_vhid == ch->carp_vhid)
665                         break;
666
667         if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING))) {
668                 carpstats.carps_badvhid++;
669                 CARP_UNLOCK(ifp->if_carp);
670                 m_freem(m);
671                 return;
672         }
673
674         getmicrotime(&SC2IFP(sc)->if_lastchange);
675         SC2IFP(sc)->if_ipackets++;
676         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
677
678         if (SC2IFP(sc)->if_bpf) {
679                 struct ip *ip = mtod(m, struct ip *);
680
681                 /* BPF wants net byte order */
682                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
683                 ip->ip_off = htons(ip->ip_off);
684                 bpf_mtap(SC2IFP(sc)->if_bpf, m);
685         }
686
687         /* verify the CARP version. */
688         if (ch->carp_version != CARP_VERSION) {
689                 carpstats.carps_badver++;
690                 SC2IFP(sc)->if_ierrors++;
691                 CARP_UNLOCK(ifp->if_carp);
692                 CARP_LOG("%s; invalid version %d\n",
693                     SC2IFP(sc)->if_xname,
694                     ch->carp_version);
695                 m_freem(m);
696                 return;
697         }
698
699         /* verify the hash */
700         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
701                 carpstats.carps_badauth++;
702                 SC2IFP(sc)->if_ierrors++;
703                 CARP_UNLOCK(ifp->if_carp);
704                 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
705                 m_freem(m);
706                 return;
707         }
708
709         tmp_counter = ntohl(ch->carp_counter[0]);
710         tmp_counter = tmp_counter<<32;
711         tmp_counter += ntohl(ch->carp_counter[1]);
712
713         /* XXX Replay protection goes here */
714
715         sc->sc_init_counter = 0;
716         sc->sc_counter = tmp_counter;
717
718         sc_tv.tv_sec = sc->sc_advbase;
719         if (carp_suppress_preempt && sc->sc_advskew <  240)
720                 sc_tv.tv_usec = 240 * 1000000 / 256;
721         else
722                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
723         ch_tv.tv_sec = ch->carp_advbase;
724         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
725
726         switch (sc->sc_state) {
727         case INIT:
728                 break;
729         case MASTER:
730                 /*
731                  * If we receive an advertisement from a master who's going to
732                  * be more frequent than us, go into BACKUP state.
733                  */
734                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
735                     timevalcmp(&sc_tv, &ch_tv, ==)) {
736                         callout_stop(&sc->sc_ad_tmo);
737                         CARP_DEBUG("%s: MASTER -> BACKUP "
738                            "(more frequent advertisement received)\n",
739                            SC2IFP(sc)->if_xname);
740                         carp_set_state(sc, BACKUP);
741                         carp_setrun(sc, 0);
742                         carp_setroute(sc, RTM_DELETE);
743                 }
744                 break;
745         case BACKUP:
746                 /*
747                  * If we're pre-empting masters who advertise slower than us,
748                  * and this one claims to be slower, treat him as down.
749                  */
750                 if (carp_opts[CARPCTL_PREEMPT] &&
751                     timevalcmp(&sc_tv, &ch_tv, <)) {
752                         CARP_DEBUG("%s: BACKUP -> MASTER "
753                             "(preempting a slower master)\n",
754                             SC2IFP(sc)->if_xname);
755                         carp_master_down_locked(sc);
756                         break;
757                 }
758
759                 /*
760                  *  If the master is going to advertise at such a low frequency
761                  *  that he's guaranteed to time out, we'd might as well just
762                  *  treat him as timed out now.
763                  */
764                 sc_tv.tv_sec = sc->sc_advbase * 3;
765                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
766                         CARP_DEBUG("%s: BACKUP -> MASTER "
767                             "(master timed out)\n",
768                             SC2IFP(sc)->if_xname);
769                         carp_master_down_locked(sc);
770                         break;
771                 }
772
773                 /*
774                  * Otherwise, we reset the counter and wait for the next
775                  * advertisement.
776                  */
777                 carp_setrun(sc, af);
778                 break;
779         }
780
781         CARP_UNLOCK(ifp->if_carp);
782
783         m_freem(m);
784         return;
785 }
786
787 static int
788 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
789 {
790         struct m_tag *mtag;
791         struct ifnet *ifp = SC2IFP(sc);
792
793         if (sc->sc_init_counter) {
794                 /* this could also be seconds since unix epoch */
795                 sc->sc_counter = karc4random();
796                 sc->sc_counter = sc->sc_counter << 32;
797                 sc->sc_counter += karc4random();
798         } else
799                 sc->sc_counter++;
800
801         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
802         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
803
804         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
805
806         /* Tag packet for carp_output */
807         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT);
808         if (mtag == NULL) {
809                 m_freem(m);
810                 SC2IFP(sc)->if_oerrors++;
811                 return (ENOMEM);
812         }
813         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
814         m_tag_prepend(m, mtag);
815
816         return (0);
817 }
818
819 static void
820 carp_send_ad_all(void)
821 {
822         struct carp_softc *sc;
823
824         LIST_FOREACH(sc, &carpif_list, sc_next) {
825                 if (sc->sc_carpdev == NULL)
826                         continue;
827                 CARP_SCLOCK(sc);
828                 if ((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING) &&
829                      sc->sc_state == MASTER)
830                         carp_send_ad_locked(sc);
831                 CARP_SCUNLOCK(sc);
832         }
833 }
834
/*
 * Locking wrapper around carp_send_ad_locked(); also used as the
 * sc_ad_tmo callout handler.
 *
 * v is the vhost's softc.  The lock is reached through the parent
 * interface, so sc_carpdev has to be set when this runs.
 */
static void
carp_send_ad(void *v)
{
	struct carp_softc *vhost = v;

	CARP_SCLOCK(vhost);
	carp_send_ad_locked(vhost);
	CARP_SCUNLOCK(vhost);
}
844
845 static void
846 carp_send_ad_locked(struct carp_softc *sc)
847 {
848         struct carp_header ch;
849         struct timeval tv;
850         struct carp_header *ch_ptr;
851         struct mbuf *m;
852         int len, advbase, advskew;
853
854
855         /* bow out if we've lost our UPness or RUNNINGuiness */
856         if (!((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING))) {
857                 advbase = 255;
858                 advskew = 255;
859         } else {
860                 advbase = sc->sc_advbase;
861                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
862                         advskew = sc->sc_advskew;
863                 else
864                         advskew = 240;
865                 tv.tv_sec = advbase;
866                 tv.tv_usec = advskew * 1000000 / 256;
867         }
868
869         ch.carp_version = CARP_VERSION;
870         ch.carp_type = CARP_ADVERTISEMENT;
871         ch.carp_vhid = sc->sc_vhid;
872         ch.carp_advbase = advbase;
873         ch.carp_advskew = advskew;
874         ch.carp_authlen = 7;    /* XXX DEFINE */
875         ch.carp_pad1 = 0;       /* must be zero */
876         ch.carp_cksum = 0;
877
878 #ifdef INET
879         if (sc->sc_ia) {
880                 struct ip *ip;
881
882                 MGETHDR(m, M_NOWAIT, MT_HEADER);
883                 if (m == NULL) {
884                         SC2IFP(sc)->if_oerrors++;
885                         carpstats.carps_onomem++;
886                         /* XXX maybe less ? */
887                         if (advbase != 255 || advskew != 255)
888                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
889                                     carp_send_ad, sc);
890                         return;
891                 }
892                 len = sizeof(*ip) + sizeof(ch);
893                 m->m_pkthdr.len = len;
894                 m->m_pkthdr.rcvif = NULL;
895                 m->m_len = len;
896                 MH_ALIGN(m, m->m_len);
897                 m->m_flags |= M_MCAST;
898                 ip = mtod(m, struct ip *);
899                 ip->ip_v = IPVERSION;
900                 ip->ip_hl = sizeof(*ip) >> 2;
901                 ip->ip_tos = IPTOS_LOWDELAY;
902                 ip->ip_len = len;
903                 ip->ip_id = ip_newid();
904                 ip->ip_off = IP_DF;
905                 ip->ip_ttl = CARP_DFLTTL;
906                 ip->ip_p = IPPROTO_CARP;
907                 ip->ip_sum = 0;
908                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
909                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
910
911                 ch_ptr = (struct carp_header *)(&ip[1]);
912                 bcopy(&ch, ch_ptr, sizeof(ch));
913                 if (carp_prepare_ad(m, sc, ch_ptr))
914                         return;
915
916                 m->m_data += sizeof(*ip);
917                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
918                 m->m_data -= sizeof(*ip);
919
920                 getmicrotime(&SC2IFP(sc)->if_lastchange);
921                 SC2IFP(sc)->if_opackets++;
922                 SC2IFP(sc)->if_obytes += len;
923                 carpstats.carps_opackets++;
924
925                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
926                         SC2IFP(sc)->if_oerrors++;
927                         if (sc->sc_sendad_errors < INT_MAX)
928                                 sc->sc_sendad_errors++;
929                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
930                                 carp_suppress_preempt++;
931                                 if (carp_suppress_preempt == 1) {
932                                         CARP_SCUNLOCK(sc);
933                                         carp_send_ad_all();
934                                         CARP_SCLOCK(sc);
935                                 }
936                         }
937                         sc->sc_sendad_success = 0;
938                 } else {
939                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
940                                 if (++sc->sc_sendad_success >=
941                                     CARP_SENDAD_MIN_SUCCESS) {
942                                         carp_suppress_preempt--;
943                                         sc->sc_sendad_errors = 0;
944                                 }
945                         } else
946                                 sc->sc_sendad_errors = 0;
947                 }
948         }
949 #endif /* INET */
950 #ifdef INET6
951         if (sc->sc_ia6) {
952                 struct ip6_hdr *ip6;
953
954                 MGETHDR(m, M_NOWAIT, MT_HEADER);
955                 if (m == NULL) {
956                         SC2IFP(sc)->if_oerrors++;
957                         carpstats.carps_onomem++;
958                         /* XXX maybe less ? */
959                         if (advbase != 255 || advskew != 255)
960                                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
961                                     carp_send_ad, sc);
962                         return;
963                 }
964                 len = sizeof(*ip6) + sizeof(ch);
965                 m->m_pkthdr.len = len;
966                 m->m_pkthdr.rcvif = NULL;
967                 m->m_len = len;
968                 MH_ALIGN(m, m->m_len);
969                 m->m_flags |= M_MCAST;
970                 ip6 = mtod(m, struct ip6_hdr *);
971                 bzero(ip6, sizeof(*ip6));
972                 ip6->ip6_vfc |= IPV6_VERSION;
973                 ip6->ip6_hlim = CARP_DFLTTL;
974                 ip6->ip6_nxt = IPPROTO_CARP;
975                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
976                     sizeof(struct in6_addr));
977                 /* set the multicast destination */
978
979                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
980                 ip6->ip6_dst.s6_addr8[15] = 0x12;
981                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
982                         SC2IFP(sc)->if_oerrors++;
983                         m_freem(m);
984                         CARP_LOG("%s: in6_setscope failed\n", __func__);
985                         return;
986                 }
987
988                 ch_ptr = (struct carp_header *)(&ip6[1]);
989                 bcopy(&ch, ch_ptr, sizeof(ch));
990                 if (carp_prepare_ad(m, sc, ch_ptr))
991                         return;
992
993                 m->m_data += sizeof(*ip6);
994                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
995                 m->m_data -= sizeof(*ip6);
996
997                 getmicrotime(&SC2IFP(sc)->if_lastchange);
998                 SC2IFP(sc)->if_opackets++;
999                 SC2IFP(sc)->if_obytes += len;
1000                 carpstats.carps_opackets6++;
1001
1002                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1003                         SC2IFP(sc)->if_oerrors++;
1004                         if (sc->sc_sendad_errors < INT_MAX)
1005                                 sc->sc_sendad_errors++;
1006                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1007                                 carp_suppress_preempt++;
1008                                 if (carp_suppress_preempt == 1) {
1009                                         CARP_SCUNLOCK(sc);
1010                                         carp_send_ad_all();
1011                                         CARP_SCLOCK(sc);
1012                                 }
1013                         }
1014                         sc->sc_sendad_success = 0;
1015                 } else {
1016                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1017                                 if (++sc->sc_sendad_success >=
1018                                     CARP_SENDAD_MIN_SUCCESS) {
1019                                         carp_suppress_preempt--;
1020                                         sc->sc_sendad_errors = 0;
1021                                 }
1022                         } else
1023                                 sc->sc_sendad_errors = 0;
1024                 }
1025         }
1026 #endif /* INET6 */
1027
1028         if (advbase != 255 || advskew != 255)
1029                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1030                     carp_send_ad, sc);
1031
1032 }
1033
1034 /*
1035  * Broadcast a gratuitous ARP request containing
1036  * the virtual router MAC address for each IP address
1037  * associated with the virtual router.
1038  */
1039 static void
1040 carp_send_arp(struct carp_softc *sc)
1041 {
1042         struct ifaddr *ifa;
1043
1044         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
1045
1046                 if (ifa->ifa_addr->sa_family != AF_INET)
1047                         continue;
1048                 lwkt_serialize_enter(sc->sc_carpdev->if_serializer);
1049                 arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));        
1050                 lwkt_serialize_exit(sc->sc_carpdev->if_serializer); 
1051
1052                 DELAY(1000);    /* XXX */
1053         }
1054 }
1055
1056 #ifdef INET6
1057 static void
1058 carp_send_na(struct carp_softc *sc)
1059 {
1060         struct ifaddr *ifa;
1061         struct in6_addr *in6;
1062         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1063
1064         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
1065
1066                 if (ifa->ifa_addr->sa_family != AF_INET6)
1067                         continue;
1068
1069                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1070                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
1071                     ND_NA_FLAG_OVERRIDE, 1, NULL);
1072                 DELAY(1000);    /* XXX */
1073         }
1074 }
1075 #endif /* INET6 */
1076
1077 static int
1078 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1079 {
1080         struct carp_softc *vh;
1081         struct ifaddr *ifa;
1082         int count = 0;
1083
1084         CARP_LOCK_ASSERT(cif);
1085
1086         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1087                 if ((type == CARP_COUNT_RUNNING &&
1088                     (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING)) ||
1089                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1090                         TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
1091                             ifa_list) {
1092                                 if (ifa->ifa_addr->sa_family == AF_INET &&
1093                                     ia->ia_addr.sin_addr.s_addr ==
1094                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1095                                         count++;
1096                         }
1097                 }
1098         }
1099         return (count);
1100 }
1101
1102 int
1103 carp_iamatch(void *v, struct in_ifaddr *ia,
1104     struct in_addr *isaddr, u_int8_t **enaddr)
1105 {
1106         struct carp_if *cif = v;
1107         struct carp_softc *vh;
1108         int index, count = 0;
1109         struct ifaddr *ifa;
1110
1111         CARP_LOCK(cif);
1112
1113         if (carp_opts[CARPCTL_ARPBALANCE]) {
1114                 /*
1115                  * XXX proof of concept implementation.
1116                  * We use the source ip to decide which virtual host should
1117                  * handle the request. If we're master of that virtual host,
1118                  * then we respond, otherwise, just drop the arp packet on
1119                  * the floor.
1120                  */
1121                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
1122                 if (count == 0) {
1123                         /* should never reach this */
1124                         CARP_UNLOCK(cif);
1125                         return (0);
1126                 }
1127
1128                 /* this should be a hash, like pf_hash() */
1129                 index = ntohl(isaddr->s_addr) % count;
1130                 count = 0;
1131
1132                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1133                         if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING)) {
1134                                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
1135                                     ifa_list) {
1136                                         if (ifa->ifa_addr->sa_family ==
1137                                             AF_INET &&
1138                                             ia->ia_addr.sin_addr.s_addr ==
1139                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
1140                                                 if (count == index) {
1141                                                         if (vh->sc_state ==
1142                                                             MASTER) {
1143                                                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1144                                                                 CARP_UNLOCK(cif);
1145                                                                 return (1);
1146                                                         } else {
1147                                                                 CARP_UNLOCK(cif);
1148                                                                 return (0);
1149                                                         }
1150                                                 }
1151                                                 count++;
1152                                         }
1153                                 }
1154                         }
1155                 }
1156         } else {
1157                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1158                         if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
1159                             vh->sc_state == MASTER) {
1160                                 *enaddr = IF_LLADDR(vh->sc_ifp);
1161                                 CARP_UNLOCK(cif);
1162                                 return (1);
1163                         }
1164                 }
1165         }
1166         CARP_UNLOCK(cif);
1167         return(0);
1168 }
1169
1170 #ifdef INET6
1171 struct ifaddr *
1172 carp_iamatch6(void *v, struct in6_addr *taddr)
1173 {
1174         struct carp_if *cif = v;
1175         struct carp_softc *vh;
1176         struct ifaddr *ifa;
1177
1178         CARP_LOCK(cif);
1179         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1180                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
1181                         if (IN6_ARE_ADDR_EQUAL(taddr,
1182                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1183                             (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
1184                             vh->sc_state == MASTER) {
1185                                 CARP_UNLOCK(cif);
1186                                 return (ifa);
1187                         }
1188                 }
1189         }
1190         CARP_UNLOCK(cif);
1191         
1192         return (NULL);
1193 }
1194
1195 void *
1196 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1197 {
1198         struct m_tag *mtag;
1199         struct carp_if *cif = v;
1200         struct carp_softc *sc;
1201         struct ifaddr *ifa;
1202
1203         CARP_LOCK(cif);
1204         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1205                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
1206                         if (IN6_ARE_ADDR_EQUAL(taddr,
1207                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1208                             (SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING)) {
1209                                 struct ifnet *ifp = SC2IFP(sc);
1210                                 mtag = m_tag_get(PACKET_TAG_CARP,
1211                                     sizeof(struct ifnet *), MB_DONTWAIT);
1212                                 if (mtag == NULL) {
1213                                         /* better a bit than nothing */
1214                                         CARP_UNLOCK(cif);
1215                                         return (IF_LLADDR(sc->sc_ifp));
1216                                 }
1217                                 bcopy(&ifp, (caddr_t)(mtag + 1),
1218                                     sizeof(struct ifnet *));
1219                                 m_tag_prepend(m, mtag);
1220
1221                                 CARP_UNLOCK(cif);
1222                                 return (IF_LLADDR(sc->sc_ifp));
1223                         }
1224                 }
1225         }
1226         CARP_UNLOCK(cif);
1227
1228         return (NULL);
1229 }
1230 #endif
1231
1232 struct ifnet *
1233 carp_forus(void *v, void *dhost)
1234 {
1235         struct carp_if *cif = v;
1236         struct carp_softc *vh;
1237         u_int8_t *ena = dhost;
1238         
1239         /**
1240          * XXX: See here for check on MAC adr is not for virtual use
1241          *
1242          **/
1243
1244         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1245         {
1246                 return (NULL);
1247         }
1248
1249         CARP_LOCK(cif);
1250         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1251                 if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
1252                     vh->sc_state == MASTER &&
1253                     !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
1254                         CARP_UNLOCK(cif);
1255                         return (SC2IFP(vh));
1256                 }
1257
1258         CARP_UNLOCK(cif);
1259         return (NULL);
1260 }
1261
1262 static void
1263 carp_master_down(void *v)
1264 {
1265         struct carp_softc *sc = v;
1266
1267         lwkt_serialize_enter(sc->sc_ifp->if_serializer);
1268         carp_master_down_locked(sc);
1269         lwkt_serialize_exit(sc->sc_ifp->if_serializer);
1270 }
1271
1272 static void
1273 carp_master_down_locked(struct carp_softc *sc)
1274 {
1275         if (sc->sc_carpdev)
1276                 CARP_SCLOCK_ASSERT(sc);
1277
1278         switch (sc->sc_state) {
1279         case INIT:
1280                 kprintf("%s: master_down event in INIT state\n",
1281                     SC2IFP(sc)->if_xname);
1282                 break;
1283         case MASTER:
1284                 break;
1285         case BACKUP:
1286                 carp_set_state(sc, MASTER);
1287                 carp_send_ad_locked(sc);
1288                 carp_send_arp(sc);
1289 #ifdef INET6
1290                 carp_send_na(sc);
1291 #endif /* INET6 */
1292                 carp_setrun(sc, 0);
1293                 carp_setroute(sc, RTM_ADD);
1294                 break;
1295         }
1296 }
1297
1298 /*
1299  * When in backup state, af indicates whether to reset the master down timer
1300  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1301  */
1302 static void
1303 carp_setrun(struct carp_softc *sc, sa_family_t af)
1304 {
1305         struct timeval tv;
1306
1307         if (sc->sc_carpdev == NULL) {
1308                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1309                 carp_set_state(sc, INIT);
1310                 return;
1311         }
1312
1313         if (SC2IFP(sc)->if_flags & IFF_UP &&
1314             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6))
1315                 SC2IFP(sc)->if_flags |= IFF_RUNNING;
1316         else {
1317                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1318                 carp_setroute(sc, RTM_DELETE);
1319                 return;
1320         }
1321
1322         switch (sc->sc_state) {
1323         case INIT:
1324                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1325                         carp_send_ad_locked(sc);
1326                         carp_send_arp(sc);
1327 #ifdef INET6
1328                         carp_send_na(sc);
1329 #endif /* INET6 */
1330                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1331                             SC2IFP(sc)->if_xname);
1332                         carp_set_state(sc, MASTER);
1333                         carp_setroute(sc, RTM_ADD);
1334                 } else {
1335                         CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
1336                         carp_set_state(sc, BACKUP);
1337                         carp_setroute(sc, RTM_DELETE);
1338                         carp_setrun(sc, 0);
1339                 }
1340                 break;
1341         case BACKUP:
1342                 callout_stop(&sc->sc_ad_tmo);
1343                 tv.tv_sec = 3 * sc->sc_advbase;
1344                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1345                 switch (af) {
1346 #ifdef INET
1347                 case AF_INET:
1348                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1349                             carp_master_down, sc);
1350                         break;
1351 #endif /* INET */
1352 #ifdef INET6
1353                 case AF_INET6:
1354                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1355                             carp_master_down, sc);
1356                         break;
1357 #endif /* INET6 */
1358                 default:
1359                         if (sc->sc_naddrs)
1360                                 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1361                                     carp_master_down, sc);
1362                         if (sc->sc_naddrs6)
1363                                 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1364                                     carp_master_down, sc);
1365                         break;
1366                 }
1367                 break;
1368         case MASTER:
1369                 tv.tv_sec = sc->sc_advbase;
1370                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1371                 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1372                     carp_send_ad, sc);
1373                 break;
1374         }
1375 }
1376
1377 static void
1378 carp_multicast_cleanup(struct carp_softc *sc)
1379 {
1380         struct ip_moptions *imo = &sc->sc_imo;
1381         u_int16_t n = imo->imo_num_memberships;
1382
1383         /* Clean up our own multicast memberships */
1384         while (n-- > 0) {
1385                 if (imo->imo_membership[n] != NULL) {
1386                         in_delmulti(imo->imo_membership[n]);
1387                         imo->imo_membership[n] = NULL;
1388                 }
1389         }
1390         imo->imo_num_memberships = 0;
1391         imo->imo_multicast_ifp = NULL;
1392 }
1393
1394 #ifdef INET6
1395 static void
1396 carp_multicast6_cleanup(struct carp_softc *sc)
1397 {
1398         struct ip6_moptions *im6o = &sc->sc_im6o;
1399
1400         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1401                 struct in6_multi_mship *imm =
1402                     LIST_FIRST(&im6o->im6o_memberships);
1403
1404                 LIST_REMOVE(imm, i6mm_chain);
1405                 in6_leavegroup(imm);
1406         }
1407         im6o->im6o_multicast_ifp = NULL;
1408 }
1409 #endif
1410
1411 static int
1412 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1413 {
1414         struct ifnet *ifp;
1415         struct carp_if *cif;
1416         struct in_ifaddr *ia, *ia_if;
1417         struct ip_moptions *imo = &sc->sc_imo;
1418         struct in_addr addr;
1419         u_long iaddr = htonl(sin->sin_addr.s_addr);
1420         int own, error;
1421         
1422         if (sin->sin_addr.s_addr == 0) 
1423         {
1424                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1425                 {
1426                         carp_set_state(sc, INIT);
1427                 }
1428                 if (sc->sc_naddrs)
1429                 {
1430                         SC2IFP(sc)->if_flags |= IFF_UP;
1431                 }
1432                 carp_setrun(sc, 0);
1433                 return (0);
1434         }
1435         /* we have to do it by hands to check we won't match on us */
1436         ia_if = NULL; own = 0;
1437         TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1438                 /* and, yeah, we need a multicast-capable iface too */
1439                 if (ia->ia_ifp != SC2IFP(sc) &&
1440                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1441                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
1442                         if (!ia_if)
1443                                 ia_if = ia;
1444                         if (sin->sin_addr.s_addr ==
1445                             ia->ia_addr.sin_addr.s_addr)
1446                                 own++;
1447                 }
1448         }
1449         
1450         
1451         if (!ia_if)
1452                 return (EADDRNOTAVAIL);
1453
1454         ia = ia_if;
1455         ifp = ia->ia_ifp;
1456
1457         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1458             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
1459                 return (EADDRNOTAVAIL);
1460
1461         if (imo->imo_num_memberships == 0) {
1462                 addr.s_addr = htonl(INADDR_CARP_GROUP);
1463                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
1464                         return (ENOBUFS);
1465                 imo->imo_num_memberships++;
1466                 imo->imo_multicast_ifp = ifp;
1467                 imo->imo_multicast_ttl = CARP_DFLTTL;
1468                 imo->imo_multicast_loop = 0;
1469         }
1470
1471         if (!ifp->if_carp) {
1472
1473                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
1474                     M_WAITOK|M_ZERO);
1475                 if ((error = ifpromisc(ifp, 1))) {
1476                         FREE(cif, M_CARP);
1477                         goto cleanup;
1478                 }
1479                 
1480                 CARP_LOCK_INIT(cif);
1481                 CARP_LOCK(cif);
1482                 cif->vhif_ifp = ifp;
1483                 TAILQ_INIT(&cif->vhif_vrs);
1484                 ifp->if_carp = cif;
1485
1486         } else {
1487                 struct carp_softc *vr;
1488
1489                 cif = (struct carp_if *)ifp->if_carp;
1490                 CARP_LOCK(cif);
1491                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1492                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1493                                 CARP_UNLOCK(cif);
1494                                 error = EINVAL;
1495                                 goto cleanup;
1496                         }
1497         }
1498         sc->sc_ia = ia;
1499         sc->sc_carpdev = ifp;
1500
1501         { /* XXX prevent endless loop if already in queue */
1502         struct carp_softc *vr, *after = NULL;
1503         int myself = 0;
1504         cif = (struct carp_if *)ifp->if_carp;
1505
1506         /* XXX: cif should not change, right? So we still hold the lock */
1507         CARP_LOCK_ASSERT(cif);
1508
1509         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1510                 if (vr == sc)
1511                         myself = 1;
1512                 if (vr->sc_vhid < sc->sc_vhid)
1513                         after = vr;
1514         }
1515
1516         if (!myself) {
1517                 /* We're trying to keep things in order */
1518                 if (after == NULL) {
1519                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1520                 } else {
1521                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1522                 }
1523                 cif->vhif_nvrs++;
1524         }
1525         }
1526
1527         sc->sc_naddrs++;
1528         SC2IFP(sc)->if_flags |= IFF_UP;
1529         if (own)
1530                 sc->sc_advskew = 0;
1531
1532
1533         carp_sc_state_locked(sc);
1534         carp_setrun(sc, 0);
1535
1536         CARP_UNLOCK(cif);
1537         
1538         return (0);
1539
1540 cleanup:
1541         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1542         return (error);
1543
1544 }
1545
1546 static int
1547 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1548 {
1549         int error = 0;
1550
1551         if (!--sc->sc_naddrs) {
1552                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1553                 struct ip_moptions *imo = &sc->sc_imo;
1554
1555                 CARP_LOCK(cif);
1556                 callout_stop(&sc->sc_ad_tmo);
1557                 SC2IFP(sc)->if_flags &= ~IFF_UP;
1558                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1559                 sc->sc_vhid = -1;
1560                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1561                 imo->imo_multicast_ifp = NULL;
1562                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1563                 if (!--cif->vhif_nvrs) {
1564                         sc->sc_carpdev->if_carp = NULL;
1565                         CARP_LOCK_DESTROY(cif);
1566                         FREE(cif, M_IFADDR);
1567                 } else {
1568                         CARP_UNLOCK(cif);
1569                 }
1570         }
1571
1572         return (error);
1573 }
1574
1575 #ifdef INET6
1576 static int
1577 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1578 {
1579         struct ifnet *ifp;
1580         struct carp_if *cif;
1581         struct in6_ifaddr *ia, *ia_if;
1582         struct ip6_moptions *im6o = &sc->sc_im6o;
1583         struct in6_multi_mship *imm;
1584         struct in6_addr in6;
1585         int own, error;
1586
1587         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1588                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
1589                         carp_set_state(sc, INIT);
1590                 if (sc->sc_naddrs6)
1591                         SC2IFP(sc)->if_flags |= IFF_UP;
1592                 carp_setrun(sc, 0);
1593                 return (0);
1594         }
1595
1596         /* we have to do it by hands to check we won't match on us */
1597         ia_if = NULL; own = 0;
1598         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1599                 int i;
1600
1601                 for (i = 0; i < 4; i++) {
1602                         if ((sin6->sin6_addr.s6_addr32[i] &
1603                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1604                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
1605                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1606                                 break;
1607                 }
1608                 /* and, yeah, we need a multicast-capable iface too */
1609                 if (ia->ia_ifp != SC2IFP(sc) &&
1610                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1611                     (i == 4)) {
1612                         if (!ia_if)
1613                                 ia_if = ia;
1614                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1615                             &ia->ia_addr.sin6_addr))
1616                                 own++;
1617                 }
1618         }
1619
1620         if (!ia_if)
1621                 return (EADDRNOTAVAIL);
1622         ia = ia_if;
1623         ifp = ia->ia_ifp;
1624
1625         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
1626             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
1627                 return (EADDRNOTAVAIL);
1628
1629         if (!sc->sc_naddrs6) {
1630                 im6o->im6o_multicast_ifp = ifp;
1631
1632                 /* join CARP multicast address */
1633                 bzero(&in6, sizeof(in6));
1634                 in6.s6_addr16[0] = htons(0xff02);
1635                 in6.s6_addr8[15] = 0x12;
1636                 if (in6_setscope(&in6, ifp, NULL) != 0)
1637                         goto cleanup;
1638                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1639                         goto cleanup;
1640                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1641
1642                 /* join solicited multicast address */
1643                 bzero(&in6, sizeof(in6));
1644                 in6.s6_addr16[0] = htons(0xff02);
1645                 in6.s6_addr32[1] = 0;
1646                 in6.s6_addr32[2] = htonl(1);
1647                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
1648                 in6.s6_addr8[12] = 0xff;
1649                 if (in6_setscope(&in6, ifp, NULL) != 0)
1650                         goto cleanup;
1651                 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
1652                         goto cleanup;
1653                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1654         }
1655
1656         if (!ifp->if_carp) {
1657                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
1658                     M_WAITOK|M_ZERO);
1659                 if ((error = ifpromisc(ifp, 1))) {
1660                         FREE(cif, M_CARP);
1661                         goto cleanup;
1662                 }
1663
1664                 CARP_LOCK_INIT(cif);
1665                 CARP_LOCK(cif);
1666                 cif->vhif_ifp = ifp;
1667                 TAILQ_INIT(&cif->vhif_vrs);
1668                 ifp->if_carp = cif;
1669
1670         } else {
1671                 struct carp_softc *vr;
1672
1673                 cif = (struct carp_if *)ifp->if_carp;
1674                 CARP_LOCK(cif);
1675                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1676                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
1677                                 CARP_UNLOCK(cif);
1678                                 error = EINVAL;
1679                                 goto cleanup;
1680                         }
1681         }
1682         sc->sc_ia6 = ia;
1683         sc->sc_carpdev = ifp;
1684
1685         { /* XXX prevent endless loop if already in queue */
1686         struct carp_softc *vr, *after = NULL;
1687         int myself = 0;
1688         cif = (struct carp_if *)ifp->if_carp;
1689         CARP_LOCK_ASSERT(cif);
1690
1691         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1692                 if (vr == sc)
1693                         myself = 1;
1694                 if (vr->sc_vhid < sc->sc_vhid)
1695                         after = vr;
1696         }
1697
1698         if (!myself) {
1699                 /* We're trying to keep things in order */
1700                 if (after == NULL) {
1701                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1702                 } else {
1703                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
1704                 }
1705                 cif->vhif_nvrs++;
1706         }
1707         }
1708
1709         sc->sc_naddrs6++;
1710         SC2IFP(sc)->if_flags |= IFF_UP;
1711         if (own)
1712                 sc->sc_advskew = 0;
1713         carp_sc_state_locked(sc);
1714         carp_setrun(sc, 0);
1715
1716         CARP_UNLOCK(cif);
1717
1718         return (0);
1719
1720 cleanup:
1721         /* clean up multicast memberships */
1722         if (!sc->sc_naddrs6) {
1723                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1724                         imm = LIST_FIRST(&im6o->im6o_memberships);
1725                         LIST_REMOVE(imm, i6mm_chain);
1726                         in6_leavegroup(imm);
1727                 }
1728         }
1729         return (error);
1730 }
1731
1732 static int
1733 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1734 {
1735         int error = 0;
1736
1737         if (!--sc->sc_naddrs6) {
1738                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1739                 struct ip6_moptions *im6o = &sc->sc_im6o;
1740
1741                 CARP_LOCK(cif);
1742                 callout_stop(&sc->sc_ad_tmo);
1743                 SC2IFP(sc)->if_flags &= ~IFF_UP;
1744                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
1745                 sc->sc_vhid = -1;
1746                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1747                         struct in6_multi_mship *imm =
1748                             LIST_FIRST(&im6o->im6o_memberships);
1749
1750                         LIST_REMOVE(imm, i6mm_chain);
1751                         in6_leavegroup(imm);
1752                 }
1753                 im6o->im6o_multicast_ifp = NULL;
1754                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1755                 if (!--cif->vhif_nvrs) {
1756                         CARP_LOCK_DESTROY(cif);
1757                         sc->sc_carpdev->if_carp = NULL;
1758                         FREE(cif, M_IFADDR);
1759                 } else
1760                         CARP_UNLOCK(cif);
1761         }
1762
1763         return (error);
1764 }
1765 #endif /* INET6 */
1766
1767 static int
1768 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *creds)
1769 {
1770         struct carp_softc *sc = ifp->if_softc, *vr;
1771         struct carpreq carpr;
1772         struct ifaddr *ifa;
1773         struct ifreq *ifr;
1774         struct ifaliasreq *ifra;
1775         int locked = 0, error = 0;
1776
1777         ifa = (struct ifaddr *)addr;
1778         ifra = (struct ifaliasreq *)addr;
1779         ifr = (struct ifreq *)addr;
1780
1781
1782         switch (cmd) {
1783         case SIOCSIFADDR:
1784                 switch (ifa->ifa_addr->sa_family) {
1785 #ifdef INET
1786                 case AF_INET:
1787                         SC2IFP(sc)->if_flags |= IFF_UP;
1788                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1789                             sizeof(struct sockaddr));
1790                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1791                         break;
1792 #endif /* INET */
1793 #ifdef INET6
1794                 case AF_INET6:
1795                         SC2IFP(sc)->if_flags |= IFF_UP;
1796                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1797                         break;
1798 #endif /* INET6 */
1799                 default:
1800                         error = EAFNOSUPPORT;
1801                         break;
1802                 }
1803                 break;
1804
1805         case SIOCAIFADDR:
1806                 switch (ifa->ifa_addr->sa_family) {
1807 #ifdef INET
1808                 case AF_INET:
1809                         SC2IFP(sc)->if_flags |= IFF_UP;
1810                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
1811                             sizeof(struct sockaddr));
1812                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
1813                         break;
1814 #endif /* INET */
1815 #ifdef INET6
1816                 case AF_INET6:
1817                         SC2IFP(sc)->if_flags |= IFF_UP;
1818                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
1819                         break;
1820 #endif /* INET6 */
1821                 default:
1822                         error = EAFNOSUPPORT;
1823                         break;
1824                 }
1825                 break;
1826
1827         case SIOCDIFADDR:
1828                 switch (ifa->ifa_addr->sa_family) {
1829 #ifdef INET
1830                 case AF_INET:
1831                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
1832                         break;
1833 #endif /* INET */
1834 #ifdef INET6
1835                 case AF_INET6:
1836                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
1837                         break;
1838 #endif /* INET6 */
1839                 default:
1840                         error = EAFNOSUPPORT;
1841                         break;
1842                 }
1843                 break;
1844
1845         case SIOCSIFFLAGS:
1846                 if (sc->sc_carpdev) {
1847                         locked = 1;
1848                         CARP_SCLOCK(sc);
1849                 }
1850                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1851                         callout_stop(&sc->sc_ad_tmo);
1852                         callout_stop(&sc->sc_md_tmo);
1853                         callout_stop(&sc->sc_md6_tmo);
1854                         if (sc->sc_state == MASTER)
1855                                 carp_send_ad_locked(sc);
1856                         carp_set_state(sc, INIT);
1857                         carp_setrun(sc, 0);
1858                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
1859                         SC2IFP(sc)->if_flags |= IFF_UP;
1860                         carp_setrun(sc, 0);
1861                 }
1862                 break;
1863
1864         case SIOCSVH:
1865                 error = suser(curthread);
1866                 if (error)
1867                         break;
1868                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1869                         break;
1870                 error = 1;
1871                 if (sc->sc_carpdev) {
1872                         locked = 1;
1873                         CARP_SCLOCK(sc);
1874                 }
1875                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
1876                         switch (carpr.carpr_state) {
1877                         case BACKUP:
1878                                 callout_stop(&sc->sc_ad_tmo);
1879                                 carp_set_state(sc, BACKUP);
1880                                 carp_setrun(sc, 0);
1881                                 carp_setroute(sc, RTM_DELETE);
1882                                 break;
1883                         case MASTER:
1884                                 carp_master_down_locked(sc);
1885                                 break;
1886                         default:
1887                                 break;
1888                         }
1889                 }
1890                 if (carpr.carpr_vhid > 0) {
1891                         if (carpr.carpr_vhid > 255) {
1892                                 error = EINVAL;
1893                                 break;
1894                         }
1895                         if (sc->sc_carpdev) {
1896                                 struct carp_if *cif;
1897                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1898                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1899                                         if (vr != sc &&
1900                                             vr->sc_vhid == carpr.carpr_vhid)
1901                                                 return EEXIST;
1902                         }
1903                         sc->sc_vhid = carpr.carpr_vhid;
1904                         IF_LLADDR(sc->sc_ifp)[0] = 0;
1905                         IF_LLADDR(sc->sc_ifp)[1] = 0;
1906                         IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
1907                         IF_LLADDR(sc->sc_ifp)[3] = 0;
1908                         IF_LLADDR(sc->sc_ifp)[4] = 1;
1909                         IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
1910                         error--;
1911                 }
1912                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
1913                         if (carpr.carpr_advskew >= 255) {
1914                                 error = EINVAL;
1915                                 break;
1916                         }
1917                         if (carpr.carpr_advbase > 255) {
1918                                 error = EINVAL;
1919                                 break;
1920                         }
1921                         sc->sc_advbase = carpr.carpr_advbase;
1922                         sc->sc_advskew = carpr.carpr_advskew;
1923                         error--;
1924                 }
1925                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1926                 if (error > 0)
1927                         error = EINVAL;
1928                 else {
1929                         error = 0;
1930                         carp_setrun(sc, 0);
1931                 }
1932                 break;
1933
1934         case SIOCGVH:
1935                 /* XXX: lockless read */
1936                 bzero(&carpr, sizeof(carpr));
1937                 carpr.carpr_state = sc->sc_state;
1938                 carpr.carpr_vhid = sc->sc_vhid;
1939                 carpr.carpr_advbase = sc->sc_advbase;
1940                 carpr.carpr_advskew = sc->sc_advskew;
1941                 error = suser(curthread);
1942                 if (error == 0)
1943                         bcopy(sc->sc_key, carpr.carpr_key,
1944                             sizeof(carpr.carpr_key));
1945                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1946                 break;
1947
1948         default:
1949                 error = EINVAL;
1950         }
1951
1952         if (locked)
1953                 CARP_SCUNLOCK(sc);
1954
1955         carp_hmac_prepare(sc);
1956
1957         return (error);
1958 }
1959
1960 /*
1961  * XXX: this is looutput. We should eventually use it from there.
1962  */
1963 static int
1964 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1965     struct rtentry *rt)
1966 {
1967         u_int32_t af;
1968
1969         M_ASSERTPKTHDR(m); /* check if we have the packet header */
1970
1971         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
1972                 m_freem(m);
1973                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
1974                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1975         }
1976
1977         ifp->if_opackets++;
1978         ifp->if_obytes += m->m_pkthdr.len;
1979
1980         /* BPF writes need to be handled specially. */
1981         if (dst->sa_family == AF_UNSPEC) {
1982                 bcopy(dst->sa_data, &af, sizeof(af));
1983                 dst->sa_family = af;
1984         }
1985
1986 #if 1   /* XXX */
1987         switch (dst->sa_family) {
1988         case AF_INET:
1989         case AF_INET6:
1990         case AF_IPX:
1991         case AF_APPLETALK:
1992                 break;
1993         default:
1994                 m_freem(m);
1995                 return (EAFNOSUPPORT);
1996         }
1997 #endif
1998         return(if_simloop(ifp, m, dst->sa_family, 0));
1999 }
2000
/*
 * Start output on a carp interface.  Nothing is expected to call this
 * function; it does no work and merely logs the call on DEBUG kernels.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
        kprintf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2011
2012 int
2013 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2014     struct rtentry *rt)
2015 {
2016         struct m_tag *mtag;
2017         struct carp_softc *sc;
2018         struct ifnet *carp_ifp;
2019
2020         if (!sa)
2021                 return (0);
2022
2023         switch (sa->sa_family) {
2024 #ifdef INET
2025         case AF_INET:
2026                 break;
2027 #endif /* INET */
2028 #ifdef INET6
2029         case AF_INET6:
2030                 break;
2031 #endif /* INET6 */
2032         default:
2033                 return (0);
2034         }
2035
2036         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
2037         if (mtag == NULL)
2038                 return (0);
2039
2040         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
2041         sc = carp_ifp->if_softc;
2042
2043         /* Set the source MAC address to Virtual Router MAC Address */
2044         switch (ifp->if_type) {
2045         case IFT_ETHER:
2046         case IFT_L2VLAN: {
2047                         struct ether_header *eh;
2048
2049                         eh = mtod(m, struct ether_header *);
2050                         eh->ether_shost[0] = 0;
2051                         eh->ether_shost[1] = 0;
2052                         eh->ether_shost[2] = 0x5e;
2053                         eh->ether_shost[3] = 0;
2054                         eh->ether_shost[4] = 1;
2055                         eh->ether_shost[5] = sc->sc_vhid;
2056                 }
2057                 break;
2058         default:
2059                 kprintf("%s: carp is not supported for this interface type\n",
2060                     ifp->if_xname);
2061                 return (EOPNOTSUPP);
2062         }
2063
2064         return (0);
2065
2066 }
2067
2068 static void
2069 carp_set_state(struct carp_softc *sc, int state)
2070 {
2071
2072         if (sc->sc_carpdev)
2073                 CARP_SCLOCK_ASSERT(sc);
2074
2075         if (sc->sc_state == state)
2076                 return;
2077
2078         sc->sc_state = state;
2079         switch (state) {
2080         case BACKUP:
2081                 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN;
2082                 break;
2083         case MASTER:
2084                 SC2IFP(sc)->if_link_state = LINK_STATE_UP;
2085                 break;
2086         default:
2087                 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN;
2088                 break;
2089         }
2090         rt_ifmsg(SC2IFP(sc));
2091 }
2092
/*
 * Re-evaluate every virtual router attached to a carp_if.
 *
 * v is the struct carp_if, passed as an untyped pointer.  The carp_if
 * lock is held around the call to carp_carpdev_state_locked().
 */
void
carp_carpdev_state(void *v)
{
        struct carp_if *cif;

        cif = (struct carp_if *)v;

        CARP_LOCK(cif);
        carp_carpdev_state_locked(cif);
        CARP_UNLOCK(cif);
}
2102
2103 static void
2104 carp_carpdev_state_locked(struct carp_if *cif)
2105 {
2106         struct carp_softc *sc;
2107
2108         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2109                 carp_sc_state_locked(sc);
2110 }
2111
2112 static void
2113 carp_sc_state_locked(struct carp_softc *sc)
2114 {
2115         CARP_SCLOCK_ASSERT(sc);
2116
2117         if ( !(sc->sc_carpdev->if_flags & IFF_UP)) {
2118                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
2119                 SC2IFP(sc)->if_flags &= ~IFF_UP;
2120                 SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
2121                 callout_stop(&sc->sc_ad_tmo);
2122                 callout_stop(&sc->sc_md_tmo);
2123                 callout_stop(&sc->sc_md6_tmo);
2124                 carp_set_state(sc, INIT);
2125                 carp_setrun(sc, 0);
2126                 if (!sc->sc_suppress) {
2127                         carp_suppress_preempt++;
2128                         if (carp_suppress_preempt == 1) {
2129                                 CARP_SCUNLOCK(sc);
2130                                 carp_send_ad_all();
2131                                 CARP_SCLOCK(sc);
2132                         }
2133                 }
2134                 sc->sc_suppress = 1;
2135         } else {
2136                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
2137                 carp_set_state(sc, INIT);
2138                 carp_setrun(sc, 0);
2139                 if (sc->sc_suppress)
2140                         carp_suppress_preempt--;
2141                 sc->sc_suppress = 0;
2142         }
2143
2144         return;
2145 }
2146
2147 static int
2148 carp_modevent(module_t mod, int type, void *data)
2149 {
2150         switch (type) {
2151         case MOD_LOAD:
2152                 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
2153                     carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
2154                 if (if_detach_event_tag == NULL)
2155                         return (ENOMEM);
2156                 
2157                 LIST_INIT(&carpif_list);
2158                 if_clone_attach(&carp_cloner);
2159                 break;
2160
2161         case MOD_UNLOAD:
2162                 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
2163                 if_clone_detach(&carp_cloner);
2164                 break;
2165
2166         default:
2167                 return (EINVAL);
2168         }
2169
2170         return (0);
2171 }
2172
2173 static moduledata_t carp_mod = {
2174         "carp",
2175         carp_modevent,
2176         0
2177 };
2178
2179 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);