/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */
 * Copyright (c) 2002 Michael Shalayeff
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sockio.h>
#include <sys/thread2.h>
#include <vm/vm_zone.h>
#include <machine/inttypes.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet6/nd6.h>
#include <net/pf/pfvar.h>
#include <net/pf/if_pfsync.h>

#define PFSYNCNAME "pfsync"
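/*
 * The interface MTU is never allowed to drop below the pfsync header plus
 * a single full state record; anything smaller could not carry even one
 * update.
 */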
#define PFSYNC_MINMTU \
    (sizeof(struct pfsync_header) + sizeof(struct pf_state))
#define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0)

struct pfsync_softc *pfsyncif = NULL;
struct pfsyncstats pfsyncstats;

void pfsyncattach(int);
static void pfsync_clone_destroy(struct ifnet *);
static int pfsync_clone_create(struct if_clone *, int, caddr_t);
void pfsync_setmtu(struct pfsync_softc *, int);
int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
    struct pf_state_peer *);
int pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
    struct rtentry *);
int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
void pfsyncstart(struct ifnet *);

struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int pfsync_sendout(struct pfsync_softc *);
int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void pfsync_timeout(void *);
void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void pfsync_bulk_update(void *);
void pfsync_bulkfail(void *);

static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;

struct if_clone pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1, 1);
pfsyncattach(int npfsync)
	if_clone_attach(&pfsync_cloner);

pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
	struct pfsync_softc *sc;

	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
	sc->sc_mbuf_net = NULL;
	sc->sc_mbuf_tdb = NULL;
	sc->sc_statep.s = NULL;
	sc->sc_statep_net.s = NULL;
	sc->sc_statep_tdb.t = NULL;
	sc->sc_maxupdates = 128;
	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_ureq_received = 0;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_send_next = NULL;
	sc->sc_bulk_terminator = NULL;

	ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	ifp->if_baudrate = IF_Mbps(100);

	pfsync_setmtu(sc, MCLBYTES);
	callout_init(&sc->sc_tmo);
	callout_init(&sc->sc_tdb_tmo);
	callout_init(&sc->sc_bulk_tmo);
	callout_init(&sc->sc_bulkfail_tmo);
	if_attach(&sc->sc_if, NULL);

	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
	bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
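	/*
	 * With DLT_PFSYNC attached, bpf listeners (e.g. tcpdump on pfsync0)
	 * can observe the raw pfsync records even when no sync peer is
	 * configured.
	 */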
	if_addgroup(ifp, "carp");

pfsync_clone_destroy(struct ifnet *ifp)
	kfree(pfsyncif, M_DEVBUF);

 * Start output on the pfsync interface.
pfsyncstart(struct ifnet *ifp)
	IF_DROP(&ifp->if_snd);
	IF_DRAIN(&ifp->if_snd);
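	/*
	 * pfsync never transmits via its own interface send queue; packets
	 * are built and sent directly by pfsync_sendout(), so anything that
	 * lands here is simply dropped and drained.
	 */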
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
		if (d->scrub == NULL)
		bzero(d->scrub, sizeof(*d->scrub));
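	/*
	 * Scrub state (sequence/timestamp/window-scaling tracking) is
	 * allocated lazily: only when the peer's state carries scrub
	 * information and we do not already hold a block for it.
	 */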
pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
	struct pf_state *st = NULL;
	struct pf_rule *r = NULL;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		kprintf("pfsync_insert_net_state: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));

	kif = pfi_kif_get(sp->ifname);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync_insert_net_state: "
			    "unknown interface: %s\n", sp->ifname);
		/* skip this state */

	 * If the ruleset checksums match, it's safe to associate the state
	 * with the rule of that number.
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
		r = &pf_default_rule;

	if (!r->max_states || r->states < r->max_states)
		st = pool_get(&pf_state_pl, PR_NOWAIT);
		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
	bzero(st, sizeof(*st));

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
		pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);

	/* XXX get pointers to nat_rule and anchor */
	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */

	/* fill in the rest of the state entry */
	pf_state_host_ntoh(&sp->lan, &st->lan);
	pf_state_host_ntoh(&sp->gwy, &st->gwy);
	pf_state_host_ntoh(&sp->ext, &st->ext);

	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = ntohl(sp->expire) + time_second;

	st->proto = sp->proto;
	st->direction = sp->direction;
	st->timeout = sp->timeout;
	st->allow_opts = sp->allow_opts;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	st->sync_flags = PFSTATE_FROMSYNC;
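	/*
	 * PFSTATE_FROMSYNC marks this entry as learned from a peer, so that
	 * inserting (and later purging) it does not itself generate pfsync
	 * traffic back toward the peer that announced it.
	 */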
	if (pf_insert_state(kif, st)) {
		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		pool_put(&pf_state_scrub_pl, st->dst.scrub);
		pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);

pfsync_input(struct mbuf *m, ...)
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_softc *sc = pfsyncif;
	struct pf_state_cmp key;
	struct pfsync_state *sp;
	struct pfsync_state_upd *up;
	struct pfsync_state_del *dp;
	struct pfsync_state_clr *cp;
	struct pfsync_state_upd_req *rup;
	struct pfsync_state_bus *bus;
	int iplen, action, error, i, count, offp, sfail, stale = 0;
	u_int8_t chksum_flag = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (!sc || !sc->sc_sync_ifp || !pf_status.running)

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;

	iplen = ip->ip_hl << 2;

	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;

	if (iplen + sizeof(*ph) > m->m_len) {
		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
		ip = mtod(m, struct ip *);
	ph = (struct pfsync_header *)((char *)ip + iplen);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;

	/* make sure it's a valid action code */
	if (action >= PFSYNC_ACT_MAX) {
		pfsyncstats.pfsyncs_badact++;

	/* Cheaper to grab this now than having to mess with mbufs later */
	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		chksum_flag = 1;
	case PFSYNC_ACT_CLR: {
		struct pf_state *nexts;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*cp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
		creatorid = cp->creatorid;

		if (cp->ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;
			if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
			for (st = RB_MIN(pf_state_tree_lan_ext,
			    &kif->pfik_lan_ext); st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_lan_ext,
				    &kif->pfik_lan_ext, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;

		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST ||
			    sp->direction > PF_OUT ||
			    (sp->af != AF_INET && sp->af != AF_INET6)) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_INS: "
				pfsyncstats.pfsyncs_badstate++;

			if ((error = pfsync_insert_net_state(sp,
				if (error == ENOMEM) {

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;

		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			int flags = PFSYNC_FLAG_STALE;

			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_UPD: "
				pfsyncstats.pfsyncs_badstate++;

			bcopy(sp->id, &key.id, sizeof(key.id));
			key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&key);
				/* insert the update */
				if (pfsync_insert_net_state(sp, chksum_flag))
					pfsyncstats.pfsyncs_badstate++;

			if (st->proto == IPPROTO_TCP) {
				 * The state should never go backwards except
				 * for syn-proxy states. Neither should the
				 * sequence window slide backwards.
				if (st->src.state > sp->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    sp->src.state >= PF_TCPS_PROXY_SRC))
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(sp->src.seqlo)))
				else if (st->dst.state > sp->dst.state) {
					/* There might still be useful
					 * information about the src state here,
					 * so import that part of the update,
					 * then "fail" so we send the updated
					 * state back to the peer who is missing
					 * what we know. */
					pf_state_peer_ntoh(&sp->src, &st->src);
					/* XXX do anything with timeouts? */
				} else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
				 * Non-TCP protocol state machines always go
				if (st->src.state > sp->src.state)
				else if (st->dst.state > sp->dst.state)

				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: %s stale update "
					    (sfail < 7 ? "ignoring"
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_badstate++;

				if (!(sp->sync_flags & PFSTATE_STALE)) {
					/* we have a better state, send it */
					if (sc->sc_mbuf != NULL && !stale)
					    PFSYNC_ACT_UPD, st, flags);

			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->src, &st->src);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;

		if (stale && sc->sc_mbuf != NULL)

	 * It's not strictly necessary for us to support the "uncompressed"
	 * delete action, but it's relatively simple and maintains consistency.
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;

		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			bcopy(sp->id, &key.id, sizeof(key.id));
			key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&key);
				pfsyncstats.pfsyncs_badstate++;
			st->sync_flags |= PFSTATE_FROMSYNC;

	case PFSYNC_ACT_UPD_C: {
		int update_requested = 0;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*up), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;

		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
		    i < count; i++, up++) {
			/* check for invalid values */
			if (up->timeout >= PFTM_MAX ||
			    up->src.state > PF_TCPS_PROXY_DST ||
			    up->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: "
				pfsyncstats.pfsyncs_badstate++;

			bcopy(up->id, &key.id, sizeof(key.id));
			key.creatorid = up->creatorid;

			st = pf_find_state_byid(&key);
				/* We don't have this state. Ask for it. */
				error = pfsync_request_update(up, &src);
				if (error == ENOMEM) {
				update_requested = 1;
				pfsyncstats.pfsyncs_badstate++;

			if (st->proto == IPPROTO_TCP) {
				 * The state should never go backwards except
				 * for syn-proxy states. Neither should the
				 * sequence window slide backwards.
				if (st->src.state > up->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    up->src.state >= PF_TCPS_PROXY_SRC))
				else if (st->dst.state > up->dst.state)
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(up->src.seqlo)))
				else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
				 * Non-TCP protocol state machines always go
				if (st->src.state > up->src.state)
				else if (st->dst.state > up->dst.state)

				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: ignoring stale update "
					    "creatorid: %08x\n", sfail,
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_badstate++;

				/* we have a better state, send it out */
				if ((!stale || update_requested) &&
				    sc->sc_mbuf != NULL) {
					update_requested = 0;
				pfsync_pack_state(PFSYNC_ACT_UPD, st,

			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->src, &st->src);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;

		if ((update_requested || stale) && sc->sc_mbuf)

	case PFSYNC_ACT_DEL_C:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*dp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;

		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
		    i < count; i++, dp++) {
			bcopy(dp->id, &key.id, sizeof(key.id));
			key.creatorid = dp->creatorid;

			st = pf_find_state_byid(&key);
				pfsyncstats.pfsyncs_badstate++;
			st->sync_flags |= PFSTATE_FROMSYNC;

	case PFSYNC_ACT_INS_F:
	case PFSYNC_ACT_DEL_F:
		/* not implemented */
	case PFSYNC_ACT_UREQ:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*rup), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;

		if (sc->sc_mbuf != NULL)
		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
		    i < count; i++, rup++) {
			bcopy(rup->id, &key.id, sizeof(key.id));
			key.creatorid = rup->creatorid;

			if (key.id == 0 && key.creatorid == 0) {
				sc->sc_ureq_received = mycpu->gd_time_seconds;
				if (sc->sc_bulk_send_next == NULL)
					sc->sc_bulk_send_next =
					    TAILQ_FIRST(&state_list);
				sc->sc_bulk_terminator = sc->sc_bulk_send_next;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received "
					    "bulk update request\n");
				pfsync_send_bus(sc, PFSYNC_BUS_START);
				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
				    pfsync_bulk_update,
				    LIST_FIRST(&pfsync_list));
				st = pf_find_state_byid(&key);
					pfsyncstats.pfsyncs_badstate++;
				pfsync_pack_state(PFSYNC_ACT_UPD,
		if (sc->sc_mbuf != NULL)

		/* If we're not waiting for a bulk update, who cares. */
		if (sc->sc_ureq_sent == 0)

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*bus), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
		switch (bus->status) {
		case PFSYNC_BUS_START:
			callout_reset(&sc->sc_bulkfail_tmo,
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: received bulk "

			if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
				/* that's it, we're happy */
				sc->sc_ureq_sent = 0;
				sc->sc_bulk_tries = 0;
				callout_stop(&sc->sc_bulkfail_tmo);
					carp_group_demote_adj(&sc->sc_if, -1);
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received valid "
					    "bulk update end\n");
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received invalid "
					    "bulk update end: bad timestamp\n");
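/*
 * pfsyncoutput() intentionally discards anything the stack hands it;
 * pfsync builds and transmits its own packets internally.  The ioctl
 * handler below is what backs configuration along the lines of
 * "ifconfig pfsync0 syncdev <ifname>": the set/get requests carry a
 * struct pfsyncreq describing the sync device, peer address and the
 * per-state update limit.
 */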
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,

pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;

		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
			ifp->if_flags &= ~IFF_RUNNING;
		if (ifr->ifr_mtu < PFSYNC_MINMTU)
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		if (ifr->ifr_mtu < ifp->if_mtu)
		pfsync_setmtu(sc, ifr->ifr_mtu);
		bzero(&pfsyncr, sizeof(pfsyncr));
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
		if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0)
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around. */
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;

		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				return (EADDRNOTAVAIL);

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
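			/*
			 * By default updates are multicast to the pfsync
			 * group with TTL PFSYNC_DFLTTL (255) and multicast
			 * loopback disabled, so a host never receives its
			 * own announcements back.
			 */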
		if (sc->sc_sync_ifp ||
		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = mycpu->gd_time_seconds;
				carp_group_demote_adj(&sc->sc_if, 1);
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: requesting bulk update\n");
			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			error = pfsync_request_update(NULL, NULL);
			if (error == ENOMEM) {

pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
		mtu = sc->sc_sync_ifp->if_mtu;

	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
	    sizeof(struct pfsync_state);
	if (sc->sc_maxcount > 254)
		sc->sc_maxcount = 254;
	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
	    sc->sc_maxcount * sizeof(struct pfsync_state);
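	/*
	 * Rough example of the arithmetic above (sizes illustrative only):
	 * with an effective MTU of 1500 bytes, sc_maxcount becomes
	 * (1500 - sizeof(struct pfsync_header)) / sizeof(struct pfsync_state)
	 * full state records per packet, capped at 254, and the interface
	 * MTU is then rounded down to exactly that many records plus the
	 * header, so no partial state record ever straddles a packet.
	 */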
pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
	struct pfsync_header *h;

	MGETHDR(m, M_WAITOK, MT_DATA);
		sc->sc_if.if_oerrors++;

		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_clr);
	case PFSYNC_ACT_UPD_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
		    sizeof(struct pfsync_header);
	case PFSYNC_ACT_DEL_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
		    sizeof(struct pfsync_header);
	case PFSYNC_ACT_UREQ:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
		    sizeof(struct pfsync_header);
	case PFSYNC_ACT_BUS:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_bus);
		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
		    sizeof(struct pfsync_header);

		MCLGET(m, M_WAITOK);
		if ((m->m_flags & M_EXT) == 0) {
			sc->sc_if.if_oerrors++;
		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);

	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
	h = mtod(m, struct pfsync_header *);
	h->version = PFSYNC_VERSION;

	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
	    LIST_FIRST(&pfsync_list));
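	/*
	 * The one second callout ensures that a partially filled packet is
	 * still flushed promptly (via pfsync_timeout) even if no further
	 * state changes arrive to fill it.
	 */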
pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_header *h, *h_net;
	struct pfsync_state *sp = NULL;
	struct pfsync_state_upd *up = NULL;
	struct pfsync_state_del *dp = NULL;
	u_int8_t i = 255, newaction = 0;

	 * If a packet falls in the forest and there's nobody around to
	 * hear, does it make a sound?
	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
		/* Don't leave any stale pfsync packets hanging around. */
		if (sc->sc_mbuf != NULL) {
			m_freem(sc->sc_mbuf);
			sc->sc_statep.s = NULL;

	if (action >= PFSYNC_ACT_MAX)

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
		    (void *)&sc->sc_statep.s)) == NULL) {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != action) {
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
			    (void *)&sc->sc_statep.s)) == NULL) {
			h = mtod(sc->sc_mbuf, struct pfsync_header *);

		 * If it's an update, look in the packet to see if
		 * we already have an update for the state.
		if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
			struct pfsync_state *usp =
			    (void *)((char *)h + PFSYNC_HDRLEN);

			for (i = 0; i < h->count; i++) {
				if (!memcmp(usp->id, &st->id,
				    usp->creatorid == st->creatorid) {
	st->pfsync_time = mycpu->gd_time_seconds;
		/* not a "duplicate" update */
		sp = sc->sc_statep.s++;
		sc->sc_mbuf->m_pkthdr.len =
		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
		bzero(sp, sizeof(*sp));

		bcopy(&st->id, sp->id, sizeof(sp->id));
		sp->creatorid = st->creatorid;

		strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
		pf_state_host_hton(&st->lan, &sp->lan);
		pf_state_host_hton(&st->gwy, &sp->gwy);
		pf_state_host_hton(&st->ext, &sp->ext);

		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));

		sp->creation = htonl(secs - st->creation);
		pf_state_counter_hton(st->packets[0], sp->packets[0]);
		pf_state_counter_hton(st->packets[1], sp->packets[1]);
		pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
		pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
		if ((r = st->rule.ptr) == NULL)
			sp->rule = htonl(-1);
			sp->rule = htonl(r->nr);
		if ((r = st->anchor.ptr) == NULL)
			sp->anchor = htonl(-1);
			sp->anchor = htonl(r->nr);

		sp->proto = st->proto;
		sp->direction = st->direction;
		sp->allow_opts = st->allow_opts;
		sp->timeout = st->timeout;

		if (flags & PFSYNC_FLAG_STALE)
			sp->sync_flags |= PFSTATE_STALE;

	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->expire <= secs)
		sp->expire = htonl(0);
		sp->expire = htonl(st->expire - secs);

	/* do we need to build "compressed" actions for network transfer? */
	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
		case PFSYNC_ACT_UPD:
			newaction = PFSYNC_ACT_UPD_C;
		case PFSYNC_ACT_DEL:
			newaction = PFSYNC_ACT_DEL_C;
			/* by default we just send the uncompressed states */

		if (sc->sc_mbuf_net == NULL) {
			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
			    (void *)&sc->sc_statep_net.s)) == NULL) {
		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);

		switch (newaction) {
		case PFSYNC_ACT_UPD_C:
				up = (void *)((char *)h_net +
				    PFSYNC_HDRLEN + (i * sizeof(*up)));
				sc->sc_mbuf_net->m_pkthdr.len =
				    sc->sc_mbuf_net->m_len += sizeof(*up);
				up = sc->sc_statep_net.u++;

				bzero(up, sizeof(*up));
				bcopy(&st->id, up->id, sizeof(up->id));
				up->creatorid = st->creatorid;
			up->timeout = st->timeout;
			up->expire = sp->expire;
		case PFSYNC_ACT_DEL_C:
			sc->sc_mbuf_net->m_pkthdr.len =
			    sc->sc_mbuf_net->m_len += sizeof(*dp);
			dp = sc->sc_statep_net.d++;

			bzero(dp, sizeof(*dp));
			bcopy(&st->id, dp->id, sizeof(dp->id));
			dp->creatorid = st->creatorid;

	if (h->count == sc->sc_maxcount ||
	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
		ret = pfsync_sendout(sc);
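	/*
	 * The packet is flushed as soon as it is full, or once a single
	 * state record in it has been coalesced sc_maxupdates times,
	 * bounding how long an update can sit queued before peers see it.
	 */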
/* This must be called in splnet() */
pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
	struct ifnet *ifp = NULL;
	struct pfsync_header *h;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_upd_req *rup;

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
		    (void *)&sc->sc_statep.s)) == NULL)
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != PFSYNC_ACT_UREQ) {
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
			    (void *)&sc->sc_statep.s)) == NULL)
			h = mtod(sc->sc_mbuf, struct pfsync_header *);

		sc->sc_sendaddr = *src;
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
	rup = sc->sc_statep.r++;
	bzero(rup, sizeof(*rup));
		bcopy(up->id, rup->id, sizeof(rup->id));
		rup->creatorid = up->creatorid;

	if (h->count == sc->sc_maxcount)
		ret = pfsync_sendout(sc);

pfsync_clear_states(u_int32_t creatorid, char *ifname)
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_clr *cp;

	if (sc->sc_mbuf != NULL)
	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
	    (void *)&sc->sc_statep.c)) == NULL) {
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
	cp = sc->sc_statep.c;
	cp->creatorid = creatorid;
		strlcpy(cp->ifname, ifname, IFNAMSIZ);

	ret = pfsync_sendout(sc);
pfsync_timeout(void *v)
	struct pfsync_softc *sc = v;

pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
	struct pfsync_state_bus *bus;

	if (sc->sc_mbuf != NULL)

	if (pfsync_sync_ok &&
	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
	    (void *)&sc->sc_statep.b)) != NULL) {
		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
		bus = sc->sc_statep.b;
		bus->creatorid = pf_status.hostid;
		bus->status = status;
		bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received);
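		/*
		 * endtime carries the seconds elapsed since the bulk update
		 * request was received; the receiver checks it against its
		 * own sc_ureq_sent timestamp to reject BUS_END messages that
		 * predate its request.
		 */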
pfsync_bulk_update(void *v)
	struct pfsync_softc *sc = v;
	struct pf_state *state;

	if (sc->sc_mbuf != NULL)

	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
	 * been sent since the latest request was made.
	state = sc->sc_bulk_send_next;

		/* send state update if syncable and not already sent */
		if (!state->sync_flags
		    && state->timeout < PFTM_MAX
		    && state->pfsync_time <= sc->sc_ureq_received) {
			pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);

		/* figure next state to send */
		state = TAILQ_NEXT(state, u.s.entry_list);

		/* wrap to start of list if we hit the end */
			state = TAILQ_FIRST(&state_list);
	} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
	    state != sc->sc_bulk_terminator);

	if (!state || state == sc->sc_bulk_terminator) {
		pfsync_send_bus(sc, PFSYNC_BUS_END);
		sc->sc_ureq_received = 0;
		sc->sc_bulk_send_next = NULL;
		sc->sc_bulk_terminator = NULL;
		callout_stop(&sc->sc_bulk_tmo);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: bulk update complete\n");
		/* look again for more in a bit */
		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
		    LIST_FIRST(&pfsync_list));
		sc->sc_bulk_send_next = state;

	if (sc->sc_mbuf != NULL)

pfsync_bulkfail(void *v)
	struct pfsync_softc *sc = v;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
		    LIST_FIRST(&pfsync_list));
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: cannot allocate mbufs for "
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: failed to receive "
			    "bulk update status\n");
		callout_stop(&sc->sc_bulkfail_tmo);

/* This must be called in splnet() */
pfsync_sendout(struct pfsync_softc *sc)
	struct ifnet *ifp = &sc->sc_if;

	callout_stop(&sc->sc_tmo);

	if (sc->sc_mbuf == NULL)
	sc->sc_statep.s = NULL;

		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);

	if (sc->sc_mbuf_net) {
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;

	return pfsync_sendout_mbuf(sc, m);

pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
	if (sc->sc_sync_ifp ||
	    sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
		M_PREPEND(m, sizeof(struct ip), M_WAITOK);
			pfsyncstats.pfsyncs_onomem++;
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;

		bzero(&sa, sizeof(sa));
		ip->ip_src.s_addr = INADDR_ANY;

		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
			m->m_flags |= M_MCAST;
		ip->ip_dst = sc->sc_sendaddr;
		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
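		/*
		 * sc_sendaddr may have been temporarily pointed at a unicast
		 * destination (e.g. to answer an update request); reset it
		 * to the configured sync peer after each packet.
		 */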
		pfsyncstats.pfsyncs_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
			pfsyncstats.pfsyncs_oerrors++;