/*	$OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sockio.h>
#include <sys/thread2.h>

#include <vm/vm_zone.h>

#include <machine/inttypes.h>

#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>

#include <netinet6/nd6.h>

#include <net/pf/pfvar.h>
#include <net/pf/if_pfsync.h>
#define PFSYNCNAME	"pfsync"

#define PFSYNC_MINMTU	\
	(sizeof(struct pfsync_header) + sizeof(struct pf_state))

#define DPRINTF(x)	do { if (pfsyncdebug) kprintf x; } while (0)
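
/*
 * A pfsync packet on the wire is a pfsync_header followed by an array of
 * action-specific records, so the smallest useful MTU must accommodate the
 * header plus at least one full uncompressed state.
 */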
struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;
void	pfsyncattach(int);
static void	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
void	pfsync_setmtu(struct pfsync_softc *, int);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
int	pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
void	pfsyncstart(struct ifnet *);

struct mbuf	*pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int	pfsync_sendout(struct pfsync_softc *);
int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void	pfsync_timeout(void *);
void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulkfail(void *);
static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;

struct if_clone pfsync_cloner =
	IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create,
	pfsync_clone_destroy, 1, 1);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;

	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
	    M_WAITOK | M_ZERO);

	sc->sc_mbuf_net = NULL;
	sc->sc_mbuf_tdb = NULL;
	sc->sc_statep.s = NULL;
	sc->sc_statep_net.s = NULL;
	sc->sc_statep_tdb.t = NULL;
	sc->sc_maxupdates = 128;
	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_ureq_received = 0;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_send_next = NULL;
	sc->sc_bulk_terminator = NULL;

	ifp = &sc->sc_if;
	ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	ifp->if_baudrate = IF_Mbps(100);
	pfsync_setmtu(sc, MCLBYTES);
	callout_init(&sc->sc_tmo);
	callout_init(&sc->sc_tdb_tmo);
	callout_init(&sc->sc_bulk_tmo);
	callout_init(&sc->sc_bulkfail_tmo);
	if_attach(&sc->sc_if, NULL);

	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
	bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
	if_addgroup(ifp, "carp");

	return (0);
}
static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	kfree(pfsyncif, M_PFSYNC);
}
/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	IF_DROP(&ifp->if_snd);
	IF_DRAIN(&ifp->if_snd);
}
int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
		if (d->scrub == NULL)
			return (ENOMEM);
		bzero(d->scrub, sizeof(*d->scrub));
	}

	return (0);
}
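
/*
 * pfsync_insert_net_state() instantiates a state received from a peer:
 * it resolves the interface and, when the ruleset checksums matched, the
 * rule the state was created from, then converts the wire-format state
 * back to host representation and inserts it into the local state table.
 */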
int
pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
{
	struct pf_state *st = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		kprintf("pfsync_insert_net_state: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	kif = pfi_kif_get(sp->ifname);
	if (kif == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync_insert_net_state: "
			    "unknown interface: %s\n", sp->ifname);
		/* skip this state */
		return (0);
	}

	/*
	 * If the ruleset checksums match, it's safe to associate the state
	 * with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if (!r->max_states || r->states < r->max_states)
		st = pool_get(&pf_state_pl, PR_NOWAIT);
	if (st == NULL) {
		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
		return (ENOMEM);
	}
	bzero(st, sizeof(*st));

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
		return (ENOMEM);
	}

	/* XXX get pointers to nat_rule and anchor */
	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */

	/* fill in the rest of the state entry */
	pf_state_host_ntoh(&sp->lan, &st->lan);
	pf_state_host_ntoh(&sp->gwy, &st->gwy);
	pf_state_host_ntoh(&sp->ext, &st->ext);

	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = ntohl(sp->expire) + time_second;

	st->proto = sp->proto;
	st->direction = sp->direction;
	st->timeout = sp->timeout;
	st->allow_opts = sp->allow_opts;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	st->sync_flags = PFSTATE_FROMSYNC;

	if (pf_insert_state(kif, st)) {
		pfi_kif_unref(kif, PFI_KIF_REF_NONE);
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
		return (EINVAL);
	}

	return (0);
}
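
/*
 * pfsync_input() runs for every IPPROTO_PFSYNC packet.  The sanity checks
 * below are deliberately strict: the packet must arrive on the configured
 * sync interface, and a TTL of PFSYNC_DFLTTL (255) proves that it cannot
 * have been forwarded in from another network segment.
 */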
void
pfsync_input(struct mbuf *m, ...)
{
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_softc *sc = pfsyncif;
	struct pf_state *st;
	struct pf_state_cmp key;
	struct pfsync_state *sp;
	struct pfsync_state_upd *up;
	struct pfsync_state_del *dp;
	struct pfsync_state_clr *cp;
	struct pfsync_state_upd_req *rup;
	struct pfsync_state_bus *bus;
	struct in_addr src;
	struct mbuf *mp;
	int iplen, action, error, i, count, offp, sfail, stale = 0;
	u_int8_t chksum_flag = 0;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (!sc || !sc->sc_sync_ifp || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	iplen = ip->ip_hl << 2;

	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (iplen + sizeof(*ph) > m->m_len) {
		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + iplen);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	action = ph->action;
	count = ph->count;

	/* make sure it's a valid action code */
	if (action >= PFSYNC_ACT_MAX) {
		pfsyncstats.pfsyncs_badact++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	src = ip->ip_src;

	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		chksum_flag++;

	switch (action) {
	case PFSYNC_ACT_CLR: {
		struct pf_state *nexts;
		struct pfi_kif *kif;
		u_int32_t creatorid;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*cp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}
		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
		creatorid = cp->creatorid;

		crit_enter();
		if (cp->ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;
					pf_unlink_state(st);
				}
			}
		} else {
			if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
				crit_exit();
				goto done;
			}
			for (st = RB_MIN(pf_state_tree_lan_ext,
			    &kif->pfik_lan_ext); st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_lan_ext,
				    &kif->pfik_lan_ext, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;
					pf_unlink_state(st);
				}
			}
		}
		crit_exit();

		break;
	}
	case PFSYNC_ACT_INS:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST ||
			    sp->direction > PF_OUT ||
			    (sp->af != AF_INET && sp->af != AF_INET6)) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_INS: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}

			if ((error = pfsync_insert_net_state(sp,
			    chksum_flag))) {
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				continue;
			}
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			int flags = PFSYNC_FLAG_STALE;

			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_UPD: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}

			bcopy(sp->id, &key.id, sizeof(key.id));
			key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&key);
			if (st == NULL) {
				/* insert the update */
				if (pfsync_insert_net_state(sp, chksum_flag))
					pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sfail = 0;
			if (st->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > sp->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    sp->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(sp->src.seqlo)))
					sfail = 3;
				else if (st->dst.state > sp->dst.state) {
					/*
					 * There might still be useful
					 * information about the src state here,
					 * so import that part of the update,
					 * then "fail" so we send the updated
					 * state back to the peer that is
					 * missing what we know.
					 */
					pf_state_peer_ntoh(&sp->src, &st->src);
					/* XXX do anything with timeouts? */
					sfail = 7;
					flags = 0;
				} else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machines always go
				 * forwards.
				 */
				if (st->src.state > sp->src.state)
					sfail = 5;
				else if (st->dst.state > sp->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: %s stale update "
					    "(%d) id: %016jx "
					    "creatorid: %08x\n",
					    (sfail < 7 ? "ignoring"
					     : "partial"), sfail,
					    (uintmax_t)be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_badstate++;

				if (!(sp->sync_flags & PFSTATE_STALE)) {
					/* we have a better state, send it */
					if (sc->sc_mbuf != NULL && !stale)
						pfsync_sendout(sc);
					stale++;
					if (!st->sync_flags)
						pfsync_pack_state(
						    PFSYNC_ACT_UPD, st, flags);
				}
				continue;
			}
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->src, &st->src);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		if (stale && sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	/*
	 * It's not strictly necessary for us to support the "uncompressed"
	 * delete action, but it's relatively simple and maintains consistency.
	 */
	case PFSYNC_ACT_DEL:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			bcopy(sp->id, &key.id, sizeof(key.id));
			key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD_C: {
		int update_requested = 0;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*up), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}

		crit_enter();
		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
		    i < count; i++, up++) {
			/* check for invalid values */
			if (up->timeout >= PFTM_MAX ||
			    up->src.state > PF_TCPS_PROXY_DST ||
			    up->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: "
					    "PFSYNC_ACT_UPD_C: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}

			bcopy(up->id, &key.id, sizeof(key.id));
			key.creatorid = up->creatorid;

			st = pf_find_state_byid(&key);
			if (st == NULL) {
				/* We don't have this state. Ask for it. */
				error = pfsync_request_update(up, &src);
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				update_requested = 1;
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sfail = 0;
			if (st->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > up->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    up->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (st->dst.state > up->dst.state)
					sfail = 2;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(up->src.seqlo)))
					sfail = 3;
				else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machines always go
				 * forwards.
				 */
				if (st->src.state > up->src.state)
					sfail = 5;
				else if (st->dst.state > up->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: ignoring stale update "
					    "(%d) id: %016jx "
					    "creatorid: %08x\n", sfail,
					    (uintmax_t)be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_badstate++;

				/* we have a better state, send it out */
				if ((!stale || update_requested) &&
				    sc->sc_mbuf != NULL) {
					pfsync_sendout(sc);
					update_requested = 0;
				}
				stale++;
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD, st,
					    PFSYNC_FLAG_STALE);
				continue;
			}
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->src, &st->src);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		if ((update_requested || stale) && sc->sc_mbuf)
			pfsync_sendout(sc);
		crit_exit();
		break;
	}
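	/*
	 * Compressed deletes carry only the state id and creator id, which
	 * is all the receiver needs to look the state up and unlink it.
	 */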
	case PFSYNC_ACT_DEL_C:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*dp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}

		crit_enter();
		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
		    i < count; i++, dp++) {
			bcopy(dp->id, &key.id, sizeof(key.id));
			key.creatorid = dp->creatorid;

			st = pf_find_state_byid(&key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_INS_F:
	case PFSYNC_ACT_DEL_F:
		/* not implemented */
		break;
	case PFSYNC_ACT_UREQ:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*rup), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}

		crit_enter();
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		for (i = 0,
		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
		    i < count; i++, rup++) {
			bcopy(rup->id, &key.id, sizeof(key.id));
			key.creatorid = rup->creatorid;

			if (key.id == 0 && key.creatorid == 0) {
				sc->sc_ureq_received = mycpu->gd_time_seconds;
				if (sc->sc_bulk_send_next == NULL)
					sc->sc_bulk_send_next =
					    TAILQ_FIRST(&state_list);
				sc->sc_bulk_terminator = sc->sc_bulk_send_next;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received "
					    "bulk update request\n");
				pfsync_send_bus(sc, PFSYNC_BUS_START);
				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
				    pfsync_bulk_update,
				    LIST_FIRST(&pfsync_list));
			} else {
				st = pf_find_state_byid(&key);
				if (st == NULL) {
					pfsyncstats.pfsyncs_badstate++;
					continue;
				}
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD,
					    st, 0);
			}
		}
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	case PFSYNC_ACT_BUS:
		/* If we're not waiting for a bulk update, who cares. */
		if (sc->sc_ureq_sent == 0)
			break;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*bus), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			goto done;
		}
		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
		switch (bus->status) {
		case PFSYNC_BUS_START:
			callout_reset(&sc->sc_bulkfail_tmo,
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: received bulk "
				    "update start\n");
			break;
		case PFSYNC_BUS_END:
			if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
			    sc->sc_ureq_sent) {
				/* that's it, we're happy */
				sc->sc_ureq_sent = 0;
				sc->sc_bulk_tries = 0;
				callout_stop(&sc->sc_bulkfail_tmo);
				if (!pfsync_sync_ok)
					carp_group_demote_adj(&sc->sc_if, -1);
				pfsync_sync_ok = 1;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received valid "
					    "bulk update end\n");
			} else {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received invalid "
					    "bulk update end: bad timestamp\n");
			}
			break;
		}
		break;
	}

done:
	if (m)
		m_freem(m);
}
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	int error;

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < PFSYNC_MINMTU)
			return (EINVAL);
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		crit_enter();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout(sc);
		pfsync_setmtu(sc, ifr->ifr_mtu);
		crit_exit();
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_ifp)
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
			return (error);
		break;
	case SIOCSETPFSYNC:
		if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around. */
				crit_enter();
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
				crit_exit();
			}
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		crit_enter();
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout(sc);
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				crit_exit();
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
				crit_exit();
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		if (sc->sc_sync_ifp ||
		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = mycpu->gd_time_seconds;
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: requesting bulk update\n");
			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			error = pfsync_request_update(NULL, NULL);
			if (error == ENOMEM) {
				crit_exit();
				return (ENOMEM);
			}
			pfsync_sendout(sc);
		}
		crit_exit();
		break;

	default:
		return (ENOTTY);
	}

	return (0);
}
void
pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
{
	int mtu;

	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
		mtu = sc->sc_sync_ifp->if_mtu;
	else
		mtu = mtu_req;

	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
	    sizeof(struct pfsync_state);
	/* the header's count field is a u_int8_t, so cap states per packet */
	if (sc->sc_maxcount > 254)
		sc->sc_maxcount = 254;
	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
	    sc->sc_maxcount * sizeof(struct pfsync_state);
}
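
/*
 * Note that sc_maxcount, not the raw MTU, drives packet sizing from here
 * on: the interface MTU is rounded down to a whole number of pfsync_state
 * records per packet.
 */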
struct mbuf *
pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
{
	struct pfsync_header *h;
	struct mbuf *m;
	int len;

	MGETHDR(m, M_WAITOK, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		return (NULL);
	}

	switch (action) {
	case PFSYNC_ACT_CLR:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_clr);
		break;
	case PFSYNC_ACT_UPD_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_DEL_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_UREQ:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_BUS:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_bus);
		break;
	default:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
		    sizeof(struct pfsync_header);
		break;
	}

	if (len > MHLEN) {
		MCLGET(m, M_WAITOK);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			return (NULL);
		}
		/*
		 * Point m_data at the (long-aligned) end of the cluster so
		 * an IP header can later be prepended in place.
		 */
		m->m_data += (MCLBYTES - len) & ~(sizeof(long) - 1);
	} else
		MH_ALIGN(m, len);

	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
	h = mtod(m, struct pfsync_header *);
	h->version = PFSYNC_VERSION;
	h->af = 0;
	h->count = 0;
	h->action = action;

	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
	    LIST_FIRST(&pfsync_list));
	return (m);
}
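
/*
 * pfsync_pack_state() appends one state to the packet being built.
 * Repeated updates for the same state are coalesced in place until
 * sc_maxupdates is reached, so a busy state does not cost one packet
 * per change.
 */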
int
pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
{
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_header *h, *h_net;
	struct pfsync_state *sp = NULL;
	struct pfsync_state_upd *up = NULL;
	struct pfsync_state_del *dp = NULL;
	struct pf_rule *r;
	u_int32_t secs;
	int ret = 0;
	u_int8_t i = 255, newaction = 0;

	if (sc == NULL)
		return (0);
	ifp = &sc->sc_if;

	/*
	 * If a packet falls in the forest and there's nobody around to
	 * hear, does it make a sound?
	 */
	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
		/* Don't leave any stale pfsync packets hanging around. */
		if (sc->sc_mbuf != NULL) {
			m_freem(sc->sc_mbuf);
			sc->sc_mbuf = NULL;
			sc->sc_statep.s = NULL;
		}
		return (0);
	}

	if (action >= PFSYNC_ACT_MAX)
		return (EINVAL);

	crit_enter();
	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
		    (void *)&sc->sc_statep.s)) == NULL) {
			crit_exit();
			return (ENOMEM);
		}
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != action) {
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
			    (void *)&sc->sc_statep.s)) == NULL) {
				crit_exit();
				return (ENOMEM);
			}
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		} else {
			/*
			 * If it's an update, look in the packet to see if
			 * we already have an update for the state.
			 */
			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
				struct pfsync_state *usp =
				    (void *)((char *)h + PFSYNC_HDRLEN);

				for (i = 0; i < h->count; i++) {
					if (!memcmp(usp->id, &st->id,
					    PFSYNC_ID_LEN) &&
					    usp->creatorid == st->creatorid) {
						sp = usp;
						sp->updates++;
						break;
					}
					usp++;
				}
			}
		}
	}

	secs = time_second;

	st->pfsync_time = mycpu->gd_time_seconds;

	if (sp == NULL) {
		/* not a "duplicate" update */
		i = 255;
		sp = sc->sc_statep.s++;
		sc->sc_mbuf->m_pkthdr.len =
		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
		h->count++;
		bzero(sp, sizeof(*sp));

		bcopy(&st->id, sp->id, sizeof(sp->id));
		sp->creatorid = st->creatorid;

		strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
		pf_state_host_hton(&st->lan, &sp->lan);
		pf_state_host_hton(&st->gwy, &sp->gwy);
		pf_state_host_hton(&st->ext, &sp->ext);

		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));

		sp->creation = htonl(secs - st->creation);
		pf_state_counter_hton(st->packets[0], sp->packets[0]);
		pf_state_counter_hton(st->packets[1], sp->packets[1]);
		pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
		pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
		if ((r = st->rule.ptr) == NULL)
			sp->rule = htonl(-1);
		else
			sp->rule = htonl(r->nr);
		if ((r = st->anchor.ptr) == NULL)
			sp->anchor = htonl(-1);
		else
			sp->anchor = htonl(r->nr);
		sp->af = st->af;
		sp->proto = st->proto;
		sp->direction = st->direction;
		sp->log = st->log;
		sp->allow_opts = st->allow_opts;
		sp->timeout = st->timeout;

		if (flags & PFSYNC_FLAG_STALE)
			sp->sync_flags |= PFSTATE_STALE;
	}

	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->expire <= secs)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(st->expire - secs);

	/* do we need to build "compressed" actions for network transfer? */
	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
		switch (action) {
		case PFSYNC_ACT_UPD:
			newaction = PFSYNC_ACT_UPD_C;
			break;
		case PFSYNC_ACT_DEL:
			newaction = PFSYNC_ACT_DEL_C;
			break;
		default:
			/* by default we just send the uncompressed states */
			break;
		}
	}

	if (newaction) {
		if (sc->sc_mbuf_net == NULL) {
			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
			    (void *)&sc->sc_statep_net.s)) == NULL) {
				crit_exit();
				return (ENOMEM);
			}
		}
		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);

		switch (newaction) {
		case PFSYNC_ACT_UPD_C:
			if (i != 255) {
				up = (void *)((char *)h_net +
				    PFSYNC_HDRLEN + (i * sizeof(*up)));
				up->updates++;
			} else {
				h_net->count++;
				sc->sc_mbuf_net->m_pkthdr.len =
				    sc->sc_mbuf_net->m_len += sizeof(*up);
				up = sc->sc_statep_net.u++;

				bzero(up, sizeof(*up));
				bcopy(&st->id, up->id, sizeof(up->id));
				up->creatorid = st->creatorid;
			}
			up->timeout = st->timeout;
			up->expire = sp->expire;
			up->src = sp->src;
			up->dst = sp->dst;
			break;
		case PFSYNC_ACT_DEL_C:
			sc->sc_mbuf_net->m_pkthdr.len =
			    sc->sc_mbuf_net->m_len += sizeof(*dp);
			dp = sc->sc_statep_net.d++;
			h_net->count++;

			bzero(dp, sizeof(*dp));
			bcopy(&st->id, dp->id, sizeof(dp->id));
			dp->creatorid = st->creatorid;
			break;
		}
	}

	if (h->count == sc->sc_maxcount ||
	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
		ret = pfsync_sendout(sc);

	crit_exit();
	return (ret);
}
/* This must be called in splnet() */
int
pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
{
	struct ifnet *ifp = NULL;
	struct pfsync_header *h;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_upd_req *rup;
	int ret = 0;

	if (sc == NULL)
		return (0);
	ifp = &sc->sc_if;

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
		    (void *)&sc->sc_statep.s)) == NULL)
			return (ENOMEM);
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != PFSYNC_ACT_UREQ) {
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
			    (void *)&sc->sc_statep.s)) == NULL)
				return (ENOMEM);
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		}
	}

	if (src != NULL)
		sc->sc_sendaddr = *src;
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
	h->count++;
	rup = sc->sc_statep.r++;
	bzero(rup, sizeof(*rup));
	if (up != NULL) {
		bcopy(up->id, rup->id, sizeof(rup->id));
		rup->creatorid = up->creatorid;
	}

	if (h->count == sc->sc_maxcount)
		ret = pfsync_sendout(sc);

	return (ret);
}
int
pfsync_clear_states(u_int32_t creatorid, char *ifname)
{
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_clr *cp;
	int ret;

	if (sc == NULL)
		return (0);
	ifp = &sc->sc_if;

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
	    (void *)&sc->sc_statep.c)) == NULL) {
		crit_exit();
		return (ENOMEM);
	}
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
	cp = sc->sc_statep.c;
	cp->creatorid = creatorid;
	if (ifname != NULL)
		strlcpy(cp->ifname, ifname, IFNAMSIZ);

	ret = (pfsync_sendout(sc));
	crit_exit();
	return (ret);
}
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = v;

	crit_enter();
	pfsync_sendout(sc);
	crit_exit();
}
void
pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
{
	struct pfsync_state_bus *bus;

	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	if (pfsync_sync_ok &&
	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
	    (void *)&sc->sc_statep.b)) != NULL) {
		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
		bus = sc->sc_statep.b;
		bus->creatorid = pf_status.hostid;
		bus->status = status;
		bus->endtime = htonl(mycpu->gd_time_seconds -
		    sc->sc_ureq_received);
		pfsync_sendout(sc);
	}
}
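
/*
 * Bulk updates walk the entire state list in chunks of sc_maxcount *
 * PFSYNC_BULKPACKETS states, rearming sc_bulk_tmo between chunks, and
 * finish with a PFSYNC_BUS_END message.  The endtime value sent above
 * is the time elapsed since the bulk request was received, which lets
 * the requesting peer verify that the run it saw began after its own
 * request went out.
 */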
void
pfsync_bulk_update(void *v)
{
	struct pfsync_softc *sc = v;
	int i = 0;
	struct pf_state *state;

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	/*
	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
	 * been sent since the latest request was made.
	 */
	state = sc->sc_bulk_send_next;
	if (state)
		do {
			/* send state update if syncable and not already sent */
			if (!state->sync_flags
			    && state->timeout < PFTM_MAX
			    && state->pfsync_time <= sc->sc_ureq_received) {
				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
				i++;
			}

			/* figure next state to send */
			state = TAILQ_NEXT(state, u.s.entry_list);

			/* wrap to start of list if we hit the end */
			if (!state)
				state = TAILQ_FIRST(&state_list);
		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
		    state != sc->sc_bulk_terminator);

	if (!state || state == sc->sc_bulk_terminator) {
		/* we're done */
		pfsync_send_bus(sc, PFSYNC_BUS_END);
		sc->sc_ureq_received = 0;
		sc->sc_bulk_send_next = NULL;
		sc->sc_bulk_terminator = NULL;
		callout_stop(&sc->sc_bulk_tmo);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: bulk update complete\n");
	} else {
		/* look again for more in a bit */
		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update,
		    LIST_FIRST(&pfsync_list));
		sc->sc_bulk_send_next = state;
	}
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	crit_exit();
}
void
pfsync_bulkfail(void *v)
{
	struct pfsync_softc *sc = v;
	int error;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
		    LIST_FIRST(&pfsync_list));
		crit_enter();
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: cannot allocate mbufs for "
				    "bulk update\n");
		} else
			pfsync_sendout(sc);
		crit_exit();
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: failed to receive "
			    "bulk update status\n");
		callout_stop(&sc->sc_bulkfail_tmo);
	}
}
/* This must be called in splnet() */
int
pfsync_sendout(struct pfsync_softc *sc)
{
	struct ifnet *ifp = &sc->sc_if;
	struct mbuf *m;

	callout_stop(&sc->sc_tmo);

	if (sc->sc_mbuf == NULL)
		return (0);
	m = sc->sc_mbuf;
	sc->sc_mbuf = NULL;
	sc->sc_statep.s = NULL;

	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);

	if (sc->sc_mbuf_net) {
		m_freem(m);
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;
	}

	return pfsync_sendout_mbuf(sc, m);
}
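
/*
 * Wire encapsulation: pfsync payloads travel as raw IPPROTO_PFSYNC
 * datagrams with TTL PFSYNC_DFLTTL, addressed either to the configured
 * peer or to the INADDR_PFSYNC_GROUP multicast group (224.0.0.240).
 */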
int
pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
{
	struct sockaddr sa;
	struct ip *ip;

	if (sc->sc_sync_ifp ||
	    sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
		M_PREPEND(m, sizeof(struct ip), M_WAITOK);
		if (m == NULL) {
			pfsyncstats.pfsyncs_onomem++;
			return (0);
		}
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_sum = 0;

		bzero(&sa, sizeof(sa));
		ip->ip_src.s_addr = INADDR_ANY;

		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
			m->m_flags |= M_MCAST;
		ip->ip_dst = sc->sc_sendaddr;
		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;

		pfsyncstats.pfsyncs_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
			pfsyncstats.pfsyncs_oerrors++;
	} else
		m_freem(m);

	return (0);
}