From 02742ec659233383d28a5a7a34abfd58a088a8d0 Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Sun, 19 Sep 2004 22:32:48 +0000 Subject: [PATCH] Kernel part of PF Ported-by: - Max Laier (original patch set, FreeBSD PF maintainer) - Devon O'Dell, Simon 'corecode' Schubert (integration and DragonFly-specific changes) In contrast to FreeBSD and OpenBSD, use direct flags in pkthdr instead of m_tags. This reduces allocation and processing overhead. Keep the IP header in host byte order, as the rest of the tree assumes. Module support has a memory leak for vm_zones when unloading pf.ko. --- sys/boot/forth/loader.conf | 3 +- sys/conf/files | 11 +- sys/config/LINT | 6 +- sys/i386/conf/LINT | 6 +- sys/kern/uipc_mbuf.c | 3 +- sys/net/Makefile | 4 +- sys/net/bpf.h | 7 +- sys/net/if_types.h | 4 +- sys/net/pf/Makefile | 33 + sys/net/pf/if_pflog.c | 284 ++ sys/net/pf/if_pflog.h | 83 + sys/net/pf/if_pfsync.c | 1313 ++++++++ sys/net/pf/if_pfsync.h | 285 ++ sys/net/pf/pf.c | 5889 +++++++++++++++++++++++++++++++++++ sys/net/pf/pf_if.c | 973 ++++++ sys/net/pf/pf_ioctl.c | 3129 +++++++++++++++++++ sys/net/pf/pf_norm.c | 1566 ++++++++++ sys/net/pf/pf_osfp.c | 553 ++++ sys/net/pf/pf_subr.c | 131 + sys/net/pf/pf_table.c | 2115 +++++++++++++ sys/net/pf/pfvar.h | 1562 ++++++++++ sys/netinet/icmp_var.h | 3 +- sys/netinet/in.h | 4 +- sys/netinet/in_cksum.c | 28 +- sys/netinet/in_proto.c | 10 +- sys/netinet/ip_icmp.c | 6 +- sys/netinet6/in6_ifattach.c | 5 +- sys/sys/in_cksum.h | 10 +- sys/sys/mbuf.h | 20 +- 29 files changed, 18021 insertions(+), 25 deletions(-) create mode 100644 sys/net/pf/Makefile create mode 100644 sys/net/pf/if_pflog.c create mode 100644 sys/net/pf/if_pflog.h create mode 100644 sys/net/pf/if_pfsync.c create mode 100644 sys/net/pf/if_pfsync.h create mode 100644 sys/net/pf/pf.c create mode 100644 sys/net/pf/pf_if.c create mode 100644 sys/net/pf/pf_ioctl.c create mode 100644 sys/net/pf/pf_norm.c create mode 100644 sys/net/pf/pf_osfp.c create mode 100644 sys/net/pf/pf_subr.c create mode 100644 sys/net/pf/pf_table.c create mode 100644 sys/net/pf/pfvar.h diff --git a/sys/boot/forth/loader.conf b/sys/boot/forth/loader.conf index 4a18ec7d84..c974798945 100644 --- a/sys/boot/forth/loader.conf +++ b/sys/boot/forth/loader.conf @@ -7,7 +7,7 @@ # All arguments must be in double quotes.
# # $FreeBSD: src/sys/boot/forth/loader.conf,v 1.72 2003/07/01 01:03:32 brueffer Exp $ -# $DragonFly: src/sys/boot/forth/loader.conf,v 1.3 2003/11/10 06:08:34 dillon Exp $ +# $DragonFly: src/sys/boot/forth/loader.conf,v 1.4 2004/09/19 22:32:47 joerg Exp $ ############################################################## ### Basic configuration options ############################ @@ -172,6 +172,7 @@ if_tap_load="NO" # Ethernet tunnel software network interface if_tun_load="NO" # Tunnel driver (user process ppp) if_vlan_load="NO" # IEEE 802.1Q VLAN network interface ipfw_load="NO" # Firewall +pf_load="NO" # OpenBSD's packet filter ############################################################## diff --git a/sys/conf/files b/sys/conf/files index 3a3f4d702b..1feeace964 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.72 2004/09/06 13:52:24 joerg Exp $ +# $DragonFly: src/sys/conf/files,v 1.73 2004/09/19 22:32:47 joerg Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -783,6 +783,15 @@ net/if_fddisubr.c optional fddi net/gif/if_gif.c optional gif net/gre/if_gre.c optional gre net/pfil.c standard +net/pf/if_pflog.c optional pflog +net/pf/if_pfsync.c optional pfsync +net/pf/pf.c optional pf +net/pf/pf_if.c optional pf +net/pf/pf_subr.c optional pf +net/pf/pf_ioctl.c optional pf +net/pf/pf_norm.c optional pf +net/pf/pf_table.c optional pf +net/pf/pf_osfp.c optional pf net/ppp_layer/slcompress.c optional netgraph_vjc net/ppp_layer/slcompress.c optional ppp net/ppp_layer/slcompress.c optional sl diff --git a/sys/config/LINT b/sys/config/LINT index 48a4b8f9e5..b0a26e73ef 100644 --- a/sys/config/LINT +++ b/sys/config/LINT @@ -3,7 +3,7 @@ # as much of the source tree as it can. # # $FreeBSD: src/sys/i386/conf/LINT,v 1.749.2.144 2003/06/04 17:56:59 sam Exp $ -# $DragonFly: src/sys/config/LINT,v 1.35 2004/08/03 07:26:57 joerg Exp $ +# $DragonFly: src/sys/config/LINT,v 1.36 2004/09/19 22:32:47 joerg Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -610,6 +610,10 @@ options IPSTEALTH #support for stealth forwarding options TCPDEBUG options NS # NETNS support +device pf +device pfsync +device pflog + # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions. See the mbuf(9) manpage for a list of available diff --git a/sys/i386/conf/LINT b/sys/i386/conf/LINT index cfaa1ad0fb..1d81b2094e 100644 --- a/sys/i386/conf/LINT +++ b/sys/i386/conf/LINT @@ -3,7 +3,7 @@ # as much of the source tree as it can. # # $FreeBSD: src/sys/i386/conf/LINT,v 1.749.2.144 2003/06/04 17:56:59 sam Exp $ -# $DragonFly: src/sys/i386/conf/Attic/LINT,v 1.35 2004/08/03 07:26:57 joerg Exp $ +# $DragonFly: src/sys/i386/conf/Attic/LINT,v 1.36 2004/09/19 22:32:47 joerg Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -610,6 +610,10 @@ options IPSTEALTH #support for stealth forwarding options TCPDEBUG options NS # NETNS support +device pf +device pfsync +device pflog + # The MBUF_STRESS_TEST option enables options which create # various random failures / extreme cases related to mbuf # functions.
See the mbuf(9) manpage for a list of available diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index cf0f4cb201..cb19c71b85 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -82,7 +82,7 @@ * * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $ - * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.27 2004/09/17 10:15:00 dillon Exp $ + * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.28 2004/09/19 22:32:47 joerg Exp $ */ #include "opt_param.h" @@ -730,6 +730,7 @@ m_gethdr(int how, int type) m->m_pkthdr.rcvif = NULL; SLIST_INIT(&m->m_pkthdr.tags); m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.pf_flags = 0; splx(ms); } else { splx(ms); diff --git a/sys/net/Makefile b/sys/net/Makefile index af09cc740b..eb27ba3bb3 100644 --- a/sys/net/Makefile +++ b/sys/net/Makefile @@ -1,8 +1,8 @@ -# $DragonFly: src/sys/net/Makefile,v 1.1 2003/08/15 07:03:09 dillon Exp $ +# $DragonFly: src/sys/net/Makefile,v 1.2 2004/09/19 22:32:47 joerg Exp $ # SUBDIR=accf_data accf_http disc ef faith gif gre sl stf tap tun \ vlan bridge dummynet ipfilter ipfw ip6fw ip_mroute \ - sppp ppp_layer + sppp ppp_layer pf .include diff --git a/sys/net/bpf.h b/sys/net/bpf.h index 295b7e63a9..6624029d41 100644 --- a/sys/net/bpf.h +++ b/sys/net/bpf.h @@ -39,7 +39,7 @@ * @(#)bpf.h 1.34 (LBL) 6/16/96 * * $FreeBSD: src/sys/net/bpf.h,v 1.21.2.4 2002/07/05 14:40:00 fenner Exp $ - * $DragonFly: src/sys/net/bpf.h,v 1.5 2004/07/07 15:16:04 joerg Exp $ + * $DragonFly: src/sys/net/bpf.h,v 1.6 2004/09/19 22:32:47 joerg Exp $ */ #ifndef _NET_BPF_H_ @@ -269,6 +269,11 @@ struct bpf_hdr { */ #define DLT_AIRONET_HEADER 120 +/* + * Reserved for use by OpenBSD's pfsync device. + */ +#define DLT_PFSYNC 121 + /* * The instruction encodings. */ diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 3153ddb1af..2b9dc04a95 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -32,7 +32,7 @@ * * @(#)if_types.h 8.3 (Berkeley) 4/28/95 * $FreeBSD: src/sys/net/if_types.h,v 1.8.2.4 2002/12/23 23:02:21 kbyanc Exp $ - * $DragonFly: src/sys/net/if_types.h,v 1.2 2003/06/17 04:28:48 dillon Exp $ + * $DragonFly: src/sys/net/if_types.h,v 1.3 2004/09/19 22:32:47 joerg Exp $ * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $ */ @@ -249,4 +249,6 @@ #define IFT_PVC 0xf1 #define IFT_FAITH 0xf2 #define IFT_STF 0xf3 +#define IFT_PFLOG 0xf5 /* Packet filter logging */ +#define IFT_PFSYNC 0xf6 /* Packet filter state syncing */ #endif /* !_NET_IF_TYPES_H_ */ diff --git a/sys/net/pf/Makefile b/sys/net/pf/Makefile new file mode 100644 index 0000000000..e1b288a484 --- /dev/null +++ b/sys/net/pf/Makefile @@ -0,0 +1,33 @@ +# $DragonFly: src/sys/net/pf/Makefile,v 1.1 2004/09/19 22:32:47 joerg Exp $ + +KMOD= pf +SRCS= if_pflog.c pf.c pf_if.c pf_ioctl.c pf_norm.c pf_osfp.c pf_subr.c +SRCS+= pf_table.c +SRCS+= use_pflog.h use_pfsync.h opt_inet.h opt_inet6.h use_bpf.h +SRCS+= opt_icmp_bandlim.h +CLEANFILES= use_pflog.h use_pfsync.h use_bpf.h +NOMAN= +CFLAGS+= -I${.CURDIR}/../../contrib/pf + +use_pflog.h: + echo "#define NPFLOG 1" > ${.TARGET} + +use_pfsync.h: +# .PATH searches only work for targets without source +.if !exists(use_pfsync.h) + echo "#define NPFSYNC 0" > ${.TARGET} +.endif + +opt_inet.h: + echo "#define INET 1" > ${.TARGET} + +opt_inet6.h: +# .PATH searches only work for targets without source +.if !exists(opt_inet6.h) + echo "#define INET6 1" > ${.TARGET} +.endif + +use_bpf.h: + echo "#define NBPF 1" > ${.TARGET} + +.include diff --git a/sys/net/pf/if_pflog.c b/sys/net/pf/if_pflog.c new
file mode 100644 index 0000000000..2273edde30 --- /dev/null +++ b/sys/net/pf/if_pflog.c @@ -0,0 +1,284 @@ +/* $FreeBSD: src/sys/contrib/pf/net/if_pflog.c,v 1.9 2004/06/22 20:13:24 brooks Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.11 2003/12/31 11:18:25 cedric Exp $ */ +/* $DragonFly: src/sys/net/pf/if_pflog.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * The authors of this code are John Ioannidis (ji@tla.org), + * Angelos D. Keromytis (kermit@csd.uch.gr) and + * Niels Provos (provos@physnet.uni-hamburg.de). + * + * This code was written by John Ioannidis for BSD/OS in Athens, Greece, + * in November 1995. + * + * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, + * by Angelos D. Keromytis. + * + * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis + * and Niels Provos. + * + * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis + * and Niels Provos. + * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos. + * + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Permission to use, copy, and modify this software with or without fee + * is hereby granted, provided that this entire notice is included in + * all copies of any software which is or includes a copy or + * modification of this software. + * You may use this code under the GNU public license if you so wish. Please + * contribute changes back to the authors under this freer than GPL license + * so that we may further the use of strong encryption without limitations to + * all. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE + * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR + * PURPOSE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef INET +#include +#include +#include +#include +#endif + +#ifdef INET6 +#ifndef INET +#include +#endif +#include +#endif /* INET6 */ + +#include +#include + +#define PFLOGNAME "pflog" + +#define PFLOGMTU (32768 + MHLEN + MLEN) + +#ifdef PFLOGDEBUG +#define DPRINTF(x) do { if (pflogdebug) printf x ; } while (0) +#else +#define DPRINTF(x) +#endif + + +static void pflog_clone_destroy(struct ifnet *); +static int pflog_clone_create(struct if_clone *, int); +int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +int pflogioctl(struct ifnet *, u_long, caddr_t, struct ucred *); +void pflogrtrequest(int, struct rtentry *, struct sockaddr *); +void pflogstart(struct ifnet *); + +static MALLOC_DEFINE(M_PFLOG, PFLOGNAME, "Packet Filter Logging Interface"); +static LIST_HEAD(pflog_list, pflog_softc) pflog_list; +struct if_clone pflog_cloner = IF_CLONE_INITIALIZER("pflog", pflog_clone_create, + pflog_clone_destroy, 1, 1); + +static void +pflog_clone_destroy(struct ifnet *ifp) +{ + struct pflog_softc *sc; + + sc = ifp->if_softc; + + /* + * Do we really need this? 
+ */ + IF_DRAIN(&ifp->if_snd); + + bpfdetach(ifp); + if_detach(ifp); + LIST_REMOVE(sc, sc_next); + free(sc, M_PFLOG); +} + +static int +pflog_clone_create(struct if_clone *ifc, int unit) +{ + struct pflog_softc *sc; + + MALLOC(sc, struct pflog_softc *, sizeof(*sc), M_PFLOG, M_WAITOK|M_ZERO); + + if_initname(&sc->sc_if, ifc->ifc_name, unit); + sc->sc_if.if_mtu = PFLOGMTU; + sc->sc_if.if_ioctl = pflogioctl; + sc->sc_if.if_output = pflogoutput; + sc->sc_if.if_start = pflogstart; + sc->sc_if.if_type = IFT_PFLOG; + sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen; + sc->sc_if.if_hdrlen = PFLOG_HDRLEN; + sc->sc_if.if_softc = sc; + if_attach(&sc->sc_if); + + LIST_INSERT_HEAD(&pflog_list, sc, sc_next); + bpfattach(&sc->sc_if, DLT_PFLOG, PFLOG_HDRLEN); + + return (0); +} + +/* + * Start output on the pflog interface. + */ +void +pflogstart(struct ifnet *ifp) +{ + int s; + + s = splimp(); + IF_DROP(&ifp->if_snd); + IF_DRAIN(&ifp->if_snd); + splx(s); +} + +int +pflogoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + m_freem(m); + return (0); +} + +/* ARGSUSED */ +void +pflogrtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa) +{ + if (rt) + rt->rt_rmx.rmx_mtu = PFLOGMTU; +} + +/* ARGSUSED */ +int +pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) +{ + switch (cmd) { + case SIOCSIFADDR: + case SIOCAIFADDR: + case SIOCSIFDSTADDR: + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + ifp->if_flags |= IFF_RUNNING; + else + ifp->if_flags &= ~IFF_RUNNING; + break; + default: + return (EINVAL); + } + + return (0); +} + +int +pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, + u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, + struct pf_ruleset *ruleset) +{ + struct ifnet *ifn; + struct pfloghdr hdr; + struct mbuf m1; + + if (kif == NULL || m == NULL || rm == NULL) + return (-1); + + bzero(&hdr, sizeof(hdr)); + hdr.length = PFLOG_REAL_HDRLEN; + hdr.af = af; + hdr.action = rm->action; + hdr.reason = reason; + memcpy(hdr.ifname, kif->pfik_name, sizeof(hdr.ifname)); + + if (am == NULL) { + hdr.rulenr = htonl(rm->nr); + hdr.subrulenr = -1; + } else { + hdr.rulenr = htonl(am->nr); + hdr.subrulenr = htonl(rm->nr); + if (ruleset != NULL) + memcpy(hdr.ruleset, ruleset->name, + sizeof(hdr.ruleset)); + + + } + hdr.dir = dir; + +#ifdef INET + if (af == AF_INET && dir == PF_OUT) { + struct ip *ip; + + ip = mtod(m, struct ip *); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + } +#endif /* INET */ + + m1.m_next = m; + m1.m_len = PFLOG_HDRLEN; + m1.m_data = (char *) &hdr; + + KASSERT((!LIST_EMPTY(&pflog_list)), ("pflog: no interface")); + ifn = &LIST_FIRST(&pflog_list)->sc_if; + + BPF_MTAP(ifn, &m1); + + return (0); +} + +static int +pflog_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + LIST_INIT(&pflog_list); + if_clone_attach(&pflog_cloner); + break; + + case MOD_UNLOAD: + if_clone_detach(&pflog_cloner); + while (!LIST_EMPTY(&pflog_list)) + pflog_clone_destroy( + &LIST_FIRST(&pflog_list)->sc_if); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +static moduledata_t pflog_mod = { + "pflog", + pflog_modevent, + 0 +}; + +#define PFLOG_MODVER 1 + +DECLARE_MODULE(pflog, pflog_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(pflog, PFLOG_MODVER); diff --git a/sys/net/pf/if_pflog.h b/sys/net/pf/if_pflog.h new file mode 100644 index 0000000000..6eeb1b0805 --- /dev/null +++ b/sys/net/pf/if_pflog.h @@ -0,0 +1,83 @@ +/* $FreeBSD: 
src/sys/contrib/pf/net/if_pflog.h,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.10 2004/03/19 04:52:04 frantzen Exp $ */ +/* $DragonFly: src/sys/net/pf/if_pflog.h,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright 2001 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _NET_IF_PFLOG_H_ +#define _NET_IF_PFLOG_H_ + +struct pflog_softc { + struct ifnet sc_if; /* the interface */ + LIST_ENTRY(pflog_softc) sc_next; +}; + +/* XXX keep in sync with pfvar.h */ +#ifndef PF_RULESET_NAME_SIZE +#define PF_RULESET_NAME_SIZE 16 +#endif + +struct pfloghdr { + u_int8_t length; + sa_family_t af; + u_int8_t action; + u_int8_t reason; + char ifname[IFNAMSIZ]; + char ruleset[PF_RULESET_NAME_SIZE]; + u_int32_t rulenr; + u_int32_t subrulenr; + u_int8_t dir; + u_int8_t pad[3]; +}; + +#define PFLOG_HDRLEN sizeof(struct pfloghdr) +/* minus pad, also used as a signature */ +#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) + +/* XXX remove later when old format logs are no longer needed */ +struct old_pfloghdr { + u_int32_t af; + char ifname[IFNAMSIZ]; + short rnr; + u_short reason; + u_short action; + u_short dir; +}; +#define OLD_PFLOG_HDRLEN sizeof(struct old_pfloghdr) + +#ifdef _KERNEL + +#include "use_pflog.h" + +#if NPFLOG > 0 +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g) +#else +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0) +#endif /* NPFLOG > 0 */ +#endif /* _KERNEL */ +#endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/net/pf/if_pfsync.c b/sys/net/pf/if_pfsync.c new file mode 100644 index 0000000000..f32637f1cb --- /dev/null +++ b/sys/net/pf/if_pfsync.c @@ -0,0 +1,1313 @@ +/* $FreeBSD: src/sys/contrib/pf/net/if_pfsync.c,v 1.11 2004/08/14 15:32:40 dwmalone Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.26 2004/03/28 18:14:20 mcbride Exp $ */ +/* $DragonFly: src/sys/net/pf/if_pfsync.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2002 Michael Shalayeff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#ifdef INET +#include +#include +#include +#include +#include +#endif + +#ifdef INET6 +#ifndef INET +#include +#endif +#include +#endif /* INET6 */ + +#include +#include + +#define PFSYNCNAME "pfsync" + +#define PFSYNC_MINMTU \ + (sizeof(struct pfsync_header) + sizeof(struct pf_state)) + +#ifdef PFSYNCDEBUG +#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) +int pfsyncdebug; +#else +#define DPRINTF(x) +#endif + +int pfsync_sync_ok; +struct pfsyncstats pfsyncstats; + +static void pfsync_clone_destroy(struct ifnet *); +static int pfsync_clone_create(struct if_clone *, int); +void pfsync_setmtu(struct pfsync_softc *, int); +int pfsync_insert_net_state(struct pfsync_state *); +int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); +void pfsyncstart(struct ifnet *); + +struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); +int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); +int pfsync_sendout(struct pfsync_softc *); +void pfsync_timeout(void *); +void pfsync_send_bus(struct pfsync_softc *, u_int8_t); +void pfsync_bulk_update(void *); +void pfsync_bulkfail(void *); + +static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. 
Interface"); +static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; +struct if_clone pfsync_cloner = IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, + pfsync_clone_destroy, 1, 1); + +static void +pfsync_clone_destroy(struct ifnet *ifp) +{ + struct pfsync_softc *sc; + + sc = ifp->if_softc; + callout_stop(&sc->sc_tmo); + callout_stop(&sc->sc_bulk_tmo); + callout_stop(&sc->sc_bulkfail_tmo); + + bpfdetach(ifp); + if_detach(ifp); + LIST_REMOVE(sc, sc_next); + free(sc, M_PFSYNC); +} + +static int +pfsync_clone_create(struct if_clone *ifc, int unit) +{ + struct pfsync_softc *sc; + struct ifnet *ifp; + + MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC, + M_WAITOK|M_ZERO); + + pfsync_sync_ok = 1; + sc->sc_mbuf = NULL; + sc->sc_mbuf_net = NULL; + sc->sc_statep.s = NULL; + sc->sc_statep_net.s = NULL; + sc->sc_maxupdates = 128; + sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); + sc->sc_ureq_received = 0; + sc->sc_ureq_sent = 0; + + ifp = &sc->sc_if; + if_initname(ifp, ifc->ifc_name, unit); + ifp->if_ioctl = pfsyncioctl; + ifp->if_output = pfsyncoutput; + ifp->if_start = pfsyncstart; + ifp->if_type = IFT_PFSYNC; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = PFSYNC_HDRLEN; + ifp->if_baudrate = IF_Mbps(100); + ifp->if_softc = sc; + pfsync_setmtu(sc, MCLBYTES); + callout_init(&sc->sc_tmo); + callout_init(&sc->sc_bulk_tmo); + callout_init(&sc->sc_bulkfail_tmo); + if_attach(&sc->sc_if); + + LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); + bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); + + return (0); +} + +/* + * Start output on the pfsync interface. + */ +void +pfsyncstart(struct ifnet *ifp) +{ + int s; + + s = splimp(); + IF_DROP(&ifp->if_snd); + IF_DRAIN(&ifp->if_snd); + splx(s); +} + +int +pfsync_insert_net_state(struct pfsync_state *sp) +{ + struct pf_state *st = NULL; + struct pf_rule *r = NULL; + struct pfi_kif *kif; + + if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { + printf("pfsync_insert_net_state: invalid creator id:" + " %08" PRIx32 "\n", ntohl(sp->creatorid)); + return (EINVAL); + } + + kif = pfi_lookup_create(sp->ifname); + if (kif == NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert_net_state: " + "unknown interface: %s\n", sp->ifname); + /* skip this state */ + return (0); + } + + /* + * Just use the default rule until we have infrastructure to find the + * best matching rule. 
+ */ + r = &pf_default_rule; + + if (!r->max_states || r->states < r->max_states) + st = pool_get(&pf_state_pl, PR_NOWAIT); + if (st == NULL) { + pfi_maybe_destroy(kif); + return (ENOMEM); + } + bzero(st, sizeof(*st)); + + st->rule.ptr = r; + /* XXX get pointers to nat_rule and anchor */ + + /* fill in the rest of the state entry */ + pf_state_host_ntoh(&sp->lan, &st->lan); + pf_state_host_ntoh(&sp->gwy, &st->gwy); + pf_state_host_ntoh(&sp->ext, &st->ext); + + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + + bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); + st->creation = ntohl(sp->creation) + time_second; + st->expire = ntohl(sp->expire) + time_second; + + st->af = sp->af; + st->proto = sp->proto; + st->direction = sp->direction; + st->log = sp->log; + st->timeout = sp->timeout; + st->allow_opts = sp->allow_opts; + + bcopy(sp->id, &st->id, sizeof(st->id)); + st->creatorid = sp->creatorid; + st->sync_flags = sp->sync_flags | PFSTATE_FROMSYNC; + + + if (pf_insert_state(kif, st)) { + pfi_maybe_destroy(kif); + pool_put(&pf_state_pl, st); + return (EINVAL); + } + + return (0); +} + +void +pfsync_input(struct mbuf *m, ...) +{ + struct ip *ip = mtod(m, struct ip *); + struct pfsync_header *ph; + struct pfsync_softc *sc = LIST_FIRST(&pfsync_list); + struct pf_state *st, key; + struct pfsync_state *sp; + struct pfsync_state_upd *up; + struct pfsync_state_del *dp; + struct pfsync_state_clr *cp; + struct pfsync_state_upd_req *rup; + struct pfsync_state_bus *bus; + struct in_addr src; + struct mbuf *mp; + int iplen, action, error, i, s, count, offp; + + pfsyncstats.pfsyncs_ipackets++; + + /* verify that we have a sync interface configured */ + if (!sc->sc_sync_ifp || !pf_status.running) + goto done; + + /* verify that the packet came in on the right interface */ + if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { + pfsyncstats.pfsyncs_badif++; + goto done; + } + + /* verify that the IP TTL is 255. 
*/ + if (ip->ip_ttl != PFSYNC_DFLTTL) { + pfsyncstats.pfsyncs_badttl++; + goto done; + } + + iplen = ip->ip_hl << 2; + + if (m->m_pkthdr.len < iplen + sizeof(*ph)) { + pfsyncstats.pfsyncs_hdrops++; + goto done; + } + + if (iplen + sizeof(*ph) > m->m_len) { + if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { + pfsyncstats.pfsyncs_hdrops++; + goto done; + } + ip = mtod(m, struct ip *); + } + ph = (struct pfsync_header *)((char *)ip + iplen); + + /* verify the version */ + if (ph->version != PFSYNC_VERSION) { + pfsyncstats.pfsyncs_badver++; + goto done; + } + + action = ph->action; + count = ph->count; + + /* make sure it's a valid action code */ + if (action >= PFSYNC_ACT_MAX) { + pfsyncstats.pfsyncs_badact++; + goto done; + } + + /* Cheaper to grab this now than having to mess with mbufs later */ + src = ip->ip_src; + + switch (action) { + case PFSYNC_ACT_CLR: { + struct pfi_kif *kif; + u_int32_t creatorid; + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + sizeof(*cp), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + cp = (struct pfsync_state_clr *)(mp->m_data + offp); + creatorid = cp->creatorid; + + s = splsoftnet(); + if (cp->ifname[0] == '\0') { + RB_FOREACH(st, pf_state_tree_id, &tree_id) { + if (st->creatorid == creatorid) + st->timeout = PFTM_PURGE; + } + } else { + kif = pfi_lookup_if(cp->ifname); + if (kif == NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_input: PFSYNC_ACT_CLR " + "bad interface: %s\n", cp->ifname); + splx(s); + goto done; + } + RB_FOREACH(st, pf_state_tree_lan_ext, + &kif->pfik_lan_ext) { + if (st->creatorid == creatorid) + st->timeout = PFTM_PURGE; + } + } + pf_purge_expired_states(); + splx(s); + + break; + } + case PFSYNC_ACT_INS: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*sp), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + + s = splsoftnet(); + for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); + i < count; i++, sp++) { + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST || + sp->direction > PF_OUT || + (sp->af != AF_INET && sp->af != AF_INET6)) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_INS: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + continue; + } + + if ((error = pfsync_insert_net_state(sp))) { + if (error == ENOMEM) { + splx(s); + goto done; + } + continue; + } + } + splx(s); + break; + case PFSYNC_ACT_UPD: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*sp), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + + s = splsoftnet(); + for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); + i < count; i++, sp++) { + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_UPD: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + continue; + } + + bcopy(sp->id, &key.id, sizeof(key.id)); + key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&key); + if (st == NULL) { + /* insert the update */ + if (pfsync_insert_net_state(sp)) + pfsyncstats.pfsyncs_badstate++; + continue; + } + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + st->expire = ntohl(sp->expire) + time_second; + st->timeout = sp->timeout; + + } + splx(s); + break; + /* + * It's not strictly necessary for us to support the 
"uncompressed" + * delete action, but it's relatively simple and maintains consistency. + */ + case PFSYNC_ACT_DEL: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*sp), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + + s = splsoftnet(); + for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); + i < count; i++, sp++) { + bcopy(sp->id, &key.id, sizeof(key.id)); + key.creatorid = sp->creatorid; + + st = pf_find_state_byid(&key); + if (st == NULL) { + pfsyncstats.pfsyncs_badstate++; + continue; + } + /* + * XXX + * pf_purge_expired_states() is expensive, + * we really want to purge the state directly. + */ + st->timeout = PFTM_PURGE; + st->sync_flags |= PFSTATE_FROMSYNC; + } + pf_purge_expired_states(); + splx(s); + break; + case PFSYNC_ACT_UPD_C: { + int update_requested = 0; + + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*up), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + + s = splsoftnet(); + for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); + i < count; i++, up++) { + /* check for invalid values */ + if (up->timeout >= PFTM_MAX || + up->src.state > PF_TCPS_PROXY_DST || + up->dst.state > PF_TCPS_PROXY_DST) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: " + "PFSYNC_ACT_UPD_C: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + continue; + } + + bcopy(up->id, &key.id, sizeof(key.id)); + key.creatorid = up->creatorid; + + st = pf_find_state_byid(&key); + if (st == NULL) { + /* We don't have this state. Ask for it. */ + pfsync_request_update(up, &src); + update_requested = 1; + pfsyncstats.pfsyncs_badstate++; + continue; + } + pf_state_peer_ntoh(&up->src, &st->src); + pf_state_peer_ntoh(&up->dst, &st->dst); + st->expire = ntohl(up->expire) + time_second; + st->timeout = up->timeout; + } + if (update_requested) + pfsync_sendout(sc); + splx(s); + break; + } + case PFSYNC_ACT_DEL_C: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*dp), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + + s = splsoftnet(); + for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); + i < count; i++, dp++) { + bcopy(dp->id, &key.id, sizeof(key.id)); + key.creatorid = dp->creatorid; + + st = pf_find_state_byid(&key); + if (st == NULL) { + pfsyncstats.pfsyncs_badstate++; + continue; + } + /* + * XXX + * pf_purge_expired_states() is expensive, + * we really want to purge the state directly. + */ + st->timeout = PFTM_PURGE; + st->sync_flags |= PFSTATE_FROMSYNC; + } + pf_purge_expired_states(); + splx(s); + break; + case PFSYNC_ACT_INS_F: + case PFSYNC_ACT_DEL_F: + /* not implemented */ + break; + case PFSYNC_ACT_UREQ: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*rup), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + + s = splsoftnet(); + /* XXX send existing. pfsync_pack_state should handle this. 
*/ + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + for (i = 0, + rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); + i < count; i++, rup++) { + bcopy(rup->id, &key.id, sizeof(key.id)); + key.creatorid = rup->creatorid; + + if (key.id == 0 && key.creatorid == 0) { + sc->sc_ureq_received = mycpu->gd_time_seconds; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received " + "bulk update request\n"); + pfsync_send_bus(sc, PFSYNC_BUS_START); + callout_reset(&sc->sc_bulk_tmo, 1 * hz, + pfsync_bulk_update, + LIST_FIRST(&pfsync_list)); + } else { + st = pf_find_state_byid(&key); + if (st == NULL) { + pfsyncstats.pfsyncs_badstate++; + continue; + } + pfsync_pack_state(PFSYNC_ACT_UPD, st, 0); + } + } + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + splx(s); + break; + case PFSYNC_ACT_BUS: + /* If we're not waiting for a bulk update, who cares. */ + if (sc->sc_ureq_sent == 0) + break; + + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + sizeof(*bus), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + bus = (struct pfsync_state_bus *)(mp->m_data + offp); + switch (bus->status) { + case PFSYNC_BUS_START: + callout_reset(&sc->sc_bulkfail_tmo, + pf_pool_limits[PF_LIMIT_STATES].limit / + (PFSYNC_BULKPACKETS * sc->sc_maxcount), + pfsync_bulkfail, LIST_FIRST(&pfsync_list)); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received bulk " + "update start\n"); + break; + case PFSYNC_BUS_END: + if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= + sc->sc_ureq_sent) { + /* that's it, we're happy */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + callout_stop(&sc->sc_bulkfail_tmo); + pfsync_sync_ok = 1; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received valid " + "bulk update end\n"); + } else { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: received invalid " + "bulk update end: bad timestamp\n"); + } + break; + } + break; + } + +done: + if (m) + m_freem(m); +} + +int +pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + m_freem(m); + return (0); +} + +/* ARGSUSED */ +int +pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) +{ + struct pfsync_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + struct ip_moptions *imo = &sc->sc_imo; + struct pfsyncreq pfsyncr; + struct ifnet *sifp; + int s, error; + + switch (cmd) { + case SIOCSIFADDR: + case SIOCAIFADDR: + case SIOCSIFDSTADDR: + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + ifp->if_flags |= IFF_RUNNING; + else + ifp->if_flags &= ~IFF_RUNNING; + break; + case SIOCSIFMTU: + if (ifr->ifr_mtu < PFSYNC_MINMTU) + return (EINVAL); + if (ifr->ifr_mtu > MCLBYTES) + ifr->ifr_mtu = MCLBYTES; + s = splnet(); + if (ifr->ifr_mtu < ifp->if_mtu) + pfsync_sendout(sc); + pfsync_setmtu(sc, ifr->ifr_mtu); + splx(s); + break; + case SIOCGETPFSYNC: + bzero(&pfsyncr, sizeof(pfsyncr)); + if (sc->sc_sync_ifp) + strlcpy(pfsyncr.pfsyncr_syncif, + sc->sc_sync_ifp->if_xname, IFNAMSIZ); + pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; + if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) + return (error); + break; + case SIOCSETPFSYNC: + if ((error = suser_cred(cr, NULL_CRED_OKAY)) != 0) + return (error); + if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) + return (error); + + if (pfsyncr.pfsyncr_maxupdates > 255) + return (EINVAL); + sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; + + if (pfsyncr.pfsyncr_syncif[0] == 0) { + sc->sc_sync_ifp = NULL; + if (sc->sc_mbuf_net != NULL) { + /* Don't keep 
stale pfsync packets around. */ + s = splnet(); + m_freem(sc->sc_mbuf_net); + sc->sc_mbuf_net = NULL; + sc->sc_statep_net.s = NULL; + splx(s); + } + break; + } + if ((sifp = ifunit(pfsyncr.pfsyncr_syncif)) == NULL) + return (EINVAL); + else if (sifp == sc->sc_sync_ifp) + break; + + s = splnet(); + if (sifp->if_mtu < sc->sc_if.if_mtu || + (sc->sc_sync_ifp != NULL && + sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || + sifp->if_mtu < MCLBYTES - sizeof(struct ip)) + pfsync_sendout(sc); + sc->sc_sync_ifp = sifp; + + pfsync_setmtu(sc, sc->sc_if.if_mtu); + + if (imo->imo_num_memberships > 0) { + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; + } + + if (sc->sc_sync_ifp) { + struct in_addr addr; + + addr.s_addr = htonl(INADDR_PFSYNC_GROUP); + /* XXX do we only use one group? Also see above */ + if ((imo->imo_membership[0] = + in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { + splx(s); + return (ENOBUFS); + } + imo->imo_num_memberships++; + imo->imo_multicast_ifp = sc->sc_sync_ifp; + imo->imo_multicast_ttl = PFSYNC_DFLTTL; + imo->imo_multicast_loop = 0; + + /* Request a full state table update. */ + sc->sc_ureq_sent = mycpu->gd_time_seconds; + pfsync_sync_ok = 0; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: requesting bulk update\n"); + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, + pfsync_bulkfail, LIST_FIRST(&pfsync_list)); + pfsync_request_update(NULL, NULL); + pfsync_sendout(sc); + } + splx(s); + + break; + + default: + return (ENOTTY); + } + + return (0); +} + +void +pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) +{ + int mtu; + + if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) + mtu = sc->sc_sync_ifp->if_mtu; + else + mtu = mtu_req; + + sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / + sizeof(struct pfsync_state); + if (sc->sc_maxcount > 254) + sc->sc_maxcount = 254; + sc->sc_if.if_mtu = sizeof(struct pfsync_header) + + sc->sc_maxcount * sizeof(struct pfsync_state); +} + +struct mbuf * +pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) +{ + struct pfsync_header *h; + struct mbuf *m; + int len; + + MGETHDR(m, MB_DONTWAIT, MT_DATA); + if (m == NULL) { + sc->sc_if.if_oerrors++; + return (NULL); + } + + switch (action) { + case PFSYNC_ACT_CLR: + len = sizeof(struct pfsync_header) + + sizeof(struct pfsync_state_clr); + break; + case PFSYNC_ACT_UPD_C: + len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + + sizeof(struct pfsync_header); + break; + case PFSYNC_ACT_DEL_C: + len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + + sizeof(struct pfsync_header); + break; + case PFSYNC_ACT_UREQ: + len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + + sizeof(struct pfsync_header); + break; + case PFSYNC_ACT_BUS: + len = sizeof(struct pfsync_header) + + sizeof(struct pfsync_state_bus); + break; + default: + len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + + sizeof(struct pfsync_header); + break; + } + + if (len > MHLEN) { + MCLGET(m, MB_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + sc->sc_if.if_oerrors++; + return (NULL); + } + m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); + } else + MH_ALIGN(m, len); + + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); + h = mtod(m, struct pfsync_header *); + h->version = PFSYNC_VERSION; + h->af = 0; + h->count = 0; + h->action = action; + + *sp = (void *)((char *)h + PFSYNC_HDRLEN); + callout_reset(&sc->sc_tmo, hz, pfsync_timeout, + LIST_FIRST(&pfsync_list)); + return (m); +} + +int 
+pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) +{ + struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if; + struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_header *h, *h_net; + struct pfsync_state *sp = NULL; + struct pfsync_state_upd *up = NULL; + struct pfsync_state_del *dp = NULL; + struct pf_rule *r; + u_long secs; + int s, ret = 0; + u_int8_t i = 255, newaction = 0; + + /* + * If a packet falls in the forest and there's nobody around to + * hear, does it make a sound? + */ + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL) { + /* Don't leave any stale pfsync packets hanging around. */ + if (sc->sc_mbuf != NULL) { + m_freem(sc->sc_mbuf); + sc->sc_mbuf = NULL; + sc->sc_statep.s = NULL; + } + return (0); + } + + if (action >= PFSYNC_ACT_MAX) + return (EINVAL); + + s = splnet(); + if (sc->sc_mbuf == NULL) { + if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, + (void *)&sc->sc_statep.s)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf, struct pfsync_header *); + } else { + h = mtod(sc->sc_mbuf, struct pfsync_header *); + if (h->action != action) { + pfsync_sendout(sc); + if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, + (void *)&sc->sc_statep.s)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf, struct pfsync_header *); + } else { + /* + * If it's an update, look in the packet to see if + * we already have an update for the state. + */ + if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { + struct pfsync_state *usp = + (void *)((char *)h + PFSYNC_HDRLEN); + + for (i = 0; i < h->count; i++) { + if (!memcmp(usp->id, &st->id, + PFSYNC_ID_LEN) && + usp->creatorid == st->creatorid) { + sp = usp; + sp->updates++; + break; + } + usp++; + } + } + } + } + + secs = time_second; + + st->pfsync_time = mycpu->gd_time_seconds; + TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); + TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); + + if (sp == NULL) { + /* not a "duplicate" update */ + i = 255; + sp = sc->sc_statep.s++; + sc->sc_mbuf->m_pkthdr.len = + sc->sc_mbuf->m_len += sizeof(struct pfsync_state); + h->count++; + bzero(sp, sizeof(*sp)); + + bcopy(&st->id, sp->id, sizeof(sp->id)); + sp->creatorid = st->creatorid; + + strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname)); + pf_state_host_hton(&st->lan, &sp->lan); + pf_state_host_hton(&st->gwy, &sp->gwy); + pf_state_host_hton(&st->ext, &sp->ext); + + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + + sp->creation = htonl(secs - st->creation); + sp->packets[0] = htonl(st->packets[0]); + sp->packets[1] = htonl(st->packets[1]); + sp->bytes[0] = htonl(st->bytes[0]); + sp->bytes[1] = htonl(st->bytes[1]); + if ((r = st->rule.ptr) == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(r->nr); + if ((r = st->anchor.ptr) == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(r->nr); + sp->af = st->af; + sp->proto = st->proto; + sp->direction = st->direction; + sp->log = st->log; + sp->allow_opts = st->allow_opts; + sp->timeout = st->timeout; + + sp->sync_flags = st->sync_flags & PFSTATE_NOSYNC; + } + + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->expire <= secs) + sp->expire = htonl(0); + else + sp->expire = htonl(st->expire - secs); + + /* do we need to build "compressed" actions for network transfer? 
*/ + if (sc->sc_sync_ifp && compress) { + switch (action) { + case PFSYNC_ACT_UPD: + newaction = PFSYNC_ACT_UPD_C; + break; + case PFSYNC_ACT_DEL: + newaction = PFSYNC_ACT_DEL_C; + break; + default: + /* by default we just send the uncompressed states */ + break; + } + } + + if (newaction) { + if (sc->sc_mbuf_net == NULL) { + if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, + (void *)&sc->sc_statep_net.s)) == NULL) { + splx(s); + return (ENOMEM); + } + } + h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); + + switch (newaction) { + case PFSYNC_ACT_UPD_C: + if (i != 255) { + up = (void *)((char *)h_net + + PFSYNC_HDRLEN + (i * sizeof(*up))); + up->updates++; + } else { + h_net->count++; + sc->sc_mbuf_net->m_pkthdr.len = + sc->sc_mbuf_net->m_len += sizeof(*up); + up = sc->sc_statep_net.u++; + + bzero(up, sizeof(*up)); + bcopy(&st->id, up->id, sizeof(up->id)); + up->creatorid = st->creatorid; + } + up->timeout = st->timeout; + up->expire = sp->expire; + up->src = sp->src; + up->dst = sp->dst; + break; + case PFSYNC_ACT_DEL_C: + sc->sc_mbuf_net->m_pkthdr.len = + sc->sc_mbuf_net->m_len += sizeof(*dp); + dp = sc->sc_statep_net.d++; + h_net->count++; + + bzero(dp, sizeof(*dp)); + bcopy(&st->id, dp->id, sizeof(dp->id)); + dp->creatorid = st->creatorid; + break; + } + } + + if (h->count == sc->sc_maxcount || + (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) + ret = pfsync_sendout(sc); + + splx(s); + return (ret); +} + +/* This must be called in splnet() */ +int +pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) +{ + struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if; + struct pfsync_header *h; + struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_state_upd_req *rup; + int s = 0, ret = 0; + + if (sc->sc_mbuf == NULL) { + if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, + (void *)&sc->sc_statep.s)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf, struct pfsync_header *); + } else { + h = mtod(sc->sc_mbuf, struct pfsync_header *); + if (h->action != PFSYNC_ACT_UREQ) { + pfsync_sendout(sc); + if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, + (void *)&sc->sc_statep.s)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf, struct pfsync_header *); + } + } + + if (src != NULL) + sc->sc_sendaddr = *src; + sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); + h->count++; + rup = sc->sc_statep.r++; + bzero(rup, sizeof(*rup)); + if (up != NULL) { + bcopy(up->id, rup->id, sizeof(rup->id)); + rup->creatorid = up->creatorid; + } + + if (h->count == sc->sc_maxcount) + ret = pfsync_sendout(sc); + + return (ret); +} + +int +pfsync_clear_states(u_int32_t creatorid, char *ifname) +{ + struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if; + struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_state_clr *cp; + int s, ret; + + s = splnet(); + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, + (void *)&sc->sc_statep.c)) == NULL) { + splx(s); + return (ENOMEM); + } + sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); + cp = sc->sc_statep.c; + cp->creatorid = creatorid; + if (ifname != NULL) + strlcpy(cp->ifname, ifname, IFNAMSIZ); + + ret = (pfsync_sendout(sc)); + splx(s); + return (ret); +} + +void +pfsync_timeout(void *v) +{ + struct pfsync_softc *sc = v; + int s; + + s = splnet(); + pfsync_sendout(sc); + splx(s); +} + +void +pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) +{ + struct pfsync_state_bus *bus; + + if (sc->sc_mbuf 
!= NULL) + pfsync_sendout(sc); + + if (pfsync_sync_ok && + (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, + (void *)&sc->sc_statep.b)) != NULL) { + sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); + bus = sc->sc_statep.b; + bus->creatorid = pf_status.hostid; + bus->status = status; + bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); + pfsync_sendout(sc); + } +} + +void +pfsync_bulk_update(void *v) +{ + struct pfsync_softc *sc = v; + int s, i = 0; + struct pf_state *state; + + s = splnet(); + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + + /* + * Grab at most PFSYNC_BULKPACKETS worth of states which have not + * been sent since the latest request was made. + */ + while ((state = TAILQ_FIRST(&state_updates)) != NULL && + ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) { + if (state->pfsync_time > sc->sc_ureq_received) { + /* we're done */ + pfsync_send_bus(sc, PFSYNC_BUS_END); + sc->sc_ureq_received = 0; + callout_stop(&sc->sc_bulk_tmo); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: bulk update complete\n"); + break; + } else { + /* send an update and move to end of list */ + if (!state->sync_flags) + pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); + state->pfsync_time = mycpu->gd_time_seconds; + TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); + TAILQ_INSERT_TAIL(&state_updates, state, + u.s.entry_updates); + + /* look again for more in a bit */ + callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, + LIST_FIRST(&pfsync_list)); + } + } + if (sc->sc_mbuf != NULL) + pfsync_sendout(sc); + splx(s); +} + +void +pfsync_bulkfail(void *v) +{ + struct pfsync_softc *sc = v; + + if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { + /* Try again in a bit */ + callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, + LIST_FIRST(&pfsync_list)); + pfsync_request_update(NULL, NULL); + pfsync_sendout(sc); + } else { + /* Pretend like the transfer was ok */ + sc->sc_ureq_sent = 0; + sc->sc_bulk_tries = 0; + pfsync_sync_ok = 1; + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: failed to receive " + "bulk update status\n"); + callout_stop(&sc->sc_bulkfail_tmo); + } +} + +int +pfsync_sendout(sc) + struct pfsync_softc *sc; +{ + struct ifnet *ifp = &sc->sc_if; + struct mbuf *m; + + callout_stop(&sc->sc_tmo); + + if (sc->sc_mbuf == NULL) + return (0); + m = sc->sc_mbuf; + sc->sc_mbuf = NULL; + sc->sc_statep.s = NULL; + + KASSERT(m != NULL, ("pfsync_sendout: null mbuf")); + BPF_MTAP(ifp, m); + + if (sc->sc_mbuf_net) { + m_freem(m); + m = sc->sc_mbuf_net; + sc->sc_mbuf_net = NULL; + sc->sc_statep_net.s = NULL; + } + + if (sc->sc_sync_ifp) { + struct ip *ip; + struct ifaddr *ifa; + struct sockaddr sa; + + M_PREPEND(m, sizeof(struct ip), MB_DONTWAIT); + if (m == NULL) { + pfsyncstats.pfsyncs_onomem++; + return (0); + } + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = m->m_pkthdr.len; +#ifdef RANDOM_IP_ID + ip->ip_id = ip_randomid(); +#else + ip->ip_id = ntohs(ip_id++); +#endif + ip->ip_off = IP_DF; + ip->ip_ttl = PFSYNC_DFLTTL; + ip->ip_p = IPPROTO_PFSYNC; + ip->ip_sum = 0; + + bzero(&sa, sizeof(sa)); + sa.sa_family = AF_INET; + ifa = ifaof_ifpforaddr(&sa, sc->sc_sync_ifp); + if (ifa == NULL) + return (0); + ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; + + if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP)) + m->m_flags |= M_MCAST; + ip->ip_dst = sc->sc_sendaddr; + sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); + + pfsyncstats.pfsyncs_opackets++; + 
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) + pfsyncstats.pfsyncs_oerrors++; + } else + m_freem(m); + + return (0); +} + +static int +pfsync_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + LIST_INIT(&pfsync_list); + if_clone_attach(&pfsync_cloner); + break; + + case MOD_UNLOAD: + if_clone_detach(&pfsync_cloner); + while (!LIST_EMPTY(&pfsync_list)) + pfsync_clone_destroy( + &LIST_FIRST(&pfsync_list)->sc_if); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +static moduledata_t pfsync_mod = { + "pfsync", + pfsync_modevent, + 0 +}; + +#define PFSYNC_MODVER 1 + +DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(pfsync, PFSYNC_MODVER); diff --git a/sys/net/pf/if_pfsync.h b/sys/net/pf/if_pfsync.h new file mode 100644 index 0000000000..7d2c1807f0 --- /dev/null +++ b/sys/net/pf/if_pfsync.h @@ -0,0 +1,285 @@ +/* $FreeBSD: src/sys/contrib/pf/net/if_pfsync.h,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.13 2004/03/22 04:54:17 mcbride Exp $ */ +/* $DragonFly: src/sys/net/pf/if_pfsync.h,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2001 Michael Shalayeff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _NET_IF_PFSYNC_H_ +#define _NET_IF_PFSYNC_H_ + + +#define PFSYNC_ID_LEN sizeof(u_int64_t) + +struct pfsync_state_scrub { + u_int16_t pfss_flags; + u_int8_t pfss_ttl; /* stashed TTL */ + u_int8_t scrub_flag; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +} __packed; + +struct pfsync_state_host { + struct pf_addr addr; + u_int16_t port; + u_int16_t pad[3]; +} __packed; + +struct pfsync_state_peer { + struct pfsync_state_scrub scrub; /* state is scrubbed */ + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int8_t scrub_flag; + u_int8_t pad[5]; +} __packed; + +struct pfsync_state { + u_int32_t id[2]; + char ifname[IFNAMSIZ]; + struct pfsync_state_host lan; + struct pfsync_state_host gwy; + struct pfsync_state_host ext; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + struct pf_addr rt_addr; + u_int32_t rule; + u_int32_t anchor; + u_int32_t nat_rule; + u_int32_t creation; + u_int32_t expire; + u_int32_t packets[2]; + u_int32_t bytes[2]; + u_int32_t creatorid; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; + u_int8_t updates; +} __packed; + +struct pfsync_state_upd { + u_int32_t id[2]; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + u_int32_t creatorid; + u_int32_t expire; + u_int8_t timeout; + u_int8_t updates; + u_int8_t pad[6]; +} __packed; + +struct pfsync_state_del { + u_int32_t id[2]; + u_int32_t creatorid; + struct { + u_int8_t state; + } src; + struct { + u_int8_t state; + } dst; + u_int8_t pad[2]; +} __packed; + +struct pfsync_state_upd_req { + u_int32_t id[2]; + u_int32_t creatorid; + u_int32_t pad; +} __packed; + +struct pfsync_state_clr { + char ifname[IFNAMSIZ]; + u_int32_t creatorid; + u_int32_t pad; +} __packed; + +struct pfsync_state_bus { + u_int32_t creatorid; + u_int32_t endtime; + u_int8_t status; +#define PFSYNC_BUS_START 1 +#define PFSYNC_BUS_END 2 + u_int8_t pad[7]; +} __packed; + +#ifdef _KERNEL + +union sc_statep { + struct pfsync_state *s; + struct pfsync_state_upd *u; + struct pfsync_state_del *d; + struct pfsync_state_clr *c; + struct pfsync_state_bus *b; + struct pfsync_state_upd_req *r; +}; + +extern int pfsync_sync_ok; + +struct pfsync_softc { + struct ifnet sc_if; + struct ifnet *sc_sync_ifp; + + struct ip_moptions sc_imo; + struct callout sc_tmo; + struct callout sc_bulk_tmo; + struct callout sc_bulkfail_tmo; + struct in_addr sc_sendaddr; + struct mbuf *sc_mbuf; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ + union sc_statep sc_statep; + union sc_statep sc_statep_net; + u_int32_t sc_ureq_received; + u_int32_t sc_ureq_sent; + int sc_bulk_tries; + int sc_maxcount; /* number of states in mtu */ + int sc_maxupdates; /* number of updates/state */ + LIST_ENTRY(pfsync_softc) sc_next; +}; +#endif + + +struct pfsync_header { + u_int8_t version; +#define PFSYNC_VERSION 2 + u_int8_t af; + u_int8_t action; +#define PFSYNC_ACT_CLR 0 /* clear all states */ +#define PFSYNC_ACT_INS 1 /* insert state */ +#define PFSYNC_ACT_UPD 2 /* update state */ +#define PFSYNC_ACT_DEL 3 /* delete state */ +#define PFSYNC_ACT_UPD_C 4 /* "compressed" state update */ +#define PFSYNC_ACT_DEL_C 5 /*
"compressed" state delete */ +#define PFSYNC_ACT_INS_F 6 /* insert fragment */ +#define PFSYNC_ACT_DEL_F 7 /* delete fragments */ +#define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ +#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ +#define PFSYNC_ACT_MAX 10 + u_int8_t count; +} __packed; + +#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ +#define PFSYNC_MAX_BULKTRIES 12 +#define PFSYNC_HDRLEN sizeof(struct pfsync_header) +#define PFSYNC_ACTIONS \ + "CLR ST", "INS ST", "UPD ST", "DEL ST", \ + "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ + "UPD REQ", "BLK UPD STAT" + +#define PFSYNC_DFLTTL 255 + +struct pfsyncstats { + u_long pfsyncs_ipackets; /* total input packets, IPv4 */ + u_long pfsyncs_ipackets6; /* total input packets, IPv6 */ + u_long pfsyncs_badif; /* not the right interface */ + u_long pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ + u_long pfsyncs_hdrops; /* packets shorter than header */ + u_long pfsyncs_badver; /* bad (incl unsupp) version */ + u_long pfsyncs_badact; /* bad action */ + u_long pfsyncs_badlen; /* data length does not match */ + u_long pfsyncs_badauth; /* bad authentication */ + u_long pfsyncs_badstate; /* insert/lookup failed */ + + u_long pfsyncs_opackets; /* total output packets, IPv4 */ + u_long pfsyncs_opackets6; /* total output packets, IPv6 */ + u_long pfsyncs_onomem; /* no memory for an mbuf for a send */ + u_long pfsyncs_oerrors; /* ip output error */ +}; + +/* + * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC + */ +struct pfsyncreq { + char pfsyncr_syncif[IFNAMSIZ]; + int pfsyncr_maxupdates; + int pfsyncr_authlevel; +}; +#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) +#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) + + +#define pf_state_peer_hton(s,d) do { \ + (d)->seqlo = htonl((s)->seqlo); \ + (d)->seqhi = htonl((s)->seqhi); \ + (d)->seqdiff = htonl((s)->seqdiff); \ + (d)->max_win = htons((s)->max_win); \ + (d)->mss = htons((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ +} while (0) + +#define pf_state_peer_ntoh(s,d) do { \ + (d)->seqlo = ntohl((s)->seqlo); \ + (d)->seqhi = ntohl((s)->seqhi); \ + (d)->seqdiff = ntohl((s)->seqdiff); \ + (d)->max_win = ntohs((s)->max_win); \ + (d)->mss = ntohs((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ +} while (0) + +#define pf_state_host_hton(s,d) do { \ + bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ + (d)->port = (s)->port; \ +} while (0) + +#define pf_state_host_ntoh(s,d) do { \ + bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ + (d)->port = (s)->port; \ +} while (0) + +#ifdef _KERNEL +void pfsync_input(struct mbuf *, ...); +int pfsync_clear_states(u_int32_t, char *); +int pfsync_pack_state(u_int8_t, struct pf_state *, int); +#define pfsync_insert_state(st) do { \ + if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \ + (st->proto == IPPROTO_PFSYNC)) \ + st->sync_flags |= PFSTATE_NOSYNC; \ + else if (!st->sync_flags) \ + pfsync_pack_state(PFSYNC_ACT_INS, (st), 1); \ + st->sync_flags &= ~PFSTATE_FROMSYNC; \ +} while (0) +#define pfsync_update_state(st) do { \ + if (!st->sync_flags) \ + pfsync_pack_state(PFSYNC_ACT_UPD, (st), 1); \ + st->sync_flags &= ~PFSTATE_FROMSYNC; \ +} while (0) +#define pfsync_delete_state(st) do { \ + if (!st->sync_flags) \ + pfsync_pack_state(PFSYNC_ACT_DEL, (st), 1); \ + st->sync_flags &= ~PFSTATE_FROMSYNC; \ +} while (0) +#endif + +#endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/net/pf/pf.c b/sys/net/pf/pf.c new file mode 100644 index 0000000000..10d81b60b2 --- /dev/null +++ 
b/sys/net/pf/pf.c @@ -0,0 +1,5889 @@ +/* $FreeBSD: src/sys/contrib/pf/net/pf.c,v 1.19 2004/09/11 11:18:25 mlaier Exp $ */ +/* $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */ +/* add $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */ +/* $DragonFly: src/sys/net/pf/pf.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
+ * + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "use_pfsync.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if NPFSYNC > 0 +#include +#endif /* NPFSYNC > 0 */ + +#ifdef INET6 +#include +#include +#include +#include +#include +#include +#endif /* INET6 */ + +#include +#include +#include +#include + +extern int ip_optcopy(struct ip *, struct ip *); + +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x + +/* + * Global variables + */ + +struct pf_anchorqueue pf_anchors; +struct pf_ruleset pf_main_ruleset; +struct pf_altqqueue pf_altqs[2]; +struct pf_palist pf_pabuf; +struct pf_altqqueue *pf_altqs_active; +struct pf_altqqueue *pf_altqs_inactive; +struct pf_status pf_status; + +u_int32_t ticket_altqs_active; +u_int32_t ticket_altqs_inactive; +int altqs_inactive_open; +u_int32_t ticket_pabuf; + +struct callout pf_expire_to; /* expire timeout */ + +vm_zone_t pf_src_tree_pl, pf_rule_pl; +vm_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; + +void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); +void pf_print_state(struct pf_state *); +void pf_print_flags(u_int8_t); + +u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, + u_int8_t); +void pf_change_ap(struct pf_addr *, u_int16_t *, + u_int16_t *, u_int16_t *, struct pf_addr *, + u_int16_t, u_int8_t, sa_family_t); +#ifdef INET6 +void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +#endif /* INET6 */ +void pf_change_icmp(struct pf_addr *, u_int16_t *, + struct pf_addr *, struct pf_addr *, u_int16_t, + u_int16_t *, u_int16_t *, u_int16_t *, + u_int16_t *, u_int8_t, sa_family_t); +void pf_send_tcp(const struct pf_rule *, sa_family_t, + const struct pf_addr *, const struct pf_addr *, + u_int16_t, u_int16_t, u_int32_t, u_int32_t, + u_int8_t, u_int16_t, u_int16_t, u_int8_t); +void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, + sa_family_t, struct pf_rule *); +struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, + struct pf_addr *, u_int16_t, struct pf_addr *, + u_int16_t, int); +struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, struct pf_src_node **, + struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t *); +int pf_test_tcp(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_udp(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_icmp(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_other(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_fragment(struct pf_rule **, int, + struct pfi_kif *, struct mbuf *, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +int pf_test_state_tcp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, 
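
/*
 * Standalone sketch of the DPFPRINTF() convention defined above: the
 * caller supplies the whole printf argument list in its own set of
 * parentheses, so the macro expands to a plain printf call without
 * needing C99 variadic macros.  debug_level is a stand-in for
 * pf_status.debug.
 */
#include <stdio.h>

static int debug_level = 1;

#define DEBUG_MISC	1
#define DPFPRINTF(n, x)	if (debug_level >= (n)) printf x

int
main(void)
{
	int port = 8080;

	/* note the double parentheses around the argument list */
	DPFPRINTF(DEBUG_MISC, ("pf: port %d in use\n", port));
	return (0);
}
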
u_short *); +int pf_test_state_udp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *); +int pf_test_state_icmp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *); +int pf_test_state_other(struct pf_state **, int, + struct pfi_kif *, struct pf_pdesc *); +static int pf_match_tag(struct mbuf *, struct pf_rule *, + struct pf_rule *, int *); +void pf_hash(struct pf_addr *, struct pf_addr *, + struct pf_poolhashkey *, sa_family_t); +int pf_map_addr(u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, + struct pf_addr *, struct pf_src_node **); +int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, + struct pf_src_node **); +void pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *); +void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *); +int pf_socket_lookup(uid_t *, gid_t *, + int, struct pf_pdesc *); +u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, + sa_family_t); +u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, + sa_family_t); +u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, + u_int16_t); +void pf_set_rt_ifp(struct pf_state *, + struct pf_addr *); +int pf_check_proto_cksum(struct mbuf *, int, int, + u_int8_t, sa_family_t); +int pf_addr_wrap_neq(struct pf_addr_wrap *, + struct pf_addr_wrap *); +struct pf_state *pf_find_state_recurse(struct pfi_kif *, + struct pf_state *, u_int8_t); + +struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; + +#define STATE_LOOKUP() \ + do { \ + if (direction == PF_IN) \ + *state = pf_find_state_recurse( \ + kif, &key, PF_EXT_GWY); \ + else \ + *state = pf_find_state_recurse( \ + kif, &key, PF_LAN_EXT); \ + if (*state == NULL) \ + return (PF_DROP); \ + if (direction == PF_OUT && \ + (((*state)->rule.ptr->rt == PF_ROUTETO && \ + (*state)->rule.ptr->direction == PF_OUT) || \ + ((*state)->rule.ptr->rt == PF_REPLYTO && \ + (*state)->rule.ptr->direction == PF_IN)) && \ + (*state)->rt_kif != NULL && \ + (*state)->rt_kif != kif) \ + return (PF_PASS); \ + } while (0) + +#define STATE_TRANSLATE(s) \ + (s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \ + ((s)->af == AF_INET6 && \ + ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \ + (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \ + (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ + (s)->lan.port != (s)->gwy.port + +#define BOUND_IFACE(r, k) (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : \ + ((r)->rule_flag & PFRULE_GRBOUND) ? 
(k)->pfik_parent : \ + (k)->pfik_parent->pfik_parent) + +static int pf_src_compare(struct pf_src_node *, struct pf_src_node *); +static int pf_state_compare_lan_ext(struct pf_state *, + struct pf_state *); +static int pf_state_compare_ext_gwy(struct pf_state *, + struct pf_state *); +static int pf_state_compare_id(struct pf_state *, + struct pf_state *); + +struct pf_src_tree tree_src_tracking; + +struct pf_state_tree_id tree_id; +struct pf_state_queue state_updates; + +RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); +RB_GENERATE(pf_state_tree_lan_ext, pf_state, + u.s.entry_lan_ext, pf_state_compare_lan_ext); +RB_GENERATE(pf_state_tree_ext_gwy, pf_state, + u.s.entry_ext_gwy, pf_state_compare_ext_gwy); +RB_GENERATE(pf_state_tree_id, pf_state, + u.s.entry_id, pf_state_compare_id); + +static int +pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) +{ + int diff; + + if (a->rule.ptr > b->rule.ptr) + return (1); + if (a->rule.ptr < b->rule.ptr) + return (-1); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->addr.addr32[0] > b->addr.addr32[0]) + return (1); + if (a->addr.addr32[0] < b->addr.addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr.addr32[3] > b->addr.addr32[3]) + return (1); + if (a->addr.addr32[3] < b->addr.addr32[3]) + return (-1); + if (a->addr.addr32[2] > b->addr.addr32[2]) + return (1); + if (a->addr.addr32[2] < b->addr.addr32[2]) + return (-1); + if (a->addr.addr32[1] > b->addr.addr32[1]) + return (1); + if (a->addr.addr32[1] < b->addr.addr32[1]) + return (-1); + if (a->addr.addr32[0] > b->addr.addr32[0]) + return (1); + if (a->addr.addr32[0] < b->addr.addr32[0]) + return (-1); + break; +#endif /* INET6 */ + } + return (0); +} + +static int +pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) +{ + int diff; + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) + return (1); + if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) + return (1); + if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) + return (-1); + if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) + return (1); + if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) + return (-1); + if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) + return (1); + if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) + return (-1); + if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) + return (1); + if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) + return (-1); + if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) + return (1); + if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) + return (-1); + if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) + return (1); + if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) + return (-1); + if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) + return (1); + if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + break; +#endif /* INET6 */ + } + + if ((diff = a->lan.port - b->lan.port) != 0) + return 
(diff); + if ((diff = a->ext.port - b->ext.port) != 0) + return (diff); + + return (0); +} + +static int +pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b) +{ + int diff; + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) + return (1); + if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) + return (1); + if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) + return (-1); + if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) + return (1); + if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) + return (-1); + if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) + return (1); + if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) + return (-1); + if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) + return (1); + if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) + return (-1); + if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) + return (1); + if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) + return (-1); + if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) + return (1); + if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) + return (1); + if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) + return (-1); + break; +#endif /* INET6 */ + } + + if ((diff = a->ext.port - b->ext.port) != 0) + return (diff); + if ((diff = a->gwy.port - b->gwy.port) != 0) + return (diff); + + return (0); +} + +static int +pf_state_compare_id(struct pf_state *a, struct pf_state *b) +{ + if (a->id > b->id) + return (1); + if (a->id < b->id) + return (-1); + if (a->creatorid > b->creatorid) + return (1); + if (a->creatorid < b->creatorid) + return (-1); + + return (0); +} + +#ifdef INET6 +void +pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + dst->addr32[0] = src->addr32[0]; + break; +#endif /* INET */ + case AF_INET6: + dst->addr32[0] = src->addr32[0]; + dst->addr32[1] = src->addr32[1]; + dst->addr32[2] = src->addr32[2]; + dst->addr32[3] = src->addr32[3]; + break; + } +} +#endif + +struct pf_state * +pf_find_state_byid(struct pf_state *key) +{ + pf_status.fcounters[FCNT_STATE_SEARCH]++; + return (RB_FIND(pf_state_tree_id, &tree_id, key)); +} + +struct pf_state * +pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) +{ + struct pf_state *s; + + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + switch (tree) { + case PF_LAN_EXT: + for (; kif != NULL; kif = kif->pfik_parent) { + s = RB_FIND(pf_state_tree_lan_ext, + &kif->pfik_lan_ext, key); + if (s != NULL) + return (s); + } + return (NULL); + case PF_EXT_GWY: + for (; kif != NULL; kif = kif->pfik_parent) { + s = RB_FIND(pf_state_tree_ext_gwy, + &kif->pfik_ext_gwy, key); + if (s != NULL) + return (s); + } + return (NULL); + default: + panic("pf_find_state_recurse"); + } +} + +struct pf_state * +pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) +{ + struct pf_state *s, *ss = NULL; + struct pfi_kif *kif; + + 
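
/*
 * A minimal sketch of the comparator pattern the RB trees above rely
 * on: return <0/0/>0 and keep the order total and consistent.  The
 * kernel comparators first order on protocol, address family, or rule
 * pointer; this sketch keeps only the address part.  For IPv6 the
 * code compares word 3 first; any fixed word order works as long as
 * every lookup uses the same one.
 */
#include <stdint.h>
#include <stdio.h>

struct addr128 {
	uint32_t w[4];
};

static int
addr128_cmp(const struct addr128 *a, const struct addr128 *b)
{
	int i;

	for (i = 3; i >= 0; i--) {	/* word 3 first, as in pf */
		if (a->w[i] > b->w[i])
			return (1);
		if (a->w[i] < b->w[i])
			return (-1);
	}
	return (0);
}

int
main(void)
{
	struct addr128 x = { { 1, 0, 0, 0 } };
	struct addr128 y = { { 0, 0, 0, 1 } };

	/* word 3 dominates, so y sorts after x */
	printf("%d\n", addr128_cmp(&x, &y));	/* prints -1 */
	return (0);
}
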
pf_status.fcounters[FCNT_STATE_SEARCH]++; + + switch (tree) { + case PF_LAN_EXT: + TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { + s = RB_FIND(pf_state_tree_lan_ext, + &kif->pfik_lan_ext, key); + if (s == NULL) + continue; + if (more == NULL) + return (s); + ss = s; + (*more)++; + } + return (ss); + case PF_EXT_GWY: + TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { + s = RB_FIND(pf_state_tree_ext_gwy, + &kif->pfik_ext_gwy, key); + if (s == NULL) + continue; + if (more == NULL) + return (s); + ss = s; + (*more)++; + } + return (ss); + default: + panic("pf_find_state_all"); + } +} + +int +pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, + struct pf_addr *src, sa_family_t af) +{ + struct pf_src_node k; + + if (*sn == NULL) { + k.af = af; + PF_ACPY(&k.addr, src, af); + if (rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR) + k.rule.ptr = rule; + else + k.rule.ptr = NULL; + pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); + } + if (*sn == NULL) { + if (!rule->max_src_nodes || + rule->src_nodes < rule->max_src_nodes) + (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); + if ((*sn) == NULL) + return (-1); + bzero(*sn, sizeof(struct pf_src_node)); + (*sn)->af = af; + if (rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR) + (*sn)->rule.ptr = rule; + else + (*sn)->rule.ptr = NULL; + PF_ACPY(&(*sn)->addr, src, af); + if (RB_INSERT(pf_src_tree, + &tree_src_tracking, *sn) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: src_tree insert failed: "); + pf_print_host(&(*sn)->addr, 0, af); + printf("\n"); + } + pool_put(&pf_src_tree_pl, *sn); + return (-1); + } + (*sn)->creation = time_second; + (*sn)->ruletype = rule->action; + if ((*sn)->rule.ptr != NULL) + (*sn)->rule.ptr->src_nodes++; + pf_status.scounters[SCNT_SRC_NODE_INSERT]++; + pf_status.src_nodes++; + } else { + if (rule->max_src_states && + (*sn)->states >= rule->max_src_states) + return (-1); + } + return (0); +} + +int +pf_insert_state(struct pfi_kif *kif, struct pf_state *state) +{ + /* Thou MUST NOT insert multiple duplicate keys */ + state->u.s.kif = kif; + if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state insert failed: tree_lan_ext"); + printf(" lan: "); + pf_print_host(&state->lan.addr, state->lan.port, + state->af); + printf(" gwy: "); + pf_print_host(&state->gwy.addr, state->gwy.port, + state->af); + printf(" ext: "); + pf_print_host(&state->ext.addr, state->ext.port, + state->af); + if (state->sync_flags & PFSTATE_FROMSYNC) + printf(" (from sync)"); + printf("\n"); + } + return (-1); + } + + if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state insert failed: tree_ext_gwy"); + printf(" lan: "); + pf_print_host(&state->lan.addr, state->lan.port, + state->af); + printf(" gwy: "); + pf_print_host(&state->gwy.addr, state->gwy.port, + state->af); + printf(" ext: "); + pf_print_host(&state->ext.addr, state->ext.port, + state->af); + if (state->sync_flags & PFSTATE_FROMSYNC) + printf(" (from sync)"); + printf("\n"); + } + RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); + return (-1); + } + + if (state->id == 0 && state->creatorid == 0) { + state->id = htobe64(pf_status.stateid++); + state->creatorid = pf_status.hostid; + } + if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { + 
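
/*
 * Sketch of the rollback pattern that pf_insert_state() follows: a
 * state is linked into several search trees, and a failure in a later
 * tree must unlink it from the earlier ones so all indexes stay in
 * agreement.  Two toy fixed-size indexes stand in for the RB trees;
 * all names here are invented for the example.
 */
#include <stdio.h>

#define IDX_CAP 8

struct index {
	int keys[IDX_CAP];
	int n;
};

static int
index_insert(struct index *idx, int key)
{
	int i;

	for (i = 0; i < idx->n; i++)
		if (idx->keys[i] == key)
			return (-1);		/* duplicate key */
	if (idx->n == IDX_CAP)
		return (-1);			/* index full */
	idx->keys[idx->n++] = key;
	return (0);
}

static void
index_remove(struct index *idx, int key)
{
	int i;

	for (i = 0; i < idx->n; i++)
		if (idx->keys[i] == key) {
			idx->keys[i] = idx->keys[--idx->n];
			return;
		}
}

static struct index lan_ext, ext_gwy;

static int
state_link(int key)
{
	if (index_insert(&lan_ext, key) != 0)
		return (-1);			/* nothing to undo yet */
	if (index_insert(&ext_gwy, key) != 0) {
		index_remove(&lan_ext, key);	/* roll back the first link */
		return (-1);
	}
	return (0);
}

int
main(void)
{
	printf("%d\n", state_link(42));	/* 0: linked into both indexes */
	printf("%d\n", state_link(42));	/* -1: duplicate is refused */
	return (0);
}
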
printf("pf: state insert failed: " + "id: %016" PRIx64 " creatorid: %08" PRIx32, + be64toh(state->id), ntohl(state->creatorid)); + if (state->sync_flags & PFSTATE_FROMSYNC) + printf(" (from sync)"); + printf("\n"); + } + RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state); + RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); + return (-1); + } + TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates); + + pf_status.fcounters[FCNT_STATE_INSERT]++; + pf_status.states++; + pfi_attach_state(kif); +#if NPFSYNC + pfsync_insert_state(state); +#endif + return (0); +} + +void +pf_purge_timeout(void *arg) +{ + struct callout *to = arg; + int s; + + s = splsoftnet(); + pf_purge_expired_states(); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + splx(s); + + callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz, + pf_purge_timeout, to); +} + +u_int32_t +pf_state_expires(const struct pf_state *state) +{ + u_int32_t timeout; + u_int32_t start; + u_int32_t end; + u_int32_t states; + + /* handle all PFTM_* > PFTM_MAX here */ + if (state->timeout == PFTM_PURGE) + return (time_second); + if (state->timeout == PFTM_UNTIL_PACKET) + return (0); + KASSERT((state->timeout < PFTM_MAX), + ("pf_state_expires: timeout > PFTM_MAX")); + timeout = state->rule.ptr->timeout[state->timeout]; + if (!timeout) + timeout = pf_default_rule.timeout[state->timeout]; + start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; + if (start) { + end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; + states = state->rule.ptr->states; + } else { + start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = pf_status.states; + } + if (end && states > start && start < end) { + if (states < end) + return (state->expire + timeout * (end - states) / + (end - start)); + else + return (time_second); + } + return (state->expire + timeout); +} + +void +pf_purge_expired_src_nodes(void) +{ + struct pf_src_node *cur, *next; + + for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); + + if (cur->states <= 0 && cur->expire <= time_second) { + if (cur->rule.ptr != NULL) { + cur->rule.ptr->src_nodes--; + if (cur->rule.ptr->states <= 0 && + cur->rule.ptr->max_src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + } + RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, cur); + } + } +} + +void +pf_src_tree_remove_state(struct pf_state *s) +{ + u_int32_t timeout; + + if (s->src_node != NULL) { + if (--s->src_node->states <= 0) { + timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; + if (!timeout) + timeout = + pf_default_rule.timeout[PFTM_SRC_NODE]; + s->src_node->expire = time_second + timeout; + } + } + if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { + if (--s->nat_src_node->states <= 0) { + timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; + if (!timeout) + timeout = + pf_default_rule.timeout[PFTM_SRC_NODE]; + s->nat_src_node->expire = time_second + timeout; + } + } + s->src_node = s->nat_src_node = NULL; +} + +void +pf_purge_expired_states(void) +{ + struct pf_state *cur, *next; + + for (cur = RB_MIN(pf_state_tree_id, &tree_id); + cur; cur = next) { + next = RB_NEXT(pf_state_tree_id, &tree_id, cur); + + if (pf_state_expires(cur) <= time_second) { + if (cur->src.state == PF_TCPS_PROXY_DST) + pf_send_tcp(cur->rule.ptr, cur->af, + &cur->ext.addr, &cur->lan.addr, + cur->ext.port, 
cur->lan.port, + cur->src.seqhi, cur->src.seqlo + 1, 0, + TH_RST|TH_ACK, 0, 0); + RB_REMOVE(pf_state_tree_ext_gwy, + &cur->u.s.kif->pfik_ext_gwy, cur); + RB_REMOVE(pf_state_tree_lan_ext, + &cur->u.s.kif->pfik_lan_ext, cur); + RB_REMOVE(pf_state_tree_id, &tree_id, cur); +#if NPFSYNC + pfsync_delete_state(cur); +#endif + pf_src_tree_remove_state(cur); + if (--cur->rule.ptr->states <= 0 && + cur->rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + if (cur->nat_rule.ptr != NULL) + if (--cur->nat_rule.ptr->states <= 0 && + cur->nat_rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->nat_rule.ptr); + if (cur->anchor.ptr != NULL) + if (--cur->anchor.ptr->states <= 0) + pf_rm_rule(NULL, cur->anchor.ptr); + pf_normalize_tcp_cleanup(cur); + pfi_detach_state(cur->u.s.kif); + TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); + pool_put(&pf_state_pl, cur); + pf_status.fcounters[FCNT_STATE_REMOVALS]++; + pf_status.states--; + } + } +} + +int +pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_TABLE) + return (0); + if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) + return (1); + return (0); +} + +void +pf_tbladdr_remove(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) + return; + pfr_detach_table(aw->p.tbl); + aw->p.tbl = NULL; +} + +void +pf_tbladdr_copyout(struct pf_addr_wrap *aw) +{ + struct pfr_ktable *kt = aw->p.tbl; + + if (aw->type != PF_ADDR_TABLE || kt == NULL) + return; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + aw->p.tbl = NULL; + aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? + kt->pfrkt_cnt : -1; +} + +void +pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t a = ntohl(addr->addr32[0]); + printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, + (a>>8)&255, a&255); + if (p) { + p = ntohs(p); + printf(":%u", p); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + u_int16_t b; + u_int8_t i, curstart = 255, curend = 0, + maxstart = 0, maxend = 0; + for (i = 0; i < 8; i++) { + if (!addr->addr16[i]) { + if (curstart == 255) + curstart = i; + else + curend = i; + } else { + if (curstart) { + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + curstart = 255; + } + } + } + } + for (i = 0; i < 8; i++) { + if (i >= maxstart && i <= maxend) { + if (maxend != 7) { + if (i == maxstart) + printf(":"); + } else { + if (i == maxend) + printf(":"); + } + } else { + b = ntohs(addr->addr16[i]); + printf("%x", b); + if (i < 7) + printf(":"); + } + } + if (p) { + p = ntohs(p); + printf("[%u]", p); + } + break; + } +#endif /* INET6 */ + } +} + +void +pf_print_state(struct pf_state *s) +{ + switch (s->proto) { + case IPPROTO_TCP: + printf("TCP "); + break; + case IPPROTO_UDP: + printf("UDP "); + break; + case IPPROTO_ICMP: + printf("ICMP "); + break; + case IPPROTO_ICMPV6: + printf("ICMPV6 "); + break; + default: + printf("%u ", s->proto); + break; + } + pf_print_host(&s->lan.addr, s->lan.port, s->af); + printf(" "); + pf_print_host(&s->gwy.addr, s->gwy.port, s->af); + printf(" "); + pf_print_host(&s->ext.addr, s->ext.port, s->af); + printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, + s->src.seqhi, s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, + s->dst.seqhi, 
s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" %u:%u", s->src.state, s->dst.state); +} + +void +pf_print_flags(u_int8_t f) +{ + if (f) + printf(" "); + if (f & TH_FIN) + printf("F"); + if (f & TH_SYN) + printf("S"); + if (f & TH_RST) + printf("R"); + if (f & TH_PUSH) + printf("P"); + if (f & TH_ACK) + printf("A"); + if (f & TH_URG) + printf("U"); + if (f & TH_ECE) + printf("E"); + if (f & TH_CWR) + printf("W"); +} + +#define PF_SET_SKIP_STEPS(i) \ + do { \ + while (head[i] != cur) { \ + head[i]->skip[i].ptr = cur; \ + head[i] = TAILQ_NEXT(head[i], entries); \ + } \ + } while (0) + +void +pf_calc_skip_steps(struct pf_rulequeue *rules) +{ + struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; + int i; + + cur = TAILQ_FIRST(rules); + prev = cur; + for (i = 0; i < PF_SKIP_COUNT; ++i) + head[i] = cur; + while (cur != NULL) { + + if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) + PF_SET_SKIP_STEPS(PF_SKIP_IFP); + if (cur->direction != prev->direction) + PF_SET_SKIP_STEPS(PF_SKIP_DIR); + if (cur->af != prev->af) + PF_SET_SKIP_STEPS(PF_SKIP_AF); + if (cur->proto != prev->proto) + PF_SET_SKIP_STEPS(PF_SKIP_PROTO); + if (cur->src.not != prev->src.not || + pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); + if (cur->src.port[0] != prev->src.port[0] || + cur->src.port[1] != prev->src.port[1] || + cur->src.port_op != prev->src.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); + if (cur->dst.not != prev->dst.not || + pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); + if (cur->dst.port[0] != prev->dst.port[0] || + cur->dst.port[1] != prev->dst.port[1] || + cur->dst.port_op != prev->dst.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + + prev = cur; + cur = TAILQ_NEXT(cur, entries); + } + for (i = 0; i < PF_SKIP_COUNT; ++i) + PF_SET_SKIP_STEPS(i); +} + +int +pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) +{ + if (aw1->type != aw2->type) + return (1); + switch (aw1->type) { + case PF_ADDR_ADDRMASK: + if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) + return (1); + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); + case PF_ADDR_NOROUTE: + return (0); + case PF_ADDR_TABLE: + return (aw1->p.tbl != aw2->p.tbl); + default: + printf("invalid address type: %d\n", aw1->type); + return (1); + } +} + +void +pf_update_anchor_rules() +{ + struct pf_rule *rule; + int i; + + for (i = 0; i < PF_RULESET_MAX; ++i) + TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, + entries) + if (rule->anchorname[0]) + rule->anchor = pf_find_anchor(rule->anchorname); + else + rule->anchor = NULL; +} + +u_int16_t +pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) +{ + u_int32_t l; + + if (udp && !cksum) + return (0x0000); + l = cksum + old - new; + l = (l >> 16) + (l & 65535); + l = l & 65535; + if (udp && !l) + return (0xFFFF); + return (l); +} + +void +pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, + struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) +{ + struct pf_addr ao; + u_int16_t po = *p; + + PF_ACPY(&ao, a, af); + PF_ACPY(a, an, af); + + *p = pn; + + switch (af) { +#ifdef INET + case AF_INET: + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + ao.addr16[0], an->addr16[0], 0), + ao.addr16[1], an->addr16[1], 0); + *p = pn; + *pc = 
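
/*
 * The idea behind pf_calc_skip_steps() above, reduced to one
 * criterion: consecutive rules sharing the same field value get a
 * skip pointer past the whole run, so a packet that fails the test
 * once can jump over every rule that would fail for the same reason.
 * This naive two-pass sketch computes the same result the kernel's
 * single pass produces, using array indexes instead of rule pointers.
 */
#include <stdio.h>

#define NRULES 6

int
main(void)
{
	int proto[NRULES] = { 6, 6, 6, 17, 17, 1 };	/* tcp x3, udp x2, icmp */
	int skip[NRULES];
	int i, j;

	/* for each rule, find the first later rule with a different value */
	for (i = 0; i < NRULES; i++) {
		for (j = i + 1; j < NRULES; j++)
			if (proto[j] != proto[i])
				break;
		skip[i] = j;
	}
	for (i = 0; i < NRULES; i++)
		printf("rule %d: skip to %d\n", i, skip[i]);
	/* a UDP packet failing rule 0's proto test jumps straight to rule 3 */
	return (0);
}
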
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + po, pn, u); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u), + po, pn, u); + break; +#endif /* INET6 */ + } +} + + +/* Changes a u_int32_t. Uses a void * so there are no align restrictions */ +void +pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) +{ + u_int32_t ao; + + memcpy(&ao, a, sizeof(ao)); + memcpy(a, &an, sizeof(u_int32_t)); + *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), + ao % 65536, an % 65536, u); +} + +#ifdef INET6 +void +pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) +{ + struct pf_addr ao; + + PF_ACPY(&ao, a, AF_INET6); + PF_ACPY(a, an, AF_INET6); + + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); +} +#endif /* INET6 */ + +void +pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, + struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, + u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) +{ + struct pf_addr oia, ooa; + + PF_ACPY(&oia, ia, af); + PF_ACPY(&ooa, oa, af); + + /* Change inner protocol port, fix inner protocol checksum. */ + if (ip != NULL) { + u_int16_t oip = *ip; + u_int32_t opc = 0; + + if (pc != NULL) + opc = *pc; + *ip = np; + if (pc != NULL) + *pc = pf_cksum_fixup(*pc, oip, *ip, u); + *ic = pf_cksum_fixup(*ic, oip, *ip, 0); + if (pc != NULL) + *ic = pf_cksum_fixup(*ic, opc, *pc, 0); + } + /* Change inner ip address, fix inner ip and icmp checksums. */ + PF_ACPY(ia, na, af); + switch (af) { +#ifdef INET + case AF_INET: { + u_int32_t oh2c = *h2c; + + *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], u), + oia.addr16[1], ia->addr16[1], u), + oia.addr16[2], ia->addr16[2], u), + oia.addr16[3], ia->addr16[3], u), + oia.addr16[4], ia->addr16[4], u), + oia.addr16[5], ia->addr16[5], u), + oia.addr16[6], ia->addr16[6], u), + oia.addr16[7], ia->addr16[7], u); + break; +#endif /* INET6 */ + } + /* Change outer ip address, fix outer ip or icmpv6 checksum. 
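
/*
 * User-space check of the incremental checksum update that
 * pf_cksum_fixup() implements (the RFC 1624 technique, minus the UDP
 * zero-checksum special case): patching one 16-bit word and fixing
 * the checksum incrementally gives the same result as summing the
 * whole buffer again.  The sample words are arbitrary.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t
cksum16(const uint16_t *w, int n)	/* one's-complement checksum */
{
	uint32_t s = 0;

	while (n-- > 0)
		s += *w++;
	s = (s >> 16) + (s & 0xffff);
	s = (s >> 16) + (s & 0xffff);
	return (~s & 0xffff);
}

static uint16_t
fixup(uint16_t ck, uint16_t old, uint16_t new)
{
	uint32_t l = ck + old - new;	/* same arithmetic as pf_cksum_fixup */

	l = (l >> 16) + (l & 65535);
	return (l & 65535);
}

int
main(void)
{
	uint16_t w[4] = { 0x4500, 0x003c, 0x1c46, 0x4000 };
	uint16_t ck = cksum16(w, 4);
	uint16_t incremental, full;

	incremental = fixup(ck, w[3], 0x4001);	/* patch the last word */
	w[3] = 0x4001;
	full = cksum16(w, 4);
	printf("full=%04x incremental=%04x\n", full, incremental);  /* equal */
	return (0);
}
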
*/ + PF_ACPY(oa, na, af); + switch (af) { +#ifdef INET + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; +#endif /* INET6 */ + } +} + +void +pf_send_tcp(const struct pf_rule *r, sa_family_t af, + const struct pf_addr *saddr, const struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, + u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl) +{ + struct mbuf *m; + int len = 0, tlen; +#ifdef INET + struct ip *h = NULL; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr *h6 = NULL; +#endif /* INET6 */ + struct tcphdr *th = NULL; + char *opt; + + /* maximum segment size tcp option */ + tlen = sizeof(struct tcphdr); + if (mss) + tlen += 4; + + switch (af) { +#ifdef INET + case AF_INET: + len = sizeof(struct ip) + tlen; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + len = sizeof(struct ip6_hdr) + tlen; + break; +#endif /* INET6 */ + } + + /* create outgoing mbuf */ + m = m_gethdr(MB_DONTWAIT, MT_HEADER); + if (m == NULL) + return; + m->m_pkthdr.pf_flags |= PF_MBUF_GENERATED; +#ifdef ALTQ + if (r != NULL && r->qid) { + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), MB_DONTWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = af; + atag->hdr = mtod(m, struct ip *); + m_tag_prepend(m, mtag); + } + } +#endif + m->m_data += max_linkhdr; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + switch (af) { +#ifdef INET + case AF_INET: + h = mtod(m, struct ip *); + + /* IP header fields included in the TCP checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = tlen; + h->ip_src.s_addr = saddr->v4.s_addr; + h->ip_dst.s_addr = daddr->v4.s_addr; + + th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + + /* IP header fields included in the TCP checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(tlen); + memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); + memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); + + th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); + break; +#endif /* INET6 */ + } + + /* TCP header */ + th->th_sport = sport; + th->th_dport = dport; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_off = tlen >> 2; + th->th_flags = flags; + th->th_win = htons(win); + + if (mss) { + opt = (char *)(th + 1); + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + mss = htons(mss); + bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); + } + + switch (af) { +#ifdef INET + case AF_INET: + /* TCP checksum */ + th->th_sum = in_cksum(m, len); + + /* Finish the IP header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_len = len; + h->ip_off = path_mtu_discovery ? IP_DF : 0; + h->ip_ttl = ttl ? 
ttl : ip_defttl; + h->ip_sum = 0; + ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL, + (void *)NULL); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + /* TCP checksum */ + th->th_sum = in6_cksum(m, IPPROTO_TCP, + sizeof(struct ip6_hdr), tlen); + + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + + ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + break; +#endif /* INET6 */ + } +} + +void +pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, + struct pf_rule *r) +{ + struct mbuf *m0; + + m0 = m_copypacket(m, MB_DONTWAIT); + if (m0 == NULL) + return; + m0->m_pkthdr.pf_flags |= PF_MBUF_GENERATED; + +#ifdef ALTQ + if (r->qid) { + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), MB_DONTWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = af; + atag->hdr = mtod(m0, struct ip *); + m_tag_prepend(m0, mtag); + } + } +#endif + + switch (af) { +#ifdef INET + case AF_INET: + icmp_error(m0, type, code, 0, (void *)NULL); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + icmp6_error(m0, type, code, 0); + break; +#endif /* INET6 */ + } +} + +/* + * Return 1 if the addresses a and b match (with mask m), otherwise return 0. + * If n is 0, they match if they are equal. If n is != 0, they match if they + * are different. + */ +int +pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, + struct pf_addr *b, sa_family_t af) +{ + int match = 0; + + switch (af) { +#ifdef INET + case AF_INET: + if ((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) + match++; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) && + ((a->addr32[1] & m->addr32[1]) == + (b->addr32[1] & m->addr32[1])) && + ((a->addr32[2] & m->addr32[2]) == + (b->addr32[2] & m->addr32[2])) && + ((a->addr32[3] & m->addr32[3]) == + (b->addr32[3] & m->addr32[3]))) + match++; + break; +#endif /* INET6 */ + } + if (match) { + if (n) + return (0); + else + return (1); + } else { + if (n) + return (1); + else + return (0); + } +} + +int +pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) +{ + switch (op) { + case PF_OP_IRG: + return ((p > a1) && (p < a2)); + case PF_OP_XRG: + return ((p < a1) || (p > a2)); + case PF_OP_RRG: + return ((p >= a1) && (p <= a2)); + case PF_OP_EQ: + return (p == a1); + case PF_OP_NE: + return (p != a1); + case PF_OP_LT: + return (p < a1); + case PF_OP_LE: + return (p <= a1); + case PF_OP_GT: + return (p > a1); + case PF_OP_GE: + return (p >= a1); + } + return (0); /* never reached */ +} + +int +pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) +{ + a1 = ntohs(a1); + a2 = ntohs(a2); + p = ntohs(p); + return (pf_match(op, a1, a2, p)); +} + +int +pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) +{ + if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, u)); +} + +int +pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) +{ + if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, g)); +} + +static int +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule, + int *tag) +{ + if (*tag == -1) { /* find mbuf tag */ + if (nat_rule != NULL && nat_rule->tag) + *tag = nat_rule->tag; + else if (m->m_pkthdr.pf_flags & PF_MBUF_TAGGED) + *tag = m->m_pkthdr.pf_tag; + else + *tag = 0; + } + + return ((!r->match_tag_not && r->match_tag 
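
/*
 * Standalone illustration of the comparison operators evaluated by
 * pf_match() above; the enum mirrors pf's PF_OP_* constants but is
 * local to this sketch, and everything is in host byte order (the
 * kernel's pf_match_port() converts with ntohs first).
 */
#include <stdio.h>

enum { OP_IRG, OP_XRG, OP_RRG, OP_EQ, OP_NE, OP_LT, OP_LE, OP_GT, OP_GE };

static int
match(int op, unsigned a1, unsigned a2, unsigned p)
{
	switch (op) {
	case OP_IRG: return (p > a1 && p < a2);	  /* inside, exclusive */
	case OP_XRG: return (p < a1 || p > a2);	  /* outside the range */
	case OP_RRG: return (p >= a1 && p <= a2); /* inside, inclusive */
	case OP_EQ:  return (p == a1);
	case OP_NE:  return (p != a1);
	case OP_LT:  return (p < a1);
	case OP_LE:  return (p <= a1);
	case OP_GT:  return (p > a1);
	case OP_GE:  return (p >= a1);
	}
	return (0);
}

int
main(void)
{
	/* "port 1000 >< 2000" (exclusive) against port 1000 */
	printf("%d\n", match(OP_IRG, 1000, 2000, 1000));	/* 0 */
	/* "port 1000:2000" (inclusive) against port 1000 */
	printf("%d\n", match(OP_RRG, 1000, 2000, 1000));	/* 1 */
	return (0);
}
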
== *tag) || + (r->match_tag_not && r->match_tag != *tag)); +} + +void +pf_tag_packet(struct mbuf *m, int tag) +{ + if (tag <= 0) + return; + + m->m_pkthdr.pf_flags |= PF_MBUF_TAGGED; + m->m_pkthdr.pf_tag = tag; +} + +#define PF_STEP_INTO_ANCHOR(r, a, s, n) \ + do { \ + if ((r) == NULL || (r)->anchor == NULL || \ + (s) != NULL || (a) != NULL) \ + panic("PF_STEP_INTO_ANCHOR"); \ + (a) = (r); \ + (s) = TAILQ_FIRST(&(r)->anchor->rulesets); \ + (r) = NULL; \ + while ((s) != NULL && ((r) = \ + TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ + (s) = TAILQ_NEXT((s), entries); \ + if ((r) == NULL) { \ + (r) = TAILQ_NEXT((a), entries); \ + (a) = NULL; \ + } \ + } while (0) + +#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n) \ + do { \ + if ((r) != NULL || (a) == NULL || (s) == NULL) \ + panic("PF_STEP_OUT_OF_ANCHOR"); \ + (s) = TAILQ_NEXT((s), entries); \ + while ((s) != NULL && ((r) = \ + TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ + (s) = TAILQ_NEXT((s), entries); \ + if ((r) == NULL) { \ + (r) = TAILQ_NEXT((a), entries); \ + (a) = NULL; \ + } \ + } while (0) + +#ifdef INET6 +void +pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, + struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + break; +#endif /* INET */ + case AF_INET6: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); + naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | + ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); + naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | + ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); + naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | + ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); + break; + } +} + +void +pf_addr_inc(struct pf_addr *addr, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: + addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); + break; +#endif /* INET */ + case AF_INET6: + if (addr->addr32[3] == 0xffffffff) { + addr->addr32[3] = 0; + if (addr->addr32[2] == 0xffffffff) { + addr->addr32[2] = 0; + if (addr->addr32[1] == 0xffffffff) { + addr->addr32[1] = 0; + addr->addr32[0] = + htonl(ntohl(addr->addr32[0]) + 1); + } else + addr->addr32[1] = + htonl(ntohl(addr->addr32[1]) + 1); + } else + addr->addr32[2] = + htonl(ntohl(addr->addr32[2]) + 1); + } else + addr->addr32[3] = + htonl(ntohl(addr->addr32[3]) + 1); + break; + } +} +#endif /* INET6 */ + +#define mix(a,b,c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while (0) + +/* + * hash function based on bridge_hash in if_bridge.c + */ +void +pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, + struct pf_poolhashkey *key, sa_family_t af) +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; + + switch (af) { +#ifdef INET + case AF_INET: + a += inaddr->addr32[0]; + b += key->key32[1]; + mix(a, b, c); + hash->addr32[0] = c + key->key32[2]; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + a += inaddr->addr32[0]; + b += inaddr->addr32[2]; + mix(a, b, c); + hash->addr32[0] = c; + a += inaddr->addr32[1]; + b += inaddr->addr32[3]; 
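
/*
 * The mix() step above is Bob Jenkins' 96-bit mixing function, the
 * same one if_bridge.c uses.  Minimal sketch hashing a single IPv4
 * address with a keyed initial state, following the shape of the
 * AF_INET arm of pf_hash(); the key and address values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define mix(a, b, c) do {			\
	a -= b; a -= c; a ^= (c >> 13);		\
	b -= c; b -= a; b ^= (a << 8);		\
	c -= a; c -= b; c ^= (b >> 13);		\
	a -= b; a -= c; a ^= (c >> 12);		\
	b -= c; b -= a; b ^= (a << 16);		\
	c -= a; c -= b; c ^= (b >> 5);		\
	a -= b; a -= c; a ^= (c >> 3);		\
	b -= c; b -= a; b ^= (a << 10);		\
	c -= a; c -= b; c ^= (b >> 15);		\
} while (0)

int
main(void)
{
	uint32_t key[3] = { 0xdeadbeef, 0xcafef00d, 0x12345678 };
	uint32_t addr = 0xc0a80001;		/* 192.168.0.1 */
	uint32_t a = 0x9e3779b9 + addr;		/* golden ratio, as in pf */
	uint32_t b = 0x9e3779b9 + key[1];
	uint32_t c = key[0];

	mix(a, b, c);
	printf("hash: %08x\n", c + key[2]);
	return (0);
}
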
+ c += key->key32[1]; + mix(a, b, c); + hash->addr32[1] = c; + a += inaddr->addr32[2]; + b += inaddr->addr32[1]; + c += key->key32[2]; + mix(a, b, c); + hash->addr32[2] = c; + a += inaddr->addr32[3]; + b += inaddr->addr32[0]; + c += key->key32[3]; + mix(a, b, c); + hash->addr32[3] = c; + break; +#endif /* INET6 */ + } +} + +int +pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, + struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) +{ + unsigned char hash[16]; + struct pf_pool *rpool = &r->rpool; + struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; + struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; + struct pf_pooladdr *acur = rpool->cur; + struct pf_src_node k; + + if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + k.af = af; + PF_ACPY(&k.addr, saddr, af); + if (r->rule_flag & PFRULE_RULESRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) + k.rule.ptr = r; + else + k.rule.ptr = NULL; + pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); + if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { + PF_ACPY(naddr, &(*sn)->raddr, af); + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf_map_addr: src tracking maps "); + pf_print_host(&k.addr, 0, af); + printf(" to "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + return (0); + } + } + + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + return (1); + if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (af == AF_INET) { + if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr4; + rmask = &rpool->cur->addr.p.dyn->pfid_mask4; + } else { + if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr6; + rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + } + } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + return (1); /* unsupported */ + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_NONE: + PF_ACPY(naddr, raddr, af); + break; + case PF_POOL_BITMASK: + PF_POOLMASK(naddr, raddr, rmask, saddr, af); + break; + case PF_POOL_RANDOM: + if (init_addr != NULL && PF_AZERO(init_addr, af)) { + switch (af) { +#ifdef INET + case AF_INET: + rpool->counter.addr32[0] = arc4random(); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (rmask->addr32[3] != 0xffffffff) + rpool->counter.addr32[3] = arc4random(); + else + break; + if (rmask->addr32[2] != 0xffffffff) + rpool->counter.addr32[2] = arc4random(); + else + break; + if (rmask->addr32[1] != 0xffffffff) + rpool->counter.addr32[1] = arc4random(); + else + break; + if (rmask->addr32[0] != 0xffffffff) + rpool->counter.addr32[0] = arc4random(); + break; +#endif /* INET6 */ + } + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_ACPY(init_addr, naddr, af); + + } else { + PF_AINC(&rpool->counter, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + } + break; + case PF_POOL_SRCHASH: + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + break; + case PF_POOL_ROUNDROBIN: + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, 
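
/*
 * Worked example of the PF_POOL_BITMASK policy above: PF_POOLMASK
 * combines the redirect address and the packet's source address under
 * the pool mask, naddr = (raddr & mask) | (saddr & ~mask).  IPv4-only
 * sketch with made-up addresses.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t raddr = 0x0a000000;	/* pool base 10.0.0.0 */
	uint32_t mask  = 0xffffff00;	/* /24 */
	uint32_t saddr = 0xc0a80117;	/* source 192.168.1.23 */
	uint32_t naddr = (raddr & mask) | (~mask & saddr);

	/* the host bits come from the source: 10.0.0.23 */
	printf("%u.%u.%u.%u\n", naddr >> 24, (naddr >> 16) & 255,
	    (naddr >> 8) & 255, naddr & 255);
	return (0);
}
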
&rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + goto get_addr; + + try_next: + if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) + rpool->cur = TAILQ_FIRST(&rpool->list); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + PF_ACPY(&rpool->counter, raddr, af); + } + + get_addr: + PF_ACPY(naddr, &rpool->counter, af); + PF_AINC(&rpool->counter, af); + break; + } + if (*sn != NULL) + PF_ACPY(&(*sn)->raddr, naddr, af); + + if (pf_status.debug >= PF_DEBUG_MISC && + (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + printf("pf_map_addr: selected address "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + + return (0); +} + +int +pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, + struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, + struct pf_src_node **sn) +{ + struct pf_state key; + struct pf_addr init_addr; + u_int16_t cut; + + bzero(&init_addr, sizeof(init_addr)); + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + + do { + key.af = af; + key.proto = proto; + PF_ACPY(&key.ext.addr, daddr, key.af); + PF_ACPY(&key.gwy.addr, naddr, key.af); + key.ext.port = dport; + + /* + * port search; start random, step; + * similar 2 portloop in in_pcbbind + */ + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { + key.gwy.port = 0; + if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) + return (0); + } else if (low == 0 && high == 0) { + key.gwy.port = *nport; + if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) + return (0); + } else if (low == high) { + key.gwy.port = htons(low); + if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) { + *nport = htons(low); + return (0); + } + } else { + u_int16_t tmp; + + if (low > high) { + tmp = low; + low = high; + high = tmp; + } + /* low < high */ + cut = arc4random() % (1 + high - low) + low; + /* low <= cut <= high */ + for (tmp = cut; tmp <= high; ++(tmp)) { + key.gwy.port = htons(tmp); + if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + for (tmp = cut - 1; tmp >= low; --(tmp)) { + key.gwy.port = htons(tmp); + if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } + } + } + + switch (r->rpool.opts & PF_POOL_TYPEMASK) { + case PF_POOL_RANDOM: + case PF_POOL_ROUNDROBIN: + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + break; + case PF_POOL_NONE: + case PF_POOL_SRCHASH: + case PF_POOL_BITMASK: + default: + return (1); + } + } while (! 
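
/*
 * Sketch of the port search strategy in pf_get_sport() above: pick a
 * random starting point in [low, high], probe upward to high, then
 * downward from just below the start.  port_in_use() is a stand-in
 * for the state-table lookup, and ports are kept in host order here
 * (the kernel converts with htons).
 */
#include <stdio.h>
#include <stdlib.h>

static int
port_in_use(int port)
{
	return (port >= 50000 && port <= 50004);  /* pretend these are taken */
}

static int
pick_port(int low, int high)
{
	int cut, p;

	cut = low + rand() % (1 + high - low);	/* low <= cut <= high */
	for (p = cut; p <= high; p++)		/* probe upward first */
		if (!port_in_use(p))
			return (p);
	for (p = cut - 1; p >= low; p--)	/* then downward */
		if (!port_in_use(p))
			return (p);
	return (-1);				/* none available */
}

int
main(void)
{
	printf("chose port %d\n", pick_port(49152, 65535));
	return (0);
}
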
PF_AEQ(&init_addr, naddr, af) ); + + return (1); /* none available */ +} + +struct pf_rule * +pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, int rs_num) +{ + struct pf_rule *r, *rm = NULL, *anchorrule = NULL; + struct pf_ruleset *ruleset = NULL; + + r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + while (r && rm == NULL) { + struct pf_rule_addr *src = NULL, *dst = NULL; + struct pf_addr_wrap *xdst = NULL; + + if (r->action == PF_BINAT && direction == PF_IN) { + src = &r->dst; + if (r->rpool.cur != NULL) + xdst = &r->rpool.cur->addr; + } else { + src = &r->src; + dst = &r->dst; + } + + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != pd->af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not)) + r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (src->port_op && !pf_match_port(src->port_op, + src->port[0], src->port[1], sport)) + r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : + PF_SKIP_DST_PORT].ptr; + else if (dst != NULL && + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) + r = TAILQ_NEXT(r, entries); + else if (dst != NULL && dst->port_op && + !pf_match_port(dst->port_op, dst->port[0], + dst->port[1], dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != + IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, + off, pd->hdr.tcp), r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->anchor == NULL) + rm = r; + else + PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num); + if (r == NULL && anchorrule != NULL) + PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset, + rs_num); + } + if (rm != NULL && (rm->action == PF_NONAT || + rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + return (NULL); + return (rm); +} + +struct pf_rule * +pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, + struct pfi_kif *kif, struct pf_src_node **sn, + struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport) +{ + struct pf_rule *r = NULL; + + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_BINAT); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_NAT); + } else { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_BINAT); + } + + if (r != NULL) { + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + case PF_NAT: + if (pf_get_sport(pd->af, pd->proto, r, saddr, + daddr, dport, naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation " + "(%u-%u) failed\n", + 
r->rpool.proxy_port[0], + r->rpool.proxy_port[1])); + return (NULL); + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + if (pd->af == AF_INET) { + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, + saddr, AF_INET); + } else { + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, + saddr, AF_INET6); + } + } else + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, + saddr, pd->af); + break; + case PF_IN: + if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ + if (pd->af == AF_INET) { + if (r->src.addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->src.addr.p.dyn-> + pfid_addr4, + &r->src.addr.p.dyn-> + pfid_mask4, + daddr, AF_INET); + } else { + if (r->src.addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->src.addr.p.dyn-> + pfid_addr6, + &r->src.addr.p.dyn-> + pfid_mask6, + daddr, AF_INET6); + } + } else + PF_POOLMASK(naddr, + &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, + pd->af); + break; + } + break; + case PF_RDR: { + if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn)) + return (NULL); + + if (r->rpool.proxy_port[1]) { + u_int32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - + ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - + r->rpool.proxy_port[0] + 1)) + + r->rpool.proxy_port[0]; + + /* wrap around if necessary */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((u_int16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); + break; + } + default: + return (NULL); + } + } + + return (r); +} + +#ifdef SMP +struct netmsg_hashlookup { + struct lwkt_msg nm_lmsg; + struct inpcb **nm_pinp; + struct inpcbinfo *nm_pcbinfo; + struct pf_addr *nm_saddr; + struct pf_addr *nm_daddr; + uint16_t nm_sport; + uint16_t nm_dport; + sa_family_t nm_af; +}; + +static int +in_pcblookup_hash_handler(struct lwkt_msg *msg0) +{ + struct netmsg_hashlookup *msg = (struct netmsg_hashlookup *)msg0; + + if (msg->nm_af == AF_INET) + *msg->nm_pinp = in_pcblookup_hash(msg->nm_pcbinfo, + msg->nm_saddr->v4, msg->nm_sport, msg->nm_daddr->v4, + msg->nm_dport, INPLOOKUP_WILDCARD, NULL); +#ifdef INET6 + else + *msg->nm_pinp = in6_pcblookup_hash(msg->nm_pcbinfo, + &msg->nm_saddr->v6, msg->nm_sport, &msg->nm_daddr->v6, + msg->nm_dport, INPLOOKUP_WILDCARD, NULL); +#endif /* INET6 */ + lwkt_replymsg(&msg->nm_lmsg, 0); + return (EASYNC); +} +#endif /* SMP */ + +int +pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) +{ + struct pf_addr *saddr, *daddr; + u_int16_t sport, dport; + struct inpcbinfo *pi; + struct inpcb *inp; +#ifdef SMP + struct netmsg_hashlookup *msg = NULL; +#endif + int pi_cpu = 0; + + *uid = UID_MAX; + *gid = GID_MAX; + if (direction == PF_IN) { + saddr = pd->src; + daddr = pd->dst; + } else { + saddr = pd->dst; + daddr = pd->src; + } + switch (pd->proto) { + case IPPROTO_TCP: + sport = pd->hdr.tcp->th_sport; + dport = pd->hdr.tcp->th_dport; + + pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport); + pi = &tcbinfo[pi_cpu]; +#ifdef SMP + /* + * Our netstack runs lockless on MP systems + * (only for TCP connections at the moment). 
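
/*
 * Worked example of the PF_RDR proxy-port arithmetic above: a range
 * of destination ports is folded onto the redirect range by modulo,
 * preserving the offset within the range.  Ports are in host order
 * and the rule values are invented for the example.
 */
#include <stdio.h>

int
main(void)
{
	int dst_port0 = 8000;		/* rule matches ports 8000:8009 */
	int proxy0 = 80, proxy1 = 84;	/* redirect range 80:84 */
	int dport;

	for (dport = 8000; dport <= 8009; dport++) {
		int nport = (dport - dst_port0) % (proxy1 - proxy0 + 1)
		    + proxy0;
		printf("%d -> %d\n", dport, nport);
	}
	/* 8000->80 ... 8004->84, then 8005->80 again, and so on */
	return (0);
}
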
+	 *
+	 * As we are not allowed to read another CPU's tcbinfo,
+	 * we have to ask that CPU via remote call to search the
+	 * table for us.
+	 *
+	 * Prepare a msg iff data belongs to another CPU.
+	 */
+	if (pi_cpu != mycpu->gd_cpuid) {
+		msg = malloc(sizeof(*msg), M_LWKTMSG, M_INTWAIT);
+		lwkt_initmsg(&msg->nm_lmsg, &netisr_afree_rport, 0,
+		    lwkt_cmd_func(in_pcblookup_hash_handler),
+		    lwkt_cmd_op_none);
+		msg->nm_pinp = &inp;
+		msg->nm_pcbinfo = pi;
+		msg->nm_saddr = saddr;
+		msg->nm_sport = sport;
+		msg->nm_daddr = daddr;
+		msg->nm_dport = dport;
+		msg->nm_af = pd->af;
+	}
+#endif /* SMP */
+		break;
+	case IPPROTO_UDP:
+		sport = pd->hdr.udp->uh_sport;
+		dport = pd->hdr.udp->uh_dport;
+		pi = &udbinfo;
+		break;
+	default:
+		return (0);
+	}
+	if (direction != PF_IN) {
+		u_int16_t p;
+
+		p = sport;
+		sport = dport;
+		dport = p;
+	}
+	switch (pd->af) {
+#ifdef INET6
+	case AF_INET6:
+#ifdef SMP
+		/*
+		 * Query other CPU, second part
+		 *
+		 * msg only gets initialized when:
+		 * 1) packet is TCP
+		 * 2) the info belongs to another CPU
+		 *
+		 * Use some switch/case magic to avoid code duplication.
+		 */
+		if (msg == NULL)
+#endif /* SMP */
+		{
+			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
+			    &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
+
+			if (inp == NULL)
+				return (0);
+			break;
+		}
+		/* FALLTHROUGH if SMP and on other CPU */
+#endif /* INET6 */
+	case AF_INET:
+#ifdef SMP
+		if (msg != NULL) {
+			lwkt_sendmsg(tcp_cport(pi_cpu), &msg->nm_lmsg);
+		} else
+#endif /* SMP */
+		{
+			inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
+			    dport, INPLOOKUP_WILDCARD, NULL);
+		}
+		if (inp == NULL)
+			return (0);
+		break;
+
+	default:
+		return (0);
+	}
+	*uid = inp->inp_socket->so_cred->cr_uid;
+	*gid = inp->inp_socket->so_cred->cr_groups[0];
+	return (1);
+}
+
+u_int8_t
+pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
+{
+	int hlen;
+	u_int8_t hdr[60];
+	u_int8_t *opt, optlen;
+	u_int8_t wscale = 0;
+
+	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
+	if (hlen <= sizeof(struct tcphdr))
+		return (0);
+	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
+		return (0);
+	opt = hdr + sizeof(struct tcphdr);
+	hlen -= sizeof(struct tcphdr);
+	while (hlen >= 3) {
+		switch (*opt) {
+		case TCPOPT_EOL:
+		case TCPOPT_NOP:
+			++opt;
+			--hlen;
+			break;
+		case TCPOPT_WINDOW:
+			wscale = opt[2];
+			if (wscale > TCP_MAX_WINSHIFT)
+				wscale = TCP_MAX_WINSHIFT;
+			wscale |= PF_WSCALE_FLAG;
+			/* FALLTHROUGH */
+		default:
+			optlen = opt[1];
+			if (optlen < 2)
+				optlen = 2;
+			hlen -= optlen;
+			opt += optlen;
+			break;
+		}
+	}
+	return (wscale);
+}
+
+u_int16_t
+pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
+{
+	int hlen;
+	u_int8_t hdr[60];
+	u_int8_t *opt, optlen;
+	u_int16_t mss = tcp_mssdflt;
+
+	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
+	if (hlen <= sizeof(struct tcphdr))
+		return (0);
+	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
+		return (0);
+	opt = hdr + sizeof(struct tcphdr);
+	hlen -= sizeof(struct tcphdr);
+	while (hlen >= TCPOLEN_MAXSEG) {
+		switch (*opt) {
+		case TCPOPT_EOL:
+		case TCPOPT_NOP:
+			++opt;
+			--hlen;
+			break;
+		case TCPOPT_MAXSEG:
+			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
+			/* FALLTHROUGH */
+		default:
+			optlen = opt[1];
+			if (optlen < 2)
+				optlen = 2;
+			hlen -= optlen;
+			opt += optlen;
+			break;
+		}
+	}
+	return (mss);
+}
+
+u_int16_t
+pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
+{
+#ifdef INET
+	struct sockaddr_in *dst;
+	struct route ro;
+#endif /* INET */
+#ifdef INET6
+	struct sockaddr_in6 *dst6;
+	struct route_in6 ro6;
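+	/*
+	 * Below, the advertised MSS is derived from the route's
+	 * interface MTU as mss = if_mtu - ip header - tcp header.
+	 * As a worked example (illustrative numbers, assuming a plain
+	 * 1500 byte Ethernet MTU): IPv4 yields 1500 - 20 - 20 = 1460,
+	 * IPv6 yields 1500 - 40 - 20 = 1440.  The result is then
+	 * clamped against the peer's offer and a floor of 64 octets.
+	 */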
+#endif /* INET6 */ + struct rtentry *rt = NULL; + int hlen = 0; + u_int16_t mss = tcp_mssdflt; + + switch (af) { +#ifdef INET + case AF_INET: + hlen = sizeof(struct ip); + bzero(&ro, sizeof(ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); + rt = ro.ro_rt; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + hlen = sizeof(struct ip6_hdr); + bzero(&ro6, sizeof(ro6)); + dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING)); + rt = ro6.ro_rt; + break; +#endif /* INET6 */ + } + + if (rt && rt->rt_ifp) { + mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); + mss = max(tcp_mssdflt, mss); + RTFREE(rt); + } + mss = min(mss, offer); + mss = max(mss, 64); /* sanity - at least max opt space */ + return (mss); +} + +void +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) +{ + struct pf_rule *r = s->rule.ptr; + + s->rt_kif = NULL; + if (!r->rt || r->rt == PF_FASTROUTE) + return; + switch (s->af) { +#ifdef INET + case AF_INET: + pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, + &s->nat_src_node); + s->rt_kif = r->rpool.cur->kif; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, + &s->nat_src_node); + s->rt_kif = r->rpool.cur->kif; + break; +#endif /* INET6 */ + } +} + +int +pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t bport, nport = 0; + sa_family_t af = pd->af; + int lookup = -1; + uid_t uid; + gid_t gid; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + u_short reason; + int rewrite = 0; + int tag = -1; + u_int16_t mss = tcp_mssdflt; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + bport = nport = th->th_sport; + /* check outgoing packet for BINAT/NAT */ + if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, + saddr, th->th_sport, daddr, th->th_dport, + &pd->naddr, &nport)) != NULL) { + PF_ACPY(&pd->baddr, saddr, af); + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &pd->naddr, nport, 0, af); + rewrite++; + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } else { + bport = nport = th->th_dport; + /* check incoming packet for BINAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, + saddr, th->th_sport, daddr, th->th_dport, + &pd->naddr, &nport)) != NULL) { + PF_ACPY(&pd->baddr, daddr, af); + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &pd->naddr, nport, 0, af); + rewrite++; + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != IPPROTO_TCP) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) 
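+		/*
+		 * PF_MISMATCHAW() already honours the rule's address
+		 * negation flag, so a matching "!" address takes the
+		 * same precomputed skip step as a plain mismatch.
+		 */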
+ r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if ((r->flagset & th->th_flags) != r->flags) + r = TAILQ_NEXT(r, entries); + else if (r->uid.op && (lookup != -1 || (lookup = + pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + uid)) + r = TAILQ_NEXT(r, entries); + else if (r->gid.op && (lookup != -1 || (lookup = + pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + gid)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) { + if (rewrite) + m_copyback(m, off, sizeof(*th), (caddr_t)th); + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + } + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + if (direction == PF_OUT) { + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &pd->baddr, bport, 0, af); + rewrite++; + } else { + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &pd->baddr, bport, 0, af); + rewrite++; + } + } + if (((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURN)) && + !(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp(r, af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl); + } else if ((af == AF_INET) && r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if ((af == AF_INET6) && r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action == PF_DROP) + return (PF_DROP); + + pf_tag_packet(m, tag); + + if (r->keep_state || nr != NULL || + (pd->flags & PFDESC_TCP_NORM)) { + /* create new state */ + u_int16_t len; + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + + len = pd->tot_len - off - (th->th_off << 2); + + /* check maximums */ + if (r->max_states && (r->states >= r->max_states)) + goto cleanup; + /* src node for flter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + 
pf_insert_src_node(&sn, r, saddr, af) != 0) + goto cleanup; + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + ((direction == PF_OUT && + pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + goto cleanup; + s = pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { +cleanup: + if (sn != NULL && sn->states == 0 && sn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && + nsn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); + } + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + s->nat_rule.ptr = nr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = IPPROTO_TCP; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + s->gwy.port = th->th_sport; /* sport */ + PF_ACPY(&s->ext.addr, daddr, af); + s->ext.port = th->th_dport; + if (nr != NULL) { + PF_ACPY(&s->lan.addr, &pd->baddr, af); + s->lan.port = bport; + } else { + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + s->lan.port = s->gwy.port; + } + } else { + PF_ACPY(&s->lan.addr, daddr, af); + s->lan.port = th->th_dport; + PF_ACPY(&s->ext.addr, saddr, af); + s->ext.port = th->th_sport; + if (nr != NULL) { + PF_ACPY(&s->gwy.addr, &pd->baddr, af); + s->gwy.port = bport; + } else { + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->gwy.port = s->lan.port; + } + } + + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + while ((s->src.seqdiff = arc4random()) == 0) + ; + pf_change_a(&th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + rewrite = 1; + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, th->th_off, af); + } + s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); + } + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->creation = time_second; + s->expire = time_second; + s->timeout = PFTM_TCP_FIRST_PACKET; + pf_set_rt_ifp(s, saddr); + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + PF_ACPY(&nsn->raddr, &pd->naddr, af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, + off, pd, th, &s->src, &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src, + &s->dst, &rewrite)) { + pf_normalize_tcp_cleanup(s); + pf_src_tree_remove_state(s); + 
pool_put(&pf_state_pl, s); + return (PF_DROP); + } + if (pf_insert_state(BOUND_IFACE(r, kif), s)) { + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + if (nr != NULL) { + if (direction == PF_OUT) { + pf_change_ap(saddr, &th->th_sport, + pd->ip_sum, &th->th_sum, &pd->baddr, + bport, 0, af); + } else { + pf_change_ap(daddr, &th->th_dport, + pd->ip_sum, &th->th_sum, &pd->baddr, + bport, 0, af); + } + } + s->src.seqhi = arc4random(); + /* Find mss option */ + mss = pf_get_mss(m, off, th->th_off, af); + mss = pf_calc_mss(saddr, af, mss); + mss = pf_calc_mss(daddr, af, mss); + s->src.mss = mss; + pf_send_tcp(r, af, daddr, saddr, th->th_dport, + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0); + return (PF_SYNPROXY_DROP); + } + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, sizeof(*th), (caddr_t)th); + + return (PF_PASS); +} + +int +pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct udphdr *uh = pd->hdr.udp; + u_int16_t bport, nport = 0; + sa_family_t af = pd->af; + int lookup = -1; + uid_t uid; + gid_t gid; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + u_short reason; + int rewrite = 0; + int tag = -1; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + bport = nport = uh->uh_sport; + /* check outgoing packet for BINAT/NAT */ + if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, + saddr, uh->uh_sport, daddr, uh->uh_dport, + &pd->naddr, &nport)) != NULL) { + PF_ACPY(&pd->baddr, saddr, af); + pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &pd->naddr, nport, 1, af); + rewrite++; + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } else { + bport = nport = uh->uh_dport; + /* check incoming packet for BINAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, + saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr, + &nport)) != NULL) { + PF_ACPY(&pd->baddr, daddr, af); + pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &pd->naddr, nport, 1, af); + rewrite++; + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != IPPROTO_UDP) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], uh->uh_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], uh->uh_dport)) + r = 
r->skip[PF_SKIP_DST_PORT].ptr;
+		else if (r->tos && !(r->tos & pd->tos))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->rule_flag & PFRULE_FRAGMENT)
+			r = TAILQ_NEXT(r, entries);
+		else if (r->uid.op && (lookup != -1 || (lookup =
+		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
+		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
+		    uid))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->gid.op && (lookup != -1 || (lookup =
+		    pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
+		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
+		    gid))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
+			r = TAILQ_NEXT(r, entries);
+		else if (r->anchorname[0] && r->anchor == NULL)
+			r = TAILQ_NEXT(r, entries);
+		else if (r->os_fingerprint != PF_OSFP_ANY)
+			r = TAILQ_NEXT(r, entries);
+		else {
+			if (r->tag)
+				tag = r->tag;
+			if (r->anchor == NULL) {
+				*rm = r;
+				*am = a;
+				*rsm = ruleset;
+				if ((*rm)->quick)
+					break;
+				r = TAILQ_NEXT(r, entries);
+			} else
+				PF_STEP_INTO_ANCHOR(r, a, ruleset,
+				    PF_RULESET_FILTER);
+		}
+		if (r == NULL && a != NULL)
+			PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
+			    PF_RULESET_FILTER);
+	}
+	r = *rm;
+	a = *am;
+	ruleset = *rsm;
+
+	REASON_SET(&reason, PFRES_MATCH);
+
+	if (r->log) {
+		if (rewrite)
+			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
+		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
+	}
+
+	if ((r->action == PF_DROP) &&
+	    ((r->rule_flag & PFRULE_RETURNICMP) ||
+	    (r->rule_flag & PFRULE_RETURN))) {
+		/* undo NAT changes, if they have taken place */
+		if (nr != NULL) {
+			if (direction == PF_OUT) {
+				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
+				    &uh->uh_sum, &pd->baddr, bport, 1, af);
+				rewrite++;
+			} else {
+				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
+				    &uh->uh_sum, &pd->baddr, bport, 1, af);
+				rewrite++;
+			}
+		}
+		if ((af == AF_INET) && r->return_icmp)
+			pf_send_icmp(m, r->return_icmp >> 8,
+			    r->return_icmp & 255, af, r);
+		else if ((af == AF_INET6) && r->return_icmp6)
+			pf_send_icmp(m, r->return_icmp6 >> 8,
+			    r->return_icmp6 & 255, af, r);
+	}
+
+	if (r->action == PF_DROP)
+		return (PF_DROP);
+
+	pf_tag_packet(m, tag);
+
+	if (r->keep_state || nr != NULL) {
+		/* create new state */
+		struct pf_state *s = NULL;
+		struct pf_src_node *sn = NULL;
+
+		/* check maximums */
+		if (r->max_states && (r->states >= r->max_states))
+			goto cleanup;
+		/* src node for filter rule */
+		if ((r->rule_flag & PFRULE_SRCTRACK ||
+		    r->rpool.opts & PF_POOL_STICKYADDR) &&
+		    pf_insert_src_node(&sn, r, saddr, af) != 0)
+			goto cleanup;
+		/* src node for translation rule */
+		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
+		    ((direction == PF_OUT &&
+		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
+		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
+			goto cleanup;
+		s = pool_get(&pf_state_pl, PR_NOWAIT);
+		if (s == NULL) {
+cleanup:
+			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
+				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
+				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+				pf_status.src_nodes--;
+				pool_put(&pf_src_tree_pl, sn);
+			}
+			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
+			    nsn->expire == 0) {
+				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
+				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
+				pf_status.src_nodes--;
+				pool_put(&pf_src_tree_pl, nsn);
+			}
+			REASON_SET(&reason, PFRES_MEMORY);
+			return (PF_DROP);
+		}
+		bzero(s, sizeof(*s));
+		r->states++;
+		if (a != NULL)
+			a->states++;
+		s->rule.ptr = r;
+		s->nat_rule.ptr = nr;
+		if (s->nat_rule.ptr != NULL)
+			s->nat_rule.ptr->states++;
+
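+		/*
+		 * Addressing convention for the assignments below:
+		 * "lan" is the inside address before translation, "gwy"
+		 * the address/port pair that appears on the wire after
+		 * NAT, and "ext" the remote peer.  Without a translation
+		 * rule, lan and gwy collapse to the same pair, which is
+		 * what the else branches implement.
+		 */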
s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = IPPROTO_UDP; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + s->gwy.port = uh->uh_sport; + PF_ACPY(&s->ext.addr, daddr, af); + s->ext.port = uh->uh_dport; + if (nr != NULL) { + PF_ACPY(&s->lan.addr, &pd->baddr, af); + s->lan.port = bport; + } else { + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + s->lan.port = s->gwy.port; + } + } else { + PF_ACPY(&s->lan.addr, daddr, af); + s->lan.port = uh->uh_dport; + PF_ACPY(&s->ext.addr, saddr, af); + s->ext.port = uh->uh_sport; + if (nr != NULL) { + PF_ACPY(&s->gwy.addr, &pd->baddr, af); + s->gwy.port = bport; + } else { + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->gwy.port = s->lan.port; + } + } + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->creation = time_second; + s->expire = time_second; + s->timeout = PFTM_UDP_FIRST_PACKET; + pf_set_rt_ifp(s, saddr); + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + PF_ACPY(&nsn->raddr, &pd->naddr, af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pf_insert_state(BOUND_IFACE(r, kif), s)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + + return (PF_PASS); +} + +int +pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + u_short reason; + u_int16_t icmpid = 0; + sa_family_t af = pd->af; + u_int8_t icmptype = 0, icmpcode = 0; + int state_icmp = 0; + int tag = -1; +#ifdef INET6 + int rewrite = 0; +#endif /* INET6 */ + + switch (pd->proto) { +#ifdef INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + icmpid = pd->hdr.icmp->icmp_id; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + icmpid = pd->hdr.icmp6->icmp6_id; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + /* check outgoing packet for BINAT/NAT */ + if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, + saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + PF_ACPY(&pd->baddr, saddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &pd->naddr, 0); + rewrite++; + break; +#endif /* INET6 */ + } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } else { + /* check incoming packet for BINAT/RDR */ + if ((nr = 
pf_get_translation(pd, m, off, PF_IN, kif, &nsn, + saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + PF_ACPY(&pd->baddr, daddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, + &pd->naddr, 0); + rewrite++; + break; +#endif /* INET6 */ + } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->type && r->type != icmptype + 1) + r = TAILQ_NEXT(r, entries); + else if (r->code && r->code != icmpcode + 1) + r = TAILQ_NEXT(r, entries); + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) { +#ifdef INET6 + if (rewrite) + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); +#endif /* INET6 */ + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + } + + if (r->action != PF_PASS) + return (PF_DROP); + + pf_tag_packet(m, tag); + + if (!state_icmp && (r->keep_state || nr != NULL)) { + /* create new state */ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + + /* check maximums */ + if (r->max_states && (r->states >= r->max_states)) + goto cleanup; + /* src node for flter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, saddr, af) != 0) + goto cleanup; + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + ((direction == PF_OUT && + pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + goto cleanup; + s = pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { +cleanup: + if (sn != NULL && sn->states == 0 && sn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && + nsn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); + } + REASON_SET(&reason, PFRES_MEMORY); + return 
(PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + s->nat_rule.ptr = nr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = pd->proto; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + s->gwy.port = icmpid; + PF_ACPY(&s->ext.addr, daddr, af); + s->ext.port = icmpid; + if (nr != NULL) + PF_ACPY(&s->lan.addr, &pd->baddr, af); + else + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + s->lan.port = icmpid; + } else { + PF_ACPY(&s->lan.addr, daddr, af); + s->lan.port = icmpid; + PF_ACPY(&s->ext.addr, saddr, af); + s->ext.port = icmpid; + if (nr != NULL) + PF_ACPY(&s->gwy.addr, &pd->baddr, af); + else + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + s->gwy.port = icmpid; + } + s->creation = time_second; + s->expire = time_second; + s->timeout = PFTM_ICMP_FIRST_PACKET; + pf_set_rt_ifp(s, saddr); + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + PF_ACPY(&nsn->raddr, &pd->naddr, af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pf_insert_state(BOUND_IFACE(r, kif), s)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + } + +#ifdef INET6 + /* copy back packet headers if we performed IPv6 NAT operations */ + if (rewrite) + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); +#endif /* INET6 */ + + return (PF_PASS); +} + +int +pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, + struct pf_rule **am, struct pf_ruleset **rsm) +{ + struct pf_rule *nr = NULL; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + sa_family_t af = pd->af; + u_short reason; + int tag = -1; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { + /* check outgoing packet for BINAT/NAT */ + if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, + saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + PF_ACPY(&pd->baddr, saddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(saddr, &pd->naddr, af); + break; +#endif /* INET6 */ + } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } else { + /* check incoming packet for BINAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, + saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + PF_ACPY(&pd->baddr, daddr, af); + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->naddr, af); + break; +#endif /* INET6 */ + } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } + + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + 
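+		/*
+		 * The r->skip[] pointers taken above are precomputed
+		 * when the ruleset is loaded: a run of consecutive rules
+		 * that all share the same interface, direction, af,
+		 * protocol or address is stepped over in one hop instead
+		 * of being evaluated rule by rule.
+		 */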
else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + struct pf_addr *a = NULL; + + if (nr != NULL) { + if (direction == PF_OUT) + a = saddr; + else + a = daddr; + } + if (a != NULL) { + switch (af) { +#ifdef INET + case AF_INET: + pf_change_a(&a->v4.s_addr, pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(a, &pd->baddr, af); + break; +#endif /* INET6 */ + } + } + if ((af == AF_INET) && r->return_icmp) + pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if ((af == AF_INET6) && r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action != PF_PASS) + return (PF_DROP); + + pf_tag_packet(m, tag); + + if (r->keep_state || nr != NULL) { + /* create new state */ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + + /* check maximums */ + if (r->max_states && (r->states >= r->max_states)) + goto cleanup; + /* src node for flter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, saddr, af) != 0) + goto cleanup; + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + ((direction == PF_OUT && + pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + goto cleanup; + s = pool_get(&pf_state_pl, PR_NOWAIT); + if (s == NULL) { +cleanup: + if (sn != NULL && sn->states == 0 && sn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && + nsn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); + } + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + bzero(s, sizeof(*s)); + r->states++; + if (a != NULL) + a->states++; + s->rule.ptr = r; + s->nat_rule.ptr = nr; + if (s->nat_rule.ptr != NULL) + s->nat_rule.ptr->states++; + s->anchor.ptr = a; + s->allow_opts = r->allow_opts; + s->log = r->log & 2; + s->proto = pd->proto; + s->direction = direction; + s->af = af; + if (direction == PF_OUT) { + PF_ACPY(&s->gwy.addr, saddr, af); + PF_ACPY(&s->ext.addr, daddr, af); + if (nr != NULL) + 
PF_ACPY(&s->lan.addr, &pd->baddr, af); + else + PF_ACPY(&s->lan.addr, &s->gwy.addr, af); + } else { + PF_ACPY(&s->lan.addr, daddr, af); + PF_ACPY(&s->ext.addr, saddr, af); + if (nr != NULL) + PF_ACPY(&s->gwy.addr, &pd->baddr, af); + else + PF_ACPY(&s->gwy.addr, &s->lan.addr, af); + } + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->creation = time_second; + s->expire = time_second; + s->timeout = PFTM_OTHER_FIRST_PACKET; + pf_set_rt_ifp(s, saddr); + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + PF_ACPY(&nsn->raddr, &pd->naddr, af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pf_insert_state(BOUND_IFACE(r, kif), s)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + } + + return (PF_PASS); +} + +int +pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, + struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, + struct pf_ruleset **rsm) +{ + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + sa_family_t af = pd->af; + u_short reason; + int tag = -1; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos & pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->src.port_op || r->dst.port_op || + r->flagset || r->type || r->code || + r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, NULL, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->anchorname[0] && r->anchor == NULL) + r = TAILQ_NEXT(r, entries); + else { + if (r->anchor == NULL) { + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + PF_STEP_INTO_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + if (r == NULL && a != NULL) + PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, + PF_RULESET_FILTER); + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + + if (r->action != PF_PASS) + return (PF_DROP); + + pf_tag_packet(m, tag); + + return (PF_PASS); +} + +int +pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, + u_short *reason) +{ + struct pf_state key; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq; + u_int8_t sws, dws; + int ackskew; + int copyback = 0; + struct pf_state_peer *src, *dst; + + key.af = pd->af; + key.proto = IPPROTO_TCP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.ext.port = th->th_sport; + key.gwy.port = th->th_dport; + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.lan.port = th->th_sport; + 
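+		/*
+		 * The state tree key: inbound packets are looked up by
+		 * the (ext, gwy) pair seen on the wire, outbound packets
+		 * by the pre-translation (lan, ext) pair.  STATE_LOOKUP()
+		 * below searches the matching RB tree with this key.
+		 */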
key.ext.port = th->th_dport; + } + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { + if (direction != (*state)->direction) + return (PF_SYNPROXY_DROP); + if (th->th_flags & TH_SYN) { + if (ntohl(th->th_seq) != (*state)->src.seqlo) + return (PF_DROP); + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + (*state)->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0); + return (PF_SYNPROXY_DROP); + } else if (!(th->th_flags & TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + return (PF_DROP); + else + (*state)->src.state = PF_TCPS_PROXY_DST; + } + if ((*state)->src.state == PF_TCPS_PROXY_DST) { + struct pf_state_host *src, *dst; + + if (direction == PF_OUT) { + src = &(*state)->gwy; + dst = &(*state)->ext; + } else { + src = &(*state)->ext; + dst = &(*state)->lan; + } + if (direction == (*state)->direction) { + if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + return (PF_DROP); + (*state)->src.max_win = MAX(ntohs(th->th_win), 1); + if ((*state)->dst.seqhi == 1) + (*state)->dst.seqhi = arc4random(); + pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + &dst->addr, src->port, dst->port, + (*state)->dst.seqhi, 0, TH_SYN, 0, + (*state)->src.mss, 0); + return (PF_SYNPROXY_DROP); + } else if (((th->th_flags & (TH_SYN|TH_ACK)) != + (TH_SYN|TH_ACK)) || + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) + return (PF_DROP); + else { + (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); + (*state)->dst.seqlo = ntohl(th->th_seq); + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ntohl(th->th_seq) + 1, + TH_ACK, (*state)->src.max_win, 0, 0); + pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, + &dst->addr, src->port, dst->port, + (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, + TH_ACK, (*state)->dst.max_win, 0, 0); + (*state)->src.seqdiff = (*state)->dst.seqhi - + (*state)->src.seqlo; + (*state)->dst.seqdiff = (*state)->src.seqhi - + (*state)->dst.seqlo; + (*state)->src.seqhi = (*state)->src.seqlo + + (*state)->src.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->dst.max_win; + (*state)->src.wscale = (*state)->dst.wscale = 0; + (*state)->src.state = (*state)->dst.state = + TCPS_ESTABLISHED; + return (PF_SYNPROXY_DROP); + } + } + + if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { + sws = src->wscale & PF_WSCALE_MASK; + dws = dst->wscale & PF_WSCALE_MASK; + } else + sws = dws = 0; + + /* + * Sequence tracking algorithm from Guido van Rooij's paper: + * http://www.madison-gurkha.com/publications/tcp_filtering/ + * tcp_filtering.ps + */ + + seq = ntohl(th->th_seq); + if (src->seqlo == 0) { + /* First packet from this end. 
Set its state */ + + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && + src->scrub == NULL) { + if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + } + + /* Deferred generation of sequence number modulator */ + if (dst->seqdiff && !src->seqdiff) { + while ((src->seqdiff = arc4random()) == 0) + ; + ack = ntohl(th->th_ack) - dst->seqdiff; + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + copyback = 1; + } else { + ack = ntohl(th->th_ack); + } + + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) { + end++; + if (dst->wscale & PF_WSCALE_FLAG) { + src->wscale = pf_get_wscale(m, off, th->th_off, + pd->af); + if (src->wscale & PF_WSCALE_FLAG) { + /* Remove scale factor from initial + * window */ + sws = src->wscale & PF_WSCALE_MASK; + win = ((u_int32_t)win + (1 << sws) - 1) + >> sws; + dws = dst->wscale & PF_WSCALE_MASK; + } else { + /* fixup other window */ + dst->max_win <<= dst->wscale & + PF_WSCALE_MASK; + /* in case of a retrans SYN|ACK */ + dst->wscale = 0; + } + } + } + if (th->th_flags & TH_FIN) + end++; + + src->seqlo = seq; + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + + /* + * May need to slide the window (seqhi may have been set by + * the crappy stack check or if we picked up the connection + * after establishment) + */ + if (src->seqhi == 1 || + SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) + src->seqhi = end + MAX(1, dst->max_win << dws); + if (win > src->max_win) + src->max_win = win; + + } else { + ack = ntohl(th->th_ack) - dst->seqdiff; + if (src->seqdiff) { + /* Modulate sequence numbers */ + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + copyback = 1; + } + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) + end++; + if (th->th_flags & TH_FIN) + end++; + } + + if ((th->th_flags & TH_ACK) == 0) { + /* Let it pass through the ack skew check */ + ack = dst->seqlo; + } else if ((ack == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || + /* broken tcp stacks do not set ack */ + (dst->state < TCPS_SYN_SENT)) { + /* + * Many stacks (ours included) will set the ACK number in an + * FIN|ACK if the SYN times out -- no sequence to ACK. 
+ */ + ack = dst->seqlo; + } + + if (seq == end) { + /* Ease sequencing restrictions on no data packets */ + seq = src->seqlo; + end = seq; + } + + ackskew = dst->seqlo - ack; + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + if (SEQ_GEQ(src->seqhi, end) && + /* Last octet inside other's window space */ + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && + /* Retrans: not more than one window back */ + (ackskew >= -MAXACKWINDOW) && + /* Acking not more than one reassembled fragment backwards */ + (ackskew <= (MAXACKWINDOW << sws))) { + /* Acking not more than one window forward */ + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + + /* update states */ + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) + dst->state = TCPS_ESTABLISHED; + else if (dst->state == TCPS_CLOSING) + dst->state = TCPS_FIN_WAIT_2; + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_second; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_FIN_WAIT_2 || + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + /* Fall through to PASS packet */ + + } else if ((dst->state < TCPS_SYN_SENT || + dst->state >= TCPS_FIN_WAIT_2 || + src->state >= TCPS_FIN_WAIT_2) && + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && + /* Within a window forward of the originating packet */ + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { + /* Within a window backward of the originating packet */ + + /* + * This currently handles three situations: + * 1) Stupid stacks will shotgun SYNs before their peer + * replies. + * 2) When PF catches an already established stream (the + * firewall rebooted, the state table was flushed, routes + * changed...) + * 3) Packets get funky immediately after the connection + * closes (this should catch Solaris spurious ACK|FINs + * that web servers like to spew after a close) + * + * This must be a little more careful than the above code + * since packet floods will also be caught here. We don't + * update the TTL here to mitigate the damage of a packet + * flood and so the same code can handle awkward establishment + * and a loosened connection close. + * In the establishment case, a correct peer response will + * validate the connection, go through the normal state code + * and keep updating the state TTL. 
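+	 *
+	 * As a worked example of the loosened window check (numbers
+	 * are illustrative, not from a trace): with src->seqhi = 1000
+	 * and MAXACKWINDOW = 0xffff + 1500 = 67035, a segment is still
+	 * accepted here as long as its last octet ends at or before
+	 * 1000 + 67035 = 68035 and it starts no more than 67035 octets
+	 * behind src->seqlo.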
+ */ + + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: loose state match: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n", + seq, ack, pd->p_len, ackskew, + (*state)->packets[0], (*state)->packets[1]); + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + /* + * Cannot set dst->seqhi here since this could be a shotgunned + * SYN and not an already established connection. + */ + + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* Fall through to PASS packet */ + + } else { + if ((*state)->dst.state == TCPS_SYN_SENT && + (*state)->src.state == TCPS_SYN_SENT) { + /* Send RST for state mismatches during handshake */ + if (!(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp((*state)->rule.ptr, pd->af, + pd->dst, pd->src, th->th_dport, + th->th_sport, ntohl(th->th_ack), ack, + TH_RST|TH_ACK, 0, 0, + (*state)->rule.ptr->return_ttl); + } + src->seqlo = 0; + src->seqhi = 1; + src->max_win = 1; + } else if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD state: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d " + "dir=%s,%s\n", seq, ack, pd->p_len, ackskew, + (*state)->packets[0], (*state)->packets[1], + direction == PF_IN ? "in" : "out", + direction == (*state)->direction ? "fwd" : "rev"); + printf("pf: State failure on: %c %c %c %c | %c %c\n", + SEQ_GEQ(src->seqhi, end) ? ' ' : '1', + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? + ' ': '2', + (ackskew >= -MAXACKWINDOW) ? ' ' : '3', + (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); + } + return (PF_DROP); + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + src, dst, ©back)) + return (PF_DROP); + } + + /* Any packets which have gotten here are to be passed */ + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE(*state)) { + if (direction == PF_OUT) + pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &(*state)->gwy.addr, + (*state)->gwy.port, 0, pd->af); + else + pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &(*state)->lan.addr, + (*state)->lan.port, 0, pd->af); + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } else if (copyback) { + /* Copyback sequence modulation or stateful scrub changes */ + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } + + return (PF_PASS); +} + +int +pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_state key; + struct udphdr *uh = pd->hdr.udp; + + key.af = pd->af; + key.proto = IPPROTO_UDP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.ext.port = uh->uh_sport; + key.gwy.port = uh->uh_dport; + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.lan.port = uh->uh_sport; + key.ext.port = uh->uh_dport; + } + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFUDPS_SINGLE) + src->state = PFUDPS_SINGLE; + if (dst->state == PFUDPS_SINGLE) + dst->state = PFUDPS_MULTIPLE; + + /* update expire time */ + (*state)->expire = time_second; + if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) + (*state)->timeout = PFTM_UDP_MULTIPLE; + else + (*state)->timeout = PFTM_UDP_SINGLE; + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE(*state)) { + if (direction == PF_OUT) + pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &(*state)->gwy.addr, + (*state)->gwy.port, 1, pd->af); + else + pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &(*state)->lan.addr, + (*state)->lan.port, 1, pd->af); + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + + return (PF_PASS); +} + +int +pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd) +{ + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + u_int16_t icmpid = 0; + u_int16_t *icmpsum = NULL; + u_int8_t icmptype = 0; + int state_icmp = 0; + + switch (pd->proto) { +#ifdef INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpid = pd->hdr.icmp->icmp_id; + icmpsum = &pd->hdr.icmp->icmp_cksum; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpid = pd->hdr.icmp6->icmp6_id; + icmpsum = &pd->hdr.icmp6->icmp6_cksum; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + if (!state_icmp) { + + /* + * ICMP query/reply message not related to a TCP/UDP 
packet. + * Search for an ICMP state. + */ + struct pf_state key; + + key.af = pd->af; + key.proto = pd->proto; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.ext.port = icmpid; + key.gwy.port = icmpid; + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.lan.port = icmpid; + key.ext.port = icmpid; + } + + STATE_LOOKUP(); + + (*state)->expire = time_second; + (*state)->timeout = PFTM_ICMP_ERROR_REPLY; + + /* translate source/destination address, if necessary */ + if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) { + if (direction == PF_OUT) { + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + (*state)->gwy.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &(*state)->gwy.addr, 0); + m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } else { + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + (*state)->lan.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + pf_change_a6(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &(*state)->lan.addr, 0); + m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } + } + + return (PF_PASS); + + } else { + /* + * ICMP error message in response to a TCP/UDP packet. + * Extract the inner TCP/UDP header and search for that state. + */ + + struct pf_pdesc pd2; +#ifdef INET + struct ip h2; +#endif /* INET */ +#ifdef INET6 + struct ip6_hdr h2_6; + int terminal = 0; +#endif /* INET6 */ + int ipoff2 = 0; + int off2 = 0; + + pd2.af = pd->af; + switch (pd->af) { +#ifdef INET + case AF_INET: + /* offset of h2 in mbuf chain */ + ipoff2 = off + ICMP_MINLEN; + + if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip)\n")); + return (PF_DROP); + } + /* + * ICMP error messages don't refer to non-first + * fragments + */ + /* + * Note: We are dealing with an encapsulated + * header. This means ip_off/ip_len are not + * in host byte order! 
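+			 * The outer IP header was already converted to
+			 * host byte order by ip_input(), but this embedded
+			 * copy still carries network byte order, which is
+			 * why the fragment test below masks with
+			 * htons(IP_OFFMASK).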
+ */ + if (h2.ip_off & htons(IP_OFFMASK)) + return (PF_DROP); + + /* offset of protocol header that follows h2 */ + off2 = ipoff2 + (h2.ip_hl << 2); + + pd2.proto = h2.ip_p; + pd2.src = (struct pf_addr *)&h2.ip_src; + pd2.dst = (struct pf_addr *)&h2.ip_dst; + pd2.ip_sum = &h2.ip_sum; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + ipoff2 = off + sizeof(struct icmp6_hdr); + + if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip6)\n")); + return (PF_DROP); + } + pd2.proto = h2_6.ip6_nxt; + pd2.src = (struct pf_addr *)&h2_6.ip6_src; + pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; + pd2.ip_sum = NULL; + off2 = ipoff2 + sizeof(h2_6); + do { + switch (pd2.proto) { + case IPPROTO_FRAGMENT: + /* + * ICMPv6 error messages for + * non-first fragments + */ + return (PF_DROP); + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off2, &opt6, + sizeof(opt6), NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMPv6 short opt\n")); + return (PF_DROP); + } + if (pd2.proto == IPPROTO_AH) + off2 += (opt6.ip6e_len + 2) * 4; + else + off2 += (opt6.ip6e_len + 1) * 8; + pd2.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + break; +#endif /* INET6 */ + } + + switch (pd2.proto) { + case IPPROTO_TCP: { + struct tcphdr th; + u_int32_t seq; + struct pf_state key; + struct pf_state_peer *src, *dst; + u_int8_t dws; + int copyback = 0; + + /* + * Only the first 8 bytes of the TCP header can be + * expected. Don't access any TCP header fields after + * th_seq, an ackskew test is not possible. 
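+			 * (RFC 792 only guarantees the embedded IP header
+			 * plus the first 8 octets of the offending
+			 * datagram, enough for the ports and th_seq but
+			 * not for th_ack or the TCP flags.)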
+ */ + if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(tcp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_TCP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); + key.ext.port = th.th_dport; + key.gwy.port = th.th_sport; + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.lan.port = th.th_dport; + key.ext.port = th.th_sport; + } + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->dst; + dst = &(*state)->src; + } else { + src = &(*state)->src; + dst = &(*state)->dst; + } + + if (src->wscale && dst->wscale && + !(th.th_flags & TH_SYN)) + dws = dst->wscale & PF_WSCALE_MASK; + else + dws = 0; + + /* Demodulate sequence number */ + seq = ntohl(th.th_seq) - src->seqdiff; + if (src->seqdiff) { + pf_change_a(&th.th_seq, icmpsum, + htonl(seq), 0); + copyback = 1; + } + + if (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + return (PF_DROP); + } + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &th.th_sport, + daddr, &(*state)->lan.addr, + (*state)->lan.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } else { + pf_change_icmp(pd2.dst, &th.th_dport, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } + copyback = 1; + } + + if (copyback) { + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), + (caddr_t)&h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + (caddr_t)&h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, 8, (caddr_t)&th); + } + + return (PF_PASS); + break; + } + case IPPROTO_UDP: { + struct udphdr uh; + struct pf_state key; + + if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(udp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_UDP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); + key.ext.port = uh.uh_dport; + key.gwy.port = uh.uh_sport; + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.lan.port = uh.uh_dport; + key.ext.port = uh.uh_sport; + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &uh.uh_sport, + daddr, &(*state)->lan.addr, + (*state)->lan.port, &uh.uh_sum, + pd2.ip_sum, icmpsum, + pd->ip_sum, 1, pd2.af); + } else { + pf_change_icmp(pd2.dst, &uh.uh_dport, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, &uh.uh_sum, + pd2.ip_sum, icmpsum, + pd->ip_sum, 1, pd2.af); + } + switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + 
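+ /* + * pf_change_icmp() already adjusted the outer icmp6 + * checksum; write the header back out along with the + * quoted packet below. + */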
m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + (caddr_t)&h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); + } + + return (PF_PASS); + break; + } +#ifdef INET + case IPPROTO_ICMP: { + struct icmp iih; + struct pf_state key; + + if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, + NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(icmp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); + key.ext.port = iih.icmp_id; + key.gwy.port = iih.icmp_id; + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.lan.port = iih.icmp_id; + key.ext.port = iih.icmp_id; + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &iih.icmp_id, + daddr, &(*state)->lan.addr, + (*state)->lan.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + } else { + pf_change_icmp(pd2.dst, &iih.icmp_id, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + } + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); + } + + return (PF_PASS); + break; + } +#endif /* INET */ +#ifdef INET6 + case IPPROTO_ICMPV6: { + struct icmp6_hdr iih; + struct pf_state key; + + if (!pf_pull_hdr(m, off2, &iih, + sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(icmp6)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMPV6; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); + key.ext.port = iih.icmp6_id; + key.gwy.port = iih.icmp6_id; + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.lan.port = iih.icmp6_id; + key.ext.port = iih.icmp6_id; + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &iih.icmp6_id, + daddr, &(*state)->lan.addr, + (*state)->lan.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + } else { + pf_change_icmp(pd2.dst, &iih.icmp6_id, + saddr, &(*state)->gwy.addr, + (*state)->gwy.port, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + } + m_copyback(m, off, sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); + m_copyback(m, off2, sizeof(struct icmp6_hdr), + (caddr_t)&iih); + } + + return (PF_PASS); + break; + } +#endif /* INET6 */ + default: { + struct pf_state key; + + key.af = pd2.af; + key.proto = pd2.proto; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); + key.ext.port = 0; + key.gwy.port = 0; + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); + key.lan.port = 0; + key.ext.port = 0; + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE(*state)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, NULL, + daddr, &(*state)->lan.addr, + 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } else { + pf_change_icmp(pd2.dst, NULL, + saddr, &(*state)->gwy.addr, + 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } + 
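+ /* + * Write the updated ICMP and quoted IP headers back + * to the mbuf so the translation shows up in the + * forwarded packet. + */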
switch (pd2.af) { +#ifdef INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + (caddr_t)pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + m_copyback(m, off, + sizeof(struct icmp6_hdr), + (caddr_t)pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof(h2_6), + (caddr_t)&h2_6); + break; +#endif /* INET6 */ + } + } + + return (PF_PASS); + break; + } + } + } +} + +int +pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, + struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_state key; + + key.af = pd->af; + key.proto = pd->proto; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.ext.port = 0; + key.gwy.port = 0; + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.lan.port = 0; + key.ext.port = 0; + } + + STATE_LOOKUP(); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFOTHERS_SINGLE) + src->state = PFOTHERS_SINGLE; + if (dst->state == PFOTHERS_SINGLE) + dst->state = PFOTHERS_MULTIPLE; + + /* update expire time */ + (*state)->expire = time_second; + if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) + (*state)->timeout = PFTM_OTHER_MULTIPLE; + else + (*state)->timeout = PFTM_OTHER_SINGLE; + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE(*state)) { + if (direction == PF_OUT) + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, (*state)->gwy.addr.v4.s_addr, + 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af); + break; +#endif /* INET6 */ + } + else + switch (pd->af) { +#ifdef INET + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, (*state)->lan.addr.v4.s_addr, + 0); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af); + break; +#endif /* INET6 */ + } + } + + return (PF_PASS); +} + +/* + * ipoff and off are measured from the start of the mbuf chain. + * h must be at "ipoff" on the mbuf chain. 
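+ * pf_pull_hdr() checks that the requested bytes lie inside the + * packet and not in a later fragment, then copies "len" bytes at + * "off" into the caller-supplied buffer "p".  On failure it returns + * NULL and stores the verdict in *actionp and *reasonp.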
+ */ +void * +pf_pull_hdr(struct mbuf *m, int off, void *p, int len, + u_short *actionp, u_short *reasonp, sa_family_t af) +{ + switch (af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + u_int16_t fragoff = (h->ip_off & IP_OFFMASK) << 3; + + if (fragoff) { + if (fragoff >= len) + ACTION_SET(actionp, PF_PASS); + else { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_FRAG); + } + return (NULL); + } + if (m->m_pkthdr.len < off + len || + h->ip_len < off + len) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.len < off + len || + (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < + (unsigned)(off + len)) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET6 */ + } + m_copydata(m, off, len, p); + return (p); +} + +int +pf_routable(struct pf_addr *addr, sa_family_t af) +{ + struct sockaddr_in *dst; + struct route ro; + int ret = 0; + + bzero(&ro, sizeof(ro)); + dst = satosin(&ro.ro_dst); + dst->sin_family = af; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + rtalloc_ign(&ro, (RTF_CLONING|RTF_PRCLONING)); + + if (ro.ro_rt != NULL) { + ret = 1; + RTFREE(ro.ro_rt); + } + + return (ret); +} + +#ifdef INET +void +pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s) +{ + struct mbuf *m0, *m1; + struct route iproute; + struct route *ro = NULL; + struct sockaddr_in *dst; + struct ip *ip; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + int error = 0; + int sw_csum; + + if (m == NULL || *m == NULL || r == NULL || + (dir != PF_IN && dir != PF_OUT) || oifp == NULL) + panic("pf_route: invalid parameters"); + + if (((*m)->m_pkthdr.pf_flags & PF_MBUF_ROUTED) == 0) { + (*m)->m_pkthdr.pf_flags |= PF_MBUF_ROUTED; + (*m)->m_pkthdr.pf_routed = 1; + } else { + if ((*m)->m_pkthdr.pf_routed > 3) { + m0 = *m; + *m = NULL; + goto bad; + } + (*m)->m_pkthdr.pf_routed++; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL) + return; + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + return; + m0 = *m; + } + + if (m0->m_len < sizeof(struct ip)) + panic("pf_route: m0->m_len < sizeof(struct ip)"); + ip = mtod(m0, struct ip *); + + ro = &iproute; + bzero((caddr_t)ro, sizeof(*ro)); + dst = satosin(&ro->ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = ip->ip_dst; + + if (r->rt == PF_FASTROUTE) { + rtalloc(ro); + if (ro->ro_rt == 0) { + ipstat.ips_noroute++; + goto bad; + } + + ifp = ro->ro_rt->rt_ifp; + ro->ro_rt->rt_use++; + + if (ro->ro_rt->rt_flags & RTF_GATEWAY) + dst = satosin(ro->ro_rt->rt_gateway); + } else { + if (TAILQ_EMPTY(&r->rpool.list)) + panic("pf_route: TAILQ_EMPTY(&r->rpool.list)"); + if (s == NULL) { + pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET)) + dst->sin_addr.s_addr = naddr.v4.s_addr; + ifp = r->rpool.cur->kif ? + r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET)) + dst->sin_addr.s_addr = + s->rt_addr.v4.s_addr; + ifp = s->rt_kif ? 
s->rt_kif->pfik_ifp : NULL; + } + } + if (ifp == NULL) + goto bad; + + if (oifp != ifp) { + if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < sizeof(struct ip)) + panic("pf_route: m0->m_len < sizeof(struct ip)"); + ip = mtod(m0, struct ip *); + } + + /* Copied from ip_output. */ + m0->m_pkthdr.csum_flags |= CSUM_IP; + sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; + if (sw_csum & CSUM_DELAY_DATA) { + in_delayed_cksum(m0); + sw_csum &= ~CSUM_DELAY_DATA; + } + m0->m_pkthdr.csum_flags &= ifp->if_hwassist; + + /* + * If small enough for interface, or the interface will take + * care of the fragmentation for us, can just send directly. + */ + if (ip->ip_len <= ifp->if_mtu || ((ifp->if_hwassist & CSUM_FRAGMENT) && + (ip->ip_off & IP_DF) == 0)) { + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + if (sw_csum & CSUM_DELAY_IP) { + /* From KAME */ + if (ip->ip_v == IPVERSION && + (ip->ip_hl << 2) == sizeof(*ip)) { + ip->ip_sum = in_cksum_hdr(ip); + } else { + ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); + } + } + + error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ + if (ip->ip_off & IP_DF) { + ipstat.ips_cantfrag++; + if (r->rt != PF_DUPTO) { + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, + ifp); + goto done; + } else + goto bad; + } + + m1 = m0; + error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); + if (error) + goto bad; + + for (m0 = m1; m0; m0 = m1) { + m1 = m0->m_nextpkt; + m0->m_nextpkt = 0; + if (error == 0) + error = (*ifp->if_output)(ifp, m0, sintosa(dst), + NULL); + else + m_freem(m0); + } + + if (error == 0) + ipstat.ips_fragmented++; + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + if (ro == &iproute && ro->ro_rt) + RTFREE(ro->ro_rt); + return; + +bad: + m_freem(m0); + goto done; +} +#endif /* INET */ + +#ifdef INET6 +void +pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s) +{ + struct mbuf *m0; + struct route_in6 ip6route; + struct route_in6 *ro; + struct sockaddr_in6 *dst; + struct ip6_hdr *ip6; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + int error = 0; + + if (m == NULL || *m == NULL || r == NULL || + (dir != PF_IN && dir != PF_OUT) || oifp == NULL) + panic("pf_route6: invalid parameters"); + + if (((*m)->m_pkthdr.pf_flags & PF_MBUF_ROUTED) == 0) { + (*m)->m_pkthdr.pf_flags |= PF_MBUF_ROUTED; + (*m)->m_pkthdr.pf_routed = 1; + } else { + if ((*m)->m_pkthdr.pf_routed > 3) { + m0 = *m; + *m = NULL; + goto bad; + } + (*m)->m_pkthdr.pf_routed++; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL) + return; + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + return; + m0 = *m; + } + + if (m0->m_len < sizeof(struct ip6_hdr)) + panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + ip6 = mtod(m0, struct ip6_hdr *); + + ro = &ip6route; + bzero((caddr_t)ro, sizeof(*ro)); + dst = (struct sockaddr_in6 *)&ro->ro_dst; + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof(*dst); + dst->sin6_addr = ip6->ip6_dst; + + /* Cheat. 
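+ * For PF_FASTROUTE, mark the mbuf as pf-generated so that + * pf_test6() passes it without another lookup, then hand it + * straight to ip6_output() for regular routing.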
*/ + if (r->rt == PF_FASTROUTE) { + m0->m_pkthdr.pf_flags |= PF_MBUF_GENERATED; + ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); + return; + } + + if (TAILQ_EMPTY(&r->rpool.list)) + panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)"); + if (s == NULL) { + pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst->sin6_addr, + &naddr, AF_INET6); + ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst->sin6_addr, + &s->rt_addr, AF_INET6); + ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + } + if (ifp == NULL) + goto bad; + + if (oifp != ifp) { + if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < sizeof(struct ip6_hdr)) + panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + ip6 = mtod(m0, struct ip6_hdr *); + } + + /* + * If the packet is too large for the outgoing interface, + * send back an icmp6 error. + */ + if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr)) + dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { + error = nd6_output(ifp, ifp, m0, dst, NULL); + } else { + in6_ifstat_inc(ifp, ifs6_in_toobig); + if (r->rt != PF_DUPTO) + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); + else + goto bad; + } + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + return; + +bad: + m_freem(m0); + goto done; +} +#endif /* INET6 */ + + +/* + * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag + * off is the offset where the protocol header starts + * len is the total length of protocol header plus payload + * returns 0 when the checksum is valid, otherwise returns 1. + */ +/* + * XXX + * FreeBSD supports cksum offload for the following drivers. + * em(4), gx(4), lge(4), nge(4), ti(4), xl(4) + * If we can make full use of it we would outperform ipfw/ipfilter in + * very heavy traffic. + * I have not tested 'cause I don't have NICs that supports cksum offload. + * (There might be problems. Typical phenomena would be + * 1. No route message for UDP packet. + * 2. No connection acceptance from external hosts regardless of rule set.) 
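+ * + * For hardware-verified TCP/UDP checksums, csum_data either already + * holds the final checksum (CSUM_PSEUDO_HDR set) or still lacks the + * pseudo-header sum, which is folded in below; a valid packet yields + * 0xffff, which the XOR then turns into zero.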
+ */ +int +pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, + sa_family_t af) +{ + u_int16_t sum = 0; + int hw_assist = 0; + struct ip *ip; + + if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) + return (1); + if (m->m_pkthdr.len < off + len) + return (1); + + switch (p) { + case IPPROTO_TCP: + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + sum = m->m_pkthdr.csum_data; + } else { + ip = mtod(m, struct ip *); + sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl((u_short)len + + m->m_pkthdr.csum_data + p)); + } + sum ^= 0xffff; + ++hw_assist; + } + break; + case IPPROTO_ICMP: +#ifdef INET6 + case IPPROTO_ICMPV6: +#endif /* INET6 */ + break; + default: + return (1); + } + + if (!hw_assist) { + switch (af) { + case AF_INET: + if (p == IPPROTO_ICMP) { + if (m->m_len < off) + return (1); + m->m_data += off; + m->m_len -= off; + sum = in_cksum(m, len); + m->m_data -= off; + m->m_len += off; + } else { + if (m->m_len < sizeof(struct ip)) + return (1); + sum = in_cksum_range(m, p, off, len); + if (sum == 0) { + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | + CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + } + break; +#ifdef INET6 + case AF_INET6: + if (m->m_len < sizeof(struct ip6_hdr)) + return (1); + sum = in6_cksum(m, p, off, len); + /* + * XXX + * IPv6 H/W cksum off-load not supported yet! + * + * if (sum == 0) { + * m->m_pkthdr.csum_flags |= + * (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); + * m->m_pkthdr.csum_data = 0xffff; + *} + */ + break; +#endif /* INET6 */ + default: + return (1); + } + } + if (sum) { + switch (p) { + case IPPROTO_TCP: + tcpstat.tcps_rcvbadsum++; + break; + case IPPROTO_UDP: + udpstat.udps_badsum++; + break; + case IPPROTO_ICMP: + icmpstat.icps_checksum++; + break; +#ifdef INET6 + case IPPROTO_ICMPV6: + icmp6stat.icp6s_checksum++; + break; +#endif /* INET6 */ + } + return (1); + } + return (0); +} + +#ifdef INET +int +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip *h = NULL; + struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, dirndx, pqid = 0; + + if (!pf_status.running || (m->m_pkthdr.pf_flags & PF_MBUF_GENERATED)) + return (PF_PASS); + + kif = pfi_index2kif[ifp->if_index]; + if (kif == NULL) + return (PF_DROP); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("non-M_PKTHDR is passed to pf_test"); +#endif + + memset(&pd, 0, sizeof(pd)); + if (m->m_pkthdr.len < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; + h = mtod(m, struct ip *); + + off = h->ip_hl << 2; + if (off < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + pd.src = (struct pf_addr *)&h->ip_src; + pd.dst = (struct pf_addr *)&h->ip_dst; + PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET); + pd.ip_sum = &h->ip_sum; + pd.proto = h->ip_p; + pd.af = AF_INET; + pd.tos = h->ip_tos; + pd.tot_len = h->ip_len; + + /* handle fragments that didn't get reassembled by normalization */ + if (h->ip_off & (IP_MF | IP_OFFMASK)) { + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + goto done; + } + + switch (h->ip_p) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + h->ip_len - off, IPPROTO_TCP, AF_INET)) { + action = PF_DROP; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + if ((th.th_flags & TH_ACK) && pd.p_len == 0) + pqid = 1; + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_tcp(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, + off, h->ip_len - off, IPPROTO_UDP, AF_INET)) { + action = PF_DROP; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_udp(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_ICMP: { + struct icmp ih; + + pd.hdr.icmp = &ih; + if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + h->ip_len - off, IPPROTO_ICMP, AF_INET)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_icmp(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset); + break; + } + + default: + action = pf_test_state_other(&s, dir, kif, &pd); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_other(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset); + break; + } + +done: + if (action == PF_PASS && h->ip_hl > 5 && + !((s && s->allow_opts) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with ip options\n")); + } + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + struct m_tag *mtag; + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), MB_DONTWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + if (pqid || pd.tos == IPTOS_LOWDELAY) + atag->qid = r->pqid; + else + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = AF_INET; + atag->hdr = h; + 
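+ /* attach the tag for the ALTQ classifier */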
m_tag_prepend(m, mtag); + } + } +#endif + + /* + * connections redirected to loopback should not match sockets + * bound specifically to loopback due to security implications, + * see tcp_input() and in_pcblookup_listen(). + */ + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + + m->m_pkthdr.pf_flags |= PF_MBUF_TRANSLATE_LOCALHOST; + + if (log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset); + + kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + r->packets++; + r->bytes += pd.tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd.tot_len; + } + if (s != NULL) { + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets++; + s->nat_rule.ptr->bytes += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets++; + s->src_node->bytes += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets++; + s->nat_src_node->bytes += pd.tot_len; + } + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL) { + struct pf_addr *x; + /* + * XXX: we need to make sure that the addresses + * passed to pfr_update_stats() are the same as + * the addresses used during matching (pfr_match) + */ + if (r == &pf_default_rule) { + tr = nr; + x = (s == NULL || s->direction == dir) ? + &pd.baddr : &pd.naddr; + } else + x = (s == NULL || s->direction == dir) ? + &pd.naddr : &pd.baddr; + if (x == &pd.baddr || s == NULL) { + /* we need to change the address */ + if (dir == PF_OUT) + pd.src = x; + else + pd.dst = x; + } + } + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || + s->direction == dir) ? pd.src : pd.dst, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->src.not); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || + s->direction == dir) ? 
pd.dst : pd.src, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->dst.not); + } + + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) + /* pf_route can free the mbuf causing *m0 to become NULL */ + pf_route(m0, r, dir, ifp, s); + + return (action); +} +#endif /* INET */ + +#ifdef INET6 +int +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip6_hdr *h; + struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, terminal = 0, dirndx; + + if (!pf_status.running || (m->m_pkthdr.pf_flags & PF_MBUF_GENERATED)) + return (PF_PASS); + + kif = pfi_index2kif[ifp->if_index]; + if (kif == NULL) + return (PF_DROP); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("non-M_PKTHDR is passed to pf_test"); +#endif + + memset(&pd, 0, sizeof(pd)); + if (m->m_pkthdr.len < (int)sizeof(*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; + h = mtod(m, struct ip6_hdr *); + + pd.src = (struct pf_addr *)&h->ip6_src; + pd.dst = (struct pf_addr *)&h->ip6_dst; + PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); + pd.ip_sum = NULL; + pd.af = AF_INET6; + pd.tos = 0; + pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + + off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); + pd.proto = h->ip6_nxt; + do { + switch (pd.proto) { + case IPPROTO_FRAGMENT: + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + if (action == PF_DROP) + REASON_SET(&reason, PFRES_FRAG); + goto done; + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), + NULL, NULL, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short opt\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + if (pd.proto == IPPROTO_AH) + off += (opt6.ip6e_len + 2) * 4; + else + off += (opt6.ip6e_len + 1) * 8; + pd.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + + switch (pd.proto) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof(th), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) { + action = PF_DROP; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_tcp(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && uh.uh_sum && 
pf_check_proto_cksum(m, + off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) { + action = PF_DROP; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_udp(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset); + break; + } + + case IPPROTO_ICMPV6: { + struct icmp6_hdr ih; + + pd.hdr.icmp6 = &ih; + if (!pf_pull_hdr(m, off, &ih, sizeof(ih), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (dir == PF_IN && pf_check_proto_cksum(m, off, + ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) { + action = PF_DROP; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, + m, off, h, &pd); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_icmp(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset); + break; + } + + default: + action = pf_test_state_other(&s, dir, kif, &pd); + if (action == PF_PASS) { + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_other(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset); + break; + } + +done: + /* XXX handle IPv6 options, if not allowed. not implemented. */ + +#ifdef ALTQ + if (action == PF_PASS && r->qid) { + struct m_tag *mtag; + struct altq_tag *atag; + + mtag = m_tag_get(PACKET_TAG_PF_QID, sizeof(*atag), MB_DONTWAIT); + if (mtag != NULL) { + atag = (struct altq_tag *)(mtag + 1); + if (pd.tos == IPTOS_LOWDELAY) + atag->qid = r->pqid; + else + atag->qid = r->qid; + /* add hints for ecn */ + atag->af = AF_INET6; + atag->hdr = h; + m_tag_prepend(m, mtag); + } + } +#endif + + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + + m->m_pkthdr.pf_flags |= PF_MBUF_TRANSLATE_LOCALHOST; + + if (log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset); + + kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + r->packets++; + r->bytes += pd.tot_len; + if (a != NULL) { + a->packets++; + a->bytes += pd.tot_len; + } + if (s != NULL) { + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets++; + s->nat_rule.ptr->bytes += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets++; + s->src_node->bytes += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets++; + s->nat_src_node->bytes += pd.tot_len; + } + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL) { + struct pf_addr *x; + /* + * XXX: we need to make sure that the addresses + * passed to pfr_update_stats() are the same as + * the addresses used during matching (pfr_match) + */ + if (r == &pf_default_rule) { + tr = nr; + x = (s == NULL || s->direction == dir) ? 
+ &pd.baddr : &pd.naddr; + } else { + x = (s == NULL || s->direction == dir) ? + &pd.naddr : &pd.baddr; + } + if (x == &pd.baddr || s == NULL) { + if (dir == PF_OUT) + pd.src = x; + else + pd.dst = x; + } + } + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || + s->direction == dir) ? pd.src : pd.dst, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->src.not); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || + s->direction == dir) ? pd.dst : pd.src, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->dst.not); + } + + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) + /* pf_route6 can free the mbuf causing *m0 to become NULL */ + pf_route6(m0, r, dir, ifp, s); + + return (action); +} +#endif /* INET6 */ diff --git a/sys/net/pf/pf_if.c b/sys/net/pf/pf_if.c new file mode 100644 index 0000000000..78b6a37d87 --- /dev/null +++ b/sys/net/pf/pf_if.c @@ -0,0 +1,973 @@ +/* $FreeBSD: src/sys/contrib/pf/net/pf_if.c,v 1.6 2004/09/14 15:20:24 mlaier Exp $ */ +/* $OpenBSD: pf_if.c,v 1.11 2004/03/15 11:38:23 cedric Exp $ */ +/* add $OpenBSD: pf_if.c,v 1.19 2004/08/11 12:06:44 henning Exp $ */ +/* $DragonFly: src/sys/net/pf/pf_if.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2003 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#ifdef INET6 +#include +#endif /* INET6 */ + +#define ACCEPT_FLAGS(oklist) \ + do { \ + if ((flags & ~(oklist)) & \ + PFI_FLAG_ALLMASK) \ + return (EINVAL); \ + } while (0) + +#define senderr(e) do { rv = (e); goto _bad; } while (0) + +struct pfi_kif **pfi_index2kif; +struct pfi_kif *pfi_self, *pfi_dummy; +int pfi_indexlim; +struct pfi_ifhead pfi_ifs; +struct pfi_statehead pfi_statehead; +int pfi_ifcnt; +vm_zone_t pfi_addr_pl; +long pfi_update = 1; +struct pfr_addr *pfi_buffer; +int pfi_buffer_cnt; +int pfi_buffer_max; +char pfi_reserved_anchor[PF_ANCHOR_NAME_SIZE] = + PF_RESERVED_ANCHOR; +char pfi_interface_ruleset[PF_RULESET_NAME_SIZE] = + PF_INTERFACE_RULESET; + +eventhandler_tag pfi_clone_cookie = NULL; +eventhandler_tag pfi_attach_cookie = NULL; +eventhandler_tag pfi_detach_cookie = NULL; + +void pfi_dynaddr_update(void *); +void pfi_kifaddr_update(void *); +void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, + int, int); +void pfi_instance_add(struct ifnet *, int, int); +void pfi_address_add(struct sockaddr *, int, int); +int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); +struct pfi_kif *pfi_if_create(const char *, struct pfi_kif *, int); +void pfi_copy_group(char *, const char *, int); +void pfi_dynamic_drivers(void); +void pfi_newgroup(const char *, int); +int pfi_skip_if(const char *, struct pfi_kif *, int); +int pfi_unmask(void *); +void pfi_dohooks(struct pfi_kif *); +void pfi_kifaddr_update_event(void *, struct ifnet *); +void pfi_attach_clone_event(void *, struct if_clone *); +void pfi_attach_ifnet_event(void *, struct ifnet *); +void pfi_detach_ifnet_event(void *, struct ifnet *); + +RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); + +#define PFI_DYNAMIC_BUSES { "pcmcia", "cardbus", "uhub" } +#define PFI_BUFFER_MAX 0x10000 +MALLOC_DEFINE(PFI_MTYPE, "pf_if", "pf interface table"); + +void +pfi_initialize(void) +{ + struct ifnet *ifp; + + if (pfi_self != NULL) /* already initialized */ + return; + + TAILQ_INIT(&pfi_statehead); + pfi_buffer_max = 64; + pfi_buffer = malloc(pfi_buffer_max * sizeof(*pfi_buffer), + PFI_MTYPE, M_WAITOK); + pfi_self = pfi_if_create("self", NULL, PFI_IFLAG_GROUP); + pfi_dynamic_drivers(); + + TAILQ_FOREACH(ifp, &ifnet, if_link) { + if (ifp->if_dunit != IF_DUNIT_NONE) + pfi_attach_ifnet(ifp); + } + pfi_dummy = pfi_if_create("notyet", pfi_self, + PFI_IFLAG_GROUP | PFI_IFLAG_DYNAMIC); + pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event, + pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event, + pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); + pfi_clone_cookie = EVENTHANDLER_REGISTER(if_clone_event, + pfi_attach_clone_event, NULL, EVENTHANDLER_PRI_ANY); +} + +void +pfi_cleanup(void) +{ + struct pfi_kif *p, key; + struct ifnet *ifp; + + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); + EVENTHANDLER_DEREGISTER(if_clone_event, pfi_clone_cookie); + + /* release PFI_IFLAG_INSTANCE */ + TAILQ_FOREACH(ifp, &ifnet, if_link) { + strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + if (p != NULL) + 
pfi_detach_ifnet(ifp); + } + + /* XXX clear all other interface group */ + while ((p = RB_MIN(pfi_ifhead, &pfi_ifs))) { + RB_REMOVE(pfi_ifhead, &pfi_ifs, p); + + free(p->pfik_ah_head, PFI_MTYPE); + free(p, PFI_MTYPE); + } + free(pfi_index2kif, PFI_MTYPE); + free(pfi_buffer, PFI_MTYPE); + pfi_index2kif = NULL; + pfi_buffer = NULL; + pfi_self = NULL; +} + +/* + * Wrapper functions for FreeBSD eventhandler + */ +void +pfi_kifaddr_update_event(void *arg, struct ifnet *ifp) +{ + struct pfi_kif *p = arg; + + /* + * Check to see if it is 'our' interface as we do not have per + * interface hooks and thus get an update for every interface. + */ + if (p && p->pfik_ifp == ifp) + pfi_kifaddr_update(p); +} + +void +pfi_attach_clone_event(void *arg __unused, struct if_clone *ifc) +{ + pfi_attach_clone(ifc); +} + +void +pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + if (ifp->if_dunit != IF_DUNIT_NONE) + pfi_attach_ifnet(ifp); +} + +void +pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + pfi_detach_ifnet(ifp); +} + +void +pfi_attach_clone(struct if_clone *ifc) +{ + pfi_initialize(); + pfi_newgroup(ifc->ifc_name, PFI_IFLAG_CLONABLE); +} + +void +pfi_attach_ifnet(struct ifnet *ifp) +{ + struct pfi_kif *p, *q, key; + int s; + int realname; + + pfi_initialize(); + s = splsoftnet(); + pfi_update++; + if (ifp->if_index >= pfi_indexlim) { + /* + * grow pfi_index2kif, similar to ifindex2ifnet code in if.c + */ + size_t m, n, oldlim; + struct pfi_kif **mp, **np; + + oldlim = pfi_indexlim; + if (pfi_indexlim == 0) + pfi_indexlim = 64; + while (ifp->if_index >= pfi_indexlim) + pfi_indexlim <<= 1; + + m = oldlim * sizeof(struct pfi_kif *); + mp = pfi_index2kif; + n = pfi_indexlim * sizeof(struct pfi_kif *); + np = malloc(n, PFI_MTYPE, M_NOWAIT); + if (np == NULL) + panic("pfi_attach_ifnet: " + "cannot allocate translation table"); + bzero(np, n); + if (mp != NULL) + bcopy(mp, np, m); + pfi_index2kif = np; + if (mp != NULL) + free(mp, PFI_MTYPE); + } + + strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + /* some additional trickery for placeholders */ + if ((p == NULL) || (p->pfik_parent == pfi_dummy)) { + /* are we looking at a renamed instance or not? */ + pfi_copy_group(key.pfik_name, ifp->if_xname, + sizeof(key.pfik_name)); + realname = (strncmp(key.pfik_name, ifp->if_dname, + sizeof(key.pfik_name)) == 0); + /* add group */ + /* we can change if_xname, hence use if_dname as group id */ + pfi_copy_group(key.pfik_name, ifp->if_dname, + sizeof(key.pfik_name)); + q = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + if (q == NULL) + q = pfi_if_create(key.pfik_name, pfi_self, + PFI_IFLAG_GROUP|PFI_IFLAG_DYNAMIC); + else if (q->pfik_parent == pfi_dummy) { + q->pfik_parent = pfi_self; + q->pfik_flags = (PFI_IFLAG_GROUP | PFI_IFLAG_DYNAMIC); + } + if (q == NULL) + panic("pfi_attach_ifnet: " + "cannot allocate '%s' group", key.pfik_name); + + /* add/modify interface */ + if (p == NULL) + p = pfi_if_create(ifp->if_xname, q, + realname?PFI_IFLAG_INSTANCE:PFI_IFLAG_PLACEHOLDER); + else { + /* remove from the dummy group */ + /* XXX: copy stats? We should not have any!!! 
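+ * The instance is leaving the "notyet" dummy group + * for its real driver group below.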
*/ + pfi_dummy->pfik_delcnt++; + TAILQ_REMOVE(&pfi_dummy->pfik_grouphead, p, + pfik_instances); + /* move to the right group */ + p->pfik_parent = q; + q->pfik_addcnt++; + TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, + pfik_instances); + if (realname) { + p->pfik_flags &= ~PFI_IFLAG_PLACEHOLDER; + p->pfik_flags |= PFI_IFLAG_INSTANCE; + } + } + if (p == NULL) + panic("pfi_attach_ifnet: " + "cannot allocate '%s' interface", ifp->if_xname); + } else + q = p->pfik_parent; + p->pfik_ifp = ifp; + p->pfik_flags |= PFI_IFLAG_ATTACHED; + p->pfik_ah_cookie = EVENTHANDLER_REGISTER(ifaddr_event, + pfi_kifaddr_update_event, p, EVENTHANDLER_PRI_ANY); + pfi_index2kif[ifp->if_index] = p; + pfi_dohooks(p); + splx(s); +} + +void +pfi_detach_ifnet(struct ifnet *ifp) +{ + struct pfi_kif *p, *q, key; + int s; + + strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + + s = splsoftnet(); + pfi_update++; + p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + if (p == NULL) { + printf("pfi_detach_ifnet: cannot find %s", ifp->if_xname); + splx(s); + return; + } + EVENTHANDLER_DEREGISTER(ifaddr_event, p->pfik_ah_cookie); + q = p->pfik_parent; + p->pfik_ifp = NULL; + p->pfik_flags &= ~PFI_IFLAG_ATTACHED; + pfi_index2kif[ifp->if_index] = NULL; + pfi_dohooks(p); + pfi_maybe_destroy(p); + splx(s); +} + +struct pfi_kif * +pfi_lookup_create(const char *name) +{ + struct pfi_kif *p, *q, key; + int s; + + s = splsoftnet(); + p = pfi_lookup_if(name); + if (p == NULL) { + pfi_copy_group(key.pfik_name, name, sizeof(key.pfik_name)); + q = pfi_lookup_if(key.pfik_name); + if ((q != NULL) && (q->pfik_parent != pfi_dummy)) + p = pfi_if_create(name, q, PFI_IFLAG_INSTANCE); + else { + if (pfi_dummy == NULL) + panic("no 'notyet' dummy group"); + p = pfi_if_create(name, pfi_dummy, + PFI_IFLAG_PLACEHOLDER); + } + } + splx(s); + return (p); +} + +struct pfi_kif * +pfi_attach_rule(const char *name) +{ + struct pfi_kif *p; + + p = pfi_lookup_create(name); + if (p != NULL) + p->pfik_rules++; + return (p); +} + +void +pfi_detach_rule(struct pfi_kif *p) +{ + if (p == NULL) + return; + if (p->pfik_rules > 0) + p->pfik_rules--; + else + printf("pfi_detach_rule: reference count at 0\n"); + pfi_maybe_destroy(p); +} + +void +pfi_attach_state(struct pfi_kif *p) +{ + if (!p->pfik_states++) + TAILQ_INSERT_TAIL(&pfi_statehead, p, pfik_w_states); +} + +void +pfi_detach_state(struct pfi_kif *p) +{ + if (p == NULL) + return; + if (p->pfik_states <= 0) { + printf("pfi_detach_state: reference count <= 0\n"); + return; + } + if (!--p->pfik_states) + TAILQ_REMOVE(&pfi_statehead, p, pfik_w_states); + pfi_maybe_destroy(p); +} + +int +pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) +{ + struct pfi_dynaddr *dyn; + char tblname[PF_TABLE_NAME_SIZE]; + struct pf_ruleset *ruleset = NULL; + int s, rv = 0; + + if (aw->type != PF_ADDR_DYNIFTL) + return (0); + dyn = pool_get(&pfi_addr_pl, PR_NOWAIT); + if (dyn == NULL) + return (1); + bzero(dyn, sizeof(*dyn)); + + s = splsoftnet(); + dyn->pfid_kif = pfi_attach_rule(aw->v.ifname); + if (dyn->pfid_kif == NULL) + senderr(1); + + dyn->pfid_net = pfi_unmask(&aw->v.a.mask); + if (af == AF_INET && dyn->pfid_net == 32) + dyn->pfid_net = 128; + strlcpy(tblname, aw->v.ifname, sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_NETWORK) + strlcat(tblname, ":network", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_BROADCAST) + strlcat(tblname, ":broadcast", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_PEER) + strlcat(tblname, ":peer", sizeof(tblname)); + if (aw->iflags & PFI_AFLAG_NOALIAS) + strlcat(tblname, ":0", 
sizeof(tblname)); + if (dyn->pfid_net != 128) + snprintf(tblname + strlen(tblname), + sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); + ruleset = pf_find_or_create_ruleset(pfi_reserved_anchor, + pfi_interface_ruleset); + if (ruleset == NULL) + senderr(1); + + dyn->pfid_kt = pfr_attach_table(ruleset, tblname); + if (dyn->pfid_kt == NULL) + senderr(1); + + dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; + dyn->pfid_iflags = aw->iflags; + dyn->pfid_af = af; + dyn->pfid_hook_cookie = hook_establish(dyn->pfid_kif->pfik_ah_head, 1, + pfi_dynaddr_update, dyn); + if (dyn->pfid_hook_cookie == NULL) + senderr(1); + + aw->p.dyn = dyn; + pfi_dynaddr_update(aw->p.dyn); + splx(s); + return (0); + +_bad: + if (dyn->pfid_kt != NULL) + pfr_detach_table(dyn->pfid_kt); + if (ruleset != NULL) + pf_remove_if_empty_ruleset(ruleset); + if (dyn->pfid_kif != NULL) + pfi_detach_rule(dyn->pfid_kif); + pool_put(&pfi_addr_pl, dyn); + splx(s); + return (rv); +} + +void +pfi_dynaddr_update(void *p) +{ + struct pfi_dynaddr *dyn = (struct pfi_dynaddr *)p; + struct pfi_kif *kif = dyn->pfid_kif; + struct pfr_ktable *kt = dyn->pfid_kt; + + if (dyn == NULL || kif == NULL || kt == NULL) + panic("pfi_dynaddr_update"); + if (kt->pfrkt_larg != pfi_update) { + /* this table needs to be brought up-to-date */ + pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); + kt->pfrkt_larg = pfi_update; + } + pfr_dynaddr_update(kt, dyn); +} + +void +pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) +{ + int e, size2 = 0; + struct pfi_kif *p; + struct pfr_table t; + + if ((kif->pfik_flags & PFI_IFLAG_INSTANCE) && kif->pfik_ifp == NULL) { + pfr_clr_addrs(&kt->pfrkt_t, NULL, 0); + return; + } + pfi_buffer_cnt = 0; + if ((kif->pfik_flags & PFI_IFLAG_INSTANCE)) + pfi_instance_add(kif->pfik_ifp, net, flags); + else if (strcmp(kif->pfik_name, "self")) { + TAILQ_FOREACH(p, &kif->pfik_grouphead, pfik_instances) + pfi_instance_add(p->pfik_ifp, net, flags); + } else { + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) + if (p->pfik_flags & PFI_IFLAG_INSTANCE) + pfi_instance_add(p->pfik_ifp, net, flags); + } + t = kt->pfrkt_t; + t.pfrt_flags = 0; + if ((e = pfr_set_addrs(&t, pfi_buffer, pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0))) + printf("pfi_table_update: cannot set %d new addresses " + "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); +} + +void +pfi_instance_add(struct ifnet *ifp, int net, int flags) +{ + struct ifaddr *ia; + int got4 = 0, got6 = 0; + int net2, af; + + if (ifp == NULL) + return; + TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) { + if (ia->ifa_addr == NULL) + continue; + af = ia->ifa_addr->sa_family; + if (af != AF_INET && af != AF_INET6) + continue; + /* + * XXX: For point-to-point interfaces, (ifname:0) and IPv4, + * jump over address without a proper route to work + * around a problem with ppp not fully removing the + * address used during IPCP. 
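+ * Such stale addresses lack IFA_ROUTE, which is what the first + * check below tests for.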
+ */ + if ((ifp->if_flags & IFF_POINTOPOINT) && + !(ia->ifa_flags & IFA_ROUTE) && + (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET)) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && + !(ifp->if_flags & IFF_BROADCAST)) + continue; + if ((flags & PFI_AFLAG_PEER) && + !(ifp->if_flags & IFF_POINTOPOINT)) + continue; + if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && + IN6_IS_ADDR_LINKLOCAL( + &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) + continue; + if (flags & PFI_AFLAG_NOALIAS) { + if (af == AF_INET && got4) + continue; + if (af == AF_INET6 && got6) + continue; + } + if (af == AF_INET) + got4 = 1; + else + got6 = 1; + net2 = net; + if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { + if (af == AF_INET) { + net2 = pfi_unmask(&((struct sockaddr_in *) + ia->ifa_netmask)->sin_addr); + } else { + net2 = pfi_unmask(&((struct sockaddr_in6 *) + ia->ifa_netmask)->sin6_addr); + } + } + if (af == AF_INET && net2 > 32) + net2 = 32; + if (flags & PFI_AFLAG_BROADCAST) + pfi_address_add(ia->ifa_broadaddr, af, net2); + else if (flags & PFI_AFLAG_PEER) + pfi_address_add(ia->ifa_dstaddr, af, net2); + else + pfi_address_add(ia->ifa_addr, af, net2); + } +} + +void +pfi_address_add(struct sockaddr *sa, int af, int net) +{ + struct pfr_addr *p; + int i; + + if (pfi_buffer_cnt >= pfi_buffer_max) { + int new_max = pfi_buffer_max * 2; + + if (new_max > PFI_BUFFER_MAX) { + printf("pfi_address_add: address buffer full (%d/%d)\n", + pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + p = malloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE, + M_NOWAIT); + if (p == NULL) { + printf("pfi_address_add: no memory to grow buffer " + "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer)); + /* no need to zero buffer */ + free(pfi_buffer, PFI_MTYPE); + pfi_buffer = p; + pfi_buffer_max = new_max; + } + if (af == AF_INET && net > 32) + net = 128; + p = pfi_buffer + pfi_buffer_cnt++; + bzero(p, sizeof(*p)); + p->pfra_af = af; + p->pfra_net = net; + if (af == AF_INET) + p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; + if (af == AF_INET6) { + p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; + if (IN6_IS_ADDR_LINKLOCAL(&p->pfra_ip6addr)) + p->pfra_ip6addr.s6_addr16[1] = 0; + } + /* mask network address bits */ + if (net < 128) + ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8)); + for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++) + ((caddr_t)p)[i] = 0; +} + +void +pfi_dynaddr_remove(struct pf_addr_wrap *aw) +{ + int s; + + if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || + aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL) + return; + + s = splsoftnet(); + hook_disestablish(aw->p.dyn->pfid_kif->pfik_ah_head, + aw->p.dyn->pfid_hook_cookie); + pfi_detach_rule(aw->p.dyn->pfid_kif); + aw->p.dyn->pfid_kif = NULL; + pfr_detach_table(aw->p.dyn->pfid_kt); + aw->p.dyn->pfid_kt = NULL; + pool_put(&pfi_addr_pl, aw->p.dyn); + aw->p.dyn = NULL; + splx(s); +} + +void +pfi_dynaddr_copyout(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || + aw->p.dyn->pfid_kif == NULL) + return; + aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; +} + +void +pfi_kifaddr_update(void *v) +{ + int s; + + s = splsoftnet(); + pfi_update++; + pfi_dohooks(v); + splx(s); +} + +int +pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) +{ + return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); +} + +struct pfi_kif * +pfi_if_create(const char 
*name, struct pfi_kif *q, int flags) +{ + struct pfi_kif *p; + + p = malloc(sizeof(*p), PFI_MTYPE, M_NOWAIT); + if (p == NULL) + return (NULL); + bzero(p, sizeof(*p)); + p->pfik_ah_head = malloc(sizeof(*p->pfik_ah_head), PFI_MTYPE, + M_NOWAIT); + if (p->pfik_ah_head == NULL) { + free(p, PFI_MTYPE); + return (NULL); + } + bzero(p->pfik_ah_head, sizeof(*p->pfik_ah_head)); + TAILQ_INIT(p->pfik_ah_head); + TAILQ_INIT(&p->pfik_grouphead); + strlcpy(p->pfik_name, name, sizeof(p->pfik_name)); + RB_INIT(&p->pfik_lan_ext); + RB_INIT(&p->pfik_ext_gwy); + p->pfik_flags = flags; + p->pfik_parent = q; + p->pfik_tzero = time_second; + + RB_INSERT(pfi_ifhead, &pfi_ifs, p); + if (q != NULL) { + q->pfik_addcnt++; + TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, pfik_instances); + } + pfi_ifcnt++; + return (p); +} + +int +pfi_maybe_destroy(struct pfi_kif *p) +{ + int i, j, k, s; + struct pfi_kif *q = p->pfik_parent; + + if ((p->pfik_flags & (PFI_IFLAG_ATTACHED | PFI_IFLAG_GROUP)) || + p->pfik_rules > 0 || p->pfik_states > 0) + if (!(p->pfik_flags & PFI_IFLAG_PLACEHOLDER)) + return (0); + + s = splsoftnet(); + if (q != NULL) { + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + q->pfik_bytes[i][j][k] += + p->pfik_bytes[i][j][k]; + q->pfik_packets[i][j][k] += + p->pfik_packets[i][j][k]; + /* clear stats in case we return to the dummy group */ + p->pfik_bytes[i][j][k] = 0; + p->pfik_packets[i][j][k] = 0; + } + q->pfik_delcnt++; + TAILQ_REMOVE(&q->pfik_grouphead, p, pfik_instances); + } + if (p->pfik_rules > 0 || p->pfik_states > 0) { + /* move back to the dummy group */ + p->pfik_parent = pfi_dummy; + pfi_dummy->pfik_addcnt++; + TAILQ_INSERT_TAIL(&pfi_dummy->pfik_grouphead, p, + pfik_instances); + return (0); + } + pfi_ifcnt--; + RB_REMOVE(pfi_ifhead, &pfi_ifs, p); + splx(s); + + free(p->pfik_ah_head, PFI_MTYPE); + free(p, PFI_MTYPE); + return (1); +} + +void +pfi_copy_group(char *p, const char *q, int m) +{ + while (m > 1 && *q && !(*q >= '0' && *q <= '9')) { + *p++ = *q++; + m--; + } + if (m > 0) + *p++ = '\0'; +} + +void +pfi_dynamic_drivers(void) +{ + struct ifnet *ifp; + +/* + * For FreeBSD basically every interface is "dynamic" as we can unload + * modules e.g. 
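+ * with kldunload(8), which can remove a whole driver (and with it + * an entire interface group) at runtime.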
+ */ + TAILQ_FOREACH(ifp, &ifnet, if_link) { + if (ifp->if_dunit == IF_DUNIT_NONE) + continue; + pfi_newgroup(ifp->if_dname, PFI_IFLAG_DYNAMIC); + } +} + +void +pfi_newgroup(const char *name, int flags) +{ + struct pfi_kif *p; + + p = pfi_lookup_if(name); + if (p == NULL) + p = pfi_if_create(name, pfi_self, PFI_IFLAG_GROUP); + if (p == NULL) { + printf("pfi_newgroup: cannot allocate '%s' group", name); + return; + } + p->pfik_flags |= flags; +} + +void +pfi_fill_oldstatus(struct pf_status *pfs) +{ + struct pfi_kif *p, key; + int i, j, k, s; + + strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); + s = splsoftnet(); + p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + if (p == NULL) { + splx(s); + return; + } + bzero(pfs->pcounters, sizeof(pfs->pcounters)); + bzero(pfs->bcounters, sizeof(pfs->bcounters)); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + pfs->pcounters[i][j][k] = + p->pfik_packets[i][j][k]; + pfs->bcounters[i][j] += + p->pfik_bytes[i][j][k]; + } + splx(s); +} + +int +pfi_clr_istats(const char *name, int *nzero, int flags) +{ + struct pfi_kif *p; + int n = 0, s; + long tzero = time_second; + + s = splsoftnet(); + ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p, flags)) + continue; + bzero(p->pfik_packets, sizeof(p->pfik_packets)); + bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); + p->pfik_tzero = tzero; + n++; + } + splx(s); + if (nzero != NULL) + *nzero = n; + return (0); +} + +int +pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) +{ + struct pfi_kif *p; + int s, n = 0; + + ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); + s = splsoftnet(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p, flags)) + continue; + if (*size > n++) { + if (!p->pfik_tzero) + p->pfik_tzero = boottime.tv_sec; + if (copyout(p, buf++, sizeof(*buf))) { + splx(s); + return (EFAULT); + } + } + } + splx(s); + *size = n; + return (0); +} + +struct pfi_kif * +pfi_lookup_if(const char *name) +{ + struct pfi_kif *p, key; + + strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); + p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + return (p); +} + +int +pfi_skip_if(const char *filter, struct pfi_kif *p, int f) +{ + int n; + + if ((p->pfik_flags & PFI_IFLAG_GROUP) && !(f & PFI_FLAG_GROUP)) + return (1); + if ((p->pfik_flags & PFI_IFLAG_INSTANCE) && !(f & PFI_FLAG_INSTANCE)) + return (1); + if (filter == NULL || !*filter) + return (0); + if (!strcmp(p->pfik_name, filter)) + return (0); /* exact match */ + n = strlen(filter); + if (n < 1 || n >= IFNAMSIZ) + return (1); /* sanity check */ + if (filter[n-1] >= '0' && filter[n-1] <= '9') + return (1); /* only do exact match in that case */ + if (strncmp(p->pfik_name, filter, n)) + return (1); /* prefix doesn't match */ + return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); +} + +/* from pf_print_state.c */ +int +pfi_unmask(void *addr) +{ + struct pf_addr *m = addr; + int i = 31, j = 0, b = 0; + u_int32_t tmp; + + while (j < 4 && m->addr32[j] == 0xffffffff) { + b += 32; + j++; + } + if (j < 4) { + tmp = ntohl(m->addr32[j]); + for (i = 31; tmp & (1 << i); --i) + b++; + } + return (b); +} + +void +pfi_dohooks(struct pfi_kif *p) +{ + for (; p != NULL; p = p->pfik_parent) + dohooks(p->pfik_ah_head, 0); +} + +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) +{ + if (af == AF_INET) { + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, 
AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + } else { + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + } +} diff --git a/sys/net/pf/pf_ioctl.c b/sys/net/pf/pf_ioctl.c new file mode 100644 index 0000000000..9be2862623 --- /dev/null +++ b/sys/net/pf/pf_ioctl.c @@ -0,0 +1,3129 @@ +/* $FreeBSD: src/sys/contrib/pf/net/pf_ioctl.c,v 1.12 2004/08/12 14:15:42 mlaier Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.112.2.2 2004/07/24 18:28:12 brad Exp $ */ +/* $DragonFly: src/sys/net/pf/pf_ioctl.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
+ *
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "use_pfsync.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/filio.h>
+#include <sys/fcntl.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/time.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+
+#include <net/pf/pfvar.h>
+
+#if NPFSYNC > 0
+#include <net/pf/if_pfsync.h>
+#endif /* NPFSYNC > 0 */
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet/in_pcb.h>
+#endif /* INET6 */
+
+#ifdef ALTQ
+#include <altq/altq.h>
+#endif
+
+#include <machine/limits.h>
+#include <net/pfil.h>
+void init_zone_var(void);
+void cleanup_pf_zone(void);
+int pfattach(void);
+int pfopen(dev_t, int, int, struct thread *);
+int pfclose(dev_t, int, int, struct thread *);
+struct pf_pool *pf_get_pool(char *, char *, u_int32_t,
+ u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t);
+int pf_get_ruleset_number(u_int8_t);
+void pf_init_ruleset(struct pf_ruleset *);
+void pf_mv_pool(struct pf_palist *, struct pf_palist *);
+void pf_empty_pool(struct pf_palist *);
+int pfioctl(dev_t, u_long, caddr_t, int, struct thread *);
+#ifdef ALTQ
+int pf_begin_altq(u_int32_t *);
+int pf_rollback_altq(u_int32_t);
+int pf_commit_altq(u_int32_t);
+#endif /* ALTQ */
+int pf_begin_rules(u_int32_t *, int, char *, char *);
+int pf_rollback_rules(u_int32_t, int, char *, char *);
+int pf_commit_rules(u_int32_t, int, char *, char *);
+
+extern struct callout pf_expire_to;
+
+struct pf_rule pf_default_rule;
+
+#define TAGID_MAX 50000
+TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
+ pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
+
+#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
+#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
+#endif
+static u_int16_t tagname2tag(struct pf_tags *, char *);
+static void tag2tagname(struct pf_tags *, u_int16_t, char *);
+static void tag_unref(struct pf_tags *, u_int16_t);
+
+#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
+
+static dev_t pf_dev;
+
+/*
+ * XXX - These are new and need to be checked when moving to a new version
+ */
+static void pf_clear_states(void);
+static int pf_clear_tables(void);
+static void pf_clear_srcnodes(void);
+/*
+ * XXX - These are new and need to be checked when moving to a new version
+ */
+
+/*
+ * Wrapper functions for pfil(9) hooks
+ */
+static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir);
+static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir);
+#ifdef INET6
+static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir);
+static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
+ int dir);
+#endif
+
+static int hook_pf(void);
+static int dehook_pf(void);
+static int shutdown_pf(void);
+static int pf_load(void);
+static int pf_unload(void);
+
+static struct cdevsw pf_cdevsw = { /* XXX convert to port model */
+ .d_name = PF_NAME,
+ .d_maj = 73, /* XXX */
+ .old_open = pfopen,
+ .old_close = pfclose,
+ .old_ioctl = pfioctl
+};
+
+static volatile int pf_pfil_hooked = 0;
+
+void
+init_zone_var(void)
+{
+ pf_src_tree_pl = pf_rule_pl = NULL;
+ pf_state_pl = pf_altq_pl = pf_pooladdr_pl = NULL;
+ pf_frent_pl = pf_frag_pl = pf_cache_pl = pf_cent_pl = NULL;
+ pf_state_scrub_pl = NULL;
+ pfr_ktable_pl = pfr_kentry_pl = NULL;
+}
+
+void
+cleanup_pf_zone(void)
+{
+ ZONE_DESTROY(pf_src_tree_pl);
+ ZONE_DESTROY(pf_rule_pl);
+ ZONE_DESTROY(pf_state_pl);
+ ZONE_DESTROY(pf_altq_pl);
+ ZONE_DESTROY(pf_pooladdr_pl);
+ ZONE_DESTROY(pf_frent_pl);
+ ZONE_DESTROY(pf_frag_pl);
+ ZONE_DESTROY(pf_cache_pl);
+ ZONE_DESTROY(pf_cent_pl);
+ ZONE_DESTROY(pfr_ktable_pl);
+
ZONE_DESTROY(pfr_kentry_pl); + ZONE_DESTROY(pf_state_scrub_pl); + ZONE_DESTROY(pfi_addr_pl); +} + +int +pfattach(void) +{ + u_int32_t *my_timeout = pf_default_rule.timeout; + int error = 1; + + do { + ZONE_CREATE(pf_src_tree_pl,struct pf_src_node, "pfsrctrpl"); + ZONE_CREATE(pf_rule_pl, struct pf_rule, "pfrulepl"); + ZONE_CREATE(pf_state_pl, struct pf_state, "pfstatepl"); + ZONE_CREATE(pf_altq_pl, struct pf_altq, "pfaltqpl"); + ZONE_CREATE(pf_pooladdr_pl,struct pf_pooladdr, "pfpooladdrpl"); + ZONE_CREATE(pfr_ktable_pl, struct pfr_ktable, "pfrktable"); + ZONE_CREATE(pfr_kentry_pl, struct pfr_kentry, "pfrkentry"); + ZONE_CREATE(pf_frent_pl, struct pf_frent, "pffrent"); + ZONE_CREATE(pf_frag_pl, struct pf_fragment, "pffrag"); + ZONE_CREATE(pf_cache_pl, struct pf_fragment, "pffrcache"); + ZONE_CREATE(pf_cent_pl, struct pf_frcache, "pffrcent"); + ZONE_CREATE(pf_state_scrub_pl, struct pf_state_scrub, + "pfstatescrub"); + ZONE_CREATE(pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl"); + error = 0; + } while(0); + if (error) { + cleanup_pf_zone(); + return (error); + } + pfr_initialize(); + pfi_initialize(); + error = pf_osfp_initialize(); + if (error) { + cleanup_pf_zone(); + pf_osfp_cleanup(); + return (error); + } + + pf_pool_limits[PF_LIMIT_STATES].pp = pf_state_pl; + pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; + pf_pool_limits[PF_LIMIT_FRAGS].pp = pf_frent_pl; + pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT; + /* XXX uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp, + pf_pool_limits[PF_LIMIT_STATES].limit); + */ + + RB_INIT(&tree_src_tracking); + TAILQ_INIT(&pf_anchors); + pf_init_ruleset(&pf_main_ruleset); + TAILQ_INIT(&pf_altqs[0]); + TAILQ_INIT(&pf_altqs[1]); + TAILQ_INIT(&pf_pabuf); + pf_altqs_active = &pf_altqs[0]; + pf_altqs_inactive = &pf_altqs[1]; + TAILQ_INIT(&state_updates); + + /* default rule should never be garbage collected */ + pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; + pf_default_rule.action = PF_PASS; + pf_default_rule.nr = -1; + + /* initialize default timeouts */ + my_timeout[PFTM_TCP_FIRST_PACKET] = 120; /* First TCP packet */ + my_timeout[PFTM_TCP_OPENING] = 30; /* No response yet */ + my_timeout[PFTM_TCP_ESTABLISHED] = 24*60*60; /* Established */ + my_timeout[PFTM_TCP_CLOSING] = 15 * 60; /* Half closed */ + my_timeout[PFTM_TCP_FIN_WAIT] = 45; /* Got both FINs */ + my_timeout[PFTM_TCP_CLOSED] = 90; /* Got a RST */ + my_timeout[PFTM_UDP_FIRST_PACKET] = 60; /* First UDP packet */ + my_timeout[PFTM_UDP_SINGLE] = 30; /* Unidirectional */ + my_timeout[PFTM_UDP_MULTIPLE] = 60; /* Bidirectional */ + my_timeout[PFTM_ICMP_FIRST_PACKET] = 20; /* First ICMP packet */ + my_timeout[PFTM_ICMP_ERROR_REPLY] = 10; /* Got error response */ + my_timeout[PFTM_OTHER_FIRST_PACKET] = 60; /* First packet */ + my_timeout[PFTM_OTHER_SINGLE] = 30; /* Unidirectional */ + my_timeout[PFTM_OTHER_MULTIPLE] = 60; /* Bidirectional */ + my_timeout[PFTM_FRAG] = 30; /* Fragment expire */ + my_timeout[PFTM_INTERVAL] = 10; /* Expire interval */ + + callout_init(&pf_expire_to); + callout_reset(&pf_expire_to, my_timeout[PFTM_INTERVAL] * hz, + pf_purge_timeout, &pf_expire_to); + + pf_normalize_init(); + bzero(&pf_status, sizeof(pf_status)); + pf_status.debug = PF_DEBUG_URGENT; + pf_pfil_hooked = 0; + + /* XXX do our best to avoid a conflict */ + pf_status.hostid = arc4random(); + + return (error); +} + +int +pfopen(dev_t dev, int flags, int devtype, struct thread *td) +{ + if (minor(dev) >= 1) + return (ENXIO); + return (0); +} + +int +pfclose(dev_t dev, int 
flags, int fmt, struct thread *td) +{ + if (minor(dev) >= 1) + return (ENXIO); + return (0); +} + +struct pf_pool * +pf_get_pool(char *anchorname, char *rulesetname, u_int32_t ticket, + u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, + u_int8_t active, u_int8_t check_ticket) +{ + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num; + + ruleset = pf_find_ruleset(anchorname, rulesetname); + if (ruleset == NULL) + return (NULL); + rs_num = pf_get_ruleset_number(rule_action); + if (rs_num >= PF_RULESET_MAX) + return (NULL); + if (active) { + if (check_ticket && ticket != + ruleset->rules[rs_num].active.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + } else { + if (check_ticket && ticket != + ruleset->rules[rs_num].inactive.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); + } + if (!r_last) { + while ((rule != NULL) && (rule->nr != rule_number)) + rule = TAILQ_NEXT(rule, entries); + } + if (rule == NULL) + return (NULL); + + return (&rule->rpool); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *anchorname) +{ + struct pf_anchor *anchor; + int n = -1; + + anchor = TAILQ_FIRST(&pf_anchors); + while (anchor != NULL && (n = strcmp(anchor->name, anchorname)) < 0) + anchor = TAILQ_NEXT(anchor, entries); + if (n == 0) + return (anchor); + else + return (NULL); +} + +struct pf_ruleset * +pf_find_ruleset(char *anchorname, char *rulesetname) +{ + struct pf_anchor *anchor; + struct pf_ruleset *ruleset; + + if (!anchorname[0] && !rulesetname[0]) + return (&pf_main_ruleset); + if (!anchorname[0] || !rulesetname[0]) + return (NULL); + anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; + rulesetname[PF_RULESET_NAME_SIZE-1] = 0; + anchor = pf_find_anchor(anchorname); + if (anchor == NULL) + return (NULL); + ruleset = TAILQ_FIRST(&anchor->rulesets); + while (ruleset != NULL && strcmp(ruleset->name, rulesetname) < 0) + ruleset = TAILQ_NEXT(ruleset, entries); + if (ruleset != NULL && !strcmp(ruleset->name, rulesetname)) + return (ruleset); + else + return (NULL); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(char anchorname[PF_ANCHOR_NAME_SIZE], + char rulesetname[PF_RULESET_NAME_SIZE]) +{ + struct pf_anchor *anchor, *a; + struct pf_ruleset *ruleset, *r; + + if (!anchorname[0] && !rulesetname[0]) + return (&pf_main_ruleset); + if (!anchorname[0] || !rulesetname[0]) + return (NULL); + anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; + rulesetname[PF_RULESET_NAME_SIZE-1] = 0; + a = TAILQ_FIRST(&pf_anchors); + while (a != 
NULL && strcmp(a->name, anchorname) < 0) + a = TAILQ_NEXT(a, entries); + if (a != NULL && !strcmp(a->name, anchorname)) + anchor = a; + else { + anchor = (struct pf_anchor *)malloc(sizeof(struct pf_anchor), + M_TEMP, M_NOWAIT); + if (anchor == NULL) + return (NULL); + memset(anchor, 0, sizeof(struct pf_anchor)); + bcopy(anchorname, anchor->name, sizeof(anchor->name)); + TAILQ_INIT(&anchor->rulesets); + if (a != NULL) + TAILQ_INSERT_BEFORE(a, anchor, entries); + else + TAILQ_INSERT_TAIL(&pf_anchors, anchor, entries); + } + r = TAILQ_FIRST(&anchor->rulesets); + while (r != NULL && strcmp(r->name, rulesetname) < 0) + r = TAILQ_NEXT(r, entries); + if (r != NULL && !strcmp(r->name, rulesetname)) + return (r); + ruleset = (struct pf_ruleset *)malloc(sizeof(struct pf_ruleset), + M_TEMP, M_NOWAIT); + if (ruleset != NULL) { + pf_init_ruleset(ruleset); + bcopy(rulesetname, ruleset->name, sizeof(ruleset->name)); + ruleset->anchor = anchor; + if (r != NULL) + TAILQ_INSERT_BEFORE(r, ruleset, entries); + else + TAILQ_INSERT_TAIL(&anchor->rulesets, ruleset, entries); + } + return (ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *anchor; + int i; + + if (ruleset == NULL || ruleset->anchor == NULL || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + + anchor = ruleset->anchor; + TAILQ_REMOVE(&anchor->rulesets, ruleset, entries); + free(ruleset, M_TEMP); + + if (TAILQ_EMPTY(&anchor->rulesets)) { + TAILQ_REMOVE(&pf_anchors, anchor, entries); + free(anchor, M_TEMP); + pf_update_anchor_rules(); + } +} + +void +pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) +{ + struct pf_pooladdr *mv_pool_pa; + + while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { + TAILQ_REMOVE(poola, mv_pool_pa, entries); + TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); + } +} + +void +pf_empty_pool(struct pf_palist *poola) +{ + struct pf_pooladdr *empty_pool_pa; + + while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { + pfi_dynaddr_remove(&empty_pool_pa->addr); + pf_tbladdr_remove(&empty_pool_pa->addr); + pfi_detach_rule(empty_pool_pa->kif); + TAILQ_REMOVE(poola, empty_pool_pa, entries); + pool_put(&pf_pooladdr_pl, empty_pool_pa); + } +} + +void +pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + if (rulequeue != NULL) { + if (rule->states <= 0) { + /* + * XXX - we need to remove the table *before* detaching + * the rule to make sure the table code does not delete + * the anchor under our feet. 
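(If the rule still carries states, the table references are instead dropped further below, once the last state is gone and pf_rm_rule() is entered again with a NULL rulequeue.)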
+ */ + pf_tbladdr_remove(&rule->src.addr); + pf_tbladdr_remove(&rule->dst.addr); + } + TAILQ_REMOVE(rulequeue, rule, entries); + rule->entries.tqe_prev = NULL; + rule->nr = -1; + } + + if (rule->states > 0 || rule->src_nodes > 0 || + rule->entries.tqe_prev != NULL) + return; + pf_tag_unref(rule->tag); + pf_tag_unref(rule->match_tag); +#ifdef ALTQ + if (rule->pqid != rule->qid) + pf_qid_unref(rule->pqid); + pf_qid_unref(rule->qid); +#endif + pfi_dynaddr_remove(&rule->src.addr); + pfi_dynaddr_remove(&rule->dst.addr); + if (rulequeue == NULL) { + pf_tbladdr_remove(&rule->src.addr); + pf_tbladdr_remove(&rule->dst.addr); + } + pfi_detach_rule(rule->kif); + pf_empty_pool(&rule->rpool.list); + pool_put(&pf_rule_pl, rule); +} + +static u_int16_t +tagname2tag(struct pf_tags *head, char *tagname) +{ + struct pf_tagname *tag, *p = NULL; + u_int16_t new_tagid = 1; + + TAILQ_FOREACH(tag, head, entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + return (tag->tag); + } + + /* + * to avoid fragmentation, we do a linear search from the beginning + * and take the first free slot we find. if there is none or the list + * is empty, append a new entry at the end. + */ + + /* new entry */ + if (!TAILQ_EMPTY(head)) + for (p = TAILQ_FIRST(head); p != NULL && + p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) + new_tagid = p->tag + 1; + + if (new_tagid > TAGID_MAX) + return (0); + + /* allocate and fill new struct pf_tagname */ + tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname), + M_TEMP, M_NOWAIT); + if (tag == NULL) + return (0); + bzero(tag, sizeof(struct pf_tagname)); + strlcpy(tag->name, tagname, sizeof(tag->name)); + tag->tag = new_tagid; + tag->ref++; + + if (p != NULL) /* insert new entry before p */ + TAILQ_INSERT_BEFORE(p, tag, entries); + else /* either list empty or no free slot in between */ + TAILQ_INSERT_TAIL(head, tag, entries); + + return (tag->tag); +} + +static void +tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) +{ + struct pf_tagname *tag; + + TAILQ_FOREACH(tag, head, entries) + if (tag->tag == tagid) { + strlcpy(p, tag->name, PF_TAG_NAME_SIZE); + return; + } +} + +static void +tag_unref(struct pf_tags *head, u_int16_t tag) +{ + struct pf_tagname *p, *next; + + if (tag == 0) + return; + + for (p = TAILQ_FIRST(head); p != NULL; p = next) { + next = TAILQ_NEXT(p, entries); + if (tag == p->tag) { + if (--p->ref == 0) { + TAILQ_REMOVE(head, p, entries); + free(p, M_TEMP); + } + break; + } + } +} + +u_int16_t +pf_tagname2tag(char *tagname) +{ + return (tagname2tag(&pf_tags, tagname)); +} + +void +pf_tag2tagname(u_int16_t tagid, char *p) +{ + return (tag2tagname(&pf_tags, tagid, p)); +} + +void +pf_tag_unref(u_int16_t tag) +{ + return (tag_unref(&pf_tags, tag)); +} + +#ifdef ALTQ +u_int32_t +pf_qname2qid(char *qname) +{ + return ((u_int32_t)tagname2tag(&pf_qids, qname)); +} + +void +pf_qid2qname(u_int32_t qid, char *p) +{ + return (tag2tagname(&pf_qids, (u_int16_t)qid, p)); +} + +void +pf_qid_unref(u_int32_t qid) +{ + return (tag_unref(&pf_qids, (u_int16_t)qid)); +} + +int +pf_begin_altq(u_int32_t *ticket) +{ + struct pf_altq *altq; + int error = 0; + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } else + pf_qid_unref(altq->qid); + pool_put(&pf_altq_pl, altq); + } + if (error) + return (error); + *ticket = ++ticket_altqs_inactive; + altqs_inactive_open = 1; + return 
(0); +} + +int +pf_rollback_altq(u_int32_t ticket) +{ + struct pf_altq *altq; + int error = 0; + + if (!altqs_inactive_open || ticket != ticket_altqs_inactive) + return (0); + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0) { + /* detach and destroy the discipline */ + error = altq_remove(altq); + } else + pf_qid_unref(altq->qid); + pool_put(&pf_altq_pl, altq); + } + altqs_inactive_open = 0; + return (error); +} + +int +pf_commit_altq(u_int32_t ticket) +{ + struct pf_altqqueue *old_altqs; + struct pf_altq *altq; + int s, err, error = 0; + + if (!altqs_inactive_open || ticket != ticket_altqs_inactive) + return (EBUSY); + + /* swap altqs, keep the old. */ + s = splsoftnet(); + old_altqs = pf_altqs_active; + pf_altqs_active = pf_altqs_inactive; + pf_altqs_inactive = old_altqs; + ticket_altqs_active = ticket_altqs_inactive; + + /* Attach new disciplines */ + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + /* attach the discipline */ + error = altq_pfattach(altq); + if (error) { + splx(s); + return (error); + } + } + } + + /* Purge the old altq list */ + while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) { + TAILQ_REMOVE(pf_altqs_inactive, altq, entries); + if (altq->qname[0] == 0) { + /* detach and destroy the discipline */ + err = altq_pfdetach(altq); + if (err != 0 && error == 0) + error = err; + err = altq_remove(altq); + if (err != 0 && error == 0) + error = err; + } else + pf_qid_unref(altq->qid); + pool_put(&pf_altq_pl, altq); + } + splx(s); + + altqs_inactive_open = 0; + return (error); +} +#endif /* ALTQ */ + +int +pf_begin_rules(u_int32_t *ticket, int rs_num, char *anchor, char *ruleset) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_or_create_ruleset(anchor, ruleset); + if (rs == NULL) + return (EINVAL); + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + *ticket = ++rs->rules[rs_num].inactive.ticket; + rs->rules[rs_num].inactive.open = 1; + return (0); +} + +int +pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_ruleset(anchor, ruleset); + if (rs == NULL || !rs->rules[rs_num].inactive.open || + rs->rules[rs_num].inactive.ticket != ticket) + return (0); + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.open = 0; + return (0); +} + +int +pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) +{ + struct pf_ruleset *rs; + struct pf_rule *rule; + struct pf_rulequeue *old_rules; + int s; + + if (rs_num < 0 || rs_num >= PF_RULESET_MAX) + return (EINVAL); + rs = pf_find_ruleset(anchor, ruleset); + if (rs == NULL || !rs->rules[rs_num].inactive.open || + ticket != rs->rules[rs_num].inactive.ticket) + return (EBUSY); + + /* Swap rules, keep the old. */ + s = splsoftnet(); + old_rules = rs->rules[rs_num].active.ptr; + rs->rules[rs_num].active.ptr = + rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].active.ticket = + rs->rules[rs_num].inactive.ticket; + pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + + /* Purge the old rule list. 
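These are the rules that were active before the swap above; they are freed before splx() so no packet is ever filtered against a half-swapped ruleset.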
*/
+ while ((rule = TAILQ_FIRST(old_rules)) != NULL)
+ pf_rm_rule(old_rules, rule);
+ rs->rules[rs_num].inactive.open = 0;
+ pf_remove_if_empty_ruleset(rs);
+ pf_update_anchor_rules();
+ splx(s);
+ return (0);
+}
+
+int
+pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
+{
+ struct pf_pooladdr *pa = NULL;
+ struct pf_pool *pool = NULL;
+ int s;
+ int error = 0;
+
+ /* XXX keep in sync with switch() below */
+ if (securelevel > 1)
+ switch (cmd) {
+ case DIOCGETRULES:
+ case DIOCGETRULE:
+ case DIOCGETADDRS:
+ case DIOCGETADDR:
+ case DIOCGETSTATE:
+ case DIOCSETSTATUSIF:
+ case DIOCGETSTATUS:
+ case DIOCCLRSTATUS:
+ case DIOCNATLOOK:
+ case DIOCSETDEBUG:
+ case DIOCGETSTATES:
+ case DIOCGETTIMEOUT:
+ case DIOCCLRRULECTRS:
+ case DIOCGETLIMIT:
+ case DIOCGETALTQS:
+ case DIOCGETALTQ:
+ case DIOCGETQSTATS:
+ case DIOCGETANCHORS:
+ case DIOCGETANCHOR:
+ case DIOCGETRULESETS:
+ case DIOCGETRULESET:
+ case DIOCRGETTABLES:
+ case DIOCRGETTSTATS:
+ case DIOCRCLRTSTATS:
+ case DIOCRCLRADDRS:
+ case DIOCRADDADDRS:
+ case DIOCRDELADDRS:
+ case DIOCRSETADDRS:
+ case DIOCRGETADDRS:
+ case DIOCRGETASTATS:
+ case DIOCRCLRASTATS:
+ case DIOCRTSTADDRS:
+ case DIOCOSFPGET:
+ case DIOCGETSRCNODES:
+ case DIOCCLRSRCNODES:
+ case DIOCIGETIFACES:
+ case DIOCICLRISTATS:
+ case DIOCGIFSPEED:
+ break;
+ case DIOCRCLRTABLES:
+ case DIOCRADDTABLES:
+ case DIOCRDELTABLES:
+ case DIOCRSETTFLAGS:
+ if (((struct pfioc_table *)addr)->pfrio_flags &
+ PFR_FLAG_DUMMY)
+ break; /* dummy operation ok */
+ return (EPERM);
+ default:
+ return (EPERM);
+ }
+
+ if (!(flags & FWRITE))
+ switch (cmd) {
+ case DIOCGETRULES:
+ case DIOCGETRULE:
+ case DIOCGETADDRS:
+ case DIOCGETADDR:
+ case DIOCGETSTATE:
+ case DIOCGETSTATUS:
+ case DIOCGETSTATES:
+ case DIOCGETTIMEOUT:
+ case DIOCGETLIMIT:
+ case DIOCGETALTQS:
+ case DIOCGETALTQ:
+ case DIOCGETQSTATS:
+ case DIOCGETANCHORS:
+ case DIOCGETANCHOR:
+ case DIOCGETRULESETS:
+ case DIOCGETRULESET:
+ case DIOCRGETTABLES:
+ case DIOCRGETTSTATS:
+ case DIOCRGETADDRS:
+ case DIOCRGETASTATS:
+ case DIOCRTSTADDRS:
+ case DIOCOSFPGET:
+ case DIOCGETSRCNODES:
+ case DIOCIGETIFACES:
+ case DIOCGIFSPEED:
+ break;
+ case DIOCRCLRTABLES:
+ case DIOCRADDTABLES:
+ case DIOCRDELTABLES:
+ case DIOCRCLRTSTATS:
+ case DIOCRCLRADDRS:
+ case DIOCRADDADDRS:
+ case DIOCRDELADDRS:
+ case DIOCRSETADDRS:
+ case DIOCRSETTFLAGS:
+ if (((struct pfioc_table *)addr)->pfrio_flags &
+ PFR_FLAG_DUMMY)
+ break; /* dummy operation ok */
+ return (EACCES);
+ default:
+ return (EACCES);
+ }
+
+ switch (cmd) {
+
+ case DIOCSTART:
+ if (pf_status.running)
+ error = EEXIST;
+ else {
+ error = hook_pf();
+ if (error) {
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: pfil registration failed\n"));
+ break;
+ }
+ pf_status.running = 1;
+ pf_status.since = time_second;
+ if (pf_status.stateid == 0) {
+ pf_status.stateid = time_second;
+ pf_status.stateid = pf_status.stateid << 32;
+ }
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
+ }
+ break;
+
+ case DIOCSTOP:
+ if (!pf_status.running)
+ error = ENOENT;
+ else {
+ pf_status.running = 0;
+ error = dehook_pf();
+ if (error) {
+ pf_status.running = 1;
+ DPFPRINTF(PF_DEBUG_MISC,
+ ("pf: pfil unregistration failed\n"));
+ }
+ pf_status.since = time_second;
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
+ }
+ break;
+
+ case DIOCBEGINRULES: {
+ struct pfioc_rule *pr = (struct pfioc_rule *)addr;
+
+ error = pf_begin_rules(&pr->ticket, pf_get_ruleset_number(
+ pr->rule.action), pr->anchor, pr->ruleset);
+ break;
+ }
+
+ case DIOCADDRULE: {
+ struct pfioc_rule *pr = (struct
pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule, *tail; + struct pf_pooladdr *pa; + int rs_num; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->rule.anchorname[0] && ruleset != &pf_main_ruleset) { + error = EINVAL; + break; + } + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { + error = EBUSY; + break; + } + if (pr->pool_ticket != ticket_pabuf) { + error = EBUSY; + break; + } + rule = pool_get(&pf_rule_pl, PR_NOWAIT); + if (rule == NULL) { + error = ENOMEM; + break; + } + bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + rule->anchor = NULL; + rule->kif = NULL; + TAILQ_INIT(&rule->rpool.list); + /* initialize refcounting */ + rule->states = 0; + rule->src_nodes = 0; + rule->entries.tqe_prev = NULL; +#ifndef INET + if (rule->af == AF_INET) { + pool_put(&pf_rule_pl, rule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (rule->af == AF_INET6) { + pool_put(&pf_rule_pl, rule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + if (tail) + rule->nr = tail->nr + 1; + else + rule->nr = 0; + if (rule->ifname[0]) { + rule->kif = pfi_attach_rule(rule->ifname); + if (rule->kif == NULL) { + pool_put(&pf_rule_pl, rule); + error = EINVAL; + break; + } + } + +#ifdef ALTQ + /* set queue IDs */ + if (rule->qname[0] != 0) { + if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + error = EBUSY; + else if (rule->pqname[0] != 0) { + if ((rule->pqid = + pf_qname2qid(rule->pqname)) == 0) + error = EBUSY; + } else + rule->pqid = rule->qid; + } +#endif + if (rule->tagname[0]) + if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) + error = EBUSY; + if (rule->match_tagname[0]) + if ((rule->match_tag = + pf_tagname2tag(rule->match_tagname)) == 0) + error = EBUSY; + if (rule->rt && !rule->direction) + error = EINVAL; + if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) + error = EINVAL; + if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &rule->src.addr)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) + error = EINVAL; + TAILQ_FOREACH(pa, &pf_pabuf, entries) + if (pf_tbladdr_setup(ruleset, &pa->addr)) + error = EINVAL; + + pf_mv_pool(&pf_pabuf, &rule->rpool.list); + if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || + (rule->action == PF_BINAT)) && !rule->anchorname[0]) || + (rule->rt > PF_FASTROUTE)) && + (TAILQ_FIRST(&rule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_rm_rule(NULL, rule); + break; + } + rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); + rule->evaluations = rule->packets = rule->bytes = 0; + TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, + rule, entries); + break; + } + + case DIOCCOMMITRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + + error = pf_commit_rules(pr->ticket, pf_get_ruleset_number( + pr->rule.action), pr->anchor, pr->ruleset); + break; + } + + case DIOCGETRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *tail; + int rs_num; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= 
PF_RULESET_MAX) { + error = EINVAL; + break; + } + s = splsoftnet(); + tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + if (tail) + pr->nr = tail->nr + 1; + else + pr->nr = 0; + pr->ticket = ruleset->rules[rs_num].active.ticket; + splx(s); + break; + } + + case DIOCGETRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num, i; + + ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + error = EBUSY; + break; + } + s = splsoftnet(); + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((rule != NULL) && (rule->nr != pr->nr)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(rule, &pr->rule, sizeof(struct pf_rule)); + pfi_dynaddr_copyout(&pr->rule.src.addr); + pfi_dynaddr_copyout(&pr->rule.dst.addr); + pf_tbladdr_copyout(&pr->rule.src.addr); + pf_tbladdr_copyout(&pr->rule.dst.addr); + for (i = 0; i < PF_SKIP_COUNT; ++i) + if (rule->skip[i].ptr == NULL) + pr->rule.skip[i].nr = -1; + else + pr->rule.skip[i].nr = + rule->skip[i].ptr->nr; + splx(s); + break; + } + + case DIOCCHANGERULE: { + struct pfioc_rule *pcr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *oldrule = NULL, *newrule = NULL; + u_int32_t nr = 0; + int rs_num; + + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != ticket_pabuf) { + error = EBUSY; + break; + } + + if (pcr->action < PF_CHANGE_ADD_HEAD || + pcr->action > PF_CHANGE_GET_TICKET) { + error = EINVAL; + break; + } + ruleset = pf_find_ruleset(pcr->anchor, pcr->ruleset); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + break; + } else { + if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) { + error = EINVAL; + break; + } + if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + } + + if (pcr->action != PF_CHANGE_REMOVE) { + newrule = pool_get(&pf_rule_pl, PR_NOWAIT); + if (newrule == NULL) { + error = ENOMEM; + break; + } + bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + TAILQ_INIT(&newrule->rpool.list); + /* initialize refcounting */ + newrule->states = 0; + newrule->entries.tqe_prev = NULL; +#ifndef INET + if (newrule->af == AF_INET) { + pool_put(&pf_rule_pl, newrule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (newrule->af == AF_INET6) { + pool_put(&pf_rule_pl, newrule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (newrule->ifname[0]) { + newrule->kif = pfi_attach_rule(newrule->ifname); + if (newrule->kif == NULL) { + pool_put(&pf_rule_pl, newrule); + error = EINVAL; + break; + } + } else + newrule->kif = NULL; + +#ifdef ALTQ + /* set queue IDs */ + if (newrule->qname[0] != 0) { + if ((newrule->qid = + pf_qname2qid(newrule->qname)) == 0) + error = EBUSY; + else if (newrule->pqname[0] != 0) { + if ((newrule->pqid = + pf_qname2qid(newrule->pqname)) == 0) + error = EBUSY; + } else + newrule->pqid = newrule->qid; + } +#endif + if (newrule->tagname[0]) + if ((newrule->tag = + 
pf_tagname2tag(newrule->tagname)) == 0) + error = EBUSY; + if (newrule->match_tagname[0]) + if ((newrule->match_tag = pf_tagname2tag( + newrule->match_tagname)) == 0) + error = EBUSY; + + if (newrule->rt && !newrule->direction) + error = EINVAL; + if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) + error = EINVAL; + if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) + error = EINVAL; + + pf_mv_pool(&pf_pabuf, &newrule->rpool.list); + if (((((newrule->action == PF_NAT) || + (newrule->action == PF_RDR) || + (newrule->action == PF_BINAT) || + (newrule->rt > PF_FASTROUTE)) && + !newrule->anchorname[0])) && + (TAILQ_FIRST(&newrule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_rm_rule(NULL, newrule); + break; + } + newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); + newrule->evaluations = newrule->packets = 0; + newrule->bytes = 0; + } + pf_empty_pool(&pf_pabuf); + + s = splsoftnet(); + + if (pcr->action == PF_CHANGE_ADD_HEAD) + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + else if (pcr->action == PF_CHANGE_ADD_TAIL) + oldrule = TAILQ_LAST( + ruleset->rules[rs_num].active.ptr, pf_rulequeue); + else { + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) + oldrule = TAILQ_NEXT(oldrule, entries); + if (oldrule == NULL) { + pf_rm_rule(NULL, newrule); + error = EINVAL; + splx(s); + break; + } + } + + if (pcr->action == PF_CHANGE_REMOVE) + pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); + else { + if (oldrule == NULL) + TAILQ_INSERT_TAIL( + ruleset->rules[rs_num].active.ptr, + newrule, entries); + else if (pcr->action == PF_CHANGE_ADD_HEAD || + pcr->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldrule, newrule, entries); + else + TAILQ_INSERT_AFTER( + ruleset->rules[rs_num].active.ptr, + oldrule, newrule, entries); + } + + nr = 0; + TAILQ_FOREACH(oldrule, + ruleset->rules[rs_num].active.ptr, entries) + oldrule->nr = nr++; + + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); + pf_remove_if_empty_ruleset(ruleset); + pf_update_anchor_rules(); + + ruleset->rules[rs_num].active.ticket++; + splx(s); + break; + } + + case DIOCCLRSTATES: { + struct pf_state *state; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + int killed = 0; + + s = splsoftnet(); + RB_FOREACH(state, pf_state_tree_id, &tree_id) { + if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, + state->u.s.kif->pfik_name)) { + state->timeout = PFTM_PURGE; +#if NPFSYNC + /* don't send out individual delete messages */ + state->sync_flags = PFSTATE_NOSYNC; +#endif + killed++; + } + } + pf_purge_expired_states(); + pf_status.states = 0; + psk->psk_af = killed; +#if NPFSYNC + pfsync_clear_states(pf_status.hostid, psk->psk_ifname); +#endif + splx(s); + break; + } + + case DIOCKILLSTATES: { + struct pf_state *state; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + int killed = 0; + + s = splsoftnet(); + RB_FOREACH(state, pf_state_tree_id, &tree_id) { + if ((!psk->psk_af || state->af == psk->psk_af) + && (!psk->psk_proto || psk->psk_proto == + state->proto) && + PF_MATCHA(psk->psk_src.not, + &psk->psk_src.addr.v.a.addr, + &psk->psk_src.addr.v.a.mask, + &state->lan.addr, state->af) && + PF_MATCHA(psk->psk_dst.not, + &psk->psk_dst.addr.v.a.addr, + &psk->psk_dst.addr.v.a.mask, + &state->ext.addr, state->af) && + (psk->psk_src.port_op == 0 || + 
pf_match_port(psk->psk_src.port_op, + psk->psk_src.port[0], psk->psk_src.port[1], + state->lan.port)) && + (psk->psk_dst.port_op == 0 || + pf_match_port(psk->psk_dst.port_op, + psk->psk_dst.port[0], psk->psk_dst.port[1], + state->ext.port)) && + (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, + state->u.s.kif->pfik_name))) { + state->timeout = PFTM_PURGE; + killed++; + } + } + pf_purge_expired_states(); + splx(s); + psk->psk_af = killed; + break; + } + + case DIOCADDSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pf_state *state; + struct pfi_kif *kif; + + if (ps->state.timeout >= PFTM_MAX && + ps->state.timeout != PFTM_UNTIL_PACKET) { + error = EINVAL; + break; + } + state = pool_get(&pf_state_pl, PR_NOWAIT); + if (state == NULL) { + error = ENOMEM; + break; + } + s = splsoftnet(); + kif = pfi_lookup_create(ps->state.u.ifname); + if (kif == NULL) { + pool_put(&pf_state_pl, state); + error = ENOENT; + splx(s); + break; + } + bcopy(&ps->state, state, sizeof(struct pf_state)); + bzero(&state->u, sizeof(state->u)); + state->rule.ptr = &pf_default_rule; + state->nat_rule.ptr = NULL; + state->anchor.ptr = NULL; + state->rt_kif = NULL; + state->creation = time_second; + state->pfsync_time = 0; + state->packets[0] = state->packets[1] = 0; + state->bytes[0] = state->bytes[1] = 0; + + if (pf_insert_state(kif, state)) { + pfi_maybe_destroy(kif); + pool_put(&pf_state_pl, state); + error = ENOMEM; + } + splx(s); + break; + } + + case DIOCGETSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pf_state *state; + u_int32_t nr; + + nr = 0; + s = splsoftnet(); + RB_FOREACH(state, pf_state_tree_id, &tree_id) { + if (nr >= ps->nr) + break; + nr++; + } + if (state == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(state, &ps->state, sizeof(struct pf_state)); + ps->state.rule.nr = state->rule.ptr->nr; + ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? + -1 : state->nat_rule.ptr->nr; + ps->state.anchor.nr = (state->anchor.ptr == NULL) ? + -1 : state->anchor.ptr->nr; + splx(s); + ps->state.expire = pf_state_expires(state); + if (ps->state.expire > time_second) + ps->state.expire -= time_second; + else + ps->state.expire = 0; + break; + } + + case DIOCGETSTATES: { + struct pfioc_states *ps = (struct pfioc_states *)addr; + struct pf_state *state; + struct pf_state *p, pstore; + struct pfi_kif *kif; + u_int32_t nr = 0; + int space = ps->ps_len; + + if (space == 0) { + s = splsoftnet(); + TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) + nr += kif->pfik_states; + splx(s); + ps->ps_len = sizeof(struct pf_state) * nr; + return (0); + } + + s = splsoftnet(); + p = ps->ps_states; + TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) + RB_FOREACH(state, pf_state_tree_ext_gwy, + &kif->pfik_ext_gwy) { + int secs = time_second; + + if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) + break; + + bcopy(state, &pstore, sizeof(pstore)); + strlcpy(pstore.u.ifname, kif->pfik_name, + sizeof(pstore.u.ifname)); + pstore.rule.nr = state->rule.ptr->nr; + pstore.nat_rule.nr = (state->nat_rule.ptr == + NULL) ? -1 : state->nat_rule.ptr->nr; + pstore.anchor.nr = (state->anchor.ptr == + NULL) ? 
-1 : state->anchor.ptr->nr; + pstore.creation = secs - pstore.creation; + pstore.expire = pf_state_expires(state); + if (pstore.expire > secs) + pstore.expire -= secs; + else + pstore.expire = 0; + error = copyout(&pstore, p, sizeof(*p)); + if (error) { + splx(s); + goto fail; + } + p++; + nr++; + } + ps->ps_len = sizeof(struct pf_state) * nr; + splx(s); + break; + } + + case DIOCGETSTATUS: { + struct pf_status *s = (struct pf_status *)addr; + bcopy(&pf_status, s, sizeof(struct pf_status)); + pfi_fill_oldstatus(s); + break; + } + + case DIOCSETSTATUSIF: { + struct pfioc_if *pi = (struct pfioc_if *)addr; + + if (pi->ifname[0] == 0) { + bzero(pf_status.ifname, IFNAMSIZ); + break; + } + if (ifunit(pi->ifname) == NULL) { + error = EINVAL; + break; + } + strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ); + break; + } + + case DIOCCLRSTATUS: { + bzero(pf_status.counters, sizeof(pf_status.counters)); + bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); + bzero(pf_status.scounters, sizeof(pf_status.scounters)); + if (*pf_status.ifname) + pfi_clr_istats(pf_status.ifname, NULL, + PFI_FLAG_INSTANCE); + break; + } + + case DIOCNATLOOK: { + struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; + struct pf_state *state; + struct pf_state key; + int m = 0, direction = pnl->direction; + + key.af = pnl->af; + key.proto = pnl->proto; + + if (!pnl->proto || + PF_AZERO(&pnl->saddr, pnl->af) || + PF_AZERO(&pnl->daddr, pnl->af) || + !pnl->dport || !pnl->sport) + error = EINVAL; + else { + s = splsoftnet(); + + /* + * userland gives us source and dest of connection, + * reverse the lookup so we ask for what happens with + * the return traffic, enabling us to find it in the + * state tree. + */ + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); + key.ext.port = pnl->dport; + PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); + key.gwy.port = pnl->sport; + state = pf_find_state_all(&key, PF_EXT_GWY, &m); + } else { + PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); + key.lan.port = pnl->dport; + PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); + key.ext.port = pnl->sport; + state = pf_find_state_all(&key, PF_LAN_EXT, &m); + } + if (m > 1) + error = E2BIG; /* more than one state */ + else if (state != NULL) { + if (direction == PF_IN) { + PF_ACPY(&pnl->rsaddr, &state->lan.addr, + state->af); + pnl->rsport = state->lan.port; + PF_ACPY(&pnl->rdaddr, &pnl->daddr, + pnl->af); + pnl->rdport = pnl->dport; + } else { + PF_ACPY(&pnl->rdaddr, &state->gwy.addr, + state->af); + pnl->rdport = state->gwy.port; + PF_ACPY(&pnl->rsaddr, &pnl->saddr, + pnl->af); + pnl->rsport = pnl->sport; + } + } else + error = ENOENT; + splx(s); + } + break; + } + + case DIOCSETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + int old; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || + pt->seconds < 0) { + error = EINVAL; + goto fail; + } + old = pf_default_rule.timeout[pt->timeout]; + pf_default_rule.timeout[pt->timeout] = pt->seconds; + pt->seconds = old; + break; + } + + case DIOCGETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { + error = EINVAL; + goto fail; + } + pt->seconds = pf_default_rule.timeout[pt->timeout]; + break; + } + + case DIOCGETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { + error = EINVAL; + goto fail; + } + pl->limit = pf_pool_limits[pl->index].limit; + break; + } + + case DIOCSETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + int 
old_limit; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || + pf_pool_limits[pl->index].pp == NULL) { + error = EINVAL; + goto fail; + } + + /* XXX Get an API to set limits on the zone/pool */ + old_limit = pf_pool_limits[pl->index].limit; + pf_pool_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; + break; + } + + case DIOCSETDEBUG: { + u_int32_t *level = (u_int32_t *)addr; + + pf_status.debug = *level; + break; + } + + case DIOCCLRRULECTRS: { + struct pf_ruleset *ruleset = &pf_main_ruleset; + struct pf_rule *rule; + + s = splsoftnet(); + TAILQ_FOREACH(rule, + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) + rule->evaluations = rule->packets = + rule->bytes = 0; + splx(s); + break; + } + + case DIOCGIFSPEED: { + struct pf_ifspeed *psp = (struct pf_ifspeed *)addr; + struct pf_ifspeed ps; + struct ifnet *ifp; + + if (psp->ifname[0] != 0) { + /* Can we completely trust user-land? */ + strlcpy(ps.ifname, psp->ifname, IFNAMSIZ); + ifp = ifunit(ps.ifname); + if (ifp ) + psp->baudrate = ifp->if_baudrate; + else + error = EINVAL; + } else + error = EINVAL; + break; + } +#ifdef ALTQ + case DIOCSTARTALTQ: { + struct pf_altq *altq; + struct ifnet *ifp; + struct tb_profile tb; + + /* enable all altq interfaces on active list */ + s = splsoftnet(); + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + if ((ifp = ifunit(altq->ifname)) == NULL) { + error = EINVAL; + break; + } + if (ifp->if_snd.altq_type != ALTQT_NONE) + error = altq_enable(&ifp->if_snd); + if (error != 0) + break; + /* set tokenbucket regulator */ + tb.rate = altq->ifbandwidth; + tb.depth = altq->tbrsize; + error = tbr_set(&ifp->if_snd, &tb); + if (error != 0) + break; + } + } + splx(s); + DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + break; + } + + case DIOCSTOPALTQ: { + struct pf_altq *altq; + struct ifnet *ifp; + struct tb_profile tb; + int err; + + /* disable all altq interfaces on active list */ + s = splsoftnet(); + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + if ((ifp = ifunit(altq->ifname)) == NULL) { + error = EINVAL; + break; + } + if (ifp->if_snd.altq_type != ALTQT_NONE) { + err = altq_disable(&ifp->if_snd); + if (err != 0 && error == 0) + error = err; + } + /* clear tokenbucket regulator */ + tb.rate = 0; + err = tbr_set(&ifp->if_snd, &tb); + if (err != 0 && error == 0) + error = err; + } + } + splx(s); + DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + break; + } + + case DIOCBEGINALTQS: { + u_int32_t *ticket = (u_int32_t *)addr; + + error = pf_begin_altq(ticket); + break; + } + + case DIOCADDALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq, *a; + + if (pa->ticket != ticket_altqs_inactive) { + error = EBUSY; + break; + } + altq = pool_get(&pf_altq_pl, PR_NOWAIT); + if (altq == NULL) { + error = ENOMEM; + break; + } + bcopy(&pa->altq, altq, sizeof(struct pf_altq)); + + /* + * if this is for a queue, find the discipline and + * copy the necessary fields + */ + if (altq->qname[0] != 0) { + if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + error = EBUSY; + pool_put(&pf_altq_pl, altq); + break; + } + TAILQ_FOREACH(a, pf_altqs_inactive, entries) { + if (strncmp(a->ifname, altq->ifname, + IFNAMSIZ) == 0 && a->qname[0] == 0) { + altq->altq_disc = a->altq_disc; + break; + } + } + } + + error = altq_add(altq); + if (error) { + pool_put(&pf_altq_pl, altq); + break; + } + + TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + break; + } + + case 
DIOCCOMMITALTQS: { + u_int32_t ticket = *(u_int32_t *)addr; + + error = pf_commit_altq(ticket); + break; + } + + case DIOCGETALTQS: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + + pa->nr = 0; + s = splsoftnet(); + TAILQ_FOREACH(altq, pf_altqs_active, entries) + pa->nr++; + pa->ticket = ticket_altqs_active; + splx(s); + break; + } + + case DIOCGETALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + u_int32_t nr; + + if (pa->ticket != ticket_altqs_active) { + error = EBUSY; + break; + } + nr = 0; + s = splsoftnet(); + altq = TAILQ_FIRST(pf_altqs_active); + while ((altq != NULL) && (nr < pa->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + error = EBUSY; + splx(s); + break; + } + bcopy(altq, &pa->altq, sizeof(struct pf_altq)); + splx(s); + break; + } + + case DIOCCHANGEALTQ: + /* CHANGEALTQ not supported yet! */ + error = ENODEV; + break; + + case DIOCGETQSTATS: { + struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; + struct pf_altq *altq; + u_int32_t nr; + int nbytes; + + if (pq->ticket != ticket_altqs_active) { + error = EBUSY; + break; + } + nbytes = pq->nbytes; + nr = 0; + s = splsoftnet(); + altq = TAILQ_FIRST(pf_altqs_active); + while ((altq != NULL) && (nr < pq->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + error = EBUSY; + splx(s); + break; + } + error = altq_getqstats(altq, pq->buf, &nbytes); + splx(s); + if (error == 0) { + pq->scheduler = altq->scheduler; + pq->nbytes = nbytes; + } + break; + } +#endif /* ALTQ */ + + case DIOCBEGINADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + pf_empty_pool(&pf_pabuf); + pp->ticket = ++ticket_pabuf; + break; + } + + case DIOCADDADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + +#ifndef INET + if (pp->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pp->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (pp->addr.addr.type != PF_ADDR_ADDRMASK && + pp->addr.addr.type != PF_ADDR_DYNIFTL && + pp->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); + if (pa == NULL) { + error = ENOMEM; + break; + } + bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); + if (pa->ifname[0]) { + pa->kif = pfi_attach_rule(pa->ifname); + if (pa->kif == NULL) { + pool_put(&pf_pooladdr_pl, pa); + error = EINVAL; + break; + } + } + if (pfi_dynaddr_setup(&pa->addr, pp->af)) { + pfi_dynaddr_remove(&pa->addr); + pfi_detach_rule(pa->kif); + pool_put(&pf_pooladdr_pl, pa); + error = EINVAL; + break; + } + TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries); + break; + } + + case DIOCGETADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + pp->nr = 0; + s = splsoftnet(); + pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, + pp->r_action, pp->r_num, 0, 1, 0); + if (pool == NULL) { + error = EBUSY; + splx(s); + break; + } + TAILQ_FOREACH(pa, &pool->list, entries) + pp->nr++; + splx(s); + break; + } + + case DIOCGETADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + u_int32_t nr = 0; + + s = splsoftnet(); + pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, + pp->r_action, pp->r_num, 0, 1, 1); + if (pool == NULL) { + error = EBUSY; + splx(s); + break; + } + pa = TAILQ_FIRST(&pool->list); + while ((pa != NULL) && (nr < pp->nr)) { + pa = TAILQ_NEXT(pa, entries); + nr++; + } + if (pa == NULL) { + error = EBUSY; + splx(s); + break; + } + 
bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); + pfi_dynaddr_copyout(&pp->addr.addr); + pf_tbladdr_copyout(&pp->addr.addr); + splx(s); + break; + } + + case DIOCCHANGEADDR: { + struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *oldpa = NULL, *newpa = NULL; + struct pf_ruleset *ruleset; + + if (pca->action < PF_CHANGE_ADD_HEAD || + pca->action > PF_CHANGE_REMOVE) { + error = EINVAL; + break; + } + if (pca->addr.addr.type != PF_ADDR_ADDRMASK && + pca->addr.addr.type != PF_ADDR_DYNIFTL && + pca->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + + ruleset = pf_find_ruleset(pca->anchor, pca->ruleset); + if (ruleset == NULL) { + error = EBUSY; + break; + } + pool = pf_get_pool(pca->anchor, pca->ruleset, pca->ticket, + pca->r_action, pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) { + error = EBUSY; + break; + } + if (pca->action != PF_CHANGE_REMOVE) { + newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); + if (newpa == NULL) { + error = ENOMEM; + break; + } + bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); +#ifndef INET + if (pca->af == AF_INET) { + pool_put(&pf_pooladdr_pl, newpa); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#ifndef INET6 + if (pca->af == AF_INET6) { + pool_put(&pf_pooladdr_pl, newpa); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (newpa->ifname[0]) { + newpa->kif = pfi_attach_rule(newpa->ifname); + if (newpa->kif == NULL) { + pool_put(&pf_pooladdr_pl, newpa); + error = EINVAL; + break; + } + } else + newpa->kif = NULL; + if (pfi_dynaddr_setup(&newpa->addr, pca->af) || + pf_tbladdr_setup(ruleset, &newpa->addr)) { + pfi_dynaddr_remove(&newpa->addr); + pfi_detach_rule(newpa->kif); + pool_put(&pf_pooladdr_pl, newpa); + error = EINVAL; + break; + } + } + + s = splsoftnet(); + + if (pca->action == PF_CHANGE_ADD_HEAD) + oldpa = TAILQ_FIRST(&pool->list); + else if (pca->action == PF_CHANGE_ADD_TAIL) + oldpa = TAILQ_LAST(&pool->list, pf_palist); + else { + int i = 0; + + oldpa = TAILQ_FIRST(&pool->list); + while ((oldpa != NULL) && (i < pca->nr)) { + oldpa = TAILQ_NEXT(oldpa, entries); + i++; + } + if (oldpa == NULL) { + error = EINVAL; + splx(s); + break; + } + } + + if (pca->action == PF_CHANGE_REMOVE) { + TAILQ_REMOVE(&pool->list, oldpa, entries); + pfi_dynaddr_remove(&oldpa->addr); + pf_tbladdr_remove(&oldpa->addr); + pfi_detach_rule(oldpa->kif); + pool_put(&pf_pooladdr_pl, oldpa); + } else { + if (oldpa == NULL) + TAILQ_INSERT_TAIL(&pool->list, newpa, entries); + else if (pca->action == PF_CHANGE_ADD_HEAD || + pca->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldpa, newpa, entries); + else + TAILQ_INSERT_AFTER(&pool->list, oldpa, + newpa, entries); + } + + pool->cur = TAILQ_FIRST(&pool->list); + PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, + pca->af); + splx(s); + break; + } + + case DIOCGETANCHORS: { + struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; + struct pf_anchor *anchor; + + pa->nr = 0; + TAILQ_FOREACH(anchor, &pf_anchors, entries) + pa->nr++; + break; + } + + case DIOCGETANCHOR: { + struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; + struct pf_anchor *anchor; + u_int32_t nr = 0; + + anchor = TAILQ_FIRST(&pf_anchors); + while (anchor != NULL && nr < pa->nr) { + anchor = TAILQ_NEXT(anchor, entries); + nr++; + } + if (anchor == NULL) + error = EBUSY; + else + bcopy(anchor->name, pa->name, sizeof(pa->name)); + break; + } + + case DIOCGETRULESETS: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_anchor *anchor; + struct pf_ruleset *ruleset; + + 
pr->anchor[PF_ANCHOR_NAME_SIZE-1] = 0; + if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + error = EINVAL; + break; + } + pr->nr = 0; + TAILQ_FOREACH(ruleset, &anchor->rulesets, entries) + pr->nr++; + break; + } + + case DIOCGETRULESET: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_anchor *anchor; + struct pf_ruleset *ruleset; + u_int32_t nr = 0; + + if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + error = EINVAL; + break; + } + ruleset = TAILQ_FIRST(&anchor->rulesets); + while (ruleset != NULL && nr < pr->nr) { + ruleset = TAILQ_NEXT(ruleset, entries); + nr++; + } + if (ruleset == NULL) + error = EBUSY; + else + bcopy(ruleset->name, pr->name, sizeof(pr->name)); + break; + } + + case DIOCRCLRTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRADDTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRDELTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_tstats)) { + error = ENODEV; + break; + } + error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRCLRTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRSETTFLAGS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size, + io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRCLRADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRADDADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRDELADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = 
ENODEV; + break; + } + error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRSETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, + &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_astats)) { + error = ENODEV; + break; + } + error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRCLRASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRTSTADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRINABEGIN: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_ina_begin(&io->pfrio_table, &io->pfrio_ticket, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRINACOMMIT: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_ina_commit(&io->pfrio_table, io->pfrio_ticket, + &io->pfrio_nadd, &io->pfrio_nchange, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRINADEFINE: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof(struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, + io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCOSFPADD: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + s = splsoftnet(); + error = pf_osfp_add(io); + splx(s); + break; + } + + case DIOCOSFPGET: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + s = splsoftnet(); + error = pf_osfp_get(io); + splx(s); + break; + } + + case DIOCXBEGIN: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e ioe; + struct pfr_table table; + int i; + + if (io->esize != sizeof(ioe)) { + error = ENODEV; + goto fail; + } + for (i = 0; i < io->size; i++) { + if (copyin(io->array+i, &ioe, sizeof(ioe))) { + error = EFAULT; + goto fail; + } + switch (ioe.rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe.anchor[0] || ioe.ruleset[0]) { + error = EINVAL; + goto fail; + } + if ((error = pf_begin_altq(&ioe.ticket))) + goto fail; + break; +#endif /* 
ALTQ */ + case PF_RULESET_TABLE: + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe.anchor, + sizeof(table.pfrt_anchor)); + strlcpy(table.pfrt_ruleset, ioe.ruleset, + sizeof(table.pfrt_ruleset)); + if ((error = pfr_ina_begin(&table, + &ioe.ticket, NULL, 0))) + goto fail; + break; + default: + if ((error = pf_begin_rules(&ioe.ticket, + ioe.rs_num, ioe.anchor, ioe.ruleset))) + goto fail; + break; + } + if (copyout(&ioe, io->array+i, sizeof(io->array[i]))) { + error = EFAULT; + goto fail; + } + } + break; + } + + case DIOCXROLLBACK: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e ioe; + struct pfr_table table; + int i; + + if (io->esize != sizeof(ioe)) { + error = ENODEV; + goto fail; + } + for (i = 0; i < io->size; i++) { + if (copyin(io->array+i, &ioe, sizeof(ioe))) { + error = EFAULT; + goto fail; + } + switch (ioe.rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe.anchor[0] || ioe.ruleset[0]) { + error = EINVAL; + goto fail; + } + if ((error = pf_rollback_altq(ioe.ticket))) + goto fail; /* really bad */ + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe.anchor, + sizeof(table.pfrt_anchor)); + strlcpy(table.pfrt_ruleset, ioe.ruleset, + sizeof(table.pfrt_ruleset)); + if ((error = pfr_ina_rollback(&table, + ioe.ticket, NULL, 0))) + goto fail; /* really bad */ + break; + default: + if ((error = pf_rollback_rules(ioe.ticket, + ioe.rs_num, ioe.anchor, ioe.ruleset))) + goto fail; /* really bad */ + break; + } + } + break; + } + + case DIOCXCOMMIT: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e ioe; + struct pfr_table table; + struct pf_ruleset *rs; + int i; + + if (io->esize != sizeof(ioe)) { + error = ENODEV; + goto fail; + } + /* first makes sure everything will succeed */ + for (i = 0; i < io->size; i++) { + if (copyin(io->array+i, &ioe, sizeof(ioe))) { + error = EFAULT; + goto fail; + } + switch (ioe.rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if (ioe.anchor[0] || ioe.ruleset[0]) { + error = EINVAL; + goto fail; + } + if (!altqs_inactive_open || ioe.ticket != + ticket_altqs_inactive) { + error = EBUSY; + goto fail; + } + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + rs = pf_find_ruleset(ioe.anchor, ioe.ruleset); + if (rs == NULL || !rs->topen || ioe.ticket != + rs->tticket) { + error = EBUSY; + goto fail; + } + break; + default: + if (ioe.rs_num < 0 || ioe.rs_num >= + PF_RULESET_MAX) { + error = EINVAL; + goto fail; + } + rs = pf_find_ruleset(ioe.anchor, ioe.ruleset); + if (rs == NULL || + !rs->rules[ioe.rs_num].inactive.open || + rs->rules[ioe.rs_num].inactive.ticket != + ioe.ticket) { + error = EBUSY; + goto fail; + } + break; + } + } + /* now do the commit - no errors should happen here */ + for (i = 0; i < io->size; i++) { + if (copyin(io->array+i, &ioe, sizeof(ioe))) { + error = EFAULT; + goto fail; + } + switch (ioe.rs_num) { +#ifdef ALTQ + case PF_RULESET_ALTQ: + if ((error = pf_commit_altq(ioe.ticket))) + goto fail; /* really bad */ + break; +#endif /* ALTQ */ + case PF_RULESET_TABLE: + bzero(&table, sizeof(table)); + strlcpy(table.pfrt_anchor, ioe.anchor, + sizeof(table.pfrt_anchor)); + strlcpy(table.pfrt_ruleset, ioe.ruleset, + sizeof(table.pfrt_ruleset)); + if ((error = pfr_ina_commit(&table, ioe.ticket, + NULL, NULL, 0))) + goto fail; /* really bad */ + break; + default: + if ((error = pf_commit_rules(ioe.ticket, + ioe.rs_num, ioe.anchor, ioe.ruleset))) + goto fail; /* really bad */ + break; + } + } + break; + } + + 
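+	/*
+	 * DIOCGETSRCNODES works in two passes: when psn_len is zero it
+	 * only reports the buffer size needed, otherwise it copies out
+	 * as many source-node entries as fit into the supplied buffer.
+	 */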
+	case DIOCGETSRCNODES: {
+		struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr;
+		struct pf_src_node *n;
+		struct pf_src_node *p, pstore;
+		u_int32_t nr = 0;
+		int space = psn->psn_len;
+
+		if (space == 0) {
+			s = splsoftnet();
+			RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
+				nr++;
+			splx(s);
+			psn->psn_len = sizeof(struct pf_src_node) * nr;
+			return (0);
+		}
+
+		s = splsoftnet();
+		p = psn->psn_src_nodes;
+		RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+			int secs = time_second;
+
+			if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
+				break;
+
+			bcopy(n, &pstore, sizeof(pstore));
+			if (n->rule.ptr != NULL)
+				pstore.rule.nr = n->rule.ptr->nr;
+			pstore.creation = secs - pstore.creation;
+			if (pstore.expire > secs)
+				pstore.expire -= secs;
+			else
+				pstore.expire = 0;
+			error = copyout(&pstore, p, sizeof(*p));
+			if (error) {
+				splx(s);
+				goto fail;
+			}
+			p++;
+			nr++;
+		}
+		psn->psn_len = sizeof(struct pf_src_node) * nr;
+		splx(s);
+		break;
+	}
+
+	case DIOCCLRSRCNODES: {
+		struct pf_src_node *n;
+		struct pf_state *state;
+
+		s = splsoftnet();
+		RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+			state->src_node = NULL;
+			state->nat_src_node = NULL;
+		}
+		RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+			n->expire = 1;
+			n->states = 0;
+		}
+		pf_purge_expired_src_nodes();
+		pf_status.src_nodes = 0;
+		splx(s);
+		break;
+	}
+
+	case DIOCSETHOSTID: {
+		u_int32_t *hostid = (u_int32_t *)addr;
+
+		if (*hostid == 0) {
+			error = EINVAL;
+			goto fail;
+		}
+		pf_status.hostid = *hostid;
+		break;
+	}
+
+	case DIOCOSFPFLUSH:
+		s = splsoftnet();
+		pf_osfp_flush();
+		splx(s);
+		break;
+
+	case DIOCIGETIFACES: {
+		struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+		if (io->pfiio_esize != sizeof(struct pfi_if)) {
+			error = ENODEV;
+			break;
+		}
+		error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer,
+		    &io->pfiio_size, io->pfiio_flags);
+		break;
+	}
+
+	case DIOCICLRISTATS: {
+		struct pfioc_iface *io = (struct pfioc_iface *)addr;
+
+		error = pfi_clr_istats(io->pfiio_name, &io->pfiio_nzero,
+		    io->pfiio_flags);
+		break;
+	}
+
+	default:
+		error = ENODEV;
+		break;
+	}
+fail:
+	return (error);
+}
+
+/*
+ * XXX - Check for version mismatch!!!
+ */
+static void
+pf_clear_states(void)
+{
+	struct pf_state *state;
+
+	RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+		state->timeout = PFTM_PURGE;
+#if NPFSYNC
+		/* don't send out individual delete messages */
+		state->sync_flags = PFSTATE_NOSYNC;
+#endif
+	}
+	pf_purge_expired_states();
+	pf_status.states = 0;
+#if 0 /* NPFSYNC */
+/*
+ * XXX This is called on module unload, we do not want to sync that over?
+ */
+	pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
+#endif
+}
+
+static int
+pf_clear_tables(void)
+{
+	struct pfioc_table io;
+	int error;
+
+	bzero(&io, sizeof(io));
+
+	error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
+	    io.pfrio_flags);
+
+	return (error);
+}
+
+static void
+pf_clear_srcnodes(void)
+{
+	struct pf_src_node *n;
+	struct pf_state *state;
+
+	RB_FOREACH(state, pf_state_tree_id, &tree_id) {
+		state->src_node = NULL;
+		state->nat_src_node = NULL;
+	}
+	RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
+		n->expire = 1;
+		n->states = 0;
+	}
+	pf_purge_expired_src_nodes();
+	pf_status.src_nodes = 0;
+}
+/*
+ * XXX - Check for version mismatch!!!
+ */
+
+/*
+ * Duplicate pfctl -Fa operation to get rid of as much as we can.
+ */
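+/*
+ * Called from pf_unload(); flushes rules, tables, states and source
+ * nodes, while interfaces and fingerprints are torn down afterwards
+ * by pfi_cleanup() and pf_osfp_cleanup().
+ */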
+static int
+shutdown_pf(void)
+{
+	int error = 0;
+	u_int32_t t[5];
+	char nn = '\0';
+
+	callout_stop(&pf_expire_to);
+
+	pf_status.running = 0;
+	do {
+		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn,
+		    &nn)) != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
+			break;
+		}
+		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn,
+		    &nn)) != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
+			break;		/* XXX: rollback? */
+		}
+		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
+			break;		/* XXX: rollback? */
+		}
+		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
+			break;		/* XXX: rollback? */
+		}
+		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn, &nn))
+		    != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
+			break;		/* XXX: rollback? */
+		}
+
+		/* XXX: these should always succeed here */
+		pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn, &nn);
+		pf_commit_rules(t[1], PF_RULESET_FILTER, &nn, &nn);
+		pf_commit_rules(t[2], PF_RULESET_NAT, &nn, &nn);
+		pf_commit_rules(t[3], PF_RULESET_BINAT, &nn, &nn);
+		pf_commit_rules(t[4], PF_RULESET_RDR, &nn, &nn);
+
+		if ((error = pf_clear_tables()) != 0)
+			break;
+
+#ifdef ALTQ
+		if ((error = pf_begin_altq(&t[0])) != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
+			break;
+		}
+		pf_commit_altq(t[0]);
+#endif
+
+		pf_clear_states();
+
+		pf_clear_srcnodes();
+
+		/* status does not use malloced mem so no need to cleanup */
+		/* fingerprints and interfaces have their own cleanup code */
+	} while (0);
+
+	return (error);
+}
+
+static int
+pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir)
+{
+	/*
+	 * DragonFly's version of pf uses FreeBSD's native host byte ordering
+	 * for ip_len/ip_off. This is why we don't have to change byte order
+	 * like the FreeBSD-5 version does.
+	 */
+	int chk;
+
+	chk = pf_test(PF_IN, ifp, m);
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+	return chk;
+}
+
+static int
+pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir)
+{
+	/*
+	 * DragonFly's version of pf uses FreeBSD's native host byte ordering
+	 * for ip_len/ip_off. This is why we don't have to change byte order
+	 * like the FreeBSD-5 version does.
+	 */
+	int chk;
+
+	/* We need a proper checksum before we start (see OpenBSD ip_output) */
+	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+		in_delayed_cksum(*m);
+		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+	chk = pf_test(PF_OUT, ifp, m);
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+	return chk;
+}
+
+#ifdef INET6
+static int
+pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir)
+{
+	/*
+	 * IPv6 is not affected by ip_len/ip_off byte order changes.
+	 */
+	int chk;
+
+	chk = pf_test6(PF_IN, ifp, m);
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+	return chk;
+}
+
+static int
+pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir)
+{
+	/*
+	 * IPv6 is not affected by ip_len/ip_off byte order changes.
+	 */
+	int chk;
+
+	/* We need a proper checksum before we start (see OpenBSD ip_output) */
+	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+		in_delayed_cksum(*m);
+		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+	chk = pf_test6(PF_OUT, ifp, m);
+	if (chk && *m) {
+		m_freem(*m);
+		*m = NULL;
+	}
+	return chk;
+}
+#endif /* INET6 */
+
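+/*
+ * hook_pf()/dehook_pf() attach and detach the four checkers above to
+ * the inet and inet6 pfil(9) heads; pf_pfil_hooked guards against
+ * double (de)registration.
+ */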
+static int
+hook_pf(void)
+{
+	struct pfil_head *pfh_inet;
+#ifdef INET6
+	struct pfil_head *pfh_inet6;
+#endif
+
+	if (pf_pfil_hooked)
+		return (0);
+
+	pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+	if (pfh_inet == NULL)
+		return (ENODEV);
+	pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
+	pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+#ifdef INET6
+	pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+	if (pfh_inet6 == NULL) {
+		pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+		    pfh_inet);
+		pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+		    pfh_inet);
+		return (ENODEV);
+	}
+	pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
+	pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+#endif
+
+	pf_pfil_hooked = 1;
+	return (0);
+}
+
+static int
+dehook_pf(void)
+{
+	struct pfil_head *pfh_inet;
+#ifdef INET6
+	struct pfil_head *pfh_inet6;
+#endif
+
+	if (pf_pfil_hooked == 0)
+		return (0);
+
+	pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
+	if (pfh_inet == NULL)
+		return (ENODEV);
+	pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
+	    pfh_inet);
+	pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
+	    pfh_inet);
+#ifdef INET6
+	pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+	if (pfh_inet6 == NULL)
+		return (ENODEV);
+	pfil_remove_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK,
+	    pfh_inet6);
+	pfil_remove_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK,
+	    pfh_inet6);
+#endif
+
+	pf_pfil_hooked = 0;
+	return (0);
+}
+
+static int
+pf_load(void)
+{
+	int error;
+
+	init_zone_var();
+	error = cdevsw_add(&pf_cdevsw, 0, 0);
+	if (error)
+		return (error);
+	pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
+	error = pfattach();
+	if (error) {
+		cdevsw_remove(&pf_cdevsw, 0, 0);
+		return (error);
+	}
+	return (0);
+}
+
+static int
+pf_unload(void)
+{
+	int error;
+
+	pf_status.running = 0;
+	error = dehook_pf();
+	if (error) {
+		/*
+		 * Should not happen!
+		 * XXX Due to error code ESRCH, kldunload will show
+		 * a message like 'No such process'.
+		 */
+		printf("pfil unregistration failed\n");
+		return error;
+	}
+	shutdown_pf();
+	pfi_cleanup();
+	pf_osfp_flush();
+	pf_osfp_cleanup();
+	cleanup_pf_zone();
+	cdevsw_remove(&pf_cdevsw, 0, 0);
+	return 0;
+}
+
+static int
+pf_modevent(module_t mod, int type, void *data)
+{
+	int error = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		error = pf_load();
+		break;
+
+	case MOD_UNLOAD:
+		error = pf_unload();
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
+
+static moduledata_t pf_mod = {
+	"pf",
+	pf_modevent,
+	0
+};
+
+DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST);
+MODULE_VERSION(pf, PF_MODVER);
diff --git a/sys/net/pf/pf_norm.c b/sys/net/pf/pf_norm.c
new file mode 100644
index 0000000000..71d90b484c
--- /dev/null
+++ b/sys/net/pf/pf_norm.c
@@ -0,0 +1,1566 @@
+/*	$FreeBSD: src/sys/contrib/pf/net/pf_norm.c,v 1.10 2004/08/14 15:32:40 dwmalone Exp $ */
+/*	$OpenBSD: pf_norm.c,v 1.80.2.1 2004/04/30 21:46:33 brad Exp $ */
+/* add	$OpenBSD: pf_norm.c,v 1.87 2004/05/11 07:34:11 dhartmei Exp $ */
+/*	$DragonFly: src/sys/net/pf/pf_norm.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */
+
+/*
+ * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
+ *
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/filio.h>
+#include <sys/fcntl.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+#include <sys/time.h>
+#include <vm/vm_zone.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/bpf.h>
+#include <net/route.h>
+#include <net/pf/if_pflog.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif /* INET6 */
+
+#include <net/pf/pfvar.h>
+
+#ifdef INET6
+/*
+ * XXX: This should go to netinet/ip6.h (KAME)
+ */
+/* IPv6 options: common part */
+struct ip6_opt {
+	u_int8_t	ip6o_type;
+	u_int8_t	ip6o_len;
+} __packed;
+
+/* Jumbo Payload Option */
+struct ip6_opt_jumbo {
+	u_int8_t	ip6oj_type;
+	u_int8_t	ip6oj_len;
+	u_int8_t	ip6oj_jumbo_len[4];
+} __packed;
+
+/* NSAP Address Option */
+struct ip6_opt_nsap {
+	u_int8_t	ip6on_type;
+	u_int8_t	ip6on_len;
+	u_int8_t	ip6on_src_nsap_len;
+	u_int8_t	ip6on_dst_nsap_len;
+	/* followed by source NSAP */
+	/* followed by destination NSAP */
+} __packed;
+
+/* Tunnel Limit Option */
+struct ip6_opt_tunnel {
+	u_int8_t	ip6ot_type;
+	u_int8_t	ip6ot_len;
+	u_int8_t	ip6ot_encap_limit;
+} __packed;
+
+/* Router Alert Option */
+struct ip6_opt_router {
+	u_int8_t	ip6or_type;
+	u_int8_t	ip6or_len;
+	u_int8_t	ip6or_value[2];
+} __packed;
+#endif /* INET6 */
+
+#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
+#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
+#define PFFRAG_DROP	0x0004		/* Drop all fragments */
+#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
+
+TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;
+TAILQ_HEAD(pf_cachequeue, pf_fragment)	pf_cachequeue;
+
+static int	pf_frag_compare(struct pf_fragment *,
+		    struct pf_fragment *);
+RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
+RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
+RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
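+/*
+ * Two kinds of fragment state are kept: fully buffered reassembly
+ * queues (pf_fragqueue/pf_frag_tree) and the non-buffering fragment
+ * cache (pf_cachequeue/pf_cache_tree) used by the crop/drop scrub
+ * modes.
+ */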
+/* Private prototypes */
+void			 pf_ip2key(struct pf_fragment *, struct ip *);
+void			 pf_remove_fragment(struct pf_fragment *);
+void			 pf_flush_fragments(void);
+void			 pf_free_fragment(struct pf_fragment *);
+struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
+struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
+			    struct pf_frent *, int);
+struct mbuf		*pf_fragcache(struct mbuf **, struct ip *,
+			    struct pf_fragment **, int, int, int *);
+u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t);
+int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
+			    struct tcphdr *, int);
+
+#define DPFPRINTF(x)	if (pf_status.debug >= PF_DEBUG_MISC) \
+			{ printf("%s: ", __func__); printf x ;}
+
+/* Globals */
+vm_zone_t		 pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
+vm_zone_t		 pf_state_scrub_pl;
+int			 pf_nfrents, pf_ncache;
+
+void
+pf_normalize_init(void)
+{
+	/* XXX
+	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
+	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
+	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
+	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
+	*/
+
+	TAILQ_INIT(&pf_fragqueue);
+	TAILQ_INIT(&pf_cachequeue);
+}
+
+static int
+pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
+{
+	int diff;
+
+	if ((diff = a->fr_id - b->fr_id))
+		return (diff);
+	else if ((diff = a->fr_p - b->fr_p))
+		return (diff);
+	else if (a->fr_src.s_addr < b->fr_src.s_addr)
+		return (-1);
+	else if (a->fr_src.s_addr > b->fr_src.s_addr)
+		return (1);
+	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
+		return (-1);
+	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
+		return (1);
+	return (0);
+}
+
+void
+pf_purge_expired_fragments(void)
+{
+	struct pf_fragment *frag;
+	u_int32_t expire = time_second -
+	    pf_default_rule.timeout[PFTM_FRAG];
+
+	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
+		KASSERT((BUFFER_FRAGMENTS(frag)),
+		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
+		if (frag->fr_timeout > expire)
+			break;
+
+		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+		pf_free_fragment(frag);
+	}
+
+	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
+		KASSERT((!BUFFER_FRAGMENTS(frag)),
+		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
+		if (frag->fr_timeout > expire)
+			break;
+
+		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
+		pf_free_fragment(frag);
+		KASSERT((TAILQ_EMPTY(&pf_cachequeue) ||
+		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag),
+		    ("!(TAILQ_EMPTY() || TAILQ_LAST() == frag): %s",
+		    __FUNCTION__));
+	}
+}
+
+/*
+ * Try to flush old fragments to make space for new ones
+ */
+void
+pf_flush_fragments(void)
+{
+	struct pf_fragment *frag;
+	int goal;
+
+	goal = pf_nfrents * 9 / 10;
+	DPFPRINTF(("trying to free > %d frents\n",
+	    pf_nfrents - goal));
+	while (goal < pf_nfrents) {
+		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
+		if (frag == NULL)
+			break;
+		pf_free_fragment(frag);
+	}
+
+	goal = pf_ncache * 9 / 10;
+	DPFPRINTF(("trying to free > %d cache entries\n",
+	    pf_ncache - goal));
+	while (goal < pf_ncache) {
+		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
+		if (frag == NULL)
+			break;
+		pf_free_fragment(frag);
+	}
+}
+
+/* Frees the fragments and all associated entries */
+void
+pf_free_fragment(struct pf_fragment *frag)
+{
+	struct pf_frent		*frent;
+	struct pf_frcache	*frcache;
+
+	/* Free all fragments */
+	if (BUFFER_FRAGMENTS(frag)) {
+		for (frent = LIST_FIRST(&frag->fr_queue); frent;
+		    frent = LIST_FIRST(&frag->fr_queue)) {
+			LIST_REMOVE(frent, fr_next);
+
+			m_freem(frent->fr_m);
+			pool_put(&pf_frent_pl, frent);
+			pf_nfrents--;
+		}
+	} else {
+		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
+		    frcache = LIST_FIRST(&frag->fr_cache)) {
+			LIST_REMOVE(frcache, fr_next);
+
+			KASSERT((LIST_EMPTY(&frag->fr_cache) ||
+			    LIST_FIRST(&frag->fr_cache)->fr_off >
+			    frcache->fr_end),
+			    ("!
(LIST_EMPTY() || LIST_FIRST()->fr_off >" + " frcache->fr_end): %s", __FUNCTION__)); + + pool_put(&pf_cent_pl, frcache); + pf_ncache--; + } + } + + pf_remove_fragment(frag); +} + +void +pf_ip2key(struct pf_fragment *key, struct ip *ip) +{ + key->fr_p = ip->ip_p; + key->fr_id = ip->ip_id; + key->fr_src.s_addr = ip->ip_src.s_addr; + key->fr_dst.s_addr = ip->ip_dst.s_addr; +} + +struct pf_fragment * +pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +{ + struct pf_fragment key; + struct pf_fragment *frag; + + pf_ip2key(&key, ip); + + frag = RB_FIND(pf_frag_tree, tree, &key); + if (frag != NULL) { + /* XXX Are we sure we want to update the timeout? */ + frag->fr_timeout = time_second; + if (BUFFER_FRAGMENTS(frag)) { + TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); + } else { + TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); + TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); + } + } + + return (frag); +} + +/* Removes a fragment from the fragment queue and frees the fragment */ + +void +pf_remove_fragment(struct pf_fragment *frag) +{ + if (BUFFER_FRAGMENTS(frag)) { + RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); + TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); + pool_put(&pf_frag_pl, frag); + } else { + RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); + TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); + pool_put(&pf_cache_pl, frag); + } +} + +#define FR_IP_OFF(fr) (((fr)->fr_ip->ip_off & IP_OFFMASK) << 3) +struct mbuf * +pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, + struct pf_frent *frent, int mff) +{ + struct mbuf *m = *m0, *m2; + struct pf_frent *frea, *next; + struct pf_frent *frep = NULL; + struct ip *ip = frent->fr_ip; + int hlen = ip->ip_hl << 2; + u_int16_t off = (ip->ip_off & IP_OFFMASK) << 3; + u_int16_t ip_len = ip->ip_len - ip->ip_hl * 4; + u_int16_t max = ip_len + off; + + KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)), + ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); + + /* Strip off ip header */ + m->m_data += hlen; + m->m_len -= hlen; + + /* Create a new reassembly queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) + goto drop_fragment; + } + + (*frag)->fr_flags = 0; + (*frag)->fr_max = 0; + (*frag)->fr_src = frent->fr_ip->ip_src; + (*frag)->fr_dst = frent->fr_ip->ip_dst; + (*frag)->fr_p = frent->fr_ip->ip_p; + (*frag)->fr_id = frent->fr_ip->ip_id; + (*frag)->fr_timeout = time_second; + LIST_INIT(&(*frag)->fr_queue); + + RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); + TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); + + /* We do not have a previous fragment */ + frep = NULL; + goto insert; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. 
+	 */
+	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
+		if (FR_IP_OFF(frea) > off)
+			break;
+		frep = frea;
+	}
+
+	KASSERT((frep != NULL || frea != NULL),
+	    ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));
+
+	if (frep != NULL &&
+	    FR_IP_OFF(frep) + frep->fr_ip->ip_len - frep->fr_ip->ip_hl *
+	    4 > off)
+	{
+		u_int16_t	precut;
+
+		precut = FR_IP_OFF(frep) + frep->fr_ip->ip_len -
+		    frep->fr_ip->ip_hl * 4 - off;
+		if (precut >= ip_len)
+			goto drop_fragment;
+		m_adj(frent->fr_m, precut);
+		DPFPRINTF(("overlap -%d\n", precut));
+		/* Enforce 8 byte boundaries */
+		ip->ip_off = ip->ip_off + (precut >> 3);
+		off = (ip->ip_off & IP_OFFMASK) << 3;
+		ip_len -= precut;
+		ip->ip_len = ip_len;
+	}
+
+	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
+	    frea = next)
+	{
+		u_int16_t	aftercut;
+
+		aftercut = ip_len + off - FR_IP_OFF(frea);
+		DPFPRINTF(("adjust overlap %d\n", aftercut));
+		if (aftercut < frea->fr_ip->ip_len - frea->fr_ip->ip_hl
+		    * 4)
+		{
+			frea->fr_ip->ip_len =
+			    frea->fr_ip->ip_len - aftercut;
+			frea->fr_ip->ip_off = frea->fr_ip->ip_off +
+			    (aftercut >> 3);
+			m_adj(frea->fr_m, aftercut);
+			break;
+		}
+
+		/* This fragment is completely overlapped, lose it */
+		next = LIST_NEXT(frea, fr_next);
+		m_freem(frea->fr_m);
+		LIST_REMOVE(frea, fr_next);
+		pool_put(&pf_frent_pl, frea);
+		pf_nfrents--;
+	}
+
+ insert:
+	/* Update maximum data size */
+	if ((*frag)->fr_max < max)
+		(*frag)->fr_max = max;
+	/* This is the last segment */
+	if (!mff)
+		(*frag)->fr_flags |= PFFRAG_SEENLAST;
+
+	if (frep == NULL)
+		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
+	else
+		LIST_INSERT_AFTER(frep, frent, fr_next);
+
+	/* Check if we are completely reassembled */
+	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
+		return (NULL);
+
+	/* Check if we have all the data */
+	off = 0;
+	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
+		next = LIST_NEXT(frep, fr_next);
+
+		off += frep->fr_ip->ip_len - frep->fr_ip->ip_hl * 4;
+		if (off < (*frag)->fr_max &&
+		    (next == NULL || FR_IP_OFF(next) != off))
+		{
+			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
+			    off, next == NULL ?
-1 : FR_IP_OFF(next), + (*frag)->fr_max)); + return (NULL); + } + } + DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); + if (off < (*frag)->fr_max) + return (NULL); + + /* We have all the data */ + frent = LIST_FIRST(&(*frag)->fr_queue); + KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__)); + if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d\n", off)); + pf_free_fragment(*frag); + *frag = NULL; + return (NULL); + } + next = LIST_NEXT(frent, fr_next); + + /* Magic from ip_input */ + ip = frent->fr_ip; + m = frent->fr_m; + m2 = m->m_next; + m->m_next = NULL; + m_cat(m, m2); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + for (frent = next; frent != NULL; frent = next) { + next = LIST_NEXT(frent, fr_next); + + m2 = frent->fr_m; + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + m_cat(m, m2); + } + + ip->ip_src = (*frag)->fr_src; + ip->ip_dst = (*frag)->fr_dst; + + /* Remove from fragment queue */ + pf_remove_fragment(*frag); + *frag = NULL; + + hlen = ip->ip_hl << 2; + ip->ip_len = off + hlen; + m->m_len += hlen; + m->m_data -= hlen; + + /* some debugging cruft by sklower, below, will go away soon */ + /* XXX this should be done elsewhere */ + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m2 = m; m2; m2 = m2->m_next) + plen += m2->m_len; + m->m_pkthdr.len = plen; + } + + DPFPRINTF(("complete: %p(%d)\n", m, ip->ip_len)); + return (m); + + drop_fragment: + /* Oops - fail safe - drop packet */ + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + m_freem(m); + return (NULL); +} + +struct mbuf * +pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, + int drop, int *nomem) +{ + struct mbuf *m = *m0; + struct pf_frcache *frp, *fra, *cur = NULL; + int ip_len = h->ip_len - (h->ip_hl << 2); + u_int16_t off = h->ip_off << 3; + u_int16_t max = ip_len + off; + int hosed = 0; + + KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)), + ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__)); + + /* Create a new range queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) + goto no_mem; + } + + /* Get an entry for the queue */ + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + pool_put(&pf_cache_pl, *frag); + *frag = NULL; + goto no_mem; + } + pf_ncache++; + + (*frag)->fr_flags = PFFRAG_NOBUFFER; + (*frag)->fr_max = 0; + (*frag)->fr_src = h->ip_src; + (*frag)->fr_dst = h->ip_dst; + (*frag)->fr_p = h->ip_p; + (*frag)->fr_id = h->ip_id; + (*frag)->fr_timeout = time_second; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INIT(&(*frag)->fr_cache); + LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); + + RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); + + DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max)); + + goto pass; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. 
+	 */
+	frp = NULL;
+	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
+		if (fra->fr_off > off)
+			break;
+		frp = fra;
+	}
+
+	KASSERT((frp != NULL || fra != NULL),
+	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));
+
+	if (frp != NULL) {
+		int	precut;
+
+		precut = frp->fr_end - off;
+		if (precut >= ip_len) {
+			/* Fragment is entirely a duplicate */
+			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
+			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
+			goto drop_fragment;
+		}
+		if (precut == 0) {
+			/* They are adjacent.  Fixup cache entry */
+			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
+			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
+			frp->fr_end = max;
+		} else if (precut > 0) {
+			/* The first part of this payload overlaps with a
+			 * fragment that has already been passed.
+			 * Need to trim off the first part of the payload.
+			 * But to do so easily, we need to create another
+			 * mbuf to throw the original header into.
+			 */
+
+			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
+			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
+			    max));
+
+			off += precut;
+			max -= precut;
+			/* Update the previous frag to encompass this one */
+			frp->fr_end = max;
+
+			if (!drop) {
+				/* XXX Optimization opportunity
+				 * This is a very heavy way to trim the payload.
+				 * we could do it much faster by diddling mbuf
+				 * internals but that would be even less legible
+				 * than this mbuf magic.  For my next trick,
+				 * I'll pull a rabbit out of my laptop.
+				 */
+				*m0 = m_dup(m, MB_DONTWAIT);
+				if (*m0 == NULL)
+					goto no_mem;
+				/* From KAME Project : We have missed this! */
+				m_adj(*m0, (h->ip_hl << 2) -
+				    (*m0)->m_pkthdr.len);
+				KASSERT(((*m0)->m_next == NULL),
+				    ("(*m0)->m_next != NULL: %s",
+				    __FUNCTION__));
+				m_adj(m, precut + (h->ip_hl << 2));
+				m_cat(*m0, m);
+				m = *m0;
+				if (m->m_flags & M_PKTHDR) {
+					int plen = 0;
+					struct mbuf *t;
+					for (t = m; t; t = t->m_next)
+						plen += t->m_len;
+					m->m_pkthdr.len = plen;
+				}
+
+				h = mtod(m, struct ip *);
+
+				KASSERT(((int)m->m_len ==
+				    h->ip_len - precut),
+				    ("m->m_len != h->ip_len - precut: %s",
+				    __FUNCTION__));
+				h->ip_off = h->ip_off +
+				    (precut >> 3);
+				h->ip_len = h->ip_len - precut;
+			} else {
+				hosed++;
+			}
+		} else {
+			/* There is a gap between fragments */
+
+			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
+			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
+			    max));
+
+			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
+			if (cur == NULL)
+				goto no_mem;
+			pf_ncache++;
+
+			cur->fr_off = off;
+			cur->fr_end = max;
+			LIST_INSERT_AFTER(frp, cur, fr_next);
+		}
+	}
+
+	if (fra != NULL) {
+		int	aftercut;
+		int	merge = 0;
+
+		aftercut = max - fra->fr_off;
+		if (aftercut == 0) {
+			/* Adjacent fragments */
+			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
+			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
+			fra->fr_off = off;
+			merge = 1;
+		} else if (aftercut > 0) {
+			/* Need to chop off the tail of this fragment */
+			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
+			    h->ip_id, aftercut, off, max, fra->fr_off,
+			    fra->fr_end));
+			fra->fr_off = off;
+			max -= aftercut;
+
+			merge = 1;
+
+			if (!drop) {
+				m_adj(m, -aftercut);
+				if (m->m_flags & M_PKTHDR) {
+					int plen = 0;
+					struct mbuf *t;
+					for (t = m; t; t = t->m_next)
+						plen += t->m_len;
+					m->m_pkthdr.len = plen;
+				}
+				h = mtod(m, struct ip *);
+				KASSERT(((int)m->m_len == h->ip_len - aftercut),
+				    ("m->m_len != h->ip_len - aftercut: %s",
+				    __FUNCTION__));
+				h->ip_len = h->ip_len - aftercut;
+			} else {
+				hosed++;
+			}
+		} else {
+			/* There is a gap between fragments */
+			DPFPRINTF(("fragcache[%d]: gap %d %d-%d
(%d-%d)\n", + h->ip_id, -aftercut, off, max, fra->fr_off, + fra->fr_end)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) + goto no_mem; + pf_ncache++; + + cur->fr_off = off; + cur->fr_end = max; + LIST_INSERT_BEFORE(fra, cur, fr_next); + } + + + /* Need to glue together two separate fragment descriptors */ + if (merge) { + if (cur && fra->fr_off <= cur->fr_end) { + /* Need to merge in a previous 'cur' */ + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, cur->fr_off, cur->fr_end, off, + max, fra->fr_off, fra->fr_end)); + fra->fr_off = cur->fr_off; + LIST_REMOVE(cur, fr_next); + pool_put(&pf_cent_pl, cur); + pf_ncache--; + cur = NULL; + + } else if (frp && fra->fr_off <= frp->fr_end) { + /* Need to merge in a modified 'frp' */ + KASSERT((cur == NULL), ("cur != NULL: %s", + __FUNCTION__)); + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, frp->fr_off, frp->fr_end, off, + max, fra->fr_off, fra->fr_end)); + fra->fr_off = frp->fr_off; + LIST_REMOVE(frp, fr_next); + pool_put(&pf_cent_pl, frp); + pf_ncache--; + frp = NULL; + + } + } + } + + if (hosed) { + /* + * We must keep tracking the overall fragment even when + * we're going to drop it anyway so that we know when to + * free the overall descriptor. Thus we drop the frag late. + */ + goto drop_fragment; + } + + + pass: + /* Update maximum data size */ + if ((*frag)->fr_max < max) + (*frag)->fr_max = max; + + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + /* Check if we are completely reassembled */ + if (((*frag)->fr_flags & PFFRAG_SEENLAST) && + LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { + /* Remove from fragment queue */ + DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, + (*frag)->fr_max)); + pf_free_fragment(*frag); + *frag = NULL; + } + + return (m); + + no_mem: + *nomem = 1; + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + m_freem(m); + return (NULL); + + drop_fragment: + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (drop) { + /* This fragment has been deemed bad. 
Don't reass */ + if (((*frag)->fr_flags & PFFRAG_DROP) == 0) + DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", + h->ip_id)); + (*frag)->fr_flags |= PFFRAG_DROP; + } + + m_freem(m); + return (NULL); +} + +int +pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct pf_frent *frent; + struct pf_fragment *frag = NULL; + struct ip *h = mtod(m, struct ip *); + int mff = (h->ip_off & IP_MF); + int hlen = h->ip_hl << 2; + u_int16_t fragoff = (h->ip_off & IP_OFFMASK) << 3; + u_int16_t max; + int ip_len; + int ip_off; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != h->ip_p) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL) + return (PF_PASS); + else + r->packets++; + + /* Check for illegal packets */ + if (hlen < (int)sizeof(struct ip)) + goto drop; + + if (hlen > h->ip_len) + goto drop; + + /* Clear IP_DF if the rule uses the no-df option */ + if (r->rule_flag & PFRULE_NODF) + h->ip_off &= ~IP_DF; + + /* We will need other tests here */ + if (!fragoff && !mff) + goto no_fragment; + + /* We're dealing with a fragment now. Don't allow fragments + * with IP_DF to enter the cache. If the flag was cleared by + * no-df above, fine. Otherwise drop it. + */ + if (h->ip_off & IP_DF) { + DPFPRINTF(("IP_DF\n")); + goto bad; + } + + ip_len = h->ip_len - hlen; + ip_off = (h->ip_off & IP_OFFMASK) << 3; + + /* All fragments are 8 byte aligned */ + if (mff && (ip_len & 0x7)) { + DPFPRINTF(("mff and %d\n", ip_len)); + goto bad; + } + + /* Respect maximum length */ + if (fragoff + ip_len > IP_MAXPACKET) { + DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + goto bad; + } + max = fragoff + ip_len; + + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { + /* Fully buffer all of the fragments */ + + frag = pf_find_fragment(h, &pf_frag_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + max > frag->fr_max) + goto bad; + + /* Get an entry for the fragment queue */ + frent = pool_get(&pf_frent_pl, PR_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + pf_nfrents++; + frent->fr_ip = h; + frent->fr_m = m; + + /* Might return a completely reassembled mbuf, or NULL */ + DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); + *m0 = m = pf_reassemble(m0, &frag, frent, mff); + + if (m == NULL) + return (PF_DROP); + + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + + h = mtod(m, struct ip *); + } else { + /* non-buffering fragment cache (drops or masks overlaps) */ + int nomem = 0; + + if (dir == PF_OUT) { + if (m->m_pkthdr.pf_flags & PF_MBUF_FRAGCACHE) { + /* Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. 
+ */ + goto fragment_pass; + } + } + + frag = pf_find_fragment(h, &pf_cache_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + max > frag->fr_max) { + if (r->rule_flag & PFRULE_FRAGDROP) + frag->fr_flags |= PFFRAG_DROP; + goto bad; + } + + *m0 = m = pf_fragcache(m0, h, &frag, mff, + (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem); + if (m == NULL) { + if (nomem) + goto no_mem; + goto drop; + } + + if (dir == PF_IN) + m->m_pkthdr.pf_flags |= PF_MBUF_FRAGCACHE; + + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + goto fragment_pass; + } + + no_fragment: + /* At this point, only IP_DF is allowed in ip_off */ + h->ip_off &= IP_DF; + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip_ttl < r->min_ttl) + h->ip_ttl = r->min_ttl; + + if (r->rule_flag & PFRULE_RANDOMID) { +#ifdef RANDOM_IP_ID + h->ip_id = ip_randomid(); +#else + h->ip_id = htons(ip_id++); +#endif + } + + return (PF_PASS); + + fragment_pass: + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip_ttl < r->min_ttl) + h->ip_ttl = r->min_ttl; + + return (PF_PASS); + + no_mem: + REASON_SET(reason, PFRES_MEMORY); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + bad: + DPFPRINTF(("dropping bad fragment\n")); + + /* Free associated fragments */ + if (frag != NULL) + pf_free_fragment(frag); + + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + + return (PF_DROP); +} + +#ifdef INET6 +int +pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, + u_short *reason) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int off; + struct ip6_ext ext; + struct ip6_opt opt; + struct ip6_opt_jumbo jumbo; + struct ip6_frag frag; + u_int32_t jumbolen = 0, plen; + u_int16_t fragoff = 0; + int optend; + int ooff; + u_int8_t proto; + int terminal; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET6) + r = r->skip[PF_SKIP_AF].ptr; +#if 0 /* header chain! 
*/ + else if (r->proto && r->proto != h->ip6_nxt) + r = r->skip[PF_SKIP_PROTO].ptr; +#endif + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL) + return (PF_PASS); + else + r->packets++; + + /* Check for illegal packets */ + if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) + goto drop; + + off = sizeof(struct ip6_hdr); + proto = h->ip6_nxt; + terminal = 0; + do { + switch (proto) { + case IPPROTO_FRAGMENT: + goto fragment; + break; + case IPPROTO_AH: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + if (proto == IPPROTO_AH) + off += (ext.ip6e_len + 2) * 4; + else + off += (ext.ip6e_len + 1) * 8; + proto = ext.ip6e_nxt; + break; + case IPPROTO_HOPOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + optend = off + (ext.ip6e_len + 1) * 8; + ooff = off + sizeof(ext); + do { + if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, + sizeof(opt.ip6o_type), NULL, NULL, + AF_INET6)) + goto shortpkt; + if (opt.ip6o_type == IP6OPT_PAD1) { + ooff++; + continue; + } + if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt), + NULL, NULL, AF_INET6)) + goto shortpkt; + if (ooff + sizeof(opt) + opt.ip6o_len > optend) + goto drop; + switch (opt.ip6o_type) { + case IP6OPT_JUMBO: + if (h->ip6_plen != 0) + goto drop; + if (!pf_pull_hdr(m, ooff, &jumbo, + sizeof(jumbo), NULL, NULL, + AF_INET6)) + goto shortpkt; + memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, + sizeof(jumbolen)); + jumbolen = ntohl(jumbolen); + if (jumbolen <= IPV6_MAXPACKET) + goto drop; + if (sizeof(struct ip6_hdr) + jumbolen != + m->m_pkthdr.len) + goto drop; + break; + default: + break; + } + ooff += sizeof(opt) + opt.ip6o_len; + } while (ooff < optend); + + off = optend; + proto = ext.ip6e_nxt; + break; + default: + terminal = 1; + break; + } + } while (!terminal); + + /* jumbo payload option must be present, or plen > 0 */ + if (ntohs(h->ip6_plen) == 0) + plen = jumbolen; + else + plen = ntohs(h->ip6_plen); + if (plen == 0) + goto drop; + if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len) + goto shortpkt; + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip6_hlim < r->min_ttl) + h->ip6_hlim = r->min_ttl; + + return (PF_PASS); + + fragment: + if (ntohs(h->ip6_plen) == 0 || jumbolen) + goto drop; + plen = ntohs(h->ip6_plen); + + if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6)) + goto shortpkt; + fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); + if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET) + goto badfrag; + + /* do something about it */ + return (PF_PASS); + + shortpkt: + REASON_SET(reason, PFRES_SHORT); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + return (PF_DROP); + + badfrag: + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + return (PF_DROP); +} +#endif + +int +pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, + int off, void *h, struct pf_pdesc *pd) +{ + struct pf_rule *r, *rm = NULL; + struct 
tcphdr *th = pd->hdr.tcp; + int rewrite = 0; + u_short reason; + u_int8_t flags; + sa_family_t af = pd->af; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (r->kif != NULL && + (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + rm = r; + break; + } + } + + if (rm == NULL) + return (PF_PASS); + else + r->packets++; + + if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) + pd->flags |= PFDESC_TCP_NORM; + + flags = th->th_flags; + if (flags & TH_SYN) { + /* Illegal packet */ + if (flags & TH_RST) + goto tcp_drop; + + if (flags & TH_FIN) + flags &= ~TH_FIN; + } else { + /* Illegal packet */ + if (!(flags & (TH_ACK|TH_RST))) + goto tcp_drop; + } + + if (!(flags & TH_ACK)) { + /* These flags are only valid if ACK is set */ + if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) + goto tcp_drop; + } + + /* Check for illegal header length */ + if (th->th_off < (sizeof(struct tcphdr) >> 2)) + goto tcp_drop; + + /* If flags changed, or reserved data set, then adjust */ + if (flags != th->th_flags || th->th_x2 != 0) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)(&th->th_ack + 1); + th->th_flags = flags; + th->th_x2 = 0; + nv = *(u_int16_t *)(&th->th_ack + 1); + + th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv); + rewrite = 1; + } + + /* Remove urgent pointer, if TH_URG is not set */ + if (!(flags & TH_URG) && th->th_urp) { + th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0); + th->th_urp = 0; + rewrite = 1; + } + + /* Process options */ + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off)) + rewrite = 1; + + /* copy back packet headers if we sanitized */ + if (rewrite) + m_copyback(m, off, sizeof(*th), (caddr_t)th); + + return (PF_PASS); + + tcp_drop: + REASON_SET(&reason, PFRES_NORM); + if (rm != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); + return (PF_DROP); +} + +int +pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) +{ + u_int8_t hdr[60]; + u_int8_t *opt; + + KASSERT((src->scrub == NULL), + ("pf_normalize_tcp_init: src->scrub != NULL")); + + src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (src->scrub == NULL) + return (1); + bzero(src->scrub, sizeof(*src->scrub)); + + switch (pd->af) { +#ifdef INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + src->scrub->pfss_ttl = h->ip_ttl; + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + src->scrub->pfss_ttl = h->ip6_hlim; + break; + } +#endif /* INET6 */ + } 
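+	/*
+	 * The TTL recorded above is the baseline that
+	 * pf_normalize_tcp_stateful() keeps raising to the highest value
+	 * seen, rewriting packets that arrive with a smaller TTL.
+	 */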
+
+	/*
+	 * All normalizations below are only begun if we see the start of
+	 * the connection.  They must all set an enabled bit in pfss_flags
+	 */
+	if ((th->th_flags & TH_SYN) == 0)
+		return (0);
+
+	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
+	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
+		/* Diddle with TCP options */
+		int hlen;
+		opt = hdr + sizeof(struct tcphdr);
+		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
+		while (hlen >= TCPOLEN_TIMESTAMP) {
+			switch (*opt) {
+			case TCPOPT_EOL:	/* FALLTHROUGH */
+			case TCPOPT_NOP:
+				opt++;
+				hlen--;
+				break;
+			case TCPOPT_TIMESTAMP:
+				if (opt[1] >= TCPOLEN_TIMESTAMP) {
+					src->scrub->pfss_flags |=
+					    PFSS_TIMESTAMP;
+					src->scrub->pfss_ts_mod = arc4random();
+				}
+				/* FALLTHROUGH */
+			default:
+				hlen -= MAX(opt[1], 2);
+				opt += MAX(opt[1], 2);
+				break;
+			}
+		}
+	}
+
+	return (0);
+}
+
+void
+pf_normalize_tcp_cleanup(struct pf_state *state)
+{
+	if (state->src.scrub)
+		pool_put(&pf_state_scrub_pl, state->src.scrub);
+	if (state->dst.scrub)
+		pool_put(&pf_state_scrub_pl, state->dst.scrub);
+
+	/* Someday... flush the TCP segment reassembly descriptors. */
+}
+
+int
+pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
+    u_short *reason, struct tcphdr *th, struct pf_state_peer *src,
+    struct pf_state_peer *dst, int *writeback)
+{
+	u_int8_t hdr[60];
+	u_int8_t *opt;
+	int copyback = 0;
+
+	KASSERT((src->scrub || dst->scrub),
+	    ("pf_normalize_tcp_stateful: src->scrub && dst->scrub!"));
+
+	/*
+	 * Enforce the minimum TTL seen for this connection.  Negate a common
+	 * technique to evade an intrusion detection system and confuse
+	 * firewall state code.
+	 */
+	switch (pd->af) {
+#ifdef INET
+	case AF_INET: {
+		if (src->scrub) {
+			struct ip *h = mtod(m, struct ip *);
+			if (h->ip_ttl > src->scrub->pfss_ttl)
+				src->scrub->pfss_ttl = h->ip_ttl;
+			h->ip_ttl = src->scrub->pfss_ttl;
+		}
+		break;
+	}
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6: {
+		if (src->scrub) {
+			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+			if (h->ip6_hlim > src->scrub->pfss_ttl)
+				src->scrub->pfss_ttl = h->ip6_hlim;
+			h->ip6_hlim = src->scrub->pfss_ttl;
+		}
+		break;
+	}
+#endif /* INET6 */
+	}
+
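+	/*
+	 * Rewrite TCP timestamp options in both directions using the
+	 * per-peer random modulators picked at connection start; this
+	 * hides the host's real timestamp clock from passive observers.
+	 */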
+	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
+	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
+	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
+	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
+		/* Diddle with TCP options */
+		int hlen;
+		opt = hdr + sizeof(struct tcphdr);
+		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
+		while (hlen >= TCPOLEN_TIMESTAMP) {
+			switch (*opt) {
+			case TCPOPT_EOL:	/* FALLTHROUGH */
+			case TCPOPT_NOP:
+				opt++;
+				hlen--;
+				break;
+			case TCPOPT_TIMESTAMP:
+				/* Modulate the timestamps.  Can be used for
+				 * NAT detection, OS uptime determination or
+				 * reboot detection.
+				 */
+				if (opt[1] >= TCPOLEN_TIMESTAMP) {
+					u_int32_t ts_value;
+					if (src->scrub &&
+					    (src->scrub->pfss_flags &
+					    PFSS_TIMESTAMP)) {
+						memcpy(&ts_value, &opt[2],
+						    sizeof(u_int32_t));
+						ts_value = htonl(ntohl(ts_value)
+						    + src->scrub->pfss_ts_mod);
+						pf_change_a(&opt[2],
+						    &th->th_sum, ts_value, 0);
+						copyback = 1;
+					}
+
+					/* Modulate TS reply iff valid (!0) */
+					memcpy(&ts_value, &opt[6],
+					    sizeof(u_int32_t));
+					if (ts_value && dst->scrub &&
+					    (dst->scrub->pfss_flags &
+					    PFSS_TIMESTAMP)) {
+						ts_value = htonl(ntohl(ts_value)
+						    - dst->scrub->pfss_ts_mod);
+						pf_change_a(&opt[6],
+						    &th->th_sum, ts_value, 0);
+						copyback = 1;
+					}
+				}
+				/* FALLTHROUGH */
+			default:
+				hlen -= MAX(opt[1], 2);
+				opt += MAX(opt[1], 2);
+				break;
+			}
+		}
+		if (copyback) {
+			/* Copyback the options, caller copies back header */
+			*writeback = 1;
+			m_copyback(m, off + sizeof(struct tcphdr),
+			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
+			    sizeof(struct tcphdr));
+		}
+	}
+
+	/* I have a dream....  TCP segment reassembly.... */
+	return (0);
+}
+
+int
+pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
+    int off)
+{
+	u_int16_t	*mss;
+	int		 thoff;
+	int		 opt, cnt, optlen = 0;
+	int		 rewrite = 0;
+	u_char		*optp;
+
+	thoff = th->th_off << 2;
+	cnt = thoff - sizeof(struct tcphdr);
+	optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
+
+	for (; cnt > 0; cnt -= optlen, optp += optlen) {
+		opt = optp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < 2)
+				break;
+			optlen = optp[1];
+			if (optlen < 2 || optlen > cnt)
+				break;
+		}
+		switch (opt) {
+		case TCPOPT_MAXSEG:
+			mss = (u_int16_t *)(optp + 2);
+			if ((ntohs(*mss)) > r->max_mss) {
+				th->th_sum = pf_cksum_fixup(th->th_sum,
+				    *mss, htons(r->max_mss));
+				*mss = htons(r->max_mss);
+				rewrite = 1;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (rewrite);
+}
diff --git a/sys/net/pf/pf_osfp.c b/sys/net/pf/pf_osfp.c
new file mode 100644
index 0000000000..79b6aa8e72
--- /dev/null
+++ b/sys/net/pf/pf_osfp.c
@@ -0,0 +1,553 @@
+/*	$FreeBSD: src/sys/contrib/pf/net/pf_osfp.c,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */
+/*	$OpenBSD: pf_osfp.c,v 1.9 2004/01/04 20:08:42 pvalchev Exp $ */
+/*	$DragonFly: src/sys/net/pf/pf_osfp.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */
+
+/*
+ * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
+ *
+ * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#ifdef _KERNEL
+# include <sys/systm.h>
+# include <vm/vm_zone.h>
+#endif /* _KERNEL */
+#include <netinet/in.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+
+#include <net/if.h>
+#include <net/pf/pfvar.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif /* INET6 */
+
+
+#ifdef _KERNEL
+# define DPFPRINTF(format, x...)			\
\ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format , ##x) +typedef vm_zone_t pool_t; + +#else +/* Userland equivalents so we can lend code to tcpdump et al. */ + +# include +# include +# include +# include +# include +# define pool_t int +# define pool_get(pool, flags) malloc(*(pool)) +# define pool_put(pool, item) free(item) +# define pool_init(pool, size, a, ao, f, m, p) (*(pool)) = (size) + +# ifdef PFDEBUG +# include +# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +# else +# define DPFPRINTF(format, x...) ((void)0) +# endif /* PFDEBUG */ +#endif /* _KERNEL */ + + +SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; +pool_t pf_osfp_entry_pl; +pool_t pf_osfp_pl; + +struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, + struct pf_os_fingerprint *, u_int8_t); +struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, + struct pf_os_fingerprint *); +void pf_osfp_insert(struct pf_osfp_list *, + struct pf_os_fingerprint *); + + +#ifdef _KERNEL +/* + * Passively fingerprint the OS of the host (IPv4 TCP SYN packets only) + * Returns the list of possible OSes. + */ +struct pf_osfp_enlist * +pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, + const struct tcphdr *tcp) +{ + struct ip *ip; + char hdr[60]; + + /* XXX don't have a fingerprint database for IPv6 :-( */ + if (pd->af != PF_INET || pd->proto != IPPROTO_TCP || (tcp->th_off << 2) + < sizeof(*tcp)) + return (NULL); + + ip = mtod(m, struct ip *); + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, pd->af)) + return (NULL); + + return (pf_osfp_fingerprint_hdr(ip, (struct tcphdr *)hdr)); +} +#endif /* _KERNEL */ + +struct pf_osfp_enlist * +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) +{ + struct pf_os_fingerprint fp, *fpresult; + int cnt, optlen = 0; + const u_int8_t *optp; + + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN || (ip->ip_off & + IP_OFFMASK)) + return (NULL); + + memset(&fp, 0, sizeof(fp)); + + fp.fp_psize = ip->ip_len; + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & IP_DF) + fp.fp_flags |= PF_OSFP_DF; + fp.fp_wsize = ntohs(tcp->th_win); + + + cnt = (tcp->th_off << 2) - sizeof(*tcp); + optp = (const u_int8_t *)((const char *)tcp + sizeof(*tcp)); + for (; cnt > 0; cnt -= optlen, optp += optlen) { + if (*optp == TCPOPT_EOL) + break; + + fp.fp_optcnt++; + if (*optp == TCPOPT_NOP) { + fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_NOP; + optlen = 1; + } else { + if (cnt < 2) + return (NULL); + optlen = optp[1]; + if (optlen > cnt || optlen < 2) + return (NULL); + switch (*optp) { + case TCPOPT_MAXSEG: + if (optlen >= TCPOLEN_MAXSEG) + memcpy(&fp.fp_mss, &optp[2], + sizeof(fp.fp_mss)); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; + fp.fp_mss = ntohs(fp.fp_mss); + break; + case TCPOPT_WINDOW: + if (optlen >= TCPOLEN_WINDOW) + memcpy(&fp.fp_wscale, &optp[2], + sizeof(fp.fp_wscale)); + fp.fp_wscale = ntohs(fp.fp_wscale); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_WSCALE; + break; + case TCPOPT_SACK_PERMITTED: + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK; + break; + case TCPOPT_TIMESTAMP: + if (optlen >= TCPOLEN_TIMESTAMP) { + u_int32_t ts; + memcpy(&ts, &optp[2], sizeof(ts)); + if (ts == 0) + fp.fp_flags |= PF_OSFP_TS0; + + } + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS; + break; + default: + return (NULL); + } + } + optlen = MAX(optlen, 1); /* paranoia */ + } + + 
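+	/*
+	 * At this point fp.fp_tcpopts holds the option kinds packed
+	 * PF_OSFP_TCPOPT_BITS apiece; a SYN carrying MSS, NOP, WSCALE
+	 * in that order ends up as
+	 *   ((MSS << B | NOP) << B) | WSCALE,  B = PF_OSFP_TCPOPT_BITS
+	 * (option names shorthand for the PF_OSFP_TCPOPT_* codes).
+	 * Together with wsize, TTL, psize and the DF/TS0 flags this is
+	 * the key logged below and looked up via pf_osfp_find().
+	 */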
DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)\n", + inet_ntoa(ip->ip_src), ntohs(tcp->th_sport), + fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, + fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, + (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fp.fp_mss, + (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", + fp.fp_wscale); + + if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, + PF_OSFP_MAXTTL_OFFSET))) + return (&fpresult->fp_oses); + return (NULL); +} + +/* Match a fingerprint ID against a list of OSes */ +int +pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) +{ + struct pf_osfp_entry *entry; + int os_class, os_version, os_subtype; + int en_class, en_version, en_subtype; + + if (os == PF_OSFP_ANY) + return (1); + if (list == NULL) { + DPFPRINTF("osfp no match against %x\n", os); + return (os == PF_OSFP_UNKNOWN); + } + PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); + SLIST_FOREACH(entry, list, fp_entry) { + PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype); + if ((os_class == PF_OSFP_ANY || en_class == os_class) && + (os_version == PF_OSFP_ANY || en_version == os_version) && + (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { + DPFPRINTF("osfp matched %s %s %s %x==%x\n", + entry->fp_class_nm, entry->fp_version_nm, + entry->fp_subtype_nm, os, entry->fp_os); + return (1); + } + } + DPFPRINTF("fingerprint 0x%x didn't match\n", os); + return (0); +} + +/* Initialize the OS fingerprint system */ +int +pf_osfp_initialize(void) +{ + int error = 0; + +#ifdef _KERNEL + do { + error = ENOMEM; + pf_osfp_entry_pl = pf_osfp_pl = NULL; + ZONE_CREATE(pf_osfp_entry_pl, struct pf_osfp_entry, "pfospfen"); + ZONE_CREATE(pf_osfp_pl, struct pf_os_fingerprint, "pfosfp"); + error = 0; + } while(0); +#else + pool_init(&pf_osfp_entry_pl, sizeof(struct pf_osfp_entry), 0, 0, 0, + "pfosfpen", NULL); + pool_init(&pf_osfp_pl, sizeof(struct pf_os_fingerprint), 0, 0, 0, + "pfosfp", NULL); +#endif + SLIST_INIT(&pf_osfp_list); + + return (error); +} + +#ifdef _KERNEL +void +pf_osfp_cleanup(void) +{ + ZONE_DESTROY(pf_osfp_entry_pl); + ZONE_DESTROY(pf_osfp_pl); +} +#endif + +/* Flush the fingerprint list */ +void +pf_osfp_flush(void) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + + while ((fp = SLIST_FIRST(&pf_osfp_list))) { + SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); + while ((entry = SLIST_FIRST(&fp->fp_oses))) { + SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); + pool_put(&pf_osfp_entry_pl, entry); + } + pool_put(&pf_osfp_pl, fp); + } +} + + +/* Add a fingerprint */ +int +pf_osfp_add(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp, fpadd; + struct pf_osfp_entry *entry; + + memset(&fpadd, 0, sizeof(fpadd)); + fpadd.fp_tcpopts = fpioc->fp_tcpopts; + fpadd.fp_wsize = fpioc->fp_wsize; + fpadd.fp_psize = fpioc->fp_psize; + fpadd.fp_mss = fpioc->fp_mss; + fpadd.fp_flags = fpioc->fp_flags; + fpadd.fp_optcnt = fpioc->fp_optcnt; + fpadd.fp_wscale = fpioc->fp_wscale; + fpadd.fp_ttl = fpioc->fp_ttl; + + DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " + "(TS=%s,M=%s%d,W=%s%d) %x\n", + fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, + fpioc->fp_os.fp_subtype_nm, + (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" : + (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? 
"*" : "", + fpadd.fp_wsize, + fpadd.fp_ttl, + (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0, + (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "", + fpadd.fp_psize, + (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt, + (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fpadd.fp_mss, + (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", + fpadd.fp_wscale, + fpioc->fp_os.fp_os); + + + if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) + return (EEXIST); + } + if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) + return (ENOMEM); + } else { + if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL) + return (ENOMEM); + memset(fp, 0, sizeof(*fp)); + fp->fp_tcpopts = fpioc->fp_tcpopts; + fp->fp_wsize = fpioc->fp_wsize; + fp->fp_psize = fpioc->fp_psize; + fp->fp_mss = fpioc->fp_mss; + fp->fp_flags = fpioc->fp_flags; + fp->fp_optcnt = fpioc->fp_optcnt; + fp->fp_wscale = fpioc->fp_wscale; + fp->fp_ttl = fpioc->fp_ttl; + SLIST_INIT(&fp->fp_oses); + if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) { + pool_put(&pf_osfp_pl, fp); + return (ENOMEM); + } + pf_osfp_insert(&pf_osfp_list, fp); + } + memcpy(entry, &fpioc->fp_os, sizeof(*entry)); + + /* Make sure the strings are NUL terminated */ + entry->fp_class_nm[sizeof(entry->fp_class_nm)-1] = '\0'; + entry->fp_version_nm[sizeof(entry->fp_version_nm)-1] = '\0'; + entry->fp_subtype_nm[sizeof(entry->fp_subtype_nm)-1] = '\0'; + + SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry); + +#ifdef PFDEBUG + if ((fp = pf_osfp_validate())) + printf("Invalid fingerprint list\n"); +#endif /* PFDEBUG */ + return (0); +} + + +/* Find a fingerprint in the list */ +struct pf_os_fingerprint * +pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, + u_int8_t ttldiff) +{ + struct pf_os_fingerprint *f; + +#define MATCH_INT(_MOD, _DC, _field) \ + if ((f->fp_flags & _DC) == 0) { \ + if ((f->fp_flags & _MOD) == 0) { \ + if (f->_field != find->_field) \ + continue; \ + } else { \ + if (f->_field == 0 || find->_field % f->_field) \ + continue; \ + } \ + } + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts != find->fp_tcpopts || + f->fp_optcnt != find->fp_optcnt || + f->fp_ttl < find->fp_ttl || + f->fp_ttl - find->fp_ttl > ttldiff || + (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) != + (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0))) + continue; + + MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize) + MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss) + MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale) + if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) { + if (f->fp_flags & PF_OSFP_WSIZE_MSS) { + if (find->fp_mss == 0) + continue; + +/* Some "smart" NAT devices and DSL routers will tweak the MSS size and + * will set it to whatever is suitable for the link type. 
+ */
+#define SMART_MSS 1460
+				if ((find->fp_wsize % find->fp_mss ||
+				    find->fp_wsize / find->fp_mss !=
+				    f->fp_wsize) &&
+				    (find->fp_wsize % SMART_MSS ||
+				    find->fp_wsize / SMART_MSS !=
+				    f->fp_wsize))
+					continue;
+			} else if (f->fp_flags & PF_OSFP_WSIZE_MTU) {
+				if (find->fp_mss == 0)
+					continue;
+
+#define MTUOFF (sizeof(struct ip) + sizeof(struct tcphdr))
+#define SMART_MTU (SMART_MSS + MTUOFF)
+				if ((find->fp_wsize % (find->fp_mss + MTUOFF) ||
+				    find->fp_wsize / (find->fp_mss + MTUOFF) !=
+				    f->fp_wsize) &&
+				    (find->fp_wsize % SMART_MTU ||
+				    find->fp_wsize / SMART_MTU !=
+				    f->fp_wsize))
+					continue;
+			} else if (f->fp_flags & PF_OSFP_WSIZE_MOD) {
+				if (f->fp_wsize == 0 || find->fp_wsize %
+				    f->fp_wsize)
+					continue;
+			} else {
+				if (f->fp_wsize != find->fp_wsize)
+					continue;
+			}
+		}
+		return (f);
+	}
+
+	return (NULL);
+}
+
+/* Find an exact fingerprint in the list */
+struct pf_os_fingerprint *
+pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find)
+{
+	struct pf_os_fingerprint *f;
+
+	SLIST_FOREACH(f, list, fp_next) {
+		if (f->fp_tcpopts == find->fp_tcpopts &&
+		    f->fp_wsize == find->fp_wsize &&
+		    f->fp_psize == find->fp_psize &&
+		    f->fp_mss == find->fp_mss &&
+		    f->fp_flags == find->fp_flags &&
+		    f->fp_optcnt == find->fp_optcnt &&
+		    f->fp_wscale == find->fp_wscale &&
+		    f->fp_ttl == find->fp_ttl)
+			return (f);
+	}
+
+	return (NULL);
+}
+
+/* Insert a fingerprint into the list */
+void
+pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins)
+{
+	struct pf_os_fingerprint *f, *prev = NULL;
+
+	/* XXX need to go semi tree based. can key on tcp options */
+
+	SLIST_FOREACH(f, list, fp_next)
+		prev = f;
+	if (prev)
+		SLIST_INSERT_AFTER(prev, ins, fp_next);
+	else
+		SLIST_INSERT_HEAD(list, ins, fp_next);
+}
+
+/* Fill a fingerprint by its number (from an ioctl) */
+int
+pf_osfp_get(struct pf_osfp_ioctl *fpioc)
+{
+	struct pf_os_fingerprint *fp;
+	struct pf_osfp_entry *entry;
+	int num = fpioc->fp_getnum;
+	int i = 0;
+
+
+	memset(fpioc, 0, sizeof(*fpioc));
+	SLIST_FOREACH(fp, &pf_osfp_list, fp_next) {
+		SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) {
+			if (i++ == num) {
+				fpioc->fp_mss = fp->fp_mss;
+				fpioc->fp_wsize = fp->fp_wsize;
+				fpioc->fp_flags = fp->fp_flags;
+				fpioc->fp_psize = fp->fp_psize;
+				fpioc->fp_ttl = fp->fp_ttl;
+				fpioc->fp_wscale = fp->fp_wscale;
+				fpioc->fp_getnum = num;
+				memcpy(&fpioc->fp_os, entry,
+				    sizeof(fpioc->fp_os));
+				return (0);
+			}
+		}
+	}
+
+	return (EBUSY);
+}
+
+
+/* Validate that each signature is reachable */
+struct pf_os_fingerprint *
+pf_osfp_validate(void)
+{
+	struct pf_os_fingerprint *f, *f2, find;
+
+	SLIST_FOREACH(f, &pf_osfp_list, fp_next) {
+		memcpy(&find, f, sizeof(find));
+
+		/* We do a few MSS/th_win percolations to make things unique */
+		if (find.fp_mss == 0)
+			find.fp_mss = 128;
+		if (f->fp_flags & PF_OSFP_WSIZE_MSS)
+			find.fp_wsize *= find.fp_mss;
+		else if (f->fp_flags & PF_OSFP_WSIZE_MTU)
+			find.fp_wsize *= (find.fp_mss + 40);
+		else if (f->fp_flags & PF_OSFP_WSIZE_MOD)
+			find.fp_wsize *= 2;
+		if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) {
+			if (f2)
+				printf("Found \"%s %s %s\" instead of "
+				    "\"%s %s %s\"\n",
+				    SLIST_FIRST(&f2->fp_oses)->fp_class_nm,
+				    SLIST_FIRST(&f2->fp_oses)->fp_version_nm,
+				    SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_class_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_version_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_subtype_nm);
+			else
+				printf("Couldn't find \"%s %s %s\"\n",
+				    SLIST_FIRST(&f->fp_oses)->fp_class_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_version_nm,
+				    SLIST_FIRST(&f->fp_oses)->fp_subtype_nm);
+			return (f);
+		}
+	}
+	return (NULL);
+}
diff --git a/sys/net/pf/pf_subr.c b/sys/net/pf/pf_subr.c
new file mode 100644
index 0000000000..344c9a03fc
--- /dev/null
+++ b/sys/net/pf/pf_subr.c
@@ -0,0 +1,131 @@
+/* $FreeBSD: src/sys/contrib/pf/net/pf_subr.c,v 1.1 2004/06/16 23:24:00 mlaier Exp $ */
+/* from $OpenBSD: kern_subr.c,v 1.26 2003/10/31 11:10:41 markus Exp $ */
+/* $NetBSD: kern_subr.c,v 1.15 1996/04/09 17:21:56 ragge Exp $ */
+/* $DragonFly: src/sys/net/pf/pf_subr.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */
+
+/*
+ * Copyright (c) 2004 The DragonFly Project. All rights reserved.
+ *
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_subr.c	8.3 (Berkeley) 1/21/94
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+#include 
+
+/*
+ * This implements additional functions used by pf which cannot be ported
+ * easily. At this point it boils down to mostly the Net/OpenBSD hook
+ * implementation.
+ *
+ * BEWARE: this is not locked! Required locking is done by the caller.
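+ *
+ * A minimal, hypothetical usage sketch of the hook API below (my_head,
+ * my_fn and my_arg stand in for caller-owned state):
+ *
+ *	struct hook_desc_head my_head = TAILQ_HEAD_INITIALIZER(my_head);
+ *	void *h = hook_establish(&my_head, 1, my_fn, my_arg);
+ *	dohooks(&my_head, 0);			(run all hooks in order)
+ *	hook_disestablish(&my_head, h);		(remove one hook again)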
+ */ + +void * +hook_establish(struct hook_desc_head *head, int tail, void (*fn)(void *), + void *arg) +{ + struct hook_desc *hdp; + + hdp = (struct hook_desc *)malloc(sizeof (*hdp), M_DEVBUF, M_NOWAIT); + if (hdp == NULL) + return (NULL); + + hdp->hd_fn = fn; + hdp->hd_arg = arg; + if (tail) + TAILQ_INSERT_TAIL(head, hdp, hd_list); + else + TAILQ_INSERT_HEAD(head, hdp, hd_list); + + return (hdp); +} + +void +hook_disestablish(struct hook_desc_head *head, void *vhook) +{ + struct hook_desc *hdp; + +#ifdef DIAGNOSTIC + for (hdp = TAILQ_FIRST(head); hdp != NULL; + hdp = TAILQ_NEXT(hdp, hd_list)) + if (hdp == vhook) + break; + if (hdp == NULL) + panic("hook_disestablish: hook not established"); +#endif + hdp = vhook; + TAILQ_REMOVE(head, hdp, hd_list); + free(hdp, M_DEVBUF); +} + +/* + * Run hooks. Startup hooks are invoked right after scheduler_start but + * before root is mounted. Shutdown hooks are invoked immediately before the + * system is halted or rebooted, i.e. after file systems unmounted, + * after crash dump done, etc. + */ +void +dohooks(struct hook_desc_head *head, int flags) +{ + struct hook_desc *hdp; + + if ((flags & HOOK_REMOVE) == 0) { + TAILQ_FOREACH(hdp, head, hd_list) { + (*hdp->hd_fn)(hdp->hd_arg); + } + } else { + while ((hdp = TAILQ_FIRST(head)) != NULL) { + TAILQ_REMOVE(head, hdp, hd_list); + (*hdp->hd_fn)(hdp->hd_arg); + if ((flags & HOOK_FREE) != 0) + free(hdp, M_DEVBUF); + } + } +} diff --git a/sys/net/pf/pf_table.c b/sys/net/pf/pf_table.c new file mode 100644 index 0000000000..468e6e5808 --- /dev/null +++ b/sys/net/pf/pf_table.c @@ -0,0 +1,2115 @@ +/* $FreeBSD: src/sys/contrib/pf/net/pf_table.c,v 1.5 2004/07/28 06:14:44 kan Exp $ */ +/* $OpenBSD: pf_table.c,v 1.47 2004/03/09 21:44:41 mcbride Exp $ */ +/* $DragonFly: src/sys/net/pf/pf_table.c,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2002 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define ACCEPT_FLAGS(oklist) \ + do { \ + if ((flags & ~(oklist)) & \ + PFR_FLAG_ALLMASK) \ + return (EINVAL); \ + } while (0) + +#define COPYIN(from, to, size) \ + ((flags & PFR_FLAG_USERIOCTL) ? \ + copyin((from), (to), (size)) : \ + (bcopy((from), (to), (size)), 0)) + +#define COPYOUT(from, to, size) \ + ((flags & PFR_FLAG_USERIOCTL) ? \ + copyout((from), (to), (size)) : \ + (bcopy((from), (to), (size)), 0)) + +#define FILLIN_SIN(sin, addr) \ + do { \ + (sin).sin_len = sizeof(sin); \ + (sin).sin_family = AF_INET; \ + (sin).sin_addr = (addr); \ + } while (0) + +#define FILLIN_SIN6(sin6, addr) \ + do { \ + (sin6).sin6_len = sizeof(sin6); \ + (sin6).sin6_family = AF_INET6; \ + (sin6).sin6_addr = (addr); \ + } while (0) + +#define SWAP(type, a1, a2) \ + do { \ + type tmp = a1; \ + a1 = a2; \ + a2 = tmp; \ + } while (0) + +#define SUNION2PF(su, af) (((af)==AF_INET) ? \ + (struct pf_addr *)&(su)->sin.sin_addr : \ + (struct pf_addr *)&(su)->sin6.sin6_addr) + +#define AF_BITS(af) (((af)==AF_INET)?32:128) +#define ADDR_NETWORK(ad) ((ad)->pfra_net < AF_BITS((ad)->pfra_af)) +#define KENTRY_NETWORK(ke) ((ke)->pfrke_net < AF_BITS((ke)->pfrke_af)) +#define KENTRY_RNF_ROOT(ke) \ + ((((struct radix_node *)(ke))->rn_flags & RNF_ROOT) != 0) + +#define NO_ADDRESSES (-1) +#define ENQUEUE_UNMARKED_ONLY (1) +#define INVERT_NEG_FLAG (1) + +struct pfr_walktree { + enum pfrw_op { + PFRW_MARK, + PFRW_SWEEP, + PFRW_ENQUEUE, + PFRW_GET_ADDRS, + PFRW_GET_ASTATS, + PFRW_POOL_GET, + PFRW_DYNADDR_UPDATE + } pfrw_op; + union { + struct pfr_addr *pfrw1_addr; + struct pfr_astats *pfrw1_astats; + struct pfr_kentryworkq *pfrw1_workq; + struct pfr_kentry *pfrw1_kentry; + struct pfi_dynaddr *pfrw1_dyn; + } pfrw_1; + int pfrw_free; + int pfrw_flags; +}; +#define pfrw_addr pfrw_1.pfrw1_addr +#define pfrw_astats pfrw_1.pfrw1_astats +#define pfrw_workq pfrw_1.pfrw1_workq +#define pfrw_kentry pfrw_1.pfrw1_kentry +#define pfrw_dyn pfrw_1.pfrw1_dyn +#define pfrw_cnt pfrw_free + +#define senderr(e) do { rv = (e); goto _bad; } while (0) + +vm_zone_t pfr_ktable_pl; +vm_zone_t pfr_kentry_pl; +struct sockaddr_in pfr_sin; +struct sockaddr_in6 pfr_sin6; +union sockaddr_union pfr_mask; +struct pf_addr pfr_ffaddr; + +void pfr_copyout_addr(struct pfr_addr *, + struct pfr_kentry *ke); +int pfr_validate_addr(struct pfr_addr *); +void pfr_enqueue_addrs(struct pfr_ktable *, + struct pfr_kentryworkq *, int *, int); +void pfr_mark_addrs(struct pfr_ktable *); +struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, + struct pfr_addr *, int); +struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +void pfr_destroy_kentries(struct pfr_kentryworkq *); +void pfr_destroy_kentry(struct pfr_kentry *); +void pfr_insert_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *, long); +void pfr_remove_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *); +void pfr_clstats_kentries(struct pfr_kentryworkq *, long, + int); +void pfr_reset_feedback(struct pfr_addr *, int, int); +void pfr_prepare_network(union sockaddr_union *, int, int); +int pfr_route_kentry(struct pfr_ktable *, + struct pfr_kentry *); +int pfr_unroute_kentry(struct pfr_ktable *, + struct pfr_kentry *); +int pfr_walktree(struct radix_node *, void *); +int pfr_validate_table(struct pfr_table *, int, int); +void pfr_commit_ktable(struct pfr_ktable *, long); +void pfr_insert_ktables(struct pfr_ktableworkq *); +void 
pfr_insert_ktable(struct pfr_ktable *); +void pfr_setflags_ktables(struct pfr_ktableworkq *); +void pfr_setflags_ktable(struct pfr_ktable *, int); +void pfr_clstats_ktables(struct pfr_ktableworkq *, long, + int); +void pfr_clstats_ktable(struct pfr_ktable *, long, int); +struct pfr_ktable *pfr_create_ktable(struct pfr_table *, long, int); +void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +void pfr_destroy_ktable(struct pfr_ktable *, int); +int pfr_ktable_compare(struct pfr_ktable *, + struct pfr_ktable *); +struct pfr_ktable *pfr_lookup_table(struct pfr_table *); +void pfr_clean_node_mask(struct pfr_ktable *, + struct pfr_kentryworkq *); +int pfr_table_count(struct pfr_table *, int); +int pfr_skip_table(struct pfr_table *, + struct pfr_ktable *, int); +struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int); + +RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); +RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); + +struct pfr_ktablehead pfr_ktables; +struct pfr_table pfr_nulltable; +int pfr_ktable_cnt; + +void +pfr_initialize(void) +{ + pfr_sin.sin_len = sizeof(pfr_sin); + pfr_sin.sin_family = AF_INET; + pfr_sin6.sin6_len = sizeof(pfr_sin6); + pfr_sin6.sin6_family = AF_INET6; + + memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr)); +} + +int +pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + int s = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_enqueue_addrs(kt, &workq, ndel, 0); + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_remove_kentries(kt, &workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + if (kt->pfrkt_cnt) { + printf("pfr_clr_addrs: corruption detected (%d).\n", + kt->pfrkt_cnt); + kt->pfrkt_cnt = 0; + } + } + return (0); +} + +int +pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, s = 0, xadd = 0; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + q = pfr_lookup_addr(tmpkt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (q != NULL) + ad.pfra_fback = PFR_FB_DUPLICATE; + else if (p == NULL) + ad.pfra_fback = PFR_FB_ADDED; + else if (p->pfrke_not != ad.pfra_not) + ad.pfra_fback = PFR_FB_CONFLICT; + else + ad.pfra_fback = PFR_FB_NONE; + } + if (p == NULL && q == NULL) { + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xadd++; + } + } + if (flags & 
PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + pfr_clean_node_mask(tmpkt, &workq); + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_insert_kentries(kt, &workq, tzero); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } else + pfr_destroy_kentries(&workq); + if (nadd != NULL) + *nadd = xadd; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &workq); + pfr_destroy_kentries(&workq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, rv, s = 0, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_mark_addrs(kt); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + if (p == NULL) + ad.pfra_fback = PFR_FB_NONE; + else if (p->pfrke_not != ad.pfra_not) + ad.pfra_fback = PFR_FB_CONFLICT; + else if (p->pfrke_mark) + ad.pfra_fback = PFR_FB_DUPLICATE; + else + ad.pfra_fback = PFR_FB_DELETED; + } + if (p != NULL && p->pfrke_not == ad.pfra_not && + !p->pfrke_mark) { + p->pfrke_mark = 1; + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xdel++; + } + if (flags & PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_remove_kentries(kt, &workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + return (rv); +} + +int +pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *size2, int *nadd, int *ndel, int *nchange, int flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq addq, delq, changeq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, s = 0, xadd = 0, xdel = 0, xchange = 0; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&delq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + ad.pfra_fback = PFR_FB_NONE; + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) { + if (p->pfrke_mark) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p->pfrke_mark = 1; + if (p->pfrke_not != ad.pfra_not) { + SLIST_INSERT_HEAD(&changeq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_CHANGED; + xchange++; + } + } else { + q = pfr_lookup_addr(tmpkt, 
&ad, 1); + if (q != NULL) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_ADDED; + xadd++; + } + } +_skip: + if (flags & PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); + if ((flags & PFR_FLAG_FEEDBACK) && *size2) { + if (*size2 < size+xdel) { + *size2 = size+xdel; + senderr(0); + } + i = 0; + SLIST_FOREACH(p, &delq, pfrke_workq) { + pfr_copyout_addr(&ad, p); + ad.pfra_fback = PFR_FB_DELETED; + if (COPYOUT(&ad, addr+size+i, sizeof(ad))) + senderr(EFAULT); + i++; + } + } + pfr_clean_node_mask(tmpkt, &addq); + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } else + pfr_destroy_kentries(&addq); + if (nadd != NULL) + *nadd = xadd; + if (ndel != NULL) + *ndel = xdel; + if (nchange != NULL) + *nchange = xchange; + if ((flags & PFR_FLAG_FEEDBACK) && size2) + *size2 = size+xdel; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &addq); + pfr_destroy_kentries(&addq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nmatch, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, xmatch = 0; + + ACCEPT_FLAGS(PFR_FLAG_REPLACE); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + return (EFAULT); + if (pfr_validate_addr(&ad)) + return (EINVAL); + if (ADDR_NETWORK(&ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, &ad, 0); + if (flags & PFR_FLAG_REPLACE) + pfr_copyout_addr(&ad, p); + ad.pfra_fback = (p == NULL) ? PFR_FB_NONE : + (p->pfrke_not ? 
PFR_FB_NOTMATCH : PFR_FB_MATCH); + if (p != NULL && !p->pfrke_not) + xmatch++; + if (COPYOUT(&ad, addr+i, sizeof(ad))) + return (EFAULT); + } + if (nmatch != NULL) + *nmatch = xmatch; + return (0); +} + +int +pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + int rv; + + ACCEPT_FLAGS(0); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ADDRS; + w.pfrw_addr = addr; + w.pfrw_free = kt->pfrkt_cnt; + w.pfrw_flags = flags; + rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_addrs: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + struct pfr_kentryworkq workq; + int rv, s = 0; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC); /* XXX PFR_FLAG_CLSTATS disabled */ + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_GET_ASTATS; + w.pfrw_astats = addr; + w.pfrw_free = kt->pfrkt_cnt; + w.pfrw_flags = flags; + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + if (!rv && (flags & PFR_FLAG_CLSTATS)) { + pfr_enqueue_addrs(kt, &workq, NULL, 0); + pfr_clstats_kentries(&workq, tzero, 0); + } + if (flags & PFR_FLAG_ATOMIC) + splx(s); + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_astats: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nzero, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, rv, s = 0, xzero = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + ad.pfra_fback = (p != NULL) ? 
+ PFR_FB_CLEARED : PFR_FB_NONE; + if (COPYOUT(&ad, addr+i, sizeof(ad))) + senderr(EFAULT); + } + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xzero++; + } + } + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_clstats_kentries(&workq, 0, 0); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (nzero != NULL) + *nzero = xzero; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + return (rv); +} + +int +pfr_validate_addr(struct pfr_addr *ad) +{ + int i; + + switch (ad->pfra_af) { + case AF_INET: + if (ad->pfra_net > 32) + return (-1); + break; + case AF_INET6: + if (ad->pfra_net > 128) + return (-1); + break; + default: + return (-1); + } + if (ad->pfra_net < 128 && + (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8)))) + return (-1); + for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++) + if (((caddr_t)ad)[i]) + return (-1); + if (ad->pfra_not && ad->pfra_not != 1) + return (-1); + if (ad->pfra_fback) + return (-1); + return (0); +} + +void +pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, + int *naddr, int sweep) +{ + struct pfr_walktree w; + + SLIST_INIT(workq); + bzero(&w, sizeof(w)); + w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE; + w.pfrw_workq = workq; + if (kt->pfrkt_ip4 != NULL) + if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) + printf("pfr_enqueue_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6 != NULL) + if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) + printf("pfr_enqueue_addrs: IPv6 walktree failed.\n"); + if (naddr != NULL) + *naddr = w.pfrw_cnt; +} + +void +pfr_mark_addrs(struct pfr_ktable *kt) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_MARK; + if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv4 walktree failed.\n"); + if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w)) + printf("pfr_mark_addrs: IPv6 walktree failed.\n"); +} + + +struct pfr_kentry * +pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) +{ + union sockaddr_union sa, mask; + struct radix_node_head *head; + struct pfr_kentry *ke; + int s; + + bzero(&sa, sizeof(sa)); + if (ad->pfra_af == AF_INET) { + FILLIN_SIN(sa.sin, ad->pfra_ip4addr); + head = kt->pfrkt_ip4; + } else { + FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); + head = kt->pfrkt_ip6; + } + if (ADDR_NETWORK(ad)) { + pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net); + s = splsoftnet(); /* rn_lookup makes use of globals */ + ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head); + splx(s); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + } else { + ke = (struct pfr_kentry *)rn_match(&sa, head); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + } + return (ke); +} + +struct pfr_kentry * +pfr_create_kentry(struct pfr_addr *ad) +{ + struct pfr_kentry *ke; + + ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); + if (ke == NULL) + return (NULL); + bzero(ke, sizeof(*ke)); + + if (ad->pfra_af == AF_INET) + FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); + else + FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); + ke->pfrke_af = ad->pfra_af; + ke->pfrke_net = ad->pfra_net; + ke->pfrke_not = ad->pfra_not; + return (ke); +} + +void +pfr_destroy_kentries(struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p, *q; + + for (p = SLIST_FIRST(workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrke_workq); + pfr_destroy_kentry(p); + } +} + +void 
+pfr_destroy_kentry(struct pfr_kentry *ke) +{ + pool_put(&pfr_kentry_pl, ke); +} + +void +pfr_insert_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq, long tzero) +{ + struct pfr_kentry *p; + int rv, n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + rv = pfr_route_kentry(kt, p); + if (rv) { + printf("pfr_insert_kentries: cannot route entry " + "(code=%d).\n", rv); + break; + } + p->pfrke_tzero = tzero; + n++; + } + kt->pfrkt_cnt += n; +} + +void +pfr_remove_kentries(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + int n = 0; + + SLIST_FOREACH(p, workq, pfrke_workq) { + pfr_unroute_kentry(kt, p); + n++; + } + kt->pfrkt_cnt -= n; + pfr_destroy_kentries(workq); +} + +void +pfr_clean_node_mask(struct pfr_ktable *kt, + struct pfr_kentryworkq *workq) +{ + struct pfr_kentry *p; + + SLIST_FOREACH(p, workq, pfrke_workq) + pfr_unroute_kentry(kt, p); +} + +void +pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) +{ + struct pfr_kentry *p; + int s; + + SLIST_FOREACH(p, workq, pfrke_workq) { + s = splsoftnet(); + if (negchange) + p->pfrke_not = !p->pfrke_not; + bzero(p->pfrke_packets, sizeof(p->pfrke_packets)); + bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes)); + splx(s); + p->pfrke_tzero = tzero; + } +} + +void +pfr_reset_feedback(struct pfr_addr *addr, int size, int flags) +{ + struct pfr_addr ad; + int i; + + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + break; + ad.pfra_fback = PFR_FB_NONE; + if (COPYOUT(&ad, addr+i, sizeof(ad))) + break; + } +} + +void +pfr_prepare_network(union sockaddr_union *sa, int af, int net) +{ + int i; + + bzero(sa, sizeof(*sa)); + if (af == AF_INET) { + sa->sin.sin_len = sizeof(sa->sin); + sa->sin.sin_family = AF_INET; + sa->sin.sin_addr.s_addr = htonl(-1 << (32-net)); + } else { + sa->sin6.sin6_len = sizeof(sa->sin6); + sa->sin6.sin6_family = AF_INET6; + for (i = 0; i < 4; i++) { + if (net <= 32) { + sa->sin6.sin6_addr.s6_addr32[i] = + htonl(-1 << (32-net)); + break; + } + sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; + net -= 32; + } + } +} + +int +pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_node_head *head; + int s; + + bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); + if (ke->pfrke_af == AF_INET) + head = kt->pfrkt_ip4; + else + head = kt->pfrkt_ip6; + + s = splsoftnet(); + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); + } else + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); + splx(s); + + return (rn == NULL ? 
-1 : 0); +} + +int +pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) +{ + union sockaddr_union mask; + struct radix_node *rn; + struct radix_node_head *head; + int s; + + if (ke->pfrke_af == AF_INET) + head = kt->pfrkt_ip4; + else + head = kt->pfrkt_ip6; + + s = splsoftnet(); + if (KENTRY_NETWORK(ke)) { + pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); + rn = rn_delete(&ke->pfrke_sa, &mask, head); + } else + rn = rn_delete(&ke->pfrke_sa, NULL, head); + splx(s); + + if (rn == NULL) { + printf("pfr_unroute_kentry: delete failed.\n"); + return (-1); + } + return (0); +} + +void +pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) +{ + bzero(ad, sizeof(*ad)); + if (ke == NULL) + return; + ad->pfra_af = ke->pfrke_af; + ad->pfra_net = ke->pfrke_net; + ad->pfra_not = ke->pfrke_not; + if (ad->pfra_af == AF_INET) + ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; + else + ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; +} + +int +pfr_walktree(struct radix_node *rn, void *arg) +{ + struct pfr_kentry *ke = (struct pfr_kentry *)rn; + struct pfr_walktree *w = arg; + int s, flags = w->pfrw_flags; + + switch (w->pfrw_op) { + case PFRW_MARK: + ke->pfrke_mark = 0; + break; + case PFRW_SWEEP: + if (ke->pfrke_mark) + break; + /* FALLTHROUGH */ + case PFRW_ENQUEUE: + SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq); + w->pfrw_cnt++; + break; + case PFRW_GET_ADDRS: + if (w->pfrw_free-- > 0) { + struct pfr_addr ad; + + pfr_copyout_addr(&ad, ke); + if (copyout(&ad, w->pfrw_addr, sizeof(ad))) + return (EFAULT); + w->pfrw_addr++; + } + break; + case PFRW_GET_ASTATS: + if (w->pfrw_free-- > 0) { + struct pfr_astats as; + + pfr_copyout_addr(&as.pfras_a, ke); + + s = splsoftnet(); + bcopy(ke->pfrke_packets, as.pfras_packets, + sizeof(as.pfras_packets)); + bcopy(ke->pfrke_bytes, as.pfras_bytes, + sizeof(as.pfras_bytes)); + splx(s); + as.pfras_tzero = ke->pfrke_tzero; + + if (COPYOUT(&as, w->pfrw_astats, sizeof(as))) + return (EFAULT); + w->pfrw_astats++; + } + break; + case PFRW_POOL_GET: + if (ke->pfrke_not) + break; /* negative entries are ignored */ + if (!w->pfrw_cnt--) { + w->pfrw_kentry = ke; + return (1); /* finish search */ + } + break; + case PFRW_DYNADDR_UPDATE: + if (ke->pfrke_af == AF_INET) { + if (w->pfrw_dyn->pfid_acnt4++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); + w->pfrw_dyn->pfid_addr4 = *SUNION2PF( + &ke->pfrke_sa, AF_INET); + w->pfrw_dyn->pfid_mask4 = *SUNION2PF( + &pfr_mask, AF_INET); + } else { + if (w->pfrw_dyn->pfid_acnt6++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); + w->pfrw_dyn->pfid_addr6 = *SUNION2PF( + &ke->pfrke_sa, AF_INET6); + w->pfrw_dyn->pfid_mask6 = *SUNION2PF( + &pfr_mask, AF_INET6); + } + break; + } + return (0); +} + +int +pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + int s = 0, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); + if (pfr_table_count(filter, flags) < 0) + return (ENOENT); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR)) + continue; + if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_setflags_ktables(&workq); + if (flags & PFR_FLAG_ATOMIC) + 
splx(s); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) +{ + struct pfr_ktableworkq addq, changeq; + struct pfr_ktable *p, *q, *r, key; + int i, rv, s = 0, xadd = 0; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + senderr(EFAULT); + if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + senderr(EINVAL); + key.pfrkt_flags |= PFR_TFLAG_ACTIVE; + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p == NULL) { + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + if (p == NULL) + senderr(ENOMEM); + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(p, q)) + goto _skip; + } + SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); + xadd++; + if (!key.pfrkt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); + bzero(key.pfrkt_ruleset, sizeof(key.pfrkt_ruleset)); + r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (r != NULL) { + p->pfrkt_root = r; + goto _skip; + } + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(&key, q)) { + p->pfrkt_root = q; + goto _skip; + } + } + key.pfrkt_flags = 0; + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (r == NULL) + senderr(ENOMEM); + SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); + p->pfrkt_root = r; + } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &changeq, pfrkt_workq) + if (!pfr_ktable_compare(&key, q)) + goto _skip; + p->pfrkt_nflags = (p->pfrkt_flags & + ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; + SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); + xadd++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_insert_ktables(&addq); + pfr_setflags_ktables(&changeq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } else + pfr_destroy_ktables(&addq, 0); + if (nadd != NULL) + *nadd = xadd; + return (0); +_bad: + pfr_destroy_ktables(&addq, 0); + return (rv); +} + +int +pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, s = 0, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } +_skip: + ; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_setflags_ktables(&workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + int n, nn; + + ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- 
<= 0) + continue; + if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl))) + return (EFAULT); + } + if (n) { + printf("pfr_get_tables: corruption detected (%d).\n", n); + return (ENOTTY); + } + *size = nn; + return (0); +} + +int +pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + int s = 0, n, nn; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); + /* XXX PFR_FLAG_CLSTATS disabled */ + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + SLIST_INIT(&workq); + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + if (!(flags & PFR_FLAG_ATOMIC)) + s = splsoftnet(); + if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl))) { + splx(s); + return (EFAULT); + } + if (!(flags & PFR_FLAG_ATOMIC)) + splx(s); + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + } + if (flags & PFR_FLAG_CLSTATS) + pfr_clstats_ktables(&workq, tzero, + flags & PFR_FLAG_ADDRSTOO); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + if (n) { + printf("pfr_get_tstats: corruption detected (%d).\n", n); + return (ENOTTY); + } + *size = nn; + return (0); +} + +int +pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, key; + int i, s = 0, xzero = 0; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ADDRSTOO); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xzero++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (nzero != NULL) + *nzero = xzero; + return (0); +} + +int +pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, + int *nchange, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, s = 0, xchange = 0, xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + if ((setflag & ~PFR_TFLAG_USRMASK) || + (clrflag & ~PFR_TFLAG_USRMASK) || + (setflag & clrflag)) + return (EINVAL); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t))) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + p->pfrkt_nflags = (p->pfrkt_flags | setflag) & + ~clrflag; + if (p->pfrkt_nflags == p->pfrkt_flags) + goto _skip; + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) && + (clrflag & PFR_TFLAG_PERSIST) && + !(p->pfrkt_flags & PFR_TFLAG_REFERENCED)) + xdel++; + else + xchange++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + pfr_setflags_ktables(&workq); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + } + if (nchange != NULL) + *nchange = xchange; + if (ndel != 
NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_DUMMY); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + if (rs == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + if (ticket != NULL) + *ticket = ++rs->tticket; + rs->topen = 1; + } else + pf_remove_if_empty_ruleset(rs); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int *naddr, u_int32_t ticket, int flags) +{ + struct pfr_ktableworkq tableq; + struct pfr_kentryworkq addrq; + struct pfr_ktable *kt, *rt, *shadow, key; + struct pfr_kentry *p; + struct pfr_addr ad; + struct pf_ruleset *rs; + int i, rv, xadd = 0, xaddr = 0; + + ACCEPT_FLAGS(PFR_FLAG_DUMMY|PFR_FLAG_ADDRSTOO); + if (size && !(flags & PFR_FLAG_ADDRSTOO)) + return (EINVAL); + if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + rs = pf_find_ruleset(tbl->pfrt_anchor, tbl->pfrt_ruleset); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; + SLIST_INIT(&tableq); + kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); + if (kt == NULL) { + kt = pfr_create_ktable(tbl, 0, 1); + if (kt == NULL) + return (ENOMEM); + SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); + xadd++; + if (!tbl->pfrt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(&key, sizeof(key)); + strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name)); + rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (rt != NULL) { + kt->pfrkt_root = rt; + goto _skip; + } + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (rt == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq); + kt->pfrkt_root = rt; + } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) + xadd++; +_skip: + shadow = pfr_create_ktable(tbl, 0, 0); + if (shadow == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INIT(&addrq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + if (pfr_lookup_addr(shadow, &ad, 1) != NULL) + continue; + p = pfr_create_kentry(&ad); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(shadow, p)) { + pfr_destroy_kentry(p); + continue; + } + SLIST_INSERT_HEAD(&addrq, p, pfrke_workq); + xaddr++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_flags |= PFR_TFLAG_INACTIVE; + pfr_insert_ktables(&tableq); + shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ? 
+ xaddr : NO_ADDRESSES; + kt->pfrkt_shadow = shadow; + } else { + pfr_clean_node_mask(shadow, &addrq); + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + } + if (nadd != NULL) + *nadd = xadd; + if (naddr != NULL) + *naddr = xaddr; + return (0); +_bad: + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + return (rv); +} + +int +pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + ACCEPT_FLAGS(PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (0); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, + int *nchange, int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + struct pf_ruleset *rs; + int s = 0, xadd = 0, xchange = 0; + long tzero = time_second; + + ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if (p->pfrkt_flags & PFR_TFLAG_ACTIVE) + xchange++; + else + xadd++; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + if (flags & PFR_FLAG_ATOMIC) + s = splsoftnet(); + SLIST_FOREACH(p, &workq, pfrkt_workq) + pfr_commit_ktable(p, tzero); + if (flags & PFR_FLAG_ATOMIC) + splx(s); + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (nadd != NULL) + *nadd = xadd; + if (nchange != NULL) + *nchange = xchange; + + return (0); +} + +void +pfr_commit_ktable(struct pfr_ktable *kt, long tzero) +{ + struct pfr_ktable *shadow = kt->pfrkt_shadow; + int nflags; + + if (shadow->pfrkt_cnt == NO_ADDRESSES) { + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + pfr_clstats_ktable(kt, tzero, 1); + } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { + /* kt might contain addresses */ + struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; + struct pfr_kentry *p, *q, *next; + struct pfr_addr ad; + + pfr_enqueue_addrs(shadow, &addrq, NULL, 0); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + SLIST_INIT(&delq); + SLIST_INIT(&garbageq); + pfr_clean_node_mask(shadow, &addrq); + for (p = SLIST_FIRST(&addrq); p != NULL; p = next) { + next = SLIST_NEXT(p, pfrke_workq); /* XXX */ + pfr_copyout_addr(&ad, p); + q = pfr_lookup_addr(kt, &ad, 1); + if (q != NULL) { + if (q->pfrke_not != p->pfrke_not) + SLIST_INSERT_HEAD(&changeq, q, + pfrke_workq); + q->pfrke_mark = 1; + SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq); + } else { + p->pfrke_tzero = tzero; + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + } + } + pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, 
tzero, INVERT_NEG_FLAG); + pfr_destroy_kentries(&garbageq); + } else { + /* kt cannot contain addresses */ + SWAP(struct radix_node_head *, kt->pfrkt_ip4, + shadow->pfrkt_ip4); + SWAP(struct radix_node_head *, kt->pfrkt_ip6, + shadow->pfrkt_ip6); + SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt); + pfr_clstats_ktable(kt, tzero, 1); + } + nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) | + (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE) + & ~PFR_TFLAG_INACTIVE; + pfr_destroy_ktable(shadow, 0); + kt->pfrkt_shadow = NULL; + pfr_setflags_ktable(kt, nflags); +} + +int +pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) +{ + int i; + + if (!tbl->pfrt_name[0]) + return (-1); + if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR)) + return (-1); + if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1]) + return (-1); + for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) + if (tbl->pfrt_name[i]) + return (-1); + if (tbl->pfrt_flags & ~allowedflags) + return (-1); + return (0); +} + +int +pfr_table_count(struct pfr_table *filter, int flags) +{ + struct pf_ruleset *rs; + struct pf_anchor *ac; + + if (flags & PFR_FLAG_ALLRSETS) + return (pfr_ktable_cnt); + if (filter->pfrt_ruleset[0]) { + rs = pf_find_ruleset(filter->pfrt_anchor, + filter->pfrt_ruleset); + return ((rs != NULL) ? rs->tables : -1); + } + if (filter->pfrt_anchor[0]) { + ac = pf_find_anchor(filter->pfrt_anchor); + return ((ac != NULL) ? ac->tables : -1); + } + return (pf_main_ruleset.tables); +} + +int +pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) +{ + if (flags & PFR_FLAG_ALLRSETS) + return (0); + if (strncmp(filter->pfrt_anchor, kt->pfrkt_anchor, + PF_ANCHOR_NAME_SIZE)) + return (1); + if (!filter->pfrt_ruleset[0]) + return (0); + if (strncmp(filter->pfrt_ruleset, kt->pfrkt_ruleset, + PF_RULESET_NAME_SIZE)) + return (1); + return (0); +} + +void +pfr_insert_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_insert_ktable(p); +} + +void +pfr_insert_ktable(struct pfr_ktable *kt) +{ + RB_INSERT(pfr_ktablehead, &pfr_ktables, kt); + pfr_ktable_cnt++; + if (kt->pfrkt_root != NULL) + if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR); +} + +void +pfr_setflags_ktables(struct pfr_ktableworkq *workq) +{ + struct pfr_ktable *p; + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_setflags_ktable(p, p->pfrkt_nflags); +} + +void +pfr_setflags_ktable(struct pfr_ktable *kt, int newf) +{ + struct pfr_kentryworkq addrq; + + if (!(newf & PFR_TFLAG_REFERENCED) && + !(newf & PFR_TFLAG_PERSIST)) + newf &= ~PFR_TFLAG_ACTIVE; + if (!(newf & PFR_TFLAG_ACTIVE)) + newf &= ~PFR_TFLAG_USRMASK; + if (!(newf & PFR_TFLAG_SETMASK)) { + RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt); + if (kt->pfrkt_root != NULL) + if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]) + pfr_setflags_ktable(kt->pfrkt_root, + kt->pfrkt_root->pfrkt_flags & + ~PFR_TFLAG_REFDANCHOR); + pfr_destroy_ktable(kt, 1); + pfr_ktable_cnt--; + return; + } + if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_remove_kentries(kt, &addrq); + } + if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) { + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_shadow = NULL; + } + kt->pfrkt_flags = newf; +} + +void +pfr_clstats_ktables(struct pfr_ktableworkq *workq, long tzero, int recurse) +{ + struct pfr_ktable *p; + + 
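+	/*
+	 * Clear the statistics of every table queued on the workq;
+	 * with recurse set, each table's per-address counters are
+	 * cleared as well.
+	 */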
SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_clstats_ktable(p, tzero, recurse); +} + +void +pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse) +{ + struct pfr_kentryworkq addrq; + int s; + + if (recurse) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clstats_kentries(&addrq, tzero, 0); + } + s = splsoftnet(); + bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets)); + bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes)); + kt->pfrkt_match = kt->pfrkt_nomatch = 0; + splx(s); + kt->pfrkt_tzero = tzero; +} + +struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) +{ + struct pfr_ktable *kt; + struct pf_ruleset *rs; + + kt = pool_get(&pfr_ktable_pl, PR_NOWAIT); + if (kt == NULL) + return (NULL); + bzero(kt, sizeof(*kt)); + kt->pfrkt_t = *tbl; + + if (attachruleset) { + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor, + tbl->pfrt_ruleset); + if (!rs) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_rs = rs; + rs->tables++; + if (rs->anchor != NULL) + rs->anchor->tables++; + } + + if (!rn_inithead((void **)&kt->pfrkt_ip4, + offsetof(struct sockaddr_in, sin_addr) * 8) || + !rn_inithead((void **)&kt->pfrkt_ip6, + offsetof(struct sockaddr_in6, sin6_addr) * 8)) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_tzero = tzero; + + return (kt); +} + +void +pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) +{ + struct pfr_ktable *p, *q; + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_destroy_ktable(p, flushaddr); + } +} + +void +pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) +{ + struct pfr_kentryworkq addrq; + + if (flushaddr) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clean_node_mask(kt, &addrq); + pfr_destroy_kentries(&addrq); + } + if (kt->pfrkt_ip4 != NULL) + free((caddr_t)kt->pfrkt_ip4, M_RTABLE); + if (kt->pfrkt_ip6 != NULL) + free((caddr_t)kt->pfrkt_ip6, M_RTABLE); + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); + if (kt->pfrkt_rs != NULL) { + kt->pfrkt_rs->tables--; + if (kt->pfrkt_rs->anchor != NULL) + kt->pfrkt_rs->anchor->tables--; + pf_remove_if_empty_ruleset(kt->pfrkt_rs); + } + pool_put(&pfr_ktable_pl, kt); +} + +int +pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) +{ + int d; + + if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) + return (d); + if ((d = strncmp(p->pfrkt_anchor, q->pfrkt_anchor, + PF_ANCHOR_NAME_SIZE))) + return (d); + return (strncmp(p->pfrkt_ruleset, q->pfrkt_ruleset, + PF_RULESET_NAME_SIZE)); +} + +struct pfr_ktable * +pfr_lookup_table(struct pfr_table *tbl) +{ + /* struct pfr_ktable start like a struct pfr_table */ + return (RB_FIND(pfr_ktablehead, &pfr_ktables, + (struct pfr_ktable *)tbl)); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (0); + + switch (af) { + case AF_INET: + pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + case AF_INET6: + bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } + match = (ke && !ke->pfrke_not); + if (match) + kt->pfrkt_match++; + else + kt->pfrkt_nomatch++; 
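+	/* an entry with pfrke_not set is an exclusion and never matches */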
+ return (match); +} + +void +pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + u_int64_t len, int dir_out, int op_pass, int notrule) +{ + struct pfr_kentry *ke = NULL; + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return; + + switch (af) { + case AF_INET: + pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + case AF_INET6: + bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; + } + if ((ke == NULL || ke->pfrke_not) != notrule) { + if (op_pass != PFR_OP_PASS) + printf("pfr_update_stats: assertion failed.\n"); + op_pass = PFR_OP_XPASS; + } + kt->pfrkt_packets[dir_out][op_pass]++; + kt->pfrkt_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS) { + ke->pfrke_packets[dir_out][op_pass]++; + ke->pfrke_bytes[dir_out][op_pass] += len; + } +} + +struct pfr_ktable * +pfr_attach_table(struct pf_ruleset *rs, char *name) +{ + struct pfr_ktable *kt, *rt; + struct pfr_table tbl; + struct pf_anchor *ac = rs->anchor; + + bzero(&tbl, sizeof(tbl)); + strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); + if (ac != NULL) { + strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); + strlcpy(tbl.pfrt_ruleset, rs->name, sizeof(tbl.pfrt_ruleset)); + } + kt = pfr_lookup_table(&tbl); + if (kt == NULL) { + kt = pfr_create_ktable(&tbl, time_second, 1); + if (kt == NULL) + return (NULL); + if (ac != NULL) { + bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); + bzero(tbl.pfrt_ruleset, sizeof(tbl.pfrt_ruleset)); + rt = pfr_lookup_table(&tbl); + if (rt == NULL) { + rt = pfr_create_ktable(&tbl, 0, 1); + if (rt == NULL) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + pfr_insert_ktable(rt); + } + kt->pfrkt_root = rt; + } + pfr_insert_ktable(kt); + } + if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++) + pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED); + return (kt); +} + +void +pfr_detach_table(struct pfr_ktable *kt) +{ + if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0) + printf("pfr_detach_table: refcount = %d.\n", + kt->pfrkt_refcnt[PFR_REFCNT_RULE]); + else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); +} + +int +pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, + struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) +{ + struct pfr_kentry *ke, *ke2; + struct pf_addr *addr; + union sockaddr_union mask; + int idx = -1, use_counter = 0; + + addr = (af == AF_INET) ? (struct pf_addr *)&pfr_sin.sin_addr : + (struct pf_addr *)&pfr_sin6.sin6_addr; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (-1); + + if (pidx != NULL) + idx = *pidx; + if (counter != NULL && idx >= 0) + use_counter = 1; + if (idx < 0) + idx = 0; + +_next_block: + ke = pfr_kentry_byidx(kt, idx, af); + if (ke == NULL) + return (1); + pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); + *raddr = SUNION2PF(&ke->pfrke_sa, af); + *rmask = SUNION2PF(&pfr_mask, af); + + if (use_counter) { + /* is supplied address within block? 
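+		 * (i.e. may the round-robin counter resume within this
+		 * entry, or must the scan advance to the next one?)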
*/ + if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) { + /* no, go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + PF_ACPY(addr, counter, af); + } else { + /* use first address of block */ + PF_ACPY(addr, *raddr, af); + } + + if (!KENTRY_NETWORK(ke)) { + /* this is a single IP address - no possible nested block */ + PF_ACPY(counter, addr, af); + *pidx = idx; + return (0); + } + for (;;) { + /* we don't want to use a nested block */ + ke2 = (struct pfr_kentry *)(af == AF_INET ? + rn_match(&pfr_sin, kt->pfrkt_ip4) : + rn_match(&pfr_sin6, kt->pfrkt_ip6)); + /* no need to check KENTRY_RNF_ROOT() here */ + if (ke2 == ke) { + /* lookup return the same block - perfect */ + PF_ACPY(counter, addr, af); + *pidx = idx; + return (0); + } + + /* we need to increase the counter past the nested block */ + pfr_prepare_network(&mask, AF_INET, ke2->pfrke_net); + PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); + PF_AINC(addr, af); + if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { + /* ok, we reached the end of our main block */ + /* go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + } +} + +struct pfr_kentry * +pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) +{ + struct pfr_walktree w; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_POOL_GET; + w.pfrw_cnt = idx; + + switch (af) { + case AF_INET: + kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + return (w.pfrw_kentry); + case AF_INET6: + kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + return (w.pfrw_kentry); + default: + return (NULL); + } +} + +void +pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) +{ + struct pfr_walktree w; + int s; + + bzero(&w, sizeof(w)); + w.pfrw_op = PFRW_DYNADDR_UPDATE; + w.pfrw_dyn = dyn; + + s = splsoftnet(); + dyn->pfid_acnt4 = 0; + dyn->pfid_acnt6 = 0; + if (!dyn->pfid_af || dyn->pfid_af == AF_INET) + kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!dyn->pfid_af || dyn->pfid_af == AF_INET6) + kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); + splx(s); +} diff --git a/sys/net/pf/pfvar.h b/sys/net/pf/pfvar.h new file mode 100644 index 0000000000..657000ca82 --- /dev/null +++ b/sys/net/pf/pfvar.h @@ -0,0 +1,1562 @@ +/* $FreeBSD: src/sys/contrib/pf/net/pfvar.h,v 1.8 2004/08/12 13:59:44 mlaier Exp $ */ +/* $OpenBSD: pfvar.h,v 1.187 2004/03/22 04:54:18 mcbride Exp $ */ +/* add $OpenBSD: pfvar.h,v 1.194 2004/05/11 07:34:11 dhartmei Exp $ */ +/* $DragonFly: src/sys/net/pf/pfvar.h,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ + +/* + * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _NET_PFVAR_H_ +#define _NET_PFVAR_H_ + +#include +#include +#include + +#include +#include +/* + * XXX + * If we include , we need _KERNEL definition. + * This makes pfctl compilation difficult. + */ +union sockaddr_union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + +#include + +struct ip; + +#define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) +#define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) + +enum { PF_INOUT, PF_IN, PF_OUT }; +enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; +enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NAT, PF_NONAT, + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; +enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, + PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; +enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, + PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG }; +enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; +enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, + PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, + PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +/* + * Note about PFTM_*: real indices into pf_rule.timeout[] come before + * PFTM_MAX, special cases afterwards. See pf_state_expires(). 
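+ * Of the special cases, PFTM_PURGE marks a state for immediate
+ * removal, while PFTM_UNTIL_PACKET expires a state at the next purge
+ * pass unless a packet has reset its timeout by then.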
+ */ +enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, + PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED, + PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE, + PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY, + PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, + PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, + PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, + PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; +enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; +#define PF_POOL_IDMASK 0x0f +enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, + PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; +enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, + PF_ADDR_TABLE }; +#define PF_POOL_TYPEMASK 0x0f +#define PF_POOL_STICKYADDR 0x20 +#define PF_WSCALE_FLAG 0x80 +#define PF_WSCALE_MASK 0x0f + +struct pf_addr { + union { + struct in_addr v4; + struct in6_addr v6; + u_int8_t addr8[16]; + u_int16_t addr16[8]; + u_int32_t addr32[4]; + } pfa; /* 128-bit address */ +#define v4 pfa.v4 +#define v6 pfa.v6 +#define addr8 pfa.addr8 +#define addr16 pfa.addr16 +#define addr32 pfa.addr32 +}; + +#define PF_TABLE_NAME_SIZE 32 + +#define PFI_AFLAG_NETWORK 0x01 +#define PFI_AFLAG_BROADCAST 0x02 +#define PFI_AFLAG_PEER 0x04 +#define PFI_AFLAG_MODEMASK 0x07 +#define PFI_AFLAG_NOALIAS 0x08 + +struct pf_addr_wrap { + union { + struct { + struct pf_addr addr; + struct pf_addr mask; + } a; + char ifname[IFNAMSIZ]; + char tblname[PF_TABLE_NAME_SIZE]; + } v; + union { + struct pfi_dynaddr *dyn; + struct pfr_ktable *tbl; + int dyncnt; + int tblcnt; + } p; + u_int8_t type; /* PF_ADDR_* */ + u_int8_t iflags; /* PFI_AFLAG_* */ +}; + +#ifdef _KERNEL + +struct pfi_dynaddr { + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + void *pfid_hook_cookie; + int pfid_net; /* optional mask, or 128 */ + int pfid_acnt4; /* address count, IPv4 */ + int pfid_acnt6; /* address count, IPv6 */ + sa_family_t pfid_af; /* rule address family */ + u_int8_t pfid_iflags; /* PFI_AFLAG_* */ +}; + +/* + * Address manipulation macros + */ + +#define splsoftnet() splnet() + +/* XXX correct values for zinit? */ +#define ZONE_CREATE(var, type, desc) \ + var = zinit(desc, sizeof(type), 1, 0, 1); \ + if (var == NULL) break +#define ZONE_DESTROY(a) /* XXX */ + +#define pool_get(p, f) zalloc(*(p)) +#define pool_put(p, o) zfree(*(p), (o)) + +#define PF_NAME "pf" + +#define PF_MODVER 1 +#define PFLOG_MODVER 1 +#define PFSYNC_MODVER 1 + +#define PFLOG_MINVER 1 +#define PFLOG_PREFVER PFLOG_MODVER +#define PFLOG_MAXVER 1 +#define PFSYNC_MINVER 1 +#define PFSYNC_PREFVER PFSYNC_MODVER +#define PFSYNC_MAXVER 1 + +/* prototyped for pf_subr.c */ +struct hook_desc { + TAILQ_ENTRY(hook_desc) hd_list; + void (*hd_fn)(void *); + void *hd_arg; +}; +TAILQ_HEAD(hook_desc_head, hook_desc); + +void *hook_establish(struct hook_desc_head *, int, void (*)(void *), void *); +void hook_disestablish(struct hook_desc_head *, void *); +void dohooks(struct hook_desc_head *, int); + +#define HOOK_REMOVE 0x01 +#define HOOK_FREE 0x02 + +#ifdef INET +#ifndef INET6 +#define PF_INET_ONLY +#endif /* ! INET6 */ +#endif /* INET */ + +#ifdef INET6 +#ifndef INET +#define PF_INET6_ONLY +#endif /* ! 
INET */ +#endif /* INET6 */ + +#ifdef INET +#ifdef INET6 +#define PF_INET_INET6 +#endif /* INET6 */ +#endif /* INET */ + +#else + +#define PF_INET_INET6 + +#endif /* _KERNEL */ + +/* Both IPv4 and IPv6 */ +#ifdef PF_INET_INET6 + +#define PF_AEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0])) \ + +#define PF_ANEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0])) \ + +#define PF_AZERO(a, c) \ + ((c == AF_INET && !(a)->addr32[0]) || \ + (!(a)->addr32[0] && !(a)->addr32[1] && \ + !(a)->addr32[2] && !(a)->addr32[3] )) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv6 */ + +#ifdef PF_INET6_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0]) \ + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0]) \ + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0] && \ + !(a)->addr32[1] && \ + !(a)->addr32[2] && \ + !(a)->addr32[3] ) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv4 */ +#ifdef PF_INET_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[0] == (b)->addr32[0]) + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[0] != (b)->addr32[0]) + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0]) + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + (a)->v4.s_addr = (b)->v4.s_addr + +#define PF_AINC(a, f) \ + do { \ + (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ + } while (0) + +#define PF_POOLMASK(a, b, c, d, f) \ + do { \ + (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ + (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ + } while (0) + +#endif /* PF_INET_ONLY */ +#endif /* PF_INET6_ONLY */ +#endif /* PF_INET_INET6 */ + +#define PF_MISMATCHAW(aw, x, af, not) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af)))) != \ + (not) \ + ) + +struct pf_rule_uid { + uid_t uid[2]; + u_int8_t op; +}; + +struct pf_rule_gid { + uid_t gid[2]; + u_int8_t op; +}; + +struct pf_rule_addr { + struct pf_addr_wrap addr; + u_int16_t port[2]; + u_int8_t not; + u_int8_t port_op; +}; + +struct pf_pooladdr { + struct pf_addr_wrap addr; + TAILQ_ENTRY(pf_pooladdr) entries; + char ifname[IFNAMSIZ]; + struct pfi_kif *kif; +}; + +TAILQ_HEAD(pf_palist, pf_pooladdr); + +struct pf_poolhashkey { + union { + u_int8_t key8[16]; + 
u_int16_t key16[8]; + u_int32_t key32[4]; + } pfk; /* 128-bit hash key */ +#define key8 pfk.key8 +#define key16 pfk.key16 +#define key32 pfk.key32 +}; + +struct pf_pool { + struct pf_palist list; + struct pf_pooladdr *cur; + struct pf_poolhashkey key; + struct pf_addr counter; + int tblidx; + u_int16_t proxy_port[2]; + u_int8_t port_op; + u_int8_t opts; +}; + + +/* A packed Operating System description for fingerprinting */ +typedef u_int32_t pf_osfp_t; +#define PF_OSFP_ANY ((pf_osfp_t)0) +#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) +#define PF_OSFP_NOMATCH ((pf_osfp_t)-2) + +struct pf_osfp_entry { + SLIST_ENTRY(pf_osfp_entry) fp_entry; + pf_osfp_t fp_os; + int fp_enflags; +#define PF_OSFP_EXPANDED 0x001 /* expanded entry */ +#define PF_OSFP_GENERIC 0x002 /* generic signature */ +#define PF_OSFP_NODETAIL 0x004 /* no p0f details */ +#define PF_OSFP_LEN 32 + char fp_class_nm[PF_OSFP_LEN]; + char fp_version_nm[PF_OSFP_LEN]; + char fp_subtype_nm[PF_OSFP_LEN]; +}; +#define PF_OSFP_ENTRY_EQ(a, b) \ + ((a)->fp_os == (b)->fp_os && \ + memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0) + +/* handle pf_osfp_t packing */ +#define _FP_RESERVED_BIT 1 /* For the special negative #defines */ +#define _FP_UNUSED_BITS 1 +#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */ +#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */ +#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */ +#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \ + (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \ + ((1 << _FP_CLASS_BITS) - 1); \ + (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \ + ((1 << _FP_VERSION_BITS) - 1);\ + (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while(0) +#define PF_OSFP_PACK(osfp, class, version, subtype) do { \ + (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \ + + _FP_SUBTYPE_BITS); \ + (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \ + _FP_SUBTYPE_BITS; \ + (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while(0) + +/* the fingerprint of an OSes TCP SYN packet */ +typedef u_int64_t pf_tcpopts_t; +struct pf_os_fingerprint { + SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */ + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; +#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */ +#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */ +#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */ +#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */ +#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */ +#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */ +#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */ +#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */ +#define PF_OSFP_WSCALE_DC 0x0100 /* TCP window scale dont-care */ +#define PF_OSFP_MSS 0x0200 /* TCP MSS */ +#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */ +#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ +#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ +#define PF_OSFP_TS0 0x2000 /* Zero timestamp */ + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ +#define PF_OSFP_MAXTTL_OFFSET 40 +/* TCP 
options packing */ +#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */ +#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */ +#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */ +#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */ +#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */ +#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */ +#define PF_OSFP_MAX_OPTS \ + (sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \ + / PF_OSFP_TCPOPT_BITS + + SLIST_ENTRY(pf_os_fingerprint) fp_next; +}; + +struct pf_osfp_ioctl { + struct pf_osfp_entry fp_os; + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ + + int fp_getnum; /* DIOCOSFPGET number */ +}; + + +union pf_rule_ptr { + struct pf_rule *ptr; + u_int32_t nr; +}; + +struct pf_rule { + struct pf_rule_addr src; + struct pf_rule_addr dst; +#define PF_SKIP_IFP 0 +#define PF_SKIP_DIR 1 +#define PF_SKIP_AF 2 +#define PF_SKIP_PROTO 3 +#define PF_SKIP_SRC_ADDR 4 +#define PF_SKIP_SRC_PORT 5 +#define PF_SKIP_DST_ADDR 6 +#define PF_SKIP_DST_PORT 7 +#define PF_SKIP_COUNT 8 + union pf_rule_ptr skip[PF_SKIP_COUNT]; +#define PF_RULE_LABEL_SIZE 64 + char label[PF_RULE_LABEL_SIZE]; +#define PF_QNAME_SIZE 16 + char ifname[IFNAMSIZ]; + char qname[PF_QNAME_SIZE]; + char pqname[PF_QNAME_SIZE]; +#define PF_ANCHOR_NAME_SIZE 16 + char anchorname[PF_ANCHOR_NAME_SIZE]; +#define PF_TAG_NAME_SIZE 16 + char tagname[PF_TAG_NAME_SIZE]; + char match_tagname[PF_TAG_NAME_SIZE]; + + TAILQ_ENTRY(pf_rule) entries; + struct pf_pool rpool; + + u_int64_t evaluations; + u_int64_t packets; + u_int64_t bytes; + + struct pfi_kif *kif; + struct pf_anchor *anchor; + + pf_osfp_t os_fingerprint; + + u_int32_t timeout[PFTM_MAX]; + u_int32_t states; + u_int32_t max_states; + u_int32_t src_nodes; + u_int32_t max_src_nodes; + u_int32_t max_src_states; + u_int32_t qid; + u_int32_t pqid; + u_int32_t rt_listid; + u_int32_t nr; + + u_int16_t return_icmp; + u_int16_t return_icmp6; + u_int16_t max_mss; + u_int16_t tag; + u_int16_t match_tag; + + struct pf_rule_uid uid; + struct pf_rule_gid gid; + + u_int32_t rule_flag; + u_int8_t action; + u_int8_t direction; + u_int8_t log; + u_int8_t quick; + u_int8_t ifnot; + u_int8_t match_tag_not; + u_int8_t natpass; + +#define PF_STATE_NORMAL 0x1 +#define PF_STATE_MODULATE 0x2 +#define PF_STATE_SYNPROXY 0x3 + u_int8_t keep_state; + sa_family_t af; + u_int8_t proto; + u_int8_t type; + u_int8_t code; + u_int8_t flags; + u_int8_t flagset; + u_int8_t min_ttl; + u_int8_t allow_opts; + u_int8_t rt; + u_int8_t return_ttl; + u_int8_t tos; +}; + +/* rule flags */ +#define PFRULE_DROP 0x0000 +#define PFRULE_RETURNRST 0x0001 +#define PFRULE_FRAGMENT 0x0002 +#define PFRULE_RETURNICMP 0x0004 +#define PFRULE_RETURN 0x0008 +#define PFRULE_NOSYNC 0x0010 +#define PFRULE_SRCTRACK 0x0020 /* track source states */ +#define PFRULE_RULESRCTRACK 0x0040 /* per rule */ + +/* scrub flags */ +#define PFRULE_NODF 0x0100 +#define PFRULE_FRAGCROP 0x0200 /* non-buffering frag cache */ +#define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ +#define PFRULE_RANDOMID 0x0800 +#define PFRULE_REASSEMBLE_TCP 0x1000 + +/* rule flags again */ +#define PFRULE_IFBOUND 0x00010000 /* if-bound */ +#define PFRULE_GRBOUND 0x00020000 /* group-bound */ + +#define PFSTATE_HIWAT 10000 /* default state 
table size */ + +struct pf_src_node { + RB_ENTRY(pf_src_node) entry; + struct pf_addr addr; + struct pf_addr raddr; + union pf_rule_ptr rule; + struct pfi_kif *kif; + u_int32_t bytes; + u_int32_t packets; + u_int32_t states; + u_int32_t creation; + u_int32_t expire; + sa_family_t af; + u_int8_t ruletype; +}; + +#define PFSNODE_HIWAT 10000 /* default source node table size */ + +struct pf_state_scrub { + u_int16_t pfss_flags; +#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ + u_int8_t pfss_ttl; /* stashed TTL */ + u_int8_t pad; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +}; + +struct pf_state_host { + struct pf_addr addr; + u_int16_t port; + u_int16_t pad; +}; + +struct pf_state_peer { + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int16_t mss; /* Maximum segment size option */ + struct pf_state_scrub *scrub; /* state is scrubbed */ +}; + +TAILQ_HEAD(pf_state_queue, pf_state); + +struct pf_state { + u_int64_t id; + union { + struct { + RB_ENTRY(pf_state) entry_lan_ext; + RB_ENTRY(pf_state) entry_ext_gwy; + RB_ENTRY(pf_state) entry_id; + TAILQ_ENTRY(pf_state) entry_updates; + struct pfi_kif *kif; + } s; + char ifname[IFNAMSIZ]; + } u; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + struct pf_state_peer src; + struct pf_state_peer dst; + union pf_rule_ptr rule; + union pf_rule_ptr anchor; + union pf_rule_ptr nat_rule; + struct pf_addr rt_addr; + struct pfi_kif *rt_kif; + struct pf_src_node *src_node; + struct pf_src_node *nat_src_node; + u_int32_t creation; + u_int32_t expire; + u_int32_t pfsync_time; + u_int32_t packets[2]; + u_int32_t bytes[2]; + u_int32_t creatorid; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; +#define PFSTATE_NOSYNC 0x01 +#define PFSTATE_FROMSYNC 0x02 + u_int8_t pad; +}; + +TAILQ_HEAD(pf_rulequeue, pf_rule); + +struct pf_anchor; + +struct pf_ruleset { + TAILQ_ENTRY(pf_ruleset) entries; +#define PF_RULESET_NAME_SIZE 16 + char name[PF_RULESET_NAME_SIZE]; + struct { + struct pf_rulequeue queues[2]; + struct { + struct pf_rulequeue *ptr; + u_int32_t ticket; + int open; + } active, inactive; + } rules[PF_RULESET_MAX]; + struct pf_anchor *anchor; + u_int32_t tticket; + int tables; + int topen; +}; + +TAILQ_HEAD(pf_rulesetqueue, pf_ruleset); + +struct pf_anchor { + TAILQ_ENTRY(pf_anchor) entries; + char name[PF_ANCHOR_NAME_SIZE]; + struct pf_rulesetqueue rulesets; + int tables; +}; + +TAILQ_HEAD(pf_anchorqueue, pf_anchor); + +#define PF_RESERVED_ANCHOR "_pf" +#define PF_INTERFACE_RULESET "_if" + +#define PFR_TFLAG_PERSIST 0x00000001 +#define PFR_TFLAG_CONST 0x00000002 +#define PFR_TFLAG_ACTIVE 0x00000004 +#define PFR_TFLAG_INACTIVE 0x00000008 +#define PFR_TFLAG_REFERENCED 0x00000010 +#define PFR_TFLAG_REFDANCHOR 0x00000020 +#define PFR_TFLAG_USRMASK 0x00000003 +#define PFR_TFLAG_SETMASK 0x0000003C +#define PFR_TFLAG_ALLMASK 0x0000003F + +struct pfr_table { + char pfrt_anchor[PF_ANCHOR_NAME_SIZE]; + char pfrt_ruleset[PF_RULESET_NAME_SIZE]; + char pfrt_name[PF_TABLE_NAME_SIZE]; + u_int32_t pfrt_flags; + u_int8_t pfrt_fback; +}; + +enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, + PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, 
PFR_FB_MAX }; + +struct pfr_addr { + union { + struct in_addr _pfra_ip4addr; + struct in6_addr _pfra_ip6addr; + } pfra_u; + u_int8_t pfra_af; + u_int8_t pfra_net; + u_int8_t pfra_not; + u_int8_t pfra_fback; +}; +#define pfra_ip4addr pfra_u._pfra_ip4addr +#define pfra_ip6addr pfra_u._pfra_ip6addr + +enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; +enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; +#define PFR_OP_XPASS PFR_OP_ADDR_MAX + +struct pfr_astats { + struct pfr_addr pfras_a; + u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + long pfras_tzero; +}; + +enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; + +struct pfr_tstats { + struct pfr_table pfrts_t; + u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_match; + u_int64_t pfrts_nomatch; + long pfrts_tzero; + int pfrts_cnt; + int pfrts_refcnt[PFR_REFCNT_MAX]; +}; +#define pfrts_name pfrts_t.pfrt_name +#define pfrts_flags pfrts_t.pfrt_flags + +SLIST_HEAD(pfr_kentryworkq, pfr_kentry); +struct pfr_kentry { + struct radix_node pfrke_node[2]; + union sockaddr_union pfrke_sa; + u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + SLIST_ENTRY(pfr_kentry) pfrke_workq; + long pfrke_tzero; + u_int8_t pfrke_af; + u_int8_t pfrke_net; + u_int8_t pfrke_not; + u_int8_t pfrke_mark; +}; + +SLIST_HEAD(pfr_ktableworkq, pfr_ktable); +RB_HEAD(pfr_ktablehead, pfr_ktable); +struct pfr_ktable { + struct pfr_tstats pfrkt_ts; + RB_ENTRY(pfr_ktable) pfrkt_tree; + SLIST_ENTRY(pfr_ktable) pfrkt_workq; + struct radix_node_head *pfrkt_ip4; + struct radix_node_head *pfrkt_ip6; + struct pfr_ktable *pfrkt_shadow; + struct pfr_ktable *pfrkt_root; + struct pf_ruleset *pfrkt_rs; + long pfrkt_larg; + int pfrkt_nflags; +}; +#define pfrkt_t pfrkt_ts.pfrts_t +#define pfrkt_name pfrkt_t.pfrt_name +#define pfrkt_anchor pfrkt_t.pfrt_anchor +#define pfrkt_ruleset pfrkt_t.pfrt_ruleset +#define pfrkt_flags pfrkt_t.pfrt_flags +#define pfrkt_cnt pfrkt_ts.pfrts_cnt +#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt +#define pfrkt_packets pfrkt_ts.pfrts_packets +#define pfrkt_bytes pfrkt_ts.pfrts_bytes +#define pfrkt_match pfrkt_ts.pfrts_match +#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch +#define pfrkt_tzero pfrkt_ts.pfrts_tzero + +RB_HEAD(pf_state_tree_lan_ext, pf_state); +RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state, + u.s.entry_lan_ext, pf_state_compare_lan_ext); + +RB_HEAD(pf_state_tree_ext_gwy, pf_state); +RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state, + u.s.entry_ext_gwy, pf_state_compare_ext_gwy); + +struct pfi_if { + char pfif_name[IFNAMSIZ]; + u_int64_t pfif_packets[2][2][2]; + u_int64_t pfif_bytes[2][2][2]; + u_int64_t pfif_addcnt; + u_int64_t pfif_delcnt; + long pfif_tzero; + int pfif_states; + int pfif_rules; + int pfif_flags; +}; + +TAILQ_HEAD(pfi_grouphead, pfi_kif); +TAILQ_HEAD(pfi_statehead, pfi_kif); +RB_HEAD(pfi_ifhead, pfi_kif); +struct pfi_kif { + struct pfi_if pfik_if; + RB_ENTRY(pfi_kif) pfik_tree; + struct pf_state_tree_lan_ext pfik_lan_ext; + struct pf_state_tree_ext_gwy pfik_ext_gwy; + struct pfi_grouphead pfik_grouphead; + TAILQ_ENTRY(pfi_kif) pfik_instances; + TAILQ_ENTRY(pfi_kif) pfik_w_states; + struct hook_desc_head *pfik_ah_head; + void *pfik_ah_cookie; + struct pfi_kif *pfik_parent; + struct ifnet *pfik_ifp; + int pfik_states; + int pfik_rules; +}; +#define pfik_name pfik_if.pfif_name +#define pfik_packets pfik_if.pfif_packets 
+#define pfik_bytes pfik_if.pfif_bytes +#define pfik_tzero pfik_if.pfif_tzero +#define pfik_flags pfik_if.pfif_flags +#define pfik_addcnt pfik_if.pfif_addcnt +#define pfik_delcnt pfik_if.pfif_delcnt +#define pfik_states pfik_if.pfif_states +#define pfik_rules pfik_if.pfif_rules + +#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ +#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ +#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ +#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ +#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ +#define PFI_IFLAG_PLACEHOLDER 0x8000 /* placeholder group/interface */ + +struct pf_pdesc { + u_int64_t tot_len; /* Make Mickey money */ + union { + struct tcphdr *tcp; + struct udphdr *udp; + struct icmp *icmp; +#ifdef INET6 + struct icmp6_hdr *icmp6; +#endif /* INET6 */ + void *any; + } hdr; + struct pf_addr baddr; /* address before translation */ + struct pf_addr naddr; /* address after translation */ + struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ + struct pf_addr *src; + struct pf_addr *dst; + u_int16_t *ip_sum; + u_int32_t p_len; /* total length of payload */ + u_int16_t flags; /* Let SCRUB trigger behavior in + * state code. Easier than tags */ +#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ + sa_family_t af; + u_int8_t proto; + u_int8_t tos; +}; + +/* flags for RDR options */ +#define PF_DPORT_RANGE 0x01 /* Dest port uses range */ +#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ + +/* Reasons code for passing/dropping a packet */ +#define PFRES_MATCH 0 /* Explicit match of a rule */ +#define PFRES_BADOFF 1 /* Bad offset for pull_hdr */ +#define PFRES_FRAG 2 /* Dropping following fragment */ +#define PFRES_SHORT 3 /* Dropping short packet */ +#define PFRES_NORM 4 /* Dropping by normalizer */ +#define PFRES_MEMORY 5 /* Dropped due to lacking mem */ +#define PFRES_MAX 6 /* total+1 */ + +#define PFRES_NAMES { \ + "match", \ + "bad-offset", \ + "fragment", \ + "short", \ + "normalize", \ + "memory", \ + NULL \ +} + +/* UDP state enumeration */ +#define PFUDPS_NO_TRAFFIC 0 +#define PFUDPS_SINGLE 1 +#define PFUDPS_MULTIPLE 2 + +#define PFUDPS_NSTATES 3 /* number of state levels */ + +#define PFUDPS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +/* Other protocol state enumeration */ +#define PFOTHERS_NO_TRAFFIC 0 +#define PFOTHERS_SINGLE 1 +#define PFOTHERS_MULTIPLE 2 + +#define PFOTHERS_NSTATES 3 /* number of state levels */ + +#define PFOTHERS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +#define FCNT_STATE_SEARCH 0 +#define FCNT_STATE_INSERT 1 +#define FCNT_STATE_REMOVALS 2 +#define FCNT_MAX 3 + +#define SCNT_SRC_NODE_SEARCH 0 +#define SCNT_SRC_NODE_INSERT 1 +#define SCNT_SRC_NODE_REMOVALS 2 +#define SCNT_MAX 3 + +#define ACTION_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + } while (0) + +#define REASON_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + if (x < PFRES_MAX) \ + pf_status.counters[x]++; \ + } while (0) + +struct pf_status { + u_int64_t counters[PFRES_MAX]; + u_int64_t fcounters[FCNT_MAX]; + u_int64_t scounters[SCNT_MAX]; + u_int64_t pcounters[2][2][3]; + u_int64_t bcounters[2][2]; + u_int64_t stateid; + u_int32_t running; + u_int32_t states; + u_int32_t src_nodes; + u_int32_t since; + u_int32_t debug; + u_int32_t hostid; + char ifname[IFNAMSIZ]; +}; + +struct cbq_opts { + u_int minburst; + u_int maxburst; + u_int pktsize; + u_int maxpktsize; + u_int ns_per_byte; + u_int maxidle; + int minidle; 
+ u_int offtime; + int flags; +}; + +struct priq_opts { + int flags; +}; + +struct hfsc_opts { + /* real-time service curve */ + u_int rtsc_m1; /* slope of the 1st segment in bps */ + u_int rtsc_d; /* the x-projection of m1 in msec */ + u_int rtsc_m2; /* slope of the 2nd segment in bps */ + /* link-sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; + /* upper-limit service curve */ + u_int ulsc_m1; + u_int ulsc_d; + u_int ulsc_m2; + int flags; +}; + +struct pf_altq { + char ifname[IFNAMSIZ]; + + void *altq_disc; /* discipline-specific state */ + TAILQ_ENTRY(pf_altq) entries; + + /* scheduler spec */ + u_int8_t scheduler; /* scheduler type */ + u_int16_t tbrsize; /* tokenbucket regulator size */ + u_int32_t ifbandwidth; /* interface bandwidth */ + + /* queue spec */ + char qname[PF_QNAME_SIZE]; /* queue name */ + char parent[PF_QNAME_SIZE]; /* parent name */ + u_int32_t parent_qid; /* parent queue id */ + u_int32_t bandwidth; /* queue bandwidth */ + u_int8_t priority; /* priority */ + u_int16_t qlimit; /* queue size limit */ + u_int16_t flags; /* misc flags */ + union { + struct cbq_opts cbq_opts; + struct priq_opts priq_opts; + struct hfsc_opts hfsc_opts; + } pq_u; + + u_int32_t qid; /* return value */ +}; + +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + u_int16_t tag; + int ref; +}; + +#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ +#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ +#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ +#define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ + +/* + * ioctl parameter structures + */ + +struct pfioc_pooladdr { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + u_int32_t r_num; + u_int8_t r_action; + u_int8_t r_last; + u_int8_t af; + char anchor[PF_ANCHOR_NAME_SIZE]; + char ruleset[PF_RULESET_NAME_SIZE]; + struct pf_pooladdr addr; +}; + +struct pfioc_rule { + u_int32_t action; + u_int32_t ticket; + u_int32_t pool_ticket; + u_int32_t nr; + char anchor[PF_ANCHOR_NAME_SIZE]; + char ruleset[PF_RULESET_NAME_SIZE]; + struct pf_rule rule; +}; + +struct pfioc_natlook { + struct pf_addr saddr; + struct pf_addr daddr; + struct pf_addr rsaddr; + struct pf_addr rdaddr; + u_int16_t sport; + u_int16_t dport; + u_int16_t rsport; + u_int16_t rdport; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; +}; + +struct pfioc_state { + u_int32_t nr; + struct pf_state state; +}; + +struct pfioc_state_kill { + /* XXX returns the number of states killed in psk_af */ + sa_family_t psk_af; + int psk_proto; + struct pf_rule_addr psk_src; + struct pf_rule_addr psk_dst; + char psk_ifname[IFNAMSIZ]; +}; + +struct pfioc_states { + int ps_len; + union { + caddr_t psu_buf; + struct pf_state *psu_states; + } ps_u; +#define ps_buf ps_u.psu_buf +#define ps_states ps_u.psu_states +}; + +struct pfioc_src_nodes { + int psn_len; + union { + caddr_t psu_buf; + struct pf_src_node *psu_src_nodes; + } psn_u; +#define psn_buf psn_u.psu_buf +#define psn_src_nodes psn_u.psu_src_nodes +}; + +struct pfioc_if { + char ifname[IFNAMSIZ]; +}; + +struct pfioc_tm { + int timeout; + int seconds; +}; + +struct pfioc_limit { + int index; + unsigned limit; +}; + +struct pfioc_altq { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + struct pf_altq altq; +}; + +struct pfioc_qstats { + u_int32_t ticket; + u_int32_t nr; + void *buf; + int nbytes; + u_int8_t scheduler; +}; + +struct pfioc_anchor { + u_int32_t nr; + char name[PF_ANCHOR_NAME_SIZE]; +}; 
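The pfioc_* structures above and the DIOC* numbers further down form the user/kernel interface of /dev/pf. As an illustration (a sketch, not part of this patch: it follows the enumeration pattern pfctl uses, and assumes pfvar.h is includable from userland together with the prerequisite headers shown), the anchor list can be walked with the DIOCGETANCHORS/DIOCGETANCHOR ioctls defined below:

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <netinet/in.h>
	#include <net/pf/pfvar.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <err.h>

	int
	main(void)
	{
		struct pfioc_anchor pa;
		u_int32_t mnr, nr;
		int dev;

		if ((dev = open("/dev/pf", O_RDONLY)) == -1)
			err(1, "open(/dev/pf)");
		/* DIOCGETANCHORS reports the total anchor count in pa.nr */
		if (ioctl(dev, DIOCGETANCHORS, &pa) == -1)
			err(1, "DIOCGETANCHORS");
		mnr = pa.nr;
		for (nr = 0; nr < mnr; nr++) {
			pa.nr = nr;	/* index of the anchor to fetch */
			if (ioctl(dev, DIOCGETANCHOR, &pa) == -1)
				err(1, "DIOCGETANCHOR");
			printf("%s\n", pa.name);
		}
		return (0);
	}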
+ +struct pfioc_ruleset { + u_int32_t nr; + char anchor[PF_ANCHOR_NAME_SIZE]; + char name[PF_RULESET_NAME_SIZE]; +}; + +#define PF_RULESET_ALTQ (PF_RULESET_MAX) +#define PF_RULESET_TABLE (PF_RULESET_MAX+1) +struct pfioc_trans { + int size; /* number of elements */ + int esize; /* size of each element in bytes */ + struct pfioc_trans_e { + int rs_num; + char anchor[PF_ANCHOR_NAME_SIZE]; + char ruleset[PF_RULESET_NAME_SIZE]; + u_int32_t ticket; + } *array; +}; + +#define PFR_FLAG_ATOMIC 0x00000001 +#define PFR_FLAG_DUMMY 0x00000002 +#define PFR_FLAG_FEEDBACK 0x00000004 +#define PFR_FLAG_CLSTATS 0x00000008 +#define PFR_FLAG_ADDRSTOO 0x00000010 +#define PFR_FLAG_REPLACE 0x00000020 +#define PFR_FLAG_ALLRSETS 0x00000040 +#define PFR_FLAG_ALLMASK 0x0000007F +#ifdef _KERNEL +#define PFR_FLAG_USERIOCTL 0x10000000 +#endif + +struct pfioc_table { + struct pfr_table pfrio_table; + void *pfrio_buffer; + int pfrio_esize; + int pfrio_size; + int pfrio_size2; + int pfrio_nadd; + int pfrio_ndel; + int pfrio_nchange; + int pfrio_flags; + u_int32_t pfrio_ticket; +}; +#define pfrio_exists pfrio_nadd +#define pfrio_nzero pfrio_nadd +#define pfrio_nmatch pfrio_nadd +#define pfrio_naddr pfrio_size2 +#define pfrio_setflag pfrio_size2 +#define pfrio_clrflag pfrio_nadd + + +#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ +#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ +#define PFI_FLAG_ALLMASK 0x0003 + +struct pfioc_iface { + char pfiio_name[IFNAMSIZ]; + void *pfiio_buffer; + int pfiio_esize; + int pfiio_size; + int pfiio_nzero; + int pfiio_flags; +}; + + +/* + * ioctl operations + */ + +#define DIOCSTART _IO ('D', 1) +#define DIOCSTOP _IO ('D', 2) +#define DIOCBEGINRULES _IOWR('D', 3, struct pfioc_rule) +#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) +#define DIOCCOMMITRULES _IOWR('D', 5, struct pfioc_rule) +#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) +#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) +/* XXX cut 8 - 17 */ +#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill) +#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) +#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) +#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) +#define DIOCCLRSTATUS _IO ('D', 22) +#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) +#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) +#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) +#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) +/* XXX cut 26 - 28 */ +#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) +#define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) +#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) +#define DIOCCLRRULECTRS _IO ('D', 38) +#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) +#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) +#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) +#define DIOCSTARTALTQ _IO ('D', 42) +#define DIOCSTOPALTQ _IO ('D', 43) +#define DIOCBEGINALTQS _IOWR('D', 44, u_int32_t) +#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) +#define DIOCCOMMITALTQS _IOWR('D', 46, u_int32_t) +#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) +#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) +#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) +#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) +#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) +#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) +#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) +#define DIOCGETADDR _IOWR('D', 
54, struct pfioc_pooladdr) +#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) +#define DIOCGETANCHORS _IOWR('D', 56, struct pfioc_anchor) +#define DIOCGETANCHOR _IOWR('D', 57, struct pfioc_anchor) +#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) +#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) +#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) +#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) +#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) +#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) +#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) +#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) +#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) +#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) +#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) +#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) +#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) +#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) +#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) +#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) +#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) +#define DIOCRINABEGIN _IOWR('D', 75, struct pfioc_table) +#define DIOCRINACOMMIT _IOWR('D', 76, struct pfioc_table) +#define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) +#define DIOCOSFPFLUSH _IO('D', 78) +#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) +#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) +#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) +#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) +#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) +#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) +#define DIOCCLRSRCNODES _IO('D', 85) +#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) +#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) +#define DIOCICLRISTATS _IOWR('D', 88, struct pfioc_iface) +struct pf_ifspeed { + char ifname[IFNAMSIZ]; + u_int32_t baudrate; +}; +#define DIOCGIFSPEED _IOWR('D', 89, struct pf_ifspeed) + +#ifdef _KERNEL +RB_HEAD(pf_src_tree, pf_src_node); +RB_PROTOTYPE(pf_src_tree, pf_src_node, entry, pf_src_compare); +extern struct pf_src_tree tree_src_tracking; + +RB_HEAD(pf_state_tree_id, pf_state); +RB_PROTOTYPE(pf_state_tree_id, pf_state, + entry_id, pf_state_compare_id); +extern struct pf_state_tree_id tree_id; +extern struct pf_state_queue state_updates; + +extern struct pf_anchorqueue pf_anchors; +extern struct pf_ruleset pf_main_ruleset; +TAILQ_HEAD(pf_poolqueue, pf_pool); +extern struct pf_poolqueue pf_pools[2]; +TAILQ_HEAD(pf_altqqueue, pf_altq); +extern struct pf_altqqueue pf_altqs[2]; +extern struct pf_palist pf_pabuf; +extern struct pfi_kif **pfi_index2kif; + +extern u_int32_t ticket_altqs_active; +extern u_int32_t ticket_altqs_inactive; +extern int altqs_inactive_open; +extern u_int32_t ticket_pabuf; +extern struct pf_altqqueue *pf_altqs_active; +extern struct pf_altqqueue *pf_altqs_inactive; +extern struct pf_poolqueue *pf_pools_active; +extern struct pf_poolqueue *pf_pools_inactive; +extern int pf_tbladdr_setup(struct pf_ruleset *, + struct pf_addr_wrap *); +extern void pf_tbladdr_remove(struct pf_addr_wrap *); +extern void pf_tbladdr_copyout(struct pf_addr_wrap *); +extern void pf_calc_skip_steps(struct pf_rulequeue *); +extern void pf_update_anchor_rules(void); +extern vm_zone_t pf_src_tree_pl, pf_rule_pl; +extern vm_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; +extern vm_zone_t pfr_ktable_pl, 
pfr_kentry_pl; +extern vm_zone_t pf_cache_pl, pf_cent_pl; +extern vm_zone_t pf_state_scrub_pl; +extern vm_zone_t pfi_addr_pl; +extern void pf_purge_timeout(void *); +extern void pf_purge_expired_src_nodes(void); +extern void pf_purge_expired_states(void); +extern int pf_insert_state(struct pfi_kif *, + struct pf_state *); +extern int pf_insert_src_node(struct pf_src_node **, + struct pf_rule *, struct pf_addr *, + sa_family_t); +void pf_src_tree_remove_state(struct pf_state *); +extern struct pf_state *pf_find_state_byid(struct pf_state *); +extern struct pf_state *pf_find_state_all(struct pf_state *key, + u_int8_t tree, int *more); +extern struct pf_anchor *pf_find_anchor(const char *); +extern struct pf_ruleset *pf_find_ruleset(char *, char *); +extern struct pf_ruleset *pf_find_or_create_ruleset( + char[PF_ANCHOR_NAME_SIZE], + char[PF_RULESET_NAME_SIZE]); +extern void pf_remove_if_empty_ruleset( + struct pf_ruleset *); + +extern struct ifnet *sync_ifp; +extern struct pf_rule pf_default_rule; +extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, + u_int8_t); +void pf_rm_rule(struct pf_rulequeue *, + struct pf_rule *); + +#ifdef INET +int pf_test(int, struct ifnet *, struct mbuf **); +#endif /* INET */ + +#ifdef INET6 +int pf_test6(int, struct ifnet *, struct mbuf **); +void pf_poolmask(struct pf_addr *, struct pf_addr*, + struct pf_addr *, struct pf_addr *, u_int8_t); +void pf_addr_inc(struct pf_addr *, sa_family_t); +#endif /* INET6 */ + +void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, + sa_family_t); +void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); +int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, + u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *); +int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); +int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); +int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); +int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); +int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); + +void pf_normalize_init(void); +int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *); +int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *); +int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, + struct pf_pdesc *); +void pf_normalize_tcp_cleanup(struct pf_state *); +int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); +int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, + u_short *, struct tcphdr *, struct pf_state_peer *, + struct pf_state_peer *, int *); +u_int32_t + pf_state_expires(const struct pf_state *); +void pf_purge_expired_fragments(void); +int pf_routable(struct pf_addr *addr, sa_family_t af); +void pfr_initialize(void); +int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); +void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, + u_int64_t, int, int, int); +int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, + struct pf_addr **, struct pf_addr **, sa_family_t); +void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); +struct pfr_ktable * + pfr_attach_table(struct pf_ruleset *, char *); +void pfr_detach_table(struct pfr_ktable *); +int pfr_clr_tables(struct pfr_table *, int *, int); +int pfr_add_tables(struct pfr_table *, int, int *, int); +int pfr_del_tables(struct pfr_table *, int, int *, int); +int 
pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); +int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); +int pfr_clr_tstats(struct pfr_table *, int, int *, int); +int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); +int pfr_clr_addrs(struct pfr_table *, int *, int); +int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int *, int *, int *, int); +int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); +int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); +int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, + int); +int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); +int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); +int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); +int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, + int *, u_int32_t, int); + +void pfi_initialize(void); +void pfi_cleanup(void); +void pfi_attach_clone(struct if_clone *); +void pfi_attach_ifnet(struct ifnet *); +void pfi_detach_ifnet(struct ifnet *); +struct pfi_kif *pfi_lookup_create(const char *); +struct pfi_kif *pfi_lookup_if(const char *); +int pfi_maybe_destroy(struct pfi_kif *); +struct pfi_kif *pfi_attach_rule(const char *); +void pfi_detach_rule(struct pfi_kif *); +void pfi_attach_state(struct pfi_kif *); +void pfi_detach_state(struct pfi_kif *); +int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); +void pfi_dynaddr_copyout(struct pf_addr_wrap *); +void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_fill_oldstatus(struct pf_status *); +int pfi_clr_istats(const char *, int *, int); +int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); +int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, + sa_family_t); + +extern struct pfi_statehead pfi_statehead; + +u_int16_t pf_tagname2tag(char *); +void pf_tag2tagname(u_int16_t, char *); +void pf_tag_unref(u_int16_t); +void pf_tag_packet(struct mbuf *, int); +u_int32_t pf_qname2qid(char *); +void pf_qid2qname(u_int32_t, char *); +void pf_qid_unref(u_int32_t); + +extern struct pf_status pf_status; +extern vm_zone_t pf_frent_pl, pf_frag_pl; + +struct pf_pool_limit { + void *pp; + unsigned limit; +}; +extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; + +struct pf_frent { + LIST_ENTRY(pf_frent) fr_next; + struct ip *fr_ip; + struct mbuf *fr_m; +}; + +struct pf_frcache { + LIST_ENTRY(pf_frcache) fr_next; + uint16_t fr_off; + uint16_t fr_end; +}; + +struct pf_fragment { + RB_ENTRY(pf_fragment) fr_entry; + TAILQ_ENTRY(pf_fragment) frag_next; + struct in_addr fr_src; + struct in_addr fr_dst; + u_int8_t fr_p; /* protocol of this fragment */ + u_int8_t fr_flags; /* status flags */ + u_int16_t fr_id; /* fragment id for reassemble */ + u_int16_t fr_max; /* fragment data max */ + u_int32_t fr_timeout; +#define fr_queue fr_u.fru_queue +#define fr_cache fr_u.fru_cache + union { + LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ + LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ + } fr_u; +}; +#endif /* _KERNEL */ + +/* The fingerprint functions can be linked into userland programs (tcpdump) */ +int pf_osfp_add(struct pf_osfp_ioctl *); +#ifdef _KERNEL +struct pf_osfp_enlist * + pf_osfp_fingerprint(struct pf_pdesc *, 
diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h
index f805cc9796..843e24dc87 100644
--- a/sys/netinet/icmp_var.h
+++ b/sys/netinet/icmp_var.h
@@ -32,7 +32,7 @@
  *
  *	@(#)icmp_var.h	8.1 (Berkeley) 6/10/93
  * $FreeBSD: src/sys/netinet/icmp_var.h,v 1.15.2.2 2001/12/07 09:23:11 ru Exp $
- * $DragonFly: src/sys/netinet/icmp_var.h,v 1.3 2003/08/23 11:18:00 rob Exp $
+ * $DragonFly: src/sys/netinet/icmp_var.h,v 1.4 2004/09/19 22:32:48 joerg Exp $
  */
 
 #ifndef _NETINET_ICMP_VAR_H_
@@ -82,6 +82,7 @@ struct icmpstat {
 
 #ifdef _KERNEL
 SYSCTL_DECL(_net_inet_icmp);
+extern struct icmpstat icmpstat;	/* icmp statistics */
 #ifdef ICMP_BANDLIM
 extern int badport_bandlim (int);
 #endif
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 917e95f327..c8a20e3774 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -32,7 +32,7 @@
  *
  *	@(#)in.h	8.3 (Berkeley) 1/3/94
  * $FreeBSD: src/sys/netinet/in.h,v 1.48.2.10 2003/08/24 08:24:38 hsu Exp $
- * $DragonFly: src/sys/netinet/in.h,v 1.8 2004/09/16 13:40:23 joerg Exp $
+ * $DragonFly: src/sys/netinet/in.h,v 1.9 2004/09/19 22:32:48 joerg Exp $
  */
 
 #ifndef _NETINET_IN_H_
@@ -168,6 +168,7 @@
 /* 101-254: Partly Unassigned */
 #define	IPPROTO_PIM		103	/* Protocol Independent Mcast */
 #define	IPPROTO_PGM		113	/* PGM */
+#define	IPPROTO_PFSYNC		240	/* PFSYNC */
 /* 255: Reserved */
 /* BSD Private, local use, namespace incursion */
 #define	IPPROTO_DIVERT		254	/* divert pseudo-protocol */
@@ -293,6 +294,7 @@ struct in_addr {
 #define	INADDR_UNSPEC_GROUP	(u_int32_t)0xe0000000	/* 224.0.0.0 */
 #define	INADDR_ALLHOSTS_GROUP	(u_int32_t)0xe0000001	/* 224.0.0.1 */
 #define	INADDR_ALLRTRS_GROUP	(u_int32_t)0xe0000002	/* 224.0.0.2 */
+#define	INADDR_PFSYNC_GROUP	(u_int32_t)0xe00000f0	/* 224.0.0.240 */
 #define	INADDR_MAX_LOCAL_GROUP	(u_int32_t)0xe00000ff	/* 224.0.0.255 */
 
 #define	IN_LOOPBACKNET		127	/* official! */
diff --git a/sys/netinet/in_cksum.c b/sys/netinet/in_cksum.c
index 665d95cb17..70ba68a31f 100644
--- a/sys/netinet/in_cksum.c
+++ b/sys/netinet/in_cksum.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $DragonFly: src/sys/netinet/in_cksum.c,v 1.5 2004/07/16 05:51:19 dillon Exp $
+ * $DragonFly: src/sys/netinet/in_cksum.c,v 1.6 2004/09/19 22:32:48 joerg Exp $
  */
 
 #include 
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
@@ -66,14 +67,36 @@
  * carry when collapsing it back down to 16 bits.
  */
 __uint32_t
-in_cksum_range(struct mbuf *m, int offset, int bytes)
+in_cksum_range(struct mbuf *m, int nxt, int offset, int bytes)
 {
+	union {
+		struct ipovly ipov;
+		uint16_t w[10];
+	} u;
 	__uint8_t *ptr;
 	__uint32_t sum0;
 	__uint32_t sum1;
 	int n;
 	int flip;
 
+	sum0 = 0;
+
+	if (nxt != 0) {
+		/* pseudo header */
+		if (offset < sizeof(struct ipovly))
+			panic("in_cksum_range: offset too short");
+		if (m->m_len < sizeof(struct ip))
+			panic("in_cksum_range: bad mbuf chain");
+		bzero(&u.ipov, sizeof(u.ipov));
+		u.ipov.ih_len = htons(bytes);
+		u.ipov.ih_pr = nxt;
+		u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
+		u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+		ptr = u.w;
+		sum0 += ptr[0]; sum0 += ptr[1]; sum0 += ptr[2];
+		sum0 += ptr[3]; sum0 += ptr[4]; sum0 += ptr[5];
+		sum0 += ptr[6]; sum0 += ptr[7]; sum0 += ptr[8];
+		sum0 += ptr[9];
+	}
+
 	/*
 	 * Skip fully engulfed mbufs.  Branch predict optimal.
 	 */
@@ -91,7 +114,6 @@ in_cksum_range(struct mbuf *m, int offset, int bytes)
 	 *
 	 * Initial offsets do not pre-set flip (assert that offset is even?)
 	 */
-	sum0 = 0;
 	sum1 = 0;
 	flip = 0;
 	while (bytes > 0 && m) {
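
[Aside: the pseudo header the new nxt argument folds in is the classic src/dst/zero/protocol/length block from the TCP and UDP specifications; the zeroed pad bytes of the ipovly overlay contribute nothing to the sum. A self-contained userland sketch of the same ones-complement arithmetic, independent of the kernel's mbuf walker and with all names invented, follows.]

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Userland model of a 16-bit ones-complement checksum: accumulate
     * 16-bit words into a 32-bit sum, then fold the carries back down,
     * mirroring the arithmetic in_cksum_range() performs. */
    struct pseudo_hdr {            /* 12-byte UDP/TCP pseudo header */
        struct in_addr src;
        struct in_addr dst;
        uint8_t zero;
        uint8_t proto;
        uint16_t len;              /* network byte order */
    };

    static uint32_t
    cksum_add(uint32_t sum, const void *data, size_t len)
    {
        const uint16_t *w = data;

        while (len > 1) {
            sum += *w++;
            len -= 2;
        }
        if (len)                   /* trailing odd byte, zero padded */
            sum += *(const uint8_t *)w;
        return (sum);
    }

    static uint16_t
    cksum_fold(uint32_t sum)
    {
        while (sum >> 16)          /* fold carries until 16 bits remain */
            sum = (sum & 0xffff) + (sum >> 16);
        return ((uint16_t)~sum);
    }

    int
    main(void)
    {
        struct pseudo_hdr ph;
        const char payload[] = "xyzzy";    /* stands in for a UDP packet */
        uint32_t sum;

        memset(&ph, 0, sizeof(ph));
        inet_aton("192.0.2.1", &ph.src);
        inet_aton("192.0.2.2", &ph.dst);
        ph.proto = IPPROTO_UDP;
        ph.len = htons(sizeof(payload));

        sum = cksum_add(0, &ph, sizeof(ph));    /* seed with pseudo header */
        sum = cksum_add(sum, payload, sizeof(payload));
        printf("checksum: 0x%04x\n", cksum_fold(sum));
        return (0);
    }
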
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index bf92069cf6..5b47a87622 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -32,7 +32,7 @@
  *
  *	@(#)in_proto.c	8.2 (Berkeley) 2/9/95
  * $FreeBSD: src/sys/netinet/in_proto.c,v 1.53.2.7 2003/08/24 08:24:38 hsu Exp $
- * $DragonFly: src/sys/netinet/in_proto.c,v 1.6 2004/03/06 01:58:55 hsu Exp $
+ * $DragonFly: src/sys/netinet/in_proto.c,v 1.7 2004/09/19 22:32:48 joerg Exp $
  */
 
 #include "opt_ipdivert.h"
@@ -245,6 +245,14 @@ struct ipprotosw inetsw[] = {
   &rip_usrreqs
 },
 #endif
+#ifdef NPFSYNC
+{ SOCK_RAW,	&inetdomain,	IPPROTO_PFSYNC,	PR_ATOMIC|PR_ADDR,
+  pfsync_input,	0,		0,		rip_ctloutput,
+  0,
+  0,		0,		0,		0,
+  &rip_usrreqs
+},
+#endif /* NPFSYNC */
 /* raw wildcard */
 { SOCK_RAW,	&inetdomain,	0,		PR_ATOMIC|PR_ADDR,
   rip_input,	0,		0,		rip_ctloutput,
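
[Aside: with IPPROTO_PFSYNC registered in inetsw, pfsync state messages travel as raw IP protocol 240 to the 224.0.0.240 group defined above. Purely as an illustration of those two constants, not code from this patch and only useful on a host actually seeing pfsync traffic, a userland observer could be sketched like this (constants are restated locally so the sketch builds on systems without the new in.h).]

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <stdio.h>
    #include <string.h>

    #define PFSYNC_PROTO  240            /* IPPROTO_PFSYNC */
    #define PFSYNC_GROUP  "224.0.0.240"  /* INADDR_PFSYNC_GROUP */

    int
    main(void)
    {
        struct ip_mreq mr;
        char buf[2048];
        ssize_t n;
        int s;

        /* Raw socket bound to the pfsync protocol number (needs root). */
        if ((s = socket(AF_INET, SOCK_RAW, PFSYNC_PROTO)) < 0) {
            perror("socket");
            return (1);
        }
        /* Join the pfsync multicast group on the default interface. */
        memset(&mr, 0, sizeof(mr));
        inet_aton(PFSYNC_GROUP, &mr.imr_multiaddr);
        mr.imr_interface.s_addr = htonl(INADDR_ANY);
        if (setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP,
            &mr, sizeof(mr)) < 0) {
            perror("IP_ADD_MEMBERSHIP");
            return (1);
        }
        /* Raw sockets deliver the IP header along with the payload. */
        while ((n = recv(s, buf, sizeof(buf), 0)) > 0)
            printf("pfsync datagram: %zd bytes (incl. IP header)\n", n);
        return (0);
    }
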
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index b3b0678dda..6a1b6a701f 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -32,7 +32,7 @@
  *
  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
  * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.39.2.19 2003/01/24 05:11:34 sam Exp $
- * $DragonFly: src/sys/netinet/ip_icmp.c,v 1.11 2004/09/14 00:11:07 drhodus Exp $
+ * $DragonFly: src/sys/netinet/ip_icmp.c,v 1.12 2004/09/19 22:32:48 joerg Exp $
  */
 
 #include "opt_ipsec.h"
@@ -79,7 +79,7 @@
  * host table maintenance routines.
  */
 
-static struct icmpstat icmpstat;
+struct icmpstat icmpstat;
 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
 	&icmpstat, icmpstat, "");
@@ -237,6 +237,7 @@ icmp_error(n, type, code, dest, destifp)
 	nip->ip_vhl = IP_VHL_BORING;
 	nip->ip_p = IPPROTO_ICMP;
 	nip->ip_tos = 0;
+	m->m_pkthdr.pf_flags |= n->m_pkthdr.pf_flags & PF_MBUF_GENERATED;
 	icmp_reflect(m);
 
 freeit:
@@ -741,6 +742,7 @@ match:
 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
 		    (unsigned)(m->m_len - sizeof(struct ip)));
 	}
+	m->m_pkthdr.pf_flags &= PF_MBUF_GENERATED;
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	icmp_send(m, opts, ro);
 done:
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index 3e98b37a8c..c8c8428dc0 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -1,5 +1,5 @@
 /*	$FreeBSD: src/sys/netinet6/in6_ifattach.c,v 1.2.2.6 2002/04/28 05:40:26 suz Exp $	*/
-/*	$DragonFly: src/sys/netinet6/in6_ifattach.c,v 1.7 2004/08/02 13:22:33 joerg Exp $	*/
+/*	$DragonFly: src/sys/netinet6/in6_ifattach.c,v 1.8 2004/09/19 22:32:48 joerg Exp $	*/
 /*	$KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $	*/
 
 /*
@@ -727,6 +727,9 @@ in6_ifattach(struct ifnet *ifp,
 	case IFT_BRIDGE:
 		return;
 #endif
+	case IFT_PFLOG:
+	case IFT_PFSYNC:
+		return;
 	}
 
 	/*
diff --git a/sys/sys/in_cksum.h b/sys/sys/in_cksum.h
index fbb80035bf..42084ee23f 100644
--- a/sys/sys/in_cksum.h
+++ b/sys/sys/in_cksum.h
@@ -37,7 +37,7 @@
  * from: @(#)in_cksum.c	1.3 (Berkeley) 1/19/91
  * from: Id: in_cksum.c,v 1.8 1995/12/03 18:35:19 bde Exp
  * $FreeBSD: src/sys/i386/include/in_cksum.h,v 1.7.2.2 2002/07/02 04:03:04 jdp Exp $
- * $DragonFly: src/sys/sys/in_cksum.h,v 1.2 2004/07/16 05:51:57 dillon Exp $
+ * $DragonFly: src/sys/sys/in_cksum.h,v 1.3 2004/09/19 22:32:48 joerg Exp $
  */
 
 #ifndef _SYS_IN_CKSUM_H_
@@ -48,21 +48,21 @@
 struct ip;
 struct mbuf;
 
-__uint32_t in_cksum_range(struct mbuf *m, int offset, int bytes);
+__uint32_t in_cksum_range(struct mbuf *m, int nxt, int offset, int bytes);
 __uint32_t asm_ones32(const void *buf, int count);	/* in 32 bit words */
 
 static __inline u_int
 in_cksum(struct mbuf *m, int len)
 {
-	return(in_cksum_range(m, 0, len));
+	return(in_cksum_range(m, 0, 0, len));
 }
 
 static __inline u_int
 in_cksum_skip(struct mbuf *m, int len, int skip)
 {
-	return(in_cksum_range(m, skip, len - skip));
+	return(in_cksum_range(m, 0, skip, len - skip));
 }
-
+
 static __inline u_int
 in_cksum_hdr(const struct ip *ip)
 {
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 96ff7579ad..466949716e 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
+ *
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -32,7 +34,7 @@
  *
  *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
  * $FreeBSD: src/sys/sys/mbuf.h,v 1.44.2.17 2003/04/15 06:15:02 silby Exp $
- * $DragonFly: src/sys/sys/mbuf.h,v 1.15 2004/09/14 15:49:54 joerg Exp $
+ * $DragonFly: src/sys/sys/mbuf.h,v 1.16 2004/09/19 22:32:48 joerg Exp $
  */
 
 #ifndef _SYS_MBUF_H_
@@ -88,12 +90,16 @@ struct m_tag {
 struct pkthdr {
 	struct ifnet *rcvif;		/* rcv interface */
 	int len;			/* total packet length */
+	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
 	/* variables for ip and tcp reassembly */
 	void *header;			/* pointer to packet header */
 	/* variables for hardware checksum */
 	int csum_flags;			/* flags regarding checksum */
 	int csum_data;			/* data field used by csum routines */
-	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
+	/* variables for PF processing */
+	int pf_flags;			/* flags for PF */
+	uint16_t pf_tag;		/* PF tag id */
+	uint8_t pf_routed;		/* PF routing counter */
 };
@@ -196,6 +202,16 @@ struct mbuf {
 #define	CSUM_DELAY_DATA		(CSUM_TCP | CSUM_UDP)
 #define	CSUM_DELAY_IP		(CSUM_IP)	/* XXX add ipv6 here too? */
 
+/*
+ * Flags indicating PF processing status
+ */
+#define	PF_MBUF_GENERATED	0x00000001
+#define	PF_MBUF_TAGGED		0x00000002	/* pf_tag field is valid */
+#define	PF_MBUF_ROUTED		0x00000004	/* pf_routed field is valid */
+#define	PF_MBUF_TRANSLATE_LOCALHOST \
+				0x00000008
+#define	PF_MBUF_FRAGCACHE	0x00000010
+
 /*
  * mbuf types.
  */
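
[Aside: the new pkthdr fields let pf track per-packet state with plain bitwise operations, as the icmp_error() hunk above demonstrates. A standalone mock of that flag handling is sketched below; struct mock_pkthdr is an invented stand-in for the PF portion of struct pkthdr, with the PF_MBUF_* values restated from this patch.]

    #include <stdio.h>
    #include <stdint.h>

    /* Flag values as defined in sys/sys/mbuf.h by this patch. */
    #define PF_MBUF_GENERATED  0x00000001
    #define PF_MBUF_TAGGED     0x00000002
    #define PF_MBUF_ROUTED     0x00000004

    /* Invented stand-in for the PF fields of struct pkthdr. */
    struct mock_pkthdr {
        int pf_flags;          /* flags for PF */
        uint16_t pf_tag;       /* valid iff PF_MBUF_TAGGED is set */
        uint8_t pf_routed;     /* valid iff PF_MBUF_ROUTED is set */
    };

    int
    main(void)
    {
        struct mock_pkthdr in = { 0, 0, 0 }, reply = { 0, 0, 0 };

        /* pf marks a packet it generated itself. */
        in.pf_flags |= PF_MBUF_GENERATED;

        /* An ICMP error inherits only the GENERATED bit from the
         * offending packet, as in the icmp_error() hunk. */
        reply.pf_flags |= in.pf_flags & PF_MBUF_GENERATED;

        /* Consumers test bits instead of walking an m_tag list. */
        if (reply.pf_flags & PF_MBUF_GENERATED)
            printf("reply marked as pf-generated\n");
        if (!(reply.pf_flags & PF_MBUF_TAGGED))
            printf("pf_tag not valid, skipping tag match\n");
        return (0);
    }
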
-- 
2.41.0