From: Jan Lentfer Date: Sun, 8 Aug 2010 07:44:38 +0000 (+0200) Subject: pf: Update packet filter to the version that comes with OpenBSD 4.1 X-Git-Tag: v2.9.0~589 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/70224baa0d7c2fcfc9b51076bdc46f77a8e1e2a7 pf: Update packet filter to the version that comes with OpenBSD 4.1 The original OpenBSD 4.1 defaults to "keep state flags S/SA" for all pass rules. In contrast to that, we default to "no state". As in earlier versions of pf in DragonFly, the default keep-state policy can still be set with the keep-policy option (e.g. "set keep-policy keep state (pickups)"). DragonFly additions to pf have been kept: fairq support, pickups. Detailed Info on changes/additions: * ALTQ: Fix altq to work with pf_mtag Patch by Matthew Dillon * libkern: Revert commit e104539 strchr was added to libkern.h together with strrchr * net/if.h: add interface groups Imported from FreeBSD. * netinet6/in6.h: add macros IN6_IS_ADDR_MC_INTFACELOCAL IN6_IS_SCOPE_EMBED IPV6_ADDR_SCOPE_INTFACELOCAL * sys/libkern.h: Add strchr and strrchr as inline functions Brought in from FreeBSD * sys/net/if_var.h: Import interface groups Import interface groups and event handlers from FreeBSD * sys/net/if_var.h: add if_pf_kif, if_groups to struct ifnet obtained from: Open/FreeBSD * net/if_types.h: add IFT_ENC to non-IANA-assignments obtained from Open/FreeBSD * net/bpf.c: add bpf_mtap_hdr from OpenBSD Con up a minimal dummy header to pacify bpf. Allocate (only) a struct m_hdr on the stack. --- diff --git a/include/string.h b/include/string.h index f2d6766adf..a9885df2ee 100644 --- a/include/string.h +++ b/include/string.h @@ -114,7 +114,9 @@ size_t strnlen(const char *, size_t) __pure; char *strnstr(const char *, const char *, size_t) __pure; #endif char *strpbrk(const char *, const char *) __pure; +#if !defined(_KERNEL_VIRTUAL) char *strrchr(const char *, int) __pure; +#endif #if __BSD_VISIBLE #if !defined(_KERNEL_VIRTUAL) char *strsep(char **, const char *); diff --git a/share/man/man4/pf.4 b/share/man/man4/pf.4 index 187a050a65..3a57046c75 100644 --- a/share/man/man4/pf.4 +++ b/share/man/man4/pf.4 @@ -1,5 +1,4 @@ -.\" $OpenBSD: pf.4,v 1.46 2004/02/19 21:29:51 cedric Exp $ -.\" $DragonFly: src/share/man/man4/pf.4,v 1.5 2007/05/19 17:32:12 swildner Exp $ +.\" $OpenBSD: pf.4,v 1.58 2007/02/09 11:39:06 henning Exp $ .\" .\" Copyright (C) 2001, Kjell Wooding. All rights reserved. .\" @@ -50,34 +49,47 @@ The most commonly used functions are covered by .Xr pfctl 8 . .Pp Manipulations like loading a ruleset that involve more than a single -ioctl call require a so-called ticket, which prevents the occurrence of +.Xr ioctl 2 +call require a so-called +.Em ticket , +which prevents the occurrence of multiple concurrent manipulations. .Pp -Fields of ioctl parameter structures that refer to packet data (like +Fields of +.Xr ioctl 2 +parameter structures that refer to packet data (like addresses and ports) are generally expected in network byte-order. -.Sh FILES -.Bl -tag -width /dev/pf -compact -.It Pa /dev/pf -packet filtering device. -.El +.Pp +Rules and address tables are contained in so-called +.Em anchors . +When servicing an +.Xr ioctl 2 +request, if the anchor field of the argument structure is empty, +the kernel will use the default anchor (i.e., the main ruleset) +in operations. +Anchors are specified by name and may be nested, with components +separated by +.Sq / +characters, similar to how file system hierarchies are laid out.
+The final component of the anchor path is the anchor under which +operations will be performed. .Sh IOCTL INTERFACE -pf supports the following +.Nm +supports the following .Xr ioctl 2 -commands: +commands, available through +.Aq Pa net/pfvar.h : .Bl -tag -width xxxxxx .It Dv DIOCSTART -Starts the packet filter. +Start the packet filter. .It Dv DIOCSTOP -Stops the packet filter. +Stop the packet filter. .It Dv DIOCSTARTALTQ -Starts the -.Xr altq 4 -bandwidth control system. +Start the ALTQ bandwidth control system (see +.Xr altq 9 ) . .It Dv DIOCSTOPALTQ -Stops the -.Xr altq 4 -bandwidth control system. -.It Dv DIOCBEGINADDRS Fa "struct pfioc_pooladdr" +Stop the ALTQ bandwidth control system. +.It Dv DIOCBEGINADDRS Fa "struct pfioc_pooladdr *pp" .Bd -literal struct pfioc_pooladdr { u_int32_t action; @@ -87,24 +99,22 @@ struct pfioc_pooladdr { u_int8_t r_action; u_int8_t r_last; u_int8_t af; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; .Ed .Pp -Clears the buffer address pool -and returns a +Clear the buffer address pool and get a .Va ticket for subsequent .Dv DIOCADDADDR , -.Dv DIOCADDRULE +.Dv DIOCADDRULE , and .Dv DIOCCHANGERULE calls. -.It Dv DIOCADDADDR Fa "struct pfioc_pooladdr" +.It Dv DIOCADDADDR Fa "struct pfioc_pooladdr *pp" .Pp -Adds pool address +Add the pool address .Va addr to the buffer address pool to be used in the following .Dv DIOCADDRULE @@ -112,119 +122,120 @@ or .Dv DIOCCHANGERULE call. All other members of the structure are ignored. -.It Dv DIOCADDRULE Fa "struct pfioc_rule" +.It Dv DIOCADDRULE Fa "struct pfioc_rule *pr" .Bd -literal struct pfioc_rule { u_int32_t action; u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; + char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; .Ed .Pp -Adds +Add .Va rule at the end of the inactive ruleset. -Requires +This call requires a .Va ticket -obtained through preceding +obtained through a preceding .Dv DIOCXBEGIN -call, and +call and a .Va pool_ticket -obtained through +obtained through a .Dv DIOCBEGINADDRS call. .Dv DIOCADDADDR must also be called if any pool addresses are required. The optional .Va anchor -and -.Va ruleset -names indicate the anchor and ruleset in which to append the rule. +name indicates the anchor in which to append the rule. .Va nr and .Va action are ignored. -.It Dv DIOCADDALTQ Fa "struct pfioc_altq" -Adds +.It Dv DIOCADDALTQ Fa "struct pfioc_altq *pa" +Add an ALTQ discipline or queue. .Bd -literal struct pfioc_altq { u_int32_t action; u_int32_t ticket; u_int32_t nr; - struct pf_altq altq; + struct pf_altq altq; }; .Ed -.It Dv DIOCGETRULES Fa "struct pfioc_rule" -Returns +.It Dv DIOCGETRULES Fa "struct pfioc_rule *pr" +Get a .Va ticket for subsequent .Dv DIOCGETRULE -calls and +calls and the number .Va nr of rules in the active ruleset. -.It Dv DIOCGETRULE Fa "struct pfioc_rule" -Returns +.It Dv DIOCGETRULE Fa "struct pfioc_rule *pr" +Get a .Va rule -number +by its number .Va nr -using +using the .Va ticket obtained through a preceding .Dv DIOCGETRULES call. -.It Dv DIOCGETADDRS Fa "struct pfioc_pooladdr" -Returns +If +.Va action +is set to +.Dv PF_GET_CLR_CNTR , +the per-rule statistics on the requested rule are cleared. 
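[Editorial aside, not part of this patch: a minimal userland sketch of the ticket-based rule retrieval (DIOCGETRULES/DIOCGETRULE) documented above. It assumes the pf definitions are reachable as <net/pfvar.h> (on DragonFly the header may be installed as <net/pf/pfvar.h>), and it selects the main filter ruleset by setting rule.action to PF_PASS, the same convention pfctl uses. The later sketches below reuse these includes and an already-open /dev/pf descriptor named dev.]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <net/pfvar.h>          /* possibly <net/pf/pfvar.h> on DragonFly */
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
        struct pfioc_rule pr;
        u_int32_t i, mnr;
        int dev;

        if ((dev = open("/dev/pf", O_RDONLY)) == -1)
                err(1, "open(/dev/pf)");

        memset(&pr, 0, sizeof(pr));
        pr.rule.action = PF_PASS;       /* main filter ruleset, as pfctl does */
        if (ioctl(dev, DIOCGETRULES, &pr) == -1)
                err(1, "DIOCGETRULES");

        mnr = pr.nr;                    /* number of rules; save before reuse */
        for (i = 0; i < mnr; i++) {
                pr.nr = i;              /* rule number to fetch */
                if (ioctl(dev, DIOCGETRULE, &pr) == -1)
                        err(1, "DIOCGETRULE");
                printf("rule %u: %llu evaluations\n", i,
                    (unsigned long long)pr.rule.evaluations);
        }
        return (0);
}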
+.It Dv DIOCGETADDRS Fa "struct pfioc_pooladdr *pp" +Get a .Va ticket for subsequent .Dv DIOCGETADDR -calls and +calls and the number .Va nr of pool addresses in the rule specified with .Va r_action , .Va r_num , -.Va anchor and -.Va ruleset . -.It Dv DIOCGETADDR Fa "struct pfioc_pooladdr" -Returns pool address +.Va anchor . +.It Dv DIOCGETADDR Fa "struct pfioc_pooladdr *pp" +Get the pool address .Va addr -number +by its number .Va nr from the rule specified with .Va r_action , .Va r_num , -.Va anchor and -.Va ruleset -using +.Va anchor +using the .Va ticket obtained through a preceding .Dv DIOCGETADDRS call. -.It Dv DIOCGETALTQS Fa "struct pfioc_altq" -Returns +.It Dv DIOCGETALTQS Fa "struct pfioc_altq *pa" +Get a .Va ticket for subsequent .Dv DIOCGETALTQ -calls and +calls and the number .Va nr of queues in the active list. -.It Dv DIOCGETALTQ Fa "struct pfioc_altq" -Returns +.It Dv DIOCGETALTQ Fa "struct pfioc_altq *pa" +Get the queueing discipline .Va altq -number +by its number .Va nr -using +using the .Va ticket obtained through a preceding .Dv DIOCGETALTQS call. -.It Dv DIOCGETQSTATS Fa "struct pfioc_qstats" -Returns statistics on a queue. +.It Dv DIOCGETQSTATS Fa "struct pfioc_qstats *pq" +Get the statistics on a queue. .Bd -literal struct pfioc_qstats { u_int32_t ticket; @@ -235,54 +246,99 @@ struct pfioc_qstats { }; .Ed .Pp -A pointer to a buffer of statistics -.Va buf +This call fills in a pointer to the buffer of statistics +.Va buf , of length -.Va nbytes +.Va nbytes , for the queue specified by .Va nr . -.It Dv DIOCADDSTATE Fa "struct pfioc_state" -Adds a state entry. -.It Dv DIOCGETSTATE Fa "struct pfioc_state" +.It Dv DIOCGETRULESETS Fa "struct pfioc_ruleset *pr" +.Bd -literal +struct pfioc_ruleset { + u_int32_t nr; + char path[MAXPATHLEN]; + char name[PF_ANCHOR_NAME_SIZE]; +}; +.Ed +.Pp +Get the number +.Va nr +of rulesets (i.e., anchors) directly attached to the anchor named by +.Va path +for use in subsequent +.Dv DIOCGETRULESET +calls. +Nested anchors, since they are not directly attached to the given +anchor, will not be included. +This ioctl returns +.Er EINVAL +if the given anchor does not exist. +.It Dv DIOCGETRULESET Fa "struct pfioc_ruleset *pr" +Get a ruleset (i.e., an anchor) +.Va name +by its number +.Va nr +from the given anchor +.Va path , +the maximum number of which can be obtained from a preceding +.Dv DIOCGETRULESETS +call. +This ioctl returns +.Er EINVAL +if the given anchor does not exist or +.Er EBUSY +if another process is concurrently updating a ruleset. +.It Dv DIOCADDSTATE Fa "struct pfioc_state *ps" +Add a state entry. .Bd -literal struct pfioc_state { u_int32_t nr; struct pf_state state; }; .Ed -.Pp -Extracts the entry with the specified number from the state table. -.It Dv DIOCKILLSTATES Fa "struct pfioc_state_kill" -Removes matching entries from the state table. -Returns the number of killed states in psk_af. +.It Dv DIOCGETSTATE Fa "struct pfioc_state *ps" +Extract the entry with the specified number +.Va nr +from the state table. +.It Dv DIOCKILLSTATES Fa "struct pfioc_state_kill *psk" +Remove matching entries from the state table. +This ioctl returns the number of killed states in +.Va psk_af . .Bd -literal struct pfioc_state_kill { - int psk_af; + sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; }; .Ed -.It Dv DIOCCLRSTATES Fa "struct pfioc_state_kill" -Clears all states. +.It Dv DIOCCLRSTATES Fa "struct pfioc_state_kill *psk" +Clear all states. 
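[Editorial sketch for the anchor enumeration pair DIOCGETRULESETS/DIOCGETRULESET documented above; same includes and open /dev/pf descriptor dev as in the earlier sketch. An empty path means the main ruleset.]

int
list_anchors(int dev)
{
        struct pfioc_ruleset prs;
        u_int32_t mnr, nr;

        memset(&prs, 0, sizeof(prs));   /* empty path: main ruleset */
        if (ioctl(dev, DIOCGETRULESETS, &prs) == -1)
                return (-1);            /* EINVAL if the anchor does not exist */
        mnr = prs.nr;
        for (nr = 0; nr < mnr; nr++) {
                prs.nr = nr;
                if (ioctl(dev, DIOCGETRULESET, &prs) == -1)
                        return (-1);
                printf("anchor \"%s\"\n", prs.name);
        }
        return (0);
}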
It works like .Dv DIOCKILLSTATES , -but ignores the psk_af, psk_proto, psk_src and psk_dst fields of the -.Fa pfioc_state_kill +but ignores the +.Va psk_af , +.Va psk_proto , +.Va psk_src , +and +.Va psk_dst +fields of the +.Vt pfioc_state_kill structure. -.It Dv DIOCSETSTATUSIF Fa "struct pfioc_if" +.It Dv DIOCSETSTATUSIF Fa "struct pfioc_if *pi" +Specify the interface for which statistics are accumulated. .Bd -literal struct pfioc_if { char ifname[IFNAMSIZ]; }; .Ed -.Pp -Specifies the interface for which statistics are accumulated. -.It Dv DIOCGETSTATUS Fa "struct pf_status" +.It Dv DIOCGETSTATUS Fa "struct pf_status *s" +Get the internal packet filter statistics. .Bd -literal struct pf_status { u_int64_t counters[PFRES_MAX]; + u_int64_t lcounters[LCNT_MAX]; u_int64_t fcounters[FCNT_MAX]; u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; @@ -293,14 +349,13 @@ struct pf_status { u_int32_t since; u_int32_t debug; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[MD5_DIGEST_LENGTH]; }; .Ed -.Pp -Gets the internal packet filter statistics. .It Dv DIOCCLRSTATUS -Clears the internal packet filter statistics. -.It Dv DIOCNATLOOK Fa "struct pfioc_natlook" -Looks up a state table entry by source and destination addresses and ports. +Clear the internal packet filter statistics. +.It Dv DIOCNATLOOK Fa "struct pfioc_natlook *pnl" +Look up a state table entry by source and destination addresses and ports. .Bd -literal struct pfioc_natlook { struct pf_addr saddr; @@ -316,99 +371,145 @@ struct pfioc_natlook { u_int8_t direction; }; .Ed -.It Dv DIOCSETDEBUG Fa "u_int32_t" -Sets the debug level. +.It Dv DIOCSETDEBUG Fa "u_int32_t *level" +Set the debug level. .Bd -literal -enum { PF_DEBUG_NONE=0, PF_DEBUG_URGENT=1, PF_DEBUG_MISC=2 }; +enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, + PF_DEBUG_NOISY }; .Ed -.It Dv DIOCGETSTATES Fa "struct pfioc_states" +.It Dv DIOCGETSTATES Fa "struct pfioc_states *ps" +Get state table entries. .Bd -literal struct pfioc_states { int ps_len; union { - caddr_t psu_buf; + caddr_t psu_buf; struct pf_state *psu_states; } ps_u; #define ps_buf ps_u.psu_buf #define ps_states ps_u.psu_states }; .Ed -.It Dv DIOCCHANGERULE Fa "struct pfioc_rule" -Adds or removes the +.Pp +If +.Va ps_len +is non-zero on entry, as many states as possible that can fit into this +size will be copied into the supplied buffer +.Va ps_states . +On exit, +.Va ps_len +is always set to the total size required to hold all state table entries +(i.e., it is set to +.Li sizeof(struct pf_state) * nr ) . +.It Dv DIOCCHANGERULE Fa "struct pfioc_rule *pcr" +Add or remove the .Va rule in the ruleset specified by .Va rule.action . -.Bd -literal -enum { PF_CHANGE_ADD_HEAD=1, PF_CHANGE_ADD_TAIL=2, - PF_CHANGE_ADD_BEFORE=3, PF_CHANGE_ADD_AFTER=4, - PF_CHANGE_REMOVE=5, PF_CHANGE_GET_TICKET=6 }; -.Ed .Pp The type of operation to be performed is indicated by -.Va action . +.Va action , +which can be any of the following: +.Bd -literal +enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, + PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, + PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +.Ed .Pp .Va ticket -must be set to the value obtained with PF_CHANGE_GET_TICKET -for all actions except PF_CHANGE_GET_TICKET. +must be set to the value obtained with +.Dv PF_CHANGE_GET_TICKET +for all actions except +.Dv PF_CHANGE_GET_TICKET . .Va pool_ticket must be set to the value obtained with the .Dv DIOCBEGINADDRS -call for all actions except PF_CHANGE_REMOVE and PF_CHANGE_GET_TICKET. 
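[The two-call buffer-sizing protocol of DIOCGETSTATES described above (query the required ps_len first, then fetch) is easy to get wrong, so here is a hedged editorial sketch of it; assumptions as in the earlier sketches.]

int
dump_states(int dev)
{
        struct pfioc_states ps;
        struct pf_state *states;
        u_int32_t i, n;
        size_t len;

        memset(&ps, 0, sizeof(ps));     /* ps_len == 0: ask for the size */
        if (ioctl(dev, DIOCGETSTATES, &ps) == -1)
                return (-1);
        if (ps.ps_len == 0)
                return (0);             /* no states */
        len = ps.ps_len;
        if ((states = malloc(len)) == NULL)
                return (-1);
        ps.ps_buf = (caddr_t)states;
        if (ioctl(dev, DIOCGETSTATES, &ps) == -1) {
                free(states);
                return (-1);
        }
        /* states may have been added between the calls; only read what fits */
        n = ((size_t)ps.ps_len < len ? ps.ps_len : len) /
            sizeof(struct pf_state);
        for (i = 0; i < n; i++)
                printf("state %u: proto %u\n", i, states[i].proto);
        free(states);
        return (0);
}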
-.Pp -.Va anchor +call for all actions except +.Dv PF_CHANGE_REMOVE and -.Va ruleset -indicate which anchor and ruleset the operation applies to. +.Dv PF_CHANGE_GET_TICKET . +.Va anchor +indicates to which anchor the operation applies. .Va nr -indicates the rule number against which PF_CHANGE_ADD_BEFORE, -PF_CHANGE_ADD_AFTER or PF_CHANGE_REMOVE actions are applied. -.It Dv DIOCCHANGEADDR Fa "struct pfioc_pooladdr" -Adds or removes a pool address +indicates the rule number against which +.Dv PF_CHANGE_ADD_BEFORE , +.Dv PF_CHANGE_ADD_AFTER , +or +.Dv PF_CHANGE_REMOVE +actions are applied. +.\" It Dv DIOCCHANGEALTQ Fa "struct pfioc_altq *pcr" +.It Dv DIOCCHANGEADDR Fa "struct pfioc_pooladdr *pca" +Add or remove the pool address .Va addr -from a rule specified with +from the rule specified by .Va r_action , .Va r_num , -.Va anchor and -.Va ruleset . -.It Dv DIOCSETTIMEOUT Fa "struct pfioc_tm" +.Va anchor . +.It Dv DIOCSETTIMEOUT Fa "struct pfioc_tm *pt" .Bd -literal struct pfioc_tm { int timeout; int seconds; }; .Ed -.It Dv DIOCGETTIMEOUT Fa "struct pfioc_tm" +.Pp +Set the state timeout of +.Va timeout +to +.Va seconds . +The old value will be placed into +.Va seconds . +For possible values of +.Va timeout , +consult the +.Dv PFTM_* +values in +.Aq Pa net/pfvar.h . +.It Dv DIOCGETTIMEOUT Fa "struct pfioc_tm *pt" +Get the state timeout of +.Va timeout . +The value will be placed into the +.Va seconds +field. .It Dv DIOCCLRRULECTRS Clear per-rule statistics. -.It Dv DIOCSETLIMIT Fa "struct pfioc_limit" -Sets hard limits on the memory pools used by the packet filter. +.It Dv DIOCSETLIMIT Fa "struct pfioc_limit *pl" +Set the hard limits on the memory pools used by the packet filter. .Bd -literal struct pfioc_limit { int index; unsigned limit; }; + +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; .Ed -.It Dv DIOCGETLIMIT Fa "struct pfioc_limit" -.It Dv DIOCRCLRTABLES Fa "struct pfioc_table" +.It Dv DIOCGETLIMIT Fa "struct pfioc_limit *pl" +Get the hard +.Va limit +for the memory pool indicated by +.Va index . +.It Dv DIOCRCLRTABLES Fa "struct pfioc_table *io" Clear all tables. -All the IOCTLs that manipulate radix tables +All the ioctls that manipulate radix tables use the same structure described below. For -.Dv DIOCRCLRTABLES, pfrio_ndel contains on exit the number -of tables deleted. +.Dv DIOCRCLRTABLES , +.Va pfrio_ndel +contains on exit the number of tables deleted. .Bd -literal struct pfioc_table { - struct pfr_table pfrio_table; - void *pfrio_buffer; - int pfrio_esize; - int pfrio_size; - int pfrio_size2; - int pfrio_nadd; - int pfrio_ndel; - int pfrio_nchange; - int pfrio_flags; - int pfrio_ticket; + struct pfr_table pfrio_table; + void *pfrio_buffer; + int pfrio_esize; + int pfrio_size; + int pfrio_size2; + int pfrio_nadd; + int pfrio_ndel; + int pfrio_nchange; + int pfrio_flags; + u_int32_t pfrio_ticket; }; #define pfrio_exists pfrio_nadd #define pfrio_nzero pfrio_nadd @@ -417,192 +518,359 @@ struct pfioc_table { #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd .Ed -.It Dv DIOCRADDTABLES Fa "struct pfioc_table" -Creates one or more tables. -On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures. -On exit, pfrio_nadd contains the number of tables effectively created. +.It Dv DIOCRADDTABLES Fa "struct pfioc_table *io" +Create one or more tables. +On entry, +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. 
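[Editorial sketch of the memory-pool limit ioctls described above, using PF_LIMIT_STATES from the enum shown; same assumptions as the earlier sketches.]

int
bump_state_limit(int dev, unsigned int newlimit)
{
        struct pfioc_limit pl;

        memset(&pl, 0, sizeof(pl));
        pl.index = PF_LIMIT_STATES;
        if (ioctl(dev, DIOCGETLIMIT, &pl) == -1)
                return (-1);
        printf("state limit: %u\n", pl.limit);

        pl.limit = newlimit;            /* may fail if below current usage */
        if (ioctl(dev, DIOCSETLIMIT, &pl) == -1)
                return (-1);
        return (0);
}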
+.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . +On exit, +.Va pfrio_nadd +contains the number of tables effectively created. .Bd -literal struct pfr_table { - char pfrt_anchor[PF_ANCHOR_NAME_SIZE]; - char pfrt_ruleset[PF_RULESET_NAME_SIZE]; - char pfrt_name[PF_TABLE_NAME_SIZE]; - u_int32_t pfrt_flags; - u_int8_t pfrt_fback; + char pfrt_anchor[MAXPATHLEN]; + char pfrt_name[PF_TABLE_NAME_SIZE]; + u_int32_t pfrt_flags; + u_int8_t pfrt_fback; }; .Ed -.It Dv DIOCRDELTABLES Fa "struct pfioc_table" -Deletes one or more tables. -On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures. -On exit, pfrio_nadd contains the number of tables effectively deleted. -.It Dv DIOCRGETTABLES Fa "struct pfioc_table" +.It Dv DIOCRDELTABLES Fa "struct pfioc_table *io" +Delete one or more tables. +On entry, +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . +On exit, +.Va pfrio_ndel +contains the number of tables effectively deleted. +.It Dv DIOCRGETTABLES Fa "struct pfioc_table *io" Get the list of all tables. -On entry, pfrio_buffer[pfrio_size] contains a valid writeable buffer for -pfr_table structures. -On exit, pfrio_size contains the number of tables written into the buffer. +On entry, +.Va pfrio_buffer[pfrio_size] +contains a valid writeable buffer for +.Vt pfr_table +structures. +On exit, +.Va pfrio_size +contains the number of tables written into the buffer. If the buffer is too small, the kernel does not store anything but just returns the required buffer size, without error. -.It Dv DIOCRGETTSTATS Fa "struct pfioc_table" -Like -.Dv DIOCRGETTABLES , -but returns an array of pfr_tstats structures. +.It Dv DIOCRGETTSTATS Fa "struct pfioc_table *io" +This call is like +.Dv DIOCRGETTABLES +but is used to get an array of +.Vt pfr_tstats +structures. .Bd -literal struct pfr_tstats { - struct pfr_table pfrts_t; - u_int64_t pfrts_packets - [PFR_DIR_MAX][PFR_OP_TABLE_MAX]; - u_int64_t pfrts_bytes - [PFR_DIR_MAX][PFR_OP_TABLE_MAX]; - u_int64_t pfrts_match; - u_int64_t pfrts_nomatch; - long pfrts_tzero; - int pfrts_cnt; - int pfrts_refcnt[PFR_REFCNT_MAX]; + struct pfr_table pfrts_t; + u_int64_t pfrts_packets + [PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_bytes + [PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_match; + u_int64_t pfrts_nomatch; + long pfrts_tzero; + int pfrts_cnt; + int pfrts_refcnt[PFR_REFCNT_MAX]; }; -#define pfrts_name pfrts_t.pfrt_name -#define pfrts_flags pfrts_t.pfrt_flags +#define pfrts_name pfrts_t.pfrt_name +#define pfrts_flags pfrts_t.pfrt_flags .Ed -.It Dv DIOCRCLRTSTATS Fa "struct pfioc_table" -Clears the statistics of one or more tables. -On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures. -On exit, pfrio_nzero contains the number of tables effectively cleared. -.It Dv DIOCRCLRADDRS Fa "struct pfioc_table" +.It Dv DIOCRCLRTSTATS Fa "struct pfioc_table *io" +Clear the statistics of one or more tables. +On entry, +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . +On exit, +.Va pfrio_nzero +contains the number of tables effectively cleared. +.It Dv DIOCRCLRADDRS Fa "struct pfioc_table *io" Clear all addresses in a table. -On entry, pfrio_table contains the table to clear. -On exit, pfrio_ndel contains the number of addresses removed. 
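[Editorial sketch of the "too-small buffer returns the required size without error" convention that DIOCRGETTABLES uses, as described above; the first call passes a zero-sized buffer purely to learn the count. Same assumptions as the earlier sketches.]

int
list_tables(int dev)
{
        struct pfioc_table io;
        struct pfr_table *tbl;
        int i, n;

        memset(&io, 0, sizeof(io));
        io.pfrio_esize = sizeof(struct pfr_table);
        io.pfrio_size = 0;              /* kernel reports the required size */
        if (ioctl(dev, DIOCRGETTABLES, &io) == -1)
                return (-1);
        if (io.pfrio_size == 0)
                return (0);
        n = io.pfrio_size;
        if ((tbl = calloc(n, sizeof(*tbl))) == NULL)
                return (-1);
        io.pfrio_buffer = tbl;
        if (ioctl(dev, DIOCRGETTABLES, &io) == -1) {
                free(tbl);
                return (-1);
        }
        if (io.pfrio_size < n)          /* tables may have gone away meanwhile */
                n = io.pfrio_size;
        for (i = 0; i < n; i++)
                printf("table %s\n", tbl[i].pfrt_name);
        free(tbl);
        return (0);
}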
-.It Dv DIOCRADDADDRS Fa "struct pfioc_table" +On entry, +.Va pfrio_table +contains the table to clear. +On exit, +.Va pfrio_ndel +contains the number of addresses removed. +.It Dv DIOCRADDADDRS Fa "struct pfioc_table *io" Add one or more addresses to a table. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains the list of pfr_addr structures to add. -On exit, pfrio_nadd contains the number of addresses effectively added. +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to add to the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . +On exit, +.Va pfrio_nadd +contains the number of addresses effectively added. .Bd -literal struct pfr_addr { - union { - struct in_addr _pfra_ip4addr; - struct in6_addr _pfra_ip6addr; - } pfra_u; - u_int8_t pfra_af; - u_int8_t pfra_net; - u_int8_t pfra_not; - u_int8_t pfra_fback; + union { + struct in_addr _pfra_ip4addr; + struct in6_addr _pfra_ip6addr; + } pfra_u; + u_int8_t pfra_af; + u_int8_t pfra_net; + u_int8_t pfra_not; + u_int8_t pfra_fback; }; #define pfra_ip4addr pfra_u._pfra_ip4addr #define pfra_ip6addr pfra_u._pfra_ip6addr .Ed -.It Dv DIOCRDELADDRS Fa "struct pfioc_table" +.It Dv DIOCRDELADDRS Fa "struct pfioc_table *io" Delete one or more addresses from a table. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains the list of pfr_addr structures to delete. -On exit, pfrio_ndel contains the number of addresses effectively deleted. -.It Dv DIOCRSETADDRS Fa "struct pfioc_table" +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to delete from the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . +On exit, +.Va pfrio_ndel +contains the number of addresses effectively deleted. +.It Dv DIOCRSETADDRS Fa "struct pfioc_table *io" Replace the content of a table by a new address list. This is the most complicated command, which uses all the structure members. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains the new list of pfr_addr structures. -In addition to that, if size2 is nonzero, pfrio_buffer[pfrio_size..pfrio_size2] -must be a writeable buffer, into which the kernel can copy the addresses that -have been deleted during the replace operation. -On exit, pfrio_ndel, pfrio_nadd and pfrio_nchange contain the number of -addresses deleted, added and changed by the kernel. -If pfrio_size2 was set on -entry, pfrio_size2 will point to the size of the buffer used, exactly like +.Pp +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements which become the new contents of the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . +Additionally, if +.Va pfrio_size2 +is non-zero, +.Va pfrio_buffer[pfrio_size..pfrio_size2] +must be a writeable buffer, into which the kernel can copy the +addresses that have been deleted during the replace operation. +On exit, +.Va pfrio_ndel , +.Va pfrio_nadd , +and +.Va pfrio_nchange +contain the number of addresses deleted, added, and changed by the +kernel. +If +.Va pfrio_size2 +was set on entry, +.Va pfrio_size2 +will point to the size of the buffer used, exactly like .Dv DIOCRGETADDRS . 
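[Editorial sketch of adding a single IPv4 host address to an existing table with DIOCRADDADDRS; the table name is a caller-supplied placeholder. Same assumptions as the earlier sketches.]

int
table_add_v4(int dev, const char *table, const char *ip)
{
        struct pfioc_table io;
        struct pfr_addr ad;

        memset(&ad, 0, sizeof(ad));
        ad.pfra_af = AF_INET;
        ad.pfra_net = 32;                       /* host entry */
        if (inet_pton(AF_INET, ip, &ad.pfra_ip4addr) != 1)
                return (-1);

        memset(&io, 0, sizeof(io));
        strlcpy(io.pfrio_table.pfrt_name, table,
            sizeof(io.pfrio_table.pfrt_name));
        io.pfrio_buffer = &ad;
        io.pfrio_esize = sizeof(ad);
        io.pfrio_size = 1;
        if (ioctl(dev, DIOCRADDADDRS, &io) == -1)
                return (-1);
        return (io.pfrio_nadd);                 /* 1 if the address was new */
}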
-.It Dv DIOCRGETADDRS Fa "struct pfioc_table" +.It Dv DIOCRGETADDRS Fa "struct pfioc_table *io" Get all the addresses of a table. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains a valid writeable buffer for pfr_addr structures. -On exit, pfrio_size contains the number of addresses written into the buffer. +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer[pfrio_size] +contains a valid writeable buffer for +.Vt pfr_addr +structures. +On exit, +.Va pfrio_size +contains the number of addresses written into the buffer. If the buffer was too small, the kernel does not store anything but just -return the required buffer size, without returning an error. -.It Dv DIOCRGETASTATS Fa "struct pfioc_table" -Like -.Dv DIOCRGETADDRS , -but returns an array of pfr_astats structures. +returns the required buffer size, without returning an error. +.It Dv DIOCRGETASTATS Fa "struct pfioc_table *io" +This call is like +.Dv DIOCRGETADDRS +but is used to get an array of +.Vt pfr_astats +structures. .Bd -literal struct pfr_astats { - struct pfr_addr pfras_a; - u_int64_t pfras_packets - [PFR_DIR_MAX][PFR_OP_ADDR_MAX]; - u_int64_t pfras_bytes - [PFR_DIR_MAX][PFR_OP_ADDR_MAX]; - long pfras_tzero; + struct pfr_addr pfras_a; + u_int64_t pfras_packets + [PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfras_bytes + [PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + long pfras_tzero; }; .Ed -.It Dv DIOCRCLRASTATS Fa "struct pfioc_table" -Clears the statistics of one or more addresses. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains a table of pfr_addr structures to clear. -On exit, pfrio_nzero contains the number of addresses effectively cleared. -.It Dv DIOCRTSTADDRS Fa "struct pfioc_table" +.It Dv DIOCRCLRASTATS Fa "struct pfioc_table *io" +Clear the statistics of one or more addresses. +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to be cleared from the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . +On exit, +.Va pfrio_nzero +contains the number of addresses effectively cleared. +.It Dv DIOCRTSTADDRS Fa "struct pfioc_table *io" Test if the given addresses match a table. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains a table of pfr_addr structures to test. -On exit, the kernel updates the pfr_addr table by setting the pfra_fback +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements, each of which will be tested for a match in the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . +On exit, the kernel updates the +.Vt pfr_addr +array by setting the +.Va pfra_fback member appropriately. -.It Dv DIOCRSETTFLAGS Fa "struct pfioc_table" +.It Dv DIOCRSETTFLAGS Fa "struct pfioc_table *io" Change the -.Va const +.Dv PFR_TFLAG_CONST or -.Va persist -flag of a table. -On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures, -and pfrio_setflag contains the flags to add, while pfrio_clrflag contains the -flags to remove. -On exit, pfrio_nchange and pfrio_ndel contain the number of tables altered -or deleted by the kernel. +.Dv PFR_TFLAG_PERSIST +flags of a table. +On entry, +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. 
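[Editorial sketch of testing an address against a table with DIOCRTSTADDRS and reading back pfra_fback. PFR_FB_MATCH is assumed to be the "matched" feedback value (as used by pfctl); treat that constant as an assumption. Same setup as the earlier sketches.]

int
table_test_v4(int dev, const char *table, const char *ip)
{
        struct pfioc_table io;
        struct pfr_addr ad;

        memset(&ad, 0, sizeof(ad));
        ad.pfra_af = AF_INET;
        ad.pfra_net = 32;
        if (inet_pton(AF_INET, ip, &ad.pfra_ip4addr) != 1)
                return (-1);

        memset(&io, 0, sizeof(io));
        strlcpy(io.pfrio_table.pfrt_name, table,
            sizeof(io.pfrio_table.pfrt_name));
        io.pfrio_buffer = &ad;
        io.pfrio_esize = sizeof(ad);
        io.pfrio_size = 1;
        if (ioctl(dev, DIOCRTSTADDRS, &io) == -1)
                return (-1);
        return (ad.pfra_fback == PFR_FB_MATCH); /* assumed feedback constant */
}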
+.Va pfrio_esize +must be the size of +.Vt struct pfr_table . +.Va pfrio_setflag +must contain the flags to add, while +.Va pfrio_clrflag +must contain the flags to remove. +On exit, +.Va pfrio_nchange +and +.Va pfrio_ndel +contain the number of tables altered or deleted by the kernel. Yes, tables can be deleted if one removes the -.Va persist +.Dv PFR_TFLAG_PERSIST flag of an unreferenced table. -.It Dv DIOCRINADEFINE Fa "struct pfioc_table" +.It Dv DIOCRINADEFINE Fa "struct pfioc_table *io" Defines a table in the inactive set. -On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size] -contains the list of pfr_addr structures to put in the table. -A valid ticket must also be supplied to pfrio_ticket. -On exit, pfrio_nadd contains 0 if the table was already defined in the -inactive list, or 1 if a new table has been created. -pfrio_naddr contains the number of addresses effectively put in the table. -.It Dv DIOCXBEGIN Fa "struct pfioc_trans" +On entry, +.Va pfrio_table +contains the table ID and +.Va pfrio_buffer[pfrio_size] +contains an array of +.Vt pfr_addr +structures to put in the table. +A valid ticket must also be supplied to +.Va pfrio_ticket . +On exit, +.Va pfrio_nadd +contains 0 if the table was already defined in the inactive list +or 1 if a new table has been created. +.Va pfrio_naddr +contains the number of addresses effectively put in the table. +.It Dv DIOCXBEGIN Fa "struct pfioc_trans *io" .Bd -literal -#define PF_RULESET_ALTQ (PF_RULESET_MAX) -#define PF_RULESET_TABLE (PF_RULESET_MAX+1) struct pfioc_trans { - int size; /* number of elements */ - int esize; /* size of each element in bytes */ - struct pfioc_trans_e { - int rs_num; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; - u_int32_t ticket; - } *array; + int size; /* number of elements */ + int esize; /* size of each element in bytes */ + struct pfioc_trans_e { + int rs_num; + char anchor[MAXPATHLEN]; + u_int32_t ticket; + } *array; }; .Ed .Pp -Clears all the inactive rulesets specified in the -.Fa "struct pfioc_trans_e" +Clear all the inactive rulesets specified in the +.Vt pfioc_trans_e array. -For each ruleset, a ticket is returned for subsequent "add rule" IOCTLs, +For each ruleset, a ticket is returned for subsequent "add rule" ioctls, as well as for the .Dv DIOCXCOMMIT and .Dv DIOCXROLLBACK calls. -.It Dv DIOCXCOMMIT Fa "struct pfioc_trans" +.Pp +Ruleset types, identified by +.Va rs_num , +include the following: +.Pp +.Bl -tag -width PF_RULESET_FILTER -offset ind -compact +.It Dv PF_RULESET_SCRUB +Scrub (packet normalization) rules. +.It Dv PF_RULESET_FILTER +Filter rules. +.It Dv PF_RULESET_NAT +NAT (Network Address Translation) rules. +.It Dv PF_RULESET_BINAT +Bidirectional NAT rules. +.It Dv PF_RULESET_RDR +Redirect rules. +.It Dv PF_RULESET_ALTQ +ALTQ disciplines. +.It Dv PF_RULESET_TABLE +Address tables. +.El +.It Dv DIOCXCOMMIT Fa "struct pfioc_trans *io" Atomically switch a vector of inactive rulesets to the active rulesets. -Implemented as a standard 2-phase commit, which will either fail for all -rulesets or completely succeed. +This call is implemented as a standard two-phase commit, which will either +fail for all rulesets or completely succeed. All tickets need to be valid. -Returns +This ioctl returns .Er EBUSY -if a concurrent process is trying to update some of the same rulesets -concurrently. -.It Dv DIOCXROLLBACK Fa "struct pfioc_trans" +if another process is concurrently updating some of the same rulesets. 
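[Editorial sketch of the DIOCXBEGIN/DIOCXCOMMIT/DIOCXROLLBACK two-phase commit described above, for a single filter-ruleset transaction on the main anchor; the rule loading itself is elided. Same assumptions as the earlier sketches.]

int
replace_filter_rules(int dev)
{
        struct pfioc_trans trans;
        struct pfioc_trans_e te;

        memset(&te, 0, sizeof(te));
        te.rs_num = PF_RULESET_FILTER;  /* empty anchor path: main ruleset */

        memset(&trans, 0, sizeof(trans));
        trans.size = 1;
        trans.esize = sizeof(te);
        trans.array = &te;

        if (ioctl(dev, DIOCXBEGIN, &trans) == -1)
                return (-1);

        /*
         * The new rules would be loaded here with DIOCADDRULE, using
         * te.ticket as pr.ticket plus a pool ticket from DIOCBEGINADDRS
         * for every rule.
         */

        if (ioctl(dev, DIOCXCOMMIT, &trans) == -1) {
                ioctl(dev, DIOCXROLLBACK, &trans);      /* best effort */
                return (-1);
        }
        return (0);
}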
+.It Dv DIOCXROLLBACK Fa "struct pfioc_trans *io" Clean up the kernel by undoing all changes that have taken place on the inactive rulesets since the last .Dv DIOCXBEGIN . .Dv DIOCXROLLBACK will silently ignore rulesets for which the ticket is invalid. -.It Dv DIOCFPFLUSH +.It Dv DIOCSETHOSTID Fa "u_int32_t *hostid" +Set the host ID, which is used by +.Xr pfsync 4 +to identify which host created state table entries. +.It Dv DIOCOSFPFLUSH Flush the passive OS fingerprint table. -.It Dv DIOCFPADD Fa "struct pf_osfp_ioctl" +.It Dv DIOCOSFPADD Fa "struct pf_osfp_ioctl *io" .Bd -literal struct pf_osfp_ioctl { struct pf_osfp_entry { @@ -612,12 +880,14 @@ struct pf_osfp_ioctl { char fp_version_nm[PF_OSFP_LEN]; char fp_subtype_nm[PF_OSFP_LEN]; } fp_os; - u_int16_t fp_mss; + pf_tcpopts_t fp_tcpopts; u_int16_t fp_wsize; u_int16_t fp_psize; - u_int8_t fp_ttl; + u_int16_t fp_mss; + u_int16_t fp_flags; + u_int8_t fp_optcnt; u_int8_t fp_wscale; - u_int8_t fp_flags; + u_int8_t fp_ttl; int fp_getnum; }; .Ed @@ -637,41 +907,42 @@ The members .Va fp_wsize , .Va fp_psize , .Va fp_ttl , +.Va fp_optcnt , and .Va fp_wscale -are set to the TCP MSS, the TCP window size, the IP length and the IP TTL of -the TCP SYN packet respectively. +are set to the TCP MSS, the TCP window size, the IP length, the IP TTL, +the number of TCP options, and the TCP window scaling constant of the +TCP SYN packet, respectively. +.Pp The .Va fp_flags member is filled according to the -.In net/pf/pfvar.h -include file PF_OSFP_* defines. +.Aq Pa net/pfvar.h +include file +.Dv PF_OSFP_* +defines. +The +.Va fp_tcpopts +member contains packed TCP options. +Each option uses +.Dv PF_OSFP_TCPOPT_BITS +bits in the packed value. +Options include any of +.Dv PF_OSFP_TCPOPT_NOP , +.Dv PF_OSFP_TCPOPT_SACK , +.Dv PF_OSFP_TCPOPT_WSCALE , +.Dv PF_OSFP_TCPOPT_MSS , +or +.Dv PF_OSFP_TCPOPT_TS . +.Pp The .Va fp_getnum -is not used with this ioctl. +member is not used with this ioctl. .Pp -The structure's slack space must be zeroed for correct operation; memset +The structure's slack space must be zeroed for correct operation; +.Xr memset 3 the whole structure to zero before filling and sending to the kernel. -.It Dv DIOCFPGET Fa "struct pf_osfp_ioctl" -.Bd -literal -struct pf_osfp_ioctl { - struct pf_osfp_entry { - SLIST_ENTRY(pf_osfp_entry) fp_entry; - pf_osfp_t fp_os; - char fp_class_nm[PF_OSFP_LEN]; - char fp_version_nm[PF_OSFP_LEN]; - char fp_subtype_nm[PF_OSFP_LEN]; - } fp_os; - u_int16_t fp_mss; - u_int16_t fp_wsize; - u_int16_t fp_psize; - u_int8_t fp_ttl; - u_int8_t fp_wscale; - u_int8_t fp_flags; - int fp_getnum; -}; -.Ed -.Pp +.It Dv DIOCOSFPGET Fa "struct pf_osfp_ioctl *io" Get the passive OS fingerprint number .Va fp_getnum from the kernel's fingerprint list. @@ -680,24 +951,21 @@ Get the whole list by repeatedly incrementing the .Va fp_getnum number until the ioctl returns .Er EBUSY . -.It Dv DIOCGETSRCNODES Fa "struct pfioc_src_nodes" +.It Dv DIOCGETSRCNODES Fa "struct pfioc_src_nodes *psn" .Bd -literal struct pfioc_src_nodes { - int psn_len; - union { - caddr_t psu_buf; - struct pf_src_node *psu_src_nodes; - } psn_u; -#define psn_buf psn_u.psu_buf -#define psn_src_nodes psn_u.psu_src_nodes + int psn_len; + union { + caddr_t psu_buf; + struct pf_src_node *psu_src_nodes; + } psn_u; +#define psn_buf psn_u.psu_buf +#define psn_src_nodes psn_u.psu_src_nodes }; .Ed .Pp -Get the list of source nodes kept by the -.Ar sticky-address -and -.Ar source-track -options. +Get the list of source nodes kept by sticky addresses and source +tracking. 
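[Editorial sketch of walking the passive OS fingerprint list with DIOCOSFPGET by incrementing fp_getnum until the ioctl fails with EBUSY, exactly as the text above describes; same assumptions as the earlier sketches.]

int
dump_osfp(int dev)
{
        struct pf_osfp_ioctl io;
        int n;

        for (n = 0; ; n++) {
                memset(&io, 0, sizeof(io));     /* slack space must be zeroed */
                io.fp_getnum = n;
                if (ioctl(dev, DIOCOSFPGET, &io) == -1) {
                        if (errno == EBUSY)
                                break;          /* end of the list */
                        return (-1);
                }
                printf("%s %s %s\n", io.fp_os.fp_class_nm,
                    io.fp_os.fp_version_nm, io.fp_os.fp_subtype_nm);
        }
        return (0);
}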
The ioctl must be called once with .Va psn_len set to 0. @@ -711,29 +979,25 @@ placed in .Va psn_buf . The ioctl must then be called again to fill this buffer with the actual source node data. -After the ioctl call +After that call, .Va psn_len will be set to the length of the buffer actually used. -.It Dv DIOCCLRSRCNODES Fa "struct pfioc_table" +.It Dv DIOCCLRSRCNODES Clear the tree of source tracking nodes. -.It Dv DIOCIGETIFACES Fa "struct pfioc_iface" -Gets the list of interfaces and interface drivers known to +.It Dv DIOCIGETIFACES Fa "struct pfioc_iface *io" +Get the list of interfaces and interface drivers known to .Nm . -All the IOCTLs that manipulate interfaces +All the ioctls that manipulate interfaces use the same structure described below: .Bd -literal struct pfioc_iface { - char pfiio_name[IFNAMSIZ]; - void *pfiio_buffer; - int pfiio_esize; - int pfiio_size; - int pfiio_nzero; - int pfiio_flags; + char pfiio_name[IFNAMSIZ]; + void *pfiio_buffer; + int pfiio_esize; + int pfiio_size; + int pfiio_nzero; + int pfiio_flags; }; - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 .Ed .Pp If not empty, @@ -743,57 +1007,60 @@ can be used to restrict the search to a specific interface or driver. is the user-supplied buffer for returning the data. On entry, .Va pfiio_size -represents the number of -.Va pfi_if +contains the number of +.Vt pfi_kif entries that can fit into the buffer. The kernel will replace this value by the real number of entries it wants to return. .Va pfiio_esize -should be set to sizeof(struct pfi_if). -.Va pfiio_flags should be set to -.Dv PFI_FLAG_GROUP , PFI_FLAG_INSTANCE , -or both to tell the kernel to return a group of interfaces -(drivers, like "fxp"), real interface instances (like "fxp1") or both. +.Li sizeof(struct pfi_kif) . +.Pp The data is returned in the -.Va pfi_if +.Vt pfi_kif structure described below: .Bd -literal -struct pfi_if { - char pfif_name[IFNAMSIZ]; - u_int64_t pfif_packets[2][2][2]; - u_int64_t pfif_bytes[2][2][2]; - u_int64_t pfif_addcnt; - u_int64_t pfif_delcnt; - long pfif_tzero; - int pfif_states; - int pfif_rules; - int pfif_flags; +struct pfi_kif { + RB_ENTRY(pfi_kif) pfik_tree; + char pfik_name[IFNAMSIZ]; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; + struct pf_state_tree_lan_ext pfik_lan_ext; + struct pf_state_tree_ext_gwy pfik_ext_gwy; + TAILQ_ENTRY(pfi_kif) pfik_w_states; + void *pfik_ah_cookie; + struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; + int pfik_states; + int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; - -#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ -#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ -#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ -#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ -#define PFI_IFLAG_REFERENCED 0x0080 /* referenced by rules */ .Ed -.It Dv DIOCICLRISTATS Fa "struct pfioc_iface" -Clear the statistics counters of one or more interfaces. -.Va pfiio_name -and -.Va pfrio_flags -can be used to select which interfaces need to be cleared. +.It Dv DIOCSETIFFLAG Fa "struct pfioc_iface *io" +Set the user setable flags (described above) of the +.Nm +internal interface description. The filtering process is the same as for .Dv DIOCIGETIFACES . 
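[Editorial sketch of listing the interfaces pf knows about with DIOCIGETIFACES; the buffer holds struct pfi_kif entries as the text above states, and a real program would grow the buffer when the kernel reports more entries than fit (pfctl does this). Same assumptions as the earlier sketches.]

int
list_ifaces(int dev)
{
        struct pfioc_iface io;
        struct pfi_kif buf[64];         /* fixed guess for the sketch */
        int i;

        memset(&io, 0, sizeof(io));
        io.pfiio_buffer = buf;
        io.pfiio_esize = sizeof(struct pfi_kif);
        io.pfiio_size = 64;
        if (ioctl(dev, DIOCIGETIFACES, &io) == -1)
                return (-1);
        if (io.pfiio_size > 64)
                return (-1);            /* too small; retry with a larger buffer */
        for (i = 0; i < io.pfiio_size; i++)
                printf("%s\n", buf[i].pfik_name);
        return (0);
}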
-.Va pfiio_nzero -will be set by the kernel to the number of interfaces and drivers -that have been cleared. +.Bd -literal +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ +.Ed +.It Dv DIOCCLRIFFLAG Fa "struct pfioc_iface *io" +Works as +.Dv DIOCSETIFFLAG +above but clears the flags. +.El +.Sh FILES +.Bl -tag -width /dev/pf -compact +.It Pa /dev/pf +packet filtering device. .El .Sh EXAMPLES The following example demonstrates how to use the .Dv DIOCNATLOOK -command to find the internal host/port of a NATed connection. +command to find the internal host/port of a NATed connection: .Bd -literal #include #include @@ -862,7 +1129,8 @@ main(int argc, char *argv[]) .Xr bridge 4 , .Xr pflog 4 , .Xr pfsync 4 , -.Xr pfctl 8 +.Xr pfctl 8 , +.Xr altq 9 .Sh HISTORY The .Nm diff --git a/share/man/man4/pflog.4 b/share/man/man4/pflog.4 index 5268c4a610..189576807c 100644 --- a/share/man/man4/pflog.4 +++ b/share/man/man4/pflog.4 @@ -1,5 +1,4 @@ -.\" $OpenBSD: pflog.4,v 1.7 2004/03/21 19:47:59 miod Exp $ -.\" $DragonFly: src/share/man/man4/pflog.4,v 1.4 2007/07/29 17:27:45 swildner Exp $ +.\" $OpenBSD: pflog.4,v 1.9 2006/10/25 12:51:31 jmc Exp $ .\" .\" Copyright (c) 2001 Tobias Weingartner .\" All rights reserved. @@ -46,6 +45,14 @@ on the interface, or stored to disk using .Xr pflogd 8 . .Pp +The pflog0 interface is created automatically at boot if both +.Xr pf 4 +and +.Xr pflogd 8 +are enabled; +further instances can be created using +.Xr ifconfig 8 . +.Pp Each packet retrieved on this interface has a header associated with it of length .Dv PFLOG_HDRLEN . @@ -64,23 +71,31 @@ struct pfloghdr { char ruleset[PF_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; }; .Ed .Sh EXAMPLES +Create a +.Nm +interface +and monitor all packets logged on it: .Bd -literal -offset indent # ifconfig pflog0 up # tcpdump -n -e -ttt -i pflog0 .Ed .Sh SEE ALSO -.Xr tcpdump 1 , .Xr inet 4 , .Xr inet6 4 , .Xr netintro 4 , .Xr pf 4 , .Xr ifconfig 8 , -.Xr pflogd 8 +.Xr pflogd 8, +.Xr tcpdump 1 .Sh HISTORY The .Nm diff --git a/share/man/man4/pfsync.4 b/share/man/man4/pfsync.4 index 127249bc19..358ba7469b 100644 --- a/share/man/man4/pfsync.4 +++ b/share/man/man4/pfsync.4 @@ -1,7 +1,7 @@ -.\" $OpenBSD: pfsync.4,v 1.14 2004/03/21 19:47:59 miod Exp $ -.\" $DragonFly: src/share/man/man4/pfsync.4,v 1.6 2007/11/03 18:37:42 swildner Exp $ +\" $OpenBSD: pfsync.4,v 1.24 2006/10/23 07:05:49 jmc Exp $ .\" .\" Copyright (c) 2002 Michael Shalayeff +.\" Copyright (c) 2003-2004 Ryan McBride .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -24,12 +24,12 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd April 9, 2007 +.Dd August 5, 2010 .Dt PFSYNC 4 .Os .Sh NAME .Nm pfsync -.Nd packet filter states table logging interface +.Nd packet filter state table logging interface .Sh SYNOPSIS .Cd "device pfsync" .Sh DESCRIPTION @@ -69,20 +69,20 @@ state into one message where possible. The maximum number of times this can be done before the update is sent out is controlled by the .Ar maxupd -to ifconfig. +parameter to ifconfig (see .Xr ifconfig 8 -and the example below for more details) +and the example below for more details). .Pp Each packet retrieved on this interface has a header associated with it of length .Dv PFSYNC_HDRLEN . 
The header indicates the version of the protocol, address family, -action taken on the following states and the number of state +action taken on the following states, and the number of state table entries attached in this packet. -This structure, defined in -.In net/pf/if_pfsync.h -looks like: +This structure is defined in +.Aq Pa net/pf/if_pfsync.h +as: .Bd -literal -offset indent struct pfsync_header { u_int8_t version; @@ -96,21 +96,35 @@ States can be synchronised between two or more firewalls using this interface, by specifying a synchronisation interface using .Xr ifconfig 8 . For example, the following command sets fxp0 as the synchronisation -interface. +interface: .Bd -literal -offset indent -# ifconfig pfsync0 syncif fxp0 +# ifconfig pfsync0 syncdev fxp0 .Ed .Pp -State change messages are sent out on the synchronisation +By default, state change messages are sent out on the synchronisation interface using IP multicast packets. The protocol is IP protocol 240, PFSYNC, and the multicast group used is 224.0.0.240. +When a peer address is specified using the +.Ic syncpeer +keyword, the peer address is used as a destination for the pfsync traffic, +and the traffic can then be protected using +.Xr ipsec 4 . +In such a configuration, the syncdev should be set to the +.Xr enc 4 +interface, as this is where the traffic arrives when it is decapsulated, +e.g.: +.Bd -literal -offset indent +# ifconfig pfsync0 syncpeer 10.0.0.2 syncdev enc0 +.Ed .Pp -It is important that the synchronisation interface be on a trusted -network as there is no authentication on the protocol and it would +It is important that the pfsync traffic be well secured +as there is no authentication on the protocol and it would be trivial to spoof packets which create states, bypassing the pf ruleset. -Ideally, this is a network dedicated to pfsync messages, -i.e. a crossover cable between two firewalls. +Either run the pfsync protocol on a trusted network \- ideally a network +dedicated to pfsync messages such as a crossover cable between two firewalls, +or specify a peer address and protect the traffic with +.Xr ipsec 4 . .Pp There is a one-to-one correspondence between packets seen by .Xr bpf 4 @@ -175,7 +189,7 @@ The following should be added to the top of .Pa /etc/pf.conf : .Bd -literal -offset indent pass quick on { sis2 } proto pfsync -pass quick on { sis0 sis1 } proto carp keep state +pass on { sis0 sis1 } proto carp .Ed .Pp If it is preferable that one firewall handle the traffic, @@ -197,16 +211,18 @@ The following must also be added to net.inet.carp.preempt=1 .Ed .Sh SEE ALSO -.Xr tcpdump 1 , .Xr bpf 4 , .Xr carp 4 , .Xr inet 4 , .Xr inet6 4 , +.Xr ipsec 4 , .Xr netintro 4 , .Xr pf 4 , .Xr pf.conf 5 , .Xr protocols 5 , -.Xr ifconfig 8 +.Xr ifconfig 8 , +.Xr ifstated 8 , +.Xr tcpdump 1 .Sh HISTORY The .Nm diff --git a/sys/conf/files b/sys/conf/files index 8f6b522504..e6a816b18a 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1849,7 +1849,6 @@ libkern/rindex.c standard libkern/scanc.c standard libkern/skpc.c standard libkern/strcat.c standard -libkern/strchr.c standard libkern/strcmp.c standard libkern/strcasecmp.c standard libkern/fnmatch.c standard diff --git a/sys/libkern/strchr.c b/sys/libkern/strchr.c deleted file mode 100644 index 4e7cf69b69..0000000000 --- a/sys/libkern/strchr.c +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - -#include -#include - -char * -strchr(const char *str, int c) -{ - const char *s; - - for (s = str; (*s) && (*s != (char)c); ++s); - return ((*s) ? __DECONST(char *, s) : NULL); -} diff --git a/sys/net/altq/altq_cbq.c b/sys/net/altq/altq_cbq.c index 21684947ef..bbe5e75fd5 100644 --- a/sys/net/altq/altq_cbq.c +++ b/sys/net/altq/altq_cbq.c @@ -485,6 +485,7 @@ cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc; struct rm_class *cl; + struct pf_mtag *pf; int len; /* grab class set by classifier */ @@ -494,8 +495,9 @@ cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) m_freem(m); return (ENOBUFS); } - if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) - cl = clh_to_clp(cbqp, m->m_pkthdr.altq_qid); + + if ((pf = altq_find_pftag(m)) != NULL) + cl = clh_to_clp(cbqp, pf->qid); else cl = NULL; if (cl == NULL) { diff --git a/sys/net/altq/altq_fairq.c b/sys/net/altq/altq_fairq.c index e171affecf..4c795814b8 100644 --- a/sys/net/altq/altq_fairq.c +++ b/sys/net/altq/altq_fairq.c @@ -66,8 +66,8 @@ /* * FAIRQ - take traffic classified by keep state (hashed into - * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract - * the first packet from each bucket in a round-robin fashion. + * pf->state_hash) and bucketize it. Fairly extract + * the first packet from each bucket in a round-robin fashion. * * TODO - better overall qlimit support (right now it is per-bucket). * - NOTE: red etc is per bucket, not overall. 
@@ -123,7 +123,7 @@ static int fairq_class_destroy(struct fairq_class *); static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int); -static int fairq_addq(struct fairq_class *, struct mbuf *); +static int fairq_addq(struct fairq_class *, struct mbuf *, struct pf_mtag *); static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); @@ -498,6 +498,7 @@ fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; struct fairq_class *cl; + struct pf_mtag *pf; int error; int len; @@ -512,8 +513,8 @@ fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) goto done; } - if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) - cl = clh_to_clp(pif, m->m_pkthdr.altq_qid); + if ((pf = altq_find_pftag(m)) != NULL) + cl = clh_to_clp(pif, pf->qid); else cl = NULL; if (cl == NULL) { @@ -527,7 +528,7 @@ fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) cl->cl_flags |= FARF_HAS_PACKETS; cl->cl_pktattr = NULL; len = m_pktlen(m); - if (fairq_addq(cl, m) != 0) { + if (fairq_addq(cl, m, pf) != 0) { /* drop occurred. mbuf was freed in fairq_addq. */ PKTCNTR_ADD(&cl->cl_dropcnt, len); error = ENOBUFS; @@ -626,7 +627,7 @@ fairq_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op) } static int -fairq_addq(struct fairq_class *cl, struct mbuf *m) +fairq_addq(struct fairq_class *cl, struct mbuf *m, struct pf_mtag *pf) { fairq_bucket_t *b; u_int hindex; @@ -636,13 +637,13 @@ fairq_addq(struct fairq_class *cl, struct mbuf *m) * If the packet doesn't have any keep state put it on the end of * our queue. XXX this can result in out of order delivery. 
*/ - if ((m->m_pkthdr.fw_flags & ALTQ_MBUF_STATE_HASHED) == 0) { + if (pf == NULL || (pf->flags & PF_TAG_STATE_HASHED) == 0) { if (cl->cl_head) b = cl->cl_head->prev; else b = &cl->cl_buckets[0]; } else { - hindex = m->m_pkthdr.altq_state_hash & cl->cl_nbucket_mask; + hindex = pf->state_hash & cl->cl_nbucket_mask; b = &cl->cl_buckets[hindex]; } diff --git a/sys/net/altq/altq_hfsc.c b/sys/net/altq/altq_hfsc.c index 45c7b4869f..43609deba4 100644 --- a/sys/net/altq/altq_hfsc.c +++ b/sys/net/altq/altq_hfsc.c @@ -645,6 +645,7 @@ hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc; struct hfsc_class *cl; + struct pf_mtag *pf; int len; /* grab class set by classifier */ @@ -655,8 +656,8 @@ hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) return (ENOBUFS); } crit_enter(); - if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) - cl = clh_to_clp(hif, m->m_pkthdr.altq_qid); + if ((pf = altq_find_pftag(m)) != NULL) + cl = clh_to_clp(hif, pf->qid); else cl = NULL; if (cl == NULL || is_a_parent_class(cl)) { diff --git a/sys/net/altq/altq_priq.c b/sys/net/altq/altq_priq.c index 0ca183effa..00dd4e1298 100644 --- a/sys/net/altq/altq_priq.c +++ b/sys/net/altq/altq_priq.c @@ -415,6 +415,7 @@ priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct priq_if *pif = (struct priq_if *)ifq->altq_disc; struct priq_class *cl; + struct pf_mtag *pf; int error; int len; @@ -429,8 +430,8 @@ priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) goto done; } - if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) - cl = clh_to_clp(pif, m->m_pkthdr.altq_qid); + if ((pf = altq_find_pftag(m)) != NULL) + cl = clh_to_clp(pif, pf->qid); else cl = NULL; if (cl == NULL) { diff --git a/sys/net/altq/altq_red.c b/sys/net/altq/altq_red.c index 3163ea291b..1a4dade801 100644 --- a/sys/net/altq/altq_red.c +++ b/sys/net/altq/altq_red.c @@ -419,14 +419,16 @@ drop_early(int fp_len, int fp_probd, int count) int mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) { + struct pf_mtag *pf; struct mbuf *m0; void *hdr; int af; - if ((m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) == 0) + pf = altq_find_pftag(m); + if (pf == NULL) return (0); - af = m->m_pkthdr.ecn_af; - hdr = m->m_pkthdr.header; + af = pf->af; + hdr = pf->hdr; if (af != AF_INET && af != AF_INET6) return (0); diff --git a/sys/net/altq/altq_subr.c b/sys/net/altq/altq_subr.c index ea70e5321e..9b397688a1 100644 --- a/sys/net/altq/altq_subr.c +++ b/sys/net/altq/altq_subr.c @@ -877,3 +877,15 @@ read_machclk(void) } return (val); } + +struct pf_mtag * +altq_find_pftag(struct mbuf *m) +{ + struct m_tag *mtag; + + mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL); + if (mtag) + return((struct pf_mtag *)(mtag + 1)); + return(NULL); +} + diff --git a/sys/net/altq/altq_var.h b/sys/net/altq/altq_var.h index e5a54bad51..c5db2cc38c 100644 --- a/sys/net/altq/altq_var.h +++ b/sys/net/altq/altq_var.h @@ -64,8 +64,10 @@ extern int pfaltq_running; struct ifnet; struct mbuf; struct pf_altq; +struct pf_mtag; void *altq_lookup(const char *, int); +struct pf_mtag *altq_find_pftag(struct mbuf *m); uint8_t read_dsfield(struct mbuf *, struct altq_pktattr *); void write_dsfield(struct mbuf *, struct altq_pktattr *, uint8_t); int tbr_set(struct ifaltq *, struct tb_profile *); diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 297e8dc5b9..0e3234e50c 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -1249,6 +1249,28 @@ bpf_mtap(struct bpf_if *bp, struct mbuf 
*m) rel_mplock(); } +/* + * Incoming linkage from device drivers, where we have a mbuf chain + * but need to prepend some arbitrary header from a linear buffer. + * + * Con up a minimal dummy header to pacify bpf. Allocate (only) a + * struct m_hdr on the stack. This is safe as bpf only reads from the + * fields in this header that we initialize, and will not try to free + * it or keep a pointer to it. + */ +void +bpf_mtap_hdr(struct bpf_if *arg, caddr_t data, u_int dlen, struct mbuf *m, u_int direction) +{ + struct m_hdr mh; + + mh.mh_flags = 0; + mh.mh_next = m; + mh.mh_len = dlen; + mh.mh_data = data; + + return bpf_mtap(arg, (struct mbuf *) &mh); +} + void bpf_mtap_family(struct bpf_if *bp, struct mbuf *m, sa_family_t family) { diff --git a/sys/net/bpf.h b/sys/net/bpf.h index d8b86c6eb3..d205afe42e 100644 --- a/sys/net/bpf.h +++ b/sys/net/bpf.h @@ -249,6 +249,7 @@ int bpf_validate(const struct bpf_insn *, int); void bpf_tap(struct bpf_if *, u_char *, u_int); void bpf_mtap(struct bpf_if *, struct mbuf *); void bpf_mtap_family(struct bpf_if *, struct mbuf *m, __uint8_t family); +void bpf_mtap_hdr(struct bpf_if *, caddr_t, u_int, struct mbuf *, u_int); void bpf_ptap(struct bpf_if *, struct mbuf *, const void *, u_int); void bpfattach(struct ifnet *, u_int, u_int); void bpfattach_dlt(struct ifnet *, u_int, u_int, struct bpf_if **); diff --git a/sys/net/if.h b/sys/net/if.h index e4c25c21ba..a422f2adb5 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -322,6 +322,36 @@ struct ifconf { #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; +/* + * interface groups + */ + +#define IFG_ALL "all" /* group contains all interfaces */ +/* XXX: will we implement this? */ +#define IFG_EGRESS "egress" /* if(s) default route(s) point to */ + +struct ifg_req { + union { + char ifgrqu_group[IFNAMSIZ]; + char ifgrqu_member[IFNAMSIZ]; + } ifgrq_ifgrqu; +#define ifgrq_group ifgrq_ifgrqu.ifgrqu_group +#define ifgrq_member ifgrq_ifgrqu.ifgrqu_member +}; + +/* + * Used to lookup groups for an interface + */ +struct ifgroupreq { + char ifgr_name[IFNAMSIZ]; + u_int ifgr_len; + union { + char ifgru_group[IFNAMSIZ]; + struct ifg_req *ifgru_groups; + } ifgr_ifgru; +#define ifgr_group ifgr_ifgru.ifgru_group +#define ifgr_groups ifgr_ifgru.ifgru_groups +}; /* * Structure for SIOC[AGD]LIFADDR diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 529ca73727..e880bfc641 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -250,6 +250,7 @@ #define IFT_PVC 0xf1 #define IFT_FAITH 0xf2 #define IFT_STF 0xf3 +#define IFT_ENC 0xf4 #define IFT_PFLOG 0xf5 /* Packet filter logging */ #define IFT_PFSYNC 0xf6 /* Packet filter state syncing */ #define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 8259fad190..c016aa8e7b 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -233,6 +233,8 @@ struct ifnet { (struct ifnet *, struct sockaddr **, struct sockaddr *); int (*if_start_cpuid) /* cpuid to run if_start */ (struct ifnet *); + TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ + /* protected by if_addr_mtx */ #ifdef DEVICE_POLLING void (*if_poll) /* IFF_POLLING support */ (struct ifnet *, enum poll_cmd, int); @@ -272,6 +274,7 @@ struct ifnet { struct lwkt_serialize if_default_serializer; /* if not supplied */ int if_cpuid; struct netmsg *if_start_nmsg; /* percpu messages to schedule if_start */ + void *if_pf_kif; /* pf interface abstraction */ }; typedef void if_init_f_t (void *); @@ -599,6 +602,38 @@ 
EVENTHANDLER_DECLARE(ifnet_attach_event, ifnet_attach_event_handler_t); typedef void (*ifnet_detach_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_detach_event, ifnet_detach_event_handler_t); +/* + * interface groups + */ +struct ifg_group { + char ifg_group[IFNAMSIZ]; + u_int ifg_refcnt; + void *ifg_pf_kif; + TAILQ_HEAD(, ifg_member) ifg_members; + TAILQ_ENTRY(ifg_group) ifg_next; +}; + +struct ifg_member { + TAILQ_ENTRY(ifg_member) ifgm_next; + struct ifnet *ifgm_ifp; +}; + +struct ifg_list { + struct ifg_group *ifgl_group; + TAILQ_ENTRY(ifg_list) ifgl_next; +}; + +/* group attach event */ +typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); +EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); +/* group detach event */ +typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); +EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); +/* group change event */ +typedef void (*group_change_event_handler_t)(void *, const char *); +EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); + + #ifdef INVARIANTS #define ASSERT_IFAC_VALID(ifac) do { \ KKASSERT((ifac)->ifa_magic == IFA_CONTAINER_MAGIC); \ diff --git a/sys/net/pf/Makefile b/sys/net/pf/Makefile index 106e337b50..91bf512d47 100644 --- a/sys/net/pf/Makefile +++ b/sys/net/pf/Makefile @@ -2,7 +2,7 @@ KMOD= pf SRCS= if_pflog.c pf.c pf_if.c pf_ioctl.c pf_norm.c pf_osfp.c pf_subr.c -SRCS+= pf_table.c +SRCS+= pf_table.c pf_ruleset.c SRCS+= use_pflog.h use_pfsync.h opt_inet.h opt_inet6.h SRCS+= opt_icmp_bandlim.h diff --git a/sys/net/pf/if_pflog.c b/sys/net/pf/if_pflog.c index 1ed7bbe13f..7b6da4003e 100644 --- a/sys/net/pf/if_pflog.c +++ b/sys/net/pf/if_pflog.c @@ -1,7 +1,7 @@ /* $FreeBSD: src/sys/contrib/pf/net/if_pflog.c,v 1.9 2004/06/22 20:13:24 brooks Exp $ */ /* $OpenBSD: if_pflog.c,v 1.11 2003/12/31 11:18:25 cedric Exp $ */ /* $DragonFly: src/sys/net/pf/if_pflog.c,v 1.6 2006/12/22 23:44:57 swildner Exp $ */ - +/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */ /* * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -85,60 +86,84 @@ #define DPRINTF(x) #endif - -static void pflog_clone_destroy(struct ifnet *); -static int pflog_clone_create(struct if_clone *, int, caddr_t); +void pflogattach(int); int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); int pflogioctl(struct ifnet *, u_long, caddr_t, struct ucred *); void pflogrtrequest(int, struct rtentry *, struct sockaddr *); void pflogstart(struct ifnet *); -static MALLOC_DEFINE(M_PFLOG, PFLOGNAME, "Packet Filter Logging Interface"); -static LIST_HEAD(pflog_list, pflog_softc) pflog_list; -struct if_clone pflog_cloner = IF_CLONE_INITIALIZER("pflog", pflog_clone_create, - pflog_clone_destroy, 1, 1); - -static void -pflog_clone_destroy(struct ifnet *ifp) -{ - struct pflog_softc *sc; +int pflog_clone_create(struct if_clone *, int, caddr_t); +void pflog_clone_destroy(struct ifnet *); - sc = ifp->if_softc; +LIST_HEAD(, pflog_softc) pflogif_list; +struct if_clone pflog_cloner = + IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy, 1, 1); - /* - * Do we really need this? 
- */ - IF_DRAIN(&ifp->if_snd); +struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ - bpfdetach(ifp); - if_detach(ifp); - LIST_REMOVE(sc, sc_next); - kfree(sc, M_PFLOG); +void +pflogattach(int npflog) +{ + int i; + LIST_INIT(&pflogif_list); + for (i = 0; i < PFLOGIFS_MAX; i++) + pflogifs[i] = NULL; + (void) pflog_clone_create(&pflog_cloner, 0, NULL); + if_clone_attach(&pflog_cloner); } -static int +int pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) { - struct pflog_softc *sc; + struct ifnet *ifp; + struct pflog_softc *pflogif; - MALLOC(sc, struct pflog_softc *, sizeof(*sc), M_PFLOG, M_WAITOK|M_ZERO); + if (unit >= PFLOGIFS_MAX) + return (EINVAL); + + if ((pflogif = kmalloc(sizeof(*pflogif), M_DEVBUF, M_WAITOK)) == NULL) + return (ENOMEM); + bzero(pflogif, sizeof(*pflogif)); + + pflogif->sc_unit = unit; + ifp = &pflogif->sc_if; + ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit); + ifp->if_softc = pflogif; + ifp->if_mtu = PFLOGMTU; + ifp->if_ioctl = pflogioctl; + ifp->if_output = pflogoutput; + ifp->if_start = pflogstart; + ifp->if_type = IFT_PFLOG; + ifp->if_snd.ifq_maxlen = ifqmaxlen; + ifp->if_hdrlen = PFLOG_HDRLEN; + if_attach(ifp, NULL); + + bpfattach(&pflogif->sc_if, DLT_PFLOG, PFLOG_HDRLEN); + + crit_enter(); + LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); + pflogifs[unit] = ifp; + crit_exit(); - if_initname(&sc->sc_if, ifc->ifc_name, unit); - sc->sc_if.if_mtu = PFLOGMTU; - sc->sc_if.if_ioctl = pflogioctl; - sc->sc_if.if_output = pflogoutput; - sc->sc_if.if_start = pflogstart; - sc->sc_if.if_type = IFT_PFLOG; - sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen; - sc->sc_if.if_hdrlen = PFLOG_HDRLEN; - sc->sc_if.if_softc = sc; - if_attach(&sc->sc_if, NULL); + return (0); +} + +void +pflog_clone_destroy(struct ifnet *ifp) +{ + struct pflog_softc *pflogif = ifp->if_softc; - LIST_INSERT_HEAD(&pflog_list, sc, sc_next); - bpfattach(&sc->sc_if, DLT_PFLOG, PFLOG_HDRLEN); + crit_enter(); + pflogifs[pflogif->sc_unit] = NULL; + LIST_REMOVE(pflogif, sc_list); + crit_exit(); - return (0); +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + kfree(pflogif, M_DEVBUF); } /* @@ -147,10 +172,19 @@ pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) void pflogstart(struct ifnet *ifp) { - crit_enter(); - IF_DROP(&ifp->if_snd); - IF_DRAIN(&ifp->if_snd); - crit_exit(); + struct mbuf *m; + + for (;;) { + crit_enter(); + IF_DROP(&ifp->if_snd); + IF_DEQUEUE(&ifp->if_snd, m); + crit_exit(); + + if (m == NULL) + return; + else + m_freem(m); + } } int @@ -193,15 +227,17 @@ pflogioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) int pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, - struct pf_ruleset *ruleset) + struct pf_ruleset *ruleset, struct pf_pdesc *pd) { - struct ifnet *ifn; + struct ifnet *ifn = NULL; struct pfloghdr hdr; - struct mbuf m1; if (kif == NULL || m == NULL || rm == NULL) return (-1); + if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) + return (0); + bzero(&hdr, sizeof(hdr)); hdr.length = PFLOG_REAL_HDRLEN; hdr.af = af; @@ -215,36 +251,42 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); - if (ruleset != NULL) - memcpy(hdr.ruleset, ruleset->name, + if (ruleset != NULL && ruleset->anchor != NULL) { + strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); - - + } } + if (rm->log & 
PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) + pd->lookup.done = pf_socket_lookup(dir, pd, NULL); + if (pd->lookup.done > 0) { + hdr.uid = pd->lookup.uid; + hdr.pid = pd->lookup.pid; + } else { + hdr.uid = UID_MAX; + hdr.pid = NO_PID; + } + hdr.rule_uid = rm->cuid; + hdr.rule_pid = rm->cpid; hdr.dir = dir; #ifdef INET if (af == AF_INET) { - struct ip *ip = mtod(m, struct ip *); - + struct ip *ip; + ip = mtod(m, struct ip *); ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); if (dir == PF_OUT) { ip->ip_sum = 0; ip->ip_sum = in_cksum(m, ip->ip_hl << 2); - } + } } #endif /* INET */ - m1.m_next = m; - m1.m_len = PFLOG_HDRLEN; - m1.m_data = (char *) &hdr; - - KASSERT((!LIST_EMPTY(&pflog_list)), ("pflog: no interface")); - ifn = &LIST_FIRST(&pflog_list)->sc_if; - - BPF_MTAP(ifn, &m1); + ifn->if_opackets++; + ifn->if_obytes += m->m_pkthdr.len; + bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m, + BPF_DIRECTION_OUT); #ifdef INET if (af == AF_INET) { @@ -265,15 +307,16 @@ pflog_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: - LIST_INIT(&pflog_list); + LIST_INIT(&pflogif_list); if_clone_attach(&pflog_cloner); break; case MOD_UNLOAD: if_clone_detach(&pflog_cloner); - while (!LIST_EMPTY(&pflog_list)) + while (!LIST_EMPTY(&pflogif_list)) { pflog_clone_destroy( - &LIST_FIRST(&pflog_list)->sc_if); + &LIST_FIRST(&pflogif_list)->sc_if); + } break; default: diff --git a/sys/net/pf/if_pflog.h b/sys/net/pf/if_pflog.h index 6eeb1b0805..7b3a8ed483 100644 --- a/sys/net/pf/if_pflog.h +++ b/sys/net/pf/if_pflog.h @@ -1,7 +1,7 @@ /* $FreeBSD: src/sys/contrib/pf/net/if_pflog.h,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */ /* $OpenBSD: if_pflog.h,v 1.10 2004/03/19 04:52:04 frantzen Exp $ */ /* $DragonFly: src/sys/net/pf/if_pflog.h,v 1.1 2004/09/19 22:32:47 joerg Exp $ */ - +/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ /* * Copyright (c) 2004 The DragonFly Project. All rights reserved. 
* @@ -32,15 +32,15 @@ #ifndef _NET_IF_PFLOG_H_ #define _NET_IF_PFLOG_H_ +#define PFLOGIFS_MAX 16 + struct pflog_softc { - struct ifnet sc_if; /* the interface */ - LIST_ENTRY(pflog_softc) sc_next; + struct ifnet sc_if; /* the interface */ + int sc_unit; + LIST_ENTRY(pflog_softc) sc_list; }; -/* XXX keep in sync with pfvar.h */ -#ifndef PF_RULESET_NAME_SIZE -#define PF_RULESET_NAME_SIZE 16 -#endif +#define PFLOG_RULESET_NAME_SIZE 16 struct pfloghdr { u_int8_t length; @@ -48,9 +48,13 @@ struct pfloghdr { u_int8_t action; u_int8_t reason; char ifname[IFNAMSIZ]; - char ruleset[PF_RULESET_NAME_SIZE]; + char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; }; @@ -75,9 +79,9 @@ struct old_pfloghdr { #include "use_pflog.h" #if NPFLOG > 0 -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) #else -#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0) +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) #endif /* NPFLOG > 0 */ #endif /* _KERNEL */ #endif /* _NET_IF_PFLOG_H_ */ diff --git a/sys/net/pf/if_pfsync.c b/sys/net/pf/if_pfsync.c index 17a4790334..8a78505af4 100644 --- a/sys/net/pf/if_pfsync.c +++ b/sys/net/pf/if_pfsync.c @@ -1,10 +1,6 @@ -/* $FreeBSD: src/sys/contrib/pf/net/if_pfsync.c,v 1.11 2004/08/14 15:32:40 dwmalone Exp $ */ -/* $OpenBSD: if_pfsync.c,v 1.26 2004/03/28 18:14:20 mcbride Exp $ */ -/* $DragonFly: src/sys/net/pf/if_pfsync.c,v 1.8 2008/04/12 17:39:41 dillon Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */ /* - * Copyright (c) 2004 The DragonFly Project. All rights reserved. - * * Copyright (c) 2002 Michael Shalayeff * All rights reserved. * @@ -30,32 +26,27 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "opt_inet.h" -#include "opt_inet6.h" #include #include -#include #include #include #include #include +#include +#include #include -#include -#include -#include -#include -#include - -#include #include #include #include #include +#include +#include +#include +#include #ifdef INET -#include #include #include #include @@ -63,106 +54,130 @@ #endif #ifdef INET6 -#ifndef INET -#include -#endif #include #endif /* INET6 */ -#include -#include +#include "carp.h" +#if NCARP > 0 +#include +#endif -#define PFSYNCNAME "pfsync" +#include +#include + +#include "bpfilter.h" +#include "pfsync.h" #define PFSYNC_MINMTU \ (sizeof(struct pfsync_header) + sizeof(struct pf_state)) #ifdef PFSYNCDEBUG -#define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) +#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0) int pfsyncdebug; #else #define DPRINTF(x) #endif -int pfsync_sync_ok; -struct pfsyncstats pfsyncstats; +struct pfsync_softc *pfsyncif = NULL; +struct pfsyncstats pfsyncstats; -static void pfsync_clone_destroy(struct ifnet *); -static int pfsync_clone_create(struct if_clone *, int, caddr_t); +void pfsyncattach(int); +int pfsync_clone_create(struct if_clone *, int); +int pfsync_clone_destroy(struct ifnet *); void pfsync_setmtu(struct pfsync_softc *, int); -int pfsync_insert_net_state(struct pfsync_state *); +int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, + struct pf_state_peer *); +int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); +void pfsync_update_net_tdb(struct pfsync_tdb *); int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); -int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); +int pfsyncioctl(struct ifnet *, u_long, caddr_t); void pfsyncstart(struct ifnet *); struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); int pfsync_sendout(struct pfsync_softc *); +int pfsync_tdb_sendout(struct pfsync_softc *); +int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); void pfsync_timeout(void *); +void pfsync_tdb_timeout(void *); void pfsync_send_bus(struct pfsync_softc *, u_int8_t); void pfsync_bulk_update(void *); void pfsync_bulkfail(void *); -static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. 
Interface"); -static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; -struct if_clone pfsync_cloner = IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, - pfsync_clone_destroy, 1, 1); - -static void -pfsync_clone_destroy(struct ifnet *ifp) -{ - struct pfsync_softc *sc; +int pfsync_sync_ok; +extern int ifqmaxlen; - sc = ifp->if_softc; - callout_stop(&sc->sc_tmo); - callout_stop(&sc->sc_bulk_tmo); - callout_stop(&sc->sc_bulkfail_tmo); +struct if_clone pfsync_cloner = + IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); - bpfdetach(ifp); - if_detach(ifp); - LIST_REMOVE(sc, sc_next); - kfree(sc, M_PFSYNC); +void +pfsyncattach(int npfsync) +{ + if_clone_attach(&pfsync_cloner); } - -static int -pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) +int +pfsync_clone_create(struct if_clone *ifc, int unit) { - struct pfsync_softc *sc; struct ifnet *ifp; - MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC, - M_WAITOK|M_ZERO); + if (unit != 0) + return (EINVAL); pfsync_sync_ok = 1; - sc->sc_mbuf = NULL; - sc->sc_mbuf_net = NULL; - sc->sc_statep.s = NULL; - sc->sc_statep_net.s = NULL; - sc->sc_maxupdates = 128; - sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); - sc->sc_ureq_received = 0; - sc->sc_ureq_sent = 0; - - ifp = &sc->sc_if; - if_initname(ifp, ifc->ifc_name, unit); + if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK)) == NULL) + return (ENOMEM); + bzero(pfsyncif, sizeof(*pfsyncif)); + pfsyncif->sc_mbuf = NULL; + pfsyncif->sc_mbuf_net = NULL; + pfsyncif->sc_mbuf_tdb = NULL; + pfsyncif->sc_statep.s = NULL; + pfsyncif->sc_statep_net.s = NULL; + pfsyncif->sc_statep_tdb.t = NULL; + pfsyncif->sc_maxupdates = 128; + pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP; + pfsyncif->sc_ureq_received = 0; + pfsyncif->sc_ureq_sent = 0; + pfsyncif->sc_bulk_send_next = NULL; + pfsyncif->sc_bulk_terminator = NULL; + ifp = &pfsyncif->sc_if; + snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); + ifp->if_softc = pfsyncif; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_start = pfsyncstart; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = PFSYNC_HDRLEN; - ifp->if_baudrate = IF_Mbps(100); - ifp->if_softc = sc; - pfsync_setmtu(sc, MCLBYTES); - callout_init(&sc->sc_tmo); - callout_init(&sc->sc_bulk_tmo); - callout_init(&sc->sc_bulkfail_tmo); - if_attach(&sc->sc_if, NULL); + pfsync_setmtu(pfsyncif, ETHERMTU); + timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif); + timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif); + timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif); + if_attach(ifp); + if_alloc_sadl(ifp); + +#if NCARP > 0 + if_addgroup(ifp, "carp"); +#endif + +#if NBPFILTER > 0 + bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); +#endif - LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); - bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); + return (0); +} +int +pfsync_clone_destroy(struct ifnet *ifp) +{ +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + free(pfsyncif, M_DEVBUF); + pfsyncif = NULL; return (0); } @@ -172,51 +187,92 @@ pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) void pfsyncstart(struct ifnet *ifp) { - crit_enter(); - IF_DROP(&ifp->if_snd); - IF_DRAIN(&ifp->if_snd); - crit_exit(); + struct mbuf *m; + int s; + + for (;;) { + s = splnet(); + 
IF_DROP(&ifp->if_snd); + IF_DEQUEUE(&ifp->if_snd, m); + splx(s); + + if (m == NULL) + return; + else + m_freem(m); + } +} + +int +pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, + struct pf_state_peer *d) +{ + if (s->scrub.scrub_flag && d->scrub == NULL) { + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (d->scrub == NULL) + return (ENOMEM); + bzero(d->scrub, sizeof(*d->scrub)); + } + + return (0); } int -pfsync_insert_net_state(struct pfsync_state *sp) +pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) { struct pf_state *st = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { - kprintf("pfsync_insert_net_state: invalid creator id:" - " %08" PRIx32 "\n", ntohl(sp->creatorid)); + printf("pfsync_insert_net_state: invalid creator id:" + " %08x\n", ntohl(sp->creatorid)); return (EINVAL); } - kif = pfi_lookup_create(sp->ifname); + kif = pfi_kif_get(sp->ifname); if (kif == NULL) { if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync_insert_net_state: " + printf("pfsync_insert_net_state: " "unknown interface: %s\n", sp->ifname); /* skip this state */ return (0); } /* - * Just use the default rule until we have infrastructure to find the - * best matching rule. + * If the ruleset checksums match, it's safe to associate the state + * with the rule of that number. */ - r = &pf_default_rule; + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag) + r = pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; + else + r = &pf_default_rule; if (!r->max_states || r->states < r->max_states) st = pool_get(&pf_state_pl, PR_NOWAIT); if (st == NULL) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); return (ENOMEM); } bzero(st, sizeof(*st)); + /* allocate memory for scrub info */ + if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); + pool_put(&pf_state_pl, st); + return (ENOMEM); + } + st->rule.ptr = r; /* XXX get pointers to nat_rule and anchor */ + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ + r->states++; + /* fill in the rest of the state entry */ pf_state_host_ntoh(&sp->lan, &st->lan); pf_state_host_ntoh(&sp->gwy, &st->gwy); @@ -226,8 +282,7 @@ pfsync_insert_net_state(struct pfsync_state *sp) pf_state_peer_ntoh(&sp->dst, &st->dst); bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); - st->hash = pf_state_hash(st); - st->creation = ntohl(sp->creation) + time_second; + st->creation = time_second - ntohl(sp->creation); st->expire = ntohl(sp->expire) + time_second; st->af = sp->af; @@ -239,11 +294,16 @@ pfsync_insert_net_state(struct pfsync_state *sp) bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; - st->sync_flags = sp->sync_flags | PFSTATE_FROMSYNC; - + st->sync_flags = PFSTATE_FROMSYNC; if (pf_insert_state(kif, st)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ + r->states--; + if (st->dst.scrub) + pool_put(&pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); pool_put(&pf_state_pl, st); return (EINVAL); } @@ -256,22 +316,25 @@ pfsync_input(struct mbuf *m, ...) 
{ struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; - struct pfsync_softc *sc = LIST_FIRST(&pfsync_list); - struct pf_state *st, key; + struct pfsync_softc *sc = pfsyncif; + struct pf_state *st; + struct pf_state_cmp key; struct pfsync_state *sp; struct pfsync_state_upd *up; struct pfsync_state_del *dp; struct pfsync_state_clr *cp; struct pfsync_state_upd_req *rup; struct pfsync_state_bus *bus; + struct pfsync_tdb *pt; struct in_addr src; struct mbuf *mp; - int iplen, action, error, i, count, offp; + int iplen, action, error, i, s, count, offp, sfail, stale = 0; + u_int8_t chksum_flag = 0; pfsyncstats.pfsyncs_ipackets++; /* verify that we have a sync interface configured */ - if (!sc->sc_sync_ifp || !pf_status.running) + if (!sc || !sc->sc_sync_ifp || !pf_status.running) goto done; /* verify that the packet came in on the right interface */ @@ -320,8 +383,12 @@ pfsync_input(struct mbuf *m, ...) /* Cheaper to grab this now than having to mess with mbufs later */ src = ip->ip_src; + if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) + chksum_flag++; + switch (action) { case PFSYNC_ACT_CLR: { + struct pf_state *nexts; struct pfi_kif *kif; u_int32_t creatorid; if ((mp = m_pulldown(m, iplen + sizeof(*ph), @@ -332,29 +399,32 @@ pfsync_input(struct mbuf *m, ...) cp = (struct pfsync_state_clr *)(mp->m_data + offp); creatorid = cp->creatorid; - crit_enter(); + s = splsoftnet(); if (cp->ifname[0] == '\0') { - RB_FOREACH(st, pf_state_tree_id, &tree_id) { - if (st->creatorid == creatorid) - st->timeout = PFTM_PURGE; + for (st = RB_MIN(pf_state_tree_id, &tree_id); + st; st = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); + if (st->creatorid == creatorid) { + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); + } } } else { - kif = pfi_lookup_if(cp->ifname); - if (kif == NULL) { - if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync_input: PFSYNC_ACT_CLR " - "bad interface: %s\n", cp->ifname); - crit_exit(); - goto done; + if ((kif = pfi_kif_get(cp->ifname)) == NULL) { + splx(s); + return; } - RB_FOREACH(st, pf_state_tree_lan_ext, - &kif->pfik_lan_ext) { - if (st->creatorid == creatorid) - st->timeout = PFTM_PURGE; + for (st = RB_MIN(pf_state_tree_lan_ext, + &kif->pfik_lan_ext); st; st = nexts) { + nexts = RB_NEXT(pf_state_tree_lan_ext, + &kif->pfik_lan_ext, st); + if (st->creatorid == creatorid) { + st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); + } } } - pf_purge_expired_states(); - crit_exit(); + splx(s); break; } @@ -365,7 +435,7 @@ pfsync_input(struct mbuf *m, ...) return; } - crit_enter(); + s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { /* check for invalid values */ @@ -375,21 +445,22 @@ pfsync_input(struct mbuf *m, ...) sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync_insert: PFSYNC_ACT_INS: " + printf("pfsync_insert: PFSYNC_ACT_INS: " "invalid value\n"); pfsyncstats.pfsyncs_badstate++; continue; } - if ((error = pfsync_insert_net_state(sp))) { + if ((error = pfsync_insert_net_state(sp, + chksum_flag))) { if (error == ENOMEM) { - crit_exit(); + splx(s); goto done; } continue; } } - crit_exit(); + splx(s); break; case PFSYNC_ACT_UPD: if ((mp = m_pulldown(m, iplen + sizeof(*ph), @@ -398,15 +469,17 @@ pfsync_input(struct mbuf *m, ...) 
return; } - crit_enter(); + s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { + int flags = PFSYNC_FLAG_STALE; + /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync_insert: PFSYNC_ACT_UPD: " + printf("pfsync_insert: PFSYNC_ACT_UPD: " "invalid value\n"); pfsyncstats.pfsyncs_badstate++; continue; @@ -418,17 +491,79 @@ pfsync_input(struct mbuf *m, ...) st = pf_find_state_byid(&key); if (st == NULL) { /* insert the update */ - if (pfsync_insert_net_state(sp)) + if (pfsync_insert_net_state(sp, chksum_flag)) pfsyncstats.pfsyncs_badstate++; continue; } + sfail = 0; + if (st->proto == IPPROTO_TCP) { + /* + * The state should never go backwards except + * for syn-proxy states. Neither should the + * sequence window slide backwards. + */ + if (st->src.state > sp->src.state && + (st->src.state < PF_TCPS_PROXY_SRC || + sp->src.state >= PF_TCPS_PROXY_SRC)) + sfail = 1; + else if (SEQ_GT(st->src.seqlo, + ntohl(sp->src.seqlo))) + sfail = 3; + else if (st->dst.state > sp->dst.state) { + /* There might still be useful + * information about the src state here, + * so import that part of the update, + * then "fail" so we send the updated + * state back to the peer who is missing + * our what we know. */ + pf_state_peer_ntoh(&sp->src, &st->src); + /* XXX do anything with timeouts? */ + sfail = 7; + flags = 0; + } else if (st->dst.state >= TCPS_SYN_SENT && + SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) + sfail = 4; + } else { + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > sp->src.state) + sfail = 5; + else if (st->dst.state > sp->dst.state) + sfail = 6; + } + if (sfail) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: %s stale update " + "(%d) id: %016llx " + "creatorid: %08x\n", + (sfail < 7 ? "ignoring" + : "partial"), sfail, + betoh64(st->id), + ntohl(st->creatorid)); + pfsyncstats.pfsyncs_badstate++; + + if (!(sp->sync_flags & PFSTATE_STALE)) { + /* we have a better state, send it */ + if (sc->sc_mbuf != NULL && !stale) + pfsync_sendout(sc); + stale++; + if (!st->sync_flags) + pfsync_pack_state( + PFSYNC_ACT_UPD, st, flags); + } + continue; + } + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = ntohl(sp->expire) + time_second; st->timeout = sp->timeout; - } - crit_exit(); + if (stale && sc->sc_mbuf != NULL) + pfsync_sendout(sc); + splx(s); break; /* * It's not strictly necessary for us to support the "uncompressed" @@ -441,7 +576,7 @@ pfsync_input(struct mbuf *m, ...) return; } - crit_enter(); + s = splsoftnet(); for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); i < count; i++, sp++) { bcopy(sp->id, &key.id, sizeof(key.id)); @@ -452,16 +587,10 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - /* - * XXX - * pf_purge_expired_states() is expensive, - * we really want to purge the state directly. - */ - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } - pf_purge_expired_states(); - crit_exit(); + splx(s); break; case PFSYNC_ACT_UPD_C: { int update_requested = 0; @@ -472,7 +601,7 @@ pfsync_input(struct mbuf *m, ...) 
return; } - crit_enter(); + s = splsoftnet(); for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); i < count; i++, up++) { /* check for invalid values */ @@ -480,7 +609,7 @@ pfsync_input(struct mbuf *m, ...) up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync_insert: " + printf("pfsync_insert: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); pfsyncstats.pfsyncs_badstate++; @@ -493,19 +622,74 @@ pfsync_input(struct mbuf *m, ...) st = pf_find_state_byid(&key); if (st == NULL) { /* We don't have this state. Ask for it. */ - pfsync_request_update(up, &src); + error = pfsync_request_update(up, &src); + if (error == ENOMEM) { + splx(s); + goto done; + } update_requested = 1; pfsyncstats.pfsyncs_badstate++; continue; } + sfail = 0; + if (st->proto == IPPROTO_TCP) { + /* + * The state should never go backwards except + * for syn-proxy states. Neither should the + * sequence window slide backwards. + */ + if (st->src.state > up->src.state && + (st->src.state < PF_TCPS_PROXY_SRC || + up->src.state >= PF_TCPS_PROXY_SRC)) + sfail = 1; + else if (st->dst.state > up->dst.state) + sfail = 2; + else if (SEQ_GT(st->src.seqlo, + ntohl(up->src.seqlo))) + sfail = 3; + else if (st->dst.state >= TCPS_SYN_SENT && + SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) + sfail = 4; + } else { + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > up->src.state) + sfail = 5; + else if (st->dst.state > up->dst.state) + sfail = 6; + } + if (sfail) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: ignoring stale update " + "(%d) id: %016llx " + "creatorid: %08x\n", sfail, + betoh64(st->id), + ntohl(st->creatorid)); + pfsyncstats.pfsyncs_badstate++; + + /* we have a better state, send it out */ + if ((!stale || update_requested) && + sc->sc_mbuf != NULL) { + pfsync_sendout(sc); + update_requested = 0; + } + stale++; + if (!st->sync_flags) + pfsync_pack_state(PFSYNC_ACT_UPD, st, + PFSYNC_FLAG_STALE); + continue; + } + pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = ntohl(up->expire) + time_second; st->timeout = up->timeout; } - if (update_requested) + if ((update_requested || stale) && sc->sc_mbuf) pfsync_sendout(sc); - crit_exit(); + splx(s); break; } case PFSYNC_ACT_DEL_C: @@ -515,7 +699,7 @@ pfsync_input(struct mbuf *m, ...) return; } - crit_enter(); + s = splsoftnet(); for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); i < count; i++, dp++) { bcopy(dp->id, &key.id, sizeof(key.id)); @@ -526,16 +710,10 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - /* - * XXX - * pf_purge_expired_states() is expensive, - * we really want to purge the state directly. - */ - st->timeout = PFTM_PURGE; st->sync_flags |= PFSTATE_FROMSYNC; + pf_unlink_state(st); } - pf_purge_expired_states(); - crit_exit(); + splx(s); break; case PFSYNC_ACT_INS_F: case PFSYNC_ACT_DEL_F: @@ -548,8 +726,7 @@ pfsync_input(struct mbuf *m, ...) return; } - crit_enter(); - /* XXX send existing. pfsync_pack_state should handle this. */ + s = splsoftnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); for (i = 0, @@ -559,26 +736,30 @@ pfsync_input(struct mbuf *m, ...) 
key.creatorid = rup->creatorid; if (key.id == 0 && key.creatorid == 0) { - sc->sc_ureq_received = mycpu->gd_time_seconds; + sc->sc_ureq_received = time_uptime; + if (sc->sc_bulk_send_next == NULL) + sc->sc_bulk_send_next = + TAILQ_FIRST(&state_list); + sc->sc_bulk_terminator = sc->sc_bulk_send_next; if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: received " + printf("pfsync: received " "bulk update request\n"); pfsync_send_bus(sc, PFSYNC_BUS_START); - callout_reset(&sc->sc_bulk_tmo, 1 * hz, - pfsync_bulk_update, - LIST_FIRST(&pfsync_list)); + timeout_add(&sc->sc_bulk_tmo, 1 * hz); } else { st = pf_find_state_byid(&key); if (st == NULL) { pfsyncstats.pfsyncs_badstate++; continue; } - pfsync_pack_state(PFSYNC_ACT_UPD, st, 0); + if (!st->sync_flags) + pfsync_pack_state(PFSYNC_ACT_UPD, + st, 0); } } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); - crit_exit(); + splx(s); break; case PFSYNC_ACT_BUS: /* If we're not waiting for a bulk update, who cares. */ @@ -593,33 +774,48 @@ pfsync_input(struct mbuf *m, ...) bus = (struct pfsync_state_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: - callout_reset(&sc->sc_bulkfail_tmo, + timeout_add(&sc->sc_bulkfail_tmo, pf_pool_limits[PF_LIMIT_STATES].limit / - (PFSYNC_BULKPACKETS * sc->sc_maxcount), - pfsync_bulkfail, LIST_FIRST(&pfsync_list)); + (PFSYNC_BULKPACKETS * sc->sc_maxcount)); if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: received bulk " + printf("pfsync: received bulk " "update start\n"); break; case PFSYNC_BUS_END: - if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= + if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; - callout_stop(&sc->sc_bulkfail_tmo); + timeout_del(&sc->sc_bulkfail_tmo); +#if NCARP > 0 + if (!pfsync_sync_ok) + carp_group_demote_adj(&sc->sc_if, -1); +#endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: received valid " + printf("pfsync: received valid " "bulk update end\n"); } else { if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: received invalid " + printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } break; + case PFSYNC_ACT_TDB_UPD: + if ((mp = m_pulldown(m, iplen + sizeof(*ph), + count * sizeof(*pt), &offp)) == NULL) { + pfsyncstats.pfsyncs_badlen++; + return; + } + s = splsoftnet(); + for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); + i < count; i++, pt++) + pfsync_update_net_tdb(pt); + splx(s); + break; } done: @@ -637,14 +833,15 @@ pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, /* ARGSUSED */ int -pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) +pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { + struct proc *p = curproc; struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct ip_moptions *imo = &sc->sc_imo; struct pfsyncreq pfsyncr; struct ifnet *sifp; - int error; + int s, error; switch (cmd) { case SIOCSIFADDR: @@ -661,49 +858,59 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) return (EINVAL); if (ifr->ifr_mtu > MCLBYTES) ifr->ifr_mtu = MCLBYTES; - crit_enter(); + s = splnet(); if (ifr->ifr_mtu < ifp->if_mtu) pfsync_sendout(sc); pfsync_setmtu(sc, ifr->ifr_mtu); - crit_exit(); + splx(s); break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); if (sc->sc_sync_ifp) - strlcpy(pfsyncr.pfsyncr_syncif, + strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_ifp->if_xname, IFNAMSIZ); + 
pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) return (error); break; case SIOCSETPFSYNC: - if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) + if ((error = suser(p, p->p_acflag)) != 0) return (error); if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) return (error); + if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) + sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; + else + sc->sc_sync_peer.s_addr = + pfsyncr.pfsyncr_syncpeer.s_addr; + if (pfsyncr.pfsyncr_maxupdates > 255) return (EINVAL); sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; - if (pfsyncr.pfsyncr_syncif[0] == 0) { + if (pfsyncr.pfsyncr_syncdev[0] == 0) { sc->sc_sync_ifp = NULL; if (sc->sc_mbuf_net != NULL) { /* Don't keep stale pfsync packets around. */ - crit_enter(); + s = splnet(); m_freem(sc->sc_mbuf_net); sc->sc_mbuf_net = NULL; sc->sc_statep_net.s = NULL; - crit_exit(); + splx(s); + } + if (imo->imo_num_memberships > 0) { + in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); + imo->imo_multicast_ifp = NULL; } break; } - if ((sifp = ifunit(pfsyncr.pfsyncr_syncif)) == NULL) + + if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); - else if (sifp == sc->sc_sync_ifp) - break; - crit_enter(); + s = splnet(); if (sifp->if_mtu < sc->sc_if.if_mtu || (sc->sc_sync_ifp != NULL && sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || @@ -718,32 +925,50 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) imo->imo_multicast_ifp = NULL; } - if (sc->sc_sync_ifp) { + if (sc->sc_sync_ifp && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { struct in_addr addr; - addr.s_addr = htonl(INADDR_PFSYNC_GROUP); - /* XXX do we only use one group? Also see above */ + if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { + sc->sc_sync_ifp = NULL; + splx(s); + return (EADDRNOTAVAIL); + } + + addr.s_addr = INADDR_PFSYNC_GROUP; + if ((imo->imo_membership[0] = in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) { - crit_exit(); + sc->sc_sync_ifp = NULL; + splx(s); return (ENOBUFS); } imo->imo_num_memberships++; imo->imo_multicast_ifp = sc->sc_sync_ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; + } + if (sc->sc_sync_ifp || + sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { /* Request a full state table update. 
*/ - sc->sc_ureq_sent = mycpu->gd_time_seconds; + sc->sc_ureq_sent = time_uptime; +#if NCARP > 0 + if (pfsync_sync_ok) + carp_group_demote_adj(&sc->sc_if, 1); +#endif pfsync_sync_ok = 0; if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: requesting bulk update\n"); - callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, - pfsync_bulkfail, LIST_FIRST(&pfsync_list)); - pfsync_request_update(NULL, NULL); + printf("pfsync: requesting bulk update\n"); + timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + error = pfsync_request_update(NULL, NULL); + if (error == ENOMEM) { + splx(s); + return (ENOMEM); + } pfsync_sendout(sc); } - crit_exit(); + splx(s); break; @@ -779,7 +1004,7 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) struct mbuf *m; int len; - MGETHDR(m, MB_DONTWAIT, MT_DATA); + MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) { sc->sc_if.if_oerrors++; return (NULL); @@ -806,6 +1031,10 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) len = sizeof(struct pfsync_header) + sizeof(struct pfsync_state_bus); break; + case PFSYNC_ACT_TDB_UPD: + len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + + sizeof(struct pfsync_header); + break; default: len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + sizeof(struct pfsync_header); @@ -813,7 +1042,7 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) } if (len > MHLEN) { - MCLGET(m, MB_DONTWAIT); + MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); sc->sc_if.if_oerrors++; @@ -830,32 +1059,42 @@ pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) h->af = 0; h->count = 0; h->action = action; + if (action != PFSYNC_ACT_TDB_UPD) + bcopy(&pf_status.pf_chksum, &h->pf_chksum, + PF_MD5_DIGEST_LENGTH); *sp = (void *)((char *)h + PFSYNC_HDRLEN); - callout_reset(&sc->sc_tmo, hz, pfsync_timeout, - LIST_FIRST(&pfsync_list)); + if (action == PFSYNC_ACT_TDB_UPD) + timeout_add(&sc->sc_tdb_tmo, hz); + else + timeout_add(&sc->sc_tmo, hz); return (m); } int -pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) +pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) { - struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_header *h, *h_net; struct pfsync_state *sp = NULL; struct pfsync_state_upd *up = NULL; struct pfsync_state_del *dp = NULL; struct pf_rule *r; u_long secs; - int ret = 0; + int s, ret = 0; u_int8_t i = 255, newaction = 0; + if (sc == NULL) + return (0); + ifp = &sc->sc_if; + /* * If a packet falls in the forest and there's nobody around to * hear, does it make a sound? */ - if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL) { + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { /* Don't leave any stale pfsync packets hanging around. 
*/ if (sc->sc_mbuf != NULL) { m_freem(sc->sc_mbuf); @@ -868,11 +1107,11 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) if (action >= PFSYNC_ACT_MAX) return (EINVAL); - crit_enter(); + s = splnet(); if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, (void *)&sc->sc_statep.s)) == NULL) { - crit_exit(); + splx(s); return (ENOMEM); } h = mtod(sc->sc_mbuf, struct pfsync_header *); @@ -882,7 +1121,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, (void *)&sc->sc_statep.s)) == NULL) { - crit_exit(); + splx(s); return (ENOMEM); } h = mtod(sc->sc_mbuf, struct pfsync_header *); @@ -911,9 +1150,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) secs = time_second; - st->pfsync_time = mycpu->gd_time_seconds; - TAILQ_REMOVE(&state_updates, st, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates); + st->pfsync_time = time_uptime; if (sp == NULL) { /* not a "duplicate" update */ @@ -935,10 +1172,10 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(secs - st->creation); - sp->packets[0] = htonl(st->packets[0]); - sp->packets[1] = htonl(st->packets[1]); - sp->bytes[0] = htonl(st->bytes[0]); - sp->bytes[1] = htonl(st->bytes[1]); + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); if ((r = st->rule.ptr) == NULL) sp->rule = htonl(-1); else @@ -954,7 +1191,8 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) sp->allow_opts = st->allow_opts; sp->timeout = st->timeout; - sp->sync_flags = st->sync_flags & PFSTATE_NOSYNC; + if (flags & PFSYNC_FLAG_STALE) + sp->sync_flags |= PFSTATE_STALE; } pf_state_peer_hton(&st->src, &sp->src); @@ -966,7 +1204,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) sp->expire = htonl(st->expire - secs); /* do we need to build "compressed" actions for network transfer? 
*/ - if (sc->sc_sync_ifp && compress) { + if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { switch (action) { case PFSYNC_ACT_UPD: newaction = PFSYNC_ACT_UPD_C; @@ -984,7 +1222,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) if (sc->sc_mbuf_net == NULL) { if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, (void *)&sc->sc_statep_net.s)) == NULL) { - crit_exit(); + splx(s); return (ENOMEM); } } @@ -1028,7 +1266,7 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) ret = pfsync_sendout(sc); - crit_exit(); + splx(s); return (ret); } @@ -1036,26 +1274,28 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress) int pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) { - struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if; + struct ifnet *ifp = NULL; struct pfsync_header *h; - struct pfsync_softc *sc = ifp->if_softc; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_upd_req *rup; int ret = 0; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) { + (void *)&sc->sc_statep.s)) == NULL) return (ENOMEM); - } h = mtod(sc->sc_mbuf, struct pfsync_header *); } else { h = mtod(sc->sc_mbuf, struct pfsync_header *); if (h->action != PFSYNC_ACT_UREQ) { pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, - (void *)&sc->sc_statep.s)) == NULL) { + (void *)&sc->sc_statep.s)) == NULL) return (ENOMEM); - } h = mtod(sc->sc_mbuf, struct pfsync_header *); } } @@ -1080,17 +1320,21 @@ pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) int pfsync_clear_states(u_int32_t creatorid, char *ifname) { - struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if; - struct pfsync_softc *sc = ifp->if_softc; + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; struct pfsync_state_clr *cp; - int ret; + int s, ret; - crit_enter(); + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; + s = splnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, (void *)&sc->sc_statep.c)) == NULL) { - crit_exit(); + splx(s); return (ENOMEM); } sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); @@ -1100,7 +1344,7 @@ pfsync_clear_states(u_int32_t creatorid, char *ifname) strlcpy(cp->ifname, ifname, IFNAMSIZ); ret = (pfsync_sendout(sc)); - crit_exit(); + splx(s); return (ret); } @@ -1108,12 +1352,25 @@ void pfsync_timeout(void *v) { struct pfsync_softc *sc = v; + int s; - crit_enter(); + s = splnet(); pfsync_sendout(sc); - crit_exit(); + splx(s); } +void +pfsync_tdb_timeout(void *v) +{ + struct pfsync_softc *sc = v; + int s; + + s = splnet(); + pfsync_tdb_sendout(sc); + splx(s); +} + +/* This must be called in splnet() */ void pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) { @@ -1129,7 +1386,7 @@ pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) bus = sc->sc_statep.b; bus->creatorid = pf_status.hostid; bus->status = status; - bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); + bus->endtime = htonl(time_uptime - sc->sc_ureq_received); pfsync_sendout(sc); } } @@ -1138,10 +1395,10 @@ void pfsync_bulk_update(void *v) { struct pfsync_softc *sc = v; - int i = 0; + int s, i = 0; struct pf_state *state; - crit_enter(); + s = splnet(); if (sc->sc_mbuf != NULL) pfsync_sendout(sc); @@ -1149,65 +1406,89 @@ pfsync_bulk_update(void 
*v) * Grab at most PFSYNC_BULKPACKETS worth of states which have not * been sent since the latest request was made. */ - while ((state = TAILQ_FIRST(&state_updates)) != NULL && - ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) { - if (state->pfsync_time > sc->sc_ureq_received) { - /* we're done */ - pfsync_send_bus(sc, PFSYNC_BUS_END); - sc->sc_ureq_received = 0; - callout_stop(&sc->sc_bulk_tmo); - if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: bulk update complete\n"); - break; - } else { - /* send an update and move to end of list */ - if (!state->sync_flags) + state = sc->sc_bulk_send_next; + if (state) + do { + /* send state update if syncable and not already sent */ + if (!state->sync_flags + && state->timeout < PFTM_MAX + && state->pfsync_time <= sc->sc_ureq_received) { pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); - state->pfsync_time = mycpu->gd_time_seconds; - TAILQ_REMOVE(&state_updates, state, u.s.entry_updates); - TAILQ_INSERT_TAIL(&state_updates, state, - u.s.entry_updates); - - /* look again for more in a bit */ - callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, - LIST_FIRST(&pfsync_list)); - } + i++; + } + + /* figure next state to send */ + state = TAILQ_NEXT(state, u.s.entry_list); + + /* wrap to start of list if we hit the end */ + if (!state) + state = TAILQ_FIRST(&state_list); + } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && + state != sc->sc_bulk_terminator); + + if (!state || state == sc->sc_bulk_terminator) { + /* we're done */ + pfsync_send_bus(sc, PFSYNC_BUS_END); + sc->sc_ureq_received = 0; + sc->sc_bulk_send_next = NULL; + sc->sc_bulk_terminator = NULL; + timeout_del(&sc->sc_bulk_tmo); + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: bulk update complete\n"); + } else { + /* look again for more in a bit */ + timeout_add(&sc->sc_bulk_tmo, 1); + sc->sc_bulk_send_next = state; } if (sc->sc_mbuf != NULL) pfsync_sendout(sc); - crit_exit(); + splx(s); } void pfsync_bulkfail(void *v) { struct pfsync_softc *sc = v; + int s, error; if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again in a bit */ - callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, - LIST_FIRST(&pfsync_list)); - pfsync_request_update(NULL, NULL); - pfsync_sendout(sc); + timeout_add(&sc->sc_bulkfail_tmo, 5 * hz); + s = splnet(); + error = pfsync_request_update(NULL, NULL); + if (error == ENOMEM) { + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync: cannot allocate mbufs for " + "bulk update\n"); + } else + pfsync_sendout(sc); + splx(s); } else { /* Pretend like the transfer was ok */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; +#if NCARP > 0 + if (!pfsync_sync_ok) + carp_group_demote_adj(&sc->sc_if, -1); +#endif pfsync_sync_ok = 1; if (pf_status.debug >= PF_DEBUG_MISC) - kprintf("pfsync: failed to receive " + printf("pfsync: failed to receive " "bulk update status\n"); - callout_stop(&sc->sc_bulkfail_tmo); + timeout_del(&sc->sc_bulkfail_tmo); } } +/* This must be called in splnet() */ int pfsync_sendout(struct pfsync_softc *sc) { +#if NBPFILTER > 0 struct ifnet *ifp = &sc->sc_if; +#endif struct mbuf *m; - callout_stop(&sc->sc_tmo); + timeout_del(&sc->sc_tmo); if (sc->sc_mbuf == NULL) return (0); @@ -1215,8 +1496,10 @@ pfsync_sendout(struct pfsync_softc *sc) sc->sc_mbuf = NULL; sc->sc_statep.s = NULL; - KASSERT(m != NULL, ("pfsync_sendout: null mbuf")); - BPF_MTAP(ifp, m); +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif if (sc->sc_mbuf_net) { m_freem(m); @@ -1225,12 +1508,42 @@ pfsync_sendout(struct pfsync_softc 
*sc) sc->sc_statep_net.s = NULL; } - if (sc->sc_sync_ifp) { - struct ip *ip; - struct ifaddr *ifa; - struct sockaddr sa; + return pfsync_sendout_mbuf(sc, m); +} + +int +pfsync_tdb_sendout(struct pfsync_softc *sc) +{ +#if NBPFILTER > 0 + struct ifnet *ifp = &sc->sc_if; +#endif + struct mbuf *m; + + timeout_del(&sc->sc_tdb_tmo); + + if (sc->sc_mbuf_tdb == NULL) + return (0); + m = sc->sc_mbuf_tdb; + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + + return pfsync_sendout_mbuf(sc, m); +} - M_PREPEND(m, sizeof(struct ip), MB_DONTWAIT); +int +pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) +{ + struct sockaddr sa; + struct ip *ip; + + if (sc->sc_sync_ifp || + sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { + M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (m == NULL) { pfsyncstats.pfsyncs_onomem++; return (0); @@ -1239,28 +1552,20 @@ pfsync_sendout(struct pfsync_softc *sc) ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; - ip->ip_len = m->m_pkthdr.len; -#ifdef RANDOM_IP_ID - ip->ip_id = ip_randomid(); -#else - ip->ip_id = ntohs(ip_id++); -#endif - ip->ip_off = IP_DF; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_id = htons(ip_randomid()); + ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_sum = 0; bzero(&sa, sizeof(sa)); - sa.sa_family = AF_INET; - ifa = ifaof_ifpforaddr(&sa, sc->sc_sync_ifp); - if (ifa == NULL) - return (0); - ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; + ip->ip_src.s_addr = INADDR_ANY; - if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP)) + if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) m->m_flags |= M_MCAST; ip->ip_dst = sc->sc_sendaddr; - sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); + sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; pfsyncstats.pfsyncs_opackets++; @@ -1272,39 +1577,153 @@ pfsync_sendout(struct pfsync_softc *sc) return (0); } -static int -pfsync_modevent(module_t mod, int type, void *data) +/* Update an in-kernel tdb. Silently fail if no tdb is found. */ +void +pfsync_update_net_tdb(struct pfsync_tdb *pt) { - int error = 0; + struct tdb *tdb; + int s; + + /* check for invalid values */ + if (ntohl(pt->spi) <= SPI_RESERVED_MAX || + (pt->dst.sa.sa_family != AF_INET && + pt->dst.sa.sa_family != AF_INET6)) + goto bad; + + s = spltdb(); + tdb = gettdb(pt->spi, &pt->dst, pt->sproto); + if (tdb) { + pt->rpl = ntohl(pt->rpl); + pt->cur_bytes = betoh64(pt->cur_bytes); + + /* Neither replay nor byte counter should ever decrease. 
*/ + if (pt->rpl < tdb->tdb_rpl || + pt->cur_bytes < tdb->tdb_cur_bytes) { + splx(s); + goto bad; + } - switch (type) { - case MOD_LOAD: - LIST_INIT(&pfsync_list); - if_clone_attach(&pfsync_cloner); - break; + tdb->tdb_rpl = pt->rpl; + tdb->tdb_cur_bytes = pt->cur_bytes; + } + splx(s); + return; + + bad: + if (pf_status.debug >= PF_DEBUG_MISC) + printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " + "invalid value\n"); + pfsyncstats.pfsyncs_badstate++; + return; +} - case MOD_UNLOAD: - if_clone_detach(&pfsync_cloner); - while (!LIST_EMPTY(&pfsync_list)) - pfsync_clone_destroy( - &LIST_FIRST(&pfsync_list)->sc_if); - break; +/* One of our local tdbs have been updated, need to sync rpl with others */ +int +pfsync_update_tdb(struct tdb *tdb, int output) +{ + struct ifnet *ifp = NULL; + struct pfsync_softc *sc = pfsyncif; + struct pfsync_header *h; + struct pfsync_tdb *pt = NULL; + int s, i, ret; - default: - error = EINVAL; - break; + if (sc == NULL) + return (0); + + ifp = &sc->sc_if; + if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && + sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { + /* Don't leave any stale pfsync packets hanging around. */ + if (sc->sc_mbuf_tdb != NULL) { + m_freem(sc->sc_mbuf_tdb); + sc->sc_mbuf_tdb = NULL; + sc->sc_statep_tdb.t = NULL; + } + return (0); } - return error; -} + s = splnet(); + if (sc->sc_mbuf_tdb == NULL) { + if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD, + (void *)&sc->sc_statep_tdb.t)) == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else { + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + if (h->action != PFSYNC_ACT_TDB_UPD) { + /* + * XXX will never happen as long as there's + * only one "TDB action". + */ + pfsync_tdb_sendout(sc); + sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, + PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t); + if (sc->sc_mbuf_tdb == NULL) { + splx(s); + return (ENOMEM); + } + h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *); + } else if (sc->sc_maxupdates) { + /* + * If it's an update, look in the packet to see if + * we already have an update for the state. + */ + struct pfsync_tdb *u = + (void *)((char *)h + PFSYNC_HDRLEN); + + for (i = 0; !pt && i < h->count; i++) { + if (tdb->tdb_spi == u->spi && + tdb->tdb_sproto == u->sproto && + !bcmp(&tdb->tdb_dst, &u->dst, + SA_LEN(&u->dst.sa))) { + pt = u; + pt->updates++; + } + u++; + } + } + } -static moduledata_t pfsync_mod = { - "pfsync", - pfsync_modevent, - 0 -}; + if (pt == NULL) { + /* not a "duplicate" update */ + pt = sc->sc_statep_tdb.t++; + sc->sc_mbuf_tdb->m_pkthdr.len = + sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb); + h->count++; + bzero(pt, sizeof(*pt)); -#define PFSYNC_MODVER 1 + pt->spi = tdb->tdb_spi; + memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst); + pt->sproto = tdb->tdb_sproto; + } -DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); -MODULE_VERSION(pfsync, PFSYNC_MODVER); + /* + * When a failover happens, the master's rpl is probably above + * what we see here (we may be up to a second late), so + * increase it a bit for outbound tdbs to manage most such + * situations. + * + * For now, just add an offset that is likely to be larger + * than the number of packets we can see in one second. The RFC + * just says the next packet must have a higher seq value. + * + * XXX What is a good algorithm for this? We could use + * a rate-determined increase, but to know it, we would have + * to extend struct tdb. 
+ * XXX pt->rpl can wrap over MAXINT, but if so the real tdb + * will soon be replaced anyway. For now, just don't handle + * this edge case. + */ +#define RPL_INCR 16384 + pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0)); + pt->cur_bytes = htobe64(tdb->tdb_cur_bytes); + + if (h->count == sc->sc_maxcount || + (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates))) + ret = pfsync_tdb_sendout(sc); + + splx(s); + return (ret); +} diff --git a/sys/net/pf/if_pfsync.h b/sys/net/pf/if_pfsync.h index c9b86a67f8..5ed465e716 100644 --- a/sys/net/pf/if_pfsync.h +++ b/sys/net/pf/if_pfsync.h @@ -1,10 +1,6 @@ -/* $FreeBSD: src/sys/contrib/pf/net/if_pfsync.h,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */ -/* $OpenBSD: if_pfsync.h,v 1.13 2004/03/22 04:54:17 mcbride Exp $ */ -/* $DragonFly: src/sys/net/pf/if_pfsync.h,v 1.2 2004/09/20 01:43:13 dillon Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */ /* - * Copyright (c) 2004 The DragonFly Project. All rights reserved. - * * Copyright (c) 2001 Michael Shalayeff * All rights reserved. * @@ -33,23 +29,13 @@ #ifndef _NET_IF_PFSYNC_H_ #define _NET_IF_PFSYNC_H_ -#include - -/* - * pfvar.h is required to get struct pf_addr. Also kdump and other utilities - * blindly include header files to try to get all the ioctl constants and - * buildworld will fail without this. We need a better way XXX - */ -#ifndef _NET_PFVAR_H_ -#include "pfvar.h" -#endif - #define PFSYNC_ID_LEN sizeof(u_int64_t) struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; @@ -69,8 +55,7 @@ struct pfsync_state_peer { u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int8_t scrub_flag; - u_int8_t pad[5]; + u_int8_t pad[6]; } __packed; struct pfsync_state { @@ -87,8 +72,8 @@ struct pfsync_state { u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; - u_int32_t packets[2]; - u_int32_t bytes[2]; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; @@ -100,6 +85,19 @@ struct pfsync_state { u_int8_t updates; } __packed; +#define PFSYNC_FLAG_COMPRESS 0x01 +#define PFSYNC_FLAG_STALE 0x02 + +struct pfsync_tdb { + u_int32_t spi; + union sockaddr_union dst; + u_int32_t rpl; + u_int64_t cur_bytes; + u_int8_t sproto; + u_int8_t updates; + u_int8_t pad[2]; +} __packed; + struct pfsync_state_upd { u_int32_t id[2]; struct pfsync_state_peer src; @@ -155,6 +153,10 @@ union sc_statep { struct pfsync_state_upd_req *r; }; +union sc_tdb_statep { + struct pfsync_tdb *t; +}; + extern int pfsync_sync_ok; struct pfsync_softc { @@ -162,27 +164,34 @@ struct pfsync_softc { struct ifnet *sc_sync_ifp; struct ip_moptions sc_imo; - struct callout sc_tmo; - struct callout sc_bulk_tmo; - struct callout sc_bulkfail_tmo; + struct timeout sc_tmo; + struct timeout sc_tdb_tmo; + struct timeout sc_bulk_tmo; + struct timeout sc_bulkfail_tmo; + struct in_addr sc_sync_peer; struct in_addr sc_sendaddr; - struct mbuf *sc_mbuf; /* current cummulative mbuf */ - struct mbuf *sc_mbuf_net; /* current cummulative mbuf */ + struct mbuf *sc_mbuf; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_net; /* current cumulative mbuf */ + struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */ union sc_statep sc_statep; union sc_statep sc_statep_net; + union sc_tdb_statep sc_statep_tdb; u_int32_t sc_ureq_received; u_int32_t 
sc_ureq_sent; + struct pf_state *sc_bulk_send_next; + struct pf_state *sc_bulk_terminator; int sc_bulk_tries; int sc_maxcount; /* number of states in mtu */ int sc_maxupdates; /* number of updates/state */ - LIST_ENTRY(pfsync_softc) sc_next; }; + +extern struct pfsync_softc *pfsyncif; #endif struct pfsync_header { u_int8_t version; -#define PFSYNC_VERSION 2 +#define PFSYNC_VERSION 3 u_int8_t af; u_int8_t action; #define PFSYNC_ACT_CLR 0 /* clear all states */ @@ -195,48 +204,51 @@ struct pfsync_header { #define PFSYNC_ACT_DEL_F 7 /* delete fragments */ #define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */ #define PFSYNC_ACT_BUS 9 /* Bulk Update Status */ -#define PFSYNC_ACT_MAX 10 +#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */ +#define PFSYNC_ACT_MAX 11 u_int8_t count; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; } __packed; #define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */ -#define PFSYNC_MAX_BULKTRIES 12 +#define PFSYNC_MAX_BULKTRIES 12 #define PFSYNC_HDRLEN sizeof(struct pfsync_header) #define PFSYNC_ACTIONS \ "CLR ST", "INS ST", "UPD ST", "DEL ST", \ "UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \ - "UPD REQ", "BLK UPD STAT" + "UPD REQ", "BLK UPD STAT", "TDB UPD" #define PFSYNC_DFLTTL 255 struct pfsyncstats { - u_long pfsyncs_ipackets; /* total input packets, IPv4 */ - u_long pfsyncs_ipackets6; /* total input packets, IPv6 */ - u_long pfsyncs_badif; /* not the right interface */ - u_long pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ - u_long pfsyncs_hdrops; /* packets shorter than header */ - u_long pfsyncs_badver; /* bad (incl unsupp) version */ - u_long pfsyncs_badact; /* bad action */ - u_long pfsyncs_badlen; /* data length does not match */ - u_long pfsyncs_badauth; /* bad authentication */ - u_long pfsyncs_badstate; /* insert/lookup failed */ - - u_long pfsyncs_opackets; /* total output packets, IPv4 */ - u_long pfsyncs_opackets6; /* total output packets, IPv6 */ - u_long pfsyncs_onomem; /* no memory for an mbuf for a send */ - u_long pfsyncs_oerrors; /* ip output error */ + u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ + u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */ + u_int64_t pfsyncs_badif; /* not the right interface */ + u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ + u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */ + u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */ + u_int64_t pfsyncs_badact; /* bad action */ + u_int64_t pfsyncs_badlen; /* data length does not match */ + u_int64_t pfsyncs_badauth; /* bad authentication */ + u_int64_t pfsyncs_stale; /* stale state */ + u_int64_t pfsyncs_badval; /* bad values */ + u_int64_t pfsyncs_badstate; /* insert/lookup failed */ + + u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */ + u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ + u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ + u_int64_t pfsyncs_oerrors; /* ip output error */ }; /* * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC */ struct pfsyncreq { - char pfsyncr_syncif[IFNAMSIZ]; - int pfsyncr_maxupdates; - int pfsyncr_authlevel; + char pfsyncr_syncdev[IFNAMSIZ]; + struct in_addr pfsyncr_syncpeer; + int pfsyncr_maxupdates; + int pfsyncr_authlevel; }; -#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) -#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) #define pf_state_peer_hton(s,d) do { \ @@ -247,6 +259,13 @@ struct pfsyncreq { (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + 
(d)->scrub.pfss_flags = \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ } while (0) #define pf_state_peer_ntoh(s,d) do { \ @@ -257,6 +276,13 @@ struct pfsyncreq { (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ + } \ } while (0) #define pf_state_host_hton(s,d) do { \ @@ -269,6 +295,17 @@ struct pfsyncreq { (d)->port = (s)->port; \ } while (0) +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ +} while (0) + +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) + #ifdef _KERNEL void pfsync_input(struct mbuf *, ...); int pfsync_clear_states(u_int32_t, char *); @@ -278,19 +315,22 @@ int pfsync_pack_state(u_int8_t, struct pf_state *, int); (st->proto == IPPROTO_PFSYNC)) \ st->sync_flags |= PFSTATE_NOSYNC; \ else if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_INS, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_INS, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_update_state(st) do { \ if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_UPD, (st), 1); \ + pfsync_pack_state(PFSYNC_ACT_UPD, (st), \ + PFSYNC_FLAG_COMPRESS); \ st->sync_flags &= ~PFSTATE_FROMSYNC; \ } while (0) #define pfsync_delete_state(st) do { \ if (!st->sync_flags) \ - pfsync_pack_state(PFSYNC_ACT_DEL, (st), 1); \ - st->sync_flags &= ~PFSTATE_FROMSYNC; \ + pfsync_pack_state(PFSYNC_ACT_DEL, (st), \ + PFSYNC_FLAG_COMPRESS); \ } while (0) +int pfsync_update_tdb(struct tdb *, int); #endif #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/net/pf/pf.c b/sys/net/pf/pf.c index 4afc2913f9..d600b47edb 100644 --- a/sys/net/pf/pf.c +++ b/sys/net/pf/pf.c @@ -2,6 +2,7 @@ /* $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */ /* add $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */ /* $DragonFly: src/sys/net/pf/pf.c,v 1.20 2008/06/05 18:06:32 swildner Exp $ */ +/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */ /* * Copyright (c) 2004 The DragonFly Project. All rights reserved. 
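
The wire-format change above widens pfsync's per-state packet and byte counters to 64 bits, carried as two 32-bit words in network byte order, with the new pf_state_counter_hton()/pf_state_counter_ntoh() macros in if_pfsync.h doing the split and the reassembly. As a sanity check, here is a minimal userland sketch (not part of the patch) that copies those two macros and round-trips a value through them; it substitutes uint32_t/uint64_t and htonl()/ntohl() from <arpa/inet.h> for the kernel types and headers:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <arpa/inet.h>          /* htonl()/ntohl() for the userland sketch */

/* Copied from the if_pfsync.h hunk above: split a host 64-bit counter
 * into two network-order 32-bit words, and put it back together. */
#define pf_state_counter_hton(s,d) do {                 \
        d[0] = htonl((s>>32)&0xffffffff);               \
        d[1] = htonl(s&0xffffffff);                     \
} while (0)

#define pf_state_counter_ntoh(s,d) do {                 \
        d = ntohl(s[0]);                                \
        d = d<<32;                                      \
        d += ntohl(s[1]);                               \
} while (0)

int
main(void)
{
        uint64_t pkts = 0x0000000123456789ULL;  /* arbitrary test value */
        uint32_t wire[2];                       /* as carried in struct pfsync_state */
        uint64_t back = 0;

        pf_state_counter_hton(pkts, wire);      /* host -> two wire words */
        pf_state_counter_ntoh(wire, back);      /* wire words -> host */

        assert(back == pkts);
        printf("0x%016llx -> %08x %08x -> 0x%016llx\n",
            (unsigned long long)pkts, wire[0], wire[1],
            (unsigned long long)back);
        return (0);
}

The two middle words printed are the raw network-order values, i.e. what a pfsync peer sees on the wire in the packets[][] and bytes[][] arrays of struct pfsync_state.
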
@@ -56,6 +57,8 @@ #include #include #include +#include +#include #include @@ -79,6 +82,7 @@ #include #include #include +#include #include #include @@ -103,6 +107,7 @@ #include extern int ip_optcopy(struct ip *, struct ip *); +extern int debug_pfugidhack; #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x @@ -110,8 +115,6 @@ extern int ip_optcopy(struct ip *, struct ip *); * Global variables */ -struct pf_anchorqueue pf_anchors; -struct pf_ruleset pf_main_ruleset; struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; struct pf_altqqueue *pf_altqs_active; @@ -123,20 +126,28 @@ u_int32_t ticket_altqs_inactive; int altqs_inactive_open; u_int32_t ticket_pabuf; -struct callout pf_expire_to; /* expire timeout */ +struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +} pf_anchor_stack[64]; vm_zone_t pf_src_tree_pl, pf_rule_pl; vm_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); -void pf_print_state(struct pf_state *); -void pf_print_flags(u_int8_t); -u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, - u_int8_t); +void pf_init_threshold(struct pf_threshold *, u_int32_t, + u_int32_t); +void pf_add_threshold(struct pf_threshold *); +int pf_check_threshold(struct pf_threshold *); + void pf_change_ap(struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); +int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); #ifdef INET6 void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); @@ -148,7 +159,8 @@ void pf_change_icmp(struct pf_addr *, u_int16_t *, void pf_send_tcp(const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, - u_int8_t, u_int16_t, u_int16_t, u_int8_t); + u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, + u_int16_t, struct ether_header *, struct ifnet *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, @@ -163,19 +175,19 @@ struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int pf_test_tcp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *, struct inpcb *); int pf_test_udp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *, struct inpcb *); int pf_test_icmp(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *); int pf_test_other(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, - struct pf_ruleset **); + struct pf_ruleset **, struct ifqueue *); int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, @@ -188,11 +200,14 @@ int pf_test_state_udp(struct pf_state **, int, void *, struct pf_pdesc *); int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, - void *, struct pf_pdesc *); + void *, struct pf_pdesc *, u_short *); int 
pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct pf_pdesc *); -static int pf_match_tag(struct mbuf *, struct pf_rule *, - struct pf_rule *, int *); +int pf_match_tag(struct mbuf *, struct pf_rule *, + struct pf_mtag *, int *); +int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); void pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); int pf_map_addr(u_int8_t, struct pf_rule *, @@ -203,11 +218,11 @@ int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t, struct pf_src_node **); void pf_route(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); void pf_route6(struct mbuf **, struct pf_rule *, int, - struct ifnet *, struct pf_state *); -int pf_socket_lookup(uid_t *, gid_t *, - int, struct pf_pdesc *); + struct ifnet *, struct pf_state *, + struct pf_pdesc *); u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, @@ -221,19 +236,29 @@ int pf_check_proto_cksum(struct mbuf *, int, int, int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); struct pf_state *pf_find_state_recurse(struct pfi_kif *, - struct pf_state *, u_int8_t); - -struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; + struct pf_state_cmp *, u_int8_t); +int pf_src_connlimit(struct pf_state **); +int pf_check_congestion(struct ifqueue *); + +extern int pf_end_threads; + +struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { + { &pf_state_pl, PFSTATE_HIWAT }, + { &pf_src_tree_pl, PFSNODE_HIWAT }, + { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, + { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, + { &pfr_kentry_pl, PFR_KENTRY_HIWAT } +}; #define STATE_LOOKUP() \ do { \ if (direction == PF_IN) \ - *state = pf_find_state_recurse( \ + *state = pf_find_state_recurse( \ kif, &key, PF_EXT_GWY); \ else \ *state = pf_find_state_recurse( \ kif, &key, PF_LAN_EXT); \ - if (*state == NULL) \ + if (*state == NULL || (*state)->timeout == PFTM_PURGE) \ return (PF_DROP); \ if (direction == PF_OUT && \ (((*state)->rule.ptr->rt == PF_ROUTETO && \ @@ -253,22 +278,39 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \ (s)->lan.port != (s)->gwy.port -#define BOUND_IFACE(r, k) (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : \ - ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \ - (k)->pfik_parent->pfik_parent) +#define BOUND_IFACE(r, k) \ + ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all + +#define STATE_INC_COUNTERS(s) \ + do { \ + s->rule.ptr->states++; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states++; \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states++; \ + } while (0) + +#define STATE_DEC_COUNTERS(s) \ + do { \ + if (s->nat_rule.ptr != NULL) \ + s->nat_rule.ptr->states--; \ + if (s->anchor.ptr != NULL) \ + s->anchor.ptr->states--; \ + s->rule.ptr->states--; \ + } while (0) -static int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static int pf_state_compare_lan_ext(struct pf_state *, +static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); +static __inline int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *); -static int pf_state_compare_ext_gwy(struct pf_state *, +static __inline int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *); -static int pf_state_compare_id(struct pf_state *, +static __inline int pf_state_compare_id(struct pf_state *, struct pf_state *); struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; -struct pf_state_queue state_updates; +struct pf_state_queue state_list; RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); RB_GENERATE(pf_state_tree_lan_ext, pf_state, @@ -278,7 +320,7 @@ RB_GENERATE(pf_state_tree_ext_gwy, pf_state, RB_GENERATE(pf_state_tree_id, pf_state, u.s.entry_id, pf_state_compare_id); -static int +static __inline int pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) { int diff; @@ -335,7 +377,7 @@ pf_state_hash(struct pf_state *s) return(hv); } -static int +static __inline int pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) { int diff; @@ -403,7 +445,7 @@ pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) return (0); } -static int +static __inline int pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b) { int diff; @@ -471,7 +513,7 @@ pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b) return (0); } -static int +static __inline int pf_state_compare_id(struct pf_state *a, struct pf_state *b) { if (a->id > b->id) @@ -504,17 +546,17 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) break; } } -#endif +#endif /* INET6 */ struct pf_state * -pf_find_state_byid(struct pf_state *key) +pf_find_state_byid(struct pf_state_cmp *key) { pf_status.fcounters[FCNT_STATE_SEARCH]++; - return (RB_FIND(pf_state_tree_id, &tree_id, key)); + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); } struct pf_state * -pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) +pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree) { struct pf_state *s; @@ -522,20 +564,20 @@ pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) switch (tree) { case PF_LAN_EXT: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); case PF_EXT_GWY: - for (; kif != NULL; kif = kif->pfik_parent) { - s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); - if (s != NULL) - return (s); - } + if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); + if ((s = RB_FIND(pf_state_tree_ext_gwy, 
&pfi_all->pfik_ext_gwy, + (struct pf_state *)key)) != NULL) + return (s); return (NULL); default: panic("pf_find_state_recurse"); @@ -543,7 +585,7 @@ pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree) } struct pf_state * -pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) +pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more) { struct pf_state *s, *ss = NULL; struct pfi_kif *kif; @@ -554,7 +596,7 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) case PF_LAN_EXT: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_lan_ext, - &kif->pfik_lan_ext, key); + &kif->pfik_lan_ext, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) @@ -566,7 +608,7 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) case PF_EXT_GWY: TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) { s = RB_FIND(pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy, key); + &kif->pfik_ext_gwy, (struct pf_state *)key); if (s == NULL) continue; if (more == NULL) @@ -580,6 +622,132 @@ pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more) } } +void +pf_init_threshold(struct pf_threshold *threshold, + u_int32_t limit, u_int32_t seconds) +{ + threshold->limit = limit * PF_THRESHOLD_MULT; + threshold->seconds = seconds; + threshold->count = 0; + threshold->last = time_second; +} + +void +pf_add_threshold(struct pf_threshold *threshold) +{ + u_int32_t t = time_second, diff = t - threshold->last; + + if (diff >= threshold->seconds) + threshold->count = 0; + else + threshold->count -= threshold->count * diff / + threshold->seconds; + threshold->count += PF_THRESHOLD_MULT; + threshold->last = t; +} + +int +pf_check_threshold(struct pf_threshold *threshold) +{ + return (threshold->count > threshold->limit); +} + +int +pf_src_connlimit(struct pf_state **state) +{ + struct pf_state *s; + int bad = 0; + + (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; + pf_add_threshold(&(*state)->src_node->conn_rate); + + if ((*state)->rule.ptr->max_src_conn && + (*state)->rule.ptr->max_src_conn < + (*state)->src_node->conn) { + pf_status.lcounters[LCNT_SRCCONN]++; + bad++; + } + + if ((*state)->rule.ptr->max_src_conn_rate.limit && + pf_check_threshold(&(*state)->src_node->conn_rate)) { + pf_status.lcounters[LCNT_SRCCONNRATE]++; + bad++; + } + + if (!bad) + return (0); + + if ((*state)->rule.ptr->overload_tbl) { + struct pfr_addr p; + u_int32_t killed = 0; + + pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (pf_status.debug >= PF_DEBUG_MISC) { + kprintf("pf_src_connlimit: blocking address "); + pf_print_host(&(*state)->src_node->addr, 0, + (*state)->af); + } + + bzero(&p, sizeof(p)); + p.pfra_af = (*state)->af; + switch ((*state)->af) { +#ifdef INET + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = (*state)->src_node->addr.v4; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = (*state)->src_node->addr.v6; + break; +#endif /* INET6 */ + } + + pfr_insert_kentry((*state)->rule.ptr->overload_tbl, + &p, time_second); + + /* kill existing states if that's required. */ + if ((*state)->rule.ptr->flush) { + pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + + RB_FOREACH(s, pf_state_tree_id, &tree_id) { + /* + * Kill states from this source. 
(Only those + * from the same rule if PF_FLUSH_GLOBAL is not + * set) + */ + if (s->af == (*state)->af && + (((*state)->direction == PF_OUT && + PF_AEQ(&(*state)->src_node->addr, + &s->lan.addr, s->af)) || + ((*state)->direction == PF_IN && + PF_AEQ(&(*state)->src_node->addr, + &s->ext.addr, s->af))) && + ((*state)->rule.ptr->flush & + PF_FLUSH_GLOBAL || + (*state)->rule.ptr == s->rule.ptr)) { + s->timeout = PFTM_PURGE; + s->src.state = s->dst.state = + TCPS_CLOSED; + killed++; + } + } + if (pf_status.debug >= PF_DEBUG_MISC) + kprintf(", %u states killed", killed); + } + if (pf_status.debug >= PF_DEBUG_MISC) + kprintf("\n"); + } + + /* kill this state */ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + return (1); +} + int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) @@ -601,9 +769,16 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); + else + pf_status.lcounters[LCNT_SRCNODES]++; if ((*sn) == NULL) return (-1); bzero(*sn, sizeof(struct pf_src_node)); + + pf_init_threshold(&(*sn)->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); + (*sn)->af = af; if (rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR) @@ -629,8 +804,10 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, pf_status.src_nodes++; } else { if (rule->max_src_states && - (*sn)->states >= rule->max_src_states) + (*sn)->states >= rule->max_src_states) { + pf_status.lcounters[LCNT_SRCSTATES]++; return (-1); + } } return (0); } @@ -696,11 +873,10 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state); return (-1); } - TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates); - + TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list); pf_status.fcounters[FCNT_STATE_INSERT]++; pf_status.states++; - pfi_attach_state(kif); + pfi_kif_ref(kif, PFI_KIF_REF_STATE); #if NPFSYNC pfsync_insert_state(state); #endif @@ -708,18 +884,47 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *state) } void -pf_purge_timeout(void *arg) +pf_purge_thread(void *v) { - struct callout *to = arg; + int nloops = 0; + int locked = 0; + + for (;;) { + tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); + + lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); + + if (pf_end_threads) { + pf_purge_expired_states(pf_status.states, 1); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(1); + pf_end_threads++; + + lockmgr(&pf_consistency_lock, LK_RELEASE); + wakeup(pf_purge_thread); + kthread_exit(); + } + crit_enter(); + + /* process a fraction of the state table every second */ + if(!pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) { - crit_enter(); - pf_purge_expired_states(); - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - crit_exit(); + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL]), 1); + } - callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz, - pf_purge_timeout, to); + /* purge other expired types every PFTM_INTERVAL seconds */ + if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + if (!pf_purge_expired_src_nodes(locked)) { + pf_purge_expired_src_nodes(1); + } + nloops = 0; + } + crit_exit(); + lockmgr(&pf_consistency_lock, 
LK_RELEASE); + } } u_int32_t @@ -735,8 +940,9 @@ pf_state_expires(const struct pf_state *state) return (time_second); if (state->timeout == PFTM_UNTIL_PACKET) return (0); - KASSERT((state->timeout < PFTM_MAX), - ("pf_state_expires: timeout > PFTM_MAX")); + KKASSERT(state->timeout != PFTM_UNLINKED); + KASSERT((state->timeout < PFTM_MAX), + ("pf_state_expires: timeout > PFTM_MAX")); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) timeout = pf_default_rule.timeout[state->timeout]; @@ -759,15 +965,22 @@ pf_state_expires(const struct pf_state *state) return (state->expire + timeout); } -void -pf_purge_expired_src_nodes(void) +int +pf_purge_expired_src_nodes(int waslocked) { struct pf_src_node *cur, *next; + int locked = waslocked; for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); if (cur->states <= 0 && cur->expire <= time_second) { + if (! locked) { + lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); + next = RB_NEXT(pf_src_tree, + &tree_src_tracking, cur); + locked = 1; + } if (cur->rule.ptr != NULL) { cur->rule.ptr->src_nodes--; if (cur->rule.ptr->states <= 0 && @@ -780,6 +993,10 @@ pf_purge_expired_src_nodes(void) pool_put(&pf_src_tree_pl, cur); } } + + if (locked && !waslocked) + lockmgr(&pf_consistency_lock, LK_RELEASE); + return(1); } void @@ -788,6 +1005,10 @@ pf_src_tree_remove_state(struct pf_state *s) u_int32_t timeout; if (s->src_node != NULL) { + if (s->proto == IPPROTO_TCP) { + if (s->src.tcp_est) + --s->src_node->conn; + } if (--s->src_node->states <= 0) { timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; if (!timeout) @@ -808,53 +1029,104 @@ pf_src_tree_remove_state(struct pf_state *s) s->src_node = s->nat_src_node = NULL; } -static int -pf_purge_expired_states_callback(struct pf_state *cur, void *data __unused) +/* callers should be at crit_enter() */ +void +pf_unlink_state(struct pf_state *cur) { - if (pf_state_expires(cur) <= time_second) { - RB_REMOVE(pf_state_tree_ext_gwy, - &cur->u.s.kif->pfik_ext_gwy, cur); - RB_REMOVE(pf_state_tree_lan_ext, - &cur->u.s.kif->pfik_lan_ext, cur); - RB_REMOVE(pf_state_tree_id, &tree_id, cur); - if (cur->src.state == PF_TCPS_PROXY_DST) { - pf_send_tcp(cur->rule.ptr, cur->af, - &cur->ext.addr, &cur->lan.addr, - cur->ext.port, cur->lan.port, - cur->src.seqhi, cur->src.seqlo + 1, 0, - TH_RST|TH_ACK, 0, 0); - } + if (cur->src.state == PF_TCPS_PROXY_DST) { + pf_send_tcp(cur->rule.ptr, cur->af, + &cur->ext.addr, &cur->lan.addr, + cur->ext.port, cur->lan.port, + cur->src.seqhi, cur->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); + } + RB_REMOVE(pf_state_tree_ext_gwy, + &cur->u.s.kif->pfik_ext_gwy, cur); + RB_REMOVE(pf_state_tree_lan_ext, + &cur->u.s.kif->pfik_lan_ext, cur); + RB_REMOVE(pf_state_tree_id, &tree_id, cur); #if NPFSYNC + if (cur->creatorid == pf_status.hostid) pfsync_delete_state(cur); #endif - pf_src_tree_remove_state(cur); - if (--cur->rule.ptr->states <= 0 && - cur->rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states <= 0 && - cur->nat_rule.ptr->src_nodes <= 0) - pf_rm_rule(NULL, cur->nat_rule.ptr); - if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states <= 0) - pf_rm_rule(NULL, cur->anchor.ptr); - pf_normalize_tcp_cleanup(cur); - pfi_detach_state(cur->u.s.kif); - TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates); - pool_put(&pf_state_pl, cur); - pf_status.fcounters[FCNT_STATE_REMOVALS]++; - pf_status.states--; - } - return(0); + cur->timeout = 
PFTM_UNLINKED; + pf_src_tree_remove_state(cur); } +/* callers should be at crit_enter() and hold the + * write_lock on pf_consistency_lock */ void -pf_purge_expired_states(void) +pf_free_state(struct pf_state *cur) { - RB_SCAN(pf_state_tree_id, &tree_id, NULL, - pf_purge_expired_states_callback, NULL); +#if NPFSYNC + if (pfsyncif != NULL && + (pfsyncif->sc_bulk_send_next == cur || + pfsyncif->sc_bulk_terminator == cur)) + return; +#endif + KKASSERT(cur->timeout == PFTM_UNLINKED); + if (--cur->rule.ptr->states <= 0 && + cur->rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + if (cur->nat_rule.ptr != NULL) + if (--cur->nat_rule.ptr->states <= 0 && + cur->nat_rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->nat_rule.ptr); + if (cur->anchor.ptr != NULL) + if (--cur->anchor.ptr->states <= 0) + pf_rm_rule(NULL, cur->anchor.ptr); + pf_normalize_tcp_cleanup(cur); + pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE); + TAILQ_REMOVE(&state_list, cur, u.s.entry_list); + if (cur->tag) + pf_tag_unref(cur->tag); + pool_put(&pf_state_pl, cur); + pf_status.fcounters[FCNT_STATE_REMOVALS]++; + pf_status.states--; } +int +pf_purge_expired_states(u_int32_t maxcheck, int waslocked) +{ + static struct pf_state *cur = NULL; + struct pf_state *next; + int locked = waslocked; + + while (maxcheck--) { + /* wrap to start of list when we hit the end */ + if (cur == NULL) { + cur = TAILQ_FIRST(&state_list); + if (cur == NULL) + break; /* list empty */ + } + + /* get next state, as cur may get deleted */ + next = TAILQ_NEXT(cur, u.s.entry_list); + + if (cur->timeout == PFTM_UNLINKED) { + /* free unlinked state */ + if (! locked) { + lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); + locked = 1; + } + pf_free_state(cur); + } else if (pf_state_expires(cur) <= time_second) { + /* unlink and free expired state */ + pf_unlink_state(cur); + if (! 
locked) { + if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE)) + return (0); + locked = 1; + } + pf_free_state(cur); + } + cur = next; + } + + if (locked) + lockmgr(&pf_consistency_lock, LK_RELEASE); + return (1); +} int pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) @@ -1042,14 +1314,14 @@ pf_calc_skip_steps(struct pf_rulequeue *rules) PF_SET_SKIP_STEPS(PF_SKIP_AF); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PF_SKIP_PROTO); - if (cur->src.not != prev->src.not || + if (cur->src.neg != prev->src.neg || pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); if (cur->src.port[0] != prev->src.port[0] || cur->src.port[1] != prev->src.port[1] || cur->src.port_op != prev->src.port_op) PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); - if (cur->dst.not != prev->dst.not || + if (cur->dst.neg != prev->dst.neg || pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); if (cur->dst.port[0] != prev->dst.port[0] || @@ -1079,30 +1351,18 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); + case PF_ADDR_RTLABEL: + return (aw1->v.rtlabel != aw2->v.rtlabel); default: kprintf("invalid address type: %d\n", aw1->type); return (1); } } -void -pf_update_anchor_rules(void) -{ - struct pf_rule *rule; - int i; - - for (i = 0; i < PF_RULESET_MAX; ++i) - TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, - entries) - if (rule->anchorname[0]) - rule->anchor = pf_find_anchor(rule->anchorname); - else - rule->anchor = NULL; -} - u_int16_t pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { @@ -1283,11 +1543,70 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, } } + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; + u_int8_t opts[TCP_MAXOLEN], *opt = opts; + int copyback = 0, i, olen; + struct raw_sackblock sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) + return 0; + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof(sack)); + pf_change_a(&sack.rblk_start, &th->th_sum, + htonl(ntohl(sack.rblk_start) - + dst->seqdiff), 0); + pf_change_a(&sack.rblk_end, &th->th_sum, + htonl(ntohl(sack.rblk_end) - + dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof(sack)); + } + copyback = 1; + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + + if (copyback) + m_copyback(m, off + sizeof(*th), thoptlen, opts); + return (copyback); +} + void pf_send_tcp(const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, - u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl) + u_int8_t flags, u_int16_t win, 
u_int16_t mss, u_int8_t ttl, int tag, + u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) { struct mbuf *m; int len = 0, tlen; @@ -1298,7 +1617,8 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, struct ip6_hdr *h6 = NULL; #endif /* INET6 */ struct tcphdr *th = NULL; - char *opt; + char *opt; + struct pf_mtag *pf_mtag; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); @@ -1322,15 +1642,26 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, m = m_gethdr(MB_DONTWAIT, MT_HEADER); if (m == NULL) return; - m->m_pkthdr.fw_flags = PF_MBUF_GENERATED; + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + return; + } + if (tag) + pf_mtag->flags |= PF_TAG_GENERATED; + + pf_mtag->tag = rtag; + + if (r != NULL && r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; + #ifdef ALTQ if (r != NULL && r->qid) { - m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED; - m->m_pkthdr.altq_qid = r->qid; - m->m_pkthdr.ecn_af = af; - m->m_pkthdr.header = mtod(m, struct ip *); + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m, struct ip *); } -#endif +#endif /* ALTQ */ m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; @@ -1395,7 +1726,28 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, h->ip_off = path_mtu_discovery ? IP_DF : 0; h->ip_ttl = ttl ? ttl : ip_defttl; h->ip_sum = 0; - ip_output(m, NULL, NULL, 0, NULL, NULL); + if (eh == NULL) { + ip_output(m, NULL, NULL, 0, NULL, NULL); + } else { + struct route ro; + struct rtentry rt; + struct ether_header *e = (void *)ro.ro_dst.sa_data; + + if (ifp == NULL) { + m_freem(m); + return; + } + rt.rt_ifp = ifp; + ro.ro_rt = &rt; + ro.ro_dst.sa_len = sizeof(ro.ro_dst); + ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; + bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); + bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); + e->ether_type = eh->ether_type; + /* XXX_IMPORT: later */ + ip_output(m, (void *)NULL, &ro, 0, + (void *)NULL, (void *)NULL); + } break; #endif /* INET */ #ifdef INET6 @@ -1417,21 +1769,26 @@ void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { + struct pf_mtag *pf_mtag; struct mbuf *m0; - m0 = m_copypacket(m, MB_DONTWAIT); - if (m0 == NULL) + m0 = m_copy(m, 0, M_COPYALL); + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) return; - m0->m_pkthdr.fw_flags |= PF_MBUF_GENERATED; + pf_mtag->flags |= PF_TAG_GENERATED; + + if (r->rtableid >= 0) + pf_mtag->rtableid = r->rtableid; #ifdef ALTQ if (r->qid) { - m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED; - m->m_pkthdr.altq_qid = r->qid; - m->m_pkthdr.ecn_af = af; - m->m_pkthdr.header = mtod(m0, struct ip *); + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->af = af; + pf_mtag->hdr = mtod(m0, struct ip *); } -#endif +#endif /* ALTQ */ switch (af) { #ifdef INET @@ -1544,63 +1901,135 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (pf_match(op, a1, a2, g)); } -static int -pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule, - int *tag) +struct pf_mtag * +pf_find_mtag(struct mbuf *m) { - if (*tag == -1) { /* find mbuf tag */ - if (nat_rule != NULL && nat_rule->tag) - *tag = nat_rule->tag; - else if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED) - *tag = m->m_pkthdr.pf_tag; - else - *tag = 0; + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL)) == NULL) + return (NULL); + + return ((struct pf_mtag *)(mtag + 1)); +} + +struct pf_mtag * +pf_get_mtag(struct mbuf *m) +{ + struct m_tag *mtag; + + if 
((mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL)) == NULL) { + mtag = m_tag_get(PF_MBUF_TAGGED, sizeof(struct pf_mtag), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_mtag)); + m_tag_prepend(m, mtag); } + return ((struct pf_mtag *)(mtag + 1)); +} + +int +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, + int *tag) +{ + if (*tag == -1) + *tag = pf_mtag->tag; + return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } -void -pf_tag_packet(struct mbuf *m, int tag) +int +pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid) { - if (tag <= 0) - return; + if (tag <= 0 && rtableid < 0) + return (0); + + if (pf_mtag == NULL) + if ((pf_mtag = pf_get_mtag(m)) == NULL) + return (1); + if (tag > 0) + pf_mtag->tag = tag; + if (rtableid >= 0) + pf_mtag->rtableid = rtableid; - m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED; - m->m_pkthdr.pf_tag = tag; + return (0); } -#define PF_STEP_INTO_ANCHOR(r, a, s, n) \ - do { \ - if ((r) == NULL || (r)->anchor == NULL || \ - (s) != NULL || (a) != NULL) \ - panic("PF_STEP_INTO_ANCHOR"); \ - (a) = (r); \ - (s) = TAILQ_FIRST(&(r)->anchor->rulesets); \ - (r) = NULL; \ - while ((s) != NULL && ((r) = \ - TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ - (s) = TAILQ_NEXT((s), entries); \ - if ((r) == NULL) { \ - (r) = TAILQ_NEXT((a), entries); \ - (a) = NULL; \ - } \ - } while (0) +static void +pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a, int *match) +{ + struct pf_anchor_stackframe *f; + + (*r)->anchor->match = 0; + if (match) + *match = 0; + if (*depth >= sizeof(pf_anchor_stack) / + sizeof(pf_anchor_stack[0])) { + kprintf("pf_step_into_anchor: stack overflow\n"); + *r = TAILQ_NEXT(*r, entries); + return; + } else if (*depth == 0 && a != NULL) + *a = *r; + f = pf_anchor_stack + (*depth)++; + f->rs = *rs; + f->r = *r; + if ((*r)->anchor_wildcard) { + f->parent = &(*r)->anchor->children; + if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == + NULL) { + *r = NULL; + return; + } + *rs = &f->child->ruleset; + } else { + f->parent = NULL; + f->child = NULL; + *rs = &(*r)->anchor->ruleset; + } + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); +} -#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n) \ - do { \ - if ((r) != NULL || (a) == NULL || (s) == NULL) \ - panic("PF_STEP_OUT_OF_ANCHOR"); \ - (s) = TAILQ_NEXT((s), entries); \ - while ((s) != NULL && ((r) = \ - TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \ - (s) = TAILQ_NEXT((s), entries); \ - if ((r) == NULL) { \ - (r) = TAILQ_NEXT((a), entries); \ - (a) = NULL; \ - } \ - } while (0) +int +pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a, int *match) +{ + struct pf_anchor_stackframe *f; + int quick = 0; + + do { + if (*depth <= 0) + break; + f = pf_anchor_stack + *depth - 1; + if (f->parent != NULL && f->child != NULL) { + if (f->child->match || + (match != NULL && *match)) { + f->r->anchor->match = 1; + *match = 0; + } + f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); + if (f->child != NULL) { + *rs = &f->child->ruleset; + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); + if (*r == NULL) + continue; + else + break; + } + } + (*depth)--; + if (*depth == 0 && a != NULL) + *a = NULL; + *rs = f->rs; + if (f->r->anchor->match || (match != NULL && *match)) + quick = f->r->quick; + *r = TAILQ_NEXT(f->r, entries); + } while (*r == NULL); + + return (quick); +} #ifdef INET6 void @@ -1754,20 
+2183,27 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_NOROUTE) return (1); if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (af == AF_INET) { + switch (af) { +#ifdef INET + case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); raddr = &rpool->cur->addr.p.dyn->pfid_addr4; rmask = &rpool->cur->addr.p.dyn->pfid_mask4; - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) return (1); raddr = &rpool->cur->addr.p.dyn->pfid_addr6; rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + break; +#endif /* INET6 */ } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) @@ -1789,25 +2225,29 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, switch (af) { #ifdef INET case AF_INET: - rpool->counter.addr32[0] = karc4random(); + rpool->counter.addr32[0] = htonl(karc4random()); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rmask->addr32[3] != 0xffffffff) - rpool->counter.addr32[3] = karc4random(); + rpool->counter.addr32[3] = + htonl(karc4random()); else break; if (rmask->addr32[2] != 0xffffffff) - rpool->counter.addr32[2] = karc4random(); + rpool->counter.addr32[2] = + htonl(karc4random()); else break; if (rmask->addr32[1] != 0xffffffff) - rpool->counter.addr32[1] = karc4random(); + rpool->counter.addr32[1] = + htonl(karc4random()); else break; if (rmask->addr32[0] != 0xffffffff) - rpool->counter.addr32[0] = karc4random(); + rpool->counter.addr32[0] = + htonl(karc4random()); break; #endif /* INET6 */ } @@ -1868,6 +2308,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, get_addr: PF_ACPY(naddr, &rpool->counter, af); + if (init_addr != NULL && PF_AZERO(init_addr, af)) + PF_ACPY(init_addr, naddr, af); PF_AINC(&rpool->counter, af); break; } @@ -1890,7 +2332,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, struct pf_src_node **sn) { - struct pf_state key; + struct pf_state_cmp key; struct pf_addr init_addr; u_int16_t cut; @@ -1898,6 +2340,11 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) return (1); + if (proto == IPPROTO_ICMP) { + low = 1; + high = 65535; + } + do { key.af = af; key.proto = proto; @@ -1909,8 +2356,9 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, * port search; start random, step; * similar 2 portloop in in_pcbbind */ - if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) { - key.gwy.port = 0; + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || + proto == IPPROTO_ICMP)) { + key.gwy.port = dport; if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) return (0); } else if (low == 0 && high == 0) { @@ -1932,7 +2380,7 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, high = tmp; } /* low < high */ - cut = karc4random() % (1 + high - low) + low; + cut = htonl(karc4random()) % (1 + high - low) + low; /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { key.gwy.port = htons(tmp); @@ -1974,8 +2422,11 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, struct pf_addr *daddr, u_int16_t dport, int rs_num) { - struct pf_rule *r, *rm = 
NULL, *anchorrule = NULL; + struct pf_rule *r, *rm = NULL; struct pf_ruleset *ruleset = NULL; + int tag = -1; + int rtableid = -1; + int asd = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); while (r && rm == NULL) { @@ -1992,8 +2443,7 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, } r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2001,7 +2451,8 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not)) + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif)) r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR].ptr; else if (src->port_op && !pf_match_port(src->port_op, @@ -2009,28 +2460,38 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT].ptr; else if (dst != NULL && - PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not)) + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0)) + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL)) r = TAILQ_NEXT(r, entries); else if (dst != NULL && dst->port_op && !pf_match_port(dst->port_op, dst->port[0], dst->port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) + r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, pd->hdr.tcp), r->os_fingerprint))) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) - r = TAILQ_NEXT(r, entries); - else if (r->anchor == NULL) + else { + if (r->tag) + tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; + if (r->anchor == NULL) { rm = r; - else - PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num); - if (r == NULL && anchorrule != NULL) - PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset, - rs_num); + } else + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); + } + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); } + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) + return (NULL); if (rm != NULL && (rm->action == PF_NONAT || rm->action == PF_NORDR || rm->action == PF_NOBINAT)) return (NULL); @@ -2082,7 +2543,9 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, switch (direction) { case PF_OUT: if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){ - if (pd->af == AF_INET) { + switch (pd->af) { +#ifdef INET + case AF_INET: if (r->rpool.cur->addr.p.dyn-> pfid_acnt4 < 1) return (NULL); @@ -2092,7 +2555,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->rpool.cur->addr.p.dyn-> pfid_mask4, saddr, AF_INET); - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: if (r->rpool.cur->addr.p.dyn-> pfid_acnt6 < 1) return (NULL); @@ -2102,6 +2568,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->rpool.cur->addr.p.dyn-> pfid_mask6, saddr, AF_INET6); + break; +#endif /* INET6 */ } } else PF_POOLMASK(naddr, @@ -2110,8 +2578,10 @@ 
pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, saddr, pd->af); break; case PF_IN: - if (r->src.addr.type == PF_ADDR_DYNIFTL){ - if (pd->af == AF_INET) { + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#ifdef INET + case AF_INET: if (r->src.addr.p.dyn-> pfid_acnt4 < 1) return (NULL); @@ -2121,7 +2591,10 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->src.addr.p.dyn-> pfid_mask4, daddr, AF_INET); - } else { + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: if (r->src.addr.p.dyn-> pfid_acnt6 < 1) return (NULL); @@ -2131,6 +2604,8 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, &r->src.addr.p.dyn-> pfid_mask6, daddr, AF_INET6); + break; +#endif /* INET6 */ } } else PF_POOLMASK(naddr, @@ -2141,9 +2616,13 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, } break; case PF_RDR: { - if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn)) + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) return (NULL); - + if ((r->rpool.opts & PF_POOL_TYPEMASK) == + PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, + &r->rpool.cur->addr.v.a.mask, daddr, + pd->af); if (r->rpool.proxy_port[1]) { u_int32_t tmp_nport; @@ -2201,7 +2680,7 @@ in_pcblookup_hash_handler(struct netmsg *msg0) #endif /* SMP */ int -pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) +pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; @@ -2212,8 +2691,11 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) #endif int pi_cpu = 0; - *uid = UID_MAX; - *gid = GID_MAX; + if (pd == NULL) + return (-1); + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + pd->lookup.pid = NO_PID; if (direction == PF_IN) { saddr = pd->src; daddr = pd->dst; @@ -2288,7 +2770,7 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); if (inp == NULL) - return (0); + return (-1); break; } /* FALLTHROUGH if SMP and on other CPU */ @@ -2309,10 +2791,10 @@ pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd) break; default: - return (0); + return (-1); } - *uid = inp->inp_socket->so_cred->cr_uid; - *gid = inp->inp_socket->so_cred->cr_groups[0]; + pd->lookup.uid = inp->inp_socket->so_cred->cr_uid; + pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0]; return (1); } @@ -2380,6 +2862,7 @@ pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) break; case TCPOPT_MAXSEG: bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); + NTOHS(mss); /* FALLTHROUGH */ default: optlen = opt[1]; @@ -2474,22 +2957,38 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) int pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, + struct ifqueue *ifq, struct inpcb *inp) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; struct tcphdr *th = pd->hdr.tcp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - int tag = -1; + int tag = -1, rtableid = -1; + u_int16_t mss = tcp_mssdflt; + int asd = 
0; + int match = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } + + if (inp != NULL) + pd->lookup.done = pf_socket_lookup(direction, pd, inp); + else if (debug_pfugidhack) { + crit_exit(); + DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); + pd->lookup.done = pf_socket_lookup(direction, pd, inp); + crit_enter(); + } + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); @@ -2525,8 +3024,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2534,37 +3032,37 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_TCP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if ((r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, inp), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, inp), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= karc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint)) @@ -2572,7 +3070,10 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -2580,12 +3081,12 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, 
a, ruleset, - PF_RULESET_FILTER); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -2593,10 +3094,11 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*th), (caddr_t)th); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -2627,7 +3129,7 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, pf_send_tcp(r, af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, - r->return_ttl); + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); } else if ((af == AF_INET) && r->return_icmp) pf_send_icmp(m, r->return_icmp >> 8, r->return_icmp & 255, af, r); @@ -2636,10 +3138,14 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, r->return_icmp6 & 255, af, r); } - if (r->action == PF_DROP) + if (r->action == PF_DROP) { return (PF_DROP); + } - pf_tag_packet(m, tag); + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } if (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM)) { @@ -2651,21 +3157,29 @@ pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction, len = pd->tot_len - off - (th->th_off << 2); /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; - /* src node for flter rule */ + } + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -2680,20 +3194,17 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_TCP; s->direction = direction; s->af = af; @@ -2731,30 +3242,14 @@ cleanup: if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ - while ((s->src.seqdiff = karc4random()) == 0) + while ((s->src.seqdiff = + pf_new_isn(s) - s->src.seqlo) == 0) ; pf_change_a(&th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); rewrite = 1; } else s->src.seqdiff = 0; - - 
/* - * WARNING! NetBSD patched this to not scale max_win up - * on the initial SYN, but they failed to correct the code - * in pf_test_state_tcp() that 'undid' the scaling, and they - * failed to remove the scale factor on successful window - * scale negotiation (and doing so would be difficult in the - * face of retransmission, without adding more flags to the - * state structure). - * - * After discussions with Daniel Hartmeier and Max Laier - * I've decided not to apply the NetBSD patch. - * - * The worst that happens is that the undo code on window - * scale negotiation failures will produce a larger - * max_win then actual. - */ if (th->th_flags & TH_SYN) { s->src.seqhi++; s->src.wscale = pf_get_wscale(m, off, th->th_off, af); @@ -2791,29 +3286,37 @@ cleanup: off, pd, th, &s->src, &s->dst)) { REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && - pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src, - &s->dst, &rewrite)) { + pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, + &s->src, &s->dst, &rewrite)) { + /* This really shouldn't happen!!! */ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on first pkt")); pf_normalize_tcp_cleanup(s); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { pf_normalize_tcp_cleanup(s); - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { - u_int16_t mss; - s->src.state = PF_TCPS_PROXY_SRC; if (nr != NULL) { if (direction == PF_OUT) { @@ -2826,7 +3329,7 @@ cleanup: bport, 0, af); } } - s->src.seqhi = karc4random(); + s->src.seqhi = htonl(karc4random()); /* Find mss option */ mss = pf_get_mss(m, off, th->th_off, af); mss = pf_calc_mss(saddr, af, mss); @@ -2834,7 +3337,8 @@ cleanup: s->src.mss = mss; pf_send_tcp(r, af, daddr, saddr, th->th_dport, th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0); + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } @@ -2849,24 +3353,29 @@ cleanup: int pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, + struct ifqueue *ifq, struct inpcb *inp) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; struct udphdr *uh = pd->hdr.udp; u_int16_t bport, nport = 0; sa_family_t af = pd->af; - int lookup = -1; - uid_t uid; - gid_t gid; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; int rewrite = 0; - int tag = -1; - - r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + int tag = -1, rtableid = -1; + int asd = 0; + int match = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { bport = nport = uh->uh_sport; @@ -2900,8 +3409,7 @@ 
pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -2909,42 +3417,45 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != IPPROTO_UDP) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], uh->uh_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], uh->uh_dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); - else if (r->uid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, inp), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], - uid)) + pd->lookup.uid)) r = TAILQ_NEXT(r, entries); - else if (r->gid.op && (lookup != -1 || (lookup = - pf_socket_lookup(&uid, &gid, direction, pd), 1)) && + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd, inp), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], - gid)) + pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= karc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -2952,12 +3463,12 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -2965,10 +3476,11 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { if (rewrite) m_copyback(m, off, sizeof(*uh), (caddr_t)uh); - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
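
The rule-evaluation loops above lean on pf's precomputed skip steps: when a criterion such as the interface or address family fails to match, the loop does not step to the next rule but jumps via r->skip[] to the first rule where that field differs. A minimal sketch of the idea (hypothetical types, not pf's real structures):

	enum { SKIP_IFP, SKIP_AF, SKIP_PROTO, SKIP_SRC_ADDR, SKIP_MAX };

	struct rule {
		struct rule	*next;
		struct rule	*skip[SKIP_MAX];	/* first rule whose value
							 * for this field differs */
		/* ... interface, af, proto, addresses, ports ... */
	};

	/*
	 * If e.g. the interface test fails, every consecutive rule naming
	 * the same interface must fail too, so evaluation resumes at
	 * skip[SKIP_IFP] instead of walking those rules one by one.
	 */
	static struct rule *
	after_mismatch(struct rule *r, int criterion)
	{
		return (r->skip[criterion]);
	}
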
r : nr, + a, ruleset, pd); } if ((r->action == PF_DROP) && @@ -2997,7 +3509,10 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action == PF_DROP) return (PF_DROP); - pf_tag_packet(m, tag); + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } if (r->keep_state || nr != NULL) { /* create new state */ @@ -3005,21 +3520,29 @@ pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_src_node *sn = NULL; /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; - /* src node for flter rule */ + } + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -3034,20 +3557,17 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = IPPROTO_UDP; s->direction = direction; s->af = af; @@ -3093,12 +3613,17 @@ cleanup: s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } } /* copy back packet headers if we performed NAT operations */ @@ -3111,7 +3636,8 @@ cleanup: int pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, - struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, + struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; @@ -3119,14 +3645,21 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; u_short reason; - u_int16_t icmpid = 0; + u_int16_t icmpid = 0, bport, nport = 0; sa_family_t af = pd->af; u_int8_t icmptype = 0, icmpcode = 0; int state_icmp = 0; - int tag = -1; + int tag = -1, rtableid = -1; #ifdef INET6 int rewrite = 0; #endif /* INET6 */ + int asd = 0; + int match = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } switch (pd->proto) { #ifdef INET @@ 
-3161,15 +3694,21 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); if (direction == PF_OUT) { + bport = nport = icmpid; /* check outgoing packet for BINAT/NAT */ if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != + NULL) { PF_ACPY(&pd->baddr, saddr, af); switch (af) { #ifdef INET case AF_INET: pf_change_a(&saddr->v4.s_addr, pd->ip_sum, pd->naddr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, nport, 0); + pd->hdr.icmp->icmp_id = nport; + m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -3185,9 +3724,11 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, pd->nat_rule = nr; } } else { + bport = nport = icmpid; /* check incoming packet for BINAT/RDR */ if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) { + saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) != + NULL) { PF_ACPY(&pd->baddr, daddr, af); switch (af) { #ifdef INET @@ -3212,8 +3753,7 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3221,30 +3761,33 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= karc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3252,12 +3795,12 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3265,19 +3808,23 @@ pf_test_icmp(struct pf_rule **rm, struct 
pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) { + if (r->log || (nr != NULL && nr->natpass && nr->log)) { #ifdef INET6 if (rewrite) m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); #endif /* INET6 */ - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr, + a, ruleset, pd); } if (r->action != PF_PASS) return (PF_DROP); - pf_tag_packet(m, tag); + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } if (!state_icmp && (r->keep_state || nr != NULL)) { /* create new state */ @@ -3285,21 +3832,29 @@ pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_src_node *sn = NULL; /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; - /* src node for flter rule */ + } + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -3314,43 +3869,44 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; if (direction == PF_OUT) { PF_ACPY(&s->gwy.addr, saddr, af); - s->gwy.port = icmpid; + s->gwy.port = nport; PF_ACPY(&s->ext.addr, daddr, af); - s->ext.port = icmpid; - if (nr != NULL) + s->ext.port = 0; + if (nr != NULL) { PF_ACPY(&s->lan.addr, &pd->baddr, af); - else + s->lan.port = bport; + } else { PF_ACPY(&s->lan.addr, &s->gwy.addr, af); - s->lan.port = icmpid; + s->lan.port = s->gwy.port; + } } else { PF_ACPY(&s->lan.addr, daddr, af); - s->lan.port = icmpid; + s->lan.port = nport; PF_ACPY(&s->ext.addr, saddr, af); - s->ext.port = icmpid; - if (nr != NULL) + s->ext.port = 0; + if (nr != NULL) { PF_ACPY(&s->gwy.addr, &pd->baddr, af); - else + s->gwy.port = bport; + } else { PF_ACPY(&s->gwy.addr, &s->lan.addr, af); - s->gwy.port = icmpid; + s->gwy.port = s->lan.port; + } } s->hash = pf_state_hash(s); s->creation = time_second; @@ -3367,12 +3923,17 @@ cleanup: s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if 
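
Unlike the old code, ICMP queries are now translated on their id field as well (the bport/nport handling above), and the ICMP checksum is patched incrementally with pf_cksum_fixup() rather than recomputed. A self-contained sketch of the same arithmetic follows; the real helper additionally takes a flag for UDP's zero-checksum special case:

	#include <stdint.h>

	/* fold a single 16-bit field change (old -> new) into a checksum */
	static uint16_t
	cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new)
	{
		uint32_t l;

		l = (uint32_t)cksum + old - new;
		l = (l >> 16) + (l & 0xffff);	/* fold the carry back in */
		return ((uint16_t)(l & 0xffff));
	}
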
(tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } } #ifdef INET6 @@ -3388,7 +3949,7 @@ cleanup: int pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, - struct pf_rule **am, struct pf_ruleset **rsm) + struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq) { struct pf_rule *nr = NULL; struct pf_rule *r, *a = NULL; @@ -3397,7 +3958,14 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_addr *saddr = pd->src, *daddr = pd->dst; sa_family_t af = pd->af; u_short reason; - int tag = -1; + int tag = -1, rtableid = -1; + int asd = 0; + int match = 0; + + if (pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); @@ -3449,8 +4017,7 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3458,26 +4025,29 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= karc4random()) r = TAILQ_NEXT(r, entries); - else if (r->match_tag && !pf_match_tag(m, r, nr, &tag)) - r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; + if (r->rtableid >= 0) + rtableid = r->rtableid; if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3485,12 +4055,12 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3498,8 +4068,9 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); - if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + if (r->log || (nr != NULL && nr->natpass && nr->log)) + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, + a, ruleset, pd); if ((r->action == PF_DROP) && ((r->rule_flag & PFRULE_RETURNICMP) || @@ -3538,7 +4109,10 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->action != PF_PASS) return (PF_DROP); - pf_tag_packet(m, tag); + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } if (r->keep_state || nr != NULL) { /* create new state */ @@ -3546,21 +4120,29 @@ pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction, struct pf_src_node *sn = NULL; /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); goto cleanup; - /* src node for flter rule */ + } + /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && ((direction == PF_OUT && pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); goto cleanup; + } s = pool_get(&pf_state_pl, PR_NOWAIT); if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); cleanup: if (sn != NULL && sn->states == 0 && sn->expire == 0) { RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); @@ -3575,20 +4157,17 @@ cleanup: pf_status.src_nodes--; pool_put(&pf_src_tree_pl, nsn); } - REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } bzero(s, sizeof(*s)); - r->states++; - if (a != NULL) - a->states++; s->rule.ptr = r; s->nat_rule.ptr = nr; - if (s->nat_rule.ptr != NULL) - s->nat_rule.ptr->states++; s->anchor.ptr = a; + STATE_INC_COUNTERS(s); s->allow_opts = r->allow_opts; - s->log = r->log & 2; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; s->proto = pd->proto; s->direction = direction; s->af = af; @@ -3624,12 +4203,17 @@ cleanup: s->nat_src_node->states++; } if (pf_insert_state(BOUND_IFACE(r, kif), s)) { - REASON_SET(&reason, PFRES_MEMORY); + REASON_SET(&reason, PFRES_STATEINS); pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); } else *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } } return (PF_PASS); @@ -3645,12 +4229,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, sa_family_t af = pd->af; u_short reason; int tag = -1; + int asd = 0; + int match = 0; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; @@ -3658,11 +4243,13 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + else if 
(PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; - else if (r->tos && !(r->tos & pd->tos)) + else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); @@ -3680,10 +4267,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, NULL, &tag)) r = TAILQ_NEXT(r, entries); - else if (r->anchorname[0] && r->anchor == NULL) - r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { + match = 1; *rm = r; *am = a; *rsm = ruleset; @@ -3691,12 +4277,12 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, break; r = TAILQ_NEXT(r, entries); } else - PF_STEP_INTO_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); } - if (r == NULL && a != NULL) - PF_STEP_OUT_OF_ANCHOR(r, a, ruleset, - PF_RULESET_FILTER); + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; } r = *rm; a = *am; @@ -3705,12 +4291,16 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); if (r->log) - PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset); + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, + pd); if (r->action != PF_PASS) return (PF_DROP); - pf_tag_packet(m, tag); + if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } return (PF_PASS); } @@ -3720,10 +4310,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_state key; + struct pf_state_cmp key; struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); - u_int32_t ack, end, seq; + u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws; int ackskew; int copyback = 0; @@ -3754,21 +4344,32 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, } if ((*state)->src.state == PF_TCPS_PROXY_SRC) { - if (direction != (*state)->direction) + if (direction != (*state)->direction) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); + } if (th->th_flags & TH_SYN) { - if (ntohl(th->th_seq) != (*state)->src.seqlo) + if (ntohl(th->th_seq) != (*state)->src.seqlo) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); + } pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0); + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, + 0, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); - else + } else if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } else (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { @@ -3784,31 +4385,37 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - 
(ntohl(th->th_seq) != (*state)->src.seqlo + 1)) + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); + } (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) - (*state)->dst.seqhi = karc4random(); + (*state)->dst.seqhi = htonl(karc4random()); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0); + (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || - (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); - else { + } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, - TH_ACK, (*state)->src.max_win, 0, 0); + TH_ACK, (*state)->src.max_win, 0, 0, 0, + (*state)->tag, NULL, NULL); pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, &dst->addr, src->port, dst->port, (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0); + TH_ACK, (*state)->dst.max_win, 0, 0, 1, + 0, NULL, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - @@ -3820,6 +4427,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; + REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } @@ -3836,12 +4444,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, * tcp_filtering.ps */ - seq = ntohl(th->th_seq); + orig_seq = seq = ntohl(th->th_seq); if (src->seqlo == 0) { - /* - * First packet from this end. The other end has already set - * the seqlo field. Set its state. - */ + /* First packet from this end. Set its state */ + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && src->scrub == NULL) { if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { @@ -3852,7 +4458,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, /* Deferred generation of sequence number modulator */ if (dst->seqdiff && !src->seqdiff) { - while ((src->seqdiff = karc4random()) == 0) + while ((src->seqdiff = pf_new_isn(*state) - seq) == 0) ; ack = ntohl(th->th_ack) - dst->seqdiff; pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + @@ -3878,14 +4484,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, >> sws; dws = dst->wscale & PF_WSCALE_MASK; } else { - /* - * Fixup other window. Undo the - * normalization that was done on - * the initial SYN. This can result - * in max_win being larger then - * actual but we don't really have - * much of a choice. - */ + /* fixup other window */ dst->max_win <<= dst->wscale & PF_WSCALE_MASK; /* in case of a retrans SYN|ACK */ @@ -3949,16 +4548,43 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, ackskew = dst->seqlo - ack; -#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). 
We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidently + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. + */ + if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { + if (pf_modulate_sack(m, off, pd, th, dst)) + copyback = 1; + } + + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && /* Retrans: not more than one window back */ (ackskew >= -MAXACKWINDOW) && /* Acking not more than one reassembled fragment backwards */ - (ackskew <= (MAXACKWINDOW << sws))) { + (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ + ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || + (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, ©back)) + return (PF_DROP); + } /* update max window */ if (src->max_win < win) @@ -3979,9 +4605,15 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { - if (dst->state == TCPS_SYN_SENT) + if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; - else if (dst->state == TCPS_CLOSING) + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) dst->state = TCPS_FIN_WAIT_2; } if (th->th_flags & TH_RST) @@ -3992,8 +4624,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; - else if (src->state >= TCPS_FIN_WAIT_2 || - dst->state >= TCPS_FIN_WAIT_2) + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) @@ -4039,9 +4671,16 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, kprintf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); - kprintf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d\n", - seq, ack, pd->p_len, ackskew, - (*state)->packets[0], (*state)->packets[1]); + kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len, + ackskew, (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1]); + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, ©back)) + return (PF_DROP); } /* update max window */ @@ -4089,19 +4728,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if ((*state)->dst.state == TCPS_SYN_SENT && (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ - if 
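
The SACK handling introduced above walks the TCP options of packets on modulated connections and shifts every SACK edge by the peer's seqdiff, so selective acknowledgements stay consistent with the rewritten sequence space. A simplified sketch of that walk (option parsing only; the real pf_modulate_sack also fixes up th_sum for every word it changes):

	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	#define OPT_EOL		0
	#define OPT_NOP		1
	#define OPT_SACK	5

	/* subtract 'seqdiff' from both edges of every SACK block in 'opts' */
	static void
	demodulate_sack(uint8_t *opts, int optlen, uint32_t seqdiff)
	{
		int i = 0, j;
		uint32_t edge;

		while (i + 1 < optlen && opts[i] != OPT_EOL) {
			if (opts[i] == OPT_NOP) {
				i++;
				continue;
			}
			if (opts[i + 1] < 2 || i + opts[i + 1] > optlen)
				break;			/* malformed option */
			if (opts[i] == OPT_SACK && opts[i + 1] >= 10) {
				/* each 32-bit edge gets the same shift */
				for (j = i + 2; j + 4 <= i + opts[i + 1]; j += 4) {
					memcpy(&edge, opts + j, sizeof(edge));
					edge = htonl(ntohl(edge) - seqdiff);
					memcpy(opts + j, &edge, sizeof(edge));
				}
			}
			i += opts[i + 1];
		}
	}
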
(!(th->th_flags & TH_RST)) { - u_int32_t ack = ntohl(th->th_seq) + pd->p_len; - - if (th->th_flags & TH_SYN) - ack++; - if (th->th_flags & TH_FIN) - ack++; + if (!(th->th_flags & TH_RST)) pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, - th->th_sport, ntohl(th->th_ack), ack, - TH_RST|TH_ACK, 0, 0, - (*state)->rule.ptr->return_ttl); - } + th->th_sport, ntohl(th->th_ack), 0, + TH_RST, 0, 0, + (*state)->rule.ptr->return_ttl, 1, 0, + pd->eh, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; @@ -4109,9 +4742,11 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, kprintf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); - kprintf(" seq=%u ack=%u len=%u ackskew=%d pkts=%d:%d " - "dir=%s,%s\n", seq, ack, pd->p_len, ackskew, - (*state)->packets[0], (*state)->packets[1], + kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, + (unsigned long long)(*state)->packets[0], + (unsigned long long)(*state)->packets[1], direction == PF_IN ? "in" : "out", direction == (*state)->direction ? "fwd" : "rev"); kprintf("pf: State failure on: %c %c %c %c | %c %c\n", @@ -4123,15 +4758,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); } + REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } - if (dst->scrub || src->scrub) { - if (pf_normalize_tcp_stateful(m, off, pd, reason, th, - src, dst, ©back)) - return (PF_DROP); - } - /* Any packets which have gotten here are to be passed */ /* translate source/destination address, if necessary */ @@ -4165,7 +4795,7 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; struct udphdr *uh = pd->hdr.udp; key.af = pd->af; @@ -4230,13 +4860,13 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, - struct mbuf *m, int off, void *h, struct pf_pdesc *pd) + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; - u_int16_t icmpid = 0; - u_int16_t *icmpsum = NULL; - u_int8_t icmptype = 0; + u_int16_t icmpid = 0, *icmpsum; + u_int8_t icmptype; int state_icmp = 0; + struct pf_state_cmp key; switch (pd->proto) { #ifdef INET @@ -4274,20 +4904,18 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. 
*/ - struct pf_state key; - key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd->src, key.af); PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = icmpid; + key.ext.port = 0; key.gwy.port = icmpid; } else { PF_ACPY(&key.lan.addr, pd->src, key.af); PF_ACPY(&key.ext.addr, pd->dst, key.af); key.lan.port = icmpid; - key.ext.port = icmpid; + key.ext.port = 0; } STATE_LOOKUP(); @@ -4296,7 +4924,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ - if (PF_ANEQ(&(*state)->lan.addr, &(*state)->gwy.addr, pd->af)) { + if (STATE_TRANSLATE(*state)) { if (direction == PF_OUT) { switch (pd->af) { #ifdef INET @@ -4304,6 +4932,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_a(&saddr->v4.s_addr, pd->ip_sum, (*state)->gwy.addr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->gwy.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->gwy.port; + m_copyback(m, off, ICMP_MINLEN, + (caddr_t)pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -4324,6 +4960,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_a(&daddr->v4.s_addr, pd->ip_sum, (*state)->lan.addr.v4.s_addr, 0); + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->lan.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->lan.port; + m_copyback(m, off, ICMP_MINLEN, + (caddr_t)pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 @@ -4356,8 +5000,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ - int ipoff2 = 0; - int off2 = 0; + int ipoff2; + int off2; pd2.af = pd->af; switch (pd->af) { @@ -4367,7 +5011,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, ipoff2 = off + ICMP_MINLEN; if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip)\n")); @@ -4377,13 +5021,10 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMP error messages don't refer to non-first * fragments */ - /* - * Note: We are dealing with an encapsulated - * header. This means ip_off/ip_len are not - * in host byte order! 
- */ - if (h2.ip_off & htons(IP_OFFMASK)) + if (h2.ip_off & htons(IP_OFFMASK)) { + REASON_SET(reason, PFRES_FRAG); return (PF_DROP); + } /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); @@ -4399,7 +5040,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, ipoff2 = off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip6)\n")); @@ -4417,6 +5058,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * ICMPv6 error messages for * non-first fragments */ + REASON_SET(reason, PFRES_FRAG); return (PF_DROP); case IPPROTO_AH: case IPPROTO_HOPOPTS: @@ -4426,7 +5068,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct ip6_ext opt6; if (!pf_pull_hdr(m, off2, &opt6, - sizeof(opt6), NULL, NULL, pd2.af)) { + sizeof(opt6), NULL, reason, + pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMPv6 short opt\n")); return (PF_DROP); @@ -4457,7 +5100,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; - struct pf_state key; + struct pf_state_cmp key; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; @@ -4467,7 +5110,8 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. */ - if (!pf_pull_hdr(m, off2, &th, 8, NULL, NULL, pd2.af)) { + if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, + pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(tcp)\n")); @@ -4524,6 +5168,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_print_state(*state); kprintf(" seq=%u\n", seq); } + REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } @@ -4572,10 +5217,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } case IPPROTO_UDP: { struct udphdr uh; - struct pf_state key; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(udp)\n")); @@ -4639,10 +5283,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET case IPPROTO_ICMP: { struct icmp iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, - NULL, NULL, pd2.af)) { + NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short i" "(icmp)\n")); @@ -4654,13 +5297,13 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = iih.icmp_id; + key.ext.port = 0; key.gwy.port = iih.icmp_id; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = iih.icmp_id; - key.ext.port = iih.icmp_id; + key.ext.port = 0; } STATE_LOOKUP(); @@ -4691,10 +5334,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; - struct pf_state key; if (!pf_pull_hdr(m, off2, &iih, - sizeof(struct icmp6_hdr), NULL, NULL, pd2.af)) { + sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(icmp6)\n")); @@ -4706,13 +5348,13 
@@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, if (direction == PF_IN) { PF_ACPY(&key.ext.addr, pd2.dst, key.af); PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = iih.icmp6_id; + key.ext.port = 0; key.gwy.port = iih.icmp6_id; } else { PF_ACPY(&key.lan.addr, pd2.dst, key.af); PF_ACPY(&key.ext.addr, pd2.src, key.af); key.lan.port = iih.icmp6_id; - key.ext.port = iih.icmp6_id; + key.ext.port = 0; } STATE_LOOKUP(); @@ -4743,8 +5385,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } #endif /* INET6 */ default: { - struct pf_state key; - key.af = pd2.af; key.proto = pd2.proto; if (direction == PF_IN) { @@ -4807,7 +5447,7 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; - struct pf_state key; + struct pf_state_cmp key; key.af = pd->af; key.proto = pd->proto; @@ -4935,21 +5575,112 @@ pf_pull_hdr(struct mbuf *m, int off, void *p, int len, } int -pf_routable(struct pf_addr *addr, sa_family_t af) +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) +{ + struct sockaddr_in *dst; + int ret = 1; + int check_mpath; +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else + struct route ro; +#endif + struct radix_node *rn; + struct rtentry *rt; + struct ifnet *ifp; + + check_mpath = 0; + bzero(&ro, sizeof(ro)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + break; +#ifdef INET6 + case AF_INET6: + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + + rtalloc_ign((struct route *)&ro, 0); + + if (ro.ro_rt != NULL) { + /* No interface given, this is a no-route check */ + if (kif == NULL) + goto out; + + if (kif->pfik_ifp == NULL) { + ret = 0; + goto out; + } + + /* Perform uRPF check if passed input interface */ + ret = 0; + rn = (struct radix_node *)ro.ro_rt; + do { + rt = (struct rtentry *)rn; + ifp = rt->rt_ifp; + + if (kif->pfik_ifp == ifp) + ret = 1; + rn = NULL; + } while (check_mpath == 1 && rn != NULL && ret == 0); + } else + ret = 0; +out: + if (ro.ro_rt != NULL) + RTFREE(ro.ro_rt); + return (ret); +} + +int +pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) { struct sockaddr_in *dst; +#ifdef INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else struct route ro; +#endif int ret = 0; bzero(&ro, sizeof(ro)); - dst = satosin(&ro.ro_dst); - dst->sin_family = af; - dst->sin_len = sizeof(*dst); - dst->sin_addr = addr->v4; - rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = addr->v4; + break; +#ifdef INET6 + case AF_INET6: + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof(*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + +rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING)); if (ro.ro_rt != NULL) { - ret = 1; RTFREE(ro.ro_rt); } @@ -4959,7 +5690,7 @@ pf_routable(struct pf_addr *addr, sa_family_t af) #ifdef INET void pf_route(struct mbuf **m, struct pf_rule *r, int 
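
pf_routable() above now doubles as the reverse-path check: called without a kif it is the old "is there a route at all" test, and called with the input interface it verifies that the route back to the source points at that interface. Boiled down to its decision logic (route_lookup()/route_ifp()/route_release() are hypothetical stand-ins for the rtalloc_ign()/RTFREE dance, and multipath is ignored):

	struct rtentry;				/* kernel routing entry, opaque here */
	struct ifnet;

	extern struct rtentry	*route_lookup(struct pf_addr *, sa_family_t);
	extern struct ifnet	*route_ifp(struct rtentry *);
	extern void		 route_release(struct rtentry *);

	static int
	routable(struct pf_addr *src, sa_family_t af, struct ifnet *input_ifp)
	{
		struct rtentry *rt;
		int ok;

		if ((rt = route_lookup(src, af)) == NULL)
			return (0);			/* no route back: fail */
		if (input_ifp == NULL)
			ok = 1;				/* plain routability check */
		else
			ok = (route_ifp(rt) == input_ifp); /* strict uRPF */
		route_release(rt);
		return (ok);
	}
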
dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; struct route iproute; @@ -4971,6 +5702,9 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_src_node *sn = NULL; int error = 0; int sw_csum; +#ifdef IPSEC + struct m_tag *mtag; +#endif /* IPSEC */ if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) @@ -4997,8 +5731,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, m0 = *m; } - if (m0->m_len < sizeof(struct ip)) - panic("pf_route: m0->m_len < sizeof(struct ip)"); + if (m0->m_len < sizeof(struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: m0->m_len < sizeof(struct ip)\n")); + goto bad; + } + ip = mtod(m0, struct ip *); ro = &iproute; @@ -5021,8 +5759,11 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = satosin(ro->ro_rt->rt_gateway); } else { - if (TAILQ_EMPTY(&r->rpool.list)) - panic("pf_route: TAILQ_EMPTY(&r->rpool.list)"); + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); + goto bad; + } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); @@ -5041,32 +5782,46 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, goto bad; if (oifp != ifp) { - if (pf_test(PF_OUT, ifp, &m0) != PF_PASS) + crit_exit(); + if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { + crit_enter(); goto bad; - else if (m0 == NULL) + } else if (m0 == NULL) { + crit_enter(); goto done; - if (m0->m_len < sizeof(struct ip)) - panic("pf_route: m0->m_len < sizeof(struct ip)"); + } + crit_enter(); + if (m0->m_len < sizeof(struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: m0->m_len < sizeof(struct ip)\n")); + goto bad; + } ip = mtod(m0, struct ip *); } - /* Copied from ip_output. */ + /* Copied from FreeBSD 5.1-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; if (sw_csum & CSUM_DELAY_DATA) { + /* + * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least) + */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); /* XXX: needed? */ in_delayed_cksum(m0); + HTONS(ip->ip_len); + HTONS(ip->ip_off); sw_csum &= ~CSUM_DELAY_DATA; } m0->m_pkthdr.csum_flags &= ifp->if_hwassist; - /* - * If small enough for interface, or the interface will take - * care of the fragmentation for us, can just send directly. - */ - if (ip->ip_len <= ifp->if_mtu || ((ifp->if_hwassist & CSUM_FRAGMENT) && - (ip->ip_off & IP_DF) == 0)) { - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); + if (ntohs(ip->ip_len) <= ifp->if_mtu || + (ifp->if_hwassist & CSUM_FRAGMENT && + ((ip->ip_off & htons(IP_DF)) == 0))) { + /* + * ip->ip_len = htons(ip->ip_len); + * ip->ip_off = htons(ip->ip_off); + */ ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { /* From KAME */ @@ -5077,8 +5832,9 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); } } - - error = ifp->if_output(ifp, m0, sintosa(dst), ro->ro_rt); + crit_exit(); + error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt); + crit_enter(); goto done; } @@ -5086,29 +5842,42 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. 
*/ - if (ip->ip_off & IP_DF) { + if (ip->ip_off & htons(IP_DF)) { ipstat.ips_cantfrag++; if (r->rt != PF_DUPTO) { + /* icmp_error() expects host byte ordering */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + crit_exit(); icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); + crit_enter(); goto done; } else goto bad; } m1 = m0; + /* + * XXX: is cheaper + less error prone than own function + */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); - if (error) + if (error) { goto bad; + } for (m0 = m1; m0; m0 = m1) { m1 = m0->m_nextpkt; m0->m_nextpkt = 0; if (error == 0) { - error = ifp->if_output(ifp, m0, sintosa(dst), NULL); - } else { + crit_exit(); + error = (*ifp->if_output)(ifp, m0, sintosa(dst), + NULL); + crit_enter(); + } else m_freem(m0); - } } if (error == 0) @@ -5130,7 +5899,7 @@ bad: #ifdef INET6 void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, - struct pf_state *s) + struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; struct route_in6 ip6route; @@ -5167,8 +5936,11 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, m0 = *m; } - if (m0->m_len < sizeof(struct ip6_hdr)) - panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + if (m0->m_len < sizeof(struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); + goto bad; + } ip6 = mtod(m0, struct ip6_hdr *); ro = &ip6route; @@ -5178,15 +5950,20 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, dst->sin6_len = sizeof(*dst); dst->sin6_addr = ip6->ip6_dst; - /* Cheat. */ + /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { - m0->m_pkthdr.fw_flags |= PF_MBUF_GENERATED; + pd->pf_mtag->flags |= PF_TAG_GENERATED; + crit_exit(); ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); + crit_enter(); return; } - if (TAILQ_EMPTY(&r->rpool.list)) - panic("pf_route6: TAILQ_EMPTY(&r->rpool.list)"); + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); + goto bad; + } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); @@ -5204,12 +5981,20 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, goto bad; if (oifp != ifp) { - if (pf_test6(PF_OUT, ifp, &m0) != PF_PASS) + crit_exit(); + if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) { + crit_enter(); goto bad; - else if (m0 == NULL) + } else if (m0 == NULL) { + crit_enter(); goto done; - if (m0->m_len < sizeof(struct ip6_hdr)) - panic("pf_route6: m0->m_len < sizeof(struct ip6_hdr)"); + } + crit_enter(); + if (m0->m_len < sizeof(struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n")); + goto bad; + } ip6 = mtod(m0, struct ip6_hdr *); } @@ -5220,12 +6005,16 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr)) dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { + crit_exit(); error = nd6_output(ifp, ifp, m0, dst, NULL); + crit_enter(); } else { in6_ifstat_inc(ifp, ifs6_in_toobig); - if (r->rt != PF_DUPTO) + if (r->rt != PF_DUPTO) { + crit_exit(); icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - else + crit_enter(); + } else goto bad; } @@ -5238,6 +6027,7 @@ bad: m_freem(m0); goto done; } + #endif /* INET6 */ @@ -5364,7 +6154,8 @@ pf_check_proto_cksum(struct mbuf *m, int off, int 
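
The NTOHS()/HTONS() pairs added to pf_route() above exist because pf, following OpenBSD, keeps ip_len and ip_off in network byte order, while in_delayed_cksum(), ip_fragment() and icmp_error() on this stack expect host order. The fields are simply flipped around such calls and flipped back when pf keeps the packet, e.g.:

	/* helper expects host byte order; pf's header is network order */
	NTOHS(ip->ip_len);
	NTOHS(ip->ip_off);
	in_delayed_cksum(m0);
	HTONS(ip->ip_len);
	HTONS(ip->ip_off);
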
len, u_int8_t p, #ifdef INET int -pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, + struct ether_header *eh, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; @@ -5375,29 +6166,42 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; - - if (!pf_status.running || (m->m_pkthdr.fw_flags & PF_MBUF_GENERATED)) + + if (!pf_status.running) return (PF_PASS); - kif = pfi_index2kif[ifp->if_index]; - if (kif == NULL) + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + kif = (struct pfi_kif *)ifp->if_pf_kif; + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("non-M_PKTHDR is passed to pf_test"); -#endif +#endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; + panic("debug"); goto done; } /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip(m0, dir, kif, &reason) != PF_PASS) { + if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -5409,6 +6213,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; + panic("debug"); goto done; } @@ -5420,6 +6225,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = h->ip_len; + pd.eh = eh; /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & (IP_MF | IP_OFFMASK)) { @@ -5427,7 +6233,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) &pd, &a, &ruleset); goto done; } - switch (h->ip_p) { case IPPROTO_TCP: { @@ -5441,6 +6246,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) } if (dir == PF_IN && pf_check_proto_cksum(m, off, h->ip_len - off, IPPROTO_TCP, AF_INET)) { + REASON_SET(&reason, PFRES_PROTCKSUM); action = PF_DROP; goto done; } @@ -5455,13 +6261,14 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; - } else if (s == NULL) + } else if (s == NULL) { action = pf_test_tcp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, NULL, inp); + } break; } @@ -5477,25 +6284,27 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, off, h->ip_len - off, IPPROTO_UDP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_udp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, NULL, inp); 
break; } @@ -5511,19 +6320,21 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (dir == PF_IN && pf_check_proto_cksum(m, off, h->ip_len - off, IPPROTO_ICMP, AF_INET)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } - action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd); + action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, + &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -5532,13 +6343,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset); + &pd, &a, &ruleset, NULL); break; } @@ -5546,28 +6357,32 @@ done: if (action == PF_PASS && h->ip_hl > 5 && !((s && s->allow_opts) || r->allow_opts)) { action = PF_DROP; - REASON_SET(&reason, PFRES_SHORT); + REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with ip options\n")); } + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); + #ifdef ALTQ if (action == PF_PASS && r->qid) { - m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED; - if (pd.tos == IPTOS_LOWDELAY) - m->m_pkthdr.altq_qid = r->pqid; + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; else - m->m_pkthdr.altq_qid = r->qid; + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET; + pd.pf_mtag->hdr = h; + /* add connection hash for fairq */ if (s) { KKASSERT(s->hash != 0); - m->m_pkthdr.fw_flags |= ALTQ_MBUF_STATE_HASHED; - m->m_pkthdr.altq_state_hash = s->hash; + pd.pf_mtag->flags |= PF_TAG_STATE_HASHED; + pd.pf_mtag->state_hash = s->hash; } - m->m_pkthdr.ecn_af = AF_INET; - m->m_pkthdr.header = h; } -#endif +#endif /* ALTQ */ /* * connections redirected to loopback should not match sockets @@ -5578,42 +6393,48 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; - m->m_pkthdr.fw_flags |= PF_MBUF_TRANSLATE_LOCALHOST; + if (log) { + struct pf_rule *lr; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, a, ruleset); + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, + &pd); + } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - dirndx = (dir == s->direction) ? 
0 : 1; - s->packets[dirndx]++; - s->bytes[dirndx] += pd.tot_len; if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; @@ -5643,12 +6464,12 @@ done: pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.not); + tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.not); + tr->dst.neg); } @@ -5658,19 +6479,19 @@ done: action = PF_PASS; } else if (r->rt) /* pf_route can free the mbuf causing *m0 to become NULL */ - pf_route(m0, r, dir, ifp, s); - + pf_route(m0, r, dir, ifp, s, &pd); return (action); } #endif /* INET */ #ifdef INET6 int -pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, + struct ether_header *eh, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; - struct mbuf *m = *m0; + struct mbuf *m = *m0, *n = NULL; struct ip6_hdr *h = NULL; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; @@ -5678,19 +6499,32 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) struct pf_pdesc pd; int off, terminal = 0, dirndx; - if (!pf_status.running || (m->m_pkthdr.fw_flags & PF_MBUF_GENERATED)) + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof(pd)); + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + if (pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); - kif = pfi_index2kif[ifp->if_index]; - if (kif == NULL) + kif = (struct pfi_kif *)ifp->if_pf_kif; + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test"); -#endif + panic("non-M_PKTHDR is passed to pf_test6"); +#endif /* DIAGNOSTIC */ - memset(&pd, 0, sizeof(pd)); if (m->m_pkthdr.len < (int)sizeof(*h)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); @@ -5699,13 +6533,25 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) } /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip6(m0, dir, kif, &reason) != PF_PASS) { + if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } m = *m0; h = mtod(m, struct ip6_hdr *); +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. 
+ */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); @@ -5713,6 +6559,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); + pd.eh = eh; off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; @@ -5724,19 +6571,38 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + + if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 rthdr0\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + /* FALLTHROUGH */ + } case IPPROTO_AH: case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), - NULL, NULL, pd.af)) { + NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short opt\n")); action = PF_DROP; - REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } @@ -5754,6 +6620,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) } } while (!terminal); + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + switch (pd.proto) { case IPPROTO_TCP: { @@ -5765,9 +6635,11 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip6_plen), IPPROTO_TCP, AF_INET6)) { + if (dir == PF_IN && pf_check_proto_cksum(n, off, + ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), + IPPROTO_TCP, AF_INET6)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); @@ -5779,13 +6651,13 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_tcp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, NULL, inp); break; } @@ -5798,28 +6670,31 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) log = action != PF_PASS; goto done; } - if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m, - off, ntohs(h->ip6_plen), IPPROTO_UDP, AF_INET6)) { + if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n, + off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), + IPPROTO_UDP, AF_INET6)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_udp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, NULL, inp); break; } @@ -5832,98 
+6707,118 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0) log = action != PF_PASS; goto done; } - if (dir == PF_IN && pf_check_proto_cksum(m, off, - ntohs(h->ip6_plen), IPPROTO_ICMPV6, AF_INET6)) { + if (dir == PF_IN && pf_check_proto_cksum(n, off, + ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)), + IPPROTO_ICMPV6, AF_INET6)) { action = PF_DROP; + REASON_SET(&reason, PFRES_PROTCKSUM); goto done; } action = pf_test_state_icmp(&s, dir, kif, - m, off, h, &pd); + m, off, h, &pd, &reason); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); -#endif +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_icmp(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset); + m, off, h, &pd, &a, &ruleset, NULL); break; } default: action = pf_test_state_other(&s, dir, kif, &pd); if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_other(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset); + &pd, &a, &ruleset, NULL); break; } done: - /* XXX handle IPv6 options, if not allowed. not implemented. */ + if (n != m) { + m_freem(n); + n = NULL; + } + + /* XXX handle IPv6 options, if not allowed. not implemented. */ + + if ((s && s->tag) || r->rtableid) + pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { - m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED; - if (pd.tos == IPTOS_LOWDELAY) - m->m_pkthdr.altq_qid = r->pqid; + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = r->pqid; else - m->m_pkthdr.altq_qid = r->qid; + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->af = AF_INET6; + pd.pf_mtag->hdr = h; + /* add connection hash for fairq */ if (s) { KKASSERT(s->hash != 0); - m->m_pkthdr.fw_flags |= ALTQ_MBUF_STATE_HASHED; - m->m_pkthdr.altq_state_hash = s->hash; + pd.pf_mtag->flags |= PF_TAG_STATE_HASHED; + pd.pf_mtag->state_hash = s->hash; } - m->m_pkthdr.ecn_af = AF_INET6; - m->m_pkthdr.header = h; } -#endif +#endif /* ALTQ */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; - m->m_pkthdr.fw_flags |= PF_MBUF_TRANSLATE_LOCALHOST; + if (log) { + struct pf_rule *lr; - if (log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, r, a, ruleset); + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd); + } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { - r->packets++; - r->bytes += pd.tot_len; + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; if (a != NULL) { - a->packets++; - a->bytes += pd.tot_len; + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - dirndx = (dir == s->direction) ? 
0 : 1; - s->packets[dirndx]++; - s->bytes[dirndx] += pd.tot_len; if (s->nat_rule.ptr != NULL) { - s->nat_rule.ptr->packets++; - s->nat_rule.ptr->bytes += pd.tot_len; + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { - s->src_node->packets++; - s->src_node->bytes += pd.tot_len; + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { - s->nat_src_node->packets++; - s->nat_src_node->bytes += pd.tot_len; + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; } + dirndx = (dir == s->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; @@ -5953,12 +6848,12 @@ done: pfr_update_stats(tr->src.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.src : pd.dst, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.not); + tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL || s->direction == dir) ? pd.dst : pd.src, pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.not); + tr->dst.neg); } @@ -5968,8 +6863,14 @@ done: action = PF_PASS; } else if (r->rt) /* pf_route6 can free the mbuf causing *m0 to become NULL */ - pf_route6(m0, r, dir, ifp, s); + pf_route6(m0, r, dir, ifp, s, &pd); return (action); } #endif /* INET6 */ + +int +pf_check_congestion(struct ifqueue *ifq) +{ + return (0); +} diff --git a/sys/net/pf/pf_if.c b/sys/net/pf/pf_if.c index b68ac99c7e..a6b86724ff 100644 --- a/sys/net/pf/pf_if.c +++ b/sys/net/pf/pf_if.c @@ -1,11 +1,8 @@ -/* $FreeBSD: src/sys/contrib/pf/net/pf_if.c,v 1.6 2004/09/14 15:20:24 mlaier Exp $ */ -/* $OpenBSD: pf_if.c,v 1.11 2004/03/15 11:38:23 cedric Exp $ */ -/* add $OpenBSD: pf_if.c,v 1.19 2004/08/11 12:06:44 henning Exp $ */ -/* $DragonFly: src/sys/net/pf/pf_if.c,v 1.10 2008/03/07 11:34:20 sephe Exp $ */ +/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ /* - * Copyright (c) 2004 The DragonFly Project. All rights reserved. - * + * Copyright 2005 Henning Brauer + * Copyright 2005 Ryan McBride * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2003 Cedric Berger * All rights reserved. 
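The pf_test()/pf_test6() hunks above replace the old per-field mbuf markings (m_pkthdr.fw_flags, altq_qid, ecn_af, header) with a single pf_mtag record: pf_get_mtag() is called once per packet, a NULL return is treated as PF_DROP, and the tag then carries the queue id, ECN hints, fairq state hash and the translate-localhost flag. The following is a minimal userland sketch of that pattern only; the struct and helper names (pf_mtag_model, pkt_get_mtag) are illustrative stand-ins, not the kernel structures or API.

#include <stdio.h>
#include <stdlib.h>

/*
 * Simplified model of the "one tag per packet" pattern used above:
 * all pf metadata lives in a single record attached to the packet
 * the first time pf sees it.
 */
struct pf_mtag_model {
	unsigned int flags;      /* e.g. generated / state-hashed markers */
	unsigned int qid;        /* ALTQ queue id chosen by the matching rule */
	unsigned int state_hash; /* connection hash for fairq */
	int          af;         /* address-family hint for ECN marking */
};

struct pkt {
	struct pf_mtag_model *mtag; /* NULL until the filter attaches one */
	int tos;
};

/* Return the existing tag or attach a fresh zeroed one (like pf_get_mtag()). */
static struct pf_mtag_model *
pkt_get_mtag(struct pkt *p)
{
	if (p->mtag == NULL)
		p->mtag = calloc(1, sizeof(*p->mtag));
	return (p->mtag);
}

int
main(void)
{
	struct pkt p = { NULL, 0x10 /* low-delay TOS, as in the ALTQ hunk */ };
	struct pf_mtag_model *t = pkt_get_mtag(&p);

	if (t == NULL)
		return (1);                  /* corresponds to the PF_DROP path */
	t->qid = (p.tos & 0x10) ? 1 : 2;     /* pqid vs. qid selection */
	t->state_hash = 0xdeadbeefU;         /* would come from the pf state */
	printf("qid %u hash %#x\n", t->qid, t->state_hash);
	free(t);
	return (0);
}

In the kernel path the tag hangs off the mbuf and is reused by ALTQ and the logging code, which is why the hunks above drop the separate fw_flags/altq_qid fields.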
@@ -67,59 +64,38 @@ #include #endif /* INET6 */ -#define ACCEPT_FLAGS(oklist) \ - do { \ - if ((flags & ~(oklist)) & \ - PFI_FLAG_ALLMASK) \ - return (EINVAL); \ - } while (0) - -#define senderr(e) do { rv = (e); goto _bad; } while (0) - -struct pfi_kif **pfi_index2kif; -struct pfi_kif *pfi_self, *pfi_dummy; -int pfi_indexlim; -struct pfi_ifhead pfi_ifs; +struct pfi_kif *pfi_all = NULL; struct pfi_statehead pfi_statehead; -int pfi_ifcnt; vm_zone_t pfi_addr_pl; +struct pfi_ifhead pfi_ifs; long pfi_update = 1; struct pfr_addr *pfi_buffer; int pfi_buffer_cnt; int pfi_buffer_max; -char pfi_reserved_anchor[PF_ANCHOR_NAME_SIZE] = - PF_RESERVED_ANCHOR; -char pfi_interface_ruleset[PF_RULESET_NAME_SIZE] = - PF_INTERFACE_RULESET; eventhandler_tag pfi_clone_cookie = NULL; eventhandler_tag pfi_attach_cookie = NULL; eventhandler_tag pfi_detach_cookie = NULL; -void pfi_dynaddr_update(void *); -void pfi_kifaddr_update(void *); +void pfi_kif_update(struct pfi_kif *); +void pfi_dynaddr_update(struct pfi_dynaddr *dyn); void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, int); +void pfi_kifaddr_update(void *); void pfi_instance_add(struct ifnet *, int, int); void pfi_address_add(struct sockaddr *, int, int); int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); -struct pfi_kif *pfi_if_create(const char *, struct pfi_kif *, int); -void pfi_copy_group(char *, const char *, int); -void pfi_dynamic_drivers(void); -void pfi_newgroup(const char *, int); -int pfi_skip_if(const char *, struct pfi_kif *, int); +int pfi_skip_if(const char *, struct pfi_kif *); int pfi_unmask(void *); -void pfi_dohooks(struct pfi_kif *); void pfi_kifaddr_update_event(void *, struct ifnet *, enum ifaddr_event, struct ifaddr *); -void pfi_attach_clone_event(void *, struct if_clone *); +/*XXX jl void pfi_attach_clone_event(void *, struct if_clone *);*/ void pfi_attach_ifnet_event(void *, struct ifnet *); void pfi_detach_ifnet_event(void *, struct ifnet *); RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); -#define PFI_DYNAMIC_BUSES { "pcmcia", "cardbus", "uhub" } #define PFI_BUFFER_MAX 0x10000 MALLOC_DEFINE(PFI_MTYPE, "pf_if", "pf interface table"); @@ -128,28 +104,30 @@ pfi_initialize(void) { struct ifnet *ifp; - if (pfi_self != NULL) /* already initialized */ + if (pfi_all != NULL) /* already initialized */ return; TAILQ_INIT(&pfi_statehead); pfi_buffer_max = 64; pfi_buffer = kmalloc(pfi_buffer_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); - pfi_self = pfi_if_create("self", NULL, PFI_IFLAG_GROUP); - pfi_dynamic_drivers(); + + if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) + panic("pfi_kif_get for pfi_all failed"); TAILQ_FOREACH(ifp, &ifnet, if_link) { if (ifp->if_dunit != IF_DUNIT_NONE) pfi_attach_ifnet(ifp); } - pfi_dummy = pfi_if_create("notyet", pfi_self, - PFI_IFLAG_GROUP | PFI_IFLAG_DYNAMIC); pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_attach_event, pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_detach_event, pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); - pfi_clone_cookie = EVENTHANDLER_REGISTER(if_clone_event, - pfi_attach_clone_event, NULL, EVENTHANDLER_PRI_ANY); +/* XXX jl pfi_clone_cookie = EVENTHANDLER_REGISTER(if_clone_event, + pfi_attach_clone_event, NULL, EVENTHANDLER_PRI_ANY); */ + + if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) + panic("pfi_kif_get for pfi_all failed"); } void @@ -174,14 +152,11 @@ pfi_cleanup(void) while ((p = RB_MIN(pfi_ifhead, &pfi_ifs))) { 
RB_REMOVE(pfi_ifhead, &pfi_ifs, p); - kfree(p->pfik_ah_head, PFI_MTYPE); kfree(p, PFI_MTYPE); } - kfree(pfi_index2kif, PFI_MTYPE); kfree(pfi_buffer, PFI_MTYPE); - pfi_index2kif = NULL; pfi_buffer = NULL; - pfi_self = NULL; + pfi_all = NULL; } /* @@ -201,11 +176,13 @@ pfi_kifaddr_update_event(void *arg, struct ifnet *ifp, pfi_kifaddr_update(p); } +/* XXX jl void pfi_attach_clone_event(void *arg __unused, struct if_clone *ifc) { pfi_attach_clone(ifc); } +*/ void pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) @@ -220,198 +197,219 @@ pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) pfi_detach_ifnet(ifp); } +struct pfi_kif * +pfi_kif_get(const char *kif_name) +{ + struct pfi_kif *kif; + struct pfi_kif_cmp s; + + bzero(&s, sizeof(s)); + strlcpy(s.pfik_ifname, kif_name, sizeof(s.pfik_ifname)); + if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) + return (kif); + + /* create new one */ + if ((kif = kmalloc(sizeof(*kif), PFI_MTYPE, M_WAITOK)) == NULL) + return (NULL); + + bzero(kif, sizeof(*kif)); + strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name)); + kif->pfik_tzero = time_second; + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &pfi_ifs, kif); + return (kif); +} + +void +pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) +{ + switch (what) { + case PFI_KIF_REF_RULE: + kif->pfik_rules++; + break; + case PFI_KIF_REF_STATE: + if (!kif->pfik_states++) + TAILQ_INSERT_TAIL(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_ref with unknown type"); + } +} + void -pfi_attach_clone(struct if_clone *ifc) +pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) { - pfi_initialize(); - pfi_newgroup(ifc->ifc_name, PFI_IFLAG_CLONABLE); + if (kif == NULL) + return; + + switch (what) { + case PFI_KIF_REF_NONE: + break; + case PFI_KIF_REF_RULE: + if (kif->pfik_rules <= 0) { + kprintf("pfi_kif_unref: rules refcount <= 0\n"); + return; + } + kif->pfik_rules--; + break; + case PFI_KIF_REF_STATE: + if (kif->pfik_states <= 0) { + kprintf("pfi_kif_unref: state refcount <= 0\n"); + return; + } + if (!--kif->pfik_states) + TAILQ_REMOVE(&pfi_statehead, kif, pfik_w_states); + break; + default: + panic("pfi_kif_unref with unknown type"); + } + + if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all) + return; + + if (kif->pfik_rules || kif->pfik_states) + return; + + RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); + kfree(kif, PFI_MTYPE); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + struct ifg_list *p; + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + if (rule_kif->pfik_group != NULL) + TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next) + if (p->ifgl_group == rule_kif->pfik_group) + return (1); + + return (0); } void pfi_attach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - int realname; + struct pfi_kif *kif; pfi_initialize(); crit_enter(); pfi_update++; - if (ifp->if_index >= pfi_indexlim) { - /* - * grow pfi_index2kif, similar to ifindex2ifnet code in if.c - */ - size_t m, n, oldlim; - struct pfi_kif **mp, **np; - - oldlim = pfi_indexlim; - if (pfi_indexlim == 0) - pfi_indexlim = 64; - while (ifp->if_index >= pfi_indexlim) - pfi_indexlim <<= 1; - - m = oldlim * sizeof(struct pfi_kif *); - mp = pfi_index2kif; - n = pfi_indexlim * sizeof(struct pfi_kif *); - np = kmalloc(n, PFI_MTYPE, M_NOWAIT | M_ZERO); - if (np == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate translation table"); - if (mp != NULL) - bcopy(mp, np, m); - 
pfi_index2kif = np; - if (mp != NULL) - kfree(mp, PFI_MTYPE); - } + if ((kif = pfi_kif_get(ifp->if_xname)) == NULL) + panic("pfi_kif_get failed"); + + kif->pfik_ifp = ifp; + ifp->if_pf_kif = (caddr_t)kif; + + pfi_kif_update(kif); - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - /* some additional trickery for placeholders */ - if ((p == NULL) || (p->pfik_parent == pfi_dummy)) { - /* are we looking at a renamed instance or not? */ - pfi_copy_group(key.pfik_name, ifp->if_xname, - sizeof(key.pfik_name)); - realname = (strncmp(key.pfik_name, ifp->if_dname, - sizeof(key.pfik_name)) == 0); - /* add group */ - /* we can change if_xname, hence use if_dname as group id */ - pfi_copy_group(key.pfik_name, ifp->if_dname, - sizeof(key.pfik_name)); - q = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (q == NULL) - q = pfi_if_create(key.pfik_name, pfi_self, - PFI_IFLAG_GROUP|PFI_IFLAG_DYNAMIC); - else if (q->pfik_parent == pfi_dummy) { - q->pfik_parent = pfi_self; - q->pfik_flags = (PFI_IFLAG_GROUP | PFI_IFLAG_DYNAMIC); - } - if (q == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' group", key.pfik_name); - - /* add/modify interface */ - if (p == NULL) - p = pfi_if_create(ifp->if_xname, q, - realname?PFI_IFLAG_INSTANCE:PFI_IFLAG_PLACEHOLDER); - else { - /* remove from the dummy group */ - /* XXX: copy stats? We should not have any!!! */ - pfi_dummy->pfik_delcnt++; - TAILQ_REMOVE(&pfi_dummy->pfik_grouphead, p, - pfik_instances); - /* move to the right group */ - p->pfik_parent = q; - q->pfik_addcnt++; - TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, - pfik_instances); - if (realname) { - p->pfik_flags &= ~PFI_IFLAG_PLACEHOLDER; - p->pfik_flags |= PFI_IFLAG_INSTANCE; - } - } - if (p == NULL) - panic("pfi_attach_ifnet: " - "cannot allocate '%s' interface", ifp->if_xname); - } else - q = p->pfik_parent; - p->pfik_ifp = ifp; - p->pfik_flags |= PFI_IFLAG_ATTACHED; - p->pfik_ah_cookie = EVENTHANDLER_REGISTER(ifaddr_event, - pfi_kifaddr_update_event, p, EVENTHANDLER_PRI_ANY); - pfi_index2kif[ifp->if_index] = p; - pfi_dohooks(p); crit_exit(); } void pfi_detach_ifnet(struct ifnet *ifp) { - struct pfi_kif *p, *q, key; - - strlcpy(key.pfik_name, ifp->if_xname, sizeof(key.pfik_name)); + struct pfi_kif *kif; + if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) + return; + crit_enter(); pfi_update++; - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - if (p == NULL) { - kprintf("pfi_detach_ifnet: cannot find %s", ifp->if_xname); - crit_exit(); - return; - } - EVENTHANDLER_DEREGISTER(ifaddr_event, p->pfik_ah_cookie); - q = p->pfik_parent; - p->pfik_ifp = NULL; - p->pfik_flags &= ~PFI_IFLAG_ATTACHED; - pfi_index2kif[ifp->if_index] = NULL; - pfi_dohooks(p); - pfi_maybe_destroy(p); + pfi_kif_update(kif); + + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); crit_exit(); } -struct pfi_kif * -pfi_lookup_create(const char *name) +void +pfi_attach_ifgroup(struct ifg_group *ifg) { - struct pfi_kif *p, *q, key; + struct pfi_kif *kif; + pfi_initialize(); crit_enter(); - p = pfi_lookup_if(name); - if (p == NULL) { - pfi_copy_group(key.pfik_name, name, sizeof(key.pfik_name)); - q = pfi_lookup_if(key.pfik_name); - if ((q != NULL) && (q->pfik_parent != pfi_dummy)) - p = pfi_if_create(name, q, PFI_IFLAG_INSTANCE); - else { - if (pfi_dummy == NULL) - panic("no 'notyet' dummy group"); - p = pfi_if_create(name, pfi_dummy, - PFI_IFLAG_PLACEHOLDER); - } - } - crit_exit(); - return (p); -} + pfi_update++; + if ((kif = pfi_kif_get(ifg->ifg_group)) == 
NULL) + panic("pfi_kif_get failed"); -struct pfi_kif * -pfi_attach_rule(const char *name) -{ - struct pfi_kif *p; + kif->pfik_group = ifg; + ifg->ifg_pf_kif = (caddr_t)kif; - p = pfi_lookup_create(name); - if (p != NULL) - p->pfik_rules++; - return (p); + crit_exit(); } void -pfi_detach_rule(struct pfi_kif *p) +pfi_detach_ifgroup(struct ifg_group *ifg) { - if (p == NULL) + struct pfi_kif *kif; + + if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL) return; - if (p->pfik_rules > 0) - p->pfik_rules--; - else - kprintf("pfi_detach_rule: reference count at 0\n"); - pfi_maybe_destroy(p); + + crit_enter(); + pfi_update++; + + kif->pfik_group = NULL; + ifg->ifg_pf_kif = NULL; + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + crit_exit(); } void -pfi_attach_state(struct pfi_kif *p) +pfi_group_change(const char *group) { - if (!p->pfik_states++) - TAILQ_INSERT_TAIL(&pfi_statehead, p, pfik_w_states); + struct pfi_kif *kif; + + crit_enter(); + pfi_update++; + if ((kif = pfi_kif_get(group)) == NULL) + panic("pfi_kif_get failed"); + + pfi_kif_update(kif); + + crit_exit(); } -void -pfi_detach_state(struct pfi_kif *p) +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) { - if (p == NULL) - return; - if (p->pfik_states <= 0) { - kprintf("pfi_detach_state: reference count <= 0\n"); - return; + switch (af) { +#ifdef INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); } - if (!--p->pfik_states) - TAILQ_REMOVE(&pfi_statehead, p, pfik_w_states); - pfi_maybe_destroy(p); } int @@ -424,15 +422,20 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (aw->type != PF_ADDR_DYNIFTL) return (0); - dyn = pool_get(&pfi_addr_pl, PR_NOWAIT); - if (dyn == NULL) + if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) return (1); bzero(dyn, sizeof(*dyn)); crit_enter(); - dyn->pfid_kif = pfi_attach_rule(aw->v.ifname); - if (dyn->pfid_kif == NULL) - senderr(1); + if (!strcmp(aw->v.ifname, "self")) + dyn->pfid_kif = pfi_kif_get(IFG_ALL); + else + dyn->pfid_kif = pfi_kif_get(aw->v.ifname); + if (dyn->pfid_kif == NULL) { + rv = 1; + goto _bad; + } + pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); dyn->pfid_net = pfi_unmask(&aw->v.a.mask); if (af == AF_INET && dyn->pfid_net == 32) @@ -449,25 +452,23 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (dyn->pfid_net != 128) ksnprintf(tblname + strlen(tblname), sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net); - ruleset = pf_find_or_create_ruleset(pfi_reserved_anchor, - pfi_interface_ruleset); - if (ruleset == NULL) - senderr(1); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = 1; + goto _bad; + } - dyn->pfid_kt = pfr_attach_table(ruleset, tblname); - if (dyn->pfid_kt == NULL) - senderr(1); + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = 1; + goto _bad; + } dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; dyn->pfid_iflags = aw->iflags; dyn->pfid_af = af; - dyn->pfid_hook_cookie = hook_establish(dyn->pfid_kif->pfik_ah_head, 1, - pfi_dynaddr_update, dyn); - if (dyn->pfid_hook_cookie == 
NULL) - senderr(1); + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); aw->p.dyn = dyn; - pfi_dynaddr_update(aw->p.dyn); + pfi_kif_update(dyn->pfid_kif); crit_exit(); return (0); @@ -477,21 +478,41 @@ _bad: if (ruleset != NULL) pf_remove_if_empty_ruleset(ruleset); if (dyn->pfid_kif != NULL) - pfi_detach_rule(dyn->pfid_kif); + pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); pool_put(&pfi_addr_pl, dyn); crit_exit(); return (rv); } void -pfi_dynaddr_update(void *p) +pfi_kif_update(struct pfi_kif *kif) { - struct pfi_dynaddr *dyn = (struct pfi_dynaddr *)p; - struct pfi_kif *kif = dyn->pfid_kif; - struct pfr_ktable *kt = dyn->pfid_kt; + struct ifg_list *ifgl; + struct pfi_dynaddr *p; - if (dyn == NULL || kif == NULL || kt == NULL) + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); + + /* again for all groups kif is member of */ + if (kif->pfik_ifp != NULL) + TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next) + pfi_kif_update((struct pfi_kif *) + ifgl->ifgl_group->ifg_pf_kif); +} + +void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) +{ + struct pfi_kif *kif; + struct pfr_ktable *kt; + + if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) panic("pfi_dynaddr_update"); + + kif = dyn->pfid_kif; + kt = dyn->pfid_kt; + if (kt->pfrkt_larg != pfi_update) { /* this table needs to be brought up-to-date */ pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); @@ -504,28 +525,18 @@ void pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) { int e, size2 = 0; - struct pfi_kif *p; - struct pfr_table t; + struct ifg_member *ifgm; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE) && kif->pfik_ifp == NULL) { - pfr_clr_addrs(&kt->pfrkt_t, NULL, 0); - return; - } pfi_buffer_cnt = 0; - if ((kif->pfik_flags & PFI_IFLAG_INSTANCE)) + + if (kif->pfik_ifp != NULL) pfi_instance_add(kif->pfik_ifp, net, flags); - else if (strcmp(kif->pfik_name, "self")) { - TAILQ_FOREACH(p, &kif->pfik_grouphead, pfik_instances) - pfi_instance_add(p->pfik_ifp, net, flags); - } else { - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) - if (p->pfik_flags & PFI_IFLAG_INSTANCE) - pfi_instance_add(p->pfik_ifp, net, flags); - } - t = kt->pfrkt_t; - t.pfrt_flags = 0; - if ((e = pfr_set_addrs(&t, pfi_buffer, pfi_buffer_cnt, &size2, - NULL, NULL, NULL, 0))) + else if (kif->pfik_group != NULL) + TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next) + pfi_instance_add(ifgm->ifgm_ifp, net, flags); + + if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) kprintf("pfi_table_update: cannot set %d new addresses " "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); } @@ -577,17 +588,16 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) } if (af == AF_INET) got4 = 1; - else + else if (af == AF_INET6) got6 = 1; net2 = net; if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { - if (af == AF_INET) { + if (af == AF_INET) net2 = pfi_unmask(&((struct sockaddr_in *) ia->ifa_netmask)->sin_addr); - } else { + else if (af == AF_INET6) net2 = pfi_unmask(&((struct sockaddr_in6 *) ia->ifa_netmask)->sin6_addr); - } } if (af == AF_INET && net2 > 32) net2 = 32; @@ -614,8 +624,7 @@ pfi_address_add(struct sockaddr *sa, int af, int net) pfi_buffer_cnt, PFI_BUFFER_MAX); return; } - p = kmalloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE, - M_NOWAIT); + p = kmalloc(new_max * sizeof(*pfi_buffer), PFI_MTYPE, M_WAITOK); if (p == NULL) { kprintf("pfi_address_add: no memory to grow buffer " "(%d/%d)\n", 
pfi_buffer_cnt, PFI_BUFFER_MAX); @@ -635,9 +644,9 @@ pfi_address_add(struct sockaddr *sa, int af, int net) p->pfra_net = net; if (af == AF_INET) p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; - if (af == AF_INET6) { + else if (af == AF_INET6) { p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; - if (IN6_IS_ADDR_LINKLOCAL(&p->pfra_ip6addr)) + if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) p->pfra_ip6addr.s6_addr16[1] = 0; } /* mask network address bits */ @@ -655,9 +664,8 @@ pfi_dynaddr_remove(struct pf_addr_wrap *aw) return; crit_enter(); - hook_disestablish(aw->p.dyn->pfid_kif->pfik_ah_head, - aw->p.dyn->pfid_hook_cookie); - pfi_detach_rule(aw->p.dyn->pfid_kif); + TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); + pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); aw->p.dyn->pfid_kif = NULL; pfr_detach_table(aw->p.dyn->pfid_kt); aw->p.dyn->pfid_kt = NULL; @@ -678,9 +686,11 @@ pfi_dynaddr_copyout(struct pf_addr_wrap *aw) void pfi_kifaddr_update(void *v) { + struct pfi_kif *kif = (struct pfi_kif *)v; + crit_enter(); pfi_update++; - pfi_dohooks(v); + pfi_kif_update(kif); crit_exit(); } @@ -690,134 +700,16 @@ pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); } -struct pfi_kif * -pfi_if_create(const char *name, struct pfi_kif *q, int flags) -{ - struct pfi_kif *p; - - p = kmalloc(sizeof(*p), PFI_MTYPE, M_NOWAIT | M_ZERO); - if (p == NULL) - return (NULL); - p->pfik_ah_head = kmalloc(sizeof(*p->pfik_ah_head), PFI_MTYPE, - M_NOWAIT | M_ZERO); - if (p->pfik_ah_head == NULL) { - kfree(p, PFI_MTYPE); - return (NULL); - } - TAILQ_INIT(p->pfik_ah_head); - TAILQ_INIT(&p->pfik_grouphead); - strlcpy(p->pfik_name, name, sizeof(p->pfik_name)); - RB_INIT(&p->pfik_lan_ext); - RB_INIT(&p->pfik_ext_gwy); - p->pfik_flags = flags; - p->pfik_parent = q; - p->pfik_tzero = time_second; - - RB_INSERT(pfi_ifhead, &pfi_ifs, p); - if (q != NULL) { - q->pfik_addcnt++; - TAILQ_INSERT_TAIL(&q->pfik_grouphead, p, pfik_instances); - } - pfi_ifcnt++; - return (p); -} - -int -pfi_maybe_destroy(struct pfi_kif *p) -{ - int i, j, k; - struct pfi_kif *q = p->pfik_parent; - - if ((p->pfik_flags & (PFI_IFLAG_ATTACHED | PFI_IFLAG_GROUP)) || - p->pfik_rules > 0 || p->pfik_states > 0) - if (!(p->pfik_flags & PFI_IFLAG_PLACEHOLDER)) - return (0); - - crit_enter(); - if (q != NULL) { - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) { - q->pfik_bytes[i][j][k] += - p->pfik_bytes[i][j][k]; - q->pfik_packets[i][j][k] += - p->pfik_packets[i][j][k]; - /* clear stats in case we return to the dummy group */ - p->pfik_bytes[i][j][k] = 0; - p->pfik_packets[i][j][k] = 0; - } - q->pfik_delcnt++; - TAILQ_REMOVE(&q->pfik_grouphead, p, pfik_instances); - } - if (p->pfik_rules > 0 || p->pfik_states > 0) { - /* move back to the dummy group */ - p->pfik_parent = pfi_dummy; - pfi_dummy->pfik_addcnt++; - TAILQ_INSERT_TAIL(&pfi_dummy->pfik_grouphead, p, - pfik_instances); - crit_exit(); - return (0); - } - pfi_ifcnt--; - RB_REMOVE(pfi_ifhead, &pfi_ifs, p); - crit_exit(); - - kfree(p->pfik_ah_head, PFI_MTYPE); - kfree(p, PFI_MTYPE); - return (1); -} - -void -pfi_copy_group(char *p, const char *q, int m) -{ - while (m > 1 && *q && !(*q >= '0' && *q <= '9')) { - *p++ = *q++; - m--; - } - if (m > 0) - *p++ = '\0'; -} - -void -pfi_dynamic_drivers(void) -{ - struct ifnet *ifp; - -/* - * For FreeBSD basically every interface is "dynamic" as we can unload - * modules e.g. 
- */ - TAILQ_FOREACH(ifp, &ifnet, if_link) { - if (ifp->if_dunit == IF_DUNIT_NONE) - continue; - pfi_newgroup(ifp->if_dname, PFI_IFLAG_DYNAMIC); - } -} - -void -pfi_newgroup(const char *name, int flags) -{ - struct pfi_kif *p; - - p = pfi_lookup_if(name); - if (p == NULL) - p = pfi_if_create(name, pfi_self, PFI_IFLAG_GROUP); - if (p == NULL) { - kprintf("pfi_newgroup: cannot allocate '%s' group", name); - return; - } - p->pfik_flags |= flags; -} - void pfi_fill_oldstatus(struct pf_status *pfs) { - struct pfi_kif *p, key; - int i, j, k; + struct pfi_kif *p; + struct pfi_kif_cmp key; + int i, j, k; - strlcpy(key.pfik_name, pfs->ifname, sizeof(key.pfik_name)); + strlcpy(key.pfik_ifname, pfs->ifname, sizeof(key.pfik_ifname)); crit_enter(); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); + p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); if (p == NULL) { crit_exit(); return; @@ -836,46 +728,45 @@ pfi_fill_oldstatus(struct pf_status *pfs) } int -pfi_clr_istats(const char *name, int *nzero, int flags) +pfi_clr_istats(const char *name) { struct pfi_kif *p; - int n = 0; - long tzero = time_second; crit_enter(); - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + if (pfi_skip_if(name, p)) continue; bzero(p->pfik_packets, sizeof(p->pfik_packets)); bzero(p->pfik_bytes, sizeof(p->pfik_bytes)); - p->pfik_tzero = tzero; - n++; + p->pfik_tzero = time_second; } crit_exit(); - if (nzero != NULL) - *nzero = n; + return (0); } int -pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) { - struct pfi_kif *p; + struct pfi_kif *p, *nextp; int n = 0; - ACCEPT_FLAGS(PFI_FLAG_GROUP|PFI_FLAG_INSTANCE); crit_enter(); - RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { - if (pfi_skip_if(name, p, flags)) + for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + if (pfi_skip_if(name, p)) continue; if (*size > n++) { if (!p->pfik_tzero) - p->pfik_tzero = boottime.tv_sec; + p->pfik_tzero = time_second; + pfi_kif_ref(p, PFI_KIF_REF_RULE); if (copyout(p, buf++, sizeof(*buf))) { + pfi_kif_unref(p, PFI_KIF_REF_RULE); crit_exit(); return (EFAULT); } + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + pfi_kif_unref(p, PFI_KIF_REF_RULE); } } crit_exit(); @@ -883,25 +774,11 @@ pfi_get_ifaces(const char *name, struct pfi_if *buf, int *size, int flags) return (0); } -struct pfi_kif * -pfi_lookup_if(const char *name) -{ - struct pfi_kif *p, key; - - strlcpy(key.pfik_name, name, sizeof(key.pfik_name)); - p = RB_FIND(pfi_ifhead, &pfi_ifs, &key); - return (p); -} - int -pfi_skip_if(const char *filter, struct pfi_kif *p, int f) +pfi_skip_if(const char *filter, struct pfi_kif *p) { int n; - if ((p->pfik_flags & PFI_IFLAG_GROUP) && !(f & PFI_FLAG_GROUP)) - return (1); - if ((p->pfik_flags & PFI_IFLAG_INSTANCE) && !(f & PFI_FLAG_INSTANCE)) - return (1); if (filter == NULL || !*filter) return (0); if (!strcmp(p->pfik_name, filter)) @@ -916,6 +793,36 @@ pfi_skip_if(const char *filter, struct pfi_kif *p, int f) return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); } +int +pfi_set_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + crit_enter(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags |= flags; + } + crit_exit(); + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + crit_enter(); + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, 
p)) + continue; + p->pfik_flags &= ~flags; + } + crit_exit(); + return (0); +} + /* from pf_print_state.c */ int pfi_unmask(void *addr) @@ -936,35 +843,3 @@ pfi_unmask(void *addr) return (b); } -void -pfi_dohooks(struct pfi_kif *p) -{ - for (; p != NULL; p = p->pfik_parent) - dohooks(p->pfik_ah_head, 0); -} - -int -pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) -{ - if (af == AF_INET) { - switch (dyn->pfid_acnt4) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, - &dyn->pfid_mask4, a, AF_INET)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); - } - } else { - switch (dyn->pfid_acnt6) { - case 0: - return (0); - case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, - &dyn->pfid_mask6, a, AF_INET6)); - default: - return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); - } - } -} diff --git a/sys/net/pf/pf_ioctl.c b/sys/net/pf/pf_ioctl.c index 3bf7431ec5..8f40143d44 100644 --- a/sys/net/pf/pf_ioctl.c +++ b/sys/net/pf/pf_ioctl.c @@ -1,9 +1,9 @@ /* $FreeBSD: src/sys/contrib/pf/net/pf_ioctl.c,v 1.12 2004/08/12 14:15:42 mlaier Exp $ */ /* $OpenBSD: pf_ioctl.c,v 1.112.2.2 2004/07/24 18:28:12 brad Exp $ */ -/* $DragonFly: src/sys/net/pf/pf_ioctl.c,v 1.15 2008/09/15 05:11:02 sephe Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ /* - * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * Copyright (c) 2010 The DragonFly Project. All rights reserved. * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer @@ -53,11 +53,14 @@ #include #include #include +#include #include #include +#include #include #include #include +#include #include #include @@ -70,12 +73,18 @@ #include #include +#include +#include #include #if NPFSYNC > 0 #include #endif /* NPFSYNC > 0 */ +#if NPFLOG > 0 +#include +#endif /* NPFLOG > 0 */ + #ifdef INET6 #include #include @@ -87,28 +96,38 @@ #include #include +#include + +u_int rt_numfibs = RT_NUMFIBS; void init_zone_var(void); void cleanup_pf_zone(void); int pfattach(void); -struct pf_pool *pf_get_pool(char *, char *, u_int32_t, - u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); -int pf_get_ruleset_number(u_int8_t); -void pf_init_ruleset(struct pf_ruleset *); +void pf_thread_create(void *); +struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, + u_int8_t, u_int8_t, u_int8_t); + void pf_mv_pool(struct pf_palist *, struct pf_palist *); void pf_empty_pool(struct pf_palist *); #ifdef ALTQ int pf_begin_altq(u_int32_t *); int pf_rollback_altq(u_int32_t); int pf_commit_altq(u_int32_t); +int pf_enable_altq(struct pf_altq *); +int pf_disable_altq(struct pf_altq *); #endif /* ALTQ */ -int pf_begin_rules(u_int32_t *, int, char *, char *); -int pf_rollback_rules(u_int32_t, int, char *, char *); -int pf_commit_rules(u_int32_t, int, char *, char *); - -extern struct callout pf_expire_to; +int pf_begin_rules(u_int32_t *, int, const char *); +int pf_rollback_rules(u_int32_t, int, char *); +int pf_setup_pfsync_matching(struct pf_ruleset *); +void pf_hash_rule(MD5_CTX *, struct pf_rule *); +void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); +int pf_commit_rules(u_int32_t, int, char *); struct pf_rule pf_default_rule; +struct lock pf_consistency_lock; +#ifdef ALTQ +static int pf_altq_running; +#endif #define TAGID_MAX 50000 TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), @@ -117,9 +136,12 @@ TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error 
PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -static u_int16_t tagname2tag(struct pf_tags *, char *); -static void tag2tagname(struct pf_tags *, u_int16_t, char *); -static void tag_unref(struct pf_tags *, u_int16_t); +u_int16_t tagname2tag(struct pf_tags *, char *); +void tag2tagname(struct pf_tags *, u_int16_t, char *); +void tag_unref(struct pf_tags *, u_int16_t); +int pf_rtlabel_add(struct pf_addr_wrap *); +void pf_rtlabel_remove(struct pf_addr_wrap *); +void pf_rtlabel_copyout(struct pf_addr_wrap *); #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x @@ -167,6 +189,25 @@ static struct dev_ops pf_ops = { /* XXX convert to port model */ }; static volatile int pf_pfil_hooked = 0; +int pf_end_threads = 0; +struct lock pf_task_lck; + +int debug_pfugidhack = 0; +SYSCTL_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, &debug_pfugidhack, 0, + "Enable/disable pf user/group rules mpsafe hack"); + + +static void +init_pf_lck(void) +{ + lockinit(&pf_task_lck, "pf task lck", 0, LK_CANRECURSE); +} + +static void +destroy_pf_lck(void) +{ + lockuninit(&pf_task_lck); +} void init_zone_var(void) @@ -239,21 +280,24 @@ pfattach(void) /* XXX uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp, pf_pool_limits[PF_LIMIT_STATES].limit); */ - + if (ctob(physmem) <= 100*1024*1024) + pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = + PFR_KENTRY_HIWAT_SMALL; RB_INIT(&tree_src_tracking); - TAILQ_INIT(&pf_anchors); + RB_INIT(&pf_anchors); pf_init_ruleset(&pf_main_ruleset); TAILQ_INIT(&pf_altqs[0]); TAILQ_INIT(&pf_altqs[1]); TAILQ_INIT(&pf_pabuf); pf_altqs_active = &pf_altqs[0]; pf_altqs_inactive = &pf_altqs[1]; - TAILQ_INIT(&state_updates); + TAILQ_INIT(&state_list); /* default rule should never be garbage collected */ pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; pf_default_rule.action = PF_PASS; pf_default_rule.nr = (uint32_t)(-1); + pf_default_rule.rtableid = -1; /* initialize default timeouts */ my_timeout[PFTM_TCP_FIRST_PACKET] = 120; /* First TCP packet */ @@ -272,22 +316,31 @@ pfattach(void) my_timeout[PFTM_OTHER_MULTIPLE] = 60; /* Bidirectional */ my_timeout[PFTM_FRAG] = 30; /* Fragment expire */ my_timeout[PFTM_INTERVAL] = 10; /* Expire interval */ - - callout_init(&pf_expire_to); - callout_reset(&pf_expire_to, my_timeout[PFTM_INTERVAL] * hz, - pf_purge_timeout, &pf_expire_to); - + my_timeout[PFTM_SRC_NODE] = 0; /* Source Tracking */ + my_timeout[PFTM_TS_DIFF] = 30; /* Allowed TS diff */ + my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; + pf_normalize_init(); bzero(&pf_status, sizeof(pf_status)); pf_status.debug = PF_DEBUG_URGENT; - pf_pfil_hooked = 0; /* XXX do our best to avoid a conflict */ pf_status.hostid = karc4random(); + /* require process context to purge states, so perform in a thread */ + kthread_create(pf_thread_create, NULL, NULL, "dummy"); + return (error); } +void +pf_thread_create(void *v) +{ + if (kthread_create(pf_purge_thread, NULL, NULL, "pfpurge")) + panic("pfpurge thread"); +} + int pfopen(struct dev_open_args *ap) { @@ -307,15 +360,15 @@ pfclose(struct dev_close_args *ap) } struct pf_pool * -pf_get_pool(char *anchorname, char *rulesetname, u_int32_t ticket, - u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, - u_int8_t active, u_int8_t check_ticket) +pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, + u_int32_t rule_number, u_int8_t r_last, u_int8_t active, + u_int8_t check_ticket) { struct pf_ruleset *ruleset; struct pf_rule *rule; int rs_num; - ruleset = 
pf_find_ruleset(anchorname, rulesetname); + ruleset = pf_find_ruleset(anchor); if (ruleset == NULL) return (NULL); rs_num = pf_get_ruleset_number(rule_action); @@ -350,164 +403,6 @@ pf_get_pool(char *anchorname, char *rulesetname, u_int32_t ticket, return (&rule->rpool); } -int -pf_get_ruleset_number(u_int8_t action) -{ - switch (action) { - case PF_SCRUB: - return (PF_RULESET_SCRUB); - break; - case PF_PASS: - case PF_DROP: - return (PF_RULESET_FILTER); - break; - case PF_NAT: - case PF_NONAT: - return (PF_RULESET_NAT); - break; - case PF_BINAT: - case PF_NOBINAT: - return (PF_RULESET_BINAT); - break; - case PF_RDR: - case PF_NORDR: - return (PF_RULESET_RDR); - break; - default: - return (PF_RULESET_MAX); - break; - } -} - -void -pf_init_ruleset(struct pf_ruleset *ruleset) -{ - int i; - - memset(ruleset, 0, sizeof(struct pf_ruleset)); - for (i = 0; i < PF_RULESET_MAX; i++) { - TAILQ_INIT(&ruleset->rules[i].queues[0]); - TAILQ_INIT(&ruleset->rules[i].queues[1]); - ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; - ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; - } -} - -struct pf_anchor * -pf_find_anchor(const char *anchorname) -{ - struct pf_anchor *anchor; - int n = -1; - - anchor = TAILQ_FIRST(&pf_anchors); - while (anchor != NULL && (n = strcmp(anchor->name, anchorname)) < 0) - anchor = TAILQ_NEXT(anchor, entries); - if (n == 0) - return (anchor); - else - return (NULL); -} - -struct pf_ruleset * -pf_find_ruleset(char *anchorname, char *rulesetname) -{ - struct pf_anchor *anchor; - struct pf_ruleset *ruleset; - - if (!anchorname[0] && !rulesetname[0]) - return (&pf_main_ruleset); - if (!anchorname[0] || !rulesetname[0]) - return (NULL); - anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; - rulesetname[PF_RULESET_NAME_SIZE-1] = 0; - anchor = pf_find_anchor(anchorname); - if (anchor == NULL) - return (NULL); - ruleset = TAILQ_FIRST(&anchor->rulesets); - while (ruleset != NULL && strcmp(ruleset->name, rulesetname) < 0) - ruleset = TAILQ_NEXT(ruleset, entries); - if (ruleset != NULL && !strcmp(ruleset->name, rulesetname)) - return (ruleset); - else - return (NULL); -} - -struct pf_ruleset * -pf_find_or_create_ruleset(char anchorname[PF_ANCHOR_NAME_SIZE], - char rulesetname[PF_RULESET_NAME_SIZE]) -{ - struct pf_anchor *anchor, *a; - struct pf_ruleset *ruleset, *r; - - if (!anchorname[0] && !rulesetname[0]) - return (&pf_main_ruleset); - if (!anchorname[0] || !rulesetname[0]) - return (NULL); - anchorname[PF_ANCHOR_NAME_SIZE-1] = 0; - rulesetname[PF_RULESET_NAME_SIZE-1] = 0; - a = TAILQ_FIRST(&pf_anchors); - while (a != NULL && strcmp(a->name, anchorname) < 0) - a = TAILQ_NEXT(a, entries); - if (a != NULL && !strcmp(a->name, anchorname)) - anchor = a; - else { - anchor = (struct pf_anchor *)kmalloc(sizeof(struct pf_anchor), - M_TEMP, M_NOWAIT); - if (anchor == NULL) - return (NULL); - memset(anchor, 0, sizeof(struct pf_anchor)); - bcopy(anchorname, anchor->name, sizeof(anchor->name)); - TAILQ_INIT(&anchor->rulesets); - if (a != NULL) - TAILQ_INSERT_BEFORE(a, anchor, entries); - else - TAILQ_INSERT_TAIL(&pf_anchors, anchor, entries); - } - r = TAILQ_FIRST(&anchor->rulesets); - while (r != NULL && strcmp(r->name, rulesetname) < 0) - r = TAILQ_NEXT(r, entries); - if (r != NULL && !strcmp(r->name, rulesetname)) - return (r); - ruleset = (struct pf_ruleset *)kmalloc(sizeof(struct pf_ruleset), - M_TEMP, M_NOWAIT); - if (ruleset != NULL) { - pf_init_ruleset(ruleset); - bcopy(rulesetname, ruleset->name, sizeof(ruleset->name)); - ruleset->anchor = anchor; - if (r != NULL) - 
TAILQ_INSERT_BEFORE(r, ruleset, entries); - else - TAILQ_INSERT_TAIL(&anchor->rulesets, ruleset, entries); - } - return (ruleset); -} - -void -pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) -{ - struct pf_anchor *anchor; - int i; - - if (ruleset == NULL || ruleset->anchor == NULL || ruleset->tables > 0 || - ruleset->topen) - return; - for (i = 0; i < PF_RULESET_MAX; ++i) - if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || - !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || - ruleset->rules[i].inactive.open) - return; - - anchor = ruleset->anchor; - TAILQ_REMOVE(&anchor->rulesets, ruleset, entries); - kfree(ruleset, M_TEMP); - - if (TAILQ_EMPTY(&anchor->rulesets)) { - TAILQ_REMOVE(&pf_anchors, anchor, entries); - kfree(anchor, M_TEMP); - pf_update_anchor_rules(); - } -} - void pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) { @@ -527,7 +422,7 @@ pf_empty_pool(struct pf_palist *poola) while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { pfi_dynaddr_remove(&empty_pool_pa->addr); pf_tbladdr_remove(&empty_pool_pa->addr); - pfi_detach_rule(empty_pool_pa->kif); + pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); TAILQ_REMOVE(poola, empty_pool_pa, entries); pool_put(&pf_pooladdr_pl, empty_pool_pa); } @@ -545,10 +440,12 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) */ pf_tbladdr_remove(&rule->src.addr); pf_tbladdr_remove(&rule->dst.addr); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); } TAILQ_REMOVE(rulequeue, rule, entries); rule->entries.tqe_prev = NULL; - rule->nr = (uint32_t)(-1); + rule->nr = -1; } if (rule->states > 0 || rule->src_nodes > 0 || @@ -561,18 +458,23 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif + pf_rtlabel_remove(&rule->src.addr); + pf_rtlabel_remove(&rule->dst.addr); pfi_dynaddr_remove(&rule->src.addr); pfi_dynaddr_remove(&rule->dst.addr); if (rulequeue == NULL) { pf_tbladdr_remove(&rule->src.addr); pf_tbladdr_remove(&rule->dst.addr); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); } - pfi_detach_rule(rule->kif); + pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); + pf_anchor_remove(rule); pf_empty_pool(&rule->rpool.list); pool_put(&pf_rule_pl, rule); } -static u_int16_t +u_int16_t tagname2tag(struct pf_tags *head, char *tagname) { struct pf_tagname *tag, *p = NULL; @@ -600,10 +502,10 @@ tagname2tag(struct pf_tags *head, char *tagname) return (0); /* allocate and fill new struct pf_tagname */ - tag = (struct pf_tagname *)kmalloc(sizeof(struct pf_tagname), - M_TEMP, M_NOWAIT | M_ZERO); + tag = kmalloc(sizeof(struct pf_tagname), M_TEMP, M_WAITOK); if (tag == NULL) return (0); + bzero(tag, sizeof(struct pf_tagname)); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; tag->ref++; @@ -616,7 +518,7 @@ tagname2tag(struct pf_tags *head, char *tagname) return (tag->tag); } -static void +void tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) { struct pf_tagname *tag; @@ -628,7 +530,7 @@ tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) } } -static void +void tag_unref(struct pf_tags *head, u_int16_t tag) { struct pf_tagname *p, *next; @@ -657,13 +559,43 @@ pf_tagname2tag(char *tagname) void pf_tag2tagname(u_int16_t tagid, char *p) { - return (tag2tagname(&pf_tags, tagid, p)); + tag2tagname(&pf_tags, tagid, p); +} + +void +pf_tag_ref(u_int16_t tag) +{ + struct pf_tagname *t; + + TAILQ_FOREACH(t, &pf_tags, entries) + if (t->tag == tag) + break; + if (t != NULL) + t->ref++; } void 
pf_tag_unref(u_int16_t tag) { - return (tag_unref(&pf_tags, tag)); + tag_unref(&pf_tags, tag); +} + +int +pf_rtlabel_add(struct pf_addr_wrap *a) +{ + return (0); +} + +void +pf_rtlabel_remove(struct pf_addr_wrap *a) +{ +} + +void +pf_rtlabel_copyout(struct pf_addr_wrap *a) +{ + if (a->type == PF_ADDR_RTLABEL && a->v.rtlabel) + strlcpy(a->v.rtlabelname, "?", sizeof(a->v.rtlabelname)); } #ifdef ALTQ @@ -676,13 +608,13 @@ pf_qname2qid(char *qname) void pf_qid2qname(u_int32_t qid, char *p) { - return (tag2tagname(&pf_qids, (u_int16_t)qid, p)); + tag2tagname(&pf_qids, (u_int16_t)qid, p); } void pf_qid_unref(u_int32_t qid) { - return (tag_unref(&pf_qids, (u_int16_t)qid)); + tag_unref(&pf_qids, (u_int16_t)qid); } int @@ -764,6 +696,8 @@ pf_commit_altq(u_int32_t ticket) TAILQ_REMOVE(pf_altqs_inactive, altq, entries); if (altq->qname[0] == 0) { /* detach and destroy the discipline */ + if (pf_altq_running) + error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) error = err; @@ -779,78 +713,283 @@ pf_commit_altq(u_int32_t ticket) altqs_inactive_open = 0; return (error); } + +int +pf_enable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int error = 0; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + if (ifp->if_snd.altq_type != ALTQT_NONE) + error = altq_enable(&ifp->if_snd); + + /* set tokenbucket regulator */ + if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { + tb.rate = altq->ifbandwidth; + tb.depth = altq->tbrsize; + crit_enter(); + error = tbr_set(&ifp->if_snd, &tb); + crit_exit(); + } + + return (error); +} + +int +pf_disable_altq(struct pf_altq *altq) +{ + struct ifnet *ifp; + struct tb_profile tb; + int error; + + if ((ifp = ifunit(altq->ifname)) == NULL) + return (EINVAL); + + /* + * when the discipline is no longer referenced, it was overridden + * by a new one. if so, just return. 
+ */ + if (altq->altq_disc != ifp->if_snd.altq_disc) + return (0); + + error = altq_disable(&ifp->if_snd); + + if (error == 0) { + /* clear tokenbucket regulator */ + tb.rate = 0; + crit_enter(); + error = tbr_set(&ifp->if_snd, &tb); + crit_exit(); + } + + return (error); +} #endif /* ALTQ */ int -pf_begin_rules(u_int32_t *ticket, int rs_num, char *anchor, char *ruleset) +pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); - rs = pf_find_or_create_ruleset(anchor, ruleset); + rs = pf_find_or_create_ruleset(anchor); if (rs == NULL) return (EINVAL); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); } int -pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) +pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; struct pf_rule *rule; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); - rs = pf_find_ruleset(anchor, ruleset); + rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); - while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) + while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule); + rs->rules[rs_num].inactive.rcount--; + } rs->rules[rs_num].inactive.open = 0; return (0); } +#define PF_MD5_UPD(st, elm) \ + MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) + +#define PF_MD5_UPD_STR(st, elm) \ + MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) + +#define PF_MD5_UPD_HTONL(st, elm, stor) do { \ + (stor) = htonl((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ +} while (0) + +#define PF_MD5_UPD_HTONS(st, elm, stor) do { \ + (stor) = htons((st)->elm); \ + MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ +} while (0) + +void +pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) +{ + PF_MD5_UPD(pfr, addr.type); + switch (pfr->addr.type) { + case PF_ADDR_DYNIFTL: + PF_MD5_UPD(pfr, addr.v.ifname); + PF_MD5_UPD(pfr, addr.iflags); + break; + case PF_ADDR_TABLE: + PF_MD5_UPD(pfr, addr.v.tblname); + break; + case PF_ADDR_ADDRMASK: + /* XXX ignore af? */ + PF_MD5_UPD(pfr, addr.v.a.addr.addr32); + PF_MD5_UPD(pfr, addr.v.a.mask.addr32); + break; + case PF_ADDR_RTLABEL: + PF_MD5_UPD(pfr, addr.v.rtlabelname); + break; + } + + PF_MD5_UPD(pfr, port[0]); + PF_MD5_UPD(pfr, port[1]); + PF_MD5_UPD(pfr, neg); + PF_MD5_UPD(pfr, port_op); +} + +void +pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule) +{ + u_int16_t x; + u_int32_t y; + + pf_hash_rule_addr(ctx, &rule->src); + pf_hash_rule_addr(ctx, &rule->dst); + PF_MD5_UPD_STR(rule, label); + PF_MD5_UPD_STR(rule, ifname); + PF_MD5_UPD_STR(rule, match_tagname); + PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? 
*/ + PF_MD5_UPD_HTONL(rule, os_fingerprint, y); + PF_MD5_UPD_HTONL(rule, prob, y); + PF_MD5_UPD_HTONL(rule, uid.uid[0], y); + PF_MD5_UPD_HTONL(rule, uid.uid[1], y); + PF_MD5_UPD(rule, uid.op); + PF_MD5_UPD_HTONL(rule, gid.gid[0], y); + PF_MD5_UPD_HTONL(rule, gid.gid[1], y); + PF_MD5_UPD(rule, gid.op); + PF_MD5_UPD_HTONL(rule, rule_flag, y); + PF_MD5_UPD(rule, action); + PF_MD5_UPD(rule, direction); + PF_MD5_UPD(rule, af); + PF_MD5_UPD(rule, quick); + PF_MD5_UPD(rule, ifnot); + PF_MD5_UPD(rule, match_tag_not); + PF_MD5_UPD(rule, natpass); + PF_MD5_UPD(rule, keep_state); + PF_MD5_UPD(rule, proto); + PF_MD5_UPD(rule, type); + PF_MD5_UPD(rule, code); + PF_MD5_UPD(rule, flags); + PF_MD5_UPD(rule, flagset); + PF_MD5_UPD(rule, allow_opts); + PF_MD5_UPD(rule, rt); + PF_MD5_UPD(rule, tos); +} + int -pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor, char *ruleset) +pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; - struct pf_rule *rule; + struct pf_rule *rule, **old_array; struct pf_rulequeue *old_rules; + int error; + u_int32_t old_rcount; if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); - rs = pf_find_ruleset(anchor, ruleset); + rs = pf_find_ruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); + /* Calculate checksum for the main ruleset */ + if (rs == &pf_main_ruleset) { + error = pf_setup_pfsync_matching(rs); + if (error != 0) + return (error); + } + /* Swap rules, keep the old. */ crit_enter(); old_rules = rs->rules[rs_num].active.ptr; + old_rcount = rs->rules[rs_num].active.rcount; + old_array = rs->rules[rs_num].active.ptr_array; + rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; + rs->rules[rs_num].active.ptr_array = + rs->rules[rs_num].inactive.ptr_array; + rs->rules[rs_num].active.rcount = + rs->rules[rs_num].inactive.rcount; rs->rules[rs_num].inactive.ptr = old_rules; + rs->rules[rs_num].inactive.ptr_array = old_array; + rs->rules[rs_num].inactive.rcount = old_rcount; + rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); + /* Purge the old rule list. */ while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_rm_rule(old_rules, rule); + if (rs->rules[rs_num].inactive.ptr_array) + kfree(rs->rules[rs_num].inactive.ptr_array, M_TEMP); + rs->rules[rs_num].inactive.ptr_array = NULL; + rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_ruleset(rs); - pf_update_anchor_rules(); crit_exit(); return (0); } +int +pf_setup_pfsync_matching(struct pf_ruleset *rs) +{ + MD5_CTX ctx; + struct pf_rule *rule; + int rs_cnt; + u_int8_t digest[PF_MD5_DIGEST_LENGTH]; + + MD5Init(&ctx); + for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { + /* XXX PF_RULESET_SCRUB as well? 
*/ + if (rs_cnt == PF_RULESET_SCRUB) + continue; + + if (rs->rules[rs_cnt].inactive.ptr_array) + kfree(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); + rs->rules[rs_cnt].inactive.ptr_array = NULL; + + if (rs->rules[rs_cnt].inactive.rcount) { + rs->rules[rs_cnt].inactive.ptr_array = + kmalloc(sizeof(caddr_t) * + rs->rules[rs_cnt].inactive.rcount, + M_TEMP, M_WAITOK); + + if (!rs->rules[rs_cnt].inactive.ptr_array) + return (ENOMEM); + } + + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule(&ctx, rule); + (rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + } + } + + MD5Final(digest, &ctx); + memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); + return (0); +} + int pfioctl(struct dev_ioctl_args *ap) { @@ -880,8 +1019,6 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCGETALTQS: case DIOCGETALTQ: case DIOCGETQSTATS: - case DIOCGETANCHORS: - case DIOCGETANCHOR: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: @@ -899,7 +1036,8 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCIGETIFACES: - case DIOCICLRISTATS: + case DIOCSETIFFLAG: + case DIOCCLRIFFLAG: case DIOCGIFSPEED: break; case DIOCRCLRTABLES: @@ -917,7 +1055,6 @@ pfioctl(struct dev_ioctl_args *ap) if (!(ap->a_fflag & FWRITE)) switch (cmd) { case DIOCGETRULES: - case DIOCGETRULE: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: @@ -928,10 +1065,9 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCGETALTQS: case DIOCGETALTQ: case DIOCGETQSTATS: - case DIOCGETANCHORS: - case DIOCGETANCHOR: case DIOCGETRULESETS: case DIOCGETRULESET: + case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: @@ -955,6 +1091,10 @@ pfioctl(struct dev_ioctl_args *ap) PFR_FLAG_DUMMY) break; /* dummy operation ok */ return (EACCES); + case DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + return (EACCES); + break; default: return (EACCES); } @@ -997,14 +1137,6 @@ pfioctl(struct dev_ioctl_args *ap) } break; - case DIOCBEGINRULES: { - struct pfioc_rule *pr = (struct pfioc_rule *)addr; - - error = pf_begin_rules(&pr->ticket, pf_get_ruleset_number( - pr->rule.action), pr->anchor, pr->ruleset); - break; - } - case DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; @@ -1012,7 +1144,8 @@ pfioctl(struct dev_ioctl_args *ap) struct pf_pooladdr *pa; int rs_num; - ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1022,10 +1155,6 @@ pfioctl(struct dev_ioctl_args *ap) error = EINVAL; break; } - if (pr->rule.anchorname[0] && ruleset != &pf_main_ruleset) { - error = EINVAL; - break; - } if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; break; @@ -1044,6 +1173,8 @@ pfioctl(struct dev_ioctl_args *ap) break; } bcopy(&pr->rule, rule, sizeof(struct pf_rule)); + rule->cuid = ap->a_cred->cr_ruid; + rule->cpid = (int)NULL; rule->anchor = NULL; rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); @@ -1072,14 +1203,18 @@ pfioctl(struct dev_ioctl_args *ap) else rule->nr = 0; if (rule->ifname[0]) { - rule->kif = pfi_attach_rule(rule->ifname); + rule->kif = pfi_kif_get(rule->ifname); if (rule->kif == NULL) { pool_put(&pf_rule_pl, rule); error = EINVAL; break; } + pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); } + if (rule->rtableid > 0 && rule->rtableid > rt_numfibs) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { @@ -1102,6 
+1237,13 @@ pfioctl(struct dev_ioctl_args *ap) error = EBUSY; if (rule->rt && !rule->direction) error = EINVAL; +#if NPFLOG > 0 + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; +#endif + if (pf_rtlabel_add(&rule->src.addr) || + pf_rtlabel_add(&rule->dst.addr)) + error = EBUSY; if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) error = EINVAL; if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) @@ -1110,13 +1252,24 @@ pfioctl(struct dev_ioctl_args *ap) error = EINVAL; if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) error = EINVAL; + if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) + error = EINVAL; TAILQ_FOREACH(pa, &pf_pabuf, entries) if (pf_tbladdr_setup(ruleset, &pa->addr)) error = EINVAL; + if (rule->overload_tblname[0]) { + if ((rule->overload_tbl = pfr_attach_table(ruleset, + rule->overload_tblname)) == NULL) + error = EINVAL; + else + rule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + pf_mv_pool(&pf_pabuf, &rule->rpool.list); if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || - (rule->action == PF_BINAT)) && !rule->anchorname[0]) || + (rule->action == PF_BINAT)) && rule->anchor == NULL) || (rule->rt > PF_FASTROUTE)) && (TAILQ_FIRST(&rule->rpool.list) == NULL)) error = EINVAL; @@ -1126,17 +1279,11 @@ pfioctl(struct dev_ioctl_args *ap) break; } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); - rule->evaluations = rule->packets = rule->bytes = 0; + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); - break; - } - - case DIOCCOMMITRULES: { - struct pfioc_rule *pr = (struct pfioc_rule *)addr; - - error = pf_commit_rules(pr->ticket, pf_get_ruleset_number( - pr->rule.action), pr->anchor, pr->ruleset); + ruleset->rules[rs_num].inactive.rcount++; break; } @@ -1146,7 +1293,8 @@ pfioctl(struct dev_ioctl_args *ap) struct pf_rule *tail; int rs_num; - ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1156,7 +1304,6 @@ pfioctl(struct dev_ioctl_args *ap) error = EINVAL; break; } - crit_enter(); tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_rulequeue); if (tail) @@ -1164,7 +1311,6 @@ pfioctl(struct dev_ioctl_args *ap) else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; - crit_exit(); break; } @@ -1174,7 +1320,8 @@ pfioctl(struct dev_ioctl_args *ap) struct pf_rule *rule; int rs_num, i; - ruleset = pf_find_ruleset(pr->anchor, pr->ruleset); + pr->anchor[sizeof(pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1188,27 +1335,36 @@ pfioctl(struct dev_ioctl_args *ap) error = EBUSY; break; } - crit_enter(); rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != pr->nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { error = EBUSY; - crit_exit(); break; } bcopy(rule, &pr->rule, sizeof(struct pf_rule)); + if (pf_anchor_copyout(ruleset, rule, pr)) { + error = EBUSY; + break; + } pfi_dynaddr_copyout(&pr->rule.src.addr); pfi_dynaddr_copyout(&pr->rule.dst.addr); pf_tbladdr_copyout(&pr->rule.src.addr); pf_tbladdr_copyout(&pr->rule.dst.addr); + pf_rtlabel_copyout(&pr->rule.src.addr); + pf_rtlabel_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = (uint32_t)(-1); else pr->rule.skip[i].nr = rule->skip[i].ptr->nr; - crit_exit(); + + if 
(pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } @@ -1231,7 +1387,7 @@ pfioctl(struct dev_ioctl_args *ap) error = EINVAL; break; } - ruleset = pf_find_ruleset(pcr->anchor, pcr->ruleset); + ruleset = pf_find_ruleset(pcr->anchor); if (ruleset == NULL) { error = EINVAL; break; @@ -1264,6 +1420,8 @@ pfioctl(struct dev_ioctl_args *ap) break; } bcopy(&pcr->rule, newrule, sizeof(struct pf_rule)); + newrule->cuid = ap->a_cred->cr_ruid; + newrule->cpid = (int)NULL; TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ newrule->states = 0; @@ -1283,15 +1441,20 @@ pfioctl(struct dev_ioctl_args *ap) } #endif /* INET6 */ if (newrule->ifname[0]) { - newrule->kif = pfi_attach_rule(newrule->ifname); + newrule->kif = pfi_kif_get(newrule->ifname); if (newrule->kif == NULL) { pool_put(&pf_rule_pl, newrule); error = EINVAL; break; } + pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); } else newrule->kif = NULL; + if (newrule->rtableid > 0 && + newrule->rtableid > rt_numfibs) + error = EBUSY; + #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { @@ -1305,7 +1468,7 @@ pfioctl(struct dev_ioctl_args *ap) } else newrule->pqid = newrule->qid; } -#endif +#endif /* ALTQ */ if (newrule->tagname[0]) if ((newrule->tag = pf_tagname2tag(newrule->tagname)) == 0) @@ -1314,9 +1477,11 @@ pfioctl(struct dev_ioctl_args *ap) if ((newrule->match_tag = pf_tagname2tag( newrule->match_tagname)) == 0) error = EBUSY; - if (newrule->rt && !newrule->direction) error = EINVAL; + if (pf_rtlabel_add(&newrule->src.addr) || + pf_rtlabel_add(&newrule->dst.addr)) + error = EBUSY; if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) error = EINVAL; if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) @@ -1325,13 +1490,28 @@ pfioctl(struct dev_ioctl_args *ap) error = EINVAL; if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) error = EINVAL; + if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) + error = EINVAL; + TAILQ_FOREACH(pa, &pf_pabuf, entries) + if (pf_tbladdr_setup(ruleset, &pa->addr)) + error = EINVAL; + + if (newrule->overload_tblname[0]) { + if ((newrule->overload_tbl = pfr_attach_table( + ruleset, newrule->overload_tblname)) == + NULL) + error = EINVAL; + else + newrule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } pf_mv_pool(&pf_pabuf, &newrule->rpool.list); if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_FASTROUTE)) && - !newrule->anchorname[0])) && + !newrule->anchor)) && (TAILQ_FIRST(&newrule->rpool.list) == NULL)) error = EINVAL; @@ -1340,13 +1520,12 @@ pfioctl(struct dev_ioctl_args *ap) break; } newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); - newrule->evaluations = newrule->packets = 0; - newrule->bytes = 0; + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; } pf_empty_pool(&pf_pabuf); - crit_enter(); - if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); @@ -1359,16 +1538,17 @@ pfioctl(struct dev_ioctl_args *ap) while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { - pf_rm_rule(NULL, newrule); + if (newrule != NULL) + pf_rm_rule(NULL, newrule); error = EINVAL; - crit_exit(); break; } } - if (pcr->action == PF_CHANGE_REMOVE) + if (pcr->action == PF_CHANGE_REMOVE) { pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); - else { + 
ruleset->rules[rs_num].active.rcount--; + } else { if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, @@ -1380,6 +1560,7 @@ pfioctl(struct dev_ioctl_args *ap) TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); + ruleset->rules[rs_num].active.rcount++; } nr = 0; @@ -1387,76 +1568,87 @@ pfioctl(struct dev_ioctl_args *ap) ruleset->rules[rs_num].active.ptr, entries) oldrule->nr = nr++; + ruleset->rules[rs_num].active.ticket++; + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_ruleset(ruleset); - pf_update_anchor_rules(); - ruleset->rules[rs_num].active.ticket++; - crit_exit(); break; } case DIOCCLRSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - crit_enter(); - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name)) { - state->timeout = PFTM_PURGE; #if NPFSYNC /* don't send out individual delete messages */ state->sync_flags = PFSTATE_NOSYNC; #endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); - pf_status.states = 0; psk->psk_af = killed; #if NPFSYNC pfsync_clear_states(pf_status.hostid, psk->psk_ifname); #endif - crit_exit(); break; } case DIOCKILLSTATES: { - struct pf_state *state; + struct pf_state *state, *nexts; + struct pf_state_host *src, *dst; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; int killed = 0; - crit_enter(); - RB_FOREACH(state, pf_state_tree_id, &tree_id) { + for (state = RB_MIN(pf_state_tree_id, &tree_id); state; + state = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, state); + + if (state->direction == PF_OUT) { + src = &state->lan; + dst = &state->ext; + } else { + src = &state->ext; + dst = &state->lan; + } if ((!psk->psk_af || state->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == state->proto) && - PF_MATCHA(psk->psk_src.not, + PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &state->lan.addr, state->af) && - PF_MATCHA(psk->psk_dst.not, + &src->addr, state->af) && + PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &state->ext.addr, state->af) && + &dst->addr, state->af) && (psk->psk_src.port_op == 0 || pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - state->lan.port)) && + src->port)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - state->ext.port)) && + dst->port)) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, state->u.s.kif->pfik_name))) { - state->timeout = PFTM_PURGE; +#if NPFSYNC > 0 + /* send immediate delete of state */ + pfsync_delete_state(state); + state->sync_flags |= PFSTATE_NOSYNC; +#endif + pf_unlink_state(state); killed++; } } - pf_purge_expired_states(); - crit_exit(); psk->psk_af = killed; break; } @@ -1476,12 +1668,10 @@ pfioctl(struct dev_ioctl_args *ap) error = ENOMEM; break; } - crit_enter(); - kif = pfi_lookup_create(ps->state.u.ifname); + kif = pfi_kif_get(ps->state.u.ifname); if (kif == NULL) { pool_put(&pf_state_pl, state); error = ENOENT; - crit_exit(); break; } bcopy(&ps->state, state, sizeof(struct pf_state)); @@ -1497,11 +1687,10 @@ pfioctl(struct dev_ioctl_args *ap) state->hash = pf_state_hash(state); if 
(pf_insert_state(kif, state)) { - pfi_maybe_destroy(kif); + pfi_kif_unref(kif, PFI_KIF_REF_NONE); pool_put(&pf_state_pl, state); error = ENOMEM; } - crit_exit(); break; } @@ -1509,9 +1698,9 @@ pfioctl(struct dev_ioctl_args *ap) struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_state *state; u_int32_t nr; + int secs; nr = 0; - crit_enter(); RB_FOREACH(state, pf_state_tree_id, &tree_id) { if (nr >= ps->nr) break; @@ -1519,19 +1708,21 @@ pfioctl(struct dev_ioctl_args *ap) } if (state == NULL) { error = EBUSY; - crit_exit(); break; } - bcopy(state, &ps->state, sizeof(struct pf_state)); + secs = time_second; + bcopy(state, &ps->state, sizeof(ps->state)); + strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name, + sizeof(ps->state.u.ifname)); ps->state.rule.nr = state->rule.ptr->nr; ps->state.nat_rule.nr = (state->nat_rule.ptr == NULL) ? (uint32_t)(-1) : state->nat_rule.ptr->nr; ps->state.anchor.nr = (state->anchor.ptr == NULL) ? - (uint32_t)(-1) : state->anchor.ptr->nr; - crit_exit(); + -1 : state->anchor.ptr->nr; + ps->state.creation = secs - ps->state.creation; ps->state.expire = pf_state_expires(state); - if (ps->state.expire > time_second) - ps->state.expire -= time_second; + if (ps->state.expire > secs) + ps->state.expire -= secs; else ps->state.expire = 0; break; @@ -1540,56 +1731,57 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_state *state; - struct pf_state *p, pstore; - struct pfi_kif *kif; + struct pf_state *p, *pstore; u_int32_t nr = 0; int space = ps->ps_len; if (space == 0) { - crit_enter(); - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - nr += kif->pfik_states; - crit_exit(); + nr = pf_status.states; ps->ps_len = sizeof(struct pf_state) * nr; - return (0); + break; } - crit_enter(); + pstore = kmalloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = ps->ps_states; - TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) - RB_FOREACH(state, pf_state_tree_ext_gwy, - &kif->pfik_ext_gwy) { + + state = TAILQ_FIRST(&state_list); + while (state) { + if (state->timeout != PFTM_UNLINKED) { int secs = time_second; if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - bcopy(state, &pstore, sizeof(pstore)); - strlcpy(pstore.u.ifname, kif->pfik_name, - sizeof(pstore.u.ifname)); - pstore.rule.nr = state->rule.ptr->nr; - pstore.nat_rule.nr = (state->nat_rule.ptr == - NULL) ? (uint32_t)(-1) - : state->nat_rule.ptr->nr; - pstore.anchor.nr = (state->anchor.ptr == - NULL) ? (uint32_t)(-1) - : state->anchor.ptr->nr; - pstore.creation = secs - pstore.creation; - pstore.expire = pf_state_expires(state); - if (pstore.expire > secs) - pstore.expire -= secs; + bcopy(state, pstore, sizeof(*pstore)); + strlcpy(pstore->u.ifname, + state->u.s.kif->pfik_name, + sizeof(pstore->u.ifname)); + pstore->rule.nr = state->rule.ptr->nr; + pstore->nat_rule.nr = (state->nat_rule.ptr == + NULL) ? -1 : state->nat_rule.ptr->nr; + pstore->anchor.nr = (state->anchor.ptr == + NULL) ? 
-1 : state->anchor.ptr->nr; + pstore->creation = secs - pstore->creation; + pstore->expire = pf_state_expires(state); + if (pstore->expire > secs) + pstore->expire -= secs; else - pstore.expire = 0; - error = copyout(&pstore, p, sizeof(*p)); + pstore->expire = 0; + error = copyout(pstore, p, sizeof(*p)); if (error) { - crit_exit(); + kfree(pstore, M_TEMP); goto fail; } p++; nr++; } + state = TAILQ_NEXT(state, u.s.entry_list); + } + ps->ps_len = sizeof(struct pf_state) * nr; - crit_exit(); + + kfree(pstore, M_TEMP); break; } @@ -1619,16 +1811,16 @@ pfioctl(struct dev_ioctl_args *ap) bzero(pf_status.counters, sizeof(pf_status.counters)); bzero(pf_status.fcounters, sizeof(pf_status.fcounters)); bzero(pf_status.scounters, sizeof(pf_status.scounters)); + pf_status.since = time_second; if (*pf_status.ifname) - pfi_clr_istats(pf_status.ifname, NULL, - PFI_FLAG_INSTANCE); + pfi_clr_istats(pf_status.ifname); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; struct pf_state *state; - struct pf_state key; + struct pf_state_cmp key; int m = 0, direction = pnl->direction; key.af = pnl->af; @@ -1637,11 +1829,11 @@ pfioctl(struct dev_ioctl_args *ap) if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || - !pnl->dport || !pnl->sport) + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && + (!pnl->dport || !pnl->sport))) error = EINVAL; else { - crit_enter(); - /* * userland gives us source and dest of connection, * reverse the lookup so we ask for what happens with @@ -1681,7 +1873,6 @@ pfioctl(struct dev_ioctl_args *ap) } } else error = ENOENT; - crit_exit(); } break; } @@ -1696,7 +1887,11 @@ pfioctl(struct dev_ioctl_args *ap) goto fail; } old = pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 1; pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread); pt->seconds = old; break; } @@ -1748,15 +1943,16 @@ pfioctl(struct dev_ioctl_args *ap) } case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_ruleset *ruleset = &pf_main_ruleset; struct pf_rule *rule; - crit_enter(); TAILQ_FOREACH(rule, - ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) - rule->evaluations = rule->packets = - rule->bytes = 0; - crit_exit(); + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } break; } @@ -1780,72 +1976,38 @@ pfioctl(struct dev_ioctl_args *ap) #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; - struct ifnet *ifp; - struct tb_profile tb; /* enable all altq interfaces on active list */ - crit_enter(); TAILQ_FOREACH(altq, pf_altqs_active, entries) { if (altq->qname[0] == 0) { - if ((ifp = ifunit(altq->ifname)) == NULL) { - error = EINVAL; - break; - } - if (ifp->if_snd.altq_type != ALTQT_NONE) - error = altq_enable(&ifp->if_snd); - if (error != 0) - break; - /* set tokenbucket regulator */ - tb.rate = altq->ifbandwidth; - tb.depth = altq->tbrsize; - error = tbr_set(&ifp->if_snd, &tb); + error = pf_enable_altq(altq); if (error != 0) break; } } - crit_exit(); + if (error == 0) + pf_altq_running = 1; DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); break; } case DIOCSTOPALTQ: { struct pf_altq *altq; - struct ifnet *ifp; - struct tb_profile tb; - int err; /* disable all altq interfaces on active list */ - crit_enter(); TAILQ_FOREACH(altq, 
pf_altqs_active, entries) { if (altq->qname[0] == 0) { - if ((ifp = ifunit(altq->ifname)) == NULL) { - error = EINVAL; + error = pf_disable_altq(altq); + if (error != 0) break; - } - if (ifp->if_snd.altq_type != ALTQT_NONE) { - err = altq_disable(&ifp->if_snd); - if (err != 0 && error == 0) - error = err; - } - /* clear tokenbucket regulator */ - tb.rate = 0; - err = tbr_set(&ifp->if_snd, &tb); - if (err != 0 && error == 0) - error = err; } } - crit_exit(); + if (error == 0) + pf_altq_running = 0; DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); break; } - case DIOCBEGINALTQS: { - u_int32_t *ticket = (u_int32_t *)addr; - - error = pf_begin_altq(ticket); - break; - } - case DIOCADDALTQ: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq, *a; @@ -1891,23 +2053,14 @@ pfioctl(struct dev_ioctl_args *ap) break; } - case DIOCCOMMITALTQS: { - u_int32_t ticket = *(u_int32_t *)addr; - - error = pf_commit_altq(ticket); - break; - } - case DIOCGETALTQS: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; pa->nr = 0; - crit_enter(); TAILQ_FOREACH(altq, pf_altqs_active, entries) pa->nr++; pa->ticket = ticket_altqs_active; - crit_exit(); break; } @@ -1921,7 +2074,6 @@ pfioctl(struct dev_ioctl_args *ap) break; } nr = 0; - crit_enter(); altq = TAILQ_FIRST(pf_altqs_active); while ((altq != NULL) && (nr < pa->nr)) { altq = TAILQ_NEXT(altq, entries); @@ -1929,11 +2081,9 @@ pfioctl(struct dev_ioctl_args *ap) } if (altq == NULL) { error = EBUSY; - crit_exit(); break; } bcopy(altq, &pa->altq, sizeof(struct pf_altq)); - crit_exit(); break; } @@ -1954,7 +2104,6 @@ pfioctl(struct dev_ioctl_args *ap) } nbytes = pq->nbytes; nr = 0; - crit_enter(); altq = TAILQ_FIRST(pf_altqs_active); while ((altq != NULL) && (nr < pq->nr)) { altq = TAILQ_NEXT(altq, entries); @@ -1962,11 +2111,9 @@ pfioctl(struct dev_ioctl_args *ap) } if (altq == NULL) { error = EBUSY; - crit_exit(); break; } error = altq_getqstats(altq, pq->buf, &nbytes); - crit_exit(); if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; @@ -1986,6 +2133,10 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + if (pp->ticket != ticket_pabuf) { + error = EBUSY; + break; + } #ifndef INET if (pp->af == AF_INET) { error = EAFNOSUPPORT; @@ -2011,16 +2162,17 @@ pfioctl(struct dev_ioctl_args *ap) } bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr)); if (pa->ifname[0]) { - pa->kif = pfi_attach_rule(pa->ifname); + pa->kif = pfi_kif_get(pa->ifname); if (pa->kif == NULL) { pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; } + pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); } if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); - pfi_detach_rule(pa->kif); + pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, pa); error = EINVAL; break; @@ -2033,17 +2185,14 @@ pfioctl(struct dev_ioctl_args *ap) struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; pp->nr = 0; - crit_enter(); - pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, - pp->r_action, pp->r_num, 0, 1, 0); + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 0); if (pool == NULL) { error = EBUSY; - crit_exit(); break; } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; - crit_exit(); break; } @@ -2051,12 +2200,10 @@ pfioctl(struct dev_ioctl_args *ap) struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; u_int32_t nr = 0; - crit_enter(); - pool = pf_get_pool(pp->anchor, pp->ruleset, pp->ticket, - pp->r_action, pp->r_num, 0, 1, 
1); + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 1); if (pool == NULL) { error = EBUSY; - crit_exit(); break; } pa = TAILQ_FIRST(&pool->list); @@ -2066,13 +2213,12 @@ pfioctl(struct dev_ioctl_args *ap) } if (pa == NULL) { error = EBUSY; - crit_exit(); break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); pfi_dynaddr_copyout(&pp->addr.addr); pf_tbladdr_copyout(&pp->addr.addr); - crit_exit(); + pf_rtlabel_copyout(&pp->addr.addr); break; } @@ -2093,13 +2239,13 @@ pfioctl(struct dev_ioctl_args *ap) break; } - ruleset = pf_find_ruleset(pca->anchor, pca->ruleset); + ruleset = pf_find_ruleset(pca->anchor); if (ruleset == NULL) { error = EBUSY; break; } - pool = pf_get_pool(pca->anchor, pca->ruleset, pca->ticket, - pca->r_action, pca->r_num, pca->r_last, 1, 1); + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); if (pool == NULL) { error = EBUSY; break; @@ -2126,26 +2272,25 @@ pfioctl(struct dev_ioctl_args *ap) } #endif /* INET6 */ if (newpa->ifname[0]) { - newpa->kif = pfi_attach_rule(newpa->ifname); + newpa->kif = pfi_kif_get(newpa->ifname); if (newpa->kif == NULL) { pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; } + pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); } else newpa->kif = NULL; if (pfi_dynaddr_setup(&newpa->addr, pca->af) || pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); - pfi_detach_rule(newpa->kif); + pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, newpa); error = EINVAL; break; } } - crit_enter(); - if (pca->action == PF_CHANGE_ADD_HEAD) oldpa = TAILQ_FIRST(&pool->list); else if (pca->action == PF_CHANGE_ADD_TAIL) @@ -2160,7 +2305,6 @@ pfioctl(struct dev_ioctl_args *ap) } if (oldpa == NULL) { error = EINVAL; - crit_exit(); break; } } @@ -2169,7 +2313,7 @@ pfioctl(struct dev_ioctl_args *ap) TAILQ_REMOVE(&pool->list, oldpa, entries); pfi_dynaddr_remove(&oldpa->addr); pf_tbladdr_remove(&oldpa->addr); - pfi_detach_rule(oldpa->kif); + pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); pool_put(&pf_pooladdr_pl, oldpa); } else { if (oldpa == NULL) @@ -2185,72 +2329,64 @@ pfioctl(struct dev_ioctl_args *ap) pool->cur = TAILQ_FIRST(&pool->list); PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); - crit_exit(); - break; - } - - case DIOCGETANCHORS: { - struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; - struct pf_anchor *anchor; - - pa->nr = 0; - TAILQ_FOREACH(anchor, &pf_anchors, entries) - pa->nr++; - break; - } - - case DIOCGETANCHOR: { - struct pfioc_anchor *pa = (struct pfioc_anchor *)addr; - struct pf_anchor *anchor; - u_int32_t nr = 0; - - anchor = TAILQ_FIRST(&pf_anchors); - while (anchor != NULL && nr < pa->nr) { - anchor = TAILQ_NEXT(anchor, entries); - nr++; - } - if (anchor == NULL) - error = EBUSY; - else - bcopy(anchor->name, pa->name, sizeof(pa->name)); break; } case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; - struct pf_anchor *anchor; struct pf_ruleset *ruleset; + struct pf_anchor *anchor; - pr->anchor[PF_ANCHOR_NAME_SIZE-1] = 0; - if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { error = EINVAL; break; } pr->nr = 0; - TAILQ_FOREACH(ruleset, &anchor->rulesets, entries) - pr->nr++; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) + if (anchor->parent == NULL) + pr->nr++; + } else { + RB_FOREACH(anchor, pf_anchor_node, + 
&ruleset->anchor->children) + pr->nr++; + } break; } case DIOCGETRULESET: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; - struct pf_anchor *anchor; struct pf_ruleset *ruleset; + struct pf_anchor *anchor; u_int32_t nr = 0; - if ((anchor = pf_find_anchor(pr->anchor)) == NULL) { + pr->path[sizeof(pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { error = EINVAL; break; } - ruleset = TAILQ_FIRST(&anchor->rulesets); - while (ruleset != NULL && nr < pr->nr) { - ruleset = TAILQ_NEXT(ruleset, entries); - nr++; + pr->name[0] = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) + if (anchor->parent == NULL && nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + if (nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof(pr->name)); + break; + } } - if (ruleset == NULL) + if (!pr->name[0]) error = EBUSY; - else - bcopy(ruleset->name, pr->name, sizeof(pr->name)); break; } @@ -2387,7 +2523,7 @@ pfioctl(struct dev_ioctl_args *ap) error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL); + PFR_FLAG_USERIOCTL, 0); break; } @@ -2441,31 +2577,6 @@ pfioctl(struct dev_ioctl_args *ap) break; } - case DIOCRINABEGIN: { - struct pfioc_table *io = (struct pfioc_table *)addr; - - if (io->pfrio_esize != 0) { - error = ENODEV; - break; - } - error = pfr_ina_begin(&io->pfrio_table, &io->pfrio_ticket, - &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); - break; - } - - case DIOCRINACOMMIT: { - struct pfioc_table *io = (struct pfioc_table *)addr; - - if (io->pfrio_esize != 0) { - error = ENODEV; - break; - } - error = pfr_ina_commit(&io->pfrio_table, io->pfrio_ticket, - &io->pfrio_nadd, &io->pfrio_nchange, io->pfrio_flags | - PFR_FLAG_USERIOCTL); - break; - } - case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; @@ -2481,166 +2592,214 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; - crit_enter(); error = pf_osfp_add(io); - crit_exit(); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; - crit_enter(); error = pf_osfp_get(io); - crit_exit(); break; } case DIOCXBEGIN: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e ioe; - struct pfr_table table; + struct pfioc_trans_e *ioe; + struct pfr_table *table; int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)kmalloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)kmalloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0] || ioe.ruleset[0]) { + if (ioe->anchor[0]) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_begin_altq(&ioe.ticket))) + if ((error = pf_begin_altq(&ioe->ticket))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - 
strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - strlcpy(table.pfrt_ruleset, ioe.ruleset, - sizeof(table.pfrt_ruleset)); - if ((error = pfr_ina_begin(&table, - &ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_begin(table, + &ioe->ticket, NULL, 0))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; + } break; default: - if ((error = pf_begin_rules(&ioe.ticket, - ioe.rs_num, ioe.anchor, ioe.ruleset))) + if ((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; + } break; } - if (copyout(&ioe, io->array+i, sizeof(io->array[i]))) { + if (copyout(ioe, io->array+i, sizeof(io->array[i]))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EFAULT; goto fail; } } + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); break; } case DIOCXROLLBACK: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e ioe; - struct pfr_table table; + struct pfioc_trans_e *ioe; + struct pfr_table *table; int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)kmalloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)kmalloc(sizeof(*table), + M_TEMP, M_WAITOK); for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0] || ioe.ruleset[0]) { + if (ioe->anchor[0]) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EINVAL; goto fail; } - if ((error = pf_rollback_altq(ioe.ticket))) + if ((error = pf_rollback_altq(ioe->ticket))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - strlcpy(table.pfrt_ruleset, ioe.ruleset, - sizeof(table.pfrt_ruleset)); - if ((error = pfr_ina_rollback(&table, - ioe.ticket, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_rollback(table, + ioe->ticket, NULL, 0))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_rollback_rules(ioe.ticket, - ioe.rs_num, ioe.anchor, ioe.ruleset))) + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); break; } case DIOCXCOMMIT: { struct pfioc_trans *io = (struct pfioc_trans *)addr; - struct pfioc_trans_e ioe; - struct pfr_table table; + struct pfioc_trans_e *ioe; + struct pfr_table *table; struct pf_ruleset *rs; int i; - if (io->esize != sizeof(ioe)) { + if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } + ioe = (struct pfioc_trans_e *)kmalloc(sizeof(*ioe), + M_TEMP, M_WAITOK); + table = (struct pfr_table *)kmalloc(sizeof(*table), + M_TEMP, M_WAITOK); /* first makes sure everything will succeed */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EFAULT; goto fail; } - 
switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if (ioe.anchor[0] || ioe.ruleset[0]) { + if (ioe->anchor[0]) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EINVAL; goto fail; } - if (!altqs_inactive_open || ioe.ticket != + if (!altqs_inactive_open || ioe->ticket != ticket_altqs_inactive) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - rs = pf_find_ruleset(ioe.anchor, ioe.ruleset); - if (rs == NULL || !rs->topen || ioe.ticket != + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EBUSY; goto fail; } break; default: - if (ioe.rs_num < 0 || ioe.rs_num >= + if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EINVAL; goto fail; } - rs = pf_find_ruleset(ioe.anchor, ioe.ruleset); + rs = pf_find_ruleset(ioe->anchor); if (rs == NULL || - !rs->rules[ioe.rs_num].inactive.open || - rs->rules[ioe.rs_num].inactive.ticket != - ioe.ticket) { + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EBUSY; goto fail; } @@ -2649,79 +2808,99 @@ pfioctl(struct dev_ioctl_args *ap) } /* now do the commit - no errors should happen here */ for (i = 0; i < io->size; i++) { - if (copyin(io->array+i, &ioe, sizeof(ioe))) { + if (copyin(io->array+i, ioe, sizeof(*ioe))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); error = EFAULT; goto fail; } - switch (ioe.rs_num) { + switch (ioe->rs_num) { #ifdef ALTQ case PF_RULESET_ALTQ: - if ((error = pf_commit_altq(ioe.ticket))) + if ((error = pf_commit_altq(ioe->ticket))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; /* really bad */ + } break; #endif /* ALTQ */ case PF_RULESET_TABLE: - bzero(&table, sizeof(table)); - strlcpy(table.pfrt_anchor, ioe.anchor, - sizeof(table.pfrt_anchor)); - strlcpy(table.pfrt_ruleset, ioe.ruleset, - sizeof(table.pfrt_ruleset)); - if ((error = pfr_ina_commit(&table, ioe.ticket, - NULL, NULL, 0))) + bzero(table, sizeof(*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof(table->pfrt_anchor)); + if ((error = pfr_ina_commit(table, ioe->ticket, + NULL, NULL, 0))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; /* really bad */ + } break; default: - if ((error = pf_commit_rules(ioe.ticket, - ioe.rs_num, ioe.anchor, ioe.ruleset))) + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); goto fail; /* really bad */ + } break; } } + kfree(table, M_TEMP); + kfree(ioe, M_TEMP); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; - struct pf_src_node *n; - struct pf_src_node *p, pstore; + struct pf_src_node *n, *p, *pstore; u_int32_t nr = 0; int space = psn->psn_len; if (space == 0) { - crit_enter(); RB_FOREACH(n, pf_src_tree, &tree_src_tracking) nr++; - crit_exit(); psn->psn_len = sizeof(struct pf_src_node) * nr; - return (0); + break; } - crit_enter(); + pstore = kmalloc(sizeof(*pstore), M_TEMP, M_WAITOK); + p = psn->psn_src_nodes; RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { - int secs = time_second; + int secs = time_second, diff; if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; - bcopy(n, &pstore, sizeof(pstore)); + bcopy(n, pstore, sizeof(*pstore)); if (n->rule.ptr != NULL) - pstore.rule.nr = n->rule.ptr->nr; - 
pstore.creation = secs - pstore.creation; - if (pstore.expire > secs) - pstore.expire -= secs; + pstore->rule.nr = n->rule.ptr->nr; + pstore->creation = secs - pstore->creation; + if (pstore->expire > secs) + pstore->expire -= secs; + else + pstore->expire = 0; + + /* adjust the connection rate estimate */ + diff = secs - n->conn_rate.last; + if (diff >= n->conn_rate.seconds) + pstore->conn_rate.count = 0; else - pstore.expire = 0; - error = copyout(&pstore, p, sizeof(*p)); + pstore->conn_rate.count -= + n->conn_rate.count * diff / + n->conn_rate.seconds; + + error = copyout(pstore, p, sizeof(*p)); if (error) { - crit_exit(); + kfree(pstore, M_TEMP); goto fail; } p++; nr++; } psn->psn_len = sizeof(struct pf_src_node) * nr; - crit_exit(); + + kfree(pstore, M_TEMP); break; } @@ -2729,7 +2908,6 @@ pfioctl(struct dev_ioctl_args *ap) struct pf_src_node *n; struct pf_state *state; - crit_enter(); RB_FOREACH(state, pf_state_tree_id, &tree_id) { state->src_node = NULL; state->nat_src_node = NULL; @@ -2738,20 +2916,57 @@ pfioctl(struct dev_ioctl_args *ap) n->expire = 1; n->states = 0; } - pf_purge_expired_src_nodes(); + pf_purge_expired_src_nodes(1); pf_status.src_nodes = 0; - crit_exit(); + break; + } + + case DIOCKILLSRCNODES: { + struct pf_src_node *sn; + struct pf_state *s; + struct pfioc_src_node_kill *psnk = \ + (struct pfioc_src_node_kill *) addr; + int killed = 0; + + RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { + if (PF_MATCHA(psnk->psnk_src.neg, \ + &psnk->psnk_src.addr.v.a.addr, \ + &psnk->psnk_src.addr.v.a.mask, \ + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, \ + &psnk->psnk_dst.addr.v.a.addr, \ + &psnk->psnk_dst.addr.v.a.mask, \ + &sn->raddr, sn->af)) { + /* Handle state to src_node linkage */ + if (sn->states != 0) { + RB_FOREACH(s, pf_state_tree_id, + &tree_id) { + if (s->src_node == sn) + s->src_node = NULL; + if (s->nat_src_node == sn) + s->nat_src_node = NULL; + } + sn->states = 0; + } + sn->expire = 1; + killed++; + } + } + + if (killed > 0) + pf_purge_expired_src_nodes(1); + + psnk->psnk_af = killed; break; } case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; - if (*hostid == 0) { - error = EINVAL; - goto fail; - } - pf_status.hostid = *hostid; + if (*hostid == 0) + pf_status.hostid = karc4random(); + else + pf_status.hostid = *hostid; break; } @@ -2764,20 +2979,26 @@ pfioctl(struct dev_ioctl_args *ap) case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; - if (io->pfiio_esize != sizeof(struct pfi_if)) { + if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; break; } error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, - &io->pfiio_size, io->pfiio_flags); + &io->pfiio_size); + break; + } + + case DIOCSETIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); break; } - case DIOCICLRISTATS: { + case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; - error = pfi_clr_istats(io->pfiio_name, &io->pfiio_nzero, - io->pfiio_flags); + error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); break; } @@ -2803,8 +3024,8 @@ pf_clear_states(void) /* don't send out individual delete messages */ state->sync_flags = PFSTATE_NOSYNC; #endif + pf_unlink_state(state); } - pf_purge_expired_states(); pf_status.states = 0; #if 0 /* NPFSYNC */ /* @@ -2842,7 +3063,7 @@ pf_clear_srcnodes(void) n->expire = 1; n->states = 0; } - pf_purge_expired_src_nodes(); + pf_purge_expired_src_nodes(0); pf_status.src_nodes = 0; } /* @@ -2859,42 +3080,37 @@ 
shutdown_pf(void) u_int32_t t[5]; char nn = '\0'; - callout_stop(&pf_expire_to); - pf_status.running = 0; do { - if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn, - &nn)) != 0) { + if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n")); break; } - if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn, - &nn)) != 0) { + if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n")); break; /* XXX: rollback? */ } - if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn, &nn)) - != 0) { + if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n")); break; /* XXX: rollback? */ } - if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn, &nn)) + if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n")); break; /* XXX: rollback? */ } - if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn, &nn)) + if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n")); break; /* XXX: rollback? */ } /* XXX: these should always succeed here */ - pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn, &nn); - pf_commit_rules(t[1], PF_RULESET_FILTER, &nn, &nn); - pf_commit_rules(t[2], PF_RULESET_NAT, &nn, &nn); - pf_commit_rules(t[3], PF_RULESET_BINAT, &nn, &nn); - pf_commit_rules(t[4], PF_RULESET_RDR, &nn, &nn); + pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn); + pf_commit_rules(t[1], PF_RULESET_FILTER, &nn); + pf_commit_rules(t[2], PF_RULESET_NAT, &nn); + pf_commit_rules(t[3], PF_RULESET_BINAT, &nn); + pf_commit_rules(t[4], PF_RULESET_RDR, &nn); if ((error = pf_clear_tables()) != 0) break; @@ -2928,7 +3144,7 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir) */ int chk; - chk = pf_test(PF_IN, ifp, m); + chk = pf_test(PF_IN, ifp, m, NULL, NULL); if (chk && *m) { m_freem(*m); *m = NULL; @@ -2951,7 +3167,7 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir) in_delayed_cksum(*m); (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - chk = pf_test(PF_OUT, ifp, m); + chk = pf_test(PF_OUT, ifp, m, NULL, NULL); if (chk && *m) { m_freem(*m); *m = NULL; @@ -2968,7 +3184,7 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir) */ int chk; - chk = pf_test6(PF_IN, ifp, m); + chk = pf_test6(PF_IN, ifp, m, NULL, NULL); if (chk && *m) { m_freem(*m); *m = NULL; @@ -2989,7 +3205,7 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir) in_delayed_cksum(*m); (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - chk = pf_test6(PF_OUT, ifp, m); + chk = pf_test6(PF_OUT, ifp, m, NULL, NULL); if (chk && *m) { m_freem(*m); *m = NULL; @@ -3063,12 +3279,19 @@ pf_load(void) int error; init_zone_var(); + init_pf_lck(); + kprintf("pf_load() p1\n"); /*XXX*/ pf_dev = make_dev(&pf_ops, 0, 0, 0, 0600, PF_NAME); + kprintf("pf_load() p2\n"); /*XXX*/ error = pfattach(); + kprintf("pf_load() p3\n"); /*XXX*/ if (error) { dev_ops_remove_all(&pf_ops); + destroy_pf_lck(); return (error); } + lockinit(&pf_consistency_lock, "pfconslck", 0, LK_CANRECURSE); + kprintf("pf_load() p4\n"); /*XXX*/ return (0); } @@ -3089,11 +3312,19 @@ pf_unload(void) return error; } shutdown_pf(); + pf_end_threads = 1; + while (pf_end_threads < 2) { + wakeup_one(pf_purge_thread); + lksleep(pf_purge_thread, &pf_task_lck, 0, "pftmo", hz); + + } pfi_cleanup(); pf_osfp_flush(); pf_osfp_cleanup(); cleanup_pf_zone(); 
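The unload path above relies on a small handshake to stop the new purge kthread before the zones and locks are torn down: pf_end_threads is set to 1, the thread is woken with wakeup_one(), and pf_unload() sleeps in lksleep() (with an hz-tick timeout, so the flag is re-checked at least once a second) until the purge thread advances the counter past 1 as it exits. Below is a minimal stand-alone sketch of that "set flag, wake worker, wait for the ack" pattern using portable pthread primitives; the names and primitives are illustrative only and are not part of the patch.

/*
 * Sketch of the pf_unload()/purge-thread shutdown handshake, expressed
 * with pthreads instead of the kernel's wakeup_one()/lksleep().
 */
#include <pthread.h>

static pthread_mutex_t	lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	cv = PTHREAD_COND_INITIALIZER;
static int		end_threads;	/* 0 = run, 1 = stop requested, 2 = stopped */

static void *
purge_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lk);
	while (end_threads == 0) {
		/* the real thread purges expired states/src nodes, then sleeps */
		pthread_cond_wait(&cv, &lk);
	}
	end_threads = 2;			/* acknowledge the shutdown request */
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lk);
	return (NULL);
}

static void
unload(void)
{
	pthread_mutex_lock(&lk);
	end_threads = 1;			/* ask the worker to exit */
	while (end_threads < 2) {
		pthread_cond_broadcast(&cv);	/* wake it ... */
		pthread_cond_wait(&cv, &lk);	/* ... and wait for the ack */
	}
	pthread_mutex_unlock(&lk);
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, purge_thread, NULL);
	unload();
	pthread_join(td, NULL);
	return (0);
}

Because both sides test the flag while holding the lock, there is no lost-wakeup window; the kernel version additionally bounds each sleep with a timeout so pf_unload() cannot hang if a wakeup is missed.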
dev_ops_remove_all(&pf_ops); + lockuninit(&pf_consistency_lock); + destroy_pf_lck(); return 0; } @@ -3122,6 +3353,5 @@ static moduledata_t pf_mod = { pf_modevent, 0 }; - DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST); MODULE_VERSION(pf, PF_MODVER); diff --git a/sys/net/pf/pf_norm.c b/sys/net/pf/pf_norm.c index b02b364397..fdaae265fe 100644 --- a/sys/net/pf/pf_norm.c +++ b/sys/net/pf/pf_norm.c @@ -2,9 +2,10 @@ /* $OpenBSD: pf_norm.c,v 1.80.2.1 2004/04/30 21:46:33 brad Exp $ */ /* add $OpenBSD: pf_norm.c,v 1.87 2004/05/11 07:34:11 dhartmei Exp $ */ /* $DragonFly: src/sys/net/pf/pf_norm.c,v 1.10 2008/09/04 09:08:22 hasso Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */ /* - * Copyright (c) 2004 The DragonFly Project. All rights reserved. + * Copyright (c) 2010 The DragonFly Project. All rights reserved. * * Copyright 2001 Niels Provos * All rights reserved. @@ -65,12 +66,6 @@ #include -#ifdef INET6 -/* - * XXX: This should go to netinet/ip6.h (KAME) - */ -#endif /* INET6 */ - #define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ #define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ #define PFFRAG_DROP 0x0004 /* Drop all fragments */ @@ -80,8 +75,8 @@ TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; -static int pf_frag_compare(struct pf_fragment *, - struct pf_fragment *); +static __inline int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); @@ -96,12 +91,15 @@ struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, struct pf_frent *, int); struct mbuf *pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **, int, int, int *); -u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t); int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int); -#define DPFPRINTF(x) if (pf_status.debug >= PF_DEBUG_MISC) \ - { kprintf("%s: ", __func__); kprintf x ;} +#define DPFPRINTF(x) do { \ + if (pf_status.debug >= PF_DEBUG_MISC) { \ + kprintf("%s: ", __func__); \ + kprintf x ; \ + } \ +} while(0) /* Globals */ vm_zone_t pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl; @@ -122,7 +120,7 @@ pf_normalize_init(void) TAILQ_INIT(&pf_cachequeue); } -static int +static __inline int pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) { int diff; @@ -292,7 +290,7 @@ pf_remove_fragment(struct pf_fragment *frag) } } -#define FR_IP_OFF(fr) (((fr)->fr_ip->ip_off & IP_OFFMASK) << 3) +#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) struct mbuf * pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, struct pf_frent *frent, int mff) @@ -390,7 +388,7 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, break; } - /* This fragment is completely overlapped, loose it */ + /* This fragment is completely overlapped, lose it */ next = LIST_NEXT(frea, fr_next); m_freem(frea->fr_m); LIST_REMOVE(frea, fr_next); @@ -459,23 +457,9 @@ pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, m2 = frent->fr_m; pool_put(&pf_frent_pl, frent); pf_nfrents--; - m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags; - m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data; m_cat(m, m2); } - /* - * Note: this 1's complement optimization with <= 65535 fragments. - * - * Handle 1's complement carry for the 16 bit result. 
This can - * result in another carry which must also be handled. - */ - m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + - (m->m_pkthdr.csum_data >> 16); - if (m->m_pkthdr.csum_data > 0xFFFF) - m->m_pkthdr.csum_data -= 0xFFFF; - - ip->ip_src = (*frag)->fr_src; ip->ip_dst = (*frag)->fr_dst; @@ -815,7 +799,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, } int -pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) +pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, + struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; @@ -832,8 +817,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -842,19 +826,23 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) else if (r->proto && r->proto != h->ip_p) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip_src.s_addr, AF_INET, r->src.not)) + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.not)) + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (hlen < (int)sizeof(struct ip)) @@ -864,8 +852,12 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) goto drop; /* Clear IP_DF if the rule uses the no-df option */ - if (r->rule_flag & PFRULE_NODF) + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ip_off = h->ip_off; + h->ip_off &= ~IP_DF; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* We will need other tests here */ if (!fragoff && !mff) @@ -923,6 +915,18 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) if (m == NULL) return (PF_DROP); + /* use mtag from concatenated mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + kprintf("%s: pf_find_mtag returned NULL(1)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -931,14 +935,13 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) /* non-buffering fragment cache (drops or masks overlaps) */ int nomem = 0; - if (dir == PF_OUT) { - if (m->m_pkthdr.fw_flags & PF_MBUF_FRAGCACHE) { - /* Already passed the fragment cache in the - * input direction. If we continued, it would - * appear to be a dup and would be dropped. - */ - goto fragment_pass; - } + if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) { + /* + * Already passed the fragment cache in the + * input direction. If we continued, it would + * appear to be a dup and would be dropped. 
+ */ + goto fragment_pass; } frag = pf_find_fragment(h, &pf_cache_tree); @@ -959,8 +962,20 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) goto drop; } + /* use mtag from copied and trimmed mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + kprintf("%s: pf_find_mtag returned NULL(2)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif if (dir == PF_IN) - m->m_pkthdr.fw_flags |= PF_MBUF_FRAGCACHE; + pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) goto drop; @@ -969,39 +984,54 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) no_fragment: /* At this point, only IP_DF is allowed in ip_off */ - h->ip_off &= IP_DF; + if (h->ip_off & IP_DF) { + u_int16_t ip_off = h->ip_off; + + h->ip_off &= IP_DF; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0); + } /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } if (r->rule_flag & PFRULE_RANDOMID) { -#ifdef RANDOM_IP_ID + u_int16_t ip_id = h->ip_id; + h->ip_id = ip_randomid(); -#else - h->ip_id = htons(ip_id++); -#endif + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); } + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; return (PF_PASS); fragment_pass: /* Enforce a minimum ttl, may cause endless packet loops */ - if (r->min_ttl && h->ip_ttl < r->min_ttl) - h->ip_ttl = r->min_ttl; + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; return (PF_PASS); no_mem: REASON_SET(reason, PFRES_MEMORY); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); bad: @@ -1013,7 +1043,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } @@ -1021,7 +1051,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason) #ifdef INET6 int pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, - u_short *reason) + u_short *reason, struct pf_pdesc *pd) { struct mbuf *m = *m0; struct pf_rule *r; @@ -1041,8 +1071,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -1053,19 +1082,23 @@ 
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, r = r->skip[PF_SKIP_PROTO].ptr; #endif else if (PF_MISMATCHAW(&r->src.addr, - (struct pf_addr *)&h->ip6_src, AF_INET6, r->src.not)) + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, - (struct pf_addr *)&h->ip6_dst, AF_INET6, r->dst.not)) + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else break; } - if (r == NULL) + if (r == NULL || r->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } /* Check for illegal packets */ if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len) @@ -1171,27 +1204,28 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, goto badfrag; /* do something about it */ + /* remember to set pd->flags |= PFDESC_IP_REAS */ return (PF_PASS); shortpkt: REASON_SET(reason, PFRES_SHORT); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); drop: REASON_SET(reason, PFRES_NORM); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); badfrag: REASON_SET(reason, PFRES_FRAG); if (r != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd); return (PF_DROP); } -#endif +#endif /* INET6 */ int pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, @@ -1207,8 +1241,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { r->evaluations++; - if (r->kif != NULL && - (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot) + if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != dir) r = r->skip[PF_SKIP_DIR].ptr; @@ -1216,12 +1249,14 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; - else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not)) + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], th->th_sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; - else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not)) + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], th->th_dport)) @@ -1236,10 +1271,12 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, } } - if (rm == NULL) + if (rm == NULL || rm->action == PF_NOSCRUB) return (PF_PASS); - else - r->packets++; + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) pd->flags |= PFDESC_TCP_NORM; @@ -1277,13 +1314,13 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, th->th_x2 = 0; nv = *(u_int16_t *)(&th->th_ack + 1); - th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv); 
+ th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); rewrite = 1; } /* Remove urgent pointer, if TH_URG is not set */ if (!(flags & TH_URG) && th->th_urp) { - th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0); + th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); th->th_urp = 0; rewrite = 1; } @@ -1301,7 +1338,7 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, tcp_drop: REASON_SET(&reason, PFRES_NORM); if (rm != NULL && r->log) - PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL); + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd); return (PF_DROP); } @@ -1309,6 +1346,7 @@ int pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) { + u_int32_t tsval, tsecr; u_int8_t hdr[60]; u_int8_t *opt; @@ -1364,6 +1402,16 @@ pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, src->scrub->pfss_flags |= PFSS_TIMESTAMP; src->scrub->pfss_ts_mod = karc4random(); + + /* note PFSS_PAWS not set yet */ + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + memcpy(&tsecr, &opt[6], + sizeof(u_int32_t)); + src->scrub->pfss_tsval0 = ntohl(tsval); + src->scrub->pfss_tsval = ntohl(tsval); + src->scrub->pfss_tsecr = ntohl(tsecr); + getmicrouptime(&src->scrub->pfss_last); } /* FALLTHROUGH */ default: @@ -1390,12 +1438,16 @@ pf_normalize_tcp_cleanup(struct pf_state *state) int pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, - u_short *reason, struct tcphdr *th, struct pf_state_peer *src, - struct pf_state_peer *dst, int *writeback) + u_short *reason, struct tcphdr *th, struct pf_state *state, + struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) { + struct timeval uptime; + u_int32_t tsval, tsecr; + u_int tsval_from_last; u_int8_t hdr[60]; u_int8_t *opt; int copyback = 0; + int got_ts = 0; KASSERT((src->scrub || dst->scrub), ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!")); @@ -1450,32 +1502,46 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, * NAT detection, OS uptime determination or * reboot detection. */ + + if (got_ts) { + /* Huh? Multiple timestamps!? */ + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("multiple TS??")); + pf_print_state(state); + kprintf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } if (opt[1] >= TCPOLEN_TIMESTAMP) { - u_int32_t ts_value; - if (src->scrub && + memcpy(&tsval, &opt[2], + sizeof(u_int32_t)); + if (tsval && src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) { - memcpy(&ts_value, &opt[2], - sizeof(u_int32_t)); - ts_value = htonl(ntohl(ts_value) - + src->scrub->pfss_ts_mod); + tsval = ntohl(tsval); pf_change_a(&opt[2], - &th->th_sum, ts_value, 0); + &th->th_sum, + htonl(tsval + + src->scrub->pfss_ts_mod), + 0); copyback = 1; } /* Modulate TS reply iff valid (!0) */ - memcpy(&ts_value, &opt[6], + memcpy(&tsecr, &opt[6], sizeof(u_int32_t)); - if (ts_value && dst->scrub && + if (tsecr && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { - ts_value = htonl(ntohl(ts_value) - - dst->scrub->pfss_ts_mod); + tsecr = ntohl(tsecr) + - dst->scrub->pfss_ts_mod; pf_change_a(&opt[6], - &th->th_sum, ts_value, 0); + &th->th_sum, htonl(tsecr), + 0); copyback = 1; } + got_ts = 1; } /* FALLTHROUGH */ default: @@ -1494,6 +1560,282 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, } + /* + * Must invalidate PAWS checks on connections idle for too long. + * The fastest allowed timestamp clock is 1ms. 
That turns out to + * be about 24 days before it wraps. XXX Right now our lowerbound + * TS echo check only works for the first 12 days of a connection + * when the TS has exhausted half its 32bit space + */ +#define TS_MAX_IDLE (24*24*60*60) +#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ + + getmicrouptime(&uptime); + if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && + (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || + time_second - state->creation > TS_MAX_CONN)) { + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("src idled out of PAWS\n")); + pf_print_state(state); + kprintf("\n"); + } + src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && + uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("dst idled out of PAWS\n")); + pf_print_state(state); + kprintf("\n"); + } + dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + + if (got_ts && src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* Validate that the timestamps are "in-window". + * RFC1323 describes TCP Timestamp options that allow + * measurement of RTT (round trip time) and PAWS + * (protection against wrapped sequence numbers). PAWS + * gives us a set of rules for rejecting packets on + * long fat pipes (packets that were somehow delayed + * in transit longer than the time it took to send the + * full TCP sequence space of 4Gb). We can use these + * rules and infer a few others that will let us treat + * the 32bit timestamp and the 32bit echoed timestamp + * as sequence numbers to prevent a blind attacker from + * inserting packets into a connection. + * + * RFC1323 tells us: + * - The timestamp on this packet must be greater than + * or equal to the last value echoed by the other + * endpoint. The RFC says those will be discarded + * since it is a dup that has already been acked. + * This gives us a lowerbound on the timestamp. + * timestamp >= other last echoed timestamp + * - The timestamp will be less than or equal to + * the last timestamp plus the time between the + * last packet and now. The RFC defines the max + * clock rate as 1ms. We will allow clocks to be + * up to 10% fast and will allow a total difference + * or 30 seconds due to a route change. And this + * gives us an upperbound on the timestamp. + * timestamp <= last timestamp + max ticks + * We have to be careful here. Windows will send an + * initial timestamp of zero and then initialize it + * to a random value after the 3whs; presumably to + * avoid a DoS by having to call an expensive RNG + * during a SYN flood. Proof MS has at least one + * good security geek. + * + * - The TCP timestamp option must also echo the other + * endpoints timestamp. The timestamp echoed is the + * one carried on the earliest unacknowledged segment + * on the left edge of the sequence window. The RFC + * states that the host will reject any echoed + * timestamps that were larger than any ever sent. + * This gives us an upperbound on the TS echo. + * tescr <= largest_tsval + * - The lowerbound on the TS echo is a little more + * tricky to determine. The other endpoint's echoed + * values will not decrease. But there may be + * network conditions that re-order packets and + * cause our view of them to decrease. 
For now the + * only lowerbound we can safely determine is that + * the TS echo will never be less than the orginal + * TS. XXX There is probably a better lowerbound. + * Remove TS_MAX_CONN with better lowerbound check. + * tescr >= other original TS + * + * It is also important to note that the fastest + * timestamp clock of 1ms will wrap its 32bit space in + * 24 days. So we just disable TS checking after 24 + * days of idle time. We actually must use a 12d + * connection limit until we can come up with a better + * lowerbound to the TS echo check. + */ + struct timeval delta_ts; + int ts_fudge; + + + /* + * PFTM_TS_DIFF is how many seconds of leeway to allow + * a host's timestamp. This can happen if the previous + * packet got delayed in transit for much longer than + * this packet. + */ + if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) + ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; + + + /* Calculate max ticks since the last timestamp */ +#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ +#define TS_MICROSECS 1000000 /* microseconds per second */ +#ifndef timersub +#define timersub(tvp, uvp, vvp) \ + do { \ + (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ + (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ + if ((vvp)->tv_usec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_usec += 1000000; \ + } \ + } while (0) +#endif + + timersub(&uptime, &src->scrub->pfss_last, &delta_ts); + tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; + tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); + + + if ((src->state >= TCPS_ESTABLISHED && + dst->state >= TCPS_ESTABLISHED) && + (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || + SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || + (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || + SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { + /* Bad RFC1323 implementation or an insertion attack. + * + * - Solaris 2.6 and 2.7 are known to send another ACK + * after the FIN,FIN|ACK,ACK closing that carries + * an old timestamp. + */ + + DPFPRINTF(("Timestamp failed %c%c%c%c\n", + SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', + SEQ_GT(tsval, src->scrub->pfss_tsval + + tsval_from_last) ? '1' : ' ', + SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', + SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); + DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " + "idle: %lus %lums\n", + tsval, tsecr, tsval_from_last, delta_ts.tv_sec, + delta_ts.tv_usec / 1000)); + DPFPRINTF((" src->tsval: %u tsecr: %u\n", + src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); + DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" + "\n", dst->scrub->pfss_tsval, + dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); + if (pf_status.debug >= PF_DEBUG_MISC) { + pf_print_state(state); + pf_print_flags(th->th_flags); + kprintf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + + /* XXX I'd really like to require tsecr but it's optional */ + + } else if (!got_ts && (th->th_flags & TH_RST) == 0 && + ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) + || pd->p_len > 0 || (th->th_flags & TH_SYN)) && + src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* Didn't send a timestamp. Timestamps aren't really useful + * when: + * - connection opening or closing (often not even sent). + * but we must not let an attacker to put a FIN on a + * data packet to sneak it through our ESTABLISHED check. + * - on a TCP reset. RFC suggests not even looking at TS. 
+ * - on an empty ACK. The TS will not be echoed so it will + * probably not help keep the RTT calculation in sync and + * there isn't as much danger when the sequence numbers + * got wrapped. So some stacks don't include TS on empty + * ACKs :-( + * + * To minimize the disruption to mostly RFC1323 conformant + * stacks, we will only require timestamps on data packets. + * + * And what do ya know, we cannot require timestamps on data + * packets. There appear to be devices that do legitimate + * TCP connection hijacking. There are HTTP devices that allow + * a 3whs (with timestamps) and then buffer the HTTP request. + * If the intermediate device has the HTTP response cache, it + * will spoof the response but not bother timestamping its + * packets. So we can look for the presence of a timestamp in + * the first data packet and if there, require it in all future + * packets. + */ + + if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { + /* + * Hey! Someone tried to sneak a packet in. Or the + * stack changed its RFC1323 behavior?!?! + */ + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("Did not receive expected RFC1323 " + "timestamp\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + kprintf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + } + + + /* + * We will note if a host sends his data packets with or without + * timestamps. And require all data packets to contain a timestamp + * if the first does. PAWS implicitly requires that all data packets be + * timestamped. But I think there are middle-man devices that hijack + * TCP streams immediately after the 3whs and don't timestamp their + * packets (seen in a WWW accelerator or cache). + */ + if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & + (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { + if (got_ts) + src->scrub->pfss_flags |= PFSS_DATA_TS; + else { + src->scrub->pfss_flags |= PFSS_DATA_NOTS; + if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && + (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* Don't warn if other host rejected RFC1323 */ + DPFPRINTF(("Broken RFC1323 stack did not " + "timestamp data packet. Disabled PAWS " + "security.\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + kprintf("\n"); + } + } + } + + + /* + * Update PAWS values + */ + if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & + (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { + getmicrouptime(&src->scrub->pfss_last); + if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsval = tsval; + + if (tsecr) { + if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsecr = tsecr; + + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && + (SEQ_LT(tsval, src->scrub->pfss_tsval0) || + src->scrub->pfss_tsval0 == 0)) { + /* tsval0 MUST be the lowest timestamp */ + src->scrub->pfss_tsval0 = tsval; + } + + /* Only fully initialized after a TS gets echoed */ + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_flags |= PFSS_PAWS; + } + } + /* I have a dream.... TCP segment reassembly.... 
*/ return (0); } @@ -1530,7 +1872,7 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, mss = (u_int16_t *)(optp + 2); if ((ntohs(*mss)) > r->max_mss) { th->th_sum = pf_cksum_fixup(th->th_sum, - *mss, htons(r->max_mss)); + *mss, htons(r->max_mss), 0); *mss = htons(r->max_mss); rewrite = 1; } diff --git a/sys/net/pf/pf_osfp.c b/sys/net/pf/pf_osfp.c index 328b4c132c..f9ffcc7dfa 100644 --- a/sys/net/pf/pf_osfp.c +++ b/sys/net/pf/pf_osfp.c @@ -1,10 +1,6 @@ -/* $FreeBSD: src/sys/contrib/pf/net/pf_osfp.c,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */ -/* $OpenBSD: pf_osfp.c,v 1.9 2004/01/04 20:08:42 pvalchev Exp $ */ -/* $DragonFly: src/sys/net/pf/pf_osfp.c,v 1.2 2006/12/22 23:44:57 swildner Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ /* - * Copyright (c) 2004 The DragonFly Project. All rights reserved. - * * Copyright (c) 2003 Mike Frantzen * * Permission to use, copy, modify, and distribute this software for any @@ -25,7 +21,6 @@ #include #ifdef _KERNEL # include -# include #endif /* _KERNEL */ #include @@ -37,9 +32,10 @@ #include #include -#ifdef INET6 #include -#endif /* INET6 */ +#ifdef _KERNEL +#include +#endif #ifdef _KERNEL @@ -56,6 +52,7 @@ typedef vm_zone_t pool_t; # include # include # include +# include # define pool_t int # define pool_get(pool, flags) malloc(*(pool)) # define pool_put(pool, item) free(item) @@ -92,38 +89,96 @@ pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, const struct tcphdr *tcp) { struct ip *ip; + struct ip6_hdr *ip6; char hdr[60]; - /* XXX don't have a fingerprint database for IPv6 :-( */ - if (pd->af != PF_INET || pd->proto != IPPROTO_TCP || (tcp->th_off << 2) - < sizeof(*tcp)) + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || (tcp->th_off << 2) < sizeof(*tcp)) return (NULL); - ip = mtod(m, struct ip *); - if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, pd->af)) - return (NULL); + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) return (NULL); - return (pf_osfp_fingerprint_hdr(ip, (struct tcphdr *)hdr)); + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); } #endif /* _KERNEL */ struct pf_osfp_enlist * -pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const struct tcphdr *tcp) { struct pf_os_fingerprint fp, *fpresult; int cnt, optlen = 0; const u_int8_t *optp; +#ifdef _KERNEL + char srcname[128]; +#else + char srcname[NI_MAXHOST]; +#endif - if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN || (ip->ip_off & - IP_OFFMASK)) + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } memset(&fp, 0, sizeof(fp)); - fp.fp_psize = ip->ip_len; - fp.fp_ttl = ip->ip_ttl; - if (ip->ip_off & IP_DF) + if (ip) { +#ifndef _KERNEL + struct sockaddr_in sin; +#endif + + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; +#ifdef _KERNEL + strlcpy(srcname, inet_ntoa(ip->ip_src), sizeof(srcname)); +#else + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr = ip->ip_src; + (void)getnameinfo((struct sockaddr *)&sin, + sizeof(struct sockaddr_in), srcname, 
sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#ifdef INET6 + else if (ip6) { +#ifndef _KERNEL + struct sockaddr_in6 sin6; +#endif + + /* jumbo payload? */ + fp.fp_psize = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; +#ifdef _KERNEL + strlcpy(srcname, ip6_sprintf((struct in6_addr *)&ip6->ip6_src), + sizeof(srcname)); +#else + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = ip6->ip6_src; + (void)getnameinfo((struct sockaddr *)&sin6, + sizeof(struct sockaddr_in6), srcname, sizeof(srcname), + NULL, 0, NI_NUMERICHOST); +#endif + } +#endif + else + return (NULL); fp.fp_wsize = ntohs(tcp->th_win); @@ -151,13 +206,13 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) sizeof(fp.fp_mss)); fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; - fp.fp_mss = ntohs(fp.fp_mss); + NTOHS(fp.fp_mss); break; case TCPOPT_WINDOW: if (optlen >= TCPOLEN_WINDOW) memcpy(&fp.fp_wscale, &optp[2], sizeof(fp.fp_wscale)); - fp.fp_wscale = ntohs(fp.fp_wscale); + NTOHS(fp.fp_wscale); fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_WSCALE; @@ -186,7 +241,7 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct tcphdr *tcp) DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " "(TS=%s,M=%s%d,W=%s%d)\n", - inet_ntoa(ip->ip_src), ntohs(tcp->th_sport), + srcname, ntohs(tcp->th_sport), fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", diff --git a/sys/net/pf/pf_ruleset.c b/sys/net/pf/pf_ruleset.c new file mode 100644 index 0000000000..c16c8f5e2a --- /dev/null +++ b/sys/net/pf/pf_ruleset.c @@ -0,0 +1,419 @@ +/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#include +#ifdef _KERNEL +# include +#endif /* _KERNEL */ +#include + +#include +#include +#include +#include + +#include +#include + +#ifdef INET6 +#include +#endif /* INET6 */ + + +#ifdef _KERNEL +# define DPFPRINTF(format, x...) \ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + kprintf(format , ##x) +#define rs_malloc(x) kmalloc(x, M_TEMP, M_WAITOK) +#define rs_free(x) kfree(x, M_TEMP) +#define printf kprintf +#else +/* Userland equivalents so we can lend code to pfctl et al. */ + +# include +# include +# include +# include +# include +# define rs_malloc(x) malloc(x) +# define rs_free(x) free(x) + +# ifdef PFDEBUG +# include +# define DPFPRINTF(format, x...) fprintf(stderr, format , ##x) +# else +# define DPFPRINTF(format, x...) ((void)0) +# endif /* PFDEBUG */ +#endif /* _KERNEL */ + + +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; + +/* +* XXX: hum? +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); +*/ + +static __inline int pf_anchor_compare(struct pf_anchor *, struct pf_anchor *); + +RB_GENERATE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_GENERATE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); + +static __inline int +pf_anchor_compare(struct pf_anchor *a, struct pf_anchor *b) +{ + int c = strcmp(a->path, b->path); + + return (c ? (c < 0 ? 
-1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof(struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof(*key)); + memset(key, 0, sizeof(*key)); + strlcpy(key->path, path, sizeof(key->path)); + found = RB_FIND(pf_anchor_global, &pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor = NULL, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + bzero(p, MAXPATHLEN); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor *)rs_malloc(sizeof(*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + memset(anchor, 0, sizeof(*anchor)); + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof(anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof(anchor->path)); + strlcat(anchor->path, "/", sizeof(anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof(anchor->path)); + if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + 
pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (&anchor->ruleset); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + bzero(path, MAXPATHLEN); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && !strcmp(p, "/*")) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof(pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof(pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + bzero(a, MAXPATHLEN); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof(pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof(pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? 
"/*" : "*", + sizeof(pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} diff --git a/sys/net/pf/pf_subr.c b/sys/net/pf/pf_subr.c index 4a21bd7c05..6d0122d09c 100644 --- a/sys/net/pf/pf_subr.c +++ b/sys/net/pf/pf_subr.c @@ -56,8 +56,11 @@ #include #include +#include #include +#include +#include /* * This implements additional functions used by pf which can not be ported @@ -73,7 +76,7 @@ hook_establish(struct hook_desc_head *head, int tail, void (*fn)(void *), { struct hook_desc *hdp; - hdp = (struct hook_desc *)kmalloc(sizeof (*hdp), M_DEVBUF, M_NOWAIT); + hdp = kmalloc(sizeof (*hdp), M_DEVBUF, M_WAITOK); if (hdp == NULL) return (NULL); @@ -129,3 +132,106 @@ dohooks(struct hook_desc_head *head, int flags) } } } + + +/* + * Following is where TCP initial sequence number generation occurs. + * + * There are two places where we must use initial sequence numbers: + * 1. In SYN-ACK packets. + * 2. In SYN packets. + * + * All ISNs for SYN-ACK packets are generated by the syncache. See + * tcp_syncache.c for details. + * + * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling + * depends on this property. In addition, these ISNs should be + * unguessable so as to prevent connection hijacking. To satisfy + * the requirements of this situation, the algorithm outlined in + * RFC 1948 is used, with only small modifications. + * + * Implementation details: + * + * Time is based off the system timer, and is corrected so that it + * increases by one megabyte per second. This allows for proper + * recycling on high speed LANs while still leaving over an hour + * before rollover. + * + * As reading the *exact* system time is too expensive to be done + * whenever setting up a TCP connection, we increment the time + * offset in two ways. First, a small random positive increment + * is added to isn_offset for each connection that is set up. + * Second, the function tcp_isn_tick fires once per clock tick + * and increments isn_offset as necessary so that sequence numbers + * are incremented at approximately ISN_BYTES_PER_SECOND. The + * random positive increments serve only to ensure that the same + * exact sequence number is never sent out twice (as could otherwise + * happen when a port is recycled in less than the system tick + * interval.) + * + * net.inet.tcp.isn_reseed_interval controls the number of seconds + * between seeding of isn_secret. This is normally set to zero, + * as reseeding should not be necessary. + * + * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, + * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In + * general, this means holding an exclusive (write) lock. + */ + +#define ISN_BYTES_PER_SECOND 1048576 +#define ISN_STATIC_INCREMENT 4096 +#define ISN_RANDOM_INCREMENT (4096 - 1) + +static u_char pf_isn_secret[32]; +static int pf_isn_last_reseed; +static u_int32_t pf_isn_offset; + +u_int32_t +pf_new_isn(struct pf_state *s) /* From FreeBSD */ +{ + MD5_CTX isn_ctx; + u_int32_t md5_buffer[4]; + u_int32_t new_isn; + struct pf_state_host *src, *dst; + + /* Seed if this is the first use, reseed if requested. 
*/ + if (pf_isn_last_reseed == 0) { + read_random_unlimited(&pf_isn_secret, sizeof(pf_isn_secret)); + pf_isn_last_reseed = ticks; + } + + if (s->direction == PF_IN) { + src = &s->ext; + dst = &s->gwy; + } else { + src = &s->lan; + dst = &s->ext; + } + + /* Compute the md5 hash and return the ISN. */ + MD5Init(&isn_ctx); + MD5Update(&isn_ctx, (u_char *) &dst->port, sizeof(u_short)); + MD5Update(&isn_ctx, (u_char *) &src->port, sizeof(u_short)); +#ifdef INET6 + if (s->af == AF_INET6) { + MD5Update(&isn_ctx, (u_char *) &dst->addr, + sizeof(struct in6_addr)); + MD5Update(&isn_ctx, (u_char *) &src->addr, + sizeof(struct in6_addr)); + } else +#endif + { + MD5Update(&isn_ctx, (u_char *) &dst->addr, + sizeof(struct in_addr)); + MD5Update(&isn_ctx, (u_char *) &src->addr, + sizeof(struct in_addr)); + } + MD5Update(&isn_ctx, (u_char *) &pf_isn_secret, sizeof(pf_isn_secret)); + MD5Final((u_char *) &md5_buffer, &isn_ctx); + new_isn = (tcp_seq) md5_buffer[0]; + pf_isn_offset += ISN_STATIC_INCREMENT + + (karc4random() & ISN_RANDOM_INCREMENT); + new_isn += pf_isn_offset; + return (new_isn); +} + diff --git a/sys/net/pf/pf_table.c b/sys/net/pf/pf_table.c index 937e795fc3..3e3543a24e 100644 --- a/sys/net/pf/pf_table.c +++ b/sys/net/pf/pf_table.c @@ -1,6 +1,7 @@ /* $FreeBSD: src/sys/contrib/pf/net/pf_table.c,v 1.5 2004/07/28 06:14:44 kan Exp $ */ /* $OpenBSD: pf_table.c,v 1.47 2004/03/09 21:44:41 mcbride Exp $ */ /* $DragonFly: src/sys/net/pf/pf_table.c,v 1.5 2006/12/22 23:44:57 swildner Exp $ */ +/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ /* * Copyright (c) 2004 The DragonFly Project. All rights reserved. @@ -134,6 +135,7 @@ struct pfr_walktree { vm_zone_t pfr_ktable_pl; vm_zone_t pfr_kentry_pl; +vm_zone_t pfr_kentry_pl2; struct sockaddr_in pfr_sin; struct sockaddr_in6 pfr_sin6; union sockaddr_union pfr_mask; @@ -147,7 +149,7 @@ void pfr_enqueue_addrs(struct pfr_ktable *, void pfr_mark_addrs(struct pfr_ktable *); struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, struct pfr_addr *, int); -struct pfr_kentry *pfr_create_kentry(struct pfr_addr *); +struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int); void pfr_destroy_kentries(struct pfr_kentryworkq *); void pfr_destroy_kentry(struct pfr_kentry *); void pfr_insert_kentries(struct pfr_ktable *, @@ -164,6 +166,7 @@ int pfr_unroute_kentry(struct pfr_ktable *, struct pfr_kentry *); int pfr_walktree(struct radix_node *, void *); int pfr_validate_table(struct pfr_table *, int, int); +int pfr_fix_anchor(char *); void pfr_commit_ktable(struct pfr_ktable *, long); void pfr_insert_ktables(struct pfr_ktableworkq *); void pfr_insert_ktable(struct pfr_ktable *); @@ -275,7 +278,7 @@ pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_NONE; } if (p == NULL && q == NULL) { - p = pfr_create_kentry(&ad); + p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -320,7 +323,7 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, struct pfr_kentryworkq workq; struct pfr_kentry *p; struct pfr_addr ad; - int i, rv, xdel = 0; + int i, rv, xdel = 0, log = 1; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) @@ -330,7 +333,34 @@ pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (ESRCH); if (kt->pfrkt_flags & PFR_TFLAG_CONST) return (EPERM); - pfr_mark_addrs(kt); + /* + * there are two algorithms to choose from here. 
+ * with: + * n: number of addresses to delete + * N: number of addresses in the table + * + * one is O(N) and is better for large 'n' + * one is O(n*LOG(N)) and is better for small 'n' + * + * following code try to decide which one is best. + */ + for (i = kt->pfrkt_cnt; i > 0; i >>= 1) + log++; + if (size > kt->pfrkt_cnt/log) { + /* full table scan */ + pfr_mark_addrs(kt); + } else { + /* iterate over addresses to delete */ + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof(ad))) + return (EFAULT); + if (pfr_validate_addr(&ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) + p->pfrke_mark = 0; + } + } SLIST_INIT(&workq); for (i = 0; i < size; i++) { if (COPYIN(addr+i, &ad, sizeof(ad))) @@ -376,7 +406,8 @@ _bad: int pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, - int *size2, int *nadd, int *ndel, int *nchange, int flags) + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) { struct pfr_ktable *kt, *tmpkt; struct pfr_kentryworkq addq, delq, changeq; @@ -386,7 +417,8 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_FEEDBACK); - if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) return (EINVAL); kt = pfr_lookup_table(tbl); if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -424,7 +456,7 @@ pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, ad.pfra_fback = PFR_FB_DUPLICATE; goto _skip; } - p = pfr_create_kentry(&ad); + p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(tmpkt, p)) { @@ -669,14 +701,18 @@ pfr_validate_addr(struct pfr_addr *ad) int i; switch (ad->pfra_af) { +#ifdef INET case AF_INET: if (ad->pfra_net > 32) return (-1); break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: if (ad->pfra_net > 128) return (-1); break; +#endif /* INET6 */ default: return (-1); } @@ -731,14 +767,14 @@ struct pfr_kentry * pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) { union sockaddr_union sa, mask; - struct radix_node_head *head; + struct radix_node_head *head = NULL; struct pfr_kentry *ke; bzero(&sa, sizeof(sa)); if (ad->pfra_af == AF_INET) { FILLIN_SIN(sa.sin, ad->pfra_ip4addr); head = kt->pfrkt_ip4; - } else { + } else if ( ad->pfra_af == AF_INET6 ) { FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr); head = kt->pfrkt_ip6; } @@ -761,22 +797,26 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact) } struct pfr_kentry * -pfr_create_kentry(struct pfr_addr *ad) +pfr_create_kentry(struct pfr_addr *ad, int intr) { struct pfr_kentry *ke; - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); + if (intr) + ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT); + else + ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); if (ke == NULL) return (NULL); bzero(ke, sizeof(*ke)); if (ad->pfra_af == AF_INET) FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); - else + else if (ad->pfra_af == AF_INET6) FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr); ke->pfrke_af = ad->pfra_af; ke->pfrke_net = ad->pfra_net; ke->pfrke_not = ad->pfra_not; + ke->pfrke_intrpool = intr; return (ke); } @@ -794,7 +834,10 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) void pfr_destroy_kentry(struct pfr_kentry *ke) { - pool_put(&pfr_kentry_pl, ke); + if (ke->pfrke_intrpool) + pool_put(&pfr_kentry_pl2, ke); + else + pool_put(&pfr_kentry_pl, ke); } void @@ -817,6 +860,29 @@ 
pfr_insert_kentries(struct pfr_ktable *kt, kt->pfrkt_cnt += n; } +int +pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, long tzero) +{ + struct pfr_kentry *p; + int rv; + + p = pfr_lookup_addr(kt, ad, 1); + if (p != NULL) + return (0); + p = pfr_create_kentry(ad, 1); + if (p == NULL) + return (EINVAL); + + rv = pfr_route_kentry(kt, p); + if (rv) + return (rv); + + p->pfrke_tzero = tzero; + kt->pfrkt_cnt++; + + return (0); +} + void pfr_remove_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) @@ -882,14 +948,14 @@ pfr_prepare_network(union sockaddr_union *sa, int af, int net) if (af == AF_INET) { sa->sin.sin_len = sizeof(sa->sin); sa->sin.sin_family = AF_INET; - sa->sin.sin_addr.s_addr = htonl(-1 << (32-net)); - } else { + sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0; + } else if (af == AF_INET6) { sa->sin6.sin6_len = sizeof(sa->sin6); sa->sin6.sin6_family = AF_INET6; for (i = 0; i < 4; i++) { if (net <= 32) { sa->sin6.sin6_addr.s6_addr32[i] = - htonl(-1 << (32-net)); + net ? htonl(-1 << (32-net)) : 0; break; } sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF; @@ -903,12 +969,12 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; - struct radix_node_head *head; + struct radix_node_head *head = NULL; bzero(ke->pfrke_node, sizeof(ke->pfrke_node)); if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; - else + else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; crit_enter(); @@ -929,11 +995,11 @@ pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) { union sockaddr_union mask; struct radix_node *rn; - struct radix_node_head *head; + struct radix_node_head *head = NULL; if (ke->pfrke_af == AF_INET) head = kt->pfrkt_ip4; - else + else if (ke->pfrke_af == AF_INET6) head = kt->pfrkt_ip6; crit_enter(); @@ -962,7 +1028,7 @@ pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke) ad->pfra_not = ke->pfrke_not; if (ad->pfra_af == AF_INET) ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr; - else + else if (ad->pfra_af == AF_INET6) ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr; } @@ -971,7 +1037,7 @@ pfr_walktree(struct radix_node *rn, void *arg) { struct pfr_kentry *ke = (struct pfr_kentry *)rn; struct pfr_walktree *w = arg; - int flags = w->pfrw_flags; + int flags = w->pfrw_flags; switch (w->pfrw_op) { case PFRW_MARK: @@ -1031,7 +1097,7 @@ pfr_walktree(struct radix_node *rn, void *arg) &ke->pfrke_sa, AF_INET); w->pfrw_dyn->pfid_mask4 = *SUNION2PF( &pfr_mask, AF_INET); - } else { + } else if (ke->pfrke_af == AF_INET6){ if (w->pfrw_dyn->pfid_acnt6++ > 0) break; pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); @@ -1053,6 +1119,8 @@ pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY+PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); if (pfr_table_count(filter, flags) < 0) return (ENOENT); @@ -1114,7 +1182,6 @@ pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) /* find or create root table */ bzero(key.pfrkt_anchor, sizeof(key.pfrkt_anchor)); - bzero(key.pfrkt_ruleset, sizeof(key.pfrkt_ruleset)); r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); if (r != NULL) { p->pfrkt_root = r; @@ -1209,6 +1276,8 @@ pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, int n, nn; ACCEPT_FLAGS(PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); n = nn = pfr_table_count(filter, flags); if (n < 0) return (ENOENT); @@ -1243,6 +1312,8 
@@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, ACCEPT_FLAGS(PFR_FLAG_ATOMIC|PFR_FLAG_ALLRSETS); /* XXX PFR_FLAG_CLSTATS disabled */ + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); n = nn = pfr_table_count(filter, flags); if (n < 0) return (ENOENT); @@ -1377,7 +1448,7 @@ pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY); - rs = pf_find_or_create_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor); if (rs == NULL) return (ENOMEM); SLIST_INIT(&workq); @@ -1419,7 +1490,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, flags & PFR_FLAG_USERIOCTL)) return (EINVAL); - rs = pf_find_ruleset(tbl->pfrt_anchor, tbl->pfrt_ruleset); + rs = pf_find_ruleset(tbl->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; @@ -1465,7 +1536,7 @@ _skip: senderr(EINVAL); if (pfr_lookup_addr(shadow, &ad, 1) != NULL) continue; - p = pfr_create_kentry(&ad); + p = pfr_create_kentry(&ad, 0); if (p == NULL) senderr(ENOMEM); if (pfr_route_kentry(shadow, p)) { @@ -1510,7 +1581,7 @@ pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) int xdel = 0; ACCEPT_FLAGS(PFR_FLAG_DUMMY); - rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (0); SLIST_INIT(&workq); @@ -1536,14 +1607,14 @@ int pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, int *nchange, int flags) { - struct pfr_ktable *p; + struct pfr_ktable *p, *q; struct pfr_ktableworkq workq; struct pf_ruleset *rs; int xadd = 0, xchange = 0; long tzero = time_second; ACCEPT_FLAGS(PFR_FLAG_ATOMIC+PFR_FLAG_DUMMY); - rs = pf_find_ruleset(trs->pfrt_anchor, trs->pfrt_ruleset); + rs = pf_find_ruleset(trs->pfrt_anchor); if (rs == NULL || !rs->topen || ticket != rs->tticket) return (EBUSY); @@ -1562,8 +1633,10 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, if (!(flags & PFR_FLAG_DUMMY)) { if (flags & PFR_FLAG_ATOMIC) crit_enter(); - SLIST_FOREACH(p, &workq, pfrkt_workq) + for (p = SLIST_FIRST(&workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); pfr_commit_ktable(p, tzero); + } if (flags & PFR_FLAG_ATOMIC) crit_exit(); rs->topen = 0; @@ -1650,27 +1723,52 @@ pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved) for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++) if (tbl->pfrt_name[i]) return (-1); + if (pfr_fix_anchor(tbl->pfrt_anchor)) + return (-1); if (tbl->pfrt_flags & ~allowedflags) return (-1); return (0); } +/* + * Rewrite anchors referenced by tables to remove slashes + * and check for validity. 
+ */ +int +pfr_fix_anchor(char *anchor) +{ + size_t siz = MAXPATHLEN; + int i; + + if (anchor[0] == '/') { + char *path; + int off; + + path = anchor; + off = 1; + while (*++path == '/') + off++; + bcopy(path, anchor, siz - off); + memset(anchor + siz - off, 0, off); + } + if (anchor[siz - 1]) + return (-1); + for (i = strlen(anchor); i < siz; i++) + if (anchor[i]) + return (-1); + return (0); +} + int pfr_table_count(struct pfr_table *filter, int flags) { struct pf_ruleset *rs; - struct pf_anchor *ac; if (flags & PFR_FLAG_ALLRSETS) return (pfr_ktable_cnt); - if (filter->pfrt_ruleset[0]) { - rs = pf_find_ruleset(filter->pfrt_anchor, - filter->pfrt_ruleset); - return ((rs != NULL) ? rs->tables : -1); - } if (filter->pfrt_anchor[0]) { - ac = pf_find_anchor(filter->pfrt_anchor); - return ((ac != NULL) ? ac->tables : -1); + rs = pf_find_ruleset(filter->pfrt_anchor); + return ((rs != NULL) ? rs->tables : -1); } return (pf_main_ruleset.tables); } @@ -1680,13 +1778,7 @@ pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags) { if (flags & PFR_FLAG_ALLRSETS) return (0); - if (strncmp(filter->pfrt_anchor, kt->pfrkt_anchor, - PF_ANCHOR_NAME_SIZE)) - return (1); - if (!filter->pfrt_ruleset[0]) - return (0); - if (strncmp(filter->pfrt_ruleset, kt->pfrkt_ruleset, - PF_RULESET_NAME_SIZE)) + if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor)) return (1); return (0); } @@ -1714,10 +1806,12 @@ pfr_insert_ktable(struct pfr_ktable *kt) void pfr_setflags_ktables(struct pfr_ktableworkq *workq) { - struct pfr_ktable *p; + struct pfr_ktable *p, *q; - SLIST_FOREACH(p, workq, pfrkt_workq) + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); pfr_setflags_ktable(p, p->pfrkt_nflags); + } } void @@ -1791,16 +1885,13 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) kt->pfrkt_t = *tbl; if (attachruleset) { - rs = pf_find_or_create_ruleset(tbl->pfrt_anchor, - tbl->pfrt_ruleset); + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); if (!rs) { pfr_destroy_ktable(kt, 0); return (NULL); } kt->pfrkt_rs = rs; rs->tables++; - if (rs->anchor != NULL) - rs->anchor->tables++; } if (!rn_inithead((void **)&kt->pfrkt_ip4, @@ -1844,8 +1935,6 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); if (kt->pfrkt_rs != NULL) { kt->pfrkt_rs->tables--; - if (kt->pfrkt_rs->anchor != NULL) - kt->pfrkt_rs->anchor->tables--; pf_remove_if_empty_ruleset(kt->pfrkt_rs); } pool_put(&pfr_ktable_pl, kt); @@ -1858,11 +1947,7 @@ pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) return (d); - if ((d = strncmp(p->pfrkt_anchor, q->pfrkt_anchor, - PF_ANCHOR_NAME_SIZE))) - return (d); - return (strncmp(p->pfrkt_ruleset, q->pfrkt_ruleset, - PF_RULESET_NAME_SIZE)); + return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); } struct pfr_ktable * @@ -1885,6 +1970,7 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) return (0); switch (af) { +#ifdef INET case AF_INET: pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match((char *)&pfr_sin, @@ -1892,6 +1978,8 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match((char *)&pfr_sin6, @@ -1899,6 +1987,7 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, 
sa_family_t af) if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET6 */ } match = (ke && !ke->pfrke_not); if (match) @@ -1920,6 +2009,7 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, return; switch (af) { +#ifdef INET case AF_INET: pfr_sin.sin_addr.s_addr = a->addr32[0]; ke = (struct pfr_kentry *)rn_match((char *)&pfr_sin, @@ -1927,6 +2017,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET */ +#ifdef INET6 case AF_INET6: bcopy(a, &pfr_sin6.sin6_addr, sizeof(pfr_sin6.sin6_addr)); ke = (struct pfr_kentry *)rn_match((char *)&pfr_sin6, @@ -1934,6 +2026,9 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, if (ke && KENTRY_RNF_ROOT(ke)) ke = NULL; break; +#endif /* INET6 */ + default: + ; } if ((ke == NULL || ke->pfrke_not) != notrule) { if (op_pass != PFR_OP_PASS) @@ -1957,10 +2052,8 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) bzero(&tbl, sizeof(tbl)); strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)); - if (ac != NULL) { - strlcpy(tbl.pfrt_anchor, ac->name, sizeof(tbl.pfrt_anchor)); - strlcpy(tbl.pfrt_ruleset, rs->name, sizeof(tbl.pfrt_ruleset)); - } + if (ac != NULL) + strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor)); kt = pfr_lookup_table(&tbl); if (kt == NULL) { kt = pfr_create_ktable(&tbl, time_second, 1); @@ -1968,7 +2061,6 @@ pfr_attach_table(struct pf_ruleset *rs, char *name) return (NULL); if (ac != NULL) { bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor)); - bzero(tbl.pfrt_ruleset, sizeof(tbl.pfrt_ruleset)); rt = pfr_lookup_table(&tbl); if (rt == NULL) { rt = pfr_create_ktable(&tbl, 0, 1); @@ -2001,13 +2093,15 @@ int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) { - struct pfr_kentry *ke, *ke2; - struct pf_addr *addr; + struct pfr_kentry *ke, *ke2 = NULL; + struct pf_addr *addr = NULL; union sockaddr_union mask; int idx = -1, use_counter = 0; - addr = (af == AF_INET) ? (struct pf_addr *)&pfr_sin.sin_addr : - (struct pf_addr *)&pfr_sin6.sin6_addr; + if (af == AF_INET) + addr = (struct pf_addr *)&pfr_sin.sin_addr; + else if (af == AF_INET6) + addr = (struct pf_addr *)&pfr_sin6.sin6_addr; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) @@ -2050,9 +2144,12 @@ _next_block: } for (;;) { /* we don't want to use a nested block */ - ke2 = (struct pfr_kentry *)(af == AF_INET ? 
- rn_match((char *)&pfr_sin, kt->pfrkt_ip4) : - rn_match((char *)&pfr_sin6, kt->pfrkt_ip6)); + if (af == AF_INET) + ke2 = (struct pfr_kentry *)rn_match((char *)&pfr_sin, + kt->pfrkt_ip4); + else if (af == AF_INET6) + ke2 = (struct pfr_kentry *)rn_match((char *)&pfr_sin6, + kt->pfrkt_ip6); /* no need to check KENTRY_RNF_ROOT() here */ if (ke2 == ke) { /* lookup return the same block - perfect */ @@ -2085,12 +2182,16 @@ pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) w.pfrw_cnt = idx; switch (af) { +#ifdef INET case AF_INET: kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); return (w.pfrw_kentry); +#endif /* INET */ +#ifdef INET6 case AF_INET6: kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree, &w); return (w.pfrw_kentry); +#endif /* INET6 */ default: return (NULL); } diff --git a/sys/net/pf/pfvar.h b/sys/net/pf/pfvar.h index 01a1229993..d187cd5c69 100644 --- a/sys/net/pf/pfvar.h +++ b/sys/net/pf/pfvar.h @@ -2,6 +2,7 @@ /* $OpenBSD: pfvar.h,v 1.187 2004/03/22 04:54:18 mcbride Exp $ */ /* add $OpenBSD: pfvar.h,v 1.194 2004/05/11 07:34:11 dhartmei Exp $ */ /* $DragonFly: src/sys/net/pf/pfvar.h,v 1.8 2008/04/11 18:21:48 dillon Exp $ */ +/* $OpenBSD: pfvar.h,v 1.244 2007/02/23 21:31:51 deraadt Exp $ */ /* * Copyright (c) 2004 The DragonFly Project. All rights reserved. @@ -38,14 +39,18 @@ #ifndef _NET_PFVAR_H_ #define _NET_PFVAR_H_ +#include #include +#include #include #include #include +#include #include #include #include +#include #ifdef _KERNEL #include @@ -65,13 +70,30 @@ union sockaddr_union { #include struct ip; +struct ip6_hdr; #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) + +#define RTLABEL_LEN 32 +#define IFG_ALL "all" /* group contains all interfaces */ +#define BPF_DIRECTION_OUT (1<<1) +#define PWAIT 0 +#define RT_NUMFIBS 1 +#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED) + + +#define PF_MD5_DIGEST_LENGTH 16 +#ifdef MD5_DIGEST_LENGTH +#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH +#error +#endif +#endif + enum { PF_INOUT, PF_IN, PF_OUT }; enum { PF_LAN_EXT, PF_EXT_GWY, PF_ID }; -enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NAT, PF_NONAT, +enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; @@ -81,6 +103,8 @@ enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +enum { PF_GET_NONE, PF_GET_CLR_CNTR }; + /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before * PFTM_MAX, special cases afterwards. See pf_state_expires(). 
@@ -92,19 +116,46 @@ enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, - PFTM_MAX, PFTM_PURGE, PFTM_UNTIL_PACKET }; + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED, + PFTM_UNTIL_PACKET }; + +/* PFTM default values */ +#define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ +#define PFTM_TCP_OPENING_VAL 30 /* No response yet */ +#define PFTM_TCP_ESTABLISHED_VAL 24*60*60/* Established */ +#define PFTM_TCP_CLOSING_VAL 15 * 60 /* Half closed */ +#define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */ +#define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */ +#define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */ +#define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */ +#define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */ +#define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */ +#define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_FRAG_VAL 30 /* Fragment expire */ +#define PFTM_INTERVAL_VAL 10 /* Expire interval */ +#define PFTM_SRC_NODE_VAL 0 /* Source tracking */ +#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ + enum { PF_NOPFROUTE, PF_FASTROUTE, PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; -enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, PF_LIMIT_MAX }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; #define PF_POOL_IDMASK 0x0f enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, - PF_ADDR_TABLE }; + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED }; #define PF_POOL_TYPEMASK 0x0f #define PF_POOL_STICKYADDR 0x20 #define PF_WSCALE_FLAG 0x80 #define PF_WSCALE_MASK 0x0f +#define PF_LOG 0x01 +#define PF_LOG_ALL 0x02 +#define PF_LOG_SOCKET_LOOKUP 0x04 + struct pf_addr { union { struct in_addr v4; @@ -136,6 +187,8 @@ struct pf_addr_wrap { } a; char ifname[IFNAMSIZ]; char tblname[PF_TABLE_NAME_SIZE]; + char rtlabelname[RTLABEL_LEN]; + u_int32_t rtlabel; } v; union { struct pfi_dynaddr *dyn; @@ -150,18 +203,19 @@ struct pf_addr_wrap { #ifdef _KERNEL struct pfi_dynaddr { - struct pf_addr pfid_addr4; - struct pf_addr pfid_mask4; - struct pf_addr pfid_addr6; - struct pf_addr pfid_mask6; - struct pfr_ktable *pfid_kt; - struct pfi_kif *pfid_kif; - void *pfid_hook_cookie; - int pfid_net; /* optional mask, or 128 */ - int pfid_acnt4; /* address count, IPv4 */ - int pfid_acnt6; /* address count, IPv6 */ - sa_family_t pfid_af; /* rule address family */ - u_int8_t pfid_iflags; /* PFI_AFLAG_* */ + TAILQ_ENTRY(pfi_dynaddr) entry; + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + void *pfid_hook_cookie; + int pfid_net; /* mask or 128 */ + int pfid_acnt4; /* address count IPv4 */ + int pfid_acnt6; /* address count IPv6 */ + sa_family_t pfid_af; /* rule af */ + u_int8_t pfid_iflags; /* PFI_AFLAG_* */ }; /* @@ -177,6 +231,9 @@ struct pfi_dynaddr { #define pool_get(p, f) zalloc(*(p)) #define pool_put(p, o) zfree(*(p), (o)) +#define NTOHS(x) (x) = ntohs((__uint16_t)(x)) +#define HTONS(x) (x) = htons((__uint16_t)(x)) + #define PF_NAME "pf" #define PF_MODVER 1 @@ 
-334,21 +391,26 @@ void dohooks(struct hook_desc_head *, int); #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ -#define PF_MISMATCHAW(aw, x, af, not) \ - ( \ - (((aw)->type == PF_ADDR_NOROUTE && \ - pf_routable((x), (af))) || \ - ((aw)->type == PF_ADDR_TABLE && \ - !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ - ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ - ((aw)->type == PF_ADDR_ADDRMASK && \ - !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ - &(aw)->v.a.mask, (x), (af)))) != \ - (not) \ +#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af), NULL)) || \ + (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ + pf_routable((x), (af), (ifp))) || \ + ((aw)->type == PF_ADDR_RTLABEL && \ + !pf_rtlabel_match((x), (af), (aw))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) != \ + (neg) \ ) + struct pf_rule_uid { uid_t uid[2]; u_int8_t op; @@ -362,7 +424,7 @@ struct pf_rule_gid { struct pf_rule_addr { struct pf_addr_wrap addr; u_int16_t port[2]; - u_int8_t not; + u_int8_t neg; u_int8_t port_op; }; @@ -466,6 +528,7 @@ struct pf_os_fingerprint { #define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ #define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ #define PF_OSFP_TS0 0x2000 /* Zero timestamp */ +#define PF_OSFP_INET6 0x4000 /* IPv6 */ u_int8_t fp_optcnt; /* TCP option count */ u_int8_t fp_wscale; /* TCP window scaling */ u_int8_t fp_ttl; /* IPv4 TTL */ @@ -504,6 +567,8 @@ union pf_rule_ptr { u_int32_t nr; }; +#define PF_ANCHOR_NAME_SIZE 64 + struct pf_rule { struct pf_rule_addr src; struct pf_rule_addr dst; @@ -519,39 +584,48 @@ struct pf_rule { union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; -#define PF_QNAME_SIZE 16 +#define PF_QNAME_SIZE 64 char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; -#define PF_ANCHOR_NAME_SIZE 16 - char anchorname[PF_ANCHOR_NAME_SIZE]; -#define PF_TAG_NAME_SIZE 16 +#define PF_TAG_NAME_SIZE 64 char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; + char overload_tblname[PF_TABLE_NAME_SIZE]; + TAILQ_ENTRY(pf_rule) entries; struct pf_pool rpool; u_int64_t evaluations; - u_int64_t packets; - u_int64_t bytes; + u_int64_t packets[2]; + u_int64_t bytes[2]; struct pfi_kif *kif; struct pf_anchor *anchor; + struct pfr_ktable *overload_tbl; pf_osfp_t os_fingerprint; + int rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t states; u_int32_t max_states; u_int32_t src_nodes; u_int32_t max_src_nodes; u_int32_t max_src_states; + u_int32_t max_src_conn; + struct { + u_int32_t limit; + u_int32_t seconds; + } max_src_conn_rate; u_int32_t qid; u_int32_t pqid; u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; + uid_t cuid; + pid_t cpid; u_int16_t return_icmp; u_int16_t return_icmp6; @@ -566,6 +640,7 @@ struct pf_rule { u_int8_t action; u_int8_t direction; u_int8_t log; + u_int8_t logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; @@ -586,6 +661,13 @@ struct pf_rule { u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; + u_int8_t anchor_relative; + u_int8_t anchor_wildcard; + +#define PF_FLUSH 0x01 +#define PF_FLUSH_GLOBAL 0x02 + u_int8_t flush; + #define PF_PICKUPS_UNSPECIFIED 0 
#define PF_PICKUPS_DISABLED 1 #define PF_PICKUPS_HASHONLY 2 @@ -613,9 +695,20 @@ struct pf_rule { /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ -#define PFRULE_GRBOUND 0x00020000 /* group-bound */ #define PFSTATE_HIWAT 10000 /* default state table size */ +#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ + + +struct pf_threshold { + u_int32_t limit; +#define PF_THRESHOLD_MULT 1000 +#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT + u_int32_t seconds; + u_int32_t count; + u_int32_t last; +}; struct pf_src_node { RB_ENTRY(pf_src_node) entry; @@ -623,9 +716,11 @@ struct pf_src_node { struct pf_addr raddr; union pf_rule_ptr rule; struct pfi_kif *kif; - u_int32_t bytes; - u_int32_t packets; + u_int64_t bytes[2]; + u_int64_t packets[2]; u_int32_t states; + u_int32_t conn; + struct pf_threshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; @@ -635,11 +730,19 @@ struct pf_src_node { #define PFSNODE_HIWAT 10000 /* default source node table size */ struct pf_state_scrub { + struct timeval pfss_last; /* time received last packet */ + u_int32_t pfss_tsecr; /* last echoed timestamp */ + u_int32_t pfss_tsval; /* largest timestamp */ + u_int32_t pfss_tsval0; /* original timestamp */ u_int16_t pfss_flags; -#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ - u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ +#define PFSS_PAWS 0x0010 /* stricter PAWS checks */ +#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */ +#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ +#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ + u_int8_t pfss_ttl; /* stashed TTL */ u_int8_t pad; - u_int32_t pfss_ts_mod; /* timestamp modulation */ + u_int32_t pfss_ts_mod; /* timestamp modulation */ }; struct pf_state_host { @@ -656,26 +759,56 @@ struct pf_state_peer { u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int16_t mss; /* Maximum segment size option */ + u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ struct pf_state_scrub *scrub; /* state is scrubbed */ + u_int8_t pad[3]; }; TAILQ_HEAD(pf_state_queue, pf_state); +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; +}; + struct pf_state { u_int64_t id; + u_int32_t creatorid; + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t pad; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; + u_int8_t pickup_mode; +#define PFSTATE_NOSYNC 0x01 +#define PFSTATE_FROMSYNC 0x02 +#define PFSTATE_STALE 0x04 +#define PFSTATE_GOT_SYN1 0x04 /* got SYN in one direction */ +#define PFSTATE_GOT_SYN2 0x08 /* got SYN in the other direction */ union { struct { RB_ENTRY(pf_state) entry_lan_ext; RB_ENTRY(pf_state) entry_ext_gwy; RB_ENTRY(pf_state) entry_id; - TAILQ_ENTRY(pf_state) entry_updates; + TAILQ_ENTRY(pf_state) entry_list; struct pfi_kif *kif; } s; char ifname[IFNAMSIZ]; } u; - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; struct pf_state_peer src; struct pf_state_peer dst; union pf_rule_ptr rule; @@ -685,26 +818,13 @@ struct pf_state { struct 
pfi_kif *rt_kif; struct pf_src_node *src_node; struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; u_int32_t hash; u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; - u_int32_t packets[2]; - u_int32_t bytes[2]; - u_int32_t creatorid; - sa_family_t af; - u_int8_t proto; - u_int8_t direction; - u_int8_t log; - u_int8_t allow_opts; - u_int8_t timeout; - u_int8_t sync_flags; -#define PFSTATE_NOSYNC 0x01 -#define PFSTATE_FROMSYNC 0x02 -#define PFSTATE_GOT_SYN1 0x04 /* got SYN in one direction */ -#define PFSTATE_GOT_SYN2 0x08 /* got SYN in the other direction */ - u_int8_t pickup_mode; - u_int8_t pad; + u_int16_t tag; }; #define PFSTATE_GOT_SYN_MASK (PFSTATE_GOT_SYN1|PFSTATE_GOT_SYN2) @@ -714,13 +834,12 @@ TAILQ_HEAD(pf_rulequeue, pf_rule); struct pf_anchor; struct pf_ruleset { - TAILQ_ENTRY(pf_ruleset) entries; -#define PF_RULESET_NAME_SIZE 16 - char name[PF_RULESET_NAME_SIZE]; struct { struct pf_rulequeue queues[2]; struct { struct pf_rulequeue *ptr; + struct pf_rule **ptr_array; + u_int32_t rcount; u_int32_t ticket; int open; } active, inactive; @@ -731,19 +850,23 @@ struct pf_ruleset { int topen; }; -TAILQ_HEAD(pf_rulesetqueue, pf_ruleset); - +RB_HEAD(pf_anchor_global, pf_anchor); +RB_HEAD(pf_anchor_node, pf_anchor); struct pf_anchor { - TAILQ_ENTRY(pf_anchor) entries; + RB_ENTRY(pf_anchor) entry_global; + RB_ENTRY(pf_anchor) entry_node; + struct pf_anchor *parent; + struct pf_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; - struct pf_rulesetqueue rulesets; - int tables; + char path[MAXPATHLEN]; + struct pf_ruleset ruleset; + int refcnt; /* anchor rules */ + int match; }; - -TAILQ_HEAD(pf_anchorqueue, pf_anchor); +RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PF_RESERVED_ANCHOR "_pf" -#define PF_INTERFACE_RULESET "_if" #define PFR_TFLAG_PERSIST 0x00000001 #define PFR_TFLAG_CONST 0x00000002 @@ -756,8 +879,7 @@ TAILQ_HEAD(pf_anchorqueue, pf_anchor); #define PFR_TFLAG_ALLMASK 0x0000003F struct pfr_table { - char pfrt_anchor[PF_ANCHOR_NAME_SIZE]; - char pfrt_ruleset[PF_RULESET_NAME_SIZE]; + char pfrt_anchor[MAXPATHLEN]; char pfrt_name[PF_TABLE_NAME_SIZE]; u_int32_t pfrt_flags; u_int8_t pfrt_fback; @@ -818,6 +940,7 @@ struct pfr_kentry { u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; + u_int8_t pfrke_intrpool; }; SLIST_HEAD(pfr_ktableworkq, pfr_ktable); @@ -870,9 +993,19 @@ struct pfi_if { TAILQ_HEAD(pfi_grouphead, pfi_kif); TAILQ_HEAD(pfi_statehead, pfi_kif); RB_HEAD(pfi_ifhead, pfi_kif); + +/* keep synced with pfi_kif, used in RB_FIND */ +struct pfi_kif_cmp { + char pfik_ifname[IFNAMSIZ]; +}; + struct pfi_kif { struct pfi_if pfik_if; RB_ENTRY(pfi_kif) pfik_tree; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; struct pf_state_tree_lan_ext pfik_lan_ext; struct pf_state_tree_ext_gwy pfik_ext_gwy; struct pfi_grouphead pfik_grouphead; @@ -882,8 +1015,10 @@ struct pfi_kif { void *pfik_ah_cookie; struct pfi_kif *pfik_parent; struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; int pfik_states; int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; #define pfik_name pfik_if.pfif_name #define pfik_packets pfik_if.pfif_packets @@ -895,14 +1030,26 @@ struct pfi_kif { #define pfik_states pfik_if.pfif_states #define pfik_rules pfik_if.pfif_rules +enum pfi_kif_refs { + PFI_KIF_REF_NONE, + PFI_KIF_REF_STATE, + PFI_KIF_REF_RULE +}; + #define PFI_IFLAG_GROUP 0x0001 /* 
group of interfaces */ #define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ #define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ #define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ #define PFI_IFLAG_PLACEHOLDER 0x8000 /* placeholder group/interface */ struct pf_pdesc { + struct { + int done; + uid_t uid; + gid_t gid; + pid_t pid; + } lookup; u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; @@ -918,11 +1065,15 @@ struct pf_pdesc { struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ struct pf_addr *src; struct pf_addr *dst; + struct ether_header + *eh; + struct pf_mtag *pf_mtag; u_int16_t *ip_sum; u_int32_t p_len; /* total length of payload */ u_int16_t flags; /* Let SCRUB trigger behavior in * state code. Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ +#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ sa_family_t af; u_int8_t proto; u_int8_t tos; @@ -939,7 +1090,16 @@ struct pf_pdesc { #define PFRES_SHORT 3 /* Dropping short packet */ #define PFRES_NORM 4 /* Dropping by normalizer */ #define PFRES_MEMORY 5 /* Dropped due to lacking mem */ -#define PFRES_MAX 6 /* total+1 */ +#define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */ +#define PFRES_CONGEST 7 /* Congestion (of ipintrq) */ +#define PFRES_IPOPTIONS 8 /* IP option */ +#define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */ +#define PFRES_BADSTATE 10 /* State mismatch */ +#define PFRES_STATEINS 11 /* State insertion failure */ +#define PFRES_MAXSTATES 12 /* State limit */ +#define PFRES_SRCLIMIT 13 /* Source node/conn limit */ +#define PFRES_SYNPROXY 14 /* SYN proxy */ +#define PFRES_MAX 15 /* total+1 */ #define PFRES_NAMES { \ "match", \ @@ -948,6 +1108,36 @@ struct pf_pdesc { "short", \ "normalize", \ "memory", \ + "bad-timestamp", \ + "congestion", \ + "ip-option", \ + "proto-cksum", \ + "state-mismatch", \ + "state-insert", \ + "state-limit", \ + "src-limit", \ + "synproxy", \ + NULL \ +} + +/* Counters for other things we want to keep track of */ +#define LCNT_STATES 0 /* states */ +#define LCNT_SRCSTATES 1 /* max-src-states */ +#define LCNT_SRCNODES 2 /* max-src-nodes */ +#define LCNT_SRCCONN 3 /* max-src-conn */ +#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */ +#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */ +#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */ +#define LCNT_MAX 7 /* total+1 */ + +#define LCNT_NAMES { \ + "max states per rule", \ + "max-src-states", \ + "max-src-nodes", \ + "max-src-conn", \ + "max-src-conn-rate", \ + "overload table insertion", \ + "overload flush states", \ NULL \ } @@ -1005,6 +1195,7 @@ struct pf_pdesc { struct pf_status { u_int64_t counters[PFRES_MAX]; + u_int64_t lcounters[LCNT_MAX]; /* limit counters */ u_int64_t fcounters[FCNT_MAX]; u_int64_t scounters[SCNT_MAX]; u_int64_t pcounters[2][2][3]; @@ -1017,6 +1208,7 @@ struct pf_status { u_int32_t debug; u_int32_t hostid; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; }; struct cbq_opts { @@ -1094,6 +1286,26 @@ struct pf_altq { u_int32_t qid; /* return value */ }; +#define PF_TAG_GENERATED 0x01 +#define PF_TAG_FRAGCACHE 0x02 +#define PF_TAG_TRANSLATE_LOCALHOST 0x04 +#define PF_TAG_STATE_HASHED 0x08 + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + u_int rtableid; /* alternate routing table id */ + u_int32_t qid; /* queue id */ + u_int16_t tag; /* tag 
id */ + u_int8_t flags; + u_int8_t routed; + sa_family_t af; /* for ECN */ + u_int32_t state_hash; /* for fairq */ +}; + +struct pf_tag { + u_int16_t tag; /* tag id */ +}; + struct pf_tagname { TAILQ_ENTRY(pf_tagname) entries; char name[PF_TAG_NAME_SIZE]; @@ -1106,6 +1318,10 @@ struct pf_tagname { #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ #define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ +#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ +#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ +#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ + /* * ioctl parameter structures */ @@ -1118,8 +1334,7 @@ struct pfioc_pooladdr { u_int8_t r_action; u_int8_t r_last; u_int8_t af; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; @@ -1128,8 +1343,8 @@ struct pfioc_rule { u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; + char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; @@ -1152,6 +1367,13 @@ struct pfioc_state { struct pf_state state; }; +struct pfioc_src_node_kill { + /* XXX returns the number of src nodes killed in psnk_af */ + sa_family_t psnk_af; + struct pf_rule_addr psnk_src; + struct pf_rule_addr psnk_dst; +}; + struct pfioc_state_kill { /* XXX returns the number of states killed in psk_af */ sa_family_t psk_af; @@ -1217,8 +1439,8 @@ struct pfioc_anchor { struct pfioc_ruleset { u_int32_t nr; - char anchor[PF_ANCHOR_NAME_SIZE]; - char name[PF_RULESET_NAME_SIZE]; + char path[MAXPATHLEN]; + char name[PF_ANCHOR_NAME_SIZE]; }; #define PF_RULESET_ALTQ (PF_RULESET_MAX) @@ -1228,8 +1450,7 @@ struct pfioc_trans { int esize; /* size of each element in bytes */ struct pfioc_trans_e { int rs_num; - char anchor[PF_ANCHOR_NAME_SIZE]; - char ruleset[PF_RULESET_NAME_SIZE]; + char anchor[MAXPATHLEN]; u_int32_t ticket; } *array; }; @@ -1265,11 +1486,6 @@ struct pfioc_table { #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 - struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; @@ -1323,8 +1539,6 @@ struct pfioc_iface { #define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) #define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) #define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) -#define DIOCGETANCHORS _IOWR('D', 56, struct pfioc_anchor) -#define DIOCGETANCHOR _IOWR('D', 57, struct pfioc_anchor) #define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) #define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) #define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) @@ -1356,6 +1570,9 @@ struct pfioc_iface { #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) #define DIOCICLRISTATS _IOWR('D', 88, struct pfioc_iface) +#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) +#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) struct pf_ifspeed { char ifname[IFNAMSIZ]; u_int32_t baudrate; @@ -1371,16 +1588,13 @@ RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); extern struct pf_state_tree_id tree_id; -extern struct pf_state_queue 
state_updates; +extern struct pf_state_queue state_list; -extern struct pf_anchorqueue pf_anchors; -extern struct pf_ruleset pf_main_ruleset; TAILQ_HEAD(pf_poolqueue, pf_pool); extern struct pf_poolqueue pf_pools[2]; TAILQ_HEAD(pf_altqqueue, pf_altq); extern struct pf_altqqueue pf_altqs[2]; extern struct pf_palist pf_pabuf; -extern struct pfi_kif **pfi_index2kif; extern u_int32_t ticket_altqs_active; extern u_int32_t ticket_altqs_inactive; @@ -1395,16 +1609,17 @@ extern int pf_tbladdr_setup(struct pf_ruleset *, extern void pf_tbladdr_remove(struct pf_addr_wrap *); extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); -extern void pf_update_anchor_rules(void); extern vm_zone_t pf_src_tree_pl, pf_rule_pl; extern vm_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl; extern vm_zone_t pfr_ktable_pl, pfr_kentry_pl; extern vm_zone_t pf_cache_pl, pf_cent_pl; extern vm_zone_t pf_state_scrub_pl; extern vm_zone_t pfi_addr_pl; -extern void pf_purge_timeout(void *); -extern void pf_purge_expired_src_nodes(void); -extern void pf_purge_expired_states(void); +extern void pf_purge_thread(void *); +extern int pf_purge_expired_src_nodes(int); +extern int pf_purge_expired_states(u_int32_t, int); +extern void pf_unlink_state(struct pf_state *); +extern void pf_free_state(struct pf_state *); extern int pf_insert_state(struct pfi_kif *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, @@ -1412,16 +1627,13 @@ extern int pf_insert_src_node(struct pf_src_node **, sa_family_t); void pf_src_tree_remove_state(struct pf_state *); u_int32_t pf_state_hash(struct pf_state *s); -extern struct pf_state *pf_find_state_byid(struct pf_state *); -extern struct pf_state *pf_find_state_all(struct pf_state *key, +extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +extern struct pf_state *pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more); -extern struct pf_anchor *pf_find_anchor(const char *); -extern struct pf_ruleset *pf_find_ruleset(char *, char *); -extern struct pf_ruleset *pf_find_or_create_ruleset( - char[PF_ANCHOR_NAME_SIZE], - char[PF_RULESET_NAME_SIZE]); -extern void pf_remove_if_empty_ruleset( - struct pf_ruleset *); +extern void pf_print_state(struct pf_state *); +extern void pf_print_flags(u_int8_t); +extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, + u_int8_t); extern struct ifnet *sync_ifp; extern struct pf_rule pf_default_rule; @@ -1431,21 +1643,23 @@ void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); #ifdef INET -int pf_test(int, struct ifnet *, struct mbuf **); +int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *, struct inpcb *); #endif /* INET */ #ifdef INET6 -int pf_test6(int, struct ifnet *, struct mbuf **); +int pf_test6(int, struct ifnet *, struct mbuf **, struct ether_header *, struct inpcb *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, u_int8_t); void pf_addr_inc(struct pf_addr *, sa_family_t); #endif /* INET6 */ +u_int32_t pf_new_isn(struct pf_state *); /* From FreeBSD */ void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); int pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t, - u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *); + u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *, + struct pf_pdesc *); int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, 
struct pf_addr *, sa_family_t); int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); @@ -1454,20 +1668,24 @@ int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); void pf_normalize_init(void); -int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *); -int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *); +int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); +int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *, + struct pf_pdesc *); int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *, struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_state *); int pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *, struct pf_state_peer *); int pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *, - u_short *, struct tcphdr *, struct pf_state_peer *, - struct pf_state_peer *, int *); + u_short *, struct tcphdr *, struct pf_state *, + struct pf_state_peer *, struct pf_state_peer *, int *); u_int32_t pf_state_expires(const struct pf_state *); void pf_purge_expired_fragments(void); -int pf_routable(struct pf_addr *addr, sa_family_t af); +int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); +int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); +int pf_socket_lookup(int, struct pf_pdesc *, struct inpcb *); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1486,12 +1704,13 @@ int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); int pfr_clr_tstats(struct pfr_table *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); +int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, long); int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, - int *, int *, int *, int); + int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, @@ -1504,39 +1723,48 @@ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); +extern struct pfi_statehead pfi_statehead; +extern struct pfi_kif *pfi_all; + void pfi_initialize(void); +struct pfi_kif *pfi_kif_get(const char *); void pfi_cleanup(void); void pfi_attach_clone(struct if_clone *); +void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); +void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); void pfi_attach_ifnet(struct ifnet *); void pfi_detach_ifnet(struct ifnet *); struct pfi_kif *pfi_lookup_create(const char *); struct pfi_kif *pfi_lookup_if(const char *); -int pfi_maybe_destroy(struct pfi_kif *); -struct pfi_kif *pfi_attach_rule(const char *); -void pfi_detach_rule(struct pfi_kif *); -void pfi_attach_state(struct pfi_kif *); -void pfi_detach_state(struct pfi_kif *); +void pfi_attach_ifgroup(struct ifg_group *); +void pfi_detach_ifgroup(struct ifg_group *); 
+void pfi_group_change(const char *); +int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, + sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); -void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_dynaddr_remove(struct pf_addr_wrap *); +void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_fill_oldstatus(struct pf_status *); -int pfi_clr_istats(const char *, int *, int); -int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); -int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, - sa_family_t); - -extern struct pfi_statehead pfi_statehead; - -u_int16_t pf_tagname2tag(char *); -void pf_tag2tagname(u_int16_t, char *); -void pf_tag_unref(u_int16_t); -void pf_tag_packet(struct mbuf *, int); -u_int32_t pf_qname2qid(char *); -void pf_qid2qname(u_int32_t, char *); -void pf_qid_unref(u_int32_t); +int pfi_clr_istats(const char *); +int pfi_get_ifaces(const char *, struct pfi_kif *, int *); +int pfi_set_flags(const char *, int); +int pfi_clear_flags(const char *, int); + +u_int16_t pf_tagname2tag(char *); +void pf_tag2tagname(u_int16_t, char *); +void pf_tag_ref(u_int16_t); +void pf_tag_unref(u_int16_t); +int pf_tag_packet(struct mbuf *, struct pf_mtag *, int, int); +u_int32_t pf_qname2qid(char *); +void pf_qid2qname(u_int32_t, char *); +void pf_qid_unref(u_int32_t); +struct pf_mtag *pf_find_mtag(struct mbuf *); +struct pf_mtag *pf_get_mtag(struct mbuf *); extern struct pf_status pf_status; extern vm_zone_t pf_frent_pl, pf_frag_pl; +extern struct lock pf_consistency_lock; struct pf_pool_limit { void *pp; @@ -1573,8 +1801,27 @@ struct pf_fragment { LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ } fr_u; }; + #endif /* _KERNEL */ +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; +#define pf_main_ruleset pf_main_anchor.ruleset + +/* these ruleset functions can be linked into userland programs (pfctl) */ +int pf_get_ruleset_number(u_int8_t); +void pf_init_ruleset(struct pf_ruleset *); +int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +void pf_anchor_remove(struct pf_rule *); +void pf_remove_if_empty_ruleset(struct pf_ruleset *); +struct pf_anchor *pf_find_anchor(const char *); +struct pf_ruleset *pf_find_ruleset(const char *); +struct pf_ruleset *pf_find_or_create_ruleset(const char *); +void pf_rs_initialize(void); + /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL @@ -1583,7 +1830,8 @@ struct pf_osfp_enlist * const struct tcphdr *); #endif /* _KERNEL */ struct pf_osfp_enlist * - pf_osfp_fingerprint_hdr(const struct ip *, const struct tcphdr *); + pf_osfp_fingerprint_hdr(const struct ip *, const struct ip6_hdr *, + const struct tcphdr *); void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); int pf_osfp_initialize(void); diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index eef8da0d26..5b853e6e3d 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -300,6 +300,7 @@ extern const struct in6_addr in6addr_linklocal_allnodes; #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ @@ -331,6 +332,10 @@ extern const struct in6_addr in6addr_linklocal_allnodes; 
(IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) +#define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) + #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) @@ -353,6 +358,11 @@ extern const struct in6_addr in6addr_linklocal_allnodes; */ #define IN6_IS_SCOPE_LINKLOCAL(a) \ (IN6_IS_ADDR_LINKLOCAL(a) || IN6_IS_ADDR_MC_LINKLOCAL(a)) +#define IN6_IS_SCOPE_EMBED(a) \ + ((IN6_IS_ADDR_LINKLOCAL(a)) || \ + (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ + (IN6_IS_ADDR_MC_INTFACELOCAL(a))) + #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_preferred != 0 && \ diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h index a705a9c401..ccd1a58b80 100644 --- a/sys/sys/libkern.h +++ b/sys/sys/libkern.h @@ -113,7 +113,6 @@ int scanc (u_int, const u_char *, const u_char *, int); int skpc (int, int, char *); void skrandom (u_long); char *strcat (char * __restrict, const char * __restrict); -char *strchr(const char *, int); int strcmp (const char *, const char *); int strcasecmp (const char *, const char *); char *strcpy (char * __restrict, const char * __restrict); @@ -145,6 +144,18 @@ memcmp(const void *b1, const void *b2, size_t len) return (bcmp(b1, b2, len)); } +static __inline char * +strchr(const char *p, int ch) +{ + return (index(p, ch)); +} + +static __inline char * +strrchr(const char *p, int ch) +{ + return (rindex(p, ch)); +} + /* kfnmatch() return values. */ #define FNM_NOMATCH 1 /* Match failed. */ diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 28d8e91ad2..c39820ab93 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -123,7 +123,7 @@ struct pkthdr { uint32_t fw_flags; /* flags for PF */ /* variables for PF processing */ - uint16_t pf_tag; /* PF tag id */ + uint16_t unused01; /* was PF tag id */ uint8_t pf_routed; /* PF routing counter */ /* variables for ALTQ processing */ @@ -248,12 +248,12 @@ struct mbuf { * Flags indicating PF processing status */ #define FW_MBUF_GENERATED 0x00000001 -#define PF_MBUF_TAGGED 0x00000002 /* pf_tag field is valid */ +#define XX_MBUF_UNUSED02 0x00000002 #define PF_MBUF_ROUTED 0x00000004 /* pf_routed field is valid */ #define PF_MBUF_TRANSLATE_LOCALHOST \ 0x00000008 #define PF_MBUF_FRAGCACHE 0x00000010 -#define ALTQ_MBUF_TAGGED 0x00000020 /* altq_qid is valid */ +#define XX_MBUF_UNUSED20 0x00000020 #define IPFORWARD_MBUF_TAGGED 0x00000040 #define DUMMYNET_MBUF_TAGGED 0x00000080 #define ALTQ_MBUF_STATE_HASHED 0x00000100 @@ -261,6 +261,12 @@ struct mbuf { #define PF_MBUF_GENERATED FW_MBUF_GENERATED #define IPFW_MBUF_GENERATED FW_MBUF_GENERATED +/* + * The PF code uses a different symbol name for the m_tag. This is + * NOT a pkthdr flag. + */ +#define PF_MBUF_TAGGED PACKET_TAG_PF + /* * mbuf types. 
*/ @@ -608,6 +614,8 @@ m_getb(int len, int how, int type, int flags) #define PACKET_TAG_IPSRCRT 27 /* IP srcrt opts */ /* struct ip_srcrt_opt */ #define PACKET_TAG_CARP 28 /* CARP info */ +/* struct pf_mtag */ +#define PACKET_TAG_PF 29 /* PF info */ /* Packet tag routines */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index d3c5381a9b..90ae3c3c6b 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -118,6 +118,8 @@ #define SIOCIFDESTROY _IOW('i', 121, struct ifreq) /* destroy clone if */ #define SIOCIFGCLONERS _IOWR('i', 120, struct if_clonereq) /* get cloners */ +#define SIOCGIFGMEMB _IOWR('i', 138, struct ifgroupreq) /* get members */ + #define SIOCSIFPOLLCPU _IOW('i', 125, struct ifreq) /* set polling(4) cpu */ #define SIOCGIFPOLLCPU _IOWR('i', 126, struct ifreq) /* set polling(4) cpu */ diff --git a/usr.sbin/authpf/Makefile b/usr.sbin/authpf/Makefile index 7dbe01d289..ac7b259787 100644 --- a/usr.sbin/authpf/Makefile +++ b/usr.sbin/authpf/Makefile @@ -6,14 +6,12 @@ MAN= authpf.8 BINOWN= root BINGRP= authpf BINMODE= 6555 -SRCS= authpf.c parse.y pfctl_parser.c pf_print_state.c -SRCS+= pfctl_radix.c pfctl_osfp.c -SRCS+= pfctl_altq.c +SRCS= authpf.c .PATH: ${.CURDIR}/../pfctl CFLAGS+= -I${.CURDIR}/../pfctl -LDADD+= -lm -lmd +LDADD+= -lm -lmd -lutil DPADD+= ${LIBM} CLEANFILES+= y.tab.h diff --git a/usr.sbin/authpf/authpf.c b/usr.sbin/authpf/authpf.c index 7f8bf482e4..690c61d544 100644 --- a/usr.sbin/authpf/authpf.c +++ b/usr.sbin/authpf/authpf.c @@ -1,8 +1,8 @@ -/* $OpenBSD: authpf.c,v 1.75 2004/01/29 01:55:10 deraadt Exp $ */ +/* $OpenBSD: authpf.c,v 1.104 2007/02/24 17:35:08 beck Exp $ */ /* $DragonFly: src/usr.sbin/authpf/authpf.c,v 1.2 2004/12/18 22:48:02 swildner Exp $ */ /* - * Copyright (C) 1998 - 2002 Bob Beck (beck@openbsd.org). + * Copyright (C) 1998 - 2007 Bob Beck (beck@openbsd.org). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,7 +31,9 @@ #include #include #include +#include #include +#include #include #include @@ -39,6 +41,7 @@ #include #include +#include #include #include #include @@ -47,29 +50,25 @@ #include #include -#include "pfctl_parser.h" -#include "pfctl.h" - #include "pathnames.h" #define __dead __dead2 -extern int symset(const char *, const char *, int); - static int read_config(FILE *); static void print_message(const char *); static int allowed_luser(const char *); static int check_luser(const char *, const char *); static int remove_stale_rulesets(void); static int change_filter(int, const char *, const char *); +static int change_table(int, const char *); static void authpf_kill_states(void); int dev_fd; /* pf device */ char anchorname[PF_ANCHOR_NAME_SIZE] = "authpf"; -char rulesetname[PF_RULESET_NAME_SIZE]; +char rulesetname[MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 2]; +char tablename[PF_TABLE_NAME_SIZE] = "authpf_users"; FILE *pidfp; -char *infile; /* file name printed by yyerror() in parse.y */ char luser[MAXLOGNAME]; /* username */ char ipsrc[256]; /* ip as a string */ char pidfile[MAXPATHLEN]; /* we save pid in this file. 
*/ @@ -91,12 +90,19 @@ main(int argc __unused, char **argv __unused) { int lockcnt = 0, n, pidfd; FILE *config; - struct in_addr ina; + struct in6_addr ina; struct passwd *pw; char *cp; + gid_t gid; uid_t uid; + char *shell; + login_cap_t *lc; config = fopen(PATH_CONFFILE, "r"); + if (config == NULL) { + syslog(LOG_ERR, "can not open %s (%m)", PATH_CONFFILE); + exit(1); + } if ((cp = getenv("SSH_TTY")) == NULL) { syslog(LOG_ERR, "non-interactive session connection for authpf"); @@ -118,7 +124,8 @@ main(int argc __unused, char **argv __unused) exit(1); } *cp = '\0'; - if (inet_pton(AF_INET, ipsrc, &ina) != 1) { + if (inet_pton(AF_INET, ipsrc, &ina) != 1 && + inet_pton(AF_INET6, ipsrc, &ina) != 1) { syslog(LOG_ERR, "cannot determine IP from SSH_CLIENT %s", ipsrc); exit(1); @@ -136,12 +143,26 @@ main(int argc __unused, char **argv __unused) syslog(LOG_ERR, "cannot find user for uid %u", uid); goto die; } - if (strcmp(pw->pw_shell, PATH_AUTHPF_SHELL)) { + + if ((lc = login_getclass(pw->pw_class)) != NULL) + shell = login_getcapstr(lc, "shell", pw->pw_shell, + pw->pw_shell); + else + shell = pw->pw_shell; + + login_close(lc); + + if (strcmp(shell, PATH_AUTHPF_SHELL)) { syslog(LOG_ERR, "wrong shell for user %s, uid %u", pw->pw_name, pw->pw_uid); + if (shell != pw->pw_shell) + free(shell); goto die; } + if (shell != pw->pw_shell) + free(shell); + /* * Paranoia, but this data _does_ come from outside authpf, and * truncation would be bad. @@ -230,6 +251,8 @@ main(int argc __unused, char **argv __unused) if (++lockcnt > 10) { syslog(LOG_ERR, "cannot kill previous authpf (pid %d)", otherpid); + fclose(pidfp); + pidfp = NULL; goto dogdeath; } sleep(1); @@ -239,12 +262,22 @@ main(int argc __unused, char **argv __unused) * it's lock, giving us a chance to get it now */ fclose(pidfp); + pidfp = NULL; } while (1); + + /* whack the group list */ + gid = getegid(); + if (setgroups(1, &gid) == -1) { + syslog(LOG_INFO, "setgroups: %s", strerror(errno)); + do_death(0); + } /* revoke privs */ - seteuid(getuid()); - setuid(getuid()); - + uid = getuid(); + if (setresuid(uid, uid, uid) == -1) { + syslog(LOG_INFO, "setresuid: %s", strerror(errno)); + do_death(0); + } openlog("authpf", LOG_PID | LOG_NDELAY, LOG_DAEMON); if (!check_luser(PATH_BAN_DIR, luser) || !allowed_luser(luser)) { @@ -252,8 +285,8 @@ main(int argc __unused, char **argv __unused) do_death(0); } - if (config == NULL || read_config(config)) { - syslog(LOG_INFO, "bad or nonexistent %s", PATH_CONFFILE); + if (read_config(config)) { + syslog(LOG_ERR, "invalid config file %s", PATH_CONFFILE); do_death(0); } @@ -272,16 +305,21 @@ main(int argc __unused, char **argv __unused) printf("Unable to modify filters\r\n"); do_death(0); } + if (change_table(1, ipsrc) == -1) { + printf("Unable to modify table\r\n"); + change_filter(0, luser, ipsrc); + do_death(0); + } signal(SIGTERM, need_death); signal(SIGINT, need_death); signal(SIGALRM, need_death); signal(SIGPIPE, need_death); signal(SIGHUP, need_death); - signal(SIGSTOP, need_death); + signal(SIGQUIT, need_death); signal(SIGTSTP, need_death); while (1) { - printf("\r\nHello %s, ", luser); + printf("\r\nHello %s. 
", luser); printf("You are authenticated from host \"%s\"\r\n", ipsrc); setproctitle("%s@%s", luser, ipsrc); print_message(PATH_MESSAGE); @@ -354,6 +392,11 @@ read_config(FILE *f) sizeof(anchorname)) >= sizeof(anchorname)) goto parse_error; } + if (strcasecmp(pair[0], "table") == 0) { + if (!pair[1][0] || strlcpy(tablename, pair[1], + sizeof(tablename)) >= sizeof(tablename)) + goto parse_error; + } } while (!feof(f) && !ferror(f)); fclose(f); return (0); @@ -520,9 +563,11 @@ check_luser(const char *userdir, const char *user) while (fputs(tmp, stdout) != EOF && !feof(f)) { if (fgets(tmp, sizeof(tmp), f) == NULL) { fflush(stdout); + fclose(f); return (0); } } + fclose(f); } fflush(stdout); return (0); @@ -536,12 +581,10 @@ static int remove_stale_rulesets(void) { struct pfioc_ruleset prs; - const int action[PF_RULESET_MAX] = { PF_SCRUB, - PF_PASS, PF_NAT, PF_BINAT, PF_RDR }; u_int32_t nr, mnr; memset(&prs, 0, sizeof(prs)); - strlcpy(prs.anchor, anchorname, sizeof(prs.anchor)); + strlcpy(prs.path, anchorname, sizeof(prs.path)); if (ioctl(dev_fd, DIOCGETRULESETS, &prs)) { if (errno == EINVAL) return (0); @@ -568,19 +611,19 @@ remove_stale_rulesets(void) (*s && (t == prs.name || *s != ')'))) return (1); if (kill(pid, 0) && errno != EPERM) { - int i; - - for (i = 0; i < PF_RULESET_MAX; ++i) { - struct pfioc_rule pr; - - memset(&pr, 0, sizeof(pr)); - memcpy(pr.anchor, prs.anchor, sizeof(pr.anchor)); - memcpy(pr.ruleset, prs.name, sizeof(pr.ruleset)); - pr.rule.action = action[i]; - if ((ioctl(dev_fd, DIOCBEGINRULES, &pr) || - ioctl(dev_fd, DIOCCOMMITRULES, &pr)) && - errno != EINVAL) - return (1); + int i; + struct pfioc_trans_e t_e[PF_RULESET_MAX+1]; + struct pfioc_trans t_local; + + bzero(&t, sizeof(t_local)); + bzero(t_e, sizeof(t_e)); + t_local.size = PF_RULESET_MAX+1; + t_local.esize = sizeof(t_e[0]); + t_local.array = t_e; + for (i = 0; i < PF_RULESET_MAX+1; ++i) { + t_e[i].rs_num = i; + snprintf(t_e[i].anchor, sizeof(t_e[i].anchor), + "%s/%s", anchorname, prs.name); } mnr--; } else @@ -595,84 +638,71 @@ remove_stale_rulesets(void) static int change_filter(int add, const char *user, const char *his_ipsrc) { - char fn[MAXPATHLEN]; - FILE *f = NULL; - struct pfctl pf; - struct pfr_buffer t; - int i; + const char *pargv[13] = { + "pfctl", "-p", "/dev/pf", "-q", "-a", "anchor/ruleset", + "-D", "user_ip=X", "-D", "user_id=X", "-f", + "file", NULL + }; + char *fdpath = NULL, *userstr = NULL, *ipstr = NULL; + char *rsn = NULL, *fn = NULL; + pid_t pid; + gid_t gid; + int s; if (user == NULL || !user[0] || his_ipsrc == NULL || !his_ipsrc[0]) { - syslog(LOG_ERR, "invalid luser/ipsrc"); + syslog(LOG_ERR, "invalid user/ipsrc"); goto error; } - if (add) { - if ((i = snprintf(fn, sizeof(fn), "%s/%s/authpf.rules", - PATH_USER_DIR, user)) < 0 || i >= (int)sizeof(fn)) { - syslog(LOG_ERR, "user rule path too long"); - goto error; - } - if ((f = fopen(fn, "r")) == NULL && errno != ENOENT) { - syslog(LOG_ERR, "cannot open %s (%m)", fn); - goto error; - } - if (f == NULL) { - if (strlcpy(fn, PATH_PFRULES, sizeof(fn)) >= - sizeof(fn)) { - syslog(LOG_ERR, "rule path too long"); - goto error; - } - if ((f = fopen(fn, "r")) == NULL) { - syslog(LOG_ERR, "cannot open %s (%m)", fn); - goto error; - } - } - } + if (asprintf(&rsn, "%s/%s", anchorname, rulesetname) == -1) + goto no_mem; + if (asprintf(&fdpath, "/dev/fd/%d", dev_fd) == -1) + goto no_mem; + if (asprintf(&ipstr, "user_ip=%s", his_ipsrc) == -1) + goto no_mem; + if (asprintf(&userstr, "user_id=%s", user) == -1) + goto no_mem; - if 
(pfctl_load_fingerprints(dev_fd, 0)) { - syslog(LOG_ERR, "unable to load kernel's OS fingerprints"); - goto error; - } - bzero(&t, sizeof(t)); - t.pfrb_type = PFRB_TRANS; - memset(&pf, 0, sizeof(pf)); - for (i = 0; i < PF_RULESET_MAX; ++i) { - if (pfctl_add_trans(&t, i, anchorname, rulesetname)) { - syslog(LOG_ERR, "pfctl_add_trans %m"); - goto error; + if (add) { + struct stat sb; + + if (asprintf(&fn, "%s/%s/authpf.rules", PATH_USER_DIR, user) + == -1) + goto no_mem; + if (stat(fn, &sb) == -1) { + free(fn); + if ((fn = strdup(PATH_PFRULES)) == NULL) + goto no_mem; } } - if (pfctl_trans(dev_fd, &t, DIOCXBEGIN, 0)) { - syslog(LOG_ERR, "DIOCXBEGIN (%s) %m", add?"add":"remove"); + pargv[2] = fdpath; + pargv[5] = rsn; + pargv[7] = userstr; + pargv[9] = ipstr; + if (!add) + pargv[11] = "/dev/null"; + else + pargv[11] = fn; + + switch (pid = fork()) { + case -1: + syslog(LOG_ERR, "fork failed"); goto error; - } - - if (add) { - if (symset("user_ip", his_ipsrc, 0) || - symset("user_id", user, 0)) { - syslog(LOG_ERR, "symset"); - goto error; - } - - pf.dev = dev_fd; - pf.trans = &t; - pf.anchor = anchorname; - pf.ruleset = rulesetname; - - infile = fn; - if (parse_rules(f, &pf) < 0) { - syslog(LOG_ERR, "syntax error in rule file: " - "authpf rules not loaded"); - goto error; + case 0: + /* revoke group privs before exec */ + gid = getgid(); + if (setregid(gid, gid) == -1) { + err(1, "setregid"); } - - infile = NULL; - fclose(f); - f = NULL; + execvp(PATH_PFCTL, __DECONST(char *const *, pargv)); + warn("exec of %s failed", PATH_PFCTL); + _exit(1); } - if (pfctl_trans(dev_fd, &t, DIOCXCOMMIT, 0)) { - syslog(LOG_ERR, "DIOCXCOMMIT (%s) %m", add?"add":"remove"); + /* parent */ + waitpid(pid, &s, 0); + if (s != 0) { + syslog(LOG_ERR, "pfctl exited abnormally"); goto error; } @@ -685,17 +715,57 @@ change_filter(int add, const char *user, const char *his_ipsrc) his_ipsrc, user, Tend.tv_sec - Tstart.tv_sec); } return (0); - +no_mem: + syslog(LOG_ERR, "malloc failed"); error: - if (f != NULL) - fclose(f); - if (pfctl_trans(dev_fd, &t, DIOCXROLLBACK, 0)) - syslog(LOG_ERR, "DIOCXROLLBACK (%s) %m", add?"add":"remove"); - - infile = NULL; + free(fdpath); + free(rsn); + free(userstr); + free(ipstr); + free(fn); return (-1); } +/* + * Add/remove this IP from the "authpf_users" table. + */ +static int +change_table(int add, const char *his_ipsrc) +{ + struct pfioc_table io; + struct pfr_addr addr; + + bzero(&io, sizeof(io)); + strlcpy(io.pfrio_table.pfrt_name, tablename, + sizeof(io.pfrio_table.pfrt_name)); + io.pfrio_buffer = &addr; + io.pfrio_esize = sizeof(addr); + io.pfrio_size = 1; + + bzero(&addr, sizeof(addr)); + if (his_ipsrc == NULL || !his_ipsrc[0]) + return (-1); + if (inet_pton(AF_INET, his_ipsrc, &addr.pfra_ip4addr) == 1) { + addr.pfra_af = AF_INET; + addr.pfra_net = 32; + } else if (inet_pton(AF_INET6, his_ipsrc, &addr.pfra_ip6addr) == 1) { + addr.pfra_af = AF_INET6; + addr.pfra_net = 128; + } else { + syslog(LOG_ERR, "invalid his_ipsrc"); + return (-1); + } + + if (ioctl(dev_fd, add ? DIOCRADDADDRS : DIOCRDELADDRS, &io) && + errno != ESRCH) { + syslog(LOG_ERR, "cannot %s %s from table %s: %s", + add ? "add" : "remove", his_ipsrc, tablename, + strerror(errno)); + return (-1); + } + return (0); +} + /* * This is to kill off states that would otherwise be left behind stateful * rules. 
This means we don't need to allow in more traffic than we really @@ -707,24 +777,32 @@ static void authpf_kill_states(void) { struct pfioc_state_kill psk; - struct in_addr target; + struct pf_addr target; memset(&psk, 0, sizeof(psk)); - psk.psk_af = AF_INET; + memset(&target, 0, sizeof(target)); - inet_pton(AF_INET, ipsrc, &target); + if (inet_pton(AF_INET, ipsrc, &target.v4) == 1) + psk.psk_af = AF_INET; + else if (inet_pton(AF_INET6, ipsrc, &target.v6) == 1) + psk.psk_af = AF_INET6; + else { + syslog(LOG_ERR, "inet_pton(%s) failed", ipsrc); + return; + } /* Kill all states from ipsrc */ - psk.psk_src.addr.v.a.addr.v4 = target; + memcpy(&psk.psk_src.addr.v.a.addr, &target, + sizeof(psk.psk_src.addr.v.a.addr)); memset(&psk.psk_src.addr.v.a.mask, 0xff, sizeof(psk.psk_src.addr.v.a.mask)); if (ioctl(dev_fd, DIOCKILLSTATES, &psk)) syslog(LOG_ERR, "DIOCKILLSTATES failed (%m)"); /* Kill all states to ipsrc */ - psk.psk_af = AF_INET; memset(&psk.psk_src, 0, sizeof(psk.psk_src)); - psk.psk_dst.addr.v.a.addr.v4 = target; + memcpy(&psk.psk_dst.addr.v.a.addr, &target, + sizeof(psk.psk_dst.addr.v.a.addr)); memset(&psk.psk_dst.addr.v.a.mask, 0xff, sizeof(psk.psk_dst.addr.v.a.mask)); if (ioctl(dev_fd, DIOCKILLSTATES, &psk)) @@ -748,171 +826,12 @@ do_death(int active) if (active) { change_filter(0, luser, ipsrc); + change_table(0, ipsrc); authpf_kill_states(); remove_stale_rulesets(); } - if (pidfp) - ftruncate(fileno(pidfp), 0); - if (pidfile[0]) + if (pidfile[0] && (pidfp != NULL)) if (unlink(pidfile) == -1) syslog(LOG_ERR, "cannot unlink %s (%m)", pidfile); exit(ret); } - -/* - * callbacks for parse_rules(void) - */ - -int -pfctl_add_rule(struct pfctl *pf, struct pf_rule *r) -{ - u_int8_t rs_num; - struct pfioc_rule pr; - - switch (r->action) { - case PF_PASS: - case PF_DROP: - rs_num = PF_RULESET_FILTER; - break; - case PF_SCRUB: - rs_num = PF_RULESET_SCRUB; - break; - case PF_NAT: - case PF_NONAT: - rs_num = PF_RULESET_NAT; - break; - case PF_RDR: - case PF_NORDR: - rs_num = PF_RULESET_RDR; - break; - case PF_BINAT: - case PF_NOBINAT: - rs_num = PF_RULESET_BINAT; - break; - default: - syslog(LOG_ERR, "invalid rule action %d", r->action); - return (1); - } - - bzero(&pr, sizeof(pr)); - strlcpy(pr.anchor, pf->anchor, sizeof(pr.anchor)); - strlcpy(pr.ruleset, pf->ruleset, sizeof(pr.ruleset)); - if (pfctl_add_pool(pf, &r->rpool, r->af)) - return (1); - pr.ticket = pfctl_get_ticket(pf->trans, rs_num, pf->anchor, - pf->ruleset); - pr.pool_ticket = pf->paddr.ticket; - memcpy(&pr.rule, r, sizeof(pr.rule)); - if (ioctl(pf->dev, DIOCADDRULE, &pr)) { - syslog(LOG_ERR, "DIOCADDRULE %m"); - return (1); - } - pfctl_clear_pool(&r->rpool); - return (0); -} - -int -pfctl_add_pool(struct pfctl *pf, struct pf_pool *p, sa_family_t af) -{ - struct pf_pooladdr *pa; - - if (ioctl(pf->dev, DIOCBEGINADDRS, &pf->paddr)) { - syslog(LOG_ERR, "DIOCBEGINADDRS %m"); - return (1); - } - pf->paddr.af = af; - TAILQ_FOREACH(pa, &p->list, entries) { - memcpy(&pf->paddr.addr, pa, sizeof(struct pf_pooladdr)); - if (ioctl(pf->dev, DIOCADDADDR, &pf->paddr)) { - syslog(LOG_ERR, "DIOCADDADDR %m"); - return (1); - } - } - return (0); -} - -void -pfctl_clear_pool(struct pf_pool *pool) -{ - struct pf_pooladdr *pa; - - while ((pa = TAILQ_FIRST(&pool->list)) != NULL) { - TAILQ_REMOVE(&pool->list, pa, entries); - free(pa); - } -} - -int -pfctl_add_altq(struct pfctl *pf __unused, struct pf_altq *a __unused) -{ - fprintf(stderr, "altq rules not supported in authpf\n"); - return (1); -} - -int -pfctl_set_optimization(struct pfctl *pf __unused, const 
char *opt __unused) -{ - fprintf(stderr, "set optimization not supported in authpf\n"); - return (1); -} - -int -pfctl_set_logif(struct pfctl *pf __unused, char *ifname __unused) -{ - fprintf(stderr, "set loginterface not supported in authpf\n"); - return (1); -} - -int -pfctl_set_hostid(struct pfctl *pf __unused, u_int32_t hostid __unused) -{ - fprintf(stderr, "set hostid not supported in authpf\n"); - return (1); -} - -int -pfctl_set_timeout(struct pfctl *pf __unused, const char *opt __unused, - int seconds __unused, int quiet __unused) -{ - fprintf(stderr, "set timeout not supported in authpf\n"); - return (1); -} - -int -pfctl_set_limit(struct pfctl *pf __unused, const char *opt __unused, - unsigned int limit __unused) -{ - fprintf(stderr, "set limit not supported in authpf\n"); - return (1); -} - -int -pfctl_set_debug(struct pfctl *pf __unused, char *d __unused) -{ - fprintf(stderr, "set debug not supported in authpf\n"); - return (1); -} - -int -pfctl_define_table(char *name __unused, int flags __unused, int addrs __unused, - const char *anchor __unused, const char *ruleset __unused, - struct pfr_buffer *ab __unused, u_int32_t ticket __unused) -{ - fprintf(stderr, "table definitions not yet supported in authpf\n"); - return (1); -} - -int -pfctl_rules(int dev __unused, char *filename __unused, int opts __unused, - char *my_anchorname __unused, char *my_rulesetname __unused, - struct pfr_buffer *t __unused) -{ - /* never called, no anchors inside anchors, but we need the stub */ - fprintf(stderr, "load anchor not supported from authpf\n"); - return (1); -} - -void -pfctl_print_title(const char *title __unused) -{ -} diff --git a/usr.sbin/authpf/pathnames.h b/usr.sbin/authpf/pathnames.h index 7f543f648a..3ec2b552de 100644 --- a/usr.sbin/authpf/pathnames.h +++ b/usr.sbin/authpf/pathnames.h @@ -36,3 +36,4 @@ #define PATH_DEVFILE "/dev/pf" #define PATH_PIDFILE "/var/authpf" #define PATH_AUTHPF_SHELL "/usr/sbin/authpf" +#define PATH_PFCTL "/usr/sbin/pfctl" diff --git a/usr.sbin/pfctl/Makefile b/usr.sbin/pfctl/Makefile index 896d30cd1e..f8f9d0bc2e 100644 --- a/usr.sbin/pfctl/Makefile +++ b/usr.sbin/pfctl/Makefile @@ -1,13 +1,18 @@ # $OpenBSD: Makefile,v 1.15 2004/03/10 18:49:49 mcbride Exp $ # $DragonFly: src/usr.sbin/pfctl/Makefile,v 1.2 2005/02/11 22:31:45 joerg Exp $ - PROG= pfctl SRCS= pfctl.c parse.y pfctl_parser.c pf_print_state.c pfctl_altq.c SRCS+= pfctl_osfp.c pfctl_radix.c pfctl_table.c pfctl_qstats.c +SRCS+= pfctl_optimize.c pf_ruleset.c CFLAGS+= -I${.CURDIR} MAN= pfctl.8 pf.conf.5 pf.os.5 +# Ruleset and Anchor handling +.PATH: ${.CURDIR}/../../sys/net/pf + LDADD+= -lmd -lm DPADD+= ${LIBM} +WARNS?= 2 + .include diff --git a/usr.sbin/pfctl/parse.y b/usr.sbin/pfctl/parse.y index d92a92b79e..98f1374248 100644 --- a/usr.sbin/pfctl/parse.y +++ b/usr.sbin/pfctl/parse.y @@ -1,5 +1,4 @@ -/* $OpenBSD: parse.y,v 1.449 2004/03/20 23:20:20 david Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/parse.y,v 1.6 2008/08/22 20:53:00 thomas Exp $ */ +/* $OpenBSD: parse.y,v 1.517 2007/02/03 23:26:40 dhartmei Exp $ */ /* * Copyright (c) 2001 Markus Friedl. All rights reserved. 
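The authpf_kill_states() rewrite above has a natural companion in the DIOCKILLSRCNODES ioctl and struct pfioc_src_node_kill that the pfvar.h hunks earlier in this patch introduce: once a client's states are torn down, its source-tracking entries can be flushed the same way. The following is only a rough sketch under assumed names (the helper, header list and sample address are not from this commit), built on the same pf_addr_wrap addressing the patch uses:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <net/pfvar.h>

#include <arpa/inet.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>

/* Flush source-tracking entries whose source address is ipsrc. */
static void
kill_src_nodes(int dev, const char *ipsrc)
{
	struct pfioc_src_node_kill psnk;

	memset(&psnk, 0, sizeof(psnk));
	if (inet_pton(AF_INET, ipsrc, &psnk.psnk_src.addr.v.a.addr.v4) == 1)
		psnk.psnk_af = AF_INET;
	else if (inet_pton(AF_INET6, ipsrc, &psnk.psnk_src.addr.v.a.addr.v6) == 1)
		psnk.psnk_af = AF_INET6;
	else
		errx(1, "bad address %s", ipsrc);

	/* Exact match on the source; the zeroed psnk_dst matches anything. */
	memset(&psnk.psnk_src.addr.v.a.mask, 0xff,
	    sizeof(psnk.psnk_src.addr.v.a.mask));

	/* Per the comment in pfvar.h, the kill count comes back in psnk_af. */
	if (ioctl(dev, DIOCKILLSRCNODES, &psnk) == -1)
		err(1, "DIOCKILLSRCNODES");
}

int
main(void)
{
	int dev;

	if ((dev = open("/dev/pf", O_RDWR)) == -1)
		err(1, "open(/dev/pf)");
	kill_src_nodes(dev, "192.0.2.1");	/* example client address */
	return (0);
}
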
@@ -122,9 +121,10 @@ struct node_icmp { }; enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK, - PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_NODES, - PF_STATE_OPT_STATELOCK, PF_STATE_OPT_TIMEOUT, - PF_STATE_OPT_PICKUPS }; + PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN, + PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES, + PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK, + PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_PICKUPS }; enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE }; @@ -133,6 +133,15 @@ struct node_state_opt { union { u_int32_t max_states; u_int32_t max_src_states; + u_int32_t max_src_conn; + struct { + u_int32_t limit; + u_int32_t seconds; + } max_src_conn_rate; + struct { + u_int8_t flush; + char tblname[PF_TABLE_NAME_SIZE]; + } overload; u_int32_t max_src_nodes; u_int8_t src_track; u_int8_t pickup_mode; @@ -194,10 +203,12 @@ struct filter_opts { char *tag; char *match_tag; u_int8_t match_tag_not; + int rtableid; } filter_opts; struct antispoof_opts { char *label; + int rtableid; } antispoof_opts; struct scrub_opts { @@ -211,6 +222,7 @@ struct scrub_opts { int fragcache; int randomid; int reassemble_tcp; + int rtableid; } scrub_opts; struct queue_opts { @@ -250,9 +262,10 @@ struct node_fairq_opts fairq_opts; int yyerror(const char *, ...); int disallow_table(struct node_host *, const char *); +int disallow_urpf_failed(struct node_host *, const char *); int disallow_alias(struct node_host *, const char *); -int rule_consistent(struct pf_rule *); -int filter_consistent(struct pf_rule *); +int rule_consistent(struct pf_rule *, int); +int filter_consistent(struct pf_rule *, int); int nat_consistent(struct pf_rule *); int rdr_consistent(struct pf_rule *); int process_tabledef(char *, struct table_opts *); @@ -270,11 +283,13 @@ void expand_label(char *, size_t, const char *, u_int8_t, struct node_host *, void expand_rule(struct pf_rule *, struct node_if *, struct node_host *, struct node_proto *, struct node_os*, struct node_host *, struct node_port *, struct node_host *, struct node_port *, - struct node_uid *, struct node_gid *, struct node_icmp *); + struct node_uid *, struct node_gid *, struct node_icmp *, + const char *); int expand_altq(struct pf_altq *, struct node_if *, struct node_queue *, struct node_queue_bw bwspec, struct node_queue_opt *); int expand_queue(struct pf_altq *, struct node_if *, struct node_queue *, struct node_queue_bw, struct node_queue_opt *); +int expand_skip_interface(struct node_if *); int check_rulestate(int); int kw_cmp(const void *, const void *); @@ -300,6 +315,7 @@ struct sym { int symset(const char *, const char *, int); char *symget(const char *); +void mv_rules(struct pf_ruleset *, struct pf_ruleset *); void decide_address_family(struct node_host *, sa_family_t *); void remove_invalid_hosts(struct node_host **, sa_family_t *); int invalid_redirect(struct node_host *, sa_family_t); @@ -311,7 +327,6 @@ TAILQ_HEAD(loadanchorshead, loadanchors) struct loadanchors { TAILQ_ENTRY(loadanchors) entries; char *anchorname; - char *rulesetname; char *filename; }; @@ -320,6 +335,7 @@ typedef struct { u_int32_t number; int i; char *string; + int rtableid; struct { u_int8_t b1; u_int8_t b2; @@ -362,8 +378,13 @@ typedef struct { } keep_state; struct { u_int8_t log; + u_int8_t logif; u_int8_t quick; } logquick; + struct { + int neg; + char *name; + } tagged; struct pf_poolhashkey *hashkey; struct node_queue *queue; struct node_queue_opt queue_options; @@ -381,18 +402,6 @@ typedef struct { int lineno; } 
YYSTYPE; -#define PREPARE_ANCHOR_RULE(r, a) \ - do { \ - memset(&(r), 0, sizeof(r)); \ - if (strlcpy(r.anchorname, (a), \ - sizeof(r.anchorname)) >= \ - sizeof(r.anchorname)) { \ - yyerror("anchor name '%s' too long", \ - (a)); \ - YYERROR; \ - } \ - } while (0) - #define DYNIF_MULTIADDR(addr) ((addr).type == PF_ADDR_DYNIFTL && \ (!((addr).iflags & PFI_AFLAG_NOALIAS) || \ !isdigit((addr).v.ifname[strlen((addr).v.ifname)-1]))) @@ -401,30 +410,32 @@ extern char *infile; %} -%token PASS BLOCK SCRUB RETURN IN OS OUT LOG LOGALL QUICK ON FROM TO FLAGS +%token PASS BLOCK SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS %token RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE %token ICMP6TYPE CODE KEEP MODULATE STATE PORT RDR NAT BINAT ARROW NODF %token MINTTL ERROR ALLOWOPTS FASTROUTE FILENAME ROUTETO DUPTO REPLYTO NO LABEL -%token NOROUTE FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE +%token NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE %token REASSEMBLE FRAGDROP FRAGCROP ANCHOR NATANCHOR RDRANCHOR BINATANCHOR %token SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY RANDOMID -%token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG HOSTID +%token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID %token ANTISPOOF FOR %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY %token ALTQ CBQ PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT -%token QUEUE PRIORITY QLIMIT HOGS BUCKETS -%token LOAD -%token PICKUPS NOPICKUPS HASHONLY +%token QUEUE PRIORITY QLIMIT RTABLE HOGS BUCKETS +%token LOAD RULESET_OPTIMIZATION +%token PICKUPS NOPICKUPS HASHONLY %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE -%token TAGGED TAG IFBOUND GRBOUND FLOATING STATEPOLICY KEEPPOLICY +%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH +%token TAGGED TAG IFBOUND FLOATING STATEPOLICY ROUTE KEEPPOLICY %token STRING %token PORTBINARY %type interface if_list if_item_not if_item %type number icmptype icmp6type uid gid -%type tos not yesno natpass -%type no dir log af fragcache sourcetrack -%type unaryop statelock -%type action nataction flags flag blockspec +%type tos not yesno +%type no dir af fragcache optimizer +%type sourcetrack flush unaryop statelock +%type action nataction natpass scrubaction +%type flags flag blockspec %type port rport %type hashkey %type proto proto_list proto_item @@ -442,11 +453,11 @@ extern char *infile; %type gids gid_list gid_item %type route %type redirection redirpool -%type label string tag +%type label string tag anchorname %type keep %type state_opt_spec state_opt_list state_opt_item -%type logquick -%type antispoof_ifspc antispoof_iflst +%type logquick quick log logopts logopt +%type antispoof_ifspc antispoof_iflst antispoof_if %type qname %type qassign qassign_list qassign_item %type scheduler @@ -461,6 +472,8 @@ extern char *infile; %type scrub_opts scrub_opt scrub_opts_l %type table_opts table_opt table_opts_l %type pool_opts pool_opt pool_opts_l +%type tagged +%type rtable %% ruleset : /* empty */ @@ -477,9 +490,36 @@ ruleset : /* empty */ | ruleset varset '\n' | ruleset antispoof '\n' | ruleset tabledef '\n' + | '{' fakeanchor '}' '\n'; | ruleset error '\n' { errors++; } ; +/* + * apply to previouslys specified rule: must be careful to note + * what that is: pf or nat or binat or rdr + */ +fakeanchor : fakeanchor '\n' + | fakeanchor anchorrule '\n' + | fakeanchor binatrule '\n' + | fakeanchor natrule '\n' + | fakeanchor pfrule '\n' + | fakeanchor error '\n' + ; + +optimizer : 
string { + if (!strcmp($1, "none")) + $$ = 0; + else if (!strcmp($1, "basic")) + $$ = PF_OPTIMIZE_BASIC; + else if (!strcmp($1, "profile")) + $$ = PF_OPTIMIZE_BASIC | PF_OPTIMIZE_PROFILE; + else { + yyerror("unknown ruleset-optimization %s", $$); + YYERROR; + } + } + ; + option : SET OPTIMIZATION STRING { if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); @@ -490,7 +530,13 @@ option : SET OPTIMIZATION STRING { free($3); YYERROR; } - free ($3); + free($3); + } + | SET RULESET_OPTIMIZATION optimizer { + if (!(pf->opts & PF_OPT_OPTIMIZE)) { + pf->opts |= PF_OPT_OPTIMIZE; + pf->optimize = $3; + } } | SET TIMEOUT timeout_spec | SET TIMEOUT '{' timeout_list '}' @@ -501,11 +547,6 @@ option : SET OPTIMIZATION STRING { free($3); YYERROR; } - if ((ifa_exists($3, 0) == NULL) && strcmp($3, "none")) { - yyerror("interface %s doesn't exist", $3); - free($3); - YYERROR; - } if (pfctl_set_logif(pf, $3) != 0) { yyerror("error setting loginterface %s", $3); free($3); @@ -545,15 +586,19 @@ option : SET OPTIMIZATION STRING { } | SET FINGERPRINTS STRING { if (pf->opts & PF_OPT_VERBOSE) - printf("fingerprints %s\n", $3); + printf("set fingerprints \"%s\"\n", $3); if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } - if (pfctl_file_fingerprints(pf->dev, pf->opts, $3)) { - yyerror("error loading fingerprints %s", $3); - free($3); - YYERROR; + if (!pf->anchor->name[0]) { + if (pfctl_file_fingerprints(pf->dev, + pf->opts, $3)) { + yyerror("error loading " + "fingerprints %s", $3); + free($3); + YYERROR; + } } free($3); } @@ -566,10 +611,6 @@ option : SET OPTIMIZATION STRING { case PFRULE_IFBOUND: printf("set state-policy if-bound\n"); break; - case PFRULE_GRBOUND: - printf("set state-policy " - "group-bound\n"); - break; } default_statelock = $3; } @@ -591,6 +632,12 @@ option : SET OPTIMIZATION STRING { } free($3); } + | SET SKIP interface { + if (expand_skip_interface($3) != 0) { + yyerror("error setting skip interface(s)"); + YYERROR; + } + } ; string : string STRING { @@ -612,36 +659,120 @@ varset : STRING '=' string { } ; -anchorrule : ANCHOR string dir interface af proto fromto filter_opts { +anchorname : STRING { $$ = $1; } + | /* empty */ { $$ = NULL; } + ; + +optnl : optnl '\n' + | + ; + +pfa_anchorlist : pfrule optnl + | anchorrule optnl + | pfa_anchorlist pfrule optnl + | pfa_anchorlist anchorrule optnl + ; + +pfa_anchor : '{' + { + char ta[PF_ANCHOR_NAME_SIZE]; + struct pf_ruleset *rs; + + /* steping into a brace anchor */ + pf->asd++; + pf->bn++; + pf->brace = 1; + + /* create a holding ruleset in the root */ + snprintf(ta, PF_ANCHOR_NAME_SIZE, "_%d", pf->bn); + rs = pf_find_or_create_ruleset(ta); + if (rs == NULL) + err(1, "pfa_anchor: pf_find_or_create_ruleset"); + pf->astack[pf->asd] = rs->anchor; + pf->anchor = rs->anchor; + } '\n' pfa_anchorlist '}' + { + pf->alast = pf->anchor; + pf->asd--; + pf->anchor = pf->astack[pf->asd]; + } + | /* empty */ + ; + +anchorrule : ANCHOR anchorname dir quick interface af proto fromto + filter_opts pfa_anchor + { struct pf_rule r; if (check_rulestate(PFCTL_STATE_FILTER)) { + if ($2) + free($2); + YYERROR; + } + + if ($2 && ($2[0] == '_' || strstr($2, "/_") != NULL)) { free($2); + yyerror("anchor names beginning with '_' " + "are reserved for internal use"); YYERROR; } - PREPARE_ANCHOR_RULE(r, $2); + memset(&r, 0, sizeof(r)); + if (pf->astack[pf->asd + 1]) { + /* move inline rules into relative location */ + pf_anchor_setup(&r, + &pf->astack[pf->asd]->ruleset, + $2 ? 
$2 : pf->alast->name); + + if (r.anchor == NULL) + err(1, "anchorrule: unable to " + "create ruleset"); + + if (pf->alast != r.anchor) { + if (r.anchor->match) { + yyerror("inline anchor '%s' " + "already exists", + r.anchor->name); + YYERROR; + } + mv_rules(&pf->alast->ruleset, + &r.anchor->ruleset); + } + pf_remove_if_empty_ruleset(&pf->alast->ruleset); + pf->alast = r.anchor; + } else { + if (!$2) { + yyerror("anchors without explicit " + "rules must specify a name"); + YYERROR; + } + } r.direction = $3; - r.af = $5; - r.prob = $8.prob; + r.quick = $4.quick; + r.af = $6; + r.prob = $9.prob; + r.rtableid = $9.rtableid; - if ($8.match_tag) - if (strlcpy(r.match_tagname, $8.match_tag, + if ($9.match_tag) + if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } - r.match_tag_not = $8.match_tag_not; + r.match_tag_not = $9.match_tag_not; - decide_address_family($7.src.host, &r.af); - decide_address_family($7.dst.host, &r.af); + decide_address_family($8.src.host, &r.af); + decide_address_family($8.dst.host, &r.af); - expand_rule(&r, $4, NULL, $6, $7.src_os, - $7.src.host, $7.src.port, $7.dst.host, $7.dst.port, - 0, 0, 0); + expand_rule(&r, $5, NULL, $7, $8.src_os, + $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, + 0, 0, 0, pf->astack[pf->asd + 1] ? + pf->alast->name : $2); + free($2); + pf->astack[pf->asd + 1] = NULL; } - | NATANCHOR string interface af proto fromto { + | NATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { @@ -649,19 +780,20 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { YYERROR; } - PREPARE_ANCHOR_RULE(r, $2); - free($2); + memset(&r, 0, sizeof(r)); r.action = PF_NAT; r.af = $4; + r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); expand_rule(&r, $3, NULL, $5, $6.src_os, $6.src.host, $6.src.port, $6.dst.host, $6.dst.port, - 0, 0, 0); + 0, 0, 0, $2); + free($2); } - | RDRANCHOR string interface af proto fromto { + | RDRANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { @@ -669,10 +801,10 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { YYERROR; } - PREPARE_ANCHOR_RULE(r, $2); - free($2); + memset(&r, 0, sizeof(r)); r.action = PF_RDR; r.af = $4; + r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); @@ -700,9 +832,10 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { expand_rule(&r, $3, NULL, $5, $6.src_os, $6.src.host, $6.src.port, $6.dst.host, $6.dst.port, - 0, 0, 0); + 0, 0, 0, $2); + free($2); } - | BINATANCHOR string interface af proto fromto { + | BINATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { @@ -710,10 +843,10 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { YYERROR; } - PREPARE_ANCHOR_RULE(r, $2); - free($2); + memset(&r, 0, sizeof(r)); r.action = PF_BINAT; r.af = $4; + r.rtableid = $7; if ($5 != NULL) { if ($5->next != NULL) { yyerror("proto list expansion" @@ -734,51 +867,52 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); - pfctl_add_rule(pf, &r); + pfctl_add_rule(pf, &r, $2); + free($2); } ; loadrule : LOAD ANCHOR string FROM string { - char *t; struct loadanchors *loadanchor; - 
t = strsep(&$3, ":"); - if (*t == '\0' || $3 == NULL || *$3 == '\0') { - yyerror("anchor '%s' invalid\n", $3); - free(t); - YYERROR; - } - if (strlen(t) >= PF_ANCHOR_NAME_SIZE) { + if (strlen(pf->anchor->name) + 1 + + strlen($3) >= MAXPATHLEN) { yyerror("anchorname %s too long, max %u\n", - t, PF_ANCHOR_NAME_SIZE - 1); - free(t); - YYERROR; - } - if (strlen($3) >= PF_RULESET_NAME_SIZE) { - yyerror("rulesetname %s too long, max %u\n", - $3, PF_RULESET_NAME_SIZE - 1); - free(t); + $3, MAXPATHLEN - 1); + free($3); YYERROR; } - loadanchor = calloc(1, sizeof(struct loadanchors)); if (loadanchor == NULL) err(1, "loadrule: calloc"); - if ((loadanchor->anchorname = strdup(t)) == NULL) - err(1, "loadrule: strdup"); - if ((loadanchor->rulesetname = strdup($3)) == NULL) - err(1, "loadrule: strdup"); + if ((loadanchor->anchorname = malloc(MAXPATHLEN)) == + NULL) + err(1, "loadrule: malloc"); + if (pf->anchor->name[0]) + snprintf(loadanchor->anchorname, MAXPATHLEN, + "%s/%s", pf->anchor->name, $3); + else + strlcpy(loadanchor->anchorname, $3, MAXPATHLEN); if ((loadanchor->filename = strdup($5)) == NULL) err(1, "loadrule: strdup"); TAILQ_INSERT_TAIL(&loadanchorshead, loadanchor, entries); - free(t); /* not $3 */ + free($3); free($5); }; -scrubrule : SCRUB dir logquick interface af proto fromto scrub_opts +scrubaction : no SCRUB { + $$.b2 = $$.w = 0; + if ($1) + $$.b1 = PF_NOSCRUB; + else + $$.b1 = PF_SCRUB; + } + ; + +scrubrule : scrubaction dir logquick interface af proto fromto scrub_opts { struct pf_rule r; @@ -787,10 +921,11 @@ scrubrule : SCRUB dir logquick interface af proto fromto scrub_opts memset(&r, 0, sizeof(r)); - r.action = PF_SCRUB; + r.action = $1.b1; r.direction = $2; r.log = $3.log; + r.logif = $3.logif; if ($3.quick) { yyerror("scrub rules do not support 'quick'"); YYERROR; @@ -815,20 +950,23 @@ scrubrule : SCRUB dir logquick interface af proto fromto scrub_opts r.max_mss = $8.maxmss; if ($8.fragcache) r.rule_flag |= $8.fragcache; + r.rtableid = $8.rtableid; expand_rule(&r, $4, NULL, $6, $7.src_os, $7.src.host, $7.src.port, $7.dst.host, $7.dst.port, - NULL, NULL, NULL); + NULL, NULL, NULL, ""); } ; scrub_opts : { - bzero(&scrub_opts, sizeof scrub_opts); - } + bzero(&scrub_opts, sizeof scrub_opts); + scrub_opts.rtableid = -1; + } scrub_opts_l { $$ = scrub_opts; } | /* empty */ { bzero(&scrub_opts, sizeof scrub_opts); + scrub_opts.rtableid = -1; $$ = scrub_opts; } ; @@ -878,6 +1016,8 @@ scrub_opt : NODF { } | REASSEMBLE STRING { if (strcasecmp($2, "tcp") != 0) { + yyerror("scrub reassemble supports only tcp, " + "not '%s'", $2); free($2); YYERROR; } @@ -895,6 +1035,9 @@ scrub_opt : NODF { } scrub_opts.randomid = 1; } + | RTABLE number { + scrub_opts.rtableid = $2; + } ; fragcache : FRAGMENT REASSEMBLE { $$ = 0; /* default */ } @@ -904,7 +1047,7 @@ fragcache : FRAGMENT REASSEMBLE { $$ = 0; /* default */ } antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { struct pf_rule r; - struct node_host *h = NULL; + struct node_host *h = NULL, *hh; struct node_if *i, *j; if (check_rulestate(PFCTL_STATE_FILTER)) @@ -916,10 +1059,12 @@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { r.action = PF_DROP; r.direction = PF_IN; r.log = $2.log; + r.logif = $2.logif; r.quick = $2.quick; r.af = $4; if (rule_label(&r, $5.label)) YYERROR; + r.rtableid = $5.rtableid; j = calloc(1, sizeof(struct node_if)); if (j == NULL) err(1, "antispoof: calloc"); @@ -930,11 +1075,35 @@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { YYERROR; } j->not = 1; - h = 
ifa_lookup(j->ifname, PFI_AFLAG_NETWORK); + if (i->dynamic) { + h = calloc(1, sizeof(*h)); + if (h == NULL) + err(1, "address: calloc"); + h->addr.type = PF_ADDR_DYNIFTL; + set_ipmask(h, 128); + if (strlcpy(h->addr.v.ifname, i->ifname, + sizeof(h->addr.v.ifname)) >= + sizeof(h->addr.v.ifname)) { + free(h); + yyerror( + "interface name too long"); + YYERROR; + } + hh = malloc(sizeof(*hh)); + if (hh == NULL) + err(1, "address: malloc"); + bcopy(h, hh, sizeof(*hh)); + h->addr.iflags = PFI_AFLAG_NETWORK; + } else { + h = ifa_lookup(j->ifname, + PFI_AFLAG_NETWORK); + hh = NULL; + } if (h != NULL) expand_rule(&r, j, NULL, NULL, NULL, h, - NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL, + NULL, ""); if ((i->ifa_flags & IFF_LOOPBACK) == 0) { bzero(&r, sizeof(r)); @@ -946,34 +1115,50 @@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { r.af = $4; if (rule_label(&r, $5.label)) YYERROR; - h = ifa_lookup(i->ifname, 0); + r.rtableid = $5.rtableid; + if (hh != NULL) + h = hh; + else + h = ifa_lookup(i->ifname, 0); if (h != NULL) expand_rule(&r, NULL, NULL, NULL, NULL, h, NULL, NULL, - NULL, NULL, NULL, NULL); - } + NULL, NULL, NULL, NULL, ""); + } else + free(hh); } free($5.label); } ; -antispoof_ifspc : FOR if_item { $$ = $2; } +antispoof_ifspc : FOR antispoof_if { $$ = $2; } | FOR '{' antispoof_iflst '}' { $$ = $3; } ; -antispoof_iflst : if_item { $$ = $1; } - | antispoof_iflst comma if_item { +antispoof_iflst : antispoof_if { $$ = $1; } + | antispoof_iflst comma antispoof_if { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; -antispoof_opts : { bzero(&antispoof_opts, sizeof antispoof_opts); } +antispoof_if : if_item { $$ = $1; } + | '(' if_item ')' { + $2->dynamic = 1; + $$ = $2; + } + ; + +antispoof_opts : { + bzero(&antispoof_opts, sizeof antispoof_opts); + antispoof_opts.rtableid = -1; + } antispoof_opts_l { $$ = antispoof_opts; } | /* empty */ { bzero(&antispoof_opts, sizeof antispoof_opts); + antispoof_opts.rtableid = -1; $$ = antispoof_opts; } ; @@ -989,6 +1174,9 @@ antispoof_opt : label { } antispoof_opts.label = $1; } + | RTABLE number { + antispoof_opts.rtableid = $2; + } ; not : '!' 
{ $$ = 1; } @@ -1049,6 +1237,7 @@ table_opt : STRING { else if (!strcmp($1, "persist")) table_opts.flags |= PFR_TFLAG_PERSIST; else { + yyerror("invalid table option '%s'", $1); free($1); YYERROR; } @@ -1074,6 +1263,10 @@ table_opt : STRING { yyerror("\"no-route\" is not permitted " "inside tables"); break; + case PF_ADDR_URPFFAILED: + yyerror("\"urpf-failed\" is not " + "permitted inside tables"); + break; default: yyerror("unknown address type %d", n->addr.type); @@ -1376,14 +1569,15 @@ hfscopts_item : LINKSHARE bandwidth { hfsc_opts.linkshare.m2 = $2; hfsc_opts.linkshare.used = 1; } - | LINKSHARE '(' bandwidth number bandwidth ')' { + | LINKSHARE '(' bandwidth comma number comma bandwidth ')' + { if (hfsc_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } hfsc_opts.linkshare.m1 = $3; - hfsc_opts.linkshare.d = $4; - hfsc_opts.linkshare.m2 = $5; + hfsc_opts.linkshare.d = $5; + hfsc_opts.linkshare.m2 = $7; hfsc_opts.linkshare.used = 1; } | REALTIME bandwidth { @@ -1394,14 +1588,15 @@ hfscopts_item : LINKSHARE bandwidth { hfsc_opts.realtime.m2 = $2; hfsc_opts.realtime.used = 1; } - | REALTIME '(' bandwidth number bandwidth ')' { + | REALTIME '(' bandwidth comma number comma bandwidth ')' + { if (hfsc_opts.realtime.used) { yyerror("realtime already specified"); YYERROR; } hfsc_opts.realtime.m1 = $3; - hfsc_opts.realtime.d = $4; - hfsc_opts.realtime.m2 = $5; + hfsc_opts.realtime.d = $5; + hfsc_opts.realtime.m2 = $7; hfsc_opts.realtime.used = 1; } | UPPERLIMIT bandwidth { @@ -1412,14 +1607,15 @@ hfscopts_item : LINKSHARE bandwidth { hfsc_opts.upperlimit.m2 = $2; hfsc_opts.upperlimit.used = 1; } - | UPPERLIMIT '(' bandwidth number bandwidth ')' { + | UPPERLIMIT '(' bandwidth comma number comma bandwidth ')' + { if (hfsc_opts.upperlimit.used) { yyerror("upperlimit already specified"); YYERROR; } hfsc_opts.upperlimit.m1 = $3; - hfsc_opts.upperlimit.d = $4; - hfsc_opts.upperlimit.m2 = $5; + hfsc_opts.upperlimit.d = $5; + hfsc_opts.upperlimit.m2 = $7; hfsc_opts.upperlimit.used = 1; } | STRING { @@ -1534,6 +1730,7 @@ pfrule : action dir logquick interface route af proto fromto struct node_proto *proto; int srctrack = 0; int statelock = 0; + int adaptive = 0; int dofree; if (check_rulestate(PFCTL_STATE_FILTER)) @@ -1560,8 +1757,10 @@ pfrule : action dir logquick interface route af proto fromto } r.direction = $2; r.log = $3.log; + r.logif = $3.logif; r.quick = $3.quick; r.prob = $9.prob; + r.rtableid = $9.rtableid; r.af = $6; if ($9.tag) @@ -1579,11 +1778,15 @@ pfrule : action dir logquick interface route af proto fromto YYERROR; } r.match_tag_not = $9.match_tag_not; - r.flags = $9.flags.b1; - r.flagset = $9.flags.b2; if (rule_label(&r, $9.label)) YYERROR; free($9.label); + r.flags = $9.flags.b1; + r.flagset = $9.flags.b2; + if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { + yyerror("flags always false"); + YYERROR; + } if ($9.flags.b1 || $9.flags.b2 || $8.src_os) { for (proto = $7; proto != NULL && proto->proto != IPPROTO_TCP; @@ -1651,6 +1854,7 @@ pfrule : action dir logquick interface route af proto fromto YYERROR; } srctrack = o->data.src_track; + r.rule_flag |= PFRULE_SRCTRACK; break; case PF_STATE_OPT_MAX_SRC_STATES: if (r.max_src_states) { @@ -1659,7 +1863,7 @@ pfrule : action dir logquick interface route af proto fromto "multiple definitions"); YYERROR; } - if (o->data.max_src_nodes == 0) { + if (o->data.max_src_states == 0) { yyerror("'max-src-states' must " "be > 0"); YYERROR; @@ -1668,6 +1872,66 @@ pfrule : action dir logquick interface route af proto fromto 
o->data.max_src_states; r.rule_flag |= PFRULE_SRCTRACK; break; + case PF_STATE_OPT_OVERLOAD: + if (r.overload_tblname[0]) { + yyerror("multiple 'overload' " + "table definitions"); + YYERROR; + } + if (strlcpy(r.overload_tblname, + o->data.overload.tblname, + PF_TABLE_NAME_SIZE) >= + PF_TABLE_NAME_SIZE) { + yyerror("state option: " + "strlcpy"); + YYERROR; + } + r.flush = o->data.overload.flush; + break; + case PF_STATE_OPT_MAX_SRC_CONN: + if (r.max_src_conn) { + yyerror("state option " + "'max-src-conn' " + "multiple definitions"); + YYERROR; + } + if (o->data.max_src_conn == 0) { + yyerror("'max-src-conn' " + "must be > 0"); + YYERROR; + } + r.max_src_conn = + o->data.max_src_conn; + r.rule_flag |= PFRULE_SRCTRACK | + PFRULE_RULESRCTRACK; + break; + case PF_STATE_OPT_MAX_SRC_CONN_RATE: + if (r.max_src_conn_rate.limit) { + yyerror("state option " + "'max-src-conn-rate' " + "multiple definitions"); + YYERROR; + } + if (!o->data.max_src_conn_rate.limit || + !o->data.max_src_conn_rate.seconds) { + yyerror("'max-src-conn-rate' " + "values must be > 0"); + YYERROR; + } + if (o->data.max_src_conn_rate.limit > + PF_THRESHOLD_MAX) { + yyerror("'max-src-conn-rate' " + "maximum rate must be < %u", + PF_THRESHOLD_MAX); + YYERROR; + } + r.max_src_conn_rate.limit = + o->data.max_src_conn_rate.limit; + r.max_src_conn_rate.seconds = + o->data.max_src_conn_rate.seconds; + r.rule_flag |= PFRULE_SRCTRACK | + PFRULE_RULESRCTRACK; + break; case PF_STATE_OPT_MAX_SRC_NODES: if (r.max_src_nodes) { yyerror("state option " @@ -1695,6 +1959,11 @@ pfrule : action dir logquick interface route af proto fromto r.rule_flag |= o->data.statelock; break; case PF_STATE_OPT_TIMEOUT: + if (o->data.timeout.number == + PFTM_ADAPTIVE_START || + o->data.timeout.number == + PFTM_ADAPTIVE_END) + adaptive = 1; if (r.timeout[o->data.timeout.number]) { yyerror("state timeout %s " "multiple definitions", @@ -1714,30 +1983,22 @@ pfrule : action dir logquick interface route af proto fromto free(p); } - /* - * 'flags S/SA' by default on stateful rules if - * the pickup mode is unspecified or disabled. - * - * If the pickup mode is enabled or hashonly we - * want to create state regardless of the flags. 
- */ +#if 0 + /* 'flags S/SA' by default on stateful rules */ if (!r.action && !r.flags && !r.flagset && !$9.fragment && !($9.marker & FOM_FLAGS) && r.keep_state) { - switch(r.pickup_mode) { - case PF_PICKUPS_UNSPECIFIED: - case PF_PICKUPS_DISABLED: - r.flags = parse_flags("S"); - r.flagset = parse_flags("SA"); - break; - case PF_PICKUPS_HASHONLY: - case PF_PICKUPS_ENABLED: - /* no flag restrictions */ - break; - } + r.flags = parse_flags("S"); + r.flagset = parse_flags("SA"); } - - if (srctrack) { +#endif + if (!adaptive && r.max_states) { + r.timeout[PFTM_ADAPTIVE_START] = + (r.max_states / 10) * 6; + r.timeout[PFTM_ADAPTIVE_END] = + (r.max_states / 10) * 12; + } + if (r.rule_flag & PFRULE_SRCTRACK) { if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_nodes) { yyerror("'max-src-nodes' is " @@ -1745,6 +2006,24 @@ pfrule : action dir logquick interface route af proto fromto "'source-track global'"); YYERROR; } + if (srctrack == PF_SRCTRACK_GLOBAL && + r.max_src_conn) { + yyerror("'max-src-conn' is " + "incompatible with " + "'source-track global'"); + YYERROR; + } + if (srctrack == PF_SRCTRACK_GLOBAL && + r.max_src_conn_rate.seconds) { + yyerror("'max-src-conn-rate' is " + "incompatible with " + "'source-track global'"); + YYERROR; + } + if (r.timeout[PFTM_SRC_NODE] < + r.max_src_conn_rate.seconds) + r.timeout[PFTM_SRC_NODE] = + r.max_src_conn_rate.seconds; r.rule_flag |= PFRULE_SRCTRACK; if (srctrack == PF_SRCTRACK_RULE) r.rule_flag |= PFRULE_RULESRCTRACK; @@ -1825,15 +2104,19 @@ pfrule : action dir logquick interface route af proto fromto expand_rule(&r, $4, $5.host, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, - $9.uid, $9.gid, $9.icmpspec); + $9.uid, $9.gid, $9.icmpspec, ""); } ; -filter_opts : { bzero(&filter_opts, sizeof filter_opts); } +filter_opts : { + bzero(&filter_opts, sizeof filter_opts); + filter_opts.rtableid = -1; + } filter_opts_l { $$ = filter_opts; } | /* empty */ { bzero(&filter_opts, sizeof filter_opts); + filter_opts.rtableid = -1; $$ = filter_opts; } ; @@ -1915,9 +2198,9 @@ filter_opt : USER uids { filter_opts.match_tag = $3; filter_opts.match_tag_not = $1; } - | PROBABILITY STRING { - char *e; - double p = strtod($2, &e); + | PROBABILITY STRING { + char *e; + double p = strtod($2, &e); if (*e == '%') { p *= 0.01; @@ -1937,6 +2220,9 @@ filter_opt : USER uids { filter_opts.prob = (u_int32_t)p; free($2); } + | RTABLE number { + filter_opts.rtableid = $2; + } ; action : PASS { $$.b1 = PF_PASS; $$.b2 = $$.w = 0; } @@ -2018,15 +2304,55 @@ dir : /* empty */ { $$ = 0; } | OUT { $$ = PF_OUT; } ; -logquick : /* empty */ { $$.log = 0; $$.quick = 0; } - | log { $$.log = $1; $$.quick = 0; } - | QUICK { $$.log = 0; $$.quick = 1; } - | log QUICK { $$.log = $1; $$.quick = 1; } - | QUICK log { $$.log = $2; $$.quick = 1; } +quick : /* empty */ { $$.quick = 0; } + | QUICK { $$.quick = 1; } ; -log : LOG { $$ = 1; } - | LOGALL { $$ = 2; } +logquick : /* empty */ { $$.log = 0; $$.quick = 0; $$.logif = 0; } + | log { $$ = $1; $$.quick = 0; } + | QUICK { $$.quick = 1; $$.log = 0; $$.logif = 0; } + | log QUICK { $$ = $1; $$.quick = 1; } + | QUICK log { $$ = $2; $$.quick = 1; } + ; + +log : LOG { $$.log = PF_LOG; $$.logif = 0; } + | LOG '(' logopts ')' { + $$.log = PF_LOG | $3.log; + $$.logif = $3.logif; + } + ; + +logopts : logopt { $$ = $1; } + | logopts comma logopt { + $$.log = $1.log | $3.log; + $$.logif = $3.logif; + if ($$.logif == 0) + $$.logif = $1.logif; + } + ; + +logopt : ALL { $$.log = PF_LOG_ALL; $$.logif = 0; } + | USER { $$.log = PF_LOG_SOCKET_LOOKUP; 
$$.logif = 0; } + | GROUP { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } + | TO string { + const char *errstr; + u_int i; + + $$.log = 0; + if (strncmp($2, "pflog", 5)) { + yyerror("%s: should be a pflog interface", $2); + free($2); + YYERROR; + } + i = strtonum($2 + 5, 0, 255, &errstr); + if (errstr) { + yyerror("%s: %s", $2, errstr); + free($2); + YYERROR; + } + free($2); + $$.logif = i; + } ; interface : /* empty */ { $$ = NULL; } @@ -2048,11 +2374,6 @@ if_item_not : not if_item { $$ = $2; $$->not = $1; } if_item : STRING { struct node_host *n; - if ((n = ifa_exists($1, 1)) == NULL) { - yyerror("unknown interface %s", $1); - free($1); - YYERROR; - } $$ = calloc(1, sizeof(struct node_if)); if ($$ == NULL) err(1, "if_item: calloc"); @@ -2063,8 +2384,11 @@ if_item : STRING { yyerror("interface name too long"); YYERROR; } + + if ((n = ifa_exists($1)) != NULL) + $$->ifa_flags = n->ifa_flags; + free($1); - $$->ifa_flags = n->ifa_flags; $$->not = 0; $$->next = NULL; $$->tail = $$; @@ -2175,6 +2499,9 @@ to : /* empty */ { $$.port = NULL; } | TO ipportspec { + if (disallow_urpf_failed($2.host, "\"urpf-failed\" is " + "not permitted in a destination address")) + YYERROR; $$ = $2; } ; @@ -2198,8 +2525,8 @@ ipspec : ANY { $$ = NULL; } | '{' host_list '}' { $$ = $2; } ; -host_list : xhost { $$ = $1; } - | host_list comma xhost { +host_list : ipspec { $$ = $1; } + | host_list comma ipspec { if ($3 == NULL) $$ = $1; else if ($1 == NULL) @@ -2219,12 +2546,22 @@ xhost : not host { n->not = $1; $$ = $2; } - | NOROUTE { + | not NOROUTE { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "xhost: calloc"); $$->addr.type = PF_ADDR_NOROUTE; $$->next = NULL; + $$->not = $1; + $$->tail = $$; + } + | not URPFFAILED { + $$ = calloc(1, sizeof(struct node_host)); + if ($$ == NULL) + err(1, "xhost: calloc"); + $$->addr.type = PF_ADDR_URPFFAILED; + $$->next = NULL; + $$->not = $1; $$->tail = $$; } ; @@ -2279,6 +2616,26 @@ host : STRING { $$->next = NULL; $$->tail = $$; } + | ROUTE STRING { + $$ = calloc(1, sizeof(struct node_host)); + if ($$ == NULL) { + free($2); + err(1, "host: calloc"); + } + $$->addr.type = PF_ADDR_RTLABEL; + if (strlcpy($$->addr.v.rtlabelname, $2, + sizeof($$->addr.v.rtlabelname)) >= + sizeof($$->addr.v.rtlabelname)) { + yyerror("route label too long, max %u chars", + sizeof($$->addr.v.rtlabelname) - 1); + free($2); + free($$); + YYERROR; + } + $$->next = NULL; + $$->tail = $$; + free($2); + } ; number : STRING { @@ -2299,6 +2656,11 @@ dynaddr : '(' STRING ')' { char *p, *op; op = $2; + if (!isalpha(op[0])) { + yyerror("invalid interface name '%s'", op); + free(op); + YYERROR; + } while ((p = strrchr($2, ':')) != NULL) { if (!strcmp(p+1, "network")) flags |= PFI_AFLAG_NETWORK; @@ -2322,11 +2684,6 @@ dynaddr : '(' STRING ')' { "interface modifiers"); YYERROR; } - if (ifa_exists($2, 1) == NULL && strcmp($2, "self")) { - yyerror("interface %s does not exist", $2); - free(op); - YYERROR; - } $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "address: calloc"); @@ -2407,31 +2764,13 @@ port_item : port { port : STRING { char *p = strchr($1, ':'); - struct servent *s = NULL; - u_long ulval; if (p == NULL) { - if (atoul($1, &ulval) == 0) { - if (ulval > 65535) { - free($1); - yyerror("illegal port value %d", - ulval); - YYERROR; - } - $$.a = htons(ulval); - } else { - s = getservbyname($1, "tcp"); - if (s == NULL) - s = getservbyname($1, "udp"); - if (s == NULL) { - yyerror("unknown port %s", $1); - free($1); - YYERROR; - } - $$.a = s->s_port; + if (($$.a = 
getservice($1)) == -1) { + free($1); + YYERROR; } - $$.b = 0; - $$.t = 0; + $$.b = $$.t = 0; } else { int port[2]; @@ -2628,6 +2967,7 @@ flag : STRING { flags : FLAGS flag '/' flag { $$.b1 = $2.b1; $$.b2 = $4.b1; } | FLAGS '/' flag { $$.b1 = 0; $$.b2 = $3.b1; } + | FLAGS ANY { $$.b1 = 0; $$.b2 = 0; } ; icmpspec : ICMPTYPE icmp_item { $$ = $2; } @@ -2669,7 +3009,7 @@ icmp_item : icmptype { if (atoul($3, &ulval) == 0) { if (ulval > 255) { free($3); - yyerror("illegal icmp-code %d", ulval); + yyerror("illegal icmp-code %lu", ulval); YYERROR; } } else { @@ -2709,7 +3049,7 @@ icmp6_item : icmp6type { if (atoul($3, &ulval) == 0) { if (ulval > 255) { - yyerror("illegal icmp6-code %ld", + yyerror("illegal icmp6-code %lu", ulval); free($3); YYERROR; @@ -2741,7 +3081,7 @@ icmptype : STRING { if (atoul($1, &ulval) == 0) { if (ulval > 255) { - yyerror("illegal icmp-type %d", ulval); + yyerror("illegal icmp-type %lu", ulval); free($1); YYERROR; } @@ -2765,7 +3105,8 @@ icmp6type : STRING { if (atoul($1, &ulval) == 0) { if (ulval > 255) { - yyerror("illegal icmp6-type %d", ulval); + yyerror("illegal icmp6-type %lu", + ulval); free($1); YYERROR; } @@ -2811,9 +3152,6 @@ sourcetrack : SOURCETRACK { $$ = PF_SRCTRACK; } statelock : IFBOUND { $$ = PFRULE_IFBOUND; } - | GRBOUND { - $$ = PFRULE_GRBOUND; - } | FLOATING { $$ = 0; } @@ -2837,6 +3175,13 @@ keep : NO STATE { } ; +flush : /* empty */ { $$ = 0; } + | FLUSH { $$ = PF_FLUSH; } + | FLUSH GLOBAL { + $$ = PF_FLUSH | PF_FLUSH_GLOBAL; + } + ; + state_opt_spec : '(' state_opt_list ')' { $$ = $2; } | /* empty */ { $$ = NULL; } ; @@ -2875,6 +3220,43 @@ state_opt_item : MAXIMUM number { $$->next = NULL; $$->tail = $$; } + | MAXSRCCONN number { + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + $$->type = PF_STATE_OPT_MAX_SRC_CONN; + $$->data.max_src_conn = $2; + $$->next = NULL; + $$->tail = $$; + } + | MAXSRCCONNRATE number '/' number { + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + $$->type = PF_STATE_OPT_MAX_SRC_CONN_RATE; + $$->data.max_src_conn_rate.limit = $2; + $$->data.max_src_conn_rate.seconds = $4; + $$->next = NULL; + $$->tail = $$; + } + | OVERLOAD '<' STRING '>' flush { + if (strlen($3) >= PF_TABLE_NAME_SIZE) { + yyerror("table name '%s' too long", $3); + free($3); + YYERROR; + } + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + if (strlcpy($$->data.overload.tblname, $3, + PF_TABLE_NAME_SIZE) >= PF_TABLE_NAME_SIZE) + errx(1, "state_opt_item: strlcpy"); + free($3); + $$->type = PF_STATE_OPT_OVERLOAD; + $$->data.overload.flush = $5; + $$->next = NULL; + $$->tail = $$; + } | MAXSRCNODES number { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) @@ -3165,29 +3547,41 @@ redirection : /* empty */ { $$ = NULL; } } ; -natpass : /* empty */ { $$ = 0; } - | PASS { $$ = 1; } +natpass : /* empty */ { $$.b1 = $$.b2 = 0; } + | PASS { $$.b1 = 1; $$.b2 = 0; } + | PASS log { $$.b1 = 1; $$.b2 = $2.log; $$.w2 = $2.logif; } ; nataction : no NAT natpass { - $$.b2 = $$.w = 0; + if ($1 && $3.b1) { + yyerror("\"pass\" not valid with \"no\""); + YYERROR; + } if ($1) $$.b1 = PF_NONAT; else $$.b1 = PF_NAT; - $$.b2 = $3; + $$.b2 = $3.b1; + $$.w = $3.b2; + $$.w2 = $3.w2; } | no RDR natpass { - $$.b2 = $$.w = 0; + if ($1 && $3.b1) { + yyerror("\"pass\" not valid with \"no\""); + YYERROR; + } if ($1) $$.b1 = PF_NORDR; else $$.b1 = PF_RDR; - $$.b2 = $3; + $$.b2 = $3.b1; + $$.w = $3.b2; + $$.w2 = $3.w2; } 
; -natrule : nataction interface af proto fromto tag redirpool pool_opts +natrule : nataction interface af proto fromto tag tagged rtable + redirpool pool_opts { struct pf_rule r; @@ -3198,6 +3592,8 @@ natrule : nataction interface af proto fromto tag redirpool pool_opts r.action = $1.b1; r.natpass = $1.b2; + r.log = $1.w; + r.logif = $1.w2; r.af = $3; if (!r.af) { @@ -3217,46 +3613,56 @@ natrule : nataction interface af proto fromto tag redirpool pool_opts YYERROR; } + if ($7.name) + if (strlcpy(r.match_tagname, $7.name, + PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { + yyerror("tag too long, max %u chars", + PF_TAG_NAME_SIZE - 1); + YYERROR; + } + r.match_tag_not = $7.neg; + r.rtableid = $8; + if (r.action == PF_NONAT || r.action == PF_NORDR) { - if ($7 != NULL) { + if ($9 != NULL) { yyerror("translation rule with 'no' " "does not need '->'"); YYERROR; } } else { - if ($7 == NULL || $7->host == NULL) { + if ($9 == NULL || $9->host == NULL) { yyerror("translation rule requires '-> " "address'"); YYERROR; } - if (!r.af && ! $7->host->ifindex) - r.af = $7->host->af; + if (!r.af && ! $9->host->ifindex) + r.af = $9->host->af; - remove_invalid_hosts(&$7->host, &r.af); - if (invalid_redirect($7->host, r.af)) + remove_invalid_hosts(&$9->host, &r.af); + if (invalid_redirect($9->host, r.af)) YYERROR; - if (check_netmask($7->host, r.af)) + if (check_netmask($9->host, r.af)) YYERROR; - r.rpool.proxy_port[0] = ntohs($7->rport.a); + r.rpool.proxy_port[0] = ntohs($9->rport.a); switch (r.action) { case PF_RDR: - if (!$7->rport.b && $7->rport.t && + if (!$9->rport.b && $9->rport.t && $5.dst.port != NULL) { r.rpool.proxy_port[1] = - ntohs($7->rport.a) + + ntohs($9->rport.a) + (ntohs( $5.dst.port->port[1]) - ntohs( $5.dst.port->port[0])); } else r.rpool.proxy_port[1] = - ntohs($7->rport.b); + ntohs($9->rport.b); break; case PF_NAT: r.rpool.proxy_port[1] = - ntohs($7->rport.b); + ntohs($9->rport.b); if (!r.rpool.proxy_port[0] && !r.rpool.proxy_port[1]) { r.rpool.proxy_port[0] = @@ -3271,25 +3677,25 @@ natrule : nataction interface af proto fromto tag redirpool pool_opts break; } - r.rpool.opts = $8.type; + r.rpool.opts = $10.type; if ((r.rpool.opts & PF_POOL_TYPEMASK) == - PF_POOL_NONE && ($7->host->next != NULL || - $7->host->addr.type == PF_ADDR_TABLE || - DYNIF_MULTIADDR($7->host->addr))) + PF_POOL_NONE && ($9->host->next != NULL || + $9->host->addr.type == PF_ADDR_TABLE || + DYNIF_MULTIADDR($9->host->addr))) r.rpool.opts = PF_POOL_ROUNDROBIN; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && - disallow_table($7->host, "tables are only " + disallow_table($9->host, "tables are only " "supported in round-robin redirection " "pools")) YYERROR; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && - disallow_alias($7->host, "interface (%s) " + disallow_alias($9->host, "interface (%s) " "is only supported in round-robin " "redirection pools")) YYERROR; - if ($7->host->next != NULL) { + if ($9->host->next != NULL) { if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { yyerror("only round-robin " @@ -3300,14 +3706,14 @@ natrule : nataction interface af proto fromto tag redirpool pool_opts } } - if ($8.key != NULL) - memcpy(&r.rpool.key, $8.key, + if ($10.key != NULL) + memcpy(&r.rpool.key, $10.key, sizeof(struct pf_poolhashkey)); - if ($8.opts) - r.rpool.opts |= $8.opts; + if ($10.opts) + r.rpool.opts |= $10.opts; - if ($8.staticport) { + if ($10.staticport) { if (r.action != PF_NAT) { yyerror("the 'static-port' option is " "only valid with nat rules"); @@ -3326,36 +3732,46 @@ natrule 
: nataction interface af proto fromto tag redirpool pool_opts r.rpool.proxy_port[1] = 0; } - expand_rule(&r, $2, $7 == NULL ? NULL : $7->host, $4, + expand_rule(&r, $2, $9 == NULL ? NULL : $9->host, $4, $5.src_os, $5.src.host, $5.src.port, $5.dst.host, - $5.dst.port, 0, 0, 0); - free($7); + $5.dst.port, 0, 0, 0, ""); + free($9); } ; binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag - redirection + tagged rtable redirection { struct pf_rule binat; struct pf_pooladdr *pa; if (check_rulestate(PFCTL_STATE_NAT)) YYERROR; + if (disallow_urpf_failed($10, "\"urpf-failed\" is not " + "permitted as a binat destination")) + YYERROR; memset(&binat, 0, sizeof(binat)); + if ($1 && $3.b1) { + yyerror("\"pass\" not valid with \"no\""); + YYERROR; + } if ($1) binat.action = PF_NOBINAT; else binat.action = PF_BINAT; - binat.natpass = $3; + binat.natpass = $3.b1; + binat.log = $3.b2; + binat.logif = $3.w2; binat.af = $5; if (!binat.af && $8 != NULL && $8->af) binat.af = $8->af; if (!binat.af && $10 != NULL && $10->af) binat.af = $10->af; - if (!binat.af && $12 != NULL && $12->host) - binat.af = $12->host->af; + + if (!binat.af && $14 != NULL && $14->host) + binat.af = $14->host->af; if (!binat.af) { yyerror("address family (inet/inet6) " "undefined"); @@ -3368,6 +3784,7 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag binat.ifnot = $4->not; free($4); } + if ($11 != NULL) if (strlcpy(binat.tagname, $11, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { @@ -3375,6 +3792,15 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag PF_TAG_NAME_SIZE - 1); YYERROR; } + if ($12.name) + if (strlcpy(binat.match_tagname, $12.name, + PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { + yyerror("tag too long, max %u chars", + PF_TAG_NAME_SIZE - 1); + YYERROR; + } + binat.match_tag_not = $12.neg; + binat.rtableid = $13; if ($6 != NULL) { binat.proto = $6->proto; @@ -3388,12 +3814,12 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag "interface (%s) as the source address of a binat " "rule")) YYERROR; - if ($12 != NULL && $12->host != NULL && disallow_table( - $12->host, "invalid use of table <%s> as the " + if ($14 != NULL && $14->host != NULL && disallow_table( + $14->host, "invalid use of table <%s> as the " "redirect address of a binat rule")) YYERROR; - if ($12 != NULL && $12->host != NULL && disallow_alias( - $12->host, "invalid use of interface (%s) as the " + if ($14 != NULL && $14->host != NULL && disallow_alias( + $14->host, "invalid use of interface (%s) as the " "redirect address of a binat rule")) YYERROR; @@ -3427,38 +3853,38 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag YYERROR; memcpy(&binat.dst.addr, &$10->addr, sizeof(binat.dst.addr)); - binat.dst.not = $10->not; + binat.dst.neg = $10->not; free($10); } if (binat.action == PF_NOBINAT) { - if ($12 != NULL) { + if ($14 != NULL) { yyerror("'no binat' rule does not need" " '->'"); YYERROR; } } else { - if ($12 == NULL || $12->host == NULL) { + if ($14 == NULL || $14->host == NULL) { yyerror("'binat' rule requires" " '-> address'"); YYERROR; } - remove_invalid_hosts(&$12->host, &binat.af); - if (invalid_redirect($12->host, binat.af)) + remove_invalid_hosts(&$14->host, &binat.af); + if (invalid_redirect($14->host, binat.af)) YYERROR; - if ($12->host->next != NULL) { + if ($14->host->next != NULL) { yyerror("binat rule must redirect to " "a single address"); YYERROR; } - if (check_netmask($12->host, binat.af)) + if (check_netmask($14->host, binat.af)) YYERROR; if 
(!PF_AZERO(&binat.src.addr.v.a.mask, binat.af) && !PF_AEQ(&binat.src.addr.v.a.mask, - &$12->host->addr.v.a.mask, binat.af)) { + &$14->host->addr.v.a.mask, binat.af)) { yyerror("'binat' source mask and " "redirect mask must be the same"); YYERROR; @@ -3468,15 +3894,15 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag pa = calloc(1, sizeof(struct pf_pooladdr)); if (pa == NULL) err(1, "binat: calloc"); - pa->addr = $12->host->addr; + pa->addr = $14->host->addr; pa->ifname[0] = 0; TAILQ_INSERT_TAIL(&binat.rpool.list, pa, entries); - free($12); + free($14); } - pfctl_add_rule(pf, &binat); + pfctl_add_rule(pf, &binat, ""); } ; @@ -3484,18 +3910,21 @@ tag : /* empty */ { $$ = NULL; } | TAG STRING { $$ = $2; } ; +tagged : /* empty */ { $$.neg = 0; $$.name = NULL; } + | not TAGGED string { $$.neg = $1; $$.name = $3; } + ; + +rtable : /* empty */ { $$ = -1; } + | RTABLE number { + $$ = $2; + } + ; + route_host : STRING { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "route_host: calloc"); $$->ifname = $1; - if (ifa_exists($$->ifname, 0) == NULL) { - yyerror("routeto: unknown interface %s", - $$->ifname); - free($1); - free($$); - YYERROR; - } set_ipmask($$, 128); $$->next = NULL; $$->tail = $$; @@ -3503,11 +3932,6 @@ route_host : STRING { | '(' STRING host ')' { $$ = $3; $$->ifname = $2; - if (ifa_exists($$->ifname, 0) == NULL) { - yyerror("routeto: unknown interface %s", - $$->ifname); - YYERROR; - } } ; @@ -3610,6 +4034,8 @@ yesno : NO { $$ = 0; } if (!strcmp($1, "yes")) $$ = 1; else { + yyerror("invalid value '%s', expected 'yes' " + "or 'no'", $1); free($1); YYERROR; } @@ -3652,6 +4078,17 @@ disallow_table(struct node_host *h, const char *fmt) return (0); } +int +disallow_urpf_failed(struct node_host *h, const char *fmt) +{ + for (; h != NULL; h = h->next) + if (h->addr.type == PF_ADDR_URPFFAILED) { + yyerror(fmt); + return (1); + } + return (0); +} + int disallow_alias(struct node_host *h, const char *fmt) { @@ -3664,7 +4101,7 @@ disallow_alias(struct node_host *h, const char *fmt) } int -rule_consistent(struct pf_rule *r) +rule_consistent(struct pf_rule *r, int anchor_call __unused) { int problems = 0; @@ -3672,7 +4109,8 @@ rule_consistent(struct pf_rule *r) case PF_PASS: case PF_DROP: case PF_SCRUB: - problems = filter_consistent(r); + case PF_NOSCRUB: + problems = filter_consistent(r, anchor_call); break; case PF_NAT: case PF_NONAT: @@ -3691,7 +4129,7 @@ rule_consistent(struct pf_rule *r) } int -filter_consistent(struct pf_rule *r) +filter_consistent(struct pf_rule *r, int anchor_call __unused) { int problems = 0; @@ -3709,6 +4147,12 @@ filter_consistent(struct pf_rule *r) yyerror("must indicate address family with icmp-type/code"); problems++; } + if (r->overload_tblname[0] && + r->max_src_conn == 0 && r->max_src_conn_rate.seconds == 0) { + yyerror("'overload' requires 'max-src-conn' " + "or 'max-src-conn-rate'"); + problems++; + } if ((r->proto == IPPROTO_ICMP && r->af == AF_INET6) || (r->proto == IPPROTO_ICMPV6 && r->af == AF_INET)) { yyerror("proto %s doesn't match address family %s", @@ -3737,11 +4181,6 @@ filter_consistent(struct pf_rule *r) yyerror("keep state on block rules doesn't make sense"); problems++; } - if ((r->tagname[0] || r->match_tagname[0]) && !r->keep_state && - r->action == PF_PASS && !r->anchorname[0]) { - yyerror("tags cannot be used without keep state"); - problems++; - } return (-problems); } @@ -3809,7 +4248,7 @@ process_tabledef(char *name, struct table_opts *opts) &opts->init_nodes); if (!(pf->opts & PF_OPT_NOACTION) 
&& pfctl_define_table(name, opts->flags, opts->init_addr, - pf->anchor, pf->ruleset, &ab, pf->tticket)) { + pf->anchor->name, &ab, pf->anchor->ruleset.tticket)) { yyerror("cannot define table %s: %s", name, pfr_strerror(errno)); goto _error; @@ -3908,6 +4347,9 @@ expand_label_addr(const char *name, char *label, size_t len, sa_family_t af, case PF_ADDR_NOROUTE: snprintf(tmp, sizeof(tmp), "no-route"); break; + case PF_ADDR_URPFFAILED: + snprintf(tmp, sizeof(tmp), "urpf-failed"); + break; case PF_ADDR_ADDRMASK: if (!af || (PF_AZERO(&h->addr.v.a.addr, af) && PF_AZERO(&h->addr.v.a.mask, af))) @@ -3998,7 +4440,7 @@ expand_label_nr(const char *name, char *label, size_t len) char n[11]; if (strstr(label, name) != NULL) { - snprintf(n, sizeof(n), "%u", pf->rule_nr); + snprintf(n, sizeof(n), "%u", pf->anchor->match); expand_label_str(label, len, name, n); } } @@ -4276,7 +4718,8 @@ expand_rule(struct pf_rule *r, struct node_proto *protos, struct node_os *src_oses, struct node_host *src_hosts, struct node_port *src_ports, struct node_host *dst_hosts, struct node_port *dst_ports, - struct node_uid *uids, struct node_gid *gids, struct node_icmp *icmp_types) + struct node_uid *uids, struct node_gid *gids, struct node_icmp *icmp_types, + const char *anchor_call) { sa_family_t af = r->af; int added = 0, error = 0; @@ -4329,11 +4772,12 @@ expand_rule(struct pf_rule *r, r->af = dst_host->af; if (*interface->ifname) - memcpy(r->ifname, interface->ifname, sizeof(r->ifname)); + strlcpy(r->ifname, interface->ifname, + sizeof(r->ifname)); else if (if_indextoname(src_host->ifindex, ifname)) - memcpy(r->ifname, ifname, sizeof(r->ifname)); + strlcpy(r->ifname, ifname, sizeof(r->ifname)); else if (if_indextoname(dst_host->ifindex, ifname)) - memcpy(r->ifname, ifname, sizeof(r->ifname)); + strlcpy(r->ifname, ifname, sizeof(r->ifname)); else memset(r->ifname, '\0', sizeof(r->ifname)); @@ -4360,12 +4804,12 @@ expand_rule(struct pf_rule *r, r->ifnot = interface->not; r->proto = proto->proto; r->src.addr = src_host->addr; - r->src.not = src_host->not; + r->src.neg = src_host->not; r->src.port[0] = src_port->port[0]; r->src.port[1] = src_port->port[1]; r->src.port_op = src_port->op; r->dst.addr = dst_host->addr; - r->dst.not = dst_host->not; + r->dst.neg = dst_host->not; r->dst.port[0] = dst_port->port[0]; r->dst.port[1] = dst_port->port[1]; r->dst.port_op = dst_port->op; @@ -4424,11 +4868,11 @@ expand_rule(struct pf_rule *r, TAILQ_INSERT_TAIL(&r->rpool.list, pa, entries); } - if (rule_consistent(r) < 0 || error) + if (rule_consistent(r, anchor_call[0]) < 0 || error) yyerror("skipping rule due to errors"); else { - r->nr = pf->rule_nr++; - pfctl_add_rule(pf, r); + r->nr = pf->astack[pf->asd]->match++; + pfctl_add_rule(pf, r, anchor_call); added++; } @@ -4450,6 +4894,42 @@ expand_rule(struct pf_rule *r, yyerror("rule expands to no valid combination"); } +int +expand_skip_interface(struct node_if *interfaces) +{ + int errs = 0; + + if (!interfaces || (!interfaces->next && !interfaces->not && + !strcmp(interfaces->ifname, "none"))) { + if (pf->opts & PF_OPT_VERBOSE) + printf("set skip on none\n"); + errs = pfctl_set_interface_flags(pf, __DECONST(char *, ""), PFI_IFLAG_SKIP, 0); + return (errs); + } + + if (pf->opts & PF_OPT_VERBOSE) + printf("set skip on {"); + LOOP_THROUGH(struct node_if, interface, interfaces, + if (pf->opts & PF_OPT_VERBOSE) + printf(" %s", interface->ifname); + if (interface->not) { + yyerror("skip on ! 
is not supported"); + errs++; + } else + errs += pfctl_set_interface_flags(pf, + interface->ifname, PFI_IFLAG_SKIP, 1); + ); + if (pf->opts & PF_OPT_VERBOSE) + printf(" }\n"); + + FREE_LIST(struct node_if, interfaces); + + if (errs) + return (1); + else + return (0); +} + #undef FREE_LIST #undef LOOP_THROUGH @@ -4502,12 +4982,12 @@ lookup(char *s) { "fingerprints", FINGERPRINTS}, { "flags", FLAGS}, { "floating", FLOATING}, + { "flush", FLUSH}, { "for", FOR}, { "fragment", FRAGMENT}, { "from", FROM}, { "global", GLOBAL}, { "group", GROUP}, - { "group-bound", GRBOUND}, { "hash-only", HASHONLY}, { "hfsc", HFSC}, { "hogs", HOGS}, @@ -4525,10 +5005,11 @@ lookup(char *s) { "linkshare", LINKSHARE}, { "load", LOAD}, { "log", LOG}, - { "log-all", LOGALL}, { "loginterface", LOGINTERFACE}, { "max", MAXIMUM}, { "max-mss", MAXMSS}, + { "max-src-conn", MAXSRCCONN}, + { "max-src-conn-rate", MAXSRCCONNRATE}, { "max-src-nodes", MAXSRCNODES}, { "max-src-states", MAXSRCSTATES}, { "min-ttl", MINTTL}, @@ -4544,6 +5025,7 @@ lookup(char *s) { "optimization", OPTIMIZATION}, { "os", OS}, { "out", OUT}, + { "overload", OVERLOAD}, { "pass", PASS}, { "pickups", PICKUPS}, { "port", PORT}, @@ -4567,10 +5049,14 @@ lookup(char *s) { "return-icmp6", RETURNICMP6}, { "return-rst", RETURNRST}, { "round-robin", ROUNDROBIN}, + { "route", ROUTE}, { "route-to", ROUTETO}, + { "rtable", RTABLE}, { "rule", RULE}, + { "ruleset-optimization", RULESET_OPTIMIZATION}, { "scrub", SCRUB}, { "set", SET}, + { "skip", SKIP}, { "source-hash", SOURCEHASH}, { "source-track", SOURCETRACK}, { "state", STATE}, @@ -4587,6 +5073,7 @@ lookup(char *s) { "tos", TOS}, { "ttl", TTL}, { "upperlimit", UPPERLIMIT}, + { "urpf-failed", URPFFAILED}, { "user", USER}, }; const struct keywords *p; @@ -4634,9 +5121,7 @@ lgetc(FILE *f) while ((c = getc(f)) == '\\') { next = getc(f); if (next != '\n') { - if (isspace(next)) - yyerror("whitespace after \\"); - ungetc(next, f); + c = next; break; } yylval.lineno = lineno; @@ -4924,21 +5409,40 @@ symget(const char *nam) } void -decide_address_family(struct node_host *n, sa_family_t *af) +mv_rules(struct pf_ruleset *src, struct pf_ruleset *dst) { - sa_family_t target_af = 0; + int i; + struct pf_rule *r; + + for (i = 0; i < PF_RULESET_MAX; ++i) { + while ((r = TAILQ_FIRST(src->rules[i].active.ptr)) + != NULL) { + TAILQ_REMOVE(src->rules[i].active.ptr, r, entries); + TAILQ_INSERT_TAIL(dst->rules[i].active.ptr, r, entries); + dst->anchor->match++; + } + src->anchor->match = 0; + while ((r = TAILQ_FIRST(src->rules[i].inactive.ptr)) + != NULL) { + TAILQ_REMOVE(src->rules[i].inactive.ptr, r, entries); + TAILQ_INSERT_TAIL(dst->rules[i].inactive.ptr, + r, entries); + } + } +} - while (!*af && n != NULL) { - if (n->af) { - if (target_af == 0) - target_af = n->af; - if (target_af != n->af) - return; +void +decide_address_family(struct node_host *n, sa_family_t *af) +{ + if (*af != 0 || n == NULL) + return; + *af = n->af; + while ((n = n->next) != NULL) { + if (n->af != *af) { + *af = 0; + return; } - n = n->next; } - if (!*af && target_af) - *af = target_af; } void @@ -5022,7 +5526,7 @@ getservice(char *n) if (atoul(n, &ulval) == 0) { if (ulval > 65535) { - yyerror("illegal port value %d", ulval); + yyerror("illegal port value %lu", ulval); return (-1); } return (htons(ulval)); @@ -5072,26 +5576,30 @@ parseicmpspec(char *w, sa_family_t af) ulval = p->code; } if (ulval > 255) { - yyerror("invalid icmp code %ld", ulval); + yyerror("invalid icmp code %lu", ulval); return (0); } return (icmptype << 8 | ulval); } int 
-pfctl_load_anchors(int dev, int opts, struct pfr_buffer *trans) +pfctl_load_anchors(int dev, struct pfctl *his_pf, struct pfr_buffer *trans) { struct loadanchors *la; + FILE *his_fin; TAILQ_FOREACH(la, &loadanchorshead, entries) { - if (opts & PF_OPT_VERBOSE) - fprintf(stderr, "\nLoading anchor %s:%s from %s\n", - la->anchorname, la->rulesetname, la->filename); - if (pfctl_rules(dev, la->filename, opts, la->anchorname, - la->rulesetname, trans) == -1) + if (his_pf->opts & PF_OPT_VERBOSE) + fprintf(stderr, "\nLoading anchor %s from %s\n", + la->anchorname, la->filename); + if ((his_fin = pfctl_fopen(la->filename, "r")) == NULL) { + warn("%s", la->filename); + continue; + } + if (pfctl_rules(dev, la->filename, his_fin, his_pf->opts, his_pf->optimize, + la->anchorname, trans) == -1) return (-1); } return (0); } - diff --git a/usr.sbin/pfctl/pf.conf.5 b/usr.sbin/pfctl/pf.conf.5 index c8423a6d95..7ee7dcb4ed 100644 --- a/usr.sbin/pfctl/pf.conf.5 +++ b/usr.sbin/pfctl/pf.conf.5 @@ -28,7 +28,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd April 6, 2008 +.Dd August 4, 2010 .Dt PF.CONF 5 .Os .Sh NAME @@ -63,8 +63,7 @@ Queueing provides rule-based bandwidth control. Translation rules specify how addresses are to be mapped or redirected to other addresses. .It Cm Packet Filtering -Stateful and stateless packet filtering provides rule-based blocking or -passing of packets. +Packet filtering provides rule-based blocking or passing of packets. .El .Pp With the exception of @@ -81,11 +80,7 @@ enforces this order (see .Ar set require-order below). .Sh MACROS -Much like -.Xr cpp 1 -or -.Xr m4 1 , -macros can be defined that will later be expanded in context. +Macros can be defined that will later be expanded in context. Macro names must start with a letter, and may contain letters, digits and underscores. Macro names may not be reserved words (for example @@ -98,8 +93,8 @@ For example, .Bd -literal -offset indent ext_if = \&"kue0\&" all_ifs = \&"{\&" $ext_if lo0 \&"}\&" -pass out on $ext_if from any to any keep state -pass in on $ext_if proto tcp from any to any port 25 keep state +pass out on $ext_if from any to any +pass in on $ext_if proto tcp from any to any port 25 .Ed .Sh TABLES Tables are named structures which can hold a collection of addresses and @@ -182,9 +177,9 @@ when running with .Pp For example, .Bd -literal -offset indent -table const { 10/8, 172.16/12, 192.168/16 } -table persist -block on fxp0 from { , } to any +table \*(Ltprivate\*(Gt const { 10/8, 172.16/12, 192.168/16 } +table \*(Ltbadhosts\*(Gt persist +block on fxp0 from { \*(Ltprivate\*(Gt, \*(Ltbadhosts\*(Gt } to any .Ed .Pp creates a table called private, to hold RFC 1918 private network @@ -202,8 +197,8 @@ these hosts can be blocked by using A table can also be initialized with an address list specified in one or more external files, using the following syntax: .Bd -literal -offset indent -table persist file \&"/etc/spammers\&" file \&"/etc/openrelays\&" -block on fxp0 from to any +table \*(Ltspam\*(Gt persist file \&"/etc/spammers\&" file \&"/etc/openrelays\&" +block on fxp0 from \*(Ltspam\*(Gt to any .Ed .Pp The files @@ -218,7 +213,7 @@ When the resolver is called to add a hostname to a table, .Em all resulting IPv4 and IPv6 addresses are placed into the table. 
IP addresses can also be entered in a table by specifying a valid interface -name or the +name, a valid interface group or the .Em self keyword, in which case all addresses assigned to the interface(s) will be added to the table. @@ -230,7 +225,7 @@ command. .Bl -tag -width xxxx .It Ar set timeout .Pp -.Bl -tag -width interval -compact +.Bl -tag -width "src.track" -compact .It Ar interval Interval between purging expired states and fragments. .It Ar frag @@ -311,7 +306,12 @@ This value is used to define the scale factor, it should not actually be reached (set a lower state limit, see below). .El .Pp -These values can be defined both globally and for each rule. +Adaptive timeouts are enabled by default, with an adaptive.start value +equal to 60% of the state limit, and an adaptive.end value equal to +120% of the state limit. +They can be disabled by setting both adaptive.start and adaptive.end to 0. +.Pp +The adaptive timeout values can be defined both globally and for each rule. When used on a per-rule basis, the values relate to the number of states created by the rule, otherwise to the total number of states. @@ -359,8 +359,10 @@ set limit states 20000 .Pp sets the maximum number of entries in the memory pool used by state table entries (generated by -.Ar keep state -rules) to 20000. +.Ar pass +rules which do not specify +.Ar no state ) +to 20000. Using .Bd -literal -offset indent set limit frags 20000 @@ -370,7 +372,7 @@ sets the maximum number of entries in the memory pool used for fragment reassembly (generated by .Ar scrub rules) to 20000. -Finally, +Using .Bd -literal -offset indent set limit src-nodes 2000 .Ed @@ -379,16 +381,58 @@ sets the maximum number of entries in the memory pool used for tracking source IP addresses (generated by the .Ar sticky-address and -.Ar source-track +.Ar src.track options) to 2000. .Pp -These can be combined: +sets limits on the memory pools used by tables. +The first limits the number of tables that can exist to 1000. +The second limits the overall number of addresses that can be stored +in tables to 100000. +.Pp +Various limits can be combined on a single line: .Bd -literal -offset indent set limit { states 20000, frags 20000, src-nodes 2000 } .Ed .Pp +.It Ar set ruleset-optimization +.Bl -tag -width xxxxxxxx -compact +.It Ar none +Disable the ruleset optimizer. +This is the default behaviour. +.It Ar basic +Enable basic ruleset optimization, which does four things to improve the +performance of ruleset evaluations: +.Pp +.Bl -enum -compact +.It +remove duplicate rules +.It +remove rules that are a subset of another rule +.It +combine multiple rules into a table when advantageous +.It +re-order the rules to improve evaluation performance +.El +.Pp +.It Ar profile +Uses the currently loaded ruleset as a feedback profile to tailor the +ordering of quick rules to actual network traffic. +.El +.Pp +It is important to note that the ruleset optimizer will modify the ruleset +to improve performance. +A side effect of the ruleset modification is that per-rule accounting +statistics will have different meanings than before. +If per-rule accounting is important for billing purposes or whatnot, +either the ruleset optimizer should not be used or a label field should +be added to all of the accounting rules to act as optimization barriers. +.Pp +Optimization can also be set as a command-line argument to +.Xr pfctl 8 , +overriding the settings in +.Nm . 
.It Ar set optimization -Optimize the engine for one of the following network environments: +Optimize state timeouts for one of the following network environments: .Pp .Bl -tag -width xxxx -compact .It Ar normal @@ -465,8 +509,6 @@ option sets the default behaviour for states: .Bl -tag -width group-bound -compact .It Ar if-bound States are bound to interface. -.It Ar group-bound -States are bound to interface group (i.e., ppp) .It Ar floating States can match packets on any interfaces (the default). .El @@ -475,6 +517,21 @@ For example: .Bd -literal -offset indent set state-policy if-bound .Ed +.It Ar set hostid +The 32-bit +.Ar hostid +identifies this firewall's state table entries to other firewalls +in a +.Xr pfsync 4 +failover cluster. +By default the hostid is set to a pseudo-random value, however it may be +desirable to manually configure it, for example to more easily identify the +source of state table entries. +.Bd -literal -offset indent +set hostid 1 +.Ed +.Pp +The hostid may be specified in either decimal or hexadecimal. .It Ar set require-order By default .Xr pfctl 8 @@ -506,6 +563,16 @@ For example: .Pp .Dl set fingerprints \&"/etc/pf.os.devel\&" .Pp +.It Ar set skip on Aq Ar ifspec +List interfaces for which packets should not be filtered. +Packets passing in or out on such interfaces are passed as if pf was +disabled, i.e. pf does not process them in any way. +This can be useful on loopback and other virtual interfaces, when +packet filtering is not desired and can have unexpected effects. +For example: +.Pp +.Dl set skip on lo0 +.Pp .It Ar set debug Set the debug .Ar level @@ -563,14 +630,14 @@ Using the modifier (see below) is recommended in combination with the .Ar no-df modifier to ensure unique IP identifiers. -.It Ar min-ttl +.It Ar min-ttl Aq Ar number Enforces a minimum TTL for matching IP packets. -.It Ar max-mss +.It Ar max-mss Aq Ar number Enforces a maximum MSS for matching TCP packets. .It Ar random-id Replaces the IP identification field with random values to compensate for predictable values generated by many hosts. -This option only applies to outgoing packets that are not fragmented +This option only applies to packets that are not fragmented after the optional fragment reassembly. .It Ar fragment reassemble Using @@ -624,7 +691,7 @@ the firewall state, and expires before reaching the destination host. .Ar reassemble tcp will raise the TTL of all packets back up to the highest value seen on the connection. -.It timeout modulation +.It timestamp modulation Modern TCP stacks will send a timestamp on every TCP packet and echo the other endpoint's timestamp back to them. Many operating systems will merely start the timestamp at zero when @@ -641,6 +708,24 @@ guessable base time. will cause .Ar scrub to modulate the TCP timestamps with a random number. +.It extended PAWS checks +There is a problem with TCP on long fat pipes, in that a packet might get +delayed for longer than it takes the connection to wrap its 32-bit sequence +space. +In such an occurrence, the old packet would be indistinguishable from a +new packet and would be accepted as such. +The solution to this is called PAWS: Protection Against Wrapped Sequence +numbers. +It protects against it by making sure the timestamp on each packet does +not go backwards. +.Ar reassemble tcp +also makes sure the timestamp on the packet does not go forward more +than the RFC allows. 
+By doing this, +.Xr pf 4 +artificially extends the security of TCP sequence numbers by 10 to 18 +bits when the host uses appropriately randomized timestamps, since a +blind attacker would have to guess the timestamp as well. .El .El .Pp @@ -648,6 +733,15 @@ For example, .Bd -literal -offset indent scrub in on $ext_if all fragment reassemble .Ed +.Pp +The +.Ar no +option prefixed to a scrub rule causes matching packets to remain unscrubbed, +much in the same way as +.Ar drop quick +works in the packet filter (see below). +This mechanism should be used when it is necessary to exclude specific packets +from broader scrub rules. .Sh QUEUEING Packets can be assigned to queues for the purpose of bandwidth control. @@ -687,6 +781,18 @@ assigned. mainly controls the time packets take to get sent out, while .Ar bandwidth has primarily effects on throughput. +.Ar cbq +achieves both partitioning and sharing of link bandwidth +by hierarchically structured classes. +Each class has its own +.Ar queue +and is assigned its share of +.Ar bandwidth . +A child class can borrow bandwidth from its parent class +as long as excess bandwidth is available +(see the option +.Ar borrow , +below). .It Ar priq Priority Queueing. .Ar Queues @@ -720,6 +826,14 @@ assigned. mainly controls the time packets take to get sent out, while .Ar bandwidth has primarily effects on throughput. +.Ar hfsc +supports both link-sharing and guaranteed real-time services. +It employs a service curve based QoS model, +and its unique feature is an ability to decouple +.Ar delay +and +.Ar bandwidth +allocation. .It Ar fairq Fair Queue. .Ar Queues @@ -792,9 +906,9 @@ declaration. .Ar altq on has the following keywords: .Bl -tag -width xxxx -.It Ar +.It Aq Ar interface Queueing is enabled on the named interface. -.It Ar +.It Aq Ar scheduler Specifies which queueing scheduler to use. Currently supported values are @@ -824,7 +938,9 @@ gigabits per second, respectively. The value must not exceed the interface bandwidth. If .Ar bandwidth -is not specified, the interface bandwidth is used. +is not specified, the interface bandwidth is used +but take note that some interfaces do not know their bandwidth, +or can adapt their bandwidth rates). .Pp When used with .Ar fairq , @@ -833,11 +949,11 @@ specifies a guaranteed minimum but the fairq is allowed to exceed it. .It Ar qlimit The maximum number of packets held in the queue. The default is 50. -.It Ar tbrsize +.It Ar tbrsize Aq Ar size Adjusts the size, in bytes, of the token bucket regulator. If not specified, heuristics based on the interface bandwidth are used to determine the size. -.It Ar queue +.It Ar queue Aq Ar list Defines a list of subqueues to create on an interface. .El .Pp @@ -868,15 +984,16 @@ in a parent declaration. The following keywords can be used: .Bl -tag -width xxxx -.It Ar on +.It Ar on Aq Ar interface Specifies the interface the queue operates on. If not given, it operates on all matching interfaces. -.It Ar bandwidth +.It Ar bandwidth Aq Ar bw Specifies the maximum bitrate to be processed by the queue. This value must not exceed the value of the parent .Ar queue and can be specified as an absolute value or a percentage of the parent queue's bandwidth. +If not specified, defaults to 100% of the parent queue's bandwidth. The .Ar priq scheduler does not support bandwidth specification. @@ -884,7 +1001,7 @@ The .Ar fairq scheduler uses the bandwidth specification as a guaranteed minimum and may exceed it. 
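As a minimal sketch tying the ALTQ keywords above together (the interface fxp0, the 5Mb rate and the queue names are placeholders, not part of the patch):

    altq on fxp0 cbq bandwidth 5Mb queue { std, ssh }
    queue std bandwidth 80% cbq(default)
    queue ssh bandwidth 20% cbq(borrow)

The parent declaration enables queueing on the interface and names the scheduler and the total bandwidth; each child queue then takes a share of the parent's bandwidth, and the borrow option lets the ssh queue exceed its share while excess bandwidth is available.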
-.It Ar priority +.It Ar priority Aq Ar level Between queues a priority level can be set. For .Ar cbq , @@ -904,7 +1021,7 @@ bandwidth specification. and .Ar hfsc queues with a higher priority are preferred in the case of overload. -.It Ar qlimit +.It Ar qlimit Aq Ar limit The maximum number of packets held in the queue. The default is 50. When used with a @@ -915,7 +1032,9 @@ this specified the maximum number of packets held per bucket. The .Ar scheduler can get additional parameters with -.Ar "()" . +.Xo Aq Ar scheduler +.Pf ( Aq Ar parameters ) . +.Xc Parameters are as follows: .Bl -tag -width Fl .It Ar default @@ -969,15 +1088,15 @@ The .Ar scheduler supports some additional options: .Bl -tag -width Fl -.It Ar realtime +.It Ar realtime Aq Ar sc The minimum required bandwidth for the queue. -.It Ar upperlimit +.It Ar upperlimit Aq Ar sc The maximum allowed bandwidth for the queue. -.It Ar linkshare +.It Ar linkshare Aq Ar sc The bandwidth share of a backlogged queue. .El .Pp -.Ar +.Aq Ar sc is an acronym for .Ar service curve . .Pp @@ -1039,18 +1158,18 @@ queue developers bandwidth 75% cbq(borrow) queue employees bandwidth 15% queue mail bandwidth 10% priority 0 cbq(borrow ecn) queue ssh bandwidth 20% cbq(borrow) { ssh_interactive, ssh_bulk } -queue ssh_interactive priority 7 -queue ssh_bulk priority 0 +queue ssh_interactive bandwidth 50% priority 7 cbq(borrow) +queue ssh_bulk bandwidth 50% priority 0 cbq(borrow) block return out on dc0 inet all queue std pass out on dc0 inet proto tcp from $developerhosts to any port 80 \e - keep state queue developers + queue developers pass out on dc0 inet proto tcp from $employeehosts to any port 80 \e - keep state queue employees + queue employees pass out on dc0 inet proto tcp from any to any port 22 \e - keep state queue(ssh_bulk, ssh_interactive) + queue(ssh_bulk, ssh_interactive) pass out on dc0 inet proto tcp from any to any port 25 \e - keep state queue mail + queue mail .Ed .Sh TRANSLATION Translation rules modify either the source or destination address of the @@ -1110,9 +1229,9 @@ The packet is redirected to another destination and possibly a different port. .Ar rdr rules can optionally specify port ranges instead of single ports. -rdr ... port 2000:2999 -> ... port 4000 +rdr ... port 2000:2999 -\*(Gt ... port 4000 redirects ports 2000 to 2999 (inclusive) to port 4000. -rdr ... port 2000:2999 -> ... port 4000:* +rdr ... port 2000:2999 -\*(Gt ... port 4000:* redirects port 2000 to 4000, 2001 to 4001, ..., 2999 to 4999. .El .Pp @@ -1130,8 +1249,17 @@ Port numbers are never translated with a .Ar binat rule. .Pp -For each packet processed by the translator, the translation rules are -evaluated in sequential order, from first to last. +Evaluation order of the translation rules is dependent on the type +of the translation rules and of the direction of a packet. +.Ar binat +rules are always evaluated first. +Then either the +.Ar rdr +rules are evaluated on an inbound packet or the +.Ar nat +rules on an outbound packet. +Rules of the same type are evaluated in the same order in which they +appear in the ruleset. The first matching rule decides what action is taken. .Pp The @@ -1157,7 +1285,7 @@ or to the firewall itself. 
Note that redirecting external incoming connections to the loopback
address, as in
.Bd -literal -offset indent
-rdr on ne3 inet proto tcp to port 8025 -> 127.0.0.1 port 25
+rdr on ne3 inet proto tcp to port spamd -\*(Gt 127.0.0.1 port smtp
.Ed
.Pp
will effectively allow an external host to connect to daemons
@@ -1193,6 +1321,8 @@ assigned to queues for the purpose of bandwidth control.
For each packet processed by the packet filter, the filter rules are
evaluated in sequential order, from first to last.
The last matching rule decides what action is taken.
+If no rule matches the packet, the default action is to pass
+the packet.
.Pp
The following actions can be used in the filter:
.Bl -tag -width xxxx
@@ -1227,28 +1357,126 @@ This causes a TCP RST to be returned for packets and an ICMP UNREACHABLE
for UDP and other packets.
.El
.Pp
-Options returning packets have no effect if
+Options returning ICMP packets currently have no effect if
.Xr pf 4
operates on a
-.Xr bridge 4 .
+.Xr bridge 4 ,
+as the code to support this feature has not yet been implemented.
+.Pp
+The simplest mechanism to block everything by default and only pass
+packets that match explicit rules is to specify a first filter rule of:
+.Bd -literal -offset indent
+block all
+.Ed
.It Ar pass
-The packet is passed.
+The packet is passed;
+state is created unless the
+.Ar no state
+option is specified.
.El
.Pp
-If no rule matches the packet, the default action is
-.Ar pass .
+By default
+.Xr pf 4
+filters packets statefully; the first time a packet matches a
+.Ar pass
+rule, a state entry is created; for subsequent packets the filter checks
+whether the packet matches any state.
+If it does, the packet is passed without evaluation of any rules.
+After the connection is closed or times out, the state entry is automatically
+removed.
.Pp
-To block everything by default and only pass packets
-that match explicit rules, one uses
+This has several advantages.
+For TCP connections, comparing a packet to a state involves checking
+its sequence numbers, as well as TCP timestamps if a
+.Ar scrub reassemble tcp
+rule applies to the connection.
+If these values are outside the narrow windows of expected
+values, the packet is dropped.
+This prevents spoofing attacks, such as when an attacker sends packets with
+a fake source address/port but does not know the connection's sequence
+numbers.
+Similarly, +.Xr pf 4 +knows how to match ICMP replies to states. +For example, +.Bd -literal -offset indent +pass out inet proto icmp all icmp-type echoreq .Ed .Pp -as the first filter rule. +allows echo requests (such as those created by +.Xr ping 8 ) +out statefully, and matches incoming echo replies correctly to states. +.Pp +Also, looking up states is usually faster than evaluating rules. +If there are 50 rules, all of them are evaluated sequentially in O(n). +Even with 50000 states, only 16 comparisons are needed to match a +state, since states are stored in a binary search tree that allows +searches in O(log2 n). +.Pp +Furthermore, correct handling of ICMP error messages is critical to +many protocols, particularly TCP. +.Xr pf 4 +matches ICMP error messages to the correct connection, checks them against +connection parameters, and passes them if appropriate. +For example if an ICMP source quench message referring to a stateful TCP +connection arrives, it will be matched to the state and get passed. +.Pp +Finally, state tracking is required for +.Ar nat , binat No and Ar rdr +rules, in order to track address and port translations and reverse the +translation on returning packets. +.Pp +.Xr pf 4 +will also create state for other protocols which are effectively stateless by +nature. +UDP packets are matched to states using only host addresses and ports, +and other protocols are matched to states using only the host addresses. .Pp +If stateless filtering of individual packets is desired, +the +.Ar no state +keyword can be used to specify that state will not be created +if this is the last matching rule. +A number of parameters can also be set to affect how +.Xr pf 4 +handles state tracking. See -.Sx FILTER EXAMPLES -below. +.Sx STATEFUL TRACKING OPTIONS +below for further details. .Sh PARAMETERS The rule parameters specify the packets to which a rule applies. A packet always comes in on, or goes out through, one interface. @@ -1268,22 +1496,14 @@ nor are specified, the rule will match packets in both directions. .It Ar log In addition to the action specified, a log message is generated. -All packets for that connection are logged, unless the -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state -options are specified, in which case only the -packet that establishes the state is logged. -(See -.Ar keep state , -.Ar modulate state -and -.Ar synproxy state -below). -The logged packets are sent to the +Only the packet that establishes the state is logged, +unless the +.Ar no state +option is specified. +The logged packets are sent to a .Xr pflog 4 -interface. +interface, by default +.Ar pflog0 . This interface is monitored by the .Xr pflogd 8 logging daemon, which dumps the logged packets to the file @@ -1291,35 +1511,48 @@ logging daemon, which dumps the logged packets to the file in .Xr pcap 3 binary format. -.It Ar log-all -Used with -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state -rules to force logging of all packets for a connection. +.It Ar log (all) +Used to force logging of all packets for a connection. +This is not necessary when +.Ar no state +is explicitly specified. As with .Ar log , packets are logged to .Xr pflog 4 . +.It Ar log (user) +Logs the +.Ux +user ID of the user that owns the socket and the PID of the process that +has the socket open where the packet is sourced from or destined to +(depending on which socket is local). +This is in addition to the normal information logged. 
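As a hedged sketch of these logging variants (the pflog1 interface is a placeholder that would have to exist, and the port is arbitrary):

    # log only the state-creating packet (the default), to pflog0
    pass in log on $ext_if proto tcp to port 22
    # log every packet of the connection, to a non-default pflog interface
    pass in log (all, to pflog1) on $ext_if proto tcp to port 22

Both forms feed pflogd(8) through the named pflog(4) interface; log (user) can be added to the option list when the owning user and process of the local socket are of interest.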
+.It Ar log (to Aq Ar interface ) +Send logs to the specified +.Xr pflog 4 +interface instead of +.Ar pflog0 . .It Ar quick If a packet matches a rule which has the .Ar quick option set, this rule is considered the last matching rule, and evaluation of subsequent rules is skipped. -.It Ar on +.It Ar on Aq Ar interface This rule applies only to packets coming in on, or going out through, this -particular interface. -It is also possible to simply give the interface driver name, like ppp or fxp, -to make the rule match packets flowing through a group of interfaces. -.It Ar +particular interface or interface group. +For more information on interface groups, +see the +.Ic group +keyword in +.Xr ifconfig 8 . +.It Aq Ar af This rule applies only to packets of this address family. Supported values are .Ar inet and .Ar inet6 . -.It Ar proto +.It Ar proto Aq Ar protocol This rule applies only to packets of this protocol. Common protocols are .Xr icmp 4 , @@ -1332,8 +1565,11 @@ For a list of all the protocol name to number mappings used by see the file .Pa /etc/protocols . .It Xo -.Ar from port os -.Ar to port +.Ar from Aq Ar source +.Ar port Aq Ar source +.Ar os Aq Ar source +.Ar to Aq Ar dest +.Ar port Aq Ar dest .Xc This rule applies only to packets with the specified source and destination addresses and ports. @@ -1341,12 +1577,23 @@ addresses and ports. Addresses can be specified in CIDR notation (matching netblocks), as symbolic host names or interface names, or as any of the following keywords: .Pp -.Bl -tag -width xxxxxxxxxxxx -compact +.Bl -tag -width xxxxxxxxxxxxxx -compact .It Ar any Any address. +.It Ar route Aq Ar label +Any address whose associated route has label +.Aq Ar label . +See +.Xr route 4 +and +.Xr route 8 . .It Ar no-route Any address which is not currently routable. -.It Ar +.It Ar urpf-failed +Any source address that fails a unicast reverse path forwarding (URPF) +check, i.e. packets coming in on an interface other than that which holds +the route back to the packet's source address. +.It Aq Ar table Any address that matches the given table. .El .Pp @@ -1393,30 +1640,33 @@ Ports and ranges of ports are specified by using these operators: .Bd -literal -offset indent = (equal) != (unequal) -< (less than) -<= (less than or equal) -> (greater than) ->= (greater than or equal) +\*(Lt (less than) +\*(Le (less than or equal) +\*(Gt (greater than) +\*(Ge (greater than or equal) : (range including boundaries) ->< (range excluding boundaries) -<> (except range) +\*(Gt\*(Lt (range excluding boundaries) +\*(Lt\*(Gt (except range) .Ed .Pp -><, <> and : +.Sq \*(Gt\*(Lt , +.Sq \*(Lt\*(Gt +and +.Sq \&: are binary operators (they take two arguments). For instance: .Bl -tag -width Fl .It Ar port 2000:2004 means -.Sq all ports \(>= 2000 and \(<= 2004 , +.Sq all ports \*(Ge 2000 and \*(Le 2004 , hence ports 2000, 2001, 2002, 2003 and 2004. -.It Ar port 2000 >< 2004 +.It Ar port 2000 \*(Gt\*(Lt 2004 means -.Sq all ports > 2000 and < 2004 , +.Sq all ports \*(Gt 2000 and \*(Lt 2004 , hence ports 2001, 2002 and 2003. -.It Ar port 2000 <> 2004 +.It Ar port 2000 \*(Lt\*(Gt 2004 means -.Sq all ports < 2000 or > 2004 , +.Sq all ports \*(Lt 2000 or \*(Gt 2004 , hence ports 1-1999 and 2005-65535. 
.El .Pp @@ -1432,19 +1682,20 @@ The host, port and OS specifications are optional, as in the following examples: .Bd -literal -offset indent pass in all pass in from any to any -pass in proto tcp from any port <= 1024 to any +pass in proto tcp from any port \*(Le 1024 to any pass in proto tcp from any to any port 25 -pass in proto tcp from 10.0.0.0/8 port > 1024 \e +pass in proto tcp from 10.0.0.0/8 port \*(Gt 1024 \e to ! 10.1.2.3 port != ssh -pass in proto tcp from any os "OpenBSD" flags S/SA +pass in proto tcp from any os "OpenBSD" +pass in proto tcp from route "DTAG" .Ed .It Ar all This is equivalent to "from any to any". -.It Ar group +.It Ar group Aq Ar group Similar to .Ar user , this rule only applies to packets of sockets owned by the specified group. -.It Ar user +.It Ar user Aq Ar user This rule only applies to packets of sockets owned by the specified user. For outgoing connections initiated from the firewall, this is the user that opened the connection. @@ -1477,7 +1728,7 @@ can only be used with the operators and .Cm != . Other constructs like -.Cm user >= unknown +.Cm user \*(Ge unknown are invalid. Forwarded packets with unknown user and group ID match only rules that explicitly compare against @@ -1487,29 +1738,37 @@ with the operators or .Cm != . For instance -.Cm user >= 0 +.Cm user \*(Ge 0 does not match forwarded packets. The following example allows only selected users to open outgoing connections: .Bd -literal -offset indent block out proto { tcp, udp } all -pass out proto { tcp, udp } all \e - user { < 1000, dhartmei } keep state +pass out proto { tcp, udp } all user { \*(Lt 1000, dhartmei } .Ed -.It Ar flags / | / +.It Xo Ar flags Aq Ar a +.Pf / Ns Aq Ar b +.No \*(Ba / Ns Aq Ar b +.No \*(Ba any +.Xc This rule only applies to TCP packets that have the flags -.Ar +.Aq Ar a set out of set -.Ar . +.Aq Ar b . Flags not specified in -.Ar +.Aq Ar b are ignored. +For stateful connections, the default is +.Ar flags S/SA . +To indicate that flags should not be checkd at all, specify +.Ar flags any . The flags are: (F)IN, (S)YN, (R)ST, (P)USH, (A)CK, (U)RG, (E)CE, and C(W)R. .Bl -tag -width Fl .It Ar flags S/S Flag SYN is set. The other flags are ignored. .It Ar flags S/SA +This is the default setting for stateful connections. Out of SYN and ACK, exactly SYN may be set. SYN, SYN+PSH and SYN+RST match, but SYN+ACK, ACK and ACK+RST do not. This is more restrictive than the previous example. @@ -1517,14 +1776,73 @@ This is more restrictive than the previous example. If the first set is not specified, it defaults to none. All of SYN, FIN, RST and ACK must be unset. .El -.It Ar icmp-type code -.It Ar icmp6-type code +.Pp +Because +.Ar flags S/SA +is applied by default (unless +.Ar no state +is specified), only the initial SYN packet of a TCP handshake will create +a state for a TCP connection. +It is possible to be less restrictive, and allow state creation from +intermediate +.Pq non-SYN +packets, by specifying +.Ar flags any . +This will cause +.Xr pf 4 +to synchronize to existing connections, for instance +if one flushes the state table. +However, states created from such intermediate packets may be missing +connection details such as the TCP window scaling factor. +States which modify the packet flow, such as those affected by +.Ar nat , binat No or Ar rdr +rules, +.Ar modulate No or Ar synproxy state +options, or scrubbed with +.Ar reassemble tcp +will also not be recoverable from intermediate packets. +Such connections will stall and time out. 
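To make the default flag matching concrete, a small sketch (the interface macro and port are placeholders): the first rule relies on the implicit flags S/SA, so only initial SYN packets create state; the second loosens the check so that existing connections can be picked up again, for example after a state table flush, subject to the caveats described above:

    # implicit "flags S/SA": only the initial SYN of the handshake creates state
    pass in on $ext_if proto tcp to port 22
    # also create state from intermediate (non-SYN) packets
    pass in on $ext_if proto tcp to port 22 flags any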
+.It Xo Ar icmp-type Aq Ar type +.Ar code Aq Ar code +.Xc +.It Xo Ar icmp6-type Aq Ar type +.Ar code Aq Ar code +.Xc This rule only applies to ICMP or ICMPv6 packets with the specified type and code. +Text names for ICMP types and codes are listed in +.Xr icmp 4 +and +.Xr icmp6 4 . This parameter is only valid for rules that cover protocols ICMP or ICMP6. -The protocol and the ICMP type indicator (icmp-type or icmp6-type) +The protocol and the ICMP type indicator +.Po +.Ar icmp-type +or +.Ar icmp6-type +.Pc must match. +.It Xo Ar tos Aq Ar string +.No \*(Ba Aq Ar number +.Xc +This rule applies to packets with the specified +.Em TOS +bits set. +.Em TOS +may be +given as one of +.Ar lowdelay , +.Ar throughput , +.Ar reliability , +or as either hex or decimal. +.Pp +For example, the following rules are identical: +.Bd -literal -offset indent +pass all tos lowdelay +pass all tos 0x10 +pass all tos 16 +.Ed .It Ar allow-opts By default, packets which contain IP options are blocked. When @@ -1539,7 +1857,7 @@ The implicit .Ar pass rule that is used when a packet does not match any rules does not allow IP options. -.It Ar label +.It Ar label Aq Ar string Adds a label (name) to the rule, which can be used to identify the rule. For instance, pfctl -s labels @@ -1568,24 +1886,27 @@ For example: .Bd -literal -offset indent ips = \&"{ 1.2.3.4, 1.2.3.5 }\&" pass in proto tcp from any to $ips \e - port > 1023 label \&"$dstaddr:$dstport\&" + port \*(Gt 1023 label \&"$dstaddr:$dstport\&" .Ed .Pp expands to .Bd -literal -offset indent pass in inet proto tcp from any to 1.2.3.4 \e - port > 1023 label \&"1.2.3.4:>1023\&" + port \*(Gt 1023 label \&"1.2.3.4:\*(Gt1023\&" pass in inet proto tcp from any to 1.2.3.5 \e - port > 1023 label \&"1.2.3.5:>1023\&" + port \*(Gt 1023 label \&"1.2.3.5:\*(Gt1023\&" .Ed .Pp The macro expansion for the .Ar label directive occurs only at configuration file parse time, not during runtime. -.It Ar queue | ( , ) +.It Xo Ar queue Aq Ar queue +.No \*(Ba ( Aq Ar queue , +.Aq Ar queue ) +.Xc Packets matching this rule will be assigned to the specified queue. If two queues are given, packets which have a -.Em tos +.Em TOS of .Em lowdelay and TCP ACKs with no data payload will be assigned to the second one. @@ -1598,7 +1919,7 @@ For example: pass in proto tcp to port 25 queue mail pass in proto tcp to port 22 queue(ssh_bulk, ssh_prio) .Ed -.It Ar tag +.It Ar tag Aq Ar string Packets matching this rule will be tagged with the specified string. The tag acts as an internal marker that can be used to @@ -1613,14 +1934,6 @@ is not the last matching rule. Further matching rules can replace the tag with a new one but will not remove a previously applied tag. A packet is only ever assigned one tag at a time. -.Ar pass -rules that use the -.Ar tag -keyword must also use -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state . Packet tagging can be done during .Ar nat , .Ar rdr , @@ -1628,8 +1941,8 @@ or .Ar binat rules in addition to filter rules. Tags take the same macros as labels (see above). -.It Ar tagged -Used with filter rules to specify that packets must already +.It Ar tagged Aq Ar string +Used with filter or translation rules to specify that packets must already be tagged with the given tag in order to match the rule. Inverse tag matching can also be done by specifying the @@ -1637,7 +1950,10 @@ by specifying the operator before the .Ar tagged keyword. -.It Ar probability +.It Ar rtable Aq Ar number +Used to select an alternate routing table for the routing lookup. 
+Only effective before the route lookup happened, i.e. when filtering inbound. +.It Ar probability Aq Ar number A probability attribute can be attached to a rule, with a value set between 0 and 1, bounds not included. In that case, the rule will be honoured using the given probability value @@ -1676,7 +1992,7 @@ option is similar to but routes packets that pass in the opposite direction (replies) to the specified interface. Opposite direction is only defined in the context of a state entry, and -.Ar route-to +.Ar reply-to is useful only in rules that create state. It can be used on systems with multiple external connections to route all outgoing packets of a connection through the interface @@ -1759,142 +2075,6 @@ beyond the lifetime of the states, increase the global options with See .Sx STATEFUL TRACKING OPTIONS for more ways to control the source tracking. -.Sh STATEFUL INSPECTION -.Xr pf 4 -is a stateful packet filter, which means it can track the state of -a connection. -Instead of passing all traffic to port 25, for instance, it is possible -to pass only the initial packet, and then begin to keep state. -Subsequent traffic will flow because the filter is aware of the connection. -.Pp -You can turn on stateful inspection on all pass rules by default using -the -.Ar set keep-policy -directive. -Any pass rule may specify or override the stateful inspection -default, including turning it off by specifying -.Ar no state . -.Pp -If a packet matches a -.Ar pass ... keep state -rule, the filter creates a state for this connection and automatically -lets pass all subsequent packets of that connection. -.Pp -Before any rules are evaluated, the filter checks whether the packet -matches any state. -If it does, the packet is passed without evaluation of any rules. -.Pp -States are removed after the connection is closed or has timed out. -.Pp -This has several advantages. -Comparing a packet to a state involves checking its sequence numbers. -If the sequence numbers are outside the narrow windows of expected -values, the packet is dropped. -This prevents spoofing attacks, such as when an attacker sends packets with -a fake source address/port but does not know the connection's sequence -numbers. -.Pp -Also, looking up states is usually faster than evaluating rules. -If there are 50 rules, all of them are evaluated sequentially in O(n). -Even with 50000 states, only 16 comparisons are needed to match a -state, since states are stored in a binary search tree that allows -searches in O(log2 n). -.Pp -For instance: -.Bd -literal -offset indent -block all -pass out proto tcp from any to any flags S/SA keep state -pass in proto tcp from any to any port 25 flags S/SA keep state -.Ed -.Pp -This ruleset blocks everything by default. -Only outgoing connections and incoming connections to port 25 are allowed. -The initial packet of each connection has the SYN -flag set, will be passed and creates state. -All further packets of these connections are passed if they match a state. -.Pp -By default, packets coming in and out of any interface can match a state, -but it is also possible to change that behaviour by assigning states to a -single interface or a group of interfaces. -.Pp -The default policy is specified by the -.Ar state-policy -global option, but this can be adjusted on a per-rule basis by adding one -of the -.Ar if-bound , -.Ar group-bound -or -.Ar floating -keywords to the -.Ar keep state -option. 
-For example, if a rule is defined as: -.Bd -literal -offset indent -pass out on ppp from any to 10.12/16 keep state (group-bound) -.Ed -.Pp -A state created on ppp0 would match packets an all PPP interfaces, -but not packets flowing through fxp0 or any other interface. -.Pp -Keeping rules -.Ar floating -is the more flexible option when the firewall is in a dynamic routing -environment. -However, this has some security implications since a state created by one -trusted network could allow potentially hostile packets coming in from other -interfaces. -.Pp -Specifying -.Ar flags S/SA -restricts state creation to the initial SYN -packet of the TCP handshake. -One can also be less restrictive, and allow state creation from -intermediate -.Pq non-SYN -packets. -This will cause -.Xr pf 4 -to synchronize to existing connections, for instance -if one flushes the state table. -If you do this you must use the -.Ar pickups -option or -.Ar keep state -will blow up on TCP connections with window scaling turned on. -The -.Ar pickups -option tells keep state to skip sequence space checks on connections -for which no window scaling information is known (meaning it didn't see -the SYN from both directions). -.Pp -For UDP, which is stateless by nature, -.Ar keep state -will create state as well. -UDP packets are matched to states using only host addresses and ports. -.Pp -ICMP messages fall into two categories: ICMP error messages, which always -refer to a TCP or UDP packet, are matched against the referred to connection. -If one keeps state on a TCP connection, and an ICMP source quench message -referring to this TCP connection arrives, it will be matched to the right -state and get passed. -.Pp -For ICMP queries, -.Ar keep state -creates an ICMP state, and -.Xr pf 4 -knows how to match ICMP replies to states. -For example, -.Bd -literal -offset indent -pass out inet proto icmp all icmp-type echoreq keep state -.Ed -.Pp -allows echo requests (such as those created by -.Xr ping 8 ) -out, creates state, and matches incoming echo replies correctly to states. -.Pp -Note: -.Ar nat , binat No and Ar rdr -rules implicitly create state for connections. .Sh STATE MODULATION Much of the security derived from TCP is attributable to how well the initial sequence numbers (ISNs) are chosen. @@ -1917,25 +2097,10 @@ For instance: .Bd -literal -offset indent block all pass out proto tcp from any to any modulate state -pass in proto tcp from any to any port 25 flags S/SA modulate state +pass in proto tcp from any to any port 25 flags S/SFRA modulate state .Ed .Pp -There are two caveats associated with state modulation: -A -.Ar modulate state -rule can not be applied to a pre-existing but unmodulated connection. -Such an application would desynchronize TCP's strict -sequencing between the two endpoints. -Instead, -.Xr pf 4 -will treat the -.Ar modulate state -modifier as a -.Ar keep state -modifier and the pre-existing connection will be inferred without -the protection conferred by modulation. -.Pp -The other caveat affects currently modulated states when the state table +Note that modulated connections will not recover when the state table is lost (firewall reboot, flushing the state table, etc...). .Xr pf 4 will not be able to infer a connection again after the state table flushes @@ -1944,11 +2109,20 @@ When the state is lost, the connection may be left dangling until the respective endpoints time out the connection. 
It is possible on a fast local network for the endpoints to start an ACK storm while trying to resynchronize after the loss of the modulator. -Using a -.Ar flags S/SA -modifier on +The default +.Ar flags +settings (or a more strict equivalent) should be used on .Ar modulate state -rules between fast networks is suggested to prevent ACK storms. +rules to prevent ACK storms. +.Pp +Note that alternative methods are available +to prevent loss of the state table +and allow for firewall failover. +See +.Xr carp 4 +and +.Xr pfsync 4 +for further information. .Sh SYN PROXY By default, .Xr pf 4 @@ -1974,12 +2148,9 @@ chooses random initial sequence numbers for both handshakes. Once the handshakes are completed, the sequence number modulators (see previous section) are used to translate further packets of the connection. -Hence, .Ar synproxy state includes -.Ar modulate state -and -.Ar keep state . +.Ar modulate state . .Pp Rules with .Ar synproxy @@ -1990,18 +2161,21 @@ operates on a .Pp Example: .Bd -literal -offset indent -pass in proto tcp from any to any port www flags S/SA synproxy state +pass in proto tcp from any to any port www synproxy state .Ed .Sh STATEFUL TRACKING OPTIONS -All three of +A number of options related to stateful tracking can be applied on a +per-rule basis. .Ar keep state , .Ar modulate state and .Ar synproxy state -support the following options: +support these options, and +.Ar keep state +must be specified explicitly to apply options to a rule. .Pp .Bl -tag -width xxxx -compact -.It Ar max +.It Ar max Aq Ar number Limits the number of concurrent states the rule may create. When this limit is reached, further packets matching the rule that would create state are dropped, until existing states time out. @@ -2009,19 +2183,53 @@ create state are dropped, until existing states time out. Prevent state changes for states created by this rule from appearing on the .Xr pfsync 4 interface. -.It Ar +.It Xo Aq Ar timeout +.Aq Ar seconds +.Xc Changes the timeout values used for states created by this rule. +For a list of all valid timeout names, see +.Sx OPTIONS +above. +.El +.Pp +Multiple options can be specified, separated by commas: +.Bd -literal -offset indent +pass in proto tcp from any to any \e + port www keep state \e + (max 100, source-track rule, max-src-nodes 75, \e + max-src-states 3, tcp.established 60, tcp.closing 5) +.Ed .Pp When the .Ar source-track keyword is specified, the number of states per source IP is tracked. +.Pp +.Bl -tag -width xxxx -compact +.It Ar source-track rule +The maximum number of states created by this rule is limited by the rule's +.Ar max-src-nodes +and +.Ar max-src-states +options. +Only state entries created by this particular rule count toward the rule's +limits. +.It Ar source-track global +The number of states created by all rules that use this option is limited. +Each rule can specify different +.Ar max-src-nodes +and +.Ar max-src-states +options, however state entries created by any participating rule count towards +each individual rule's limits. +.El +.Pp The following limits can be set: .Pp -.Bl -tag -width xxxx -compact -.It Ar max-src-nodes +.Bl -tag -width xxxx -compact +.It Ar max-src-nodes Aq Ar number Limits the maximum number of source addresses which can simultaneously have state table entries. -.It Ar max-src-states +.It Ar max-src-states Aq Ar number Limits the maximum number of simultaneous state entries that a single source address can create with this rule. 
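To illustrate the difference between the two source-track modes described below (ports and limits are hypothetical):

    # states created by either of these rules count toward both rules' limits
    pass in proto tcp to port 80 keep state (source-track global, max-src-states 50)
    pass in proto tcp to port 443 keep state (source-track global, max-src-states 50)
    # this rule counts only the states it created itself
    pass in proto udp to port 53 keep state (source-track rule, max-src-states 10)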
.It Ar pickups @@ -2058,16 +2266,52 @@ TCP flags of S/SA are implied and do not need to explicitly specified. .El .Pp -For a list of all valid timeout names, see -.Sx OPTIONS -above. +For stateful TCP connections, limits on established connections (connections +which have completed the TCP 3-way handshake) can also be enforced +per source IP. .Pp -Multiple options can be specified, separated by commas: -.Bd -literal -pass in proto tcp from any to any \e - port www flags S/SA keep state \e - (max 100, source-track rule, max-src-nodes 75, \e - max-src-states 3, tcp.established 60, tcp.closing 5) +.Bl -tag -width xxxx -compact +.It Ar max-src-conn Aq Ar number +Limits the maximum number of simultaneous TCP connections which have +completed the 3-way handshake that a single host can make. +.It Xo Ar max-src-conn-rate Aq Ar number +.No / Aq Ar seconds +.Xc +Limit the rate of new connections over a time interval. +The connection rate is an approximation calculated as a moving average. +.El +.Pp +Because the 3-way handshake ensures that the source address is not being +spoofed, more aggressive action can be taken based on these limits. +With the +.Ar overload Aq Ar table +state option, source IP addresses which hit either of the limits on +established connections will be added to the named table. +This table can be used in the ruleset to block further activity from +the offending host, redirect it to a tarpit process, or restrict its +bandwidth. +.Pp +The optional +.Ar flush +keyword kills all states created by the matching rule which originate +from the host which exceeds these limits. +The +.Ar global +modifier to the flush command kills all states originating from the +offending host, regardless of which rule created the state. +.Pp +For example, the following rules will protect the webserver against +hosts making more than 100 connections in 10 seconds. +Any host which connects faster than this rate will have its address added +to the +.Aq bad_hosts +table and have all states originating from it flushed. +Any new packets arriving from this host will be dropped unconditionally +by the block rule. +.Bd -literal -offset indent +block quick from \*(Ltbad_hosts\*(Gt +pass in on $ext_if proto tcp to $webserver port www keep state \e + (max-src-conn-rate 100/10, overload \*(Ltbad_hosts\*(Gt flush global) .Ed .El .Sh OPERATING SYSTEM FINGERPRINTING @@ -2125,12 +2369,12 @@ which no operating system fingerprint is known. .Pp Examples: .Bd -literal -offset indent -pass out proto tcp from any os OpenBSD keep state +pass out proto tcp from any os OpenBSD block out proto tcp from any os Doors block out proto tcp from any os "Doors PT" block out proto tcp from any os "Doors PT SP3" block out from any os "unknown" -pass on lo0 proto tcp from any os "OpenBSD 3.3 lo0" keep state +pass on lo0 proto tcp from any os "OpenBSD 3.3 lo0" .Ed .Pp Operating system fingerprinting is limited only to the TCP SYN packet. @@ -2251,116 +2495,128 @@ The timeout value can also be adjusted. .Pp Currently, only IPv4 fragments are supported and IPv6 fragments are blocked unconditionally. -.Sh ANCHORS AND NAMED RULESETS +.Sh ANCHORS Besides the main ruleset, .Xr pfctl 8 -can load named rulesets into +can load rulesets into .Ar anchor attachment points. An .Ar anchor -contains a list of named rulesets. +is a container that can hold rules, address tables, and other anchors. +.Pp An .Ar anchor -has a name which specifies where +has a name which specifies the path where .Xr pfctl 8 -can be used to attach sub-rulesets. 
-A named ruleset contains filter and translation rules, like the -main ruleset. -The main ruleset can reference +can be used to access the anchor to perform operations on it, such as +attaching child anchors to it or loading rules into it. +Anchors may be nested, with components separated by +.Sq / +characters, similar to how file system hierarchies are laid out. +The main ruleset is actually the default anchor, so filter and +translation rules, for example, may also be contained in any anchor. +.Pp +An anchor can reference another .Ar anchor -attachment points +attachment point using the following kinds of rules: .Bl -tag -width xxxx -.It Ar nat-anchor +.It Ar nat-anchor Aq Ar name Evaluates the .Ar nat -rules of all named rulesets in the specified +rules in the specified .Ar anchor . -.It Ar rdr-anchor +.It Ar rdr-anchor Aq Ar name Evaluates the .Ar rdr -rules of all named rulesets in the specified +rules in the specified .Ar anchor . -.It Ar binat-anchor +.It Ar binat-anchor Aq Ar name Evaluates the .Ar binat -rules of all named rulesets in the specified +rules in the specified .Ar anchor . -.It Ar anchor -Evaluates the filter rules of all named rulesets in the specified +.It Ar anchor Aq Ar name +Evaluates the filter rules in the specified .Ar anchor . -.It Ar load anchor : from -Loads the rules from the specified file into the named -ruleset -.Ar -attached to the anchor -.Ar . +.It Xo Ar load anchor +.Aq Ar name +.Ar from Aq Ar file +.Xc +Loads the rules from the specified file into the +anchor +.Ar name . .El .Pp When evaluation of the main ruleset reaches an .Ar anchor rule, .Xr pf 4 -will proceed to evaluate all rules specified in the -named rulesets attached to that -.Ar anchor . +will proceed to evaluate all rules specified in that anchor. .Pp -Matching filter rules in named rulesets with the +Matching filter and translation rules marked with the .Ar quick -option and matching translation rules are final and abort the -evaluation of both the rules in the +option are final and abort the evaluation of the rules in other +anchors and the main ruleset. +If the .Ar anchor -and the main ruleset. +itself is marked with the +.Ar quick +option, +ruleset evaluation will terminate when the anchor is exited if the packet is +matched by any rule within the anchor. .Pp -Only the main ruleset can contain .Ar anchor -rules. -.Pp -When an +rules are evaluated relative to the anchor in which they are contained. +For example, all +.Ar anchor +rules specified in the main ruleset will reference anchor +attachment points underneath the main ruleset, and .Ar anchor -contains more than one named ruleset, they are evaluated -in the alphabetical order of their names. +rules specified in a file loaded from a +.Ar load anchor +rule will be attached under that anchor point. .Pp -Rules may contain +Rules may be contained in .Ar anchor attachment points which do not contain any rules when the main ruleset -is loaded, and later such named rulesets can be manipulated through +is loaded, and later such anchors can be manipulated through .Xr pfctl 8 -without reloading the main ruleset. +without reloading the main ruleset or other anchors. 
For example, .Bd -literal -offset indent ext_if = \&"kue0\&" block on $ext_if all anchor spam -pass out on $ext_if all keep state +pass out on $ext_if all pass in on $ext_if proto tcp from any \e - to $ext_if port smtp keep state + to $ext_if port smtp .Ed .Pp blocks all packets on the external interface by default, then evaluates -all rulesets in the +all rules in the .Ar anchor named "spam", and finally passes all outgoing connections and incoming connections to port 25. .Bd -literal -offset indent # echo \&"block in quick from 1.2.3.4 to any\&" \&| \e - pfctl -a spam:manual -f - + pfctl -a spam -f - .Ed .Pp -loads a single ruleset containing a single rule into the +This loads a single rule into the .Ar anchor , which blocks all packets from a specific address. .Pp -The named ruleset can also be populated by adding a +The anchor can also be populated by adding a .Ar load anchor rule after the .Ar anchor rule: .Bd -literal -offset indent anchor spam -load anchor spam:manual from "/etc/pf-spam.conf" +load anchor spam from "/etc/pf-spam.conf" .Ed .Pp When @@ -2369,7 +2625,7 @@ loads .Nm , it will also load all the rules from the file .Pa /etc/pf-spam.conf -into the named ruleset. +into the anchor. .Pp Optionally, .Ar anchor @@ -2380,12 +2636,12 @@ using the same syntax as filter rules. When parameters are used, the .Ar anchor rule is only evaluated for matching packets. -This allows conditional evaluation of named rulesets, like: +This allows conditional evaluation of anchors, like: .Bd -literal -offset indent block on $ext_if all anchor spam proto tcp from any to any port smtp -pass out on $ext_if all keep state -pass in on $ext_if proto tcp from any to $ext_if port smtp keep state +pass out on $ext_if all +pass in on $ext_if proto tcp from any to $ext_if port smtp .Ed .Pp The rules inside @@ -2396,25 +2652,72 @@ packets with destination port 25. Hence, .Bd -literal -offset indent # echo \&"block in quick from 1.2.3.4 to any" \&| \e - pfctl -a spam:manual -f - + pfctl -a spam -f - .Ed .Pp will only block connections from 1.2.3.4 to port 25. -.Sh FILES -.Bl -tag -width "/etc/protocols" -compact -.It Pa /etc/hosts -Host name database. -.It Pa /etc/pf.conf -Default location of the ruleset file. -.It Pa /etc/pf.os -Default location of OS fingerprints. -.It Pa /etc/protocols -Protocol name database. -.It Pa /etc/services -Service name database. -.It Pa /usr/share/examples/pf -Example rulesets. -.El +.Pp +Anchors may end with the asterisk +.Pq Sq * +character, which signifies that all anchors attached at that point +should be evaluated in the alphabetical ordering of their anchor name. +For example, +.Bd -literal -offset indent +anchor "spam/*" +.Ed +.Pp +will evaluate each rule in each anchor attached to the +.Li spam +anchor. +Note that it will only evaluate anchors that are directly attached to the +.Li spam +anchor, and will not descend to evaluate anchors recursively. +.Pp +Since anchors are evaluated relative to the anchor in which they are +contained, there is a mechanism for accessing the parent and ancestor +anchors of a given anchor. +Similar to file system path name resolution, if the sequence +.Dq .. +appears as an anchor path component, the parent anchor of the current +anchor in the path evaluation at that point will become the new current +anchor. 
+As an example, consider the following: +.Bd -literal -offset indent +# echo ' anchor "spam/allowed" ' | pfctl -f - +# echo -e ' anchor "../banned" \en pass' | \e + pfctl -a spam/allowed -f - +.Ed +.Pp +Evaluation of the main ruleset will lead into the +.Li spam/allowed +anchor, which will evaluate the rules in the +.Li spam/banned +anchor, if any, before finally evaluating the +.Ar pass +rule. +.Pp +Filter rule +.Ar anchors +can also be loaded inline in the ruleset within a brace ('{' '}') delimited +block. +Brace delimited blocks may contain rules or other brace-delimited blocks. +When anchors are loaded this way the anchor name becomes optional. +.Bd -literal -offset indent +anchor "external" on egress { + block + anchor out { + pass proto tcp from any to port { 25, 80, 443 } + } + pass in proto tcp to any port 22 +} +.Ed +.Pp +Since the parser specification for anchor names is a string, any +reference to an anchor name containing solidus +.Pq Sq / +characters will require double quote +.Pq Sq \&" +characters around the anchor name. .Sh TRANSLATION EXAMPLES This example maps incoming requests on port 80 to port 8080, on which a daemon is running (because, for example, it is not run as root, @@ -2424,7 +2727,7 @@ and therefore lacks permission to bind to port 80). ext_if = \&"ne3\&" # map daemon on 8080 to appear to be on 80 -rdr on $ext_if proto tcp from any to any port 80 -> 127.0.0.1 port 8080 +rdr on $ext_if proto tcp from any to any port 80 -\*(Gt 127.0.0.1 port 8080 .Ed .Pp If the @@ -2432,7 +2735,7 @@ If the modifier is given, packets matching the translation rule are passed without inspecting the filter rules: .Bd -literal -rdr pass on $ext_if proto tcp from any to any port 80 -> 127.0.0.1 \e +rdr pass on $ext_if proto tcp from any to any port 80 -\*(Gt 127.0.0.1 \e port 8080 .Ed .Pp @@ -2445,7 +2748,7 @@ network appear as though it is the Internet routable address for the nodes on vlan12. (Thus, 192.168.168.1 can talk to the 192.168.168.0/24 nodes.) .Bd -literal -nat on ! vlan12 from 192.168.168.0/24 to any -> 204.92.77.111 +nat on ! vlan12 from 192.168.168.0/24 to any -\*(Gt 204.92.77.111 .Ed .Pp In the example below, the machine sits between a fake internal 144.19.74.* @@ -2456,7 +2759,7 @@ rule excludes protocol AH from being translated. .Bd -literal # NO NAT no nat on $ext_if proto ah from 144.19.74.0/24 to any -nat on $ext_if from 144.19.74.0/24 to any -> 204.92.77.100 +nat on $ext_if from 144.19.74.0/24 to any -\*(Gt 204.92.77.100 .Ed .Pp In the example below, packets bound for one specific server, as well as those @@ -2465,46 +2768,51 @@ generated by the sysadmins are not proxied; all other connections are. # NO RDR no rdr on $int_if proto { tcp, udp } from any to $server port 80 no rdr on $int_if proto { tcp, udp } from $sysadmins to any port 80 -rdr on $int_if proto { tcp, udp } from any to any port 80 -> 127.0.0.1 \e +rdr on $int_if proto { tcp, udp } from any to any port 80 -\*(Gt 127.0.0.1 \e port 80 .Ed .Pp This longer example uses both a NAT and a redirection. The external interface has the address 157.161.48.183. -On the internal interface, we are running +On localhost, we are running .Xr ftp-proxy 8 , -listening for outbound ftp sessions captured to port 8021. +waiting for FTP sessions to be redirected to it. +The three mandatory anchors for +.Xr ftp-proxy 8 +are omitted from this example; see the +.Xr ftp-proxy 8 +manpage. .Bd -literal # NAT # Translate outgoing packets' source addresses (any protocol). 
# In this case, any address but the gateway's external address is mapped. -nat on $ext_if inet from ! ($ext_if) to any -> ($ext_if) +nat on $ext_if inet from ! ($ext_if) to any -\*(Gt ($ext_if) # NAT PROXYING # Map outgoing packets' source port to an assigned proxy port instead of # an arbitrary port. # In this case, proxy outgoing isakmp with port 500 on the gateway. -nat on $ext_if inet proto udp from any port = isakmp to any -> ($ext_if) \e +nat on $ext_if inet proto udp from any port = isakmp to any -\*(Gt ($ext_if) \e port 500 # BINAT # Translate outgoing packets' source address (any protocol). # Translate incoming packets' destination address to an internal machine # (bidirectional). -binat on $ext_if from 10.1.2.150 to any -> ($ext_if) +binat on $ext_if from 10.1.2.150 to any -\*(Gt $ext_if # RDR # Translate incoming packets' destination addresses. # As an example, redirect a TCP and UDP port to an internal machine. rdr on $ext_if inet proto tcp from any to ($ext_if) port 8080 \e - -> 10.1.2.151 port 22 + -\*(Gt 10.1.2.151 port 22 rdr on $ext_if inet proto udp from any to ($ext_if) port 8080 \e - -> 10.1.2.151 port 53 + -\*(Gt 10.1.2.151 port 53 # RDR # Translate outgoing ftp control connections to send them to localhost # for proxying with ftp-proxy(8) running on port 8021. -rdr on $int_if proto tcp from any to any port 21 -> 127.0.0.1 port 8021 +rdr on $int_if proto tcp from any to any port 21 -\*(Gt 127.0.0.1 port 8021 .Ed .Pp In this example, a NAT gateway is set up to translate internal addresses @@ -2516,13 +2824,13 @@ network. # Translate outgoing packets' source addresses using an address pool. # A given source address is always translated to the same pool address by # using the source-hash keyword. -nat on $ext_if inet from any to any -> 192.0.2.16/28 source-hash +nat on $ext_if inet from any to any -\*(Gt 192.0.2.16/28 source-hash # RDR ROUND ROBIN # Translate incoming web server connections to a group of web servers on # the internal network. rdr on $ext_if proto tcp from any to any port 80 \e - -> { 10.1.2.155, 10.1.2.160, 10.1.2.161 } round-robin + -\*(Gt { 10.1.2.155, 10.1.2.160, 10.1.2.161 } round-robin .Ed .Sh FILTER EXAMPLES .Bd -literal @@ -2542,6 +2850,10 @@ block return log on $ext_if all # block anything coming from source we have no back routes for block in from no-route to any +# block packets whose ingress interface does not match the one in +# the route back to their source address +block in from urpf-failed to any + # block and log outgoing packets that do not have our address as source, # they are either spoofed or something is misconfigured (NAT disabled, # for instance), we want to be nice and do not send out garbage. 
@@ -2563,15 +2875,15 @@ block in log quick on $ext_if from { 10.0.0.0/8, 172.16.0.0/12, \e # so replies (like 0/0 for 8/0) will match queries # ICMP error messages (which always refer to a TCP/UDP packet) are # handled by the TCP/UDP states -pass on $ext_if inet proto icmp all icmp-type 8 code 0 keep state +pass on $ext_if inet proto icmp all icmp-type 8 code 0 # UDP # pass out all UDP connections and keep state -pass out on $ext_if proto udp all keep state +pass out on $ext_if proto udp all # pass in certain UDP connections and keep state (DNS) -pass in on $ext_if proto udp from any to any port domain keep state +pass in on $ext_if proto udp from any to any port domain # TCP @@ -2580,18 +2892,19 @@ pass out on $ext_if proto tcp all modulate state # pass in certain TCP connections and keep state (SSH, SMTP, DNS, IDENT) pass in on $ext_if proto tcp from any to any port { ssh, smtp, domain, \e - auth } flags S/SA keep state - -# pass in data mode connections for ftp-proxy running on this host. -# (see ftp-proxy(8) for details) -pass in on $ext_if proto tcp from any to 157.161.48.183 port >= 49152 \e - flags S/SA keep state + auth } # Do not allow Windows 9x SMTP connections since they are typically # a viral worm. Alternately we could limit these OSes to 1 connection each. block in on $ext_if proto tcp from any os {"Windows 95", "Windows 98"} \e to any port smtp +# IPv6 +# pass in/out all IPv6 traffic: note that we have to enable this in two +# different ways, on both our physical interface and our tunnel +pass quick on gif0 inet6 +pass quick on $ext_if proto ipv6 + # Using the pickup options to keep/modulate/synproxy state # # no-pickups (default) Do not allow connections to be picked up in the @@ -2618,32 +2931,33 @@ pass in on $ext_if proto tcp ... keep state (hash-only) # outgoing packets (i.e., packets from the wireless network) are only # permitted to access port 80. -pass in on $int_if from any to any tag INTNET keep state -pass in on $wifi_if from any to any keep state +pass in on $int_if from any to any tag INTNET +pass in on $wifi_if from any to any block out on $ext_if from any to any -pass out quick on $ext_if tagged INTNET keep state -pass out on $ext_if from any to any port 80 keep state +pass out quick on $ext_if tagged INTNET +pass out on $ext_if proto tcp from any to any port 80 # tag incoming packets as they are redirected to spamd(8). use the tag # to pass those packets through the packet filter. 
-rdr on $ext_if inet proto tcp from to port smtp \e - tag SPAMD -> 127.0.0.1 port spamd +rdr on $ext_if inet proto tcp from \*(Ltspammers\*(Gt to port smtp \e + tag SPAMD -\*(Gt 127.0.0.1 port spamd block in on $ext_if -pass in on $ext_if inet proto tcp tagged SPAMD keep state +pass in on $ext_if inet proto tcp tagged SPAMD .Ed .Sh GRAMMAR Syntax for .Nm in BNF: .Bd -literal -line = option | pf-rule | nat-rule | binat-rule | rdr-rule | - antispoof-rule | altq-rule | queue-rule | anchor-rule | - trans-anchors | load-anchors | table-rule +line = option | pf-rule | nat-rule | binat-rule | rdr-rule | + antispoof-rule | altq-rule | queue-rule | trans-anchors | + anchor-rule | anchor-close | load-anchor | table-rule option = "set" ( [ "timeout" ( timeout | "{" timeout-list "}" ) ] | + [ "ruleset-optimization" [ "none" | "basic" | "profile" ]] | [ "optimization" [ "default" | "normal" | "high-latency" | "satellite" | "aggressive" | "conservative" ] ] @@ -2651,17 +2965,20 @@ option = "set" ( [ "timeout" ( timeout | "{" timeout-list "}" ) ] | [ "loginterface" ( interface-name | "none" ) ] | [ "block-policy" ( "drop" | "return" ) ] | [ "keep-policy" keep ] | - [ "state-policy" ( "if-bound" | "group-bound" | - "floating" ) ] + [ "state-policy" ( "if-bound" | "floating" ) ] [ "require-order" ( "yes" | "no" ) ] [ "fingerprints" filename ] | + [ "skip on" ( interface-name | "{" interface-list "}" ) ] | [ "debug" ( "none" | "urgent" | "misc" | "loud" ) ] ) pf-rule = action [ "in" | "out" ] - [ "log" | "log-all" ] [ "quick" ] - [ "on" ifspec ] [ route ] [ af ] [ protospec ] + [ "log" [ "(" logopts ")"] ] [ "quick" ] + [ "on" ifspec ] [ "fastroute" | route ] [ af ] [ protospec ] hosts [ filteropt-list ] +logopts = logopt [ "," logopts ] +logopt = "all" | "user" | "to" interface-name + filteropt-list = filteropt-list filteropt | filteropt filteropt = user | group | flags | icmp-type | icmp6-type | tos | keep | "fragment" | "no-df" | "min-ttl" number | @@ -2675,27 +2992,30 @@ keep = "no" "state" | ( "keep" | "modulate" | "synproxy" ) "state" [ "(" state-opts ")" ] -nat-rule = [ "no" ] "nat" [ "pass" ] [ "on" ifspec ] [ af ] - [ protospec ] hosts [ "tag" string ] - [ "->" ( redirhost | "{" redirhost-list "}" ) +nat-rule = [ "no" ] "nat" [ "pass" [ "log" [ "(" logopts ")" ] ] ] + [ "on" ifspec ] [ af ] + [ protospec ] hosts [ "tag" string ] [ "tagged" string ] + [ "-\*(Gt" ( redirhost | "{" redirhost-list "}" ) [ portspec ] [ pooltype ] [ "static-port" ] ] -binat-rule = [ "no" ] "binat" [ "pass" ] [ "on" interface-name ] - [ af ] [ "proto" ( proto-name | proto-number ) ] +binat-rule = [ "no" ] "binat" [ "pass" [ "log" [ "(" logopts ")" ] ] ] + [ "on" interface-name ] [ af ] + [ "proto" ( proto-name | proto-number ) ] "from" address [ "/" mask-bits ] "to" ipspec - [ "tag" string ] - [ "->" address [ "/" mask-bits ] ] + [ "tag" string ] [ "tagged" string ] + [ "-\*(Gt" address [ "/" mask-bits ] ] -rdr-rule = [ "no" ] "rdr" [ "pass" ] [ "on" ifspec ] [ af ] - [ protospec ] hosts [ "tag" string ] - [ "->" ( redirhost | "{" redirhost-list "}" ) +rdr-rule = [ "no" ] "rdr" [ "pass" [ "log" [ "(" logopts ")" ] ] ] + [ "on" ifspec ] [ af ] + [ protospec ] hosts [ "tag" string ] [ "tagged" string ] + [ "-\*(Gt" ( redirhost | "{" redirhost-list "}" ) [ portspec ] [ pooltype ] ] antispoof-rule = "antispoof" [ "log" ] [ "quick" ] "for" ( interface-name | "{" interface-list "}" ) [ af ] [ "label" string ] -table-rule = "table" "<" string ">" [ tableopts-list ] +table-rule = "table" "\*(Lt" string "\*(Gt" [ tableopts-list ] 
tableopts-list = tableopts-list tableopts | tableopts tableopts = "persist" | "const" | "file" string | "{" [ tableaddr-list ] "}" @@ -2715,7 +3035,7 @@ anchor-rule = "anchor" string [ "in" | "out" ] [ "on" ifspec ] trans-anchors = ( "nat-anchor" | "rdr-anchor" | "binat-anchor" ) string [ "on" ifspec ] [ af ] [ "proto" ] [ protospec ] [ hosts ] -load-anchor = "load anchor" anchorname:rulesetname "from" filename +load-anchor = "load anchor" string "from" filename queueopts-list = queueopts-list queueopts | queueopts queueopts = "bandwidth" bandwidth-spec | @@ -2724,17 +3044,16 @@ queueopts = "bandwidth" bandwidth-spec | schedulers = cbq-def | hfsc-def | priq-def | fairq-def bandwidth-spec = "number" ( "b" | "Kb" | "Mb" | "Gb" | "%" ) -action = "pass" | "block" [ return ] | "scrub" +action = "pass" | "block" [ return ] | [ "no" ] "scrub" return = "drop" | "return" | "return-rst" [ "( ttl" number ")" ] | - "return-icmp" [ "(" icmpcode ["," icmp6code ] ")" ] | + "return-icmp" [ "(" icmpcode [ [ "," ] icmp6code ] ")" ] | "return-icmp6" [ "(" icmp6code ")" ] icmpcode = icmp-code-name | icmp-code-number icmp6code = icmp6-code-name | icmp6-code-number ifspec = ( [ "!" ] interface-name ) | "{" interface-list "}" interface-list = [ "!" ] interface-name [ [ "," ] interface-list ] -route = "fastroute" | - ( "route-to" | "reply-to" | "dup-to" ) +route = ( "route-to" | "reply-to" | "dup-to" ) ( routehost | "{" routehost-list "}" ) [ pooltype ] af = "inet" | "inet6" @@ -2744,13 +3063,13 @@ protospec = "proto" ( proto-name | proto-number | proto-list = ( proto-name | proto-number ) [ [ "," ] proto-list ] hosts = "all" | - "from" ( "any" | "no-route" | "self" | host | - "{" host-list "}" ) [ port ] [ os ] + "from" ( "any" | "no-route" | "urpf-failed" | "self" | host | + "{" host-list "}" | "route" string ) [ port ] [ os ] "to" ( "any" | "no-route" | "self" | host | - "{" host-list "}" ) [ port ] + "{" host-list "}" | "route" string ) [ port ] ipspec = "any" | host | "{" host-list "}" -host = [ "!" ] ( address [ "/" mask-bits ] | "<" string ">" ) +host = [ "!" 
] ( address [ "/" mask-bits ] | "\*(Lt" string "\*(Gt" ) redirhost = address [ "/" mask-bits ] routehost = interface-name [ address [ "/" mask-bits ] ] address = interface-name | "(" interface-name ")" | hostname | @@ -2761,19 +3080,19 @@ routehost-list = routehost [ [ "," ] routehost-list ] port = "port" ( unary-op | binary-op | "{" op-list "}" ) portspec = "port" ( number | name ) [ ":" ( "*" | number | name ) ] -os = "os" ( os-name | "{" os-list "}" ) +os = "os" ( os-name | "{" os-list "}" ) user = "user" ( unary-op | binary-op | "{" op-list "}" ) group = "group" ( unary-op | binary-op | "{" op-list "}" ) -unary-op = [ "=" | "!=" | "<" | "<=" | ">" | ">=" ] +unary-op = [ "=" | "!=" | "\*(Lt" | "\*(Le" | "\*(Gt" | "\*(Ge" ] ( name | number ) -binary-op = number ( "<>" | "><" | ":" ) number +binary-op = number ( "\*(Lt\*(Gt" | "\*(Gt\*(Lt" | ":" ) number op-list = ( unary-op | binary-op ) [ [ "," ] op-list ] os-name = operating-system-name os-list = os-name [ [ "," ] os-list ] -flags = "flags" [ flag-set ] "/" flag-set +flags = "flags" ( [ flag-set ] "/" flag-set | "any" ) flag-set = [ "F" ] [ "S" ] [ "R" ] [ "P" ] [ "A" ] [ "U" ] [ "E" ] [ "W" ] @@ -2790,7 +3109,10 @@ state-opts = state-opt [ [ "," ] state-opts ] state-opt = "max" number | "no-sync" | timeout | "source-track" [ "rule" | "global" ] | "max-src-nodes" number | "max-src-states" number | - "if-bound" | "group-bound" | "floating" | + "max-src-conn" number | + "max-src-conn-rate" number "/" number | + "overload" "\*(Lt" string "\*(Gt" [ "flush" ] | + "if-bound" | "floating" | "pickups" | "no-pickups" | "hash-only" fragmentation = [ "fragment reassemble" | "fragment crop" | @@ -2838,13 +3160,30 @@ upperlimit-sc = "upperlimit" sc-spec sc-spec = bandwidth-spec | "(" bandwidth-spec number bandwidth-spec ")" .Ed +.Sh FILES +.Bl -tag -width "/etc/protocols" -compact +.It Pa /etc/hosts +Host name database. +.It Pa /etc/pf.conf +Default location of the ruleset file. +.It Pa /etc/pf.os +Default location of OS fingerprints. +.It Pa /etc/protocols +Protocol name database. +.It Pa /etc/services +Service name database. +.It Pa /usr/share/examples/pf +Example rulesets. +.El .Sh SEE ALSO +.Xr carp 4 , .Xr icmp 4 , .Xr icmp6 4 , .Xr ip 4 , .Xr ip6 4 , .Xr pf 4 , .Xr pfsync 4 , +.Xr route 4 , .Xr tcp 4 , .Xr udp 4 , .Xr hosts 5 , @@ -2853,7 +3192,8 @@ sc-spec = bandwidth-spec | .Xr services 5 , .Xr ftp-proxy 8 , .Xr pfctl 8 , -.Xr pflogd 8 +.Xr pflogd 8 , +.Xr route 8 .Sh HISTORY The .Nm diff --git a/usr.sbin/pfctl/pf.os.5 b/usr.sbin/pfctl/pf.os.5 index 8db55ee72a..371f44aa20 100644 --- a/usr.sbin/pfctl/pf.os.5 +++ b/usr.sbin/pfctl/pf.os.5 @@ -1,5 +1,4 @@ -.\" $OpenBSD: pf.os.5,v 1.4 2003/08/28 09:41:23 jmc Exp $ -.\" $DragonFly: src/usr.sbin/pfctl/pf.os.5,v 1.4 2007/05/12 21:22:11 swildner Exp $ +.\" $OpenBSD: pf.os.5,v 1.7 2005/11/16 20:07:18 stevesk Exp $ .\" .\" Copyright (c) 2003 Mike Frantzen .\" @@ -207,39 +206,17 @@ The output of .Bd -literal # tcpdump -s128 -c1 -nv 'tcp[13] == 2' - 03:13:48.118526 10.0.0.1.3377 > 10.0.0.0.2: S [tcp sum ok] \e + 03:13:48.118526 10.0.0.1.3377 > 10.0.0.2.80: S [tcp sum ok] \e 534596083:534596083(0) win 57344 (DF) [tos 0x10] \e - (ttl 64, id 11315) + (ttl 64, id 11315, len 44) .Ed .Pp almost translates into the following fingerprint .Bd -literal 57344:64:1:44:M1460: exampleOS:1.0::exampleOS 1.0 .Ed -.Pp -.Xr tcpdump 1 -does not explicitly give the packet length. -But it can usually be derived by adding the size of the IPv4 header to -the size of the TCP header to the size of the TCP options. 
-The size of both headers is typically twenty each and the usual -sizes of the TCP options are: -.Pp -.Bl -tag -width timestamp -offset indent -compact -.It mss -four bytes. -.It nop -1 byte. -.It sackOK -two bytes. -.It timestamp -ten bytes. -.It wscale -three bytes. -.El -.Pp -In the above example, the packet size comes out to 44 bytes. .Sh SEE ALSO -.Xr tcpdump 1 , -.Xr pf 4 , + .Xr pf 4 , .Xr pf.conf 5 , -.Xr pfctl 8 +.Xr pfctl 8 , +.Xr tcpdump 1 diff --git a/usr.sbin/pfctl/pf_print_state.c b/usr.sbin/pfctl/pf_print_state.c index 1144527e80..224e3e51ff 100644 --- a/usr.sbin/pfctl/pf_print_state.c +++ b/usr.sbin/pfctl/pf_print_state.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pf_print_state.c,v 1.39 2004/02/10 17:48:08 henning Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pf_print_state.c,v 1.3 2008/04/11 18:21:49 dillon Exp $ */ +/* $OpenBSD: pf_print_state.c,v 1.44 2007/03/01 17:20:53 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -32,8 +31,8 @@ */ #include -#include #include +#include #include #define TCPSTATES #include @@ -98,6 +97,12 @@ print_addr(struct pf_addr_wrap *addr, sa_family_t af, int verbose) case PF_ADDR_NOROUTE: printf("no-route"); return; + case PF_ADDR_URPFFAILED: + printf("urpf-failed"); + return; + case PF_ADDR_RTLABEL: + printf("route \"%s\"", addr->v.rtlabelname); + return; default: printf("?"); return; @@ -116,19 +121,19 @@ print_addr(struct pf_addr_wrap *addr, sa_family_t af, int verbose) void print_name(struct pf_addr *addr, sa_family_t af) { - char hostname[NI_MAXHOST]; + char his_host[NI_MAXHOST]; - strlcpy(hostname, "?", sizeof(hostname)); + strlcpy(his_host, "?", sizeof(his_host)); switch (af) { case AF_INET: { - struct sockaddr_in loc_sin; + struct sockaddr_in sin; - memset(&loc_sin, 0, sizeof(loc_sin)); - loc_sin.sin_len = sizeof(loc_sin); - loc_sin.sin_family = AF_INET; - loc_sin.sin_addr = addr->v4; - getnameinfo((struct sockaddr *)&loc_sin, loc_sin.sin_len, - hostname, sizeof(hostname), NULL, 0, NI_NOFQDN); + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr = addr->v4; + getnameinfo((struct sockaddr *)&sin, sin.sin_len, + his_host, sizeof(his_host), NULL, 0, NI_NOFQDN); break; } case AF_INET6: { @@ -139,11 +144,11 @@ print_name(struct pf_addr *addr, sa_family_t af) sin6.sin6_family = AF_INET6; sin6.sin6_addr = addr->v6; getnameinfo((struct sockaddr *)&sin6, sin6.sin6_len, - hostname, sizeof(hostname), NULL, 0, NI_NOFQDN); + his_host, sizeof(his_host), NULL, 0, NI_NOFQDN); break; } } - printf("%s", hostname); + printf("%s", his_host); } void @@ -273,11 +278,11 @@ print_state(struct pf_state *s, int opts) min = s->expire % 60; s->expire /= 60; printf(", expires in %.2u:%.2u:%.2u", s->expire, min, sec); - printf(", %u:%u pkts, %u:%u bytes", + printf(", %llu:%llu pkts, %llu:%llu bytes", s->packets[0], s->packets[1], s->bytes[0], s->bytes[1]); - if (s->anchor.nr != (uint32_t)(-1)) + if (s->anchor.nr != (unsigned)-1) printf(", anchor %u", s->anchor.nr); - if (s->rule.nr != (uint32_t)(-1)) + if (s->rule.nr != (unsigned)-1) printf(", rule %u", s->rule.nr); if (s->src_node != NULL) printf(", source-track"); @@ -286,19 +291,9 @@ print_state(struct pf_state *s, int opts) printf("\n"); } if (opts & PF_OPT_VERBOSE2) { - printf(" id: %016llx creatorid: %08x", - (unsigned long long)be64toh(s->id), ntohl(s->creatorid)); - - printf(" synchronization: "); - if ((s->sync_flags & PFSTATE_GOT_SYN_MASK) == - PFSTATE_GOT_SYN_MASK) { - printf("good"); - } else if (s->sync_flags & PFSTATE_GOT_SYN_MASK) { - 
printf("incomplete"); - } else { - printf("indeterminate"); - } - printf("\n"); + printf(" id: %016llx creatorid: %08x%s\n", + (unsigned long long)be64toh(s->id), ntohl(s->creatorid), + ((s->sync_flags & PFSTATE_NOSYNC) ? " (no-sync)" : "")); } } diff --git a/usr.sbin/pfctl/pfctl.8 b/usr.sbin/pfctl/pfctl.8 index 7fc71136fe..b5be8a1f62 100644 --- a/usr.sbin/pfctl/pfctl.8 +++ b/usr.sbin/pfctl/pfctl.8 @@ -1,5 +1,4 @@ -.\" $OpenBSD: pfctl.8,v 1.110 2004/03/20 09:31:42 david Exp $ -.\" $DragonFly: src/usr.sbin/pfctl/pfctl.8,v 1.4 2007/07/29 17:27:45 swildner Exp $ +.\" $OpenBSD: pfctl.8,v 1.128 2007/01/30 21:01:56 jmc Exp $ .\" .\" Copyright (c) 2001 Kjell Wooding. All rights reserved. .\" @@ -32,19 +31,25 @@ .Nm pfctl .Nd "control the packet filter (PF) and network address translation (NAT) device" .Sh SYNOPSIS -.Nm +.Nm pfctl .Bk -words -.Op Fl AdeghNnOqRrvz -.Op Fl a Ar anchor Ns Op Ar :ruleset -.Op Fl D Ar macro=value +.Op Fl AdeghmNnOqRrvz +.Op Fl a Ar anchor +.Oo Fl D Ar macro Ns = +.Ar value Oc .Op Fl F Ar modifier .Op Fl f Ar file .Op Fl i Ar interface -.Op Fl k Ar host +.Op Fl K Ar host | network +.Op Fl k Ar host | network +.Op Fl o Op Ar level .Op Fl p Ar device .Op Fl s Ar modifier -.Op Fl T Ar command Op Ar address ... -.Op Fl t Ar table +.Oo +.Fl t Ar table +.Fl T Ar command +.Op Ar address ... +.Oc .Op Fl x Ar level .Ek .Sh DESCRIPTION @@ -74,9 +79,11 @@ Translation rules are described in .Xr pf.conf 5 . .Pp When the variable -.Va pf_enable -is set to YES in -.Xr rc.conf 5 , +.Va pf +is set to +.Dv YES +in +.Xr rc.conf.local 8 , the rule file specified with the variable .Va pf_rules is loaded automatically by the @@ -87,9 +94,9 @@ The packet filter does not itself forward packets between interfaces. Forwarding can be enabled by setting the .Xr sysctl 8 variables -.Va net.inet.ip.forwarding +.Em net.inet.ip.forwarding and/or -.Va net.inet6.ip6.forwarding , +.Em net.inet6.ip6.forwarding to 1. Set them permanently in .Xr sysctl.conf 5 . @@ -102,48 +109,82 @@ The options are as follows: .It Fl A Load only the queue rules present in the rule file. Other rules and options are ignored. -.It Fl a Ar anchor Ns Op Ar :ruleset +.It Fl a Ar anchor Apply flags .Fl f , -.Fl F +.Fl F , and .Fl s only to the rules in the specified -.Ar anchor -and optional named ruleset -.Ar ruleset . +.Ar anchor . In addition to the main ruleset, .Nm -can load and manipulate additional rulesets by name. -Named rulesets are attached at -.Ar anchor -points, which are also referenced by name. +can load and manipulate additional rulesets by name, +called anchors. +The main ruleset is the default anchor. +.Pp +Anchors are referenced by name and may be nested, +with the various components of the anchor path separated by +.Sq / +characters, similar to how file system hierarchies are laid out. +The last component of the anchor path is where ruleset operations are +performed. +.Pp Evaluation of .Ar anchor rules from the main ruleset is described in .Xr pf.conf 5 . 
-For example, to show all filter rules inside anchor -.Li foo : +.Pp +For example, the following will show all filter rules (see the +.Fl s +flag below) inside the anchor +.Dq authpf/smith(1234) , +which would have been created for user +.Dq smith +by +.Xr authpf 8 , +PID 1234: .Bd -literal -offset indent -# pfctl -a foo -s rules +# pfctl -a "authpf/smith(1234)" -s rules .Ed .Pp -Private tables can also be put inside subrulesets, either by having table +Private tables can also be put inside anchors, either by having table statements in the .Xr pf.conf 5 -file that is loaded in the anchor, or by using regular table commands as in: +file that is loaded in the anchor, or by using regular table commands, as in: .Bd -literal -offset indent -# pfctl -a foo:bar -t mytable -T add 1.2.3.4 5.6.7.8 +# pfctl -a foo/bar -t mytable -T add 1.2.3.4 5.6.7.8 .Ed .Pp When a rule referring to a table is loaded in an anchor, the rule will use the -private table if one is defined, and then fallback to the table defined in the +private table if one is defined, and then fall back to the table defined in the main ruleset, if there is one. -This is similar to C rules for variables. +This is similar to C rules for variable scope. It is possible to create distinct tables with the same name in the global ruleset and in an anchor, but this is often bad design and a warning will be issued in that case. -.It Fl D Ar macro=value +.Pp +By default, recursive inline printing of anchors applies only to unnamed +anchors specified inline in the ruleset. +If the anchor name is terminated with a +.Sq * +character, the +.Fl s +flag will recursively print all anchors in a brace delimited block. +For example the following will print the +.Dq authpf +ruleset recursively: +.Bd -literal -offset indent +# pfctl -a 'authpf/*' -sr +.Ed +.Pp +To print the main ruleset recursively, specify only +.Sq * +as the anchor name: +.Bd -literal -offset indent +# pfctl -a '*' -sr +.Ed +.It Fl D Ar macro Ns = Ns Ar value Define .Ar macro to be set to @@ -162,23 +203,23 @@ Flush the filter parameters specified by (may be abbreviated): .Pp .Bl -tag -width xxxxxxxxxxxx -compact -.It Fl F Ar nat +.It Fl F Cm nat Flush the NAT rules. -.It Fl F Ar queue +.It Fl F Cm queue Flush the queue rules. -.It Fl F Ar rules +.It Fl F Cm rules Flush the filter rules. -.It Fl F Ar state +.It Fl F Cm state Flush the state table (NAT and filter). -.It Fl F Ar Sources +.It Fl F Cm Sources Flush the source tracking table. -.It Fl F Ar info +.It Fl F Cm info Flush the filter information (statistics that are not bound to rules). -.It Fl F Ar Tables +.It Fl F Cm Tables Flush the tables. -.It Fl F Ar osfp +.It Fl F Cm osfp Flush the passive operating system fingerprints. -.It Fl F Ar all +.It Fl F Cm all Flush all of the above. .El .It Fl f Ar file @@ -197,28 +238,55 @@ Help. .It Fl i Ar interface Restrict the operation to the given .Ar interface . -.It Fl k Ar host +.It Fl K Ar host | network +Kill all of the source tracking entries originating from the specified +.Ar host +or +.Ar network . +A second +.Fl K Ar host +or +.Fl K Ar network +option may be specified, which will kill all the source tracking +entries from the first host/network to the second. +.It Fl k Ar host | network Kill all of the state entries originating from the specified -.Ar host . +.Ar host +or +.Ar network . A second .Fl k Ar host +or +.Fl k Ar network option may be specified, which will kill all the state entries -from the first -.Ar host -to the second -.Ar host . 
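The new -K option parallels -k but operates on the source tracking table. An illustrative sketch with made-up addresses: the first command kills all source tracking entries originating from one host, the second all entries from one network to another:

    # pfctl -K 10.0.0.1
    # pfctl -K 192.168.1.0/24 -K 172.16.0.0/16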
+from the first host/network to the second. For example, to kill all of the state entries originating from -.Li host : -.Bd -literal -offset indent -# pfctl -k host -.Ed +.Dq host : +.Pp +.Dl # pfctl -k host .Pp To kill all of the state entries from -.Li host1 +.Dq host1 to -.Li host2 : +.Dq host2 : +.Pp +.Dl # pfctl -k host1 -k host2 +.Pp +To kill all states originating from 192.168.1.0/24 to 172.16.0.0/16: +.Pp +.Dl # pfctl -k 192.168.1.0/24 -k 172.16.0.0/16 +.Pp +A network prefix length of 0 can be used as a wildcard. +To kill all states with the target +.Dq host2 : +.Pp +.Dl # pfctl -k 0.0.0.0/0 -k host2 +.It Fl m +Merge in explicitly given options without resetting those +which are omitted. +Allows single options to be modified without disturbing the others: .Bd -literal -offset indent -# pfctl -k host1 -k host2 +# echo "set loginterface fxp0" | pfctl -mf - .Ed .It Fl N Load only the NAT rules present in the rule file. @@ -228,6 +296,58 @@ Do not actually load rules, just parse them. .It Fl O Load only the options present in the rule file. Other rules and options are ignored. +.It Fl o Op Ar level +Control the ruleset optimizer. +The ruleset optimizer attempts to improve rulesets by removing rule +duplication and making better use of rule ordering. +.Pp +.Bl -tag -width xxxxxxxxxxxx -compact +.It Fl o Cm none +Disable the ruleset optimizer. +.It Fl o Cm basic +Enable basic ruleset optimizations. +.It Fl o Cm profile +Enable basic ruleset optimizations with profiling. +.El +.Pp +.Cm basic +optimization does does four things: +.Pp +.Bl -enum -compact +.It +remove duplicate rules +.It +remove rules that are a subset of another rule +.It +combine multiple rules into a table when advantageous +.It +re-order the rules to improve evaluation performance +.El +.Pp +If +.Cm profile +is specified, the currently loaded ruleset will be examined as a feedback +profile to tailor the optimization of the +.Ar quick +rules to the actual network behavior. +.Pp +It is important to note that the ruleset optimizer will modify the ruleset +to improve performance. +A side effect of the ruleset modification is that per-rule accounting +statistics will have different meanings than before. +If per-rule accounting is important for billing purposes or whatnot, either +the ruleset optimizer should not be used or a +.Ar label +field should be added to all of the accounting rules to act as optimization +barriers. +.Pp +To retain compatibility with previous behaviour, a single +.Fl o +without any options will enable +.Cm basic +optimizations, and a second +.Fl o +will enable profiling. .It Fl p Ar device Use the device file .Ar device @@ -246,9 +366,9 @@ Show the filter parameters specified by (may be abbreviated): .Pp .Bl -tag -width xxxxxxxxxxxxx -compact -.It Fl s Ar nat +.It Fl s Cm nat Show the currently loaded NAT rules. -.It Fl s Ar queue +.It Fl s Cm queue Show the currently loaded queue rules. When used together with .Fl v , @@ -258,51 +378,62 @@ When used together with .Nm will loop and show updated queue statistics every five seconds, including measured bandwidth and packets per second. -.It Fl s Ar rules +.It Fl s Cm rules Show the currently loaded filter rules. When used together with .Fl v , the per-rule statistics (number of evaluations, packets and bytes) are also shown. -Note that the 'skip step' optimization done automatically by the kernel +Note that the +.Dq skip step +optimization done automatically by the kernel will skip evaluation of rules where possible. 
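For the ruleset optimizer described above, typical invocations look like the following sketch (the ruleset path is the stock /etc/pf.conf; the second form additionally uses the currently loaded ruleset as a feedback profile for the quick rules):

    # pfctl -o basic -f /etc/pf.conf
    # pfctl -o profile -f /etc/pf.conf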
Packets passed statefully are counted in the rule that created the state (even though the rule isn't evaluated more than once for the entire connection). -.It Fl s Ar Anchors -Show the currently loaded anchors. +.It Fl s Cm Anchors +Show the currently loaded anchors directly attached to the main ruleset. If .Fl a Ar anchor -is specified as well, the named rulesets currently loaded in the specified -anchor are shown instead. -.It Fl s Ar state +is specified as well, the anchors loaded directly below the given +.Ar anchor +are shown instead. +If +.Fl v +is specified, all anchors attached under the target anchor will be +displayed recursively. +.It Fl s Cm state Show the contents of the state table. -.It Fl s Ar Sources +.It Fl s Cm Sources Show the contents of the source tracking table. -.It Fl s Ar info +.It Fl s Cm info Show filter information (statistics and counters). When used together with .Fl v , source tracking statistics are also shown. -.It Fl s Ar labels -Show per-rule statistics (label, evaluations, packets, bytes) of +.It Fl s Cm labels +Show per-rule statistics (label, evaluations, packets total, bytes total, +packets in, bytes in, packets out, bytes out) of filter rules with labels, useful for accounting. -.It Fl s Ar timeouts +.It Fl s Cm timeouts Show the current global timeouts. -.It Fl s Ar memory +.It Fl s Cm memory Show the current pool memory hard limits. -.It Fl s Ar Tables +.It Fl s Cm Tables Show the list of tables. -.It Fl s Ar osfp +.It Fl s Cm osfp Show the list of operating system fingerprints. -.It Fl s Ar Interfaces +.It Fl s Cm Interfaces Show the list of interfaces and interface drivers available to PF. -When used together with a double +When used together with .Fl v , +it additionally lists which interfaces have skip rules activated. +When used together with +.Fl vv , interface statistics are also shown. .Fl i can be used to select an interface or a group of interfaces. -.It Fl s Ar all +.It Fl s Cm all Show all of the above, except for the lists of interfaces and operating system fingerprints. .El @@ -313,25 +444,32 @@ Specify the Commands include: .Pp .Bl -tag -width xxxxxxxxxxxx -compact -.It Fl T Ar kill +.It Fl T Cm kill Kill a table. -.It Fl T Ar flush +.It Fl T Cm flush Flush all addresses of a table. -.It Fl T Ar add +.It Fl T Cm add Add one or more addresses in a table. Automatically create a nonexisting table. -.It Fl T Ar delete +.It Fl T Cm delete Delete one or more addresses from a table. -.It Fl T Ar replace +.It Fl T Cm expire Ar number +Delete addresses which had their statistics cleared more than +.Ar number +seconds ago. +For entries which have never had their statistics cleared, +.Ar number +refers to the time they were added to the table. +.It Fl T Cm replace Replace the addresses of the table. Automatically create a nonexisting table. -.It Fl T Ar show +.It Fl T Cm show Show the content (addresses) of a table. -.It Fl T Ar test +.It Fl T Cm test Test if the given addresses match a table. -.It Fl T Ar zero +.It Fl T Cm zero Clear all the statistics of a table. -.It Fl T Ar load +.It Fl T Cm load Load only the table definitions from .Xr pf.conf 5 . This is used in conjunction with the @@ -343,16 +481,18 @@ flag, as in: .El .Pp For the -.Ar add , -.Ar delete , -.Ar replace +.Cm add , +.Cm delete , +.Cm replace , and -.Ar test +.Cm test commands, the list of addresses can be specified either directly on the command line and/or in an unformatted text file, using the .Fl f flag. -Comments starting with a "#" are allowed in the text file. 
+Comments starting with a +.Sq # +are allowed in the text file. With these commands, the .Fl v flag can also be used once or twice, in which case @@ -369,11 +509,17 @@ The address/network has been changed (negated). .It D The address/network has been deleted. .It M -The address matches (test operation only). +The address matches +.Po +.Cm test +operation only +.Pc . .It X The address/network is duplicated and therefore ignored. .It Y -The address/network cannot be added/deleted due to conflicting "!" attribute. +The address/network cannot be added/deleted due to conflicting +.Sq \&! +attributes. .It Z The address/network has been cleared (statistics). .El @@ -385,21 +531,21 @@ flag of For example, the following commands define a wide open firewall which will keep track of packets going to or coming from the .Ox -ftp server. -The following commands configure the firewall and send 10 pings to the ftp +FTP server. +The following commands configure the firewall and send 10 pings to the FTP server: .Bd -literal -offset indent # printf "table { ftp.openbsd.org }\en \e - pass out to keep state\en" | pfctl -f- + pass out to \en" | pfctl -f- # ping -qc10 ftp.openbsd.org .Ed .Pp We can now use the table -.Ar show +.Cm show command to output, for each address and packet direction, the number of packets and bytes that are being passed or blocked by rules referencing the table. The time at which the current accounting started is also shown with the -.Ar Cleared +.Dq Cleared line. .Bd -literal -offset indent # pfctl -t test -vTshow @@ -415,7 +561,8 @@ Similarly, it is possible to view global information about the tables by using the .Fl v modifier twice and the -.Ar show Tables +.Fl s +.Cm Tables command. This will display the number of addresses on each table, the number of rules which reference the table, and the global @@ -436,16 +583,18 @@ packet statistics for the whole table: .Ed .Pp As we can see here, only one packet \- the initial ping request \- matched the -table; but all packets passing as the result of the state are correctly +table, but all packets passing as the result of the state are correctly accounted for. Reloading the table(s) or ruleset will not affect packet accounting in any way. The two -.Ar XPass +.Dq XPass counters are incremented instead of the -.Ar Pass -counters when a "stateful" packet is passed but doesn't match the table -anymore. -This will happen in our example if someone flushes the table while the ping +.Dq Pass +counters when a +.Dq stateful +packet is passed but doesn't match the table anymore. +This will happen in our example if someone flushes the table while the +.Xr ping 8 command is running. .Pp When used with a single @@ -459,11 +608,11 @@ The flags are defined as follows: For constant tables, which cannot be altered outside .Xr pf.conf 5 . .It p -For persistent tables, which don't get automatically flushed when no rules +For persistent tables, which don't get automatically killed when no rules refer to them. .It a For tables which are part of the -.Ar active +.Em active tableset. Tables without this flag do not really exist, cannot contain addresses, and are only listed if the @@ -471,7 +620,7 @@ only listed if the flag is given. .It i For tables which are part of the -.Ar inactive +.Em inactive tableset. This flag can only be witnessed briefly during the loading of .Xr pf.conf 5 . @@ -479,7 +628,7 @@ This flag can only be witnessed briefly during the loading of For tables which are referenced (used) by rules. 
.It h This flag is set when a table in the main ruleset is hidden by one or more -tables of the same name in sub-rulesets (anchors). +tables of the same name from anchors attached below it. .El .It Fl t Ar table Specify the name of the table. @@ -488,20 +637,20 @@ Produce more verbose output. A second use of .Fl v will produce even more verbose output including ruleset warnings. -See previous section for its effect on table commands. +See the previous section for its effect on table commands. .It Fl x Ar level Set the debug .Ar level (may be abbreviated) to one of the following: .Pp .Bl -tag -width xxxxxxxxxxxx -compact -.It Fl x Ar none +.It Fl x Cm none Don't generate debug messages. -.It Fl x Ar urgent +.It Fl x Cm urgent Generate debug messages only for serious errors. -.It Fl x Ar misc +.It Fl x Cm misc Generate debug messages for various errors. -.It Fl x Ar loud +.It Fl x Cm loud Generate debug messages for common conditions. .El .It Fl z @@ -511,15 +660,18 @@ Clear per-rule statistics. .Bl -tag -width "/etc/pf.conf" -compact .It Pa /etc/pf.conf Packet filter rules file. +.It Pa /etc/pf.os +Passive operating system fingerprint database. .El .Sh SEE ALSO .Xr pf 4 , .Xr pf.conf 5 , .Xr pf.os 5 , -.Xr rc.conf 5 , .Xr sysctl.conf 5 , +.Xr authpf 8 , .Xr ftp-proxy 8 , .Xr rc 8 , +.Xr rc.conf 8 , .Xr sysctl 8 .Sh HISTORY The diff --git a/usr.sbin/pfctl/pfctl.c b/usr.sbin/pfctl/pfctl.c index bf57626c97..1ec95c146c 100644 --- a/usr.sbin/pfctl/pfctl.c +++ b/usr.sbin/pfctl/pfctl.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl.c,v 1.213 2004/03/20 09:31:42 david Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl.c,v 1.2 2005/02/11 22:31:45 joerg Exp $ */ +/* $OpenBSD: pfctl.c,v 1.262 2007/03/01 17:20:53 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -35,12 +34,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -59,37 +60,56 @@ void usage(void); int pfctl_enable(int, int); int pfctl_disable(int, int); int pfctl_clear_stats(int, int); -int pfctl_clear_rules(int, int, char *, char *); -int pfctl_clear_nat(int, int, char *, char *); +int pfctl_clear_interface_flags(int, int); +int pfctl_clear_rules(int, int, char *); +int pfctl_clear_nat(int, int, char *); int pfctl_clear_altq(int, int); int pfctl_clear_src_nodes(int, int); int pfctl_clear_states(int, const char *, int); +void pfctl_addrprefix(char *, struct pf_addr *); +int pfctl_kill_src_nodes(int, const char *, int); int pfctl_kill_states(int, const char *, int); +void pfctl_init_options(struct pfctl *); +int pfctl_load_options(struct pfctl *); +int pfctl_load_limit(struct pfctl *, unsigned int, unsigned int); +int pfctl_load_timeout(struct pfctl *, unsigned int, unsigned int); +int pfctl_load_debug(struct pfctl *, unsigned int); +int pfctl_load_logif(struct pfctl *, char *); +int pfctl_load_hostid(struct pfctl *, unsigned int); int pfctl_get_pool(int, struct pf_pool *, u_int32_t, u_int32_t, int, - char *, char *); + char *); void pfctl_print_rule_counters(struct pf_rule *, int); -int pfctl_show_rules(int, int, int, char *, char *); -int pfctl_show_nat(int, int, char *, char *); +int pfctl_show_rules(int, char *, int, enum pfctl_show, char *, int); +int pfctl_show_nat(int, int, char *); int pfctl_show_src_nodes(int, int); int pfctl_show_states(int, const char *, int); int pfctl_show_status(int, int); int pfctl_show_timeouts(int, int); int pfctl_show_limits(int, int); -int pfctl_debug(int, u_int32_t, int); -int pfctl_clear_rule_counters(int, int); +void pfctl_debug(int, 
u_int32_t, int); int pfctl_test_altqsupport(int, int); int pfctl_show_anchors(int, int, char *); +int pfctl_ruleset_trans(struct pfctl *, char *, struct pf_anchor *); +int pfctl_load_ruleset(struct pfctl *, char *, + struct pf_ruleset *, int, int); +int pfctl_load_rule(struct pfctl *, char *, struct pf_rule *, int); const char *pfctl_lookup_option(char *, const char **); +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; + const char *clearopt; char *rulesopt; const char *showopt; const char *debugopt; char *anchoropt; -const char *pf_device = "/dev/pf"; +const char *optiopt = NULL; +const char *pf_device = "/dev/pf"; char *ifaceopt; char *tableopt; const char *tblcmdopt; +int src_node_killers; +char *src_node_kill[2]; int state_killers; char *state_kill[2]; int loadopt; @@ -101,14 +121,25 @@ int labels = 0; const char *infile; +#define INDENT(d, o) do { \ + if (o) { \ + int i; \ + for (i=0; i < d; i++) \ + printf(" "); \ + } \ + } while (0); \ + + static const struct { const char *name; int index; } pf_limits[] = { - { "states", PF_LIMIT_STATES }, - { "src-nodes", PF_LIMIT_SRC_NODES }, - { "frags", PF_LIMIT_FRAGS }, - { NULL, 0 } + { "states", PF_LIMIT_STATES }, + { "src-nodes", PF_LIMIT_SRC_NODES }, + { "frags", PF_LIMIT_FRAGS }, + { "tables", PF_LIMIT_TABLES }, + { "table-entries", PF_LIMIT_TABLE_ENTRIES }, + { NULL, 0 } }; struct pf_hint { @@ -122,6 +153,7 @@ static const struct pf_hint pf_hint_normal[] = { { "tcp.closing", 15 * 60 }, { "tcp.finwait", 45 }, { "tcp.closed", 90 }, + { "tcp.tsdiff", 30 }, { NULL, 0 } }; static const struct pf_hint pf_hint_satellite[] = { @@ -131,6 +163,7 @@ static const struct pf_hint pf_hint_satellite[] = { { "tcp.closing", 15 * 60 + 5 }, { "tcp.finwait", 45 + 5 }, { "tcp.closed", 90 + 5 }, + { "tcp.tsdiff", 60 }, { NULL, 0 } }; static const struct pf_hint pf_hint_conservative[] = { @@ -140,6 +173,7 @@ static const struct pf_hint pf_hint_conservative[] = { { "tcp.closing", 60 * 60 }, { "tcp.finwait", 10 * 60 }, { "tcp.closed", 3 * 60 }, + { "tcp.tsdiff", 60 }, { NULL, 0 } }; static const struct pf_hint pf_hint_aggressive[] = { @@ -149,6 +183,7 @@ static const struct pf_hint pf_hint_aggressive[] = { { "tcp.closing", 60 }, { "tcp.finwait", 30 }, { "tcp.closed", 30 }, + { "tcp.tsdiff", 10 }, { NULL, 0 } }; @@ -177,25 +212,26 @@ static const char *showopt_list[] = { static const char *tblcmdopt_list[] = { "kill", "flush", "add", "delete", "load", "replace", "show", - "test", "zero", NULL + "test", "zero", "expire", NULL }; static const char *debugopt_list[] = { "none", "urgent", "misc", "loud", NULL }; +static const char *optiopt_list[] = { + "o", "none", "basic", "profile", NULL +}; void usage(void) { - fprintf(stderr, "usage: %s [-AdeghNnOqRrvz] ", getprogname()); - fprintf(stderr, "[-a anchor[:ruleset]] [-D macro=value]\n"); - fprintf(stderr, " "); - fprintf(stderr, "[-F modifier] [-f file] [-i interface] "); - fprintf(stderr, "[-k host] [-p device]\n"); - fprintf(stderr, " "); - fprintf(stderr, "[-s modifier] [-T command [address ...]] "); - fprintf(stderr, "[-t table] [-x level]\n"); + fprintf(stderr, "usage: %s [-AdeghmNnOqRrvz] ", getprogname()); + fprintf(stderr, "[-a anchor] [-D macro=value] [-F modifier]\n"); + fprintf(stderr, "\t[-f file] [-i interface] [-K host | network] "); + fprintf(stderr, "[-k host | network ]\n"); + fprintf(stderr, "\t[-o [level]] [-p device] [-s modifier ]\n"); + fprintf(stderr, "\t[-t table -T command [address ...]] [-x level]\n"); exit(1); } @@ -248,42 +284,31 @@ pfctl_clear_stats(int dev, int opts) } 
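The two limits added to pf_limits above ("tables" and "table-entries") are set through the usual "set limit" syntax, and the new "expire" table command removes stale entries. A hedged example with purely illustrative numbers and table name:

    # echo "set limit { tables 2000, table-entries 100000 }" | pfctl -mf -
    # pfctl -s memory
    # pfctl -t mytable -T expire 86400

Here -m merges just the given options into the running configuration, -s memory shows the resulting hard limits, and -T expire deletes addresses whose statistics were last cleared more than 86400 seconds ago.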
int -pfctl_clear_rules(int dev, int opts, char *anchorname, char *rulesetname) +pfctl_clear_interface_flags(int dev, int opts) { - struct pfr_buffer t; + struct pfioc_iface pi; - if (*anchorname && !*rulesetname) { - struct pfioc_ruleset pr; - int mnr, nr, r; + if ((opts & PF_OPT_NOACTION) == 0) { + bzero(&pi, sizeof(pi)); + pi.pfiio_flags = PFI_IFLAG_SKIP; - memset(&pr, 0, sizeof(pr)); - memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); - if (ioctl(dev, DIOCGETRULESETS, &pr)) { - if (errno == EINVAL) - fprintf(stderr, "No rulesets in anchor '%s'.\n", - anchorname); - else - err(1, "DIOCGETRULESETS"); - return (-1); - } - mnr = pr.nr; - for (nr = mnr - 1; nr >= 0; --nr) { - pr.nr = nr; - if (ioctl(dev, DIOCGETRULESET, &pr)) - err(1, "DIOCGETRULESET"); - r = pfctl_clear_rules(dev, opts | PF_OPT_QUIET, - anchorname, pr.name); - if (r) - return (r); - } + if (ioctl(dev, DIOCCLRIFFLAG, &pi)) + err(1, "DIOCCLRIFFLAG"); if ((opts & PF_OPT_QUIET) == 0) - fprintf(stderr, "rules cleared\n"); - return (0); + fprintf(stderr, "pf: interface flags reset\n"); } + return (0); +} + +int +pfctl_clear_rules(int dev, int opts, char *anchorname) +{ + struct pfr_buffer t; + memset(&t, 0, sizeof(t)); t.pfrb_type = PFRB_TRANS; - if (pfctl_add_trans(&t, PF_RULESET_SCRUB, anchorname, rulesetname) || - pfctl_add_trans(&t, PF_RULESET_FILTER, anchorname, rulesetname) || + if (pfctl_add_trans(&t, PF_RULESET_SCRUB, anchorname) || + pfctl_add_trans(&t, PF_RULESET_FILTER, anchorname) || pfctl_trans(dev, &t, DIOCXBEGIN, 0) || pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) err(1, "pfctl_clear_rules"); @@ -293,43 +318,15 @@ pfctl_clear_rules(int dev, int opts, char *anchorname, char *rulesetname) } int -pfctl_clear_nat(int dev, int opts, char *anchorname, char *rulesetname) +pfctl_clear_nat(int dev, int opts, char *anchorname) { struct pfr_buffer t; - if (*anchorname && !*rulesetname) { - struct pfioc_ruleset pr; - int mnr, nr, r; - - memset(&pr, 0, sizeof(pr)); - memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); - if (ioctl(dev, DIOCGETRULESETS, &pr)) { - if (errno == EINVAL) - fprintf(stderr, "No rulesets in anchor '%s'.\n", - anchorname); - else - err(1, "DIOCGETRULESETS"); - return (-1); - } - mnr = pr.nr; - for (nr = mnr - 1; nr >= 0; --nr) { - pr.nr = nr; - if (ioctl(dev, DIOCGETRULESET, &pr)) - err(1, "DIOCGETRULESET"); - r = pfctl_clear_nat(dev, opts | PF_OPT_QUIET, - anchorname, pr.name); - if (r) - return (r); - } - if ((opts & PF_OPT_QUIET) == 0) - fprintf(stderr, "nat cleared\n"); - return (0); - } memset(&t, 0, sizeof(t)); t.pfrb_type = PFRB_TRANS; - if (pfctl_add_trans(&t, PF_RULESET_NAT, anchorname, rulesetname) || - pfctl_add_trans(&t, PF_RULESET_BINAT, anchorname, rulesetname) || - pfctl_add_trans(&t, PF_RULESET_RDR, anchorname, rulesetname) || + if (pfctl_add_trans(&t, PF_RULESET_NAT, anchorname) || + pfctl_add_trans(&t, PF_RULESET_BINAT, anchorname) || + pfctl_add_trans(&t, PF_RULESET_RDR, anchorname) || pfctl_trans(dev, &t, DIOCXBEGIN, 0) || pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) err(1, "pfctl_clear_nat"); @@ -347,7 +344,7 @@ pfctl_clear_altq(int dev, int opts) return (-1); memset(&t, 0, sizeof(t)); t.pfrb_type = PFRB_TRANS; - if (pfctl_add_trans(&t, PF_RULESET_ALTQ, "", "") || + if (pfctl_add_trans(&t, PF_RULESET_ALTQ, "") || pfctl_trans(dev, &t, DIOCXBEGIN, 0) || pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) err(1, "pfctl_clear_altq"); @@ -383,6 +380,163 @@ pfctl_clear_states(int dev, const char *iface, int opts) return (0); } +void +pfctl_addrprefix(char *addr, struct pf_addr *mask) +{ + char *p; + const char *errstr; 
+ int prefix, ret_ga, q, r; + struct addrinfo hints, *res; + + if ((p = strchr(addr, '/')) == NULL) + return; + + *p++ = '\0'; + prefix = strtonum(p, 0, 128, &errstr); + if (errstr) + errx(1, "prefix is %s: %s", errstr, p); + + bzero(&hints, sizeof(hints)); + /* prefix only with numeric addresses */ + hints.ai_flags |= AI_NUMERICHOST; + + if ((ret_ga = getaddrinfo(addr, NULL, &hints, &res))) { + errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); + /* NOTREACHED */ + } + + if (res->ai_family == AF_INET && prefix > 32) + errx(1, "prefix too long for AF_INET"); + else if (res->ai_family == AF_INET6 && prefix > 128) + errx(1, "prefix too long for AF_INET6"); + + q = prefix >> 3; + r = prefix & 7; + switch (res->ai_family) { + case AF_INET: + bzero(&mask->v4, sizeof(mask->v4)); + mask->v4.s_addr = htonl((u_int32_t) + (0xffffffffffULL << (32 - prefix))); + break; + case AF_INET6: + bzero(&mask->v6, sizeof(mask->v6)); + if (q > 0) + memset((void *)&mask->v6, 0xff, q); + if (r > 0) + *((u_char *)&mask->v6 + q) = + (0xff00 >> r) & 0xff; + break; + } + freeaddrinfo(res); +} + +int +pfctl_kill_src_nodes(int dev, const char *iface __unused, int opts) +{ + struct pfioc_src_node_kill psnk; + struct addrinfo *res[2], *resp[2]; + struct sockaddr last_src, last_dst; + int killed, sources, dests; + int ret_ga; + + killed = sources = dests = 0; + + memset(&psnk, 0, sizeof(psnk)); + memset(&psnk.psnk_src.addr.v.a.mask, 0xff, + sizeof(psnk.psnk_src.addr.v.a.mask)); + memset(&last_src, 0xff, sizeof(last_src)); + memset(&last_dst, 0xff, sizeof(last_dst)); + + pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask); + + if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) { + errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); + /* NOTREACHED */ + } + for (resp[0] = res[0]; resp[0]; resp[0] = resp[0]->ai_next) { + if (resp[0]->ai_addr == NULL) + continue; + /* We get lots of duplicates. 
Catch the easy ones */ + if (memcmp(&last_src, resp[0]->ai_addr, sizeof(last_src)) == 0) + continue; + last_src = *(struct sockaddr *)resp[0]->ai_addr; + + psnk.psnk_af = resp[0]->ai_family; + sources++; + + if (psnk.psnk_af == AF_INET) + psnk.psnk_src.addr.v.a.addr.v4 = + ((struct sockaddr_in *)resp[0]->ai_addr)->sin_addr; + else if (psnk.psnk_af == AF_INET6) + psnk.psnk_src.addr.v.a.addr.v6 = + ((struct sockaddr_in6 *)resp[0]->ai_addr)-> + sin6_addr; + else + errx(1, "Unknown address family %d", psnk.psnk_af); + + if (src_node_killers > 1) { + dests = 0; + memset(&psnk.psnk_dst.addr.v.a.mask, 0xff, + sizeof(psnk.psnk_dst.addr.v.a.mask)); + memset(&last_dst, 0xff, sizeof(last_dst)); + pfctl_addrprefix(src_node_kill[1], + &psnk.psnk_dst.addr.v.a.mask); + if ((ret_ga = getaddrinfo(src_node_kill[1], NULL, NULL, + &res[1]))) { + errx(1, "getaddrinfo: %s", + gai_strerror(ret_ga)); + /* NOTREACHED */ + } + for (resp[1] = res[1]; resp[1]; + resp[1] = resp[1]->ai_next) { + if (resp[1]->ai_addr == NULL) + continue; + if (psnk.psnk_af != resp[1]->ai_family) + continue; + + if (memcmp(&last_dst, resp[1]->ai_addr, + sizeof(last_dst)) == 0) + continue; + last_dst = *(struct sockaddr *)resp[1]->ai_addr; + + dests++; + + if (psnk.psnk_af == AF_INET) + psnk.psnk_dst.addr.v.a.addr.v4 = + ((struct sockaddr_in *)resp[1]-> + ai_addr)->sin_addr; + else if (psnk.psnk_af == AF_INET6) + psnk.psnk_dst.addr.v.a.addr.v6 = + ((struct sockaddr_in6 *)resp[1]-> + ai_addr)->sin6_addr; + else + errx(1, "Unknown address family %d", + psnk.psnk_af); + + if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) + err(1, "DIOCKILLSRCNODES"); + killed += psnk.psnk_af; + /* fixup psnk.psnk_af */ + psnk.psnk_af = resp[1]->ai_family; + } + freeaddrinfo(res[1]); + } else { + if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) + err(1, "DIOCKILLSRCNODES"); + killed += psnk.psnk_af; + /* fixup psnk.psnk_af */ + psnk.psnk_af = res[0]->ai_family; + } + } + + freeaddrinfo(res[0]); + + if ((opts & PF_OPT_QUIET) == 0) + fprintf(stderr, "killed %d src nodes from %d sources and %d " + "destinations\n", killed, sources, dests); + return (0); +} + int pfctl_kill_states(int dev, const char *iface, int opts) { @@ -403,6 +557,8 @@ pfctl_kill_states(int dev, const char *iface, int opts) sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname)) errx(1, "invalid interface: %s", iface); + pfctl_addrprefix(state_kill[0], &psk.psk_src.addr.v.a.mask); + if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, &res[0]))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ @@ -433,6 +589,8 @@ pfctl_kill_states(int dev, const char *iface, int opts) memset(&psk.psk_dst.addr.v.a.mask, 0xff, sizeof(psk.psk_dst.addr.v.a.mask)); memset(&last_dst, 0xff, sizeof(last_dst)); + pfctl_addrprefix(state_kill[1], + &psk.psk_dst.addr.v.a.mask); if ((ret_ga = getaddrinfo(state_kill[1], NULL, NULL, &res[1]))) { errx(1, "getaddrinfo: %s", @@ -491,7 +649,7 @@ pfctl_kill_states(int dev, const char *iface, int opts) int pfctl_get_pool(int dev, struct pf_pool *pool, u_int32_t nr, - u_int32_t ticket, int r_action, char *anchorname, char *rulesetname) + u_int32_t ticket, int r_action, char *anchorname) { struct pfioc_pooladdr pp; struct pf_pooladdr *pa; @@ -499,7 +657,6 @@ pfctl_get_pool(int dev, struct pf_pool *pool, u_int32_t nr, memset(&pp, 0, sizeof(pp)); memcpy(pp.anchor, anchorname, sizeof(pp.anchor)); - memcpy(pp.ruleset, rulesetname, sizeof(pp.ruleset)); pp.r_action = r_action; pp.r_num = nr; pp.ticket = ticket; @@ -525,6 +682,17 @@ pfctl_get_pool(int dev, struct pf_pool *pool, u_int32_t 
nr, return (0); } +void +pfctl_move_pool(struct pf_pool *src, struct pf_pool *dst) +{ + struct pf_pooladdr *pa; + + while ((pa = TAILQ_FIRST(&src->list)) != NULL) { + TAILQ_REMOVE(&src->list, pa, entries); + TAILQ_INSERT_TAIL(&dst->list, pa, entries); + } +} + void pfctl_clear_pool(struct pf_pool *pool) { @@ -559,12 +727,18 @@ pfctl_print_rule_counters(struct pf_rule *rule, int opts) printf(" [ queue: qname=%s qid=%u pqname=%s pqid=%u ]\n", rule->qname, rule->qid, rule->pqname, rule->pqid); } - if (opts & PF_OPT_VERBOSE) + if (opts & PF_OPT_VERBOSE) { printf(" [ Evaluations: %-8llu Packets: %-8llu " "Bytes: %-10llu States: %-6u]\n", (unsigned long long)rule->evaluations, - (unsigned long long)rule->packets, - (unsigned long long)rule->bytes, rule->states); + (unsigned long long)(rule->packets[0] + + rule->packets[1]), + (unsigned long long)(rule->bytes[0] + + rule->bytes[1]), rule->states); + if (!(opts & PF_OPT_DEBUG)) + printf(" [ Inserted: uid %u pid %u ]\n", + (unsigned)rule->cuid, (unsigned)rule->cpid); + } } void @@ -577,169 +751,172 @@ pfctl_print_title(const char *title) } int -pfctl_show_rules(int dev, int opts, int format, char *anchorname, - char *rulesetname) +pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format, + char *anchorname, int depth) { struct pfioc_rule pr; u_int32_t nr, mnr, header = 0; int rule_numbers = opts & (PF_OPT_VERBOSE2 | PF_OPT_DEBUG); + int len = strlen(path); + int brace; + char *p; - if (*anchorname && !*rulesetname) { - struct pfioc_ruleset pr2; - int r; - - memset(&pr2, 0, sizeof(pr2)); - memcpy(pr2.anchor, anchorname, sizeof(pr2.anchor)); - if (ioctl(dev, DIOCGETRULESETS, &pr2)) { - if (errno == EINVAL) - fprintf(stderr, "No rulesets in anchor '%s'.\n", - anchorname); - else - err(1, "DIOCGETRULESETS"); - return (-1); - } - if (opts & PF_OPT_SHOWALL && pr2.nr) - pfctl_print_title("FILTER RULES:"); - mnr = pr2.nr; - for (nr = 0; nr < mnr; ++nr) { - pr2.nr = nr; - if (ioctl(dev, DIOCGETRULESET, &pr2)) - err(1, "DIOCGETRULESET"); - r = pfctl_show_rules(dev, opts, format, anchorname, - pr2.name); - if (r) - return (r); - } - return (0); - } + if (path[0]) + snprintf(&path[len], MAXPATHLEN - len, "/%s", anchorname); + else + snprintf(&path[len], MAXPATHLEN - len, "%s", anchorname); memset(&pr, 0, sizeof(pr)); - memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); - memcpy(pr.ruleset, rulesetname, sizeof(pr.ruleset)); + memcpy(pr.anchor, path, sizeof(pr.anchor)); if (opts & PF_OPT_SHOWALL) { pr.rule.action = PF_PASS; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); - return (-1); + goto error; } header++; } pr.rule.action = PF_SCRUB; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); - return (-1); + goto error; } if (opts & PF_OPT_SHOWALL) { - if (format == 0 && (pr.nr > 0 || header)) + if (format == PFCTL_SHOW_RULES && (pr.nr > 0 || header)) pfctl_print_title("FILTER RULES:"); - else if (format == 1 && labels) + else if (format == PFCTL_SHOW_LABELS && labels) pfctl_print_title("LABEL COUNTERS:"); } mnr = pr.nr; + if (opts & PF_OPT_CLRRULECTRS) + pr.action = PF_GET_CLR_CNTR; + for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); - return (-1); + goto error; } if (pfctl_get_pool(dev, &pr.rule.rpool, - nr, pr.ticket, PF_SCRUB, anchorname, rulesetname) != 0) - return (-1); + nr, pr.ticket, PF_SCRUB, path) != 0) + goto error; switch (format) { - case 1: + case PFCTL_SHOW_LABELS: if (pr.rule.label[0]) { printf("%s ", pr.rule.label); - printf("%llu %llu %llu\n", + printf("%llu %llu 
%llu %llu %llu %llu %llu\n", (unsigned long long)pr.rule.evaluations, - (unsigned long long)pr.rule.packets, - (unsigned long long)pr.rule.bytes); + (unsigned long long)(pr.rule.packets[0] + + pr.rule.packets[1]), + (unsigned long long)(pr.rule.bytes[0] + + pr.rule.bytes[1]), + (unsigned long long)pr.rule.packets[0], + (unsigned long long)pr.rule.bytes[0], + (unsigned long long)pr.rule.packets[1], + (unsigned long long)pr.rule.bytes[1]); } break; - default: + case PFCTL_SHOW_RULES: if (pr.rule.label[0] && (opts & PF_OPT_SHOWALL)) labels = 1; - print_rule(&pr.rule, rule_numbers); + print_rule(&pr.rule, pr.anchor_call, rule_numbers); + printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); + break; + case PFCTL_SHOW_NOTHING: + break; } pfctl_clear_pool(&pr.rule.rpool); } pr.rule.action = PF_PASS; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); - return (-1); + goto error; } mnr = pr.nr; for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); - return (-1); + goto error; } if (pfctl_get_pool(dev, &pr.rule.rpool, - nr, pr.ticket, PF_PASS, anchorname, rulesetname) != 0) - return (-1); + nr, pr.ticket, PF_PASS, path) != 0) + goto error; switch (format) { - case 1: + case PFCTL_SHOW_LABELS: if (pr.rule.label[0]) { printf("%s ", pr.rule.label); - printf("%llu %llu %llu\n", + printf("%llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)pr.rule.evaluations, - (unsigned long long)pr.rule.packets, - (unsigned long long)pr.rule.bytes); + (unsigned long long)(pr.rule.packets[0] + + pr.rule.packets[1]), + (unsigned long long)(pr.rule.bytes[0] + + pr.rule.bytes[1]), + (unsigned long long)pr.rule.packets[0], + (unsigned long long)pr.rule.bytes[0], + (unsigned long long)pr.rule.packets[1], + (unsigned long long)pr.rule.bytes[1]); } break; - default: + case PFCTL_SHOW_RULES: + brace = 0; if (pr.rule.label[0] && (opts & PF_OPT_SHOWALL)) labels = 1; - print_rule(&pr.rule, rule_numbers); + INDENT(depth, !(opts & PF_OPT_VERBOSE)); + if (pr.anchor_call[0] && + ((((p = strrchr(pr.anchor_call, '_')) != NULL) && + ((void *)p == (void *)pr.anchor_call || + *(--p) == '/')) || (opts & PF_OPT_RECURSE))) { + brace++; + if ((p = strrchr(pr.anchor_call, '/')) != + NULL) + p++; + else + p = &pr.anchor_call[0]; + } else + p = &pr.anchor_call[0]; + + print_rule(&pr.rule, p, rule_numbers); + if (brace) + printf(" {\n"); + else + printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); + if (brace) { + pfctl_show_rules(dev, path, opts, format, + p, depth + 1); + INDENT(depth, !(opts & PF_OPT_VERBOSE)); + printf("}\n"); + } + break; + case PFCTL_SHOW_NOTHING: + break; } pfctl_clear_pool(&pr.rule.rpool); } + path[len] = '\0'; return (0); + + error: + path[len] = '\0'; + return (-1); } int -pfctl_show_nat(int dev, int opts, char *anchorname, char *rulesetname) +pfctl_show_nat(int dev, int opts, char *anchorname) { struct pfioc_rule pr; u_int32_t mnr, nr; static int nattype[3] = { PF_NAT, PF_RDR, PF_BINAT }; int i, dotitle = opts & PF_OPT_SHOWALL; - if (*anchorname && !*rulesetname) { - struct pfioc_ruleset pr2; - int r; - - memset(&pr2, 0, sizeof(pr2)); - memcpy(pr2.anchor, anchorname, sizeof(pr2.anchor)); - if (ioctl(dev, DIOCGETRULESETS, &pr2)) { - if (errno == EINVAL) - fprintf(stderr, "No rulesets in anchor '%s'.\n", - anchorname); - else - err(1, "DIOCGETRULESETS"); - return (-1); - } - mnr = pr2.nr; - for (nr = 0; nr < mnr; ++nr) { - pr2.nr = nr; - if (ioctl(dev, DIOCGETRULESET, &pr2)) - err(1, "DIOCGETRULESET"); - r = pfctl_show_nat(dev, opts, anchorname, 
pr2.name); - if (r) - return (r); - } - return (0); - } - memset(&pr, 0, sizeof(pr)); memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); - memcpy(pr.ruleset, rulesetname, sizeof(pr.ruleset)); for (i = 0; i < 3; i++) { pr.rule.action = nattype[i]; if (ioctl(dev, DIOCGETRULES, &pr)) { @@ -754,14 +931,15 @@ pfctl_show_nat(int dev, int opts, char *anchorname, char *rulesetname) return (-1); } if (pfctl_get_pool(dev, &pr.rule.rpool, nr, - pr.ticket, nattype[i], anchorname, - rulesetname) != 0) + pr.ticket, nattype[i], anchorname) != 0) return (-1); if (dotitle) { pfctl_print_title("TRANSLATION RULES:"); dotitle = 0; } - print_rule(&pr.rule, opts & PF_OPT_VERBOSE2); + print_rule(&pr.rule, pr.anchor_call, + opts & PF_OPT_VERBOSE2); + printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); pfctl_clear_pool(&pr.rule.rpool); } @@ -789,16 +967,17 @@ pfctl_show_src_nodes(int dev, int opts) } if (ioctl(dev, DIOCGETSRCNODES, &psn) < 0) { warn("DIOCGETSRCNODES"); + free(inbuf); return (-1); } if (psn.psn_len + sizeof(struct pfioc_src_nodes) < len) break; if (len == 0 && psn.psn_len == 0) - return (0); + goto done; if (len == 0 && psn.psn_len != 0) len = psn.psn_len; if (psn.psn_len == 0) - return (0); /* no src_nodes */ + goto done; /* no src_nodes */ len *= 2; } p = psn.psn_src_nodes; @@ -808,6 +987,8 @@ pfctl_show_src_nodes(int dev, int opts) print_src_node(p, opts); p++; } +done: + free(inbuf); return (0); } @@ -831,16 +1012,17 @@ pfctl_show_states(int dev, const char *iface, int opts) } if (ioctl(dev, DIOCGETSTATES, &ps) < 0) { warn("DIOCGETSTATES"); + free(inbuf); return (-1); } if (ps.ps_len + sizeof(struct pfioc_states) < len) break; if (len == 0 && ps.ps_len == 0) - return (0); + goto done; if (len == 0 && ps.ps_len != 0) len = ps.ps_len; if (ps.ps_len == 0) - return (0); /* no states */ + goto done; /* no states */ len *= 2; } p = ps.ps_states; @@ -853,6 +1035,8 @@ pfctl_show_states(int dev, const char *iface, int opts) } print_state(p, opts); } +done: + free(inbuf); return (0); } @@ -885,7 +1069,8 @@ pfctl_show_timeouts(int dev, int opts) if (ioctl(dev, DIOCGETTIMEOUT, &pt)) err(1, "DIOCGETTIMEOUT"); printf("%-20s %10d", pf_timeouts[i].name, pt.seconds); - if (i >= PFTM_ADAPTIVE_START && i <= PFTM_ADAPTIVE_END) + if (pf_timeouts[i].timeout >= PFTM_ADAPTIVE_START && + pf_timeouts[i].timeout <= PFTM_ADAPTIVE_END) printf(" states"); else printf("s"); @@ -908,11 +1093,11 @@ pfctl_show_limits(int dev, int opts) pl.index = pf_limits[i].index; if (ioctl(dev, DIOCGETLIMIT, &pl)) err(1, "DIOCGETLIMIT"); - printf("%-10s ", pf_limits[i].name); + printf("%-13s ", pf_limits[i].name); if (pl.limit == UINT_MAX) printf("unlimited\n"); else - printf("hard limit %6u\n", pl.limit); + printf("hard limit %8u\n", pl.limit); } return (0); } @@ -940,64 +1125,189 @@ pfctl_add_pool(struct pfctl *pf, struct pf_pool *p, sa_family_t af) } int -pfctl_add_rule(struct pfctl *pf, struct pf_rule *r) +pfctl_add_rule(struct pfctl *pf, struct pf_rule *r, const char *anchor_call) { u_int8_t rs_num; - struct pfioc_rule pr; + struct pf_rule *rule; + struct pf_ruleset *rs; + char *p; + + rs_num = pf_get_ruleset_number(r->action); + if (rs_num == PF_RULESET_MAX) + errx(1, "Invalid rule type %d", r->action); + + rs = &pf->anchor->ruleset; + + if (anchor_call[0] && r->anchor == NULL) { + /* + * Don't make non-brace anchors part of the main anchor pool. 
+ */ + if ((r->anchor = calloc(1, sizeof(*r->anchor))) == NULL) + err(1, "pfctl_add_rule: calloc"); + + pf_init_ruleset(&r->anchor->ruleset); + r->anchor->ruleset.anchor = r->anchor; + if (strlcpy(r->anchor->path, anchor_call, + sizeof(rule->anchor->path)) >= sizeof(rule->anchor->path)) + errx(1, "pfctl_add_rule: strlcpy"); + if ((p = strrchr(anchor_call, '/')) != NULL) { + if (!strlen(p)) + err(1, "pfctl_add_rule: bad anchor name %s", + anchor_call); + } else + p = __DECONST(char *, anchor_call); + if (strlcpy(r->anchor->name, p, + sizeof(rule->anchor->name)) >= sizeof(rule->anchor->name)) + errx(1, "pfctl_add_rule: strlcpy"); + } - switch (r->action) { - case PF_SCRUB: - if ((loadopt & PFCTL_FLAG_FILTER) == 0) - return (0); - rs_num = PF_RULESET_SCRUB; - break; - case PF_DROP: - case PF_PASS: - if ((loadopt & PFCTL_FLAG_FILTER) == 0) - return (0); - rs_num = PF_RULESET_FILTER; - break; - case PF_NAT: - case PF_NONAT: - if ((loadopt & PFCTL_FLAG_NAT) == 0) - return (0); - rs_num = PF_RULESET_NAT; - break; - case PF_RDR: - case PF_NORDR: - if ((loadopt & PFCTL_FLAG_NAT) == 0) - return (0); - rs_num = PF_RULESET_RDR; - break; - case PF_BINAT: - case PF_NOBINAT: - if ((loadopt & PFCTL_FLAG_NAT) == 0) - return (0); - rs_num = PF_RULESET_BINAT; - break; - default: - errx(1, "Invalid rule type"); - break; + if ((rule = calloc(1, sizeof(*rule))) == NULL) + err(1, "calloc"); + bcopy(r, rule, sizeof(*rule)); + TAILQ_INIT(&rule->rpool.list); + pfctl_move_pool(&r->rpool, &rule->rpool); + + TAILQ_INSERT_TAIL(rs->rules[rs_num].active.ptr, rule, entries); + return (0); +} + +int +pfctl_ruleset_trans(struct pfctl *pf, char *path, struct pf_anchor *a) +{ + int osize = pf->trans->pfrb_size; + + if ((pf->loadopt & PFCTL_FLAG_NAT) != 0) { + if (pfctl_add_trans(pf->trans, PF_RULESET_NAT, path) || + pfctl_add_trans(pf->trans, PF_RULESET_BINAT, path) || + pfctl_add_trans(pf->trans, PF_RULESET_RDR, path)) + return (1); + } + if (a == pf->astack[0] && ((altqsupport && + (pf->loadopt & PFCTL_FLAG_ALTQ) != 0))) { + if (pfctl_add_trans(pf->trans, PF_RULESET_ALTQ, path)) + return (2); } + if ((pf->loadopt & PFCTL_FLAG_FILTER) != 0) { + if (pfctl_add_trans(pf->trans, PF_RULESET_SCRUB, path) || + pfctl_add_trans(pf->trans, PF_RULESET_FILTER, path)) + return (3); + } + if (pf->loadopt & PFCTL_FLAG_TABLE) + if (pfctl_add_trans(pf->trans, PF_RULESET_TABLE, path)) + return (4); + if (pfctl_trans(pf->dev, pf->trans, DIOCXBEGIN, osize)) + return (5); + + return (0); +} + +int +pfctl_load_ruleset(struct pfctl *pf, char *path, struct pf_ruleset *rs, + int rs_num, int depth) +{ + struct pf_rule *r; + int error, len = strlen(path); + int brace = 0; + + pf->anchor = rs->anchor; + + if (path[0]) + snprintf(&path[len], MAXPATHLEN - len, "/%s", pf->anchor->name); + else + snprintf(&path[len], MAXPATHLEN - len, "%s", pf->anchor->name); + + if (depth) { + if (TAILQ_FIRST(rs->rules[rs_num].active.ptr) != NULL) { + brace++; + if (pf->opts & PF_OPT_VERBOSE) + printf(" {\n"); + if ((pf->opts & PF_OPT_NOACTION) == 0 && + (error = pfctl_ruleset_trans(pf, + path, rs->anchor))) { + printf("pfctl_load_rulesets: " + "pfctl_ruleset_trans %d\n", error); + goto error; + } + } else if (pf->opts & PF_OPT_VERBOSE) + printf("\n"); + + } + + if (pf->optimize && rs_num == PF_RULESET_FILTER) + pfctl_optimize_ruleset(pf, rs); + + while ((r = TAILQ_FIRST(rs->rules[rs_num].active.ptr)) != NULL) { + TAILQ_REMOVE(rs->rules[rs_num].active.ptr, r, entries); + if ((error = pfctl_load_rule(pf, path, r, depth))) + goto error; + if (r->anchor) { + if ((error = 
pfctl_load_ruleset(pf, path, + &r->anchor->ruleset, rs_num, depth + 1))) + goto error; + } else if (pf->opts & PF_OPT_VERBOSE) + printf("\n"); + free(r); + } + if (brace && pf->opts & PF_OPT_VERBOSE) { + INDENT(depth - 1, (pf->opts & PF_OPT_VERBOSE)); + printf("}\n"); + } + path[len] = '\0'; + return (0); + + error: + path[len] = '\0'; + return (error); + +} + +int +pfctl_load_rule(struct pfctl *pf, char *path, struct pf_rule *r, int depth) +{ + u_int8_t rs_num = pf_get_ruleset_number(r->action); + char *name; + struct pfioc_rule pr; + int len = strlen(path); + + bzero(&pr, sizeof(pr)); + /* set up anchor before adding to path for anchor_call */ + if ((pf->opts & PF_OPT_NOACTION) == 0) + pr.ticket = pfctl_get_ticket(pf->trans, rs_num, path); + if (strlcpy(pr.anchor, path, sizeof(pr.anchor)) >= sizeof(pr.anchor)) + errx(1, "pfctl_load_rule: strlcpy"); + + if (r->anchor) { + if (r->anchor->match) { + if (path[0]) + snprintf(&path[len], MAXPATHLEN - len, + "/%s", r->anchor->name); + else + snprintf(&path[len], MAXPATHLEN - len, + "%s", r->anchor->name); + name = path; + } else + name = r->anchor->path; + } else + name = __DECONST(char *, ""); if ((pf->opts & PF_OPT_NOACTION) == 0) { - bzero(&pr, sizeof(pr)); - if (strlcpy(pr.anchor, pf->anchor, sizeof(pr.anchor)) >= - sizeof(pr.anchor) || - strlcpy(pr.ruleset, pf->ruleset, sizeof(pr.ruleset)) >= - sizeof(pr.ruleset)) - errx(1, "pfctl_add_rule: strlcpy"); if (pfctl_add_pool(pf, &r->rpool, r->af)) return (1); - pr.ticket = pfctl_get_ticket(pf->trans, rs_num, pf->anchor, - pf->ruleset); pr.pool_ticket = pf->paddr.ticket; memcpy(&pr.rule, r, sizeof(pr.rule)); + if (r->anchor && strlcpy(pr.anchor_call, name, + sizeof(pr.anchor_call)) >= sizeof(pr.anchor_call)) + errx(1, "pfctl_load_rule: strlcpy"); if (ioctl(pf->dev, DIOCADDRULE, &pr)) err(1, "DIOCADDRULE"); } - if (pf->opts & PF_OPT_VERBOSE) - print_rule(r, pf->opts & PF_OPT_VERBOSE2); + + if (pf->opts & PF_OPT_VERBOSE) { + INDENT(depth, !(pf->opts & PF_OPT_VERBOSE2)); + print_rule(r, r->anchor ? 
r->anchor->name : "", + pf->opts & PF_OPT_VERBOSE2); + } + path[len] = '\0'; pfctl_clear_pool(&r->rpool); return (0); } @@ -1025,94 +1335,86 @@ pfctl_add_altq(struct pfctl *pf, struct pf_altq *a) } int -pfctl_rules(int dev, char *filename, int opts, char *anchorname, - char *rulesetname, struct pfr_buffer *trans) +pfctl_rules(int dev, char *filename, FILE *fin, int opts, int optimize, + char *anchorname, struct pfr_buffer *trans) { #define ERR(x) do { warn(x); goto _error; } while(0) #define ERRX(x) do { warnx(x); goto _error; } while(0) - FILE *fin; struct pfr_buffer *t, buf; struct pfioc_altq pa; struct pfctl pf; + struct pf_ruleset *rs; struct pfr_table trs; + char *path; int osize; + RB_INIT(&pf_anchors); + memset(&pf_main_anchor, 0, sizeof(pf_main_anchor)); + pf_init_ruleset(&pf_main_anchor.ruleset); + pf_main_anchor.ruleset.anchor = &pf_main_anchor; if (trans == NULL) { - bzero(&buf, sizeof(buf)); - buf.pfrb_type = PFRB_TRANS; - t = &buf; - osize = 0; + bzero(&buf, sizeof(buf)); + buf.pfrb_type = PFRB_TRANS; + t = &buf; + osize = 0; } else { - t = trans; - osize = t->pfrb_size; + t = trans; + osize = t->pfrb_size; } memset(&pa, 0, sizeof(pa)); memset(&pf, 0, sizeof(pf)); memset(&trs, 0, sizeof(trs)); + if ((path = calloc(1, MAXPATHLEN)) == NULL) + ERRX("pfctl_rules: calloc"); if (strlcpy(trs.pfrt_anchor, anchorname, - sizeof(trs.pfrt_anchor)) >= sizeof(trs.pfrt_anchor) || - strlcpy(trs.pfrt_ruleset, rulesetname, - sizeof(trs.pfrt_ruleset)) >= sizeof(trs.pfrt_ruleset)) + sizeof(trs.pfrt_anchor)) >= sizeof(trs.pfrt_anchor)) ERRX("pfctl_rules: strlcpy"); - if (strcmp(filename, "-") == 0) { - fin = stdin; - infile = "stdin"; - } else { - if ((fin = fopen(filename, "r")) == NULL) { - warn("%s", filename); - return (1); - } - infile = filename; - } + infile = filename; pf.dev = dev; pf.opts = opts; + pf.optimize = optimize; pf.loadopt = loadopt; + + /* non-brace anchor, create without resolving the path */ + if ((pf.anchor = calloc(1, sizeof(*pf.anchor))) == NULL) + ERRX("pfctl_rules: calloc"); + rs = &pf.anchor->ruleset; + pf_init_ruleset(rs); + rs->anchor = pf.anchor; + if (strlcpy(pf.anchor->path, anchorname, + sizeof(pf.anchor->path)) >= sizeof(pf.anchor->path)) + errx(1, "pfctl_add_rule: strlcpy"); + if (strlcpy(pf.anchor->name, anchorname, + sizeof(pf.anchor->name)) >= sizeof(pf.anchor->name)) + errx(1, "pfctl_add_rule: strlcpy"); + + + pf.astack[0] = pf.anchor; + pf.asd = 0; if (anchorname[0]) pf.loadopt &= ~PFCTL_FLAG_ALTQ; pf.paltq = &pa; pf.trans = t; - pf.rule_nr = 0; - pf.anchor = anchorname; - pf.ruleset = rulesetname; + pfctl_init_options(&pf); if ((opts & PF_OPT_NOACTION) == 0) { - if ((pf.loadopt & PFCTL_FLAG_NAT) != 0) { - if (pfctl_add_trans(t, PF_RULESET_NAT, anchorname, - rulesetname) || - pfctl_add_trans(t, PF_RULESET_BINAT, anchorname, - rulesetname) || - pfctl_add_trans(t, PF_RULESET_RDR, anchorname, - rulesetname)) - ERR("pfctl_rules"); - } - if (((altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ) != 0))) { - if (pfctl_add_trans(t, PF_RULESET_ALTQ, anchorname, - rulesetname)) - ERR("pfctl_rules"); - } - if ((pf.loadopt & PFCTL_FLAG_FILTER) != 0) { - if (pfctl_add_trans(t, PF_RULESET_SCRUB, anchorname, - rulesetname) || - pfctl_add_trans(t, PF_RULESET_FILTER, anchorname, - rulesetname)) - ERR("pfctl_rules"); - } - if (pf.loadopt & PFCTL_FLAG_TABLE) { - if (pfctl_add_trans(t, PF_RULESET_TABLE, anchorname, - rulesetname)) - ERR("pfctl_rules"); - } - if (pfctl_trans(dev, t, DIOCXBEGIN, osize)) - ERR("DIOCXBEGIN"); + /* + * XXX For the time being we need to open transactions 
for + * the main ruleset before parsing, because tables are still + * loaded at parse time. + */ + if (pfctl_ruleset_trans(&pf, anchorname, pf.anchor)) + ERRX("pfctl_rules"); if (altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ)) - pa.ticket = pfctl_get_ticket(t, PF_RULESET_ALTQ, - anchorname, rulesetname); + pa.ticket = + pfctl_get_ticket(t, PF_RULESET_ALTQ, anchorname); if (pf.loadopt & PFCTL_FLAG_TABLE) - pf.tticket = pfctl_get_ticket(t, PF_RULESET_TABLE, - anchorname, rulesetname); + pf.astack[0]->ruleset.tticket = + pfctl_get_ticket(t, PF_RULESET_TABLE, anchorname); } + if (parse_rules(fin, &pf) < 0) { if ((opts & PF_OPT_NOACTION) == 0) ERRX("Syntax error in config file: " @@ -1120,60 +1422,191 @@ pfctl_rules(int dev, char *filename, int opts, char *anchorname, else goto _error; } + + if ((pf.loadopt & PFCTL_FLAG_FILTER && + (pfctl_load_ruleset(&pf, path, rs, PF_RULESET_SCRUB, 0))) || + (pf.loadopt & PFCTL_FLAG_NAT && + (pfctl_load_ruleset(&pf, path, rs, PF_RULESET_NAT, 0) || + pfctl_load_ruleset(&pf, path, rs, PF_RULESET_RDR, 0) || + pfctl_load_ruleset(&pf, path, rs, PF_RULESET_BINAT, 0))) || + (pf.loadopt & PFCTL_FLAG_FILTER && + pfctl_load_ruleset(&pf, path, rs, PF_RULESET_FILTER, 0))) { + if ((opts & PF_OPT_NOACTION) == 0) + ERRX("Unable to load rules into kernel"); + else + goto _error; + } + if ((altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ) != 0)) if (check_commit_altq(dev, opts) != 0) ERRX("errors in altq config"); - if (fin != stdin) + + if (fin != stdin) { fclose(fin); + fin = NULL; + } /* process "load anchor" directives */ - if (!anchorname[0] && !rulesetname[0]) - if (pfctl_load_anchors(dev, opts, t) == -1) + if (!anchorname[0]) + if (pfctl_load_anchors(dev, &pf, t) == -1) ERRX("load anchors"); - if (trans == NULL && (opts & PF_OPT_NOACTION) == 0) - if (pfctl_trans(dev, t, DIOCXCOMMIT, 0)) + if (trans == NULL && (opts & PF_OPT_NOACTION) == 0) { + if (!anchorname[0]) + if (pfctl_load_options(&pf)) + goto _error; + if (pfctl_trans(dev, t, DIOCXCOMMIT, osize)) ERR("DIOCXCOMMIT"); + } return (0); _error: if (trans == NULL) { /* main ruleset */ if ((opts & PF_OPT_NOACTION) == 0) - if (pfctl_trans(dev, t, DIOCXROLLBACK, 0)) + if (pfctl_trans(dev, t, DIOCXROLLBACK, osize)) err(1, "DIOCXROLLBACK"); exit(1); - } else /* sub ruleset */ + } else { /* sub ruleset */ + if (fin != NULL && fin != stdin) + fclose(fin); return (-1); + } #undef ERR #undef ERRX } +FILE * +pfctl_fopen(const char *name, const char *mode) +{ + struct stat st; + FILE *fp; + + fp = fopen(name, mode); + if (fp == NULL) + return (NULL); + if (fstat(fileno(fp), &st)) { + fclose(fp); + return (NULL); + } + if (S_ISDIR(st.st_mode)) { + fclose(fp); + errno = EISDIR; + return (NULL); + } + return (fp); +} + +void +pfctl_init_options(struct pfctl *pf) +{ + int mib[2], mem; + size_t size; + + pf->timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; + pf->timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; + pf->timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; + pf->timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; + pf->timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; + pf->timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; + pf->timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; + pf->timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; + pf->timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; + pf->timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; + pf->timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; + pf->timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; + 
pf->timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; + pf->timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; + pf->timeout[PFTM_FRAG] = PFTM_FRAG_VAL; + pf->timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; + pf->timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; + pf->timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; + pf->timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + pf->timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; + + pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT; + pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT; + pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT; + pf->limit[PF_LIMIT_TABLES] = PFR_KTABLE_HIWAT; + pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT; + + mib[0] = CTL_HW; + mib[1] = HW_PHYSMEM; + size = sizeof(mem); + (void) sysctl(mib, 2, &mem, &size, NULL, 0); + if (mem <= 100*1024*1024) + pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT_SMALL; + + pf->debug = PF_DEBUG_URGENT; +} + int -pfctl_set_limit(struct pfctl *pf, const char *opt, unsigned int limit) +pfctl_load_options(struct pfctl *pf) { - struct pfioc_limit pl; - int i; + int i, error = 0; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); - memset(&pl, 0, sizeof(pl)); + /* load limits */ + for (i = 0; i < PF_LIMIT_MAX; i++) { + if ((pf->opts & PF_OPT_MERGE) && !pf->limit_set[i]) + continue; + if (pfctl_load_limit(pf, i, pf->limit[i])) + error = 1; + } + + /* + * If we've set the limit, but havn't explicitly set adaptive + * timeouts, do it now with a start of 60% and end of 120%. + */ + if (pf->limit_set[PF_LIMIT_STATES] && + !pf->timeout_set[PFTM_ADAPTIVE_START] && + !pf->timeout_set[PFTM_ADAPTIVE_END]) { + pf->timeout[PFTM_ADAPTIVE_START] = + (pf->limit[PF_LIMIT_STATES] / 10) * 6; + pf->timeout_set[PFTM_ADAPTIVE_START] = 1; + pf->timeout[PFTM_ADAPTIVE_END] = + (pf->limit[PF_LIMIT_STATES] / 10) * 12; + pf->timeout_set[PFTM_ADAPTIVE_END] = 1; + } + + /* load timeouts */ + for (i = 0; i < PFTM_MAX; i++) { + if ((pf->opts & PF_OPT_MERGE) && !pf->timeout_set[i]) + continue; + if (pfctl_load_timeout(pf, i, pf->timeout[i])) + error = 1; + } + + /* load debug */ + if (!(pf->opts & PF_OPT_MERGE) || pf->debug_set) + if (pfctl_load_debug(pf, pf->debug)) + error = 1; + + /* load logif */ + if (!(pf->opts & PF_OPT_MERGE) || pf->ifname_set) + if (pfctl_load_logif(pf, pf->ifname)) + error = 1; + + /* load hostid */ + if (!(pf->opts & PF_OPT_MERGE) || pf->hostid_set) + if (pfctl_load_hostid(pf, pf->hostid)) + error = 1; + + return (error); +} + +int +pfctl_set_limit(struct pfctl *pf, const char *opt, unsigned int limit) +{ + int i; + + for (i = 0; pf_limits[i].name; i++) { if (strcasecmp(opt, pf_limits[i].name) == 0) { - pl.index = pf_limits[i].index; - pl.limit = limit; - if ((pf->opts & PF_OPT_NOACTION) == 0) { - if (ioctl(pf->dev, DIOCSETLIMIT, &pl)) { - if (errno == EBUSY) { - warnx("Current pool " - "size exceeds requested " - "hard limit"); - return (1); - } else - err(1, "DIOCSETLIMIT"); - } - } + pf->limit[pf_limits[i].index] = limit; + pf->limit_set[pf_limits[i].index] = 1; break; } } @@ -1188,19 +1621,36 @@ pfctl_set_limit(struct pfctl *pf, const char *opt, unsigned int limit) return (0); } +int +pfctl_load_limit(struct pfctl *pf, unsigned int his_index, unsigned int limit) +{ + struct pfioc_limit pl; + + memset(&pl, 0, sizeof(pl)); + pl.index = his_index; + pl.limit = limit; + if (ioctl(pf->dev, DIOCSETLIMIT, &pl)) { + if (errno == EBUSY) + warnx("Current pool size exceeds requested hard limit"); + else + warnx("DIOCSETLIMIT"); + return (1); + } + return (0); +} + int pfctl_set_timeout(struct pfctl *pf, const char 
*opt, int seconds, int quiet) { - struct pfioc_tm pt; int i; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); - memset(&pt, 0, sizeof(pt)); for (i = 0; pf_timeouts[i].name; i++) { if (strcasecmp(opt, pf_timeouts[i].name) == 0) { - pt.timeout = pf_timeouts[i].timeout; + pf->timeout[pf_timeouts[i].timeout] = seconds; + pf->timeout_set[pf_timeouts[i].timeout] = 1; break; } } @@ -1210,11 +1660,6 @@ pfctl_set_timeout(struct pfctl *pf, const char *opt, int seconds, int quiet) return (1); } - pt.seconds = seconds; - if ((pf->opts & PF_OPT_NOACTION) == 0) { - if (ioctl(pf->dev, DIOCSETTIMEOUT, &pt)) - err(1, "DIOCSETTIMEOUT"); - } if (pf->opts & PF_OPT_VERBOSE && ! quiet) printf("set timeout %s %d\n", opt, seconds); @@ -1222,6 +1667,21 @@ pfctl_set_timeout(struct pfctl *pf, const char *opt, int seconds, int quiet) return (0); } +int +pfctl_load_timeout(struct pfctl *pf, unsigned int timeout, unsigned int seconds) +{ + struct pfioc_tm pt; + + memset(&pt, 0, sizeof(pt)); + pt.timeout = timeout; + pt.seconds = seconds; + if (ioctl(pf->dev, DIOCSETTIMEOUT, &pt)) { + warnx("DIOCSETTIMEOUT"); + return (1); + } + return (0); +} + int pfctl_set_optimization(struct pfctl *pf, const char *opt) { @@ -1237,7 +1697,7 @@ pfctl_set_optimization(struct pfctl *pf, const char *opt) hint = pf_hints[i].hint; if (hint == NULL) { - warnx("Bad hint name."); + warnx("invalid state timeouts optimization"); return (1); } @@ -1255,23 +1715,19 @@ pfctl_set_optimization(struct pfctl *pf, const char *opt) int pfctl_set_logif(struct pfctl *pf, char *ifname) { - struct pfioc_if pi; if ((loadopt & PFCTL_FLAG_OPTION) == 0) return (0); - memset(&pi, 0, sizeof(pi)); - if ((pf->opts & PF_OPT_NOACTION) == 0) { - if (!strcmp(ifname, "none")) - bzero(pi.ifname, sizeof(pi.ifname)); - else { - if (strlcpy(pi.ifname, ifname, - sizeof(pi.ifname)) >= sizeof(pi.ifname)) - errx(1, "pfctl_set_logif: strlcpy"); - } - if (ioctl(pf->dev, DIOCSETSTATUSIF, &pi)) - err(1, "DIOCSETSTATUSIF"); + if (!strcmp(ifname, "none")) { + free(pf->ifname); + pf->ifname = NULL; + } else { + pf->ifname = strdup(ifname); + if (!pf->ifname) + errx(1, "pfctl_set_logif: strdup"); } + pf->ifname_set = 1; if (pf->opts & PF_OPT_VERBOSE) printf("set loginterface %s\n", ifname); @@ -1279,6 +1735,24 @@ pfctl_set_logif(struct pfctl *pf, char *ifname) return (0); } +int +pfctl_load_logif(struct pfctl *pf, char *ifname) +{ + struct pfioc_if pi; + + memset(&pi, 0, sizeof(pi)); + if (ifname && strlcpy(pi.ifname, ifname, + sizeof(pi.ifname)) >= sizeof(pi.ifname)) { + warnx("pfctl_load_logif: strlcpy"); + return (1); + } + if (ioctl(pf->dev, DIOCSETSTATUSIF, &pi)) { + warnx("DIOCSETSTATUSIF"); + return (1); + } + return (0); +} + int pfctl_set_hostid(struct pfctl *pf, u_int32_t hostid) { @@ -1287,10 +1761,8 @@ pfctl_set_hostid(struct pfctl *pf, u_int32_t hostid) hostid = htonl(hostid); - if ((pf->opts & PF_OPT_NOACTION) == 0) { - if (ioctl(dev_fd, DIOCSETHOSTID, &hostid)) - err(1, "DIOCSETHOSTID"); - } + pf->hostid = hostid; + pf->hostid_set = 1; if (pf->opts & PF_OPT_VERBOSE) printf("set hostid 0x%08x\n", ntohl(hostid)); @@ -1298,6 +1770,16 @@ pfctl_set_hostid(struct pfctl *pf, u_int32_t hostid) return (0); } +int +pfctl_load_hostid(struct pfctl *pf __unused, u_int32_t hostid) +{ + if (ioctl(dev_fd, DIOCSETHOSTID, &hostid)) { + warnx("DIOCSETHOSTID"); + return (1); + } + return (0); +} + int pfctl_set_debug(struct pfctl *pf, char *d) { @@ -1307,18 +1789,20 @@ pfctl_set_debug(struct pfctl *pf, char *d) return (0); if (!strcmp(d, "none")) - level = PF_DEBUG_NONE; + pf->debug = 
PF_DEBUG_NONE; else if (!strcmp(d, "urgent")) - level = PF_DEBUG_URGENT; + pf->debug = PF_DEBUG_URGENT; else if (!strcmp(d, "misc")) - level = PF_DEBUG_MISC; + pf->debug = PF_DEBUG_MISC; else if (!strcmp(d, "loud")) - level = PF_DEBUG_NOISY; + pf->debug = PF_DEBUG_NOISY; else { warnx("unknown debug level \"%s\"", d); return (-1); } + pf->debug_set = 1; + if ((pf->opts & PF_OPT_NOACTION) == 0) if (ioctl(dev_fd, DIOCSETDEBUG, &level)) err(1, "DIOCSETDEBUG"); @@ -1330,6 +1814,44 @@ pfctl_set_debug(struct pfctl *pf, char *d) } int +pfctl_load_debug(struct pfctl *pf, unsigned int level) +{ + if (ioctl(pf->dev, DIOCSETDEBUG, &level)) { + warnx("DIOCSETDEBUG"); + return (1); + } + return (0); +} + +int +pfctl_set_interface_flags(struct pfctl *pf, char *ifname, int flags, int how) +{ + struct pfioc_iface pi; + + if ((loadopt & PFCTL_FLAG_OPTION) == 0) + return (0); + + bzero(&pi, sizeof(pi)); + + pi.pfiio_flags = flags; + + if (strlcpy(pi.pfiio_name, ifname, sizeof(pi.pfiio_name)) >= + sizeof(pi.pfiio_name)) + errx(1, "pfctl_set_interface_flags: strlcpy"); + + if ((pf->opts & PF_OPT_NOACTION) == 0) { + if (how == 0) { + if (ioctl(pf->dev, DIOCCLRIFFLAG, &pi)) + err(1, "DIOCCLRIFFLAG"); + } else { + if (ioctl(pf->dev, DIOCSETIFFLAG, &pi)) + err(1, "DIOCSETIFFLAG"); + } + } + return (0); +} + +void pfctl_debug(int dev, u_int32_t level, int opts) { if (ioctl(dev, DIOCSETDEBUG, &level)) @@ -1355,17 +1877,6 @@ pfctl_debug(int dev, u_int32_t level, int opts) } fprintf(stderr, "'\n"); } - return (0); -} - -int -pfctl_clear_rule_counters(int dev, int opts) -{ - if (ioctl(dev, DIOCCLRRULECTRS)) - err(1, "DIOCCLRRULECTRS"); - if ((opts & PF_OPT_QUIET) == 0) - fprintf(stderr, "pf: rule counters cleared\n"); - return (0); } int @@ -1388,48 +1899,38 @@ pfctl_test_altqsupport(int dev, int opts) int pfctl_show_anchors(int dev, int opts, char *anchorname) { - u_int32_t nr, mnr; + struct pfioc_ruleset pr; + u_int32_t mnr, nr; - if (!*anchorname) { - struct pfioc_anchor pa; + memset(&pr, 0, sizeof(pr)); + memcpy(pr.path, anchorname, sizeof(pr.path)); + if (ioctl(dev, DIOCGETRULESETS, &pr)) { + if (errno == EINVAL) + fprintf(stderr, "Anchor '%s' not found.\n", + anchorname); + else + err(1, "DIOCGETRULESETS"); + return (-1); + } + mnr = pr.nr; + for (nr = 0; nr < mnr; ++nr) { + char sub[MAXPATHLEN]; - memset(&pa, 0, sizeof(pa)); - if (ioctl(dev, DIOCGETANCHORS, &pa)) { - warn("DIOCGETANCHORS"); - return (-1); - } - mnr = pa.nr; - for (nr = 0; nr < mnr; ++nr) { - pa.nr = nr; - if (ioctl(dev, DIOCGETANCHOR, &pa)) { - warn("DIOCGETANCHOR"); - return (-1); - } - if (!(opts & PF_OPT_VERBOSE) && - !strcmp(pa.name, PF_RESERVED_ANCHOR)) - continue; - printf(" %s\n", pa.name); + pr.nr = nr; + if (ioctl(dev, DIOCGETRULESET, &pr)) + err(1, "DIOCGETRULESET"); + if (!strcmp(pr.name, PF_RESERVED_ANCHOR)) + continue; + sub[0] = 0; + if (pr.path[0]) { + strlcat(sub, pr.path, sizeof(sub)); + strlcat(sub, "/", sizeof(sub)); } - } else { - struct pfioc_ruleset pr; - - memset(&pr, 0, sizeof(pr)); - memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); - if (ioctl(dev, DIOCGETRULESETS, &pr)) { - if (errno == EINVAL) - fprintf(stderr, "No rulesets in anchor '%s'.\n", - anchorname); - else - err(1, "DIOCGETRULESETS"); + strlcat(sub, pr.name, sizeof(sub)); + if (sub[0] != '_' || (opts & PF_OPT_VERBOSE)) + printf(" %s\n", sub); + if ((opts & PF_OPT_VERBOSE) && pfctl_show_anchors(dev, opts, sub)) return (-1); - } - mnr = pr.nr; - for (nr = 0; nr < mnr; ++nr) { - pr.nr = nr; - if (ioctl(dev, DIOCGETRULESET, &pr)) - err(1, "DIOCGETRULESET"); - 
printf(" %s:%s\n", pr.anchor, pr.name); - } } return (0); } @@ -1447,18 +1948,20 @@ pfctl_lookup_option(char *cmd, const char **list) int main(int argc, char *argv[]) { - int error = 0; - int ch; - int mode = O_RDONLY; - int opts = 0; - char anchorname[PF_ANCHOR_NAME_SIZE]; - char rulesetname[PF_RULESET_NAME_SIZE]; + int error = 0; + int ch; + int mode = O_RDONLY; + int opts = 0; + int optimize = 0; + char anchorname[MAXPATHLEN]; + char *path; + FILE *fin = NULL; if (argc < 2) usage(); while ((ch = getopt(argc, argv, - "a:AdD:eqf:F:ghi:k:nNOp:rRs:t:T:vx:z")) != -1) { + "a:AdD:eqf:F:ghi:k:K:mnNOo::p:rRs:t:T:vx:z")) != -1) { switch (ch) { case 'a': anchoropt = optarg; @@ -1499,6 +2002,18 @@ main(int argc, char *argv[]) state_kill[state_killers++] = optarg; mode = O_RDWR; break; + case 'K': + if (src_node_killers >= 2) { + warnx("can only specify -K twice"); + usage(); + /* NOTREACHED */ + } + src_node_kill[src_node_killers++] = optarg; + mode = O_RDWR; + break; + case 'm': + opts |= PF_OPT_MERGE; + break; case 'n': opts |= PF_OPT_NOACTION; break; @@ -1521,6 +2036,27 @@ main(int argc, char *argv[]) case 'R': loadopt |= PFCTL_FLAG_FILTER; break; + case 'o': + if (optarg) { + optiopt = pfctl_lookup_option(optarg, + optiopt_list); + if (optiopt == NULL) { + warnx("Unknown optimization '%s'", + optarg); + usage(); + } + } + if (opts & PF_OPT_OPTIMIZE) { + if (optiopt != NULL) { + warnx("Cannot specify -o multiple times" + "with optimizer level"); + usage(); + } + optimize |= PF_OPTIMIZE_PROFILE; + } + optimize |= PF_OPTIMIZE_BASIC; + opts |= PF_OPT_OPTIMIZE; + break; case 'O': loadopt |= PFCTL_FLAG_OPTION; break; @@ -1577,7 +2113,7 @@ main(int argc, char *argv[]) loadopt |= PFCTL_FLAG_TABLE; tblcmdopt = NULL; } else - mode = strchr("acdfkrz", ch) ? O_RDWR : O_RDONLY; + mode = strchr("acdefkrz", ch) ? 
O_RDWR : O_RDONLY; } else if (argc != optind) { warnx("unknown command line argument: %s ...", argv[optind]); usage(); @@ -1586,32 +2122,23 @@ main(int argc, char *argv[]) if (loadopt == 0) loadopt = ~0; + if ((path = calloc(1, MAXPATHLEN)) == NULL) + errx(1, "pfctl: calloc"); memset(anchorname, 0, sizeof(anchorname)); - memset(rulesetname, 0, sizeof(rulesetname)); if (anchoropt != NULL) { - char *t; + int len = strlen(anchoropt); - if ((t = strchr(anchoropt, ':')) == NULL) { - if (strlcpy(anchorname, anchoropt, - sizeof(anchorname)) >= sizeof(anchorname)) - errx(1, "anchor name '%s' too long", - anchoropt); - } else { - char *p; - - if ((p = strdup(anchoropt)) == NULL) - err(1, "anchoropt: strdup"); - t = strsep(&p, ":"); - if (*t == '\0' || *p == '\0') - errx(1, "anchor '%s' invalid", anchoropt); - if (strlcpy(anchorname, t, sizeof(anchorname)) >= - sizeof(anchorname)) - errx(1, "anchor name '%s' too long", t); - if (strlcpy(rulesetname, p, sizeof(rulesetname)) >= - sizeof(rulesetname)) - errx(1, "ruleset name '%s' too long", p); - free(t); /* not p */ + if (anchoropt[len - 1] == '*') { + if (len >= 2 && anchoropt[len - 2] == '/') + anchoropt[len - 2] = '\0'; + else + anchoropt[len - 1] = '\0'; + opts |= PF_OPT_RECURSE; } + if (strlcpy(anchorname, anchoropt, + sizeof(anchorname)) >= sizeof(anchorname)) + errx(1, "anchor name '%s' too long", + anchoropt); loadopt &= PFCTL_FLAG_FILTER|PFCTL_FLAG_NAT|PFCTL_FLAG_TABLE; } @@ -1641,17 +2168,17 @@ main(int argc, char *argv[]) break; case 'r': pfctl_load_fingerprints(dev_fd, opts); - pfctl_show_rules(dev_fd, opts, 0, anchorname, - rulesetname); + pfctl_show_rules(dev_fd, path, opts, PFCTL_SHOW_RULES, + anchorname, 0); break; case 'l': pfctl_load_fingerprints(dev_fd, opts); - pfctl_show_rules(dev_fd, opts, 1, anchorname, - rulesetname); + pfctl_show_rules(dev_fd, path, opts, PFCTL_SHOW_LABELS, + anchorname, 0); break; case 'n': pfctl_load_fingerprints(dev_fd, opts); - pfctl_show_nat(dev_fd, opts, anchorname, rulesetname); + pfctl_show_nat(dev_fd, opts, anchorname); break; case 'q': pfctl_show_altq(dev_fd, ifaceopt, opts, @@ -1676,21 +2203,20 @@ main(int argc, char *argv[]) opts |= PF_OPT_SHOWALL; pfctl_load_fingerprints(dev_fd, opts); - pfctl_show_nat(dev_fd, opts, anchorname, rulesetname); - pfctl_show_rules(dev_fd, opts, 0, anchorname, - rulesetname); + pfctl_show_nat(dev_fd, opts, anchorname); + pfctl_show_rules(dev_fd, path, opts, 0, anchorname, 0); pfctl_show_altq(dev_fd, ifaceopt, opts, 0); pfctl_show_states(dev_fd, ifaceopt, opts); pfctl_show_src_nodes(dev_fd, opts); pfctl_show_status(dev_fd, opts); - pfctl_show_rules(dev_fd, opts, 1, anchorname, rulesetname); + pfctl_show_rules(dev_fd, path, opts, 1, anchorname, 0); pfctl_show_timeouts(dev_fd, opts); pfctl_show_limits(dev_fd, opts); - pfctl_show_tables(anchorname, rulesetname, opts); + pfctl_show_tables(anchorname, opts); pfctl_show_fingerprints(opts); break; case 'T': - pfctl_show_tables(anchorname, rulesetname, opts); + pfctl_show_tables(anchorname, opts); break; case 'o': pfctl_load_fingerprints(dev_fd, opts); @@ -1702,13 +2228,21 @@ main(int argc, char *argv[]) } } + if ((opts & PF_OPT_CLRRULECTRS) && showopt == NULL) + pfctl_show_rules(dev_fd, path, opts, PFCTL_SHOW_NOTHING, + anchorname, 0); + if (clearopt != NULL) { + if (anchorname[0] == '_' || strstr(anchorname, "/_") != NULL) + errx(1, "anchor names beginning with '_' cannot " + "be modified from the command line"); + switch (*clearopt) { case 'r': - pfctl_clear_rules(dev_fd, opts, anchorname, rulesetname); + 
pfctl_clear_rules(dev_fd, opts, anchorname); break; case 'n': - pfctl_clear_nat(dev_fd, opts, anchorname, rulesetname); + pfctl_clear_nat(dev_fd, opts, anchorname); break; case 'q': pfctl_clear_altq(dev_fd, opts); @@ -1723,41 +2257,77 @@ main(int argc, char *argv[]) pfctl_clear_stats(dev_fd, opts); break; case 'a': - pfctl_clear_rules(dev_fd, opts, anchorname, rulesetname); - pfctl_clear_nat(dev_fd, opts, anchorname, rulesetname); - pfctl_clear_tables(anchorname, rulesetname, opts); - if (!*anchorname && !*rulesetname) { + pfctl_clear_rules(dev_fd, opts, anchorname); + pfctl_clear_nat(dev_fd, opts, anchorname); + pfctl_clear_tables(anchorname, opts); + if (!*anchorname) { pfctl_clear_altq(dev_fd, opts); pfctl_clear_states(dev_fd, ifaceopt, opts); pfctl_clear_src_nodes(dev_fd, opts); pfctl_clear_stats(dev_fd, opts); pfctl_clear_fingerprints(dev_fd, opts); + pfctl_clear_interface_flags(dev_fd, opts); } break; case 'o': pfctl_clear_fingerprints(dev_fd, opts); break; case 'T': - pfctl_clear_tables(anchorname, rulesetname, opts); + pfctl_clear_tables(anchorname, opts); break; } } if (state_killers) pfctl_kill_states(dev_fd, ifaceopt, opts); + if (src_node_killers) + pfctl_kill_src_nodes(dev_fd, ifaceopt, opts); + if (tblcmdopt != NULL) { error = pfctl_command_tables(argc, argv, tableopt, - tblcmdopt, rulesopt, anchorname, rulesetname, opts); + tblcmdopt, rulesopt, anchorname, opts); rulesopt = NULL; } + if (optiopt != NULL) { + switch (*optiopt) { + case 'n': + optimize = 0; + break; + case 'b': + optimize |= PF_OPTIMIZE_BASIC; + break; + case 'o': + case 'p': + optimize |= PF_OPTIMIZE_PROFILE; + break; + } + } + + if (rulesopt != NULL) { + if (strcmp(rulesopt, "-") == 0) { + fin = stdin; + rulesopt = __DECONST(char *, "stdin"); + } else { + if ((fin = pfctl_fopen(rulesopt, "r")) == NULL) + err(1, "%s", rulesopt); + } + } + if ((rulesopt != NULL) && (loadopt & PFCTL_FLAG_OPTION) && + !anchorname[0]) + if (pfctl_clear_interface_flags(dev_fd, opts | PF_OPT_QUIET)) + error = 1; - if (rulesopt != NULL) + if (rulesopt != NULL && !(opts & (PF_OPT_MERGE|PF_OPT_NOACTION)) && + !anchorname[0] && (loadopt & PFCTL_FLAG_OPTION)) if (pfctl_file_fingerprints(dev_fd, opts, PF_OSFP_FILE)) error = 1; if (rulesopt != NULL) { - if (pfctl_rules(dev_fd, rulesopt, opts, anchorname, rulesetname, - NULL)) + if (anchorname[0] == '_' || strstr(anchorname, "/_") != NULL) + errx(1, "anchor names beginning with '_' cannot " + "be modified from the command line"); + if (pfctl_rules(dev_fd, rulesopt, fin, opts, optimize, + anchorname, NULL)) error = 1; else if (!(opts & PF_OPT_NOACTION) && (loadopt & PFCTL_FLAG_TABLE)) @@ -1785,9 +2355,5 @@ main(int argc, char *argv[]) } } - if (opts & PF_OPT_CLRRULECTRS) { - if (pfctl_clear_rule_counters(dev_fd, opts)) - error = 1; - } exit(error); } diff --git a/usr.sbin/pfctl/pfctl.h b/usr.sbin/pfctl/pfctl.h index e59dbd8efe..2ab987577a 100644 --- a/usr.sbin/pfctl/pfctl.h +++ b/usr.sbin/pfctl/pfctl.h @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl.h,v 1.33 2004/02/19 21:37:01 cedric Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl.h,v 1.1 2004/09/21 21:25:28 joerg Exp $ */ +/* $OpenBSD: pfctl.h,v 1.40 2007/02/09 11:25:27 henning Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -34,6 +33,8 @@ #ifndef _PFCTL_H_ #define _PFCTL_H_ +enum pfctl_show { PFCTL_SHOW_RULES, PFCTL_SHOW_LABELS, PFCTL_SHOW_NOTHING }; + enum { PFRB_TABLES = 1, PFRB_TSTATS, PFRB_ADDRS, PFRB_ASTATS, PFRB_IFACES, PFRB_TRANS, PFRB_MAX }; struct pfr_buffer { @@ -65,8 +66,6 @@ int pfr_get_astats(struct pfr_table *, struct pfr_astats 
*, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); -int pfr_ina_begin(struct pfr_table *, int *, int *, int); -int pfr_ina_commit(struct pfr_table *, int, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, int, int); void pfr_buf_clear(struct pfr_buffer *); @@ -76,17 +75,18 @@ int pfr_buf_grow(struct pfr_buffer *, int); int pfr_buf_load(struct pfr_buffer *, char *, int, int (*)(struct pfr_buffer *, char *, int)); const char *pfr_strerror(int); -int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); +int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_clr_istats(const char *, int *, int); void pfctl_print_title(const char *); -int pfctl_clear_tables(const char *, const char *, int); -int pfctl_show_tables(const char *, const char *, int); +int pfctl_clear_tables(const char *, int); +int pfctl_show_tables(const char *, int); int pfctl_command_tables(int, char *[], char *, const char *, char *, - const char *, const char *, int); + const char *, int); int pfctl_show_altq(int, const char *, int, int); void warn_namespace_collision(const char *); int pfctl_show_ifaces(const char *, int); +FILE *pfctl_fopen(const char *, const char *); #ifndef DEFAULT_PRIORITY #define DEFAULT_PRIORITY 1 @@ -104,9 +104,10 @@ struct segment { double x, y, d, m; }; +extern int loadopt; + int check_commit_altq(int, int); void pfaltq_store(struct pf_altq *); -void pfaltq_free(struct pf_altq *); struct pf_altq *pfaltq_lookup(const char *); char *rate2str(double); @@ -117,9 +118,9 @@ void print_state(struct pf_state *, int); int unmask(struct pf_addr *, sa_family_t); int pfctl_cmdline_symset(char *); -int pfctl_add_trans(struct pfr_buffer *, int, const char *, const char *); +int pfctl_add_trans(struct pfr_buffer *, int, const char *); u_int32_t - pfctl_get_ticket(struct pfr_buffer *, int, const char *, const char *); + pfctl_get_ticket(struct pfr_buffer *, int, const char *); int pfctl_trans(int, struct pfr_buffer *, u_long, int); #endif /* _PFCTL_H_ */ diff --git a/usr.sbin/pfctl/pfctl_altq.c b/usr.sbin/pfctl/pfctl_altq.c index c7efc6368e..a498e2c734 100644 --- a/usr.sbin/pfctl/pfctl_altq.c +++ b/usr.sbin/pfctl/pfctl_altq.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_altq.c,v 1.83 2004/03/14 21:51:44 dhartmei Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_altq.c,v 1.4 2008/11/03 00:25:45 pavalos Exp $ */ +/* $OpenBSD: pfctl_altq.c,v 1.91 2006/11/28 00:08:50 henning Exp $ */ /* * Copyright (c) 2002 @@ -104,21 +103,6 @@ pfaltq_store(struct pf_altq *a) TAILQ_INSERT_TAIL(&altqs, altq, entries); } -void -pfaltq_free(struct pf_altq *a) -{ - struct pf_altq *altq; - - TAILQ_FOREACH(altq, &altqs, entries) { - if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && - strncmp(a->qname, altq->qname, PF_QNAME_SIZE) == 0) { - TAILQ_REMOVE(&altqs, altq, entries); - free(altq); - return; - } - } -} - struct pf_altq * pfaltq_lookup(const char *ifname) { @@ -168,7 +152,7 @@ print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw, struct node_queue_opt *qopts) { if (a->qname[0] != 0) { - print_queue(a, level, bw, 0, qopts); + print_queue(a, level, bw, 1, qopts); return; } @@ -248,8 +232,8 @@ print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw, * eval_pfaltq computes the discipline parameters. 
*/ int -eval_pfaltq(struct pfctl *pf __unused, struct pf_altq *pa, - struct node_queue_bw *bw, struct node_queue_opt *opts) +eval_pfaltq(struct pfctl *pf __unused, struct pf_altq *pa, struct node_queue_bw *bw, + struct node_queue_opt *opts) { u_int rate, size, errors = 0; @@ -257,8 +241,8 @@ eval_pfaltq(struct pfctl *pf __unused, struct pf_altq *pa, pa->ifbandwidth = bw->bw_absolute; else if ((rate = getifspeed(pa->ifname)) == 0) { - fprintf(stderr, "cannot determine interface bandwidth " - "for %s, specify an absolute bandwidth\n", + fprintf(stderr, "interface %s does not know its bandwidth, " + "please specify an absolute bandwidth\n", pa->ifname); errors++; } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0) @@ -326,7 +310,8 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, struct node_queue_opt *opts) { /* should be merged with expand_queue */ - struct pf_altq *if_pa, *parent; + struct pf_altq *if_pa, *parent, *altq; + u_int32_t bwsum; int error = 0; /* find the corresponding interface and copy fields used by queues */ @@ -358,23 +343,36 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, pa->qlimit = DEFAULT_QLIMIT; if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC || - pa->scheduler == ALTQT_FAIRQ) { - if ((pa->bandwidth = eval_bwspec(bw, - parent == NULL ? 0 : parent->bandwidth)) == 0) { - fprintf(stderr, "bandwidth for %s invalid (%d / %d)\n", - pa->qname, bw->bw_absolute, bw->bw_percent); - return (1); - } + pa->scheduler == ALTQT_FAIRQ) { + pa->bandwidth = eval_bwspec(bw, + parent == NULL ? 0 : parent->bandwidth); if (pa->bandwidth > pa->ifbandwidth) { fprintf(stderr, "bandwidth for %s higher than " "interface\n", pa->qname); return (1); } - if (parent != NULL && pa->bandwidth > parent->bandwidth) { - fprintf(stderr, "bandwidth for %s higher than parent\n", - pa->qname); - return (1); + /* check the sum of the child bandwidth is under parent's */ + if (parent != NULL) { + if (pa->bandwidth > parent->bandwidth) { + warnx("bandwidth for %s higher than parent", + pa->qname); + return (1); + } + bwsum = 0; + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, + IFNAMSIZ) == 0 && + altq->qname[0] != 0 && + strncmp(altq->parent, pa->parent, + PF_QNAME_SIZE) == 0) + bwsum += altq->bandwidth; + } + bwsum += pa->bandwidth; + if (bwsum > parent->bandwidth) { + warnx("the sum of the child bandwidth higher" + " than parent \"%s\"", parent->qname); + } } } @@ -502,10 +500,7 @@ cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa) maxidle = ptime * maxidle; else maxidle = ptime * maxidle_s; - if (minburst) - offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom); - else - offtime = cptime; + offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom); minidle = -((double)opts->maxpktsize * (double)nsPerByte); /* scale parameters */ @@ -708,8 +703,8 @@ eval_pfqueue_hfsc(struct pfctl *pf __unused, struct pf_altq *pa) } if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) || - (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) || - (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0)) { + (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) || + (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) { warnx("m1 must be zero for convex curve: %s", pa->qname); return (-1); } @@ -719,7 +714,7 @@ eval_pfqueue_hfsc(struct pfctl *pf __unused, struct pf_altq *pa) * for the real-time service curve, the sum of the service curves * should not exceed 80% of the interface 
bandwidth. 20% is reserved * not to over-commit the actual interface bandwidth. - * for the link-sharing service curve, the sum of the child service + * for the linkshare service curve, the sum of the child service * curve should not exceed the parent service curve. * for the upper-limit service curve, the assigned bandwidth should * be smaller than the interface bandwidth, and the upper-limit should @@ -746,7 +741,7 @@ eval_pfqueue_hfsc(struct pfctl *pf __unused, struct pf_altq *pa) if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0) continue; - /* if the class has a link-sharing service curve, add it. */ + /* if the class has a linkshare service curve, add it. */ if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) { sc.m1 = altq->pq_u.hfsc_opts.lssc_m1; sc.d = altq->pq_u.hfsc_opts.lssc_d; @@ -757,22 +752,35 @@ eval_pfqueue_hfsc(struct pfctl *pf __unused, struct pf_altq *pa) /* check the real-time service curve. reserve 20% of interface bw */ if (opts->rtsc_m2 != 0) { + /* add this queue to the sum */ + sc.m1 = opts->rtsc_m1; + sc.d = opts->rtsc_d; + sc.m2 = opts->rtsc_m2; + gsc_add_sc(&rtsc, &sc); + /* compare the sum with 80% of the interface */ sc.m1 = 0; sc.d = 0; sc.m2 = pa->ifbandwidth / 100 * 80; if (!is_gsc_under_sc(&rtsc, &sc)) { - warnx("real-time sc exceeds the interface bandwidth"); + warnx("real-time sc exceeds 80%% of the interface " + "bandwidth (%s)", rate2str((double)sc.m2)); goto err_ret; } } - /* check the link-sharing service curve. */ + /* check the linkshare service curve. */ if (opts->lssc_m2 != 0) { + /* add this queue to the child sum */ + sc.m1 = opts->lssc_m1; + sc.d = opts->lssc_d; + sc.m2 = opts->lssc_m2; + gsc_add_sc(&lssc, &sc); + /* compare the sum of the children with parent's sc */ sc.m1 = parent->pq_u.hfsc_opts.lssc_m1; sc.d = parent->pq_u.hfsc_opts.lssc_d; sc.m2 = parent->pq_u.hfsc_opts.lssc_m2; if (!is_gsc_under_sc(&lssc, &sc)) { - warnx("link-sharing sc exceeds parent's sc"); + warnx("linkshare sc exceeds parent's sc"); goto err_ret; } } @@ -1279,6 +1287,7 @@ getifmtu(char *ifname) if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) err(1, "socket"); + bzero(&ifr, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) errx(1, "getifmtu: strlcpy"); diff --git a/usr.sbin/pfctl/pfctl_optimize.c b/usr.sbin/pfctl/pfctl_optimize.c new file mode 100644 index 0000000000..4c3d84e08c --- /dev/null +++ b/usr.sbin/pfctl/pfctl_optimize.c @@ -0,0 +1,1645 @@ +/* $OpenBSD: pfctl_optimize.c,v 1.13 2006/10/31 14:17:45 mcbride Exp $ */ + +/* + * Copyright (c) 2004 Mike Frantzen + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pfctl_parser.h" +#include "pfctl.h" + +/* The size at which a table becomes faster than individual rules */ +#define TABLE_THRESHOLD 6 + + +/* #define OPT_DEBUG 1 */ +#ifdef OPT_DEBUG +# define DEBUG(str, v...) \ + printf("%s: " str "\n", __FUNCTION__ , ## v) +#else +# define DEBUG(str, v...) ((void)0) +#endif + + +/* + * A container that lets us sort a superblock to optimize the skip step jumps + */ +struct pf_skip_step { + int ps_count; /* number of items */ + TAILQ_HEAD( , pf_opt_rule) ps_rules; + TAILQ_ENTRY(pf_skip_step) ps_entry; +}; + + +/* + * A superblock is a block of adjacent rules of similar action. If there + * are five PASS rules in a row, they all become members of a superblock. + * Once we have a superblock, we are free to re-order any rules within it + * in order to improve performance; if a packet is passed, it doesn't matter + * who passed it. + */ +struct superblock { + TAILQ_HEAD( , pf_opt_rule) sb_rules; + TAILQ_ENTRY(superblock) sb_entry; + struct superblock *sb_profiled_block; + TAILQ_HEAD(skiplist, pf_skip_step) sb_skipsteps[PF_SKIP_COUNT]; +}; +TAILQ_HEAD(superblocks, superblock); + + +/* + * Description of the PF rule structure. + */ +enum { + BARRIER, /* the presence of the field puts the rule in it's own block */ + BREAK, /* the field may not differ between rules in a superblock */ + NOMERGE, /* the field may not differ between rules when combined */ + COMBINED, /* the field may itself be combined with other rules */ + DC, /* we just don't care about the field */ + NEVER}; /* we should never see this field set?!? */ +struct pf_rule_field { + const char *prf_name; + int prf_type; + size_t prf_offset; + size_t prf_size; +} pf_rule_desc[] = { +#define PF_RULE_FIELD(field, ty) \ + {#field, \ + ty, \ + offsetof(struct pf_rule, field), \ + sizeof(((struct pf_rule *)0)->field)} + + + /* + * The presence of these fields in a rule put the rule in it's own + * superblock. Thus it will not be optimized. It also prevents the + * rule from being re-ordered at all. + */ + PF_RULE_FIELD(label, BARRIER), + PF_RULE_FIELD(prob, BARRIER), + PF_RULE_FIELD(max_states, BARRIER), + PF_RULE_FIELD(max_src_nodes, BARRIER), + PF_RULE_FIELD(max_src_states, BARRIER), + PF_RULE_FIELD(max_src_conn, BARRIER), + PF_RULE_FIELD(max_src_conn_rate, BARRIER), + PF_RULE_FIELD(anchor, BARRIER), /* for now */ + + /* + * These fields must be the same between all rules in the same superblock. + * These rules are allowed to be re-ordered but only among like rules. + * For instance we can re-order all 'tag "foo"' rules because they have the + * same tag. But we can not re-order between a 'tag "foo"' and a + * 'tag "bar"' since that would change the meaning of the ruleset. 
+ */ + PF_RULE_FIELD(tagname, BREAK), + PF_RULE_FIELD(keep_state, BREAK), + PF_RULE_FIELD(qname, BREAK), + PF_RULE_FIELD(pqname, BREAK), + PF_RULE_FIELD(rt, BREAK), + PF_RULE_FIELD(allow_opts, BREAK), + PF_RULE_FIELD(rule_flag, BREAK), + PF_RULE_FIELD(action, BREAK), + PF_RULE_FIELD(log, BREAK), + PF_RULE_FIELD(quick, BREAK), + PF_RULE_FIELD(return_ttl, BREAK), + PF_RULE_FIELD(overload_tblname, BREAK), + PF_RULE_FIELD(flush, BREAK), + PF_RULE_FIELD(rpool, BREAK), + PF_RULE_FIELD(logif, BREAK), + + /* + * Any fields not listed in this structure act as BREAK fields + */ + + + /* + * These fields must not differ when we merge two rules together but + * their difference isn't enough to put the rules in different superblocks. + * There are no problems re-ordering any rules with these fields. + */ + PF_RULE_FIELD(af, NOMERGE), + PF_RULE_FIELD(ifnot, NOMERGE), + PF_RULE_FIELD(ifname, NOMERGE), /* hack for IF groups */ + PF_RULE_FIELD(match_tag_not, NOMERGE), + PF_RULE_FIELD(match_tagname, NOMERGE), + PF_RULE_FIELD(os_fingerprint, NOMERGE), + PF_RULE_FIELD(timeout, NOMERGE), + PF_RULE_FIELD(return_icmp, NOMERGE), + PF_RULE_FIELD(return_icmp6, NOMERGE), + PF_RULE_FIELD(uid, NOMERGE), + PF_RULE_FIELD(gid, NOMERGE), + PF_RULE_FIELD(direction, NOMERGE), + PF_RULE_FIELD(proto, NOMERGE), + PF_RULE_FIELD(type, NOMERGE), + PF_RULE_FIELD(code, NOMERGE), + PF_RULE_FIELD(flags, NOMERGE), + PF_RULE_FIELD(flagset, NOMERGE), + PF_RULE_FIELD(tos, NOMERGE), + PF_RULE_FIELD(src.port, NOMERGE), + PF_RULE_FIELD(dst.port, NOMERGE), + PF_RULE_FIELD(src.port_op, NOMERGE), + PF_RULE_FIELD(dst.port_op, NOMERGE), + PF_RULE_FIELD(src.neg, NOMERGE), + PF_RULE_FIELD(dst.neg, NOMERGE), + + /* These fields can be merged */ + PF_RULE_FIELD(src.addr, COMBINED), + PF_RULE_FIELD(dst.addr, COMBINED), + + /* We just don't care about these fields. 
They're set by the kernel */ + PF_RULE_FIELD(skip, DC), + PF_RULE_FIELD(evaluations, DC), + PF_RULE_FIELD(packets, DC), + PF_RULE_FIELD(bytes, DC), + PF_RULE_FIELD(kif, DC), + PF_RULE_FIELD(states, DC), + PF_RULE_FIELD(src_nodes, DC), + PF_RULE_FIELD(nr, DC), + PF_RULE_FIELD(entries, DC), + PF_RULE_FIELD(qid, DC), + PF_RULE_FIELD(pqid, DC), + PF_RULE_FIELD(anchor_relative, DC), + PF_RULE_FIELD(anchor_wildcard, DC), + PF_RULE_FIELD(tag, DC), + PF_RULE_FIELD(match_tag, DC), + PF_RULE_FIELD(overload_tbl, DC), + + /* These fields should never be set in a PASS/BLOCK rule */ + PF_RULE_FIELD(natpass, NEVER), + PF_RULE_FIELD(max_mss, NEVER), + PF_RULE_FIELD(min_ttl, NEVER), +}; + + + +int add_opt_table(struct pfctl *, struct pf_opt_tbl **, sa_family_t, + struct pf_rule_addr *); +int addrs_combineable(struct pf_rule_addr *, struct pf_rule_addr *); +int addrs_equal(struct pf_rule_addr *, struct pf_rule_addr *); +int block_feedback(struct pfctl *, struct superblock *); +int combine_rules(struct pfctl *, struct superblock *); +void comparable_rule(struct pf_rule *, const struct pf_rule *, int); +int construct_superblocks(struct pfctl *, struct pf_opt_queue *, + struct superblocks *); +void exclude_supersets(struct pf_rule *, struct pf_rule *); +int interface_group(const char *); +int load_feedback_profile(struct pfctl *, struct superblocks *); +int optimize_superblock(struct pfctl *, struct superblock *); +int pf_opt_create_table(struct pfctl *, struct pf_opt_tbl *); +void remove_from_skipsteps(struct skiplist *, struct superblock *, + struct pf_opt_rule *, struct pf_skip_step *); +int remove_identical_rules(struct pfctl *, struct superblock *); +int reorder_rules(struct pfctl *, struct superblock *, int); +int rules_combineable(struct pf_rule *, struct pf_rule *); +void skip_append(struct superblock *, int, struct pf_skip_step *, + struct pf_opt_rule *); +int skip_compare(int, struct pf_skip_step *, struct pf_opt_rule *); +void skip_init(void); +int skip_cmp_af(struct pf_rule *, struct pf_rule *); +int skip_cmp_dir(struct pf_rule *, struct pf_rule *); +int skip_cmp_dst_addr(struct pf_rule *, struct pf_rule *); +int skip_cmp_dst_port(struct pf_rule *, struct pf_rule *); +int skip_cmp_ifp(struct pf_rule *, struct pf_rule *); +int skip_cmp_proto(struct pf_rule *, struct pf_rule *); +int skip_cmp_src_addr(struct pf_rule *, struct pf_rule *); +int skip_cmp_src_port(struct pf_rule *, struct pf_rule *); +int superblock_inclusive(struct superblock *, struct pf_opt_rule *); +void superblock_free(struct pfctl *, struct superblock *); + + +int (*skip_comparitors[PF_SKIP_COUNT])(struct pf_rule *, struct pf_rule *); +const char *skip_comparitors_names[PF_SKIP_COUNT]; +#define PF_SKIP_COMPARITORS { \ + { "ifp", PF_SKIP_IFP, skip_cmp_ifp }, \ + { "dir", PF_SKIP_DIR, skip_cmp_dir }, \ + { "af", PF_SKIP_AF, skip_cmp_af }, \ + { "proto", PF_SKIP_PROTO, skip_cmp_proto }, \ + { "saddr", PF_SKIP_SRC_ADDR, skip_cmp_src_addr }, \ + { "sport", PF_SKIP_SRC_PORT, skip_cmp_src_port }, \ + { "daddr", PF_SKIP_DST_ADDR, skip_cmp_dst_addr }, \ + { "dport", PF_SKIP_DST_PORT, skip_cmp_dst_port } \ +} + +struct pfr_buffer table_buffer; +int table_identifier; + + +int +pfctl_optimize_ruleset(struct pfctl *pf, struct pf_ruleset *rs) +{ + struct superblocks superblocks; + struct pf_opt_queue opt_queue; + struct superblock *block; + struct pf_opt_rule *por; + struct pf_rule *r; + struct pf_rulequeue *old_rules; + + DEBUG("optimizing ruleset"); + memset(&table_buffer, 0, sizeof(table_buffer)); + skip_init(); + TAILQ_INIT(&opt_queue); + + 
old_rules = rs->rules[PF_RULESET_FILTER].active.ptr; + rs->rules[PF_RULESET_FILTER].active.ptr = + rs->rules[PF_RULESET_FILTER].inactive.ptr; + rs->rules[PF_RULESET_FILTER].inactive.ptr = old_rules; + + /* + * XXX expanding the pf_opt_rule format throughout pfctl might allow + * us to avoid all this copying. + */ + while ((r = TAILQ_FIRST(rs->rules[PF_RULESET_FILTER].inactive.ptr)) + != NULL) { + TAILQ_REMOVE(rs->rules[PF_RULESET_FILTER].inactive.ptr, r, + entries); + if ((por = calloc(1, sizeof(*por))) == NULL) + err(1, "calloc"); + memcpy(&por->por_rule, r, sizeof(*r)); + if (TAILQ_FIRST(&r->rpool.list) != NULL) { + TAILQ_INIT(&por->por_rule.rpool.list); + pfctl_move_pool(&r->rpool, &por->por_rule.rpool); + } else + bzero(&por->por_rule.rpool, + sizeof(por->por_rule.rpool)); + + + TAILQ_INSERT_TAIL(&opt_queue, por, por_entry); + } + + TAILQ_INIT(&superblocks); + if (construct_superblocks(pf, &opt_queue, &superblocks)) + goto error; + + if (pf->optimize & PF_OPTIMIZE_PROFILE) { + if (load_feedback_profile(pf, &superblocks)) + goto error; + } + + TAILQ_FOREACH(block, &superblocks, sb_entry) { + if (optimize_superblock(pf, block)) + goto error; + } + + rs->anchor->refcnt = 0; + while ((block = TAILQ_FIRST(&superblocks))) { + TAILQ_REMOVE(&superblocks, block, sb_entry); + + while ((por = TAILQ_FIRST(&block->sb_rules))) { + TAILQ_REMOVE(&block->sb_rules, por, por_entry); + por->por_rule.nr = rs->anchor->refcnt++; + if ((r = calloc(1, sizeof(*r))) == NULL) + err(1, "calloc"); + memcpy(r, &por->por_rule, sizeof(*r)); + TAILQ_INIT(&r->rpool.list); + pfctl_move_pool(&por->por_rule.rpool, &r->rpool); + TAILQ_INSERT_TAIL( + rs->rules[PF_RULESET_FILTER].active.ptr, + r, entries); + free(por); + } + free(block); + } + + return (0); + +error: + while ((por = TAILQ_FIRST(&opt_queue))) { + TAILQ_REMOVE(&opt_queue, por, por_entry); + if (por->por_src_tbl) { + pfr_buf_clear(por->por_src_tbl->pt_buf); + free(por->por_src_tbl->pt_buf); + free(por->por_src_tbl); + } + if (por->por_dst_tbl) { + pfr_buf_clear(por->por_dst_tbl->pt_buf); + free(por->por_dst_tbl->pt_buf); + free(por->por_dst_tbl); + } + free(por); + } + while ((block = TAILQ_FIRST(&superblocks))) { + TAILQ_REMOVE(&superblocks, block, sb_entry); + superblock_free(pf, block); + } + return (1); +} + + +/* + * Go ahead and optimize a superblock + */ +int +optimize_superblock(struct pfctl *pf, struct superblock *block) +{ +#ifdef OPT_DEBUG + struct pf_opt_rule *por; +#endif /* OPT_DEBUG */ + + /* We have a few optimization passes: + * 1) remove duplicate rules or rules that are a subset of other + * rules + * 2) combine otherwise identical rules with different IP addresses + * into a single rule and put the addresses in a table. + * 3) re-order the rules to improve kernel skip steps + * 4) re-order the 'quick' rules based on feedback from the + * active ruleset statistics + * + * XXX combine_rules() doesn't combine v4 and v6 rules. would just + * have to keep af in the table container, make af 'COMBINE' and + * twiddle the af on the merged rule + * XXX maybe add a weighting to the metric on skipsteps when doing + * reordering. sometimes two sequential tables will be better + * that four consecutive interfaces. + * XXX need to adjust the skipstep count of everything after PROTO, + * since they aren't actually checked on a proto mismatch in + * pf_test_{tcp, udp, icmp}() + * XXX should i treat proto=0, af=0 or dir=0 special in skepstep + * calculation since they are a DC? + * XXX keep last skiplist of last superblock to influence this + * superblock. 
'5 inet6 log' should make '3 inet6' come before '4 + * inet' in the next superblock. + * XXX would be useful to add tables for ports + * XXX we can also re-order some mutually exclusive superblocks to + * try merging superblocks before any of these optimization passes. + * for instance a single 'log in' rule in the middle of non-logging + * out rules. + */ + + /* shortcut. there will be alot of 1-rule superblocks */ + if (!TAILQ_NEXT(TAILQ_FIRST(&block->sb_rules), por_entry)) + return (0); + +#ifdef OPT_DEBUG + printf("--- Superblock ---\n"); + TAILQ_FOREACH(por, &block->sb_rules, por_entry) { + printf(" "); + print_rule(&por->por_rule, por->por_rule.anchor ? + por->por_rule.anchor->name : "", 1); + } +#endif /* OPT_DEBUG */ + + + if (remove_identical_rules(pf, block)) + return (1); + if (combine_rules(pf, block)) + return (1); + if ((pf->optimize & PF_OPTIMIZE_PROFILE) && + TAILQ_FIRST(&block->sb_rules)->por_rule.quick && + block->sb_profiled_block) { + if (block_feedback(pf, block)) + return (1); + } else if (reorder_rules(pf, block, 0)) { + return (1); + } + + /* + * Don't add any optimization passes below reorder_rules(). It will + * have divided superblocks into smaller blocks for further refinement + * and doesn't put them back together again. What once was a true + * superblock might have been split into multiple superblocks. + */ + +#ifdef OPT_DEBUG + printf("--- END Superblock ---\n"); +#endif /* OPT_DEBUG */ + return (0); +} + + +/* + * Optimization pass #1: remove identical rules + */ +int +remove_identical_rules(struct pfctl *pf, struct superblock *block) +{ + struct pf_opt_rule *por1, *por2, *por_next, *por2_next; + struct pf_rule a, a2, b, b2; + + for (por1 = TAILQ_FIRST(&block->sb_rules); por1; por1 = por_next) { + por_next = TAILQ_NEXT(por1, por_entry); + for (por2 = por_next; por2; por2 = por2_next) { + por2_next = TAILQ_NEXT(por2, por_entry); + comparable_rule(&a, &por1->por_rule, DC); + comparable_rule(&b, &por2->por_rule, DC); + memcpy(&a2, &a, sizeof(a2)); + memcpy(&b2, &b, sizeof(b2)); + + exclude_supersets(&a, &b); + exclude_supersets(&b2, &a2); + if (memcmp(&a, &b, sizeof(a)) == 0) { + DEBUG("removing identical rule nr%d = *nr%d*", + por1->por_rule.nr, por2->por_rule.nr); + TAILQ_REMOVE(&block->sb_rules, por2, por_entry); + if (por_next == por2) + por_next = TAILQ_NEXT(por1, por_entry); + free(por2); + } else if (memcmp(&a2, &b2, sizeof(a2)) == 0) { + DEBUG("removing identical rule *nr%d* = nr%d", + por1->por_rule.nr, por2->por_rule.nr); + TAILQ_REMOVE(&block->sb_rules, por1, por_entry); + free(por1); + break; + } + } + } + + return (0); +} + + +/* + * Optimization pass #2: combine similar rules with different addresses + * into a single rule and a table + */ +int +combine_rules(struct pfctl *pf, struct superblock *block) +{ + struct pf_opt_rule *p1, *p2, *por_next; + int src_eq, dst_eq; + + if ((pf->loadopt & PFCTL_FLAG_TABLE) == 0) { + warnx("Must enable table loading for optimizations"); + return (1); + } + + /* First we make a pass to combine the rules. 
O(n log n) */ + TAILQ_FOREACH(p1, &block->sb_rules, por_entry) { + for (p2 = TAILQ_NEXT(p1, por_entry); p2; p2 = por_next) { + por_next = TAILQ_NEXT(p2, por_entry); + + src_eq = addrs_equal(&p1->por_rule.src, + &p2->por_rule.src); + dst_eq = addrs_equal(&p1->por_rule.dst, + &p2->por_rule.dst); + + if (src_eq && !dst_eq && p1->por_src_tbl == NULL && + p2->por_dst_tbl == NULL && + p2->por_src_tbl == NULL && + rules_combineable(&p1->por_rule, &p2->por_rule) && + addrs_combineable(&p1->por_rule.dst, + &p2->por_rule.dst)) { + DEBUG("can combine rules nr%d = nr%d", + p1->por_rule.nr, p2->por_rule.nr); + if (p1->por_dst_tbl == NULL && + add_opt_table(pf, &p1->por_dst_tbl, + p1->por_rule.af, &p1->por_rule.dst)) + return (1); + if (add_opt_table(pf, &p1->por_dst_tbl, + p1->por_rule.af, &p2->por_rule.dst)) + return (1); + p2->por_dst_tbl = p1->por_dst_tbl; + if (p1->por_dst_tbl->pt_rulecount >= + TABLE_THRESHOLD) { + TAILQ_REMOVE(&block->sb_rules, p2, + por_entry); + free(p2); + } + } else if (!src_eq && dst_eq && p1->por_dst_tbl == NULL + && p2->por_src_tbl == NULL && + p2->por_dst_tbl == NULL && + rules_combineable(&p1->por_rule, &p2->por_rule) && + addrs_combineable(&p1->por_rule.src, + &p2->por_rule.src)) { + DEBUG("can combine rules nr%d = nr%d", + p1->por_rule.nr, p2->por_rule.nr); + if (p1->por_src_tbl == NULL && + add_opt_table(pf, &p1->por_src_tbl, + p1->por_rule.af, &p1->por_rule.src)) + return (1); + if (add_opt_table(pf, &p1->por_src_tbl, + p1->por_rule.af, &p2->por_rule.src)) + return (1); + p2->por_src_tbl = p1->por_src_tbl; + if (p1->por_src_tbl->pt_rulecount >= + TABLE_THRESHOLD) { + TAILQ_REMOVE(&block->sb_rules, p2, + por_entry); + free(p2); + } + } + } + } + + + /* + * Then we make a final pass to create a valid table name and + * insert the name into the rules. 
+ */ + for (p1 = TAILQ_FIRST(&block->sb_rules); p1; p1 = por_next) { + por_next = TAILQ_NEXT(p1, por_entry); + assert(p1->por_src_tbl == NULL || p1->por_dst_tbl == NULL); + + if (p1->por_src_tbl && p1->por_src_tbl->pt_rulecount >= + TABLE_THRESHOLD) { + if (p1->por_src_tbl->pt_generated) { + /* This rule is included in a table */ + TAILQ_REMOVE(&block->sb_rules, p1, por_entry); + free(p1); + continue; + } + p1->por_src_tbl->pt_generated = 1; + + if ((pf->opts & PF_OPT_NOACTION) == 0 && + pf_opt_create_table(pf, p1->por_src_tbl)) + return (1); + + pf->tdirty = 1; + + if (pf->opts & PF_OPT_VERBOSE) + print_tabledef(p1->por_src_tbl->pt_name, + PFR_TFLAG_CONST, 1, + &p1->por_src_tbl->pt_nodes); + + memset(&p1->por_rule.src.addr, 0, + sizeof(p1->por_rule.src.addr)); + p1->por_rule.src.addr.type = PF_ADDR_TABLE; + strlcpy(p1->por_rule.src.addr.v.tblname, + p1->por_src_tbl->pt_name, + sizeof(p1->por_rule.src.addr.v.tblname)); + + pfr_buf_clear(p1->por_src_tbl->pt_buf); + free(p1->por_src_tbl->pt_buf); + p1->por_src_tbl->pt_buf = NULL; + } + if (p1->por_dst_tbl && p1->por_dst_tbl->pt_rulecount >= + TABLE_THRESHOLD) { + if (p1->por_dst_tbl->pt_generated) { + /* This rule is included in a table */ + TAILQ_REMOVE(&block->sb_rules, p1, por_entry); + free(p1); + continue; + } + p1->por_dst_tbl->pt_generated = 1; + + if ((pf->opts & PF_OPT_NOACTION) == 0 && + pf_opt_create_table(pf, p1->por_dst_tbl)) + return (1); + pf->tdirty = 1; + + if (pf->opts & PF_OPT_VERBOSE) + print_tabledef(p1->por_dst_tbl->pt_name, + PFR_TFLAG_CONST, 1, + &p1->por_dst_tbl->pt_nodes); + + memset(&p1->por_rule.dst.addr, 0, + sizeof(p1->por_rule.dst.addr)); + p1->por_rule.dst.addr.type = PF_ADDR_TABLE; + strlcpy(p1->por_rule.dst.addr.v.tblname, + p1->por_dst_tbl->pt_name, + sizeof(p1->por_rule.dst.addr.v.tblname)); + + pfr_buf_clear(p1->por_dst_tbl->pt_buf); + free(p1->por_dst_tbl->pt_buf); + p1->por_dst_tbl->pt_buf = NULL; + } + } + + return (0); +} + + +/* + * Optimization pass #3: re-order rules to improve skip steps + */ +int +reorder_rules(struct pfctl *pf, struct superblock *block, int depth) +{ + struct superblock *newblock; + struct pf_skip_step *skiplist; + struct pf_opt_rule *por; + int i, largest, largest_list, rule_count = 0; + TAILQ_HEAD( , pf_opt_rule) head; + + /* + * Calculate the best-case skip steps. We put each rule in a list + * of other rules with common fields + */ + for (i = 0; i < PF_SKIP_COUNT; i++) { + TAILQ_FOREACH(por, &block->sb_rules, por_entry) { + TAILQ_FOREACH(skiplist, &block->sb_skipsteps[i], + ps_entry) { + if (skip_compare(i, skiplist, por) == 0) + break; + } + if (skiplist == NULL) { + if ((skiplist = calloc(1, sizeof(*skiplist))) == + NULL) + err(1, "calloc"); + TAILQ_INIT(&skiplist->ps_rules); + TAILQ_INSERT_TAIL(&block->sb_skipsteps[i], + skiplist, ps_entry); + } + skip_append(block, i, skiplist, por); + } + } + + TAILQ_FOREACH(por, &block->sb_rules, por_entry) + rule_count++; + + /* + * Now we're going to ignore any fields that are identical between + * all of the rules in the superblock and those fields which differ + * between every rule in the superblock. 
+ */ + largest = 0; + for (i = 0; i < PF_SKIP_COUNT; i++) { + skiplist = TAILQ_FIRST(&block->sb_skipsteps[i]); + if (skiplist->ps_count == rule_count) { + DEBUG("(%d) original skipstep '%s' is all rules", + depth, skip_comparitors_names[i]); + skiplist->ps_count = 0; + } else if (skiplist->ps_count == 1) { + skiplist->ps_count = 0; + } else { + DEBUG("(%d) original skipstep '%s' largest jump is %d", + depth, skip_comparitors_names[i], + skiplist->ps_count); + if (skiplist->ps_count > largest) + largest = skiplist->ps_count; + } + } + if (largest == 0) { + /* Ugh. There is NO commonality in the superblock on which + * optimize the skipsteps optimization. + */ + goto done; + } + + /* + * Now we're going to empty the superblock rule list and re-create + * it based on a more optimal skipstep order. + */ + TAILQ_INIT(&head); + while ((por = TAILQ_FIRST(&block->sb_rules))) { + TAILQ_REMOVE(&block->sb_rules, por, por_entry); + TAILQ_INSERT_TAIL(&head, por, por_entry); + } + + + while (!TAILQ_EMPTY(&head)) { + largest = 1; + + /* + * Find the most useful skip steps remaining + */ + for (i = 0; i < PF_SKIP_COUNT; i++) { + skiplist = TAILQ_FIRST(&block->sb_skipsteps[i]); + if (skiplist->ps_count > largest) { + largest = skiplist->ps_count; + largest_list = i; + } + } + + if (largest <= 1) { + /* + * Nothing useful left. Leave remaining rules in order. + */ + DEBUG("(%d) no more commonality for skip steps", depth); + while ((por = TAILQ_FIRST(&head))) { + TAILQ_REMOVE(&head, por, por_entry); + TAILQ_INSERT_TAIL(&block->sb_rules, por, + por_entry); + } + } else { + /* + * There is commonality. Extract those common rules + * and place them in the ruleset adjacent to each + * other. + */ + skiplist = TAILQ_FIRST(&block->sb_skipsteps[ + largest_list]); + DEBUG("(%d) skipstep '%s' largest jump is %d @ #%d", + depth, skip_comparitors_names[largest_list], + largest, TAILQ_FIRST(&TAILQ_FIRST(&block-> + sb_skipsteps [largest_list])->ps_rules)-> + por_rule.nr); + TAILQ_REMOVE(&block->sb_skipsteps[largest_list], + skiplist, ps_entry); + + + /* + * There may be further commonality inside these + * rules. So we'll split them off into they're own + * superblock and pass it back into the optimizer. 
+ */ + if (skiplist->ps_count > 2) { + if ((newblock = calloc(1, sizeof(*newblock))) + == NULL) { + warn("calloc"); + return (1); + } + TAILQ_INIT(&newblock->sb_rules); + for (i = 0; i < PF_SKIP_COUNT; i++) + TAILQ_INIT(&newblock->sb_skipsteps[i]); + TAILQ_INSERT_BEFORE(block, newblock, sb_entry); + DEBUG("(%d) splitting off %d rules from superblock @ #%d", + depth, skiplist->ps_count, + TAILQ_FIRST(&skiplist->ps_rules)-> + por_rule.nr); + } else { + newblock = block; + } + + while ((por = TAILQ_FIRST(&skiplist->ps_rules))) { + TAILQ_REMOVE(&head, por, por_entry); + TAILQ_REMOVE(&skiplist->ps_rules, por, + por_skip_entry[largest_list]); + TAILQ_INSERT_TAIL(&newblock->sb_rules, por, + por_entry); + + /* Remove this rule from all other skiplists */ + remove_from_skipsteps(&block->sb_skipsteps[ + largest_list], block, por, skiplist); + } + free(skiplist); + if (newblock != block) + if (reorder_rules(pf, newblock, depth + 1)) + return (1); + } + } + +done: + for (i = 0; i < PF_SKIP_COUNT; i++) { + while ((skiplist = TAILQ_FIRST(&block->sb_skipsteps[i]))) { + TAILQ_REMOVE(&block->sb_skipsteps[i], skiplist, + ps_entry); + free(skiplist); + } + } + + return (0); +} + + +/* + * Optimization pass #4: re-order 'quick' rules based on feedback from the + * currently running ruleset + */ +int +block_feedback(struct pfctl *pf, struct superblock *block) +{ + TAILQ_HEAD( , pf_opt_rule) queue; + struct pf_opt_rule *por1, *por2; + u_int64_t total_count = 0; + struct pf_rule a, b; + + + /* + * Walk through all of the profiled superblock's rules and copy + * the counters onto our rules. + */ + TAILQ_FOREACH(por1, &block->sb_profiled_block->sb_rules, por_entry) { + comparable_rule(&a, &por1->por_rule, DC); + total_count += por1->por_rule.packets[0] + + por1->por_rule.packets[1]; + TAILQ_FOREACH(por2, &block->sb_rules, por_entry) { + if (por2->por_profile_count) + continue; + comparable_rule(&b, &por2->por_rule, DC); + if (memcmp(&a, &b, sizeof(a)) == 0) { + por2->por_profile_count = + por1->por_rule.packets[0] + + por1->por_rule.packets[1]; + break; + } + } + } + superblock_free(pf, block->sb_profiled_block); + block->sb_profiled_block = NULL; + + /* + * Now we pull all of the rules off the superblock and re-insert them + * in sorted order. + */ + + TAILQ_INIT(&queue); + while ((por1 = TAILQ_FIRST(&block->sb_rules)) != NULL) { + TAILQ_REMOVE(&block->sb_rules, por1, por_entry); + TAILQ_INSERT_TAIL(&queue, por1, por_entry); + } + + while ((por1 = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, por1, por_entry); +/* XXX I should sort all of the unused rules based on skip steps */ + TAILQ_FOREACH(por2, &block->sb_rules, por_entry) { + if (por1->por_profile_count > por2->por_profile_count) { + TAILQ_INSERT_BEFORE(por2, por1, por_entry); + break; + } + } + if (por2 == NULL) + TAILQ_INSERT_TAIL(&block->sb_rules, por1, por_entry); + } + + return (0); +} + + +/* + * Load the current ruleset from the kernel and try to associate them with + * the ruleset we're optimizing. 
+ */ +int +load_feedback_profile(struct pfctl *pf, struct superblocks *superblocks) +{ + struct superblock *block, *blockcur; + struct superblocks prof_superblocks; + struct pf_opt_rule *por; + struct pf_opt_queue queue; + struct pfioc_rule pr; + struct pf_rule a, b; + int nr, mnr; + + TAILQ_INIT(&queue); + TAILQ_INIT(&prof_superblocks); + + memset(&pr, 0, sizeof(pr)); + pr.rule.action = PF_PASS; + if (ioctl(pf->dev, DIOCGETRULES, &pr)) { + warn("DIOCGETRULES"); + return (1); + } + mnr = pr.nr; + + DEBUG("Loading %d active rules for a feedback profile", mnr); + for (nr = 0; nr < mnr; ++nr) { + struct pf_ruleset *rs; + if ((por = calloc(1, sizeof(*por))) == NULL) { + warn("calloc"); + return (1); + } + pr.nr = nr; + if (ioctl(pf->dev, DIOCGETRULE, &pr)) { + warn("DIOCGETRULES"); + return (1); + } + memcpy(&por->por_rule, &pr.rule, sizeof(por->por_rule)); + rs = pf_find_or_create_ruleset(pr.anchor_call); + por->por_rule.anchor = rs->anchor; + if (TAILQ_EMPTY(&por->por_rule.rpool.list)) + memset(&por->por_rule.rpool, 0, + sizeof(por->por_rule.rpool)); + TAILQ_INSERT_TAIL(&queue, por, por_entry); + + /* XXX pfctl_get_pool(pf->dev, &pr.rule.rpool, nr, pr.ticket, + * PF_PASS, pf->anchor) ??? + * ... pfctl_clear_pool(&pr.rule.rpool) + */ + } + + if (construct_superblocks(pf, &queue, &prof_superblocks)) + return (1); + + + /* + * Now we try to associate the active ruleset's superblocks with + * the superblocks we're compiling. + */ + block = TAILQ_FIRST(superblocks); + blockcur = TAILQ_FIRST(&prof_superblocks); + while (block && blockcur) { + comparable_rule(&a, &TAILQ_FIRST(&block->sb_rules)->por_rule, + BREAK); + comparable_rule(&b, &TAILQ_FIRST(&blockcur->sb_rules)->por_rule, + BREAK); + if (memcmp(&a, &b, sizeof(a)) == 0) { + /* The two superblocks lined up */ + block->sb_profiled_block = blockcur; + } else { + DEBUG("superblocks don't line up between #%d and #%d", + TAILQ_FIRST(&block->sb_rules)->por_rule.nr, + TAILQ_FIRST(&blockcur->sb_rules)->por_rule.nr); + break; + } + block = TAILQ_NEXT(block, sb_entry); + blockcur = TAILQ_NEXT(blockcur, sb_entry); + } + + + + /* Free any superblocks we couldn't link */ + while (blockcur) { + block = TAILQ_NEXT(blockcur, sb_entry); + superblock_free(pf, blockcur); + blockcur = block; + } + return (0); +} + + +/* + * Compare a rule to a skiplist to see if the rule is a member + */ +int +skip_compare(int skipnum, struct pf_skip_step *skiplist, + struct pf_opt_rule *por) +{ + struct pf_rule *a, *b; + if (skipnum >= PF_SKIP_COUNT || skipnum < 0) + errx(1, "skip_compare() out of bounds"); + a = &por->por_rule; + b = &TAILQ_FIRST(&skiplist->ps_rules)->por_rule; + + return ((skip_comparitors[skipnum])(a, b)); +} + + +/* + * Add a rule to a skiplist + */ +void +skip_append(struct superblock *superblock, int skipnum, + struct pf_skip_step *skiplist, struct pf_opt_rule *por) +{ + struct pf_skip_step *prev; + + skiplist->ps_count++; + TAILQ_INSERT_TAIL(&skiplist->ps_rules, por, por_skip_entry[skipnum]); + + /* Keep the list of skiplists sorted by whichever is larger */ + while ((prev = TAILQ_PREV(skiplist, skiplist, ps_entry)) && + prev->ps_count < skiplist->ps_count) { + TAILQ_REMOVE(&superblock->sb_skipsteps[skipnum], + skiplist, ps_entry); + TAILQ_INSERT_BEFORE(prev, skiplist, ps_entry); + } +} + + +/* + * Remove a rule from the other skiplist calculations. 
+ */ +void +remove_from_skipsteps(struct skiplist *head, struct superblock *block, + struct pf_opt_rule *por, struct pf_skip_step *active_list) +{ + struct pf_skip_step *sk, *next; + struct pf_opt_rule *p2; + int i, found; + + for (i = 0; i < PF_SKIP_COUNT; i++) { + sk = TAILQ_FIRST(&block->sb_skipsteps[i]); + if (sk == NULL || sk == active_list || sk->ps_count <= 1) + continue; + found = 0; + do { + TAILQ_FOREACH(p2, &sk->ps_rules, por_skip_entry[i]) + if (p2 == por) { + TAILQ_REMOVE(&sk->ps_rules, p2, + por_skip_entry[i]); + found = 1; + sk->ps_count--; + break; + } + } while (!found && (sk = TAILQ_NEXT(sk, ps_entry))); + if (found && sk) { + /* Does this change the sorting order? */ + while ((next = TAILQ_NEXT(sk, ps_entry)) && + next->ps_count > sk->ps_count) { + TAILQ_REMOVE(head, sk, ps_entry); + TAILQ_INSERT_AFTER(head, next, sk, ps_entry); + } +#ifdef OPT_DEBUG + next = TAILQ_NEXT(sk, ps_entry); + assert(next == NULL || next->ps_count <= sk->ps_count); +#endif /* OPT_DEBUG */ + } + } +} + + +/* Compare two rules AF field for skiplist construction */ +int +skip_cmp_af(struct pf_rule *a, struct pf_rule *b) +{ + if (a->af != b->af || a->af == 0) + return (1); + return (0); +} + +/* Compare two rules DIRECTION field for skiplist construction */ +int +skip_cmp_dir(struct pf_rule *a, struct pf_rule *b) +{ + if (a->direction == 0 || a->direction != b->direction) + return (1); + return (0); +} + +/* Compare two rules DST Address field for skiplist construction */ +int +skip_cmp_dst_addr(struct pf_rule *a, struct pf_rule *b) +{ + if (a->dst.neg != b->dst.neg || + a->dst.addr.type != b->dst.addr.type) + return (1); + /* XXX if (a->proto != b->proto && a->proto != 0 && b->proto != 0 + * && (a->proto == IPPROTO_TCP || a->proto == IPPROTO_UDP || + * a->proto == IPPROTO_ICMP + * return (1); + */ + switch (a->dst.addr.type) { + case PF_ADDR_ADDRMASK: + if (memcmp(&a->dst.addr.v.a.addr, &b->dst.addr.v.a.addr, + sizeof(a->dst.addr.v.a.addr)) || + memcmp(&a->dst.addr.v.a.mask, &b->dst.addr.v.a.mask, + sizeof(a->dst.addr.v.a.mask)) || + (a->dst.addr.v.a.addr.addr32[0] == 0 && + a->dst.addr.v.a.addr.addr32[1] == 0 && + a->dst.addr.v.a.addr.addr32[2] == 0 && + a->dst.addr.v.a.addr.addr32[3] == 0)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + if (strcmp(a->dst.addr.v.ifname, b->dst.addr.v.ifname) != 0 || + a->dst.addr.iflags != a->dst.addr.iflags || + memcmp(&a->dst.addr.v.a.mask, &b->dst.addr.v.a.mask, + sizeof(a->dst.addr.v.a.mask))) + return (1); + return (0); + case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: + return (0); + case PF_ADDR_TABLE: + return (strcmp(a->dst.addr.v.tblname, b->dst.addr.v.tblname)); + } + return (1); +} + +/* Compare two rules DST port field for skiplist construction */ +int +skip_cmp_dst_port(struct pf_rule *a, struct pf_rule *b) +{ + /* XXX if (a->proto != b->proto && a->proto != 0 && b->proto != 0 + * && (a->proto == IPPROTO_TCP || a->proto == IPPROTO_UDP || + * a->proto == IPPROTO_ICMP + * return (1); + */ + if (a->dst.port_op == PF_OP_NONE || a->dst.port_op != b->dst.port_op || + a->dst.port[0] != b->dst.port[0] || + a->dst.port[1] != b->dst.port[1]) + return (1); + return (0); +} + +/* Compare two rules IFP field for skiplist construction */ +int +skip_cmp_ifp(struct pf_rule *a, struct pf_rule *b) +{ + if (strcmp(a->ifname, b->ifname) || a->ifname[0] == '\0') + return (1); + return (a->ifnot != b->ifnot); +} + +/* Compare two rules PROTO field for skiplist construction */ +int +skip_cmp_proto(struct pf_rule *a, struct pf_rule *b) +{ + return (a->proto != 
b->proto || a->proto == 0); +} + +/* Compare two rules SRC addr field for skiplist construction */ +int +skip_cmp_src_addr(struct pf_rule *a, struct pf_rule *b) +{ + if (a->src.neg != b->src.neg || + a->src.addr.type != b->src.addr.type) + return (1); + /* XXX if (a->proto != b->proto && a->proto != 0 && b->proto != 0 + * && (a->proto == IPPROTO_TCP || a->proto == IPPROTO_UDP || + * a->proto == IPPROTO_ICMP + * return (1); + */ + switch (a->src.addr.type) { + case PF_ADDR_ADDRMASK: + if (memcmp(&a->src.addr.v.a.addr, &b->src.addr.v.a.addr, + sizeof(a->src.addr.v.a.addr)) || + memcmp(&a->src.addr.v.a.mask, &b->src.addr.v.a.mask, + sizeof(a->src.addr.v.a.mask)) || + (a->src.addr.v.a.addr.addr32[0] == 0 && + a->src.addr.v.a.addr.addr32[1] == 0 && + a->src.addr.v.a.addr.addr32[2] == 0 && + a->src.addr.v.a.addr.addr32[3] == 0)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + if (strcmp(a->src.addr.v.ifname, b->src.addr.v.ifname) != 0 || + a->src.addr.iflags != a->src.addr.iflags || + memcmp(&a->src.addr.v.a.mask, &b->src.addr.v.a.mask, + sizeof(a->src.addr.v.a.mask))) + return (1); + return (0); + case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: + return (0); + case PF_ADDR_TABLE: + return (strcmp(a->src.addr.v.tblname, b->src.addr.v.tblname)); + } + return (1); +} + +/* Compare two rules SRC port field for skiplist construction */ +int +skip_cmp_src_port(struct pf_rule *a, struct pf_rule *b) +{ + if (a->src.port_op == PF_OP_NONE || a->src.port_op != b->src.port_op || + a->src.port[0] != b->src.port[0] || + a->src.port[1] != b->src.port[1]) + return (1); + /* XXX if (a->proto != b->proto && a->proto != 0 && b->proto != 0 + * && (a->proto == IPPROTO_TCP || a->proto == IPPROTO_UDP || + * a->proto == IPPROTO_ICMP + * return (1); + */ + return (0); +} + + +void +skip_init(void) +{ + struct { + char *name; + int skipnum; + int (*func)(struct pf_rule *, struct pf_rule *); + } comps[] = PF_SKIP_COMPARITORS; + int skipnum, i; + + for (skipnum = 0; skipnum < PF_SKIP_COUNT; skipnum++) { + for (i = 0; i < sizeof(comps)/sizeof(*comps); i++) + if (comps[i].skipnum == skipnum) { + skip_comparitors[skipnum] = comps[i].func; + skip_comparitors_names[skipnum] = comps[i].name; + } + } + for (skipnum = 0; skipnum < PF_SKIP_COUNT; skipnum++) + if (skip_comparitors[skipnum] == NULL) + errx(1, "Need to add skip step comparitor to pfctl?!"); +} + +/* + * Add a host/netmask to a table + */ +int +add_opt_table(struct pfctl *pf, struct pf_opt_tbl **tbl, sa_family_t af, + struct pf_rule_addr *addr) +{ +#ifdef OPT_DEBUG + char buf[128]; +#endif /* OPT_DEBUG */ + static int tablenum = 0; + struct node_host node_host; + + if (*tbl == NULL) { + if ((*tbl = calloc(1, sizeof(**tbl))) == NULL || + ((*tbl)->pt_buf = calloc(1, sizeof(*(*tbl)->pt_buf))) == + NULL) + err(1, "calloc"); + (*tbl)->pt_buf->pfrb_type = PFRB_ADDRS; + SIMPLEQ_INIT(&(*tbl)->pt_nodes); + + /* This is just a temporary table name */ + snprintf((*tbl)->pt_name, sizeof((*tbl)->pt_name), "%s%d", + PF_OPT_TABLE_PREFIX, tablenum++); + DEBUG("creating table <%s>", (*tbl)->pt_name); + } + + memset(&node_host, 0, sizeof(node_host)); + node_host.af = af; + node_host.addr = addr->addr; + +#ifdef OPT_DEBUG + DEBUG("<%s> adding %s/%d", (*tbl)->pt_name, inet_ntop(af, + &node_host.addr.v.a.addr, buf, sizeof(buf)), + unmask(&node_host.addr.v.a.mask, af)); +#endif /* OPT_DEBUG */ + + if (append_addr_host((*tbl)->pt_buf, &node_host, 0, 0)) { + warn("failed to add host"); + return (1); + } + if (pf->opts & PF_OPT_VERBOSE) { + struct node_tinit *ti; + + if ((ti = 
calloc(1, sizeof(*ti))) == NULL) + err(1, "malloc"); + if ((ti->host = malloc(sizeof(*ti->host))) == NULL) + err(1, "malloc"); + memcpy(ti->host, &node_host, sizeof(*ti->host)); + SIMPLEQ_INSERT_TAIL(&(*tbl)->pt_nodes, ti, entries); + } + + (*tbl)->pt_rulecount++; + if ((*tbl)->pt_rulecount == TABLE_THRESHOLD) + DEBUG("table <%s> now faster than skip steps", (*tbl)->pt_name); + + return (0); +} + + +/* + * Do the dirty work of choosing an unused table name and creating it. + * (be careful with the table name, it might already be used in another anchor) + */ +int +pf_opt_create_table(struct pfctl *pf, struct pf_opt_tbl *tbl) +{ + static int tablenum; + const struct pfr_table *t; + + if (table_buffer.pfrb_type == 0) { + /* Initialize the list of tables */ + table_buffer.pfrb_type = PFRB_TABLES; + for (;;) { + pfr_buf_grow(&table_buffer, table_buffer.pfrb_size); + table_buffer.pfrb_size = table_buffer.pfrb_msize; + if (pfr_get_tables(NULL, table_buffer.pfrb_caddr, + &table_buffer.pfrb_size, PFR_FLAG_ALLRSETS)) + err(1, "pfr_get_tables"); + if (table_buffer.pfrb_size <= table_buffer.pfrb_msize) + break; + } + table_identifier = arc4random(); + } + + /* XXX would be *really* nice to avoid duplicating identical tables */ + + /* Now we have to pick a table name that isn't used */ +again: + DEBUG("translating temporary table <%s> to <%s%x_%d>", tbl->pt_name, + PF_OPT_TABLE_PREFIX, table_identifier, tablenum); + snprintf(tbl->pt_name, sizeof(tbl->pt_name), "%s%x_%d", + PF_OPT_TABLE_PREFIX, table_identifier, tablenum); + PFRB_FOREACH(t, &table_buffer) { + if (strcasecmp(t->pfrt_name, tbl->pt_name) == 0) { + /* Collision. Try again */ + DEBUG("wow, table <%s> in use. trying again", + tbl->pt_name); + table_identifier = arc4random(); + goto again; + } + } + tablenum++; + + + if (pfctl_define_table(tbl->pt_name, PFR_TFLAG_CONST, 1, + pf->anchor->name, tbl->pt_buf, pf->anchor->ruleset.tticket)) { + warn("failed to create table %s", tbl->pt_name); + return (1); + } + return (0); +} + +/* + * Partition the flat ruleset into a list of distinct superblocks + */ +int +construct_superblocks(struct pfctl *pf, struct pf_opt_queue *opt_queue, + struct superblocks *superblocks) +{ + struct superblock *block = NULL; + struct pf_opt_rule *por; + int i; + + while (!TAILQ_EMPTY(opt_queue)) { + por = TAILQ_FIRST(opt_queue); + TAILQ_REMOVE(opt_queue, por, por_entry); + if (block == NULL || !superblock_inclusive(block, por)) { + if ((block = calloc(1, sizeof(*block))) == NULL) { + warn("calloc"); + return (1); + } + TAILQ_INIT(&block->sb_rules); + for (i = 0; i < PF_SKIP_COUNT; i++) + TAILQ_INIT(&block->sb_skipsteps[i]); + TAILQ_INSERT_TAIL(superblocks, block, sb_entry); + } + TAILQ_INSERT_TAIL(&block->sb_rules, por, por_entry); + } + + return (0); +} + + +/* + * Compare two rule addresses + */ +int +addrs_equal(struct pf_rule_addr *a, struct pf_rule_addr *b) +{ + if (a->neg != b->neg) + return (0); + return (memcmp(&a->addr, &b->addr, sizeof(a->addr)) == 0); +} + + +/* + * The addresses are not equal, but can we combine them into one table? 
+ */ +int +addrs_combineable(struct pf_rule_addr *a, struct pf_rule_addr *b) +{ + if (a->addr.type != PF_ADDR_ADDRMASK || + b->addr.type != PF_ADDR_ADDRMASK) + return (0); + if (a->neg != b->neg || a->port_op != b->port_op || + a->port[0] != b->port[0] || a->port[1] != b->port[1]) + return (0); + return (1); +} + + +/* + * Are we allowed to combine these two rules + */ +int +rules_combineable(struct pf_rule *p1, struct pf_rule *p2) +{ + struct pf_rule a, b; + + comparable_rule(&a, p1, COMBINED); + comparable_rule(&b, p2, COMBINED); + return (memcmp(&a, &b, sizeof(a)) == 0); +} + + +/* + * Can a rule be included inside a superblock + */ +int +superblock_inclusive(struct superblock *block, struct pf_opt_rule *por) +{ + struct pf_rule a, b; + int i, j; + + /* First check for hard breaks */ + for (i = 0; i < sizeof(pf_rule_desc)/sizeof(*pf_rule_desc); i++) { + if (pf_rule_desc[i].prf_type == BARRIER) { + for (j = 0; j < pf_rule_desc[i].prf_size; j++) + if (((char *)&por->por_rule)[j + + pf_rule_desc[i].prf_offset] != 0) + return (0); + } + } + + /* per-rule src-track is also a hard break */ + if (por->por_rule.rule_flag & PFRULE_RULESRCTRACK) + return (0); + + /* + * Have to handle interface groups seperately. Consider the following + * rules: + * block on EXTIFS to any port 22 + * pass on em0 to any port 22 + * (where EXTIFS is an arbitrary interface group) + * The optimizer may decide to re-order the pass rule in front of the + * block rule. But what if EXTIFS includes em0??? Such a reordering + * would change the meaning of the ruleset. + * We can't just lookup the EXTIFS group and check if em0 is a member + * because the user is allowed to add interfaces to a group during + * runtime. + * Ergo interface groups become a defacto superblock break :-( + */ + if (interface_group(por->por_rule.ifname) || + interface_group(TAILQ_FIRST(&block->sb_rules)->por_rule.ifname)) { + if (strcasecmp(por->por_rule.ifname, + TAILQ_FIRST(&block->sb_rules)->por_rule.ifname) != 0) + return (0); + } + + comparable_rule(&a, &TAILQ_FIRST(&block->sb_rules)->por_rule, NOMERGE); + comparable_rule(&b, &por->por_rule, NOMERGE); + if (memcmp(&a, &b, sizeof(a)) == 0) + return (1); + +#ifdef OPT_DEBUG + for (i = 0; i < sizeof(por->por_rule); i++) { + int closest = -1; + if (((u_int8_t *)&a)[i] != ((u_int8_t *)&b)[i]) { + for (j = 0; j < sizeof(pf_rule_desc) / + sizeof(*pf_rule_desc); j++) { + if (i >= pf_rule_desc[j].prf_offset && + i < pf_rule_desc[j].prf_offset + + pf_rule_desc[j].prf_size) { + DEBUG("superblock break @ %d due to %s", + por->por_rule.nr, + pf_rule_desc[j].prf_name); + return (0); + } + if (i > pf_rule_desc[j].prf_offset) { + if (closest == -1 || + i-pf_rule_desc[j].prf_offset < + i-pf_rule_desc[closest].prf_offset) + closest = j; + } + } + + if (closest >= 0) + DEBUG("superblock break @ %d on %s+%xh", + por->por_rule.nr, + pf_rule_desc[closest].prf_name, + i - pf_rule_desc[closest].prf_offset - + pf_rule_desc[closest].prf_size); + else + DEBUG("superblock break @ %d on field @ %d", + por->por_rule.nr, i); + return (0); + } + } +#endif /* OPT_DEBUG */ + + return (0); +} + + +/* + * Figure out if an interface name is an actual interface or actually a + * group of interfaces. 
+ */ +int +interface_group(const char *ifname) +{ + if (ifname == NULL || !ifname[0]) + return (0); + + /* Real interfaces must end in a number, interface groups do not */ + if (isdigit(ifname[strlen(ifname) - 1])) + return (0); + else + return (1); +} + + +/* + * Make a rule that can directly compared by memcmp() + */ +void +comparable_rule(struct pf_rule *dst, const struct pf_rule *src, int type) +{ + int i; + /* + * To simplify the comparison, we just zero out the fields that are + * allowed to be different and then do a simple memcmp() + */ + memcpy(dst, src, sizeof(*dst)); + for (i = 0; i < sizeof(pf_rule_desc)/sizeof(*pf_rule_desc); i++) + if (pf_rule_desc[i].prf_type >= type) { +#ifdef OPT_DEBUG + assert(pf_rule_desc[i].prf_type != NEVER || + *(((char *)dst) + pf_rule_desc[i].prf_offset) == 0); +#endif /* OPT_DEBUG */ + memset(((char *)dst) + pf_rule_desc[i].prf_offset, 0, + pf_rule_desc[i].prf_size); + } +} + + +/* + * Remove superset information from two rules so we can directly compare them + * with memcmp() + */ +void +exclude_supersets(struct pf_rule *super, struct pf_rule *sub) +{ + if (super->ifname[0] == '\0') + memset(sub->ifname, 0, sizeof(sub->ifname)); + if (super->direction == PF_INOUT) + sub->direction = PF_INOUT; + if ((super->proto == 0 || super->proto == sub->proto) && + super->flags == 0 && super->flagset == 0 && (sub->flags || + sub->flagset)) { + sub->flags = super->flags; + sub->flagset = super->flagset; + } + if (super->proto == 0) + sub->proto = 0; + + if (super->src.port_op == 0) { + sub->src.port_op = 0; + sub->src.port[0] = 0; + sub->src.port[1] = 0; + } + if (super->dst.port_op == 0) { + sub->dst.port_op = 0; + sub->dst.port[0] = 0; + sub->dst.port[1] = 0; + } + + if (super->src.addr.type == PF_ADDR_ADDRMASK && !super->src.neg && + !sub->src.neg && super->src.addr.v.a.mask.addr32[0] == 0 && + super->src.addr.v.a.mask.addr32[1] == 0 && + super->src.addr.v.a.mask.addr32[2] == 0 && + super->src.addr.v.a.mask.addr32[3] == 0) + memset(&sub->src.addr, 0, sizeof(sub->src.addr)); + else if (super->src.addr.type == PF_ADDR_ADDRMASK && + sub->src.addr.type == PF_ADDR_ADDRMASK && + super->src.neg == sub->src.neg && + super->af == sub->af && + unmask(&super->src.addr.v.a.mask, super->af) < + unmask(&sub->src.addr.v.a.mask, sub->af) && + super->src.addr.v.a.addr.addr32[0] == + (sub->src.addr.v.a.addr.addr32[0] & + super->src.addr.v.a.mask.addr32[0]) && + super->src.addr.v.a.addr.addr32[1] == + (sub->src.addr.v.a.addr.addr32[1] & + super->src.addr.v.a.mask.addr32[1]) && + super->src.addr.v.a.addr.addr32[2] == + (sub->src.addr.v.a.addr.addr32[2] & + super->src.addr.v.a.mask.addr32[2]) && + super->src.addr.v.a.addr.addr32[3] == + (sub->src.addr.v.a.addr.addr32[3] & + super->src.addr.v.a.mask.addr32[3])) { + /* sub->src.addr is a subset of super->src.addr/mask */ + memcpy(&sub->src.addr, &super->src.addr, sizeof(sub->src.addr)); + } + + if (super->dst.addr.type == PF_ADDR_ADDRMASK && !super->dst.neg && + !sub->dst.neg && super->dst.addr.v.a.mask.addr32[0] == 0 && + super->dst.addr.v.a.mask.addr32[1] == 0 && + super->dst.addr.v.a.mask.addr32[2] == 0 && + super->dst.addr.v.a.mask.addr32[3] == 0) + memset(&sub->dst.addr, 0, sizeof(sub->dst.addr)); + else if (super->dst.addr.type == PF_ADDR_ADDRMASK && + sub->dst.addr.type == PF_ADDR_ADDRMASK && + super->dst.neg == sub->dst.neg && + super->af == sub->af && + unmask(&super->dst.addr.v.a.mask, super->af) < + unmask(&sub->dst.addr.v.a.mask, sub->af) && + super->dst.addr.v.a.addr.addr32[0] == + (sub->dst.addr.v.a.addr.addr32[0] & 
+ super->dst.addr.v.a.mask.addr32[0]) && + super->dst.addr.v.a.addr.addr32[1] == + (sub->dst.addr.v.a.addr.addr32[1] & + super->dst.addr.v.a.mask.addr32[1]) && + super->dst.addr.v.a.addr.addr32[2] == + (sub->dst.addr.v.a.addr.addr32[2] & + super->dst.addr.v.a.mask.addr32[2]) && + super->dst.addr.v.a.addr.addr32[3] == + (sub->dst.addr.v.a.addr.addr32[3] & + super->dst.addr.v.a.mask.addr32[3])) { + /* sub->dst.addr is a subset of super->dst.addr/mask */ + memcpy(&sub->dst.addr, &super->dst.addr, sizeof(sub->dst.addr)); + } + + if (super->af == 0) + sub->af = 0; +} + + +void +superblock_free(struct pfctl *pf, struct superblock *block) +{ + struct pf_opt_rule *por; + while ((por = TAILQ_FIRST(&block->sb_rules))) { + TAILQ_REMOVE(&block->sb_rules, por, por_entry); + if (por->por_src_tbl) { + if (por->por_src_tbl->pt_buf) { + pfr_buf_clear(por->por_src_tbl->pt_buf); + free(por->por_src_tbl->pt_buf); + } + free(por->por_src_tbl); + } + if (por->por_dst_tbl) { + if (por->por_dst_tbl->pt_buf) { + pfr_buf_clear(por->por_dst_tbl->pt_buf); + free(por->por_dst_tbl->pt_buf); + } + free(por->por_dst_tbl); + } + free(por); + } + if (block->sb_profiled_block) + superblock_free(pf, block->sb_profiled_block); + free(block); +} + diff --git a/usr.sbin/pfctl/pfctl_osfp.c b/usr.sbin/pfctl/pfctl_osfp.c index 568def0724..4e4f20349c 100644 --- a/usr.sbin/pfctl/pfctl_osfp.c +++ b/usr.sbin/pfctl/pfctl_osfp.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_osfp.c,v 1.8 2004/02/27 10:42:00 henning Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_osfp.c,v 1.1 2004/09/21 21:25:28 joerg Exp $ */ +/* $OpenBSD: pfctl_osfp.c,v 1.15 2006/12/13 05:10:15 itojun Exp $ */ /* * Copyright (c) 2003 Mike Frantzen @@ -24,6 +23,10 @@ #include #include +#include +#include +#include + #include #include #include @@ -98,8 +101,8 @@ pfctl_file_fingerprints(int dev, int opts, const char *fp_filename) pfctl_flush_my_fingerprints(&classes); - if ((in = fopen(fp_filename, "r")) == NULL) { - warn("fopen(%s)", fp_filename); + if ((in = pfctl_fopen(fp_filename, "r")) == NULL) { + warn("%s", fp_filename); return (1); } class = version = subtype = desc = tcpopts = NULL; @@ -241,6 +244,10 @@ pfctl_file_fingerprints(int dev, int opts, const char *fp_filename) sizeof(fp.fp_os.fp_subtype_nm)); add_fingerprint(dev, opts, &fp); + + fp.fp_flags |= (PF_OSFP_DF | PF_OSFP_INET6); + fp.fp_psize += sizeof(struct ip6_hdr) - sizeof(struct ip); + add_fingerprint(dev, opts, &fp); } if (class) @@ -251,6 +258,8 @@ pfctl_file_fingerprints(int dev, int opts, const char *fp_filename) free(subtype); if (desc) free(desc); + if (tcpopts) + free(tcpopts); fclose(in); @@ -277,9 +286,9 @@ pfctl_flush_my_fingerprints(struct name_list *list) while ((nm = LIST_FIRST(list)) != NULL) { LIST_REMOVE(nm, nm_entry); pfctl_flush_my_fingerprints(&nm->nm_sublist); - fingerprint_count--; free(nm); } + fingerprint_count = 0; class_count = 0; } @@ -349,7 +358,7 @@ pfctl_get_fingerprint(const char *name) if ((wr_name = strdup(name)) == NULL) err(1, "malloc"); - if ((ptr = index(wr_name, ' ')) == NULL) { + if ((ptr = strchr(wr_name, ' ')) == NULL) { free(wr_name); return (PF_OSFP_NOMATCH); } @@ -509,9 +518,9 @@ found: strlcat(buf, " ", len); strlcat(buf, version_name, len); if (subtype_name) { - if (index(version_name, ' ')) + if (strchr(version_name, ' ')) strlcat(buf, " ", len); - else if (index(version_name, '.') && + else if (strchr(version_name, '.') && isdigit(*subtype_name)) strlcat(buf, ".", len); else @@ -626,7 +635,7 @@ add_fingerprint(int dev, int opts, struct pf_osfp_ioctl *fp) 
fingerprint_count++; #ifdef FAKE_PF_KERNEL - /* Linked to the sys/net/pf/pf_osfp.c. Call pf_osfp_add() */ + /* Linked to the sys/net/pf_osfp.c. Call pf_osfp_add() */ if ((errno = pf_osfp_add(fp))) #else if ((opts & PF_OPT_NOACTION) == 0 && ioctl(dev, DIOCOSFPADD, fp)) @@ -703,9 +712,8 @@ fingerprint_name_entry(struct name_list *list, char *name) nm_entry = calloc(1, sizeof(*nm_entry)); if (nm_entry == NULL) err(1, "calloc"); - LIST_INIT(&nm_entry->nm_sublist); - strlcpy(nm_entry->nm_name, name, - sizeof(nm_entry->nm_name)); + LIST_INIT(&nm_entry->nm_sublist); + strlcpy(nm_entry->nm_name, name, sizeof(nm_entry->nm_name)); } LIST_INSERT_HEAD(list, nm_entry, nm_entry); return (nm_entry); @@ -764,7 +772,6 @@ sort_name_list(int opts, struct name_list *nml) LIST_INSERT_AFTER(nmlast, nm, nm_entry); nmlast = nm; } - return; } /* parse the next integer in a formatted config file line */ diff --git a/usr.sbin/pfctl/pfctl_parser.c b/usr.sbin/pfctl/pfctl_parser.c index 1b8d8687d3..4d635cc5d1 100644 --- a/usr.sbin/pfctl/pfctl_parser.c +++ b/usr.sbin/pfctl/pfctl_parser.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_parser.c,v 1.194.2.1 2004/05/05 04:00:50 brad Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_parser.c,v 1.3 2008/04/11 18:21:49 dillon Exp $ */ +/* $OpenBSD: pfctl_parser.c,v 1.234 2006/10/31 23:46:24 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -32,9 +31,11 @@ * */ -#include +#include #include #include +#include +#include #include #include #include @@ -53,6 +54,7 @@ #include #include #include +#include #include "pfctl_parser.h" #include "pfctl.h" @@ -65,6 +67,7 @@ void print_fromto(struct pf_rule_addr *, pf_osfp_t, struct pf_rule_addr *, u_int8_t, u_int8_t, int); int ifa_skip_if(const char *filter, struct node_host *p); +struct node_host *ifa_grouplookup(const char *, int); struct node_host *host_if(const char *, int); static struct node_host *host_v4(const char *); struct node_host *host_v6(const char *, int); @@ -184,6 +187,7 @@ const struct pf_timeout pf_timeouts[] = { { "tcp.closing", PFTM_TCP_CLOSING }, { "tcp.finwait", PFTM_TCP_FIN_WAIT }, { "tcp.closed", PFTM_TCP_CLOSED }, + { "tcp.tsdiff", PFTM_TS_DIFF }, { "udp.first", PFTM_UDP_FIRST_PACKET }, { "udp.single", PFTM_UDP_SINGLE }, { "udp.multiple", PFTM_UDP_MULTIPLE }, @@ -364,13 +368,13 @@ print_fromto(struct pf_rule_addr *src, pf_osfp_t osfp, struct pf_rule_addr *dst, PF_AZERO(&src->addr.v.a.mask, AF_INET6) && PF_AZERO(&dst->addr.v.a.addr, AF_INET6) && PF_AZERO(&dst->addr.v.a.mask, AF_INET6) && - !src->not && !dst->not && + !src->neg && !dst->neg && !src->port_op && !dst->port_op && osfp == PF_OSFP_ANY) printf(" all"); else { printf(" from "); - if (src->not) + if (src->neg) printf("! "); print_addr(&src->addr, af, verbose); if (src->port_op) @@ -382,7 +386,7 @@ print_fromto(struct pf_rule_addr *src, pf_osfp_t osfp, struct pf_rule_addr *dst, sizeof(buf))); printf(" to "); - if (dst->not) + if (dst->neg) printf("! 
"); print_addr(&dst->addr, af, verbose); if (dst->port_op) @@ -470,16 +474,19 @@ print_pool(struct pf_pool *pool, u_int16_t p1, u_int16_t p2, } const char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES; +const char *pf_lcounters[LCNT_MAX+1] = LCNT_NAMES; const char *pf_fcounters[FCNT_MAX+1] = FCNT_NAMES; const char *pf_scounters[FCNT_MAX+1] = FCNT_NAMES; void print_status(struct pf_status *s, int opts) { - char statline[80]; + char statline[80]; const char *running; - time_t runtime; - int i; + time_t runtime; + int i; + char buf[PF_MD5_DIGEST_LENGTH * 2 + 1]; + static const char hex[] = "0123456789abcdef"; runtime = time(NULL) - s->since; running = s->running ? "Enabled" : "Disabled"; @@ -513,7 +520,18 @@ print_status(struct pf_status *s, int opts) printf("%15s\n\n", "Debug: Loud"); break; } - printf("Hostid: 0x%08x\n\n", ntohl(s->hostid)); + + if (opts & PF_OPT_VERBOSE) { + printf("Hostid: 0x%08x\n", ntohl(s->hostid)); + + for (i = 0; i < PF_MD5_DIGEST_LENGTH; i++) { + buf[i + i] = hex[s->pf_chksum[i] >> 4]; + buf[i + i + 1] = hex[s->pf_chksum[i] & 0x0f]; + } + buf[i + i] = '\0'; + printf("Checksum: 0x%s\n\n", buf); + } + if (s->ifname[0] != 0) { printf("Interface Stats for %-16s %5s %16s\n", s->ifname, "IPv4", "IPv6"); @@ -573,6 +591,18 @@ print_status(struct pf_status *s, int opts) else printf("%14s\n", ""); } + if (opts & PF_OPT_VERBOSE) { + printf("Limit Counters\n"); + for (i = 0; i < LCNT_MAX; i++) { + printf(" %-25s %14lld ", pf_lcounters[i], + s->lcounters[i]); + if (runtime > 0) + printf("%14.1f/s\n", + (double)s->lcounters[i] / (double)runtime); + else + printf("%14s\n", ""); + } + } } void @@ -592,7 +622,9 @@ print_src_node(struct pf_src_node *sn, int opts) printf(" -> "); aw.v.a.addr = sn->raddr; print_addr(&aw, sn->af, opts & PF_OPT_VERBOSE2); - printf(" (%d states)\n", sn->states); + printf(" ( states %u, connections %u, rate %u.%u/%us )\n", sn->states, + sn->conn, sn->conn_rate.count / 1000, + (sn->conn_rate.count % 1000) / 100, sn->conn_rate.seconds); if (opts & PF_OPT_VERBOSE) { sec = sn->creation % 60; sn->creation /= 60; @@ -607,7 +639,9 @@ print_src_node(struct pf_src_node *sn, int opts) printf(", expires in %.2u:%.2u:%.2u", sn->expire, min, sec); } - printf(", %u pkts, %u bytes", sn->packets, sn->bytes); + printf(", %llu pkts, %llu bytes", + sn->packets[0] + sn->packets[1], + sn->bytes[0] + sn->bytes[1]); switch (sn->ruletype) { case PF_NAT: if (sn->rule.nr != (uint32_t)(-1)) @@ -627,22 +661,26 @@ print_src_node(struct pf_src_node *sn, int opts) } void -print_rule(struct pf_rule *r, int verbose) +print_rule(struct pf_rule *r, const char *anchor_call, int verbose) { - static const char *actiontypes[] = { "pass", "block", "scrub", "nat", - "no nat", "binat", "no binat", "rdr", "no rdr" }; + static const char *actiontypes[] = { "pass", "block", "scrub", + "no scrub", "nat", "no nat", "binat", "no binat", "rdr", "no rdr" }; static const char *anchortypes[] = { "anchor", "anchor", "anchor", - "nat-anchor", "nat-anchor", "binat-anchor", "binat-anchor", - "rdr-anchor", "rdr-anchor" }; + "anchor", "nat-anchor", "nat-anchor", "binat-anchor", + "binat-anchor", "rdr-anchor", "rdr-anchor" }; int i, opts; if (verbose) printf("@%d ", r->nr); if (r->action > PF_NORDR) printf("action(%d)", r->action); - else if (r->anchorname[0]) - printf("%s %s", anchortypes[r->action], r->anchorname); - else { + else if (anchor_call[0]) { + if (anchor_call[0] == '_') { + printf("%s", anchortypes[r->action]); + } else + printf("%s \"%s\"", anchortypes[r->action], + anchor_call); + } else { printf("%s", 
actiontypes[r->action]); if (r->natpass) printf(" pass"); @@ -697,10 +735,22 @@ print_rule(struct pf_rule *r, int verbose) printf(" in"); else if (r->direction == PF_OUT) printf(" out"); - if (r->log == 1) + if (r->log) { printf(" log"); - else if (r->log == 2) - printf(" log-all"); + if (r->log & ~PF_LOG || r->logif) { + int count = 0; + + printf(" ("); + if (r->log & PF_LOG_ALL) + printf("%sall", count++ ? ", " : ""); + if (r->log & PF_LOG_SOCKET_LOOKUP) + printf("%suser", count++ ? ", " : ""); + if (r->logif) + printf("%sto pflog%u", count++ ? ", " : "", + r->logif); + printf(")"); + } + } if (r->quick) printf(" quick"); if (r->ifname[0]) { @@ -750,7 +800,11 @@ print_rule(struct pf_rule *r, int verbose) print_flags(r->flags); printf("/"); print_flags(r->flagset); - } + } else if (r->action == PF_PASS && + (!r->proto || r->proto == IPPROTO_TCP) && + !(r->rule_flag & PFRULE_FRAGMENT) && + !anchor_call[0] && r->keep_state) + printf(" flags any"); if (r->type) { const struct icmptypeent *it; @@ -775,7 +829,9 @@ print_rule(struct pf_rule *r, int verbose) } if (r->tos) printf(" tos 0x%2.2x", r->tos); - if (r->keep_state == PF_STATE_NORMAL) + if (!r->keep_state && r->action == PF_PASS && !anchor_call[0]) + printf(" no state"); + else if (r->keep_state == PF_STATE_NORMAL) printf(" keep state"); else if (r->keep_state == PF_STATE_MODULATE) printf(" modulate state"); @@ -805,7 +861,7 @@ print_rule(struct pf_rule *r, int verbose) opts = 1; if (r->rule_flag & PFRULE_SRCTRACK) opts = 1; - if (r->rule_flag & (PFRULE_IFBOUND | PFRULE_GRBOUND)) + if (r->rule_flag & PFRULE_IFBOUND) opts = 1; for (i = 0; !opts && i < PFTM_MAX; ++i) if (r->timeout[i]) @@ -859,30 +915,54 @@ print_rule(struct pf_rule *r, int verbose) printf("max-src-states %u", r->max_src_states); opts = 0; } + if (r->max_src_conn) { + if (!opts) + printf(", "); + printf("max-src-conn %u", r->max_src_conn); + opts = 0; + } + if (r->max_src_conn_rate.limit) { + if (!opts) + printf(", "); + printf("max-src-conn-rate %u/%u", + r->max_src_conn_rate.limit, + r->max_src_conn_rate.seconds); + opts = 0; + } if (r->max_src_nodes) { if (!opts) printf(", "); printf("max-src-nodes %u", r->max_src_nodes); opts = 0; } - if (r->rule_flag & PFRULE_IFBOUND) { + if (r->overload_tblname[0]) { if (!opts) printf(", "); - printf("if-bound"); - opts = 0; + printf("overload <%s>", r->overload_tblname); + if (r->flush) + printf(" flush"); + if (r->flush & PF_FLUSH_GLOBAL) + printf(" global"); } - if (r->rule_flag & PFRULE_GRBOUND) { + if (r->rule_flag & PFRULE_IFBOUND) { if (!opts) printf(", "); - printf("group-bound"); + printf("if-bound"); opts = 0; } for (i = 0; i < PFTM_MAX; ++i) if (r->timeout[i]) { + int j; + if (!opts) printf(", "); opts = 0; - printf("%s %u", pf_timeouts[i].name, + for (j = 0; pf_timeouts[j].name != NULL; + ++j) + if (pf_timeouts[j].timeout == i) + break; + printf("%s %u", pf_timeouts[j].name == NULL ? 
+ "inv.timeout" : pf_timeouts[j].name, r->timeout[i]); } printf(")"); @@ -923,13 +1003,14 @@ print_rule(struct pf_rule *r, int verbose) printf(" !"); printf(" tagged %s", r->match_tagname); } - if (!r->anchorname[0] && (r->action == PF_NAT || + if (r->rtableid != -1) + printf(" rtable %u", r->rtableid); + if (!anchor_call[0] && (r->action == PF_NAT || r->action == PF_BINAT || r->action == PF_RDR)) { printf(" -> "); print_pool(&r->rpool, r->rpool.proxy_port[0], r->rpool.proxy_port[1], r->af, r->action); } - printf("\n"); } void @@ -990,9 +1071,7 @@ set_ipmask(struct node_host *h, u_int8_t b) int i, j = 0; m = &h->addr.v.a.mask; - - for (i = 0; i < 4; i++) - m->addr32[i] = 0; + memset(m, 0, sizeof(*m)); while (b >= 32) { m->addr32[j++] = 0xffffffff; @@ -1044,8 +1123,6 @@ ifa_load(void) { struct ifaddrs *ifap, *ifa; struct node_host *n = NULL, *h = NULL; - struct pfr_buffer b; - const struct pfi_if *p; if (getifaddrs(&ifap) < 0) err(1, "getifaddrs"); @@ -1121,80 +1198,86 @@ ifa_load(void) } } - /* add interface groups, including clonable and dynamic stuff */ - bzero(&b, sizeof(b)); - b.pfrb_type = PFRB_IFACES; - for (;;) { - if (pfr_buf_grow(&b, b.pfrb_size)) - err(1, "ifa_load: pfr_buf_grow"); - b.pfrb_size = b.pfrb_msize; - if (pfi_get_ifaces(NULL, b.pfrb_caddr, &b.pfrb_size, - PFI_FLAG_GROUP)) - err(1, "ifa_load: pfi_get_ifaces"); - if (b.pfrb_size <= b.pfrb_msize) - break; - } - PFRB_FOREACH(p, &b) { - n = calloc(1, sizeof(struct node_host)); - if (n == NULL) - err(1, "address: calloc"); - n->af = AF_LINK; - n->ifa_flags = PF_IFA_FLAG_GROUP; - if (p->pfif_flags & PFI_IFLAG_DYNAMIC) - n->ifa_flags |= PF_IFA_FLAG_DYNAMIC; - if (p->pfif_flags & PFI_IFLAG_CLONABLE) - n->ifa_flags |= PF_IFA_FLAG_CLONABLE; - if (!strcmp(p->pfif_name, "lo")) - n->ifa_flags |= IFF_LOOPBACK; - if ((n->ifname = strdup(p->pfif_name)) == NULL) - err(1, "ifa_load: strdup"); - n->next = NULL; - n->tail = n; - if (h == NULL) - h = n; - else { - h->tail->next = n; - h->tail = n; - } - } - iftab = h; freeifaddrs(ifap); } struct node_host * -ifa_exists(const char *ifa_name, int group_ok) +ifa_exists(const char *ifa_name) { struct node_host *n; - char *p, buf[IFNAMSIZ]; - int group; + struct ifgroupreq ifgr; + int s; - group = !isdigit(ifa_name[strlen(ifa_name) - 1]); - if (group && !group_ok) - return (NULL); if (iftab == NULL) ifa_load(); + /* check wether this is a group */ + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + err(1, "socket"); + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name)); + if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == 0) { + /* fake a node_host */ + if ((n = calloc(1, sizeof(*n))) == NULL) + err(1, "calloc"); + if ((n->ifname = strdup(ifa_name)) == NULL) + err(1, "strdup"); + close(s); + return (n); + } + close(s); + for (n = iftab; n; n = n->next) { if (n->af == AF_LINK && !strncmp(n->ifname, ifa_name, IFNAMSIZ)) return (n); } - if (!group) { - /* look for clonable and/or dynamic interface */ - strlcpy(buf, ifa_name, sizeof(buf)); - for (p = buf + strlen(buf) - 1; p > buf && isdigit(*p); p--) - *p = '\0'; - for (n = iftab; n != NULL; n = n->next) - if (n->af == AF_LINK && - !strncmp(n->ifname, buf, IFNAMSIZ)) - break; - if (n != NULL && n->ifa_flags & - (PF_IFA_FLAG_DYNAMIC | PF_IFA_FLAG_CLONABLE)) - return (n); /* XXX */ - } + return (NULL); } +struct node_host * +ifa_grouplookup(const char *ifa_name, int flags) +{ + struct ifg_req *ifg; + struct ifgroupreq ifgr; + int s; + size_t len; + struct node_host *n, *h = NULL; + + if ((s = socket(AF_INET, 
SOCK_DGRAM, 0)) == -1) + err(1, "socket"); + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name)); + if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) { + close(s); + return (NULL); + } + + len = ifgr.ifgr_len; + if ((ifgr.ifgr_groups = calloc(1, len)) == NULL) + err(1, "calloc"); + if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) + err(1, "SIOCGIFGMEMB"); + + for (ifg = ifgr.ifgr_groups; ifg && len >= sizeof(struct ifg_req); + ifg++) { + len -= sizeof(struct ifg_req); + if ((n = ifa_lookup(ifg->ifgrq_member, flags)) == NULL) + continue; + if (h == NULL) + h = n; + else { + h->tail->next = n; + h->tail = n->tail; + } + } + free(ifgr.ifgr_groups); + close(s); + + return (h); +} + struct node_host * ifa_lookup(const char *ifa_name, int flags) { @@ -1202,6 +1285,9 @@ ifa_lookup(const char *ifa_name, int flags) int got4 = 0, got6 = 0; const char *last_if = NULL; + if ((h = ifa_grouplookup(ifa_name, flags)) != NULL) + return (h); + if (!strncmp(ifa_name, "self", IFNAMSIZ)) ifa_name = NULL; @@ -1305,7 +1391,7 @@ host(const char *s) if ((p = strrchr(s, '/')) != NULL) { mask = strtol(p+1, &q, 0); if (!q || *q || mask > 128 || q == (p+1)) { - fprintf(stderr, "invalid netmask\n"); + fprintf(stderr, "invalid netmask '%s'\n", p); return (NULL); } if ((ps = malloc(strlen(s) - strlen(p) + 1)) == NULL) @@ -1379,7 +1465,7 @@ host_if(const char *s, int mask) free(ps); return (NULL); } - if (ifa_exists(ps, 1) || !strncmp(ps, "self", IFNAMSIZ)) { + if (ifa_exists(ps) || !strncmp(ps, "self", IFNAMSIZ)) { /* interface with this name exists */ h = ifa_lookup(ps, flags); for (n = h; n != NULL && mask > -1; n = n->next) @@ -1459,7 +1545,7 @@ host_dns(const char *s, int v4mask, int v6mask) char *p, *ps; if ((ps = strdup(s)) == NULL) - err(1, "host_if: strdup"); + err(1, "host_dns: strdup"); if ((p = strrchr(ps, ':')) != NULL && !strcmp(p, ":0")) { noalias = 1; *p = '\0'; @@ -1468,8 +1554,10 @@ host_dns(const char *s, int v4mask, int v6mask) hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; /* DUMMY */ error = getaddrinfo(ps, NULL, &hints, &res0); - if (error) + if (error) { + free(ps); return (h); + } for (res = res0; res; res = res->ai_next) { if (res->ai_family != AF_INET && @@ -1593,33 +1681,28 @@ append_addr_host(struct pfr_buffer *b, struct node_host *n, int test, int not) } int -pfctl_add_trans(struct pfr_buffer *buf, int rs_num, const char *anchor, - const char *ruleset) +pfctl_add_trans(struct pfr_buffer *buf, int rs_num, const char *anchor) { struct pfioc_trans_e trans; bzero(&trans, sizeof(trans)); trans.rs_num = rs_num; if (strlcpy(trans.anchor, anchor, - sizeof(trans.anchor)) >= sizeof(trans.anchor) || - strlcpy(trans.ruleset, ruleset, - sizeof(trans.ruleset)) >= sizeof(trans.ruleset)) + sizeof(trans.anchor)) >= sizeof(trans.anchor)) errx(1, "pfctl_add_trans: strlcpy"); return pfr_buf_add(buf, &trans); } u_int32_t -pfctl_get_ticket(struct pfr_buffer *buf, int rs_num, const char *anchor, - const char *ruleset) +pfctl_get_ticket(struct pfr_buffer *buf, int rs_num, const char *anchor) { const struct pfioc_trans_e *p; PFRB_FOREACH(p, buf) - if (rs_num == p->rs_num && !strcmp(anchor, p->anchor) && - !strcmp(ruleset, p->ruleset)) + if (rs_num == p->rs_num && !strcmp(anchor, p->anchor)) return (p->ticket); - errx(1, "pfr_get_ticket: assertion failed"); + errx(1, "pfctl_get_ticket: assertion failed"); } int diff --git a/usr.sbin/pfctl/pfctl_parser.h b/usr.sbin/pfctl/pfctl_parser.h index e42255f940..4a36508af2 100644 --- a/usr.sbin/pfctl/pfctl_parser.h +++ 
b/usr.sbin/pfctl/pfctl_parser.h @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_parser.h,v 1.74 2004/02/10 22:26:56 dhartmei Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_parser.h,v 1.3 2008/04/11 18:21:49 dillon Exp $ */ +/* $OpenBSD: pfctl_parser.h,v 1.86 2006/10/31 23:46:25 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -47,12 +46,18 @@ #define PF_OPT_DUMMYACTION 0x0100 #define PF_OPT_DEBUG 0x0200 #define PF_OPT_SHOWALL 0x0400 +#define PF_OPT_OPTIMIZE 0x0800 +#define PF_OPT_MERGE 0x2000 +#define PF_OPT_RECURSE 0x4000 #define PF_TH_ALL 0xFF #define PF_NAT_PROXY_PORT_LOW 50001 #define PF_NAT_PROXY_PORT_HIGH 65535 +#define PF_OPTIMIZE_BASIC 0x0001 +#define PF_OPTIMIZE_PROFILE 0x0002 + #define FCNT_NAMES { \ "searches", \ "inserts", \ @@ -60,24 +65,45 @@ NULL \ } +struct pfr_buffer; /* forward definition */ + + struct pfctl { int dev; int opts; + int optimize; int loadopt; - u_int32_t tticket; /* table ticket */ + int asd; /* anchor stack depth */ + int bn; /* brace number */ + int brace; int tdirty; /* kernel dirty */ - u_int32_t rule_nr; +#define PFCTL_ANCHOR_STACK_DEPTH 64 + struct pf_anchor *astack[PFCTL_ANCHOR_STACK_DEPTH]; struct pfioc_pooladdr paddr; struct pfioc_altq *paltq; struct pfioc_queue *pqueue; struct pfr_buffer *trans; - const char *anchor; + struct pf_anchor *anchor, *alast; const char *ruleset; + + /* 'set foo' options */ + u_int32_t timeout[PFTM_MAX]; + u_int32_t limit[PF_LIMIT_MAX]; + u_int32_t debug; + u_int32_t hostid; + char *ifname; + + u_int8_t timeout_set[PFTM_MAX]; + u_int8_t limit_set[PF_LIMIT_MAX]; + u_int8_t debug_set; + u_int8_t hostid_set; + u_int8_t ifname_set; }; struct node_if { char ifname[IFNAMSIZ]; u_int8_t not; + u_int8_t dynamic; /* antispoof */ u_int ifa_flags; struct node_if *next; struct node_if *tail; @@ -95,10 +121,6 @@ struct node_host { struct node_host *next; struct node_host *tail; }; -/* special flags used by ifa_exists */ -#define PF_IFA_FLAG_GROUP 0x10000 -#define PF_IFA_FLAG_DYNAMIC 0x20000 -#define PF_IFA_FLAG_CLONABLE 0x40000 struct node_os { char *os; @@ -187,13 +209,36 @@ struct node_tinit { /* table initializer */ char *file; }; -struct pfr_buffer; /* forward definition */ -int pfctl_rules(int, char *, int, char *, char *, struct pfr_buffer *); +/* optimizer created tables */ +struct pf_opt_tbl { + char pt_name[PF_TABLE_NAME_SIZE]; + int pt_rulecount; + int pt_generated; + struct node_tinithead pt_nodes; + struct pfr_buffer *pt_buf; +}; +#define PF_OPT_TABLE_PREFIX "__automatic_" + +/* optimizer pf_rule container */ +struct pf_opt_rule { + struct pf_rule por_rule; + struct pf_opt_tbl *por_src_tbl; + struct pf_opt_tbl *por_dst_tbl; + u_int64_t por_profile_count; + TAILQ_ENTRY(pf_opt_rule) por_entry; + TAILQ_ENTRY(pf_opt_rule) por_skip_entry[PF_SKIP_COUNT]; +}; + +TAILQ_HEAD(pf_opt_queue, pf_opt_rule); + +int pfctl_rules(int, char *, FILE *, int, int, char *, struct pfr_buffer *); +int pfctl_optimize_ruleset(struct pfctl *, struct pf_ruleset *); -int pfctl_add_rule(struct pfctl *, struct pf_rule *); +int pfctl_add_rule(struct pfctl *, struct pf_rule *, const char *); int pfctl_add_altq(struct pfctl *, struct pf_altq *); int pfctl_add_pool(struct pfctl *, struct pf_pool *, sa_family_t); +void pfctl_move_pool(struct pf_pool *, struct pf_pool *); void pfctl_clear_pool(struct pf_pool *); int pfctl_set_timeout(struct pfctl *, const char *, int, int); @@ -202,14 +247,15 @@ int pfctl_set_limit(struct pfctl *, const char *, unsigned int); int pfctl_set_logif(struct pfctl *, char *); int pfctl_set_hostid(struct pfctl *, u_int32_t); int 
pfctl_set_debug(struct pfctl *, char *); +int pfctl_set_interface_flags(struct pfctl *, char *, int, int); int parse_rules(FILE *, struct pfctl *); int parse_flags(const char *); -int pfctl_load_anchors(int, int, struct pfr_buffer *); +int pfctl_load_anchors(int, struct pfctl *, struct pfr_buffer *); void print_pool(struct pf_pool *, u_int16_t, u_int16_t, sa_family_t, int); void print_src_node(struct pf_src_node *, int); -void print_rule(struct pf_rule *, int); +void print_rule(struct pf_rule *, const char *, int); void print_tabledef(const char *, int, int, struct node_tinithead *); void print_status(struct pf_status *, int); @@ -223,8 +269,8 @@ void print_altq(const struct pf_altq *, unsigned, struct node_queue_bw *, void print_queue(const struct pf_altq *, unsigned, struct node_queue_bw *, int, struct node_queue_opt *); -int pfctl_define_table(char *, int, int, const char *, const char *, - struct pfr_buffer *, u_int32_t); +int pfctl_define_table(char *, int, int, const char *, struct pfr_buffer *, + u_int32_t); void pfctl_clear_fingerprints(int, int); int pfctl_file_fingerprints(int, int, const char *); @@ -266,7 +312,7 @@ extern const struct pf_timeout pf_timeouts[]; void set_ipmask(struct node_host *, u_int8_t); int check_netmask(struct node_host *, sa_family_t); void ifa_load(void); -struct node_host *ifa_exists(const char *, int); +struct node_host *ifa_exists(const char *); struct node_host *ifa_lookup(const char *, int); struct node_host *host(const char *); diff --git a/usr.sbin/pfctl/pfctl_qstats.c b/usr.sbin/pfctl/pfctl_qstats.c index d6e95cee83..a9268fe758 100644 --- a/usr.sbin/pfctl/pfctl_qstats.c +++ b/usr.sbin/pfctl/pfctl_qstats.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_qstats.c,v 1.29 2004/03/15 15:25:44 dhartmei Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_qstats.c,v 1.3 2008/04/06 18:58:14 dillon Exp $ */ +/* $OpenBSD: pfctl_qstats.c,v 1.30 2004/04/27 21:47:32 kjc Exp $ */ /* * Copyright (c) Henning Brauer diff --git a/usr.sbin/pfctl/pfctl_radix.c b/usr.sbin/pfctl/pfctl_radix.c index c3349cd97b..eb9c047ad9 100644 --- a/usr.sbin/pfctl/pfctl_radix.c +++ b/usr.sbin/pfctl/pfctl_radix.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_radix.c,v 1.24 2004/02/10 18:29:30 henning Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_radix.c,v 1.1 2004/09/21 21:25:28 joerg Exp $ */ +/* $OpenBSD: pfctl_radix.c,v 1.27 2005/05/21 21:03:58 henning Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -393,44 +392,6 @@ pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, return (0); } -int -pfr_ina_begin(struct pfr_table *trs, int *ticket, int *ndel, int flags) -{ - struct pfioc_table io; - - bzero(&io, sizeof io); - if (trs != NULL) - io.pfrio_table = *trs; - io.pfrio_flags = flags; - if (ioctl(dev_fd, DIOCRINABEGIN, &io)) - return (-1); - if (ndel != NULL) - *ndel = io.pfrio_ndel; - if (ticket != NULL) - *ticket = io.pfrio_ticket; - return (0); -} - -int -pfr_ina_commit(struct pfr_table *trs, int ticket, int *nadd, int *nchange, - int flags) -{ - struct pfioc_table io; - - bzero(&io, sizeof io); - if (trs != NULL) - io.pfrio_table = *trs; - io.pfrio_flags = flags; - io.pfrio_ticket = ticket; - if (ioctl(dev_fd, DIOCRINACOMMIT, &io)) - return (-1); - if (nadd != NULL) - *nadd = io.pfrio_nadd; - if (nchange != NULL) - *nchange = io.pfrio_nchange; - return (0); -} - int pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, int *nadd, int *naddr, int ticket, int flags) @@ -460,7 +421,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, /* interface 
management code */ int -pfi_get_ifaces(const char *filter, struct pfi_if *buf, int *size, int flags) +pfi_get_ifaces(const char *filter, struct pfi_kif *buf, int *size) { struct pfioc_iface io; @@ -469,7 +430,6 @@ pfi_get_ifaces(const char *filter, struct pfi_if *buf, int *size, int flags) return (-1); } bzero(&io, sizeof io); - io.pfiio_flags = flags; if (filter != NULL) if (strlcpy(io.pfiio_name, filter, sizeof(io.pfiio_name)) >= sizeof(io.pfiio_name)) { @@ -490,7 +450,7 @@ pfi_get_ifaces(const char *filter, struct pfi_if *buf, int *size, int flags) size_t buf_esize[PFRB_MAX] = { 0, sizeof(struct pfr_table), sizeof(struct pfr_tstats), sizeof(struct pfr_addr), sizeof(struct pfr_astats), - sizeof(struct pfi_if), sizeof(struct pfioc_trans_e) + sizeof(struct pfi_kif), sizeof(struct pfioc_trans_e) }; /* @@ -607,7 +567,7 @@ pfr_buf_load(struct pfr_buffer *b, char *file, int nonetwork, if (!strcmp(file, "-")) fp = stdin; else { - fp = fopen(file, "r"); + fp = pfctl_fopen(file, "r"); if (fp == NULL) return (-1); } diff --git a/usr.sbin/pfctl/pfctl_table.c b/usr.sbin/pfctl/pfctl_table.c index 6dcf0e1319..9a7f7da028 100644 --- a/usr.sbin/pfctl/pfctl_table.c +++ b/usr.sbin/pfctl/pfctl_table.c @@ -1,5 +1,4 @@ -/* $OpenBSD: pfctl_table.c,v 1.59 2004/03/15 15:25:44 dhartmei Exp $ */ -/* $DragonFly: src/usr.sbin/pfctl/pfctl_table.c,v 1.1 2004/09/21 21:25:28 joerg Exp $ */ +/* $OpenBSD: pfctl_table.c,v 1.66 2007/03/01 17:20:54 deraadt Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -54,7 +53,7 @@ extern void usage(void); static int pfctl_table(int, char *[], char *, const char *, char *, - const char *, const char *, int); + const char *, int); static void print_table(const struct pfr_table *, int, int); static void print_tstats(const struct pfr_tstats *, int); static int load_addr(struct pfr_buffer *, int, char *[], char *, int); @@ -63,8 +62,7 @@ static void print_addrx(const struct pfr_addr *, const struct pfr_addr *, static void print_astats(const struct pfr_astats *, int); static void radix_perror(void); static void xprintf(int, const char *, ...); -static void print_iface(const struct pfi_if *, int); -static void oprintf(int, int, const char *, int *, int); +static void print_iface(const struct pfi_kif *, int); static const char *stats_text[PFR_DIR_MAX][PFR_OP_TABLE_MAX] = { { "In/Block:", "In/Pass:", "In/XPass:" }, @@ -87,7 +85,13 @@ static const char *istats_text[2][2][2] = { #define CREATE_TABLE do { \ table.pfrt_flags |= PFR_TFLAG_PERSIST; \ - RVTEST(pfr_add_tables(&table, 1, &nadd, flags)); \ + if ((!(opts & PF_OPT_NOACTION) || \ + (opts & PF_OPT_DUMMYACTION)) && \ + (pfr_add_tables(&table, 1, &nadd, flags)) && \ + (errno != EPERM)) { \ + radix_perror(); \ + goto _error; \ + } \ if (nadd) { \ warn_namespace_collision(table.pfrt_name); \ xprintf(opts, "%d table created", nadd); \ @@ -98,31 +102,29 @@ static const char *istats_text[2][2][2] = { } while(0) int -pfctl_clear_tables(const char *anchor, const char *ruleset, int opts) +pfctl_clear_tables(const char *anchor, int opts) { - return pfctl_table(0, NULL, NULL, "-F", NULL, anchor, ruleset, opts); + return pfctl_table(0, NULL, NULL, "-F", NULL, anchor, opts); } int -pfctl_show_tables(const char *anchor, const char *ruleset, int opts) +pfctl_show_tables(const char *anchor, int opts) { - return pfctl_table(0, NULL, NULL, "-s", NULL, anchor, ruleset, opts); + return pfctl_table(0, NULL, NULL, "-s", NULL, anchor, opts); } int pfctl_command_tables(int argc, char *argv[], char *tname, - const char *command, char *file, const char *anchor, const 
char *ruleset, - int opts) + const char *command, char *file, const char *anchor, int opts) { if (tname == NULL || command == NULL) usage(); - return pfctl_table(argc, argv, tname, command, file, anchor, ruleset, - opts); + return pfctl_table(argc, argv, tname, command, file, anchor, opts); } int pfctl_table(int argc, char *argv[], char *tname, const char *command, - char *file, const char *anchor, const char *ruleset, int opts) + char *file, const char *anchor, int opts) { struct pfr_table table; struct pfr_buffer b, b2; @@ -147,9 +149,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, errx(1, "pfctl_table: strlcpy"); } if (strlcpy(table.pfrt_anchor, anchor, - sizeof(table.pfrt_anchor)) >= sizeof(table.pfrt_anchor) || - strlcpy(table.pfrt_ruleset, ruleset, - sizeof(table.pfrt_ruleset)) >= sizeof(table.pfrt_ruleset)) + sizeof(table.pfrt_anchor)) >= sizeof(table.pfrt_anchor)) errx(1, "pfctl_table: strlcpy"); if (!strcmp(command, "-F")) { @@ -175,7 +175,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, break; } - if (opts & PF_OPT_SHOWALL && b.pfrb_size > 0) + if ((opts & PF_OPT_SHOWALL) && b.pfrb_size > 0) pfctl_print_title("TABLES:"); PFRB_FOREACH(p, &b) @@ -254,6 +254,42 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) print_addrx(a, NULL, opts & PF_OPT_USEDNS); + } else if (!strcmp(command, "expire")) { + const char *errstr; + u_int lifetime; + + b.pfrb_type = PFRB_ASTATS; + b2.pfrb_type = PFRB_ADDRS; + if (argc != 1 || file != NULL) + usage(); + lifetime = strtonum(*argv, 0, UINT_MAX, &errstr); + if (errstr) + errx(1, "expiry time: %s", errstr); + for (;;) { + pfr_buf_grow(&b, b.pfrb_size); + b.pfrb_size = b.pfrb_msize; + RVTEST(pfr_get_astats(&table, b.pfrb_caddr, + &b.pfrb_size, flags)); + if (b.pfrb_size <= b.pfrb_msize) + break; + } + PFRB_FOREACH(p, &b) + if (time(NULL) - ((struct pfr_astats *)p)->pfras_tzero > + lifetime) + if (pfr_buf_add(&b2, + &((struct pfr_astats *)p)->pfras_a)) + err(1, "duplicate buffer"); + + if (opts & PF_OPT_VERBOSE) + flags |= PFR_FLAG_FEEDBACK; + RVTEST(pfr_del_addrs(&table, b2.pfrb_caddr, b2.pfrb_size, + &ndel, flags)); + xprintf(opts, "%d/%d addresses expired", ndel, b2.pfrb_size); + if (opts & PF_OPT_VERBOSE) + PFRB_FOREACH(a, &b2) + if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + print_addrx(a, NULL, + opts & PF_OPT_USEDNS); } else if (!strcmp(command, "show")) { b.pfrb_type = (opts & PF_OPT_VERBOSE) ? 
PFRB_ASTATS : PFRB_ADDRS; @@ -291,7 +327,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, RVTEST(pfr_tst_addrs(&table, b.pfrb_caddr, b.pfrb_size, &nmatch, flags)); xprintf(opts, "%d/%d addresses match", nmatch, b.pfrb_size); - if (opts & PF_OPT_VERBOSE && !(opts & PF_OPT_VERBOSE2)) + if ((opts & PF_OPT_VERBOSE) && !(opts & PF_OPT_VERBOSE2)) PFRB_FOREACH(a, &b) if (a->pfra_fback == PFR_FB_MATCH) print_addrx(a, NULL, @@ -339,8 +375,6 @@ print_table(const struct pfr_table *ta, int verbose, int debug) ta->pfrt_name); if (ta->pfrt_anchor[0]) printf("\t%s", ta->pfrt_anchor); - if (ta->pfrt_ruleset[0]) - printf(":%s", ta->pfrt_ruleset); puts(""); } else puts(ta->pfrt_name); @@ -457,16 +491,14 @@ radix_perror(void) int pfctl_define_table(char *name, int flags, int addrs, const char *anchor, - const char *ruleset, struct pfr_buffer *ab, u_int32_t ticket) + struct pfr_buffer *ab, u_int32_t ticket) { struct pfr_table tbl; bzero(&tbl, sizeof(tbl)); if (strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name)) >= sizeof(tbl.pfrt_name) || strlcpy(tbl.pfrt_anchor, anchor, - sizeof(tbl.pfrt_anchor)) >= sizeof(tbl.pfrt_anchor) || - strlcpy(tbl.pfrt_ruleset, ruleset, sizeof(tbl.pfrt_ruleset)) >= - sizeof(tbl.pfrt_ruleset)) + sizeof(tbl.pfrt_anchor)) >= sizeof(tbl.pfrt_anchor)) errx(1, "pfctl_define_table: strlcpy"); tbl.pfrt_flags = flags; @@ -479,7 +511,7 @@ warn_namespace_collision(const char *filter) { struct pfr_buffer b; const struct pfr_table *t; - const char *name = NULL, *lastcoll = NULL; + const char *name = NULL, *lastcoll; int coll = 0; bzero(&b, sizeof(b)); @@ -542,17 +574,15 @@ int pfctl_show_ifaces(const char *filter, int opts) { struct pfr_buffer b; - const struct pfi_if *p; - int i = 0, f = PFI_FLAG_GROUP|PFI_FLAG_INSTANCE; + const struct pfi_kif *p; + int i = 0; - if (filter != NULL && *filter && !isdigit(filter[strlen(filter)-1])) - f &= ~PFI_FLAG_INSTANCE; bzero(&b, sizeof(b)); b.pfrb_type = PFRB_IFACES; for (;;) { pfr_buf_grow(&b, b.pfrb_size); b.pfrb_size = b.pfrb_msize; - if (pfi_get_ifaces(filter, b.pfrb_caddr, &b.pfrb_size, f)) { + if (pfi_get_ifaces(filter, b.pfrb_caddr, &b.pfrb_size)) { radix_perror(); return (1); } @@ -568,45 +598,30 @@ pfctl_show_ifaces(const char *filter, int opts) } void -print_iface(const struct pfi_if *p, int opts) +print_iface(const struct pfi_kif *p, int opts) { - time_t tzero = p->pfif_tzero; - int flags = (opts & PF_OPT_VERBOSE) ? 
p->pfif_flags : 0; - int first = 1; + time_t tzero = p->pfik_tzero; int i, af, dir, act; - printf("%s", p->pfif_name); - oprintf(flags, PFI_IFLAG_INSTANCE, "instance", &first, 0); - oprintf(flags, PFI_IFLAG_GROUP, "group", &first, 0); - oprintf(flags, PFI_IFLAG_CLONABLE, "clonable", &first, 0); - oprintf(flags, PFI_IFLAG_DYNAMIC, "dynamic", &first, 0); - oprintf(flags, PFI_IFLAG_ATTACHED, "attached", &first, 1); + printf("%s", p->pfik_name); + if (opts & PF_OPT_VERBOSE) { + if (p->pfik_flags & PFI_IFLAG_SKIP) + printf(" (skip)"); + } printf("\n"); if (!(opts & PF_OPT_VERBOSE2)) return; printf("\tCleared: %s", ctime(&tzero)); printf("\tReferences: [ States: %-18d Rules: %-18d ]\n", - p->pfif_states, p->pfif_rules); + p->pfik_states, p->pfik_rules); for (i = 0; i < 8; i++) { af = (i>>2) & 1; dir = (i>>1) &1; act = i & 1; printf("\t%-12s [ Packets: %-18llu Bytes: %-18llu ]\n", istats_text[af][dir][act], - (unsigned long long)p->pfif_packets[af][dir][act], - (unsigned long long)p->pfif_bytes[af][dir][act]); + (unsigned long long)p->pfik_packets[af][dir][act], + (unsigned long long)p->pfik_bytes[af][dir][act]); } } - -void -oprintf(int flags, int flag, const char *s, int *first, int last) -{ - if (flags & flag) { - printf(*first ? "\t(%s" : ", %s", s); - *first = 0; - } - if (last && !*first) - printf(")"); -} -
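For reference, the parser-side half of the new interface-group support (ifa_exists() and ifa_grouplookup() in pfctl_parser.c above) resolves a group name such as "egress" to its member interfaces with the SIOCGIFGMEMB ioctl: a first call with ifgr_groups left NULL only reports the buffer size needed, and a second call fills in an array of struct ifg_req. A stand-alone sketch of that two-call pattern, assuming the interface-group API imported by this commit is present (error handling is minimal):

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>

    #include <err.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /*
     * Print the members of an interface group, e.g. "egress", using the
     * same SIOCGIFGMEMB size-query/fill pattern as ifa_grouplookup().
     */
    int
    main(int argc, char *argv[])
    {
        struct ifgroupreq ifgr;
        struct ifg_req *ifg;
        size_t len;
        int s;

        if (argc != 2)
            errx(1, "usage: groupmembers <group>");
        if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
            err(1, "socket");

        memset(&ifgr, 0, sizeof(ifgr));
        strlcpy(ifgr.ifgr_name, argv[1], sizeof(ifgr.ifgr_name));
        if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1)
            err(1, "SIOCGIFGMEMB (size query)");

        len = ifgr.ifgr_len;
        if ((ifgr.ifgr_groups = calloc(1, len)) == NULL)
            err(1, "calloc");
        if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1)
            err(1, "SIOCGIFGMEMB");

        for (ifg = ifgr.ifgr_groups; ifg != NULL && len >= sizeof(*ifg);
            ifg++, len -= sizeof(*ifg))
            printf("%s\n", ifg->ifgrq_member);

        free(ifgr.ifgr_groups);
        close(s);
        return (0);
    }
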