char *strnstr(const char *, const char *, size_t) __pure;
#endif
char *strpbrk(const char *, const char *) __pure;
+#if !defined(_KERNEL_VIRTUAL)
char *strrchr(const char *, int) __pure;
+#endif
#if __BSD_VISIBLE
#if !defined(_KERNEL_VIRTUAL)
char *strsep(char **, const char *);
-.\" $OpenBSD: pf.4,v 1.46 2004/02/19 21:29:51 cedric Exp $
-.\" $DragonFly: src/share/man/man4/pf.4,v 1.5 2007/05/19 17:32:12 swildner Exp $
+.\" $OpenBSD: pf.4,v 1.58 2007/02/09 11:39:06 henning Exp $
.\"
.\" Copyright (C) 2001, Kjell Wooding. All rights reserved.
.\"
.Xr pfctl 8 .
.Pp
Manipulations like loading a ruleset that involve more than a single
-ioctl call require a so-called ticket, which prevents the occurrence of
+.Xr ioctl 2
+call require a so-called
+.Em ticket ,
+which prevents the occurrence of
multiple concurrent manipulations.
.Pp
-Fields of ioctl parameter structures that refer to packet data (like
+Fields of
+.Xr ioctl 2
+parameter structures that refer to packet data (like
addresses and ports) are generally expected in network byte-order.
-.Sh FILES
-.Bl -tag -width /dev/pf -compact
-.It Pa /dev/pf
-packet filtering device.
-.El
+.Pp
+Rules and address tables are contained in so-called
+.Em anchors .
+When servicing an
+.Xr ioctl 2
+request, if the anchor field of the argument structure is empty,
+the kernel will use the default anchor (i.e., the main ruleset)
+in operations.
+Anchors are specified by name and may be nested, with components
+separated by
+.Sq /
+characters, similar to how file system hierarchies are laid out.
+The final component of the anchor path is the anchor under which
+operations will be performed.
.Sh IOCTL INTERFACE
-pf supports the following
+.Nm
+supports the following
.Xr ioctl 2
-commands:
+commands, available through
+.Aq Pa net/pfvar.h :
.Bl -tag -width xxxxxx
.It Dv DIOCSTART
-Starts the packet filter.
+Start the packet filter.
.It Dv DIOCSTOP
-Stops the packet filter.
+Stop the packet filter.
.It Dv DIOCSTARTALTQ
-Starts the
-.Xr altq 4
-bandwidth control system.
+Start the ALTQ bandwidth control system (see
+.Xr altq 9 ) .
.It Dv DIOCSTOPALTQ
-Stops the
-.Xr altq 4
-bandwidth control system.
-.It Dv DIOCBEGINADDRS Fa "struct pfioc_pooladdr"
+Stop the ALTQ bandwidth control system.
+.It Dv DIOCBEGINADDRS Fa "struct pfioc_pooladdr *pp"
.Bd -literal
struct pfioc_pooladdr {
u_int32_t action;
u_int8_t r_action;
u_int8_t r_last;
u_int8_t af;
- char anchor[PF_ANCHOR_NAME_SIZE];
- char ruleset[PF_RULESET_NAME_SIZE];
+ char anchor[MAXPATHLEN];
struct pf_pooladdr addr;
};
.Ed
.Pp
-Clears the buffer address pool
-and returns a
+Clear the buffer address pool and get a
.Va ticket
for subsequent
.Dv DIOCADDADDR ,
-.Dv DIOCADDRULE
+.Dv DIOCADDRULE ,
and
.Dv DIOCCHANGERULE
calls.
-.It Dv DIOCADDADDR Fa "struct pfioc_pooladdr"
+.It Dv DIOCADDADDR Fa "struct pfioc_pooladdr *pp"
.Pp
-Adds pool address
+Add the pool address
.Va addr
to the buffer address pool to be used in the following
.Dv DIOCADDRULE
.Dv DIOCCHANGERULE
call.
All other members of the structure are ignored.
-.It Dv DIOCADDRULE Fa "struct pfioc_rule"
+.It Dv DIOCADDRULE Fa "struct pfioc_rule *pr"
.Bd -literal
struct pfioc_rule {
u_int32_t action;
u_int32_t ticket;
u_int32_t pool_ticket;
u_int32_t nr;
- char anchor[PF_ANCHOR_NAME_SIZE];
- char ruleset[PF_RULESET_NAME_SIZE];
+ char anchor[MAXPATHLEN];
+ char anchor_call[MAXPATHLEN];
struct pf_rule rule;
};
.Ed
.Pp
-Adds
+Add
.Va rule
at the end of the inactive ruleset.
-Requires
+This call requires a
.Va ticket
-obtained through preceding
+obtained through a preceding
.Dv DIOCXBEGIN
-call, and
+call and a
.Va pool_ticket
-obtained through
+obtained through a
.Dv DIOCBEGINADDRS
call.
.Dv DIOCADDADDR
must also be called if any pool addresses are required.
The optional
.Va anchor
-and
-.Va ruleset
-names indicate the anchor and ruleset in which to append the rule.
+name indicates the anchor in which to append the rule.
.Va nr
and
.Va action
are ignored.
-.It Dv DIOCADDALTQ Fa "struct pfioc_altq"
-Adds
+.It Dv DIOCADDALTQ Fa "struct pfioc_altq *pa"
+Add an ALTQ discipline or queue.
.Bd -literal
struct pfioc_altq {
u_int32_t action;
u_int32_t ticket;
u_int32_t nr;
- struct pf_altq altq;
+ struct pf_altq altq;
};
.Ed
-.It Dv DIOCGETRULES Fa "struct pfioc_rule"
-Returns
+.It Dv DIOCGETRULES Fa "struct pfioc_rule *pr"
+Get a
.Va ticket
for subsequent
.Dv DIOCGETRULE
-calls and
+calls and the number
.Va nr
of rules in the active ruleset.
-.It Dv DIOCGETRULE Fa "struct pfioc_rule"
-Returns
+.It Dv DIOCGETRULE Fa "struct pfioc_rule *pr"
+Get a
.Va rule
-number
+by its number
.Va nr
-using
+using the
.Va ticket
obtained through a preceding
.Dv DIOCGETRULES
call.
-.It Dv DIOCGETADDRS Fa "struct pfioc_pooladdr"
-Returns
+If
+.Va action
+is set to
+.Dv PF_GET_CLR_CNTR ,
+the per-rule statistics on the requested rule are cleared.
+.It Dv DIOCGETADDRS Fa "struct pfioc_pooladdr *pp"
+Get a
.Va ticket
for subsequent
.Dv DIOCGETADDR
-calls and
+calls and the number
.Va nr
of pool addresses in the rule specified with
.Va r_action ,
.Va r_num ,
-.Va anchor
and
-.Va ruleset .
-.It Dv DIOCGETADDR Fa "struct pfioc_pooladdr"
-Returns pool address
+.Va anchor .
+.It Dv DIOCGETADDR Fa "struct pfioc_pooladdr *pp"
+Get the pool address
.Va addr
-number
+by its number
.Va nr
from the rule specified with
.Va r_action ,
.Va r_num ,
-.Va anchor
and
-.Va ruleset
-using
+.Va anchor
+using the
.Va ticket
obtained through a preceding
.Dv DIOCGETADDRS
call.
-.It Dv DIOCGETALTQS Fa "struct pfioc_altq"
-Returns
+.It Dv DIOCGETALTQS Fa "struct pfioc_altq *pa"
+Get a
.Va ticket
for subsequent
.Dv DIOCGETALTQ
-calls and
+calls and the number
.Va nr
of queues in the active list.
-.It Dv DIOCGETALTQ Fa "struct pfioc_altq"
-Returns
+.It Dv DIOCGETALTQ Fa "struct pfioc_altq *pa"
+Get the queueing discipline
.Va altq
-number
+by its number
.Va nr
-using
+using the
.Va ticket
obtained through a preceding
.Dv DIOCGETALTQS
call.
-.It Dv DIOCGETQSTATS Fa "struct pfioc_qstats"
-Returns statistics on a queue.
+.It Dv DIOCGETQSTATS Fa "struct pfioc_qstats *pq"
+Get the statistics on a queue.
.Bd -literal
struct pfioc_qstats {
u_int32_t ticket;
};
.Ed
.Pp
-A pointer to a buffer of statistics
-.Va buf
+This call fills in a pointer to the buffer of statistics
+.Va buf ,
of length
-.Va nbytes
+.Va nbytes ,
for the queue specified by
.Va nr .
-.It Dv DIOCADDSTATE Fa "struct pfioc_state"
-Adds a state entry.
-.It Dv DIOCGETSTATE Fa "struct pfioc_state"
+.It Dv DIOCGETRULESETS Fa "struct pfioc_ruleset *pr"
+.Bd -literal
+struct pfioc_ruleset {
+ u_int32_t nr;
+ char path[MAXPATHLEN];
+ char name[PF_ANCHOR_NAME_SIZE];
+};
+.Ed
+.Pp
+Get the number
+.Va nr
+of rulesets (i.e., anchors) directly attached to the anchor named by
+.Va path
+for use in subsequent
+.Dv DIOCGETRULESET
+calls.
+Nested anchors, since they are not directly attached to the given
+anchor, will not be included.
+This ioctl returns
+.Er EINVAL
+if the given anchor does not exist.
+.It Dv DIOCGETRULESET Fa "struct pfioc_ruleset *pr"
+Get a ruleset (i.e., an anchor)
+.Va name
+by its number
+.Va nr
+from the given anchor
+.Va path ,
+the maximum number of which can be obtained from a preceding
+.Dv DIOCGETRULESETS
+call.
+This ioctl returns
+.Er EINVAL
+if the given anchor does not exist or
+.Er EBUSY
+if another process is concurrently updating a ruleset.
+.It Dv DIOCADDSTATE Fa "struct pfioc_state *ps"
+Add a state entry.
.Bd -literal
struct pfioc_state {
u_int32_t nr;
struct pf_state state;
};
.Ed
-.Pp
-Extracts the entry with the specified number from the state table.
-.It Dv DIOCKILLSTATES Fa "struct pfioc_state_kill"
-Removes matching entries from the state table.
-Returns the number of killed states in psk_af.
+.It Dv DIOCGETSTATE Fa "struct pfioc_state *ps"
+Extract the entry with the specified number
+.Va nr
+from the state table.
+.It Dv DIOCKILLSTATES Fa "struct pfioc_state_kill *psk"
+Remove matching entries from the state table.
+This ioctl returns the number of killed states in
+.Va psk_af .
.Bd -literal
struct pfioc_state_kill {
- int psk_af;
+ sa_family_t psk_af;
int psk_proto;
struct pf_rule_addr psk_src;
struct pf_rule_addr psk_dst;
char psk_ifname[IFNAMSIZ];
};
.Ed
-.It Dv DIOCCLRSTATES Fa "struct pfioc_state_kill"
-Clears all states.
+.It Dv DIOCCLRSTATES Fa "struct pfioc_state_kill *psk"
+Clear all states.
It works like
.Dv DIOCKILLSTATES ,
-but ignores the psk_af, psk_proto, psk_src and psk_dst fields of the
-.Fa pfioc_state_kill
+but ignores the
+.Va psk_af ,
+.Va psk_proto ,
+.Va psk_src ,
+and
+.Va psk_dst
+fields of the
+.Vt pfioc_state_kill
structure.
-.It Dv DIOCSETSTATUSIF Fa "struct pfioc_if"
+.It Dv DIOCSETSTATUSIF Fa "struct pfioc_if *pi"
+Specify the interface for which statistics are accumulated.
.Bd -literal
struct pfioc_if {
char ifname[IFNAMSIZ];
};
.Ed
-.Pp
-Specifies the interface for which statistics are accumulated.
-.It Dv DIOCGETSTATUS Fa "struct pf_status"
+.It Dv DIOCGETSTATUS Fa "struct pf_status *s"
+Get the internal packet filter statistics.
.Bd -literal
struct pf_status {
u_int64_t counters[PFRES_MAX];
+ u_int64_t lcounters[LCNT_MAX];
u_int64_t fcounters[FCNT_MAX];
u_int64_t scounters[SCNT_MAX];
u_int64_t pcounters[2][2][3];
u_int32_t since;
u_int32_t debug;
char ifname[IFNAMSIZ];
+ u_int8_t pf_chksum[MD5_DIGEST_LENGTH];
};
.Ed
-.Pp
-Gets the internal packet filter statistics.
.It Dv DIOCCLRSTATUS
-Clears the internal packet filter statistics.
-.It Dv DIOCNATLOOK Fa "struct pfioc_natlook"
-Looks up a state table entry by source and destination addresses and ports.
+Clear the internal packet filter statistics.
+.It Dv DIOCNATLOOK Fa "struct pfioc_natlook *pnl"
+Look up a state table entry by source and destination addresses and ports.
.Bd -literal
struct pfioc_natlook {
struct pf_addr saddr;
u_int8_t direction;
};
.Ed
-.It Dv DIOCSETDEBUG Fa "u_int32_t"
-Sets the debug level.
+.It Dv DIOCSETDEBUG Fa "u_int32_t *level"
+Set the debug level.
.Bd -literal
-enum { PF_DEBUG_NONE=0, PF_DEBUG_URGENT=1, PF_DEBUG_MISC=2 };
+enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC,
+ PF_DEBUG_NOISY };
.Ed
-.It Dv DIOCGETSTATES Fa "struct pfioc_states"
+.It Dv DIOCGETSTATES Fa "struct pfioc_states *ps"
+Get state table entries.
.Bd -literal
struct pfioc_states {
int ps_len;
union {
- caddr_t psu_buf;
+ caddr_t psu_buf;
struct pf_state *psu_states;
} ps_u;
#define ps_buf ps_u.psu_buf
#define ps_states ps_u.psu_states
};
.Ed
-.It Dv DIOCCHANGERULE Fa "struct pfioc_rule"
-Adds or removes the
+.Pp
+If
+.Va ps_len
+is non-zero on entry, as many states as possible that can fit into this
+size will be copied into the supplied buffer
+.Va ps_states .
+On exit,
+.Va ps_len
+is always set to the total size required to hold all state table entries
+(i.e., it is set to
+.Li sizeof(struct pf_state) * nr ) .
+.It Dv DIOCCHANGERULE Fa "struct pfioc_rule *pcr"
+Add or remove the
.Va rule
in the ruleset specified by
.Va rule.action .
-.Bd -literal
-enum { PF_CHANGE_ADD_HEAD=1, PF_CHANGE_ADD_TAIL=2,
- PF_CHANGE_ADD_BEFORE=3, PF_CHANGE_ADD_AFTER=4,
- PF_CHANGE_REMOVE=5, PF_CHANGE_GET_TICKET=6 };
-.Ed
.Pp
The type of operation to be performed is indicated by
-.Va action .
+.Va action ,
+which can be any of the following:
+.Bd -literal
+enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL,
+ PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER,
+ PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET };
+.Ed
.Pp
.Va ticket
-must be set to the value obtained with PF_CHANGE_GET_TICKET
-for all actions except PF_CHANGE_GET_TICKET.
+must be set to the value obtained with
+.Dv PF_CHANGE_GET_TICKET
+for all actions except
+.Dv PF_CHANGE_GET_TICKET .
.Va pool_ticket
must be set to the value obtained with the
.Dv DIOCBEGINADDRS
-call for all actions except PF_CHANGE_REMOVE and PF_CHANGE_GET_TICKET.
-.Pp
-.Va anchor
+call for all actions except
+.Dv PF_CHANGE_REMOVE
and
-.Va ruleset
-indicate which anchor and ruleset the operation applies to.
+.Dv PF_CHANGE_GET_TICKET .
+.Va anchor
+indicates to which anchor the operation applies.
.Va nr
-indicates the rule number against which PF_CHANGE_ADD_BEFORE,
-PF_CHANGE_ADD_AFTER or PF_CHANGE_REMOVE actions are applied.
-.It Dv DIOCCHANGEADDR Fa "struct pfioc_pooladdr"
-Adds or removes a pool address
+indicates the rule number against which
+.Dv PF_CHANGE_ADD_BEFORE ,
+.Dv PF_CHANGE_ADD_AFTER ,
+or
+.Dv PF_CHANGE_REMOVE
+actions are applied.
+.\" It Dv DIOCCHANGEALTQ Fa "struct pfioc_altq *pcr"
+.It Dv DIOCCHANGEADDR Fa "struct pfioc_pooladdr *pca"
+Add or remove the pool address
.Va addr
-from a rule specified with
+from the rule specified by
.Va r_action ,
.Va r_num ,
-.Va anchor
and
-.Va ruleset .
-.It Dv DIOCSETTIMEOUT Fa "struct pfioc_tm"
+.Va anchor .
+.It Dv DIOCSETTIMEOUT Fa "struct pfioc_tm *pt"
.Bd -literal
struct pfioc_tm {
int timeout;
int seconds;
};
.Ed
-.It Dv DIOCGETTIMEOUT Fa "struct pfioc_tm"
+.Pp
+Set the state timeout of
+.Va timeout
+to
+.Va seconds .
+The old value will be placed into
+.Va seconds .
+For possible values of
+.Va timeout ,
+consult the
+.Dv PFTM_*
+values in
+.Aq Pa net/pfvar.h .
+.It Dv DIOCGETTIMEOUT Fa "struct pfioc_tm *pt"
+Get the state timeout of
+.Va timeout .
+The value will be placed into the
+.Va seconds
+field.
.It Dv DIOCCLRRULECTRS
Clear per-rule statistics.
-.It Dv DIOCSETLIMIT Fa "struct pfioc_limit"
-Sets hard limits on the memory pools used by the packet filter.
+.It Dv DIOCSETLIMIT Fa "struct pfioc_limit *pl"
+Set the hard limits on the memory pools used by the packet filter.
.Bd -literal
struct pfioc_limit {
int index;
unsigned limit;
};
+
+enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS,
+ PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX };
.Ed
-.It Dv DIOCGETLIMIT Fa "struct pfioc_limit"
-.It Dv DIOCRCLRTABLES Fa "struct pfioc_table"
+.It Dv DIOCGETLIMIT Fa "struct pfioc_limit *pl"
+Get the hard
+.Va limit
+for the memory pool indicated by
+.Va index .
+.It Dv DIOCRCLRTABLES Fa "struct pfioc_table *io"
Clear all tables.
-All the IOCTLs that manipulate radix tables
+All the ioctls that manipulate radix tables
use the same structure described below.
For
-.Dv DIOCRCLRTABLES, pfrio_ndel contains on exit the number
-of tables deleted.
+.Dv DIOCRCLRTABLES ,
+.Va pfrio_ndel
+contains on exit the number of tables deleted.
.Bd -literal
struct pfioc_table {
- struct pfr_table pfrio_table;
- void *pfrio_buffer;
- int pfrio_esize;
- int pfrio_size;
- int pfrio_size2;
- int pfrio_nadd;
- int pfrio_ndel;
- int pfrio_nchange;
- int pfrio_flags;
- int pfrio_ticket;
+ struct pfr_table pfrio_table;
+ void *pfrio_buffer;
+ int pfrio_esize;
+ int pfrio_size;
+ int pfrio_size2;
+ int pfrio_nadd;
+ int pfrio_ndel;
+ int pfrio_nchange;
+ int pfrio_flags;
+ u_int32_t pfrio_ticket;
};
#define pfrio_exists pfrio_nadd
#define pfrio_nzero pfrio_nadd
#define pfrio_setflag pfrio_size2
#define pfrio_clrflag pfrio_nadd
.Ed
-.It Dv DIOCRADDTABLES Fa "struct pfioc_table"
-Creates one or more tables.
-On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures.
-On exit, pfrio_nadd contains the number of tables effectively created.
+.It Dv DIOCRADDTABLES Fa "struct pfioc_table *io"
+Create one or more tables.
+On entry,
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_table
+containing at least
+.Va pfrio_size
+elements.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_table .
+On exit,
+.Va pfrio_nadd
+contains the number of tables effectively created.
.Bd -literal
struct pfr_table {
- char pfrt_anchor[PF_ANCHOR_NAME_SIZE];
- char pfrt_ruleset[PF_RULESET_NAME_SIZE];
- char pfrt_name[PF_TABLE_NAME_SIZE];
- u_int32_t pfrt_flags;
- u_int8_t pfrt_fback;
+ char pfrt_anchor[MAXPATHLEN];
+ char pfrt_name[PF_TABLE_NAME_SIZE];
+ u_int32_t pfrt_flags;
+ u_int8_t pfrt_fback;
};
.Ed
-.It Dv DIOCRDELTABLES Fa "struct pfioc_table"
-Deletes one or more tables.
-On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures.
-On exit, pfrio_nadd contains the number of tables effectively deleted.
-.It Dv DIOCRGETTABLES Fa "struct pfioc_table"
+.It Dv DIOCRDELTABLES Fa "struct pfioc_table *io"
+Delete one or more tables.
+On entry,
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_table
+containing at least
+.Va pfrio_size
+elements.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_table .
+On exit,
+.Va pfrio_ndel
+contains the number of tables effectively deleted.
+.It Dv DIOCRGETTABLES Fa "struct pfioc_table *io"
Get the list of all tables.
-On entry, pfrio_buffer[pfrio_size] contains a valid writeable buffer for
-pfr_table structures.
-On exit, pfrio_size contains the number of tables written into the buffer.
+On entry,
+.Va pfrio_buffer[pfrio_size]
+contains a valid writeable buffer for
+.Vt pfr_table
+structures.
+On exit,
+.Va pfrio_size
+contains the number of tables written into the buffer.
If the buffer is too small, the kernel does not store anything but just
returns the required buffer size, without error.
-.It Dv DIOCRGETTSTATS Fa "struct pfioc_table"
-Like
-.Dv DIOCRGETTABLES ,
-but returns an array of pfr_tstats structures.
+.It Dv DIOCRGETTSTATS Fa "struct pfioc_table *io"
+This call is like
+.Dv DIOCRGETTABLES
+but is used to get an array of
+.Vt pfr_tstats
+structures.
.Bd -literal
struct pfr_tstats {
- struct pfr_table pfrts_t;
- u_int64_t pfrts_packets
- [PFR_DIR_MAX][PFR_OP_TABLE_MAX];
- u_int64_t pfrts_bytes
- [PFR_DIR_MAX][PFR_OP_TABLE_MAX];
- u_int64_t pfrts_match;
- u_int64_t pfrts_nomatch;
- long pfrts_tzero;
- int pfrts_cnt;
- int pfrts_refcnt[PFR_REFCNT_MAX];
+ struct pfr_table pfrts_t;
+ u_int64_t pfrts_packets
+ [PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_bytes
+ [PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+ u_int64_t pfrts_match;
+ u_int64_t pfrts_nomatch;
+ long pfrts_tzero;
+ int pfrts_cnt;
+ int pfrts_refcnt[PFR_REFCNT_MAX];
};
-#define pfrts_name pfrts_t.pfrt_name
-#define pfrts_flags pfrts_t.pfrt_flags
+#define pfrts_name pfrts_t.pfrt_name
+#define pfrts_flags pfrts_t.pfrt_flags
.Ed
-.It Dv DIOCRCLRTSTATS Fa "struct pfioc_table"
-Clears the statistics of one or more tables.
-On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures.
-On exit, pfrio_nzero contains the number of tables effectively cleared.
-.It Dv DIOCRCLRADDRS Fa "struct pfioc_table"
+.It Dv DIOCRCLRTSTATS Fa "struct pfioc_table *io"
+Clear the statistics of one or more tables.
+On entry,
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_table
+containing at least
+.Va pfrio_size
+elements.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_table .
+On exit,
+.Va pfrio_nzero
+contains the number of tables effectively cleared.
+.It Dv DIOCRCLRADDRS Fa "struct pfioc_table *io"
Clear all addresses in a table.
-On entry, pfrio_table contains the table to clear.
-On exit, pfrio_ndel contains the number of addresses removed.
-.It Dv DIOCRADDADDRS Fa "struct pfioc_table"
+On entry,
+.Va pfrio_table
+contains the table to clear.
+On exit,
+.Va pfrio_ndel
+contains the number of addresses removed.
+.It Dv DIOCRADDADDRS Fa "struct pfioc_table *io"
Add one or more addresses to a table.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains the list of pfr_addr structures to add.
-On exit, pfrio_nadd contains the number of addresses effectively added.
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_addr
+containing at least
+.Va pfrio_size
+elements to add to the table.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_addr .
+On exit,
+.Va pfrio_nadd
+contains the number of addresses effectively added.
.Bd -literal
struct pfr_addr {
- union {
- struct in_addr _pfra_ip4addr;
- struct in6_addr _pfra_ip6addr;
- } pfra_u;
- u_int8_t pfra_af;
- u_int8_t pfra_net;
- u_int8_t pfra_not;
- u_int8_t pfra_fback;
+ union {
+ struct in_addr _pfra_ip4addr;
+ struct in6_addr _pfra_ip6addr;
+ } pfra_u;
+ u_int8_t pfra_af;
+ u_int8_t pfra_net;
+ u_int8_t pfra_not;
+ u_int8_t pfra_fback;
};
#define pfra_ip4addr pfra_u._pfra_ip4addr
#define pfra_ip6addr pfra_u._pfra_ip6addr
.Ed
-.It Dv DIOCRDELADDRS Fa "struct pfioc_table"
+.It Dv DIOCRDELADDRS Fa "struct pfioc_table *io"
Delete one or more addresses from a table.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains the list of pfr_addr structures to delete.
-On exit, pfrio_ndel contains the number of addresses effectively deleted.
-.It Dv DIOCRSETADDRS Fa "struct pfioc_table"
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_addr
+containing at least
+.Va pfrio_size
+elements to delete from the table.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_addr .
+On exit,
+.Va pfrio_ndel
+contains the number of addresses effectively deleted.
+.It Dv DIOCRSETADDRS Fa "struct pfioc_table *io"
Replace the content of a table with a new address list.
This is the most complicated command, which uses all the structure members.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains the new list of pfr_addr structures.
-In addition to that, if size2 is nonzero, pfrio_buffer[pfrio_size..pfrio_size2]
-must be a writeable buffer, into which the kernel can copy the addresses that
-have been deleted during the replace operation.
-On exit, pfrio_ndel, pfrio_nadd and pfrio_nchange contain the number of
-addresses deleted, added and changed by the kernel.
-If pfrio_size2 was set on
-entry, pfrio_size2 will point to the size of the buffer used, exactly like
+.Pp
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_addr
+containing at least
+.Va pfrio_size
+elements which become the new contents of the table.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_addr .
+Additionally, if
+.Va pfrio_size2
+is non-zero,
+.Va pfrio_buffer[pfrio_size..pfrio_size2]
+must be a writeable buffer, into which the kernel can copy the
+addresses that have been deleted during the replace operation.
+On exit,
+.Va pfrio_ndel ,
+.Va pfrio_nadd ,
+and
+.Va pfrio_nchange
+contain the number of addresses deleted, added, and changed by the
+kernel.
+If
+.Va pfrio_size2
+was set on entry,
+.Va pfrio_size2
+will contain the size of the buffer used, exactly like
.Dv DIOCRGETADDRS .
-.It Dv DIOCRGETADDRS Fa "struct pfioc_table"
+.It Dv DIOCRGETADDRS Fa "struct pfioc_table *io"
Get all the addresses of a table.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains a valid writeable buffer for pfr_addr structures.
-On exit, pfrio_size contains the number of addresses written into the buffer.
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer[pfrio_size]
+contains a valid writeable buffer for
+.Vt pfr_addr
+structures.
+On exit,
+.Va pfrio_size
+contains the number of addresses written into the buffer.
If the buffer was too small, the kernel does not store anything but just
-return the required buffer size, without returning an error.
-.It Dv DIOCRGETASTATS Fa "struct pfioc_table"
-Like
-.Dv DIOCRGETADDRS ,
-but returns an array of pfr_astats structures.
+returns the required buffer size, without returning an error.
+.It Dv DIOCRGETASTATS Fa "struct pfioc_table *io"
+This call is like
+.Dv DIOCRGETADDRS
+but is used to get an array of
+.Vt pfr_astats
+structures.
.Bd -literal
struct pfr_astats {
- struct pfr_addr pfras_a;
- u_int64_t pfras_packets
- [PFR_DIR_MAX][PFR_OP_ADDR_MAX];
- u_int64_t pfras_bytes
- [PFR_DIR_MAX][PFR_OP_ADDR_MAX];
- long pfras_tzero;
+ struct pfr_addr pfras_a;
+ u_int64_t pfras_packets
+ [PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ u_int64_t pfras_bytes
+ [PFR_DIR_MAX][PFR_OP_ADDR_MAX];
+ long pfras_tzero;
};
.Ed
-.It Dv DIOCRCLRASTATS Fa "struct pfioc_table"
-Clears the statistics of one or more addresses.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains a table of pfr_addr structures to clear.
-On exit, pfrio_nzero contains the number of addresses effectively cleared.
-.It Dv DIOCRTSTADDRS Fa "struct pfioc_table"
+.It Dv DIOCRCLRASTATS Fa "struct pfioc_table *io"
+Clear the statistics of one or more addresses.
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_addr
+containing at least
+.Va pfrio_size
+elements whose statistics are to be cleared.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_addr .
+On exit,
+.Va pfrio_nzero
+contains the number of addresses effectively cleared.
+.It Dv DIOCRTSTADDRS Fa "struct pfioc_table *io"
Test if the given addresses match a table.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains a table of pfr_addr structures to test.
-On exit, the kernel updates the pfr_addr table by setting the pfra_fback
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_addr
+containing at least
+.Va pfrio_size
+elements, each of which will be tested for a match in the table.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_addr .
+On exit, the kernel updates the
+.Vt pfr_addr
+array by setting the
+.Va pfra_fback
member appropriately.
-.It Dv DIOCRSETTFLAGS Fa "struct pfioc_table"
+.It Dv DIOCRSETTFLAGS Fa "struct pfioc_table *io"
Change the
-.Va const
+.Dv PFR_TFLAG_CONST
or
-.Va persist
-flag of a table.
-On entry, pfrio_buffer[pfrio_size] contains a table of pfr_table structures,
-and pfrio_setflag contains the flags to add, while pfrio_clrflag contains the
-flags to remove.
-On exit, pfrio_nchange and pfrio_ndel contain the number of tables altered
-or deleted by the kernel.
+.Dv PFR_TFLAG_PERSIST
+flags of a table.
+On entry,
+.Va pfrio_buffer
+must point to an array of
+.Vt struct pfr_table
+containing at least
+.Va pfrio_size
+elements.
+.Va pfrio_esize
+must be the size of
+.Vt struct pfr_table .
+.Va pfrio_setflag
+must contain the flags to add, while
+.Va pfrio_clrflag
+must contain the flags to remove.
+On exit,
+.Va pfrio_nchange
+and
+.Va pfrio_ndel
+contain the number of tables altered or deleted by the kernel.
Yes, tables can be deleted if one removes the
-.Va persist
+.Dv PFR_TFLAG_PERSIST
flag of an unreferenced table.
-.It Dv DIOCRINADEFINE Fa "struct pfioc_table"
+.It Dv DIOCRINADEFINE Fa "struct pfioc_table *io"
Define a table in the inactive set.
-On entry, pfrio_table contains the table id and pfrio_buffer[pfrio_size]
-contains the list of pfr_addr structures to put in the table.
-A valid ticket must also be supplied to pfrio_ticket.
-On exit, pfrio_nadd contains 0 if the table was already defined in the
-inactive list, or 1 if a new table has been created.
-pfrio_naddr contains the number of addresses effectively put in the table.
-.It Dv DIOCXBEGIN Fa "struct pfioc_trans"
+On entry,
+.Va pfrio_table
+contains the table ID and
+.Va pfrio_buffer[pfrio_size]
+contains an array of
+.Vt pfr_addr
+structures to put in the table.
+A valid ticket must also be supplied to
+.Va pfrio_ticket .
+On exit,
+.Va pfrio_nadd
+contains 0 if the table was already defined in the inactive list
+or 1 if a new table has been created.
+.Va pfrio_naddr
+contains the number of addresses effectively put in the table.
+.It Dv DIOCXBEGIN Fa "struct pfioc_trans *io"
.Bd -literal
-#define PF_RULESET_ALTQ (PF_RULESET_MAX)
-#define PF_RULESET_TABLE (PF_RULESET_MAX+1)
struct pfioc_trans {
- int size; /* number of elements */
- int esize; /* size of each element in bytes */
- struct pfioc_trans_e {
- int rs_num;
- char anchor[PF_ANCHOR_NAME_SIZE];
- char ruleset[PF_RULESET_NAME_SIZE];
- u_int32_t ticket;
- } *array;
+ int size; /* number of elements */
+ int esize; /* size of each element in bytes */
+ struct pfioc_trans_e {
+ int rs_num;
+ char anchor[MAXPATHLEN];
+ u_int32_t ticket;
+ } *array;
};
.Ed
.Pp
-Clears all the inactive rulesets specified in the
-.Fa "struct pfioc_trans_e"
+Clear all the inactive rulesets specified in the
+.Vt pfioc_trans_e
array.
-For each ruleset, a ticket is returned for subsequent "add rule" IOCTLs,
+For each ruleset, a ticket is returned for subsequent "add rule" ioctls,
as well as for the
.Dv DIOCXCOMMIT
and
.Dv DIOCXROLLBACK
calls.
-.It Dv DIOCXCOMMIT Fa "struct pfioc_trans"
+.Pp
+Ruleset types, identified by
+.Va rs_num ,
+include the following:
+.Pp
+.Bl -tag -width PF_RULESET_FILTER -offset ind -compact
+.It Dv PF_RULESET_SCRUB
+Scrub (packet normalization) rules.
+.It Dv PF_RULESET_FILTER
+Filter rules.
+.It Dv PF_RULESET_NAT
+NAT (Network Address Translation) rules.
+.It Dv PF_RULESET_BINAT
+Bidirectional NAT rules.
+.It Dv PF_RULESET_RDR
+Redirect rules.
+.It Dv PF_RULESET_ALTQ
+ALTQ disciplines.
+.It Dv PF_RULESET_TABLE
+Address tables.
+.El
+.It Dv DIOCXCOMMIT Fa "struct pfioc_trans *io"
Atomically switch a vector of inactive rulesets to the active rulesets.
-Implemented as a standard 2-phase commit, which will either fail for all
-rulesets or completely succeed.
+This call is implemented as a standard two-phase commit, which will either
+fail for all rulesets or completely succeed.
All tickets need to be valid.
-Returns
+This ioctl returns
.Er EBUSY
-if a concurrent process is trying to update some of the same rulesets
-concurrently.
-.It Dv DIOCXROLLBACK Fa "struct pfioc_trans"
+if another process is concurrently updating some of the same rulesets.
+.It Dv DIOCXROLLBACK Fa "struct pfioc_trans *io"
Clean up the kernel by undoing all changes that have taken place on the
inactive rulesets since the last
.Dv DIOCXBEGIN .
.Dv DIOCXROLLBACK
will silently ignore rulesets for which the ticket is invalid.
-.It Dv DIOCFPFLUSH
+.It Dv DIOCSETHOSTID Fa "u_int32_t *hostid"
+Set the host ID, which is used by
+.Xr pfsync 4
+to identify which host created state table entries.
+.It Dv DIOCOSFPFLUSH
Flush the passive OS fingerprint table.
-.It Dv DIOCFPADD Fa "struct pf_osfp_ioctl"
+.It Dv DIOCOSFPADD Fa "struct pf_osfp_ioctl *io"
.Bd -literal
struct pf_osfp_ioctl {
struct pf_osfp_entry {
char fp_version_nm[PF_OSFP_LEN];
char fp_subtype_nm[PF_OSFP_LEN];
} fp_os;
- u_int16_t fp_mss;
+ pf_tcpopts_t fp_tcpopts;
u_int16_t fp_wsize;
u_int16_t fp_psize;
- u_int8_t fp_ttl;
+ u_int16_t fp_mss;
+ u_int16_t fp_flags;
+ u_int8_t fp_optcnt;
u_int8_t fp_wscale;
- u_int8_t fp_flags;
+ u_int8_t fp_ttl;
int fp_getnum;
};
.Ed
.Va fp_wsize ,
.Va fp_psize ,
.Va fp_ttl ,
+.Va fp_optcnt ,
and
.Va fp_wscale
-are set to the TCP MSS, the TCP window size, the IP length and the IP TTL of
-the TCP SYN packet respectively.
+are set to the TCP MSS, the TCP window size, the IP length, the IP TTL,
+the number of TCP options, and the TCP window scaling constant of the
+TCP SYN packet, respectively.
+.Pp
The
.Va fp_flags
member is filled according to the
-.In net/pf/pfvar.h
-include file PF_OSFP_* defines.
+.Aq Pa net/pfvar.h
+include file
+.Dv PF_OSFP_*
+defines.
+The
+.Va fp_tcpopts
+member contains packed TCP options.
+Each option uses
+.Dv PF_OSFP_TCPOPT_BITS
+bits in the packed value.
+Options include any of
+.Dv PF_OSFP_TCPOPT_NOP ,
+.Dv PF_OSFP_TCPOPT_SACK ,
+.Dv PF_OSFP_TCPOPT_WSCALE ,
+.Dv PF_OSFP_TCPOPT_MSS ,
+or
+.Dv PF_OSFP_TCPOPT_TS .
+.Pp
The
.Va fp_getnum
-is not used with this ioctl.
+member is not used with this ioctl.
.Pp
-The structure's slack space must be zeroed for correct operation; memset
+The structure's slack space must be zeroed for correct operation;
+.Xr memset 3
the whole structure to zero before filling and sending to the kernel.
-.It Dv DIOCFPGET Fa "struct pf_osfp_ioctl"
-.Bd -literal
-struct pf_osfp_ioctl {
- struct pf_osfp_entry {
- SLIST_ENTRY(pf_osfp_entry) fp_entry;
- pf_osfp_t fp_os;
- char fp_class_nm[PF_OSFP_LEN];
- char fp_version_nm[PF_OSFP_LEN];
- char fp_subtype_nm[PF_OSFP_LEN];
- } fp_os;
- u_int16_t fp_mss;
- u_int16_t fp_wsize;
- u_int16_t fp_psize;
- u_int8_t fp_ttl;
- u_int8_t fp_wscale;
- u_int8_t fp_flags;
- int fp_getnum;
-};
-.Ed
-.Pp
+.It Dv DIOCOSFPGET Fa "struct pf_osfp_ioctl *io"
Get the passive OS fingerprint number
.Va fp_getnum
from the kernel's fingerprint list.
.Va fp_getnum
number until the ioctl returns
.Er EBUSY .
-.It Dv DIOCGETSRCNODES Fa "struct pfioc_src_nodes"
+.It Dv DIOCGETSRCNODES Fa "struct pfioc_src_nodes *psn"
.Bd -literal
struct pfioc_src_nodes {
- int psn_len;
- union {
- caddr_t psu_buf;
- struct pf_src_node *psu_src_nodes;
- } psn_u;
-#define psn_buf psn_u.psu_buf
-#define psn_src_nodes psn_u.psu_src_nodes
+ int psn_len;
+ union {
+ caddr_t psu_buf;
+ struct pf_src_node *psu_src_nodes;
+ } psn_u;
+#define psn_buf psn_u.psu_buf
+#define psn_src_nodes psn_u.psu_src_nodes
};
.Ed
.Pp
-Get the list of source nodes kept by the
-.Ar sticky-address
-and
-.Ar source-track
-options.
+Get the list of source nodes kept by sticky addresses and source
+tracking.
The ioctl must be called once with
.Va psn_len
set to 0.
.Va psn_buf .
The ioctl must then be called again to fill this buffer with the actual
source node data.
-After the ioctl call
+After that call,
.Va psn_len
will be set to the length of the buffer actually used.
-.It Dv DIOCCLRSRCNODES Fa "struct pfioc_table"
+.It Dv DIOCCLRSRCNODES
Clear the tree of source tracking nodes.
-.It Dv DIOCIGETIFACES Fa "struct pfioc_iface"
-Gets the list of interfaces and interface drivers known to
+.It Dv DIOCIGETIFACES Fa "struct pfioc_iface *io"
+Get the list of interfaces and interface drivers known to
.Nm .
-All the IOCTLs that manipulate interfaces
+All the ioctls that manipulate interfaces
use the same structure described below:
.Bd -literal
struct pfioc_iface {
- char pfiio_name[IFNAMSIZ];
- void *pfiio_buffer;
- int pfiio_esize;
- int pfiio_size;
- int pfiio_nzero;
- int pfiio_flags;
+ char pfiio_name[IFNAMSIZ];
+ void *pfiio_buffer;
+ int pfiio_esize;
+ int pfiio_size;
+ int pfiio_nzero;
+ int pfiio_flags;
};
-
-#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */
-#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */
-#define PFI_FLAG_ALLMASK 0x0003
.Ed
.Pp
If not empty,
is the user-supplied buffer for returning the data.
On entry,
.Va pfiio_size
-represents the number of
-.Va pfi_if
+contains the number of
+.Vt pfi_kif
entries that can fit into the buffer.
The kernel will replace this value by the real number of entries it wants
to return.
.Va pfiio_esize
-should be set to sizeof(struct pfi_if).
-.Va pfiio_flags
should be set to
-.Dv PFI_FLAG_GROUP , PFI_FLAG_INSTANCE ,
-or both to tell the kernel to return a group of interfaces
-(drivers, like "fxp"), real interface instances (like "fxp1") or both.
+.Li sizeof(struct pfi_kif) .
+.Pp
The data is returned in the
-.Va pfi_if
+.Vt pfi_kif
structure described below:
.Bd -literal
-struct pfi_if {
- char pfif_name[IFNAMSIZ];
- u_int64_t pfif_packets[2][2][2];
- u_int64_t pfif_bytes[2][2][2];
- u_int64_t pfif_addcnt;
- u_int64_t pfif_delcnt;
- long pfif_tzero;
- int pfif_states;
- int pfif_rules;
- int pfif_flags;
+struct pfi_kif {
+ RB_ENTRY(pfi_kif) pfik_tree;
+ char pfik_name[IFNAMSIZ];
+ u_int64_t pfik_packets[2][2][2];
+ u_int64_t pfik_bytes[2][2][2];
+ u_int32_t pfik_tzero;
+ int pfik_flags;
+ struct pf_state_tree_lan_ext pfik_lan_ext;
+ struct pf_state_tree_ext_gwy pfik_ext_gwy;
+ TAILQ_ENTRY(pfi_kif) pfik_w_states;
+ void *pfik_ah_cookie;
+ struct ifnet *pfik_ifp;
+ struct ifg_group *pfik_group;
+ int pfik_states;
+ int pfik_rules;
+ TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs;
};
-
-#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */
-#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */
-#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */
-#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */
-#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */
-#define PFI_IFLAG_REFERENCED 0x0080 /* referenced by rules */
.Ed
-.It Dv DIOCICLRISTATS Fa "struct pfioc_iface"
-Clear the statistics counters of one or more interfaces.
-.Va pfiio_name
-and
-.Va pfrio_flags
-can be used to select which interfaces need to be cleared.
+.It Dv DIOCSETIFFLAG Fa "struct pfioc_iface *io"
+Set the user settable flags (described above) of the
+.Nm
+internal interface description.
The filtering process is the same as for
.Dv DIOCIGETIFACES .
-.Va pfiio_nzero
-will be set by the kernel to the number of interfaces and drivers
-that have been cleared.
+.Bd -literal
+#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */
+.Ed
+.It Dv DIOCCLRIFFLAG Fa "struct pfioc_iface *io"
+Works as
+.Dv DIOCSETIFFLAG
+above but clears the flags.
+.El
+.Sh FILES
+.Bl -tag -width /dev/pf -compact
+.It Pa /dev/pf
+packet filtering device.
.El
.Sh EXAMPLES
The following example demonstrates how to use the
.Dv DIOCNATLOOK
-command to find the internal host/port of a NATed connection.
+command to find the internal host/port of a NATed connection:
.Bd -literal
#include <sys/types.h>
#include <sys/socket.h>
.Xr bridge 4 ,
.Xr pflog 4 ,
.Xr pfsync 4 ,
-.Xr pfctl 8
+.Xr pfctl 8 ,
+.Xr altq 9
.Sh HISTORY
The
.Nm
-.\" $OpenBSD: pflog.4,v 1.7 2004/03/21 19:47:59 miod Exp $
-.\" $DragonFly: src/share/man/man4/pflog.4,v 1.4 2007/07/29 17:27:45 swildner Exp $
+.\" $OpenBSD: pflog.4,v 1.9 2006/10/25 12:51:31 jmc Exp $
.\"
.\" Copyright (c) 2001 Tobias Weingartner
.\" All rights reserved.
interface, or stored to disk using
.Xr pflogd 8 .
.Pp
+The pflog0 interface is created automatically at boot if both
+.Xr pf 4
+and
+.Xr pflogd 8
+are enabled;
+further instances can be created using
+.Xr ifconfig 8 .
+.Pp
Each packet retrieved on this interface has a header associated
with it of length
.Dv PFLOG_HDRLEN .
char ruleset[PF_RULESET_NAME_SIZE];
u_int32_t rulenr;
u_int32_t subrulenr;
+ uid_t uid;
+ pid_t pid;
+ uid_t rule_uid;
+ pid_t rule_pid;
u_int8_t dir;
u_int8_t pad[3];
};
.Ed
.Sh EXAMPLES
+Create a
+.Nm
+interface
+and monitor all packets logged on it:
.Bd -literal -offset indent
# ifconfig pflog0 up
# tcpdump -n -e -ttt -i pflog0
.Ed
.Sh SEE ALSO
-.Xr tcpdump 1 ,
.Xr inet 4 ,
.Xr inet6 4 ,
.Xr netintro 4 ,
.Xr pf 4 ,
.Xr ifconfig 8 ,
-.Xr pflogd 8
+.Xr pflogd 8 ,
+.Xr tcpdump 1
.Sh HISTORY
The
.Nm
-.\" $OpenBSD: pfsync.4,v 1.14 2004/03/21 19:47:59 miod Exp $
-.\" $DragonFly: src/share/man/man4/pfsync.4,v 1.6 2007/11/03 18:37:42 swildner Exp $
+.\" $OpenBSD: pfsync.4,v 1.24 2006/10/23 07:05:49 jmc Exp $
.\"
.\" Copyright (c) 2002 Michael Shalayeff
+.\" Copyright (c) 2003-2004 Ryan McBride
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd April 9, 2007
+.Dd August 5, 2010
.Dt PFSYNC 4
.Os
.Sh NAME
.Nm pfsync
-.Nd packet filter states table logging interface
+.Nd packet filter state table logging interface
.Sh SYNOPSIS
.Cd "device pfsync"
.Sh DESCRIPTION
The maximum number of times this can be done before the update is sent out
is controlled by the
.Ar maxupd
-to ifconfig.
+parameter to ifconfig
(see
.Xr ifconfig 8
-and the example below for more details)
+and the example below for more details).
.Pp
Each packet retrieved on this interface has a header associated
with it of length
.Dv PFSYNC_HDRLEN .
The header indicates the version of the protocol, address family,
-action taken on the following states and the number of state
+action taken on the following states, and the number of state
table entries attached in this packet.
-This structure, defined in
-.In net/pf/if_pfsync.h
-looks like:
+This structure is defined in
+.Aq Pa net/pf/if_pfsync.h
+as:
.Bd -literal -offset indent
struct pfsync_header {
u_int8_t version;
interface, by specifying a synchronisation interface using
.Xr ifconfig 8 .
For example, the following command sets fxp0 as the synchronisation
-interface.
+interface:
.Bd -literal -offset indent
-# ifconfig pfsync0 syncif fxp0
+# ifconfig pfsync0 syncdev fxp0
.Ed
.Pp
-State change messages are sent out on the synchronisation
+By default, state change messages are sent out on the synchronisation
interface using IP multicast packets.
The protocol is IP protocol 240, PFSYNC, and the multicast group
used is 224.0.0.240.
+When a peer address is specified using the
+.Ic syncpeer
+keyword, the peer address is used as a destination for the pfsync traffic,
+and the traffic can then be protected using
+.Xr ipsec 4 .
+In such a configuration, the syncdev should be set to the
+.Xr enc 4
+interface, as this is where the traffic arrives when it is decapsulated,
+e.g.:
+.Bd -literal -offset indent
+# ifconfig pfsync0 syncpeer 10.0.0.2 syncdev enc0
+.Ed
.Pp
-It is important that the synchronisation interface be on a trusted
-network as there is no authentication on the protocol and it would
+It is important that the pfsync traffic be well secured
+as there is no authentication on the protocol and it would
be trivial to spoof packets which create states, bypassing the pf ruleset.
-Ideally, this is a network dedicated to pfsync messages,
-i.e. a crossover cable between two firewalls.
+Either run the pfsync protocol on a trusted network \- ideally a network
+dedicated to pfsync messages such as a crossover cable between two firewalls \-
+or specify a peer address and protect the traffic with
+.Xr ipsec 4 .
.Pp
There is a one-to-one correspondence between packets seen by
.Xr bpf 4
.Pa /etc/pf.conf :
.Bd -literal -offset indent
pass quick on { sis2 } proto pfsync
-pass quick on { sis0 sis1 } proto carp keep state
+pass on { sis0 sis1 } proto carp
.Ed
.Pp
If it is preferable that one firewall handle the traffic,
net.inet.carp.preempt=1
.Ed
.Sh SEE ALSO
-.Xr tcpdump 1 ,
.Xr bpf 4 ,
.Xr carp 4 ,
.Xr inet 4 ,
.Xr inet6 4 ,
+.Xr ipsec 4 ,
.Xr netintro 4 ,
.Xr pf 4 ,
.Xr pf.conf 5 ,
.Xr protocols 5 ,
-.Xr ifconfig 8
+.Xr ifconfig 8 ,
+.Xr ifstated 8 ,
+.Xr tcpdump 1
.Sh HISTORY
The
.Nm
libkern/scanc.c standard
libkern/skpc.c standard
libkern/strcat.c standard
-libkern/strchr.c standard
libkern/strcmp.c standard
libkern/strcasecmp.c standard
libkern/fnmatch.c standard
+++ /dev/null
-/*-
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#include <sys/libkern.h>
-#include <sys/_null.h>
-
-char *
-strchr(const char *str, int c)
-{
- const char *s;
-
- for (s = str; (*s) && (*s != (char)c); ++s);
- return ((*s) ? __DECONST(char *, s) : NULL);
-}
{
cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
struct rm_class *cl;
+ struct pf_mtag *pf;
int len;
/* grab class set by classifier */
m_freem(m);
return (ENOBUFS);
}
- if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED)
- cl = clh_to_clp(cbqp, m->m_pkthdr.altq_qid);
+
+ if ((pf = altq_find_pftag(m)) != NULL)
+ cl = clh_to_clp(cbqp, pf->qid);
else
cl = NULL;
if (cl == NULL) {
/*
* FAIRQ - take traffic classified by keep state (hashed into
- * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract
- * the first packet from each bucket in a round-robin fashion.
+ * pf->state_hash) and bucketize it. Fairly extract
+ * the first packet from each bucket in a round-robin fashion.
*
* TODO - better overall qlimit support (right now it is per-bucket).
* - NOTE: red etc is per bucket, not overall.
static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int);
-static int fairq_addq(struct fairq_class *, struct mbuf *);
+static int fairq_addq(struct fairq_class *, struct mbuf *, struct pf_mtag *);
static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
{
struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
struct fairq_class *cl;
+ struct pf_mtag *pf;
int error;
int len;
goto done;
}
- if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED)
- cl = clh_to_clp(pif, m->m_pkthdr.altq_qid);
+ if ((pf = altq_find_pftag(m)) != NULL)
+ cl = clh_to_clp(pif, pf->qid);
else
cl = NULL;
if (cl == NULL) {
cl->cl_flags |= FARF_HAS_PACKETS;
cl->cl_pktattr = NULL;
len = m_pktlen(m);
- if (fairq_addq(cl, m) != 0) {
+ if (fairq_addq(cl, m, pf) != 0) {
/* drop occurred. mbuf was freed in fairq_addq. */
PKTCNTR_ADD(&cl->cl_dropcnt, len);
error = ENOBUFS;
}
static int
-fairq_addq(struct fairq_class *cl, struct mbuf *m)
+fairq_addq(struct fairq_class *cl, struct mbuf *m, struct pf_mtag *pf)
{
fairq_bucket_t *b;
u_int hindex;
* If the packet doesn't have any keep state put it on the end of
* our queue. XXX this can result in out of order delivery.
*/
- if ((m->m_pkthdr.fw_flags & ALTQ_MBUF_STATE_HASHED) == 0) {
+ if (pf == NULL || (pf->flags & PF_TAG_STATE_HASHED) == 0) {
if (cl->cl_head)
b = cl->cl_head->prev;
else
b = &cl->cl_buckets[0];
} else {
- hindex = m->m_pkthdr.altq_state_hash & cl->cl_nbucket_mask;
+ hindex = pf->state_hash & cl->cl_nbucket_mask;
b = &cl->cl_buckets[hindex];
}
{
struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
struct hfsc_class *cl;
+ struct pf_mtag *pf;
int len;
/* grab class set by classifier */
return (ENOBUFS);
}
crit_enter();
- if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED)
- cl = clh_to_clp(hif, m->m_pkthdr.altq_qid);
+ if ((pf = altq_find_pftag(m)) != NULL)
+ cl = clh_to_clp(hif, pf->qid);
else
cl = NULL;
if (cl == NULL || is_a_parent_class(cl)) {
{
struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
struct priq_class *cl;
+ struct pf_mtag *pf;
int error;
int len;
goto done;
}
- if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED)
- cl = clh_to_clp(pif, m->m_pkthdr.altq_qid);
+ if ((pf = altq_find_pftag(m)) != NULL)
+ cl = clh_to_clp(pif, pf->qid);
else
cl = NULL;
if (cl == NULL) {
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
+ struct pf_mtag *pf;
struct mbuf *m0;
void *hdr;
int af;
- if ((m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) == 0)
+ pf = altq_find_pftag(m);
+ if (pf == NULL)
return (0);
- af = m->m_pkthdr.ecn_af;
- hdr = m->m_pkthdr.header;
+ af = pf->af;
+ hdr = pf->hdr;
if (af != AF_INET && af != AF_INET6)
return (0);
}
return (val);
}
+
+/*
+ * Look up the PF_MBUF_TAGGED m_tag attached to mbuf m and return a
+ * pointer to the struct pf_mtag located directly after the m_tag
+ * header.  Returns NULL when m carries no such tag.
+ */
+struct pf_mtag *
+altq_find_pftag(struct mbuf *m)
+{
+	struct m_tag *tag = m_tag_find(m, PF_MBUF_TAGGED, NULL);
+
+	if (tag == NULL)
+		return (NULL);
+	/* The pf_mtag payload follows the m_tag header. */
+	return ((struct pf_mtag *)(tag + 1));
+}
+
struct ifnet;
struct mbuf;
struct pf_altq;
+struct pf_mtag;
void *altq_lookup(const char *, int);
+struct pf_mtag *altq_find_pftag(struct mbuf *m);
uint8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
void write_dsfield(struct mbuf *, struct altq_pktattr *, uint8_t);
int tbr_set(struct ifaltq *, struct tb_profile *);
rel_mplock();
}
+/*
+ * Incoming linkage from device drivers, where we have a mbuf chain
+ * but need to prepend some arbitrary header from a linear buffer.
+ *
+ * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
+ * struct m_hdr on the stack.  This is safe as bpf only reads from the
+ * fields in this header that we initialize, and will not try to free
+ * it or keep a pointer to it.
+ *
+ * arg       - bpf attachment handed through to bpf_mtap()
+ * data/dlen - linear header buffer and its length in bytes
+ * m         - mbuf chain that follows the header
+ * direction - not referenced by this implementation
+ */
+void
+bpf_mtap_hdr(struct bpf_if *arg, caddr_t data, u_int dlen, struct mbuf *m,
+    u_int direction)
+{
+	struct m_hdr mh;
+
+	mh.mh_flags = 0;
+	mh.mh_next = m;
+	mh.mh_len = dlen;
+	mh.mh_data = data;
+
+	/*
+	 * bpf_mtap() is declared void, so call it as a plain statement;
+	 * "return <expr>;" is not valid in a function returning void.
+	 * NOTE(review): the cast assumes struct m_hdr is the leading
+	 * member of struct mbuf - confirm against sys/mbuf.h.
+	 */
+	bpf_mtap(arg, (struct mbuf *)&mh);
+}
+
void
bpf_mtap_family(struct bpf_if *bp, struct mbuf *m, sa_family_t family)
{
void bpf_tap(struct bpf_if *, u_char *, u_int);
void bpf_mtap(struct bpf_if *, struct mbuf *);
void bpf_mtap_family(struct bpf_if *, struct mbuf *m, __uint8_t family);
+void bpf_mtap_hdr(struct bpf_if *, caddr_t, u_int, struct mbuf *, u_int);
void bpf_ptap(struct bpf_if *, struct mbuf *, const void *, u_int);
void bpfattach(struct ifnet *, u_int, u_int);
void bpfattach_dlt(struct ifnet *, u_int, u_int, struct bpf_if **);
#define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */
};
+/*
+ * interface groups
+ */
+
+#define IFG_ALL "all" /* group contains all interfaces */
+/* XXX: will we implement this? */
+#define IFG_EGRESS "egress" /* if(s) default route(s) point to */
+
+/*
+ * One name record for the interface group requests: the same
+ * IFNAMSIZ-byte storage is read either as a group name or as a
+ * member name, via the ifgrq_group / ifgrq_member accessors below.
+ */
+struct ifg_req {
+ union {
+ char ifgrqu_group[IFNAMSIZ];
+ char ifgrqu_member[IFNAMSIZ];
+ } ifgrq_ifgrqu;
+#define ifgrq_group ifgrq_ifgrqu.ifgrqu_group
+#define ifgrq_member ifgrq_ifgrqu.ifgrqu_member
+};
+
+/*
+ * Used to lookup groups for an interface
+ *
+ * The union holds either one group name inline (ifgr_group) or a
+ * pointer to struct ifg_req storage (ifgr_groups).
+ * NOTE(review): ifgr_len presumably gives the size of the ifgr_groups
+ * buffer - confirm against the ioctl handlers.
+ */
+struct ifgroupreq {
+ char ifgr_name[IFNAMSIZ];
+ u_int ifgr_len;
+ union {
+ char ifgru_group[IFNAMSIZ];
+ struct ifg_req *ifgru_groups;
+ } ifgr_ifgru;
+#define ifgr_group ifgr_ifgru.ifgru_group
+#define ifgr_groups ifgr_ifgru.ifgru_groups
+};
/*
* Structure for SIOC[AGD]LIFADDR
#define IFT_PVC 0xf1
#define IFT_FAITH 0xf2
#define IFT_STF 0xf3
+#define IFT_ENC 0xf4
#define IFT_PFLOG 0xf5 /* Packet filter logging */
#define IFT_PFSYNC 0xf6 /* Packet filter state syncing */
#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
(struct ifnet *, struct sockaddr **, struct sockaddr *);
int (*if_start_cpuid) /* cpuid to run if_start */
(struct ifnet *);
+ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
+ /* protected by if_addr_mtx */
#ifdef DEVICE_POLLING
void (*if_poll) /* IFF_POLLING support */
(struct ifnet *, enum poll_cmd, int);
struct lwkt_serialize if_default_serializer; /* if not supplied */
int if_cpuid;
struct netmsg *if_start_nmsg; /* percpu messages to schedule if_start */
+ void *if_pf_kif; /* pf interface abstraction */
};
typedef void if_init_f_t (void *);
typedef void (*ifnet_detach_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_detach_event, ifnet_detach_event_handler_t);
+/*
+ * interface groups
+ *
+ * An ifg_group is one named group.  It carries a reference count, an
+ * opaque pointer for pf (ifg_pf_kif), the tail queue of its members
+ * (struct ifg_member) and its own tail queue linkage (ifg_next).
+ */
+struct ifg_group {
+ char ifg_group[IFNAMSIZ];
+ u_int ifg_refcnt;
+ void *ifg_pf_kif;
+ TAILQ_HEAD(, ifg_member) ifg_members;
+ TAILQ_ENTRY(ifg_group) ifg_next;
+};
+
+/*
+ * Element of an ifg_group's ifg_members queue; refers to one
+ * interface via ifgm_ifp.
+ */
+struct ifg_member {
+ TAILQ_ENTRY(ifg_member) ifgm_next;
+ struct ifnet *ifgm_ifp;
+};
+
+/*
+ * Element of an interface's if_groups queue (see struct ifnet);
+ * refers to one group via ifgl_group.
+ */
+struct ifg_list {
+ struct ifg_group *ifgl_group;
+ TAILQ_ENTRY(ifg_list) ifgl_next;
+};
+
+/* group attach event */
+typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
+EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
+/* group detach event */
+typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
+EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
+/* group change event */
+typedef void (*group_change_event_handler_t)(void *, const char *);
+EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
+
+
#ifdef INVARIANTS
#define ASSERT_IFAC_VALID(ifac) do { \
KKASSERT((ifac)->ifa_magic == IFA_CONTAINER_MAGIC); \
KMOD= pf
SRCS= if_pflog.c pf.c pf_if.c pf_ioctl.c pf_norm.c pf_osfp.c pf_subr.c
-SRCS+= pf_table.c
+SRCS+= pf_table.c pf_ruleset.c
SRCS+= use_pflog.h use_pfsync.h opt_inet.h opt_inet6.h
SRCS+= opt_icmp_bandlim.h
/* $FreeBSD: src/sys/contrib/pf/net/if_pflog.c,v 1.9 2004/06/22 20:13:24 brooks Exp $ */
/* $OpenBSD: if_pflog.c,v 1.11 2003/12/31 11:18:25 cedric Exp $ */
/* $DragonFly: src/sys/net/pf/if_pflog.c,v 1.6 2006/12/22 23:44:57 swildner Exp $ */
-
+/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */
/*
* The authors of this code are John Ioannidis (ji@tla.org),
* Angelos D. Keromytis (kermit@csd.uch.gr) and
#include <sys/systm.h>
#include <sys/in_cksum.h>
#include <sys/mbuf.h>
+#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#define DPRINTF(x)
#endif
-
-static void pflog_clone_destroy(struct ifnet *);
-static int pflog_clone_create(struct if_clone *, int, caddr_t);
+void pflogattach(int);
int pflogoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
int pflogioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
void pflogrtrequest(int, struct rtentry *, struct sockaddr *);
void pflogstart(struct ifnet *);
-static MALLOC_DEFINE(M_PFLOG, PFLOGNAME, "Packet Filter Logging Interface");
-static LIST_HEAD(pflog_list, pflog_softc) pflog_list;
-struct if_clone pflog_cloner = IF_CLONE_INITIALIZER("pflog", pflog_clone_create,
- pflog_clone_destroy, 1, 1);
-
-static void
-pflog_clone_destroy(struct ifnet *ifp)
-{
- struct pflog_softc *sc;
+int pflog_clone_create(struct if_clone *, int, caddr_t);
+void pflog_clone_destroy(struct ifnet *);
- sc = ifp->if_softc;
+LIST_HEAD(, pflog_softc) pflogif_list;
+struct if_clone pflog_cloner =
+ IF_CLONE_INITIALIZER("pflog", pflog_clone_create, pflog_clone_destroy, 1, 1);
- /*
- * Do we really need this?
- */
- IF_DRAIN(&ifp->if_snd);
+struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */
- bpfdetach(ifp);
- if_detach(ifp);
- LIST_REMOVE(sc, sc_next);
- kfree(sc, M_PFLOG);
+void
+pflogattach(int npflog)
+{
+ int i;
+ LIST_INIT(&pflogif_list);
+ for (i = 0; i < PFLOGIFS_MAX; i++)
+ pflogifs[i] = NULL;
+ (void) pflog_clone_create(&pflog_cloner, 0, NULL);
+ if_clone_attach(&pflog_cloner);
}
-static int
+int
pflog_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
- struct pflog_softc *sc;
+ struct ifnet *ifp;
+ struct pflog_softc *pflogif;
- MALLOC(sc, struct pflog_softc *, sizeof(*sc), M_PFLOG, M_WAITOK|M_ZERO);
+ if (unit >= PFLOGIFS_MAX)
+ return (EINVAL);
+
+ if ((pflogif = kmalloc(sizeof(*pflogif), M_DEVBUF, M_WAITOK)) == NULL)
+ return (ENOMEM);
+ bzero(pflogif, sizeof(*pflogif));
+
+ pflogif->sc_unit = unit;
+ ifp = &pflogif->sc_if;
+ ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pflog%d", unit);
+ ifp->if_softc = pflogif;
+ ifp->if_mtu = PFLOGMTU;
+ ifp->if_ioctl = pflogioctl;
+ ifp->if_output = pflogoutput;
+ ifp->if_start = pflogstart;
+ ifp->if_type = IFT_PFLOG;
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ ifp->if_hdrlen = PFLOG_HDRLEN;
+ if_attach(ifp, NULL);
+
+ bpfattach(&pflogif->sc_if, DLT_PFLOG, PFLOG_HDRLEN);
+
+ crit_enter();
+ LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list);
+ pflogifs[unit] = ifp;
+ crit_exit();
- if_initname(&sc->sc_if, ifc->ifc_name, unit);
- sc->sc_if.if_mtu = PFLOGMTU;
- sc->sc_if.if_ioctl = pflogioctl;
- sc->sc_if.if_output = pflogoutput;
- sc->sc_if.if_start = pflogstart;
- sc->sc_if.if_type = IFT_PFLOG;
- sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen;
- sc->sc_if.if_hdrlen = PFLOG_HDRLEN;
- sc->sc_if.if_softc = sc;
- if_attach(&sc->sc_if, NULL);
+ return (0);
+}
+
+void
+pflog_clone_destroy(struct ifnet *ifp)
+{
+ struct pflog_softc *pflogif = ifp->if_softc;
- LIST_INSERT_HEAD(&pflog_list, sc, sc_next);
- bpfattach(&sc->sc_if, DLT_PFLOG, PFLOG_HDRLEN);
+ crit_enter();
+ pflogifs[pflogif->sc_unit] = NULL;
+ LIST_REMOVE(pflogif, sc_list);
+ crit_exit();
- return (0);
+#if NBPFILTER > 0
+ bpfdetach(ifp);
+#endif
+ if_detach(ifp);
+ kfree(pflogif, M_DEVBUF);
}
/*
void
pflogstart(struct ifnet *ifp)
{
- crit_enter();
- IF_DROP(&ifp->if_snd);
- IF_DRAIN(&ifp->if_snd);
- crit_exit();
+ struct mbuf *m;
+
+ for (;;) {
+ crit_enter();
+ IF_DROP(&ifp->if_snd);
+ IF_DEQUEUE(&ifp->if_snd, m);
+ crit_exit();
+
+ if (m == NULL)
+ return;
+ else
+ m_freem(m);
+ }
}
int
int
pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
u_int8_t reason, struct pf_rule *rm, struct pf_rule *am,
- struct pf_ruleset *ruleset)
+ struct pf_ruleset *ruleset, struct pf_pdesc *pd)
{
- struct ifnet *ifn;
+ struct ifnet *ifn = NULL;
struct pfloghdr hdr;
- struct mbuf m1;
if (kif == NULL || m == NULL || rm == NULL)
return (-1);
+ if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf)
+ return (0);
+
bzero(&hdr, sizeof(hdr));
hdr.length = PFLOG_REAL_HDRLEN;
hdr.af = af;
} else {
hdr.rulenr = htonl(am->nr);
hdr.subrulenr = htonl(rm->nr);
- if (ruleset != NULL)
- memcpy(hdr.ruleset, ruleset->name,
+ if (ruleset != NULL && ruleset->anchor != NULL) {
+ strlcpy(hdr.ruleset, ruleset->anchor->name,
sizeof(hdr.ruleset));
-
-
+ }
}
+ if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done)
+ pd->lookup.done = pf_socket_lookup(dir, pd, NULL);
+ if (pd->lookup.done > 0) {
+ hdr.uid = pd->lookup.uid;
+ hdr.pid = pd->lookup.pid;
+ } else {
+ hdr.uid = UID_MAX;
+ hdr.pid = NO_PID;
+ }
+ hdr.rule_uid = rm->cuid;
+ hdr.rule_pid = rm->cpid;
hdr.dir = dir;
#ifdef INET
if (af == AF_INET) {
- struct ip *ip = mtod(m, struct ip *);
-
+ struct ip *ip;
+ ip = mtod(m, struct ip *);
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
if (dir == PF_OUT) {
ip->ip_sum = 0;
ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
- }
+ }
}
#endif /* INET */
- m1.m_next = m;
- m1.m_len = PFLOG_HDRLEN;
- m1.m_data = (char *) &hdr;
-
- KASSERT((!LIST_EMPTY(&pflog_list)), ("pflog: no interface"));
- ifn = &LIST_FIRST(&pflog_list)->sc_if;
-
- BPF_MTAP(ifn, &m1);
+ ifn->if_opackets++;
+ ifn->if_obytes += m->m_pkthdr.len;
+ bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, PFLOG_HDRLEN, m,
+ BPF_DIRECTION_OUT);
#ifdef INET
if (af == AF_INET) {
switch (type) {
case MOD_LOAD:
- LIST_INIT(&pflog_list);
+ LIST_INIT(&pflogif_list);
if_clone_attach(&pflog_cloner);
break;
case MOD_UNLOAD:
if_clone_detach(&pflog_cloner);
- while (!LIST_EMPTY(&pflog_list))
+ while (!LIST_EMPTY(&pflogif_list)) {
pflog_clone_destroy(
- &LIST_FIRST(&pflog_list)->sc_if);
+ &LIST_FIRST(&pflogif_list)->sc_if);
+ }
break;
default:
/* $FreeBSD: src/sys/contrib/pf/net/if_pflog.h,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */
/* $OpenBSD: if_pflog.h,v 1.10 2004/03/19 04:52:04 frantzen Exp $ */
/* $DragonFly: src/sys/net/pf/if_pflog.h,v 1.1 2004/09/19 22:32:47 joerg Exp $ */
-
+/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */
/*
* Copyright (c) 2004 The DragonFly Project. All rights reserved.
*
#ifndef _NET_IF_PFLOG_H_
#define _NET_IF_PFLOG_H_
+#define PFLOGIFS_MAX 16
+
struct pflog_softc {
- struct ifnet sc_if; /* the interface */
- LIST_ENTRY(pflog_softc) sc_next;
+ struct ifnet sc_if; /* the interface */
+ int sc_unit;
+ LIST_ENTRY(pflog_softc) sc_list;
};
-/* XXX keep in sync with pfvar.h */
-#ifndef PF_RULESET_NAME_SIZE
-#define PF_RULESET_NAME_SIZE 16
-#endif
+#define PFLOG_RULESET_NAME_SIZE 16
struct pfloghdr {
u_int8_t length;
u_int8_t action;
u_int8_t reason;
char ifname[IFNAMSIZ];
- char ruleset[PF_RULESET_NAME_SIZE];
+ char ruleset[PFLOG_RULESET_NAME_SIZE];
u_int32_t rulenr;
u_int32_t subrulenr;
+ uid_t uid;
+ pid_t pid;
+ uid_t rule_uid;
+ pid_t rule_pid;
u_int8_t dir;
u_int8_t pad[3];
};
#include "use_pflog.h"
#if NPFLOG > 0
-#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) pflog_packet(i,a,b,c,d,e,f,g)
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h)
#else
-#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g) ((void)0)
+#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0)
#endif /* NPFLOG > 0 */
#endif /* _KERNEL */
#endif /* _NET_IF_PFLOG_H_ */
-/* $FreeBSD: src/sys/contrib/pf/net/if_pfsync.c,v 1.11 2004/08/14 15:32:40 dwmalone Exp $ */
-/* $OpenBSD: if_pfsync.c,v 1.26 2004/03/28 18:14:20 mcbride Exp $ */
-/* $DragonFly: src/sys/net/pf/if_pfsync.c,v 1.8 2008/04/12 17:39:41 dillon Exp $ */
+/* $OpenBSD: if_pfsync.c,v 1.73 2006/11/16 13:13:38 henning Exp $ */
/*
- * Copyright (c) 2004 The DragonFly Project. All rights reserved.
- *
* Copyright (c) 2002 Michael Shalayeff
* All rights reserved.
*
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "opt_inet.h"
-#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/proc.h>
-#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/timeout.h>
#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/sockio.h>
-#include <sys/thread2.h>
-#include <vm/vm_zone.h>
-
-#include <machine/inttypes.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
#ifdef INET
-#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#endif
#ifdef INET6
-#ifndef INET
-#include <netinet/in.h>
-#endif
#include <netinet6/nd6.h>
#endif /* INET6 */
-#include <net/pf/pfvar.h>
-#include <net/pf/if_pfsync.h>
+#include "carp.h"
+#if NCARP > 0
+#include <netinet/ip_carp.h>
+#endif
-#define PFSYNCNAME "pfsync"
+#include <net/pfvar.h>
+#include <net/if_pfsync.h>
+
+#include "bpfilter.h"
+#include "pfsync.h"
#define PFSYNC_MINMTU \
(sizeof(struct pfsync_header) + sizeof(struct pf_state))
#ifdef PFSYNCDEBUG
-#define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0)
+#define DPRINTF(x) do { if (pfsyncdebug) printf x ; } while (0)
int pfsyncdebug;
#else
#define DPRINTF(x)
#endif
-int pfsync_sync_ok;
-struct pfsyncstats pfsyncstats;
+struct pfsync_softc *pfsyncif = NULL;
+struct pfsyncstats pfsyncstats;
-static void pfsync_clone_destroy(struct ifnet *);
-static int pfsync_clone_create(struct if_clone *, int, caddr_t);
+void pfsyncattach(int);
+int pfsync_clone_create(struct if_clone *, int);
+int pfsync_clone_destroy(struct ifnet *);
void pfsync_setmtu(struct pfsync_softc *, int);
-int pfsync_insert_net_state(struct pfsync_state *);
+int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
+ struct pf_state_peer *);
+int pfsync_insert_net_state(struct pfsync_state *, u_int8_t);
+void pfsync_update_net_tdb(struct pfsync_tdb *);
int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
+int pfsyncioctl(struct ifnet *, u_long, caddr_t);
void pfsyncstart(struct ifnet *);
struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int pfsync_sendout(struct pfsync_softc *);
+int pfsync_tdb_sendout(struct pfsync_softc *);
+int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void pfsync_timeout(void *);
+void pfsync_tdb_timeout(void *);
void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void pfsync_bulk_update(void *);
void pfsync_bulkfail(void *);
-static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
-static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
-struct if_clone pfsync_cloner = IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create,
- pfsync_clone_destroy, 1, 1);
-
-static void
-pfsync_clone_destroy(struct ifnet *ifp)
-{
- struct pfsync_softc *sc;
+int pfsync_sync_ok;
+extern int ifqmaxlen;
- sc = ifp->if_softc;
- callout_stop(&sc->sc_tmo);
- callout_stop(&sc->sc_bulk_tmo);
- callout_stop(&sc->sc_bulkfail_tmo);
+struct if_clone pfsync_cloner =
+ IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
- bpfdetach(ifp);
- if_detach(ifp);
- LIST_REMOVE(sc, sc_next);
- kfree(sc, M_PFSYNC);
+void
+pfsyncattach(int npfsync)
+{
+ if_clone_attach(&pfsync_cloner);
}
-
-static int
-pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
+int
+pfsync_clone_create(struct if_clone *ifc, int unit)
{
- struct pfsync_softc *sc;
struct ifnet *ifp;
- MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
- M_WAITOK|M_ZERO);
+ if (unit != 0)
+ return (EINVAL);
pfsync_sync_ok = 1;
- sc->sc_mbuf = NULL;
- sc->sc_mbuf_net = NULL;
- sc->sc_statep.s = NULL;
- sc->sc_statep_net.s = NULL;
- sc->sc_maxupdates = 128;
- sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
- sc->sc_ureq_received = 0;
- sc->sc_ureq_sent = 0;
-
- ifp = &sc->sc_if;
- if_initname(ifp, ifc->ifc_name, unit);
+ if ((pfsyncif = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK)) == NULL)
+ return (ENOMEM);
+ bzero(pfsyncif, sizeof(*pfsyncif));
+ pfsyncif->sc_mbuf = NULL;
+ pfsyncif->sc_mbuf_net = NULL;
+ pfsyncif->sc_mbuf_tdb = NULL;
+ pfsyncif->sc_statep.s = NULL;
+ pfsyncif->sc_statep_net.s = NULL;
+ pfsyncif->sc_statep_tdb.t = NULL;
+ pfsyncif->sc_maxupdates = 128;
+ pfsyncif->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
+ pfsyncif->sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
+ pfsyncif->sc_ureq_received = 0;
+ pfsyncif->sc_ureq_sent = 0;
+ pfsyncif->sc_bulk_send_next = NULL;
+ pfsyncif->sc_bulk_terminator = NULL;
+ ifp = &pfsyncif->sc_if;
+ snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
+ ifp->if_softc = pfsyncif;
ifp->if_ioctl = pfsyncioctl;
ifp->if_output = pfsyncoutput;
ifp->if_start = pfsyncstart;
ifp->if_type = IFT_PFSYNC;
ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_hdrlen = PFSYNC_HDRLEN;
- ifp->if_baudrate = IF_Mbps(100);
- ifp->if_softc = sc;
- pfsync_setmtu(sc, MCLBYTES);
- callout_init(&sc->sc_tmo);
- callout_init(&sc->sc_bulk_tmo);
- callout_init(&sc->sc_bulkfail_tmo);
- if_attach(&sc->sc_if, NULL);
+ pfsync_setmtu(pfsyncif, ETHERMTU);
+ timeout_set(&pfsyncif->sc_tmo, pfsync_timeout, pfsyncif);
+ timeout_set(&pfsyncif->sc_tdb_tmo, pfsync_tdb_timeout, pfsyncif);
+ timeout_set(&pfsyncif->sc_bulk_tmo, pfsync_bulk_update, pfsyncif);
+ timeout_set(&pfsyncif->sc_bulkfail_tmo, pfsync_bulkfail, pfsyncif);
+ if_attach(ifp);
+ if_alloc_sadl(ifp);
+
+#if NCARP > 0
+ if_addgroup(ifp, "carp");
+#endif
+
+#if NBPFILTER > 0
+ bpfattach(&pfsyncif->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+#endif
- LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
- bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
+ return (0);
+}
+/*
+ * Tear down the pfsync interface set up by pfsync_clone_create().
+ *
+ * pfsync_clone_create() arms four timeouts (sc_tmo, sc_tdb_tmo,
+ * sc_bulk_tmo and sc_bulkfail_tmo) whose handlers receive the softc
+ * pointer, and the softc may still hold partially built packets in
+ * sc_mbuf, sc_mbuf_net and sc_mbuf_tdb.  Cancel the timeouts and release
+ * those packets before the softc is freed, so that no handler fires
+ * against freed memory and no mbuf is leaked.
+ *
+ * ifp is the interface being destroyed; its if_softc is the softc
+ * allocated by pfsync_clone_create().  Always returns 0.
+ */
+int
+pfsync_clone_destroy(struct ifnet *ifp)
+{
+	struct pfsync_softc *sc = ifp->if_softc;
+	int s;
+
+	/* same protection level the senders and timeout handlers use */
+	s = splnet();
+	timeout_del(&sc->sc_tmo);
+	timeout_del(&sc->sc_tdb_tmo);
+	timeout_del(&sc->sc_bulk_tmo);
+	timeout_del(&sc->sc_bulkfail_tmo);
+
+	/* drop any packets that were queued but never sent */
+	if (sc->sc_mbuf != NULL) {
+		m_freem(sc->sc_mbuf);
+		sc->sc_mbuf = NULL;
+		sc->sc_statep.s = NULL;
+	}
+	if (sc->sc_mbuf_net != NULL) {
+		m_freem(sc->sc_mbuf_net);
+		sc->sc_mbuf_net = NULL;
+		sc->sc_statep_net.s = NULL;
+	}
+	if (sc->sc_mbuf_tdb != NULL) {
+		m_freem(sc->sc_mbuf_tdb);
+		sc->sc_mbuf_tdb = NULL;
+		sc->sc_statep_tdb.t = NULL;
+	}
+	splx(s);
+
+#if NBPFILTER > 0
+	bpfdetach(ifp);
+#endif
+	if_detach(ifp);
+	free(pfsyncif, M_DEVBUF);
+	pfsyncif = NULL;
return (0);
}
void
pfsyncstart(struct ifnet *ifp)
{
- crit_enter();
- IF_DROP(&ifp->if_snd);
- IF_DRAIN(&ifp->if_snd);
- crit_exit();
+ struct mbuf *m;
+ int s;
+
+ for (;;) {
+ s = splnet();
+ IF_DROP(&ifp->if_snd);
+ IF_DEQUEUE(&ifp->if_snd, m);
+ splx(s);
+
+ if (m == NULL)
+ return;
+ else
+ m_freem(m);
+ }
+}
+
+/*
+ * Make sure the local state peer `d' has a scrub record whenever the
+ * pfsync peer record `s' has its scrub flag set.
+ *
+ * A zeroed record is taken from pf_state_scrub_pl (without sleeping) only
+ * if `d' does not already have one.  Returns 0 when nothing had to be
+ * done or the allocation succeeded, ENOMEM when the pool is exhausted.
+ */
+int
+pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
+    struct pf_state_peer *d)
+{
+	/* no scrub info on the wire side, or a record is already present */
+	if (!s->scrub.scrub_flag || d->scrub != NULL)
+		return (0);
+
+	d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
+	if (d->scrub == NULL)
+		return (ENOMEM);
+
+	bzero(d->scrub, sizeof(*d->scrub));
+	return (0);
}
int
-pfsync_insert_net_state(struct pfsync_state *sp)
+pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag)
{
struct pf_state *st = NULL;
struct pf_rule *r = NULL;
struct pfi_kif *kif;
if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
- kprintf("pfsync_insert_net_state: invalid creator id:"
- " %08" PRIx32 "\n", ntohl(sp->creatorid));
+ printf("pfsync_insert_net_state: invalid creator id:"
+ " %08x\n", ntohl(sp->creatorid));
return (EINVAL);
}
- kif = pfi_lookup_create(sp->ifname);
+ kif = pfi_kif_get(sp->ifname);
if (kif == NULL) {
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync_insert_net_state: "
+ printf("pfsync_insert_net_state: "
"unknown interface: %s\n", sp->ifname);
/* skip this state */
return (0);
}
/*
- * Just use the default rule until we have infrastructure to find the
- * best matching rule.
+ * If the ruleset checksums match, it's safe to associate the state
+ * with the rule of that number.
*/
- r = &pf_default_rule;
+ if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag)
+ r = pf_main_ruleset.rules[
+ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
+ else
+ r = &pf_default_rule;
if (!r->max_states || r->states < r->max_states)
st = pool_get(&pf_state_pl, PR_NOWAIT);
if (st == NULL) {
- pfi_maybe_destroy(kif);
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
return (ENOMEM);
}
bzero(st, sizeof(*st));
+ /* allocate memory for scrub info */
+ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
+ pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) {
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ if (st->src.scrub)
+ pool_put(&pf_state_scrub_pl, st->src.scrub);
+ pool_put(&pf_state_pl, st);
+ return (ENOMEM);
+ }
+
st->rule.ptr = r;
/* XXX get pointers to nat_rule and anchor */
+ /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
+ r->states++;
+
/* fill in the rest of the state entry */
pf_state_host_ntoh(&sp->lan, &st->lan);
pf_state_host_ntoh(&sp->gwy, &st->gwy);
pf_state_peer_ntoh(&sp->dst, &st->dst);
bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
- st->hash = pf_state_hash(st);
- st->creation = ntohl(sp->creation) + time_second;
+ st->creation = time_second - ntohl(sp->creation);
st->expire = ntohl(sp->expire) + time_second;
st->af = sp->af;
bcopy(sp->id, &st->id, sizeof(st->id));
st->creatorid = sp->creatorid;
- st->sync_flags = sp->sync_flags | PFSTATE_FROMSYNC;
-
+ st->sync_flags = PFSTATE_FROMSYNC;
if (pf_insert_state(kif, st)) {
- pfi_maybe_destroy(kif);
+ pfi_kif_unref(kif, PFI_KIF_REF_NONE);
+ /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
+ r->states--;
+ if (st->dst.scrub)
+ pool_put(&pf_state_scrub_pl, st->dst.scrub);
+ if (st->src.scrub)
+ pool_put(&pf_state_scrub_pl, st->src.scrub);
pool_put(&pf_state_pl, st);
return (EINVAL);
}
{
struct ip *ip = mtod(m, struct ip *);
struct pfsync_header *ph;
- struct pfsync_softc *sc = LIST_FIRST(&pfsync_list);
- struct pf_state *st, key;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pf_state *st;
+ struct pf_state_cmp key;
struct pfsync_state *sp;
struct pfsync_state_upd *up;
struct pfsync_state_del *dp;
struct pfsync_state_clr *cp;
struct pfsync_state_upd_req *rup;
struct pfsync_state_bus *bus;
+ struct pfsync_tdb *pt;
struct in_addr src;
struct mbuf *mp;
- int iplen, action, error, i, count, offp;
+ int iplen, action, error, i, s, count, offp, sfail, stale = 0;
+ u_int8_t chksum_flag = 0;
pfsyncstats.pfsyncs_ipackets++;
/* verify that we have a sync interface configured */
- if (!sc->sc_sync_ifp || !pf_status.running)
+ if (!sc || !sc->sc_sync_ifp || !pf_status.running)
goto done;
/* verify that the packet came in on the right interface */
/* Cheaper to grab this now than having to mess with mbufs later */
src = ip->ip_src;
+ if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
+ chksum_flag++;
+
switch (action) {
case PFSYNC_ACT_CLR: {
+ struct pf_state *nexts;
struct pfi_kif *kif;
u_int32_t creatorid;
if ((mp = m_pulldown(m, iplen + sizeof(*ph),
cp = (struct pfsync_state_clr *)(mp->m_data + offp);
creatorid = cp->creatorid;
- crit_enter();
+ s = splsoftnet();
if (cp->ifname[0] == '\0') {
- RB_FOREACH(st, pf_state_tree_id, &tree_id) {
- if (st->creatorid == creatorid)
- st->timeout = PFTM_PURGE;
+ for (st = RB_MIN(pf_state_tree_id, &tree_id);
+ st; st = nexts) {
+ nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
+ if (st->creatorid == creatorid) {
+ st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
+ }
}
} else {
- kif = pfi_lookup_if(cp->ifname);
- if (kif == NULL) {
- if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync_input: PFSYNC_ACT_CLR "
- "bad interface: %s\n", cp->ifname);
- crit_exit();
- goto done;
+ if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
+ splx(s);
+ return;
}
- RB_FOREACH(st, pf_state_tree_lan_ext,
- &kif->pfik_lan_ext) {
- if (st->creatorid == creatorid)
- st->timeout = PFTM_PURGE;
+ for (st = RB_MIN(pf_state_tree_lan_ext,
+ &kif->pfik_lan_ext); st; st = nexts) {
+ nexts = RB_NEXT(pf_state_tree_lan_ext,
+ &kif->pfik_lan_ext, st);
+ if (st->creatorid == creatorid) {
+ st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
+ }
}
}
- pf_purge_expired_states();
- crit_exit();
+ splx(s);
break;
}
return;
}
- crit_enter();
+ s = splsoftnet();
for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
i < count; i++, sp++) {
/* check for invalid values */
sp->direction > PF_OUT ||
(sp->af != AF_INET && sp->af != AF_INET6)) {
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync_insert: PFSYNC_ACT_INS: "
+ printf("pfsync_insert: PFSYNC_ACT_INS: "
"invalid value\n");
pfsyncstats.pfsyncs_badstate++;
continue;
}
- if ((error = pfsync_insert_net_state(sp))) {
+ if ((error = pfsync_insert_net_state(sp,
+ chksum_flag))) {
if (error == ENOMEM) {
- crit_exit();
+ splx(s);
goto done;
}
continue;
}
}
- crit_exit();
+ splx(s);
break;
case PFSYNC_ACT_UPD:
if ((mp = m_pulldown(m, iplen + sizeof(*ph),
return;
}
- crit_enter();
+ s = splsoftnet();
for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
i < count; i++, sp++) {
+ int flags = PFSYNC_FLAG_STALE;
+
/* check for invalid values */
if (sp->timeout >= PFTM_MAX ||
sp->src.state > PF_TCPS_PROXY_DST ||
sp->dst.state > PF_TCPS_PROXY_DST) {
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync_insert: PFSYNC_ACT_UPD: "
+ printf("pfsync_insert: PFSYNC_ACT_UPD: "
"invalid value\n");
pfsyncstats.pfsyncs_badstate++;
continue;
st = pf_find_state_byid(&key);
if (st == NULL) {
/* insert the update */
- if (pfsync_insert_net_state(sp))
+ if (pfsync_insert_net_state(sp, chksum_flag))
pfsyncstats.pfsyncs_badstate++;
continue;
}
+ sfail = 0;
+ if (st->proto == IPPROTO_TCP) {
+ /*
+ * The state should never go backwards except
+ * for syn-proxy states. Neither should the
+ * sequence window slide backwards.
+ */
+ if (st->src.state > sp->src.state &&
+ (st->src.state < PF_TCPS_PROXY_SRC ||
+ sp->src.state >= PF_TCPS_PROXY_SRC))
+ sfail = 1;
+ else if (SEQ_GT(st->src.seqlo,
+ ntohl(sp->src.seqlo)))
+ sfail = 3;
+ else if (st->dst.state > sp->dst.state) {
+ /* There might still be useful
+ * information about the src state here,
+ * so import that part of the update,
+ * then "fail" so we send the updated
+ * state back to the peer who is missing
+ * what we know. */
+ pf_state_peer_ntoh(&sp->src, &st->src);
+ /* XXX do anything with timeouts? */
+ sfail = 7;
+ flags = 0;
+ } else if (st->dst.state >= TCPS_SYN_SENT &&
+ SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
+ sfail = 4;
+ } else {
+ /*
+ * Non-TCP protocol state machines always go
+ * forwards
+ */
+ if (st->src.state > sp->src.state)
+ sfail = 5;
+ else if (st->dst.state > sp->dst.state)
+ sfail = 6;
+ }
+ if (sfail) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: %s stale update "
+ "(%d) id: %016llx "
+ "creatorid: %08x\n",
+ (sfail < 7 ? "ignoring"
+ : "partial"), sfail,
+ betoh64(st->id),
+ ntohl(st->creatorid));
+ pfsyncstats.pfsyncs_badstate++;
+
+ if (!(sp->sync_flags & PFSTATE_STALE)) {
+ /* we have a better state, send it */
+ if (sc->sc_mbuf != NULL && !stale)
+ pfsync_sendout(sc);
+ stale++;
+ if (!st->sync_flags)
+ pfsync_pack_state(
+ PFSYNC_ACT_UPD, st, flags);
+ }
+ continue;
+ }
+ pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
pf_state_peer_ntoh(&sp->src, &st->src);
pf_state_peer_ntoh(&sp->dst, &st->dst);
st->expire = ntohl(sp->expire) + time_second;
st->timeout = sp->timeout;
-
}
- crit_exit();
+ if (stale && sc->sc_mbuf != NULL)
+ pfsync_sendout(sc);
+ splx(s);
break;
/*
* It's not strictly necessary for us to support the "uncompressed"
return;
}
- crit_enter();
+ s = splsoftnet();
for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
i < count; i++, sp++) {
bcopy(sp->id, &key.id, sizeof(key.id));
pfsyncstats.pfsyncs_badstate++;
continue;
}
- /*
- * XXX
- * pf_purge_expired_states() is expensive,
- * we really want to purge the state directly.
- */
- st->timeout = PFTM_PURGE;
st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
}
- pf_purge_expired_states();
- crit_exit();
+ splx(s);
break;
case PFSYNC_ACT_UPD_C: {
int update_requested = 0;
return;
}
- crit_enter();
+ s = splsoftnet();
for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
i < count; i++, up++) {
/* check for invalid values */
up->src.state > PF_TCPS_PROXY_DST ||
up->dst.state > PF_TCPS_PROXY_DST) {
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync_insert: "
+ printf("pfsync_insert: "
"PFSYNC_ACT_UPD_C: "
"invalid value\n");
pfsyncstats.pfsyncs_badstate++;
st = pf_find_state_byid(&key);
if (st == NULL) {
/* We don't have this state. Ask for it. */
- pfsync_request_update(up, &src);
+ error = pfsync_request_update(up, &src);
+ if (error == ENOMEM) {
+ splx(s);
+ goto done;
+ }
update_requested = 1;
pfsyncstats.pfsyncs_badstate++;
continue;
}
+ sfail = 0;
+ if (st->proto == IPPROTO_TCP) {
+ /*
+ * The state should never go backwards except
+ * for syn-proxy states. Neither should the
+ * sequence window slide backwards.
+ */
+ if (st->src.state > up->src.state &&
+ (st->src.state < PF_TCPS_PROXY_SRC ||
+ up->src.state >= PF_TCPS_PROXY_SRC))
+ sfail = 1;
+ else if (st->dst.state > up->dst.state)
+ sfail = 2;
+ else if (SEQ_GT(st->src.seqlo,
+ ntohl(up->src.seqlo)))
+ sfail = 3;
+ else if (st->dst.state >= TCPS_SYN_SENT &&
+ SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
+ sfail = 4;
+ } else {
+ /*
+ * Non-TCP protocol state machines always go
+ * forwards
+ */
+ if (st->src.state > up->src.state)
+ sfail = 5;
+ else if (st->dst.state > up->dst.state)
+ sfail = 6;
+ }
+ if (sfail) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: ignoring stale update "
+ "(%d) id: %016llx "
+ "creatorid: %08x\n", sfail,
+ betoh64(st->id),
+ ntohl(st->creatorid));
+ pfsyncstats.pfsyncs_badstate++;
+
+ /* we have a better state, send it out */
+ if ((!stale || update_requested) &&
+ sc->sc_mbuf != NULL) {
+ pfsync_sendout(sc);
+ update_requested = 0;
+ }
+ stale++;
+ if (!st->sync_flags)
+ pfsync_pack_state(PFSYNC_ACT_UPD, st,
+ PFSYNC_FLAG_STALE);
+ continue;
+ }
+ pfsync_alloc_scrub_memory(&up->dst, &st->dst);
pf_state_peer_ntoh(&up->src, &st->src);
pf_state_peer_ntoh(&up->dst, &st->dst);
st->expire = ntohl(up->expire) + time_second;
st->timeout = up->timeout;
}
- if (update_requested)
+ if ((update_requested || stale) && sc->sc_mbuf)
pfsync_sendout(sc);
- crit_exit();
+ splx(s);
break;
}
case PFSYNC_ACT_DEL_C:
return;
}
- crit_enter();
+ s = splsoftnet();
for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
i < count; i++, dp++) {
bcopy(dp->id, &key.id, sizeof(key.id));
pfsyncstats.pfsyncs_badstate++;
continue;
}
- /*
- * XXX
- * pf_purge_expired_states() is expensive,
- * we really want to purge the state directly.
- */
- st->timeout = PFTM_PURGE;
st->sync_flags |= PFSTATE_FROMSYNC;
+ pf_unlink_state(st);
}
- pf_purge_expired_states();
- crit_exit();
+ splx(s);
break;
case PFSYNC_ACT_INS_F:
case PFSYNC_ACT_DEL_F:
return;
}
- crit_enter();
- /* XXX send existing. pfsync_pack_state should handle this. */
+ s = splsoftnet();
if (sc->sc_mbuf != NULL)
pfsync_sendout(sc);
for (i = 0,
key.creatorid = rup->creatorid;
if (key.id == 0 && key.creatorid == 0) {
- sc->sc_ureq_received = mycpu->gd_time_seconds;
+ sc->sc_ureq_received = time_uptime;
+ if (sc->sc_bulk_send_next == NULL)
+ sc->sc_bulk_send_next =
+ TAILQ_FIRST(&state_list);
+ sc->sc_bulk_terminator = sc->sc_bulk_send_next;
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: received "
+ printf("pfsync: received "
"bulk update request\n");
pfsync_send_bus(sc, PFSYNC_BUS_START);
- callout_reset(&sc->sc_bulk_tmo, 1 * hz,
- pfsync_bulk_update,
- LIST_FIRST(&pfsync_list));
+ timeout_add(&sc->sc_bulk_tmo, 1 * hz);
} else {
st = pf_find_state_byid(&key);
if (st == NULL) {
pfsyncstats.pfsyncs_badstate++;
continue;
}
- pfsync_pack_state(PFSYNC_ACT_UPD, st, 0);
+ if (!st->sync_flags)
+ pfsync_pack_state(PFSYNC_ACT_UPD,
+ st, 0);
}
}
if (sc->sc_mbuf != NULL)
pfsync_sendout(sc);
- crit_exit();
+ splx(s);
break;
case PFSYNC_ACT_BUS:
/* If we're not waiting for a bulk update, who cares. */
bus = (struct pfsync_state_bus *)(mp->m_data + offp);
switch (bus->status) {
case PFSYNC_BUS_START:
- callout_reset(&sc->sc_bulkfail_tmo,
+ timeout_add(&sc->sc_bulkfail_tmo,
pf_pool_limits[PF_LIMIT_STATES].limit /
- (PFSYNC_BULKPACKETS * sc->sc_maxcount),
- pfsync_bulkfail, LIST_FIRST(&pfsync_list));
+ (PFSYNC_BULKPACKETS * sc->sc_maxcount));
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: received bulk "
+ printf("pfsync: received bulk "
"update start\n");
break;
case PFSYNC_BUS_END:
- if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
+ if (time_uptime - ntohl(bus->endtime) >=
sc->sc_ureq_sent) {
/* that's it, we're happy */
sc->sc_ureq_sent = 0;
sc->sc_bulk_tries = 0;
- callout_stop(&sc->sc_bulkfail_tmo);
+ timeout_del(&sc->sc_bulkfail_tmo);
+#if NCARP > 0
+ if (!pfsync_sync_ok)
+ carp_group_demote_adj(&sc->sc_if, -1);
+#endif
pfsync_sync_ok = 1;
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: received valid "
+ printf("pfsync: received valid "
"bulk update end\n");
} else {
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: received invalid "
+ printf("pfsync: received invalid "
"bulk update end: bad timestamp\n");
}
break;
}
break;
+ case PFSYNC_ACT_TDB_UPD:
+ if ((mp = m_pulldown(m, iplen + sizeof(*ph),
+ count * sizeof(*pt), &offp)) == NULL) {
+ pfsyncstats.pfsyncs_badlen++;
+ return;
+ }
+ s = splsoftnet();
+ for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
+ i < count; i++, pt++)
+ pfsync_update_net_tdb(pt);
+ splx(s);
+ break;
}
done:
/* ARGSUSED */
int
-pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
+pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
+ struct proc *p = curproc;
struct pfsync_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
struct ip_moptions *imo = &sc->sc_imo;
struct pfsyncreq pfsyncr;
struct ifnet *sifp;
- int error;
+ int s, error;
switch (cmd) {
case SIOCSIFADDR:
return (EINVAL);
if (ifr->ifr_mtu > MCLBYTES)
ifr->ifr_mtu = MCLBYTES;
- crit_enter();
+ s = splnet();
if (ifr->ifr_mtu < ifp->if_mtu)
pfsync_sendout(sc);
pfsync_setmtu(sc, ifr->ifr_mtu);
- crit_exit();
+ splx(s);
break;
case SIOCGETPFSYNC:
bzero(&pfsyncr, sizeof(pfsyncr));
if (sc->sc_sync_ifp)
- strlcpy(pfsyncr.pfsyncr_syncif,
+ strlcpy(pfsyncr.pfsyncr_syncdev,
sc->sc_sync_ifp->if_xname, IFNAMSIZ);
+ pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
return (error);
break;
case SIOCSETPFSYNC:
- if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0)
+ if ((error = suser(p, p->p_acflag)) != 0)
return (error);
if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
return (error);
+ if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
+ sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
+ else
+ sc->sc_sync_peer.s_addr =
+ pfsyncr.pfsyncr_syncpeer.s_addr;
+
if (pfsyncr.pfsyncr_maxupdates > 255)
return (EINVAL);
sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
- if (pfsyncr.pfsyncr_syncif[0] == 0) {
+ if (pfsyncr.pfsyncr_syncdev[0] == 0) {
sc->sc_sync_ifp = NULL;
if (sc->sc_mbuf_net != NULL) {
/* Don't keep stale pfsync packets around. */
- crit_enter();
+ s = splnet();
m_freem(sc->sc_mbuf_net);
sc->sc_mbuf_net = NULL;
sc->sc_statep_net.s = NULL;
- crit_exit();
+ splx(s);
+ }
+ if (imo->imo_num_memberships > 0) {
+ in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+ imo->imo_multicast_ifp = NULL;
}
break;
}
- if ((sifp = ifunit(pfsyncr.pfsyncr_syncif)) == NULL)
+
+ if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
return (EINVAL);
- else if (sifp == sc->sc_sync_ifp)
- break;
- crit_enter();
+ s = splnet();
if (sifp->if_mtu < sc->sc_if.if_mtu ||
(sc->sc_sync_ifp != NULL &&
sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
imo->imo_multicast_ifp = NULL;
}
- if (sc->sc_sync_ifp) {
+ if (sc->sc_sync_ifp &&
+ sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
struct in_addr addr;
- addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
- /* XXX do we only use one group? Also see above */
+ if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
+ sc->sc_sync_ifp = NULL;
+ splx(s);
+ return (EADDRNOTAVAIL);
+ }
+
+ addr.s_addr = INADDR_PFSYNC_GROUP;
+
if ((imo->imo_membership[0] =
in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
- crit_exit();
+ sc->sc_sync_ifp = NULL;
+ splx(s);
return (ENOBUFS);
}
imo->imo_num_memberships++;
imo->imo_multicast_ifp = sc->sc_sync_ifp;
imo->imo_multicast_ttl = PFSYNC_DFLTTL;
imo->imo_multicast_loop = 0;
+ }
+ if (sc->sc_sync_ifp ||
+ sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
/* Request a full state table update. */
- sc->sc_ureq_sent = mycpu->gd_time_seconds;
+ sc->sc_ureq_sent = time_uptime;
+#if NCARP > 0
+ if (pfsync_sync_ok)
+ carp_group_demote_adj(&sc->sc_if, 1);
+#endif
pfsync_sync_ok = 0;
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: requesting bulk update\n");
- callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
- pfsync_bulkfail, LIST_FIRST(&pfsync_list));
- pfsync_request_update(NULL, NULL);
+ printf("pfsync: requesting bulk update\n");
+ timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
+ error = pfsync_request_update(NULL, NULL);
+ if (error == ENOMEM) {
+ splx(s);
+ return (ENOMEM);
+ }
pfsync_sendout(sc);
}
- crit_exit();
+ splx(s);
break;
struct mbuf *m;
int len;
- MGETHDR(m, MB_DONTWAIT, MT_DATA);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
sc->sc_if.if_oerrors++;
return (NULL);
len = sizeof(struct pfsync_header) +
sizeof(struct pfsync_state_bus);
break;
+ case PFSYNC_ACT_TDB_UPD:
+ len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
+ sizeof(struct pfsync_header);
+ break;
default:
len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
sizeof(struct pfsync_header);
}
if (len > MHLEN) {
- MCLGET(m, MB_DONTWAIT);
+ MCLGET(m, M_DONTWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
sc->sc_if.if_oerrors++;
h->af = 0;
h->count = 0;
h->action = action;
+ if (action != PFSYNC_ACT_TDB_UPD)
+ bcopy(&pf_status.pf_chksum, &h->pf_chksum,
+ PF_MD5_DIGEST_LENGTH);
*sp = (void *)((char *)h + PFSYNC_HDRLEN);
- callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
- LIST_FIRST(&pfsync_list));
+ if (action == PFSYNC_ACT_TDB_UPD)
+ timeout_add(&sc->sc_tdb_tmo, hz);
+ else
+ timeout_add(&sc->sc_tmo, hz);
return (m);
}
int
-pfsync_pack_state(u_int8_t action, struct pf_state *st, int compress)
+pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
{
- struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if;
- struct pfsync_softc *sc = ifp->if_softc;
+ struct ifnet *ifp = NULL;
+ struct pfsync_softc *sc = pfsyncif;
struct pfsync_header *h, *h_net;
struct pfsync_state *sp = NULL;
struct pfsync_state_upd *up = NULL;
struct pfsync_state_del *dp = NULL;
struct pf_rule *r;
u_long secs;
- int ret = 0;
+ int s, ret = 0;
u_int8_t i = 255, newaction = 0;
+ if (sc == NULL)
+ return (0);
+ ifp = &sc->sc_if;
+
/*
* If a packet falls in the forest and there's nobody around to
* hear, does it make a sound?
*/
- if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL) {
+ if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+ sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
/* Don't leave any stale pfsync packets hanging around. */
if (sc->sc_mbuf != NULL) {
m_freem(sc->sc_mbuf);
if (action >= PFSYNC_ACT_MAX)
return (EINVAL);
- crit_enter();
+ s = splnet();
if (sc->sc_mbuf == NULL) {
if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
(void *)&sc->sc_statep.s)) == NULL) {
- crit_exit();
+ splx(s);
return (ENOMEM);
}
h = mtod(sc->sc_mbuf, struct pfsync_header *);
pfsync_sendout(sc);
if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
(void *)&sc->sc_statep.s)) == NULL) {
- crit_exit();
+ splx(s);
return (ENOMEM);
}
h = mtod(sc->sc_mbuf, struct pfsync_header *);
secs = time_second;
- st->pfsync_time = mycpu->gd_time_seconds;
- TAILQ_REMOVE(&state_updates, st, u.s.entry_updates);
- TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates);
+ st->pfsync_time = time_uptime;
if (sp == NULL) {
/* not a "duplicate" update */
bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
sp->creation = htonl(secs - st->creation);
- sp->packets[0] = htonl(st->packets[0]);
- sp->packets[1] = htonl(st->packets[1]);
- sp->bytes[0] = htonl(st->bytes[0]);
- sp->bytes[1] = htonl(st->bytes[1]);
+ pf_state_counter_hton(st->packets[0], sp->packets[0]);
+ pf_state_counter_hton(st->packets[1], sp->packets[1]);
+ pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
+ pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
if ((r = st->rule.ptr) == NULL)
sp->rule = htonl(-1);
else
sp->allow_opts = st->allow_opts;
sp->timeout = st->timeout;
- sp->sync_flags = st->sync_flags & PFSTATE_NOSYNC;
+ if (flags & PFSYNC_FLAG_STALE)
+ sp->sync_flags |= PFSTATE_STALE;
}
pf_state_peer_hton(&st->src, &sp->src);
sp->expire = htonl(st->expire - secs);
/* do we need to build "compressed" actions for network transfer? */
- if (sc->sc_sync_ifp && compress) {
+ if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
switch (action) {
case PFSYNC_ACT_UPD:
newaction = PFSYNC_ACT_UPD_C;
if (sc->sc_mbuf_net == NULL) {
if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
(void *)&sc->sc_statep_net.s)) == NULL) {
- crit_exit();
+ splx(s);
return (ENOMEM);
}
}
(sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
ret = pfsync_sendout(sc);
- crit_exit();
+ splx(s);
return (ret);
}
int
pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
{
- struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if;
+ struct ifnet *ifp = NULL;
struct pfsync_header *h;
- struct pfsync_softc *sc = ifp->if_softc;
+ struct pfsync_softc *sc = pfsyncif;
struct pfsync_state_upd_req *rup;
int ret = 0;
+ if (sc == NULL)
+ return (0);
+
+ ifp = &sc->sc_if;
if (sc->sc_mbuf == NULL) {
if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
- (void *)&sc->sc_statep.s)) == NULL) {
+ (void *)&sc->sc_statep.s)) == NULL)
return (ENOMEM);
- }
h = mtod(sc->sc_mbuf, struct pfsync_header *);
} else {
h = mtod(sc->sc_mbuf, struct pfsync_header *);
if (h->action != PFSYNC_ACT_UREQ) {
pfsync_sendout(sc);
if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
- (void *)&sc->sc_statep.s)) == NULL) {
+ (void *)&sc->sc_statep.s)) == NULL)
return (ENOMEM);
- }
h = mtod(sc->sc_mbuf, struct pfsync_header *);
}
}
int
pfsync_clear_states(u_int32_t creatorid, char *ifname)
{
- struct ifnet *ifp = &(LIST_FIRST(&pfsync_list))->sc_if;
- struct pfsync_softc *sc = ifp->if_softc;
+ struct ifnet *ifp = NULL;
+ struct pfsync_softc *sc = pfsyncif;
struct pfsync_state_clr *cp;
- int ret;
+ int s, ret;
- crit_enter();
+ if (sc == NULL)
+ return (0);
+
+ ifp = &sc->sc_if;
+ s = splnet();
if (sc->sc_mbuf != NULL)
pfsync_sendout(sc);
if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
(void *)&sc->sc_statep.c)) == NULL) {
- crit_exit();
+ splx(s);
return (ENOMEM);
}
sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
strlcpy(cp->ifname, ifname, IFNAMSIZ);
ret = (pfsync_sendout(sc));
- crit_exit();
+ splx(s);
return (ret);
}
pfsync_timeout(void *v)
{
struct pfsync_softc *sc = v;
+ int s;
- crit_enter();
+ s = splnet();
pfsync_sendout(sc);
- crit_exit();
+ splx(s);
}
+/*
+ * Handler for the sc_tdb_tmo timeout: pushes out the pending TDB update
+ * packet via pfsync_tdb_sendout() while at splnet.  `v' is the softc
+ * pointer registered with timeout_set().
+ */
void
+pfsync_tdb_timeout(void *v)
+{
+	struct pfsync_softc *softc = v;
+	int spl = splnet();
+
+	pfsync_tdb_sendout(softc);
+	splx(spl);
+}
+
+/* This must be called in splnet() */
+void
pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
{
struct pfsync_state_bus *bus;
bus = sc->sc_statep.b;
bus->creatorid = pf_status.hostid;
bus->status = status;
- bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received);
+ bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
pfsync_sendout(sc);
}
}
pfsync_bulk_update(void *v)
{
struct pfsync_softc *sc = v;
- int i = 0;
+ int s, i = 0;
struct pf_state *state;
- crit_enter();
+ s = splnet();
if (sc->sc_mbuf != NULL)
pfsync_sendout(sc);
* Grab at most PFSYNC_BULKPACKETS worth of states which have not
* been sent since the latest request was made.
*/
- while ((state = TAILQ_FIRST(&state_updates)) != NULL &&
- ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) {
- if (state->pfsync_time > sc->sc_ureq_received) {
- /* we're done */
- pfsync_send_bus(sc, PFSYNC_BUS_END);
- sc->sc_ureq_received = 0;
- callout_stop(&sc->sc_bulk_tmo);
- if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: bulk update complete\n");
- break;
- } else {
- /* send an update and move to end of list */
- if (!state->sync_flags)
+ state = sc->sc_bulk_send_next;
+ if (state)
+ do {
+ /* send state update if syncable and not already sent */
+ if (!state->sync_flags
+ && state->timeout < PFTM_MAX
+ && state->pfsync_time <= sc->sc_ureq_received) {
pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
- state->pfsync_time = mycpu->gd_time_seconds;
- TAILQ_REMOVE(&state_updates, state, u.s.entry_updates);
- TAILQ_INSERT_TAIL(&state_updates, state,
- u.s.entry_updates);
-
- /* look again for more in a bit */
- callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
- LIST_FIRST(&pfsync_list));
- }
+ i++;
+ }
+
+ /* figure next state to send */
+ state = TAILQ_NEXT(state, u.s.entry_list);
+
+ /* wrap to start of list if we hit the end */
+ if (!state)
+ state = TAILQ_FIRST(&state_list);
+ } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
+ state != sc->sc_bulk_terminator);
+
+ if (!state || state == sc->sc_bulk_terminator) {
+ /* we're done */
+ pfsync_send_bus(sc, PFSYNC_BUS_END);
+ sc->sc_ureq_received = 0;
+ sc->sc_bulk_send_next = NULL;
+ sc->sc_bulk_terminator = NULL;
+ timeout_del(&sc->sc_bulk_tmo);
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: bulk update complete\n");
+ } else {
+ /* look again for more in a bit */
+ timeout_add(&sc->sc_bulk_tmo, 1);
+ sc->sc_bulk_send_next = state;
}
if (sc->sc_mbuf != NULL)
pfsync_sendout(sc);
- crit_exit();
+ splx(s);
}
void
pfsync_bulkfail(void *v)
{
struct pfsync_softc *sc = v;
+ int s, error;
if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
/* Try again in a bit */
- callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
- LIST_FIRST(&pfsync_list));
- pfsync_request_update(NULL, NULL);
- pfsync_sendout(sc);
+ timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
+ s = splnet();
+ error = pfsync_request_update(NULL, NULL);
+ if (error == ENOMEM) {
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync: cannot allocate mbufs for "
+ "bulk update\n");
+ } else
+ pfsync_sendout(sc);
+ splx(s);
} else {
/* Pretend like the transfer was ok */
sc->sc_ureq_sent = 0;
sc->sc_bulk_tries = 0;
+#if NCARP > 0
+ if (!pfsync_sync_ok)
+ carp_group_demote_adj(&sc->sc_if, -1);
+#endif
pfsync_sync_ok = 1;
if (pf_status.debug >= PF_DEBUG_MISC)
- kprintf("pfsync: failed to receive "
+ printf("pfsync: failed to receive "
"bulk update status\n");
- callout_stop(&sc->sc_bulkfail_tmo);
+ timeout_del(&sc->sc_bulkfail_tmo);
}
}
+/* This must be called in splnet() */
int
pfsync_sendout(struct pfsync_softc *sc)
{
+#if NBPFILTER > 0
struct ifnet *ifp = &sc->sc_if;
+#endif
struct mbuf *m;
- callout_stop(&sc->sc_tmo);
+ timeout_del(&sc->sc_tmo);
if (sc->sc_mbuf == NULL)
return (0);
sc->sc_mbuf = NULL;
sc->sc_statep.s = NULL;
- KASSERT(m != NULL, ("pfsync_sendout: null mbuf"));
- BPF_MTAP(ifp, m);
+#if NBPFILTER > 0
+ if (ifp->if_bpf)
+ bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+#endif
if (sc->sc_mbuf_net) {
m_freem(m);
sc->sc_statep_net.s = NULL;
}
- if (sc->sc_sync_ifp) {
- struct ip *ip;
- struct ifaddr *ifa;
- struct sockaddr sa;
+ return pfsync_sendout_mbuf(sc, m);
+}
+
+/*
+ * Flush the pending TDB update packet, if any.
+ *
+ * Deletes the sc_tdb_tmo timeout, detaches sc_mbuf_tdb from the softc,
+ * taps the packet to bpf when a listener is attached and passes it to
+ * pfsync_sendout_mbuf().  Returns 0 when nothing is pending, otherwise
+ * whatever pfsync_sendout_mbuf() returns.
+ */
+int
+pfsync_tdb_sendout(struct pfsync_softc *sc)
+{
+ struct mbuf *pkt;
+
+ timeout_del(&sc->sc_tdb_tmo);
+
+ pkt = sc->sc_mbuf_tdb;
+ if (pkt == NULL)
+ return (0);
+
+ /* the softc no longer owns the packet from here on */
+ sc->sc_statep_tdb.t = NULL;
+ sc->sc_mbuf_tdb = NULL;
+
+#if NBPFILTER > 0
+ if (sc->sc_if.if_bpf)
+ bpf_mtap(sc->sc_if.if_bpf, pkt, BPF_DIRECTION_OUT);
+#endif
+
+ return pfsync_sendout_mbuf(sc, pkt);
+}
- M_PREPEND(m, sizeof(struct ip), MB_DONTWAIT);
+int
+pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
+{
+ struct sockaddr sa;
+ struct ip *ip;
+
+ if (sc->sc_sync_ifp ||
+ sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
+ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
if (m == NULL) {
pfsyncstats.pfsyncs_onomem++;
return (0);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
- ip->ip_len = m->m_pkthdr.len;
-#ifdef RANDOM_IP_ID
- ip->ip_id = ip_randomid();
-#else
- ip->ip_id = ntohs(ip_id++);
-#endif
- ip->ip_off = IP_DF;
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_id = htons(ip_randomid());
+ ip->ip_off = htons(IP_DF);
ip->ip_ttl = PFSYNC_DFLTTL;
ip->ip_p = IPPROTO_PFSYNC;
ip->ip_sum = 0;
bzero(&sa, sizeof(sa));
- sa.sa_family = AF_INET;
- ifa = ifaof_ifpforaddr(&sa, sc->sc_sync_ifp);
- if (ifa == NULL)
- return (0);
- ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+ ip->ip_src.s_addr = INADDR_ANY;
- if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
+ if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
m->m_flags |= M_MCAST;
ip->ip_dst = sc->sc_sendaddr;
- sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
+ sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
pfsyncstats.pfsyncs_opackets++;
return (0);
}
-static int
-pfsync_modevent(module_t mod, int type, void *data)
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+void
+pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
- int error = 0;
+ struct tdb *tdb;
+ int s;
+
+ /* check for invalid values */
+ if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+ (pt->dst.sa.sa_family != AF_INET &&
+ pt->dst.sa.sa_family != AF_INET6))
+ goto bad;
+
+ s = spltdb();
+ tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
+ if (tdb) {
+ pt->rpl = ntohl(pt->rpl);
+ pt->cur_bytes = betoh64(pt->cur_bytes);
+
+ /* Neither replay nor byte counter should ever decrease. */
+ if (pt->rpl < tdb->tdb_rpl ||
+ pt->cur_bytes < tdb->tdb_cur_bytes) {
+ splx(s);
+ goto bad;
+ }
- switch (type) {
- case MOD_LOAD:
- LIST_INIT(&pfsync_list);
- if_clone_attach(&pfsync_cloner);
- break;
+ tdb->tdb_rpl = pt->rpl;
+ tdb->tdb_cur_bytes = pt->cur_bytes;
+ }
+ splx(s);
+ return;
+
+ bad:
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
+ "invalid value\n");
+ pfsyncstats.pfsyncs_badstate++;
+ return;
+}
- case MOD_UNLOAD:
- if_clone_detach(&pfsync_cloner);
- while (!LIST_EMPTY(&pfsync_list))
- pfsync_clone_destroy(
- &LIST_FIRST(&pfsync_list)->sc_if);
- break;
+/*
+ * One of our local tdbs has been updated; queue its replay counter and
+ * byte count in the pending TDB update packet so the peers can sync.
+ *
+ * A non-zero "output" adds RPL_INCR to the advertised replay counter.
+ * Returns 0 when there is no pfsync interface, nobody to send to, or
+ * the packet was only queued; ENOMEM when no mbuf could be obtained;
+ * otherwise the result of pfsync_tdb_sendout().
+ */
+int
+pfsync_update_tdb(struct tdb *tdb, int output)
+{
+ struct ifnet *ifp = NULL;
+ struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_header *h;
+ struct pfsync_tdb *pt = NULL;
+ int s, i, ret = 0;
- default:
- error = EINVAL;
- break;
+ if (sc == NULL)
+ return (0);
+
+ ifp = &sc->sc_if;
+ if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
+ sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
+ /* Don't leave any stale pfsync packets hanging around. */
+ if (sc->sc_mbuf_tdb != NULL) {
+ m_freem(sc->sc_mbuf_tdb);
+ sc->sc_mbuf_tdb = NULL;
+ sc->sc_statep_tdb.t = NULL;
+ }
+ return (0);
}
- return error;
-}
+ s = splnet();
+ if (sc->sc_mbuf_tdb == NULL) {
+ if ((sc->sc_mbuf_tdb = pfsync_get_mbuf(sc, PFSYNC_ACT_TDB_UPD,
+ (void *)&sc->sc_statep_tdb.t)) == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+ } else {
+ h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+ if (h->action != PFSYNC_ACT_TDB_UPD) {
+ /*
+ * XXX will never happen as long as there's
+ * only one "TDB action".
+ */
+ pfsync_tdb_sendout(sc);
+ sc->sc_mbuf_tdb = pfsync_get_mbuf(sc,
+ PFSYNC_ACT_TDB_UPD, (void *)&sc->sc_statep_tdb.t);
+ if (sc->sc_mbuf_tdb == NULL) {
+ splx(s);
+ return (ENOMEM);
+ }
+ h = mtod(sc->sc_mbuf_tdb, struct pfsync_header *);
+ } else if (sc->sc_maxupdates) {
+ /*
+ * If it's an update, look in the packet to see if
+ * we already have an update for the state.
+ */
+ struct pfsync_tdb *u =
+ (void *)((char *)h + PFSYNC_HDRLEN);
+
+ for (i = 0; !pt && i < h->count; i++) {
+ if (tdb->tdb_spi == u->spi &&
+ tdb->tdb_sproto == u->sproto &&
+ !bcmp(&tdb->tdb_dst, &u->dst,
+ SA_LEN(&u->dst.sa))) {
+ pt = u;
+ pt->updates++;
+ }
+ u++;
+ }
+ }
+ }
-static moduledata_t pfsync_mod = {
- "pfsync",
- pfsync_modevent,
- 0
-};
+ if (pt == NULL) {
+ /* not a "duplicate" update */
+ pt = sc->sc_statep_tdb.t++;
+ sc->sc_mbuf_tdb->m_pkthdr.len =
+ sc->sc_mbuf_tdb->m_len += sizeof(struct pfsync_tdb);
+ h->count++;
+ bzero(pt, sizeof(*pt));
-#define PFSYNC_MODVER 1
+ pt->spi = tdb->tdb_spi;
+ memcpy(&pt->dst, &tdb->tdb_dst, sizeof pt->dst);
+ pt->sproto = tdb->tdb_sproto;
+ }
-DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(pfsync, PFSYNC_MODVER);
+ /*
+ * When a failover happens, the master's rpl is probably above
+ * what we see here (we may be up to a second late), so
+ * increase it a bit for outbound tdbs to manage most such
+ * situations.
+ *
+ * For now, just add an offset that is likely to be larger
+ * than the number of packets we can see in one second. The RFC
+ * just says the next packet must have a higher seq value.
+ *
+ * XXX What is a good algorithm for this? We could use
+ * a rate-determined increase, but to know it, we would have
+ * to extend struct tdb.
+ * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
+ * will soon be replaced anyway. For now, just don't handle
+ * this edge case.
+ */
+#define RPL_INCR 16384
+ pt->rpl = htonl(tdb->tdb_rpl + (output ? RPL_INCR : 0));
+ pt->cur_bytes = htobe64(tdb->tdb_cur_bytes);
+
+ /*
+ * Flush only once the packet is full or this entry has been
+ * updated often enough; otherwise ret keeps its initial 0.
+ */
+ if (h->count == sc->sc_maxcount ||
+ (sc->sc_maxupdates && (pt->updates >= sc->sc_maxupdates)))
+ ret = pfsync_tdb_sendout(sc);
+
+ splx(s);
+ return (ret);
+}
-/* $FreeBSD: src/sys/contrib/pf/net/if_pfsync.h,v 1.4 2004/06/16 23:24:00 mlaier Exp $ */
-/* $OpenBSD: if_pfsync.h,v 1.13 2004/03/22 04:54:17 mcbride Exp $ */
-/* $DragonFly: src/sys/net/pf/if_pfsync.h,v 1.2 2004/09/20 01:43:13 dillon Exp $ */
+/* $OpenBSD: if_pfsync.h,v 1.30 2006/10/31 14:49:01 henning Exp $ */
/*
- * Copyright (c) 2004 The DragonFly Project. All rights reserved.
- *
* Copyright (c) 2001 Michael Shalayeff
* All rights reserved.
*
#ifndef _NET_IF_PFSYNC_H_
#define _NET_IF_PFSYNC_H_
-#include <sys/ioccom.h>
-
-/*
- * pfvar.h is required to get struct pf_addr. Also kdump and other utilities
- * blindly include header files to try to get all the ioctl constants and
- * buildworld will fail without this. We need a better way XXX
- */
-#ifndef _NET_PFVAR_H_
-#include "pfvar.h"
-#endif
-
#define PFSYNC_ID_LEN sizeof(u_int64_t)
struct pfsync_state_scrub {
u_int16_t pfss_flags;
u_int8_t pfss_ttl; /* stashed TTL */
+#define PFSYNC_SCRUB_FLAG_VALID 0x01
u_int8_t scrub_flag;
u_int32_t pfss_ts_mod; /* timestamp modulation */
} __packed;
u_int16_t mss; /* Maximum segment size option */
u_int8_t state; /* active state level */
u_int8_t wscale; /* window scaling factor */
- u_int8_t scrub_flag;
- u_int8_t pad[5];
+ u_int8_t pad[6];
} __packed;
struct pfsync_state {
u_int32_t nat_rule;
u_int32_t creation;
u_int32_t expire;
- u_int32_t packets[2];
- u_int32_t bytes[2];
+ u_int32_t packets[2][2];
+ u_int32_t bytes[2][2];
u_int32_t creatorid;
sa_family_t af;
u_int8_t proto;
u_int8_t updates;
} __packed;
+#define PFSYNC_FLAG_COMPRESS 0x01
+#define PFSYNC_FLAG_STALE 0x02
+
+struct pfsync_tdb {
+ u_int32_t spi;
+ union sockaddr_union dst;
+ u_int32_t rpl;
+ u_int64_t cur_bytes;
+ u_int8_t sproto;
+ u_int8_t updates;
+ u_int8_t pad[2];
+} __packed;
+
struct pfsync_state_upd {
u_int32_t id[2];
struct pfsync_state_peer src;
struct pfsync_state_upd_req *r;
};
+union sc_tdb_statep {
+ struct pfsync_tdb *t;
+};
+
extern int pfsync_sync_ok;
struct pfsync_softc {
struct ifnet *sc_sync_ifp;
struct ip_moptions sc_imo;
- struct callout sc_tmo;
- struct callout sc_bulk_tmo;
- struct callout sc_bulkfail_tmo;
+ struct timeout sc_tmo;
+ struct timeout sc_tdb_tmo;
+ struct timeout sc_bulk_tmo;
+ struct timeout sc_bulkfail_tmo;
+ struct in_addr sc_sync_peer;
struct in_addr sc_sendaddr;
- struct mbuf *sc_mbuf; /* current cummulative mbuf */
- struct mbuf *sc_mbuf_net; /* current cummulative mbuf */
+ struct mbuf *sc_mbuf; /* current cumulative mbuf */
+ struct mbuf *sc_mbuf_net; /* current cumulative mbuf */
+ struct mbuf *sc_mbuf_tdb; /* dito for TDB updates */
union sc_statep sc_statep;
union sc_statep sc_statep_net;
+ union sc_tdb_statep sc_statep_tdb;
u_int32_t sc_ureq_received;
u_int32_t sc_ureq_sent;
+ struct pf_state *sc_bulk_send_next;
+ struct pf_state *sc_bulk_terminator;
int sc_bulk_tries;
int sc_maxcount; /* number of states in mtu */
int sc_maxupdates; /* number of updates/state */
- LIST_ENTRY(pfsync_softc) sc_next;
};
+
+extern struct pfsync_softc *pfsyncif;
#endif
struct pfsync_header {
u_int8_t version;
-#define PFSYNC_VERSION 2
+#define PFSYNC_VERSION 3
u_int8_t af;
u_int8_t action;
#define PFSYNC_ACT_CLR 0 /* clear all states */
#define PFSYNC_ACT_DEL_F 7 /* delete fragments */
#define PFSYNC_ACT_UREQ 8 /* request "uncompressed" state */
#define PFSYNC_ACT_BUS 9 /* Bulk Update Status */
-#define PFSYNC_ACT_MAX 10
+#define PFSYNC_ACT_TDB_UPD 10 /* TDB replay counter update */
+#define PFSYNC_ACT_MAX 11
u_int8_t count;
+ u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH];
} __packed;
#define PFSYNC_BULKPACKETS 1 /* # of packets per timeout */
-#define PFSYNC_MAX_BULKTRIES 12
+#define PFSYNC_MAX_BULKTRIES 12
#define PFSYNC_HDRLEN sizeof(struct pfsync_header)
#define PFSYNC_ACTIONS \
"CLR ST", "INS ST", "UPD ST", "DEL ST", \
"UPD ST COMP", "DEL ST COMP", "INS FR", "DEL FR", \
- "UPD REQ", "BLK UPD STAT"
+ "UPD REQ", "BLK UPD STAT", "TDB UPD"
#define PFSYNC_DFLTTL 255
struct pfsyncstats {
- u_long pfsyncs_ipackets; /* total input packets, IPv4 */
- u_long pfsyncs_ipackets6; /* total input packets, IPv6 */
- u_long pfsyncs_badif; /* not the right interface */
- u_long pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */
- u_long pfsyncs_hdrops; /* packets shorter than header */
- u_long pfsyncs_badver; /* bad (incl unsupp) version */
- u_long pfsyncs_badact; /* bad action */
- u_long pfsyncs_badlen; /* data length does not match */
- u_long pfsyncs_badauth; /* bad authentication */
- u_long pfsyncs_badstate; /* insert/lookup failed */
-
- u_long pfsyncs_opackets; /* total output packets, IPv4 */
- u_long pfsyncs_opackets6; /* total output packets, IPv6 */
- u_long pfsyncs_onomem; /* no memory for an mbuf for a send */
- u_long pfsyncs_oerrors; /* ip output error */
+ u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */
+ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */
+ u_int64_t pfsyncs_badif; /* not the right interface */
+ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */
+ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */
+ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */
+ u_int64_t pfsyncs_badact; /* bad action */
+ u_int64_t pfsyncs_badlen; /* data length does not match */
+ u_int64_t pfsyncs_badauth; /* bad authentication */
+ u_int64_t pfsyncs_stale; /* stale state */
+ u_int64_t pfsyncs_badval; /* bad values */
+ u_int64_t pfsyncs_badstate; /* insert/lookup failed */
+
+ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */
+ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */
+ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */
+ u_int64_t pfsyncs_oerrors; /* ip output error */
};
/*
* Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC
*/
struct pfsyncreq {
- char pfsyncr_syncif[IFNAMSIZ];
- int pfsyncr_maxupdates;
- int pfsyncr_authlevel;
+ char pfsyncr_syncdev[IFNAMSIZ];
+ struct in_addr pfsyncr_syncpeer;
+ int pfsyncr_maxupdates;
+ int pfsyncr_authlevel;
};
-#define SIOCSETPFSYNC _IOW('i', 247, struct ifreq)
-#define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq)
#define pf_state_peer_hton(s,d) do { \
(d)->mss = htons((s)->mss); \
(d)->state = (s)->state; \
(d)->wscale = (s)->wscale; \
+ if ((s)->scrub) { \
+ (d)->scrub.pfss_flags = \
+ htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \
+ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \
+ (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\
+ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \
+ } \
} while (0)
#define pf_state_peer_ntoh(s,d) do { \
(d)->mss = ntohs((s)->mss); \
(d)->state = (s)->state; \
(d)->wscale = (s)->wscale; \
+ if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \
+ (d)->scrub != NULL) { \
+ (d)->scrub->pfss_flags = \
+ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \
+ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \
+ (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\
+ } \
} while (0)
#define pf_state_host_hton(s,d) do { \
(d)->port = (s)->port; \
} while (0)
+/*
+ * Split the counter s into two 32-bit words in network byte order:
+ * the upper 32 bits go to d[0], the lower 32 bits to d[1].
+ * Arguments are parenthesized so expressions can be passed safely.
+ */
+#define pf_state_counter_hton(s,d) do { \
+ (d)[0] = htonl(((s) >> 32) & 0xffffffff); \
+ (d)[1] = htonl((s) & 0xffffffff); \
+} while (0)
+
+/*
+ * Rebuild a counter in d from the network-order words s[0] (upper
+ * half) and s[1] (lower half).  d is evaluated more than once and is
+ * shifted left by 32, so it must be a side-effect-free lvalue wider
+ * than 32 bits.
+ */
+#define pf_state_counter_ntoh(s,d) do { \
+ (d) = ntohl((s)[0]); \
+ (d) = (d) << 32; \
+ (d) += ntohl((s)[1]); \
+} while (0)
+
#ifdef _KERNEL
void pfsync_input(struct mbuf *, ...);
int pfsync_clear_states(u_int32_t, char *);
(st->proto == IPPROTO_PFSYNC)) \
st->sync_flags |= PFSTATE_NOSYNC; \
else if (!st->sync_flags) \
- pfsync_pack_state(PFSYNC_ACT_INS, (st), 1); \
+ pfsync_pack_state(PFSYNC_ACT_INS, (st), \
+ PFSYNC_FLAG_COMPRESS); \
st->sync_flags &= ~PFSTATE_FROMSYNC; \
} while (0)
#define pfsync_update_state(st) do { \
if (!st->sync_flags) \
- pfsync_pack_state(PFSYNC_ACT_UPD, (st), 1); \
+ pfsync_pack_state(PFSYNC_ACT_UPD, (st), \
+ PFSYNC_FLAG_COMPRESS); \
st->sync_flags &= ~PFSTATE_FROMSYNC; \
} while (0)
#define pfsync_delete_state(st) do { \
if (!st->sync_flags) \
- pfsync_pack_state(PFSYNC_ACT_DEL, (st), 1); \
- st->sync_flags &= ~PFSTATE_FROMSYNC; \
+ pfsync_pack_state(PFSYNC_ACT_DEL, (st), \
+ PFSYNC_FLAG_COMPRESS); \
} while (0)
+int pfsync_update_tdb(struct tdb *, int);
#endif
#endif /* _NET_IF_PFSYNC_H_ */
/* $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */
/* add $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */
/* $DragonFly: src/sys/net/pf/pf.c,v 1.20 2008/06/05 18:06:32 swildner Exp $ */
+/* $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */
/*
* Copyright (c) 2004 The DragonFly Project. All rights reserved.
#include <sys/sysctl.h>
#include <sys/endian.h>
#include <vm/vm_zone.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
#include <machine/inttypes.h>
#include <netinet/tcp_var.h>
#include <netinet/udp_var.h>
#include <netinet/icmp_var.h>
+#include <netinet/if_ether.h>
#include <net/pf/pfvar.h>
#include <net/pf/if_pflog.h>
#include <net/netmsg2.h>
extern int ip_optcopy(struct ip *, struct ip *);
+extern int debug_pfugidhack;
#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x
* Global variables
*/
-struct pf_anchorqueue pf_anchors;
-struct pf_ruleset pf_main_ruleset;
struct pf_altqqueue pf_altqs[2];
struct pf_palist pf_pabuf;
struct pf_altqqueue *pf_altqs_active;
int altqs_inactive_open;
u_int32_t ticket_pabuf;
-struct callout pf_expire_to; /* expire timeout */
+struct pf_anchor_stackframe {
+ struct pf_ruleset *rs;
+ struct pf_rule *r;
+ struct pf_anchor_node *parent;
+ struct pf_anchor *child;
+} pf_anchor_stack[64];
vm_zone_t pf_src_tree_pl, pf_rule_pl;
vm_zone_t pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
-void pf_print_state(struct pf_state *);
-void pf_print_flags(u_int8_t);
-u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
- u_int8_t);
+void pf_init_threshold(struct pf_threshold *, u_int32_t,
+ u_int32_t);
+void pf_add_threshold(struct pf_threshold *);
+int pf_check_threshold(struct pf_threshold *);
+
void pf_change_ap(struct pf_addr *, u_int16_t *,
u_int16_t *, u_int16_t *, struct pf_addr *,
u_int16_t, u_int8_t, sa_family_t);
+int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
+ struct tcphdr *, struct pf_state_peer *);
#ifdef INET6
void pf_change_a6(struct pf_addr *, u_int16_t *,
struct pf_addr *, u_int8_t);
void pf_send_tcp(const struct pf_rule *, sa_family_t,
const struct pf_addr *, const struct pf_addr *,
u_int16_t, u_int16_t, u_int32_t, u_int32_t,
- u_int8_t, u_int16_t, u_int16_t, u_int8_t);
+ u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
+ u_int16_t, struct ether_header *, struct ifnet *);
void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
sa_family_t, struct pf_rule *);
struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
int pf_test_tcp(struct pf_rule **, struct pf_state **,
int, struct pfi_kif *, struct mbuf *, int,
void *, struct pf_pdesc *, struct pf_rule **,
- struct pf_ruleset **);
+ struct pf_ruleset **, struct ifqueue *, struct inpcb *);
int pf_test_udp(struct pf_rule **, struct pf_state **,
int, struct pfi_kif *, struct mbuf *, int,
void *, struct pf_pdesc *, struct pf_rule **,
- struct pf_ruleset **);
+ struct pf_ruleset **, struct ifqueue *, struct inpcb *);
int pf_test_icmp(struct pf_rule **, struct pf_state **,
int, struct pfi_kif *, struct mbuf *, int,
void *, struct pf_pdesc *, struct pf_rule **,
- struct pf_ruleset **);
+ struct pf_ruleset **, struct ifqueue *);
int pf_test_other(struct pf_rule **, struct pf_state **,
int, struct pfi_kif *, struct mbuf *, int, void *,
struct pf_pdesc *, struct pf_rule **,
- struct pf_ruleset **);
+ struct pf_ruleset **, struct ifqueue *);
int pf_test_fragment(struct pf_rule **, int,
struct pfi_kif *, struct mbuf *, void *,
struct pf_pdesc *, struct pf_rule **,
void *, struct pf_pdesc *);
int pf_test_state_icmp(struct pf_state **, int,
struct pfi_kif *, struct mbuf *, int,
- void *, struct pf_pdesc *);
+ void *, struct pf_pdesc *, u_short *);
int pf_test_state_other(struct pf_state **, int,
struct pfi_kif *, struct pf_pdesc *);
-static int pf_match_tag(struct mbuf *, struct pf_rule *,
- struct pf_rule *, int *);
+int pf_match_tag(struct mbuf *, struct pf_rule *,
+ struct pf_mtag *, int *);
+int pf_step_out_of_anchor(int *, struct pf_ruleset **,
+ int, struct pf_rule **, struct pf_rule **,
+ int *);
void pf_hash(struct pf_addr *, struct pf_addr *,
struct pf_poolhashkey *, sa_family_t);
int pf_map_addr(u_int8_t, struct pf_rule *,
struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
struct pf_src_node **);
void pf_route(struct mbuf **, struct pf_rule *, int,
- struct ifnet *, struct pf_state *);
+ struct ifnet *, struct pf_state *,
+ struct pf_pdesc *);
void pf_route6(struct mbuf **, struct pf_rule *, int,
- struct ifnet *, struct pf_state *);
-int pf_socket_lookup(uid_t *, gid_t *,
- int, struct pf_pdesc *);
+ struct ifnet *, struct pf_state *,
+ struct pf_pdesc *);
u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
sa_family_t);
u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
int pf_addr_wrap_neq(struct pf_addr_wrap *,
struct pf_addr_wrap *);
struct pf_state *pf_find_state_recurse(struct pfi_kif *,
- struct pf_state *, u_int8_t);
-
-struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
+ struct pf_state_cmp *, u_int8_t);
+int pf_src_connlimit(struct pf_state **);
+int pf_check_congestion(struct ifqueue *);
+
+extern int pf_end_threads;
+
+struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
+ { &pf_state_pl, PFSTATE_HIWAT },
+ { &pf_src_tree_pl, PFSNODE_HIWAT },
+ { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
+ { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
+ { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
+};
#define STATE_LOOKUP() \
do { \
if (direction == PF_IN) \
- *state = pf_find_state_recurse( \
+ *state = pf_find_state_recurse( \
kif, &key, PF_EXT_GWY); \
else \
*state = pf_find_state_recurse( \
kif, &key, PF_LAN_EXT); \
- if (*state == NULL) \
+ if (*state == NULL || (*state)->timeout == PFTM_PURGE) \
return (PF_DROP); \
if (direction == PF_OUT && \
(((*state)->rule.ptr->rt == PF_ROUTETO && \
(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
(s)->lan.port != (s)->gwy.port
-#define BOUND_IFACE(r, k) (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : \
- ((r)->rule_flag & PFRULE_GRBOUND) ? (k)->pfik_parent : \
- (k)->pfik_parent->pfik_parent)
+/*
+ * Yields (k) when rule (r) has PFRULE_IFBOUND set, pfi_all otherwise.
+ * The whole conditional is parenthesized so the macro can be used
+ * inside larger expressions without changing how they parse.
+ */
+#define BOUND_IFACE(r, k) \
+ (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
+
+/*
+ * Bump the states counter of the rule referenced by state (s), and of
+ * its anchor and nat rule when those pointers are set.  (s) is
+ * parenthesized so an argument such as *state expands correctly.
+ */
+#define STATE_INC_COUNTERS(s) \
+ do { \
+ (s)->rule.ptr->states++; \
+ if ((s)->anchor.ptr != NULL) \
+ (s)->anchor.ptr->states++; \
+ if ((s)->nat_rule.ptr != NULL) \
+ (s)->nat_rule.ptr->states++; \
+ } while (0)
+
+/*
+ * Counterpart of STATE_INC_COUNTERS: drop the states counter of the
+ * nat rule and anchor (when set) and of the rule itself.
+ */
+#define STATE_DEC_COUNTERS(s) \
+ do { \
+ if ((s)->nat_rule.ptr != NULL) \
+ (s)->nat_rule.ptr->states--; \
+ if ((s)->anchor.ptr != NULL) \
+ (s)->anchor.ptr->states--; \
+ (s)->rule.ptr->states--; \
+ } while (0)
-static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
-static int pf_state_compare_lan_ext(struct pf_state *,
+static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
+static __inline int pf_state_compare_lan_ext(struct pf_state *,
struct pf_state *);
-static int pf_state_compare_ext_gwy(struct pf_state *,
+static __inline int pf_state_compare_ext_gwy(struct pf_state *,
struct pf_state *);
-static int pf_state_compare_id(struct pf_state *,
+static __inline int pf_state_compare_id(struct pf_state *,
struct pf_state *);
struct pf_src_tree tree_src_tracking;
struct pf_state_tree_id tree_id;
-struct pf_state_queue state_updates;
+struct pf_state_queue state_list;
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state,
RB_GENERATE(pf_state_tree_id, pf_state,
u.s.entry_id, pf_state_compare_id);
-static int
+static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
int diff;
return(hv);
}
-static int
+static __inline int
pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
{
int diff;
return (0);
}
-static int
+static __inline int
pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
{
int diff;
return (0);
}
-static int
+static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
if (a->id > b->id)
break;
}
}
-#endif
+#endif /* INET6 */
struct pf_state *
-pf_find_state_byid(struct pf_state *key)
+pf_find_state_byid(struct pf_state_cmp *key)
{
pf_status.fcounters[FCNT_STATE_SEARCH]++;
- return (RB_FIND(pf_state_tree_id, &tree_id, key));
+ return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}
struct pf_state *
-pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
+pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree)
{
struct pf_state *s;
switch (tree) {
case PF_LAN_EXT:
- for (; kif != NULL; kif = kif->pfik_parent) {
- s = RB_FIND(pf_state_tree_lan_ext,
- &kif->pfik_lan_ext, key);
- if (s != NULL)
- return (s);
- }
+ if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext,
+ (struct pf_state *)key)) != NULL)
+ return (s);
+ if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext,
+ (struct pf_state *)key)) != NULL)
+ return (s);
return (NULL);
case PF_EXT_GWY:
- for (; kif != NULL; kif = kif->pfik_parent) {
- s = RB_FIND(pf_state_tree_ext_gwy,
- &kif->pfik_ext_gwy, key);
- if (s != NULL)
- return (s);
- }
+ if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy,
+ (struct pf_state *)key)) != NULL)
+ return (s);
+ if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy,
+ (struct pf_state *)key)) != NULL)
+ return (s);
return (NULL);
default:
panic("pf_find_state_recurse");
}
struct pf_state *
-pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
+pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more)
{
struct pf_state *s, *ss = NULL;
struct pfi_kif *kif;
case PF_LAN_EXT:
TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
s = RB_FIND(pf_state_tree_lan_ext,
- &kif->pfik_lan_ext, key);
+ &kif->pfik_lan_ext, (struct pf_state *)key);
if (s == NULL)
continue;
if (more == NULL)
case PF_EXT_GWY:
TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
s = RB_FIND(pf_state_tree_ext_gwy,
- &kif->pfik_ext_gwy, key);
+ &kif->pfik_ext_gwy, (struct pf_state *)key);
if (s == NULL)
continue;
if (more == NULL)
}
}
+/*
+ * Reset a threshold: clear the running count, stamp it with the
+ * current time_second and record the window length together with the
+ * limit scaled by PF_THRESHOLD_MULT.
+ */
+void
+pf_init_threshold(struct pf_threshold *threshold,
+ u_int32_t limit, u_int32_t seconds)
+{
+ threshold->count = 0;
+ threshold->last = time_second;
+ threshold->seconds = seconds;
+ threshold->limit = limit * PF_THRESHOLD_MULT;
+}
+
+/*
+ * Record one more event on a threshold.  The running count is first
+ * reduced in proportion to the time since the last event (or cleared
+ * once a full window has elapsed), then PF_THRESHOLD_MULT is added.
+ */
+void
+pf_add_threshold(struct pf_threshold *threshold)
+{
+ u_int32_t now = time_second;
+ u_int32_t elapsed = now - threshold->last;
+
+ if (elapsed < threshold->seconds) {
+ /* still inside the window: scale the old count down */
+ threshold->count -= threshold->count * elapsed /
+ threshold->seconds;
+ } else {
+ /* at least one whole window has passed: start over */
+ threshold->count = 0;
+ }
+ threshold->last = now;
+ threshold->count += PF_THRESHOLD_MULT;
+}
+
+/*
+ * Returns 1 when the threshold's running count is above its limit,
+ * 0 otherwise.
+ */
+int
+pf_check_threshold(struct pf_threshold *threshold)
+{
+ if (threshold->count > threshold->limit)
+ return (1);
+ return (0);
+}
+
+/*
+ * Count one more connection on the state's source node and check it
+ * against the rule's max_src_conn and max_src_conn_rate limits.
+ *
+ * Returns 0 when neither limit is exceeded.  Otherwise the state is
+ * marked PFTM_PURGE / TCPS_CLOSED and 1 is returned; when the rule has
+ * an overload table the source address is inserted into it first and,
+ * if the rule requests a flush, matching states are marked the same
+ * way.
+ */
+int
+pf_src_connlimit(struct pf_state **state)
+{
+ struct pf_state *st = *state;
+ struct pf_state *cur;
+ int bad = 0;
+
+ st->src_node->conn++;
+ st->src.tcp_est = 1;
+ pf_add_threshold(&st->src_node->conn_rate);
+
+ /* limit on the number of connections from this source */
+ if (st->rule.ptr->max_src_conn &&
+ st->src_node->conn > st->rule.ptr->max_src_conn) {
+ pf_status.lcounters[LCNT_SRCCONN]++;
+ bad++;
+ }
+
+ /* limit on the connection rate from this source */
+ if (st->rule.ptr->max_src_conn_rate.limit &&
+ pf_check_threshold(&st->src_node->conn_rate)) {
+ pf_status.lcounters[LCNT_SRCCONNRATE]++;
+ bad++;
+ }
+
+ if (bad == 0)
+ return (0);
+
+ if (st->rule.ptr->overload_tbl) {
+ struct pfr_addr p;
+ u_int32_t killed = 0;
+
+ pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
+ if (pf_status.debug >= PF_DEBUG_MISC) {
+ kprintf("pf_src_connlimit: blocking address ");
+ pf_print_host(&st->src_node->addr, 0, st->af);
+ }
+
+ /* build a host entry for the offending source address */
+ bzero(&p, sizeof(p));
+ p.pfra_af = st->af;
+ switch (st->af) {
+#ifdef INET
+ case AF_INET:
+ p.pfra_net = 32;
+ p.pfra_ip4addr = st->src_node->addr.v4;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ p.pfra_net = 128;
+ p.pfra_ip6addr = st->src_node->addr.v6;
+ break;
+#endif /* INET6 */
+ }
+
+ pfr_insert_kentry(st->rule.ptr->overload_tbl,
+ &p, time_second);
+
+ /* kill existing states if that's required. */
+ if (st->rule.ptr->flush) {
+ pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
+
+ RB_FOREACH(cur, pf_state_tree_id, &tree_id) {
+ if (cur->af != st->af)
+ continue;
+ /*
+ * The source node address is compared with
+ * the lan address when this state is
+ * outbound and with the ext address when it
+ * is inbound.
+ */
+ if (!((st->direction == PF_OUT &&
+ PF_AEQ(&st->src_node->addr,
+ &cur->lan.addr, cur->af)) ||
+ (st->direction == PF_IN &&
+ PF_AEQ(&st->src_node->addr,
+ &cur->ext.addr, cur->af))))
+ continue;
+ /*
+ * Without PF_FLUSH_GLOBAL only states created
+ * by the same rule are killed.
+ */
+ if (!(st->rule.ptr->flush & PF_FLUSH_GLOBAL) &&
+ st->rule.ptr != cur->rule.ptr)
+ continue;
+
+ cur->timeout = PFTM_PURGE;
+ cur->src.state = cur->dst.state =
+ TCPS_CLOSED;
+ killed++;
+ }
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ kprintf(", %u states killed", killed);
+ }
+ if (pf_status.debug >= PF_DEBUG_MISC)
+ kprintf("\n");
+ }
+
+ /* kill this state */
+ st->timeout = PFTM_PURGE;
+ st->src.state = st->dst.state = TCPS_CLOSED;
+ return (1);
+}
+
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
struct pf_addr *src, sa_family_t af)
if (!rule->max_src_nodes ||
rule->src_nodes < rule->max_src_nodes)
(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
+ else
+ pf_status.lcounters[LCNT_SRCNODES]++;
if ((*sn) == NULL)
return (-1);
bzero(*sn, sizeof(struct pf_src_node));
+
+ pf_init_threshold(&(*sn)->conn_rate,
+ rule->max_src_conn_rate.limit,
+ rule->max_src_conn_rate.seconds);
+
(*sn)->af = af;
if (rule->rule_flag & PFRULE_RULESRCTRACK ||
rule->rpool.opts & PF_POOL_STICKYADDR)
pf_status.src_nodes++;
} else {
if (rule->max_src_states &&
- (*sn)->states >= rule->max_src_states)
+ (*sn)->states >= rule->max_src_states) {
+ pf_status.lcounters[LCNT_SRCSTATES]++;
return (-1);
+ }
}
return (0);
}
RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
return (-1);
}
- TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
-
+ TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
pf_status.fcounters[FCNT_STATE_INSERT]++;
pf_status.states++;
- pfi_attach_state(kif);
+ pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
pfsync_insert_state(state);
#endif
}
void
-pf_purge_timeout(void *arg)
+pf_purge_thread(void *v)
{
- struct callout *to = arg;
+ int nloops = 0;
+ int locked = 0;
+
+ for (;;) {
+ tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
+
+ lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
+
+ if (pf_end_threads) {
+ pf_purge_expired_states(pf_status.states, 1);
+ pf_purge_expired_fragments();
+ pf_purge_expired_src_nodes(1);
+ pf_end_threads++;
+
+ lockmgr(&pf_consistency_lock, LK_RELEASE);
+ wakeup(pf_purge_thread);
+ kthread_exit();
+ }
+ crit_enter();
+
+ /* process a fraction of the state table every second */
+ if(!pf_purge_expired_states(1 + (pf_status.states
+ / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
- crit_enter();
- pf_purge_expired_states();
- pf_purge_expired_fragments();
- pf_purge_expired_src_nodes();
- crit_exit();
+ pf_purge_expired_states(1 + (pf_status.states
+ / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
+ }
- callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
- pf_purge_timeout, to);
+ /* purge other expired types every PFTM_INTERVAL seconds */
+ if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+ pf_purge_expired_fragments();
+ if (!pf_purge_expired_src_nodes(locked)) {
+ pf_purge_expired_src_nodes(1);
+ }
+ nloops = 0;
+ }
+ crit_exit();
+ lockmgr(&pf_consistency_lock, LK_RELEASE);
+ }
}
u_int32_t
return (time_second);
if (state->timeout == PFTM_UNTIL_PACKET)
return (0);
- KASSERT((state->timeout < PFTM_MAX),
- ("pf_state_expires: timeout > PFTM_MAX"));
+ KKASSERT(state->timeout != PFTM_UNLINKED);
+ KASSERT((state->timeout < PFTM_MAX),
+ ("pf_state_expires: timeout > PFTM_MAX"));
timeout = state->rule.ptr->timeout[state->timeout];
if (!timeout)
timeout = pf_default_rule.timeout[state->timeout];
return (state->expire + timeout);
}
-void
-pf_purge_expired_src_nodes(void)
+int
+pf_purge_expired_src_nodes(int waslocked)
{
struct pf_src_node *cur, *next;
+ int locked = waslocked;
for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
if (cur->states <= 0 && cur->expire <= time_second) {
+ if (! locked) {
+ lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
+ next = RB_NEXT(pf_src_tree,
+ &tree_src_tracking, cur);
+ locked = 1;
+ }
if (cur->rule.ptr != NULL) {
cur->rule.ptr->src_nodes--;
if (cur->rule.ptr->states <= 0 &&
pool_put(&pf_src_tree_pl, cur);
}
}
+
+ if (locked && !waslocked)
+ lockmgr(&pf_consistency_lock, LK_RELEASE);
+ return(1);
}
void
u_int32_t timeout;
if (s->src_node != NULL) {
+ if (s->proto == IPPROTO_TCP) {
+ if (s->src.tcp_est)
+ --s->src_node->conn;
+ }
if (--s->src_node->states <= 0) {
timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
if (!timeout)
s->src_node = s->nat_src_node = NULL;
}
-static int
-pf_purge_expired_states_callback(struct pf_state *cur, void *data __unused)
+/*
+ * Unlink a state without releasing its memory.  If the state is in
+ * PF_TCPS_PROXY_DST a TCP RST|ACK is sent through pf_send_tcp(); the state
+ * is then removed from the interface's ext_gwy and lan_ext trees and from
+ * tree_id, marked PFTM_UNLINKED and handed to pf_src_tree_remove_state().
+ * With NPFSYNC, pfsync_delete_state() is called only for states whose
+ * creatorid equals pf_status.hostid.
+ *
+ * callers should be at crit_enter()
+ */
+void
+pf_unlink_state(struct pf_state *cur)
{
- if (pf_state_expires(cur) <= time_second) {
- RB_REMOVE(pf_state_tree_ext_gwy,
- &cur->u.s.kif->pfik_ext_gwy, cur);
- RB_REMOVE(pf_state_tree_lan_ext,
- &cur->u.s.kif->pfik_lan_ext, cur);
- RB_REMOVE(pf_state_tree_id, &tree_id, cur);
- if (cur->src.state == PF_TCPS_PROXY_DST) {
- pf_send_tcp(cur->rule.ptr, cur->af,
- &cur->ext.addr, &cur->lan.addr,
- cur->ext.port, cur->lan.port,
- cur->src.seqhi, cur->src.seqlo + 1, 0,
- TH_RST|TH_ACK, 0, 0);
- }
+ if (cur->src.state == PF_TCPS_PROXY_DST) {
+ pf_send_tcp(cur->rule.ptr, cur->af,
+ &cur->ext.addr, &cur->lan.addr,
+ cur->ext.port, cur->lan.port,
+ cur->src.seqhi, cur->src.seqlo + 1,
+ TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
+ }
+ RB_REMOVE(pf_state_tree_ext_gwy,
+ &cur->u.s.kif->pfik_ext_gwy, cur);
+ RB_REMOVE(pf_state_tree_lan_ext,
+ &cur->u.s.kif->pfik_lan_ext, cur);
+ RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
+ if (cur->creatorid == pf_status.hostid)
pfsync_delete_state(cur);
#endif
- pf_src_tree_remove_state(cur);
- if (--cur->rule.ptr->states <= 0 &&
- cur->rule.ptr->src_nodes <= 0)
- pf_rm_rule(NULL, cur->rule.ptr);
- if (cur->nat_rule.ptr != NULL)
- if (--cur->nat_rule.ptr->states <= 0 &&
- cur->nat_rule.ptr->src_nodes <= 0)
- pf_rm_rule(NULL, cur->nat_rule.ptr);
- if (cur->anchor.ptr != NULL)
- if (--cur->anchor.ptr->states <= 0)
- pf_rm_rule(NULL, cur->anchor.ptr);
- pf_normalize_tcp_cleanup(cur);
- pfi_detach_state(cur->u.s.kif);
- TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
- pool_put(&pf_state_pl, cur);
- pf_status.fcounters[FCNT_STATE_REMOVALS]++;
- pf_status.states--;
- }
- return(0);
+ cur->timeout = PFTM_UNLINKED; /* pf_free_state() asserts this value */
+ pf_src_tree_remove_state(cur);
}
+/*
+ * Release a state previously marked PFTM_UNLINKED.  Decrements the state
+ * counts of its filter rule, NAT rule (if any) and anchor rule (if any),
+ * calling pf_rm_rule() when the count drops to zero (and, for the filter
+ * and NAT rule, src_nodes is also <= 0); then runs
+ * pf_normalize_tcp_cleanup(), drops the kif and tag references, removes
+ * the state from state_list, returns it to pf_state_pl and updates the
+ * pf_status counters.
+ *
+ * With NPFSYNC the function returns without freeing anything while
+ * pfsyncif's sc_bulk_send_next or sc_bulk_terminator still points at cur.
+ *
+ * callers should be at crit_enter() and hold the
+ * write_lock on pf_consistency_lock */
void
-pf_purge_expired_states(void)
+pf_free_state(struct pf_state *cur)
{
- RB_SCAN(pf_state_tree_id, &tree_id, NULL,
- pf_purge_expired_states_callback, NULL);
+#if NPFSYNC
+ if (pfsyncif != NULL &&
+ (pfsyncif->sc_bulk_send_next == cur ||
+ pfsyncif->sc_bulk_terminator == cur))
+ return;
+#endif
+ KKASSERT(cur->timeout == PFTM_UNLINKED);
+ if (--cur->rule.ptr->states <= 0 &&
+ cur->rule.ptr->src_nodes <= 0)
+ pf_rm_rule(NULL, cur->rule.ptr);
+ if (cur->nat_rule.ptr != NULL)
+ if (--cur->nat_rule.ptr->states <= 0 &&
+ cur->nat_rule.ptr->src_nodes <= 0)
+ pf_rm_rule(NULL, cur->nat_rule.ptr);
+ if (cur->anchor.ptr != NULL)
+ if (--cur->anchor.ptr->states <= 0)
+ pf_rm_rule(NULL, cur->anchor.ptr);
+ pf_normalize_tcp_cleanup(cur);
+ pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
+ TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
+ if (cur->tag) /* a zero tag holds no reference */
+ pf_tag_unref(cur->tag);
+ pool_put(&pf_state_pl, cur);
+ pf_status.fcounters[FCNT_STATE_REMOVALS]++;
+ pf_status.states--;
}
+/*
+ * Examine up to maxcheck entries of state_list, resuming where the
+ * previous call stopped (the cursor is kept in a function-static variable
+ * and wraps to the list head at the end).  States already marked
+ * PFTM_UNLINKED are freed; states whose pf_state_expires() time has passed
+ * are unlinked and then freed.
+ *
+ * pf_consistency_lock is taken exclusively before the first
+ * pf_free_state() call unless waslocked says the caller already holds it.
+ * lockmgr() returns 0 on success, so only a non-zero result aborts the
+ * pass; the state unlinked just before stays PFTM_UNLINKED and is freed
+ * by a later call.
+ *
+ * Returns 1 after a completed pass, 0 if the lock could not be obtained.
+ *
+ * NOTE(review): the lock is released on the normal exit path whenever it
+ * is held, including when the caller passed waslocked != 0, whereas
+ * pf_purge_expired_src_nodes() releases only what it acquired itself.
+ * Left unchanged because callers may rely on it -- confirm.
+ */
+int
+pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
+{
+ static struct pf_state *cur = NULL;
+ struct pf_state *next;
+ int locked = waslocked;
+
+ while (maxcheck--) {
+ /* wrap to start of list when we hit the end */
+ if (cur == NULL) {
+ cur = TAILQ_FIRST(&state_list);
+ if (cur == NULL)
+ break; /* list empty */
+ }
+
+ /* get next state, as cur may get deleted */
+ next = TAILQ_NEXT(cur, u.s.entry_list);
+
+ if (cur->timeout == PFTM_UNLINKED) {
+ /* free unlinked state */
+ if (! locked) {
+ lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
+ locked = 1;
+ }
+ pf_free_state(cur);
+ } else if (pf_state_expires(cur) <= time_second) {
+ /* unlink and free expired state */
+ pf_unlink_state(cur);
+ if (! locked) {
+ /* non-zero means the lock was NOT obtained */
+ if (lockmgr(&pf_consistency_lock, LK_EXCLUSIVE) != 0)
+ return (0);
+ locked = 1;
+ }
+ pf_free_state(cur);
+ }
+ cur = next;
+ }
+
+ if (locked)
+ lockmgr(&pf_consistency_lock, LK_RELEASE);
+ return (1);
+}
int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
PF_SET_SKIP_STEPS(PF_SKIP_AF);
if (cur->proto != prev->proto)
PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
- if (cur->src.not != prev->src.not ||
+ if (cur->src.neg != prev->src.neg ||
pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
if (cur->src.port[0] != prev->src.port[0] ||
cur->src.port[1] != prev->src.port[1] ||
cur->src.port_op != prev->src.port_op)
PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
- if (cur->dst.not != prev->dst.not ||
+ if (cur->dst.neg != prev->dst.neg ||
pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
if (cur->dst.port[0] != prev->dst.port[0] ||
case PF_ADDR_DYNIFTL:
return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
case PF_ADDR_NOROUTE:
+ case PF_ADDR_URPFFAILED:
return (0);
case PF_ADDR_TABLE:
return (aw1->p.tbl != aw2->p.tbl);
+ case PF_ADDR_RTLABEL:
+ return (aw1->v.rtlabel != aw2->v.rtlabel);
default:
kprintf("invalid address type: %d\n", aw1->type);
return (1);
}
}
-void
-pf_update_anchor_rules(void)
-{
- struct pf_rule *rule;
- int i;
-
- for (i = 0; i < PF_RULESET_MAX; ++i)
- TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
- entries)
- if (rule->anchorname[0])
- rule->anchor = pf_find_anchor(rule->anchorname);
- else
- rule->anchor = NULL;
-}
-
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
}
}
+
+/*
+ * Need to modulate the sequence numbers in the TCP SACK option
+ * (credits to Krzysztof Pfaff for report and patch)
+ *
+ * Pulls the TCP option bytes that follow th (at off + sizeof(*th) in m)
+ * into a local buffer and walks them.  For every SACK option, each
+ * start/end pair has dst->seqdiff subtracted; the rewrite goes through
+ * pf_change_a() together with &th->th_sum.  If a SACK option was
+ * rewritten the whole option area is copied back into the mbuf.
+ *
+ * Returns 1 if the mbuf options were rewritten, 0 otherwise (including
+ * when the option area is shorter than one SACK block plus its 2-byte
+ * option header, or pf_pull_hdr() fails).
+ */
+int
+pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
+ struct tcphdr *th, struct pf_state_peer *dst)
+{
+ int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
+ u_int8_t opts[TCP_MAXOLEN], *opt = opts;
+ int copyback = 0, i, olen;
+ struct raw_sackblock sack;
+
+#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
+ if (hlen < TCPOLEN_SACKLEN ||
+ !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
+ return 0;
+
+ while (hlen >= TCPOLEN_SACKLEN) {
+ olen = opt[1]; /* length byte; unused by the EOL/NOP case */
+ switch (*opt) {
+ case TCPOPT_EOL: /* FALLTHROUGH */
+ case TCPOPT_NOP:
+ opt++;
+ hlen--;
+ break;
+ case TCPOPT_SACK:
+ if (olen > hlen) /* clamp to the bytes actually present */
+ olen = hlen;
+ if (olen >= TCPOLEN_SACKLEN) {
+ for (i = 2; i + TCPOLEN_SACK <= olen;
+ i += TCPOLEN_SACK) {
+ memcpy(&sack, &opt[i], sizeof(sack));
+ pf_change_a(&sack.rblk_start, &th->th_sum,
+ htonl(ntohl(sack.rblk_start) -
+ dst->seqdiff), 0);
+ pf_change_a(&sack.rblk_end, &th->th_sum,
+ htonl(ntohl(sack.rblk_end) -
+ dst->seqdiff), 0);
+ memcpy(&opt[i], &sack, sizeof(sack));
+ }
+ copyback = 1;
+ }
+ /* FALLTHROUGH */
+ default:
+ if (olen < 2) /* never advance by less than 2 bytes */
+ olen = 2;
+ hlen -= olen;
+ opt += olen;
+ }
+ }
+
+ if (copyback)
+ m_copyback(m, off + sizeof(*th), thoptlen, opts);
+ return (copyback);
+}
+
void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
const struct pf_addr *saddr, const struct pf_addr *daddr,
u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
- u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
+ u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
+ u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
struct mbuf *m;
int len = 0, tlen;
struct ip6_hdr *h6 = NULL;
#endif /* INET6 */
struct tcphdr *th = NULL;
- char *opt;
+ char *opt;
+ struct pf_mtag *pf_mtag;
/* maximum segment size tcp option */
tlen = sizeof(struct tcphdr);
m = m_gethdr(MB_DONTWAIT, MT_HEADER);
if (m == NULL)
return;
- m->m_pkthdr.fw_flags = PF_MBUF_GENERATED;
+ if ((pf_mtag = pf_get_mtag(m)) == NULL) {
+ m_freem(m);
+ return;
+ }
+ if (tag)
+ pf_mtag->flags |= PF_TAG_GENERATED;
+
+ pf_mtag->tag = rtag;
+
+ if (r != NULL && r->rtableid >= 0)
+ pf_mtag->rtableid = r->rtableid;
+
#ifdef ALTQ
if (r != NULL && r->qid) {
- m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED;
- m->m_pkthdr.altq_qid = r->qid;
- m->m_pkthdr.ecn_af = af;
- m->m_pkthdr.header = mtod(m, struct ip *);
+ pf_mtag->qid = r->qid;
+ /* add hints for ecn */
+ pf_mtag->af = af;
+ pf_mtag->hdr = mtod(m, struct ip *);
}
-#endif
+#endif /* ALTQ */
m->m_data += max_linkhdr;
m->m_pkthdr.len = m->m_len = len;
m->m_pkthdr.rcvif = NULL;
h->ip_off = path_mtu_discovery ? IP_DF : 0;
h->ip_ttl = ttl ? ttl : ip_defttl;
h->ip_sum = 0;
- ip_output(m, NULL, NULL, 0, NULL, NULL);
+ if (eh == NULL) {
+ ip_output(m, NULL, NULL, 0, NULL, NULL);
+ } else {
+ struct route ro;
+ struct rtentry rt;
+ struct ether_header *e = (void *)ro.ro_dst.sa_data;
+
+ if (ifp == NULL) {
+ m_freem(m);
+ return;
+ }
+ rt.rt_ifp = ifp;
+ ro.ro_rt = &rt;
+ ro.ro_dst.sa_len = sizeof(ro.ro_dst);
+ ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
+ bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
+ bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
+ e->ether_type = eh->ether_type;
+ /* XXX_IMPORT: later */
+ ip_output(m, (void *)NULL, &ro, 0,
+ (void *)NULL, (void *)NULL);
+ }
break;
#endif /* INET */
#ifdef INET6
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
struct pf_rule *r)
{
+ struct pf_mtag *pf_mtag;
struct mbuf *m0;
- m0 = m_copypacket(m, MB_DONTWAIT);
- if (m0 == NULL)
+ m0 = m_copy(m, 0, M_COPYALL);
+
+ if ((pf_mtag = pf_get_mtag(m0)) == NULL)
return;
- m0->m_pkthdr.fw_flags |= PF_MBUF_GENERATED;
+ pf_mtag->flags |= PF_TAG_GENERATED;
+
+ if (r->rtableid >= 0)
+ pf_mtag->rtableid = r->rtableid;
#ifdef ALTQ
if (r->qid) {
- m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED;
- m->m_pkthdr.altq_qid = r->qid;
- m->m_pkthdr.ecn_af = af;
- m->m_pkthdr.header = mtod(m0, struct ip *);
+ pf_mtag->qid = r->qid;
+ /* add hints for ecn */
+ pf_mtag->af = af;
+ pf_mtag->hdr = mtod(m0, struct ip *);
}
-#endif
+#endif /* ALTQ */
switch (af) {
#ifdef INET
return (pf_match(op, a1, a2, g));
}
-static int
-pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule,
- int *tag)
+/*
+ * Look up the PF_MBUF_TAGGED m_tag already attached to mbuf m.
+ * Returns a pointer to the struct pf_mtag stored directly after the
+ * m_tag header, or NULL if m_tag_find() finds no such tag.  Only
+ * m_tag_find() is called here; nothing is allocated.
+ */
+struct pf_mtag *
+pf_find_mtag(struct mbuf *m)
{
- if (*tag == -1) { /* find mbuf tag */
- if (nat_rule != NULL && nat_rule->tag)
- *tag = nat_rule->tag;
- else if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
- *tag = m->m_pkthdr.pf_tag;
- else
- *tag = 0;
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL)) == NULL)
+ return (NULL);
+
+ return ((struct pf_mtag *)(mtag + 1));
+}
+
+/*
+ * Return the struct pf_mtag attached to mbuf m, creating it if needed.
+ * When m_tag_find() finds no PF_MBUF_TAGGED tag, a new one is obtained
+ * with m_tag_get(..., M_NOWAIT), its pf_mtag payload is zeroed and it is
+ * prepended to m.  Returns NULL only if that allocation fails.
+ */
+struct pf_mtag *
+pf_get_mtag(struct mbuf *m)
+{
+ struct m_tag *mtag;
+
+ if ((mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL)) == NULL) {
+ mtag = m_tag_get(PF_MBUF_TAGGED, sizeof(struct pf_mtag),
+ M_NOWAIT);
+ if (mtag == NULL)
+ return (NULL);
+ bzero(mtag + 1, sizeof(struct pf_mtag)); /* payload follows the m_tag header */
+ m_tag_prepend(m, mtag);
 }
+ return ((struct pf_mtag *)(mtag + 1));
+}
+
+/*
+ * Test rule r's match_tag against the packet's tag.  *tag caches the tag
+ * across rules: while it is still -1 it is seeded from pf_mtag->tag.
+ * Returns non-zero when r->match_tag equals *tag, or, if
+ * r->match_tag_not is set, when it differs.  m is not used here.
+ *
+ * NOTE(review): pf_mtag is dereferenced without a NULL check; callers
+ * presumably always pass a valid tag -- confirm.
+ */
+int
+pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
+ int *tag)
+{
+ if (*tag == -1)
+ *tag = pf_mtag->tag;
+
return ((!r->match_tag_not && r->match_tag == *tag) ||
(r->match_tag_not && r->match_tag != *tag));
}
-void
-pf_tag_packet(struct mbuf *m, int tag)
+/*
+ * Record tag and/or rtableid in the packet's pf_mtag.  Nothing is done
+ * (return 0) when tag <= 0 and rtableid < 0.  If pf_mtag is NULL one is
+ * looked up or allocated with pf_get_mtag(m).  Only a positive tag and a
+ * non-negative rtableid are stored.
+ *
+ * Returns 0 on success, 1 if pf_get_mtag() could not supply a tag.
+ */
+int
+pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
{
- if (tag <= 0)
- return;
+ if (tag <= 0 && rtableid < 0)
+ return (0);
+
+ if (pf_mtag == NULL)
+ if ((pf_mtag = pf_get_mtag(m)) == NULL)
+ return (1);
+ if (tag > 0)
+ pf_mtag->tag = tag;
+ if (rtableid >= 0)
+ pf_mtag->rtableid = rtableid;
- m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
- m->m_pkthdr.pf_tag = tag;
+ return (0);
}
-#define PF_STEP_INTO_ANCHOR(r, a, s, n) \
- do { \
- if ((r) == NULL || (r)->anchor == NULL || \
- (s) != NULL || (a) != NULL) \
- panic("PF_STEP_INTO_ANCHOR"); \
- (a) = (r); \
- (s) = TAILQ_FIRST(&(r)->anchor->rulesets); \
- (r) = NULL; \
- while ((s) != NULL && ((r) = \
- TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \
- (s) = TAILQ_NEXT((s), entries); \
- if ((r) == NULL) { \
- (r) = TAILQ_NEXT((a), entries); \
- (a) = NULL; \
- } \
- } while (0)
+/*
+ * Descend from anchor rule *r into its anchor.  The anchor's match flag
+ * and *match (if match is non-NULL) are cleared first.  A frame holding
+ * the current ruleset and rule is pushed on pf_anchor_stack and *depth is
+ * incremented; if the stack is already full a message is printed and *r
+ * simply advances to the next rule.  At depth 0, *a (if a is non-NULL)
+ * is set to the anchor rule.
+ *
+ * For a wildcard anchor rule the first child anchor (RB_MIN) is entered;
+ * with no children *r is set to NULL and the frame stays pushed.
+ * Otherwise *rs becomes the entered anchor's ruleset and *r the first
+ * rule on its active list for ruleset number n.
+ */
+static void
+pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
+ struct pf_rule **r, struct pf_rule **a, int *match)
+{
+ struct pf_anchor_stackframe *f;
+
+ (*r)->anchor->match = 0;
+ if (match)
+ *match = 0;
+ if (*depth >= sizeof(pf_anchor_stack) /
+ sizeof(pf_anchor_stack[0])) {
+ kprintf("pf_step_into_anchor: stack overflow\n");
+ *r = TAILQ_NEXT(*r, entries);
+ return;
+ } else if (*depth == 0 && a != NULL)
+ *a = *r;
+ f = pf_anchor_stack + (*depth)++;
+ f->rs = *rs;
+ f->r = *r;
+ if ((*r)->anchor_wildcard) {
+ f->parent = &(*r)->anchor->children;
+ if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
+ NULL) {
+ *r = NULL;
+ return;
+ }
+ *rs = &f->child->ruleset;
+ } else {
+ f->parent = NULL;
+ f->child = NULL;
+ *rs = &(*r)->anchor->ruleset;
+ }
+ *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
+}
-#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n) \
- do { \
- if ((r) != NULL || (a) == NULL || (s) == NULL) \
- panic("PF_STEP_OUT_OF_ANCHOR"); \
- (s) = TAILQ_NEXT((s), entries); \
- while ((s) != NULL && ((r) = \
- TAILQ_FIRST((s)->rules[n].active.ptr)) == NULL) \
- (s) = TAILQ_NEXT((s), entries); \
- if ((r) == NULL) { \
- (r) = TAILQ_NEXT((a), entries); \
- (a) = NULL; \
- } \
- } while (0)
+/*
+ * Leave the current anchor after its rule list ran out.  Loops until a
+ * next rule is found or the anchor stack is empty.
+ *
+ * For a wildcard frame (parent and child set) a match seen in the child,
+ * or in *match, is recorded on the anchor rule's anchor; evaluation then
+ * moves to the next child anchor (RB_NEXT), skipping children whose
+ * active list for ruleset n is empty.  When no child remains, or for a
+ * plain frame, the frame is popped: *rs is restored, *a is cleared on
+ * reaching depth 0 (if a is non-NULL), and *r becomes the rule following
+ * the anchor rule.
+ *
+ * match may be NULL (the translation-rule caller passes NULL); it is
+ * only written when non-NULL.
+ *
+ * Returns the quick flag of the most recently popped anchor rule whose
+ * anchor matched, or 0 if none did.
+ */
+int
+pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
+ struct pf_rule **r, struct pf_rule **a, int *match)
+{
+ struct pf_anchor_stackframe *f;
+ int quick = 0;
+
+ do {
+ if (*depth <= 0)
+ break;
+ f = pf_anchor_stack + *depth - 1;
+ if (f->parent != NULL && f->child != NULL) {
+ if (f->child->match ||
+ (match != NULL && *match)) {
+ f->r->anchor->match = 1;
+ /* child->match can be set while match is NULL */
+ if (match != NULL)
+ *match = 0;
+ }
+ f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
+ if (f->child != NULL) {
+ *rs = &f->child->ruleset;
+ *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
+ if (*r == NULL)
+ continue;
+ else
+ break;
+ }
+ }
+ (*depth)--;
+ if (*depth == 0 && a != NULL)
+ *a = NULL;
+ *rs = f->rs;
+ if (f->r->anchor->match || (match != NULL && *match))
+ quick = f->r->quick;
+ *r = TAILQ_NEXT(f->r, entries);
+ } while (*r == NULL);
+
+ return (quick);
+}
#ifdef INET6
void
if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
return (1);
if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- if (af == AF_INET) {
+ switch (af) {
+#ifdef INET
+ case AF_INET:
if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
(rpool->opts & PF_POOL_TYPEMASK) !=
PF_POOL_ROUNDROBIN)
return (1);
raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
- } else {
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
(rpool->opts & PF_POOL_TYPEMASK) !=
PF_POOL_ROUNDROBIN)
return (1);
raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
+ break;
+#endif /* INET6 */
}
} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
switch (af) {
#ifdef INET
case AF_INET:
- rpool->counter.addr32[0] = karc4random();
+ rpool->counter.addr32[0] = htonl(karc4random());
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
if (rmask->addr32[3] != 0xffffffff)
- rpool->counter.addr32[3] = karc4random();
+ rpool->counter.addr32[3] =
+ htonl(karc4random());
else
break;
if (rmask->addr32[2] != 0xffffffff)
- rpool->counter.addr32[2] = karc4random();
+ rpool->counter.addr32[2] =
+ htonl(karc4random());
else
break;
if (rmask->addr32[1] != 0xffffffff)
- rpool->counter.addr32[1] = karc4random();
+ rpool->counter.addr32[1] =
+ htonl(karc4random());
else
break;
if (rmask->addr32[0] != 0xffffffff)
- rpool->counter.addr32[0] = karc4random();
+ rpool->counter.addr32[0] =
+ htonl(karc4random());
break;
#endif /* INET6 */
}
get_addr:
PF_ACPY(naddr, &rpool->counter, af);
+ if (init_addr != NULL && PF_AZERO(init_addr, af))
+ PF_ACPY(init_addr, naddr, af);
PF_AINC(&rpool->counter, af);
break;
}
struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
struct pf_src_node **sn)
{
- struct pf_state key;
+ struct pf_state_cmp key;
struct pf_addr init_addr;
u_int16_t cut;
if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
return (1);
+ if (proto == IPPROTO_ICMP) {
+ low = 1;
+ high = 65535;
+ }
+
do {
key.af = af;
key.proto = proto;
* port search; start random, step;
* similar 2 portloop in in_pcbbind
*/
- if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
- key.gwy.port = 0;
+ if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+ proto == IPPROTO_ICMP)) {
+ key.gwy.port = dport;
if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
return (0);
} else if (low == 0 && high == 0) {
high = tmp;
}
/* low < high */
- cut = karc4random() % (1 + high - low) + low;
+ cut = htonl(karc4random()) % (1 + high - low) + low;
/* low <= cut <= high */
for (tmp = cut; tmp <= high; ++(tmp)) {
key.gwy.port = htons(tmp);
int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
struct pf_addr *daddr, u_int16_t dport, int rs_num)
{
- struct pf_rule *r, *rm = NULL, *anchorrule = NULL;
+ struct pf_rule *r, *rm = NULL;
struct pf_ruleset *ruleset = NULL;
+ int tag = -1;
+ int rtableid = -1;
+ int asd = 0;
r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
while (r && rm == NULL) {
}
r->evaluations++;
- if (r->kif != NULL &&
- (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
r = r->skip[PF_SKIP_IFP].ptr;
else if (r->direction && r->direction != direction)
r = r->skip[PF_SKIP_DIR].ptr;
r = r->skip[PF_SKIP_AF].ptr;
else if (r->proto && r->proto != pd->proto)
r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
+ else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
+ src->neg, kif))
r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
PF_SKIP_DST_ADDR].ptr;
else if (src->port_op && !pf_match_port(src->port_op,
r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
PF_SKIP_DST_PORT].ptr;
else if (dst != NULL &&
- PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
+ PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
- else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
+ else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
+ 0, NULL))
r = TAILQ_NEXT(r, entries);
else if (dst != NULL && dst->port_op &&
!pf_match_port(dst->port_op, dst->port[0],
dst->port[1], dport))
r = r->skip[PF_SKIP_DST_PORT].ptr;
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
+ r = TAILQ_NEXT(r, entries);
else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
off, pd->hdr.tcp), r->os_fingerprint)))
r = TAILQ_NEXT(r, entries);
- else if (r->anchorname[0] && r->anchor == NULL)
- r = TAILQ_NEXT(r, entries);
- else if (r->anchor == NULL)
+ else {
+ if (r->tag)
+ tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
+ if (r->anchor == NULL) {
rm = r;
- else
- PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
- if (r == NULL && anchorrule != NULL)
- PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
- rs_num);
+ } else
+ pf_step_into_anchor(&asd, &ruleset, rs_num,
+ &r, NULL, NULL);
+ }
+ if (r == NULL)
+ pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
+ NULL, NULL);
}
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
+ return (NULL);
if (rm != NULL && (rm->action == PF_NONAT ||
rm->action == PF_NORDR || rm->action == PF_NOBINAT))
return (NULL);
switch (direction) {
case PF_OUT:
if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
- if (pd->af == AF_INET) {
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
if (r->rpool.cur->addr.p.dyn->
pfid_acnt4 < 1)
return (NULL);
&r->rpool.cur->addr.p.dyn->
pfid_mask4,
saddr, AF_INET);
- } else {
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
if (r->rpool.cur->addr.p.dyn->
pfid_acnt6 < 1)
return (NULL);
&r->rpool.cur->addr.p.dyn->
pfid_mask6,
saddr, AF_INET6);
+ break;
+#endif /* INET6 */
}
} else
PF_POOLMASK(naddr,
saddr, pd->af);
break;
case PF_IN:
- if (r->src.addr.type == PF_ADDR_DYNIFTL){
- if (pd->af == AF_INET) {
+ if (r->src.addr.type == PF_ADDR_DYNIFTL) {
+ switch (pd->af) {
+#ifdef INET
+ case AF_INET:
if (r->src.addr.p.dyn->
pfid_acnt4 < 1)
return (NULL);
&r->src.addr.p.dyn->
pfid_mask4,
daddr, AF_INET);
- } else {
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
if (r->src.addr.p.dyn->
pfid_acnt6 < 1)
return (NULL);
&r->src.addr.p.dyn->
pfid_mask6,
daddr, AF_INET6);
+ break;
+#endif /* INET6 */
}
} else
PF_POOLMASK(naddr,
}
break;
case PF_RDR: {
- if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn))
+ if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
return (NULL);
-
+ if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
+ PF_POOL_BITMASK)
+ PF_POOLMASK(naddr, naddr,
+ &r->rpool.cur->addr.v.a.mask, daddr,
+ pd->af);
if (r->rpool.proxy_port[1]) {
u_int32_t tmp_nport;
#endif /* SMP */
int
-pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
+pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
{
struct pf_addr *saddr, *daddr;
u_int16_t sport, dport;
#endif
int pi_cpu = 0;
- *uid = UID_MAX;
- *gid = GID_MAX;
+ if (pd == NULL)
+ return (-1);
+ pd->lookup.uid = UID_MAX;
+ pd->lookup.gid = GID_MAX;
+ pd->lookup.pid = NO_PID;
if (direction == PF_IN) {
saddr = pd->src;
daddr = pd->dst;
&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
if (inp == NULL)
- return (0);
+ return (-1);
break;
}
/* FALLTHROUGH if SMP and on other CPU */
break;
default:
- return (0);
+ return (-1);
}
- *uid = inp->inp_socket->so_cred->cr_uid;
- *gid = inp->inp_socket->so_cred->cr_groups[0];
+ pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
+ pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
return (1);
}
break;
case TCPOPT_MAXSEG:
bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
+ NTOHS(mss);
/* FALLTHROUGH */
default:
optlen = opt[1];
int
pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
struct pfi_kif *kif, struct mbuf *m, int off, void *h,
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq, struct inpcb *inp)
{
struct pf_rule *nr = NULL;
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
struct tcphdr *th = pd->hdr.tcp;
u_int16_t bport, nport = 0;
sa_family_t af = pd->af;
- int lookup = -1;
- uid_t uid;
- gid_t gid;
struct pf_rule *r, *a = NULL;
struct pf_ruleset *ruleset = NULL;
struct pf_src_node *nsn = NULL;
u_short reason;
int rewrite = 0;
- int tag = -1;
+ int tag = -1, rtableid = -1;
+ u_int16_t mss = tcp_mssdflt;
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
+
+ if (inp != NULL)
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ else if (debug_pfugidhack) {
+ crit_exit();
+ DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
+ pd->lookup.done = pf_socket_lookup(direction, pd, inp);
+ crit_enter();
+ }
+
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
while (r != NULL) {
r->evaluations++;
- if (r->kif != NULL &&
- (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
r = r->skip[PF_SKIP_IFP].ptr;
else if (r->direction && r->direction != direction)
r = r->skip[PF_SKIP_DIR].ptr;
r = r->skip[PF_SKIP_AF].ptr;
else if (r->proto && r->proto != IPPROTO_TCP)
r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
+ else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+ r->src.neg, kif))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
else if (r->src.port_op && !pf_match_port(r->src.port_op,
r->src.port[0], r->src.port[1], th->th_sport))
r = r->skip[PF_SKIP_SRC_PORT].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
+ else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+ r->dst.neg, NULL))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
r->dst.port[0], r->dst.port[1], th->th_dport))
r = r->skip[PF_SKIP_DST_PORT].ptr;
- else if (r->tos && !(r->tos & pd->tos))
+ else if (r->tos && !(r->tos == pd->tos))
r = TAILQ_NEXT(r, entries);
else if (r->rule_flag & PFRULE_FRAGMENT)
r = TAILQ_NEXT(r, entries);
else if ((r->flagset & th->th_flags) != r->flags)
r = TAILQ_NEXT(r, entries);
- else if (r->uid.op && (lookup != -1 || (lookup =
- pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
+ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
+ pf_socket_lookup(direction, pd, inp), 1)) &&
!pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
- uid))
+ pd->lookup.uid))
r = TAILQ_NEXT(r, entries);
- else if (r->gid.op && (lookup != -1 || (lookup =
- pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
+ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
+ pf_socket_lookup(direction, pd, inp), 1)) &&
!pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
- gid))
+ pd->lookup.gid))
r = TAILQ_NEXT(r, entries);
else if (r->prob && r->prob <= karc4random())
r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->anchorname[0] && r->anchor == NULL)
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
r = TAILQ_NEXT(r, entries);
else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
else {
if (r->tag)
tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
if (r->anchor == NULL) {
+ match = 1;
*rm = r;
*am = a;
*rsm = ruleset;
break;
r = TAILQ_NEXT(r, entries);
} else
- PF_STEP_INTO_ANCHOR(r, a, ruleset,
- PF_RULESET_FILTER);
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
}
- if (r == NULL && a != NULL)
- PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
- PF_RULESET_FILTER);
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
}
r = *rm;
a = *am;
REASON_SET(&reason, PFRES_MATCH);
- if (r->log) {
+ if (r->log || (nr != NULL && nr->natpass && nr->log)) {
if (rewrite)
m_copyback(m, off, sizeof(*th), (caddr_t)th);
- PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
}
if ((r->action == PF_DROP) &&
pf_send_tcp(r, af, pd->dst,
pd->src, th->th_dport, th->th_sport,
ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
- r->return_ttl);
+ r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
} else if ((af == AF_INET) && r->return_icmp)
pf_send_icmp(m, r->return_icmp >> 8,
r->return_icmp & 255, af, r);
r->return_icmp6 & 255, af, r);
}
- if (r->action == PF_DROP)
+ if (r->action == PF_DROP) {
return (PF_DROP);
+ }
- pf_tag_packet(m, tag);
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
if (r->keep_state || nr != NULL ||
(pd->flags & PFDESC_TCP_NORM)) {
len = pd->tot_len - off - (th->th_off << 2);
/* check maximums */
- if (r->max_states && (r->states >= r->max_states))
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
goto cleanup;
- /* src node for flter rule */
+ }
+ /* src node for filter rule */
if ((r->rule_flag & PFRULE_SRCTRACK ||
r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0)
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
+ }
/* src node for translation rule */
if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
((direction == PF_OUT &&
pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
+ }
s = pool_get(&pf_state_pl, PR_NOWAIT);
if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
cleanup:
if (sn != NULL && sn->states == 0 && sn->expire == 0) {
RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
pf_status.src_nodes--;
pool_put(&pf_src_tree_pl, nsn);
}
- REASON_SET(&reason, PFRES_MEMORY);
return (PF_DROP);
}
bzero(s, sizeof(*s));
- r->states++;
- if (a != NULL)
- a->states++;
s->rule.ptr = r;
s->nat_rule.ptr = nr;
- if (s->nat_rule.ptr != NULL)
- s->nat_rule.ptr->states++;
s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
s->allow_opts = r->allow_opts;
- s->log = r->log & 2;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
s->proto = IPPROTO_TCP;
s->direction = direction;
s->af = af;
if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
r->keep_state == PF_STATE_MODULATE) {
/* Generate sequence number modulator */
- while ((s->src.seqdiff = karc4random()) == 0)
+ while ((s->src.seqdiff =
+ pf_new_isn(s) - s->src.seqlo) == 0)
;
pf_change_a(&th->th_seq, &th->th_sum,
htonl(s->src.seqlo + s->src.seqdiff), 0);
rewrite = 1;
} else
s->src.seqdiff = 0;
-
- /*
- * WARNING! NetBSD patched this to not scale max_win up
- * on the initial SYN, but they failed to correct the code
- * in pf_test_state_tcp() that 'undid' the scaling, and they
- * failed to remove the scale factor on successful window
- * scale negotiation (and doing so would be difficult in the
- * face of retransmission, without adding more flags to the
- * state structure).
- *
- * After discussions with Daniel Hartmeier and Max Laier
- * I've decided not to apply the NetBSD patch.
- *
- * The worst that happens is that the undo code on window
- * scale negotiation failures will produce a larger
- * max_win then actual.
- */
if (th->th_flags & TH_SYN) {
s->src.seqhi++;
s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
off, pd, th, &s->src, &s->dst)) {
REASON_SET(&reason, PFRES_MEMORY);
pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
pool_put(&pf_state_pl, s);
return (PF_DROP);
}
if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
- pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
- &s->dst, &rewrite)) {
+ pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
+ &s->src, &s->dst, &rewrite)) {
+ /* This really shouldn't happen!!! */
+ DPFPRINTF(PF_DEBUG_URGENT,
+ ("pf_normalize_tcp_stateful failed on first pkt"));
pf_normalize_tcp_cleanup(s);
pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
pool_put(&pf_state_pl, s);
return (PF_DROP);
}
if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
pf_normalize_tcp_cleanup(s);
- REASON_SET(&reason, PFRES_MEMORY);
+ REASON_SET(&reason, PFRES_STATEINS);
pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
pool_put(&pf_state_pl, s);
return (PF_DROP);
} else
*sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
r->keep_state == PF_STATE_SYNPROXY) {
- u_int16_t mss;
-
s->src.state = PF_TCPS_PROXY_SRC;
if (nr != NULL) {
if (direction == PF_OUT) {
bport, 0, af);
}
}
- s->src.seqhi = karc4random();
+ s->src.seqhi = htonl(karc4random());
/* Find mss option */
mss = pf_get_mss(m, off, th->th_off, af);
mss = pf_calc_mss(saddr, af, mss);
s->src.mss = mss;
pf_send_tcp(r, af, daddr, saddr, th->th_dport,
th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
- TH_SYN|TH_ACK, 0, s->src.mss, 0);
+ TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
+ REASON_SET(&reason, PFRES_SYNPROXY);
return (PF_SYNPROXY_DROP);
}
}
int
pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
struct pfi_kif *kif, struct mbuf *m, int off, void *h,
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq, struct inpcb *inp)
{
struct pf_rule *nr = NULL;
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
struct udphdr *uh = pd->hdr.udp;
u_int16_t bport, nport = 0;
sa_family_t af = pd->af;
- int lookup = -1;
- uid_t uid;
- gid_t gid;
struct pf_rule *r, *a = NULL;
struct pf_ruleset *ruleset = NULL;
struct pf_src_node *nsn = NULL;
u_short reason;
int rewrite = 0;
- int tag = -1;
-
- r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
+ int tag = -1, rtableid = -1;
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
+
+ r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
if (direction == PF_OUT) {
bport = nport = uh->uh_sport;
while (r != NULL) {
r->evaluations++;
- if (r->kif != NULL &&
- (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
r = r->skip[PF_SKIP_IFP].ptr;
else if (r->direction && r->direction != direction)
r = r->skip[PF_SKIP_DIR].ptr;
r = r->skip[PF_SKIP_AF].ptr;
else if (r->proto && r->proto != IPPROTO_UDP)
r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
+ else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+ r->src.neg, kif))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
else if (r->src.port_op && !pf_match_port(r->src.port_op,
r->src.port[0], r->src.port[1], uh->uh_sport))
r = r->skip[PF_SKIP_SRC_PORT].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
+ else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+ r->dst.neg, NULL))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
r->dst.port[0], r->dst.port[1], uh->uh_dport))
r = r->skip[PF_SKIP_DST_PORT].ptr;
- else if (r->tos && !(r->tos & pd->tos))
+ else if (r->tos && !(r->tos == pd->tos))
r = TAILQ_NEXT(r, entries);
else if (r->rule_flag & PFRULE_FRAGMENT)
r = TAILQ_NEXT(r, entries);
- else if (r->uid.op && (lookup != -1 || (lookup =
- pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
+ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
+ pf_socket_lookup(direction, pd, inp), 1)) &&
!pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
- uid))
+ pd->lookup.uid))
r = TAILQ_NEXT(r, entries);
- else if (r->gid.op && (lookup != -1 || (lookup =
- pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
+ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
+ pf_socket_lookup(direction, pd, inp), 1)) &&
!pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
- gid))
+ pd->lookup.gid))
r = TAILQ_NEXT(r, entries);
else if (r->prob && r->prob <= karc4random())
r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->anchorname[0] && r->anchor == NULL)
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
r = TAILQ_NEXT(r, entries);
else if (r->os_fingerprint != PF_OSFP_ANY)
r = TAILQ_NEXT(r, entries);
else {
if (r->tag)
tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
if (r->anchor == NULL) {
+ match = 1;
*rm = r;
*am = a;
*rsm = ruleset;
break;
r = TAILQ_NEXT(r, entries);
} else
- PF_STEP_INTO_ANCHOR(r, a, ruleset,
- PF_RULESET_FILTER);
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
}
- if (r == NULL && a != NULL)
- PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
- PF_RULESET_FILTER);
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
}
r = *rm;
a = *am;
REASON_SET(&reason, PFRES_MATCH);
- if (r->log) {
+ if (r->log || (nr != NULL && nr->natpass && nr->log)) {
if (rewrite)
m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
- PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
}
if ((r->action == PF_DROP) &&
if (r->action == PF_DROP)
return (PF_DROP);
- pf_tag_packet(m, tag);
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
if (r->keep_state || nr != NULL) {
/* create new state */
struct pf_src_node *sn = NULL;
/* check maximums */
- if (r->max_states && (r->states >= r->max_states))
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
goto cleanup;
- /* src node for flter rule */
+ }
+ /* src node for filter rule */
if ((r->rule_flag & PFRULE_SRCTRACK ||
r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0)
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
+ }
/* src node for translation rule */
if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
((direction == PF_OUT &&
pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
+ }
s = pool_get(&pf_state_pl, PR_NOWAIT);
if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
cleanup:
if (sn != NULL && sn->states == 0 && sn->expire == 0) {
RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
pf_status.src_nodes--;
pool_put(&pf_src_tree_pl, nsn);
}
- REASON_SET(&reason, PFRES_MEMORY);
return (PF_DROP);
}
bzero(s, sizeof(*s));
- r->states++;
- if (a != NULL)
- a->states++;
s->rule.ptr = r;
s->nat_rule.ptr = nr;
- if (s->nat_rule.ptr != NULL)
- s->nat_rule.ptr->states++;
s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
s->allow_opts = r->allow_opts;
- s->log = r->log & 2;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
s->proto = IPPROTO_UDP;
s->direction = direction;
s->af = af;
s->nat_src_node->states++;
}
if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
- REASON_SET(&reason, PFRES_MEMORY);
+ REASON_SET(&reason, PFRES_STATEINS);
pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
pool_put(&pf_state_pl, s);
return (PF_DROP);
} else
*sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
}
/* copy back packet headers if we performed NAT operations */
int
pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
struct pfi_kif *kif, struct mbuf *m, int off, void *h,
- struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
+ struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
+ struct ifqueue *ifq)
{
struct pf_rule *nr = NULL;
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
struct pf_ruleset *ruleset = NULL;
struct pf_src_node *nsn = NULL;
u_short reason;
- u_int16_t icmpid = 0;
+ u_int16_t icmpid = 0, bport, nport = 0;
sa_family_t af = pd->af;
u_int8_t icmptype = 0, icmpcode = 0;
int state_icmp = 0;
- int tag = -1;
+ int tag = -1, rtableid = -1;
#ifdef INET6
int rewrite = 0;
#endif /* INET6 */
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
switch (pd->proto) {
#ifdef INET
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
if (direction == PF_OUT) {
+ bport = nport = icmpid;
/* check outgoing packet for BINAT/NAT */
if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
- saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
+ saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
+ NULL) {
PF_ACPY(&pd->baddr, saddr, af);
switch (af) {
#ifdef INET
case AF_INET:
pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
pd->naddr.v4.s_addr, 0);
+ pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
+ pd->hdr.icmp->icmp_cksum, icmpid, nport, 0);
+ pd->hdr.icmp->icmp_id = nport;
+ m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
break;
#endif /* INET */
#ifdef INET6
pd->nat_rule = nr;
}
} else {
+ bport = nport = icmpid;
/* check incoming packet for BINAT/RDR */
if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
- saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
+ saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
+ NULL) {
PF_ACPY(&pd->baddr, daddr, af);
switch (af) {
#ifdef INET
while (r != NULL) {
r->evaluations++;
- if (r->kif != NULL &&
- (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
r = r->skip[PF_SKIP_IFP].ptr;
else if (r->direction && r->direction != direction)
r = r->skip[PF_SKIP_DIR].ptr;
r = r->skip[PF_SKIP_AF].ptr;
else if (r->proto && r->proto != pd->proto)
r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
+ else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
+ r->src.neg, kif))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
+ else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
+ r->dst.neg, NULL))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
else if (r->type && r->type != icmptype + 1)
r = TAILQ_NEXT(r, entries);
else if (r->code && r->code != icmpcode + 1)
r = TAILQ_NEXT(r, entries);
- else if (r->tos && !(r->tos & pd->tos))
+ else if (r->tos && !(r->tos == pd->tos))
r = TAILQ_NEXT(r, entries);
else if (r->rule_flag & PFRULE_FRAGMENT)
r = TAILQ_NEXT(r, entries);
else if (r->prob && r->prob <= karc4random())
r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->anchorname[0] && r->anchor == NULL)
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
r = TAILQ_NEXT(r, entries);
else if (r->os_fingerprint != PF_OSFP_ANY)
r = TAILQ_NEXT(r, entries);
else {
if (r->tag)
tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
if (r->anchor == NULL) {
+ match = 1;
*rm = r;
*am = a;
*rsm = ruleset;
break;
r = TAILQ_NEXT(r, entries);
} else
- PF_STEP_INTO_ANCHOR(r, a, ruleset,
- PF_RULESET_FILTER);
+ pf_step_into_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match);
}
- if (r == NULL && a != NULL)
- PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
- PF_RULESET_FILTER);
+ if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
+ PF_RULESET_FILTER, &r, &a, &match))
+ break;
}
r = *rm;
a = *am;
REASON_SET(&reason, PFRES_MATCH);
- if (r->log) {
+ if (r->log || (nr != NULL && nr->natpass && nr->log)) {
#ifdef INET6
if (rewrite)
m_copyback(m, off, sizeof(struct icmp6_hdr),
(caddr_t)pd->hdr.icmp6);
#endif /* INET6 */
- PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
+ PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
+ a, ruleset, pd);
}
if (r->action != PF_PASS)
return (PF_DROP);
- pf_tag_packet(m, tag);
+ if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
+ REASON_SET(&reason, PFRES_MEMORY);
+ return (PF_DROP);
+ }
if (!state_icmp && (r->keep_state || nr != NULL)) {
/* create new state */
struct pf_src_node *sn = NULL;
/* check maximums */
- if (r->max_states && (r->states >= r->max_states))
+ if (r->max_states && (r->states >= r->max_states)) {
+ pf_status.lcounters[LCNT_STATES]++;
+ REASON_SET(&reason, PFRES_MAXSTATES);
goto cleanup;
- /* src node for flter rule */
+ }
+ /* src node for filter rule */
if ((r->rule_flag & PFRULE_SRCTRACK ||
r->rpool.opts & PF_POOL_STICKYADDR) &&
- pf_insert_src_node(&sn, r, saddr, af) != 0)
+ pf_insert_src_node(&sn, r, saddr, af) != 0) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
+ }
/* src node for translation rule */
if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
((direction == PF_OUT &&
pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
- (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
+ (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
+ REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
+ }
s = pool_get(&pf_state_pl, PR_NOWAIT);
if (s == NULL) {
+ REASON_SET(&reason, PFRES_MEMORY);
cleanup:
if (sn != NULL && sn->states == 0 && sn->expire == 0) {
RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
pf_status.src_nodes--;
pool_put(&pf_src_tree_pl, nsn);
}
- REASON_SET(&reason, PFRES_MEMORY);
return (PF_DROP);
}
bzero(s, sizeof(*s));
- r->states++;
- if (a != NULL)
- a->states++;
s->rule.ptr = r;
s->nat_rule.ptr = nr;
- if (s->nat_rule.ptr != NULL)
- s->nat_rule.ptr->states++;
s->anchor.ptr = a;
+ STATE_INC_COUNTERS(s);
s->allow_opts = r->allow_opts;
- s->log = r->log & 2;
+ s->log = r->log & PF_LOG_ALL;
+ if (nr != NULL)
+ s->log |= nr->log & PF_LOG_ALL;
s->proto = pd->proto;
s->direction = direction;
s->af = af;
if (direction == PF_OUT) {
PF_ACPY(&s->gwy.addr, saddr, af);
- s->gwy.port = icmpid;
+ s->gwy.port = nport;
PF_ACPY(&s->ext.addr, daddr, af);
- s->ext.port = icmpid;
- if (nr != NULL)
+ s->ext.port = 0;
+ if (nr != NULL) {
PF_ACPY(&s->lan.addr, &pd->baddr, af);
- else
+ s->lan.port = bport;
+ } else {
PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
- s->lan.port = icmpid;
+ s->lan.port = s->gwy.port;
+ }
} else {
PF_ACPY(&s->lan.addr, daddr, af);
- s->lan.port = icmpid;
+ s->lan.port = nport;
PF_ACPY(&s->ext.addr, saddr, af);
- s->ext.port = icmpid;
- if (nr != NULL)
+ s->ext.port = 0;
+ if (nr != NULL) {
PF_ACPY(&s->gwy.addr, &pd->baddr, af);
- else
+ s->gwy.port = bport;
+ } else {
PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
- s->gwy.port = icmpid;
+ s->gwy.port = s->lan.port;
+ }
}
s->hash = pf_state_hash(s);
s->creation = time_second;
s->nat_src_node->states++;
}
if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
- REASON_SET(&reason, PFRES_MEMORY);
+ REASON_SET(&reason, PFRES_STATEINS);
pf_src_tree_remove_state(s);
+ STATE_DEC_COUNTERS(s);
pool_put(&pf_state_pl, s);
return (PF_DROP);
} else
*sm = s;
+ if (tag > 0) {
+ pf_tag_ref(tag);
+ s->tag = tag;
+ }
}
#ifdef INET6
int
pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
- struct pf_rule **am, struct pf_ruleset **rsm)
+ struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
{
struct pf_rule *nr = NULL;
struct pf_rule *r, *a = NULL;
struct pf_addr *saddr = pd->src, *daddr = pd->dst;
sa_family_t af = pd->af;
u_short reason;
- int tag = -1;
+ int tag = -1, rtableid = -1;
+ int asd = 0;
+ int match = 0;
+
+ if (pf_check_congestion(ifq)) {
+ REASON_SET(&reason, PFRES_CONGEST);
+ return (PF_DROP);
+ }
r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
while (r != NULL) {
r->evaluations++;
- if (r->kif != NULL &&
- (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
+ if (pfi_kif_match(r->kif, kif) == r->ifnot)
r = r->skip[PF_SKIP_IFP].ptr;
else if (r->direction && r->direction != direction)
r = r->skip[PF_SKIP_DIR].ptr;
r = r->skip[PF_SKIP_AF].ptr;
else if (r->proto && r->proto != pd->proto)
r = r->skip[PF_SKIP_PROTO].ptr;
- else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
+ else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+ r->src.neg, kif))
r = r->skip[PF_SKIP_SRC_ADDR].ptr;
- else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
+ else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+ r->dst.neg, NULL))
r = r->skip[PF_SKIP_DST_ADDR].ptr;
- else if (r->tos && !(r->tos & pd->tos))
+ else if (r->tos && !(r->tos == pd->tos))
r = TAILQ_NEXT(r, entries);
else if (r->rule_flag & PFRULE_FRAGMENT)
r = TAILQ_NEXT(r, entries);
else if (r->prob && r->prob <= karc4random())
r = TAILQ_NEXT(r, entries);
- else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
- r = TAILQ_NEXT(r, entries);
- else if (r->anchorname[0] && r->anchor == NULL)
+ else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
r = TAILQ_NEXT(r, entries);
else if (r->os_fingerprint != PF_OSFP_ANY)
r = TAILQ_NEXT(r, entries);
else {
if (r->tag)
tag = r->tag;
+ if (r->rtableid >= 0)
+ rtableid = r->rtableid;
if (r->anchor == NULL) {
+ match = 1;
*rm = r;
&nb