2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
29 * Implement IP packet firewall (new version)
35 #error IPFIREWALL requires INET.
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/ucred.h>
49 #include <sys/in_cksum.h>
50 #include <sys/limits.h>
55 #include <net/route.h>
57 #include <net/dummynet/ip_dummynet.h>
59 #include <sys/thread2.h>
60 #include <sys/mplock2.h>
61 #include <net/netmsg2.h>
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/tcp.h>
71 #include <netinet/tcp_seq.h>
72 #include <netinet/tcp_timer.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/tcpip.h>
75 #include <netinet/udp.h>
76 #include <netinet/udp_var.h>
77 #include <netinet/ip_divert.h>
78 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
80 #include <net/ipfw/ip_fw2.h>
82 #ifdef IPFIREWALL_DEBUG
83 #define DPRINTF(fmt, ...) \
86 kprintf(fmt, __VA_ARGS__); \
89 #define DPRINTF(fmt, ...) ((void)0)
93 * Description about per-CPU rule duplication:
95 * Module loading/unloading and all ioctl operations are serialized
96 * by netisr0, so we don't have any ordering or locking problems.
98 * Following graph shows how operation on per-CPU rule list is
99 * performed [2 CPU case]:
103 * netisr0 <------------------------------------+
109 * forwardmsg---------->netisr1 |
114 * replymsg--------------+
118 * Rule structure [2 CPU case]
122 * layer3_chain layer3_chain
125 * +-------+ sibling +-------+ sibling
126 * | rule1 |--------->| rule1 |--------->NULL
127 * +-------+ +-------+
131 * +-------+ sibling +-------+ sibling
132 * | rule2 |--------->| rule2 |--------->NULL
133 * +-------+ +-------+
136 * 1) Ease statistics calculation during IP_FW_GET. We only need to
137 * iterate layer3_chain in netisr0; the current rule's duplicates
138 * on the other CPUs can safely be accessed read-only through
140 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
141 * a) In netisr0 rule3 is determined to be inserted between rule1
142 * and rule2. To make this decision we need to iterate the
143 * layer3_chain in netisr0. The netmsg, which is used to insert
144 * the rule, will contain rule1 in netisr0 as prev_rule and rule2
145 * in netisr0 as next_rule.
146 * b) After the insertion in netisr0 is done, we will move on to
147 * netisr1. But instead of relocating the rule3's position in
148 * netisr1 by iterating the layer3_chain in netisr1, we set the
149 * netmsg's prev_rule to rule1->sibling and next_rule to
150 * rule2->sibling before the netmsg is forwarded to netisr1 from
155 * Description of states and tracks.
157 * Both states and tracks are stored in per-cpu RB trees instead of
158 * per-cpu hash tables to avoid the worst case hash degeneration.
160 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
161 * measured in seconds and depending on the flags.
163 * When a packet is received, its address fields are first masked with
164 * the mask defined for the rule, then matched against the entries in
165 * the per-cpu state RB tree. States are generated by 'keep-state'
166 * and 'limit' options.
168 * The max number of states is ipfw_state_max. When we reach the
169 * maximum number of states we do not create any more. This is done to
170 * avoid consuming too much memory, but also too much time when
171 * searching on each packet.
173 * Each state holds a pointer to the parent ipfw rule of the current
174 * CPU so we know what action to perform. States are removed when the
175 * parent rule is deleted. XXX we should make them survive.
177 * There are some limitations with states -- we do not obey the
178 * 'randomized match', and we do not do multiple passes through the
179 * firewall. XXX check the latter!!!
181 * States grow independently on each CPU, e.g. 2 CPU case:
184 * ................... ...................
185 * : state RB tree : : state RB tree :
187 * : state1 state2 : : state3 :
189 * :.....|....|......: :........|........:
194 * +-------+ +-------+
195 * | rule1 | | rule1 |
196 * +-------+ +-------+
198 * Tracks are used to enforce limits on the number of sessions. Tracks
199 * are generated by 'limit' option.
201 * The max number of tracks is ipfw_track_max. When we reach the
202 * maximum number of tracks we do not create any more. This is done to
203 * avoid consuming too much memory.
205 * Tracks are organized into two layers, track counter RB tree is
206 * shared between CPUs, track RB tree is per-cpu. States generated by
207 * 'limit' option are linked to the track in addition to the per-cpu
208 * state RB tree; mainly to ease expiration. e.g. 2 CPU case:
210 * ..............................
211 * : track counter RB tree :
216 * : +--->counter<----+ :
218 * : | +-----------+ | :
219 * :......|................|....:
222 * ................. |t_count | .................
223 * : track RB tree : | | : track RB tree :
225 * : +-->track1-------+ +--------track2 :
228 * :.|.....|.......: :...............:
229 * | +----------------+
230 * | .................... |
231 * | : state RB tree : |st_track
233 * +---state1 state2---+
235 * :.....|.......|....:
/*
 * Minimum, maximum and default values of the rule number auto-increment
 * step.  The default initializes autoinc_step.
 * NOTE(review): the min/max bounds are presumably enforced by
 * ipfw_sysctl_autoinc_step() -- confirm.
 */
244 #define IPFW_AUTOINC_STEP_MIN 1
245 #define IPFW_AUTOINC_STEP_MAX 1000
246 #define IPFW_AUTOINC_STEP_DEF 100
/* Default of ipfw_table_max (tunable net.inet.ip.fw.table_max). */
248 #define IPFW_TABLE_MAX_DEF 64
250 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */
251 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */
253 #define MATCH_REVERSE 0
254 #define MATCH_FORWARD 1
256 #define MATCH_UNKNOWN 3
/*
 * True when time 'a' is at or before time 'b'.  The test is done on the
 * difference (a - b) rather than by comparing the operands directly.
 * NOTE(review): this relies on the operands being signed; with unsigned
 * operands the difference is never negative -- confirm the types used.
 */
258 #define TIME_LEQ(a, b) ((a) - (b) <= 0)
/*
 * TCP flags tracked in a state.  IPFW_STATE_TCPSTATES contains the tracked
 * flags twice: once unshifted and once shifted left by 8 bits.
 * NOTE(review): presumably one copy per traffic direction (forward and
 * reverse) -- confirm against the code that updates st_state.
 */
260 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST)
261 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \
262 (IPFW_STATE_TCPFLAGS << 8))
/* Each BOTH_* mask has the given flag set in both copies. */
264 #define BOTH_SYN (TH_SYN | (TH_SYN << 8))
265 #define BOTH_FIN (TH_FIN | (TH_FIN << 8))
266 #define BOTH_RST (TH_RST | (TH_RST << 8))
267 /* TH_ACK here means FIN was ACKed. */
268 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8))
/*
 * A state is considered closed when its protocol is TCP and either an RST
 * bit is set in at least one copy, or the FIN-was-ACKed bit is set in both
 * copies.
 */
270 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \
271 (((s)->st_state & BOTH_RST) || \
272 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))
/*
 * State type used for anchor entries; it reuses the O_NOP opcode value.
 * IPFW_STATE_SCANSKIP() tests st_type against it.
 */
274 #define O_ANCHOR O_NOP
/* True when 'type' is the address/port translation type (O_REDIRECT). */
276 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT)
/*
 * True when state 's' is a translation state whose xlat_invalid is set.
 * The cast from struct ipfw_state to struct ipfw_xlat depends on xlat_st
 * being the first member of struct ipfw_xlat.
 */
277 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \
278 ((struct ipfw_xlat *)(s))->xlat_invalid)
280 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1
281 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2
283 #define IPFW_XLATE_INSERT 0x0001
284 #define IPFW_XLATE_FORWARD 0x0002
285 #define IPFW_XLATE_OUTPUT 0x0004
288 struct netmsg_base base;
289 const struct ipfw_ioc_rule *ioc_rule;
290 struct ip_fw *next_rule;
291 struct ip_fw *prev_rule;
292 struct ip_fw *sibling;
294 struct ip_fw **cross_rules;
298 struct netmsg_base base;
299 struct ip_fw *start_rule;
300 struct ip_fw *prev_rule;
307 struct netmsg_base base;
308 struct ip_fw *start_rule;
313 struct netmsg_cpstate {
314 struct netmsg_base base;
315 struct ipfw_ioc_state *ioc_state;
320 struct netmsg_tblent {
321 struct netmsg_base base;
322 struct sockaddr *key;
323 struct sockaddr *netmask;
324 struct ipfw_tblent *sibling;
328 struct netmsg_tblflush {
329 struct netmsg_base base;
334 struct netmsg_tblexp {
335 struct netmsg_base base;
340 struct radix_node_head *rnh;
343 struct ipfw_table_cp {
344 struct ipfw_ioc_tblent *te;
351 * offset The offset of a fragment. offset != 0 means that
352 * we have a fragment at this offset of an IPv4 packet.
353 * offset == 0 means that (if this is an IPv4 packet)
354 * this is the first or only fragment.
359 * Local copies of addresses. They are only valid if we have
362 * proto The protocol. Set to 0 for non-ip packets,
363 * or to the protocol read from the packet otherwise.
364 * proto != 0 means that we have an IPv4 packet.
366 * src_port, dst_port port numbers, in HOST format. Only
367 * valid for TCP and UDP packets.
369 * src_ip, dst_ip ip addresses, in NETWORK format.
370 * Only valid for IPv4 packets.
373 uint16_t src_port; /* NOTE: host format */
374 uint16_t dst_port; /* NOTE: host format */
375 struct in_addr src_ip; /* NOTE: network format */
376 struct in_addr dst_ip; /* NOTE: network format */
382 uint32_t addr1; /* host byte order */
383 uint32_t addr2; /* host byte order */
387 uint16_t port1; /* host byte order */
388 uint16_t port2; /* host byte order */
393 struct ipfw_addrs addrs;
397 struct ipfw_ports ports;
401 uint8_t swap; /* IPFW_KEY_SWAP_ */
/*
 * Bits of ipfw_key.swap.  ipfw_key_build() sets a bit when it stores the
 * corresponding pair in swapped order (addr1/port1 = destination,
 * addr2/port2 = source); ipfw_key_4tuple() checks the bits to hand back
 * the source and destination in their original roles.
 */
405 #define IPFW_KEY_SWAP_ADDRS 0x1
406 #define IPFW_KEY_SWAP_PORTS 0x2
407 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)
410 RB_ENTRY(ipfw_trkcnt) tc_rblink;
411 struct ipfw_key tc_key;
415 time_t tc_expire; /* userland get-only */
416 uint16_t tc_rulenum; /* userland get-only */
419 #define tc_addrs tc_key.addr_u.value
420 #define tc_ports tc_key.port_u.value
421 #define tc_proto tc_key.proto
422 #define tc_saddr tc_key.addr_u.addrs.addr1
423 #define tc_daddr tc_key.addr_u.addrs.addr2
424 #define tc_sport tc_key.port_u.ports.port1
425 #define tc_dport tc_key.port_u.ports.port2
427 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt);
432 RB_ENTRY(ipfw_track) t_rblink;
433 struct ipfw_key t_key;
434 struct ip_fw *t_rule;
436 LIST_HEAD(, ipfw_state) t_state_list;
438 volatile int *t_count;
439 struct ipfw_trkcnt *t_trkcnt;
440 TAILQ_ENTRY(ipfw_track) t_link;
443 #define t_addrs t_key.addr_u.value
444 #define t_ports t_key.port_u.value
445 #define t_proto t_key.proto
446 #define t_saddr t_key.addr_u.addrs.addr1
447 #define t_daddr t_key.addr_u.addrs.addr2
448 #define t_sport t_key.port_u.ports.port1
449 #define t_dport t_key.port_u.ports.port2
451 RB_HEAD(ipfw_track_tree, ipfw_track);
452 TAILQ_HEAD(ipfw_track_list, ipfw_track);
455 RB_ENTRY(ipfw_state) st_rblink;
456 struct ipfw_key st_key;
458 time_t st_expire; /* expire time */
459 struct ip_fw *st_rule;
461 uint64_t st_pcnt; /* packets */
462 uint64_t st_bcnt; /* bytes */
466 * State of this rule, typically a combination of TCP flags.
468 * st_ack_fwd/st_ack_rev:
469 * Most recent ACKs in forward and reverse direction. They
470 * are used to generate keepalives.
473 uint32_t st_ack_fwd; /* host byte order */
474 uint32_t st_seq_fwd; /* host byte order */
475 uint32_t st_ack_rev; /* host byte order */
476 uint32_t st_seq_rev; /* host byte order */
478 uint16_t st_flags; /* IPFW_STATE_F_ */
479 uint16_t st_type; /* KEEP_STATE/LIMIT/RDR */
480 struct ipfw_track *st_track;
482 LIST_ENTRY(ipfw_state) st_trklink;
483 TAILQ_ENTRY(ipfw_state) st_link;
486 #define st_addrs st_key.addr_u.value
487 #define st_ports st_key.port_u.value
488 #define st_proto st_key.proto
489 #define st_swap st_key.swap
491 #define IPFW_STATE_F_ACKFWD 0x0001
492 #define IPFW_STATE_F_SEQFWD 0x0002
493 #define IPFW_STATE_F_ACKREV 0x0004
494 #define IPFW_STATE_F_SEQREV 0x0008
495 #define IPFW_STATE_F_XLATSRC 0x0010
496 #define IPFW_STATE_F_XLATSLAVE 0x0020
497 #define IPFW_STATE_F_LINKED 0x0040
/*
 * True for entries that are anchors (st_type == O_ANCHOR) or that have
 * IPFW_STATE_F_XLATSLAVE set.
 * NOTE(review): presumably used by the state list scan loops to step over
 * such entries -- confirm against the callers.
 */
499 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \
500 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE))
502 /* Expired (st_expire at or before time_uptime) or being deleted (invalid translation state). */
503 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \
504 IPFW_XLAT_INVALID((s)))
506 TAILQ_HEAD(ipfw_state_list, ipfw_state);
507 RB_HEAD(ipfw_state_tree, ipfw_state);
510 struct ipfw_state xlat_st; /* MUST be the first field */
511 uint32_t xlat_addr; /* network byte order */
512 uint16_t xlat_port; /* network byte order */
513 uint16_t xlat_dir; /* MATCH_ */
514 struct ifnet *xlat_ifp; /* matching ifnet */
515 struct ipfw_xlat *xlat_pair; /* paired state */
516 int xlat_pcpu; /* paired cpu */
517 volatile int xlat_invalid; /* invalid, but not dtor yet */
518 volatile uint64_t xlat_crefs; /* cross references */
519 struct netmsg_base xlat_freenm; /* for remote free */
522 #define xlat_type xlat_st.st_type
523 #define xlat_flags xlat_st.st_flags
524 #define xlat_rule xlat_st.st_rule
525 #define xlat_bcnt xlat_st.st_bcnt
526 #define xlat_pcnt xlat_st.st_pcnt
529 struct radix_node te_nodes[2];
530 struct sockaddr_in te_key;
533 struct ipfw_tblent *te_sibling;
534 volatile int te_expired;
537 struct ipfw_context {
538 struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */
539 struct ip_fw *ipfw_default_rule; /* default rule */
540 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat*/
543 * ipfw_set_disable contains one bit per set value (0..31).
544 * If the bit is set, all rules with the corresponding set
545 * are disabled. Set IPFW_DEFAULT_SET is reserved for the
546 * default rule and CANNOT be disabled.
548 uint32_t ipfw_set_disable;
550 uint8_t ipfw_flags; /* IPFW_FLAG_ */
552 struct ip_fw *ipfw_cont_rule;
553 struct ipfw_xlat *ipfw_cont_xlat;
555 struct ipfw_state_tree ipfw_state_tree;
556 struct ipfw_state_list ipfw_state_list;
557 int ipfw_state_loosecnt;
561 struct ipfw_state state;
562 struct ipfw_track track;
563 struct ipfw_trkcnt trkcnt;
566 struct ipfw_track_tree ipfw_track_tree;
567 struct ipfw_track_list ipfw_track_list;
568 struct ipfw_trkcnt *ipfw_trkcnt_spare;
570 struct callout ipfw_stateto_ch;
571 time_t ipfw_state_lastexp;
572 struct netmsg_base ipfw_stateexp_nm;
573 struct netmsg_base ipfw_stateexp_more;
574 struct ipfw_state ipfw_stateexp_anch;
576 struct callout ipfw_trackto_ch;
577 time_t ipfw_track_lastexp;
578 struct netmsg_base ipfw_trackexp_nm;
579 struct netmsg_base ipfw_trackexp_more;
580 struct ipfw_track ipfw_trackexp_anch;
582 struct callout ipfw_keepalive_ch;
583 struct netmsg_base ipfw_keepalive_nm;
584 struct netmsg_base ipfw_keepalive_more;
585 struct ipfw_state ipfw_keepalive_anch;
587 struct callout ipfw_xlatreap_ch;
588 struct netmsg_base ipfw_xlatreap_nm;
589 struct ipfw_state_list ipfw_xlatreap;
594 u_long ipfw_sts_reap;
595 u_long ipfw_sts_reapfailed;
596 u_long ipfw_sts_overflow;
597 u_long ipfw_sts_nomem;
598 u_long ipfw_sts_tcprecycled;
600 u_long ipfw_tks_nomem;
601 u_long ipfw_tks_reap;
602 u_long ipfw_tks_reapfailed;
603 u_long ipfw_tks_overflow;
604 u_long ipfw_tks_cntnomem;
607 u_long ipfw_defraged;
608 u_long ipfw_defrag_remote;
611 u_long ipfw_xlate_split;
612 u_long ipfw_xlate_conflicts;
613 u_long ipfw_xlate_cresolved;
616 struct radix_node_head *ipfw_tables[];
619 #define IPFW_FLAG_KEEPALIVE 0x01
620 #define IPFW_FLAG_STATEEXP 0x02
621 #define IPFW_FLAG_TRACKEXP 0x04
622 #define IPFW_FLAG_STATEREAP 0x08
623 #define IPFW_FLAG_TRACKREAP 0x10
625 #define ipfw_state_tmpkey ipfw_tmpkey.state
626 #define ipfw_track_tmpkey ipfw_tmpkey.track
627 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt
630 int ipfw_state_loosecnt; /* cache aligned */
631 time_t ipfw_state_globexp __cachealign;
633 struct lwkt_token ipfw_trkcnt_token __cachealign;
634 struct ipfw_trkcnt_tree ipfw_trkcnt_tree;
636 time_t ipfw_track_globexp;
638 /* Accessed in netisr0. */
639 struct ip_fw *ipfw_crossref_free __cachealign;
640 struct callout ipfw_crossref_ch;
641 struct netmsg_base ipfw_crossref_nm;
645 * Module can not be unloaded, if there are references to
646 * certain rules of ipfw(4), e.g. dummynet(4)
648 int ipfw_refcnt __cachealign;
652 static struct ipfw_context *ipfw_ctx[MAXCPU];
654 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
657 * Following two global variables are accessed and updated only
660 static uint32_t static_count; /* # of static rules */
661 static uint32_t static_ioc_len; /* bytes of static rules */
664 * If 1, then ipfw static rules are being flushed,
665 * ipfw_chk() will skip to the default rule.
667 static int ipfw_flushing;
669 static int fw_verbose;
670 static int verbose_limit;
673 static int autoinc_step = IPFW_AUTOINC_STEP_DEF;
675 static int ipfw_table_max = IPFW_TABLE_MAX_DEF;
677 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS);
678 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS);
680 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max);
682 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
683 SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0,
684 "Firewall statistics");
686 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
687 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw");
688 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW,
689 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I",
690 "Rule number autincrement step");
691 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW,
693 "Only do a single pass through ipfw when using dummynet(4)");
694 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
695 &fw_debug, 0, "Enable printing of debug ip_fw statements");
696 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
697 &fw_verbose, 0, "Log matches to ipfw rules");
698 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
699 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
700 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD,
701 &ipfw_table_max, 0, "Max # of tables");
703 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS);
704 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS);
705 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS);
706 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS);
707 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS);
708 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS);
711 * Timeouts for various events in handling states.
715 * 2 == 1~2 second(s).
717 * We use 2 seconds for FIN lifetime, so that the states will not be
718 * ripped prematurely.
720 static uint32_t dyn_ack_lifetime = 300;
721 static uint32_t dyn_syn_lifetime = 20;
722 static uint32_t dyn_finwait_lifetime = 20;
723 static uint32_t dyn_fin_lifetime = 2;
724 static uint32_t dyn_rst_lifetime = 2;
725 static uint32_t dyn_udp_lifetime = 10;
726 static uint32_t dyn_short_lifetime = 5; /* used by tracks too */
729 * Keepalives are sent if dyn_keepalive is set. They are sent every
730 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
731 * seconds of lifetime of a rule.
733 static uint32_t dyn_keepalive_interval = 20;
734 static uint32_t dyn_keepalive_period = 5;
735 static uint32_t dyn_keepalive = 1; /* do send keepalives */
737 static struct ipfw_global ipfw_gd;
738 static int ipfw_state_loosecnt_updthr;
739 static int ipfw_state_max = 4096; /* max # of states */
740 static int ipfw_track_max = 4096; /* max # of tracks */
742 static int ipfw_state_headroom; /* setup at module load time */
743 static int ipfw_state_reap_min = 8;
744 static int ipfw_state_expire_max = 32;
745 static int ipfw_state_scan_max = 256;
746 static int ipfw_keepalive_max = 8;
747 static int ipfw_track_reap_max = 4;
748 static int ipfw_track_expire_max = 16;
749 static int ipfw_track_scan_max = 128;
751 static eventhandler_tag ipfw_ifaddr_event;
754 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count,
755 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I",
756 "Number of states and tracks");
757 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
758 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I",
759 "Max number of states and tracks");
761 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt,
762 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I",
764 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max,
765 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I",
766 "Max number of states");
767 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW,
768 &ipfw_state_headroom, 0, "headroom for state reap");
769 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD,
770 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks");
771 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW,
772 &ipfw_track_max, 0, "Max number of tracks");
773 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
774 &static_count, 0, "Number of static rules");
775 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
776 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
777 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
778 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
779 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
780 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
781 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW,
782 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait");
783 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
784 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
785 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
786 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
787 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
788 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
789 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
790 &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
791 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max,
792 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt,
793 "I", "# of states to scan for each expire iteration");
794 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max,
795 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt,
796 "I", "# of states to expire for each expire iteration");
/*
 * net.inet.ip.fw.keepalive_max: read/write integer backed by
 * ipfw_keepalive_max and validated by ipfw_sysctl_scancnt.
 * The description used to be a verbatim copy of the state_expire_max text
 * ("# of states to expire ..."), which misdescribed this knob.
 * NOTE(review): wording assumes the value caps how many states get
 * keepalives per keepalive iteration -- confirm against the keepalive loop.
 */
797 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max,
798 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt,
799 "I", "# of states to send keepalives for each keepalive iteration");
800 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min,
801 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt,
802 "I", "# of states to reap for state shortage");
803 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max,
804 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt,
805 "I", "# of tracks to scan for each expire iteration");
806 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max,
807 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt,
808 "I", "# of tracks to expire for each expire iteration");
809 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max,
810 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt,
811 "I", "# of tracks to reap for track shortage");
813 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap,
814 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
815 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat,
816 "LU", "# of state reaps due to states shortage");
817 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed,
818 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
819 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat,
820 "LU", "# of state reap failure");
821 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow,
822 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
823 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat,
824 "LU", "# of state overflow");
825 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem,
826 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
827 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat,
828 "LU", "# of state allocation failure");
829 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled,
830 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
831 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat,
832 "LU", "# of state deleted due to fast TCP port recycling");
834 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem,
835 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
836 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat,
837 "LU", "# of track allocation failure");
838 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap,
839 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
840 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat,
841 "LU", "# of track reap due to tracks shortage");
842 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed,
843 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
844 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat,
845 "LU", "# of track reap failure");
846 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow,
847 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
848 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat,
849 "LU", "# of track overflow");
850 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem,
851 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
852 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat,
853 "LU", "# of track counter allocation failure");
854 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags,
855 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
856 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat,
857 "LU", "# of IP fragements defraged");
858 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged,
859 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
860 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat,
861 "LU", "# of IP packets after defrag");
862 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote,
863 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
864 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat,
865 "LU", "# of IP packets after defrag dispatched to remote cpus");
866 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlated,
867 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
868 __offsetof(struct ipfw_context, ipfw_xlated), ipfw_sysctl_stat,
869 "LU", "# address/port translations");
870 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_split,
871 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
872 __offsetof(struct ipfw_context, ipfw_xlate_split), ipfw_sysctl_stat,
873 "LU", "# address/port translations split between different cpus");
874 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_conflicts,
875 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
876 __offsetof(struct ipfw_context, ipfw_xlate_conflicts), ipfw_sysctl_stat,
877 "LU", "# address/port translations conflicts on remote cpu");
878 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_cresolved,
879 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
880 __offsetof(struct ipfw_context, ipfw_xlate_cresolved), ipfw_sysctl_stat,
881 "LU", "# address/port translations conflicts resolved on remote cpu");
/*
 * Ordering functions for the three RB trees, followed by the RB-tree
 * prototypes and generated code that use them.  States are linked through
 * st_rblink, track counters through tc_rblink and tracks through t_rblink.
 */
883 static int ipfw_state_cmp(struct ipfw_state *,
884 struct ipfw_state *);
885 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *,
886 struct ipfw_trkcnt *);
887 static int ipfw_track_cmp(struct ipfw_track *,
888 struct ipfw_track *);
/* State tree, ordered by ipfw_state_cmp(). */
890 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp);
891 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp);
/* Track counter tree, ordered by ipfw_trkcnt_cmp(). */
893 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp);
894 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp);
/* Track tree, ordered by ipfw_track_cmp(). */
896 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp);
897 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp);
899 static int ipfw_chk(struct ip_fw_args *);
900 static void ipfw_track_expire_ipifunc(void *);
901 static void ipfw_state_expire_ipifunc(void *);
902 static void ipfw_keepalive(void *);
903 static int ipfw_state_expire_start(struct ipfw_context *,
905 static void ipfw_crossref_timeo(void *);
906 static void ipfw_state_remove(struct ipfw_context *,
907 struct ipfw_state *);
908 static void ipfw_xlat_reap_timeo(void *);
909 static void ipfw_defrag_redispatch(struct mbuf *, int,
/*
 * Shorthands for acquiring, releasing and initializing
 * ipfw_gd.ipfw_trkcnt_token.
 * NOTE(review): the token presumably serializes access to the track
 * counter RB tree that is shared between CPUs -- confirm against the users.
 * Unlike the other two, IPFW_TRKCNT_TOKINIT already ends in ';', so writing
 * "IPFW_TRKCNT_TOKINIT;" expands to an additional empty statement.
 */
912 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token)
913 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token)
914 #define IPFW_TRKCNT_TOKINIT \
915 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt");
918 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
919 const struct sockaddr *netmask)
921 const u_char *cp1 = (const u_char *)src;
922 u_char *cp2 = (u_char *)dst;
923 const u_char *cp3 = (const u_char *)netmask;
924 u_char *cplim = cp2 + *cp3;
925 u_char *cplim2 = cp2 + *cp1;
927 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
932 *cp2++ = *cp1++ & *cp3++;
934 bzero(cp2, cplim2 - cp2);
937 static __inline uint16_t
938 pfil_cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new, uint8_t udp)
944 l = cksum + old - new;
945 l = (l >> 16) + (l & 65535);
953 ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport,
954 in_addr_t daddr, uint16_t dport, uint8_t proto)
961 key->addr_u.addrs.addr1 = daddr;
962 key->addr_u.addrs.addr2 = saddr;
963 key->swap |= IPFW_KEY_SWAP_ADDRS;
965 key->addr_u.addrs.addr1 = saddr;
966 key->addr_u.addrs.addr2 = daddr;
970 key->port_u.ports.port1 = dport;
971 key->port_u.ports.port2 = sport;
972 key->swap |= IPFW_KEY_SWAP_PORTS;
974 key->port_u.ports.port1 = sport;
975 key->port_u.ports.port2 = dport;
978 if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS))
979 key->swap |= IPFW_KEY_SWAP_PORTS;
980 if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS))
981 key->swap |= IPFW_KEY_SWAP_ADDRS;
985 ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport,
986 in_addr_t *daddr, uint16_t *dport)
989 if (key->swap & IPFW_KEY_SWAP_ADDRS) {
990 *saddr = key->addr_u.addrs.addr2;
991 *daddr = key->addr_u.addrs.addr1;
993 *saddr = key->addr_u.addrs.addr1;
994 *daddr = key->addr_u.addrs.addr2;
997 if (key->swap & IPFW_KEY_SWAP_PORTS) {
998 *sport = key->port_u.ports.port2;
999 *dport = key->port_u.ports.port1;
1001 *sport = key->port_u.ports.port1;
1002 *dport = key->port_u.ports.port2;
1007 ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2)
1010 if (s1->st_proto > s2->st_proto)
1012 if (s1->st_proto < s2->st_proto)
1015 if (s1->st_addrs > s2->st_addrs)
1017 if (s1->st_addrs < s2->st_addrs)
1020 if (s1->st_ports > s2->st_ports)
1022 if (s1->st_ports < s2->st_ports)
1025 if (s1->st_swap == s2->st_swap ||
1026 (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL)
1029 if (s1->st_swap > s2->st_swap)
1036 ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2)
1039 if (t1->tc_proto > t2->tc_proto)
1041 if (t1->tc_proto < t2->tc_proto)
1044 if (t1->tc_addrs > t2->tc_addrs)
1046 if (t1->tc_addrs < t2->tc_addrs)
1049 if (t1->tc_ports > t2->tc_ports)
1051 if (t1->tc_ports < t2->tc_ports)
1054 if (t1->tc_ruleid > t2->tc_ruleid)
1056 if (t1->tc_ruleid < t2->tc_ruleid)
1063 ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2)
1066 if (t1->t_proto > t2->t_proto)
1068 if (t1->t_proto < t2->t_proto)
1071 if (t1->t_addrs > t2->t_addrs)
1073 if (t1->t_addrs < t2->t_addrs)
1076 if (t1->t_ports > t2->t_ports)
1078 if (t1->t_ports < t2->t_ports)
1081 if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule)
1083 if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule)
/*
 * Hook a state into the per-cpu lookup structures: insert it into
 * ctx->ipfw_state_tree, append it to ctx->ipfw_state_list and set
 * IPFW_STATE_F_LINKED.  The state must not be linked already (asserted).
 * 'dup' receives the result of RB_INSERT().
 */
1089 static __inline struct ipfw_state *
1090 ipfw_state_link(struct ipfw_context *ctx, struct ipfw_state *s)
1092 struct ipfw_state *dup;
1094 KASSERT((s->st_flags & IPFW_STATE_F_LINKED) == 0,
1095 ("state %p was linked", s));
1096 dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s);
1098 TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link);
1099 s->st_flags |= IPFW_STATE_F_LINKED;
/*
 * Undo ipfw_state_link(): remove the state from ctx->ipfw_state_tree and
 * ctx->ipfw_state_list and clear IPFW_STATE_F_LINKED.  The state must be
 * linked (asserted).  The state itself is not freed here.
 */
1104 static __inline void
1105 ipfw_state_unlink(struct ipfw_context *ctx, struct ipfw_state *s)
1108 KASSERT(s->st_flags & IPFW_STATE_F_LINKED,
1109 ("state %p was not linked", s));
1110 RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s);
1111 TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link);
1112 s->st_flags &= ~IPFW_STATE_F_LINKED;
/*
 * Set the global state limit and derive from it the threshold at which a
 * cpu folds its private loose counter into the global one: 1/20 (5%) of
 * state_max, split evenly among the netisr cpus.
 */
1116 ipfw_state_max_set(int state_max)
1119 ipfw_state_max = state_max;
1120 /* Allow 5% states over-allocation. */
1121 ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus;
/*
 * Collect the state count: sum ipfw_state_cnt over the contexts of all
 * netisr cpus.  The per-cpu counters are read without any locking here.
 */
1125 ipfw_state_cntcoll(void)
1127 int cpu, state_cnt = 0;
1129 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
1130 state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt;
/*
 * Resynchronize the global loose state counter: recompute the total with
 * ipfw_state_cntcoll() and store it in ipfw_gd.ipfw_state_loosecnt.
 */
1135 ipfw_state_cntsync(void)
1139 state_cnt = ipfw_state_cntcoll();
1140 ipfw_gd.ipfw_state_loosecnt = state_cnt;
/*
 * Release a rule.  Must run on the cpu that owns the rule, and the rule
 * must still hold a positive refcnt on entry (both asserted).  Once
 * refcnt is 0 the cross_rules array (if any) and the rule itself are
 * freed with kfree().
 */
1145 ipfw_free_rule(struct ip_fw *rule)
1147 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid));
1148 KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt));
1150 if (rule->refcnt == 0) {
1151 if (rule->cross_rules != NULL)
1152 kfree(rule->cross_rules, M_IPFW);
1153 kfree(rule, M_IPFW);
/*
 * Callback-style wrapper taking an opaque pointer: hand 'priv' to
 * ipfw_free_rule() and drop one reference from the global
 * ipfw_gd.ipfw_refcnt (asserted positive, decremented atomically).
 */
1160 ipfw_unref_rule(void *priv)
1162 ipfw_free_rule(priv);
1164 KASSERT(ipfw_gd.ipfw_refcnt > 0,
1165 ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt));
1166 atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1);
/*
 * Take a reference on behalf of a rule.  Must run on the cpu that owns
 * the rule (asserted).  The global ipfw_gd.ipfw_refcnt is shared between
 * cpus and is therefore bumped atomically.
 */
1170 static __inline void
1171 ipfw_ref_rule(struct ip_fw *rule)
1173 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid));
1175 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
1181 * This macro maps an ip pointer into a layer3 header pointer of type T
/*
 * L3HDR(T, ip): pointer of type 'T *' located (ip)->ip_hl 32-bit words
 * past 'ip'.  'ip' is evaluated twice, so it must not have side effects.
 */
1183 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))
/*
 * Test the packet's ICMP type against the bitmap stored in cmd->d[]:
 * the type selects word type / 32 and bit type % 32 within that word.
 * idx_max is F_LEN(&cmd->o) minus F_INSN_SIZE(ipfw_insn).
 * NOTE(review): presumably idx_max is the number of d[] words carried by
 * the instruction -- confirm against the F_LEN/F_INSN_SIZE definitions.
 */
1186 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
1188 int type = L3HDR(struct icmp,ip)->icmp_type;
1189 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn);
1190 int idx = type / 32;
1194 return (cmd->d[idx] & (1 << (type % 32)));
/*
 * Bitmask, indexed by ICMP type, of the types is_icmp_query() accepts:
 * ICMP_ECHO, ICMP_ROUTERSOLICIT, ICMP_TSTAMP, ICMP_IREQ and ICMP_MASKREQ.
 */
1197 #define TT ((1 << ICMP_ECHO) | \
1198 (1 << ICMP_ROUTERSOLICIT) | \
1199 (1 << ICMP_TSTAMP) | \
1200 (1 << ICMP_IREQ) | \
1201 (1 << ICMP_MASKREQ))
/*
 * Return non-zero when the packet's ICMP type is below 32 and its bit is
 * set in the TT mask.  The 'type < 32' test keeps the shift count within
 * the width of the mask.
 */
1204 is_icmp_query(struct ip *ip)
1206 int type = L3HDR(struct icmp, ip)->icmp_type;
1208 return (type < 32 && (TT & (1 << type)));
1214 * The following checks use two arrays of 8 or 16 bits to store the
1215 * bits that we want set or clear, respectively. They are in the
1216 * low and high half of cmd->arg1 or cmd->d[0].
1218 * We scan options and store the bits we find set. We succeed if
1220 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
1222 * The code is sometimes optimized not to store additional variables.
/*
 * Compare an 8-bit set of flags against cmd->arg1, whose low byte lists
 * the bits wanted set and whose high byte lists the bits wanted clear.
 * Returns 0 as soon as one of the two tests fails.
 * NOTE(review): both tests read as if 'bits' holds the complement of the
 * observed flags at this point -- confirm.
 */
1225 flags_match(ipfw_insn *cmd, uint8_t bits)
1230 if (((cmd->arg1 & 0xff) & bits) != 0)
1231 return 0; /* some bits we want set were clear */
1233 want_clear = (cmd->arg1 >> 8) & 0xff;
1234 if ((want_clear & bits) != want_clear)
1235 return 0; /* some bits we want clear were set */
/*
 * Scan the IP options that follow the fixed header and match the set of
 * options found against 'cmd' via flags_match().
 *
 * 'x' counts the option bytes left, (ip_hl << 2) - sizeof(struct ip),
 * and 'cp' walks them.  IPOPT_EOL and IPOPT_NOP are recognized
 * explicitly; for other options the length byte is read from the option
 * and a length that is not positive or exceeds the remaining bytes makes
 * the match fail.  The IP_FW_IPOPT_LSRR/SSRR/RR/TS bits are accumulated
 * in 'bits'.
 */
1240 ipopts_match(struct ip *ip, ipfw_insn *cmd)
1242 int optlen, bits = 0;
1243 u_char *cp = (u_char *)(ip + 1);
1244 int x = (ip->ip_hl << 2) - sizeof(struct ip);
1246 for (; x > 0; x -= optlen, cp += optlen) {
1247 int opt = cp[IPOPT_OPTVAL];
1249 if (opt == IPOPT_EOL)
1252 if (opt == IPOPT_NOP) {
1255 optlen = cp[IPOPT_OLEN];
1256 if (optlen <= 0 || optlen > x)
1257 return 0; /* invalid or truncated */
1262 bits |= IP_FW_IPOPT_LSRR;
1266 bits |= IP_FW_IPOPT_SSRR;
1270 bits |= IP_FW_IPOPT_RR;
1274 bits |= IP_FW_IPOPT_TS;
1281 return (flags_match(cmd, bits));
/*
 * Scan the TCP options that follow the fixed TCP header and match the
 * set of options found against 'cmd' via flags_match().
 *
 * The TCP header is located with L3HDR(); 'x' counts the option bytes
 * left, (th_off << 2) - sizeof(struct tcphdr), and 'cp' walks them.
 * TCPOPT_EOL and TCPOPT_NOP are recognized explicitly.  The
 * IP_FW_TCPOPT_MSS/WINDOW/SACK/TS/CC bits are accumulated in 'bits'.
 */
1285 tcpopts_match(struct ip *ip, ipfw_insn *cmd)
1287 int optlen, bits = 0;
1288 struct tcphdr *tcp = L3HDR(struct tcphdr,ip);
1289 u_char *cp = (u_char *)(tcp + 1);
1290 int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
1292 for (; x > 0; x -= optlen, cp += optlen) {
1295 if (opt == TCPOPT_EOL)
1298 if (opt == TCPOPT_NOP) {
1308 bits |= IP_FW_TCPOPT_MSS;
1312 bits |= IP_FW_TCPOPT_WINDOW;
1315 case TCPOPT_SACK_PERMITTED:
1317 bits |= IP_FW_TCPOPT_SACK;
1320 case TCPOPT_TIMESTAMP:
1321 bits |= IP_FW_TCPOPT_TS;
1327 bits |= IP_FW_TCPOPT_CC;
1334 return (flags_match(cmd, bits));
/*
 * Match an interface against an interface instruction.
 *
 * A NULL ifp never matches.  When cmd->name is non-empty the interface
 * is matched by name, using either kfnmatch() on if_xname or a
 * strncmp() bounded by IFNAMSIZ.  Otherwise it is matched by address:
 * the current cpu's address list of the interface is walked and the
 * first AF_INET address equal to cmd->p.ip yields a match.
 */
1338 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
1340 if (ifp == NULL) /* no iface with this packet, match fails */
1343 /* Check by name or by IP address */
1344 if (cmd->name[0] != '\0') { /* match by name */
1347 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0)
1350 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
1354 struct ifaddr_container *ifac;
1356 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1357 struct ifaddr *ia = ifac->ifa;
1359 if (ia->ifa_addr == NULL)
1361 if (ia->ifa_addr->sa_family != AF_INET)
1363 if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
1364 (ia->ifa_addr))->sin_addr.s_addr)
1365 return(1); /* match */
1368 return(0); /* no match, fail ... */
/*
 * Expand to the (destination, size) argument pair of a bounded printf
 * call that appends to the char array 'buf' at offset 'len': the
 * destination is buf + len and the size is the room left in buf, or 0
 * once len has reached or passed sizeof(buf).
 *
 * 'buf' must be a real array (sizeof is applied to it).  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 * The parameters are parenthesized so that an expression such as
 * 'a + b' passed as len yields sizeof(buf) - (a + b) instead of
 * sizeof(buf) - a + b, which would overstate the remaining space.
 */
#define SNPARGS(buf, len) ((buf) + (len)), (sizeof(buf) > (len) ? sizeof(buf) - (len) : 0)
1374 * We enter here when we have a rule with O_LOG.
1375 * XXX this function alone takes about 2Kbytes of code!
/*
 * Emit one log(LOG_SECURITY | LOG_INFO) line describing a packet.
 *
 * ctx  - per-cpu context; holds the counter used for packets without a
 *        rule.
 * f    - matching rule, or NULL for a bogus packet.  A NULL rule is
 *        logged with rule number -1 and is limited by comparing
 *        ctx->ipfw_norule_counter with verbose_limit.
 * hlen - 0 means a non-IP frame, which is reported as "MAC".
 * eh   - non-NULL for layer 2 packets, whose ip_off/ip_len are still in
 *        wire order and are converted with ntohs().
 * m    - the packet; m->m_pkthdr.rcvif supplies the input interface.
 * oif  - output interface; selects "out" versus "in" in the message.
 *
 * For a real rule the instruction at ACTION_PTR(f) is the O_LOG one and
 * carries the per-rule limit (max_log / log_left); the instruction that
 * follows it is the action described in the message.  When a limit has
 * just been reached a second, LOG_NOTICE line is emitted.
 */
1378 ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen,
1379 struct ether_header *eh, struct mbuf *m, struct ifnet *oif)
1382 int limit_reached = 0;
1383 char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN];
1388 if (f == NULL) { /* bogus pkt */
1389 if (verbose_limit != 0 &&
1390 ctx->ipfw_norule_counter >= verbose_limit)
1392 ctx->ipfw_norule_counter++;
1393 if (ctx->ipfw_norule_counter == verbose_limit)
1394 limit_reached = verbose_limit;
1396 } else { /* O_LOG is the first action, find the real one */
1397 ipfw_insn *cmd = ACTION_PTR(f);
1398 ipfw_insn_log *l = (ipfw_insn_log *)cmd;
1400 if (l->max_log != 0 && l->log_left == 0)
1403 if (l->log_left == 0)
1404 limit_reached = l->max_log;
1405 cmd += F_LEN(cmd); /* point to first action */
1406 if (cmd->opcode == O_PROB)
1410 switch (cmd->opcode) {
1416 if (cmd->arg1==ICMP_REJECT_RST) {
1418 } else if (cmd->arg1==ICMP_UNREACH_HOST) {
1421 ksnprintf(SNPARGS(action2, 0), "Unreach %d",
1435 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1);
1439 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1);
1443 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1);
1447 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1);
1451 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1);
1456 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
1459 len = ksnprintf(SNPARGS(action2, 0),
1461 kinet_ntoa(sa->sa.sin_addr, abuf));
1462 if (sa->sa.sin_port) {
1463 ksnprintf(SNPARGS(action2, len), ":%d",
1475 if (hlen == 0) { /* non-ip */
1476 ksnprintf(SNPARGS(proto, 0), "MAC");
1478 struct ip *ip = mtod(m, struct ip *);
1479 /* these three are all aliases to the same thing */
1480 struct icmp *const icmp = L3HDR(struct icmp, ip);
1481 struct tcphdr *const tcp = (struct tcphdr *)icmp;
1482 struct udphdr *const udp = (struct udphdr *)icmp;
1484 int ip_off, offset, ip_len;
1487 if (eh != NULL) { /* layer 2 packets are as on the wire */
1488 ip_off = ntohs(ip->ip_off);
1489 ip_len = ntohs(ip->ip_len);
1491 ip_off = ip->ip_off;
1492 ip_len = ip->ip_len;
1494 offset = ip_off & IP_OFFMASK;
1497 len = ksnprintf(SNPARGS(proto, 0), "TCP %s",
1498 kinet_ntoa(ip->ip_src, abuf));
1500 ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
1501 ntohs(tcp->th_sport),
1502 kinet_ntoa(ip->ip_dst, abuf),
1503 ntohs(tcp->th_dport));
1505 ksnprintf(SNPARGS(proto, len), " %s",
1506 kinet_ntoa(ip->ip_dst, abuf));
1511 len = ksnprintf(SNPARGS(proto, 0), "UDP %s",
1512 kinet_ntoa(ip->ip_src, abuf));
1514 ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
1515 ntohs(udp->uh_sport),
1516 kinet_ntoa(ip->ip_dst, abuf),
1517 ntohs(udp->uh_dport));
1519 ksnprintf(SNPARGS(proto, len), " %s",
1520 kinet_ntoa(ip->ip_dst, abuf));
1526 len = ksnprintf(SNPARGS(proto, 0),
1531 len = ksnprintf(SNPARGS(proto, 0), "ICMP ");
1533 len += ksnprintf(SNPARGS(proto, len), "%s",
1534 kinet_ntoa(ip->ip_src, abuf));
1535 ksnprintf(SNPARGS(proto, len), " %s",
1536 kinet_ntoa(ip->ip_dst, abuf));
1540 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
1541 kinet_ntoa(ip->ip_src, abuf));
1542 ksnprintf(SNPARGS(proto, len), " %s",
1543 kinet_ntoa(ip->ip_dst, abuf));
1547 if (ip_off & (IP_MF | IP_OFFMASK)) {
1548 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
1549 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
1550 offset << 3, (ip_off & IP_MF) ? "+" : "");
1554 if (oif || m->m_pkthdr.rcvif) {
1555 log(LOG_SECURITY | LOG_INFO,
1556 "ipfw: %d %s %s %s via %s%s\n",
1557 f ? f->rulenum : -1,
1558 action, proto, oif ? "out" : "in",
1559 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
1562 log(LOG_SECURITY | LOG_INFO,
1563 "ipfw: %d %s %s [no if info]%s\n",
1564 f ? f->rulenum : -1,
1565 action, proto, fragment);
1568 if (limit_reached) {
1569 log(LOG_SECURITY | LOG_NOTICE,
1570 "ipfw: limit %d reached on entry %d\n",
1571 limit_reached, f ? f->rulenum : -1);
/*
 * Final destruction of a master/slave xlat pair once nothing references
 * it any longer.  Runs on the cpu that owns the slave's rule (asserted
 * through slave_x->xlat_rule->cpuid).
 */
1578 ipfw_xlat_reap(struct ipfw_xlat *x, struct ipfw_xlat *slave_x)
1580 struct ip_fw *rule = slave_x->xlat_rule;
1582 KKASSERT(rule->cpuid == mycpuid);
1584 /* No more cross references; free this pair now. */
1586 kfree(slave_x, M_IPFW);
1588 /* See the comment in ipfw_ip_xlate_dispatch(). */
/*
 * Netisr handler that retries the destruction of xlat pairs parked on
 * the per-cpu ctx->ipfw_xlatreap list.
 *
 * The reap message is replied to first so that it can be sent again.
 * For every parked master the cross reference counts of master and slave
 * are summed; pairs taken off the list are handed to ipfw_xlat_reap().
 * If entries remain afterwards, the reap callout is re-armed to fire in
 * 2 ticks.
 */
1593 ipfw_xlat_reap_dispatch(netmsg_t nm)
1595 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1596 struct ipfw_state *s, *ns;
1598 ASSERT_NETISR_NCPUS(mycpuid);
1602 netisr_replymsg(&ctx->ipfw_xlatreap_nm, 0);
1605 /* TODO: limit scanning depth */
1606 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_xlatreap, st_link, ns) {
1607 struct ipfw_xlat *x = (struct ipfw_xlat *)s;
1608 struct ipfw_xlat *slave_x = x->xlat_pair;
1611 crefs = slave_x->xlat_crefs + x->xlat_crefs;
1613 TAILQ_REMOVE(&ctx->ipfw_xlatreap, &x->xlat_st, st_link);
1614 ipfw_xlat_reap(x, slave_x);
1617 if (!TAILQ_EMPTY(&ctx->ipfw_xlatreap)) {
1618 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo,
1619 &ctx->ipfw_xlatreap_nm);
/*
 * Callout handler for the xlat reap timer.  'xnm' is the reap netmsg;
 * it is sent to the current cpu's netisr only when MSGF_DONE shows that
 * the previous send has completed.
 */
1624 ipfw_xlat_reap_timeo(void *xnm)
1626 struct netmsg_base *nm = xnm;
1628 KKASSERT(mycpuid < netisr_ncpus);
1631 if (nm->lmsg.ms_flags & MSGF_DONE)
1632 netisr_sendmsg_oncpu(nm);
/*
 * Netisr handler for the message posted by ipfw_state_del() when the
 * two halves of an xlat pair live on different cpus.  The master xlat
 * arrives in lmsg.u.ms_resultp.
 *
 * Both halves must already be invalidated and the master must be
 * unlinked (asserted).  The slave is unlinked here if it is still
 * linked and one slave cross reference is dropped.  The pair is then
 * either reaped through ipfw_xlat_reap() or appended to
 * ctx->ipfw_xlatreap for a later retry; the reap callout is armed
 * (2 ticks) when that list was empty.
 */
1637 ipfw_xlat_free_dispatch(netmsg_t nmsg)
1639 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1640 struct ipfw_xlat *x = nmsg->lmsg.u.ms_resultp;
1641 struct ipfw_xlat *slave_x = x->xlat_pair;
1644 ASSERT_NETISR_NCPUS(mycpuid);
1646 KKASSERT(slave_x != NULL);
1647 KKASSERT(slave_x->xlat_invalid && x->xlat_invalid);
1649 KASSERT((x->xlat_flags & IPFW_STATE_F_LINKED) == 0,
1650 ("master xlat is still linked"));
1651 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED)
1652 ipfw_state_unlink(ctx, &slave_x->xlat_st);
1654 /* See the comment in ipfw_ip_xlate_dispatch(). */
1655 slave_x->xlat_crefs--;
1657 crefs = slave_x->xlat_crefs + x->xlat_crefs;
1659 ipfw_xlat_reap(x, slave_x);
1663 if (TAILQ_EMPTY(&ctx->ipfw_xlatreap)) {
1664 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo,
1665 &ctx->ipfw_xlatreap_nm);
1669 * This pair is still referenced; defer its destruction.
1670 * YYY reuse st_link.
1672 TAILQ_INSERT_TAIL(&ctx->ipfw_xlatreap, &x->xlat_st, st_link);
/*
 * Mark both halves of an xlat pair invalid: 'x' and its peer
 * x->xlat_pair, which therefore must not be NULL.
 */
1675 static __inline void
1676 ipfw_xlat_invalidate(struct ipfw_xlat *x)
1679 x->xlat_invalid = 1;
1680 x->xlat_pair->xlat_invalid = 1;
/*
 * Delete a state on the current cpu.  Slave xlat states must not be
 * passed in (asserted); ipfw_state_remove() handles those.
 *
 * The per-cpu state counters are decremented, the state is detached
 * from its track (dropping the shared track count atomically) and it is
 * unlinked from the lookup structures.  What happens next depends on
 * the state type:
 *  - a non-xlat state, and an xlat whose pair was never set up, has no
 *    peer to deal with;
 *  - an xlat whose pair lives on this cpu (xlat_pcpu == mycpuid) has its
 *    slave unlinked, if still linked, and freed here;
 *  - otherwise the pair is marked invalid and a priority netmsg carrying
 *    the master is sent to the cpu in xlat_pcpu, where
 *    ipfw_xlat_free_dispatch() completes the teardown.  The rule's
 *    cross_refs is bumped before the message is sent.
 */
1684 ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s)
1686 struct ipfw_xlat *x, *slave_x;
1687 struct netmsg_base *nm;
1689 KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT ||
1690 IPFW_ISXLAT(s->st_type), ("invalid state type %u", s->st_type));
1691 KASSERT((s->st_flags & IPFW_STATE_F_XLATSLAVE) == 0,
1692 ("delete slave xlat"));
1694 KASSERT(ctx->ipfw_state_cnt > 0,
1695 ("invalid state count %d", ctx->ipfw_state_cnt));
1696 ctx->ipfw_state_cnt--;
1697 if (ctx->ipfw_state_loosecnt > 0)
1698 ctx->ipfw_state_loosecnt--;
1701 * Unhook this state.
1703 if (s->st_track != NULL) {
1704 struct ipfw_track *t = s->st_track;
1706 KASSERT(!LIST_EMPTY(&t->t_state_list),
1707 ("track state list is empty"));
1708 LIST_REMOVE(s, st_trklink);
1710 KASSERT(*t->t_count > 0,
1711 ("invalid track count %d", *t->t_count));
1712 atomic_subtract_int(t->t_count, 1);
1714 ipfw_state_unlink(ctx, s);
1717 * Free this state. Xlat requires special processing,
1718 * since xlat are paired state and they could be on
1722 if (!IPFW_ISXLAT(s->st_type)) {
1723 /* Not xlat; free now. */
1728 x = (struct ipfw_xlat *)s;
1730 if (x->xlat_pair == NULL) {
1731 /* Not setup yet; free now. */
1736 slave_x = x->xlat_pair;
1737 KKASSERT(slave_x->xlat_flags & IPFW_STATE_F_XLATSLAVE);
1739 if (x->xlat_pcpu == mycpuid) {
1741 * Paired states are on the same cpu; delete this
1744 KKASSERT(x->xlat_crefs == 0);
1745 KKASSERT(slave_x->xlat_crefs == 0);
1746 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED)
1747 ipfw_state_unlink(ctx, &slave_x->xlat_st);
1749 kfree(slave_x, M_IPFW);
1754 * Free the paired states on the cpu owning the slave xlat.
1758 * Mark the state pair invalid; completely deleting them
1759 * may take some time.
1761 ipfw_xlat_invalidate(x);
1763 nm = &x->xlat_freenm;
1764 netmsg_init(nm, NULL, &netisr_apanic_rport, MSGF_PRIORITY,
1765 ipfw_xlat_free_dispatch);
1766 nm->lmsg.u.ms_resultp = x;
1768 /* See the comment in ipfw_xlate_redispatch(). */
1769 x->xlat_rule->cross_refs++;
1772 netisr_sendmsg(nm, x->xlat_pcpu);
/*
 * Remove a state found by lookup.  A slave xlat cannot be deleted
 * directly: the pair is marked invalid and only the slave is unlinked.
 * Any other state goes through ipfw_state_del().
 */
1776 ipfw_state_remove(struct ipfw_context *ctx, struct ipfw_state *s)
1779 if (s->st_flags & IPFW_STATE_F_XLATSLAVE) {
1780 KKASSERT(IPFW_ISXLAT(s->st_type));
1781 ipfw_xlat_invalidate((struct ipfw_xlat *)s);
1782 ipfw_state_unlink(ctx, s);
1785 ipfw_state_del(ctx, s);
/*
 * Reclaim states when running short of them.  'reap_max' bounds the
 * number of states deleted and is raised to ipfw_state_reap_min when
 * smaller.
 *
 * If no expiration pass is in progress one is started with
 * IPFW_FLAG_STATEREAP set and an unlimited scan (INT_MAX).  Otherwise
 * the pass already in progress is advanced from its anchor
 * (ctx->ipfw_stateexp_anch): the anchor is moved past each state
 * visited, and states that are dead or TCP-closed are deleted.  Every
 * 256 deletions the global state count is re-collected and compared
 * against the headroom.
 */
1789 ipfw_state_reap(struct ipfw_context *ctx, int reap_max)
1791 struct ipfw_state *s, *anchor;
1794 if (reap_max < ipfw_state_reap_min)
1795 reap_max = ipfw_state_reap_min;
1797 if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) {
1799 * Kick start state expiring. Ignore scan limit,
1800 * we are short of states.
1802 ctx->ipfw_flags |= IPFW_FLAG_STATEREAP;
1803 expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max);
1804 ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP;
1809 * States are being expired.
1812 if (ctx->ipfw_state_cnt == 0)
1816 anchor = &ctx->ipfw_stateexp_anch;
1817 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
1819 * Ignore scan limit; we are short of states.
1822 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1823 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
1825 if (IPFW_STATE_SCANSKIP(s))
1828 if (IPFW_STATE_ISDEAD(s) || IPFW_STATE_TCPCLOSED(s)) {
1829 ipfw_state_del(ctx, s);
1830 if (++expired >= reap_max)
1832 if ((expired & 0xff) == 0 &&
1833 ipfw_state_cntcoll() + ipfw_state_headroom <=
1840 * Leave the anchor on the list, even if the end of the list has
1841 * been reached. ipfw_state_expire_more_dispatch() will handle
/*
 * Delete the states on this cpu's state list.  With a non-NULL 'rule'
 * only states whose st_rule is that rule are deleted; with NULL every
 * state is.  Entries for which IPFW_STATE_SCANSKIP() is true are never
 * touched.
 */
1848 ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule)
1850 struct ipfw_state *s, *sn;
1852 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) {
1853 if (IPFW_STATE_SCANSKIP(s))
1855 if (rule != NULL && s->st_rule != rule)
1857 ipfw_state_del(ctx, s);
/*
 * Finish a state expiration pass: clear IPFW_FLAG_STATEEXP (which must
 * be set, asserted) and re-arm the state timeout callout to run
 * ipfw_state_expire_ipifunc() in hz ticks.
 */
1862 ipfw_state_expire_done(struct ipfw_context *ctx)
1865 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1866 ("stateexp is not in progress"));
1867 ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP;
1868 callout_reset(&ctx->ipfw_stateto_ch, hz,
1869 ipfw_state_expire_ipifunc, NULL);
/*
 * Schedule the continuation of the state expiration pass in progress by
 * sending ctx->ipfw_stateexp_more to this cpu's netisr.  A pass must be
 * in progress and the message must not be in flight (both asserted).
 */
1873 ipfw_state_expire_more(struct ipfw_context *ctx)
1875 struct netmsg_base *nm = &ctx->ipfw_stateexp_more;
1877 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1878 ("stateexp is not in progress"));
1879 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
1880 ("stateexp more did not finish"));
1881 netisr_sendmsg_oncpu(nm);
/*
 * Core of a state expiration pass.  Starting at 'anchor', which sits on
 * ctx->ipfw_state_list, walk the list by moving the anchor past each
 * state visited.
 *
 * scan_max   - number of states visited before the pass is suspended.
 * expire_max - number of states deleted before the pass is suspended.
 *
 * Dead states are always deleted; TCP-closed ones only while
 * IPFW_FLAG_STATEREAP is set.  While reaping, the global state count is
 * re-collected every 256 deletions and compared against the headroom.
 * A suspended pass is continued through ipfw_state_expire_more(); when
 * the end of the list is reached the anchor is taken off the list and
 * the pass is finished with ipfw_state_expire_done().
 */
1885 ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor,
1886 int scan_max, int expire_max)
1888 struct ipfw_state *s;
1889 int scanned = 0, expired = 0;
1891 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1892 ("stateexp is not in progress"));
1894 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
1895 if (scanned++ >= scan_max) {
1896 ipfw_state_expire_more(ctx);
1900 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1901 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
1903 if (IPFW_STATE_SCANSKIP(s))
1906 if (IPFW_STATE_ISDEAD(s) ||
1907 ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
1908 IPFW_STATE_TCPCLOSED(s))) {
1909 ipfw_state_del(ctx, s);
1910 if (++expired >= expire_max) {
1911 ipfw_state_expire_more(ctx);
1914 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
1915 (expired & 0xff) == 0 &&
1916 ipfw_state_cntcoll() + ipfw_state_headroom <=
1918 ipfw_state_expire_more(ctx);
1923 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1924 ipfw_state_expire_done(ctx);
/*
 * Netisr handler that continues a suspended state expiration pass.  The
 * message is replied to first so that it can be sent again.  If no
 * states are left the anchor is removed and the pass is finished;
 * otherwise the loop resumes from the anchor with the regular scan and
 * expire limits.
 */
1929 ipfw_state_expire_more_dispatch(netmsg_t nm)
1931 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1932 struct ipfw_state *anchor;
1934 ASSERT_NETISR_NCPUS(mycpuid);
1935 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1936 ("statexp is not in progress"));
1939 netisr_replymsg(&nm->base, 0);
1941 anchor = &ctx->ipfw_stateexp_anch;
1942 if (ctx->ipfw_state_cnt == 0) {
1943 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1944 ipfw_state_expire_done(ctx);
1947 ipfw_state_expire_loop(ctx, anchor,
1948 ipfw_state_scan_max, ipfw_state_expire_max);
/*
 * Begin a state expiration pass; none may be in progress (asserted).
 *
 * The pass ends immediately when there are no states, or when a pass
 * already ran during the current second of uptime and this is not a
 * reap (IPFW_FLAG_STATEREAP clear).  Otherwise the anchor is inserted
 * at the head of the state list and the result of
 * ipfw_state_expire_loop() is returned.
 */
1952 ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
1954 struct ipfw_state *anchor;
1956 KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0,
1957 ("stateexp is in progress"));
1958 ctx->ipfw_flags |= IPFW_FLAG_STATEEXP;
1960 if (ctx->ipfw_state_cnt == 0) {
1961 ipfw_state_expire_done(ctx);
1966 * Do not expire more than once per second, it is useless.
1968 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 &&
1969 ctx->ipfw_state_lastexp == time_uptime) {
1970 ipfw_state_expire_done(ctx);
1973 ctx->ipfw_state_lastexp = time_uptime;
1975 anchor = &ctx->ipfw_stateexp_anch;
1976 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
1977 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max));
/*
 * Netisr handler for the periodic state expiration message.  The
 * message is replied to first; a new pass is started with the regular
 * scan and expire limits unless one is already running.
 */
1981 ipfw_state_expire_dispatch(netmsg_t nm)
1983 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1985 ASSERT_NETISR_NCPUS(mycpuid);
1989 netisr_replymsg(&nm->base, 0);
1992 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) {
1993 /* Running; done. */
1996 ipfw_state_expire_start(ctx,
1997 ipfw_state_scan_max, ipfw_state_expire_max);
/*
 * Timer/IPI entry point for state expiration on the current cpu: send
 * this cpu's ipfw_stateexp_nm to its netisr, but only when MSGF_DONE
 * shows that the previous send has completed.  The argument is unused.
 */
2001 ipfw_state_expire_ipifunc(void *dummy __unused)
2003 struct netmsg_base *msg;
2005 KKASSERT(mycpuid < netisr_ncpus);
2006 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm;
2009 if (msg->lmsg.ms_flags & MSGF_DONE)
2010 netisr_sendmsg_oncpu(msg);
/*
 * Track TCP sequence and acknowledgment numbers for one direction of a
 * state.  Segments carrying TH_RST are tested for first.
 *
 * Per direction (MATCH_FORWARD, otherwise reverse) the first sequence
 * number seen is recorded and the matching IPFW_STATE_F_SEQ* flag set;
 * afterwards a segment is only accepted when its sequence number is
 * SEQ_GEQ the recorded one, else it is treated as out-of-sequence.  The
 * same scheme is applied to the acknowledgment number of segments with
 * TH_ACK set.
 *
 * After the ACK bookkeeping, a FIN from the peer that has not been
 * marked ACKed yet is marked so in st_state when this direction's
 * acknowledgment number equals the peer's recorded sequence number
 * plus 1.  Reverse-direction bits live in the upper byte of st_state
 * (shifted left by 8), forward-direction bits in the lower byte.
 */
2015 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp)
2017 uint32_t seq = ntohl(tcp->th_seq);
2018 uint32_t ack = ntohl(tcp->th_ack);
2020 if (tcp->th_flags & TH_RST)
2023 if (dir == MATCH_FORWARD) {
2024 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) {
2025 s->st_flags |= IPFW_STATE_F_SEQFWD;
2026 s->st_seq_fwd = seq;
2027 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) {
2028 s->st_seq_fwd = seq;
2030 /* Out-of-sequence; done. */
2033 if (tcp->th_flags & TH_ACK) {
2034 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) {
2035 s->st_flags |= IPFW_STATE_F_ACKFWD;
2036 s->st_ack_fwd = ack;
2037 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) {
2038 s->st_ack_fwd = ack;
2040 /* Out-of-sequence; done. */
2044 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) ==
2045 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1)
2046 s->st_state |= (TH_ACK << 8);
2049 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) {
2050 s->st_flags |= IPFW_STATE_F_SEQREV;
2051 s->st_seq_rev = seq;
2052 } else if (SEQ_GEQ(seq, s->st_seq_rev)) {
2053 s->st_seq_rev = seq;
2055 /* Out-of-sequence; done. */
2058 if (tcp->th_flags & TH_ACK) {
2059 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) {
2060 s->st_flags |= IPFW_STATE_F_ACKREV;
2062 } else if (SEQ_GEQ(ack, s->st_ack_rev)) {
2063 s->st_ack_rev = ack;
2065 /* Out-of-sequence; done. */
2069 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN &&
2070 s->st_ack_rev == s->st_seq_fwd + 1)
2071 s->st_state |= TH_ACK;
/*
 * Refresh a state from a packet: fold the packet's TCP flags into
 * st_state and compute a new st_expire.
 *
 * pkt - flow id of the packet (protocol and TCP flags are used).
 * dir - MATCH_FORWARD stores the flags in the low byte of st_state,
 *       any other direction in the byte above it.
 * tcp - TCP header for sequence tracking, or NULL to skip that step.
 *
 * Lifetimes, all relative to time_uptime:
 *  TCP, only the first SYN seen          dyn_syn_lifetime
 *  TCP, both SYNs, at most one FIN       dyn_ack_lifetime
 *  TCP, both FINs, both ACKed            dyn_fin_lifetime
 *  TCP, both FINs, not both ACKed        dyn_finwait_lifetime
 *  TCP, anything else (RST or invalid)   dyn_rst_lifetime
 *  UDP                                   dyn_udp_lifetime
 *  other protocols                       dyn_short_lifetime
 */
2078 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir,
2079 const struct tcphdr *tcp, struct ipfw_state *s)
2082 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
2083 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS;
2085 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp))
2088 s->st_state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
2089 switch (s->st_state & IPFW_STATE_TCPSTATES) {
2090 case TH_SYN: /* opening */
2091 s->st_expire = time_uptime + dyn_syn_lifetime;
2094 case BOTH_SYN: /* move to established */
2095 case BOTH_SYN | TH_FIN: /* one side tries to close */
2096 case BOTH_SYN | (TH_FIN << 8):
2097 s->st_expire = time_uptime + dyn_ack_lifetime;
2100 case BOTH_SYN | BOTH_FIN: /* both sides closed */
2101 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) {
2102 /* And both FINs were ACKed. */
2103 s->st_expire = time_uptime + dyn_fin_lifetime;
2105 s->st_expire = time_uptime +
2106 dyn_finwait_lifetime;
2113 * reset or some invalid combination, but can also
2114 * occur if we use keep-state the wrong way.
2116 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0)
2117 kprintf("invalid state: 0x%x\n", s->st_state);
2119 s->st_expire = time_uptime + dyn_rst_lifetime;
2122 } else if (pkt->proto == IPPROTO_UDP) {
2123 s->st_expire = time_uptime + dyn_udp_lifetime;
2125 /* other protocols */
2126 s->st_expire = time_uptime + dyn_short_lifetime;
/*
 * Find the state matching a packet on the current cpu.
 *
 * The lookup key is built in the per-cpu scratch state
 * ctx->ipfw_state_tmpkey.  A state that is found but dead is removed.
 * So is a TCP-closed state hit by a packet with TH_SYN set, which is
 * counted in ipfw_sts_tcprecycled.
 *
 * For a live state the direction is MATCH_FORWARD when the stored
 * st_swap equals the key's and MATCH_REVERSE otherwise (the two swap
 * values must then share no bits, asserted).  The state is refreshed
 * with ipfw_state_update() and the expire time of its track, if any, is
 * pushed out by dyn_short_lifetime.  The direction, MATCH_NONE when
 * nothing matched, is stored through 'match_direction' when that
 * pointer is not NULL.
 */
2133 static struct ipfw_state *
2134 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt,
2135 int *match_direction, const struct tcphdr *tcp)
2137 struct ipfw_state *key, *s;
2138 int dir = MATCH_NONE;
2140 key = &ctx->ipfw_state_tmpkey;
2141 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port,
2142 pkt->dst_ip, pkt->dst_port, pkt->proto);
2143 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key);
2145 goto done; /* not found. */
2146 if (IPFW_STATE_ISDEAD(s)) {
2147 ipfw_state_remove(ctx, s);
2151 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) {
2152 /* TCP ports recycling is too fast. */
2153 ctx->ipfw_sts_tcprecycled++;
2154 ipfw_state_remove(ctx, s);
2159 if (s->st_swap == key->st_swap) {
2160 dir = MATCH_FORWARD;
2162 KASSERT((s->st_swap & key->st_swap) == 0,
2163 ("found mismatch state"));
2164 dir = MATCH_REVERSE;
2167 /* Update this state. */
2168 ipfw_state_update(pkt, dir, tcp, s);
2170 if (s->st_track != NULL) {
2171 /* This track has been used. */
2172 s->st_track->t_expire = time_uptime + dyn_short_lifetime;
2175 if (match_direction)
2176 *match_direction = dir;
/*
 * Allocate and initialize a state of the given type (O_KEEP_STATE,
 * O_LIMIT or an xlat type, asserted) for flow 'id'.
 *
 * Xlat types get the larger struct ipfw_xlat.  The allocation is zeroed
 * and may fail (M_NULLOK); failures are counted in ipfw_sts_nomem.  The
 * key is built from the flow id, an xlat starts with xlat_dir set to
 * MATCH_NONE, and st_expire/st_state are primed through
 * ipfw_state_update() in the forward direction.
 */
2180 static struct ipfw_state *
2181 ipfw_state_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2182 uint16_t type, struct ip_fw *rule, const struct tcphdr *tcp)
2184 struct ipfw_state *s;
2187 KASSERT(type == O_KEEP_STATE || type == O_LIMIT || IPFW_ISXLAT(type),
2188 ("invalid state type %u", type));
2190 sz = sizeof(struct ipfw_state);
2191 if (IPFW_ISXLAT(type))
2192 sz = sizeof(struct ipfw_xlat);
2194 s = kmalloc(sz, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO);
2196 ctx->ipfw_sts_nomem++;
2200 ipfw_key_build(&s->st_key, id->src_ip, id->src_port,
2201 id->dst_ip, id->dst_port, id->proto);
2205 if (IPFW_ISXLAT(type)) {
2206 struct ipfw_xlat *x = (struct ipfw_xlat *)s;
2208 x->xlat_dir = MATCH_NONE;
2213 * Update this state:
2214 * Set st_expire and st_state.
2216 ipfw_state_update(id, MATCH_FORWARD, tcp, s);
/*
 * Create a state and install it on the current cpu.
 *
 * Both the exact per-cpu count and the per-cpu loose count are bumped;
 * once the loose count reaches ipfw_state_loosecnt_updthr it is folded
 * into the global ipfw_gd.ipfw_state_loosecnt and reset.  A duplicate
 * reported by ipfw_state_link() is fatal (panic).  The state is put on
 * the state list of track 't', which keeps that track referenced.
 */
2221 static struct ipfw_state *
2222 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2223 uint16_t type, struct ip_fw *rule, struct ipfw_track *t,
2224 const struct tcphdr *tcp)
2226 struct ipfw_state *s, *dup;
2228 s = ipfw_state_alloc(ctx, id, type, rule, tcp);
2232 ctx->ipfw_state_cnt++;
2233 ctx->ipfw_state_loosecnt++;
2234 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) {
2235 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt;
2236 ctx->ipfw_state_loosecnt = 0;
2239 dup = ipfw_state_link(ctx, s);
2241 panic("ipfw: %u state exists %p", type, dup);
2244 /* Keep the track referenced. */
2245 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink);
/*
 * Free a track that no state references any more.  The track must not
 * be an anchor (t_count != NULL) and its state list must be empty (both
 * asserted).
 *
 * The track is removed from the per-cpu track tree and list, then its
 * reference on the shared track counter is dropped with compare-and-set
 * operations on tc_refs.  When the last reference goes away the counter
 * is removed from the global ipfw_gd.ipfw_trkcnt_tree, the global
 * counter count is decremented, and the counter is kept as
 * ctx->ipfw_trkcnt_spare if no spare is held.  'trk_freed' records
 * whether the counter went away.
 */
2252 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t)
2254 struct ipfw_trkcnt *trk;
2255 boolean_t trk_freed = FALSE;
2257 KASSERT(t->t_count != NULL, ("track anchor"));
2258 KASSERT(LIST_EMPTY(&t->t_state_list),
2259 ("invalid track is still referenced"));
2262 KASSERT(trk != NULL, ("track has no trkcnt"));
2264 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2265 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link);
2269 * fdrop() style reference counting.
2270 * See kern/kern_descrip.c fdrop().
2273 int refs = trk->tc_refs;
2276 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs));
2279 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) {
2280 KASSERT(trk->tc_count == 0,
2281 ("%d states reference this trkcnt",
2283 RB_REMOVE(ipfw_trkcnt_tree,
2284 &ipfw_gd.ipfw_trkcnt_tree, trk);
2286 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0,
2287 ("invalid trkcnt cnt %d",
2288 ipfw_gd.ipfw_trkcnt_cnt));
2289 ipfw_gd.ipfw_trkcnt_cnt--;
2292 if (ctx->ipfw_trkcnt_spare == NULL)
2293 ctx->ipfw_trkcnt_spare = trk;
2301 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) {
/*
 * Free the tracks on this cpu's track list.  With a non-NULL 'rule'
 * only tracks whose t_rule is that rule are freed; with NULL every
 * track is.  Anchors (t_count == NULL) are never touched.
 */
2310 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule)
2312 struct ipfw_track *t, *tn;
2314 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) {
2315 if (t->t_count == NULL) /* anchor */
2317 if (rule != NULL && t->t_rule != rule)
2319 ipfw_track_free(ctx, t);
/*
 * Expire the states hanging off one track.  The track must not be an
 * anchor (asserted).  Nothing is done for a track without states, or
 * when this track was already processed during the current second of
 * uptime.  Dead states are deleted; TCP-closed ones as well when 'reap'
 * is set.
 */
2324 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t,
2327 struct ipfw_state *s, *sn;
2328 boolean_t ret = FALSE;
2330 KASSERT(t->t_count != NULL, ("track anchor"));
2332 if (LIST_EMPTY(&t->t_state_list))
2336 * Do not expire more than once per second, it is useless.
2338 if (t->t_lastexp == time_uptime)
2340 t->t_lastexp = time_uptime;
2342 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) {
2343 if (IPFW_STATE_ISDEAD(s) || (reap && IPFW_STATE_TCPCLOSED(s))) {
2344 KASSERT(s->st_track == t,
2345 ("state track %p does not match %p",
2347 ipfw_state_del(ctx, s);
/*
 * Obtain a track counter: take the per-cpu spare when one is held
 * (clearing the spare slot), otherwise allocate a cache-aligned one.
 * The allocation may fail (M_NULLOK) and is not zeroed.
 */
2354 static __inline struct ipfw_trkcnt *
2355 ipfw_trkcnt_alloc(struct ipfw_context *ctx)
2357 struct ipfw_trkcnt *trk;
2359 if (ctx->ipfw_trkcnt_spare != NULL) {
2360 trk = ctx->ipfw_trkcnt_spare;
2361 ctx->ipfw_trkcnt_spare = NULL;
2363 trk = kmalloc_cachealign(sizeof(*trk), M_IPFW,
2364 M_INTWAIT | M_NULLOK);
/*
 * Finish a track expiration pass: clear IPFW_FLAG_TRACKEXP (which must
 * be set, asserted) and re-arm the track timeout callout to run
 * ipfw_track_expire_ipifunc() in hz ticks.
 */
2370 ipfw_track_expire_done(struct ipfw_context *ctx)
2373 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2374 ("trackexp is not in progress"));
2375 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP;
2376 callout_reset(&ctx->ipfw_trackto_ch, hz,
2377 ipfw_track_expire_ipifunc, NULL);
/*
 * Schedule the continuation of the track expiration pass in progress by
 * sending ctx->ipfw_trackexp_more to this cpu's netisr.  A pass must be
 * in progress and the message must not be in flight (both asserted).
 */
2381 ipfw_track_expire_more(struct ipfw_context *ctx)
2383 struct netmsg_base *nm = &ctx->ipfw_trackexp_more;
2385 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2386 ("trackexp is not in progress"));
2387 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
2388 ("trackexp more did not finish"));
2389 netisr_sendmsg_oncpu(nm);
/*
 * Core of a track expiration pass.  Starting at 'anchor', which sits on
 * ctx->ipfw_track_list, walk the list by moving the anchor past each
 * track visited.
 *
 * scan_max   - number of tracks visited before the pass is suspended.
 * expire_max - number of track counters freed before the pass is
 *              suspended.
 *
 * For each real track its states are expired first; a track that still
 * has states is left alone.  A stateless track is freed when its
 * t_expire has passed or when reaping.  A suspended pass is continued
 * through ipfw_track_expire_more(); when the end of the list is reached
 * the anchor is taken off the list and the pass is finished with
 * ipfw_track_expire_done().
 */
2393 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor,
2394 int scan_max, int expire_max)
2396 struct ipfw_track *t;
2397 int scanned = 0, expired = 0;
2398 boolean_t reap = FALSE;
2400 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2401 ("trackexp is not in progress"));
2403 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP)
2406 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
2407 if (scanned++ >= scan_max) {
2408 ipfw_track_expire_more(ctx);
2412 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2413 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);
2415 if (t->t_count == NULL) /* anchor */
2418 ipfw_track_state_expire(ctx, t, reap);
2419 if (!LIST_EMPTY(&t->t_state_list)) {
2420 /* There are states referencing this track. */
2424 if (TIME_LEQ(t->t_expire, time_uptime) || reap) {
2426 if (ipfw_track_free(ctx, t)) {
2427 if (++expired >= expire_max) {
2428 ipfw_track_expire_more(ctx);
2434 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2435 ipfw_track_expire_done(ctx);
/*
 * Begin a track expiration pass; none may be in progress (asserted).
 *
 * The pass ends immediately when the track tree is empty, or when a
 * pass already ran during the current second of uptime and this is not
 * a reap (IPFW_FLAG_TRACKREAP clear).  Otherwise the anchor is inserted
 * at the head of the track list and the result of
 * ipfw_track_expire_loop() is returned.
 */
2440 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
2442 struct ipfw_track *anchor;
2444 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0,
2445 ("trackexp is in progress"));
2446 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP;
2448 if (RB_EMPTY(&ctx->ipfw_track_tree)) {
2449 ipfw_track_expire_done(ctx);
2454 * Do not expire more than once per second, it is useless.
2456 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 &&
2457 ctx->ipfw_track_lastexp == time_uptime) {
2458 ipfw_track_expire_done(ctx);
2461 ctx->ipfw_track_lastexp = time_uptime;
2463 anchor = &ctx->ipfw_trackexp_anch;
2464 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link);
2465 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max));
/*
 * Netisr handler that continues a suspended track expiration pass.  The
 * message is replied to first so that it can be sent again.  If the
 * track tree is empty the anchor is removed and the pass is finished;
 * otherwise the loop resumes from the anchor with the regular scan and
 * expire limits.
 */
2469 ipfw_track_expire_more_dispatch(netmsg_t nm)
2471 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
2472 struct ipfw_track *anchor;
2474 ASSERT_NETISR_NCPUS(mycpuid);
2475 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2476 ("trackexp is not in progress"));
2479 netisr_replymsg(&nm->base, 0);
2481 anchor = &ctx->ipfw_trackexp_anch;
2482 if (RB_EMPTY(&ctx->ipfw_track_tree)) {
2483 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2484 ipfw_track_expire_done(ctx);
2487 ipfw_track_expire_loop(ctx, anchor,
2488 ipfw_track_scan_max, ipfw_track_expire_max);
/*
 * Netisr handler for the periodic track expiration message.  The
 * message is replied to first; a new pass is started with the regular
 * scan and expire limits unless one is already running.
 */
2492 ipfw_track_expire_dispatch(netmsg_t nm)
2494 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
2496 ASSERT_NETISR_NCPUS(mycpuid);
2500 netisr_replymsg(&nm->base, 0);
2503 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) {
2504 /* Running; done. */
2507 ipfw_track_expire_start(ctx,
2508 ipfw_track_scan_max, ipfw_track_expire_max);
/*
 * Timer/IPI entry point for track expiration on the current cpu: send
 * this cpu's ipfw_trackexp_nm to its netisr, but only when MSGF_DONE
 * shows that the previous send has completed.  The argument is unused.
 */
2512 ipfw_track_expire_ipifunc(void *dummy __unused)
2514 struct netmsg_base *msg;
2516 KKASSERT(mycpuid < netisr_ncpus);
2517 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm;
2520 if (msg->lmsg.ms_flags & MSGF_DONE)
2521 netisr_sendmsg_oncpu(msg);
/*
 * Reclaim tracks when running short of them.
 *
 * If no expiration pass is in progress one is started with
 * IPFW_FLAG_TRACKREAP set, an unlimited scan (INT_MAX) and
 * ipfw_track_reap_max as the expire limit.  Otherwise the pass already
 * in progress is advanced from its anchor (ctx->ipfw_trackexp_anch):
 * the states of each real track are expired in reap mode and tracks
 * left without states are freed, until ipfw_track_reap_max track
 * counters have been released.
 */
2526 ipfw_track_reap(struct ipfw_context *ctx)
2528 struct ipfw_track *t, *anchor;
2531 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) {
2533 * Kick start track expiring. Ignore scan limit,
2534 * we are short of tracks.
2536 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP;
2537 expired = ipfw_track_expire_start(ctx, INT_MAX,
2538 ipfw_track_reap_max);
2539 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP;
2544 * Tracks are being expired.
2547 if (RB_EMPTY(&ctx->ipfw_track_tree))
2551 anchor = &ctx->ipfw_trackexp_anch;
2552 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
2554 * Ignore scan limit; we are short of tracks.
2557 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2558 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);
2560 if (t->t_count == NULL) /* anchor */
2563 ipfw_track_state_expire(ctx, t, TRUE);
2564 if (!LIST_EMPTY(&t->t_state_list)) {
2565 /* There are states referencing this track. */
2569 if (ipfw_track_free(ctx, t)) {
2570 if (++expired >= ipfw_track_reap_max) {
2571 ipfw_track_expire_more(ctx);
2578 * Leave the anchor on the list, even if the end of the list has
2579 * been reached. ipfw_track_expire_more_dispatch() will handle
/*
 * Find or create the track for flow 'id' under 'rule'.
 *
 * The lookup key is built in the per-cpu scratch track
 * ctx->ipfw_track_tmpkey: the protocol always takes part, while
 * addresses and ports are copied from the flow id only for the fields
 * selected by 'limit_mask' (DYN_SRC_ADDR, DYN_DST_ADDR, DYN_SRC_PORT,
 * DYN_DST_PORT).  The rule must have a track_ruleid (asserted).
 *
 * A new track is allocated with M_NULLOK; failures are counted in
 * ipfw_tks_nomem.  Each track points, through t_count, at the tc_count
 * of a track counter kept in the global ipfw_gd.ipfw_trkcnt_tree and
 * keyed by the track key plus rule->track_ruleid; tc_refs counts the
 * tracks sharing one counter.
 *
 * When the number of counters has reached ipfw_track_max, expiration is
 * kicked on the netisr cpus by IPI, at most once per second of uptime
 * (guarded by a compare-and-set on ipfw_gd.ipfw_track_globexp), and a
 * local ipfw_track_reap() may be attempted.  The ipfw_tks_reap,
 * ipfw_tks_reapfailed, ipfw_tks_overflow and ipfw_tks_cntnomem counters
 * record what happened.
 *
 * A counter that turns out to be unneeded after the global insert is
 * parked in ctx->ipfw_trkcnt_spare.  Finally the track is inserted into
 * the per-cpu tree and list (a duplicate is fatal) and its expire time
 * is set to time_uptime + dyn_short_lifetime.
 */
2585 static struct ipfw_track *
2586 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2587 uint16_t limit_mask, struct ip_fw *rule)
2589 struct ipfw_track *key, *t, *dup;
2590 struct ipfw_trkcnt *trk, *ret;
2591 boolean_t do_expire = FALSE;
2593 KASSERT(rule->track_ruleid != 0,
2594 ("rule %u has no track ruleid", rule->rulenum));
2596 key = &ctx->ipfw_track_tmpkey;
2597 key->t_proto = id->proto;
2601 if (limit_mask & DYN_SRC_ADDR)
2602 key->t_saddr = id->src_ip;
2603 if (limit_mask & DYN_DST_ADDR)
2604 key->t_daddr = id->dst_ip;
2605 if (limit_mask & DYN_SRC_PORT)
2606 key->t_sport = id->src_port;
2607 if (limit_mask & DYN_DST_PORT)
2608 key->t_dport = id->dst_port;
2610 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key);
2614 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK);
2616 ctx->ipfw_tks_nomem++;
2620 t->t_key = key->t_key;
2623 LIST_INIT(&t->t_state_list);
2625 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) {
2626 time_t globexp, uptime;
2632 * Do not expire globally more than once per second,
2635 uptime = time_uptime;
2636 globexp = ipfw_gd.ipfw_track_globexp;
2637 if (globexp != uptime &&
2638 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp,
2642 /* Expire tracks on other CPUs. */
2643 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2646 lwkt_send_ipiq(globaldata_find(cpu),
2647 ipfw_track_expire_ipifunc, NULL);
2651 trk = ipfw_trkcnt_alloc(ctx);
2654 struct ipfw_trkcnt *tkey;
2656 tkey = &ctx->ipfw_trkcnt_tmpkey;
2657 key = NULL; /* tkey overlaps key */
2659 tkey->tc_key = t->t_key;
2660 tkey->tc_ruleid = rule->track_ruleid;
2663 trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2668 ctx->ipfw_tks_reap++;
2669 if (ipfw_track_reap(ctx) > 0) {
2670 if (ipfw_gd.ipfw_trkcnt_cnt <
2672 trk = ipfw_trkcnt_alloc(ctx);
2675 ctx->ipfw_tks_cntnomem++;
2677 ctx->ipfw_tks_overflow++;
2680 ctx->ipfw_tks_reapfailed++;
2681 ctx->ipfw_tks_overflow++;
2684 ctx->ipfw_tks_cntnomem++;
2689 KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus,
2690 ("invalid trkcnt refs %d", trk->tc_refs));
2691 atomic_add_int(&trk->tc_refs, 1);
2695 trk->tc_key = t->t_key;
2696 trk->tc_ruleid = rule->track_ruleid;
2700 trk->tc_rulenum = rule->rulenum;
2703 ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2706 KASSERT(ret->tc_refs > 0 &&
2707 ret->tc_refs < netisr_ncpus,
2708 ("invalid trkcnt refs %d", ret->tc_refs));
2709 KASSERT(ctx->ipfw_trkcnt_spare == NULL,
2710 ("trkcnt spare was installed"));
2711 ctx->ipfw_trkcnt_spare = trk;
2714 ipfw_gd.ipfw_trkcnt_cnt++;
2716 atomic_add_int(&trk->tc_refs, 1);
2719 t->t_count = &trk->tc_count;
2722 dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2724 panic("ipfw: track exists");
2725 TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link);
2727 t->t_expire = time_uptime + dyn_short_lifetime;
2732 * Install state for rule type cmd->o.opcode
2734 * Returns NULL if state is not installed because of errors or because
2735 * states limitations are enforced.
/*
 * Install a state for the instruction cmd on behalf of rule.
 *
 * If the loose state count has reached ipfw_state_max and the synced
 * count confirms it, states are reaped on this context first; when
 * that does not bring the count under the limit, state expiry is
 * requested on the other netisr CPUs by IPI (rate limited through
 * ipfw_state_globexp) and the overflow counter is bumped.
 *
 * O_KEEP_STATE adds a state directly.  O_LIMIT obtains a track with
 * ipfw_track_alloc(), raises the shared counter with a compare-and-set
 * while it is below cmd->conn_limit, then adds the state.  An unknown
 * opcode panics.  For a state of type O_REDIRECT the translation
 * address and port are taken from the rdr instruction, the interface
 * from the mbuf receive interface, and the direction is set to
 * MATCH_FORWARD.
 *
 * NOTE(review): return statements are not visible in this listing; the
 * existing comment above this function says NULL is returned when no
 * state is installed.
 */
2737 static struct ipfw_state *
2738 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule,
2739 ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp)
2741 struct ipfw_state *s;
2742 struct ipfw_track *t;
/* Cheap loose-count test first, then the synchronized count. */
2745 if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max &&
2746 (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) {
2747 boolean_t overflow = TRUE;
2749 ctx->ipfw_sts_reap++;
2750 if (ipfw_state_reap(ctx, diff) == 0)
2751 ctx->ipfw_sts_reapfailed++;
2752 if (ipfw_state_cntsync() < ipfw_state_max)
2756 time_t globexp, uptime;
2760 * Do not expire globally more than once per second,
2763 uptime = time_uptime;
2764 globexp = ipfw_gd.ipfw_state_globexp;
2765 if (globexp == uptime ||
2766 !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp,
2768 ctx->ipfw_sts_overflow++;
2772 /* Expire states on other CPUs. */
2773 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2776 lwkt_send_ipiq(globaldata_find(cpu),
2777 ipfw_state_expire_ipifunc, NULL);
2779 ctx->ipfw_sts_overflow++;
2784 switch (cmd->o.opcode) {
2785 case O_KEEP_STATE: /* bidir rule */
2787 s = ipfw_state_add(ctx, &args->f_id, cmd->o.opcode, rule, NULL,
2793 case O_LIMIT: /* limit number of sessions */
2794 t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule);
/* Counter already at the limit: try to expire states of this track first. */
2798 if (*t->t_count >= cmd->conn_limit) {
2799 if (!ipfw_track_state_expire(ctx, t, TRUE))
/* Raise the shared counter with a compare-and-set on the value just read. */
2803 count = *t->t_count;
2804 if (count >= cmd->conn_limit)
2806 if (atomic_cmpset_int(t->t_count, count, count + 1))
2810 s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp);
/* NOTE(review): presumably undoes the increment when ipfw_state_add() fails -- confirm. */
2813 atomic_subtract_int(t->t_count, 1);
2819 panic("unknown state type %u\n", cmd->o.opcode);
/* Redirect states carry extra translation data; cmd is reinterpreted as an rdr instruction. */
2822 if (s->st_type == O_REDIRECT) {
2823 struct ipfw_xlat *x = (struct ipfw_xlat *)s;
2824 ipfw_insn_rdr *r = (ipfw_insn_rdr *)cmd;
2826 x->xlat_addr = r->addr.s_addr;
2827 x->xlat_port = r->port;
2828 x->xlat_ifp = args->m->m_pkthdr.rcvif;
2829 x->xlat_dir = MATCH_FORWARD;
2830 KKASSERT(x->xlat_ifp != NULL);
/*
 * Look up an IPv4 address in table tableid of this context.
 *
 * A zeroed sockaddr_in key (AF_INET, sin_len set) is handed to the
 * rnh_matchaddr method of the table radix head.
 * NOTE(review): the assignment of *in into the key is not visible in
 * this listing -- confirm.
 *
 * Returns 1 on a match, after stamping te_lastuse with time_second,
 * and 0 when there is no match.
 */
2836 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid,
2837 const struct in_addr *in)
2839 struct radix_node_head *rnh;
2840 struct sockaddr_in sin;
2841 struct ipfw_tblent *te;
2843 KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid));
2844 rnh = ctx->ipfw_tables[tableid];
2846 return (0); /* no match */
2848 memset(&sin, 0, sizeof(sin));
2849 sin.sin_family = AF_INET;
2850 sin.sin_len = sizeof(sin);
2853 te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh);
2855 return (0); /* no match */
/* Record the last time this entry matched. */
2858 te->te_lastuse = time_second;
2859 return (1); /* match */
2863 * Transmit a TCP packet, containing either a RST or a keepalive.
2864 * When flags & TH_RST, we are sending a RST packet, because of a
2865 * "reset" action matched the packet.
2866 * Otherwise we are sending a keepalive, and flags & TH_SYN selects its direction.
2868 * Only {src,dst}_{ip,port} of "id" are used.
/*
 * Build a minimal IP + TCP header packet (no payload, no IP options)
 * in a fresh mbuf and pass it to ip_output().
 *
 * id supplies addresses and ports in host byte order; they are
 * converted with htonl()/htons().  By default source and destination
 * are swapped relative to id; for a keepalive with TH_SYN set in flags
 * they are written back in forward order.  seq and ack are used as
 * given by the caller, except on the RST paths where one of them is
 * forced to 0.
 */
2871 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags)
2876 struct route sro; /* fake route */
/* M_NOWAIT: the header mbuf allocation may fail. */
2878 MGETHDR(m, M_NOWAIT, MT_HEADER);
2881 m->m_pkthdr.rcvif = NULL;
2882 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
/* Leave max_linkhdr bytes of room in front of the IP header. */
2883 m->m_data += max_linkhdr;
2885 ip = mtod(m, struct ip *);
2886 bzero(ip, m->m_len);
2887 tcp = (struct tcphdr *)(ip + 1); /* no IP options */
2888 ip->ip_p = IPPROTO_TCP;
2892 * Assume we are sending a RST (or a keepalive in the reverse
2893 * direction), swap src and destination addresses and ports.
2895 ip->ip_src.s_addr = htonl(id->dst_ip);
2896 ip->ip_dst.s_addr = htonl(id->src_ip);
2897 tcp->th_sport = htons(id->dst_port);
2898 tcp->th_dport = htons(id->src_port);
2899 if (flags & TH_RST) { /* we are sending a RST */
/* The offending segment carried ACK: reply with RST only, seq taken from its ack. */
2900 if (flags & TH_ACK) {
2901 tcp->th_seq = htonl(ack);
2902 tcp->th_ack = htonl(0);
2903 tcp->th_flags = TH_RST;
/* Second RST variant: RST|ACK with seq 0 and ack taken from seq. */
2907 tcp->th_seq = htonl(0);
2908 tcp->th_ack = htonl(seq);
2909 tcp->th_flags = TH_RST | TH_ACK;
2913 * We are sending a keepalive. flags & TH_SYN determines
2914 * the direction, forward if set, reverse if clear.
2915 * NOTE: seq and ack are always assumed to be correct
2916 * as set by the caller. This may be confusing...
2918 if (flags & TH_SYN) {
2920 * we have to rewrite the correct addresses!
2922 ip->ip_dst.s_addr = htonl(id->dst_ip);
2923 ip->ip_src.s_addr = htonl(id->src_ip);
2924 tcp->th_dport = htons(id->dst_port);
2925 tcp->th_sport = htons(id->src_port);
2927 tcp->th_seq = htonl(seq);
2928 tcp->th_ack = htonl(ack);
2929 tcp->th_flags = TH_ACK;
2933 * set ip_len to the payload size so we can compute
2934 * the tcp checksum on the pseudoheader
2935 * XXX check this, could save a couple of words ?
2937 ip->ip_len = htons(sizeof(struct tcphdr));
2938 tcp->th_sum = in_cksum(m, m->m_pkthdr.len);
2941 * now fill fields left out earlier
2943 ip->ip_ttl = ip_defttl;
/* ip_len is stored without byte swapping here, unlike the checksum step above. */
2944 ip->ip_len = m->m_pkthdr.len;
2946 bzero(&sro, sizeof(sro));
2947 ip_rtaddr(ip->ip_dst, &sro);
/* Mark the mbuf as firewall generated; ipfw_chk() passes such mbufs without inspection. */
2949 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED;
2950 ip_output(m, NULL, &sro, 0, NULL, NULL);
2956 * Send a reject message, consuming the mbuf passed as an argument.
/*
 * Reject the packet held in args->m.
 *
 * For any code other than ICMP_REJECT_RST an ICMP unreachable with
 * that code is generated through icmp_error(); for layer2 packets
 * (args->eh != NULL) ip_len and ip_off are converted to host order
 * first.  Otherwise, for a TCP packet with fragment offset 0 that does
 * not itself carry RST, a TCP RST is sent with send_pkt().
 *
 * NOTE(review): ip_len is a parameter but no use of it is visible in
 * this listing.
 */
2959 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len)
2961 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
2962 /* We need the IP header in host order for icmp_error(). */
2963 if (args->eh != NULL) {
2964 struct ip *ip = mtod(args->m, struct ip *);
2966 ip->ip_len = ntohs(ip->ip_len);
2967 ip->ip_off = ntohs(ip->ip_off);
2969 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
2970 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
2971 struct tcphdr *const tcp =
2972 L3HDR(struct tcphdr, mtod(args->m, struct ip *));
/* Never answer a RST with a RST. */
2974 if ((tcp->th_flags & TH_RST) == 0) {
2975 send_pkt(&args->f_id, ntohl(tcp->th_seq),
2976 ntohl(tcp->th_ack), tcp->th_flags | TH_RST);
2986 * Given an ip_fw *, lookup_next_rule will return a pointer
2987 * to the next rule, which can be either the jump
2988 * target (for skipto instructions) or the next one in the list (in
2989 * all other cases including a missing jump target).
2990 * The result is also written in the "next_rule" field of the rule.
2991 * Backward jumps are not allowed, so start looking from the next
2994 * This never returns NULL -- in case we do not have an exact match,
2995 * the next rule is returned. When the ruleset is changed,
2996 * pointers are flushed so we are always correct.
/*
 * Resolve the rule to evaluate after me and cache it in me->next_rule.
 * For an O_SKIPTO action the rules following me are scanned for one
 * whose rulenum is at least the action arg1.
 */
2998 static struct ip_fw *
2999 lookup_next_rule(struct ip_fw *me)
3001 struct ip_fw *rule = NULL;
3004 /* look for action, in case it is a skipto */
3005 cmd = ACTION_PTR(me);
/* NOTE(review): the statement guarded by this test is not visible in this listing. */
3006 if (cmd->opcode == O_LOG)
3008 if (cmd->opcode == O_SKIPTO) {
/* Only rules after me are scanned, so the result is never a backward jump. */
3009 for (rule = me->next; rule; rule = rule->next) {
3010 if (rule->rulenum >= cmd->arg1)
3014 if (rule == NULL) /* failure or not a skipto */
3016 me->next_rule = rule;
/*
 * Match the credentials of the socket owning flow fid.
 *
 * The pcb is looked up in the per-cpu TCP or UDP pcb table.  Two
 * in_pcblookup_hash() calls are present with opposite argument order
 * (dst/src versus src/dst).
 * NOTE(review): the condition choosing between them is not visible in
 * this listing; presumably it depends on oif -- confirm.
 *
 * For O_UID the result is whether the socket credential uid equals
 * uid; otherwise groupmember() is called with uid and the socket
 * credential.
 */
3021 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif,
3022 enum ipfw_opcodes opcode, uid_t uid)
3024 struct in_addr src_ip, dst_ip;
3025 struct inpcbinfo *pi;
3029 if (fid->proto == IPPROTO_TCP) {
3031 pi = &tcbinfo[mycpuid];
3032 } else if (fid->proto == IPPROTO_UDP) {
3034 pi = &udbinfo[mycpuid];
3040 * Values in 'fid' are in host byte order
3042 dst_ip.s_addr = htonl(fid->dst_ip);
3043 src_ip.s_addr = htonl(fid->src_ip);
3045 pcb = in_pcblookup_hash(pi,
3046 dst_ip, htons(fid->dst_port),
3047 src_ip, htons(fid->src_port),
3050 pcb = in_pcblookup_hash(pi,
3051 src_ip, htons(fid->src_port),
3052 dst_ip, htons(fid->dst_port),
3055 if (pcb == NULL || pcb->inp_socket == NULL)
3058 if (opcode == O_UID) {
/* The local macro yields non-zero on a uid mismatch, hence the negation below. */
3059 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b))
3060 return !socheckuid(pcb->inp_socket, uid);
/* In the group case the uid argument is passed to groupmember() as the id to test. */
3063 return groupmember(uid, pcb->inp_socket->so_cred);
/*
 * Match ip against the IPv4 address of the interface named by
 * cmd->ifname, or against its network when IPFW_IFIP_NET is set in
 * cmd->o.arg1.
 *
 * The address and mask are resolved only while IPFW_IFIP_VALID is not
 * set; the result is stored in cmd->addr and cmd->mask and the valid
 * flag is then set, so later calls go straight to the masked compare.
 */
3068 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip)
3071 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) {
3072 struct ifaddr_container *ifac;
3075 ifp = ifunit_netisr(cmd->ifname);
/* Walk the per-cpu address list of the interface looking for an AF_INET address. */
3079 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
3080 struct ifaddr *ia = ifac->ifa;
3082 if (ia->ifa_addr == NULL)
3084 if (ia->ifa_addr->sa_family != AF_INET)
3087 cmd->mask.s_addr = INADDR_ANY;
3088 if (cmd->o.arg1 & IPFW_IFIP_NET) {
3089 cmd->mask = ((struct sockaddr_in *)
3090 ia->ifa_netmask)->sin_addr;
/* A mask still equal to INADDR_ANY is replaced by an all-ones (host) mask. */
3092 if (cmd->mask.s_addr == INADDR_ANY)
3093 cmd->mask.s_addr = INADDR_BROADCAST;
3096 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr;
3097 cmd->addr.s_addr &= cmd->mask.s_addr;
3099 cmd->o.arg1 |= IPFW_IFIP_VALID;
3102 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)
3105 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr);
/*
 * Apply the address/port translation described by x to the packet in m.
 *
 * IPFW_STATE_F_XLATSRC in x->xlat_flags selects the source side,
 * otherwise the destination side is rewritten.  Only TCP and UDP are
 * handled; any other protocol panics.  The address is replaced by
 * x->xlat_addr and, when x->xlat_port is non-zero, the port by
 * x->xlat_port.  The IP header checksum and the transport checksum are
 * adjusted incrementally with pfil_cksum_fixup().
 *
 * NOTE(review): old_addr / old_port presumably receive the values
 * being replaced when non-NULL; the stores are not visible in this
 * listing -- confirm.
 */
3109 ipfw_xlate(const struct ipfw_xlat *x, struct mbuf *m,
3110 struct in_addr *old_addr, uint16_t *old_port)
3112 struct ip *ip = mtod(m, struct ip *);
3113 struct in_addr *addr;
3114 uint16_t *port, *csum, dlen = 0;
3116 boolean_t pseudo = FALSE;
/* Pick the port and checksum fields to patch, per side and protocol. */
3118 if (x->xlat_flags & IPFW_STATE_F_XLATSRC) {
3122 port = &L3HDR(struct tcphdr, ip)->th_sport;
3123 csum = &L3HDR(struct tcphdr, ip)->th_sum;
3126 port = &L3HDR(struct udphdr, ip)->uh_sport;
3127 csum = &L3HDR(struct udphdr, ip)->uh_sum;
3131 panic("ipfw: unsupported src xlate proto %u", ip->ip_p);
3137 port = &L3HDR(struct tcphdr, ip)->th_dport;
3138 csum = &L3HDR(struct tcphdr, ip)->th_sum;
3141 port = &L3HDR(struct udphdr, ip)->uh_dport;
3142 csum = &L3HDR(struct udphdr, ip)->uh_sum;
3146 panic("ipfw: unsupported dst xlate proto %u", ip->ip_p);
3149 if (old_addr != NULL)
3151 if (old_port != NULL) {
3152 if (x->xlat_port != 0)
/*
 * Checksum offload flags are set on the mbuf.  Unless TSO is in use,
 * dlen is the length past the IP header.
 * NOTE(review): presumably this path sets the pseudo flag so that the
 * in_pseudo() value computed at the end replaces the checksum -- the
 * assignment is not visible, confirm.
 */
3158 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) {
3159 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0)
3160 dlen = ip->ip_len - (ip->ip_hl << 2);
3165 const uint16_t *oaddr, *naddr;
/* The 32-bit address is patched into the checksums as two 16-bit halves. */
3167 oaddr = (const uint16_t *)&addr->s_addr;
3168 naddr = (const uint16_t *)&x->xlat_addr;
3170 ip->ip_sum = pfil_cksum_fixup(pfil_cksum_fixup(ip->ip_sum,
3171 oaddr[0], naddr[0], 0), oaddr[1], naddr[1], 0);
3172 *csum = pfil_cksum_fixup(pfil_cksum_fixup(*csum,
3173 oaddr[0], naddr[0], udp), oaddr[1], naddr[1], udp);
3175 addr->s_addr = x->xlat_addr;
3177 if (x->xlat_port != 0) {
3179 *csum = pfil_cksum_fixup(*csum, *port, x->xlat_port,
3182 *port = x->xlat_port;
/* Pseudo-header sum over the final addresses, protocol and dlen. */
3186 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
3187 htons(dlen + ip->ip_p));
/*
 * Netmsg handler installed by ipfw_xlate_redispatch(): continue
 * filtering a translated packet on this cpu.
 *
 * The xlat state passed in arg1 and its rule are recorded in the
 * per-cpu context as the pending continue xlat/rule, the mbuf is
 * passed on (an ip_output() call with IP_FORWARDING is visible), and
 * both context fields are cleared afterwards.
 * NOTE(review): an input-path branch and the reference-count
 * decrements described by the comments below are not visible in this
 * listing -- confirm.
 */
3192 ipfw_ip_xlate_dispatch(netmsg_t nmsg)
3194 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
3195 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
3196 struct mbuf *m = nm->m;
3197 struct ipfw_xlat *x = nm->arg1;
3198 struct ip_fw *rule = x->xlat_rule;
3200 ASSERT_NETISR_NCPUS(mycpuid);
3201 KASSERT(rule->cpuid == mycpuid,
3202 ("rule does not belong to cpu%d", mycpuid));
3203 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
3204 ("mbuf does not have ipfw continue rule"));
/* No continue rule or xlat may already be pending on this context. */
3206 KASSERT(ctx->ipfw_cont_rule == NULL,
3207 ("pending ipfw continue rule"));
3208 KASSERT(ctx->ipfw_cont_xlat == NULL,
3209 ("pending ipfw continue xlat"));
3210 ctx->ipfw_cont_rule = rule;
3211 ctx->ipfw_cont_xlat = x;
3216 ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
3218 /* May not be cleared, if ipfw was unload/disabled. */
3219 ctx->ipfw_cont_rule = NULL;
3220 ctx->ipfw_cont_xlat = NULL;
3223 * This state is no longer used; decrement its xlat_crefs,
3224 * so this state can be deleted.
3228 * This rule is no longer used; decrement its cross_refs,
3229 * so this rule can be deleted.
3232 * Decrement cross_refs in the last step of this function,
3233 * so that the module could be unloaded safely.
/*
 * Hand mbuf m over to cpuid so filtering continues there with the
 * xlat state paired with x.
 *
 * The rule cross_refs is bumped while the mbuf is in flight, the
 * continue / insert / forward requests from flags are recorded in the
 * mbuf fw_flags, ip_len and ip_off are converted to network order
 * unless IPFW_XLATE_OUTPUT is set, and the netmsg embedded in the mbuf
 * header is sent to the target cpu with ipfw_ip_xlate_dispatch() as
 * handler and x->xlat_pair as its argument.
 */
3239 ipfw_xlate_redispatch(struct mbuf *m, int cpuid, struct ipfw_xlat *x,
3242 struct netmsg_genpkt *nm;
/* The target cpu must be the one owning the paired state. */
3244 KASSERT(x->xlat_pcpu == cpuid, ("xlat paired cpu%d, target cpu%d",
3245 x->xlat_pcpu, cpuid));
3248 * Bump cross_refs to prevent this rule and its siblings
3249 * from being deleted, while this mbuf is inflight. The
3250 * cross_refs of the sibling rule on the target cpu will
3251 * be decremented, once this mbuf is going to be filtered
3252 * on the target cpu.
3254 x->xlat_rule->cross_refs++;
3256 * Bump xlat_crefs to prevent this state and its paired
3257 * state from being deleted, while this mbuf is inflight.
3258 * The xlat_crefs of the paired state on the target cpu
3259 * will be decremented, once this mbuf is going to be
3260 * filtered on the target cpu.
/* Translate the caller flags into mbuf firewall flags. */
3264 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
3265 if (flags & IPFW_XLATE_INSERT)
3266 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATINS;
3267 if (flags & IPFW_XLATE_FORWARD)
3268 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATFWD;
3270 if ((flags & IPFW_XLATE_OUTPUT) == 0) {
3271 struct ip *ip = mtod(m, struct ip *);
3275 * ip_input() expects ip_len/ip_off are in network
3278 ip->ip_len = htons(ip->ip_len);
3279 ip->ip_off = htons(ip->ip_off);
/* The message storage lives inside the mbuf header itself. */
3282 nm = &m->m_hdr.mh_genmsg;
3283 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
3284 ipfw_ip_xlate_dispatch);
3286 nm->arg1 = x->xlat_pair;
3288 if (flags & IPFW_XLATE_OUTPUT)
3290 netisr_sendmsg(&nm->base, cpuid);
/*
 * Collect the header fields of the packet in m into local and
 * args->f_id for later matching.
 *
 * With hlen == 0 (not an IP packet) the protocol is recorded as 0.
 * Otherwise protocol, source/destination address, fragment offset and
 * ip_len are captured; offset and ip_len are byte swapped for layer2
 * packets (args->eh != NULL), which are as on the wire.  When the
 * fragment offset is 0 the TCP or UDP ports, TCP flags, or ICMP type
 * are recorded after pulling the needed header bytes up with
 * PULLUP_TO().  Finally the f_id addresses and ports are stored in
 * host byte order.
 *
 * NOTE(review): the return value and the update of *ip0 are not
 * visible in this listing; m_pullup() may replace the mbuf, and
 * presumably NULL is returned when it fails -- confirm.
 */
3293 static struct mbuf *
3294 ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args,
3295 struct ip_fw_local *local, struct ip **ip0)
3297 struct ip *ip = mtod(m, struct ip *);
3302 * Collect parameters into local variables for faster matching.
3304 if (hlen == 0) { /* do not grab addresses for non-ip pkts */
3305 local->proto = args->f_id.proto = 0; /* mark f_id invalid */
3309 local->proto = args->f_id.proto = ip->ip_p;
3310 local->src_ip = ip->ip_src;
3311 local->dst_ip = ip->ip_dst;
3312 if (args->eh != NULL) { /* layer 2 packets are as on the wire */
3313 local->offset = ntohs(ip->ip_off) & IP_OFFMASK;
3314 local->ip_len = ntohs(ip->ip_len);
3316 local->offset = ip->ip_off & IP_OFFMASK;
3317 local->ip_len = ip->ip_len;
/* Helper: make sure the first len bytes are contiguous, refreshing m, args->m and ip. */
3320 #define PULLUP_TO(len) \
3322 if (m->m_len < (len)) { \
3323 args->m = m = m_pullup(m, (len)); \
3328 ip = mtod(m, struct ip *); \
3332 if (local->offset == 0) {
3333 switch (local->proto) {
3335 PULLUP_TO(hlen + sizeof(struct tcphdr));
3336 local->tcp = tcp = L3HDR(struct tcphdr, ip);
3337 local->dst_port = tcp->th_dport;
3338 local->src_port = tcp->th_sport;
3339 args->f_id.flags = tcp->th_flags;
3343 PULLUP_TO(hlen + sizeof(struct udphdr));
3344 udp = L3HDR(struct udphdr, ip);
3345 local->dst_port = udp->uh_dport;
3346 local->src_port = udp->uh_sport;
3350 PULLUP_TO(hlen + 4); /* type, code and checksum. */
3351 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type;
/* Ports were copied from the wire above; convert them and the addresses to host order. */
3361 args->f_id.src_ip = ntohl(local->src_ip.s_addr);
3362 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr);
3363 args->f_id.src_port = local->src_port = ntohs(local->src_port);
3364 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port);
/*
 * Recompute the hash of mbuf m and rebuild the local packet summary.
 *
 * ip_len and ip_off are switched to network byte order and M_HASH is
 * cleared; after rehashing, M_HASH is asserted to be set again, the
 * two fields are switched back to host order, and the result of
 * ipfw_setup_local() is returned.
 * NOTE(review): the rehash call itself is not visible in this listing;
 * the comment below names ip_hashfn() -- confirm.
 */
3370 static struct mbuf *
3371 ipfw_rehashm(struct mbuf *m, const int hlen, struct ip_fw_args *args,
3372 struct ip_fw_local *local, struct ip **ip0)
3374 struct ip *ip = mtod(m, struct ip *);
3376 ip->ip_len = htons(ip->ip_len);
3377 ip->ip_off = htons(ip->ip_off);
/* Drop the stale hash so a fresh one is computed. */
3379 m->m_flags &= ~M_HASH;
3386 KASSERT(m->m_flags & M_HASH, ("no hash"));
3388 /* 'm' might be changed by ip_hashfn(). */
3389 ip = mtod(m, struct ip *);
3390 ip->ip_len = ntohs(ip->ip_len);
3391 ip->ip_off = ntohs(ip->ip_off);
3393 return (ipfw_setup_local(m, hlen, args, local, ip0));
3397 * The main check routine for the firewall.
3399 * All arguments are in args so we can modify them and return them
3400 * back to the caller.
3404 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
3405 * Starts with the IP header.
3406 * args->eh (in) Mac header if present, or NULL for layer3 packet.
3407 * args->oif Outgoing interface, or NULL if packet is incoming.
3408 * The incoming interface is in the mbuf. (in)
3410 * args->rule Pointer to the last matching rule (in/out)
3411 * args->f_id Addresses grabbed from the packet (out)
3415 * If the packet was denied/rejected and has been dropped, *m is equal
3416 * to NULL upon return.
3418 * IP_FW_DENY the packet must be dropped.
3419 * IP_FW_PASS The packet is to be accepted and routed normally.
3420 * IP_FW_DIVERT Divert the packet to port (args->cookie)
3421 * IP_FW_TEE Tee the packet to port (args->cookie)
3422 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie)
3423 * IP_FW_CONTINUE Continue processing on another cpu.
3426 ipfw_chk(struct ip_fw_args *args)
3429 * Local variables hold state during the processing of a packet.
3431 * IMPORTANT NOTE: to speed up the processing of rules, there
3432 * are some assumptions about the values of the variables, which
3433 * are documented here. Should you change them, please check
3434 * the implementation of the various instructions to make sure
3435 * that they still work.
3437 * args->eh The MAC header. It is non-null for a layer2
3438 * packet, it is NULL for a layer-3 packet.
3440 * m | args->m Pointer to the mbuf, as received from the caller.
3441 * It may change if ipfw_chk() does an m_pullup, or if it
3442 * consumes the packet because it calls send_reject().
3443 * XXX This has to change, so that ipfw_chk() never modifies
3444 * or consumes the buffer.
3445 * ip is simply an alias of the value of m, and it is kept
3446 * in sync with it (the packet is supposed to start with
3449 struct mbuf *m = args->m;
3450 struct ip *ip = mtod(m, struct ip *);
3453 * oif | args->oif If NULL, ipfw_chk has been called on the
3454 * inbound path (ether_input, ip_input).
3455 * If non-NULL, ipfw_chk has been called on the outbound path
3456 * (ether_output, ip_output).
3458 struct ifnet *oif = args->oif;
3460 struct ip_fw *f = NULL; /* matching rule */
3461 int retval = IP_FW_PASS;
3463 struct divert_info *divinfo;
3464 struct ipfw_state *s;
3467 * hlen The length of the IPv4 header.
3468 * hlen >0 means we have an IPv4 packet.
3470 u_int hlen = 0; /* hlen >0 means we have an IP pkt */
3472 struct ip_fw_local lc;
3475 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
3476 * MATCH_NONE when checked and not matched (dyn_f = NULL),
3477 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
3479 int dyn_dir = MATCH_UNKNOWN;
3480 struct ip_fw *dyn_f = NULL;
3481 int cpuid = mycpuid;
3482 struct ipfw_context *ctx;
3484 ASSERT_NETISR_NCPUS(cpuid);
3485 ctx = ipfw_ctx[cpuid];
3487 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED)
3488 return IP_FW_PASS; /* accept */
3490 if (args->eh == NULL || /* layer 3 packet */
3491 (m->m_pkthdr.len >= sizeof(struct ip) &&
3492 ntohs(args->eh->ether_type) == ETHERTYPE_IP))
3493 hlen = ip->ip_hl << 2;
3495 memset(&lc, 0, sizeof(lc));
3497 m = ipfw_setup_local(m, hlen, args, &lc, &ip);
3503 * Packet has already been tagged. Look for the next rule
3504 * to restart processing.
3506 * If fw_one_pass != 0 then just accept it.
3507 * XXX should not happen here, but optimized out in
3510 if (fw_one_pass && (args->flags & IP_FWARG_F_CONT) == 0)
3512 args->flags &= ~IP_FWARG_F_CONT;
3514 /* This rule is being/has been flushed */
3518 KASSERT(args->rule->cpuid == cpuid,
3519 ("rule used on cpu%d", cpuid));
3521 /* This rule was deleted */
3522 if (args->rule->rule_flags & IPFW_RULE_F_INVALID)
3525 if (args->xlat != NULL) {
3526 struct ipfw_xlat *x = args->xlat;
3528 /* This xlat is being deleted. */
3529 if (x->xlat_invalid)
3535 dyn_dir = (args->flags & IP_FWARG_F_XLATFWD) ?
3536 MATCH_FORWARD : MATCH_REVERSE;
3538 if (args->flags & IP_FWARG_F_XLATINS) {
3539 KASSERT(x->xlat_flags & IPFW_STATE_F_XLATSLAVE,
3540 ("not slave %u state", x->xlat_type));
3541 s = ipfw_state_link(ctx, &x->xlat_st);
3543 ctx->ipfw_xlate_conflicts++;
3544 if (IPFW_STATE_ISDEAD(s)) {
3545 ipfw_state_remove(ctx, s);
3546 s = ipfw_state_link(ctx,
3553 "conflicts %u state\n",
3557 ipfw_xlat_invalidate(x);
3560 ctx->ipfw_xlate_cresolved++;
3563 ipfw_state_update(&args->f_id, dyn_dir,
3564 lc.tcp, &x->xlat_st);
3567 /* TODO: setup dyn_f, dyn_dir */
3569 f = args->rule->next_rule;
3571 f = lookup_next_rule(args->rule);
3575 * Find the starting rule. It can be either the first
3576 * one, or the one after divert_rule if asked so.
3580 KKASSERT((args->flags &
3581 (IP_FWARG_F_XLATINS | IP_FWARG_F_CONT)) == 0);
3582 KKASSERT(args->xlat == NULL);
3584 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL);
3586 divinfo = m_tag_data(mtag);
3587 skipto = divinfo->skipto;
3592 f = ctx->ipfw_layer3_chain;
3593 if (args->eh == NULL && skipto != 0) {
3594 /* No skipto during rule flushing */
3598 if (skipto >= IPFW_DEFAULT_RULE)
3599 return IP_FW_DENY; /* invalid */
3601 while (f && f->rulenum <= skipto)
3603 if (f == NULL) /* drop packet */
3605 } else if (ipfw_flushing) {
3606 /* Rules are being flushed; skip to default rule */
3607 f = ctx->ipfw_default_rule;
3610 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL)
3611 m_tag_delete(m, mtag);
3614 * Now scan the rules, and parse microinstructions for each rule.
3616 for (; f; f = f->next) {
3619 int skip_or; /* skip rest of OR block */
3622 if (ctx->ipfw_set_disable & (1 << f->set)) {
3627 if (args->xlat != NULL) {
3629 l = f->cmd_len - f->act_ofs;
3630 cmd = ACTION_PTR(f);
3637 for (; l > 0; l -= cmdlen, cmd += cmdlen) {
3641 * check_body is a jump target used when we find a
3642 * CHECK_STATE, and need to jump to the body of
3646 cmdlen = F_LEN(cmd);
3648 * An OR block (insn_1 || .. || insn_n) has the
3649 * F_OR bit set in all but the last instruction.
3650 * The first match will set "skip_or", and cause
3651 * the following instructions to be skipped until
3652 * past the one with the F_OR bit clear.
3654 if (skip_or) { /* skip this instruction */
3655 if ((cmd->len & F_OR) == 0)
3656 skip_or = 0; /* next one is good */
3659 match = 0; /* set to 1 if we succeed */
3661 switch (cmd->opcode) {
3663 * The first set of opcodes compares the packet's
3664 * fields with some pattern, setting 'match' if a
3665 * match is found. At the end of the loop there is
3666 * logic to deal with F_NOT and F_OR flags associated
3674 kprintf("ipfw: opcode %d unimplemented\n",
3681 * We only check offset == 0 && proto != 0,
3682 * as this ensures that we have an IPv4
3683 * packet with the ports info.
3688 match = ipfw_match_uid(&args->f_id, oif,
3690 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]);
3694 match = iface_match(m->m_pkthdr.rcvif,
3695 (ipfw_insn_if *)cmd);
3699 match = iface_match(oif, (ipfw_insn_if *)cmd);
3703 match = iface_match(oif ? oif :
3704 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
3708 if (args->eh != NULL) { /* have MAC header */
3709 uint32_t *want = (uint32_t *)
3710 ((ipfw_insn_mac *)cmd)->addr;
3711 uint32_t *mask = (uint32_t *)
3712 ((ipfw_insn_mac *)cmd)->mask;
3713 uint32_t *hdr = (uint32_t *)args->eh;
3716 (want[0] == (hdr[0] & mask[0]) &&
3717 want[1] == (hdr[1] & mask[1]) &&
3718 want[2] == (hdr[2] & mask[2]));
3723 if (args->eh != NULL) {
3725 ntohs(args->eh->ether_type);
3727 ((ipfw_insn_u16 *)cmd)->ports;
3730 /* Special vlan handling */
3731 if (m->m_flags & M_VLANTAG)
3734 for (i = cmdlen - 1; !match && i > 0;
3737 (t >= p[0] && t <= p[1]);
3743 match = (hlen > 0 && lc.offset != 0);
3750 if (args->eh != NULL)
3751 off = ntohs(ip->ip_off);
3754 if (off & (IP_MF | IP_OFFMASK))
3759 case O_IN: /* "out" is "not in" */
3760 match = (oif == NULL);
3764 match = (args->eh != NULL);
3769 * We do not allow an arg of 0 so the
3770 * check of "proto" only suffices.
3772 match = (lc.proto == cmd->arg1);
3776 match = (hlen > 0 &&
3777 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3782 match = (hlen > 0 &&
3783 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3785 ((ipfw_insn_ip *)cmd)->mask.s_addr));
3792 tif = INADDR_TO_IFP(&lc.src_ip);
3793 match = (tif != NULL);
3797 case O_IP_SRC_TABLE:
3798 match = ipfw_table_lookup(ctx, cmd->arg1,
3803 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd,
3810 uint32_t *d = (uint32_t *)(cmd + 1);
3812 cmd->opcode == O_IP_DST_SET ?
3818 addr -= d[0]; /* subtract base */
3820 (addr < cmd->arg1) &&
3821 (d[1 + (addr >> 5)] &
3822 (1 << (addr & 0x1f)));
3827 match = (hlen > 0 &&
3828 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3833 match = (hlen > 0) &&
3834 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
3836 ((ipfw_insn_ip *)cmd)->mask.s_addr));
3843 tif = INADDR_TO_IFP(&lc.dst_ip);
3844 match = (tif != NULL);
3848 case O_IP_DST_TABLE:
3849 match = ipfw_table_lookup(ctx, cmd->arg1,
3854 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd,
3861 * offset == 0 && proto != 0 is enough
3862 * to guarantee that we have an IPv4
3863 * packet with port info.
3865 if ((lc.proto==IPPROTO_UDP ||
3866 lc.proto==IPPROTO_TCP)
3867 && lc.offset == 0) {
3869 (cmd->opcode == O_IP_SRCPORT) ?
3870 lc.src_port : lc.dst_port;
3872 ((ipfw_insn_u16 *)cmd)->ports;
3875 for (i = cmdlen - 1; !match && i > 0;
3878 (x >= p[0] && x <= p[1]);
3884 match = (lc.offset == 0 &&
3885 lc.proto==IPPROTO_ICMP &&
3886 icmptype_match(ip, (ipfw_insn_u32 *)cmd));
3890 match = (hlen > 0 && ipopts_match(ip, cmd));
3894 match = (hlen > 0 && cmd->arg1 == ip->ip_v);
3898 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl);
3902 match = (hlen > 0 &&
3903 cmd->arg1 == ntohs(ip->ip_id));
3907 match = (hlen > 0 && cmd->arg1 == lc.ip_len);
3910 case O_IPPRECEDENCE:
3911 match = (hlen > 0 &&
3912 (cmd->arg1 == (ip->ip_tos & 0xe0)));
3916 match = (hlen > 0 &&
3917 flags_match(cmd, ip->ip_tos));
3921 match = (lc.proto == IPPROTO_TCP &&
3924 L3HDR(struct tcphdr,ip)->th_flags));
3928 match = (lc.proto == IPPROTO_TCP &&
3929 lc.offset == 0 && tcpopts_match(ip, cmd));
3933 match = (lc.proto == IPPROTO_TCP &&
3935 ((ipfw_insn_u32 *)cmd)->d[0] ==
3936 L3HDR(struct tcphdr,ip)->th_seq);
3940 match = (lc.proto == IPPROTO_TCP &&
3942 ((ipfw_insn_u32 *)cmd)->d[0] ==
3943 L3HDR(struct tcphdr,ip)->th_ack);
3947 match = (lc.proto == IPPROTO_TCP &&
3950 L3HDR(struct tcphdr,ip)->th_win);
3954 /* reject packets which have SYN only */
3955 /* XXX should i also check for TH_ACK ? */
3956 match = (lc.proto == IPPROTO_TCP &&
3958 (L3HDR(struct tcphdr,ip)->th_flags &
3959 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
3964 ipfw_log(ctx, f, hlen, args->eh, m,
3971 match = (krandom() <
3972 ((ipfw_insn_u32 *)cmd)->d[0]);
3976 * The second set of opcodes represents 'actions',
3977 * i.e. the terminal part of a rule once the packet
3978 * matches all previous patterns.
3979 * Typically there is only one action for each rule,
3980 * and the opcode is stored at the end of the rule
3981 * (but there are exceptions -- see below).
3983 * In general, here we set retval and terminate the
3984 * outer loop (would be a 'break 3' in some language,
3985 * but we need to do a 'goto done').
3988 * O_COUNT and O_SKIPTO actions:
3989 * instead of terminating, we jump to the next rule
3990 * ('goto next_rule', equivalent to a 'break 2'),
3991 * or to the SKIPTO target ('goto again' after
3992 * having set f, cmd and l), respectively.
3994 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes
3995 * are not real 'actions', and are stored right
3996 * before the 'action' part of the rule.
3997 * These opcodes try to install an entry in the
3998 * state tables; if successful, we continue with
3999 * the next opcode (match=1; break;), otherwise
4000 * the packet must be dropped ('goto done' after
4001 * setting retval). If static rules are changed
4002 * during the state installation, the packet will
4003 * be dropped and rule's stats will not be updated
4004 * ('return IP_FW_DENY').
4006 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
4007 * cause a lookup of the state table, and a jump
4008 * to the 'action' part of the parent rule
4009 * ('goto check_body') if an entry is found, or
4010 * (CHECK_STATE only) a jump to the next rule if
4011 * the entry is not found ('goto next_rule').
4012 * The result of the lookup is cached so that
4013 * further instances of these opcodes are
4014 * effectively NOPs. If static rules are changed
4015 * during the state looking up, the packet will
4016 * be dropped and rule's stats will not be updated
4017 * ('return IP_FW_DENY').
4020 if (f->cross_rules == NULL) {
4022 * This rule was not completely setup;
4023 * move on to the next rule.
4028 * Apply redirect only on input path and
4029 * only to non-fragment TCP segments or
4032 * Does _not_ work with layer2 filtering.
4034 if (oif != NULL || args->eh != NULL ||
4035 (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4036 (lc.proto != IPPROTO_TCP &&
4037 lc.proto != IPPROTO_UDP))
4044 s = ipfw_state_install(ctx, f,
4045 (ipfw_insn_limit *)cmd, args, lc.tcp);
4047 retval = IP_FW_DENY;
4048 goto done; /* error/limit violation */
4051 s->st_bcnt += lc.ip_len;
4053 if (s->st_type == O_REDIRECT) {
4054 struct in_addr oaddr;
4056 struct ipfw_xlat *slave_x, *x;
4057 struct ipfw_state *dup;
4059 x = (struct ipfw_xlat *)s;
4060 ipfw_xlate(x, m, &oaddr, &oport);
4061 m = ipfw_rehashm(m, hlen, args, &lc,
4064 ipfw_state_del(ctx, s);
4068 cpuid = netisr_hashcpu(
4071 slave_x = (struct ipfw_xlat *)
4072 ipfw_state_alloc(ctx, &args->f_id,
4073 O_REDIRECT, f->cross_rules[cpuid],
4075 if (slave_x == NULL) {
4076 ipfw_state_del(ctx, s);
4077 retval = IP_FW_DENY;
4080 slave_x->xlat_addr = oaddr.s_addr;
4081 slave_x->xlat_port = oport;
4082 slave_x->xlat_dir = MATCH_REVERSE;
4083 slave_x->xlat_flags |=
4084 IPFW_STATE_F_XLATSRC |
4085 IPFW_STATE_F_XLATSLAVE;
4087 slave_x->xlat_pair = x;
4088 slave_x->xlat_pcpu = mycpuid;
4089 x->xlat_pair = slave_x;
4090 x->xlat_pcpu = cpuid;
4093 if (cpuid != mycpuid) {
4094 ctx->ipfw_xlate_split++;
4095 ipfw_xlate_redispatch(
4098 IPFW_XLATE_FORWARD);
4100 return (IP_FW_REDISPATCH);
4103 dup = ipfw_state_link(ctx,
4106 ctx->ipfw_xlate_conflicts++;
4107 if (IPFW_STATE_ISDEAD(dup)) {
4108 ipfw_state_remove(ctx,
4110 dup = ipfw_state_link(
4111 ctx, &slave_x->xlat_st);
4122 ipfw_state_del(ctx, s);
4123 return (IP_FW_DENY);
4125 ctx->ipfw_xlate_cresolved++;
4134 * States are checked at the first keep-state
4135 * check-state occurrence, with the result
4136 * being stored in dyn_dir. The compiler
4137 * introduces a PROBE_STATE instruction for
4138 * us when we have a KEEP_STATE/LIMIT/RDR
4139 * (because PROBE_STATE needs to be run first).
4142 if (dyn_dir == MATCH_UNKNOWN) {
4143 s = ipfw_state_lookup(ctx,
4144 &args->f_id, &dyn_dir, lc.tcp);
4147 (s->st_type == O_REDIRECT &&
4148 (args->eh != NULL ||
4149 (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4150 (lc.proto != IPPROTO_TCP &&
4151 lc.proto != IPPROTO_UDP)))) {
4153 * State not found. If CHECK_STATE,
4154 * skip to next rule, if PROBE_STATE
4155 * just ignore and continue with next
4158 if (cmd->opcode == O_CHECK_STATE)
4165 s->st_bcnt += lc.ip_len;
4167 if (s->st_type == O_REDIRECT) {
4168 struct ipfw_xlat *x =
4169 (struct ipfw_xlat *)s;
4172 x->xlat_ifp == NULL) {
4173 KASSERT(x->xlat_flags &
4174 IPFW_STATE_F_XLATSLAVE,
4175 ("master rdr state "
4179 (oif != NULL && x->xlat_ifp!=oif) ||
4181 x->xlat_ifp!=m->m_pkthdr.rcvif)) {
4182 retval = IP_FW_DENY;
4185 if (x->xlat_dir != dyn_dir)
4188 ipfw_xlate(x, m, NULL, NULL);
4189 m = ipfw_rehashm(m, hlen, args, &lc,
4194 cpuid = netisr_hashcpu(
4196 if (cpuid != mycpuid) {
4203 if (dyn_dir == MATCH_FORWARD) {
4207 ipfw_xlate_redispatch(m, cpuid,
4210 return (IP_FW_REDISPATCH);
4213 KKASSERT(x->xlat_pcpu == mycpuid);
4214 ipfw_state_update(&args->f_id, dyn_dir,
4215 lc.tcp, &x->xlat_pair->xlat_st);
4219 * Found a rule from a state; jump to the
4220 * 'action' part of the rule.
4223 KKASSERT(f->cpuid == mycpuid);
4225 cmd = ACTION_PTR(f);
4226 l = f->cmd_len - f->act_ofs;
4231 retval = IP_FW_PASS; /* accept */
4235 if (f->cross_rules == NULL) {
4237 * This rule was not completely setup;
4238 * move on to the next rule.
4244 * Don't defrag for l2 packets, output packets
4247 if (oif != NULL || args->eh != NULL ||
4248 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0)
4255 retval = IP_FW_PASS;
4258 ctx->ipfw_defraged++;
4259 KASSERT((m->m_flags & M_HASH) == 0,
4260 ("hash not cleared"));
4262 /* Update statistics */
4264 f->bcnt += lc.ip_len;
4265 f->timestamp = time_second;
4267 ip = mtod(m, struct ip *);
4268 hlen = ip->ip_hl << 2;
4271 ip->ip_len = htons(ip->ip_len);
4272 ip->ip_off = htons(ip->ip_off);
4279 KASSERT(m->m_flags & M_HASH, ("no hash"));
4280 cpuid = netisr_hashcpu(m->m_pkthdr.hash);
4281 if (cpuid != mycpuid) {
4284 * ip_len/ip_off are in network byte
4287 ctx->ipfw_defrag_remote++;
4288 ipfw_defrag_redispatch(m, cpuid, f);
4290 return (IP_FW_REDISPATCH);
4293 /* 'm' might be changed by ip_hashfn(). */
4294 ip = mtod(m, struct ip *);
4295 ip->ip_len = ntohs(ip->ip_len);
4296 ip->ip_off = ntohs(ip->ip_off);
4298 m = ipfw_setup_local(m, hlen, args, &lc, &ip);
4307 args->rule = f; /* report matching rule */
4308 args->cookie = cmd->arg1;
4309 retval = IP_FW_DUMMYNET;
4314 if (args->eh) /* not on layer 2 */
4317 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT,
4318 sizeof(*divinfo), M_INTWAIT | M_NULLOK);
4320 retval = IP_FW_DENY;
4323 divinfo = m_tag_data(mtag);
4325 divinfo->skipto = f->rulenum;
4326 divinfo->port = cmd->arg1;
4327 divinfo->tee = (cmd->opcode == O_TEE);
4328 m_tag_prepend(m, mtag);
4330 args->cookie = cmd->arg1;
4331 retval = (cmd->opcode == O_DIVERT) ?
4332 IP_FW_DIVERT : IP_FW_TEE;
4337 f->pcnt++; /* update stats */
4338 f->bcnt += lc.ip_len;
4339 f->timestamp = time_second;
4340 if (cmd->opcode == O_COUNT)
4343 if (f->next_rule == NULL)
4344 lookup_next_rule(f);
4350 * Drop the packet and send a reject notice
4351 * if the packet is not ICMP (or is an ICMP
4352 * query), and it is not multicast/broadcast.
4355 (lc.proto != IPPROTO_ICMP ||
4356 is_icmp_query(ip)) &&
4357 !(m->m_flags & (M_BCAST|M_MCAST)) &&
4358 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) {
4359 send_reject(args, cmd->arg1,
4360 lc.offset, lc.ip_len);
4361 retval = IP_FW_DENY;
4366 retval = IP_FW_DENY;
4370 if (args->eh) /* not valid on layer2 pkts */
4372 if (!dyn_f || dyn_dir == MATCH_FORWARD) {
4373 struct sockaddr_in *sin;
4375 mtag = m_tag_get(PACKET_TAG_IPFORWARD,
4376 sizeof(*sin), M_INTWAIT | M_NULLOK);
4378 retval = IP_FW_DENY;
4381 sin = m_tag_data(mtag);
4383 /* Structure copy */
4384 *sin = ((ipfw_insn_sa *)cmd)->sa;
4386 m_tag_prepend(m, mtag);
4387 m->m_pkthdr.fw_flags |=
4388 IPFORWARD_MBUF_TAGGED;
4389 m->m_pkthdr.fw_flags &=
4390 ~BRIDGE_MBUF_TAGGED;
4392 retval = IP_FW_PASS;
4396 panic("-- unknown opcode %d", cmd->opcode);
4397 } /* end of switch() on opcodes */
4399 if (cmd->len & F_NOT)
4403 if (cmd->len & F_OR)
4406 if (!(cmd->len & F_OR)) /* not an OR block, */
4407 break; /* try next rule */
4410 } /* end of inner for, scan opcodes */
4412 next_rule:; /* try next rule */
4414 } /* end of outer for, scan rules */
4415 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
4419 /* Update statistics */
4421 f->bcnt += lc.ip_len;
4422 f->timestamp = time_second;
4427 kprintf("pullup failed\n");
/*
 * Tag mbuf m for dummynet processing.
 *
 * A PACKET_TAG_DUMMYNET tag is allocated (the allocation may return NULL
 * because of M_NULLOK), prepended to m and zeroed.  The tag is then filled
 * with the direction bits of dir, the output interface from fwa, pipe_nr,
 * the current cpu and netisr port, and a copy of the flow id.  The matching
 * rule is referenced and stored in dn_priv together with ipfw_unref_rule
 * as the release hook.
 */
4431 static struct mbuf *
4432 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
4437 const struct ipfw_flow_id *id;
4438 struct dn_flow_id *fid;
4442 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt),
4443 M_INTWAIT | M_NULLOK);
4448 m_tag_prepend(m, mtag);
4450 pkt = m_tag_data(mtag);
4451 bzero(pkt, sizeof(*pkt));
/* Locate the action instruction of the matching rule. */
4453 cmd = fwa->rule->cmd + fwa->rule->act_ofs;
4454 if (cmd->opcode == O_LOG)
4456 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE,
4457 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode));
4460 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK);
4461 pkt->ifp = fwa->oif;
4462 pkt->pipe_nr = pipe_nr;
4464 pkt->cpuid = mycpuid;
4465 pkt->msgport = netisr_curport();
/* Copy the flow id field by field into the dummynet flow id. */
4469 fid->fid_dst_ip = id->dst_ip;
4470 fid->fid_src_ip = id->src_ip;
4471 fid->fid_dst_port = id->dst_port;
4472 fid->fid_src_port = id->src_port;
4473 fid->fid_proto = id->proto;
4474 fid->fid_flags = id->flags;
/* The tag keeps a referenced pointer to the rule; dn_unref_priv is its release hook. */
4476 ipfw_ref_rule(fwa->rule);
4477 pkt->dn_priv = fwa->rule;
4478 pkt->dn_unref_priv = ipfw_unref_rule;
4480 if (cmd->opcode == O_PIPE)
4481 pkt->dn_flags |= DN_FLAGS_IS_PIPE;
4483 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED;
4488 * When a rule is added/deleted, clear the next_rule pointers in all rules.
4489 * These will be reconstructed on the fly as packets are matched.
/*
 * Walk ctx->ipfw_layer3_chain and set the next_rule pointer of every
 * rule on it to NULL.
 */
4492 ipfw_flush_rule_ptrs(struct ipfw_context *ctx)
4496 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
4497 rule->next_rule = NULL;
/*
 * Account for an added static rule: static_ioc_len grows by
 * IOC_RULESIZE(rule).  Asserts that it runs on cpu 0.
 */
4501 ipfw_inc_static_count(struct ip_fw *rule)
4503 /* Static rule's counts are updated only on CPU0 */
4504 KKASSERT(mycpuid == 0);
4507 static_ioc_len += IOC_RULESIZE(rule);
/*
 * Account for a removed static rule: static_ioc_len shrinks by
 * IOC_RULESIZE(rule).  Asserts that it runs on cpu 0 and that neither
 * static_count nor static_ioc_len would underflow.
 */
4511 ipfw_dec_static_count(struct ip_fw *rule)
4513 int l = IOC_RULESIZE(rule);
4515 /* Static rule's counts are updated only on CPU0 */
4516 KKASSERT(mycpuid == 0);
4518 KASSERT(static_count > 0, ("invalid static count %u", static_count));
4521 KASSERT(static_ioc_len >= l,
4522 ("invalid static len %u", static_ioc_len));
4523 static_ioc_len -= l;
/*
 * Chain rule behind the rule recorded in fwmsg->sibling, then record
 * rule as the new fwmsg->sibling.  When a previous rule exists it is
 * asserted to belong to cpu (mycpuid - 1).
 */
4527 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule)
4529 if (fwmsg->sibling != NULL) {
4530 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1);
4531 fwmsg->sibling->sibling = rule;
4533 fwmsg->sibling = rule;
/*
 * Build a kernel rule from the ioctl representation ioc_rule.
 *
 * RULESIZE(ioc_rule) bytes are allocated zeroed from M_IPFW with M_WAITOK.
 * act_ofs, cmd_len, rulenum, set and usr_flags are copied over, followed
 * by cmd_len * 4 bytes of instructions.  The new rule is stamped with the
 * current cpu id and the given rule_flags.
 */
4536 static struct ip_fw *
4537 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
4541 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO);
4543 rule->act_ofs = ioc_rule->act_ofs;
4544 rule->cmd_len = ioc_rule->cmd_len;
4545 rule->rulenum = ioc_rule->rulenum;
4546 rule->set = ioc_rule->set;
4547 rule->usr_flags = ioc_rule->usr_flags;
4549 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */);
4552 rule->cpuid = mycpuid;
4553 rule->rule_flags = rule_flags;
/*
 * Message handler that installs the copy of a new rule for the current cpu.
 *
 * A rule is created from fwmsg->ioc_rule.  When fwmsg->prev_rule is set
 * the rule goes at that pre-determined position and the message position
 * is advanced to the siblings of prev/next; otherwise the rule becomes the
 * new head of ctx->ipfw_layer3_chain.  The rule is then linked to its
 * sibling, the cached next_rule pointers are flushed, and the message is
 * forwarded to cpu (mycpuid + 1).
 */
4559 ipfw_add_rule_dispatch(netmsg_t nmsg)
4561 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
4562 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4565 ASSERT_NETISR_NCPUS(mycpuid);
4567 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags);
4570 * Insert rule into the pre-determined position
4572 if (fwmsg->prev_rule != NULL) {
4573 struct ip_fw *prev, *next;
4575 prev = fwmsg->prev_rule;
4576 KKASSERT(prev->cpuid == mycpuid);
4578 next = fwmsg->next_rule;
4579 KKASSERT(next->cpuid == mycpuid);
4585 * Move to the position on the next CPU
4586 * before the msg is forwarded.
4588 fwmsg->prev_rule = prev->sibling;
4589 fwmsg->next_rule = next->sibling;
4591 KKASSERT(fwmsg->next_rule == NULL);
4592 rule->next = ctx->ipfw_layer3_chain;
4593 ctx->ipfw_layer3_chain = rule;
4596 /* Link rule CPU sibling */
4597 ipfw_link_sibling(fwmsg, rule);
4599 ipfw_flush_rule_ptrs(ctx);
4602 /* Statistics only need to be updated once */
4603 ipfw_inc_static_count(rule);
4605 /* Return the rule on CPU0 */
4606 nmsg->lmsg.u.ms_resultp = rule;
/* The track rule id is the pointer value currently held in ms_resultp. */
4609 if (rule->rule_flags & IPFW_RULE_F_GENTRACK)
4610 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp;
4612 if (fwmsg->cross_rules != NULL) {
4613 /* Save rules for later use. */
4614 fwmsg->cross_rules[mycpuid] = rule;
4617 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Message handler that gives the rule in fwmsg->sibling its own copy of
 * the cross_rules pointer array.
 *
 * The array holds netisr_ncpus rule pointers, is allocated from M_IPFW
 * and is filled from fwmsg->cross_rules.  fwmsg->sibling is then advanced
 * to rule->sibling and the message is forwarded to cpu (mycpuid + 1).
 * The rule is asserted to carry IPFW_RULE_F_CROSSREF.
 */
4621 ipfw_crossref_rule_dispatch(netmsg_t nmsg)
4623 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
4624 struct ip_fw *rule = fwmsg->sibling;
4625 int sz = sizeof(struct ip_fw *) * netisr_ncpus;
4627 ASSERT_NETISR_NCPUS(mycpuid);
4628 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF,
4629 ("not crossref rule"));
4631 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK);
4632 memcpy(rule->cross_rules, fwmsg->cross_rules, sz);
4634 fwmsg->sibling = rule->sibling;
4635 netisr_forwardmsg(&fwmsg->base, mycpuid + 1);
4639 * Add a new rule to the list. Copy the rule into a malloc'ed area,
4640 * then possibly create a rule number and add the rule to the list.
4641 * Update the rule_number in the input struct so the caller knows
/*
 * Install the rule described by ioc_rule on every netisr cpu.
 *
 * Steps visible in this function:
 *  - when ioc_rule->rulenum is 0 a number is derived from the highest
 *    numbered rule before the default rule plus autoinc_step, as long as
 *    the result stays below IPFW_DEFAULT_RULE;
 *  - the sorted chain of the current cpu is searched for the first rule
 *    with a larger number, which fixes the insertion point (prev, f);
 *  - a netmsg_ipfw running ipfw_add_rule_dispatch is sent with
 *    netisr_domsg_global, and the rule handed back in ms_resultp is
 *    asserted to belong to the current cpu;
 *  - for IPFW_RULE_F_CROSSREF rules a temporary cross_rules array is
 *    allocated from M_TEMP, a second message running
 *    ipfw_crossref_rule_dispatch is sent, and the array is freed.
 */
4645 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
4647 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4648 struct netmsg_ipfw fwmsg;
4649 struct ip_fw *f, *prev, *rule;
4654 * If rulenum is 0, find highest numbered rule before the
4655 * default rule, and add rule number incremental step.
4657 if (ioc_rule->rulenum == 0) {
4658 int step = autoinc_step;
4660 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN &&
4661 step <= IPFW_AUTOINC_STEP_MAX);
4664 * Locate the highest numbered rule before default
4666 for (f = ctx->ipfw_layer3_chain; f; f = f->next) {
4667 if (f->rulenum == IPFW_DEFAULT_RULE)
4669 ioc_rule->rulenum = f->rulenum;
4671 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step)
4672 ioc_rule->rulenum += step;
4674 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE &&
4675 ioc_rule->rulenum != 0,
4676 ("invalid rule num %d", ioc_rule->rulenum));
4679 * Now find the right place for the new rule in the sorted list.
4681 for (prev = NULL, f = ctx->ipfw_layer3_chain; f;
4682 prev = f, f = f->next) {
4683 if (f->rulenum > ioc_rule->rulenum) {
4684 /* Found the location */
4688 KASSERT(f != NULL, ("no default rule?!"));
4691 * Duplicate the rule onto each CPU.
4692 * The rule duplicated on CPU0 will be returned.
4694 bzero(&fwmsg, sizeof(fwmsg));
4695 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4696 ipfw_add_rule_dispatch);
4697 fwmsg.ioc_rule = ioc_rule;
4698 fwmsg.prev_rule = prev;
/* next_rule is only passed along when there is a prev; a head insert needs neither. */
4699 fwmsg.next_rule = prev == NULL ? NULL : f;
4700 fwmsg.rule_flags = rule_flags;
4701 if (rule_flags & IPFW_RULE_F_CROSSREF) {
4702 fwmsg.cross_rules = kmalloc(
4703 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP,
4707 netisr_domsg_global(&fwmsg.base);
4708 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL);
4710 rule = fwmsg.base.lmsg.u.ms_resultp;
4711 KKASSERT(rule != NULL && rule->cpuid == mycpuid);
4713 if (fwmsg.cross_rules != NULL) {
4714 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport,
4715 MSGF_PRIORITY, ipfw_crossref_rule_dispatch);
4716 fwmsg.sibling = rule;
4717 netisr_domsg_global(&fwmsg.base);
4718 KKASSERT(fwmsg.sibling == NULL);
4720 kfree(fwmsg.cross_rules, M_TEMP);
4723 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
4727 DPRINTF("++ installed rule %d, static count now %d\n",
4728 rule->rulenum, static_count);
4732 * Free storage associated with a static rule (including derived
4734 * The caller is in charge of clearing rule pointers to avoid
4735 * dangling pointers.
4736 * @return a pointer to the next entry.
4737 * Arguments are not checked, so they better be correct.
/*
 * Unlink rule from the chain of ctx and dispose of it.
 *
 * The rule is flagged IPFW_RULE_F_INVALID and its next_rule and sibling
 * pointers are cleared.  Rules without IPFW_RULE_F_CROSSREF are handed to
 * ipfw_free_rule; crossref rules are pushed onto the
 * ipfw_gd.ipfw_crossref_free list instead.
 */
4739 static struct ip_fw *
4740 ipfw_delete_rule(struct ipfw_context *ctx,
4741 struct ip_fw *prev, struct ip_fw *rule)
4747 ctx->ipfw_layer3_chain = n;
4751 /* Mark the rule as invalid */
4752 rule->rule_flags |= IPFW_RULE_F_INVALID;
4753 rule->next_rule = NULL;
4754 rule->sibling = NULL;
4756 /* Don't reset cpuid here; keep various assertion working */
4760 /* Statistics only need to be updated once */
4762 ipfw_dec_static_count(rule);
4764 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) {
4765 /* Try to free this rule */
4766 ipfw_free_rule(rule);
4768 /* TODO: check staging area. */
4770 rule->next = ipfw_gd.ipfw_crossref_free;
4771 ipfw_gd.ipfw_crossref_free = rule;
4775 /* Return the next rule */
/*
 * Message handler that flushes the firewall data of the current cpu.
 *
 * All states and tracks of ctx are flushed and the related bookkeeping
 * fields are reset, the spare trkcnt is freed when present, and rules are
 * deleted from the head of the chain.  The loop stops at the default rule
 * unless kill_default (carried in ms_result) is set.  The message is then
 * forwarded to cpu (mycpuid + 1).
 */
4780 ipfw_flush_dispatch(netmsg_t nmsg)
4782 int kill_default = nmsg->lmsg.u.ms_result;
4783 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4786 ASSERT_NETISR_NCPUS(mycpuid);
4791 ipfw_state_flush(ctx, NULL);
4792 KASSERT(ctx->ipfw_state_cnt == 0,
4793 ("%d pcpu states remain", ctx->ipfw_state_cnt));
4794 ctx->ipfw_state_loosecnt = 0;
4795 ctx->ipfw_state_lastexp = 0;
4800 ipfw_track_flush(ctx, NULL);
4801 ctx->ipfw_track_lastexp = 0;
4802 if (ctx->ipfw_trkcnt_spare != NULL) {
4803 kfree(ctx->ipfw_trkcnt_spare, M_IPFW);
4804 ctx->ipfw_trkcnt_spare = NULL;
4807 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */
4809 while ((rule = ctx->ipfw_layer3_chain) != NULL &&
4810 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE))
4811 ipfw_delete_rule(ctx, NULL, rule);
4813 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4817 * Deletes all rules from a chain (including the default rule
4818 * if the second argument is set).
/*
 * Flush the rule chain on all netisr cpus.
 *
 * Without kill_default a netmsg_service_sync is done first.  A message
 * running ipfw_flush_dispatch then carries kill_default to every cpu, the
 * global state and track bookkeeping in ipfw_gd is reset, and assertions
 * verify that no states or trkcnts remain and that the static rule
 * counters match the expected leftover (nothing, or the default rule).
 */
4821 ipfw_flush(int kill_default)
4823 struct netmsg_base nmsg;
4825 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4832 * If 'kill_default' then caller has done the necessary
4833 * msgport syncing; unnecessary to do it again.
4835 if (!kill_default) {
4837 * Let ipfw_chk() know the rules are going to
4838 * be flushed, so it could jump directly to
4842 /* XXX use priority sync */
4843 netmsg_service_sync();
4847 * Press the 'flush' button
4849 bzero(&nmsg, sizeof(nmsg));
4850 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4851 ipfw_flush_dispatch);
4852 nmsg.lmsg.u.ms_result = kill_default;
4853 netisr_domsg_global(&nmsg);
4854 ipfw_gd.ipfw_state_loosecnt = 0;
4855 ipfw_gd.ipfw_state_globexp = 0;
4856 ipfw_gd.ipfw_track_globexp = 0;
4859 state_cnt = ipfw_state_cntcoll();
4860 KASSERT(state_cnt == 0, ("%d states remain", state_cnt));
4862 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0,
4863 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt));
4866 KASSERT(static_count == 0,
4867 ("%u static rules remain", static_count));
4868 KASSERT(static_ioc_len == 0,
4869 ("%u bytes of static rules remain", static_ioc_len));
4871 KASSERT(static_count == 1,
4872 ("%u static rules remain", static_count));
4873 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule),
4874 ("%u bytes of static rules remain, should be %lu",
4876 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule)));
/*
 * Message handler that deletes, on the current cpu, every rule numbered
 * dmsg->rulenum starting at dmsg->start_rule.
 *
 * Before deleting, the message position (start_rule, prev_rule) is moved
 * to the siblings so that the next cpu starts at its own copies.  States
 * and tracks generated by a rule are flushed before the rule is deleted.
 * The message is finally forwarded to cpu (mycpuid + 1).
 */
4885 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg)
4887 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4888 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4889 struct ip_fw *rule, *prev;
4891 ASSERT_NETISR_NCPUS(mycpuid);
4893 rule = dmsg->start_rule;
4894 KKASSERT(rule->cpuid == mycpuid);
4895 dmsg->start_rule = rule->sibling;
4897 prev = dmsg->prev_rule;
4899 KKASSERT(prev->cpuid == mycpuid);
4902 * Move to the position on the next CPU
4903 * before the msg is forwarded.
4905 dmsg->prev_rule = prev->sibling;
4909 * flush pointers outside the loop, then delete all matching
4910 * rules. 'prev' remains the same throughout the cycle.
4912 ipfw_flush_rule_ptrs(ctx);
4913 while (rule && rule->rulenum == dmsg->rulenum) {
4914 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4915 /* Flush states generated by this rule. */
4916 ipfw_state_flush(ctx, rule);
4918 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
4919 /* Flush tracks generated by this rule. */
4920 ipfw_track_flush(ctx, rule);
4922 rule = ipfw_delete_rule(ctx, prev, rule);
4925 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Delete all rules numbered rulenum on every netisr cpu.
 *
 * The chain of the current cpu is scanned for the first rule whose number
 * is not below rulenum.  When that rule has exactly this number, a
 * netmsg_del running ipfw_alt_delete_rule_dispatch is sent with
 * netisr_domsg_global, carrying the rule, its predecessor and rulenum.
 */
4929 ipfw_alt_delete_rule(uint16_t rulenum)
4931 struct ip_fw *prev, *rule;
4932 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4933 struct netmsg_del dmsg;
4938 * Locate first rule to delete
4940 for (prev = NULL, rule = ctx->ipfw_layer3_chain;
4941 rule && rule->rulenum < rulenum;
4942 prev = rule, rule = rule->next)
4944 if (rule->rulenum != rulenum)
4948 * Get rid of the rule duplications on all CPUs
4950 bzero(&dmsg, sizeof(dmsg));
4951 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4952 ipfw_alt_delete_rule_dispatch);
4953 dmsg.prev_rule = prev;
4954 dmsg.start_rule = rule;
4955 dmsg.rulenum = rulenum;
4957 netisr_domsg_global(&dmsg.base);
4958 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL);
/*
 * Message handler that deletes, on the current cpu, every rule whose set
 * equals dmsg->from_set.
 *
 * The whole chain of ctx is walked.  States and tracks generated by a
 * matching rule are flushed before the rule is deleted.  At least one
 * deletion is asserted, then the message is forwarded to
 * cpu (mycpuid + 1).
 */
4963 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg)
4965 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4966 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4967 struct ip_fw *prev, *rule;
4972 ASSERT_NETISR_NCPUS(mycpuid);
4974 ipfw_flush_rule_ptrs(ctx);
4977 rule = ctx->ipfw_layer3_chain;
4978 while (rule != NULL) {
4979 if (rule->set == dmsg->from_set) {
4980 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4981 /* Flush states generated by this rule. */
4982 ipfw_state_flush(ctx, rule);
4984 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
4985 /* Flush tracks generated by this rule. */
4986 ipfw_track_flush(ctx, rule);
4988 rule = ipfw_delete_rule(ctx, prev, rule);
4997 KASSERT(del, ("no match set?!"));
4999 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Delete all rules belonging to the given set on every netisr cpu.
 *
 * The chain of the current cpu is scanned for rules of this set first.
 * The deletion itself is done by a netmsg_del running
 * ipfw_alt_delete_ruleset_dispatch with from_set = set.
 */
5003 ipfw_alt_delete_ruleset(uint8_t set)
5005 struct netmsg_del dmsg;
5008 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5013 * Check whether the 'set' exists. If it exists,
5014 * then check whether any rules within the set will
5015 * try to create states.
5018 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5019 if (rule->set == set)
5023 return 0; /* XXX EINVAL? */
5028 bzero(&dmsg, sizeof(dmsg));
5029 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5030 ipfw_alt_delete_ruleset_dispatch);
5031 dmsg.from_set = set;
5032 netisr_domsg_global(&dmsg.base);
/*
 * Message handler that moves rules numbered dmsg->rulenum to set
 * dmsg->to_set on the current cpu.
 *
 * The walk starts at dmsg->start_rule and covers rules whose number does
 * not exceed dmsg->rulenum.  The start position is advanced to the sibling
 * first, and the message is forwarded to cpu (mycpuid + 1) at the end.
 */
5038 ipfw_alt_move_rule_dispatch(netmsg_t nmsg)
5040 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5043 ASSERT_NETISR_NCPUS(mycpuid);
5045 rule = dmsg->start_rule;
5046 KKASSERT(rule->cpuid == mycpuid);
5049 * Move to the position on the next CPU
5050 * before the msg is forwarded.
5052 dmsg->start_rule = rule->sibling;
5054 while (rule && rule->rulenum <= dmsg->rulenum) {
5055 if (rule->rulenum == dmsg->rulenum)
5056 rule->set = dmsg->to_set;
5059 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Move the rules numbered rulenum to the given set on every netisr cpu.
 *
 * The chain of the current cpu is searched for a rule with this number
 * that is not yet in the target set.  When none is found the function
 * returns 0 without sending anything; otherwise a netmsg_del running
 * ipfw_alt_move_rule_dispatch is sent with netisr_domsg_global.
 */
5063 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set)
5065 struct netmsg_del dmsg;
5066 struct netmsg_base *nmsg;
5068 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5073 * Locate first rule to move
5075 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum;
5076 rule = rule->next) {
5077 if (rule->rulenum == rulenum && rule->set != set)
5080 if (rule == NULL || rule->rulenum > rulenum)
5081 return 0; /* XXX error? */
5083 bzero(&dmsg, sizeof(dmsg));
5085 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5086 ipfw_alt_move_rule_dispatch);
5087 dmsg.start_rule = rule;
5088 dmsg.rulenum = rulenum;
5091 netisr_domsg_global(nmsg);
5092 KKASSERT(dmsg.start_rule == NULL);
/*
 * Message handler that reassigns, on the current cpu, every rule in set
 * dmsg->from_set to set dmsg->to_set, then forwards the message to
 * cpu (mycpuid + 1).
 */
5097 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg)
5099 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5100 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5103 ASSERT_NETISR_NCPUS(mycpuid);
5105 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5106 if (rule->set == dmsg->from_set)
5107 rule->set = dmsg->to_set;
5109 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Move all rules of from_set into to_set on every netisr cpu by sending
 * a netmsg_del that runs ipfw_alt_move_ruleset_dispatch.
 */
5113 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set)
5115 struct netmsg_del dmsg;
5116 struct netmsg_base *nmsg;
5120 bzero(&dmsg, sizeof(dmsg));
5122 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5123 ipfw_alt_move_ruleset_dispatch);
5124 dmsg.from_set = from_set;
5125 dmsg.to_set = to_set;
5127 netisr_domsg_global(nmsg);
/*
 * Message handler that exchanges two rule sets on the current cpu: rules
 * in dmsg->from_set move to dmsg->to_set and rules in dmsg->to_set move to
 * dmsg->from_set.  The message is then forwarded to cpu (mycpuid + 1).
 */
5132 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg)
5134 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5135 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5138 ASSERT_NETISR_NCPUS(mycpuid);
5140 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5141 if (rule->set == dmsg->from_set)
5142 rule->set = dmsg->to_set;
5143 else if (rule->set == dmsg->to_set)
5144 rule->set = dmsg->from_set;
5146 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Swap rule sets set1 and set2 on every netisr cpu by sending a
 * netmsg_del that runs ipfw_alt_swap_ruleset_dispatch.
 */
5150 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2)
5152 struct netmsg_del dmsg;
5153 struct netmsg_base *nmsg;
5157 bzero(&dmsg, sizeof(dmsg));
5159 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5160 ipfw_alt_swap_ruleset_dispatch);
5161 dmsg.from_set = set1;
5164 netisr_domsg_global(nmsg);
5169 * Remove all rules with given number, and also do set manipulation.
5171 * The argument is an uint32_t. The low 16 bit are the rule or set number,
5172 * the next 8 bits are the new set, the top 8 bits are the command:
5174 * 0 delete rules with given number
5175 * 1 delete rules with given set number
5176 * 2 move rules with given number to new set
5177 * 3 move rules with given set number to new set
5178 * 4 swap sets with given numbers
/*
 * Decode arg and run the requested rule or set manipulation.
 *
 * Bits 0-15 of arg give the rule or set number, bits 16-23 the new set and
 * bits 24-31 the command.  The new set is checked against
 * IPFW_DEFAULT_SET.  For commands 0 and 2 the number is checked against
 * IPFW_DEFAULT_RULE, for the others against IPFW_DEFAULT_SET.  The command
 * is then dispatched to the matching ipfw_alt_* helper, whose result is
 * kept in error.
 */
5181 ipfw_ctl_alter(uint32_t arg)
5184 uint8_t cmd, new_set;
5189 rulenum = arg & 0xffff;
5190 cmd = (arg >> 24) & 0xff;
5191 new_set = (arg >> 16) & 0xff;
5195 if (new_set >= IPFW_DEFAULT_SET)
5197 if (cmd == 0 || cmd == 2) {
5198 if (rulenum == IPFW_DEFAULT_RULE)
5201 if (rulenum >= IPFW_DEFAULT_SET)
5206 case 0: /* delete rules with given number */
5207 error = ipfw_alt_delete_rule(rulenum);
5210 case 1: /* delete all rules with given set number */
5211 error = ipfw_alt_delete_ruleset(rulenum);
5214 case 2: /* move rules with given number to new set */
5215 error = ipfw_alt_move_rule(rulenum, new_set);
5218 case 3: /* move rules with given set number to new set */
5219 error = ipfw_alt_move_ruleset(rulenum, new_set);
5222 case 4: /* swap two sets */
5223 error = ipfw_alt_swap_ruleset(rulenum, new_set);
5230 * Clear counters for a specific rule.
/*
 * Reset the counters of one rule.
 *
 * With log_only == 0 the byte and packet counters and the timestamp are
 * zeroed.  When the first action instruction is O_LOG, its log_left is
 * refilled from max_log.
 */
5233 clear_counters(struct ip_fw *rule, int log_only)
5235 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
5237 if (log_only == 0) {
5238 rule->bcnt = rule->pcnt = 0;
5239 rule->timestamp = 0;
5241 if (l->o.opcode == O_LOG)
5242 l->log_left = l->max_log;
/*
 * Message handler that clears rule counters on the current cpu.
 *
 * With zmsg->rulenum == 0 the norule counter of ctx is zeroed and every
 * rule on the chain is cleared.  Otherwise clearing starts at
 * zmsg->start_rule and covers the consecutive rules carrying
 * zmsg->rulenum; start_rule is then advanced to the sibling of the start.
 * The message is forwarded to cpu (mycpuid + 1).
 */
5246 ipfw_zero_entry_dispatch(netmsg_t nmsg)
5248 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
5249 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5252 ASSERT_NETISR_NCPUS(mycpuid);
5254 if (zmsg->rulenum == 0) {
5255 KKASSERT(zmsg->start_rule == NULL);
5257 ctx->ipfw_norule_counter = 0;
5258 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5259 clear_counters(rule, zmsg->log_only);
5261 struct ip_fw *start = zmsg->start_rule;
5263 KKASSERT(start->cpuid == mycpuid);
5264 KKASSERT(start->rulenum == zmsg->rulenum);
5267 * We can have multiple rules with the same number, so we
5268 * need to clear them all.
5270 for (rule = start; rule && rule->rulenum == zmsg->rulenum;
5272 clear_counters(rule, zmsg->log_only);
5275 * Move to the position on the next CPU
5276 * before the msg is forwarded.
5278 zmsg->start_rule = start->sibling;
5280 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5284 * Reset some or all counters on firewall rules.
5285 * @arg rulenum is 0 to clear all entries, or contains a specific
5287 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
/*
 * Reset counters on all netisr cpus and log the action.
 *
 * A netmsg_zent running ipfw_zero_entry_dispatch is prepared with
 * log_only.  For a specific rulenum the first rule with that number on the
 * chain of the current cpu becomes start_rule.  The message is sent with
 * netisr_domsg_global and a notice is logged; the wording depends on
 * log_only and on whether a single entry was targeted.
 */
5290 ipfw_ctl_zero_entry(int rulenum, int log_only)
5292 struct netmsg_zent zmsg;
5293 struct netmsg_base *nmsg;
5295 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5299 bzero(&zmsg, sizeof(zmsg));
5301 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5302 ipfw_zero_entry_dispatch);
5303 zmsg.log_only = log_only;
5306 msg = log_only ? "ipfw: All logging counts reset.\n"
5307 : "ipfw: Accounting cleared.\n";
5312 * Locate the first rule with 'rulenum'
5314 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5315 if (rule->rulenum == rulenum)
5318 if (rule == NULL) /* we did not find any matching rules */
5320 zmsg.start_rule = rule;
5321 zmsg.rulenum = rulenum;
5323 msg = log_only ? "ipfw: Entry %d logging count reset.\n"
5324 : "ipfw: Entry %d cleared.\n";
5326 netisr_domsg_global(nmsg);
5327 KKASSERT(zmsg.start_rule == NULL);
5330 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
5335 * Check validity of the structure before insert.
5336 * Fortunately rules are simple, so this mostly needs to check rule sizes.
/*
 * Validate a rule handed in through the ioctl interface.
 *
 * size is checked against sizeof(*rule) and IOC_RULESIZE(rule), and the
 * rule number must not be IPFW_DEFAULT_RULE.  Every instruction is then
 * visited and its length is compared with what its opcode requires.
 *
 * While walking the instructions, *rule_flags collects:
 *   IPFW_RULE_F_GENSTATE   for O_KEEP_STATE, O_LIMIT and xlat opcodes
 *   IPFW_RULE_F_GENTRACK   for O_LIMIT
 *   IPFW_RULE_F_CROSSREF   for O_DEFRAG and xlat opcodes
 *   IPFW_RULE_F_DYNIFADDR  for O_IP_SRC_IFIP and O_IP_DST_IFIP
 *
 * The rule itself is modified in two places: arg1 of the IFIP opcodes is
 * masked with IPFW_IFIP_SETTINGS, and log_left of a log instruction is
 * set to its max_log.
 */
5339 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags)
5342 int have_action = 0;
5347 /* Check for valid size */
5348 if (size < sizeof(*rule)) {
5349 kprintf("ipfw: rule too short\n");
5352 l = IOC_RULESIZE(rule);
5354 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l);
5358 /* Check rule number */
5359 if (rule->rulenum == IPFW_DEFAULT_RULE) {
5360 kprintf("ipfw: invalid rule number\n");
5365 * Now go for the individual checks. Very simple ones, basically only
5366 * instruction sizes.
5368 for (l = rule->cmd_len, cmd = rule->cmd; l > 0;
5369 l -= cmdlen, cmd += cmdlen) {
5370 cmdlen = F_LEN(cmd);
5372 kprintf("ipfw: opcode %d size truncated\n",
5377 DPRINTF("ipfw: opcode %d\n", cmd->opcode);
5379 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT ||
5380 IPFW_ISXLAT(cmd->opcode)) {
5381 /* This rule will generate states. */
5382 *rule_flags |= IPFW_RULE_F_GENSTATE;
5383 if (cmd->opcode == O_LIMIT)
5384 *rule_flags |= IPFW_RULE_F_GENTRACK;
5386 if (cmd->opcode == O_DEFRAG || IPFW_ISXLAT(cmd->opcode))
5387 *rule_flags |= IPFW_RULE_F_CROSSREF;
5388 if (cmd->opcode == O_IP_SRC_IFIP ||
5389 cmd->opcode == O_IP_DST_IFIP) {
5390 *rule_flags |= IPFW_RULE_F_DYNIFADDR;
5391 cmd->arg1 &= IPFW_IFIP_SETTINGS;
5394 switch (cmd->opcode) {
5409 case O_IPPRECEDENCE:
5416 if (cmdlen != F_INSN_SIZE(ipfw_insn))
5420 case O_IP_SRC_TABLE:
5421 case O_IP_DST_TABLE:
5422 if (cmdlen != F_INSN_SIZE(ipfw_insn))
5424 if (cmd->arg1 >= ipfw_table_max) {
5425 kprintf("ipfw: invalid table id %u, max %d\n",
5426 cmd->arg1, ipfw_table_max);
5433 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip))
5438 if (cmdlen < F_INSN_SIZE(ipfw_insn_u32))
5449 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
5454 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
5458 if (cmdlen != F_INSN_SIZE(ipfw_insn_rdr))
5463 if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
5466 ((ipfw_insn_log *)cmd)->log_left =
5467 ((ipfw_insn_log *)cmd)->max_log;
5473 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip))
5475 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) {
5476 kprintf("ipfw: opcode %d, useless rule\n",
5484 if (cmd->arg1 == 0 || cmd->arg1 > 256) {
5485 kprintf("ipfw: invalid set size %d\n",
5489 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
5495 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
5501 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
5502 if (cmdlen < 2 || cmdlen > 31)
5509 if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
5515 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
5520 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) {
5525 fwd_addr = ((ipfw_insn_sa *)cmd)->
5527 if (IN_MULTICAST(ntohl(fwd_addr))) {
5528 kprintf("ipfw: try forwarding to "
5529 "multicast address\n");
5535 case O_FORWARD_MAC: /* XXX not implemented yet */
5545 if (cmdlen != F_INSN_SIZE(ipfw_insn))
5549 kprintf("ipfw: opcode %d, multiple actions"
5556 kprintf("ipfw: opcode %d, action must be"
5563 kprintf("ipfw: opcode %d, unknown opcode\n",
5568 if (have_action == 0) {
5569 kprintf("ipfw: missing action\n");
5575 kprintf("ipfw: opcode %d size %d wrong\n",
5576 cmd->opcode, cmdlen);
/*
 * Socket option handler that adds one rule.
 *
 * sopt_valsize is checked against sizeof(*ioc_rule) and against
 * sizeof(uint32_t) * IPFW_RULE_SIZE_MAX.  A smaller buffer is grown to the
 * maximum with krealloc (M_TEMP, M_WAITOK).  The rule is validated with
 * ipfw_check_ioc_rule, which also yields rule_flags, and then installed
 * with ipfw_add_rule.  For SOPT_GET requests sopt_valsize is set to
 * IOC_RULESIZE(ioc_rule).
 */
5581 ipfw_ctl_add_rule(struct sockopt *sopt)
5583 struct ipfw_ioc_rule *ioc_rule;
5585 uint32_t rule_flags;
5590 size = sopt->sopt_valsize;
5591 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) ||
5592 size < sizeof(*ioc_rule)) {
5595 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) {
5596 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) *
5597 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK);
5599 ioc_rule = sopt->sopt_val;
5601 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags);
5605 ipfw_add_rule(ioc_rule, rule_flags);
5607 if (sopt->sopt_dir == SOPT_GET)
5608 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule);
/*
 * Export one static rule into the ioctl record ioc_rule.
 *
 * rule must be the cpu 0 copy (asserted).  The descriptive fields and the
 * instructions are copied from it.  set_disable comes from ctx, and
 * static_count and static_len come from the global counters.  Packet and
 * byte counters are summed over all siblings and the newest sibling
 * timestamp is reported.
 *
 * Returns the address just past the record written, that is
 * ioc_rule + IOC_RULESIZE(ioc_rule) in bytes.
 */
5613 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule,
5614 struct ipfw_ioc_rule *ioc_rule)
5616 const struct ip_fw *sibling;
5622 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0"));
5624 ioc_rule->act_ofs = rule->act_ofs;
5625 ioc_rule->cmd_len = rule->cmd_len;
5626 ioc_rule->rulenum = rule->rulenum;
5627 ioc_rule->set = rule->set;
5628 ioc_rule->usr_flags = rule->usr_flags;
5630 ioc_rule->set_disable = ctx->ipfw_set_disable;
5631 ioc_rule->static_count = static_count;
5632 ioc_rule->static_len = static_ioc_len;
5635 * Visit (read-only) all of the rule's duplications to get
5636 * the necessary statistics
5643 ioc_rule->timestamp = 0;
5644 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) {
5645 ioc_rule->pcnt += sibling->pcnt;
5646 ioc_rule->bcnt += sibling->bcnt;
5647 if (sibling->timestamp > ioc_rule->timestamp)
5648 ioc_rule->timestamp = sibling->timestamp;
5653 KASSERT(i == netisr_ncpus,
5654 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus));
5656 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */);
5658 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule));
/*
 * Export one track counter as an ioctl state record.
 *
 * A trk with tc_expire == 0 is treated as not scanned.  Otherwise expire
 * is the time left until tc_expire (0 when already due), pcnt and bcnt
 * are 0, dyn_type is O_LIMIT_PARENT, count is tc_count, and the flow id is
 * filled from the protocol, addresses and ports of the track counter.
 */
5662 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state)
5664 struct ipfw_ioc_flowid *ioc_id;
5666 if (trk->tc_expire == 0) {
5667 /* Not a scanned one. */
5671 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ?
5672 0 : trk->tc_expire - time_uptime;
5673 ioc_state->pcnt = 0;
5674 ioc_state->bcnt = 0;
5676 ioc_state->dyn_type = O_LIMIT_PARENT;
5677 ioc_state->count = trk->tc_count;
5679 ioc_state->rulenum = trk->tc_rulenum;
5681 ioc_id = &ioc_state->id;
5682 ioc_id->type = ETHERTYPE_IP;
5683 ioc_id->u.ip.proto = trk->tc_proto;
5684 ioc_id->u.ip.src_ip = trk->tc_saddr;
5685 ioc_id->u.ip.dst_ip = trk->tc_daddr;
5686 ioc_id->u.ip.src_port = trk->tc_sport;
5687 ioc_id->u.ip.dst_port = trk->tc_dport;
/*
 * Export one state as an ioctl state record.
 *
 * States matching IPFW_STATE_SCANSKIP are not exported.  Otherwise expire
 * is the time left until st_expire (0 when already due), the counters and
 * type come from the state, count is 0, rulenum is that of the owning
 * rule, and the flow id is decoded from st_key with ipfw_key_4tuple.
 *
 * For xlat states the translation address and port are added in host
 * byte order, with the destination port used when xlat_port is 0.  The
 * counters of the paired state are added to pcnt and bcnt.
 */
5693 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state)
5695 struct ipfw_ioc_flowid *ioc_id;
5697 if (IPFW_STATE_SCANSKIP(s))
5700 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ?
5701 0 : s->st_expire - time_uptime;
5702 ioc_state->pcnt = s->st_pcnt;
5703 ioc_state->bcnt = s->st_bcnt;
5705 ioc_state->dyn_type = s->st_type;
5706 ioc_state->count = 0;
5708 ioc_state->rulenum = s->st_rule->rulenum;
5710 ioc_id = &ioc_state->id;
5711 ioc_id->type = ETHERTYPE_IP;
5712 ioc_id->u.ip.proto = s->st_proto;
5713 ipfw_key_4tuple(&s->st_key,
5714 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port,
5715 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port);
5717 if (IPFW_ISXLAT(s->st_type)) {
5718 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s;
5720 if (x->xlat_port == 0)
5721 ioc_state->xlat_port = ioc_id->u.ip.dst_port;
5723 ioc_state->xlat_port = ntohs(x->xlat_port);
5724 ioc_state->xlat_addr = ntohl(x->xlat_addr);
5726 ioc_state->pcnt += x->xlat_pair->xlat_pcnt;
5727 ioc_state->bcnt += x->xlat_pair->xlat_bcnt;
/*
 * Message handler that exports the states of the current cpu into the
 * buffer tracked by nm, bounded by nm->state_cntmax records.
 *
 * The state list of ctx is copied with ipfw_state_copy.  The track list
 * of ctx is walked to lower tc_expire of each referenced track counter to
 * the expiry of the track.  On the last netisr cpu the global track
 * counter tree is exported with ipfw_track_copy.  When the buffer is full
 * the message is replied to; otherwise it is forwarded to
 * cpu (mycpuid + 1).
 */
5734 ipfw_state_copy_dispatch(netmsg_t nmsg)
5736 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg;
5737 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5738 const struct ipfw_state *s;
5739 const struct ipfw_track *t;
5741 ASSERT_NETISR_NCPUS(mycpuid);
5742 KASSERT(nm->state_cnt < nm->state_cntmax,
5743 ("invalid state count %d, max %d",
5744 nm->state_cnt, nm->state_cntmax));
5746 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) {
5747 if (ipfw_state_copy(s, nm->ioc_state)) {
5750 if (nm->state_cnt == nm->state_cntmax)
5756 * Prepare tracks in the global track tree for userland.
5758 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) {
5759 struct ipfw_trkcnt *trk;
5761 if (t->t_count == NULL) /* anchor */
5766 * Only one netisr can run this function at
5767 * any time, and only this function accesses
5768 * trkcnt's tc_expire, so this is safe w/o
5769 * ipfw_gd.ipfw_trkcnt_token.
5771 if (trk->tc_expire > t->t_expire)
5773 trk->tc_expire = t->t_expire;
5777 * Copy tracks in the global track tree to userland in
5780 if (mycpuid == netisr_ncpus - 1) {
5781 struct ipfw_trkcnt *trk;
5783 KASSERT(nm->state_cnt < nm->state_cntmax,
5784 ("invalid state count %d, max %d",
5785 nm->state_cnt, nm->state_cntmax));
5788 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) {
5789 if (ipfw_track_copy(trk, nm->ioc_state)) {
5792 if (nm->state_cnt == nm->state_cntmax) {
5801 if (nm->state_cnt == nm->state_cntmax) {
5802 /* No more space; done. */
5803 netisr_replymsg(&nm->base, 0);
5805 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Copy the static rules, followed by the states, into sopt->sopt_val.
 *
 * The space requirement is static_ioc_len plus an estimate of the state
 * count (ipfw_state_cntcoll() plus the global track counter count,
 * enlarged by 25%) times sizeof(struct ipfw_ioc_state).  When
 * sopt->sopt_valsize is smaller than that, the user buffer is zeroed.
 *
 * Static rules are serialized from this CPU's ipfw_layer3_chain with
 * ipfw_copy_rule().  States are collected by sending a
 * struct netmsg_cpstate through netisr_domsg_global() to
 * ipfw_state_copy_dispatch().  Afterwards the size is recomputed from
 * the number of states actually copied (nm.state_cnt) and stored in
 * sopt->sopt_valsize.
 */
5810 ipfw_ctl_get_rules(struct sockopt *sopt)
5812 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5821 * pass up a copy of the current rules. Static rules
5822 * come first (the last of which has number IPFW_DEFAULT_RULE),
5823 * followed by a possibly empty list of states.
5826 size = static_ioc_len; /* size of static rules */
5829 * Size of the states.
5830 * XXX take tracks as state for userland compat.
5832 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt;
5833 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */
5834 size += state_cnt * sizeof(struct ipfw_ioc_state);
5836 if (sopt->sopt_valsize < size) {
5837 /* short length, no need to return incomplete rules */
5838 /* XXX: if superuser, no need to zero buffer */
5839 bzero(sopt->sopt_val, sopt->sopt_valsize);
5842 bp = sopt->sopt_val;
5844 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5845 bp = ipfw_copy_rule(ctx, rule, bp);
5848 struct netmsg_cpstate nm;
5850 size_t old_size = size;
5853 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5854 MSGF_PRIORITY, ipfw_state_copy_dispatch);
5856 nm.state_cntmax = state_cnt;
5858 netisr_domsg_global(&nm.base);
5861 * The # of states may be shrinked after the snapshot
5862 * of the state count was taken. To give user a correct
5863 * state count, nm->state_cnt is used to recalculate
5866 size = static_ioc_len +
5867 (nm.state_cnt * sizeof(struct ipfw_ioc_state));
5868 KKASSERT(size <= old_size);
5871 sopt->sopt_valsize = size;
/*
 * Netisr handler: store the disabled-set mask carried in the message
 * (lmsg.u.ms_result32) into this CPU's context, then forward the
 * message to mycpuid + 1.
 */
5876 ipfw_set_disable_dispatch(netmsg_t nmsg)
5878 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5880 ASSERT_NETISR_NCPUS(mycpuid);
5882 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32;
5883 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Atomically disable and enable rule sets.
 *
 * disable and enable are bit masks indexed by set number.  The new mask
 * is (current mask of this CPU | disable) & ~enable, where the bit of
 * IPFW_DEFAULT_SET is always forced into enable.  The result is
 * distributed with netisr_domsg_global() to ipfw_set_disable_dispatch().
 */
5887 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable)
5889 struct netmsg_base nmsg;
5890 uint32_t set_disable;
5894 /* IPFW_DEFAULT_SET is always enabled */
5895 enable |= (1 << IPFW_DEFAULT_SET);
5896 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable;
5898 bzero(&nmsg, sizeof(nmsg));
5899 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5900 ipfw_set_disable_dispatch);
5901 nmsg.lmsg.u.ms_result32 = set_disable;
5903 netisr_domsg_global(&nmsg);
/*
 * Netisr handler: create the radix head of table lmsg.u.ms_result in
 * this CPU's context with rn_inithead(), panic if that fails, then
 * forward the message to mycpuid + 1.
 */
5907 ipfw_table_create_dispatch(netmsg_t nm)
5909 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5910 int tblid = nm->lmsg.u.ms_result;
5912 ASSERT_NETISR_NCPUS(mycpuid);
5914 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid],
5915 rn_cpumaskhead(mycpuid), 32))
5916 panic("ipfw: create table%d failed", tblid);
5918 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Handle a table creation request.
 *
 * sopt->sopt_val is interpreted as a struct ipfw_ioc_table.  The request
 * size, the range of tableid ([0, ipfw_table_max)) and whether the table
 * slot is already in use on this CPU are checked before
 * ipfw_table_create_dispatch() is run through netisr_domsg_global().
 * NOTE(review): confirm the error code returned by each failed check.
 */
5922 ipfw_table_create(struct sockopt *sopt)
5924 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5925 struct ipfw_ioc_table *tbl;
5926 struct netmsg_base nm;
5930 if (sopt->sopt_valsize != sizeof(*tbl))
5933 tbl = sopt->sopt_val;
5934 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
5937 if (ctx->ipfw_tables[tbl->tableid] != NULL)
5940 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5941 ipfw_table_create_dispatch);
5942 nm.lmsg.u.ms_result = tbl->tableid;
5943 netisr_domsg_global(&nm);
/*
 * Delete radix node rn from rnh through rnh_deladdr(), addressed by the
 * key and mask of the node itself.
 * NOTE(review): the panic presumably fires when the node returned by
 * rnh_deladdr() is not rn; confirm the condition.
 */
5949 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn)
5951 struct radix_node *ret;
5953 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
5955 panic("deleted other table entry");
/*
 * rnh_walktree() callback used by ipfw_table_flush_oncpu(): removes rn
 * from the radix head passed in xrnh.
 */
5960 ipfw_table_killent(struct radix_node *rn, void *xrnh)
5963 ipfw_table_killrn(xrnh, rn);
/*
 * Remove every entry of table tableid from this CPU's context by
 * walking its radix head with ipfw_table_killent().  The table slot is
 * reset to NULL afterwards.
 * NOTE(review): callers pass a destroy flag as third argument; the slot
 * reset is presumably conditional on it.
 */
5968 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid,
5971 struct radix_node_head *rnh;
5973 ASSERT_NETISR_NCPUS(mycpuid);
5975 rnh = ctx->ipfw_tables[tableid];
5976 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh);
5979 ctx->ipfw_tables[tableid] = NULL;
/*
 * Netisr handler: flush (and optionally destroy) the table named in the
 * struct netmsg_tblflush on this CPU, then forward the message to
 * mycpuid + 1.
 */
5984 ipfw_table_flush_dispatch(netmsg_t nmsg)
5986 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg;
5987 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5989 ASSERT_NETISR_NCPUS(mycpuid);
5991 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy);
5992 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Apply ipfw_table_flush_oncpu() to every table id in
 * [0, ipfw_table_max) whose slot in ctx is not NULL, passing destroy
 * through unchanged.
 */
5996 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy)
6000 ASSERT_NETISR_NCPUS(mycpuid);
6002 for (i = 0; i < ipfw_table_max; ++i) {
6003 if (ctx->ipfw_tables[i] != NULL)
6004 ipfw_table_flush_oncpu(ctx, i, destroy);
/*
 * Netisr handler: flush all existing tables on this CPU without
 * destroying them (destroy argument is 0), then forward the message to
 * mycpuid + 1.
 */
6009 ipfw_table_flushall_dispatch(netmsg_t nmsg)
6011 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6013 ASSERT_NETISR_NCPUS(mycpuid);
6015 ipfw_table_flushall_oncpu(ctx, 0);
6016 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Handle table flush and destroy requests (ipfw_ctl() routes both
 * IP_FW_TBL_FLUSH and IP_FW_TBL_DESTROY here).
 *
 * sopt->sopt_val is a struct ipfw_ioc_table.  IP_FW_TBL_FLUSH with a
 * negative tableid runs ipfw_table_flushall_dispatch().  Otherwise the
 * tableid range and the existence of the table on this CPU are checked
 * and ipfw_table_flush_dispatch() is run for that single table.
 * NOTE(review): the IP_FW_TBL_DESTROY test presumably sets nm.destroy;
 * confirm, together with the error codes of the failed checks.
 */
6020 ipfw_table_flush(struct sockopt *sopt)
6022 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6023 struct ipfw_ioc_table *tbl;
6024 struct netmsg_tblflush nm;
6028 if (sopt->sopt_valsize != sizeof(*tbl))
6031 tbl = sopt->sopt_val;
6032 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) {
6033 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6034 MSGF_PRIORITY, ipfw_table_flushall_dispatch);
6035 netisr_domsg_global(&nm.base);
6039 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
6042 if (ctx->ipfw_tables[tbl->tableid] == NULL)
6045 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6046 ipfw_table_flush_dispatch);
6047 nm.tableid = tbl->tableid;
6049 if (sopt->sopt_name == IP_FW_TBL_DESTROY)
6051 netisr_domsg_global(&nm.base);
/*
 * rnh_walktree() callback; ipfw_table_get() passes the address of its
 * entry counter as xcnt.  The radix node itself is unused.
 * NOTE(review): presumably increments the counter once per node.
 */
6057 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt)
/*
 * rnh_walktree() callback that exports one table entry.
 *
 * xcp is a struct ipfw_table_cp; the entry is written to
 * cp->te[cp->te_idx].  The netmask is copied when the node has one
 * (sin_len is set to 0 for the no-mask case), the key is copied, and
 * the use counter and last-use time start from this entry and are then
 * aggregated over the te_sibling chain: use counters are summed and the
 * most recent te_lastuse wins.  The final KASSERT expects the number of
 * entries visited to equal netisr_ncpus.
 */
6066 ipfw_table_cpent(struct radix_node *rn, void *xcp)
6068 struct ipfw_table_cp *cp = xcp;
6069 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
6070 struct ipfw_ioc_tblent *ioc_te;
6075 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d",
6076 cp->te_idx, cp->te_cnt));
6077 ioc_te = &cp->te[cp->te_idx];
/* The first byte of the radix mask is used as the copy length. */
6079 if (te->te_nodes->rn_mask != NULL) {
6080 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask,
6081 *te->te_nodes->rn_mask);
6083 ioc_te->netmask.sin_len = 0;
6085 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key));
6087 ioc_te->use = te->te_use;
6088 ioc_te->last_used = te->te_lastuse;
6093 while ((te = te->te_sibling) != NULL) {
6097 ioc_te->use += te->te_use;
6098 if (te->te_lastuse > ioc_te->last_used)
6099 ioc_te->last_used = te->te_lastuse;
6101 KASSERT(cnt == netisr_ncpus,
6102 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus));
/*
 * Return table information to userland through sopt->sopt_val.
 *
 * The request starts with a struct ipfw_ioc_table.
 *  - tableid < 0: the ids of all existing tables on this CPU are
 *    returned as a struct ipfw_ioc_tbllist (tablecnt plus tables[]).
 *  - otherwise (tableid below ipfw_table_max): the entries of that table
 *    are counted with ipfw_table_cntent() and then exported into a
 *    struct ipfw_ioc_tblcont with ipfw_table_cpent().
 *
 * In both modes the needed size is computed with __offsetof() on the
 * trailing array; if sopt->sopt_valsize is smaller the user buffer is
 * zeroed, otherwise sopt->sopt_valsize is set to the size produced.
 */
6110 ipfw_table_get(struct sockopt *sopt)
6112 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6113 struct radix_node_head *rnh;
6114 struct ipfw_ioc_table *tbl;
6115 struct ipfw_ioc_tblcont *cont;
6116 struct ipfw_table_cp cp;
6121 if (sopt->sopt_valsize < sizeof(*tbl))
6124 tbl = sopt->sopt_val;
6125 if (tbl->tableid < 0) {
6126 struct ipfw_ioc_tbllist *list;
6130 * List available table ids.
6132 for (i = 0; i < ipfw_table_max; ++i) {
6133 if (ctx->ipfw_tables[i] != NULL)
6137 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]);
6138 if (sopt->sopt_valsize < sz) {
6139 bzero(sopt->sopt_val, sopt->sopt_valsize);
6142 list = sopt->sopt_val;
6143 list->tablecnt = cnt;
6146 for (i = 0; i < ipfw_table_max; ++i) {
6147 if (ctx->ipfw_tables[i] != NULL) {
6148 KASSERT(cnt < list->tablecnt,
6149 ("invalid idx %d, cnt %d",
6150 cnt, list->tablecnt));
6151 list->tables[cnt++] = i;
6154 sopt->sopt_valsize = sz;
6156 } else if (tbl->tableid >= ipfw_table_max) {
6160 rnh = ctx->ipfw_tables[tbl->tableid];
6163 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt);
6165 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]);
6166 if (sopt->sopt_valsize < sz) {
6167 bzero(sopt->sopt_val, sopt->sopt_valsize);
6170 cont = sopt->sopt_val;
6176 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp);
6178 sopt->sopt_valsize = sz;
/*
 * Netisr handler: add one entry to table nm->tableid on this CPU.
 *
 * A zeroed struct ipfw_tblent is allocated (M_WAITOK), its key is copied
 * from nm->key and it is inserted with rnh_addaddr() using nm->netmask.
 * When rnh_addaddr() returns NULL the visible code either replies with
 * EEXIST or panics.
 * NOTE(review): confirm which condition separates the EEXIST reply from
 * the panic.
 * On success the new entry is linked as te_sibling of nm->sibling (when
 * one is set) and the message is forwarded to mycpuid + 1.
 */
6183 ipfw_table_add_dispatch(netmsg_t nmsg)
6185 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
6186 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6187 struct radix_node_head *rnh;
6188 struct ipfw_tblent *te;
6190 ASSERT_NETISR_NCPUS(mycpuid);
6192 rnh = ctx->ipfw_tables[nm->tableid];
6194 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO);
6195 te->te_nodes->rn_key = (char *)&te->te_key;
6196 memcpy(&te->te_key, nm->key, sizeof(te->te_key));
6198 if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh,
6199 te->te_nodes) == NULL) {
6202 netisr_replymsg(&nm->base, EEXIST);
6205 panic("rnh_addaddr failed");
6208 /* Link siblings. */
6209 if (nm->sibling != NULL)
6210 nm->sibling->te_sibling = te;
6213 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Netisr handler: delete the entry matching nm->key / nm->netmask from
 * table nm->tableid on this CPU via rnh_deladdr().  The visible failure
 * paths reply with ESRCH or panic; otherwise the message is forwarded
 * to mycpuid + 1.
 * NOTE(review): confirm which condition separates the ESRCH reply from
 * the panic.
 */
6217 ipfw_table_del_dispatch(netmsg_t nmsg)
6219 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
6220 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6221 struct radix_node_head *rnh;
6222 struct radix_node *rn;
6224 ASSERT_NETISR_NCPUS(mycpuid);
6226 rnh = ctx->ipfw_tables[nm->tableid];
6227 rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh);
6230 netisr_replymsg(&nm->base, ESRCH);
6233 panic("rnh_deladdr failed");
6237 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Add or delete a single table entry.
 *
 * sopt->sopt_val is a struct ipfw_ioc_tblcont that must carry exactly
 * one entry (entcnt == 1) for an existing table whose id lies in
 * [0, ipfw_table_max).  The key must be an AF_INET sockaddr_in with
 * sin_port 0 and sin_len == sizeof(struct sockaddr_in).  A netmask is
 * optional (sin_len != 0 means present); when present its sin_port must
 * be 0 and its sin_len at most sizeof(struct sockaddr_in), and the key
 * is masked into a local copy with sa_maskedcopy() before use.
 *
 * IP_FW_TBL_ADD selects ipfw_table_add_dispatch(); the other
 * netmsg_init() selects ipfw_table_del_dispatch().  The return value is
 * the result of netisr_domsg_global().
 * NOTE(review): confirm the error codes of the failed checks.
 */
6241 ipfw_table_alt(struct sockopt *sopt)
6243 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6244 struct ipfw_ioc_tblcont *tbl;
6245 struct ipfw_ioc_tblent *te;
6246 struct sockaddr_in key0;
6247 struct sockaddr *netmask = NULL, *key;
6248 struct netmsg_tblent nm;
6252 if (sopt->sopt_valsize != sizeof(*tbl))
6254 tbl = sopt->sopt_val;
6256 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
6258 if (tbl->entcnt != 1)
6261 if (ctx->ipfw_tables[tbl->tableid] == NULL)
6265 if (te->key.sin_family != AF_INET ||
6266 te->key.sin_port != 0 ||
6267 te->key.sin_len != sizeof(struct sockaddr_in))
6269 key = (struct sockaddr *)&te->key;
6271 if (te->netmask.sin_len != 0) {
6272 if (te->netmask.sin_port != 0 ||
6273 te->netmask.sin_len > sizeof(struct sockaddr_in))
6275 netmask = (struct sockaddr *)&te->netmask;
6276 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask);
6277 key = (struct sockaddr *)&key0;
6280 if (sopt->sopt_name == IP_FW_TBL_ADD) {
6281 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6282 MSGF_PRIORITY, ipfw_table_add_dispatch);
6284 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6285 MSGF_PRIORITY, ipfw_table_del_dispatch);
6288 nm.netmask = netmask;
6289 nm.tableid = tbl->tableid;
6291 return (netisr_domsg_global(&nm.base));
/*
 * rnh_walktree() callback used by the table zero handlers; the radix
 * node is treated as a struct ipfw_tblent and arg is unused.
 * NOTE(review): presumably resets the usage statistics of the entry.
 */
6295 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused)
6297 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
/*
 * Netisr handler: run ipfw_table_zeroent() over every entry of table
 * lmsg.u.ms_result on this CPU, then forward the message to
 * mycpuid + 1.
 */
6305 ipfw_table_zero_dispatch(netmsg_t nmsg)
6307 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6308 struct radix_node_head *rnh;
6310 ASSERT_NETISR_NCPUS(mycpuid);
6312 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result];
6313 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
6315 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Netisr handler: run ipfw_table_zeroent() over the tables of this CPU
 * for every id in [0, ipfw_table_max), then forward the message to
 * mycpuid + 1.
 */
6319 ipfw_table_zeroall_dispatch(netmsg_t nmsg)
6321 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6324 ASSERT_NETISR_NCPUS(mycpuid);
6326 for (i = 0; i < ipfw_table_max; ++i) {
6327 struct radix_node_head *rnh = ctx->ipfw_tables[i];
6330 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
6332 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Handle IP_FW_TBL_ZERO.
 *
 * sopt->sopt_val is a struct ipfw_ioc_table.  A negative tableid runs
 * ipfw_table_zeroall_dispatch(); a tableid at or above ipfw_table_max
 * and a table that does not exist on this CPU are handled as separate
 * cases; otherwise ipfw_table_zero_dispatch() is run for that table.
 * NOTE(review): confirm the error codes of the rejected cases.
 */
6336 ipfw_table_zero(struct sockopt *sopt)
6338 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6339 struct netmsg_base nm;
6340 struct ipfw_ioc_table *tbl;
6344 if (sopt->sopt_valsize != sizeof(*tbl))
6346 tbl = sopt->sopt_val;
6348 if (tbl->tableid < 0) {
6349 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6350 ipfw_table_zeroall_dispatch);
6351 netisr_domsg_global(&nm);
6353 } else if (tbl->tableid >= ipfw_table_max) {
6355 } else if (ctx->ipfw_tables[tbl->tableid] == NULL) {
6359 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6360 ipfw_table_zero_dispatch);
6361 nm.lmsg.u.ms_result = tbl->tableid;
6362 netisr_domsg_global(&nm);
/*
 * rnh_walktree() callback used by the expire handlers: an entry whose
 * te_expired flag is set is removed from nm->rnh with
 * ipfw_table_killrn().  xnm is the struct netmsg_tblexp of the request.
 */
6368 ipfw_table_killexp(struct radix_node *rn, void *xnm)
6370 struct netmsg_tblexp *nm = xnm;
6371 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
6373 if (te->te_expired) {
6374 ipfw_table_killrn(nm->rnh, rn);
/*
 * Netisr handler: remove the entries marked expired from table
 * nm->tableid on this CPU by walking it with ipfw_table_killexp().
 * The KASSERT expects nm->expcnt to equal nm->cnt * (mycpuid + 1) after
 * the walk.  The message is then forwarded to mycpuid + 1.
 */
6381 ipfw_table_expire_dispatch(netmsg_t nmsg)
6383 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
6384 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6385 struct radix_node_head *rnh;
6387 ASSERT_NETISR_NCPUS(mycpuid);
6389 rnh = ctx->ipfw_tables[nm->tableid];
6391 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
6393 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
6394 ("not all expired addresses (%d) were deleted (%d)",
6395 nm->cnt * (mycpuid + 1), nm->expcnt));
6397 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Netisr handler: like ipfw_table_expire_dispatch(), but iterates over
 * all table ids in [0, ipfw_table_max) on this CPU and walks the tables
 * with ipfw_table_killexp().  The same expcnt == cnt * (mycpuid + 1)
 * invariant is asserted before the message is forwarded to mycpuid + 1.
 */
6401 ipfw_table_expireall_dispatch(netmsg_t nmsg)
6403 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
6404 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6407 ASSERT_NETISR_NCPUS(mycpuid);
6409 for (i = 0; i < ipfw_table_max; ++i) {
6410 struct radix_node_head *rnh = ctx->ipfw_tables[i];
6415 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
6418 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
6419 ("not all expired addresses (%d) were deleted (%d)",
6420 nm->cnt * (mycpuid + 1), nm->expcnt));
6422 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * rnh_walktree() callback used by ipfw_table_expire() to decide which
 * entries are expired.
 *
 * The most recent te_lastuse over the entry and its te_sibling chain is
 * computed and lastuse + nm->expire is compared against time_second
 * with TIME_LEQ().  The entry and its siblings are then walked a second
 * time.
 * NOTE(review): the second walk presumably sets te_expired on each
 * sibling and bumps nm->cnt; confirm.
 */
6426 ipfw_table_markexp(struct radix_node *rn, void *xnm)
6428 struct netmsg_tblexp *nm = xnm;
6429 struct ipfw_tblent *te;
6432 te = (struct ipfw_tblent *)rn;
6433 lastuse = te->te_lastuse;
6435 while ((te = te->te_sibling) != NULL) {
6436 if (te->te_lastuse > lastuse)
6437 lastuse = te->te_lastuse;
6439 if (!TIME_LEQ(lastuse + nm->expire, time_second)) {
6444 te = (struct ipfw_tblent *)rn;
6446 while ((te = te->te_sibling) != NULL)
6454 ipfw_table_expire(struct sockopt *sopt)
6454 ipfw_table_expire(struct sockopt *sopt)
/*
 * Body of the IP_FW_TBL_EXPIRE handler.
 *
 * sopt->sopt_val is a struct ipfw_ioc_tblexp carrying tableid and the
 * expire interval.  Expired entries are first marked on this CPU with
 * ipfw_table_markexp() (all tables for a negative tableid, otherwise the
 * single table), the number found is reported back in tbl->expcnt, and
 * the deletion is then carried out through ipfw_table_expireall_dispatch()
 * or ipfw_table_expire_dispatch().  After the dispatch nm.expcnt is
 * asserted to equal nm.cnt * netisr_ncpus.
 */
6456 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6457 struct netmsg_tblexp nm;
6458 struct ipfw_ioc_tblexp *tbl;
6459 struct radix_node_head *rnh;
6463 if (sopt->sopt_valsize != sizeof(*tbl))
6465 tbl = sopt->sopt_val;
6470 nm.expire = tbl->expire;
6472 if (tbl->tableid < 0) {
6475 for (i = 0; i < ipfw_table_max; ++i) {
6476 rnh = ctx->ipfw_tables[i];
6479 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
6482 /* No addresses can be expired. */
6485 tbl->expcnt = nm.cnt;
6487 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6488 MSGF_PRIORITY, ipfw_table_expireall_dispatch);
6490 netisr_domsg_global(&nm.base);
6491 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
6492 ("not all expired addresses (%d) were deleted (%d)",
6493 nm.cnt * netisr_ncpus, nm.expcnt));
6496 } else if (tbl->tableid >= ipfw_table_max) {
6500 rnh = ctx->ipfw_tables[tbl->tableid];
6503 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
6505 /* No addresses can be expired. */
6508 tbl->expcnt = nm.cnt;
6510 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6511 ipfw_table_expire_dispatch);
6512 nm.tableid = tbl->tableid;
6513 netisr_domsg_global(&nm.base);
6514 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
6515 ("not all expired addresses (%d) were deleted (%d)",
6516 nm.cnt * netisr_ncpus, nm.expcnt));
/*
 * Netisr handler: free the rule carried in lmsg.u.ms_resultp after
 * asserting that it is flagged both IPFW_RULE_F_CROSSREF and
 * IPFW_RULE_F_INVALID, then reply to the message with 0.
 */
6521 ipfw_crossref_free_dispatch(netmsg_t nmsg)
6523 struct ip_fw *rule = nmsg->lmsg.u.ms_resultp;
6525 KKASSERT((rule->rule_flags &
6526 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
6527 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
6528 ipfw_free_rule(rule);
6530 netisr_replymsg(&nmsg->base, 0);
/*
 * Garbage-collect rules parked on ipfw_gd.ipfw_crossref_free.
 *
 * For each rule on the list the cross_refs of its per-CPU siblings
 * (cross_rules[0 .. netisr_ncpus - 1]) are summed.  A rule with no
 * references in flight is taken off the list, and its siblings on CPUs
 * 1 .. netisr_ncpus - 1 are freed on their own CPU by sending
 * ipfw_crossref_free_dispatch() with netisr_domsg().
 *
 * If rules remain on the list afterwards, the crossref callout is
 * re-armed to run ipfw_crossref_timeo() after hz ticks.
 */
6534 ipfw_crossref_reap(void)
6536 struct ip_fw *rule, *prev = NULL;
6540 rule = ipfw_gd.ipfw_crossref_free;
6541 while (rule != NULL) {
6542 uint64_t inflight = 0;
6545 for (i = 0; i < netisr_ncpus; ++i)
6546 inflight += rule->cross_rules[i]->cross_refs;
6547 if (inflight == 0) {
6548 struct ip_fw *f = rule;
6557 ipfw_gd.ipfw_crossref_free = rule;
6562 for (i = 1; i < netisr_ncpus; ++i) {
6563 struct netmsg_base nm;
6565 netmsg_init(&nm, NULL, &curthread->td_msgport,
6566 MSGF_PRIORITY, ipfw_crossref_free_dispatch);
6567 nm.lmsg.u.ms_resultp = f->cross_rules[i];
6568 netisr_domsg(&nm, i);
6570 KKASSERT((f->rule_flags &
6571 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
6572 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
6580 if (ipfw_gd.ipfw_crossref_free != NULL) {
6581 callout_reset(&ipfw_gd.ipfw_crossref_ch, hz,
6582 ipfw_crossref_timeo, NULL);
6587 * {set|get}sockopt parser.
/*
 * Socket option entry point of ipfw (installed as ip_fw_ctl_ptr by
 * ipfw_init_dispatch()).
 *
 * Dispatches on sopt->sopt_name to the matching handler: rule listing
 * (ipfw_ctl_get_rules), flush, rule addition (ipfw_ctl_add_rule),
 * delete/alter or set enable/disable (selected by the argument size),
 * counter/log reset (ipfw_ctl_zero_entry) and the table operations
 * (create, add/delete entry, flush/destroy, get, zero, expire).
 * Unknown options are logged with kprintf().  ipfw_crossref_reap() is
 * called at the end of the function.
 */
6590 ipfw_ctl(struct sockopt *sopt)
6600 switch (sopt->sopt_name) {
6602 error = ipfw_ctl_get_rules(sopt);
6606 ipfw_flush(0 /* keep default rule */);
6610 error = ipfw_ctl_add_rule(sopt);
6615 * IP_FW_DEL is used for deleting single rules or sets,
6616 * and (ab)used to atomically manipulate sets.
6617 * Argument size is used to distinguish between the two:
6619 * delete single rule or set of rules,
6620 * or reassign rules (or sets) to a different set.
6621 * 2 * sizeof(uint32_t)
6622 * atomic disable/enable sets.
6623 * first uint32_t contains sets to be disabled,
6624 * second uint32_t contains sets to be enabled.
6626 masks = sopt->sopt_val;
6627 size = sopt->sopt_valsize;
6628 if (size == sizeof(*masks)) {
6630 * Delete or reassign static rule
6632 error = ipfw_ctl_alter(masks[0]);
6633 } else if (size == (2 * sizeof(*masks))) {
6635 * Set enable/disable
6637 ipfw_ctl_set_disable(masks[0], masks[1]);
6644 case IP_FW_RESETLOG: /* argument is an int, the rule number */
6647 if (sopt->sopt_val != 0) {
6648 error = soopt_to_kbuf(sopt, &rulenum,
6649 sizeof(int), sizeof(int));
6653 error = ipfw_ctl_zero_entry(rulenum,
6654 sopt->sopt_name == IP_FW_RESETLOG);
6657 case IP_FW_TBL_CREATE:
6658 error = ipfw_table_create(sopt);
6663 error = ipfw_table_alt(sopt);
6666 case IP_FW_TBL_FLUSH:
6667 case IP_FW_TBL_DESTROY:
6668 error = ipfw_table_flush(sopt);
6672 error = ipfw_table_get(sopt);
6675 case IP_FW_TBL_ZERO:
6676 error = ipfw_table_zero(sopt);
6679 case IP_FW_TBL_EXPIRE:
6680 error = ipfw_table_expire(sopt);
6684 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
6688 ipfw_crossref_reap();
/*
 * Finish a keepalive pass on this CPU: clear IPFW_FLAG_KEEPALIVE in the
 * context and re-arm the keepalive callout to run ipfw_keepalive()
 * after dyn_keepalive_period * hz ticks.
 */
6693 ipfw_keepalive_done(struct ipfw_context *ctx)
6696 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6697 ("keepalive is not in progress"));
6698 ctx->ipfw_flags &= ~IPFW_FLAG_KEEPALIVE;
6699 callout_reset(&ctx->ipfw_keepalive_ch, dyn_keepalive_period * hz,
6700 ipfw_keepalive, NULL);
/*
 * Schedule the continuation of a keepalive pass: send the per-context
 * ipfw_keepalive_more message on the current CPU.  A pass must be in
 * progress and the previous use of the message must be complete
 * (MSGF_DONE).
 */
6704 ipfw_keepalive_more(struct ipfw_context *ctx)
6706 struct netmsg_base *nm = &ctx->ipfw_keepalive_more;
6708 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6709 ("keepalive is not in progress"));
6710 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
6711 ("keepalive more did not finish"));
6712 netisr_sendmsg_oncpu(nm);
/*
 * One bounded step of the keepalive scan over this CPU's state list.
 *
 * The scan position is the anchor entry: each iteration takes the state
 * following the anchor and moves the anchor behind it.  Other anchors
 * (st_type == O_ANCHOR) are recognized; dead states (IPFW_STATE_ISDEAD)
 * are removed with ipfw_state_remove().  For the remaining states the
 * code tests for TCP, for the BOTH_SYN condition in st_state, and for a
 * time condition based on dyn_keepalive_interval, and then emits
 * keepalives with send_pkt():
 *   SEND_REV: send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN)
 *   SEND_FWD: send_pkt(&id, ack_fwd - 1, ack_rev, 0)
 * For xlat states a single direction is picked from xlat_dir.
 * NOTE(review): non-xlat states presumably get SEND_FWD | SEND_REV.
 *
 * The step is limited by ipfw_state_scan_max (states looked at),
 * ipfw_state_expire_max (states removed) and ipfw_keepalive_max
 * (keepalives sent); at each limit ipfw_keepalive_more() is called.
 * Once the list is exhausted the anchor is unlinked and
 * ipfw_keepalive_done() ends the pass.
 */
6716 ipfw_keepalive_loop(struct ipfw_context *ctx, struct ipfw_state *anchor)
6718 struct ipfw_state *s;
6719 int scanned = 0, expired = 0, kept = 0;
6721 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6722 ("keepalive is not in progress"));
6724 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
6725 uint32_t ack_rev, ack_fwd;
6726 struct ipfw_flow_id id;
6729 if (scanned++ >= ipfw_state_scan_max) {
6730 ipfw_keepalive_more(ctx);
6734 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
6735 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
6739 * Don't use IPFW_STATE_SCANSKIP; need to perform keepalive
6742 if (s->st_type == O_ANCHOR)
6745 if (IPFW_STATE_ISDEAD(s)) {
6746 ipfw_state_remove(ctx, s);
6747 if (++expired >= ipfw_state_expire_max) {
6748 ipfw_keepalive_more(ctx);
6755 * Keep alive processing
6758 if (s->st_proto != IPPROTO_TCP)
6760 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN)
6762 if (TIME_LEQ(time_uptime + dyn_keepalive_interval,
6764 continue; /* too early */
6766 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port,
6767 &id.dst_ip, &id.dst_port);
6768 ack_rev = s->st_ack_rev;
6769 ack_fwd = s->st_ack_fwd;
6771 #define SEND_FWD 0x1
6772 #define SEND_REV 0x2
6774 if (IPFW_ISXLAT(s->st_type)) {
6775 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s;
6777 if (x->xlat_dir == MATCH_FORWARD)
6778 send_dir = SEND_FWD;
6780 send_dir = SEND_REV;
6782 send_dir = SEND_FWD | SEND_REV;
6785 if (send_dir & SEND_REV)
6786 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN);
6787 if (send_dir & SEND_FWD)
6788 send_pkt(&id, ack_fwd - 1, ack_rev, 0);
6793 if (++kept >= ipfw_keepalive_max) {
6794 ipfw_keepalive_more(ctx);
6798 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
6799 ipfw_keepalive_done(ctx);
/*
 * Netisr handler for the keepalive continuation message.
 *
 * The message is replied to first.  If keepalives have been turned off
 * (dyn_keepalive == 0) or this CPU has no states left, the anchor is
 * unlinked from the state list and the pass is ended with
 * ipfw_keepalive_done(); ipfw_keepalive_loop() resumes the scan from
 * the anchor.
 */
6803 ipfw_keepalive_more_dispatch(netmsg_t nm)
6805 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6806 struct ipfw_state *anchor;
6808 ASSERT_NETISR_NCPUS(mycpuid);
6809 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6810 ("keepalive is not in progress"));
6813 netisr_replymsg(&nm->base, 0);
6815 anchor = &ctx->ipfw_keepalive_anch;
6816 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
6817 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
6818 ipfw_keepalive_done(ctx);
6821 ipfw_keepalive_loop(ctx, anchor);
6825 * This procedure is only used to handle keepalives. It is invoked
6826 * every dyn_keepalive_period seconds.
/*
 * Netisr handler that starts a keepalive pass on this CPU.
 *
 * Marks the pass as running (IPFW_FLAG_KEEPALIVE; it must not already
 * be set) and replies to the message.  When keepalives are turned off
 * (dyn_keepalive == 0) or there are no states, the pass is ended at
 * once with ipfw_keepalive_done().  The keepalive anchor is inserted at
 * the head of the state list and the scan is run by
 * ipfw_keepalive_loop().
 */
6829 ipfw_keepalive_dispatch(netmsg_t nm)
6831 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6832 struct ipfw_state *anchor;
6834 ASSERT_NETISR_NCPUS(mycpuid);
6835 KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0,
6836 ("keepalive is in progress"));
6837 ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE;
6841 netisr_replymsg(&nm->base, 0);
6844 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
6845 ipfw_keepalive_done(ctx);
6849 anchor = &ctx->ipfw_keepalive_anch;
6850 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
6851 ipfw_keepalive_loop(ctx, anchor);
6855 * This procedure is only used to handle keepalives. It is invoked
6856 * every dyn_keepalive_period seconds.
/*
 * Callout handler of the per-CPU keepalive timer: sends this CPU's
 * ipfw_keepalive_nm message on the current CPU, provided the previous
 * use of the message has completed (MSGF_DONE).
 */
6859 ipfw_keepalive(void *dummy __unused)
6861 struct netmsg_base *msg;
6863 KKASSERT(mycpuid < netisr_ncpus);
6864 msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm;
6867 if (msg->lmsg.ms_flags & MSGF_DONE)
6868 netisr_sendmsg_oncpu(msg);
/*
 * Netisr handler that receives an mbuf redispatched from another CPU
 * (see ipfw_defrag_redispatch()).
 *
 * nmsg is a struct netmsg_genpkt: m is the packet and arg1 the rule to
 * continue from, which must belong to this CPU.  The rule is parked in
 * ctx->ipfw_cont_rule, where ipfw_init_args() picks it up, and the
 * field is reset to NULL afterwards in case it was not consumed.
 */
6873 ipfw_ip_input_dispatch(netmsg_t nmsg)
6875 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
6876 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6877 struct mbuf *m = nm->m;
6878 struct ip_fw *rule = nm->arg1;
6880 ASSERT_NETISR_NCPUS(mycpuid);
6881 KASSERT(rule->cpuid == mycpuid,
6882 ("rule does not belong to cpu%d", mycpuid));
6883 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
6884 ("mbuf does not have ipfw continue rule"));
6886 KASSERT(ctx->ipfw_cont_rule == NULL,
6887 ("pending ipfw continue rule"));
6888 ctx->ipfw_cont_rule = rule;
6891 /* May not be cleared, if ipfw was unloaded/disabled. */
6892 ctx->ipfw_cont_rule = NULL;
6895 * This rule is no longer used; decrement its cross_refs,
6896 * so this rule can be deleted.
/*
 * Hand mbuf m over to another netisr CPU so that filtering continues
 * there.
 *
 * cpuid must differ from the current CPU.  The mbuf is flagged
 * IPFW_MBUF_CONTINUE, the netmsg embedded in the mbuf header
 * (m_hdr.mh_genmsg) is initialized for ipfw_ip_input_dispatch() with
 * arg1 set to the sibling of rule on the target CPU
 * (rule->cross_rules[cpuid]), and the message is sent with
 * netisr_sendmsg().
 */
6902 ipfw_defrag_redispatch(struct mbuf *m, int cpuid, struct ip_fw *rule)
6904 struct netmsg_genpkt *nm;
6906 KASSERT(cpuid != mycpuid, ("continue on the same cpu%d", cpuid));
6910 * Bump cross_refs to prevent this rule and its siblings
6911 * from being deleted, while this mbuf is inflight. The
6912 * cross_refs of the sibling rule on the target cpu will
6913 * be decremented, once this mbuf is going to be filtered
6914 * on the target cpu.
6917 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
6919 nm = &m->m_hdr.mh_genmsg;
6920 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
6921 ipfw_ip_input_dispatch);
6923 nm->arg1 = rule->cross_rules[cpuid];
6924 netisr_sendmsg(&nm->base, cpuid);
/*
 * Prepare *args for ipfw_chk() from mbuf m (oif is passed by the output
 * hook and is NULL for the input hook).
 *
 * Two resume cases are handled from the mbuf firewall flags:
 *  - DUMMYNET_MBUF_TAGGED: the rule to resume from is taken from the
 *    dummynet tag (dn_priv of struct dn_pkt); the tag is deleted and
 *    the flag cleared.
 *  - IPFW_MBUF_CONTINUE: the rule pending in this CPU's context
 *    (ipfw_cont_rule) is consumed.  A pending xlat (ipfw_cont_xlat) is
 *    consumed too, and the IPFW_MBUF_XLATINS / IPFW_MBUF_XLATFWD mbuf
 *    flags are converted into IP_FWARG_F_XLATINS / IP_FWARG_F_XLATFWD.
 *    IP_FWARG_F_CONT is set and the mbuf flag cleared.
 */
6928 ipfw_init_args(struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif)
6935 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
6938 /* Extract info from dummynet tag */
6939 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
6940 KKASSERT(mtag != NULL);
6941 args->rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
6942 KKASSERT(args->rule != NULL);
6944 m_tag_delete(m, mtag);
6945 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
6946 } else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
6947 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6949 KKASSERT(ctx->ipfw_cont_rule != NULL);
6950 args->rule = ctx->ipfw_cont_rule;
6951 ctx->ipfw_cont_rule = NULL;
6953 if (ctx->ipfw_cont_xlat != NULL) {
6954 args->xlat = ctx->ipfw_cont_xlat;
6955 ctx->ipfw_cont_xlat = NULL;
6956 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATINS) {
6957 args->flags |= IP_FWARG_F_XLATINS;
6958 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATINS;
6960 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATFWD) {
6961 args->flags |= IP_FWARG_F_XLATFWD;
6962 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATFWD;
6965 KKASSERT((m->m_pkthdr.fw_flags &
6966 (IPFW_MBUF_XLATINS | IPFW_MBUF_XLATFWD)) == 0);
6968 args->flags |= IP_FWARG_F_CONT;
6969 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE;
/*
 * pfil input hook (registered with PFIL_IN).
 *
 * Builds the firewall arguments with ipfw_init_args(&args, m, NULL),
 * runs ipfw_chk() and acts on its return value.  Among the visible
 * cases: IP_FW_DUMMYNET passes the packet to ipfw_dummynet_io() with
 * DN_TO_IP_IN, and the divert/tee path clears BRIDGE_MBUF_TAGGED and
 * calls ip_divert_p(m, tee, 1) when a divert handler is present.  An
 * unknown return value from ipfw_chk() panics.
 */
6978 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
6980 struct ip_fw_args args;
6981 struct mbuf *m = *m0;
6982 int tee = 0, error = 0, ret;
6984 ipfw_init_args(&args, m, NULL);
6986 ret = ipfw_chk(&args);
6989 if (ret != IP_FW_REDISPATCH)
7004 case IP_FW_DUMMYNET:
7005 /* Send packet to the appropriate pipe */
7006 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args);
7015 * Must clear bridge tag when changing
7017 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
7018 if (ip_divert_p != NULL) {
7019 m = ip_divert_p(m, tee, 1);
7023 /* not sure this is the right error msg */
7029 panic("unknown ipfw return value: %d", ret);
/*
 * pfil output hook (registered with PFIL_OUT).
 *
 * Same structure as ipfw_check_in(), except that the outgoing interface
 * ifp is passed to ipfw_init_args(), dummynet is entered with
 * DN_TO_IP_OUT and the divert handler is called as
 * ip_divert_p(m, tee, 0).  An unknown return value from ipfw_chk()
 * panics.
 */
7037 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
7039 struct ip_fw_args args;
7040 struct mbuf *m = *m0;
7041 int tee = 0, error = 0, ret;
7043 ipfw_init_args(&args, m, ifp);
7045 ret = ipfw_chk(&args);
7048 if (ret != IP_FW_REDISPATCH)
7063 case IP_FW_DUMMYNET:
7064 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args);
7072 if (ip_divert_p != NULL) {
7073 m = ip_divert_p(m, tee, 0);
7077 /* not sure this is the right error msg */
7083 panic("unknown ipfw return value: %d", ret);
/*
 * Install the packet filter hooks: look up the AF_INET pfil head and
 * register ipfw_check_in() for PFIL_IN and ipfw_check_out() for
 * PFIL_OUT.
 */
7093 struct pfil_head *pfh;
7097 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
7101 pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
7102 pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
/*
 * Remove the packet filter hooks: look up the AF_INET pfil head and
 * unregister ipfw_check_in() (PFIL_IN) and ipfw_check_out() (PFIL_OUT).
 */
7108 struct pfil_head *pfh;
7112 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
7116 pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
7117 pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
/*
 * Sysctl handler reporting the number of states as returned by
 * ipfw_state_cntcoll() plus the number of global track counters
 * (ipfw_gd.ipfw_trkcnt_cnt).
 */
7121 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS)
7125 dyn_cnt = ipfw_state_cntcoll();
7126 dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt;
7128 return (sysctl_handle_int(oidp, &dyn_cnt, 0, req));
/*
 * Sysctl handler reporting the state count returned by
 * ipfw_state_cntcoll().
 */
7132 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS)
7136 state_cnt = ipfw_state_cntcoll();
7137 return (sysctl_handle_int(oidp, &state_cnt, 0, req));
/*
 * Sysctl handler for the state limit: reports ipfw_state_max and, when
 * a new value is written (req->newptr != NULL and no error), applies it
 * through ipfw_state_max_set().
 */
7141 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS)
7143 int state_max, error;
7145 state_max = ipfw_state_max;
7146 error = sysctl_handle_int(oidp, &state_max, 0, req);
7147 if (error || req->newptr == NULL)
7153 ipfw_state_max_set(state_max);
/*
 * Sysctl handler for the combined limit: reports
 * ipfw_state_max + ipfw_track_max.  A written value is split evenly,
 * half going to ipfw_state_max_set() and half to ipfw_track_max.
 */
7158 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS)
7162 dyn_max = ipfw_state_max + ipfw_track_max;
7164 error = sysctl_handle_int(oidp, &dyn_max, 0, req);
7165 if (error || req->newptr == NULL)
7171 ipfw_state_max_set(dyn_max / 2);
7172 ipfw_track_max = dyn_max / 2;
/*
 * Netisr handler applying a new value of the enable switch carried in
 * lmsg.u.ms_result.  The requested value is compared with fw_enable
 * first; the message is replied to with 0.
 */
7177 ipfw_sysctl_enable_dispatch(netmsg_t nmsg)
7179 int enable = nmsg->lmsg.u.ms_result;
7183 if (fw_enable == enable)
7192 netisr_replymsg(&nmsg->base, 0);
/*
 * Sysctl handler for the enable switch.  A written value is not applied
 * here; it is sent to netisr CPU 0 with netisr_domsg(), where
 * ipfw_sysctl_enable_dispatch() handles it.  The result of
 * netisr_domsg() is returned.
 */
7196 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS)
7198 struct netmsg_base nmsg;
7202 error = sysctl_handle_int(oidp, &enable, 0, req);
7203 if (error || req->newptr == NULL)
7206 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7207 ipfw_sysctl_enable_dispatch);
7208 nmsg.lmsg.u.ms_result = enable;
7210 return netisr_domsg(&nmsg, 0);
/*
 * Sysctl handler for the rule number auto-increment step; delegates to
 * sysctl_int_range() with the bounds IPFW_AUTOINC_STEP_MIN and
 * IPFW_AUTOINC_STEP_MAX.
 */
7214 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS)
7216 return sysctl_int_range(oidp, arg1, arg2, req,
7217 IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX);
/*
 * Sysctl handler for integer knobs that must be positive; delegates to
 * sysctl_int_range() with the bounds 1 and INT_MAX.
 */
7221 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS)
7224 return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX);
/*
 * Sysctl handler for per-CPU statistics.
 *
 * arg2 is the byte offset of a u_long counter inside
 * struct ipfw_context.  The counters of all netisr CPUs are summed and
 * reported through sysctl_handle_long().  When a value is written
 * (req->newptr != NULL and no error) the counter is zeroed on every
 * CPU.
 */
7228 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS)
7233 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
7234 stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2));
7236 error = sysctl_handle_long(oidp, &stat, 0, req);
7237 if (error || req->newptr == NULL)
7240 /* Zero out this stat. */
7241 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
7242 *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0;
/*
 * Netisr handler that builds the ipfw context of the current CPU
 * (sent by ipfw_init_dispatch()).
 *
 * The context is allocated zeroed, sized to include ipfw_table_max
 * table head slots.  The state and track trees and lists are
 * initialized, and for each periodic job (state expiry, track expiry,
 * keepalive, xlat reaping) a callout and its netmsg(s) are set up; the
 * state-expire and keepalive anchors are typed O_ANCHOR.  The context
 * is then published in ipfw_ctx[mycpuid].
 *
 * A default rule is created (rule number IPFW_DEFAULT_RULE, set
 * IPFW_DEFAULT_SET, one instruction of length 1) and installed as both
 * ipfw_default_rule and the head of ipfw_layer3_chain.  Its opcode is
 * O_ACCEPT when IPFIREWALL_DEFAULT_TO_ACCEPT is defined; the remaining
 * lines choose between O_ACCEPT and O_DENY from
 * filters_default_to_accept.  The rule is linked to its per-CPU
 * siblings with ipfw_link_sibling() and the message is forwarded to
 * mycpuid + 1.
 */
7247 ipfw_ctx_init_dispatch(netmsg_t nmsg)
7249 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
7250 struct ipfw_context *ctx;
7251 struct ip_fw *def_rule;
7253 ASSERT_NETISR_NCPUS(mycpuid);
7255 ctx = kmalloc(__offsetof(struct ipfw_context,
7256 ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO);
7258 RB_INIT(&ctx->ipfw_state_tree);
7259 TAILQ_INIT(&ctx->ipfw_state_list);
7261 RB_INIT(&ctx->ipfw_track_tree);
7262 TAILQ_INIT(&ctx->ipfw_track_list);
7264 callout_init_mp(&ctx->ipfw_stateto_ch);
7265 netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport,
7266 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch);
7267 ctx->ipfw_stateexp_anch.st_type = O_ANCHOR;
7268 netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport,
7269 MSGF_DROPABLE, ipfw_state_expire_more_dispatch);
7271 callout_init_mp(&ctx->ipfw_trackto_ch);
7272 netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport,
7273 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch);
7274 netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport,
7275 MSGF_DROPABLE, ipfw_track_expire_more_dispatch);
7277 callout_init_mp(&ctx->ipfw_keepalive_ch);
7278 netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport,
7279 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch);
7280 ctx->ipfw_keepalive_anch.st_type = O_ANCHOR;
7281 netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport,
7282 MSGF_DROPABLE, ipfw_keepalive_more_dispatch);
7284 callout_init_mp(&ctx->ipfw_xlatreap_ch);
7285 netmsg_init(&ctx->ipfw_xlatreap_nm, NULL, &netisr_adone_rport,
7286 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_xlat_reap_dispatch);
7287 TAILQ_INIT(&ctx->ipfw_xlatreap);
7289 ipfw_ctx[mycpuid] = ctx;
7291 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO);
7293 def_rule->act_ofs = 0;
7294 def_rule->rulenum = IPFW_DEFAULT_RULE;
7295 def_rule->cmd_len = 1;
7296 def_rule->set = IPFW_DEFAULT_SET;
7298 def_rule->cmd[0].len = 1;
7299 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
7300 def_rule->cmd[0].opcode = O_ACCEPT;
7302 if (filters_default_to_accept)
7303 def_rule->cmd[0].opcode = O_ACCEPT;
7305 def_rule->cmd[0].opcode = O_DENY;
7308 def_rule->refcnt = 1;
7309 def_rule->cpuid = mycpuid;
7311 /* Install the default rule */
7312 ctx->ipfw_default_rule = def_rule;
7313 ctx->ipfw_layer3_chain = def_rule;
7315 /* Link rule CPU sibling */
7316 ipfw_link_sibling(fwmsg, def_rule);
7318 /* Statistics only need to be updated once */
7320 ipfw_inc_static_count(def_rule);
7322 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Netisr handler behind the crossref GC callout: replies to the message
 * and then runs ipfw_crossref_reap().
 */
7326 ipfw_crossref_reap_dispatch(netmsg_t nmsg)
7331 netisr_replymsg(&nmsg->base, 0);
7333 ipfw_crossref_reap();
/*
 * Callout handler of the crossref GC timer; must run on CPU 0.  Sends
 * the global ipfw_crossref_nm message on the current CPU, provided its
 * previous use has completed (MSGF_DONE).
 */
7337 ipfw_crossref_timeo(void *dummy __unused)
7339 struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm;
7341 KKASSERT(mycpuid == 0);
7344 if (msg->lmsg.ms_flags & MSGF_DONE)
7345 netisr_sendmsg_oncpu(msg);
/*
 * Netisr handler run when an interface address event is reported for
 * the ifnet carried in lmsg.u.ms_resultp.
 *
 * Every rule of this CPU's chain flagged IPFW_RULE_F_DYNIFADDR is
 * scanned instruction by instruction (advancing by F_LEN(cmd)).  For
 * O_IP_SRC_IFIP and O_IP_DST_IFIP instructions the interface name
 * stored in the instruction is compared with ifp->if_xname, and
 * IPFW_IFIP_VALID is cleared in cmd->arg1.
 * NOTE(review): the clear is presumably limited to instructions whose
 * interface name matches.
 * The message is then forwarded to mycpuid + 1.
 */
7350 ipfw_ifaddr_dispatch(netmsg_t nmsg)
7352 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
7353 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
7356 ASSERT_NETISR_NCPUS(mycpuid);
7358 for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) {
7362 if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0)
7365 for (l = f->cmd_len, cmd = f->cmd; l > 0;
7366 l -= cmdlen, cmd += cmdlen) {
7367 cmdlen = F_LEN(cmd);
7368 if (cmd->opcode == O_IP_SRC_IFIP ||
7369 cmd->opcode == O_IP_DST_IFIP) {
7370 if (strncmp(ifp->if_xname,
7371 ((ipfw_insn_ifip *)cmd)->ifname,
7373 cmd->arg1 &= ~IPFW_IFIP_VALID;
7377 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * ifaddr_event handler (registered by ipfw_init_dispatch()): passes the
 * affected interface to ipfw_ifaddr_dispatch() through
 * netisr_domsg_global().  The event type and the address itself are
 * not used.
 */
7381 ipfw_ifaddr(void *arg __unused, struct ifnet *ifp,
7382 enum ifaddr_event event __unused, struct ifaddr *ifa __unused)
7384 struct netmsg_base nm;
7386 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7387 ipfw_ifaddr_dispatch);
7388 nm.lmsg.u.ms_resultp = ifp;
7389 netisr_domsg_global(&nm);
/*
 * Netisr handler performing the one-time initialization of ipfw.
 *
 * Steps visible here, in order:
 *  - report "IP firewall already loaded" when applicable;
 *  - force ipfw_table_max to UINT16_MAX when it is <= 0 or larger than
 *    UINT16_MAX;
 *  - initialize the global track counter tree and its token;
 *  - set up the callout and netmsg used to reap freed crossref rules;
 *  - apply ipfw_state_max and set ipfw_state_headroom to
 *    8 * netisr_ncpus;
 *  - create the per-CPU contexts via ipfw_ctx_init_dispatch();
 *  - publish ipfw_chk, ipfw_ctl and ipfw_dummynet_io through the
 *    ip_fw_*_ptr hooks;
 *  - print the banner (default policy and logging mode);
 *  - arm the state-expire, track-expire and keepalive callouts of every
 *    netisr CPU to fire after hz ticks on their own CPU;
 *  - register ipfw_ifaddr() for ifaddr_event (a failure is only
 *    logged);
 *  - reply to the message with error.
 */
7393 ipfw_init_dispatch(netmsg_t nmsg)
7395 struct netmsg_ipfw fwmsg;
7401 kprintf("IP firewall already loaded\n");
7406 if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0)
7407 ipfw_table_max = UINT16_MAX;
7409 /* Initialize global track tree. */
7410 RB_INIT(&ipfw_gd.ipfw_trkcnt_tree);
7411 IPFW_TRKCNT_TOKINIT;
7413 /* GC for freed crossref rules. */
7414 callout_init_mp(&ipfw_gd.ipfw_crossref_ch);
7415 netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport,
7416 MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch);
7418 ipfw_state_max_set(ipfw_state_max);
7419 ipfw_state_headroom = 8 * netisr_ncpus;
7421 bzero(&fwmsg, sizeof(fwmsg));
7422 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7423 ipfw_ctx_init_dispatch);
7424 netisr_domsg_global(&fwmsg.base);
7426 ip_fw_chk_ptr = ipfw_chk;
7427 ip_fw_ctl_ptr = ipfw_ctl;
7428 ip_fw_dn_io_ptr = ipfw_dummynet_io;
7430 kprintf("ipfw2 initialized, default to %s, logging ",
7431 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode ==
7432 O_ACCEPT ? "accept" : "deny");
7434 #ifdef IPFIREWALL_VERBOSE
7437 #ifdef IPFIREWALL_VERBOSE_LIMIT
7438 verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
7440 if (fw_verbose == 0) {
7441 kprintf("disabled\n");
7442 } else if (verbose_limit == 0) {
7443 kprintf("unlimited\n");
7445 kprintf("limited to %d packets/entry by default\n",
7450 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
7451 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz,
7452 ipfw_state_expire_ipifunc, NULL, cpu);
7453 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz,
7454 ipfw_track_expire_ipifunc, NULL, cpu);
7455 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz,
7456 ipfw_keepalive, NULL, cpu);
7462 ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr,
7463 NULL, EVENTHANDLER_PRI_ANY);
7464 if (ipfw_ifaddr_event == NULL)
7465 kprintf("ipfw: ifaddr_event register failed\n");
7468 netisr_replymsg(&nmsg->base, error);
/*
 * Run ipfw_init_dispatch() through a netmsg issued with netisr_domsg()
 * and return whatever netisr_domsg() returns.
 * NOTE(review): the second argument (0) is presumably the target cpu,
 * i.e. initialization runs in netisr0 -- confirm.
 */
7474 struct netmsg_base smsg;
7476 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7477 ipfw_init_dispatch);
7478 return netisr_domsg(&smsg, 0);
/*
 * Per-CPU teardown netmsg handler; operates on ipfw_ctx[mycpuid].
 *
 * Stops this context's state, track, keepalive and xlatreap callouts
 * with callout_stop_sync(), drops the context's associated netmsgs,
 * flushes its tables and then forwards the message to cpu (mycpuid + 1).
 */
7484 ipfw_ctx_fini_dispatch(netmsg_t nmsg)
7486 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
7488 ASSERT_NETISR_NCPUS(mycpuid);
/* Stop the per-context callouts. */
7490 callout_stop_sync(&ctx->ipfw_stateto_ch);
7491 callout_stop_sync(&ctx->ipfw_trackto_ch);
7492 callout_stop_sync(&ctx->ipfw_keepalive_ch);
7493 callout_stop_sync(&ctx->ipfw_xlatreap_ch);
/* Drop the netmsgs belonging to those callouts. */
7496 netisr_dropmsg(&ctx->ipfw_stateexp_more);
7497 netisr_dropmsg(&ctx->ipfw_stateexp_nm);
7498 netisr_dropmsg(&ctx->ipfw_trackexp_more);
7499 netisr_dropmsg(&ctx->ipfw_trackexp_nm);
7500 netisr_dropmsg(&ctx->ipfw_keepalive_more);
7501 netisr_dropmsg(&ctx->ipfw_keepalive_nm);
7502 netisr_dropmsg(&ctx->ipfw_xlatreap_nm);
/* NOTE(review): meaning of the second argument (1) is not visible here. */
7505 ipfw_table_flushall_oncpu(ctx, 1);
7507 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Netmsg handler that tears ipfw down.
 *
 * After reaping crossref rules it checks ipfw_gd.ipfw_refcnt.
 * NOTE(review): a non-zero refcnt presumably aborts the unload with an
 * error -- confirm.
 * It then synchronizes in-flight IPIs, runs ipfw_ctx_fini_dispatch()
 * through netisr_domsg_global(), stops the crossref GC callout and drops
 * its netmsg, deregisters the ifaddr_event handler, clears the
 * ip_fw_*_ptr pointers, flushes all rules including the default rule and
 * frees the per-cpu contexts.
 *
 * The message is finally replied to with the local error value.
 */
7511 ipfw_fini_dispatch(netmsg_t nmsg)
7513 struct netmsg_base nm;
7518 ipfw_crossref_reap();
7520 if (ipfw_gd.ipfw_refcnt != 0) {
7528 /* Synchronize any inflight state/track expire IPIs. */
7529 lwkt_synchronize_ipiqs("ipfwfini");
/* Per-CPU context teardown. */
7531 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7532 ipfw_ctx_fini_dispatch);
7533 netisr_domsg_global(&nm);
/* Shut down the crossref GC callout and its message. */
7535 callout_stop_sync(&ipfw_gd.ipfw_crossref_ch);
7537 netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm);
/* The handler is only deregistered if registration succeeded. */
7540 if (ipfw_ifaddr_event != NULL)
7541 EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event);
/* Clear the ipfw entry points before flushing the rules. */
7543 ip_fw_chk_ptr = NULL;
7544 ip_fw_ctl_ptr = NULL;
7545 ip_fw_dn_io_ptr = NULL;
7546 ipfw_flush(1 /* kill default rule */);
7548 /* Free per-cpu context */
7549 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
7550 kfree(ipfw_ctx[cpu], M_IPFW);
7552 kprintf("IP firewall unloaded\n");
7554 netisr_replymsg(&nmsg->base, error);
/*
 * Netmsg handler that flushes all rules except the default rule, reaps
 * crossref rules and replies to the message with status 0.
 */
7558 ipfw_fflush_dispatch(netmsg_t nmsg)
7561 ipfw_flush(0 /* keep default rule */);
7562 ipfw_crossref_reap();
7563 netisr_replymsg(&nmsg->base, 0);
/*
 * Unload sequence: run ipfw_fflush_dispatch() via netisr_domsg(), check
 * ipfw_gd.ipfw_refcnt, and finally run ipfw_fini_dispatch() the same way,
 * returning the result of that last netisr_domsg() call.
 * NOTE(review): the flush/check/sleep steps presumably repeat until the
 * refcnt reaches 0; the loop construct is not visible here -- confirm.
 */
7569 struct netmsg_base smsg;
7573 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7574 ipfw_fflush_dispatch);
7575 netisr_domsg(&smsg, 0);
/* While references remain, log the attempt and sleep (3 * hz) / 2 ticks. */
7577 if (ipfw_gd.ipfw_refcnt == 0)
7579 kprintf("ipfw: flush pending %d\n", ++i);
7580 tsleep(&smsg, 0, "ipfwff", (3 * hz) / 2);
/* The same message structure is reused for the final teardown. */
7583 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7584 ipfw_fini_dispatch);
7585 return netisr_domsg(&smsg, 0);
7588 #endif /* KLD_MODULE */
/*
 * Module event handler for ipfw (referenced through ipfwmod).
 * One visible path reports that a statically compiled ipfw cannot be
 * unloaded.
 * NOTE(review): dispatch on `type' is not visible here -- confirm which
 * events reach that message.
 */
7591 ipfw_modevent(module_t mod, int type, void *unused)
7602 kprintf("ipfw statically compiled, cannot unload\n");
/*
 * Module glue: the moduledata descriptor for ipfw, its declaration at
 * SI_SUB_PROTO_END / SI_ORDER_ANY, and the module version (1).
 */
7614 static moduledata_t ipfwmod = {
7619 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY);
7620 MODULE_VERSION(ipfw, 1);