2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
29 * Implement IP packet firewall (new version)
35 #error IPFIREWALL requires INET.
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/ucred.h>
49 #include <sys/in_cksum.h>
50 #include <sys/limits.h>
55 #include <net/route.h>
57 #include <net/dummynet/ip_dummynet.h>
59 #include <sys/thread2.h>
60 #include <sys/mplock2.h>
61 #include <net/netmsg2.h>
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #include <netinet/ip_icmp.h>
70 #include <netinet/tcp.h>
71 #include <netinet/tcp_seq.h>
72 #include <netinet/tcp_timer.h>
73 #include <netinet/tcp_var.h>
74 #include <netinet/tcpip.h>
75 #include <netinet/udp.h>
76 #include <netinet/udp_var.h>
77 #include <netinet/ip_divert.h>
78 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
80 #include <net/ipfw/ip_fw2.h>
82 #ifdef IPFIREWALL_DEBUG
83 #define DPRINTF(fmt, ...) \
86 kprintf(fmt, __VA_ARGS__); \
89 #define DPRINTF(fmt, ...) ((void)0)
93 * Description about per-CPU rule duplication:
95 * Module loading/unloading and all ioctl operations are serialized
96 * by netisr0, so we don't have any ordering or locking problems.
98 * Following graph shows how operation on per-CPU rule list is
99 * performed [2 CPU case]:
103 * netisr0 <------------------------------------+
109 * forwardmsg---------->netisr1 |
114 * replymsg--------------+
118 * Rule structure [2 CPU case]
122 * layer3_chain layer3_chain
125 * +-------+ sibling +-------+ sibling
126 * | rule1 |--------->| rule1 |--------->NULL
127 * +-------+ +-------+
131 * +-------+ sibling +-------+ sibling
132 * | rule2 |--------->| rule2 |--------->NULL
133 * +-------+ +-------+
136 * 1) Ease statistics calculation during IP_FW_GET. We only need to
137 * iterate layer3_chain in netisr0; the current rule's duplication
138 * to the other CPUs could safely be read-only accessed through
140 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
141 * a) In netisr0 rule3 is determined to be inserted between rule1
142 * and rule2. To make this decision we need to iterate the
143 * layer3_chain in netisr0. The netmsg, which is used to insert
144 * the rule, will contain rule1 in netisr0 as prev_rule and rule2
145 * in netisr0 as next_rule.
146 * b) After the insertion in netisr0 is done, we will move on to
147 * netisr1. But instead of relocating the rule3's position in
148 * netisr1 by iterating the layer3_chain in netisr1, we set the
149 * netmsg's prev_rule to rule1->sibling and next_rule to
150 * rule2->sibling before the netmsg is forwarded to netisr1 from
155 * Description of states and tracks.
157 * Both states and tracks are stored in per-cpu RB trees instead of
158 * per-cpu hash tables to avoid the worst case hash degeneration.
160 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
161 * measured in seconds and depending on the flags.
163 * When a packet is received, its address fields are first masked with
164 * the mask defined for the rule, then matched against the entries in
165 * the per-cpu state RB tree. States are generated by 'keep-state'
166 * and 'limit' options.
168 * The max number of states is ipfw_state_max. When we reach the
169 * maximum number of states we do not create anymore. This is done to
170 * avoid consuming too much memory, but also too much time when
171 * searching on each packet.
173 * Each state holds a pointer to the parent ipfw rule of the current
174 * CPU so we know what action to perform. States are removed when the
175 * parent rule is deleted. XXX we should make them survive.
177 * There are some limitations with states -- we do not obey the
178 * 'randomized match', and we do not do multiple passes through the
179 * firewall. XXX check the latter!!!
181 * States grow independently on each CPU, e.g. 2 CPU case:
184 * ................... ...................
185 * : state RB tree : : state RB tree :
187 * : state1 state2 : : state3 :
189 * :.....|....|......: :........|........:
194 * +-------+ +-------+
195 * | rule1 | | rule1 |
196 * +-------+ +-------+
198 * Tracks are used to enforce limits on the number of sessions. Tracks
199 * are generated by 'limit' option.
201 * The max number of tracks is ipfw_track_max. When we reach the
202 * maximum number of tracks we do not create anymore. This is done to
203 * avoid consuming too much memory.
205 * Tracks are organized into two layers, track counter RB tree is
206 * shared between CPUs, track RB tree is per-cpu. States generated by
207 * 'limit' option are linked to the track in addition to the per-cpu
208 * state RB tree; mainly to ease expiration. e.g. 2 CPU case:
210 * ..............................
211 * : track counter RB tree :
216 * : +--->counter<----+ :
218 * : | +-----------+ | :
219 * :......|................|....:
222 * ................. |t_count | .................
223 * : track RB tree : | | : track RB tree :
225 * : +-->track1-------+ +--------track2 :
228 * :.|.....|.......: :...............:
229 * | +----------------+
230 * | .................... |
231 * | : state RB tree : |st_track
233 * +---state1 state2---+
235 * :.....|.......|....:
244 #define IPFW_AUTOINC_STEP_MIN 1
245 #define IPFW_AUTOINC_STEP_MAX 1000
246 #define IPFW_AUTOINC_STEP_DEF 100
248 #define IPFW_TABLE_MAX_DEF 64
250 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */
251 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */
253 #define MATCH_REVERSE 0
254 #define MATCH_FORWARD 1
256 #define MATCH_UNKNOWN 3
258 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST)
259 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \
260 (IPFW_STATE_TCPFLAGS << 8))
262 #define BOTH_SYN (TH_SYN | (TH_SYN << 8))
263 #define BOTH_FIN (TH_FIN | (TH_FIN << 8))
264 #define BOTH_RST (TH_RST | (TH_RST << 8))
265 /* TH_ACK here means FIN was ACKed. */
266 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8))
268 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \
269 (((s)->st_state & BOTH_RST) || \
270 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))
272 #define O_ANCHOR O_NOP
275 struct netmsg_base base;
276 const struct ipfw_ioc_rule *ioc_rule;
277 struct ip_fw *next_rule;
278 struct ip_fw *prev_rule;
279 struct ip_fw *sibling;
281 struct ip_fw **cross_rules;
285 struct netmsg_base base;
286 struct ip_fw *start_rule;
287 struct ip_fw *prev_rule;
294 struct netmsg_base base;
295 struct ip_fw *start_rule;
300 struct netmsg_cpstate {
301 struct netmsg_base base;
302 struct ipfw_ioc_state *ioc_state;
307 struct netmsg_tblent {
308 struct netmsg_base base;
309 struct sockaddr *key;
310 struct sockaddr *netmask;
311 struct ipfw_tblent *sibling;
315 struct netmsg_tblflush {
316 struct netmsg_base base;
321 struct netmsg_tblexp {
322 struct netmsg_base base;
327 struct radix_node_head *rnh;
330 struct ipfw_table_cp {
331 struct ipfw_ioc_tblent *te;
348 struct ipfw_addrs addrs;
352 struct ipfw_ports ports;
356 uint8_t swap; /* IPFW_KEY_SWAP_ */
360 #define IPFW_KEY_SWAP_ADDRS 0x1
361 #define IPFW_KEY_SWAP_PORTS 0x2
362 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)
365 RB_ENTRY(ipfw_trkcnt) tc_rblink;
366 struct ipfw_key tc_key;
370 time_t tc_expire; /* userland get-only */
371 uint16_t tc_rulenum; /* userland get-only */
374 #define tc_addrs tc_key.addr_u.value
375 #define tc_ports tc_key.port_u.value
376 #define tc_proto tc_key.proto
377 #define tc_saddr tc_key.addr_u.addrs.addr1
378 #define tc_daddr tc_key.addr_u.addrs.addr2
379 #define tc_sport tc_key.port_u.ports.port1
380 #define tc_dport tc_key.port_u.ports.port2
382 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt);
387 RB_ENTRY(ipfw_track) t_rblink;
388 struct ipfw_key t_key;
389 struct ip_fw *t_rule;
391 LIST_HEAD(, ipfw_state) t_state_list;
393 volatile int *t_count;
394 struct ipfw_trkcnt *t_trkcnt;
395 TAILQ_ENTRY(ipfw_track) t_link;
398 #define t_addrs t_key.addr_u.value
399 #define t_ports t_key.port_u.value
400 #define t_proto t_key.proto
401 #define t_saddr t_key.addr_u.addrs.addr1
402 #define t_daddr t_key.addr_u.addrs.addr2
403 #define t_sport t_key.port_u.ports.port1
404 #define t_dport t_key.port_u.ports.port2
406 RB_HEAD(ipfw_track_tree, ipfw_track);
407 TAILQ_HEAD(ipfw_track_list, ipfw_track);
410 RB_ENTRY(ipfw_state) st_rblink;
411 struct ipfw_key st_key;
413 time_t st_expire; /* expire time */
414 struct ip_fw *st_rule;
416 uint64_t st_pcnt; /* packets */
417 uint64_t st_bcnt; /* bytes */
421 * State of this rule, typically a combination of TCP flags.
423 * st_ack_fwd/st_ack_rev:
424 * Most recent ACKs in forward and reverse direction. They
425 * are used to generate keepalives.
433 uint16_t st_flags; /* IPFW_STATE_F_ */
434 uint16_t st_type; /* O_KEEP_STATE/O_LIMIT */
435 struct ipfw_track *st_track;
437 LIST_ENTRY(ipfw_state) st_trklink;
438 TAILQ_ENTRY(ipfw_state) st_link;
441 #define st_addrs st_key.addr_u.value
442 #define st_ports st_key.port_u.value
443 #define st_proto st_key.proto
444 #define st_swap st_key.swap
446 #define IPFW_STATE_F_ACKFWD 0x0001
447 #define IPFW_STATE_F_SEQFWD 0x0002
448 #define IPFW_STATE_F_ACKREV 0x0004
449 #define IPFW_STATE_F_SEQREV 0x0008
451 TAILQ_HEAD(ipfw_state_list, ipfw_state);
452 RB_HEAD(ipfw_state_tree, ipfw_state);
455 struct radix_node te_nodes[2];
456 struct sockaddr_in te_key;
459 struct ipfw_tblent *te_sibling;
460 volatile int te_expired;
463 struct ipfw_context {
464 struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */
465 struct ip_fw *ipfw_default_rule; /* default rule */
466 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat*/
469 * ipfw_set_disable contains one bit per set value (0..31).
470 * If the bit is set, all rules with the corresponding set
471  * are disabled. Set IPFW_DEFAULT_SET is reserved for the
472 * default rule and CANNOT be disabled.
474 uint32_t ipfw_set_disable;
476 uint8_t ipfw_flags; /* IPFW_FLAG_ */
478 struct ip_fw *ipfw_cont_rule;
480 struct ipfw_state_tree ipfw_state_tree;
481 struct ipfw_state_list ipfw_state_list;
482 int ipfw_state_loosecnt;
486 struct ipfw_state state;
487 struct ipfw_track track;
488 struct ipfw_trkcnt trkcnt;
491 struct ipfw_track_tree ipfw_track_tree;
492 struct ipfw_track_list ipfw_track_list;
493 struct ipfw_trkcnt *ipfw_trkcnt_spare;
495 struct callout ipfw_stateto_ch;
496 time_t ipfw_state_lastexp;
497 struct netmsg_base ipfw_stateexp_nm;
498 struct netmsg_base ipfw_stateexp_more;
499 struct ipfw_state ipfw_stateexp_anch;
501 struct callout ipfw_trackto_ch;
502 time_t ipfw_track_lastexp;
503 struct netmsg_base ipfw_trackexp_nm;
504 struct netmsg_base ipfw_trackexp_more;
505 struct ipfw_track ipfw_trackexp_anch;
507 struct callout ipfw_keepalive_ch;
508 struct netmsg_base ipfw_keepalive_nm;
509 struct netmsg_base ipfw_keepalive_more;
510 struct ipfw_state ipfw_keepalive_anch;
515 u_long ipfw_sts_reap;
516 u_long ipfw_sts_reapfailed;
517 u_long ipfw_sts_overflow;
518 u_long ipfw_sts_nomem;
519 u_long ipfw_sts_tcprecycled;
521 u_long ipfw_tks_nomem;
522 u_long ipfw_tks_reap;
523 u_long ipfw_tks_reapfailed;
524 u_long ipfw_tks_overflow;
525 u_long ipfw_tks_cntnomem;
528 u_long ipfw_defraged;
529 u_long ipfw_defrag_remote;
532 struct radix_node_head *ipfw_tables[];
535 #define IPFW_FLAG_KEEPALIVE 0x01
536 #define IPFW_FLAG_STATEEXP 0x02
537 #define IPFW_FLAG_TRACKEXP 0x04
538 #define IPFW_FLAG_STATEREAP 0x08
539 #define IPFW_FLAG_TRACKREAP 0x10
541 #define ipfw_state_tmpkey ipfw_tmpkey.state
542 #define ipfw_track_tmpkey ipfw_tmpkey.track
543 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt
546 int ipfw_state_loosecnt; /* cache aligned */
547 time_t ipfw_state_globexp __cachealign;
549 struct lwkt_token ipfw_trkcnt_token __cachealign;
550 struct ipfw_trkcnt_tree ipfw_trkcnt_tree;
552 time_t ipfw_track_globexp;
554 /* Accessed in netisr0. */
555 struct ip_fw *ipfw_crossref_free __cachealign;
556 struct callout ipfw_crossref_ch;
557 struct netmsg_base ipfw_crossref_nm;
561 * Module can not be unloaded, if there are references to
562 * certains rules of ipfw(4), e.g. dummynet(4)
564 int ipfw_refcnt __cachealign;
568 static struct ipfw_context *ipfw_ctx[MAXCPU];
570 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
573 * Following two global variables are accessed and updated only
576 static uint32_t static_count; /* # of static rules */
577 static uint32_t static_ioc_len; /* bytes of static rules */
580 * If 1, then ipfw static rules are being flushed,
581 * ipfw_chk() will skip to the default rule.
583 static int ipfw_flushing;
585 static int fw_verbose;
586 static int verbose_limit;
589 static int autoinc_step = IPFW_AUTOINC_STEP_DEF;
591 static int ipfw_table_max = IPFW_TABLE_MAX_DEF;
593 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS);
594 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS);
596 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max);
598 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
599 SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0,
600 "Firewall statistics");
602 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
603 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw");
604 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW,
605 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I",
606 "Rule number autincrement step");
607 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW,
609 "Only do a single pass through ipfw when using dummynet(4)");
610 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
611 &fw_debug, 0, "Enable printing of debug ip_fw statements");
612 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
613 &fw_verbose, 0, "Log matches to ipfw rules");
614 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
615 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
616 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD,
617 &ipfw_table_max, 0, "Max # of tables");
619 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS);
620 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS);
621 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS);
622 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS);
623 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS);
624 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS);
627 * Timeouts for various events in handing states.
631 * 2 == 1~2 second(s).
633 * We use 2 seconds for FIN lifetime, so that the states will not be
634 * ripped prematurely.
636 static uint32_t dyn_ack_lifetime = 300;
637 static uint32_t dyn_syn_lifetime = 20;
638 static uint32_t dyn_finwait_lifetime = 20;
639 static uint32_t dyn_fin_lifetime = 2;
640 static uint32_t dyn_rst_lifetime = 2;
641 static uint32_t dyn_udp_lifetime = 10;
642 static uint32_t dyn_short_lifetime = 5; /* used by tracks too */
645 * Keepalives are sent if dyn_keepalive is set. They are sent every
646 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
647 * seconds of lifetime of a rule.
649 static uint32_t dyn_keepalive_interval = 20;
650 static uint32_t dyn_keepalive_period = 5;
651 static uint32_t dyn_keepalive = 1; /* do send keepalives */
653 static struct ipfw_global ipfw_gd;
654 static int ipfw_state_loosecnt_updthr;
655 static int ipfw_state_max = 4096; /* max # of states */
656 static int ipfw_track_max = 4096; /* max # of tracks */
658 static int ipfw_state_headroom; /* setup at module load time */
659 static int ipfw_state_reap_min = 8;
660 static int ipfw_state_expire_max = 32;
661 static int ipfw_state_scan_max = 256;
662 static int ipfw_keepalive_max = 8;
663 static int ipfw_track_reap_max = 4;
664 static int ipfw_track_expire_max = 16;
665 static int ipfw_track_scan_max = 128;
668 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count,
669 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I",
670 "Number of states and tracks");
671 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
672 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I",
673 "Max number of states and tracks");
675 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt,
676 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I",
678 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max,
679 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I",
680 "Max number of states");
681 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW,
682 &ipfw_state_headroom, 0, "headroom for state reap");
683 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD,
684 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks");
685 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW,
686 &ipfw_track_max, 0, "Max number of tracks");
687 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
688 &static_count, 0, "Number of static rules");
689 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
690 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
691 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
692 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
693 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
694 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
695 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW,
696 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait");
697 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
698 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
699 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
700 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
701 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
702 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
703 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
704 &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
705 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max,
706 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt,
707 "I", "# of states to scan for each expire iteration");
708 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max,
709 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt,
710 "I", "# of states to expire for each expire iteration");
711 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max,
712 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt,
713 "I", "# of states to expire for each expire iteration");
714 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min,
715 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt,
716 "I", "# of states to reap for state shortage");
717 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max,
718 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt,
719 "I", "# of tracks to scan for each expire iteration");
720 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max,
721 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt,
722 "I", "# of tracks to expire for each expire iteration");
723 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max,
724 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt,
725 "I", "# of tracks to reap for track shortage");
727 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap,
728 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
729 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat,
730 "LU", "# of state reaps due to states shortage");
731 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed,
732 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
733 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat,
734 "LU", "# of state reap failure");
735 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow,
736 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
737 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat,
738 "LU", "# of state overflow");
739 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem,
740 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
741 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat,
742 "LU", "# of state allocation failure");
743 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled,
744 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
745 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat,
746 "LU", "# of state deleted due to fast TCP port recycling");
748 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem,
749 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
750 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat,
751 "LU", "# of track allocation failure");
752 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap,
753 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
754 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat,
755 "LU", "# of track reap due to tracks shortage");
756 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed,
757 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
758 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat,
759 "LU", "# of track reap failure");
760 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow,
761 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
762 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat,
763 "LU", "# of track overflow");
764 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem,
765 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
766 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat,
767 "LU", "# of track counter allocation failure");
768 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags,
769 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
770 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat,
771 "LU", "# of IP fragements defraged");
772 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged,
773 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
774 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat,
775 "LU", "# of IP packets after defrag");
776 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote,
777 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
778 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat,
779 "LU", "# of IP packets after defrag dispatched to remote cpus");
781 static int ipfw_state_cmp(struct ipfw_state *,
782 struct ipfw_state *);
783 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *,
784 struct ipfw_trkcnt *);
785 static int ipfw_track_cmp(struct ipfw_track *,
786 struct ipfw_track *);
788 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp);
789 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp);
791 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp);
792 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp);
794 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp);
795 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp);
797 static ip_fw_chk_t ipfw_chk;
798 static void ipfw_track_expire_ipifunc(void *);
799 static void ipfw_state_expire_ipifunc(void *);
800 static void ipfw_keepalive(void *);
801 static int ipfw_state_expire_start(struct ipfw_context *,
803 static void ipfw_crossref_timeo(void *);
805 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token)
806 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token)
807 #define IPFW_TRKCNT_TOKINIT \
808 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt");
/*
 * Copy sockaddr 'src' into 'dst', AND-ing the address bytes with
 * 'netmask' (same contract as the classic BSD routing-socket
 * sa_maskedcopy()): the first netmask->sa_len bytes are masked,
 * the remainder of dst (up to src->sa_len) is zeroed.
 * NOTE(review): this listing has elided lines — the loop header
 * between the sa_len/sa_family copy and the masked byte copy is
 * not visible; confirm against the pristine source.
 */
811 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
812 const struct sockaddr *netmask)
814 const u_char *cp1 = (const u_char *)src;
815 u_char *cp2 = (u_char *)dst;
816 const u_char *cp3 = (const u_char *)netmask;
817 u_char *cplim = cp2 + *cp3;	/* end of masked region (netmask sa_len) */
818 u_char *cplim2 = cp2 + *cp1;	/* end of dst (src sa_len) */
820 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
825 *cp2++ = *cp1++ & *cp3++;
/* Zero the tail of dst beyond the masked bytes. */
827 bzero(cp2, cplim2 - cp2);
/*
 * Canonicalize a 5-tuple into 'key' so that a flow and its reverse
 * hash/compare equal: addresses and ports are stored in a fixed
 * order and the applied swaps are recorded in key->swap
 * (IPFW_KEY_SWAP_ADDRS / IPFW_KEY_SWAP_PORTS).
 * NOTE(review): the comparison conditions that pick each branch
 * (presumably saddr vs. daddr, sport vs. dport) are elided from
 * this listing.
 */
831 ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport,
832 in_addr_t daddr, uint16_t dport, uint8_t proto)
839 key->addr_u.addrs.addr1 = daddr;
840 key->addr_u.addrs.addr2 = saddr;
841 key->swap |= IPFW_KEY_SWAP_ADDRS;
843 key->addr_u.addrs.addr1 = saddr;
844 key->addr_u.addrs.addr2 = daddr;
848 key->port_u.ports.port1 = dport;
849 key->port_u.ports.port2 = sport;
850 key->swap |= IPFW_KEY_SWAP_PORTS;
852 key->port_u.ports.port1 = sport;
853 key->port_u.ports.port2 = dport;
/*
 * Tie-breakers: when one half of the tuple is symmetric, force the
 * swap flag of the other half so forward/reverse keys still agree.
 */
856 if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS))
857 key->swap |= IPFW_KEY_SWAP_PORTS;
858 if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS))
859 key->swap |= IPFW_KEY_SWAP_ADDRS;
/*
 * Inverse of ipfw_key_build(): recover the original 4-tuple
 * (saddr/sport, daddr/dport) from a canonicalized key by undoing
 * the swaps recorded in key->swap.
 */
863 ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport,
864 in_addr_t *daddr, uint16_t *dport)
867 if (key->swap & IPFW_KEY_SWAP_ADDRS) {
868 *saddr = key->addr_u.addrs.addr2;
869 *daddr = key->addr_u.addrs.addr1;
871 *saddr = key->addr_u.addrs.addr1;
872 *daddr = key->addr_u.addrs.addr2;
875 if (key->swap & IPFW_KEY_SWAP_PORTS) {
876 *sport = key->port_u.ports.port2;
877 *dport = key->port_u.ports.port1;
879 *sport = key->port_u.ports.port1;
880 *dport = key->port_u.ports.port2;
/*
 * RB-tree comparator for the per-cpu state tree (see RB_GENERATE
 * below).  Orders by protocol, then the packed address pair, then
 * the packed port pair; two keys whose swap flags are equal or
 * exactly complementary (XOR == IPFW_KEY_SWAP_ALL) describe the
 * same bidirectional flow.
 * NOTE(review): the 'return 1/-1/0' lines are elided from this
 * listing; only the comparison conditions are visible.
 */
885 ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2)
888 if (s1->st_proto > s2->st_proto)
890 if (s1->st_proto < s2->st_proto)
893 if (s1->st_addrs > s2->st_addrs)
895 if (s1->st_addrs < s2->st_addrs)
898 if (s1->st_ports > s2->st_ports)
900 if (s1->st_ports < s2->st_ports)
903 if (s1->st_swap == s2->st_swap ||
904 (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL)
907 if (s1->st_swap > s2->st_swap)
/*
 * RB-tree comparator for the global (CPU-shared) track counter
 * tree: protocol, packed addresses, packed ports, then the rule id
 * so different 'limit' rules keep separate counters.
 * NOTE(review): return statements are elided from this listing.
 */
914 ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2)
917 if (t1->tc_proto > t2->tc_proto)
919 if (t1->tc_proto < t2->tc_proto)
922 if (t1->tc_addrs > t2->tc_addrs)
924 if (t1->tc_addrs < t2->tc_addrs)
927 if (t1->tc_ports > t2->tc_ports)
929 if (t1->tc_ports < t2->tc_ports)
932 if (t1->tc_ruleid > t2->tc_ruleid)
934 if (t1->tc_ruleid < t2->tc_ruleid)
/*
 * RB-tree comparator for the per-cpu track tree.  Same key fields
 * as ipfw_trkcnt_cmp() except the final tie-breaker is the rule
 * pointer (tracks are per-cpu, so pointer identity is stable here).
 * NOTE(review): return statements are elided from this listing.
 */
941 ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2)
944 if (t1->t_proto > t2->t_proto)
946 if (t1->t_proto < t2->t_proto)
949 if (t1->t_addrs > t2->t_addrs)
951 if (t1->t_addrs < t2->t_addrs)
954 if (t1->t_ports > t2->t_ports)
956 if (t1->t_ports < t2->t_ports)
959 if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule)
961 if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule)
/*
 * Install a new global state limit and recompute the per-netisr
 * loose-count update threshold (state_max/20 == 5% of the limit,
 * divided evenly among netisr_ncpus CPUs).
 */
968 ipfw_state_max_set(int state_max)
971 ipfw_state_max = state_max;
972 /* Allow 5% states over-allocation. */
973 ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus;
/*
 * Collect the exact number of states by summing every per-cpu
 * context's ipfw_state_cnt.
 * NOTE(review): the 'return state_cnt;' line is elided here.
 */
977 ipfw_state_cntcoll(void)
979 int cpu, state_cnt = 0;
981 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
982 state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt;
/*
 * Refresh the global loose state count from the exact per-cpu
 * totals gathered by ipfw_state_cntcoll().
 */
987 ipfw_state_cntsync(void)
991 state_cnt = ipfw_state_cntcoll();
992 ipfw_gd.ipfw_state_loosecnt = state_cnt;
/*
 * Drop one reference on 'rule'; when the count reaches zero, free
 * the rule and its cross_rules array.  Must run on the rule's
 * owning CPU (asserted).
 * NOTE(review): the refcnt decrement between the KASSERTs and the
 * zero test is elided from this listing.
 */
997 ipfw_free_rule(struct ip_fw *rule)
999 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid));
1000 KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt));
1002 if (rule->refcnt == 0) {
1003 if (rule->cross_rules != NULL)
1004 kfree(rule->cross_rules, M_IPFW);
1005 kfree(rule, M_IPFW);
/*
 * External unreference callback (e.g. from dummynet(4)): release
 * the rule reference, then drop the module-wide ipfw_refcnt that
 * pins ipfw against unload.
 */
1012 ipfw_unref_rule(void *priv)
1014 ipfw_free_rule(priv);
1016 KASSERT(ipfw_gd.ipfw_refcnt > 0,
1017 ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt));
1018 atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1);
/*
 * Take an external reference on 'rule' on its owning CPU and bump
 * the module-wide ipfw_refcnt so the module cannot be unloaded
 * while the rule is referenced.
 * NOTE(review): the per-rule refcnt increment is elided from this
 * listing; only the module-wide counter bump is visible.
 */
1022 static __inline void
1023 ipfw_ref_rule(struct ip_fw *rule)
1025 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid));
1027 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
1033 * This macro maps an ip pointer into a layer3 header pointer of type T
1035 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))
/*
 * Match an O_ICMPTYPE instruction: cmd->d[0] is a bitmap indexed
 * by ICMP type; types above ICMP_MAXTYPE never match (also keeps
 * the shift in range).
 */
1038 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
1040 int type = L3HDR(struct icmp,ip)->icmp_type;
1042 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1 << type)));
1045 #define TT ((1 << ICMP_ECHO) | \
1046 (1 << ICMP_ROUTERSOLICIT) | \
1047 (1 << ICMP_TSTAMP) | \
1048 (1 << ICMP_IREQ) | \
1049 (1 << ICMP_MASKREQ))
/*
 * True if the ICMP packet is a query (echo, router solicit,
 * timestamp, info or mask request) per the TT bitmap above —
 * i.e. a packet that may legitimately create state.
 */
1052 is_icmp_query(struct ip *ip)
1054 int type = L3HDR(struct icmp, ip)->icmp_type;
1056 return (type <= ICMP_MAXTYPE && (TT & (1 << type)));
1062 * The following checks use two arrays of 8 or 16 bits to store the
1063 * bits that we want set or clear, respectively. They are in the
1064 * low and high half of cmd->arg1 or cmd->d[0].
1066 * We scan options and store the bits we find set. We succeed if
1068 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
1070 * The code is sometimes optimized not to store additional variables.
/*
 * Generic set/clear flag matcher (used for IP options, TCP options
 * and TCP flags): the low byte of cmd->arg1 holds the bits that
 * must be SET in 'bits', the high byte holds the bits that must be
 * CLEAR.  Returns 0 on mismatch (the final 'return 1' is elided
 * from this listing).
 * NOTE(review): the first test reads '(cmd->arg1 & 0xff) & bits',
 * but its comment ("some bits we want set were clear") implies
 * '& ~bits' — this line looks like a transcription defect; verify
 * against the pristine source before trusting it.
 */
1073 flags_match(ipfw_insn *cmd, uint8_t bits)
1078 if (((cmd->arg1 & 0xff) & bits) != 0)
1079 return 0; /* some bits we want set were clear */
1081 want_clear = (cmd->arg1 >> 8) & 0xff;
1082 if ((want_clear & bits) != want_clear)
1083 return 0; /* some bits we want clear were set */
/*
 * Walk the IP options area (between the fixed header and ip_hl*4),
 * accumulate IP_FW_IPOPT_* bits for each recognized option, and
 * hand the result to flags_match() against the O_IPOPT operand.
 * Returns 0 on malformed/truncated options.
 * NOTE(review): the switch statement and several case labels are
 * elided from this listing; only the bit-setting bodies remain.
 */
1088 ipopts_match(struct ip *ip, ipfw_insn *cmd)
1090 int optlen, bits = 0;
1091 u_char *cp = (u_char *)(ip + 1);
1092 int x = (ip->ip_hl << 2) - sizeof(struct ip);
1094 for (; x > 0; x -= optlen, cp += optlen) {
1095 int opt = cp[IPOPT_OPTVAL];
1097 if (opt == IPOPT_EOL)
1100 if (opt == IPOPT_NOP) {
1103 optlen = cp[IPOPT_OLEN];
1104 if (optlen <= 0 || optlen > x)
1105 return 0; /* invalid or truncated */
1110 bits |= IP_FW_IPOPT_LSRR;
1114 bits |= IP_FW_IPOPT_SSRR;
1118 bits |= IP_FW_IPOPT_RR;
1122 bits |= IP_FW_IPOPT_TS;
1129 return (flags_match(cmd, bits));
/*
 * Same idea as ipopts_match(), for TCP options: walk the option
 * area (between the fixed TCP header and th_off*4), accumulate
 * IP_FW_TCPOPT_* bits, then evaluate flags_match() against the
 * O_TCPOPTS operand.
 * NOTE(review): the opt/optlen extraction, the switch header and
 * some case labels are elided from this listing.
 */
1133 tcpopts_match(struct ip *ip, ipfw_insn *cmd)
1135 int optlen, bits = 0;
1136 struct tcphdr *tcp = L3HDR(struct tcphdr,ip);
1137 u_char *cp = (u_char *)(tcp + 1);
1138 int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
1140 for (; x > 0; x -= optlen, cp += optlen) {
1143 if (opt == TCPOPT_EOL)
1146 if (opt == TCPOPT_NOP) {
1156 bits |= IP_FW_TCPOPT_MSS;
1160 bits |= IP_FW_TCPOPT_WINDOW;
1163 case TCPOPT_SACK_PERMITTED:
1165 bits |= IP_FW_TCPOPT_SACK;
1168 case TCPOPT_TIMESTAMP:
1169 bits |= IP_FW_TCPOPT_TS;
1175 bits |= IP_FW_TCPOPT_CC;
1182 return (flags_match(cmd, bits));
/*
 * Match a packet's interface against an O_VIA/O_XMIT/O_RECV
 * operand: either by name (glob via kfnmatch, or exact strncmp —
 * the branch selection between the two is elided here) or by any
 * AF_INET address configured on the interface.  Returns 1 on
 * match, 0 otherwise; a NULL ifp never matches.
 */
1186 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
1188 if (ifp == NULL) /* no iface with this packet, match fails */
1191 /* Check by name or by IP address */
1192 if (cmd->name[0] != '\0') { /* match by name */
1195 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0)
1198 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
1202 struct ifaddr_container *ifac;
/* Per-cpu address list; we are pinned to mycpuid in a netisr. */
1204 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1205 struct ifaddr *ia = ifac->ifa;
1207 if (ia->ifa_addr == NULL)
1209 if (ia->ifa_addr->sa_family != AF_INET)
1211 if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
1212 (ia->ifa_addr))->sin_addr.s_addr)
1213 return(1); /* match */
1216 return(0); /* no match, fail ... */
/*
 * Argument helper for ksnprintf() into a fixed buffer at offset "len":
 * expands to (buf + len, space remaining), clamping the size to 0 once
 * the buffer is full so subsequent appends become no-ops.
 */
1219 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
1222 * We enter here when we have a rule with O_LOG.
1223 * XXX this function alone takes about 2Kbytes of code!
/*
 * Log a packet for a rule carrying the O_LOG action (f != NULL), or a
 * bogus packet (f == NULL).  Builds printable "action"/"proto"/
 * "fragment" strings and emits a single LOG_SECURITY line via log(9).
 * Logging is rate limited: by ctx->ipfw_norule_counter against
 * verbose_limit for bogus packets, or by the rule's log_left/max_log
 * counters; when a limit is hit a final "limit reached" notice is
 * printed.
 */
1226 ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen,
1227 struct ether_header *eh, struct mbuf *m, struct ifnet *oif)
1230 int limit_reached = 0;
1231 char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN];
1236 if (f == NULL) { /* bogus pkt */
1237 if (verbose_limit != 0 &&
1238 ctx->ipfw_norule_counter >= verbose_limit)
1240 ctx->ipfw_norule_counter++;
1241 if (ctx->ipfw_norule_counter == verbose_limit)
1242 limit_reached = verbose_limit;
1244 } else { /* O_LOG is the first action, find the real one */
1245 ipfw_insn *cmd = ACTION_PTR(f);
1246 ipfw_insn_log *l = (ipfw_insn_log *)cmd;
1248 if (l->max_log != 0 && l->log_left == 0)
1251 if (l->log_left == 0)
1252 limit_reached = l->max_log;
1253 cmd += F_LEN(cmd); /* point to first action */
/* O_PROB is only a modifier; the real action follows it. */
1254 if (cmd->opcode == O_PROB)
/* Render the action into action2[] (cases partly elided here). */
1258 switch (cmd->opcode) {
1264 if (cmd->arg1==ICMP_REJECT_RST) {
1266 } else if (cmd->arg1==ICMP_UNREACH_HOST) {
1269 ksnprintf(SNPARGS(action2, 0), "Unreach %d",
1283 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1);
1287 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1);
1291 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1);
1295 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1);
1299 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1);
1304 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
1307 len = ksnprintf(SNPARGS(action2, 0),
1309 kinet_ntoa(sa->sa.sin_addr, abuf));
1310 if (sa->sa.sin_port) {
1311 ksnprintf(SNPARGS(action2, len), ":%d",
/* Now render the protocol/address part into proto[]. */
1323 if (hlen == 0) { /* non-ip */
1324 ksnprintf(SNPARGS(proto, 0), "MAC");
1326 struct ip *ip = mtod(m, struct ip *);
1327 /* these three are all aliases to the same thing */
1328 struct icmp *const icmp = L3HDR(struct icmp, ip);
1329 struct tcphdr *const tcp = (struct tcphdr *)icmp;
1330 struct udphdr *const udp = (struct udphdr *)icmp;
1332 int ip_off, offset, ip_len;
1335 if (eh != NULL) { /* layer 2 packets are as on the wire */
1336 ip_off = ntohs(ip->ip_off);
1337 ip_len = ntohs(ip->ip_len);
1339 ip_off = ip->ip_off;
1340 ip_len = ip->ip_len;
1342 offset = ip_off & IP_OFFMASK;
1345 len = ksnprintf(SNPARGS(proto, 0), "TCP %s",
1346 kinet_ntoa(ip->ip_src, abuf));
/* NOTE(review): ports are presumably printed only for the first
 * fragment (offset == 0); the condition is not visible here. */
1348 ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
1349 ntohs(tcp->th_sport),
1350 kinet_ntoa(ip->ip_dst, abuf),
1351 ntohs(tcp->th_dport));
1353 ksnprintf(SNPARGS(proto, len), " %s",
1354 kinet_ntoa(ip->ip_dst, abuf));
1359 len = ksnprintf(SNPARGS(proto, 0), "UDP %s",
1360 kinet_ntoa(ip->ip_src, abuf));
1362 ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
1363 ntohs(udp->uh_sport),
1364 kinet_ntoa(ip->ip_dst, abuf),
1365 ntohs(udp->uh_dport));
1367 ksnprintf(SNPARGS(proto, len), " %s",
1368 kinet_ntoa(ip->ip_dst, abuf));
1374 len = ksnprintf(SNPARGS(proto, 0),
1379 len = ksnprintf(SNPARGS(proto, 0), "ICMP ");
1381 len += ksnprintf(SNPARGS(proto, len), "%s",
1382 kinet_ntoa(ip->ip_src, abuf));
1383 ksnprintf(SNPARGS(proto, len), " %s",
1384 kinet_ntoa(ip->ip_dst, abuf));
1388 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
1389 kinet_ntoa(ip->ip_src, abuf));
1390 ksnprintf(SNPARGS(proto, len), " %s",
1391 kinet_ntoa(ip->ip_dst, abuf));
/* Append fragment information when the packet is fragmented. */
1395 if (ip_off & (IP_MF | IP_OFFMASK)) {
1396 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
1397 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
1398 offset << 3, (ip_off & IP_MF) ? "+" : "");
1402 if (oif || m->m_pkthdr.rcvif) {
1403 log(LOG_SECURITY | LOG_INFO,
1404 "ipfw: %d %s %s %s via %s%s\n",
1405 f ? f->rulenum : -1,
1406 action, proto, oif ? "out" : "in",
1407 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
1410 log(LOG_SECURITY | LOG_INFO,
1411 "ipfw: %d %s %s [no if info]%s\n",
1412 f ? f->rulenum : -1,
1413 action, proto, fragment);
1416 if (limit_reached) {
1417 log(LOG_SECURITY | LOG_NOTICE,
1418 "ipfw: limit %d reached on entry %d\n",
1419 limit_reached, f ? f->rulenum : -1);
/* True when time "a" is at or before "b"; the subtraction keeps the
 * comparison valid across counter wraparound. */
1425 #define TIME_LEQ(a, b) ((a) - (b) <= 0)
/*
 * Unlink and destroy one state entry: detach it from its track (if
 * any), dropping the track's shared connection count, then remove it
 * from the per-context list and RB tree and update the state counters.
 */
1428 ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s)
1431 KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT,
1432 ("invalid state type %u", s->st_type));
1433 KASSERT(ctx->ipfw_state_cnt > 0,
1434 ("invalid state count %d", ctx->ipfw_state_cnt));
1436 if (s->st_track != NULL) {
1437 struct ipfw_track *t = s->st_track;
1439 KASSERT(!LIST_EMPTY(&t->t_state_list),
1440 ("track state list is empty"));
1441 LIST_REMOVE(s, st_trklink);
1443 KASSERT(*t->t_count > 0,
1444 ("invalid track count %d", *t->t_count));
/* t_count is shared between CPUs, so adjust it atomically. */
1445 atomic_subtract_int(t->t_count, 1);
1448 TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link);
1449 RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s);
1452 ctx->ipfw_state_cnt--;
1453 if (ctx->ipfw_state_loosecnt > 0)
1454 ctx->ipfw_state_loosecnt--;
/*
 * Aggressively reclaim states because we are running short of them.
 * If no expiry pass is active, kick one off with an unlimited scan
 * budget (IPFW_FLAG_STATEREAP also allows closed TCP states to be
 * reclaimed before their timeout).  If an expiry pass is already in
 * flight, walk the list from its anchor ourselves and delete expired
 * or closed states, up to reap_max deletions.
 */
1458 ipfw_state_reap(struct ipfw_context *ctx, int reap_max)
1460 struct ipfw_state *s, *anchor;
1463 if (reap_max < ipfw_state_reap_min)
1464 reap_max = ipfw_state_reap_min;
1466 if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) {
1468 * Kick start state expiring. Ignore scan limit,
1469 * we are short of states.
1471 ctx->ipfw_flags |= IPFW_FLAG_STATEREAP;
1472 expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max);
1473 ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP;
1478 * States are being expired.
1481 if (ctx->ipfw_state_cnt == 0)
1485 anchor = &ctx->ipfw_stateexp_anch;
1486 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
1488 * Ignore scan limit; we are short of states.
/* Advance the anchor past "s" so the in-flight expiry pass resumes
 * after whatever we process here. */
1491 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1492 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
1494 if (s->st_type == O_ANCHOR)
1497 if (IPFW_STATE_TCPCLOSED(s) ||
1498 TIME_LEQ(s->st_expire, time_uptime)) {
1499 ipfw_state_del(ctx, s);
1500 if (++expired >= reap_max)
/* Periodically re-check the global count; stop early once enough
 * headroom has been recovered. */
1502 if ((expired & 0xff) == 0 &&
1503 ipfw_state_cntcoll() + ipfw_state_headroom <=
1510 * Leave the anchor on the list, even if the end of the list has
1511 * been reached. ipfw_state_expire_more_dispatch() will handle
/*
 * Delete all states on this CPU's context; if "rule" is non-NULL only
 * states installed by that rule are deleted.  Anchor placeholders used
 * by the expiry machinery are skipped.
 */
1518 ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule)
1520 struct ipfw_state *s, *sn;
1522 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) {
1523 if (s->st_type == O_ANCHOR)
1525 if (rule != NULL && s->st_rule != rule)
1527 ipfw_state_del(ctx, s);
/*
 * Finish a state expiry pass: clear the in-progress flag and arm the
 * callout to start the next pass one second from now.
 */
1532 ipfw_state_expire_done(struct ipfw_context *ctx)
1535 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1536 ("stateexp is not in progress"));
1537 ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP;
1538 callout_reset(&ctx->ipfw_stateto_ch, hz,
1539 ipfw_state_expire_ipifunc, NULL);
/*
 * Schedule continuation of the current state expiry pass by sending
 * the per-context "more" netmsg back to this CPU's netisr; the pass
 * yields here so it never monopolizes the netisr thread.
 */
1543 ipfw_state_expire_more(struct ipfw_context *ctx)
1545 struct netmsg_base *nm = &ctx->ipfw_stateexp_more;
1547 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1548 ("stateexp is not in progress"));
1549 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
1550 ("stateexp more did not finish"));
1551 netisr_sendmsg_oncpu(nm);
/*
 * Core of the incremental state expiry pass.  Walks the state list
 * from "anchor", moving the anchor forward as it goes, deleting states
 * whose timeout has passed (and, in reap mode, closed TCP states).
 * The walk yields via ipfw_state_expire_more() when either the scan or
 * the expire budget is exhausted; otherwise it removes the anchor and
 * completes the pass via ipfw_state_expire_done().
 */
1555 ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor,
1556 int scan_max, int expire_max)
1558 struct ipfw_state *s;
1559 int scanned = 0, expired = 0;
1561 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1562 ("stateexp is not in progress"));
1564 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
1565 if (scanned++ >= scan_max) {
1566 ipfw_state_expire_more(ctx);
/* Move the anchor past "s" to record our progress. */
1570 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1571 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
1573 if (s->st_type == O_ANCHOR)
1576 if (TIME_LEQ(s->st_expire, time_uptime) ||
1577 ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
1578 IPFW_STATE_TCPCLOSED(s))) {
1579 ipfw_state_del(ctx, s);
1580 if (++expired >= expire_max) {
1581 ipfw_state_expire_more(ctx);
/* In reap mode, periodically stop once enough global headroom
 * has been recovered. */
1584 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
1585 (expired & 0xff) == 0 &&
1586 ipfw_state_cntcoll() + ipfw_state_headroom <=
1588 ipfw_state_expire_more(ctx);
1593 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1594 ipfw_state_expire_done(ctx);
/*
 * Netmsg handler that resumes an in-progress state expiry pass on this
 * CPU.  Replies to the message first so it can be reused, then either
 * finishes immediately (no states left) or continues the walk from the
 * saved anchor with the normal per-iteration budgets.
 */
1599 ipfw_state_expire_more_dispatch(netmsg_t nm)
1601 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1602 struct ipfw_state *anchor;
1604 ASSERT_NETISR_NCPUS(mycpuid);
1605 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1606 ("statexp is not in progress"));
/* Reply ASAP so the netmsg can be reused for the next "more". */
1609 netisr_replymsg(&nm->base, 0);
1611 anchor = &ctx->ipfw_stateexp_anch;
1612 if (ctx->ipfw_state_cnt == 0) {
1613 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1614 ipfw_state_expire_done(ctx);
1617 ipfw_state_expire_loop(ctx, anchor,
1618 ipfw_state_scan_max, ipfw_state_expire_max);
/*
 * Begin a state expiry pass on this CPU: mark it in progress, bail out
 * early when there are no states or when a non-reap pass already ran
 * within the last second, then insert the anchor at the list head and
 * run the incremental loop with the given budgets.
 */
1622 ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
1624 struct ipfw_state *anchor;
1626 KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0,
1627 ("stateexp is in progress"));
1628 ctx->ipfw_flags |= IPFW_FLAG_STATEEXP;
1630 if (ctx->ipfw_state_cnt == 0) {
1631 ipfw_state_expire_done(ctx);
1636 * Do not expire more than once per second, it is useless.
1638 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 &&
1639 ctx->ipfw_state_lastexp == time_uptime) {
1640 ipfw_state_expire_done(ctx);
1643 ctx->ipfw_state_lastexp = time_uptime;
1645 anchor = &ctx->ipfw_stateexp_anch;
1646 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
1647 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max));
/*
 * Netmsg handler for the periodic state-expiry kick.  Replies first,
 * then starts a new expiry pass unless one is already running.
 */
1651 ipfw_state_expire_dispatch(netmsg_t nm)
1653 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1655 ASSERT_NETISR_NCPUS(mycpuid);
/* Reply ASAP so the netmsg can be reused. */
1659 netisr_replymsg(&nm->base, 0);
1662 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) {
1663 /* Running; done. */
1666 ipfw_state_expire_start(ctx,
1667 ipfw_state_scan_max, ipfw_state_expire_max);
/*
 * Callout/IPI trampoline: forward the state-expiry request to this
 * CPU's netisr thread.  Only sends when the previous message has been
 * replied to (MSGF_DONE), so a pending kick is never double-queued.
 */
1671 ipfw_state_expire_ipifunc(void *dummy __unused)
1673 struct netmsg_base *msg;
1675 KKASSERT(mycpuid < netisr_ncpus);
1676 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm;
1679 if (msg->lmsg.ms_flags & MSGF_DONE)
1680 netisr_sendmsg_oncpu(msg);
/*
 * Track per-direction TCP sequence/ACK progress for a state entry.
 * For each direction the first seen SEQ/ACK is latched (the F_SEQFWD/
 * F_ACKFWD/F_SEQREV/F_ACKREV flags record "value is valid"), and later
 * segments must move the numbers forward (SEQ_GEQ) to be accepted.
 * Also records, in st_state, when a previously-seen FIN has been ACKed
 * by the peer (used to detect a fully closed connection).
 * NOTE(review): the return statements are outside this view; judging
 * by the caller, a zero return appears to mean "out-of-sequence,
 * do not refresh the state" — confirm against the full source.
 */
1685 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp)
1687 uint32_t seq = ntohl(tcp->th_seq);
1688 uint32_t ack = ntohl(tcp->th_ack);
/* RSTs are not subjected to the sequence sanity checks below. */
1690 if (tcp->th_flags & TH_RST)
1693 if (dir == MATCH_FORWARD) {
1694 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) {
1695 s->st_flags |= IPFW_STATE_F_SEQFWD;
1696 s->st_seq_fwd = seq;
1697 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) {
1698 s->st_seq_fwd = seq;
1700 /* Out-of-sequence; done. */
1703 if (tcp->th_flags & TH_ACK) {
1704 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) {
1705 s->st_flags |= IPFW_STATE_F_ACKFWD;
1706 s->st_ack_fwd = ack;
1707 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) {
1708 s->st_ack_fwd = ack;
1710 /* Out-of-sequence; done. */
/* The reverse side sent FIN (not yet ACKed); if this ACK covers
 * that FIN's sequence, mark the reverse FIN as ACKed. */
1714 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) ==
1715 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1)
1716 s->st_state |= (TH_ACK << 8);
1719 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) {
1720 s->st_flags |= IPFW_STATE_F_SEQREV;
1721 s->st_seq_rev = seq;
1722 } else if (SEQ_GEQ(seq, s->st_seq_rev)) {
1723 s->st_seq_rev = seq;
1725 /* Out-of-sequence; done. */
1728 if (tcp->th_flags & TH_ACK) {
1729 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) {
1730 s->st_flags |= IPFW_STATE_F_ACKREV;
1732 } else if (SEQ_GEQ(ack, s->st_ack_rev)) {
1733 s->st_ack_rev = ack;
1735 /* Out-of-sequence; done. */
/* Mirror image of the check above: forward FIN now ACKed. */
1739 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN &&
1740 s->st_ack_rev == s->st_seq_fwd + 1)
1741 s->st_state |= TH_ACK;
/*
 * Refresh a state entry for a newly matched packet.  For TCP the
 * packet's flags advance a small connection state machine kept in
 * st_state (forward-direction flags in the low byte, reverse in the
 * high byte) and st_expire is set from the lifetime appropriate to
 * that phase (SYN / established / FIN-wait / RST).  UDP and other
 * protocols just get their fixed lifetimes.
 */
1748 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir,
1749 const struct tcphdr *tcp, struct ipfw_state *s)
1752 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
1753 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS;
/* Out-of-sequence segments do not refresh the state. */
1755 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp))
1758 s->st_state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
1759 switch (s->st_state & IPFW_STATE_TCPSTATES) {
1760 case TH_SYN: /* opening */
1761 s->st_expire = time_uptime + dyn_syn_lifetime;
1764 case BOTH_SYN: /* move to established */
1765 case BOTH_SYN | TH_FIN: /* one side tries to close */
1766 case BOTH_SYN | (TH_FIN << 8):
1767 s->st_expire = time_uptime + dyn_ack_lifetime;
1770 case BOTH_SYN | BOTH_FIN: /* both sides closed */
1771 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) {
1772 /* And both FINs were ACKed. */
1773 s->st_expire = time_uptime + dyn_fin_lifetime;
1775 s->st_expire = time_uptime +
1776 dyn_finwait_lifetime;
1783 * reset or some invalid combination, but can also
1784 * occur if we use keep-state the wrong way.
1786 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0)
1787 kprintf("invalid state: 0x%x\n", s->st_state);
1789 s->st_expire = time_uptime + dyn_rst_lifetime;
1792 } else if (pkt->proto == IPPROTO_UDP) {
1793 s->st_expire = time_uptime + dyn_udp_lifetime;
1795 /* other protocols */
1796 s->st_expire = time_uptime + dyn_short_lifetime;
/*
 * Look up the state matching this packet's flow id in the per-CPU
 * RB tree.  Expired states, and closed TCP states hit by a fresh SYN
 * (port recycled too quickly), are deleted on sight and treated as a
 * miss.  On a hit the state is refreshed via ipfw_state_update(), its
 * track's expiry (if any) is extended, and the match direction
 * (MATCH_FORWARD/MATCH_REVERSE) is reported through *match_direction.
 */
1803 static struct ipfw_state *
1804 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt,
1805 int *match_direction, const struct tcphdr *tcp)
1807 struct ipfw_state *key, *s;
1808 int dir = MATCH_NONE;
/* Build the (direction-agnostic) lookup key in the context scratch. */
1810 key = &ctx->ipfw_state_tmpkey;
1811 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port,
1812 pkt->dst_ip, pkt->dst_port, pkt->proto);
1813 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key);
1815 goto done; /* not found. */
1816 if (TIME_LEQ(s->st_expire, time_uptime)) {
1818 ipfw_state_del(ctx, s);
1822 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) {
1823 /* TCP ports recycling is too fast. */
1824 ctx->ipfw_sts_tcprecycled++;
1825 ipfw_state_del(ctx, s);
/* st_swap encodes which endpoints were swapped when the key was
 * canonicalized; equal swap values mean the packet travels in the
 * same direction as the original flow. */
1830 if (s->st_swap == key->st_swap) {
1831 dir = MATCH_FORWARD;
1833 KASSERT((s->st_swap & key->st_swap) == 0,
1834 ("found mismatch state"));
1835 dir = MATCH_REVERSE;
1838 /* Update this state. */
1839 ipfw_state_update(pkt, dir, tcp, s);
1841 if (s->st_track != NULL) {
1842 /* This track has been used. */
1843 s->st_track->t_expire = time_uptime + dyn_short_lifetime;
1846 if (match_direction)
1847 *match_direction = dir;
/*
 * Convenience wrapper around ipfw_state_lookup(): return the rule the
 * matching state belongs to, or (implicitly, in code outside this
 * view) NULL-equivalent handling on a miss.  The rule must live on the
 * current CPU since rules are replicated per-CPU.
 */
1851 static __inline struct ip_fw *
1852 ipfw_state_lookup_rule(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt,
1853 int *match_direction, const struct tcphdr *tcp, uint16_t len)
1855 struct ipfw_state *s;
1857 s = ipfw_state_lookup(ctx, pkt, match_direction, tcp);
1861 KASSERT(s->st_rule->cpuid == mycpuid,
1862 ("rule %p (cpu%d) does not belong to the current cpu%d",
1863 s->st_rule, s->st_rule->cpuid, mycpuid));
1868 return (s->st_rule);
/*
 * Allocate and install a new state entry (O_KEEP_STATE or O_LIMIT)
 * for the given flow, insert it into the per-CPU RB tree and list,
 * run the initial ipfw_state_update() on it, and link it onto its
 * track (for O_LIMIT).  Returns the new state, or NULL when the
 * M_NULLOK allocation fails.
 */
1871 static struct ipfw_state *
1872 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
1873 uint16_t type, struct ip_fw *rule, struct ipfw_track *t,
1874 const struct tcphdr *tcp)
1876 struct ipfw_state *s, *dup;
1878 KASSERT(type == O_KEEP_STATE || type == O_LIMIT,
1879 ("invalid state type %u", type));
/* M_NULLOK: allocation may fail; caller must handle NULL. */
1881 s = kmalloc(sizeof(*s), M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO);
1883 ctx->ipfw_sts_nomem++;
1887 ipfw_key_build(&s->st_key, id->src_ip, id->src_port,
1888 id->dst_ip, id->dst_port, id->proto);
1893 ctx->ipfw_state_cnt++;
/* Batch per-CPU count increments into the global loose counter once
 * the local count crosses the update threshold. */
1894 ctx->ipfw_state_loosecnt++;
1895 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) {
1896 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt;
1897 ctx->ipfw_state_loosecnt = 0;
1900 dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s);
1902 panic("ipfw: state exists");
1903 TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link);
1906 * Update this state:
1907 * Set st_expire and st_state.
1909 ipfw_state_update(id, MATCH_FORWARD, tcp, s);
1912 /* Keep the track referenced. */
1913 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink);
/*
 * Free a track entry that no longer has states referencing it.
 * Removes it from the per-CPU tree/list and drops one reference on
 * its shared trkcnt; the last reference also removes the trkcnt from
 * the global tree (cached as the per-CPU spare when possible).
 * Returns (per the trk_freed flag) whether the shared trkcnt was
 * actually released.
 */
1920 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t)
1922 struct ipfw_trkcnt *trk;
1923 boolean_t trk_freed = FALSE;
1925 KASSERT(t->t_count != NULL, ("track anchor"))
1926 KASSERT(LIST_EMPTY(&t->t_state_list),
1927 ("invalid track is still referenced"));
1930 KASSERT(trk != NULL, ("track has no trkcnt"));
1932 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t);
1933 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link);
1937 * fdrop() style reference counting.
1938 * See kern/kern_descrip.c fdrop().
/* Lock-free refcount drop: retry loop around compare-and-set. */
1941 int refs = trk->tc_refs;
1944 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs));
/* Dropping the last reference: detach from the global tree. */
1947 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) {
1948 KASSERT(trk->tc_count == 0,
1949 ("%d states reference this trkcnt",
1951 RB_REMOVE(ipfw_trkcnt_tree,
1952 &ipfw_gd.ipfw_trkcnt_tree, trk);
1954 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0,
1955 ("invalid trkcnt cnt %d",
1956 ipfw_gd.ipfw_trkcnt_cnt));
1957 ipfw_gd.ipfw_trkcnt_cnt--;
/* Keep one freed trkcnt around as a per-CPU spare. */
1960 if (ctx->ipfw_trkcnt_spare == NULL)
1961 ctx->ipfw_trkcnt_spare = trk;
1969 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) {
/*
 * Free all tracks on this CPU's context; if "rule" is non-NULL only
 * tracks created by that rule are freed.  Anchor placeholders
 * (t_count == NULL) are skipped.
 */
1978 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule)
1980 struct ipfw_track *t, *tn;
1982 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) {
1983 if (t->t_count == NULL) /* anchor */
1985 if (rule != NULL && t->t_rule != rule)
1987 ipfw_track_free(ctx, t);
/*
 * Expire states hanging off one track: delete states whose timeout
 * has passed and, when "reap" is set, closed TCP states as well.
 * Throttled to at most one sweep per second per track.  The return
 * value (via "ret") reflects whether anything was expired.
 */
1992 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t,
1995 struct ipfw_state *s, *sn;
1996 boolean_t ret = FALSE;
1998 KASSERT(t->t_count != NULL, ("track anchor"));
2000 if (LIST_EMPTY(&t->t_state_list))
2004 * Do not expire more than once per second, it is useless.
2006 if (t->t_lastexp == time_uptime)
2008 t->t_lastexp = time_uptime;
2010 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) {
2011 if (TIME_LEQ(s->st_expire, time_uptime) ||
2012 (reap && IPFW_STATE_TCPCLOSED(s))) {
2013 KASSERT(s->st_track == t,
2014 ("state track %p does not match %p",
2016 ipfw_state_del(ctx, s);
/*
 * Get a trkcnt: prefer the per-CPU spare cached by ipfw_track_free(),
 * otherwise allocate a cache-aligned one (M_NULLOK — may return NULL).
 */
2023 static __inline struct ipfw_trkcnt *
2024 ipfw_trkcnt_alloc(struct ipfw_context *ctx)
2026 struct ipfw_trkcnt *trk;
2028 if (ctx->ipfw_trkcnt_spare != NULL) {
2029 trk = ctx->ipfw_trkcnt_spare;
2030 ctx->ipfw_trkcnt_spare = NULL;
2032 trk = kmalloc_cachealign(sizeof(*trk), M_IPFW,
2033 M_INTWAIT | M_NULLOK);
/*
 * Finish a track expiry pass: clear the in-progress flag and arm the
 * callout for the next pass one second from now.
 */
2039 ipfw_track_expire_done(struct ipfw_context *ctx)
2042 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2043 ("trackexp is not in progress"));
2044 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP;
2045 callout_reset(&ctx->ipfw_trackto_ch, hz,
2046 ipfw_track_expire_ipifunc, NULL);
/*
 * Schedule continuation of the current track expiry pass by resending
 * the per-context "more" netmsg to this CPU's netisr.
 */
2050 ipfw_track_expire_more(struct ipfw_context *ctx)
2052 struct netmsg_base *nm = &ctx->ipfw_trackexp_more;
2054 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2055 ("trackexp is not in progress"));
2056 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
2057 ("trackexp more did not finish"));
2058 netisr_sendmsg_oncpu(nm);
/*
 * Core of the incremental track expiry pass (mirrors
 * ipfw_state_expire_loop).  Walks the track list from "anchor",
 * first expiring each track's states, then freeing tracks that are
 * unreferenced and timed out (or any unreferenced track in reap
 * mode).  Yields via ipfw_track_expire_more() when a budget runs out;
 * otherwise completes the pass via ipfw_track_expire_done().
 */
2062 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor,
2063 int scan_max, int expire_max)
2065 struct ipfw_track *t;
2066 int scanned = 0, expired = 0;
2067 boolean_t reap = FALSE;
2069 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2070 ("trackexp is not in progress"));
2072 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP)
2075 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
2076 if (scanned++ >= scan_max) {
2077 ipfw_track_expire_more(ctx);
/* Advance the anchor past "t" to record our progress. */
2081 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2082 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);
2084 if (t->t_count == NULL) /* anchor */
2087 ipfw_track_state_expire(ctx, t, reap);
2088 if (!LIST_EMPTY(&t->t_state_list)) {
2089 /* There are states referencing this track. */
2093 if (TIME_LEQ(t->t_expire, time_uptime) || reap) {
2095 if (ipfw_track_free(ctx, t)) {
2096 if (++expired >= expire_max) {
2097 ipfw_track_expire_more(ctx);
2103 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2104 ipfw_track_expire_done(ctx);
/*
 * Begin a track expiry pass on this CPU: mark it in progress, bail
 * out early if there are no tracks or a non-reap pass already ran
 * within the last second, then insert the anchor at the list head and
 * run the incremental loop with the given budgets.
 */
2109 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
2111 struct ipfw_track *anchor;
2113 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0,
2114 ("trackexp is in progress"));
2115 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP;
2117 if (RB_EMPTY(&ctx->ipfw_track_tree)) {
2118 ipfw_track_expire_done(ctx);
2123 * Do not expire more than once per second, it is useless.
2125 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 &&
2126 ctx->ipfw_track_lastexp == time_uptime) {
2127 ipfw_track_expire_done(ctx);
2130 ctx->ipfw_track_lastexp = time_uptime;
2132 anchor = &ctx->ipfw_trackexp_anch;
2133 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link);
2134 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max));
/*
 * Netmsg handler that resumes an in-progress track expiry pass on
 * this CPU.  Replies to the message first so it can be reused, then
 * either finishes (no tracks left) or continues the walk from the
 * saved anchor with the normal budgets.
 */
2138 ipfw_track_expire_more_dispatch(netmsg_t nm)
2140 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
2141 struct ipfw_track *anchor;
2143 ASSERT_NETISR_NCPUS(mycpuid);
2144 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2145 ("trackexp is not in progress"));
/* Reply ASAP so the netmsg can be reused for the next "more". */
2148 netisr_replymsg(&nm->base, 0);
2150 anchor = &ctx->ipfw_trackexp_anch;
2151 if (RB_EMPTY(&ctx->ipfw_track_tree)) {
2152 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2153 ipfw_track_expire_done(ctx);
2156 ipfw_track_expire_loop(ctx, anchor,
2157 ipfw_track_scan_max, ipfw_track_expire_max);
/*
 * Netmsg handler for the periodic track-expiry kick.  Replies first,
 * then starts a new expiry pass unless one is already running.
 */
2161 ipfw_track_expire_dispatch(netmsg_t nm)
2163 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
2165 ASSERT_NETISR_NCPUS(mycpuid);
/* Reply ASAP so the netmsg can be reused. */
2169 netisr_replymsg(&nm->base, 0);
2172 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) {
2173 /* Running; done. */
2176 ipfw_track_expire_start(ctx,
2177 ipfw_track_scan_max, ipfw_track_expire_max);
/*
 * Callout/IPI trampoline: forward the track-expiry request to this
 * CPU's netisr thread, but only when the previous message has been
 * replied to (MSGF_DONE) so it is never double-queued.
 */
2181 ipfw_track_expire_ipifunc(void *dummy __unused)
2183 struct netmsg_base *msg;
2185 KKASSERT(mycpuid < netisr_ncpus);
2186 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm;
2189 if (msg->lmsg.ms_flags & MSGF_DONE)
2190 netisr_sendmsg_oncpu(msg);
/*
 * Aggressively reclaim tracks because we are running short of them
 * (mirrors ipfw_state_reap).  If no track expiry pass is active,
 * start one in reap mode with an unlimited scan budget; otherwise
 * walk from the in-flight pass's anchor, expiring each track's
 * states and freeing unreferenced tracks, up to ipfw_track_reap_max
 * deletions.
 */
2195 ipfw_track_reap(struct ipfw_context *ctx)
2197 struct ipfw_track *t, *anchor;
2200 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) {
2202 * Kick start track expiring. Ignore scan limit,
2203 * we are short of tracks.
2205 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP;
2206 expired = ipfw_track_expire_start(ctx, INT_MAX,
2207 ipfw_track_reap_max);
2208 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP;
2213 * Tracks are being expired.
2216 if (RB_EMPTY(&ctx->ipfw_track_tree))
2220 anchor = &ctx->ipfw_trackexp_anch;
2221 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
2223 * Ignore scan limit; we are short of tracks.
/* Advance the anchor past "t" so the in-flight pass resumes after
 * whatever we process here. */
2226 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2227 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);
2229 if (t->t_count == NULL) /* anchor */
2232 ipfw_track_state_expire(ctx, t, TRUE);
2233 if (!LIST_EMPTY(&t->t_state_list)) {
2234 /* There are states referencing this track. */
2238 if (ipfw_track_free(ctx, t)) {
2239 if (++expired >= ipfw_track_reap_max) {
2240 ipfw_track_expire_more(ctx);
2247 * Leave the anchor on the list, even if the end of the list has
2248 * been reached. ipfw_track_expire_more_dispatch() will handle
/*
 * Find or create the track entry used by an O_LIMIT rule for this
 * flow.  The track key is the flow id masked by limit_mask (only the
 * address/port fields selected by the mask participate).  The shared
 * per-rule counter (ipfw_trkcnt) lives in a global RB tree and is
 * reference-counted across CPUs; when the global track limit is hit,
 * expiry is kicked on all netisr CPUs and local reaping is attempted
 * before giving up.  Returns the track, or NULL on allocation failure
 * or overflow.
 */
2254 static struct ipfw_track *
2255 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2256 uint16_t limit_mask, struct ip_fw *rule)
2258 struct ipfw_track *key, *t, *dup;
2259 struct ipfw_trkcnt *trk, *ret;
2260 boolean_t do_expire = FALSE;
2262 KASSERT(rule->track_ruleid != 0,
2263 ("rule %u has no track ruleid", rule->rulenum));
/* Build the masked lookup key in the context scratch area. */
2265 key = &ctx->ipfw_track_tmpkey;
2266 key->t_proto = id->proto;
2270 if (limit_mask & DYN_SRC_ADDR)
2271 key->t_saddr = id->src_ip;
2272 if (limit_mask & DYN_DST_ADDR)
2273 key->t_daddr = id->dst_ip;
2274 if (limit_mask & DYN_SRC_PORT)
2275 key->t_sport = id->src_port;
2276 if (limit_mask & DYN_DST_PORT)
2277 key->t_dport = id->dst_port;
2279 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key);
2283 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK);
2285 ctx->ipfw_tks_nomem++;
2289 t->t_key = key->t_key;
2292 LIST_INIT(&t->t_state_list);
2294 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) {
2295 time_t globexp, uptime;
2301 * Do not expire globally more than once per second,
/* Race one CPU into updating the global expiry stamp; the winner
 * kicks track expiry on all other netisr CPUs. */
2304 uptime = time_uptime;
2305 globexp = ipfw_gd.ipfw_track_globexp;
2306 if (globexp != uptime &&
2307 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp,
2311 /* Expire tracks on other CPUs. */
2312 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2315 lwkt_send_ipiq(globaldata_find(cpu),
2316 ipfw_track_expire_ipifunc, NULL);
2320 trk = ipfw_trkcnt_alloc(ctx);
2323 struct ipfw_trkcnt *tkey;
2325 tkey = &ctx->ipfw_trkcnt_tmpkey;
2326 key = NULL; /* tkey overlaps key */
2328 tkey->tc_key = t->t_key;
2329 tkey->tc_ruleid = rule->track_ruleid;
/* Another CPU may already have the shared counter installed. */
2332 trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2337 ctx->ipfw_tks_reap++;
2338 if (ipfw_track_reap(ctx) > 0) {
2339 if (ipfw_gd.ipfw_trkcnt_cnt <
2341 trk = ipfw_trkcnt_alloc(ctx);
2344 ctx->ipfw_tks_cntnomem++;
2346 ctx->ipfw_tks_overflow++;
2349 ctx->ipfw_tks_reapfailed++;
2350 ctx->ipfw_tks_overflow++;
2353 ctx->ipfw_tks_cntnomem++;
2358 KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus,
2359 ("invalid trkcnt refs %d", trk->tc_refs));
2360 atomic_add_int(&trk->tc_refs, 1);
2364 trk->tc_key = t->t_key;
2365 trk->tc_ruleid = rule->track_ruleid;
2369 trk->tc_rulenum = rule->rulenum;
/* Publish the new counter; on a lost race, keep the existing one
 * and stash ours as the per-CPU spare. */
2372 ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2375 KASSERT(ret->tc_refs > 0 &&
2376 ret->tc_refs < netisr_ncpus,
2377 ("invalid trkcnt refs %d", ret->tc_refs));
2378 KASSERT(ctx->ipfw_trkcnt_spare == NULL,
2379 ("trkcnt spare was installed"));
2380 ctx->ipfw_trkcnt_spare = trk;
2383 ipfw_gd.ipfw_trkcnt_cnt++;
2385 atomic_add_int(&trk->tc_refs, 1);
2388 t->t_count = &trk->tc_count;
2391 dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2393 panic("ipfw: track exists");
2394 TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link);
2396 t->t_expire = time_uptime + dyn_short_lifetime;
2401 * Install state for rule type cmd->o.opcode
2403 * Returns 1 (failure) if state is not installed because of errors or because
2404 * states limitations are enforced.
/*
 * Entry point for O_KEEP_STATE / O_LIMIT actions.  First enforces the
 * global state limit — reaping locally and, if still over the limit,
 * racing one CPU into kicking expiry on all netisr CPUs (at most once
 * per second).  Then installs either a plain keep-state entry or a
 * limit-tracked entry, enforcing the per-track connection limit with
 * an atomic compare-and-set on the shared counter.
 */
2407 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule,
2408 ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp)
2410 struct ipfw_state *s;
2411 struct ipfw_track *t;
2414 if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max &&
2415 (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) {
2416 boolean_t overflow = TRUE;
2418 ctx->ipfw_sts_reap++;
2419 if (ipfw_state_reap(ctx, diff) == 0)
2420 ctx->ipfw_sts_reapfailed++;
2421 if (ipfw_state_cntsync() < ipfw_state_max)
2425 time_t globexp, uptime;
2429 * Do not expire globally more than once per second,
2432 uptime = time_uptime;
2433 globexp = ipfw_gd.ipfw_state_globexp;
2434 if (globexp == uptime ||
2435 !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp,
2437 ctx->ipfw_sts_overflow++;
2441 /* Expire states on other CPUs. */
2442 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2445 lwkt_send_ipiq(globaldata_find(cpu),
2446 ipfw_state_expire_ipifunc, NULL);
2448 ctx->ipfw_sts_overflow++;
2453 switch (cmd->o.opcode) {
2454 case O_KEEP_STATE: /* bidir rule */
2455 s = ipfw_state_add(ctx, &args->f_id, O_KEEP_STATE, rule, NULL,
2461 case O_LIMIT: /* limit number of sessions */
2462 t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule);
/* Over the limit: try once to expire this track's states before
 * rejecting the new session. */
2466 if (*t->t_count >= cmd->conn_limit) {
2467 if (!ipfw_track_state_expire(ctx, t, TRUE))
/* CAS loop: only claim a slot if the count is still below the
 * limit at the moment of the increment. */
2471 count = *t->t_count;
2472 if (count >= cmd->conn_limit)
2474 if (atomic_cmpset_int(t->t_count, count, count + 1))
2478 s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp);
/* State allocation failed; give the claimed slot back. */
2481 atomic_subtract_int(t->t_count, 1);
2487 panic("unknown state type %u\n", cmd->o.opcode);
/*
 * Look up an IPv4 address in a lookup table (radix tree).  Returns 1
 * and stamps the entry's last-use time on a match, 0 when the table
 * does not exist or no prefix covers the address.
 */
2493 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid,
2494 const struct in_addr *in)
2496 struct radix_node_head *rnh;
2497 struct sockaddr_in sin;
2498 struct ipfw_tblent *te;
2500 KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid));
2501 rnh = ctx->ipfw_tables[tableid];
2503 return (0); /* no match */
2505 memset(&sin, 0, sizeof(sin));
2506 sin.sin_family = AF_INET;
2507 sin.sin_len = sizeof(sin);
2510 te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh);
2512 return (0); /* no match */
2515 te->te_lastuse = time_second;
2516 return (1); /* match */
2520 * Transmit a TCP packet, containing either a RST or a keepalive.
2521 * When flags & TH_RST, we are sending a RST packet, because of a
2522 * "reset" action matched the packet.
2523 * Otherwise we are sending a keepalive, and flags & TH_
2525 * Only {src,dst}_{ip,port} of "id" are used.
/*
 * Builds a minimal IP+TCP mbuf from scratch (no payload, no options),
 * computes the TCP checksum over a pseudo-header, marks the mbuf as
 * firewall-generated so it bypasses re-filtering, and hands it to
 * ip_output() with a locally constructed fake route.
 */
2528 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags)
2533 struct route sro; /* fake route */
2535 MGETHDR(m, M_NOWAIT, MT_HEADER);
2538 m->m_pkthdr.rcvif = NULL;
2539 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
/* Leave room for the link-layer header to be prepended later. */
2540 m->m_data += max_linkhdr;
2542 ip = mtod(m, struct ip *);
2543 bzero(ip, m->m_len);
2544 tcp = (struct tcphdr *)(ip + 1); /* no IP options */
2545 ip->ip_p = IPPROTO_TCP;
2549 * Assume we are sending a RST (or a keepalive in the reverse
2550 * direction), swap src and destination addresses and ports.
2552 ip->ip_src.s_addr = htonl(id->dst_ip);
2553 ip->ip_dst.s_addr = htonl(id->src_ip);
2554 tcp->th_sport = htons(id->dst_port);
2555 tcp->th_dport = htons(id->src_port);
2556 if (flags & TH_RST) { /* we are sending a RST */
/* RST in response to an ACK: use the peer's ACK as our SEQ. */
2557 if (flags & TH_ACK) {
2558 tcp->th_seq = htonl(ack);
2559 tcp->th_ack = htonl(0);
2560 tcp->th_flags = TH_RST;
2564 tcp->th_seq = htonl(0);
2565 tcp->th_ack = htonl(seq);
2566 tcp->th_flags = TH_RST | TH_ACK;
2570 * We are sending a keepalive. flags & TH_SYN determines
2571 * the direction, forward if set, reverse if clear.
2572 * NOTE: seq and ack are always assumed to be correct
2573 * as set by the caller. This may be confusing...
2575 if (flags & TH_SYN) {
2577 * we have to rewrite the correct addresses!
2579 ip->ip_dst.s_addr = htonl(id->dst_ip);
2580 ip->ip_src.s_addr = htonl(id->src_ip);
2581 tcp->th_dport = htons(id->dst_port);
2582 tcp->th_sport = htons(id->src_port);
2584 tcp->th_seq = htonl(seq);
2585 tcp->th_ack = htonl(ack);
2586 tcp->th_flags = TH_ACK;
2590 * set ip_len to the payload size so we can compute
2591 * the tcp checksum on the pseudoheader
2592 * XXX check this, could save a couple of words ?
2594 ip->ip_len = htons(sizeof(struct tcphdr));
2595 tcp->th_sum = in_cksum(m, m->m_pkthdr.len);
2598 * now fill fields left out earlier
2600 ip->ip_ttl = ip_defttl;
2601 ip->ip_len = m->m_pkthdr.len;
2603 bzero(&sro, sizeof(sro));
2604 ip_rtaddr(ip->ip_dst, &sro);
/* Mark as firewall-generated so ipfw does not inspect it again. */
2606 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED;
2607 ip_output(m, NULL, &sro, 0, NULL, NULL);
2613 * Send a reject message, consuming the mbuf passed as an argument.
/*
 * For a "reject" action: either generate an ICMP unreachable of the
 * given code (consuming args->m via icmp_error), or — for
 * ICMP_REJECT_RST on a first-fragment TCP segment — answer with a
 * TCP RST built by send_pkt().  Segments that already carry RST are
 * not answered.
 */
2616 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len)
2618 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
2619 /* We need the IP header in host order for icmp_error(). */
2620 if (args->eh != NULL) {
2621 struct ip *ip = mtod(args->m, struct ip *);
2623 ip->ip_len = ntohs(ip->ip_len);
2624 ip->ip_off = ntohs(ip->ip_off);
2626 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
2627 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
2628 struct tcphdr *const tcp =
2629 L3HDR(struct tcphdr, mtod(args->m, struct ip *));
2631 if ((tcp->th_flags & TH_RST) == 0) {
2632 send_pkt(&args->f_id, ntohl(tcp->th_seq),
2633 ntohl(tcp->th_ack), tcp->th_flags | TH_RST);
2643 * Given an ip_fw *, lookup_next_rule will return a pointer
2644 * to the next rule, which can be either the jump
2645 * target (for skipto instructions) or the next one in the list (in
2646 * all other cases including a missing jump target).
2647 * The result is also written in the "next_rule" field of the rule.
2648 * Backward jumps are not allowed, so start looking from the next
2651 * This never returns NULL -- in case we do not have an exact match,
2652 * the next rule is returned. When the ruleset is changed,
2653 * pointers are flushed so we are always correct.
2655 static struct ip_fw *
2656 lookup_next_rule(struct ip_fw *me)
2658 struct ip_fw *rule = NULL;
2661 /* look for action, in case it is a skipto */
2662 cmd = ACTION_PTR(me);
/* O_LOG precedes the real action; step over it. */
2663 if (cmd->opcode == O_LOG)
2665 if (cmd->opcode == O_SKIPTO) {
/* Forward scan only: first rule numbered >= the skipto target. */
2666 for (rule = me->next; rule; rule = rule->next) {
2667 if (rule->rulenum >= cmd->arg1)
2671 if (rule == NULL) /* failure or not a skipto */
/* Cache the result so subsequent packets skip the scan. */
2673 me->next_rule = rule;
/*
 * Match a packet against an O_UID/O_GID rule by finding the local
 * TCP/UDP PCB that owns the connection and comparing its socket
 * credential: uid equality for O_UID, group membership for O_GID.
 * The PCB hash is probed in wildcard order depending on whether the
 * packet is outbound (oif != NULL) or inbound.
 */
2678 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif,
2679 enum ipfw_opcodes opcode, uid_t uid)
2681 struct in_addr src_ip, dst_ip;
2682 struct inpcbinfo *pi;
/* Only TCP and UDP have PCBs to look up. */
2686 if (fid->proto == IPPROTO_TCP) {
2688 pi = &tcbinfo[mycpuid];
2689 } else if (fid->proto == IPPROTO_UDP) {
2691 pi = &udbinfo[mycpuid];
2697 * Values in 'fid' are in host byte order
2699 dst_ip.s_addr = htonl(fid->dst_ip);
2700 src_ip.s_addr = htonl(fid->src_ip);
/* NOTE(review): the two lookups appear to be the out/in directions
 * (local/foreign swapped); the selecting condition is not visible in
 * this view. */
2702 pcb = in_pcblookup_hash(pi,
2703 dst_ip, htons(fid->dst_port),
2704 src_ip, htons(fid->src_port),
2707 pcb = in_pcblookup_hash(pi,
2708 src_ip, htons(fid->src_port),
2709 dst_ip, htons(fid->dst_port),
2712 if (pcb == NULL || pcb->inp_socket == NULL)
2715 if (opcode == O_UID) {
2716 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b))
2717 return !socheckuid(pcb->inp_socket, uid);
2720 return groupmember(uid, pcb->inp_socket->so_cred);
2725 * The main check routine for the firewall.
2727 * All arguments are in args so we can modify them and return them
2728 * back to the caller.
2732 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
2733 * Starts with the IP header.
2734 * args->eh (in) Mac header if present, or NULL for layer3 packet.
2735 * args->oif Outgoing interface, or NULL if packet is incoming.
2736 * The incoming interface is in the mbuf. (in)
2738 * args->rule Pointer to the last matching rule (in/out)
2739 * args->f_id Addresses grabbed from the packet (out)
2743 * If the packet was denied/rejected and has been dropped, *m is equal
2744 * to NULL upon return.
2746 * IP_FW_DENY the packet must be dropped.
2747 * IP_FW_PASS The packet is to be accepted and routed normally.
2748 * IP_FW_DIVERT Divert the packet to port (args->cookie)
2749 * IP_FW_TEE Tee the packet to port (args->cookie)
2750 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie)
2751 * IP_FW_CONTINUE Continue processing on another cpu.
2754 ipfw_chk(struct ip_fw_args *args)
/*
 * Main packet-filter entry point.  Walks the per-CPU rule chain,
 * evaluating each rule's micro-instructions against the packet in
 * args->m, and returns one of the IP_FW_* verdicts documented in the
 * comment above this function.  See the local-variable commentary
 * below for the invariants the opcode handlers rely on.
 */
2757 * Local variables hold state during the processing of a packet.
2759 * IMPORTANT NOTE: to speed up the processing of rules, there
2760 * are some assumption on the values of the variables, which
2761 * are documented here. Should you change them, please check
2762 * the implementation of the various instructions to make sure
2763 * that they still work.
2765 * args->eh The MAC header. It is non-null for a layer2
2766 * packet, it is NULL for a layer-3 packet.
2768 * m | args->m Pointer to the mbuf, as received from the caller.
2769 * It may change if ipfw_chk() does an m_pullup, or if it
2770 * consumes the packet because it calls send_reject().
2771 * XXX This has to change, so that ipfw_chk() never modifies
2772 * or consumes the buffer.
2773 * ip is simply an alias of the value of m, and it is kept
2774 * in sync with it (the packet is supposed to start with
2777 struct mbuf *m = args->m;
2778 struct ip *ip = mtod(m, struct ip *);
2781 * oif | args->oif If NULL, ipfw_chk has been called on the
2782 * inbound path (ether_input, ip_input).
2783 * If non-NULL, ipfw_chk has been called on the outbound path
2784 * (ether_output, ip_output).
2786 struct ifnet *oif = args->oif;
2788 struct ip_fw *f = NULL; /* matching rule */
2789 int retval = IP_FW_PASS;
2791 struct divert_info *divinfo;
2794 * hlen The length of the IPv4 header.
2795 * hlen >0 means we have an IPv4 packet.
2797 u_int hlen = 0; /* hlen >0 means we have an IP pkt */
2800 * offset The offset of a fragment. offset != 0 means that
2801 * we have a fragment at this offset of an IPv4 packet.
2802 * offset == 0 means that (if this is an IPv4 packet)
2803 * this is the first or only fragment.
2808 * Local copies of addresses. They are only valid if we have
2811 * proto The protocol. Set to 0 for non-ip packets,
2812 * or to the protocol read from the packet otherwise.
2813 * proto != 0 means that we have an IPv4 packet.
2815 * src_port, dst_port port numbers, in HOST format. Only
2816 * valid for TCP and UDP packets.
2818 * src_ip, dst_ip ip addresses, in NETWORK format.
2819 * Only valid for IPv4 packets.
2822 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
2823 struct in_addr src_ip, dst_ip; /* NOTE: network format */
2824 uint16_t ip_len = 0;
2827 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
2828 * MATCH_NONE when checked and not matched (dyn_f = NULL),
2829 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
2831 int dyn_dir = MATCH_UNKNOWN;
2832 struct ip_fw *dyn_f = NULL;
2833 int cpuid = mycpuid;
2834 struct ipfw_context *ctx;
2836 ASSERT_NETISR_NCPUS(cpuid);
2837 ctx = ipfw_ctx[cpuid];
/* Packets generated by ipfw itself (e.g. RSTs) are never re-filtered. */
2839 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED)
2840 return IP_FW_PASS; /* accept */
2842 if (args->eh == NULL || /* layer 3 packet */
2843 (m->m_pkthdr.len >= sizeof(struct ip) &&
2844 ntohs(args->eh->ether_type) == ETHERTYPE_IP))
2845 hlen = ip->ip_hl << 2;
2848 * Collect parameters into local variables for faster matching.
2850 if (hlen == 0) { /* do not grab addresses for non-ip pkts */
2851 proto = args->f_id.proto = 0; /* mark f_id invalid */
2852 goto after_ip_checks;
2855 proto = args->f_id.proto = ip->ip_p;
2856 src_ip = ip->ip_src;
2857 dst_ip = ip->ip_dst;
2858 if (args->eh != NULL) { /* layer 2 packets are as on the wire */
2859 offset = ntohs(ip->ip_off) & IP_OFFMASK;
2860 ip_len = ntohs(ip->ip_len);
2862 offset = ip->ip_off & IP_OFFMASK;
2863 ip_len = ip->ip_len;
2866 #define PULLUP_TO(len) \
2868 if (m->m_len < (len)) { \
2869 args->m = m = m_pullup(m, (len));\
2871 goto pullup_failed; \
2872 ip = mtod(m, struct ip *); \
2882 PULLUP_TO(hlen + sizeof(struct tcphdr));
2883 tcp = L3HDR(struct tcphdr, ip);
2884 dst_port = tcp->th_dport;
2885 src_port = tcp->th_sport;
2886 args->f_id.flags = tcp->th_flags;
2894 PULLUP_TO(hlen + sizeof(struct udphdr));
2895 udp = L3HDR(struct udphdr, ip);
2896 dst_port = udp->uh_dport;
2897 src_port = udp->uh_sport;
2902 PULLUP_TO(hlen + 4); /* type, code and checksum. */
2903 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type;
2911 args->f_id.src_ip = ntohl(src_ip.s_addr);
2912 args->f_id.dst_ip = ntohl(dst_ip.s_addr);
2913 args->f_id.src_port = src_port = ntohs(src_port);
2914 args->f_id.dst_port = dst_port = ntohs(dst_port);
2919 * Packet has already been tagged. Look for the next rule
2920 * to restart processing.
2922 * If fw_one_pass != 0 then just accept it.
2923 * XXX should not happen here, but optimized out in
2926 if (fw_one_pass && !args->cont)
2930 /* This rule is being/has been flushed */
2934 KASSERT(args->rule->cpuid == cpuid,
2935 ("rule used on cpu%d", cpuid));
2937 /* This rule was deleted */
2938 if (args->rule->rule_flags & IPFW_RULE_F_INVALID)
2941 f = args->rule->next_rule;
2943 f = lookup_next_rule(args->rule);
2946 * Find the starting rule. It can be either the first
2947 * one, or the one after divert_rule if asked so.
2951 KKASSERT(!args->cont);
2953 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL);
2955 divinfo = m_tag_data(mtag);
2956 skipto = divinfo->skipto;
2961 f = ctx->ipfw_layer3_chain;
2962 if (args->eh == NULL && skipto != 0) {
2963 /* No skipto during rule flushing */
2967 if (skipto >= IPFW_DEFAULT_RULE)
2968 return IP_FW_DENY; /* invalid */
2970 while (f && f->rulenum <= skipto)
2972 if (f == NULL) /* drop packet */
2974 } else if (ipfw_flushing) {
2975 /* Rules are being flushed; skip to default rule */
2976 f = ctx->ipfw_default_rule;
2979 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL)
2980 m_tag_delete(m, mtag);
2983 * Now scan the rules, and parse microinstructions for each rule.
2985 for (; f; f = f->next) {
2988 int skip_or; /* skip rest of OR block */
/* Rules in a disabled set are skipped entirely. */
2991 if (ctx->ipfw_set_disable & (1 << f->set))
2995 for (l = f->cmd_len, cmd = f->cmd; l > 0;
2996 l -= cmdlen, cmd += cmdlen) {
3000 * check_body is a jump target used when we find a
3001 * CHECK_STATE, and need to jump to the body of
3006 cmdlen = F_LEN(cmd);
3008 * An OR block (insn_1 || .. || insn_n) has the
3009 * F_OR bit set in all but the last instruction.
3010 * The first match will set "skip_or", and cause
3011 * the following instructions to be skipped until
3012 * past the one with the F_OR bit clear.
3014 if (skip_or) { /* skip this instruction */
3015 if ((cmd->len & F_OR) == 0)
3016 skip_or = 0; /* next one is good */
3019 match = 0; /* set to 1 if we succeed */
3021 switch (cmd->opcode) {
3023 * The first set of opcodes compares the packet's
3024 * fields with some pattern, setting 'match' if a
3025 * match is found. At the end of the loop there is
3026 * logic to deal with F_NOT and F_OR flags associated
3034 kprintf("ipfw: opcode %d unimplemented\n",
3041 * We only check offset == 0 && proto != 0,
3042 * as this ensures that we have an IPv4
3043 * packet with the ports info.
3048 match = ipfw_match_uid(&args->f_id, oif,
3050 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]);
3054 match = iface_match(m->m_pkthdr.rcvif,
3055 (ipfw_insn_if *)cmd);
3059 match = iface_match(oif, (ipfw_insn_if *)cmd);
3063 match = iface_match(oif ? oif :
3064 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
3068 if (args->eh != NULL) { /* have MAC header */
3069 uint32_t *want = (uint32_t *)
3070 ((ipfw_insn_mac *)cmd)->addr;
3071 uint32_t *mask = (uint32_t *)
3072 ((ipfw_insn_mac *)cmd)->mask;
3073 uint32_t *hdr = (uint32_t *)args->eh;
3076 (want[0] == (hdr[0] & mask[0]) &&
3077 want[1] == (hdr[1] & mask[1]) &&
3078 want[2] == (hdr[2] & mask[2]));
3083 if (args->eh != NULL) {
3085 ntohs(args->eh->ether_type);
3087 ((ipfw_insn_u16 *)cmd)->ports;
3090 /* Special vlan handling */
3091 if (m->m_flags & M_VLANTAG)
3094 for (i = cmdlen - 1; !match && i > 0;
3097 (t >= p[0] && t <= p[1]);
3103 match = (hlen > 0 && offset != 0);
3106 case O_IN: /* "out" is "not in" */
3107 match = (oif == NULL);
3111 match = (args->eh != NULL);
3116 * We do not allow an arg of 0 so the
3117 * check of "proto" only suffices.
3119 match = (proto == cmd->arg1);
3123 match = (hlen > 0 &&
3124 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3129 match = (hlen > 0 &&
3130 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3132 ((ipfw_insn_ip *)cmd)->mask.s_addr));
3139 tif = INADDR_TO_IFP(&src_ip);
3140 match = (tif != NULL);
3144 case O_IP_SRC_TABLE:
3145 match = ipfw_table_lookup(ctx, cmd->arg1,
3152 uint32_t *d = (uint32_t *)(cmd + 1);
3154 cmd->opcode == O_IP_DST_SET ?
3160 addr -= d[0]; /* subtract base */
3162 (addr < cmd->arg1) &&
3163 (d[1 + (addr >> 5)] &
3164 (1 << (addr & 0x1f)));
3169 match = (hlen > 0 &&
3170 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3175 match = (hlen > 0) &&
3176 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
3178 ((ipfw_insn_ip *)cmd)->mask.s_addr));
3185 tif = INADDR_TO_IFP(&dst_ip);
3186 match = (tif != NULL);
3190 case O_IP_DST_TABLE:
3191 match = ipfw_table_lookup(ctx, cmd->arg1,
3198 * offset == 0 && proto != 0 is enough
3199 * to guarantee that we have an IPv4
3200 * packet with port info.
3202 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
3205 (cmd->opcode == O_IP_SRCPORT) ?
3206 src_port : dst_port ;
3208 ((ipfw_insn_u16 *)cmd)->ports;
3211 for (i = cmdlen - 1; !match && i > 0;
3214 (x >= p[0] && x <= p[1]);
3220 match = (offset == 0 && proto==IPPROTO_ICMP &&
3221 icmptype_match(ip, (ipfw_insn_u32 *)cmd));
3225 match = (hlen > 0 && ipopts_match(ip, cmd));
3229 match = (hlen > 0 && cmd->arg1 == ip->ip_v);
3233 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl);
3237 match = (hlen > 0 &&
3238 cmd->arg1 == ntohs(ip->ip_id));
3242 match = (hlen > 0 && cmd->arg1 == ip_len);
3245 case O_IPPRECEDENCE:
3246 match = (hlen > 0 &&
3247 (cmd->arg1 == (ip->ip_tos & 0xe0)));
3251 match = (hlen > 0 &&
3252 flags_match(cmd, ip->ip_tos));
3256 match = (proto == IPPROTO_TCP && offset == 0 &&
3258 L3HDR(struct tcphdr,ip)->th_flags));
3262 match = (proto == IPPROTO_TCP && offset == 0 &&
3263 tcpopts_match(ip, cmd));
3267 match = (proto == IPPROTO_TCP && offset == 0 &&
3268 ((ipfw_insn_u32 *)cmd)->d[0] ==
3269 L3HDR(struct tcphdr,ip)->th_seq);
3273 match = (proto == IPPROTO_TCP && offset == 0 &&
3274 ((ipfw_insn_u32 *)cmd)->d[0] ==
3275 L3HDR(struct tcphdr,ip)->th_ack);
3279 match = (proto == IPPROTO_TCP && offset == 0 &&
3281 L3HDR(struct tcphdr,ip)->th_win);
3285 /* reject packets which have SYN only */
3286 /* XXX should i also check for TH_ACK ? */
3287 match = (proto == IPPROTO_TCP && offset == 0 &&
3288 (L3HDR(struct tcphdr,ip)->th_flags &
3289 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
3294 ipfw_log(ctx, f, hlen, args->eh, m,
3301 match = (krandom() <
3302 ((ipfw_insn_u32 *)cmd)->d[0]);
3306 * The second set of opcodes represents 'actions',
3307 * i.e. the terminal part of a rule once the packet
3308 * matches all previous patterns.
3309 * Typically there is only one action for each rule,
3310 * and the opcode is stored at the end of the rule
3311 * (but there are exceptions -- see below).
3313 * In general, here we set retval and terminate the
3314 * outer loop (would be a 'break 3' in some language,
3315 * but we need to do a 'goto done').
3318 * O_COUNT and O_SKIPTO actions:
3319 * instead of terminating, we jump to the next rule
3320 * ('goto next_rule', equivalent to a 'break 2'),
3321 * or to the SKIPTO target ('goto again' after
3322 * having set f, cmd and l), respectively.
3324 * O_LIMIT and O_KEEP_STATE: these opcodes are
3325 * not real 'actions', and are stored right
3326 * before the 'action' part of the rule.
3327 * These opcodes try to install an entry in the
3328 * state tables; if successful, we continue with
3329 * the next opcode (match=1; break;), otherwise
3330 * the packet must be dropped ('goto done' after
3331 * setting retval). If static rules are changed
3332 * during the state installation, the packet will
3333 * be dropped and rule's stats will not be updated
3334 * ('return IP_FW_DENY').
3336 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
3337 * cause a lookup of the state table, and a jump
3338 * to the 'action' part of the parent rule
3339 * ('goto check_body') if an entry is found, or
3340 * (CHECK_STATE only) a jump to the next rule if
3341 * the entry is not found ('goto next_rule').
3342 * The result of the lookup is cached so that
3343 * further instances of these opcodes are
3344 * effectively NOPs. If static rules are changed
3345 * during the state looking up, the packet will
3346 * be dropped and rule's stats will not be updated
3347 * ('return IP_FW_DENY').
3351 if (ipfw_state_install(ctx, f,
3352 (ipfw_insn_limit *)cmd, args,
3353 (offset == 0 && proto == IPPROTO_TCP) ?
3354 L3HDR(struct tcphdr, ip) : NULL)) {
3355 retval = IP_FW_DENY;
3356 goto done; /* error/limit violation */
3364 * States are checked at the first keep-state
3365 * check-state occurrence, with the result
3366 * being stored in dyn_dir. The compiler
3367 * introduces a PROBE_STATE instruction for
3368 * us when we have a KEEP_STATE/LIMIT (because
3369 * PROBE_STATE needs to be run first).
3371 if (dyn_dir == MATCH_UNKNOWN) {
3372 dyn_f = ipfw_state_lookup_rule(ctx,
3373 &args->f_id, &dyn_dir,
3375 proto == IPPROTO_TCP) ?
3376 L3HDR(struct tcphdr, ip) : NULL,
3378 if (dyn_f != NULL) {
3380 * Found a rule from a state;
3381 * jump to the 'action' part
3385 cmd = ACTION_PTR(f);
3386 l = f->cmd_len - f->act_ofs;
3391 * State not found. If CHECK_STATE, skip to
3392 * next rule, if PROBE_STATE just ignore and
3393 * continue with next opcode.
3395 if (cmd->opcode == O_CHECK_STATE)
3401 retval = IP_FW_PASS; /* accept */
3405 if (f->cross_rules == NULL) {
3407 * This rule was not completely setup;
3408 * move on to the next rule.
3414 * Don't defrag for l2 packets, output packets
3417 if (oif != NULL || args->eh != NULL ||
3418 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0)
3425 retval = IP_FW_PASS;
3428 ctx->ipfw_defraged++;
3429 KASSERT((m->m_flags & M_HASH) == 0,
3430 ("hash not cleared"));
3432 /* Update statistics */
3435 f->timestamp = time_second;
/* Reassembly may have replaced the mbuf; refresh ip/hlen aliases. */
3437 ip = mtod(m, struct ip *);
3438 hlen = ip->ip_hl << 2;
3441 ip->ip_len = htons(ip->ip_len);
3442 ip->ip_off = htons(ip->ip_off);
3449 KASSERT(m->m_flags & M_HASH, ("no hash"));
3450 cpuid = netisr_hashcpu(m->m_pkthdr.hash);
3451 if (cpuid != mycpuid) {
3454 * ip_len/ip_off are in network byte
3457 ctx->ipfw_defrag_remote++;
/* Reassembled packet hashes to another CPU; continue there. */
3459 return (IP_FW_CONTINUE);
3462 /* 'm' might be changed by ip_hashfn(). */
3463 ip = mtod(m, struct ip *);
3464 ip->ip_len = ntohs(ip->ip_len);
3465 ip->ip_off = ntohs(ip->ip_off);
3467 ip_len = ip->ip_len;
3469 proto = args->f_id.proto = ip->ip_p;
3477 sizeof(struct tcphdr));
3478 tcp = L3HDR(struct tcphdr, ip);
3479 dst_port = tcp->th_dport;
3480 src_port = tcp->th_sport;
3491 sizeof(struct udphdr));
3492 udp = L3HDR(struct udphdr, ip);
3493 dst_port = udp->uh_dport;
3494 src_port = udp->uh_sport;
3499 /* type, code and checksum. */
3500 PULLUP_TO(hlen + 4);
3502 L3HDR(struct icmp, ip)->icmp_type;
3508 args->f_id.src_port = src_port =
3510 args->f_id.dst_port = dst_port =
3518 args->rule = f; /* report matching rule */
3519 args->cookie = cmd->arg1;
3520 retval = IP_FW_DUMMYNET;
3525 if (args->eh) /* not on layer 2 */
3528 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT,
3529 sizeof(*divinfo), M_INTWAIT | M_NULLOK);
3531 retval = IP_FW_DENY;
3534 divinfo = m_tag_data(mtag);
3536 divinfo->skipto = f->rulenum;
3537 divinfo->port = cmd->arg1;
3538 divinfo->tee = (cmd->opcode == O_TEE);
3539 m_tag_prepend(m, mtag);
3541 args->cookie = cmd->arg1;
3542 retval = (cmd->opcode == O_DIVERT) ?
3543 IP_FW_DIVERT : IP_FW_TEE;
3548 f->pcnt++; /* update stats */
3550 f->timestamp = time_second;
3551 if (cmd->opcode == O_COUNT)
3554 if (f->next_rule == NULL)
3555 lookup_next_rule(f);
3561 * Drop the packet and send a reject notice
3562 * if the packet is not ICMP (or is an ICMP
3563 * query), and it is not multicast/broadcast.
3566 (proto != IPPROTO_ICMP ||
3567 is_icmp_query(ip)) &&
3568 !(m->m_flags & (M_BCAST|M_MCAST)) &&
3569 !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
3570 send_reject(args, cmd->arg1,
3572 retval = IP_FW_DENY;
3577 retval = IP_FW_DENY;
3581 if (args->eh) /* not valid on layer2 pkts */
3583 if (!dyn_f || dyn_dir == MATCH_FORWARD) {
3584 struct sockaddr_in *sin;
3586 mtag = m_tag_get(PACKET_TAG_IPFORWARD,
3587 sizeof(*sin), M_INTWAIT | M_NULLOK);
3589 retval = IP_FW_DENY;
3592 sin = m_tag_data(mtag);
3594 /* Structure copy */
3595 *sin = ((ipfw_insn_sa *)cmd)->sa;
3597 m_tag_prepend(m, mtag);
3598 m->m_pkthdr.fw_flags |=
3599 IPFORWARD_MBUF_TAGGED;
3600 m->m_pkthdr.fw_flags &=
3601 ~BRIDGE_MBUF_TAGGED;
3603 retval = IP_FW_PASS;
3607 panic("-- unknown opcode %d", cmd->opcode);
3608 } /* end of switch() on opcodes */
3610 if (cmd->len & F_NOT)
3614 if (cmd->len & F_OR)
3617 if (!(cmd->len & F_OR)) /* not an OR block, */
3618 break; /* try next rule */
3621 } /* end of inner for, scan opcodes */
3623 next_rule:; /* try next rule */
3625 } /* end of outer for, scan rules */
3626 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
3630 /* Update statistics */
3633 f->timestamp = time_second;
3638 kprintf("pullup failed\n");
3644 static struct mbuf *
3645 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
/*
 * Hand the packet to dummynet: attach a PACKET_TAG_DUMMYNET tag
 * describing the target pipe/queue, the flow id, and the matching
 * rule (referenced so it survives until dummynet releases it via
 * ipfw_unref_rule).  'dir' encodes the traffic direction.
 */
3650 const struct ipfw_flow_id *id;
3651 struct dn_flow_id *fid;
3655 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt),
3656 M_INTWAIT | M_NULLOK);
3661 m_tag_prepend(m, mtag);
3663 pkt = m_tag_data(mtag);
3664 bzero(pkt, sizeof(*pkt));
/* Locate the rule's action; skip a leading O_LOG if present. */
3666 cmd = fwa->rule->cmd + fwa->rule->act_ofs;
3667 if (cmd->opcode == O_LOG)
3669 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE,
3670 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode));
3673 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK);
3674 pkt->ifp = fwa->oif;
3675 pkt->pipe_nr = pipe_nr;
3677 pkt->cpuid = mycpuid;
3678 pkt->msgport = netisr_curport();
/* Copy the flow id so dummynet can classify per-flow queues. */
3682 fid->fid_dst_ip = id->dst_ip;
3683 fid->fid_src_ip = id->src_ip;
3684 fid->fid_dst_port = id->dst_port;
3685 fid->fid_src_port = id->src_port;
3686 fid->fid_proto = id->proto;
3687 fid->fid_flags = id->flags;
3689 ipfw_ref_rule(fwa->rule);
3690 pkt->dn_priv = fwa->rule;
3691 pkt->dn_unref_priv = ipfw_unref_rule;
3693 if (cmd->opcode == O_PIPE)
3694 pkt->dn_flags |= DN_FLAGS_IS_PIPE;
3696 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED;
3701 * When a rule is added/deleted, clear the next_rule pointers in all rules.
3702 * These will be reconstructed on the fly as packets are matched.
3705 ipfw_flush_rule_ptrs(struct ipfw_context *ctx)
/* Invalidate every cached next_rule pointer in this CPU's chain. */
3709 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
3710 rule->next_rule = NULL;
3713 static __inline void
3714 ipfw_inc_static_count(struct ip_fw *rule)
/* Account one more static rule in the global counters (CPU0 only). */
3716 /* Static rule's counts are updated only on CPU0 */
3717 KKASSERT(mycpuid == 0);
3720 static_ioc_len += IOC_RULESIZE(rule);
3723 static __inline void
3724 ipfw_dec_static_count(struct ip_fw *rule)
/* Remove one static rule from the global counters (CPU0 only). */
3726 int l = IOC_RULESIZE(rule);
3728 /* Static rule's counts are updated only on CPU0 */
3729 KKASSERT(mycpuid == 0);
3731 KASSERT(static_count > 0, ("invalid static count %u", static_count));
3734 KASSERT(static_ioc_len >= l,
3735 ("invalid static len %u", static_ioc_len));
3736 static_ioc_len -= l;
3740 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule)
/*
 * Chain the per-CPU copies of one logical rule together: link the
 * previous CPU's copy (fwmsg->sibling) to this CPU's copy, then
 * remember this copy for the next CPU in the forwarded message.
 */
3742 if (fwmsg->sibling != NULL) {
3743 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1);
3744 fwmsg->sibling->sibling = rule;
3746 fwmsg->sibling = rule;
3749 static struct ip_fw *
3750 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
/*
 * Allocate and populate this CPU's in-kernel copy of a rule from the
 * ioctl representation.  Caller owns the returned rule.
 */
3754 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO);
3756 rule->act_ofs = ioc_rule->act_ofs;
3757 rule->cmd_len = ioc_rule->cmd_len;
3758 rule->rulenum = ioc_rule->rulenum;
3759 rule->set = ioc_rule->set;
3760 rule->usr_flags = ioc_rule->usr_flags;
/* cmd_len is in 32-bit words, hence the * 4 byte count. */
3762 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */);
3765 rule->cpuid = mycpuid;
3766 rule->rule_flags = rule_flags;
3772 ipfw_add_rule_dispatch(netmsg_t nmsg)
/*
 * Per-CPU netisr handler for rule insertion: create this CPU's copy
 * of the rule, splice it into the pre-computed position in the local
 * chain, link it to its siblings on other CPUs, then forward the
 * message to the next CPU.
 */
3774 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
3775 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
3778 ASSERT_NETISR_NCPUS(mycpuid);
3780 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags);
3783 * Insert rule into the pre-determined position
3785 if (fwmsg->prev_rule != NULL) {
3786 struct ip_fw *prev, *next;
3788 prev = fwmsg->prev_rule;
3789 KKASSERT(prev->cpuid == mycpuid);
3791 next = fwmsg->next_rule;
3792 KKASSERT(next->cpuid == mycpuid);
3798 * Move to the position on the next CPU
3799 * before the msg is forwarded.
3801 fwmsg->prev_rule = prev->sibling;
3802 fwmsg->next_rule = next->sibling;
3804 KKASSERT(fwmsg->next_rule == NULL);
/* No predecessor: insert at the head of the chain. */
3805 rule->next = ctx->ipfw_layer3_chain;
3806 ctx->ipfw_layer3_chain = rule;
3809 /* Link rule CPU sibling */
3810 ipfw_link_sibling(fwmsg, rule);
/* Cached next_rule pointers are stale after any insertion. */
3812 ipfw_flush_rule_ptrs(ctx);
3815 /* Statistics only need to be updated once */
3816 ipfw_inc_static_count(rule);
3818 /* Return the rule on CPU0 */
3819 nmsg->lmsg.u.ms_resultp = rule;
/* track_ruleid is the CPU0 rule pointer, identical on every CPU. */
3822 if (rule->rule_flags & IPFW_RULE_F_GENTRACK)
3823 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp;
3825 if (fwmsg->cross_rules != NULL) {
3826 /* Save rules for later use. */
3827 fwmsg->cross_rules[mycpuid] = rule;
3830 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
3834 ipfw_crossref_rule_dispatch(netmsg_t nmsg)
/*
 * Second pass for IPFW_RULE_F_CROSSREF rules: give each CPU's copy a
 * private array of pointers to all sibling copies, walking the
 * sibling chain as the message is forwarded CPU by CPU.
 */
3836 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
3837 struct ip_fw *rule = fwmsg->sibling;
3838 int sz = sizeof(struct ip_fw *) * netisr_ncpus;
3840 ASSERT_NETISR_NCPUS(mycpuid);
3841 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF,
3842 ("not crossref rule"));
3844 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK);
3845 memcpy(rule->cross_rules, fwmsg->cross_rules, sz);
3847 fwmsg->sibling = rule->sibling;
3848 netisr_forwardmsg(&fwmsg->base, mycpuid + 1);
3852 * Add a new rule to the list. Copy the rule into a malloc'ed area,
3853 * then possibly create a rule number and add the rule to the list.
3854 * Update the rule_number in the input struct so the caller knows
3858 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
/*
 * Install a new rule on all netisr CPUs.  Auto-assigns a rule number
 * when ioc_rule->rulenum is 0, finds the sorted insertion point on
 * this CPU, then replicates the rule to every CPU via netisr message
 * (ipfw_add_rule_dispatch), with an optional second cross-reference
 * pass for IPFW_RULE_F_CROSSREF rules.
 */
3860 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
3861 struct netmsg_ipfw fwmsg;
3862 struct ip_fw *f, *prev, *rule;
3867 * If rulenum is 0, find highest numbered rule before the
3868 * default rule, and add rule number incremental step.
3870 if (ioc_rule->rulenum == 0) {
3871 int step = autoinc_step;
3873 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN &&
3874 step <= IPFW_AUTOINC_STEP_MAX);
3877 * Locate the highest numbered rule before default
3879 for (f = ctx->ipfw_layer3_chain; f; f = f->next) {
3880 if (f->rulenum == IPFW_DEFAULT_RULE)
3882 ioc_rule->rulenum = f->rulenum;
/* Only step past the last rule if it leaves room before default. */
3884 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step)
3885 ioc_rule->rulenum += step;
3887 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE &&
3888 ioc_rule->rulenum != 0,
3889 ("invalid rule num %d", ioc_rule->rulenum));
3892 * Now find the right place for the new rule in the sorted list.
3894 for (prev = NULL, f = ctx->ipfw_layer3_chain; f;
3895 prev = f, f = f->next) {
3896 if (f->rulenum > ioc_rule->rulenum) {
3897 /* Found the location */
3901 KASSERT(f != NULL, ("no default rule?!"))(
3904 * Duplicate the rule onto each CPU.
3905 * The rule duplicated on CPU0 will be returned.
3907 bzero(&fwmsg, sizeof(fwmsg));
3908 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
3909 ipfw_add_rule_dispatch);
3910 fwmsg.ioc_rule = ioc_rule;
3911 fwmsg.prev_rule = prev;
3912 fwmsg.next_rule = prev == NULL ? NULL : f;
3913 fwmsg.rule_flags = rule_flags;
3914 if (rule_flags & IPFW_RULE_F_CROSSREF) {
3915 fwmsg.cross_rules = kmalloc(
3916 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP,
3920 netisr_domsg_global(&fwmsg.base);
3921 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL);
3923 rule = fwmsg.base.lmsg.u.ms_resultp;
3924 KKASSERT(rule != NULL && rule->cpuid == mycpuid);
3926 if (fwmsg.cross_rules != NULL) {
/* Second pass: distribute the collected per-CPU rule pointers. */
3927 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport,
3928 MSGF_PRIORITY, ipfw_crossref_rule_dispatch);
3929 fwmsg.sibling = rule;
3930 netisr_domsg_global(&fwmsg.base);
3931 KKASSERT(fwmsg.sibling == NULL);
3933 kfree(fwmsg.cross_rules, M_TEMP);
3936 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
3940 DPRINTF("++ installed rule %d, static count now %d\n",
3941 rule->rulenum, static_count);
3945 * Free storage associated with a static rule (including derived
3947 * The caller is in charge of clearing rule pointers to avoid
3948 * dangling pointers.
3949 * @return a pointer to the next entry.
3950 * Arguments are not checked, so they better be correct.
3952 static struct ip_fw *
3953 ipfw_delete_rule(struct ipfw_context *ctx,
3954 struct ip_fw *prev, struct ip_fw *rule)
/*
 * Unlink 'rule' (whose predecessor is 'prev', or NULL at the head)
 * from this CPU's chain, mark it invalid, update global counters on
 * CPU0, and free it -- or park it on the crossref free list when
 * other CPUs may still hold cross_rules pointers to it.
 * Returns the rule that followed the deleted one.
 */
3960 ctx->ipfw_layer3_chain = n;
3964 /* Mark the rule as invalid */
3965 rule->rule_flags |= IPFW_RULE_F_INVALID;
3966 rule->next_rule = NULL;
3967 rule->sibling = NULL;
3969 /* Don't reset cpuid here; keep various assertion working */
3973 /* Statistics only need to be updated once */
3975 ipfw_dec_static_count(rule);
3977 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) {
3978 /* Try to free this rule */
3979 ipfw_free_rule(rule);
3981 /* TODO: check staging area. */
/* Crossref rules are deferred; freed later from the global list. */
3983 rule->next = ipfw_gd.ipfw_crossref_free;
3984 ipfw_gd.ipfw_crossref_free = rule;
3988 /* Return the next rule */
3993 ipfw_flush_dispatch(netmsg_t nmsg)
/*
 * Per-CPU netisr handler for ipfw_flush(): drop all states and
 * tracks, then delete every rule in this CPU's chain (keeping the
 * default rule unless kill_default is set), and forward to the next
 * CPU.
 */
3995 int kill_default = nmsg->lmsg.u.ms_result;
3996 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
3999 ASSERT_NETISR_NCPUS(mycpuid);
4004 ipfw_state_flush(ctx, NULL);
4005 KASSERT(ctx->ipfw_state_cnt == 0,
4006 ("%d pcpu states remain", ctx->ipfw_state_cnt));
4007 ctx->ipfw_state_loosecnt = 0;
4008 ctx->ipfw_state_lastexp = 0;
4013 ipfw_track_flush(ctx, NULL);
4014 ctx->ipfw_track_lastexp = 0;
4015 if (ctx->ipfw_trkcnt_spare != NULL) {
4016 kfree(ctx->ipfw_trkcnt_spare, M_IPFW);
4017 ctx->ipfw_trkcnt_spare = NULL;
4020 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */
4022 while ((rule = ctx->ipfw_layer3_chain) != NULL &&
4023 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE))
4024 ipfw_delete_rule(ctx, NULL, rule);
4026 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4030 * Deletes all rules from a chain (including the default rule
4031 * if the second argument is set).
4034 ipfw_flush(int kill_default)
/*
 * Flush all rules on every CPU via ipfw_flush_dispatch; the default
 * rule is kept unless kill_default is set (module unload path).
 * Afterwards, reset global state/track counters and assert that the
 * bookkeeping matches the expected post-flush totals.
 */
4036 struct netmsg_base nmsg;
4038 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4045 * If 'kill_default' then caller has done the necessary
4046 * msgport syncing; unnecessary to do it again.
4048 if (!kill_default) {
4050 * Let ipfw_chk() know the rules are going to
4051 * be flushed, so it could jump directly to
4055 /* XXX use priority sync */
4056 netmsg_service_sync();
4060 * Press the 'flush' button
4062 bzero(&nmsg, sizeof(nmsg));
4063 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4064 ipfw_flush_dispatch);
4065 nmsg.lmsg.u.ms_result = kill_default;
4066 netisr_domsg_global(&nmsg);
4067 ipfw_gd.ipfw_state_loosecnt = 0;
4068 ipfw_gd.ipfw_state_globexp = 0;
4069 ipfw_gd.ipfw_track_globexp = 0;
4072 state_cnt = ipfw_state_cntcoll();
4073 KASSERT(state_cnt == 0, ("%d states remain", state_cnt));
4075 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0,
4076 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt));
/* With kill_default, nothing survives; otherwise the default rule does. */
4079 KASSERT(static_count == 0,
4080 ("%u static rules remain", static_count));
4081 KASSERT(static_ioc_len == 0,
4082 ("%u bytes of static rules remain", static_ioc_len));
4084 KASSERT(static_count == 1,
4085 ("%u static rules remain", static_count));
4086 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule),
4087 ("%u bytes of static rules remain, should be %lu",
4089 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule)));
4098 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg)
/*
 * Per-CPU netisr handler deleting every rule with dmsg->rulenum from
 * this CPU's chain, flushing any states/tracks the rules generated,
 * then forwarding the message to the next CPU.
 */
4100 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4101 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4102 struct ip_fw *rule, *prev;
4104 ASSERT_NETISR_NCPUS(mycpuid);
4106 rule = dmsg->start_rule;
4107 KKASSERT(rule->cpuid == mycpuid);
/* Advance start_rule to this rule's sibling for the next CPU. */
4108 dmsg->start_rule = rule->sibling;
4110 prev = dmsg->prev_rule;
4112 KKASSERT(prev->cpuid == mycpuid);
4115 * Move to the position on the next CPU
4116 * before the msg is forwarded.
4118 dmsg->prev_rule = prev->sibling;
4122 * flush pointers outside the loop, then delete all matching
4123 * rules. 'prev' remains the same throughout the cycle.
4125 ipfw_flush_rule_ptrs(ctx);
4126 while (rule && rule->rulenum == dmsg->rulenum) {
4127 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4128 /* Flush states generated by this rule. */
4129 ipfw_state_flush(ctx, rule);
4131 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
4132 /* Flush tracks generated by this rule. */
4133 ipfw_track_flush(ctx, rule);
4135 rule = ipfw_delete_rule(ctx, prev, rule);
4138 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4142 ipfw_alt_delete_rule(uint16_t rulenum)
/*
 * Delete all rules numbered 'rulenum' on every CPU by dispatching
 * ipfw_alt_delete_rule_dispatch through the netisr CPUs.
 */
4144 struct ip_fw *prev, *rule;
4145 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4146 struct netmsg_del dmsg;
4151 * Locate first rule to delete
4153 for (prev = NULL, rule = ctx->ipfw_layer3_chain;
4154 rule && rule->rulenum < rulenum;
4155 prev = rule, rule = rule->next)
4157 if (rule->rulenum != rulenum)
4161 * Get rid of the rule duplications on all CPUs
4163 bzero(&dmsg, sizeof(dmsg));
4164 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4165 ipfw_alt_delete_rule_dispatch);
4166 dmsg.prev_rule = prev;
4167 dmsg.start_rule = rule;
4168 dmsg.rulenum = rulenum;
4170 netisr_domsg_global(&dmsg.base);
/* Both cursors must have walked off the end of the sibling chains. */
4171 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL);
4176 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg)
/*
 * Per-CPU netisr handler deleting every rule belonging to set
 * dmsg->from_set, flushing states/tracks the rules generated, then
 * forwarding to the next CPU.
 */
4178 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4179 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4180 struct ip_fw *prev, *rule;
4185 ASSERT_NETISR_NCPUS(mycpuid);
4187 ipfw_flush_rule_ptrs(ctx);
4190 rule = ctx->ipfw_layer3_chain;
4191 while (rule != NULL) {
4192 if (rule->set == dmsg->from_set) {
4193 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4194 /* Flush states generated by this rule. */
4195 ipfw_state_flush(ctx, rule);
4197 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
4198 /* Flush tracks generated by this rule. */
4199 ipfw_track_flush(ctx, rule);
4201 rule = ipfw_delete_rule(ctx, prev, rule);
4210 KASSERT(del, ("no match set?!"));
4212 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4216 ipfw_alt_delete_ruleset(uint8_t set)
/*
 * Delete every rule in 'set' on all CPUs.  First verifies on the
 * local chain that the set is non-empty, then dispatches
 * ipfw_alt_delete_ruleset_dispatch through the netisr CPUs.
 */
4218 struct netmsg_del dmsg;
4221 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4226 * Check whether the 'set' exists. If it exists,
4227 * then check whether any rules within the set will
4228 * try to create states.
4231 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
4232 if (rule->set == set)
4236 return 0; /* XXX EINVAL? */
4241 bzero(&dmsg, sizeof(dmsg));
4242 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4243 ipfw_alt_delete_ruleset_dispatch);
4244 dmsg.from_set = set;
4245 netisr_domsg_global(&dmsg.base);
/*
 * Netisr dispatch: reassign all rules numbered dmsg->rulenum on this
 * CPU to set dmsg->to_set.  Walks forward from the per-CPU duplicate in
 * dmsg->start_rule; the sibling pointer is saved for the next CPU
 * before this CPU modifies anything.
 */
4251 ipfw_alt_move_rule_dispatch(netmsg_t nmsg)
4253 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4256 ASSERT_NETISR_NCPUS(mycpuid);
4258 rule = dmsg->start_rule;
4259 KKASSERT(rule->cpuid == mycpuid);
4262 * Move to the position on the next CPU
4263 * before the msg is forwarded.
4265 dmsg->start_rule = rule->sibling;
/* Multiple rules may share the same number; update them all. */
4267 while (rule && rule->rulenum <= dmsg->rulenum) {
4268 if (rule->rulenum == dmsg->rulenum)
4269 rule->set = dmsg->to_set;
4272 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Move all rules numbered 'rulenum' into 'set'.  Locates the first rule
 * that is not already in the target set; if none needs moving, returns 0
 * (the XXX notes an error might be more appropriate).  Otherwise runs
 * the move dispatch on every netisr CPU.
 */
4276 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set)
4278 struct netmsg_del dmsg;
4279 struct netmsg_base *nmsg;
4281 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4286 * Locate first rule to move
4288 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum;
4289 rule = rule->next) {
4290 if (rule->rulenum == rulenum && rule->set != set)
4293 if (rule == NULL || rule->rulenum > rulenum)
4294 return 0; /* XXX error? */
4296 bzero(&dmsg, sizeof(dmsg));
4298 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4299 ipfw_alt_move_rule_dispatch);
4300 dmsg.start_rule = rule;
4301 dmsg.rulenum = rulenum;
4304 netisr_domsg_global(nmsg);
4305 KKASSERT(dmsg.start_rule == NULL);
/*
 * Netisr dispatch: retag every rule in set dmsg->from_set with
 * dmsg->to_set on this CPU, then forward to the next CPU.
 */
4310 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg)
4312 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4313 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4316 ASSERT_NETISR_NCPUS(mycpuid);
4318 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
4319 if (rule->set == dmsg->from_set)
4320 rule->set = dmsg->to_set;
4322 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Move every rule in 'from_set' into 'to_set' on all netisr CPUs by
 * circulating the move-ruleset dispatch.
 */
4326 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set)
4328 struct netmsg_del dmsg;
4329 struct netmsg_base *nmsg;
4333 bzero(&dmsg, sizeof(dmsg));
4335 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4336 ipfw_alt_move_ruleset_dispatch);
4337 dmsg.from_set = from_set;
4338 dmsg.to_set = to_set;
4340 netisr_domsg_global(nmsg);
/*
 * Netisr dispatch: swap the set tags dmsg->from_set and dmsg->to_set on
 * every rule of this CPU, then forward to the next CPU.
 */
4345 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg)
4347 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4348 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4351 ASSERT_NETISR_NCPUS(mycpuid);
4353 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
4354 if (rule->set == dmsg->from_set)
4355 rule->set = dmsg->to_set;
4356 else if (rule->set == dmsg->to_set)
4357 rule->set = dmsg->from_set;
4359 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Swap rule membership of two sets on all netisr CPUs.
 * NOTE(review): only from_set is visibly assigned here; the to_set
 * assignment (set2) is presumably on an elided line — confirm.
 */
4363 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2)
4365 struct netmsg_del dmsg;
4366 struct netmsg_base *nmsg;
4370 bzero(&dmsg, sizeof(dmsg));
4372 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4373 ipfw_alt_swap_ruleset_dispatch);
4374 dmsg.from_set = set1;
4377 netisr_domsg_global(nmsg);
4382 * Remove all rules with given number, and also do set manipulation.
4384 * The argument is an uint32_t. The low 16 bit are the rule or set number,
4385 * the next 8 bits are the new set, the top 8 bits are the command:
4387 * 0 delete rules with given number
4388 * 1 delete rules with given set number
4389 * 2 move rules with given number to new set
4390 * 3 move rules with given set number to new set
4391 * 4 swap sets with given numbers
4394 ipfw_ctl_alter(uint32_t arg)
4397 uint8_t cmd, new_set;
/* Decode the packed argument: see the format comment above. */
4402 rulenum = arg & 0xffff;
4403 cmd = (arg >> 24) & 0xff;
4404 new_set = (arg >> 16) & 0xff;
/* The default set may never be the target of a move/swap. */
4408 if (new_set >= IPFW_DEFAULT_SET)
4410 if (cmd == 0 || cmd == 2) {
/* Rule-number commands: the default rule is untouchable. */
4411 if (rulenum == IPFW_DEFAULT_RULE)
/* Set-number commands: validate against the set range instead. */
4414 if (rulenum >= IPFW_DEFAULT_SET)
4419 case 0: /* delete rules with given number */
4420 error = ipfw_alt_delete_rule(rulenum);
4423 case 1: /* delete all rules with given set number */
4424 error = ipfw_alt_delete_ruleset(rulenum);
4427 case 2: /* move rules with given number to new set */
4428 error = ipfw_alt_move_rule(rulenum, new_set);
4431 case 3: /* move rules with given set number to new set */
4432 error = ipfw_alt_move_ruleset(rulenum, new_set);
4435 case 4: /* swap two sets */
4436 error = ipfw_alt_swap_ruleset(rulenum, new_set);
4443 * Clear counters for a specific rule.
/*
 * @param rule      rule whose counters are reset
 * @param log_only  nonzero to reset only the logging budget, leaving
 *                  packet/byte counters and timestamp intact
 */
4446 clear_counters(struct ip_fw *rule, int log_only)
4448 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
4450 if (log_only == 0) {
4451 rule->bcnt = rule->pcnt = 0;
4452 rule->timestamp = 0;
/* Replenish the per-rule log quota if the action is O_LOG. */
4454 if (l->o.opcode == O_LOG)
4455 l->log_left = l->max_log;
/*
 * Netisr dispatch: zero counters on this CPU.  rulenum == 0 means all
 * rules (plus the no-match counter); otherwise only the duplicates of
 * the rule(s) carrying zmsg->rulenum, starting at zmsg->start_rule.
 */
4459 ipfw_zero_entry_dispatch(netmsg_t nmsg)
4461 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
4462 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4465 ASSERT_NETISR_NCPUS(mycpuid);
4467 if (zmsg->rulenum == 0) {
4468 KKASSERT(zmsg->start_rule == NULL);
4470 ctx->ipfw_norule_counter = 0;
4471 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
4472 clear_counters(rule, zmsg->log_only);
4474 struct ip_fw *start = zmsg->start_rule;
4476 KKASSERT(start->cpuid == mycpuid);
4477 KKASSERT(start->rulenum == zmsg->rulenum);
4480 * We can have multiple rules with the same number, so we
4481 * need to clear them all.
4483 for (rule = start; rule && rule->rulenum == zmsg->rulenum;
4485 clear_counters(rule, zmsg->log_only);
4488 * Move to the position on the next CPU
4489 * before the msg is forwarded.
4491 zmsg->start_rule = start->sibling;
4493 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4497 * Reset some or all counters on firewall rules.
4498 * @arg frwl is null to clear all entries, or contains a specific
4500 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
4503 ipfw_ctl_zero_entry(int rulenum, int log_only)
4505 struct netmsg_zent zmsg;
4506 struct netmsg_base *nmsg;
4508 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4512 bzero(&zmsg, sizeof(zmsg));
4514 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4515 ipfw_zero_entry_dispatch);
4516 zmsg.log_only = log_only;
/* rulenum == 0: clear everything; pick the "all" log message. */
4519 msg = log_only ? "ipfw: All logging counts reset.\n"
4520 : "ipfw: Accounting cleared.\n";
4525 * Locate the first rule with 'rulenum'
4527 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
4528 if (rule->rulenum == rulenum)
4531 if (rule == NULL) /* we did not find any matching rules */
4533 zmsg.start_rule = rule;
4534 zmsg.rulenum = rulenum;
4536 msg = log_only ? "ipfw: Entry %d logging count reset.\n"
4537 : "ipfw: Entry %d cleared.\n";
4539 netisr_domsg_global(nmsg);
4540 KKASSERT(zmsg.start_rule == NULL);
/* Audit trail: record the reset, mirroring what ipfw(8) expects. */
4543 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
4548 * Check validity of the structure before insert.
4549 * Fortunately rules are simple, so this mostly need to check rule sizes.
/*
 * Validates a rule handed in from userland: overall size, rule number,
 * and per-opcode instruction lengths.  Also derives *rule_flags
 * (GENSTATE/GENTRACK/CROSSREF) from the opcodes seen.
 * Returns 0 on success; nonzero errno on any malformed rule
 * (error returns are on elided lines in this view).
 */
4552 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags)
4555 int have_action = 0;
4560 /* Check for valid size */
4561 if (size < sizeof(*rule)) {
4562 kprintf("ipfw: rule too short\n");
4565 l = IOC_RULESIZE(rule);
4567 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l);
4571 /* Check rule number */
4572 if (rule->rulenum == IPFW_DEFAULT_RULE) {
4573 kprintf("ipfw: invalid rule number\n");
4578 * Now go for the individual checks. Very simple ones, basically only
4579 * instruction sizes.
4581 for (l = rule->cmd_len, cmd = rule->cmd; l > 0;
4582 l -= cmdlen, cmd += cmdlen) {
4583 cmdlen = F_LEN(cmd);
4585 kprintf("ipfw: opcode %d size truncated\n",
4590 DPRINTF("ipfw: opcode %d\n", cmd->opcode);
4592 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT) {
4593 /* This rule will generate states. */
4594 *rule_flags |= IPFW_RULE_F_GENSTATE;
4595 if (cmd->opcode == O_LIMIT)
4596 *rule_flags |= IPFW_RULE_F_GENTRACK;
4598 if (cmd->opcode == O_DEFRAG)
4599 *rule_flags |= IPFW_RULE_F_CROSSREF;
/* Per-opcode size/content validation follows. */
4601 switch (cmd->opcode) {
4615 case O_IPPRECEDENCE:
4622 if (cmdlen != F_INSN_SIZE(ipfw_insn))
4626 case O_IP_SRC_TABLE:
4627 case O_IP_DST_TABLE:
4628 if (cmdlen != F_INSN_SIZE(ipfw_insn))
4630 if (cmd->arg1 >= ipfw_table_max) {
4631 kprintf("ipfw: invalid table id %u, max %d\n",
4632 cmd->arg1, ipfw_table_max);
4645 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
4650 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
4655 if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
/* Start a fresh logging budget for a newly inserted rule. */
4658 ((ipfw_insn_log *)cmd)->log_left =
4659 ((ipfw_insn_log *)cmd)->max_log;
4665 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip))
4667 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) {
4668 kprintf("ipfw: opcode %d, useless rule\n",
4676 if (cmd->arg1 == 0 || cmd->arg1 > 256) {
4677 kprintf("ipfw: invalid set size %d\n",
4681 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
4687 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
4693 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
4694 if (cmdlen < 2 || cmdlen > 31)
4701 if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
4707 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
4712 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) {
4717 fwd_addr = ((ipfw_insn_sa *)cmd)->
/* Forwarding to a multicast destination is rejected. */
4719 if (IN_MULTICAST(ntohl(fwd_addr))) {
4720 kprintf("ipfw: try forwarding to "
4721 "multicast address\n");
4727 case O_FORWARD_MAC: /* XXX not implemented yet */
4737 if (cmdlen != F_INSN_SIZE(ipfw_insn))
/* A rule may carry exactly one action, and it must come last. */
4741 kprintf("ipfw: opcode %d, multiple actions"
4748 kprintf("ipfw: opcode %d, action must be"
4755 kprintf("ipfw: opcode %d, unknown opcode\n",
4760 if (have_action == 0) {
4761 kprintf("ipfw: missing action\n");
4767 kprintf("ipfw: opcode %d size %d wrong\n",
4768 cmd->opcode, cmdlen);
/*
 * sockopt handler for IP_FW_ADD: validate the userland rule buffer,
 * then install the rule.  The sopt value buffer is grown to the maximum
 * rule size up front because ipfw_add_rule() may return the inserted
 * rule back to userland on a GET-style sockopt.
 */
4773 ipfw_ctl_add_rule(struct sockopt *sopt)
4775 struct ipfw_ioc_rule *ioc_rule;
4777 uint32_t rule_flags;
4782 size = sopt->sopt_valsize;
4783 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) ||
4784 size < sizeof(*ioc_rule)) {
4787 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) {
4788 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) *
4789 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK);
4791 ioc_rule = sopt->sopt_val;
4793 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags);
4797 ipfw_add_rule(ioc_rule, rule_flags);
4799 if (sopt->sopt_dir == SOPT_GET)
4800 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule);
/*
 * Serialize one static rule (the CPU0 master copy) into the userland
 * ipfw_ioc_rule format, aggregating pcnt/bcnt/timestamp across the
 * per-CPU duplicates via the sibling chain.  Returns the address just
 * past the emitted record so the caller can append the next rule.
 */
4805 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule,
4806 struct ipfw_ioc_rule *ioc_rule)
4808 const struct ip_fw *sibling;
4814 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0"));
4816 ioc_rule->act_ofs = rule->act_ofs;
4817 ioc_rule->cmd_len = rule->cmd_len;
4818 ioc_rule->rulenum = rule->rulenum;
4819 ioc_rule->set = rule->set;
4820 ioc_rule->usr_flags = rule->usr_flags;
4822 ioc_rule->set_disable = ctx->ipfw_set_disable;
4823 ioc_rule->static_count = static_count;
4824 ioc_rule->static_len = static_ioc_len;
4827 * Visit (read-only) all of the rule's duplications to get
4828 * the necessary statistics
4835 ioc_rule->timestamp = 0;
4836 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) {
4837 ioc_rule->pcnt += sibling->pcnt;
4838 ioc_rule->bcnt += sibling->bcnt;
/* Report the most recent activity across all CPUs. */
4839 if (sibling->timestamp > ioc_rule->timestamp)
4840 ioc_rule->timestamp = sibling->timestamp;
4845 KASSERT(i == netisr_ncpus,
4846 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus));
/* cmd_len counts 32-bit words, hence the * 4 (XXX magic). */
4848 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */);
4850 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule));
/*
 * Convert a track counter into the userland ipfw_ioc_state format,
 * presented as an O_LIMIT_PARENT "state" for ipfw(8) compatibility.
 * Tracks with tc_expire == 0 were not visited by the scan pass and are
 * skipped (the skip/return lines are elided in this view).
 */
4854 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state)
4856 struct ipfw_ioc_flowid *ioc_id;
4858 if (trk->tc_expire == 0) {
4859 /* Not a scanned one. */
/* Report remaining lifetime; 0 if already past expiry. */
4863 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ?
4864 0 : trk->tc_expire - time_uptime;
4865 ioc_state->pcnt = 0;
4866 ioc_state->bcnt = 0;
4868 ioc_state->dyn_type = O_LIMIT_PARENT;
4869 ioc_state->count = trk->tc_count;
4871 ioc_state->rulenum = trk->tc_rulenum;
4873 ioc_id = &ioc_state->id;
4874 ioc_id->type = ETHERTYPE_IP;
4875 ioc_id->u.ip.proto = trk->tc_proto;
4876 ioc_id->u.ip.src_ip = trk->tc_saddr;
4877 ioc_id->u.ip.dst_ip = trk->tc_daddr;
4878 ioc_id->u.ip.src_port = trk->tc_sport;
4879 ioc_id->u.ip.dst_port = trk->tc_dport;
/*
 * Convert one dynamic state into the userland ipfw_ioc_state format.
 * Anchor placeholders (O_ANCHOR) are not real states and are skipped
 * (the skip/return lines are elided in this view).
 */
4885 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state)
4887 struct ipfw_ioc_flowid *ioc_id;
4889 if (s->st_type == O_ANCHOR)
/* Report remaining lifetime; 0 if already past expiry. */
4892 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ?
4893 0 : s->st_expire - time_uptime;
4894 ioc_state->pcnt = s->st_pcnt;
4895 ioc_state->bcnt = s->st_bcnt;
4897 ioc_state->dyn_type = s->st_type;
4898 ioc_state->count = 0;
4900 ioc_state->rulenum = s->st_rule->rulenum;
4902 ioc_id = &ioc_state->id;
4903 ioc_id->type = ETHERTYPE_IP;
4904 ioc_id->u.ip.proto = s->st_proto;
4905 ipfw_key_4tuple(&s->st_key,
4906 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port,
4907 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port);
/*
 * Netisr dispatch: copy this CPU's dynamic states into the shared
 * userland buffer, refresh track expirations in the global track tree,
 * and — only on the last CPU — append the tracks themselves.  Stops
 * early (replies instead of forwarding) when the buffer is full.
 */
4913 ipfw_state_copy_dispatch(netmsg_t nmsg)
4915 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg;
4916 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4917 const struct ipfw_state *s;
4918 const struct ipfw_track *t;
4920 ASSERT_NETISR_NCPUS(mycpuid);
4921 KASSERT(nm->state_cnt < nm->state_cntmax,
4922 ("invalid state count %d, max %d",
4923 nm->state_cnt, nm->state_cntmax));
4925 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) {
/* ipfw_state_copy() skips anchors; nonzero means skipped. */
4926 if (ipfw_state_copy(s, nm->ioc_state)) {
4929 if (nm->state_cnt == nm->state_cntmax)
4935 * Prepare tracks in the global track tree for userland.
4937 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) {
4938 struct ipfw_trkcnt *trk;
4940 if (t->t_count == NULL) /* anchor */
4945 * Only one netisr can run this function at
4946 * any time, and only this function accesses
4947 * trkcnt's tc_expire, so this is safe w/o
4948 * ipfw_gd.ipfw_trkcnt_token.
4950 if (trk->tc_expire > t->t_expire)
4952 trk->tc_expire = t->t_expire;
4956 * Copy tracks in the global track tree to userland in
/* Last CPU in the ring: tracks have been fully refreshed by now. */
4959 if (mycpuid == netisr_ncpus - 1) {
4960 struct ipfw_trkcnt *trk;
4962 KASSERT(nm->state_cnt < nm->state_cntmax,
4963 ("invalid state count %d, max %d",
4964 nm->state_cnt, nm->state_cntmax));
4967 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) {
4968 if (ipfw_track_copy(trk, nm->ioc_state)) {
4971 if (nm->state_cnt == nm->state_cntmax) {
4980 if (nm->state_cnt == nm->state_cntmax) {
4981 /* No more space; done. */
4982 netisr_replymsg(&nm->base, 0);
4984 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * sockopt handler for IP_FW_GET: return all static rules followed by a
 * snapshot of dynamic states (tracks are presented as states for
 * userland compatibility).  Sizes the buffer with 25% headroom since
 * state counts can change between the size probe and the copy.
 */
4989 ipfw_ctl_get_rules(struct sockopt *sopt)
4991 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5000 * pass up a copy of the current rules. Static rules
5001 * come first (the last of which has number IPFW_DEFAULT_RULE),
5002 * followed by a possibly empty list of states.
5005 size = static_ioc_len; /* size of static rules */
5008 * Size of the states.
5009 * XXX take tracks as state for userland compat.
5011 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt;
5012 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */
5013 size += state_cnt * sizeof(struct ipfw_ioc_state);
5015 if (sopt->sopt_valsize < size) {
5016 /* short length, no need to return incomplete rules */
5017 /* XXX: if superuser, no need to zero buffer */
5018 bzero(sopt->sopt_val, sopt->sopt_valsize);
5021 bp = sopt->sopt_val;
5023 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5024 bp = ipfw_copy_rule(ctx, rule, bp);
5027 struct netmsg_cpstate nm;
5029 size_t old_size = size;
5032 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5033 MSGF_PRIORITY, ipfw_state_copy_dispatch);
5035 nm.state_cntmax = state_cnt;
5037 netisr_domsg_global(&nm.base);
5040 * The # of states may be shrinked after the snapshot
5041 * of the state count was taken. To give user a correct
5042 * state count, nm->state_cnt is used to recalculate
5045 size = static_ioc_len +
5046 (nm.state_cnt * sizeof(struct ipfw_ioc_state));
5047 KKASSERT(size <= old_size);
5050 sopt->sopt_valsize = size;
/*
 * Netisr dispatch: install the new set-disable mask on this CPU and
 * forward to the next CPU.
 */
5055 ipfw_set_disable_dispatch(netmsg_t nmsg)
5057 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5059 ASSERT_NETISR_NCPUS(mycpuid);
5061 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32;
5062 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Atomically disable/enable rule sets: compute the new mask from the
 * current CPU's view and propagate it to all netisr CPUs.  The default
 * set can never be disabled.
 */
5066 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable)
5068 struct netmsg_base nmsg;
5069 uint32_t set_disable;
5073 /* IPFW_DEFAULT_SET is always enabled */
5074 enable |= (1 << IPFW_DEFAULT_SET);
/* Enable bits win over disable bits for the same set. */
5075 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable;
5077 bzero(&nmsg, sizeof(nmsg));
5078 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5079 ipfw_set_disable_dispatch);
5080 nmsg.lmsg.u.ms_result32 = set_disable;
5082 netisr_domsg_global(&nmsg);
/*
 * Netisr dispatch: allocate the per-CPU radix tree head for the new
 * table id and forward to the next CPU.  Allocation failure here is
 * unrecoverable (panic).
 */
5086 ipfw_table_create_dispatch(netmsg_t nm)
5088 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5089 int tblid = nm->lmsg.u.ms_result;
5091 ASSERT_NETISR_NCPUS(mycpuid);
/* 32: offset in bits past the sockaddr_in header to the IPv4 key. */
5093 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid],
5094 rn_cpumaskhead(mycpuid), 32))
5095 panic("ipfw: create table%d failed", tblid);
5097 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * sockopt handler for IP_FW_TBL_CREATE: validate the requested table id
 * (range, not already existing) and create the radix tree on all netisr
 * CPUs.
 */
5101 ipfw_table_create(struct sockopt *sopt)
5103 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5104 struct ipfw_ioc_table *tbl;
5105 struct netmsg_base nm;
5109 if (sopt->sopt_valsize != sizeof(*tbl))
5112 tbl = sopt->sopt_val;
5113 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
5116 if (ctx->ipfw_tables[tbl->tableid] != NULL)
5119 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5120 ipfw_table_create_dispatch);
5121 nm.lmsg.u.ms_result = tbl->tableid;
5122 netisr_domsg_global(&nm);
/*
 * Remove a single radix node from its tree and sanity-check that the
 * node deleted is the one requested.  The freeing of the entry is on
 * elided lines in this view.
 */
5128 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn)
5130 struct radix_node *ret;
5132 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
5134 panic("deleted other table entry");
/* rnh_walktree callback: delete every entry (used by table flush). */
5139 ipfw_table_killent(struct radix_node *rn, void *xrnh)
5142 ipfw_table_killrn(xrnh, rn);
/*
 * Empty one table on the current CPU; when destroying, also detach the
 * radix tree head (the conditional around line 5158 is elided here —
 * presumably gated on the 'destroy' argument; confirm).
 */
5147 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid,
5150 struct radix_node_head *rnh;
5152 ASSERT_NETISR_NCPUS(mycpuid);
5154 rnh = ctx->ipfw_tables[tableid];
5155 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh);
5158 ctx->ipfw_tables[tableid] = NULL;
/*
 * Netisr dispatch: flush (and optionally destroy) one table on this CPU,
 * then forward to the next CPU.
 */
5163 ipfw_table_flush_dispatch(netmsg_t nmsg)
5165 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg;
5166 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5168 ASSERT_NETISR_NCPUS(mycpuid);
5170 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy);
5171 netisr_forwardmsg(&nm->base, mycpuid + 1);
/* Flush every existing table on the current CPU. */
5175 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy)
5179 ASSERT_NETISR_NCPUS(mycpuid);
5181 for (i = 0; i < ipfw_table_max; ++i) {
5182 if (ctx->ipfw_tables[i] != NULL)
5183 ipfw_table_flush_oncpu(ctx, i, destroy);
/*
 * Netisr dispatch: flush all tables on this CPU (without destroying
 * them) and forward to the next CPU.
 */
5188 ipfw_table_flushall_dispatch(netmsg_t nmsg)
5190 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5192 ASSERT_NETISR_NCPUS(mycpuid);
5194 ipfw_table_flushall_oncpu(ctx, 0);
5195 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * sockopt handler for IP_FW_TBL_FLUSH / IP_FW_TBL_DESTROY.  A negative
 * tableid with TBL_FLUSH means "flush every table"; otherwise the named
 * table is flushed, and destroyed as well for TBL_DESTROY (nm.destroy
 * assignment is on an elided line near 5228-5229 — confirm).
 */
5199 ipfw_table_flush(struct sockopt *sopt)
5201 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5202 struct ipfw_ioc_table *tbl;
5203 struct netmsg_tblflush nm;
5207 if (sopt->sopt_valsize != sizeof(*tbl))
5210 tbl = sopt->sopt_val;
5211 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) {
5212 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5213 MSGF_PRIORITY, ipfw_table_flushall_dispatch);
5214 netisr_domsg_global(&nm.base);
5218 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
5221 if (ctx->ipfw_tables[tbl->tableid] == NULL)
5224 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5225 ipfw_table_flush_dispatch);
5226 nm.tableid = tbl->tableid;
5228 if (sopt->sopt_name == IP_FW_TBL_DESTROY)
5230 netisr_domsg_global(&nm.base);
/* rnh_walktree callback: count table entries (body elided in this view). */
5236 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt)
/*
 * rnh_walktree callback: serialize one table entry into the userland
 * buffer, aggregating use/lastuse counters across the per-CPU sibling
 * chain.  Every entry must have exactly netisr_ncpus duplicates.
 */
5245 ipfw_table_cpent(struct radix_node *rn, void *xcp)
5247 struct ipfw_table_cp *cp = xcp;
5248 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
5249 struct ipfw_ioc_tblent *ioc_te;
5254 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d",
5255 cp->te_idx, cp->te_cnt));
5256 ioc_te = &cp->te[cp->te_idx];
/* rn_mask's first byte is its own length (sockaddr convention). */
5258 if (te->te_nodes->rn_mask != NULL) {
5259 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask,
5260 *te->te_nodes->rn_mask);
5262 ioc_te->netmask.sin_len = 0;
5264 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key));
5266 ioc_te->use = te->te_use;
5267 ioc_te->last_used = te->te_lastuse;
/* Fold in the statistics of the duplicates on the other CPUs. */
5272 while ((te = te->te_sibling) != NULL) {
5276 ioc_te->use += te->te_use;
5277 if (te->te_lastuse > ioc_te->last_used)
5278 ioc_te->last_used = te->te_lastuse;
5280 KASSERT(cnt == netisr_ncpus,
5281 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus));
/*
 * sockopt handler for IP_FW_TBL_GET.  A negative tableid returns the
 * list of existing table ids; otherwise the named table's entries are
 * counted and copied out.  Short userland buffers get zeroed and an
 * error (error-return lines are elided in this view).
 */
5289 ipfw_table_get(struct sockopt *sopt)
5291 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5292 struct radix_node_head *rnh;
5293 struct ipfw_ioc_table *tbl;
5294 struct ipfw_ioc_tblcont *cont;
5295 struct ipfw_table_cp cp;
5300 if (sopt->sopt_valsize < sizeof(*tbl))
5303 tbl = sopt->sopt_val;
5304 if (tbl->tableid < 0) {
5305 struct ipfw_ioc_tbllist *list;
5309 * List available table ids.
5311 for (i = 0; i < ipfw_table_max; ++i) {
5312 if (ctx->ipfw_tables[i] != NULL)
5316 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]);
5317 if (sopt->sopt_valsize < sz) {
5318 bzero(sopt->sopt_val, sopt->sopt_valsize);
5321 list = sopt->sopt_val;
5322 list->tablecnt = cnt;
/* Second pass: fill in the ids counted above. */
5325 for (i = 0; i < ipfw_table_max; ++i) {
5326 if (ctx->ipfw_tables[i] != NULL) {
5327 KASSERT(cnt < list->tablecnt,
5328 ("invalid idx %d, cnt %d",
5329 cnt, list->tablecnt));
5330 list->tables[cnt++] = i;
5333 sopt->sopt_valsize = sz;
5335 } else if (tbl->tableid >= ipfw_table_max) {
5339 rnh = ctx->ipfw_tables[tbl->tableid];
/* Count entries first to size the output buffer. */
5342 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt);
5344 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]);
5345 if (sopt->sopt_valsize < sz) {
5346 bzero(sopt->sopt_val, sopt->sopt_valsize);
5349 cont = sopt->sopt_val;
5355 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp);
5357 sopt->sopt_valsize = sz;
/*
 * Netisr dispatch: insert a table entry on this CPU.  On CPU0 a
 * duplicate key replies EEXIST; on later CPUs a collision would mean
 * the per-CPU trees diverged, which is a panic.  Entries on successive
 * CPUs are linked through te_sibling for statistics aggregation.
 */
5362 ipfw_table_add_dispatch(netmsg_t nmsg)
5364 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
5365 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5366 struct radix_node_head *rnh;
5367 struct ipfw_tblent *te;
5369 ASSERT_NETISR_NCPUS(mycpuid);
5371 rnh = ctx->ipfw_tables[nm->tableid];
5373 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO);
5374 te->te_nodes->rn_key = (char *)&te->te_key;
5375 memcpy(&te->te_key, nm->key, sizeof(te->te_key));
5377 if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh,
5378 te->te_nodes) == NULL) {
/* NOTE(review): the kfree(te) on failure is on an elided line — confirm. */
5381 netisr_replymsg(&nm->base, EEXIST);
5384 panic("rnh_addaddr failed");
5387 /* Link siblings. */
5388 if (nm->sibling != NULL)
5389 nm->sibling->te_sibling = te;
5392 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Netisr dispatch: remove a table entry on this CPU.  A miss on CPU0
 * replies ESRCH; a miss on later CPUs means the per-CPU trees diverged
 * (panic).  The freeing of the removed entry is on elided lines.
 */
5396 ipfw_table_del_dispatch(netmsg_t nmsg)
5398 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
5399 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5400 struct radix_node_head *rnh;
5401 struct radix_node *rn;
5403 ASSERT_NETISR_NCPUS(mycpuid);
5405 rnh = ctx->ipfw_tables[nm->tableid];
5406 rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh);
5409 netisr_replymsg(&nm->base, ESRCH);
5412 panic("rnh_deladdr failed");
5416 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * sockopt handler for IP_FW_TBL_ADD / IP_FW_TBL_DEL: validate the single
 * entry (AF_INET key, optional netmask), pre-mask the key when a netmask
 * is given (radix trees store masked keys), then run the add or del
 * dispatch on every netisr CPU.
 */
5420 ipfw_table_alt(struct sockopt *sopt)
5422 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5423 struct ipfw_ioc_tblcont *tbl;
5424 struct ipfw_ioc_tblent *te;
5425 struct sockaddr_in key0;
5426 struct sockaddr *netmask = NULL, *key;
5427 struct netmsg_tblent nm;
5431 if (sopt->sopt_valsize != sizeof(*tbl))
5433 tbl = sopt->sopt_val;
5435 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
5437 if (tbl->entcnt != 1)
5440 if (ctx->ipfw_tables[tbl->tableid] == NULL)
5444 if (te->key.sin_family != AF_INET ||
5445 te->key.sin_port != 0 ||
5446 te->key.sin_len != sizeof(struct sockaddr_in))
5448 key = (struct sockaddr *)&te->key;
5450 if (te->netmask.sin_len != 0) {
5451 if (te->netmask.sin_port != 0 ||
5452 te->netmask.sin_len > sizeof(struct sockaddr_in))
5454 netmask = (struct sockaddr *)&te->netmask;
/* Radix insertion expects the key already masked. */
5455 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask);
5456 key = (struct sockaddr *)&key0;
5459 if (sopt->sopt_name == IP_FW_TBL_ADD) {
5460 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5461 MSGF_PRIORITY, ipfw_table_add_dispatch);
5463 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5464 MSGF_PRIORITY, ipfw_table_del_dispatch);
5467 nm.netmask = netmask;
5468 nm.tableid = tbl->tableid;
5470 return (netisr_domsg_global(&nm.base));
/* rnh_walktree callback: zero an entry's counters (body partly elided). */
5474 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused)
5476 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
/*
 * Netisr dispatch: zero the counters of one table on this CPU and
 * forward to the next CPU.
 */
5484 ipfw_table_zero_dispatch(netmsg_t nmsg)
5486 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5487 struct radix_node_head *rnh;
5489 ASSERT_NETISR_NCPUS(mycpuid);
5491 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result];
5492 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
5494 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Netisr dispatch: zero the counters of every existing table on this
 * CPU and forward to the next CPU.
 */
5498 ipfw_table_zeroall_dispatch(netmsg_t nmsg)
5500 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5503 ASSERT_NETISR_NCPUS(mycpuid);
5505 for (i = 0; i < ipfw_table_max; ++i) {
5506 struct radix_node_head *rnh = ctx->ipfw_tables[i];
5509 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
5511 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * sockopt handler for IP_FW_TBL_ZERO: zero counters of one table, or of
 * all tables when tableid is negative.
 */
5515 ipfw_table_zero(struct sockopt *sopt)
5517 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5518 struct netmsg_base nm;
5519 struct ipfw_ioc_table *tbl;
5523 if (sopt->sopt_valsize != sizeof(*tbl))
5525 tbl = sopt->sopt_val;
5527 if (tbl->tableid < 0) {
5528 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5529 ipfw_table_zeroall_dispatch);
5530 netisr_domsg_global(&nm);
5532 } else if (tbl->tableid >= ipfw_table_max) {
5534 } else if (ctx->ipfw_tables[tbl->tableid] == NULL) {
5538 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5539 ipfw_table_zero_dispatch);
5540 nm.lmsg.u.ms_result = tbl->tableid;
5541 netisr_domsg_global(&nm);
/*
 * rnh_walktree callback: delete entries previously marked expired by
 * ipfw_table_markexp (the expcnt bookkeeping is on elided lines).
 */
5547 ipfw_table_killexp(struct radix_node *rn, void *xnm)
5549 struct netmsg_tblexp *nm = xnm;
5550 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
5552 if (te->te_expired) {
5553 ipfw_table_killrn(nm->rnh, rn);
/*
 * Netisr dispatch: delete marked-expired entries of one table on this
 * CPU.  The running expcnt must grow by exactly nm->cnt per CPU, since
 * every CPU holds the same duplicates.
 */
5560 ipfw_table_expire_dispatch(netmsg_t nmsg)
5562 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
5563 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5564 struct radix_node_head *rnh;
5566 ASSERT_NETISR_NCPUS(mycpuid);
5568 rnh = ctx->ipfw_tables[nm->tableid];
5570 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
5572 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
5573 ("not all expired addresses (%d) were deleted (%d)",
5574 nm->cnt * (mycpuid + 1), nm->expcnt));
5576 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Netisr dispatch: delete marked-expired entries of every table on this
 * CPU, with the same per-CPU expcnt invariant as the single-table path.
 */
5580 ipfw_table_expireall_dispatch(netmsg_t nmsg)
5582 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
5583 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5586 ASSERT_NETISR_NCPUS(mycpuid);
5588 for (i = 0; i < ipfw_table_max; ++i) {
5589 struct radix_node_head *rnh = ctx->ipfw_tables[i];
5594 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
5597 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
5598 ("not all expired addresses (%d) were deleted (%d)",
5599 nm->cnt * (mycpuid + 1), nm->expcnt));
5601 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * rnh_walktree callback (runs on the requesting CPU only): compute the
 * most recent lastuse across an entry's per-CPU siblings; if the entry
 * has been idle longer than nm->expire seconds, mark it and all its
 * siblings expired (marking lines are elided in this view).
 */
5605 ipfw_table_markexp(struct radix_node *rn, void *xnm)
5607 struct netmsg_tblexp *nm = xnm;
5608 struct ipfw_tblent *te;
5611 te = (struct ipfw_tblent *)rn;
5612 lastuse = te->te_lastuse;
5614 while ((te = te->te_sibling) != NULL) {
5615 if (te->te_lastuse > lastuse)
5616 lastuse = te->te_lastuse;
/* Still recently used on some CPU: not expired. */
5618 if (!TIME_LEQ(lastuse + nm->expire, time_second)) {
5623 te = (struct ipfw_tblent *)rn;
5625 while ((te = te->te_sibling) != NULL)
/*
 * sockopt handler for IP_FW_TBL_EXPIRE: mark entries idle for longer
 * than tbl->expire seconds (on this CPU; siblings share the verdict),
 * then delete the marked entries on every netisr CPU.  Negative tableid
 * expires all tables.  tbl->expcnt reports how many were expired.
 */
5633 ipfw_table_expire(struct sockopt *sopt)
5635 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5636 struct netmsg_tblexp nm;
5637 struct ipfw_ioc_tblexp *tbl;
5638 struct radix_node_head *rnh;
5642 if (sopt->sopt_valsize != sizeof(*tbl))
5644 tbl = sopt->sopt_val;
5649 nm.expire = tbl->expire;
5651 if (tbl->tableid < 0) {
/* Mark pass over every table on the current CPU. */
5654 for (i = 0; i < ipfw_table_max; ++i) {
5655 rnh = ctx->ipfw_tables[i];
5658 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
5661 /* No addresses can be expired. */
5664 tbl->expcnt = nm.cnt;
5666 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5667 MSGF_PRIORITY, ipfw_table_expireall_dispatch);
5669 netisr_domsg_global(&nm.base);
5670 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
5671 ("not all expired addresses (%d) were deleted (%d)",
5672 nm.cnt * netisr_ncpus, nm.expcnt));
5675 } else if (tbl->tableid >= ipfw_table_max) {
5679 rnh = ctx->ipfw_tables[tbl->tableid];
/* Single-table mark pass, then global delete pass. */
5682 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
5684 /* No addresses can be expired. */
5687 tbl->expcnt = nm.cnt;
5689 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5690 ipfw_table_expire_dispatch);
5691 nm.tableid = tbl->tableid;
5692 netisr_domsg_global(&nm.base);
5693 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
5694 ("not all expired addresses (%d) were deleted (%d)",
5695 nm.cnt * netisr_ncpus, nm.expcnt));
/*
 * Netmsg handler (run on the rule's owning CPU): free one cross-
 * referenced, already-invalidated rule and reply to the requester.
 */
5700 ipfw_crossref_free_dispatch(netmsg_t nmsg)
5702 struct ip_fw *rule = nmsg->lmsg.u.ms_resultp;
5704 KKASSERT((rule->rule_flags &
5705 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
5706 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
5707 ipfw_free_rule(rule);
5709 netisr_replymsg(&nmsg->base, 0);
/*
 * Reap cross-referenced rules that are no longer in flight: walk the
 * global crossref free list, and for each rule whose per-CPU duplicates
 * have zero cross_refs, free the duplicates on their owning CPUs and
 * the rule itself.  Anything still in flight stays on the list and a
 * callout retries in one second.
 */
5713 ipfw_crossref_reap(void)
5715 struct ip_fw *rule, *prev = NULL;
5719 rule = ipfw_gd.ipfw_crossref_free;
5720 while (rule != NULL) {
5721 uint64_t inflight = 0;
/* Sum outstanding cross references over all per-CPU duplicates. */
5724 for (i = 0; i < netisr_ncpus; ++i)
5725 inflight += rule->cross_rules[i]->cross_refs;
5726 if (inflight == 0) {
5727 struct ip_fw *f = rule;
/* Unlink f from the crossref free list before freeing. */
5736 ipfw_gd.ipfw_crossref_free = rule;
/* Free the duplicates on CPUs 1..n on their home CPU. */
5741 for (i = 1; i < netisr_ncpus; ++i) {
5742 struct netmsg_base nm;
5744 netmsg_init(&nm, NULL, &curthread->td_msgport,
5745 MSGF_PRIORITY, ipfw_crossref_free_dispatch);
5746 nm.lmsg.u.ms_resultp = f->cross_rules[i];
5747 netisr_domsg(&nm, i);
5749 KKASSERT((f->rule_flags &
5750 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
5751 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
/* Anything left in flight: retry reaping in one second. */
5759 if (ipfw_gd.ipfw_crossref_free != NULL) {
5760 callout_reset(&ipfw_gd.ipfw_crossref_ch, hz,
5761 ipfw_crossref_timeo, NULL);
5766 * {set|get}sockopt parser.
/*
 * Top-level dispatcher for the IP_FW_* and IP_FW_TBL_* socket options;
 * routes to the specific ipfw_ctl_* / ipfw_table_* handler and reaps
 * freeable cross-referenced rules before returning.
 */
5769 ipfw_ctl(struct sockopt *sopt)
5779 switch (sopt->sopt_name) {
5781 error = ipfw_ctl_get_rules(sopt);
5785 ipfw_flush(0 /* keep default rule */);
5789 error = ipfw_ctl_add_rule(sopt);
5794 * IP_FW_DEL is used for deleting single rules or sets,
5795 * and (ab)used to atomically manipulate sets.
5796 * Argument size is used to distinguish between the two:
5798 * delete single rule or set of rules,
5799 * or reassign rules (or sets) to a different set.
5800 * 2 * sizeof(uint32_t)
5801 * atomic disable/enable sets.
5802 * first uint32_t contains sets to be disabled,
5803 * second uint32_t contains sets to be enabled.
5805 masks = sopt->sopt_val;
5806 size = sopt->sopt_valsize;
5807 if (size == sizeof(*masks)) {
5809 * Delete or reassign static rule
5811 error = ipfw_ctl_alter(masks[0]);
5812 } else if (size == (2 * sizeof(*masks))) {
5814 * Set enable/disable
5816 ipfw_ctl_set_disable(masks[0], masks[1]);
5823 case IP_FW_RESETLOG: /* argument is an int, the rule number */
5826 if (sopt->sopt_val != 0) {
5827 error = soopt_to_kbuf(sopt, &rulenum,
5828 sizeof(int), sizeof(int));
5832 error = ipfw_ctl_zero_entry(rulenum,
5833 sopt->sopt_name == IP_FW_RESETLOG);
5836 case IP_FW_TBL_CREATE:
5837 error = ipfw_table_create(sopt);
5842 error = ipfw_table_alt(sopt);
5845 case IP_FW_TBL_FLUSH:
5846 case IP_FW_TBL_DESTROY:
5847 error = ipfw_table_flush(sopt);
5851 error = ipfw_table_get(sopt);
5854 case IP_FW_TBL_ZERO:
5855 error = ipfw_table_zero(sopt);
5858 case IP_FW_TBL_EXPIRE:
5859 error = ipfw_table_expire(sopt);
5863 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
/* Opportunistically free rules whose cross references drained. */
5867 ipfw_crossref_reap();
/*
 * Finish a keepalive scan on this CPU: clear the in-progress flag and
 * rearm the periodic keepalive callout.
 */
5872 ipfw_keepalive_done(struct ipfw_context *ctx)
5875 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
5876 ("keepalive is not in progress"))
5877 ctx->ipfw_flags &= ~IPFW_FLAG_KEEPALIVE;
5878 callout_reset(&ctx->ipfw_keepalive_ch, dyn_keepalive_period * hz,
5879 ipfw_keepalive, NULL);
/*
 * Reschedule the remainder of a keepalive scan on this CPU by sending
 * the pre-allocated "more" netmsg back to ourselves, yielding the
 * netisr between batches.
 */
5883 ipfw_keepalive_more(struct ipfw_context *ctx)
5885 struct netmsg_base *nm = &ctx->ipfw_keepalive_more;
5887 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
5888 ("keepalive is not in progress"));
5889 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
5890 ("keepalive more did not finish"));
5891 netisr_sendmsg_oncpu(nm);
/*
 * Scan dynamic states from the per-CPU anchor: expire dead states and
 * send TCP keepalive probes (in both directions) for established
 * connections nearing expiry.  Work is bounded per pass by
 * ipfw_state_scan_max / ipfw_state_expire_max / ipfw_keepalive_max;
 * when a bound is hit, ipfw_keepalive_more() reschedules and this
 * returns with the anchor left in place.
 */
5895 ipfw_keepalive_loop(struct ipfw_context *ctx, struct ipfw_state *anchor)
5897 struct ipfw_state *s;
5898 int scanned = 0, expired = 0, kept = 0;
5900 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
5901 ("keepalive is not in progress"));
5903 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
5904 uint32_t ack_rev, ack_fwd;
5905 struct ipfw_flow_id id;
5907 if (scanned++ >= ipfw_state_scan_max) {
5908 ipfw_keepalive_more(ctx);
/* Advance the anchor past the state we are about to examine. */
5912 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
5913 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
5915 if (s->st_type == O_ANCHOR)
5918 if (TIME_LEQ(s->st_expire, time_uptime)) {
5919 /* State expired. */
5920 ipfw_state_del(ctx, s);
5921 if (++expired >= ipfw_state_expire_max) {
5922 ipfw_keepalive_more(ctx);
5929 * Keep alive processing
5932 if (s->st_proto != IPPROTO_TCP)
/* Probe only fully established connections (SYNs seen both ways). */
5934 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN)
5936 if (TIME_LEQ(time_uptime + dyn_keepalive_interval,
5938 continue; /* too early */
5940 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port,
5941 &id.dst_ip, &id.dst_port);
5942 ack_rev = s->st_ack_rev;
5943 ack_fwd = s->st_ack_fwd;
/* One probe per direction, using ack-1 to elicit a peer ACK. */
5945 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN);
5946 send_pkt(&id, ack_fwd - 1, ack_rev, 0);
5948 if (++kept >= ipfw_keepalive_max) {
5949 ipfw_keepalive_more(ctx);
/* Scan complete: drop the anchor and rearm the callout. */
5953 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
5954 ipfw_keepalive_done(ctx);
/*
 * Netmsg handler that resumes a suspended keepalive scan on this CPU.
 * Replies to the message first so ipfw_keepalive_more() can reuse it,
 * then either aborts the pass (keepalive disabled or no states left)
 * or continues walking from the saved anchor.
 */
5958 ipfw_keepalive_more_dispatch(netmsg_t nm)
5960 	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5961 	struct ipfw_state *anchor;
5963 	ASSERT_NETISR_NCPUS(mycpuid);
5964 	KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
5965 	    ("keepalive is not in progress"));
	/* Reply ASAP so the msg can be re-sent while we keep scanning. */
5968 	netisr_replymsg(&nm->base, 0);
5970 	anchor = &ctx->ipfw_keepalive_anch;
5971 	if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
5972 		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
5973 		ipfw_keepalive_done(ctx);
5976 	ipfw_keepalive_loop(ctx, anchor);
5980  * This procedure is only used to handle keepalives. It is invoked
5981  * every dyn_keepalive_period
/*
 * Start a new keepalive pass on this CPU: mark the pass in progress,
 * reply to the triggering netmsg, and (if keepalive is enabled and
 * states exist) insert the per-CPU anchor at the list head and begin
 * the walk.
 */
5984 ipfw_keepalive_dispatch(netmsg_t nm)
5986 	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5987 	struct ipfw_state *anchor;
5989 	ASSERT_NETISR_NCPUS(mycpuid);
5990 	KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0,
5991 	    ("keepalive is in progress"));
5992 	ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE;
	/* Reply ASAP so the callout can fire the msg again later. */
5996 	netisr_replymsg(&nm->base, 0);
5999 	if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
6000 		ipfw_keepalive_done(ctx);
6004 	anchor = &ctx->ipfw_keepalive_anch;
6005 	TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
6006 	ipfw_keepalive_loop(ctx, anchor);
6010  * This procedure is only used to handle keepalives. It is invoked
6011  * every dyn_keepalive_period
/*
 * Callout handler (runs in callout context on each CPU): kick the
 * keepalive netmsg into the local netisr, but only if the previous
 * message has already been replied (MSGF_DONE) — otherwise a pass is
 * still pending and we must not resend.
 */
6014 ipfw_keepalive(void *dummy __unused)
6016 	struct netmsg_base *msg;
6018 	KKASSERT(mycpuid < netisr_ncpus);
6019 	msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm;
6022 	if (msg->lmsg.ms_flags & MSGF_DONE)
6023 		netisr_sendmsg_oncpu(msg);
/*
 * Netmsg handler on the target CPU for a packet forwarded by the
 * IP_FW_CONTINUE path in ipfw_check_in(): stash the sibling rule in
 * ctx->ipfw_cont_rule so the upcoming ipfw_chk() resumes from it,
 * then feed the mbuf back into IP input (the re-injection call sits
 * between these lines in the full source).
 */
6028 ipfw_ip_input_dispatch(netmsg_t nmsg)
6030 	struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
6031 	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6032 	struct mbuf *m = nm->m;
6033 	struct ip_fw *rule = nm->arg1;
6035 	ASSERT_NETISR_NCPUS(mycpuid);
6036 	KASSERT(rule->cpuid == mycpuid,
6037 	    ("rule does not belong to cpu%d", mycpuid));
6038 	KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
6039 	    ("mbuf does not have ipfw continue rule"));
6041 	KASSERT(ctx->ipfw_cont_rule == NULL,
6042 	    ("pending ipfw continue rule"));
6043 	ctx->ipfw_cont_rule = rule;
6047 	 * This rule is no longer used; decrement its cross_refs,
6048 	 * so this rule can be deleted.
6052 	/* May not be cleared, if ipfw was unload/disabled. */
6053 	ctx->ipfw_cont_rule = NULL;
/*
 * pfil(9) input hook: run the inbound packet through ipfw_chk() and
 * act on the verdict (pass/deny/dummynet/divert/tee/continue).
 * *m0 is updated in place; it is set to NULL (by lines elided here)
 * whenever the packet is consumed or dropped.
 */
6057 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
6059 	struct ip_fw_args args;
6060 	struct mbuf *m = *m0;
6062 	int tee = 0, error = 0, ret, cpuid;
6063 	struct netmsg_genpkt *nm;
6066 	if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
6067 		/* Extract info from dummynet tag */
6068 		mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
6069 		KKASSERT(mtag != NULL);
		/* Resume matching at the rule the pipe stored for us. */
6070 		args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
6071 		KKASSERT(args.rule != NULL);
6073 		m_tag_delete(m, mtag);
6074 		m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
6075 	} else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
6076 		struct ipfw_context *ctx = ipfw_ctx[mycpuid];
		/* Continuation rule parked by ipfw_ip_input_dispatch(). */
6078 		KKASSERT(ctx->ipfw_cont_rule != NULL);
6079 		args.rule = ctx->ipfw_cont_rule;
6080 		ctx->ipfw_cont_rule = NULL;
6083 		m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE;
6091 	ret = ipfw_chk(&args);
6109 	case IP_FW_DUMMYNET:
6110 		/* Send packet to the appropriate pipe */
6111 		m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args);
6120 		 * Must clear bridge tag when changing
6122 		m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
6123 		if (ip_divert_p != NULL) {
6124 			m = ip_divert_p(m, tee, 1);
6128 			/* not sure this is the right error msg */
6133 	case IP_FW_CONTINUE:
		/* Packet must continue matching on another CPU. */
6134 		KASSERT(m->m_flags & M_HASH, ("no hash"));
6135 		cpuid = netisr_hashcpu(m->m_pkthdr.hash);
6136 		KASSERT(cpuid != mycpuid,
6137 		    ("continue on the same cpu%d", cpuid));
6141 		 * Bump cross_refs to prevent this rule and its siblings
6142 		 * from being deleted, while this mbuf is inflight. The
6143 		 * cross_refs of the sibling rule on the target cpu will
6144 		 * be decremented, once this mbuf is going to be filtered
6145 		 * on the target cpu.
6147 		args.rule->cross_refs++;
6148 		m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
		/* Reuse the mbuf-embedded netmsg to ship it across CPUs. */
6150 		nm = &m->m_hdr.mh_genmsg;
6151 		netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
6152 		    ipfw_ip_input_dispatch);
6154 		nm->arg1 = args.rule->cross_rules[cpuid];
6155 		netisr_sendmsg(&nm->base, cpuid);
6157 		/* This mbuf is dispatched; no longer valid. */
6162 		panic("unknown ipfw return value: %d", ret);
/*
 * pfil(9) output hook: like ipfw_check_in() but for outbound packets
 * (DN_TO_IP_OUT for dummynet, divert direction 0).  No IP_FW_CONTINUE
 * handling here — output runs entirely on the current CPU.
 */
6170 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
6172 	struct ip_fw_args args;
6173 	struct mbuf *m = *m0;
6175 	int tee = 0, error = 0, ret;
6178 	if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
6179 		/* Extract info from dummynet tag */
6180 		mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
6181 		KKASSERT(mtag != NULL);
6182 		args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
6183 		KKASSERT(args.rule != NULL);
6185 		m_tag_delete(m, mtag);
6186 		m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
6194 	ret = ipfw_chk(&args);
6212 	case IP_FW_DUMMYNET:
6213 		m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args);
6221 		if (ip_divert_p != NULL) {
6222 			m = ip_divert_p(m, tee, 0);
6226 			/* not sure this is the right error msg */
6232 		panic("unknown ipfw return value: %d", ret);
/*
 * NOTE(review): function header elided above — presumably ipfw_hook().
 * Registers the ipfw input/output filters on the AF_INET pfil head.
 */
6242 	struct pfil_head *pfh;
6246 	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
6250 	pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
6251 	pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
/*
 * NOTE(review): function header elided above — presumably ipfw_dehook().
 * Unregisters the ipfw input/output filters from the AF_INET pfil head.
 */
6257 	struct pfil_head *pfh;
6261 	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
6265 	pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
6266 	pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
/*
 * Sysctl handler: report the total dynamic-object count — all per-CPU
 * states (collected by ipfw_state_cntcoll()) plus the global track
 * counter count.  Read-only.
 */
6270 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS)
6274 	dyn_cnt = ipfw_state_cntcoll();
6275 	dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt;
6277 	return (sysctl_handle_int(oidp, &dyn_cnt, 0, req));
/* Sysctl handler: report the state count summed across all CPUs. */
6281 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS)
6285 	state_cnt = ipfw_state_cntcoll();
6286 	return (sysctl_handle_int(oidp, &state_cnt, 0, req));
/*
 * Sysctl handler: get/set the per-CPU state limit.  On write the new
 * value is applied via ipfw_state_max_set() (validation and the early
 * return on read-only access sit on lines elided here).
 */
6290 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS)
6292 	int state_max, error;
6294 	state_max = ipfw_state_max;
6295 	error = sysctl_handle_int(oidp, &state_max, 0, req);
6296 	if (error || req->newptr == NULL)
6302 	ipfw_state_max_set(state_max);
/*
 * Sysctl handler (compat): get/set the combined dynamic limit.
 * Reported as state_max + track_max; on write the new total is split
 * evenly between the state and track limits.
 */
6307 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS)
6311 	dyn_max = ipfw_state_max + ipfw_track_max;
6313 	error = sysctl_handle_int(oidp, &dyn_max, 0, req);
6314 	if (error || req->newptr == NULL)
6320 	ipfw_state_max_set(dyn_max / 2);
6321 	ipfw_track_max = dyn_max / 2;
/*
 * Netmsg handler: apply a firewall enable/disable request in netisr
 * context.  No-ops when the state already matches; the actual
 * hook/dehook calls sit on lines elided here.
 */
6326 ipfw_sysctl_enable_dispatch(netmsg_t nmsg)
6328 	int enable = nmsg->lmsg.u.ms_result;
6332 	if (fw_enable == enable)
6341 	netisr_replymsg(&nmsg->base, 0);
/*
 * Sysctl handler for net.inet.ip.fw.enable: forwards the new value to
 * netisr0 via a synchronous netmsg so enabling/disabling happens in
 * protocol-thread context.
 */
6345 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS)
6347 	struct netmsg_base nmsg;
6351 	error = sysctl_handle_int(oidp, &enable, 0, req);
6352 	if (error || req->newptr == NULL)
6355 	netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6356 	    ipfw_sysctl_enable_dispatch);
6357 	nmsg.lmsg.u.ms_result = enable;
6359 	return netisr_domsg(&nmsg, 0);
/* Sysctl handler: clamp autoinc_step to its allowed min/max range. */
6363 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS)
6365 	return sysctl_int_range(oidp, arg1, arg2, req,
6366 	    IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX);
/* Sysctl handler: scan-count knobs must be positive (1..INT_MAX). */
6370 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS)
6373 	return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX);
/*
 * Sysctl handler for per-CPU statistics: arg2 is the byte offset of a
 * u_long counter inside struct ipfw_context.  Reading sums the field
 * across all netisr CPUs; writing any value zeroes it on every CPU.
 */
6377 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS)
6382 	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
6383 		stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2));
6385 	error = sysctl_handle_long(oidp, &stat, 0, req);
6386 	if (error || req->newptr == NULL)
6389 	/* Zero out this stat. */
6390 	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
6391 		*((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0;
/*
 * Per-CPU initialization, run on every netisr CPU in turn (the netmsg
 * is forwarded to the next CPU at the end): allocate and wire up the
 * per-CPU ipfw_context (state/track trees, expire and keepalive
 * callouts/netmsgs), then create and install that CPU's default rule
 * and link it to its siblings on the other CPUs.
 */
6396 ipfw_ctx_init_dispatch(netmsg_t nmsg)
6398 	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
6399 	struct ipfw_context *ctx;
6400 	struct ip_fw *def_rule;
6402 	ASSERT_NETISR_NCPUS(mycpuid);
	/* Context is sized for ipfw_table_max trailing table heads. */
6404 	ctx = kmalloc(__offsetof(struct ipfw_context,
6405 	    ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO);
6407 	RB_INIT(&ctx->ipfw_state_tree);
6408 	TAILQ_INIT(&ctx->ipfw_state_list);
6410 	RB_INIT(&ctx->ipfw_track_tree);
6411 	TAILQ_INIT(&ctx->ipfw_track_list);
6413 	callout_init_mp(&ctx->ipfw_stateto_ch);
6414 	netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport,
6415 	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch);
6416 	ctx->ipfw_stateexp_anch.st_type = O_ANCHOR;
6417 	netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport,
6418 	    MSGF_DROPABLE, ipfw_state_expire_more_dispatch);
6420 	callout_init_mp(&ctx->ipfw_trackto_ch);
6421 	netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport,
6422 	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch);
6423 	netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport,
6424 	    MSGF_DROPABLE, ipfw_track_expire_more_dispatch);
6426 	callout_init_mp(&ctx->ipfw_keepalive_ch);
6427 	netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport,
6428 	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch);
6429 	ctx->ipfw_keepalive_anch.st_type = O_ANCHOR;
6430 	netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport,
6431 	    MSGF_DROPABLE, ipfw_keepalive_more_dispatch);
6433 	ipfw_ctx[mycpuid] = ctx;
	/* Build this CPU's copy of the immutable default (catch-all) rule. */
6435 	def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO);
6437 	def_rule->act_ofs = 0;
6438 	def_rule->rulenum = IPFW_DEFAULT_RULE;
6439 	def_rule->cmd_len = 1;
6440 	def_rule->set = IPFW_DEFAULT_SET;
6442 	def_rule->cmd[0].len = 1;
6443 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
6444 	def_rule->cmd[0].opcode = O_ACCEPT;
6446 	if (filters_default_to_accept)
6447 		def_rule->cmd[0].opcode = O_ACCEPT;
6449 		def_rule->cmd[0].opcode = O_DENY;
6452 	def_rule->refcnt = 1;
6453 	def_rule->cpuid = mycpuid;
6455 	/* Install the default rule */
6456 	ctx->ipfw_default_rule = def_rule;
6457 	ctx->ipfw_layer3_chain = def_rule;
6459 	/* Link rule CPU sibling */
6460 	ipfw_link_sibling(fwmsg, def_rule);
6462 	/* Statistics only need to be updated once */
6464 	ipfw_inc_static_count(def_rule);
	/* Continue initialization on the next CPU. */
6466 	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Netmsg handler: reply first (so the callout may resend the message),
 * then garbage-collect rules whose cross-CPU references have drained.
 */
6470 ipfw_crossref_reap_dispatch(netmsg_t nmsg)
6475 	netisr_replymsg(&nmsg->base, 0);
6477 	ipfw_crossref_reap();
/*
 * Callout handler (CPU0 only): periodically kick the crossref-reap
 * netmsg into the netisr, but only when the previous one has finished
 * (MSGF_DONE) to avoid resending an in-flight message.
 */
6481 ipfw_crossref_timeo(void *dummy __unused)
6483 	struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm;
6485 	KKASSERT(mycpuid == 0);
6488 	if (msg->lmsg.ms_flags & MSGF_DONE)
6489 		netisr_sendmsg_oncpu(msg);
/*
 * One-shot module initialization, run in netisr context: set up global
 * state (track tree, crossref GC), create the per-CPU contexts and
 * default rules on every CPU, install the ip_fw function pointers,
 * print the banner, and arm the per-CPU expire/keepalive callouts.
 * Bails out early if the firewall is already loaded.
 */
6494 ipfw_init_dispatch(netmsg_t nmsg)
6496 	struct netmsg_ipfw fwmsg;
6502 		kprintf("IP firewall already loaded\n");
	/* Clamp table count to a sane range before sizing contexts. */
6507 	if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0)
6508 		ipfw_table_max = UINT16_MAX;
6510 	/* Initialize global track tree. */
6511 	RB_INIT(&ipfw_gd.ipfw_trkcnt_tree);
6512 	IPFW_TRKCNT_TOKINIT;
6514 	/* GC for freed crossref rules. */
6515 	callout_init_mp(&ipfw_gd.ipfw_crossref_ch);
6516 	netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport,
6517 	    MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch);
6519 	ipfw_state_max_set(ipfw_state_max);
6520 	ipfw_state_headroom = 8 * netisr_ncpus;
	/* Run ipfw_ctx_init_dispatch() on every netisr CPU in sequence. */
6522 	bzero(&fwmsg, sizeof(fwmsg));
6523 	netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6524 	    ipfw_ctx_init_dispatch);
6525 	netisr_domsg_global(&fwmsg.base);
6527 	ip_fw_chk_ptr = ipfw_chk;
6528 	ip_fw_ctl_ptr = ipfw_ctl;
6529 	ip_fw_dn_io_ptr = ipfw_dummynet_io;
6531 	kprintf("ipfw2 initialized, default to %s, logging ",
6532 	    ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode ==
6533 	    O_ACCEPT ? "accept" : "deny");
6535 #ifdef IPFIREWALL_VERBOSE
6538 #ifdef IPFIREWALL_VERBOSE_LIMIT
6539 	verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
6541 	if (fw_verbose == 0) {
6542 		kprintf("disabled\n");
6543 	} else if (verbose_limit == 0) {
6544 		kprintf("unlimited\n");
6546 		kprintf("limited to %d packets/entry by default\n",
	/* Arm the periodic state/track expire and keepalive callouts. */
6551 	for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
6552 		callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz,
6553 		    ipfw_state_expire_ipifunc, NULL, cpu);
6554 		callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz,
6555 		    ipfw_track_expire_ipifunc, NULL, cpu);
6556 		callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz,
6557 		    ipfw_keepalive, NULL, cpu);
6563 	netisr_replymsg(&nmsg->base, error);
/*
 * NOTE(review): function header elided above — presumably ipfw_init().
 * Synchronously runs ipfw_init_dispatch() in netisr0 and returns its
 * error code.
 */
6569 	struct netmsg_base smsg;
6571 	netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6572 	    ipfw_init_dispatch);
6573 	return netisr_domsg(&smsg, 0);
/*
 * Per-CPU teardown, run on every netisr CPU in turn: stop this CPU's
 * callouts, drop any still-queued dropable netmsgs, flush the tables,
 * then forward to the next CPU.  Context memory itself is freed later
 * by ipfw_fini_dispatch().
 */
6579 ipfw_ctx_fini_dispatch(netmsg_t nmsg)
6581 	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6583 	ASSERT_NETISR_NCPUS(mycpuid);
6585 	callout_stop_sync(&ctx->ipfw_stateto_ch);
6586 	callout_stop_sync(&ctx->ipfw_trackto_ch);
6587 	callout_stop_sync(&ctx->ipfw_keepalive_ch);
	/* Discard the dropable expire/keepalive messages if still queued. */
6590 	netisr_dropmsg(&ctx->ipfw_stateexp_more);
6591 	netisr_dropmsg(&ctx->ipfw_stateexp_nm);
6592 	netisr_dropmsg(&ctx->ipfw_trackexp_more);
6593 	netisr_dropmsg(&ctx->ipfw_trackexp_nm);
6594 	netisr_dropmsg(&ctx->ipfw_keepalive_more);
6595 	netisr_dropmsg(&ctx->ipfw_keepalive_nm);
6598 	ipfw_table_flushall_oncpu(ctx, 1);
6600 	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Module unload, run in netisr context: refuse if rules are still
 * referenced, quiesce IPIs, run per-CPU teardown on every CPU, stop
 * the crossref GC, clear the ip_fw function pointers, flush all rules
 * (including the default rule), and free the per-CPU contexts.
 */
6604 ipfw_fini_dispatch(netmsg_t nmsg)
6606 	struct netmsg_base nm;
6611 	ipfw_crossref_reap();
	/* Abort unload while any rule is still referenced. */
6613 	if (ipfw_gd.ipfw_refcnt != 0) {
6621 	/* Synchronize any inflight state/track expire IPIs. */
6622 	lwkt_synchronize_ipiqs("ipfwfini");
6624 	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6625 	    ipfw_ctx_fini_dispatch);
6626 	netisr_domsg_global(&nm);
6628 	callout_stop_sync(&ipfw_gd.ipfw_crossref_ch);
6630 	netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm);
6633 	ip_fw_chk_ptr = NULL;
6634 	ip_fw_ctl_ptr = NULL;
6635 	ip_fw_dn_io_ptr = NULL;
6636 	ipfw_flush(1 /* kill default rule */);
6638 	/* Free pre-cpu context */
6639 	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
6640 		kfree(ipfw_ctx[cpu], M_IPFW);
6642 	kprintf("IP firewall unloaded\n");
6644 	netisr_replymsg(&nmsg->base, error);
/*
 * NOTE(review): function header elided above — presumably ipfw_fini()
 * (KLD module only, per the #endif below).  Synchronously runs
 * ipfw_fini_dispatch() in netisr0 and returns its error code.
 */
6650 	struct netmsg_base smsg;
6652 	netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6653 	    ipfw_fini_dispatch);
6654 	return netisr_domsg(&smsg, 0);
6657 #endif /* KLD_MODULE */
/*
 * Module event handler: handles MOD_LOAD/MOD_UNLOAD (bodies elided in
 * this view); unloading is rejected when ipfw is compiled statically.
 */
6660 ipfw_modevent(module_t mod, int type, void *unused)
6671 		kprintf("ipfw statically compiled, cannot unload\n");
/* Module glue: register ipfw at protocol-end initialization time. */
6683 static moduledata_t ipfwmod = {
6688 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY);
6689 MODULE_VERSION(ipfw, 1);