2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
29 * Implement IP packet firewall (new version)
35 #error IPFIREWALL requires INET.
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
42 #include <sys/kernel.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/ucred.h>
49 #include <sys/in_cksum.h>
50 #include <sys/limits.h>
55 #include <net/route.h>
57 #include <net/dummynet/ip_dummynet.h>
59 #include <sys/thread2.h>
60 #include <net/netmsg2.h>
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in_var.h>
65 #include <netinet/in_pcb.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/ip_icmp.h>
69 #include <netinet/tcp.h>
70 #include <netinet/tcp_seq.h>
71 #include <netinet/tcp_timer.h>
72 #include <netinet/tcp_var.h>
73 #include <netinet/tcpip.h>
74 #include <netinet/udp.h>
75 #include <netinet/udp_var.h>
76 #include <netinet/ip_divert.h>
77 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
79 #include <net/ipfw/ip_fw2.h>
81 #ifdef IPFIREWALL_DEBUG
82 #define DPRINTF(fmt, ...) \
85 kprintf(fmt, __VA_ARGS__); \
88 #define DPRINTF(fmt, ...) ((void)0)
92 * Description about per-CPU rule duplication:
94 * Module loading/unloading and all ioctl operations are serialized
95 * by netisr0, so we don't have any ordering or locking problems.
97 * Following graph shows how operation on per-CPU rule list is
98 * performed [2 CPU case]:
102 * netisr0 <------------------------------------+
108 * forwardmsg---------->netisr1 |
113 * replymsg--------------+
117 * Rule structure [2 CPU case]
121 * layer3_chain layer3_chain
124 * +-------+ sibling +-------+ sibling
125 * | rule1 |--------->| rule1 |--------->NULL
126 * +-------+ +-------+
130 * +-------+ sibling +-------+ sibling
131 * | rule2 |--------->| rule2 |--------->NULL
132 * +-------+ +-------+
135 * 1) Ease statistics calculation during IP_FW_GET. We only need to
136 * iterate layer3_chain in netisr0; the current rule's duplication
137 * to the other CPUs could safely be read-only accessed through
139 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
140 * a) In netisr0 rule3 is determined to be inserted between rule1
141 * and rule2. To make this decision we need to iterate the
142 * layer3_chain in netisr0. The netmsg, which is used to insert
143 * the rule, will contain rule1 in netisr0 as prev_rule and rule2
144 * in netisr0 as next_rule.
145 * b) After the insertion in netisr0 is done, we will move on to
146 * netisr1. But instead of relocating the rule3's position in
147 * netisr1 by iterating the layer3_chain in netisr1, we set the
148 * netmsg's prev_rule to rule1->sibling and next_rule to
149 * rule2->sibling before the netmsg is forwarded to netisr1 from
154 * Description of states and tracks.
156 * Both states and tracks are stored in per-cpu RB trees instead of
157 * per-cpu hash tables to avoid the worst case hash degeneration.
159 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
160 * measured in seconds and depending on the flags.
162 * When a packet is received, its address fields are first masked with
163 * the mask defined for the rule, then matched against the entries in
164 * the per-cpu state RB tree. States are generated by 'keep-state'
165 * and 'limit' options.
167 * The max number of states is ipfw_state_max. When we reach the
168 * maximum number of states we do not create anymore. This is done to
169 * avoid consuming too much memory, but also too much time when
170 * searching on each packet.
172 * Each state holds a pointer to the parent ipfw rule of the current
173 * CPU so we know what action to perform. States are removed when the
174 * parent rule is deleted. XXX we should make them survive.
176 * There are some limitations with states -- we do not obey the
177 * 'randomized match', and we do not do multiple passes through the
178 * firewall. XXX check the latter!!!
180 * States grow independently on each CPU, e.g. 2 CPU case:
183 * ................... ...................
184 * : state RB tree : : state RB tree :
186 * : state1 state2 : : state3 :
188 * :.....|....|......: :........|........:
193 * +-------+ +-------+
194 * | rule1 | | rule1 |
195 * +-------+ +-------+
197 * Tracks are used to enforce limits on the number of sessions. Tracks
198 * are generated by 'limit' option.
200 * The max number of tracks is ipfw_track_max. When we reach the
201 * maximum number of tracks we do not create anymore. This is done to
202 * avoid consuming too much memory.
204 * Tracks are organized into two layers, track counter RB tree is
205 * shared between CPUs, track RB tree is per-cpu. States generated by
206 * 'limit' option are linked to the track in addition to the per-cpu
207 * state RB tree; mainly to ease expiration. e.g. 2 CPU case:
209 * ..............................
210 * : track counter RB tree :
215 * : +--->counter<----+ :
217 * : | +-----------+ | :
218 * :......|................|....:
221 * ................. |t_count | .................
222 * : track RB tree : | | : track RB tree :
224 * : +-->track1-------+ +--------track2 :
227 * :.|.....|.......: :...............:
228 * | +----------------+
229 * | .................... |
230 * | : state RB tree : |st_track
232 * +---state1 state2---+
234 * :.....|.......|....:
243 #define IPFW_AUTOINC_STEP_MIN 1
244 #define IPFW_AUTOINC_STEP_MAX 1000
245 #define IPFW_AUTOINC_STEP_DEF 100
247 #define IPFW_TABLE_MAX_DEF 64
249 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */
250 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */
252 #define MATCH_REVERSE 0
253 #define MATCH_FORWARD 1
255 #define MATCH_UNKNOWN 3
257 #define TIME_LEQ(a, b) ((a) - (b) <= 0)
259 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST)
260 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \
261 (IPFW_STATE_TCPFLAGS << 8))
263 #define BOTH_SYN (TH_SYN | (TH_SYN << 8))
264 #define BOTH_FIN (TH_FIN | (TH_FIN << 8))
265 #define BOTH_RST (TH_RST | (TH_RST << 8))
266 /* TH_ACK here means FIN was ACKed. */
267 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8))
269 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \
270 (((s)->st_state & BOTH_RST) || \
271 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))
273 #define O_ANCHOR O_NOP
275 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT)
276 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \
277 ((struct ipfw_xlat *)(s))->xlat_invalid)
279 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1
280 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2
282 #define IPFW_XLATE_INSERT 0x0001
283 #define IPFW_XLATE_FORWARD 0x0002
284 #define IPFW_XLATE_OUTPUT 0x0004
287 struct netmsg_base base;
288 const struct ipfw_ioc_rule *ioc_rule;
289 struct ip_fw *next_rule;
290 struct ip_fw *prev_rule;
291 struct ip_fw *sibling;
293 struct ip_fw **cross_rules;
297 struct netmsg_base base;
298 struct ip_fw *start_rule;
299 struct ip_fw *prev_rule;
306 struct netmsg_base base;
307 struct ip_fw *start_rule;
312 struct netmsg_cpstate {
313 struct netmsg_base base;
314 struct ipfw_ioc_state *ioc_state;
319 struct netmsg_tblent {
320 struct netmsg_base base;
321 struct sockaddr *key;
322 struct sockaddr *netmask;
323 struct ipfw_tblent *sibling;
327 struct netmsg_tblflush {
328 struct netmsg_base base;
333 struct netmsg_tblexp {
334 struct netmsg_base base;
339 struct radix_node_head *rnh;
342 struct ipfw_table_cp {
343 struct ipfw_ioc_tblent *te;
350 * offset The offset of a fragment. offset != 0 means that
351 * we have a fragment at this offset of an IPv4 packet.
352 * offset == 0 means that (if this is an IPv4 packet)
353 * this is the first or only fragment.
358 * Local copies of addresses. They are only valid if we have
361 * proto The protocol. Set to 0 for non-ip packets,
362 * or to the protocol read from the packet otherwise.
363 * proto != 0 means that we have an IPv4 packet.
365 * src_port, dst_port port numbers, in HOST format. Only
366 * valid for TCP and UDP packets.
368 * src_ip, dst_ip ip addresses, in NETWORK format.
369 * Only valid for IPv4 packets.
372 uint16_t src_port; /* NOTE: host format */
373 uint16_t dst_port; /* NOTE: host format */
374 struct in_addr src_ip; /* NOTE: network format */
375 struct in_addr dst_ip; /* NOTE: network format */
381 uint32_t addr1; /* host byte order */
382 uint32_t addr2; /* host byte order */
386 uint16_t port1; /* host byte order */
387 uint16_t port2; /* host byte order */
392 struct ipfw_addrs addrs;
396 struct ipfw_ports ports;
400 uint8_t swap; /* IPFW_KEY_SWAP_ */
404 #define IPFW_KEY_SWAP_ADDRS 0x1
405 #define IPFW_KEY_SWAP_PORTS 0x2
406 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)
409 RB_ENTRY(ipfw_trkcnt) tc_rblink;
410 struct ipfw_key tc_key;
414 time_t tc_expire; /* userland get-only */
415 uint16_t tc_rulenum; /* userland get-only */
418 #define tc_addrs tc_key.addr_u.value
419 #define tc_ports tc_key.port_u.value
420 #define tc_proto tc_key.proto
421 #define tc_saddr tc_key.addr_u.addrs.addr1
422 #define tc_daddr tc_key.addr_u.addrs.addr2
423 #define tc_sport tc_key.port_u.ports.port1
424 #define tc_dport tc_key.port_u.ports.port2
426 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt);
431 RB_ENTRY(ipfw_track) t_rblink;
432 struct ipfw_key t_key;
433 struct ip_fw *t_rule;
435 LIST_HEAD(, ipfw_state) t_state_list;
437 volatile int *t_count;
438 struct ipfw_trkcnt *t_trkcnt;
439 TAILQ_ENTRY(ipfw_track) t_link;
442 #define t_addrs t_key.addr_u.value
443 #define t_ports t_key.port_u.value
444 #define t_proto t_key.proto
445 #define t_saddr t_key.addr_u.addrs.addr1
446 #define t_daddr t_key.addr_u.addrs.addr2
447 #define t_sport t_key.port_u.ports.port1
448 #define t_dport t_key.port_u.ports.port2
450 RB_HEAD(ipfw_track_tree, ipfw_track);
451 TAILQ_HEAD(ipfw_track_list, ipfw_track);
454 RB_ENTRY(ipfw_state) st_rblink;
455 struct ipfw_key st_key;
457 time_t st_expire; /* expire time */
458 struct ip_fw *st_rule;
460 uint64_t st_pcnt; /* packets */
461 uint64_t st_bcnt; /* bytes */
465 * State of this rule, typically a combination of TCP flags.
467 * st_ack_fwd/st_ack_rev:
468 * Most recent ACKs in forward and reverse direction. They
469 * are used to generate keepalives.
472 uint32_t st_ack_fwd; /* host byte order */
473 uint32_t st_seq_fwd; /* host byte order */
474 uint32_t st_ack_rev; /* host byte order */
475 uint32_t st_seq_rev; /* host byte order */
477 uint16_t st_flags; /* IPFW_STATE_F_ */
478 uint16_t st_type; /* KEEP_STATE/LIMIT/RDR */
479 struct ipfw_track *st_track;
481 LIST_ENTRY(ipfw_state) st_trklink;
482 TAILQ_ENTRY(ipfw_state) st_link;
485 #define st_addrs st_key.addr_u.value
486 #define st_ports st_key.port_u.value
487 #define st_proto st_key.proto
488 #define st_swap st_key.swap
490 #define IPFW_STATE_F_ACKFWD 0x0001
491 #define IPFW_STATE_F_SEQFWD 0x0002
492 #define IPFW_STATE_F_ACKREV 0x0004
493 #define IPFW_STATE_F_SEQREV 0x0008
494 #define IPFW_STATE_F_XLATSRC 0x0010
495 #define IPFW_STATE_F_XLATSLAVE 0x0020
496 #define IPFW_STATE_F_LINKED 0x0040
498 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \
499 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE))
501 /* Expired or being deleted. */
502 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \
503 IPFW_XLAT_INVALID((s)))
505 TAILQ_HEAD(ipfw_state_list, ipfw_state);
506 RB_HEAD(ipfw_state_tree, ipfw_state);
509 struct ipfw_state xlat_st; /* MUST be the first field */
510 uint32_t xlat_addr; /* network byte order */
511 uint16_t xlat_port; /* network byte order */
512 uint16_t xlat_dir; /* MATCH_ */
513 struct ifnet *xlat_ifp; /* matching ifnet */
514 struct ipfw_xlat *xlat_pair; /* paired state */
515 int xlat_pcpu; /* paired cpu */
516 volatile int xlat_invalid; /* invalid, but not dtor yet */
517 volatile uint64_t xlat_crefs; /* cross references */
518 struct netmsg_base xlat_freenm; /* for remote free */
521 #define xlat_type xlat_st.st_type
522 #define xlat_flags xlat_st.st_flags
523 #define xlat_rule xlat_st.st_rule
524 #define xlat_bcnt xlat_st.st_bcnt
525 #define xlat_pcnt xlat_st.st_pcnt
528 struct radix_node te_nodes[2];
529 struct sockaddr_in te_key;
532 struct ipfw_tblent *te_sibling;
533 volatile int te_expired;
536 struct ipfw_context {
537 struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */
538 struct ip_fw *ipfw_default_rule; /* default rule */
539 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat*/
542 * ipfw_set_disable contains one bit per set value (0..31).
543 * If the bit is set, all rules with the corresponding set
544 * are disabled. Set IPDW_DEFAULT_SET is reserved for the
545 * default rule and CANNOT be disabled.
547 uint32_t ipfw_set_disable;
549 uint8_t ipfw_flags; /* IPFW_FLAG_ */
551 struct ip_fw *ipfw_cont_rule;
552 struct ipfw_xlat *ipfw_cont_xlat;
554 struct ipfw_state_tree ipfw_state_tree;
555 struct ipfw_state_list ipfw_state_list;
556 int ipfw_state_loosecnt;
560 struct ipfw_state state;
561 struct ipfw_track track;
562 struct ipfw_trkcnt trkcnt;
565 struct ipfw_track_tree ipfw_track_tree;
566 struct ipfw_track_list ipfw_track_list;
567 struct ipfw_trkcnt *ipfw_trkcnt_spare;
569 struct callout ipfw_stateto_ch;
570 time_t ipfw_state_lastexp;
571 struct netmsg_base ipfw_stateexp_nm;
572 struct netmsg_base ipfw_stateexp_more;
573 struct ipfw_state ipfw_stateexp_anch;
575 struct callout ipfw_trackto_ch;
576 time_t ipfw_track_lastexp;
577 struct netmsg_base ipfw_trackexp_nm;
578 struct netmsg_base ipfw_trackexp_more;
579 struct ipfw_track ipfw_trackexp_anch;
581 struct callout ipfw_keepalive_ch;
582 struct netmsg_base ipfw_keepalive_nm;
583 struct netmsg_base ipfw_keepalive_more;
584 struct ipfw_state ipfw_keepalive_anch;
586 struct callout ipfw_xlatreap_ch;
587 struct netmsg_base ipfw_xlatreap_nm;
588 struct ipfw_state_list ipfw_xlatreap;
593 u_long ipfw_sts_reap;
594 u_long ipfw_sts_reapfailed;
595 u_long ipfw_sts_overflow;
596 u_long ipfw_sts_nomem;
597 u_long ipfw_sts_tcprecycled;
599 u_long ipfw_tks_nomem;
600 u_long ipfw_tks_reap;
601 u_long ipfw_tks_reapfailed;
602 u_long ipfw_tks_overflow;
603 u_long ipfw_tks_cntnomem;
606 u_long ipfw_defraged;
607 u_long ipfw_defrag_remote;
610 u_long ipfw_xlate_split;
611 u_long ipfw_xlate_conflicts;
612 u_long ipfw_xlate_cresolved;
615 struct radix_node_head *ipfw_tables[];
618 #define IPFW_FLAG_KEEPALIVE 0x01
619 #define IPFW_FLAG_STATEEXP 0x02
620 #define IPFW_FLAG_TRACKEXP 0x04
621 #define IPFW_FLAG_STATEREAP 0x08
622 #define IPFW_FLAG_TRACKREAP 0x10
624 #define ipfw_state_tmpkey ipfw_tmpkey.state
625 #define ipfw_track_tmpkey ipfw_tmpkey.track
626 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt
629 int ipfw_state_loosecnt; /* cache aligned */
630 time_t ipfw_state_globexp __cachealign;
632 struct lwkt_token ipfw_trkcnt_token __cachealign;
633 struct ipfw_trkcnt_tree ipfw_trkcnt_tree;
635 time_t ipfw_track_globexp;
637 /* Accessed in netisr0. */
638 struct ip_fw *ipfw_crossref_free __cachealign;
639 struct callout ipfw_crossref_ch;
640 struct netmsg_base ipfw_crossref_nm;
644 * Module can not be unloaded, if there are references to
645 * certains rules of ipfw(4), e.g. dummynet(4)
647 int ipfw_refcnt __cachealign;
651 static struct ipfw_context *ipfw_ctx[MAXCPU];
653 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
656 * Following two global variables are accessed and updated only
659 static uint32_t static_count; /* # of static rules */
660 static uint32_t static_ioc_len; /* bytes of static rules */
663 * If 1, then ipfw static rules are being flushed,
664 * ipfw_chk() will skip to the default rule.
666 static int ipfw_flushing;
668 static int fw_verbose;
669 static int verbose_limit;
672 static int autoinc_step = IPFW_AUTOINC_STEP_DEF;
674 static int ipfw_table_max = IPFW_TABLE_MAX_DEF;
676 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS);
677 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS);
679 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max);
681 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
682 SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0,
683 "Firewall statistics");
685 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
686 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw");
687 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW,
688 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I",
689 "Rule number autincrement step");
690 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW,
692 "Only do a single pass through ipfw when using dummynet(4)");
693 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
694 &fw_debug, 0, "Enable printing of debug ip_fw statements");
695 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
696 &fw_verbose, 0, "Log matches to ipfw rules");
697 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
698 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
699 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD,
700 &ipfw_table_max, 0, "Max # of tables");
702 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS);
703 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS);
704 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS);
705 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS);
706 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS);
707 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS);
710 * Timeouts for various events in handing states.
714 * 2 == 1~2 second(s).
716 * We use 2 seconds for FIN lifetime, so that the states will not be
717 * ripped prematurely.
719 static uint32_t dyn_ack_lifetime = 300;
720 static uint32_t dyn_syn_lifetime = 20;
721 static uint32_t dyn_finwait_lifetime = 20;
722 static uint32_t dyn_fin_lifetime = 2;
723 static uint32_t dyn_rst_lifetime = 2;
724 static uint32_t dyn_udp_lifetime = 10;
725 static uint32_t dyn_short_lifetime = 5; /* used by tracks too */
728 * Keepalives are sent if dyn_keepalive is set. They are sent every
729 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
730 * seconds of lifetime of a rule.
732 static uint32_t dyn_keepalive_interval = 20;
733 static uint32_t dyn_keepalive_period = 5;
734 static uint32_t dyn_keepalive = 1; /* do send keepalives */
736 static struct ipfw_global ipfw_gd;
737 static int ipfw_state_loosecnt_updthr;
738 static int ipfw_state_max = 4096; /* max # of states */
739 static int ipfw_track_max = 4096; /* max # of tracks */
741 static int ipfw_state_headroom; /* setup at module load time */
742 static int ipfw_state_reap_min = 8;
743 static int ipfw_state_expire_max = 32;
744 static int ipfw_state_scan_max = 256;
745 static int ipfw_keepalive_max = 8;
746 static int ipfw_track_reap_max = 4;
747 static int ipfw_track_expire_max = 16;
748 static int ipfw_track_scan_max = 128;
750 static eventhandler_tag ipfw_ifaddr_event;
753 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count,
754 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I",
755 "Number of states and tracks");
756 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
757 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I",
758 "Max number of states and tracks");
760 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt,
761 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I",
763 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max,
764 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I",
765 "Max number of states");
766 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW,
767 &ipfw_state_headroom, 0, "headroom for state reap");
768 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD,
769 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks");
770 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW,
771 &ipfw_track_max, 0, "Max number of tracks");
772 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
773 &static_count, 0, "Number of static rules");
774 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
775 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
776 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
777 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
778 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
779 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
780 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW,
781 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait");
782 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
783 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
784 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
785 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
786 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
787 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
788 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
789 &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
790 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max,
791 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt,
792 "I", "# of states to scan for each expire iteration");
793 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max,
794 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt,
795 "I", "# of states to expire for each expire iteration");
796 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max,
797 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt,
798 "I", "# of states to expire for each expire iteration");
799 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min,
800 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt,
801 "I", "# of states to reap for state shortage");
802 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max,
803 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt,
804 "I", "# of tracks to scan for each expire iteration");
805 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max,
806 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt,
807 "I", "# of tracks to expire for each expire iteration");
808 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max,
809 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt,
810 "I", "# of tracks to reap for track shortage");
812 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap,
813 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
814 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat,
815 "LU", "# of state reaps due to states shortage");
816 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed,
817 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
818 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat,
819 "LU", "# of state reap failure");
820 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow,
821 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
822 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat,
823 "LU", "# of state overflow");
824 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem,
825 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
826 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat,
827 "LU", "# of state allocation failure");
828 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled,
829 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
830 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat,
831 "LU", "# of state deleted due to fast TCP port recycling");
833 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem,
834 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
835 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat,
836 "LU", "# of track allocation failure");
837 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap,
838 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
839 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat,
840 "LU", "# of track reap due to tracks shortage");
841 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed,
842 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
843 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat,
844 "LU", "# of track reap failure");
845 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow,
846 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
847 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat,
848 "LU", "# of track overflow");
849 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem,
850 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
851 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat,
852 "LU", "# of track counter allocation failure");
853 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags,
854 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
855 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat,
856 "LU", "# of IP fragements defraged");
857 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged,
858 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
859 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat,
860 "LU", "# of IP packets after defrag");
861 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote,
862 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
863 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat,
864 "LU", "# of IP packets after defrag dispatched to remote cpus");
865 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlated,
866 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
867 __offsetof(struct ipfw_context, ipfw_xlated), ipfw_sysctl_stat,
868 "LU", "# address/port translations");
869 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_split,
870 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
871 __offsetof(struct ipfw_context, ipfw_xlate_split), ipfw_sysctl_stat,
872 "LU", "# address/port translations split between different cpus");
873 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_conflicts,
874 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
875 __offsetof(struct ipfw_context, ipfw_xlate_conflicts), ipfw_sysctl_stat,
876 "LU", "# address/port translations conflicts on remote cpu");
877 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_cresolved,
878 CTLTYPE_ULONG | CTLFLAG_RW, NULL,
879 __offsetof(struct ipfw_context, ipfw_xlate_cresolved), ipfw_sysctl_stat,
880 "LU", "# address/port translations conflicts resolved on remote cpu");
882 static int ipfw_state_cmp(struct ipfw_state *,
883 struct ipfw_state *);
884 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *,
885 struct ipfw_trkcnt *);
886 static int ipfw_track_cmp(struct ipfw_track *,
887 struct ipfw_track *);
889 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp);
890 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp);
892 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp);
893 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp);
895 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp);
896 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp);
898 static int ipfw_chk(struct ip_fw_args *);
899 static void ipfw_track_expire_ipifunc(void *);
900 static void ipfw_state_expire_ipifunc(void *);
901 static void ipfw_keepalive(void *);
902 static int ipfw_state_expire_start(struct ipfw_context *,
904 static void ipfw_crossref_timeo(void *);
905 static void ipfw_state_remove(struct ipfw_context *,
906 struct ipfw_state *);
907 static void ipfw_xlat_reap_timeo(void *);
908 static void ipfw_defrag_redispatch(struct mbuf *, int,
911 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token)
912 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token)
913 #define IPFW_TRKCNT_TOKINIT \
914 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt");
917 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
918 const struct sockaddr *netmask)
920 const u_char *cp1 = (const u_char *)src;
921 u_char *cp2 = (u_char *)dst;
922 const u_char *cp3 = (const u_char *)netmask;
923 u_char *cplim = cp2 + *cp3;
924 u_char *cplim2 = cp2 + *cp1;
926 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
931 *cp2++ = *cp1++ & *cp3++;
933 bzero(cp2, cplim2 - cp2);
static __inline uint16_t
pfil_cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new, uint8_t udp)
{
	/*
	 * Incrementally update an Internet checksum after one 16-bit
	 * word changes from 'old' to 'new' (RFC 1624 style; same as
	 * pf_cksum_fixup() in pf).  For UDP ('udp' != 0) a checksum of
	 * zero means "no checksum", so 0 in stays 0, and a computed 0
	 * is represented as 0xFFFF.
	 */
	uint32_t l;

	if (udp && !cksum)
		return (0x0000);
	l = cksum + old - new;
	l = (l >> 16) + (l & 65535);	/* fold carry back in */
	l = l & 65535;
	if (udp && !l)
		return (0xFFFF);
	return (l);
}
952 ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport,
953 in_addr_t daddr, uint16_t dport, uint8_t proto)
960 key->addr_u.addrs.addr1 = daddr;
961 key->addr_u.addrs.addr2 = saddr;
962 key->swap |= IPFW_KEY_SWAP_ADDRS;
964 key->addr_u.addrs.addr1 = saddr;
965 key->addr_u.addrs.addr2 = daddr;
969 key->port_u.ports.port1 = dport;
970 key->port_u.ports.port2 = sport;
971 key->swap |= IPFW_KEY_SWAP_PORTS;
973 key->port_u.ports.port1 = sport;
974 key->port_u.ports.port2 = dport;
977 if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS))
978 key->swap |= IPFW_KEY_SWAP_PORTS;
979 if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS))
980 key->swap |= IPFW_KEY_SWAP_ADDRS;
984 ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport,
985 in_addr_t *daddr, uint16_t *dport)
988 if (key->swap & IPFW_KEY_SWAP_ADDRS) {
989 *saddr = key->addr_u.addrs.addr2;
990 *daddr = key->addr_u.addrs.addr1;
992 *saddr = key->addr_u.addrs.addr1;
993 *daddr = key->addr_u.addrs.addr2;
996 if (key->swap & IPFW_KEY_SWAP_PORTS) {
997 *sport = key->port_u.ports.port2;
998 *dport = key->port_u.ports.port1;
1000 *sport = key->port_u.ports.port1;
1001 *dport = key->port_u.ports.port2;
1006 ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2)
1009 if (s1->st_proto > s2->st_proto)
1011 if (s1->st_proto < s2->st_proto)
1014 if (s1->st_addrs > s2->st_addrs)
1016 if (s1->st_addrs < s2->st_addrs)
1019 if (s1->st_ports > s2->st_ports)
1021 if (s1->st_ports < s2->st_ports)
1024 if (s1->st_swap == s2->st_swap ||
1025 (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL)
1028 if (s1->st_swap > s2->st_swap)
1035 ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2)
1038 if (t1->tc_proto > t2->tc_proto)
1040 if (t1->tc_proto < t2->tc_proto)
1043 if (t1->tc_addrs > t2->tc_addrs)
1045 if (t1->tc_addrs < t2->tc_addrs)
1048 if (t1->tc_ports > t2->tc_ports)
1050 if (t1->tc_ports < t2->tc_ports)
1053 if (t1->tc_ruleid > t2->tc_ruleid)
1055 if (t1->tc_ruleid < t2->tc_ruleid)
1062 ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2)
1065 if (t1->t_proto > t2->t_proto)
1067 if (t1->t_proto < t2->t_proto)
1070 if (t1->t_addrs > t2->t_addrs)
1072 if (t1->t_addrs < t2->t_addrs)
1075 if (t1->t_ports > t2->t_ports)
1077 if (t1->t_ports < t2->t_ports)
1080 if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule)
1082 if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule)
/*
 * Insert state 's' into the per-cpu RB-tree and tail of the LRU
 * list, marking it linked.  Returns the duplicate entry on key
 * collision (elided lines presumably return 'dup' — TODO confirm).
 */
1088 static __inline struct ipfw_state *
1089 ipfw_state_link(struct ipfw_context *ctx, struct ipfw_state *s)
1091 struct ipfw_state *dup;
1093 KASSERT((s->st_flags & IPFW_STATE_F_LINKED) == 0,
1094 ("state %p was linked", s));
1095 dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s);
1097 TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link);
1098 s->st_flags |= IPFW_STATE_F_LINKED;
/* Remove a linked state from the RB-tree and list; clear the flag. */
1103 static __inline void
1104 ipfw_state_unlink(struct ipfw_context *ctx, struct ipfw_state *s)
1107 KASSERT(s->st_flags & IPFW_STATE_F_LINKED,
1108 ("state %p was not linked", s));
1109 RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s);
1110 TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link);
1111 s->st_flags &= ~IPFW_STATE_F_LINKED;
/*
 * Set the global state limit and derive the per-cpu loose-count
 * update threshold (5% of the max, split across netisr cpus).
 */
1115 ipfw_state_max_set(int state_max)
1118 ipfw_state_max = state_max;
1119 /* Allow 5% states over-allocation. */
1120 ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus;
/* Sum the per-cpu state counts into one total (returned; line elided). */
1124 ipfw_state_cntcoll(void)
1126 int cpu, state_cnt = 0;
1128 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
1129 state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt;
/* Refresh the global loose state count from the exact per-cpu sum. */
1134 ipfw_state_cntsync(void)
1138 state_cnt = ipfw_state_cntcoll();
1139 ipfw_gd.ipfw_state_loosecnt = state_cnt;
/*
 * Drop a reference on 'rule' (decrement elided in this excerpt) and
 * free it, including its cross_rules array, once the count hits zero.
 * Must run on the rule's owning cpu.
 */
1144 ipfw_free_rule(struct ip_fw *rule)
1146 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid));
1147 KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt));
1149 if (rule->refcnt == 0) {
1150 if (rule->cross_rules != NULL)
1151 kfree(rule->cross_rules, M_IPFW);
1152 kfree(rule, M_IPFW);
/*
 * Netmsg/callback-style wrapper: release one rule reference and one
 * global ipfw reference count.
 */
1159 ipfw_unref_rule(void *priv)
1161 ipfw_free_rule(priv);
1163 KASSERT(ipfw_gd.ipfw_refcnt > 0,
1164 ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt));
1165 atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1);
/*
 * Take a reference on 'rule' (rule refcnt bump elided in excerpt)
 * and on the global ipfw reference count; caller must be on the
 * rule's owning cpu.
 */
1169 static __inline void
1170 ipfw_ref_rule(struct ip_fw *rule)
1172 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid));
1174 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
1180 * This macro maps an ip pointer into a layer3 header pointer of type T
/* ip_hl is in 32-bit words, hence the uint32_t pointer arithmetic. */
1182 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))
/*
 * Match the packet's ICMP type against the bitmap stored in the
 * O_ICMPTYPE instruction (cmd->d[]).  An idx-bounds check between
 * lines 1189 and 1193 is elided in this excerpt.
 */
1185 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
1187 int type = L3HDR(struct icmp,ip)->icmp_type;
1188 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn);
1189 int idx = type / 32;
1193 return (cmd->d[idx] & (1 << (type % 32)));
/*
 * Same bitmap test as icmptype_match, but on the ICMP code field.
 * (Bounds check between lines 1201 and 1205 elided in excerpt.)
 */
1197 icmpcode_match(struct ip *ip, ipfw_insn_u32 *cmd)
1199 int code = L3HDR(struct icmp,ip)->icmp_code;
1200 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn);
1201 int idx = code / 32;
1205 return (cmd->d[idx] & (1 << (code % 32)));
/* Bitmask of ICMP "query" types, used by is_icmp_query() below. */
1208 #define TT ((1 << ICMP_ECHO) | \
1209 (1 << ICMP_ROUTERSOLICIT) | \
1210 (1 << ICMP_TSTAMP) | \
1211 (1 << ICMP_IREQ) | \
1212 (1 << ICMP_MASKREQ))
/* Return non-zero if the packet is an ICMP query (type in TT mask). */
1215 is_icmp_query(struct ip *ip)
1217 int type = L3HDR(struct icmp, ip)->icmp_type;
1219 return (type < 32 && (TT & (1 << type)));
1225 * The following checks use two arrays of 8 or 16 bits to store the
1226 * bits that we want set or clear, respectively. They are in the
1227 * low and high half of cmd->arg1 or cmd->d[0].
1229 * We scan options and store the bits we find set. We succeed if
1231 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
1233 * The code is sometimes optimized not to store additional variables.
/*
 * Check 'bits' against the want-set (low byte) / want-clear (high
 * byte) halves of cmd->arg1.  NOTE(review): as excerpted, line 1241
 * tests (want_set & bits) != 0 yet the comment says "want set were
 * clear" — an intervening elided line may invert the mask; confirm
 * against the full source.
 */
1236 flags_match(ipfw_insn *cmd, uint8_t bits)
1241 if (((cmd->arg1 & 0xff) & bits) != 0)
1242 return 0; /* some bits we want set were clear */
1244 want_clear = (cmd->arg1 >> 8) & 0xff;
1245 if ((want_clear & bits) != want_clear)
1246 return 0; /* some bits we want clear were set */
/*
 * Walk the IP options area, collect IP_FW_IPOPT_* bits for the
 * options present (LSRR/SSRR/RR/TS; switch labels elided in this
 * excerpt), and test them with flags_match().
 */
1251 ipopts_match(struct ip *ip, ipfw_insn *cmd)
1253 int optlen, bits = 0;
1254 u_char *cp = (u_char *)(ip + 1);
1255 int x = (ip->ip_hl << 2) - sizeof(struct ip);
1257 for (; x > 0; x -= optlen, cp += optlen) {
1258 int opt = cp[IPOPT_OPTVAL];
1260 if (opt == IPOPT_EOL)
1263 if (opt == IPOPT_NOP) {
1266 optlen = cp[IPOPT_OLEN];
/* Reject malformed option lengths before advancing. */
1267 if (optlen <= 0 || optlen > x)
1268 return 0; /* invalid or truncated */
1273 bits |= IP_FW_IPOPT_LSRR;
1277 bits |= IP_FW_IPOPT_SSRR;
1281 bits |= IP_FW_IPOPT_RR;
1285 bits |= IP_FW_IPOPT_TS;
1292 return (flags_match(cmd, bits));
/*
 * Same idea as ipopts_match() but for TCP options: collect
 * IP_FW_TCPOPT_* bits (MSS/WINDOW/SACK/TS/CC) and run flags_match().
 * Option-length validation lines are elided in this excerpt.
 */
1296 tcpopts_match(struct ip *ip, ipfw_insn *cmd)
1298 int optlen, bits = 0;
1299 struct tcphdr *tcp = L3HDR(struct tcphdr,ip);
1300 u_char *cp = (u_char *)(tcp + 1);
1301 int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
1303 for (; x > 0; x -= optlen, cp += optlen) {
1306 if (opt == TCPOPT_EOL)
1309 if (opt == TCPOPT_NOP) {
1319 bits |= IP_FW_TCPOPT_MSS;
1323 bits |= IP_FW_TCPOPT_WINDOW;
1326 case TCPOPT_SACK_PERMITTED:
1328 bits |= IP_FW_TCPOPT_SACK;
1331 case TCPOPT_TIMESTAMP:
1332 bits |= IP_FW_TCPOPT_TS;
1338 bits |= IP_FW_TCPOPT_CC;
1345 return (flags_match(cmd, bits));
/*
 * Match an interface either by (possibly wildcarded) name or by the
 * IPv4 address configured on it.  Returns 1 on match, 0 otherwise.
 */
1349 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
1351 if (ifp == NULL) /* no iface with this packet, match fails */
1354 /* Check by name or by IP address */
1355 if (cmd->name[0] != '\0') { /* match by name */
/* fnmatch-style wildcard compare first, then exact name compare. */
1358 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0)
1361 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
1365 struct ifaddr_container *ifac;
/* Scan this cpu's address list for a matching AF_INET address. */
1367 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1368 struct ifaddr *ia = ifac->ifa;
1370 if (ia->ifa_addr == NULL)
1372 if (ia->ifa_addr->sa_family != AF_INET)
1374 if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
1375 (ia->ifa_addr))->sin_addr.s_addr)
1376 return(1); /* match */
1379 return(0); /* no match, fail ... */
/* Helper for chained ksnprintf() calls: buffer tail plus space left. */
1382 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
1385 * We enter here when we have a rule with O_LOG.
1386 * XXX this function alone takes about 2Kbytes of code!
/*
 * Rate-limited syslog of a packet hitting an O_LOG rule: format the
 * real action, the protocol/addresses, and fragment info, then emit
 * one LOG_SECURITY line.  Many formatting branches are elided in
 * this excerpt.
 */
1389 ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen,
1390 struct ether_header *eh, struct mbuf *m, struct ifnet *oif)
1393 int limit_reached = 0;
1394 char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN];
/* Bogus packet (no rule): rate-limit via the per-ctx counter. */
1399 if (f == NULL) { /* bogus pkt */
1400 if (verbose_limit != 0 &&
1401 ctx->ipfw_norule_counter >= verbose_limit)
1403 ctx->ipfw_norule_counter++;
1404 if (ctx->ipfw_norule_counter == verbose_limit)
1405 limit_reached = verbose_limit;
1407 } else { /* O_LOG is the first action, find the real one */
1408 ipfw_insn *cmd = ACTION_PTR(f);
1409 ipfw_insn_log *l = (ipfw_insn_log *)cmd;
/* Per-rule log budget (max_log); note when it is exhausted. */
1411 if (l->max_log != 0 && l->log_left == 0)
1414 if (l->log_left == 0)
1415 limit_reached = l->max_log;
1416 cmd += F_LEN(cmd); /* point to first action */
1417 if (cmd->opcode == O_PROB)
/* Render the rule's real action into action2[]. */
1421 switch (cmd->opcode) {
1427 if (cmd->arg1==ICMP_REJECT_RST) {
1429 } else if (cmd->arg1==ICMP_UNREACH_HOST) {
1432 ksnprintf(SNPARGS(action2, 0), "Unreach %d",
1446 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1);
1450 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1);
1454 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1);
1458 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1);
1462 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1);
1467 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
1470 len = ksnprintf(SNPARGS(action2, 0),
1472 kinet_ntoa(sa->sa.sin_addr, abuf));
1473 if (sa->sa.sin_port) {
1474 ksnprintf(SNPARGS(action2, len), ":%d",
/* Render the protocol/endpoint description into proto[]. */
1486 if (hlen == 0) { /* non-ip */
1487 ksnprintf(SNPARGS(proto, 0), "MAC");
1489 struct ip *ip = mtod(m, struct ip *);
1490 /* these three are all aliases to the same thing */
1491 struct icmp *const icmp = L3HDR(struct icmp, ip);
1492 struct tcphdr *const tcp = (struct tcphdr *)icmp;
1493 struct udphdr *const udp = (struct udphdr *)icmp;
1495 int ip_off, offset, ip_len;
1498 if (eh != NULL) { /* layer 2 packets are as on the wire */
1499 ip_off = ntohs(ip->ip_off);
1500 ip_len = ntohs(ip->ip_len);
1502 ip_off = ip->ip_off;
1503 ip_len = ip->ip_len;
1505 offset = ip_off & IP_OFFMASK;
/* TCP: ports are only printed for the first fragment (offset 0). */
1508 len = ksnprintf(SNPARGS(proto, 0), "TCP %s",
1509 kinet_ntoa(ip->ip_src, abuf));
1511 ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
1512 ntohs(tcp->th_sport),
1513 kinet_ntoa(ip->ip_dst, abuf),
1514 ntohs(tcp->th_dport));
1516 ksnprintf(SNPARGS(proto, len), " %s",
1517 kinet_ntoa(ip->ip_dst, abuf));
1522 len = ksnprintf(SNPARGS(proto, 0), "UDP %s",
1523 kinet_ntoa(ip->ip_src, abuf));
1525 ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
1526 ntohs(udp->uh_sport),
1527 kinet_ntoa(ip->ip_dst, abuf),
1528 ntohs(udp->uh_dport));
1530 ksnprintf(SNPARGS(proto, len), " %s",
1531 kinet_ntoa(ip->ip_dst, abuf));
1537 len = ksnprintf(SNPARGS(proto, 0),
1542 len = ksnprintf(SNPARGS(proto, 0), "ICMP ");
1544 len += ksnprintf(SNPARGS(proto, len), "%s",
1545 kinet_ntoa(ip->ip_src, abuf));
1546 ksnprintf(SNPARGS(proto, len), " %s",
1547 kinet_ntoa(ip->ip_dst, abuf));
/* Unknown protocol: log the raw protocol number. */
1551 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
1552 kinet_ntoa(ip->ip_src, abuf));
1553 ksnprintf(SNPARGS(proto, len), " %s",
1554 kinet_ntoa(ip->ip_dst, abuf));
/* Append fragment info when MF or a non-zero offset is present. */
1558 if (ip_off & (IP_MF | IP_OFFMASK)) {
1559 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
1560 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
1561 offset << 3, (ip_off & IP_MF) ? "+" : "");
/* Emit the assembled line, with or without interface info. */
1565 if (oif || m->m_pkthdr.rcvif) {
1566 log(LOG_SECURITY | LOG_INFO,
1567 "ipfw: %d %s %s %s via %s%s\n",
1568 f ? f->rulenum : -1,
1569 action, proto, oif ? "out" : "in",
1570 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
1573 log(LOG_SECURITY | LOG_INFO,
1574 "ipfw: %d %s %s [no if info]%s\n",
1575 f ? f->rulenum : -1,
1576 action, proto, fragment);
1579 if (limit_reached) {
1580 log(LOG_SECURITY | LOG_NOTICE,
1581 "ipfw: limit %d reached on entry %d\n",
1582 limit_reached, f ? f->rulenum : -1);
/*
 * Free a master/slave xlat pair once no cross references remain.
 * Freeing of 'x' itself and rule-reference bookkeeping are in lines
 * elided from this excerpt.
 */
1589 ipfw_xlat_reap(struct ipfw_xlat *x, struct ipfw_xlat *slave_x)
1591 struct ip_fw *rule = slave_x->xlat_rule;
1593 KKASSERT(rule->cpuid == mycpuid);
1595 /* No more cross references; free this pair now. */
1597 kfree(slave_x, M_IPFW);
1599 /* See the comment in ipfw_ip_xlate_dispatch(). */
/*
 * Netisr handler: sweep the deferred-reap list, freeing xlat pairs
 * whose combined cross-reference count has dropped to zero, and
 * re-arm the callout if any remain.
 */
1604 ipfw_xlat_reap_dispatch(netmsg_t nm)
1606 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1607 struct ipfw_state *s, *ns;
1609 ASSERT_NETISR_NCPUS(mycpuid);
/* Reply early so the message can be reused by the callout. */
1613 netisr_replymsg(&ctx->ipfw_xlatreap_nm, 0);
1616 /* TODO: limit scanning depth */
1617 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_xlatreap, st_link, ns) {
1618 struct ipfw_xlat *x = (struct ipfw_xlat *)s;
1619 struct ipfw_xlat *slave_x = x->xlat_pair;
1622 crefs = slave_x->xlat_crefs + x->xlat_crefs;
1624 TAILQ_REMOVE(&ctx->ipfw_xlatreap, &x->xlat_st, st_link);
1625 ipfw_xlat_reap(x, slave_x);
/* Still-referenced pairs remain; retry in 2 ticks. */
1628 if (!TAILQ_EMPTY(&ctx->ipfw_xlatreap)) {
1629 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo,
1630 &ctx->ipfw_xlatreap_nm);
/*
 * Callout handler: (re)send the xlat-reap netmsg to this cpu's
 * netisr, but only if the previous message has completed.
 */
1635 ipfw_xlat_reap_timeo(void *xnm)
1637 struct netmsg_base *nm = xnm;
1639 KKASSERT(mycpuid < netisr_ncpus);
1642 if (nm->lmsg.ms_flags & MSGF_DONE)
1643 netisr_sendmsg_oncpu(nm);
/*
 * Runs on the cpu owning the slave xlat: unlink the slave, drop the
 * cross references, and either reap the pair immediately or queue it
 * on the deferred-reap list (with the 2-tick callout) if references
 * remain.
 */
1648 ipfw_xlat_free_dispatch(netmsg_t nmsg)
1650 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1651 struct ipfw_xlat *x = nmsg->lmsg.u.ms_resultp;
1652 struct ipfw_xlat *slave_x = x->xlat_pair;
1655 ASSERT_NETISR_NCPUS(mycpuid);
1657 KKASSERT(slave_x != NULL);
1658 KKASSERT(slave_x->xlat_invalid && x->xlat_invalid);
1660 KASSERT((x->xlat_flags & IPFW_STATE_F_LINKED) == 0,
1661 ("master xlat is still linked"));
1662 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED)
1663 ipfw_state_unlink(ctx, &slave_x->xlat_st);
1665 /* See the comment in ipfw_ip_xlate_dispatch(). */
1666 slave_x->xlat_crefs--;
1668 crefs = slave_x->xlat_crefs + x->xlat_crefs;
1670 ipfw_xlat_reap(x, slave_x);
/* First deferred entry arms the reap callout. */
1674 if (TAILQ_EMPTY(&ctx->ipfw_xlatreap)) {
1675 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo,
1676 &ctx->ipfw_xlatreap_nm);
1680 * This pair is still referenced; defer its destruction.
1681 * YYY reuse st_link.
1683 TAILQ_INSERT_TAIL(&ctx->ipfw_xlatreap, &x->xlat_st, st_link);
/* Mark both halves of an xlat pair invalid so fastpath skips them. */
1686 static __inline void
1687 ipfw_xlat_invalidate(struct ipfw_xlat *x)
1690 x->xlat_invalid = 1;
1691 x->xlat_pair->xlat_invalid = 1;
/*
 * Delete a (master) state from this cpu's context: fix the counters,
 * detach it from its track, unlink it, then free it.  Xlat states
 * need pair-aware teardown, possibly on another cpu via netmsg.
 * Some kfree/return lines are elided in this excerpt.
 */
1695 ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s)
1697 struct ipfw_xlat *x, *slave_x;
1698 struct netmsg_base *nm;
1700 KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT ||
1701 IPFW_ISXLAT(s->st_type), ("invalid state type %u", s->st_type));
1702 KASSERT((s->st_flags & IPFW_STATE_F_XLATSLAVE) == 0,
1703 ("delete slave xlat"));
1705 KASSERT(ctx->ipfw_state_cnt > 0,
1706 ("invalid state count %d", ctx->ipfw_state_cnt));
1707 ctx->ipfw_state_cnt--;
1708 if (ctx->ipfw_state_loosecnt > 0)
1709 ctx->ipfw_state_loosecnt--;
1712 * Unhook this state.
/* Drop the track's per-state reference and shared counter. */
1714 if (s->st_track != NULL) {
1715 struct ipfw_track *t = s->st_track;
1717 KASSERT(!LIST_EMPTY(&t->t_state_list),
1718 ("track state list is empty"));
1719 LIST_REMOVE(s, st_trklink);
1721 KASSERT(*t->t_count > 0,
1722 ("invalid track count %d", *t->t_count));
1723 atomic_subtract_int(t->t_count, 1);
1725 ipfw_state_unlink(ctx, s);
1728 * Free this state. Xlat requires special processing,
1729 * since xlat are paired state and they could be on
1733 if (!IPFW_ISXLAT(s->st_type)) {
1734 /* Not xlat; free now. */
1739 x = (struct ipfw_xlat *)s;
1741 if (x->xlat_pair == NULL) {
1742 /* Not setup yet; free now. */
1747 slave_x = x->xlat_pair;
1748 KKASSERT(slave_x->xlat_flags & IPFW_STATE_F_XLATSLAVE);
1750 if (x->xlat_pcpu == mycpuid) {
1752 * Paired states are on the same cpu; delete this
1755 KKASSERT(x->xlat_crefs == 0);
1756 KKASSERT(slave_x->xlat_crefs == 0);
1757 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED)
1758 ipfw_state_unlink(ctx, &slave_x->xlat_st);
1760 kfree(slave_x, M_IPFW);
1765 * Free the paired states on the cpu owning the slave xlat.
1769 * Mark the state pair invalid; completely deleting them
1770 * may take some time.
1772 ipfw_xlat_invalidate(x);
1774 nm = &x->xlat_freenm;
1775 netmsg_init(nm, NULL, &netisr_apanic_rport, MSGF_PRIORITY,
1776 ipfw_xlat_free_dispatch);
1777 nm->lmsg.u.ms_resultp = x;
1779 /* See the comment in ipfw_xlate_redispatch(). */
1780 x->xlat_rule->cross_refs++;
1783 netisr_sendmsg(nm, x->xlat_pcpu);
/*
 * Remove any state: slave xlats are only invalidated and unlinked
 * (the master owns the actual free); everything else goes through
 * ipfw_state_del().
 */
1787 ipfw_state_remove(struct ipfw_context *ctx, struct ipfw_state *s)
1790 if (s->st_flags & IPFW_STATE_F_XLATSLAVE) {
1791 KKASSERT(IPFW_ISXLAT(s->st_type));
1792 ipfw_xlat_invalidate((struct ipfw_xlat *)s);
1793 ipfw_state_unlink(ctx, s);
1796 ipfw_state_del(ctx, s);
/*
 * Aggressively reclaim states when we are short: either kick-start
 * a full expiry pass (REAP mode, unlimited scan), or ride the
 * in-progress expiry walk from its anchor, deleting dead/closed
 * states.  Returns the number expired (return lines elided here).
 */
1800 ipfw_state_reap(struct ipfw_context *ctx, int reap_max)
1802 struct ipfw_state *s, *anchor;
1805 if (reap_max < ipfw_state_reap_min)
1806 reap_max = ipfw_state_reap_min;
1808 if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) {
1810 * Kick start state expiring. Ignore scan limit,
1811 * we are short of states.
1813 ctx->ipfw_flags |= IPFW_FLAG_STATEREAP;
1814 expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max);
1815 ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP;
1820 * States are being expired.
1823 if (ctx->ipfw_state_cnt == 0)
1827 anchor = &ctx->ipfw_stateexp_anch;
1828 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
1830 * Ignore scan limit; we are short of states.
/* Advance the anchor past 's' before possibly deleting it. */
1833 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1834 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
1836 if (IPFW_STATE_SCANSKIP(s))
1839 if (IPFW_STATE_ISDEAD(s) || IPFW_STATE_TCPCLOSED(s)) {
1840 ipfw_state_del(ctx, s);
1841 if (++expired >= reap_max)
/* Every 256 deletions, stop early if global headroom is back. */
1843 if ((expired & 0xff) == 0 &&
1844 ipfw_state_cntcoll() + ipfw_state_headroom <=
1851 * Leave the anchor on the list, even if the end of the list has
1852 * been reached. ipfw_state_expire_more_dispatch() will handle
/*
 * Delete all states, or only those created by 'rule' when it is
 * non-NULL (e.g. on rule removal).
 */
1859 ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule)
1861 struct ipfw_state *s, *sn;
1863 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) {
1864 if (IPFW_STATE_SCANSKIP(s))
1866 if (rule != NULL && s->st_rule != rule)
1868 ipfw_state_del(ctx, s);
/*
 * Finish an expiry pass: clear the in-progress flag and schedule
 * the next pass one second out.
 */
1873 ipfw_state_expire_done(struct ipfw_context *ctx)
1876 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1877 ("stateexp is not in progress"));
1878 ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP;
1879 callout_reset(&ctx->ipfw_stateto_ch, hz,
1880 ipfw_state_expire_ipifunc, NULL);
/*
 * Continue the current expiry pass on this cpu by resending the
 * "more" netmsg to our own netisr.
 */
1884 ipfw_state_expire_more(struct ipfw_context *ctx)
1886 struct netmsg_base *nm = &ctx->ipfw_stateexp_more;
1888 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1889 ("stateexp is not in progress"));
1890 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
1891 ("stateexp more did not finish"));
1892 netisr_sendmsg_oncpu(nm);
/*
 * Walk the state list from 'anchor', deleting dead states (and
 * TCP-closed states when reaping).  Bails out via a "more" netmsg
 * when scan_max/expire_max is hit; removes the anchor and signals
 * completion when the list end is reached.  Returns the expired
 * count (return lines elided in excerpt).
 */
1896 ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor,
1897 int scan_max, int expire_max)
1899 struct ipfw_state *s;
1900 int scanned = 0, expired = 0;
1902 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1903 ("stateexp is not in progress"));
1905 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
1906 if (scanned++ >= scan_max) {
1907 ipfw_state_expire_more(ctx);
/* Advance the anchor past 's' before possibly deleting it. */
1911 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1912 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
1914 if (IPFW_STATE_SCANSKIP(s))
1917 if (IPFW_STATE_ISDEAD(s) ||
1918 ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
1919 IPFW_STATE_TCPCLOSED(s))) {
1920 ipfw_state_del(ctx, s);
1921 if (++expired >= expire_max) {
1922 ipfw_state_expire_more(ctx);
/* While reaping, stop early once global headroom is restored. */
1925 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
1926 (expired & 0xff) == 0 &&
1927 ipfw_state_cntcoll() + ipfw_state_headroom <=
1929 ipfw_state_expire_more(ctx);
1934 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1935 ipfw_state_expire_done(ctx);
/*
 * Netisr handler for the "more" message: resume the expiry walk
 * from the anchor, or finish immediately if no states remain.
 */
1940 ipfw_state_expire_more_dispatch(netmsg_t nm)
1942 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1943 struct ipfw_state *anchor;
1945 ASSERT_NETISR_NCPUS(mycpuid);
1946 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
1947 ("statexp is not in progress"));
/* Reply first so the message can be reused for the next round. */
1950 netisr_replymsg(&nm->base, 0);
1952 anchor = &ctx->ipfw_stateexp_anch;
1953 if (ctx->ipfw_state_cnt == 0) {
1954 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
1955 ipfw_state_expire_done(ctx);
1958 ipfw_state_expire_loop(ctx, anchor,
1959 ipfw_state_scan_max, ipfw_state_expire_max);
/*
 * Begin an expiry pass: set the in-progress flag, enforce the
 * once-per-second rate limit (unless reaping), insert the anchor at
 * the list head, and run the first loop iteration.
 */
1963 ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
1965 struct ipfw_state *anchor;
1967 KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0,
1968 ("stateexp is in progress"));
1969 ctx->ipfw_flags |= IPFW_FLAG_STATEEXP;
1971 if (ctx->ipfw_state_cnt == 0) {
1972 ipfw_state_expire_done(ctx);
1977 * Do not expire more than once per second, it is useless.
1979 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 &&
1980 ctx->ipfw_state_lastexp == time_uptime) {
1981 ipfw_state_expire_done(ctx);
1984 ctx->ipfw_state_lastexp = time_uptime;
1986 anchor = &ctx->ipfw_stateexp_anch;
1987 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
1988 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max));
/*
 * Netisr handler for the periodic expiry kick: start a pass unless
 * one is already running on this cpu.
 */
1992 ipfw_state_expire_dispatch(netmsg_t nm)
1994 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
1996 ASSERT_NETISR_NCPUS(mycpuid);
2000 netisr_replymsg(&nm->base, 0);
2003 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) {
2004 /* Running; done. */
2007 ipfw_state_expire_start(ctx,
2008 ipfw_state_scan_max, ipfw_state_expire_max);
/*
 * Callout/IPI entry point: send the per-cpu expiry netmsg to this
 * cpu's netisr if the previous one has completed.
 */
2012 ipfw_state_expire_ipifunc(void *dummy __unused)
2014 struct netmsg_base *msg;
2016 KKASSERT(mycpuid < netisr_ncpus);
2017 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm;
2020 if (msg->lmsg.ms_flags & MSGF_DONE)
2021 netisr_sendmsg_oncpu(msg);
/*
 * Track TCP sequence/ack progress for a stateful entry, per
 * direction, and record FIN-ACK completion bits in st_state.
 * Out-of-sequence segments are rejected (return lines elided in
 * this excerpt).
 */
2026 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp)
2028 uint32_t seq = ntohl(tcp->th_seq);
2029 uint32_t ack = ntohl(tcp->th_ack);
/* RST handling (body elided in excerpt). */
2031 if (tcp->th_flags & TH_RST)
2034 if (dir == MATCH_FORWARD) {
/* First forward segment seeds st_seq_fwd; later ones must advance. */
2035 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) {
2036 s->st_flags |= IPFW_STATE_F_SEQFWD;
2037 s->st_seq_fwd = seq;
2038 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) {
2039 s->st_seq_fwd = seq;
2041 /* Out-of-sequence; done. */
2044 if (tcp->th_flags & TH_ACK) {
2045 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) {
2046 s->st_flags |= IPFW_STATE_F_ACKFWD;
2047 s->st_ack_fwd = ack;
2048 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) {
2049 s->st_ack_fwd = ack;
2051 /* Out-of-sequence; done. */
/* Mark the reverse-direction FIN as ACKed when it is covered. */
2055 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) ==
2056 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1)
2057 s->st_state |= (TH_ACK << 8);
/* Mirror logic for the reverse direction. */
2060 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) {
2061 s->st_flags |= IPFW_STATE_F_SEQREV;
2062 s->st_seq_rev = seq;
2063 } else if (SEQ_GEQ(seq, s->st_seq_rev)) {
2064 s->st_seq_rev = seq;
2066 /* Out-of-sequence; done. */
2069 if (tcp->th_flags & TH_ACK) {
2070 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) {
2071 s->st_flags |= IPFW_STATE_F_ACKREV;
2073 } else if (SEQ_GEQ(ack, s->st_ack_rev)) {
2074 s->st_ack_rev = ack;
2076 /* Out-of-sequence; done. */
2080 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN &&
2081 s->st_ack_rev == s->st_seq_fwd + 1)
2082 s->st_state |= TH_ACK;
/*
 * Refresh a state entry for a new packet: accumulate TCP flags into
 * st_state (forward flags in the low byte, reverse shifted left 8)
 * and pick the expiry lifetime from the resulting TCP phase, or the
 * UDP/other-protocol lifetimes.
 */
2089 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir,
2090 const struct tcphdr *tcp, struct ipfw_state *s)
2093 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
2094 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS;
/* Reject out-of-window segments before touching st_state. */
2096 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp))
2099 s->st_state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
2100 switch (s->st_state & IPFW_STATE_TCPSTATES) {
2101 case TH_SYN: /* opening */
2102 s->st_expire = time_uptime + dyn_syn_lifetime;
2105 case BOTH_SYN: /* move to established */
2106 case BOTH_SYN | TH_FIN: /* one side tries to close */
2107 case BOTH_SYN | (TH_FIN << 8):
2108 s->st_expire = time_uptime + dyn_ack_lifetime;
2111 case BOTH_SYN | BOTH_FIN: /* both sides closed */
2112 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) {
2113 /* And both FINs were ACKed. */
2114 s->st_expire = time_uptime + dyn_fin_lifetime;
2116 s->st_expire = time_uptime +
2117 dyn_finwait_lifetime;
2124 * reset or some invalid combination, but can also
2125 * occur if we use keep-state the wrong way.
2127 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0)
2128 kprintf("invalid state: 0x%x\n", s->st_state);
2130 s->st_expire = time_uptime + dyn_rst_lifetime;
2133 } else if (pkt->proto == IPPROTO_UDP) {
2134 s->st_expire = time_uptime + dyn_udp_lifetime;
2136 /* other protocols */
2137 s->st_expire = time_uptime + dyn_short_lifetime;
/*
 * Look up the state matching 'pkt' in the per-cpu RB-tree, pruning
 * dead entries and recycled TCP 4-tuples on the way.  On a hit, the
 * match direction (forward/reverse, from the swap bits) is stored in
 * *match_direction, the state is refreshed and its track's expiry
 * extended.  Returns the state or NULL (return lines elided).
 */
2144 static struct ipfw_state *
2145 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt,
2146 int *match_direction, const struct tcphdr *tcp)
2148 struct ipfw_state *key, *s;
2149 int dir = MATCH_NONE;
2151 key = &ctx->ipfw_state_tmpkey;
2152 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port,
2153 pkt->dst_ip, pkt->dst_port, pkt->proto);
2154 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key);
2156 goto done; /* not found. */
2157 if (IPFW_STATE_ISDEAD(s)) {
2158 ipfw_state_remove(ctx, s);
/* New SYN against a closed old state: the 4-tuple was recycled. */
2162 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) {
2163 /* TCP ports recycling is too fast. */
2164 ctx->ipfw_sts_tcprecycled++;
2165 ipfw_state_remove(ctx, s);
2170 if (s->st_swap == key->st_swap) {
2171 dir = MATCH_FORWARD;
2173 KASSERT((s->st_swap & key->st_swap) == 0,
2174 ("found mismatch state"));
2175 dir = MATCH_REVERSE;
2178 /* Update this state. */
2179 ipfw_state_update(pkt, dir, tcp, s);
2181 if (s->st_track != NULL) {
2182 /* This track has been used. */
2183 s->st_track->t_expire = time_uptime + dyn_short_lifetime;
2186 if (match_direction)
2187 *match_direction = dir;
/*
 * Allocate and initialize a new state (a larger ipfw_xlat for xlat
 * types), build its canonical key, and seed st_expire/st_state via
 * ipfw_state_update() in the forward direction.  Returns NULL on
 * allocation failure (return lines elided in excerpt).
 */
2191 static struct ipfw_state *
2192 ipfw_state_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2193 uint16_t type, struct ip_fw *rule, const struct tcphdr *tcp)
2195 struct ipfw_state *s;
2198 KASSERT(type == O_KEEP_STATE || type == O_LIMIT || IPFW_ISXLAT(type),
2199 ("invalid state type %u", type));
2201 sz = sizeof(struct ipfw_state);
2202 if (IPFW_ISXLAT(type))
2203 sz = sizeof(struct ipfw_xlat);
/* M_NULLOK: allocation may fail in interrupt context; handle NULL. */
2205 s = kmalloc(sz, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO);
2207 ctx->ipfw_sts_nomem++;
2211 ipfw_key_build(&s->st_key, id->src_ip, id->src_port,
2212 id->dst_ip, id->dst_port, id->proto);
2216 if (IPFW_ISXLAT(type)) {
2217 struct ipfw_xlat *x = (struct ipfw_xlat *)s;
2219 x->xlat_dir = MATCH_NONE;
2224 * Update this state:
2225 * Set st_expire and st_state.
2227 ipfw_state_update(id, MATCH_FORWARD, tcp, s);
/*
 * Allocate a state, account for it (flushing the loose count to the
 * global counter past the threshold), link it into tree/list, and
 * attach it to track 't' when given.  Duplicate keys are a panic.
 */
2232 static struct ipfw_state *
2233 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2234 uint16_t type, struct ip_fw *rule, struct ipfw_track *t,
2235 const struct tcphdr *tcp)
2237 struct ipfw_state *s, *dup;
2239 s = ipfw_state_alloc(ctx, id, type, rule, tcp);
2243 ctx->ipfw_state_cnt++;
2244 ctx->ipfw_state_loosecnt++;
2245 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) {
2246 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt;
2247 ctx->ipfw_state_loosecnt = 0;
2250 dup = ipfw_state_link(ctx, s);
2252 panic("ipfw: %u state exists %p", type, dup);
2255 /* Keep the track referenced. */
2256 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink);
/*
 * Unlink a track and drop its reference on the shared trkcnt using
 * an fdrop()-style CAS loop; the last reference removes the trkcnt
 * from the global tree (caching it as the per-cpu spare when
 * possible).  Returns whether the trkcnt itself was freed.
 */
2263 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t)
2265 struct ipfw_trkcnt *trk;
2266 boolean_t trk_freed = FALSE;
2268 KASSERT(t->t_count != NULL, ("track anchor"));
2269 KASSERT(LIST_EMPTY(&t->t_state_list),
2270 ("invalid track is still referenced"));
2273 KASSERT(trk != NULL, ("track has no trkcnt"));
2275 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2276 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link);
2280 * fdrop() style reference counting.
2281 * See kern/kern_descrip.c fdrop().
2284 int refs = trk->tc_refs;
2287 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs));
/* Last reference: retire the trkcnt from the global tree. */
2290 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) {
2291 KASSERT(trk->tc_count == 0,
2292 ("%d states reference this trkcnt",
2294 RB_REMOVE(ipfw_trkcnt_tree,
2295 &ipfw_gd.ipfw_trkcnt_tree, trk);
2297 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0,
2298 ("invalid trkcnt cnt %d",
2299 ipfw_gd.ipfw_trkcnt_cnt));
2300 ipfw_gd.ipfw_trkcnt_cnt--;
/* Prefer stashing as the per-cpu spare over kfree. */
2303 if (ctx->ipfw_trkcnt_spare == NULL)
2304 ctx->ipfw_trkcnt_spare = trk;
2312 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) {
/*
 * Free all tracks, or only those owned by 'rule' when non-NULL;
 * anchors (t_count == NULL) are skipped.
 */
2321 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule)
2323 struct ipfw_track *t, *tn;
2325 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) {
2326 if (t->t_count == NULL) /* anchor */
2328 if (rule != NULL && t->t_rule != rule)
2330 ipfw_track_free(ctx, t);
/*
 * Expire dead (and, when reaping, TCP-closed) states attached to a
 * single track, at most once per second.  Returns whether any state
 * was deleted (return lines elided in excerpt).
 */
2335 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t,
2338 struct ipfw_state *s, *sn;
2339 boolean_t ret = FALSE;
2341 KASSERT(t->t_count != NULL, ("track anchor"));
2343 if (LIST_EMPTY(&t->t_state_list))
2347 * Do not expire more than once per second, it is useless.
2349 if (t->t_lastexp == time_uptime)
2351 t->t_lastexp = time_uptime;
2353 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) {
2354 if (IPFW_STATE_ISDEAD(s) || (reap && IPFW_STATE_TCPCLOSED(s))) {
2355 KASSERT(s->st_track == t,
2356 ("state track %p does not match %p",
2358 ipfw_state_del(ctx, s);
/*
 * Get a trkcnt: reuse the per-cpu spare when available, else
 * kmalloc (M_NULLOK — caller must handle NULL).
 */
2365 static __inline struct ipfw_trkcnt *
2366 ipfw_trkcnt_alloc(struct ipfw_context *ctx)
2368 struct ipfw_trkcnt *trk;
2370 if (ctx->ipfw_trkcnt_spare != NULL) {
2371 trk = ctx->ipfw_trkcnt_spare;
2372 ctx->ipfw_trkcnt_spare = NULL;
2374 trk = kmalloc(sizeof(*trk), M_IPFW,
2375 M_INTWAIT | M_NULLOK | M_CACHEALIGN);
/*
 * Finish a track-expiry pass: clear the flag and re-arm the
 * one-second callout.
 */
2381 ipfw_track_expire_done(struct ipfw_context *ctx)
2384 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2385 ("trackexp is not in progress"));
2386 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP;
2387 callout_reset(&ctx->ipfw_trackto_ch, hz,
2388 ipfw_track_expire_ipifunc, NULL);
/*
 * Continue the current track-expiry pass by resending the "more"
 * netmsg to this cpu's netisr.
 */
2392 ipfw_track_expire_more(struct ipfw_context *ctx)
2394 struct netmsg_base *nm = &ctx->ipfw_trackexp_more;
2396 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2397 ("trackexp is not in progress"));
2398 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
2399 ("trackexp more did not finish"));
2400 netisr_sendmsg_oncpu(nm);
/*
 * Walk the track list from 'anchor', expiring attached states and
 * freeing timed-out (or, when reaping, all unreferenced) tracks.
 * Defers via the "more" netmsg at scan/expire limits; removes the
 * anchor and completes at list end.  Returns the expired count
 * (return lines elided in excerpt).
 */
2404 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor,
2405 int scan_max, int expire_max)
2407 struct ipfw_track *t;
2408 int scanned = 0, expired = 0;
2409 boolean_t reap = FALSE;
2411 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2412 ("trackexp is not in progress"));
2414 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP)
2417 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
2418 if (scanned++ >= scan_max) {
2419 ipfw_track_expire_more(ctx);
/* Advance the anchor past 't' before possibly freeing it. */
2423 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2424 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);
2426 if (t->t_count == NULL) /* anchor */
2429 ipfw_track_state_expire(ctx, t, reap);
2430 if (!LIST_EMPTY(&t->t_state_list)) {
2431 /* There are states referencing this track. */
2435 if (TIME_LEQ(t->t_expire, time_uptime) || reap) {
2437 if (ipfw_track_free(ctx, t)) {
2438 if (++expired >= expire_max) {
2439 ipfw_track_expire_more(ctx);
2445 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2446 ipfw_track_expire_done(ctx);
/*
 * Begin a track-expiry pass: set the flag, honor the once-per-second
 * rate limit (unless reaping), insert the anchor at the list head,
 * and run the first loop iteration.
 */
2451 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
2453 struct ipfw_track *anchor;
2455 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0,
2456 ("trackexp is in progress"));
2457 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP;
2459 if (RB_EMPTY(&ctx->ipfw_track_tree)) {
2460 ipfw_track_expire_done(ctx);
2465 * Do not expire more than once per second, it is useless.
2467 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 &&
2468 ctx->ipfw_track_lastexp == time_uptime) {
2469 ipfw_track_expire_done(ctx);
2472 ctx->ipfw_track_lastexp = time_uptime;
2474 anchor = &ctx->ipfw_trackexp_anch;
2475 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link);
2476 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max));
/*
 * Netisr handler for the track "more" message: resume the walk from
 * the anchor, or finish immediately if no tracks remain.
 */
2480 ipfw_track_expire_more_dispatch(netmsg_t nm)
2482 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
2483 struct ipfw_track *anchor;
2485 ASSERT_NETISR_NCPUS(mycpuid);
2486 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
2487 ("trackexp is not in progress"));
/* Reply first so the message can be reused for the next round. */
2490 netisr_replymsg(&nm->base, 0);
2492 anchor = &ctx->ipfw_trackexp_anch;
2493 if (RB_EMPTY(&ctx->ipfw_track_tree)) {
2494 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2495 ipfw_track_expire_done(ctx);
2498 ipfw_track_expire_loop(ctx, anchor,
2499 ipfw_track_scan_max, ipfw_track_expire_max);
/*
 * Netisr handler for the periodic track-expiry kick: start a pass
 * unless one is already running on this cpu.
 */
2503 ipfw_track_expire_dispatch(netmsg_t nm)
2505 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
2507 ASSERT_NETISR_NCPUS(mycpuid);
2511 netisr_replymsg(&nm->base, 0);
2514 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) {
2515 /* Running; done. */
2518 ipfw_track_expire_start(ctx,
2519 ipfw_track_scan_max, ipfw_track_expire_max);
/*
 * Callout/IPI entry point: send the per-cpu track-expiry netmsg to
 * this cpu's netisr if the previous one has completed.
 */
2523 ipfw_track_expire_ipifunc(void *dummy __unused)
2525 struct netmsg_base *msg;
2527 KKASSERT(mycpuid < netisr_ncpus);
2528 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm;
2531 if (msg->lmsg.ms_flags & MSGF_DONE)
2532 netisr_sendmsg_oncpu(msg);
/*
 * Aggressively reclaim tracks when short: either kick-start a full
 * expiry pass in REAP mode, or ride the in-progress walk from its
 * anchor, freeing unreferenced tracks regardless of expiry time.
 * Returns the number freed (return lines elided in excerpt).
 */
2537 ipfw_track_reap(struct ipfw_context *ctx)
2539 struct ipfw_track *t, *anchor;
2542 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) {
2544 * Kick start track expiring. Ignore scan limit,
2545 * we are short of tracks.
2547 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP;
2548 expired = ipfw_track_expire_start(ctx, INT_MAX,
2549 ipfw_track_reap_max);
2550 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP;
2555 * Tracks are being expired.
2558 if (RB_EMPTY(&ctx->ipfw_track_tree))
2562 anchor = &ctx->ipfw_trackexp_anch;
2563 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
2565 * Ignore scan limit; we are short of tracks.
/* Advance the anchor past 't' before possibly freeing it. */
2568 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
2569 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);
2571 if (t->t_count == NULL) /* anchor */
2574 ipfw_track_state_expire(ctx, t, TRUE);
2575 if (!LIST_EMPTY(&t->t_state_list)) {
2576 /* There are states referencing this track. */
2580 if (ipfw_track_free(ctx, t)) {
2581 if (++expired >= ipfw_track_reap_max) {
2582 ipfw_track_expire_more(ctx);
2589 * Leave the anchor on the list, even if the end of the list has
2590 * been reached. ipfw_track_expire_more_dispatch() will handle
2597 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
2598 uint16_t limit_mask, struct ip_fw *rule)
2600 struct ipfw_track *key, *t, *dup;
2601 struct ipfw_trkcnt *trk, *ret;
2602 boolean_t do_expire = FALSE;
2604 KASSERT(rule->track_ruleid != 0,
2605 ("rule %u has no track ruleid", rule->rulenum));
2607 key = &ctx->ipfw_track_tmpkey;
2608 key->t_proto = id->proto;
2612 if (limit_mask & DYN_SRC_ADDR)
2613 key->t_saddr = id->src_ip;
2614 if (limit_mask & DYN_DST_ADDR)
2615 key->t_daddr = id->dst_ip;
2616 if (limit_mask & DYN_SRC_PORT)
2617 key->t_sport = id->src_port;
2618 if (limit_mask & DYN_DST_PORT)
2619 key->t_dport = id->dst_port;
2621 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key);
2625 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK);
2627 ctx->ipfw_tks_nomem++;
2631 t->t_key = key->t_key;
2634 LIST_INIT(&t->t_state_list);
2636 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) {
2637 time_t globexp, uptime;
2643 * Do not expire globally more than once per second,
2646 uptime = time_uptime;
2647 globexp = ipfw_gd.ipfw_track_globexp;
2648 if (globexp != uptime &&
2649 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp,
2653 /* Expire tracks on other CPUs. */
2654 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2657 lwkt_send_ipiq(globaldata_find(cpu),
2658 ipfw_track_expire_ipifunc, NULL);
2662 trk = ipfw_trkcnt_alloc(ctx);
2665 struct ipfw_trkcnt *tkey;
2667 tkey = &ctx->ipfw_trkcnt_tmpkey;
2668 key = NULL; /* tkey overlaps key */
2670 tkey->tc_key = t->t_key;
2671 tkey->tc_ruleid = rule->track_ruleid;
2674 trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2679 ctx->ipfw_tks_reap++;
2680 if (ipfw_track_reap(ctx) > 0) {
2681 if (ipfw_gd.ipfw_trkcnt_cnt <
2683 trk = ipfw_trkcnt_alloc(ctx);
2686 ctx->ipfw_tks_cntnomem++;
2688 ctx->ipfw_tks_overflow++;
2691 ctx->ipfw_tks_reapfailed++;
2692 ctx->ipfw_tks_overflow++;
2695 ctx->ipfw_tks_cntnomem++;
2700 KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus,
2701 ("invalid trkcnt refs %d", trk->tc_refs));
2702 atomic_add_int(&trk->tc_refs, 1);
2706 trk->tc_key = t->t_key;
2707 trk->tc_ruleid = rule->track_ruleid;
2711 trk->tc_rulenum = rule->rulenum;
2714 ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2717 KASSERT(ret->tc_refs > 0 &&
2718 ret->tc_refs < netisr_ncpus,
2719 ("invalid trkcnt refs %d", ret->tc_refs));
2720 KASSERT(ctx->ipfw_trkcnt_spare == NULL,
2721 ("trkcnt spare was installed"));
2722 ctx->ipfw_trkcnt_spare = trk;
2725 ipfw_gd.ipfw_trkcnt_cnt++;
2727 atomic_add_int(&trk->tc_refs, 1);
2730 t->t_count = &trk->tc_count;
2733 dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2735 panic("ipfw: track exists");
2736 TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link);
2738 t->t_expire = time_uptime + dyn_short_lifetime;
2743 * Install state for rule type cmd->o.opcode
2745 * Returns NULL if state is not installed because of errors or because
2746 * states limitations are enforced.
/* NOTE(review): elided listing — some branches/returns are not visible. */
2748 static struct ipfw_state *
2749 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule,
2750 ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp)
2752 struct ipfw_state *s;
2753 struct ipfw_track *t;
/* Global state limit reached: try to reap, else rate-limited expiry. */
2756 if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max &&
2757 (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) {
2758 boolean_t overflow = TRUE;
2760 ctx->ipfw_sts_reap++;
2761 if (ipfw_state_reap(ctx, diff) == 0)
2762 ctx->ipfw_sts_reapfailed++;
2763 if (ipfw_state_cntsync() < ipfw_state_max)
2767 time_t globexp, uptime;
2771 * Do not expire globally more than once per second,
/* CAS elects a single CPU to fan out expiry IPIs per second. */
2774 uptime = time_uptime;
2775 globexp = ipfw_gd.ipfw_state_globexp;
2776 if (globexp == uptime ||
2777 !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp,
2779 ctx->ipfw_sts_overflow++;
2783 /* Expire states on other CPUs. */
2784 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2787 lwkt_send_ipiq(globaldata_find(cpu),
2788 ipfw_state_expire_ipifunc, NULL);
2790 ctx->ipfw_sts_overflow++;
2795 switch (cmd->o.opcode) {
2796 case O_KEEP_STATE: /* bidir rule */
2798 s = ipfw_state_add(ctx, &args->f_id, cmd->o.opcode, rule, NULL,
2804 case O_LIMIT: /* limit number of sessions */
2805 t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule);
/* Over the per-track connection limit: try expiring its states first. */
2809 if (*t->t_count >= cmd->conn_limit) {
2810 if (!ipfw_track_state_expire(ctx, t, TRUE))
/* Re-read then CAS-increment the shared counter to claim a slot. */
2814 count = *t->t_count;
2815 if (count >= cmd->conn_limit)
2817 if (atomic_cmpset_int(t->t_count, count, count + 1))
2821 s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp);
/* State add failed: give the claimed slot back. */
2824 atomic_subtract_int(t->t_count, 1);
2830 panic("unknown state type %u\n", cmd->o.opcode);
/* Redirect states additionally record translation address/port/ifp. */
2833 if (s->st_type == O_REDIRECT) {
2834 struct ipfw_xlat *x = (struct ipfw_xlat *)s;
2835 ipfw_insn_rdr *r = (ipfw_insn_rdr *)cmd;
2837 x->xlat_addr = r->addr.s_addr;
2838 x->xlat_port = r->port;
2839 x->xlat_ifp = args->m->m_pkthdr.rcvif;
2840 x->xlat_dir = MATCH_FORWARD;
2841 KKASSERT(x->xlat_ifp != NULL);
/*
 * Match address '*in' against lookup table 'tableid' via a radix trie.
 * Returns 1 on match (and touches the entry's last-use time), 0 otherwise.
 */
2847 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid,
2848 const struct in_addr *in)
2850 struct radix_node_head *rnh;
2851 struct sockaddr_in sin;
2852 struct ipfw_tblent *te;
2854 KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid));
2855 rnh = ctx->ipfw_tables[tableid];
/* Table was never created/populated. */
2857 return (0); /* no match */
/* Radix lookup keys are sockaddr_in; build one for the address. */
2859 memset(&sin, 0, sizeof(sin));
2860 sin.sin_family = AF_INET;
2861 sin.sin_len = sizeof(sin);
2864 te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh);
2866 return (0); /* no match */
/* Record last use for table-entry aging/statistics. */
2869 te->te_lastuse = time_second;
2870 return (1); /* match */
2874 * Transmit a TCP packet, containing either a RST or a keepalive.
2875 * When flags & TH_RST, we are sending a RST packet, because of a
2876 * "reset" action matched the packet.
2877 * Otherwise we are sending a keepalive, and flags & TH_
2879 * Only {src,dst}_{ip,port} of "id" are used.
/* NOTE(review): elided listing — some lines missing from view. */
2882 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags)
2887 struct route sro; /* fake route */
2889 MGETHDR(m, M_NOWAIT, MT_HEADER);
2892 m->m_pkthdr.rcvif = NULL;
/* Fixed-size packet: bare IP header + TCP header, no options/payload. */
2893 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
2894 m->m_data += max_linkhdr;
2896 ip = mtod(m, struct ip *);
2897 bzero(ip, m->m_len);
2898 tcp = (struct tcphdr *)(ip + 1); /* no IP options */
2899 ip->ip_p = IPPROTO_TCP;
2903 * Assume we are sending a RST (or a keepalive in the reverse
2904 * direction), swap src and destination addresses and ports.
2906 ip->ip_src.s_addr = htonl(id->dst_ip);
2907 ip->ip_dst.s_addr = htonl(id->src_ip);
2908 tcp->th_sport = htons(id->dst_port);
2909 tcp->th_dport = htons(id->src_port);
2910 if (flags & TH_RST) { /* we are sending a RST */
/* RST in response to an ACK'd segment vs. an un-ACK'd one (RFC 793). */
2911 if (flags & TH_ACK) {
2912 tcp->th_seq = htonl(ack);
2913 tcp->th_ack = htonl(0);
2914 tcp->th_flags = TH_RST;
2918 tcp->th_seq = htonl(0);
2919 tcp->th_ack = htonl(seq);
2920 tcp->th_flags = TH_RST | TH_ACK;
2924 * We are sending a keepalive. flags & TH_SYN determines
2925 * the direction, forward if set, reverse if clear.
2926 * NOTE: seq and ack are always assumed to be correct
2927 * as set by the caller. This may be confusing...
2929 if (flags & TH_SYN) {
2931 * we have to rewrite the correct addresses!
2933 ip->ip_dst.s_addr = htonl(id->dst_ip);
2934 ip->ip_src.s_addr = htonl(id->src_ip);
2935 tcp->th_dport = htons(id->dst_port);
2936 tcp->th_sport = htons(id->src_port);
2938 tcp->th_seq = htonl(seq);
2939 tcp->th_ack = htonl(ack);
2940 tcp->th_flags = TH_ACK;
2944 * set ip_len to the payload size so we can compute
2945 * the tcp checksum on the pseudoheader
2946 * XXX check this, could save a couple of words ?
2948 ip->ip_len = htons(sizeof(struct tcphdr));
2949 tcp->th_sum = in_cksum(m, m->m_pkthdr.len);
2952 * now fill fields left out earlier
/* ip_len is rewritten AFTER the checksum; order matters here. */
2954 ip->ip_ttl = ip_defttl;
2955 ip->ip_len = m->m_pkthdr.len;
2957 bzero(&sro, sizeof(sro));
2958 ip_rtaddr(ip->ip_dst, &sro);
/* Mark as firewall-generated so ipfw_chk() passes it unconditionally. */
2960 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED;
2961 ip_output(m, NULL, &sro, 0, NULL, NULL);
2967 * Send a reject message, consuming the mbuf passed as an argument.
/* ICMP unreachable for most codes; TCP RST for ICMP_REJECT_RST. */
2970 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len)
2972 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
2973 /* We need the IP header in host order for icmp_error(). */
2974 if (args->eh != NULL) {
2975 struct ip *ip = mtod(args->m, struct ip *);
/* Layer-2 path left ip_len/ip_off in network order; convert. */
2977 ip->ip_len = ntohs(ip->ip_len);
2978 ip->ip_off = ntohs(ip->ip_off);
2980 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
/* Only RST a first-fragment TCP segment that is not itself a RST. */
2981 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
2982 struct tcphdr *const tcp =
2983 L3HDR(struct tcphdr, mtod(args->m, struct ip *));
2985 if ((tcp->th_flags & TH_RST) == 0) {
2986 send_pkt(&args->f_id, ntohl(tcp->th_seq),
2987 ntohl(tcp->th_ack), tcp->th_flags | TH_RST);
2997 * Given an ip_fw *, lookup_next_rule will return a pointer
2998 * to the next rule, which can be either the jump
2999 * target (for skipto instructions) or the next one in the list (in
3000 * all other cases including a missing jump target).
3001 * The result is also written in the "next_rule" field of the rule.
3002 * Backward jumps are not allowed, so start looking from the next
3005 * This never returns NULL -- in case we do not have an exact match,
3006 * the next rule is returned. When the ruleset is changed,
3007 * pointers are flushed so we are always correct.
3009 static struct ip_fw *
3010 lookup_next_rule(struct ip_fw *me)
3012 struct ip_fw *rule = NULL;
3015 /* look for action, in case it is a skipto */
3016 cmd = ACTION_PTR(me);
/* O_LOG precedes the real action; step past it first. */
3017 if (cmd->opcode == O_LOG)
3019 if (cmd->opcode == O_SKIPTO) {
/* Forward scan only: first rule numbered >= the skipto target. */
3020 for (rule = me->next; rule; rule = rule->next) {
3021 if (rule->rulenum >= cmd->arg1)
3025 if (rule == NULL) /* failure or not a skipto */
/* Cache the result so later matches skip this search. */
3027 me->next_rule = rule;
/*
 * Match a flow against the owning socket's uid (O_UID) or gid
 * membership (presumably O_GID; confirm against caller).  Looks up the
 * PCB for the flow on this CPU's TCP/UDP hash.
 */
3032 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif,
3033 enum ipfw_opcodes opcode, uid_t uid)
3035 struct in_addr src_ip, dst_ip;
3036 struct inpcbinfo *pi;
3040 if (fid->proto == IPPROTO_TCP) {
3042 pi = &tcbinfo[mycpuid];
3043 } else if (fid->proto == IPPROTO_UDP) {
3045 pi = &udbinfo[mycpuid];
3051 * Values in 'fid' are in host byte order
3053 dst_ip.s_addr = htonl(fid->dst_ip);
3054 src_ip.s_addr = htonl(fid->src_ip);
/* Outbound vs. inbound lookup differ in local/foreign orientation. */
3056 pcb = in_pcblookup_hash(pi,
3057 dst_ip, htons(fid->dst_port),
3058 src_ip, htons(fid->src_port),
3061 pcb = in_pcblookup_hash(pi,
3062 src_ip, htons(fid->src_port),
3063 dst_ip, htons(fid->dst_port),
3066 if (pcb == NULL || pcb->inp_socket == NULL)
3069 if (opcode == O_UID) {
3070 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b))
3071 return !socheckuid(pcb->inp_socket, uid);
/* Otherwise 'uid' is really a gid; test group membership. */
3074 return groupmember(uid, pcb->inp_socket->so_cred);
/*
 * Match '*ip' against the address (or network, with IPFW_IFIP_NET) of
 * the interface named in the instruction.  The resolved addr/mask are
 * cached in the instruction and marked IPFW_IFIP_VALID.
 */
3079 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip)
3082 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) {
3083 struct ifaddr_container *ifac;
3086 ifp = ifunit_netisr(cmd->ifname);
/* Scan this CPU's address list for the first AF_INET address. */
3090 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
3091 struct ifaddr *ia = ifac->ifa;
3093 if (ia->ifa_addr == NULL)
3095 if (ia->ifa_addr->sa_family != AF_INET)
3098 cmd->mask.s_addr = INADDR_ANY;
3099 if (cmd->o.arg1 & IPFW_IFIP_NET) {
3100 cmd->mask = ((struct sockaddr_in *)
3101 ia->ifa_netmask)->sin_addr;
/* Host match: an empty mask degenerates to a full /32 match. */
3103 if (cmd->mask.s_addr == INADDR_ANY)
3104 cmd->mask.s_addr = INADDR_BROADCAST;
3107 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr;
3108 cmd->addr.s_addr &= cmd->mask.s_addr;
3110 cmd->o.arg1 |= IPFW_IFIP_VALID;
/* Still invalid: interface/address could not be resolved. */
3113 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)
3116 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr);
/*
 * Apply the NAT-style translation described by 'x' to the packet:
 * rewrite src or dst address (and optionally port) and incrementally
 * patch the IP and TCP/UDP checksums.  The pre-translation address and
 * port are returned through 'old_addr'/'old_port' when non-NULL.
 * NOTE(review): elided listing — some lines missing from view.
 */
3120 ipfw_xlate(const struct ipfw_xlat *x, struct mbuf *m,
3121 struct in_addr *old_addr, uint16_t *old_port)
3123 struct ip *ip = mtod(m, struct ip *);
3124 struct in_addr *addr;
3125 uint16_t *port, *csum, dlen = 0;
3127 boolean_t pseudo = FALSE;
/* Pick the source-side or destination-side fields to rewrite. */
3129 if (x->xlat_flags & IPFW_STATE_F_XLATSRC) {
3133 port = &L3HDR(struct tcphdr, ip)->th_sport;
3134 csum = &L3HDR(struct tcphdr, ip)->th_sum;
3137 port = &L3HDR(struct udphdr, ip)->uh_sport;
3138 csum = &L3HDR(struct udphdr, ip)->uh_sum;
3142 panic("ipfw: unsupported src xlate proto %u", ip->ip_p);
3148 port = &L3HDR(struct tcphdr, ip)->th_dport;
3149 csum = &L3HDR(struct tcphdr, ip)->th_sum;
3152 port = &L3HDR(struct udphdr, ip)->uh_dport;
3153 csum = &L3HDR(struct udphdr, ip)->uh_sum;
3157 panic("ipfw: unsupported dst xlate proto %u", ip->ip_p);
3160 if (old_addr != NULL)
3162 if (old_port != NULL) {
3163 if (x->xlat_port != 0)
/* Hardware checksum offload pending: recompute pseudo-header instead. */
3169 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) {
3170 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0)
3171 dlen = ip->ip_len - (ip->ip_hl << 2);
3176 const uint16_t *oaddr, *naddr;
3178 oaddr = (const uint16_t *)&addr->s_addr;
3179 naddr = (const uint16_t *)&x->xlat_addr;
/* RFC 1624-style incremental fixup, 16 bits at a time. */
3181 ip->ip_sum = pfil_cksum_fixup(pfil_cksum_fixup(ip->ip_sum,
3182 oaddr[0], naddr[0], 0), oaddr[1], naddr[1], 0);
3183 *csum = pfil_cksum_fixup(pfil_cksum_fixup(*csum,
3184 oaddr[0], naddr[0], udp), oaddr[1], naddr[1], udp);
3186 addr->s_addr = x->xlat_addr;
3188 if (x->xlat_port != 0) {
3190 *csum = pfil_cksum_fixup(*csum, *port, x->xlat_port,
3193 *port = x->xlat_port;
/* Offload path: seed the pseudo-header sum for the NIC to finish. */
3197 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
3198 htons(dlen + ip->ip_p));
/*
 * Netisr handler run on the translation's target CPU: re-inject the
 * redispatched mbuf through ip_output() with the continue-rule context
 * installed, then drop the in-flight references taken by the sender.
 */
3203 ipfw_ip_xlate_dispatch(netmsg_t nmsg)
3205 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
3206 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
3207 struct mbuf *m = nm->m;
3208 struct ipfw_xlat *x = nm->arg1;
3209 struct ip_fw *rule = x->xlat_rule;
3211 ASSERT_NETISR_NCPUS(mycpuid);
3212 KASSERT(rule->cpuid == mycpuid,
3213 ("rule does not belong to cpu%d", mycpuid));
3214 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
3215 ("mbuf does not have ipfw continue rule"));
/* Hand rule/xlat to ipfw_chk() via the per-CPU continue slots. */
3217 KASSERT(ctx->ipfw_cont_rule == NULL,
3218 ("pending ipfw continue rule"));
3219 KASSERT(ctx->ipfw_cont_xlat == NULL,
3220 ("pending ipfw continue xlat"));
3221 ctx->ipfw_cont_rule = rule;
3222 ctx->ipfw_cont_xlat = x;
3227 ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
3229 /* May not be cleared, if ipfw was unload/disabled. */
3230 ctx->ipfw_cont_rule = NULL;
3231 ctx->ipfw_cont_xlat = NULL;
3234 * This state is no longer used; decrement its xlat_crefs,
3235 * so this state can be deleted.
3239 * This rule is no longer used; decrement its cross_refs,
3240 * so this rule can be deleted.
3243 * Decrement cross_refs in the last step of this function,
3244 * so that the module could be unloaded safely.
/*
 * Hand an mbuf to the xlat's paired CPU for continued processing.
 * Pins the rule (cross_refs) and the state (xlat_crefs) while the mbuf
 * is in flight; the target CPU's dispatch handler releases them.
 */
3250 ipfw_xlate_redispatch(struct mbuf *m, int cpuid, struct ipfw_xlat *x,
3253 struct netmsg_genpkt *nm;
3255 KASSERT(x->xlat_pcpu == cpuid, ("xlat paired cpu%d, target cpu%d",
3256 x->xlat_pcpu, cpuid));
3259 * Bump cross_refs to prevent this rule and its siblings
3260 * from being deleted, while this mbuf is inflight. The
3261 * cross_refs of the sibling rule on the target cpu will
3262 * be decremented, once this mbuf is going to be filtered
3263 * on the target cpu.
3265 x->xlat_rule->cross_refs++;
3267 * Bump xlat_crefs to prevent this state and its paired
3268 * state from being deleted, while this mbuf is inflight.
3269 * The xlat_crefs of the paired state on the target cpu
3270 * will be decremented, once this mbuf is going to be
3271 * filtered on the target cpu.
/* Encode continuation/insert/forward status into the mbuf fw_flags. */
3275 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
3276 if (flags & IPFW_XLATE_INSERT)
3277 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATINS;
3278 if (flags & IPFW_XLATE_FORWARD)
3279 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATFWD;
3281 if ((flags & IPFW_XLATE_OUTPUT) == 0) {
3282 struct ip *ip = mtod(m, struct ip *);
3286 * ip_input() expects ip_len/ip_off are in network
3289 ip->ip_len = htons(ip->ip_len);
3290 ip->ip_off = htons(ip->ip_off);
/* Reuse the in-mbuf netmsg to avoid allocating a message. */
3293 nm = &m->m_hdr.mh_genmsg;
3294 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
3295 ipfw_ip_xlate_dispatch);
3297 nm->arg1 = x->xlat_pair;
3299 if (flags & IPFW_XLATE_OUTPUT)
3301 netisr_sendmsg(&nm->base, cpuid);
/*
 * Pull the fields needed for rule matching (proto, addresses, ports,
 * offset, length) out of the packet into 'local' and args->f_id,
 * pulling up headers as needed.  Returns the (possibly reallocated)
 * mbuf; '*ip0' is kept in sync with it.
 * NOTE(review): elided listing — some lines missing from view.
 */
3304 static struct mbuf *
3305 ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args,
3306 struct ip_fw_local *local, struct ip **ip0)
3308 struct ip *ip = mtod(m, struct ip *);
3313 * Collect parameters into local variables for faster matching.
3315 if (hlen == 0) { /* do not grab addresses for non-ip pkts */
3316 local->proto = args->f_id.proto = 0; /* mark f_id invalid */
3320 local->proto = args->f_id.proto = ip->ip_p;
3321 local->src_ip = ip->ip_src;
3322 local->dst_ip = ip->ip_dst;
3323 if (args->eh != NULL) { /* layer 2 packets are as on the wire */
3324 local->offset = ntohs(ip->ip_off) & IP_OFFMASK;
3325 local->ip_len = ntohs(ip->ip_len);
/* Layer-3 path already converted ip_off/ip_len to host order. */
3327 local->offset = ip->ip_off & IP_OFFMASK;
3328 local->ip_len = ip->ip_len;
3331 #define PULLUP_TO(len) \
3333 if (m->m_len < (len)) { \
3334 args->m = m = m_pullup(m, (len)); \
3339 ip = mtod(m, struct ip *); \
/* Port/flag extraction only makes sense on the first fragment. */
3343 if (local->offset == 0) {
3344 switch (local->proto) {
3346 PULLUP_TO(hlen + sizeof(struct tcphdr));
3347 local->tcp = tcp = L3HDR(struct tcphdr, ip);
3348 local->dst_port = tcp->th_dport;
3349 local->src_port = tcp->th_sport;
3350 args->f_id.flags = tcp->th_flags;
3354 PULLUP_TO(hlen + sizeof(struct udphdr));
3355 udp = L3HDR(struct udphdr, ip);
3356 local->dst_port = udp->uh_dport;
3357 local->src_port = udp->uh_sport;
3361 PULLUP_TO(hlen + 4); /* type, code and checksum. */
3362 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type;
/* f_id carries host-order values for the matching engine. */
3372 args->f_id.src_ip = ntohl(local->src_ip.s_addr);
3373 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr);
3374 args->f_id.src_port = local->src_port = ntohs(local->src_port);
3375 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port);
/*
 * Recompute the mbuf's flow hash after a translation changed the
 * addresses/ports, then refresh the cached local matching fields.
 * ip_len/ip_off are flipped to network order around the hash call
 * because ip_hashfn() reads the header as on the wire.
 */
3381 static struct mbuf *
3382 ipfw_rehashm(struct mbuf *m, const int hlen, struct ip_fw_args *args,
3383 struct ip_fw_local *local, struct ip **ip0)
3385 struct ip *ip = mtod(m, struct ip *);
3387 ip->ip_len = htons(ip->ip_len);
3388 ip->ip_off = htons(ip->ip_off);
/* Drop the stale hash before recomputing. */
3390 m->m_flags &= ~M_HASH;
3397 KASSERT(m->m_flags & M_HASH, ("no hash"));
3399 /* 'm' might be changed by ip_hashfn(). */
3400 ip = mtod(m, struct ip *);
3401 ip->ip_len = ntohs(ip->ip_len);
3402 ip->ip_off = ntohs(ip->ip_off);
3404 return (ipfw_setup_local(m, hlen, args, local, ip0));
3408 * The main check routine for the firewall.
3410 * All arguments are in args so we can modify them and return them
3411 * back to the caller.
3415 * args->m (in/out) The packet; we set to NULL when/if we nuke it.
3416 * Starts with the IP header.
3417 * args->eh (in) Mac header if present, or NULL for layer3 packet.
3418 * args->oif Outgoing interface, or NULL if packet is incoming.
3419 * The incoming interface is in the mbuf. (in)
3421 * args->rule Pointer to the last matching rule (in/out)
3422 * args->f_id Addresses grabbed from the packet (out)
3426 * If the packet was denied/rejected and has been dropped, *m is equal
3427 * to NULL upon return.
3429 * IP_FW_DENY the packet must be dropped.
3430 * IP_FW_PASS The packet is to be accepted and routed normally.
3431 * IP_FW_DIVERT Divert the packet to port (args->cookie)
3432 * IP_FW_TEE Tee the packet to port (args->cookie)
3433 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie)
3434 * IP_FW_CONTINUE Continue processing on another cpu.
3437 ipfw_chk(struct ip_fw_args *args)
3440 * Local variables hold state during the processing of a packet.
3442 * IMPORTANT NOTE: to speed up the processing of rules, there
3443 * are some assumption on the values of the variables, which
3444 * are documented here. Should you change them, please check
3445 * the implementation of the various instructions to make sure
3446 * that they still work.
3448 * args->eh The MAC header. It is non-null for a layer2
3449 * packet, it is NULL for a layer-3 packet.
3451 * m | args->m Pointer to the mbuf, as received from the caller.
3452 * It may change if ipfw_chk() does an m_pullup, or if it
3453 * consumes the packet because it calls send_reject().
3454 * XXX This has to change, so that ipfw_chk() never modifies
3455 * or consumes the buffer.
3456 * ip is simply an alias of the value of m, and it is kept
3457 * in sync with it (the packet is supposed to start with
3460 struct mbuf *m = args->m;
3461 struct ip *ip = mtod(m, struct ip *);
3464 * oif | args->oif If NULL, ipfw_chk has been called on the
3465 * inbound path (ether_input, ip_input).
3466 * If non-NULL, ipfw_chk has been called on the outbound path
3467 * (ether_output, ip_output).
3469 struct ifnet *oif = args->oif;
3471 struct ip_fw *f = NULL; /* matching rule */
3472 int retval = IP_FW_PASS;
3474 struct divert_info *divinfo;
3475 struct ipfw_state *s;
3478 * hlen The length of the IPv4 header.
3479 * hlen >0 means we have an IPv4 packet.
3481 u_int hlen = 0; /* hlen >0 means we have an IP pkt */
3483 struct ip_fw_local lc;
3486 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
3487 * MATCH_NONE when checked and not matched (dyn_f = NULL),
3488 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
3490 int dyn_dir = MATCH_UNKNOWN;
3491 struct ip_fw *dyn_f = NULL;
3492 int cpuid = mycpuid;
3493 struct ipfw_context *ctx;
3495 ASSERT_NETISR_NCPUS(cpuid);
3496 ctx = ipfw_ctx[cpuid];
3498 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED)
3499 return IP_FW_PASS; /* accept */
3501 if (args->eh == NULL || /* layer 3 packet */
3502 (m->m_pkthdr.len >= sizeof(struct ip) &&
3503 ntohs(args->eh->ether_type) == ETHERTYPE_IP))
3504 hlen = ip->ip_hl << 2;
3506 memset(&lc, 0, sizeof(lc));
3508 m = ipfw_setup_local(m, hlen, args, &lc, &ip);
3514 * Packet has already been tagged. Look for the next rule
3515 * to restart processing.
3517 * If fw_one_pass != 0 then just accept it.
3518 * XXX should not happen here, but optimized out in
3521 if (fw_one_pass && (args->flags & IP_FWARG_F_CONT) == 0)
3523 args->flags &= ~IP_FWARG_F_CONT;
3525 /* This rule is being/has been flushed */
3529 KASSERT(args->rule->cpuid == cpuid,
3530 ("rule used on cpu%d", cpuid));
3532 /* This rule was deleted */
3533 if (args->rule->rule_flags & IPFW_RULE_F_INVALID)
3536 if (args->xlat != NULL) {
3537 struct ipfw_xlat *x = args->xlat;
3539 /* This xlat is being deleted. */
3540 if (x->xlat_invalid)
3546 dyn_dir = (args->flags & IP_FWARG_F_XLATFWD) ?
3547 MATCH_FORWARD : MATCH_REVERSE;
3549 if (args->flags & IP_FWARG_F_XLATINS) {
3550 KASSERT(x->xlat_flags & IPFW_STATE_F_XLATSLAVE,
3551 ("not slave %u state", x->xlat_type));
3552 s = ipfw_state_link(ctx, &x->xlat_st);
3554 ctx->ipfw_xlate_conflicts++;
3555 if (IPFW_STATE_ISDEAD(s)) {
3556 ipfw_state_remove(ctx, s);
3557 s = ipfw_state_link(ctx,
3564 "conflicts %u state\n",
3568 ipfw_xlat_invalidate(x);
3571 ctx->ipfw_xlate_cresolved++;
3574 ipfw_state_update(&args->f_id, dyn_dir,
3575 lc.tcp, &x->xlat_st);
3578 /* TODO: setup dyn_f, dyn_dir */
3580 f = args->rule->next_rule;
3582 f = lookup_next_rule(args->rule);
3586 * Find the starting rule. It can be either the first
3587 * one, or the one after divert_rule if asked so.
3591 KKASSERT((args->flags &
3592 (IP_FWARG_F_XLATINS | IP_FWARG_F_CONT)) == 0);
3593 KKASSERT(args->xlat == NULL);
3595 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL);
3597 divinfo = m_tag_data(mtag);
3598 skipto = divinfo->skipto;
3603 f = ctx->ipfw_layer3_chain;
3604 if (args->eh == NULL && skipto != 0) {
3605 /* No skipto during rule flushing */
3609 if (skipto >= IPFW_DEFAULT_RULE)
3610 return IP_FW_DENY; /* invalid */
3612 while (f && f->rulenum <= skipto)
3614 if (f == NULL) /* drop packet */
3616 } else if (ipfw_flushing) {
3617 /* Rules are being flushed; skip to default rule */
3618 f = ctx->ipfw_default_rule;
3621 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL)
3622 m_tag_delete(m, mtag);
3625 * Now scan the rules, and parse microinstructions for each rule.
3627 for (; f; f = f->next) {
3630 int skip_or; /* skip rest of OR block */
3633 if (ctx->ipfw_set_disable & (1 << f->set)) {
3638 if (args->xlat != NULL) {
3640 l = f->cmd_len - f->act_ofs;
3641 cmd = ACTION_PTR(f);
3648 for (; l > 0; l -= cmdlen, cmd += cmdlen) {
3652 * check_body is a jump target used when we find a
3653 * CHECK_STATE, and need to jump to the body of
3657 cmdlen = F_LEN(cmd);
3659 * An OR block (insn_1 || .. || insn_n) has the
3660 * F_OR bit set in all but the last instruction.
3661 * The first match will set "skip_or", and cause
3662 * the following instructions to be skipped until
3663 * past the one with the F_OR bit clear.
3665 if (skip_or) { /* skip this instruction */
3666 if ((cmd->len & F_OR) == 0)
3667 skip_or = 0; /* next one is good */
3670 match = 0; /* set to 1 if we succeed */
3672 switch (cmd->opcode) {
3674 * The first set of opcodes compares the packet's
3675 * fields with some pattern, setting 'match' if a
3676 * match is found. At the end of the loop there is
3677 * logic to deal with F_NOT and F_OR flags associated
3685 kprintf("ipfw: opcode %d unimplemented\n",
3692 * We only check offset == 0 && proto != 0,
3693 * as this ensures that we have an IPv4
3694 * packet with the ports info.
3699 match = ipfw_match_uid(&args->f_id, oif,
3701 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]);
3705 match = iface_match(m->m_pkthdr.rcvif,
3706 (ipfw_insn_if *)cmd);
3710 match = iface_match(oif, (ipfw_insn_if *)cmd);
3714 match = iface_match(oif ? oif :
3715 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
3719 if (args->eh != NULL) { /* have MAC header */
3720 uint32_t *want = (uint32_t *)
3721 ((ipfw_insn_mac *)cmd)->addr;
3722 uint32_t *mask = (uint32_t *)
3723 ((ipfw_insn_mac *)cmd)->mask;
3724 uint32_t *hdr = (uint32_t *)args->eh;
3727 (want[0] == (hdr[0] & mask[0]) &&
3728 want[1] == (hdr[1] & mask[1]) &&
3729 want[2] == (hdr[2] & mask[2]));
3734 if (args->eh != NULL) {
3736 ntohs(args->eh->ether_type);
3738 ((ipfw_insn_u16 *)cmd)->ports;
3741 /* Special vlan handling */
3742 if (m->m_flags & M_VLANTAG)
3745 for (i = cmdlen - 1; !match && i > 0;
3748 (t >= p[0] && t <= p[1]);
3754 match = (hlen > 0 && lc.offset != 0);
3761 if (args->eh != NULL)
3762 off = ntohs(ip->ip_off);
3765 if (off & (IP_MF | IP_OFFMASK))
3770 case O_IN: /* "out" is "not in" */
3771 match = (oif == NULL);
3775 match = (args->eh != NULL);
3780 * We do not allow an arg of 0 so the
3781 * check of "proto" only suffices.
3783 match = (lc.proto == cmd->arg1);
3787 match = (hlen > 0 &&
3788 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3793 match = (hlen > 0 &&
3794 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3796 ((ipfw_insn_ip *)cmd)->mask.s_addr));
3803 tif = INADDR_TO_IFP(&lc.src_ip);
3804 match = (tif != NULL);
3808 case O_IP_SRC_TABLE:
3809 match = ipfw_table_lookup(ctx, cmd->arg1,
3814 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd,
3821 uint32_t *d = (uint32_t *)(cmd + 1);
3823 cmd->opcode == O_IP_DST_SET ?
3829 addr -= d[0]; /* subtract base */
3831 (addr < cmd->arg1) &&
3832 (d[1 + (addr >> 5)] &
3833 (1 << (addr & 0x1f)));
3838 match = (hlen > 0 &&
3839 ((ipfw_insn_ip *)cmd)->addr.s_addr ==
3844 match = (hlen > 0) &&
3845 (((ipfw_insn_ip *)cmd)->addr.s_addr ==
3847 ((ipfw_insn_ip *)cmd)->mask.s_addr));
3854 tif = INADDR_TO_IFP(&lc.dst_ip);
3855 match = (tif != NULL);
3859 case O_IP_DST_TABLE:
3860 match = ipfw_table_lookup(ctx, cmd->arg1,
3865 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd,
3872 * offset == 0 && proto != 0 is enough
3873 * to guarantee that we have an IPv4
3874 * packet with port info.
3876 if ((lc.proto==IPPROTO_UDP ||
3877 lc.proto==IPPROTO_TCP)
3878 && lc.offset == 0) {
3880 (cmd->opcode == O_IP_SRCPORT) ?
3881 lc.src_port : lc.dst_port;
3883 ((ipfw_insn_u16 *)cmd)->ports;
3886 for (i = cmdlen - 1; !match && i > 0;
3889 (x >= p[0] && x <= p[1]);
3895 match = (lc.offset == 0 &&
3896 lc.proto==IPPROTO_ICMP &&
3897 icmpcode_match(ip, (ipfw_insn_u32 *)cmd));
3901 match = (lc.offset == 0 &&
3902 lc.proto==IPPROTO_ICMP &&
3903 icmptype_match(ip, (ipfw_insn_u32 *)cmd));
3907 match = (hlen > 0 && ipopts_match(ip, cmd));
3911 match = (hlen > 0 && cmd->arg1 == ip->ip_v);
3915 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl);
3919 match = (hlen > 0 &&
3920 cmd->arg1 == ntohs(ip->ip_id));
3924 match = (hlen > 0 && cmd->arg1 == lc.ip_len);
3927 case O_IPPRECEDENCE:
3928 match = (hlen > 0 &&
3929 (cmd->arg1 == (ip->ip_tos & 0xe0)));
3933 match = (hlen > 0 &&
3934 flags_match(cmd, ip->ip_tos));
3938 match = (lc.proto == IPPROTO_TCP &&
3941 L3HDR(struct tcphdr,ip)->th_flags));
3945 match = (lc.proto == IPPROTO_TCP &&
3946 lc.offset == 0 && tcpopts_match(ip, cmd));
3950 match = (lc.proto == IPPROTO_TCP &&
3952 ((ipfw_insn_u32 *)cmd)->d[0] ==
3953 L3HDR(struct tcphdr,ip)->th_seq);
3957 match = (lc.proto == IPPROTO_TCP &&
3959 ((ipfw_insn_u32 *)cmd)->d[0] ==
3960 L3HDR(struct tcphdr,ip)->th_ack);
3964 match = (lc.proto == IPPROTO_TCP &&
3967 L3HDR(struct tcphdr,ip)->th_win);
3971 /* reject packets which have SYN only */
3972 /* XXX should i also check for TH_ACK ? */
3973 match = (lc.proto == IPPROTO_TCP &&
3975 (L3HDR(struct tcphdr,ip)->th_flags &
3976 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
3981 ipfw_log(ctx, f, hlen, args->eh, m,
3988 match = (krandom() <
3989 ((ipfw_insn_u32 *)cmd)->d[0]);
3993 * The second set of opcodes represents 'actions',
3994 * i.e. the terminal part of a rule once the packet
3995 * matches all previous patterns.
3996 * Typically there is only one action for each rule,
3997 * and the opcode is stored at the end of the rule
3998 * (but there are exceptions -- see below).
4000 * In general, here we set retval and terminate the
4001 * outer loop (would be a 'break 3' in some language,
4002 * but we need to do a 'goto done').
4005 * O_COUNT and O_SKIPTO actions:
4006 * instead of terminating, we jump to the next rule
4007 * ('goto next_rule', equivalent to a 'break 2'),
4008 * or to the SKIPTO target ('goto again' after
4009 * having set f, cmd and l), respectively.
4011 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes
4012 * are not real 'actions', and are stored right
4013 * before the 'action' part of the rule.
4014 * These opcodes try to install an entry in the
4015 * state tables; if successful, we continue with
4016 * the next opcode (match=1; break;), otherwise
4017 * the packet must be dropped ('goto done' after
4018 * setting retval). If static rules are changed
4019 * during the state installation, the packet will
4020 * be dropped and rule's stats will not beupdated
4021 * ('return IP_FW_DENY').
4023 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
4024 * cause a lookup of the state table, and a jump
4025 * to the 'action' part of the parent rule
4026 * ('goto check_body') if an entry is found, or
4027 * (CHECK_STATE only) a jump to the next rule if
4028 * the entry is not found ('goto next_rule').
4029 * The result of the lookup is cached to make
4030 * further instances of these opcodes are
4031 * effectively NOPs. If static rules are changed
4032 * during the state looking up, the packet will
4033 * be dropped and rule's stats will not be updated
4034 * ('return IP_FW_DENY').
4037 if (f->cross_rules == NULL) {
4039 * This rule was not completely setup;
4040 * move on to the next rule.
4045 * Apply redirect only on input path and
4046 * only to non-fragment TCP segments or
4049 * Does _not_ work with layer2 filtering.
4051 if (oif != NULL || args->eh != NULL ||
4052 (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4053 (lc.proto != IPPROTO_TCP &&
4054 lc.proto != IPPROTO_UDP))
4061 s = ipfw_state_install(ctx, f,
4062 (ipfw_insn_limit *)cmd, args, lc.tcp);
4064 retval = IP_FW_DENY;
4065 goto done; /* error/limit violation */
4068 s->st_bcnt += lc.ip_len;
4070 if (s->st_type == O_REDIRECT) {
4071 struct in_addr oaddr;
4073 struct ipfw_xlat *slave_x, *x;
4074 struct ipfw_state *dup;
4076 x = (struct ipfw_xlat *)s;
4077 ipfw_xlate(x, m, &oaddr, &oport);
4078 m = ipfw_rehashm(m, hlen, args, &lc,
4081 ipfw_state_del(ctx, s);
4085 cpuid = netisr_hashcpu(
4088 slave_x = (struct ipfw_xlat *)
4089 ipfw_state_alloc(ctx, &args->f_id,
4090 O_REDIRECT, f->cross_rules[cpuid],
4092 if (slave_x == NULL) {
4093 ipfw_state_del(ctx, s);
4094 retval = IP_FW_DENY;
4097 slave_x->xlat_addr = oaddr.s_addr;
4098 slave_x->xlat_port = oport;
4099 slave_x->xlat_dir = MATCH_REVERSE;
4100 slave_x->xlat_flags |=
4101 IPFW_STATE_F_XLATSRC |
4102 IPFW_STATE_F_XLATSLAVE;
4104 slave_x->xlat_pair = x;
4105 slave_x->xlat_pcpu = mycpuid;
4106 x->xlat_pair = slave_x;
4107 x->xlat_pcpu = cpuid;
4110 if (cpuid != mycpuid) {
4111 ctx->ipfw_xlate_split++;
4112 ipfw_xlate_redispatch(
4115 IPFW_XLATE_FORWARD);
4117 return (IP_FW_REDISPATCH);
4120 dup = ipfw_state_link(ctx,
4123 ctx->ipfw_xlate_conflicts++;
4124 if (IPFW_STATE_ISDEAD(dup)) {
4125 ipfw_state_remove(ctx,
4127 dup = ipfw_state_link(
4128 ctx, &slave_x->xlat_st);
4139 ipfw_state_del(ctx, s);
4140 return (IP_FW_DENY);
4142 ctx->ipfw_xlate_cresolved++;
4151 * States are checked at the first keep-state
4152 * check-state occurrence, with the result
4153 * being stored in dyn_dir. The compiler
4154 * introduces a PROBE_STATE instruction for
4155 * us when we have a KEEP_STATE/LIMIT/RDR
4156 * (because PROBE_STATE needs to be run first).
4159 if (dyn_dir == MATCH_UNKNOWN) {
4160 s = ipfw_state_lookup(ctx,
4161 &args->f_id, &dyn_dir, lc.tcp);
4164 (s->st_type == O_REDIRECT &&
4165 (args->eh != NULL ||
4166 (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4167 (lc.proto != IPPROTO_TCP &&
4168 lc.proto != IPPROTO_UDP)))) {
4170 * State not found. If CHECK_STATE,
4171 * skip to next rule, if PROBE_STATE
4172 * just ignore and continue with next
4175 if (cmd->opcode == O_CHECK_STATE)
4182 s->st_bcnt += lc.ip_len;
4184 if (s->st_type == O_REDIRECT) {
4185 struct ipfw_xlat *x =
4186 (struct ipfw_xlat *)s;
4189 x->xlat_ifp == NULL) {
4190 KASSERT(x->xlat_flags &
4191 IPFW_STATE_F_XLATSLAVE,
4192 ("master rdr state "
4196 (oif != NULL && x->xlat_ifp!=oif) ||
4198 x->xlat_ifp!=m->m_pkthdr.rcvif)) {
4199 retval = IP_FW_DENY;
4202 if (x->xlat_dir != dyn_dir)
4205 ipfw_xlate(x, m, NULL, NULL);
4206 m = ipfw_rehashm(m, hlen, args, &lc,
4211 cpuid = netisr_hashcpu(
4213 if (cpuid != mycpuid) {
4220 if (dyn_dir == MATCH_FORWARD) {
4224 ipfw_xlate_redispatch(m, cpuid,
4227 return (IP_FW_REDISPATCH);
4230 KKASSERT(x->xlat_pcpu == mycpuid);
4231 ipfw_state_update(&args->f_id, dyn_dir,
4232 lc.tcp, &x->xlat_pair->xlat_st);
4236 * Found a rule from a state; jump to the
4237 * 'action' part of the rule.
4240 KKASSERT(f->cpuid == mycpuid);
4242 cmd = ACTION_PTR(f);
4243 l = f->cmd_len - f->act_ofs;
4248 retval = IP_FW_PASS; /* accept */
4252 if (f->cross_rules == NULL) {
4254 * This rule was not completely setup;
4255 * move on to the next rule.
4261 * Don't defrag for l2 packets, output packets
4264 if (oif != NULL || args->eh != NULL ||
4265 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0)
4272 retval = IP_FW_PASS;
4275 ctx->ipfw_defraged++;
4276 KASSERT((m->m_flags & M_HASH) == 0,
4277 ("hash not cleared"));
4279 /* Update statistics */
4281 f->bcnt += lc.ip_len;
4282 f->timestamp = time_second;
4284 ip = mtod(m, struct ip *);
4285 hlen = ip->ip_hl << 2;
4288 ip->ip_len = htons(ip->ip_len);
4289 ip->ip_off = htons(ip->ip_off);
4296 KASSERT(m->m_flags & M_HASH, ("no hash"));
4297 cpuid = netisr_hashcpu(m->m_pkthdr.hash);
4298 if (cpuid != mycpuid) {
4301 * ip_len/ip_off are in network byte
4304 ctx->ipfw_defrag_remote++;
4305 ipfw_defrag_redispatch(m, cpuid, f);
4307 return (IP_FW_REDISPATCH);
4310 /* 'm' might be changed by ip_hashfn(). */
4311 ip = mtod(m, struct ip *);
4312 ip->ip_len = ntohs(ip->ip_len);
4313 ip->ip_off = ntohs(ip->ip_off);
4315 m = ipfw_setup_local(m, hlen, args, &lc, &ip);
4324 args->rule = f; /* report matching rule */
4325 args->cookie = cmd->arg1;
4326 retval = IP_FW_DUMMYNET;
4331 if (args->eh) /* not on layer 2 */
4334 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT,
4335 sizeof(*divinfo), M_INTWAIT | M_NULLOK);
4337 retval = IP_FW_DENY;
4340 divinfo = m_tag_data(mtag);
4342 divinfo->skipto = f->rulenum;
4343 divinfo->port = cmd->arg1;
4344 divinfo->tee = (cmd->opcode == O_TEE);
4345 m_tag_prepend(m, mtag);
4347 args->cookie = cmd->arg1;
4348 retval = (cmd->opcode == O_DIVERT) ?
4349 IP_FW_DIVERT : IP_FW_TEE;
4354 f->pcnt++; /* update stats */
4355 f->bcnt += lc.ip_len;
4356 f->timestamp = time_second;
4357 if (cmd->opcode == O_COUNT)
4360 if (f->next_rule == NULL)
4361 lookup_next_rule(f);
4367 * Drop the packet and send a reject notice
4368 * if the packet is not ICMP (or is an ICMP
4369 * query), and it is not multicast/broadcast.
4372 (lc.proto != IPPROTO_ICMP ||
4373 is_icmp_query(ip)) &&
4374 !(m->m_flags & (M_BCAST|M_MCAST)) &&
4375 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) {
4376 send_reject(args, cmd->arg1,
4377 lc.offset, lc.ip_len);
4378 retval = IP_FW_DENY;
4383 retval = IP_FW_DENY;
4387 if (args->eh) /* not valid on layer2 pkts */
4389 if (!dyn_f || dyn_dir == MATCH_FORWARD) {
4390 struct sockaddr_in *sin;
4392 mtag = m_tag_get(PACKET_TAG_IPFORWARD,
4393 sizeof(*sin), M_INTWAIT | M_NULLOK);
4395 retval = IP_FW_DENY;
4398 sin = m_tag_data(mtag);
4400 /* Structure copy */
4401 *sin = ((ipfw_insn_sa *)cmd)->sa;
4403 m_tag_prepend(m, mtag);
4404 m->m_pkthdr.fw_flags |=
4405 IPFORWARD_MBUF_TAGGED;
4406 m->m_pkthdr.fw_flags &=
4407 ~BRIDGE_MBUF_TAGGED;
4409 retval = IP_FW_PASS;
4413 panic("-- unknown opcode %d", cmd->opcode);
4414 } /* end of switch() on opcodes */
4416 if (cmd->len & F_NOT)
4420 if (cmd->len & F_OR)
4423 if (!(cmd->len & F_OR)) /* not an OR block, */
4424 break; /* try next rule */
4427 } /* end of inner for, scan opcodes */
4429 next_rule:; /* try next rule */
4431 } /* end of outer for, scan rules */
4432 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
4436 /* Update statistics */
4438 f->bcnt += lc.ip_len;
4439 f->timestamp = time_second;
4444 kprintf("pullup failed\n");
/*
 * Tag mbuf 'm' with dummynet state so it can be handed to pipe/queue
 * 'pipe_nr' in direction 'dir'; the matching rule is carried in 'fwa'.
 * NOTE(review): this is a subsampled listing — several original lines
 * (local declarations, the mtag NULL check, return statements) are elided.
 */
4448 static struct mbuf *
4449 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
4454 const struct ipfw_flow_id *id;
4455 struct dn_flow_id *fid;
/* M_NULLOK: the tag allocation is allowed to fail. */
4459 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt),
4460 M_INTWAIT | M_NULLOK);
4465 m_tag_prepend(m, mtag);
4467 pkt = m_tag_data(mtag);
4468 bzero(pkt, sizeof(*pkt));
/* Action is the first instruction at act_ofs; skip a leading O_LOG. */
4470 cmd = fwa->rule->cmd + fwa->rule->act_ofs;
4471 if (cmd->opcode == O_LOG)
4473 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE,
4474 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode));
4477 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK);
4478 pkt->ifp = fwa->oif;
4479 pkt->pipe_nr = pipe_nr;
/* Record originating cpu/msgport so dummynet can hand the mbuf back here. */
4481 pkt->cpuid = mycpuid;
4482 pkt->msgport = netisr_curport();
/* Copy the flow id — presumably used for dummynet queue classification. */
4486 fid->fid_dst_ip = id->dst_ip;
4487 fid->fid_src_ip = id->src_ip;
4488 fid->fid_dst_port = id->dst_port;
4489 fid->fid_src_port = id->src_port;
4490 fid->fid_proto = id->proto;
4491 fid->fid_flags = id->flags;
/* The tag holds a reference on the rule; released via dn_unref_priv. */
4493 ipfw_ref_rule(fwa->rule);
4494 pkt->dn_priv = fwa->rule;
4495 pkt->dn_unref_priv = ipfw_unref_rule;
4497 if (cmd->opcode == O_PIPE)
4498 pkt->dn_flags |= DN_FLAGS_IS_PIPE;
4500 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED;
4505 * When a rule is added/deleted, clear the next_rule pointers in all rules.
4506 * These will be reconstructed on the fly as packets are matched.
4509 ipfw_flush_rule_ptrs(struct ipfw_context *ctx)
4513 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
4514 rule->next_rule = NULL;
4518 ipfw_inc_static_count(struct ip_fw *rule)
4520 /* Static rule's counts are updated only on CPU0 */
4521 KKASSERT(mycpuid == 0);
4524 static_ioc_len += IOC_RULESIZE(rule);
4528 ipfw_dec_static_count(struct ip_fw *rule)
4530 int l = IOC_RULESIZE(rule);
4532 /* Static rule's counts are updated only on CPU0 */
4533 KKASSERT(mycpuid == 0);
4535 KASSERT(static_count > 0, ("invalid static count %u", static_count));
4538 KASSERT(static_ioc_len >= l,
4539 ("invalid static len %u", static_ioc_len));
4540 static_ioc_len -= l;
4544 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule)
4546 if (fwmsg->sibling != NULL) {
4547 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1);
4548 fwmsg->sibling->sibling = rule;
4550 fwmsg->sibling = rule;
4553 static struct ip_fw *
4554 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
4558 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO);
4560 rule->act_ofs = ioc_rule->act_ofs;
4561 rule->cmd_len = ioc_rule->cmd_len;
4562 rule->rulenum = ioc_rule->rulenum;
4563 rule->set = ioc_rule->set;
4564 rule->usr_flags = ioc_rule->usr_flags;
4566 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */);
4569 rule->cpuid = mycpuid;
4570 rule->rule_flags = rule_flags;
/*
 * Per-CPU netisr handler: duplicate the new rule on this CPU, insert it at
 * the pre-computed position, then forward the message to the next CPU.
 * NOTE(review): subsampled listing — some lines (else/closing braces,
 * the rule local declaration) are elided.
 */
4576 ipfw_add_rule_dispatch(netmsg_t nmsg)
4578 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
4579 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4582 ASSERT_NETISR_NCPUS(mycpuid);
4584 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags);
4587 * Insert rule into the pre-determined position
4589 if (fwmsg->prev_rule != NULL) {
4590 struct ip_fw *prev, *next;
4592 prev = fwmsg->prev_rule;
4593 KKASSERT(prev->cpuid == mycpuid);
4595 next = fwmsg->next_rule;
4596 KKASSERT(next->cpuid == mycpuid);
4602 * Move to the position on the next CPU
4603 * before the msg is forwarded.
4605 fwmsg->prev_rule = prev->sibling;
4606 fwmsg->next_rule = next->sibling;
/* No prev rule: insert at the head of this CPU's layer3 chain. */
4608 KKASSERT(fwmsg->next_rule == NULL);
4609 rule->next = ctx->ipfw_layer3_chain;
4610 ctx->ipfw_layer3_chain = rule;
4613 /* Link rule CPU sibling */
4614 ipfw_link_sibling(fwmsg, rule);
4616 ipfw_flush_rule_ptrs(ctx);
4619 /* Statistics only need to be updated once */
4620 ipfw_inc_static_count(rule);
4622 /* Return the rule on CPU0 */
4623 nmsg->lmsg.u.ms_resultp = rule;
/* track_ruleid identifies the CPU0 rule across all CPU duplicates. */
4626 if (rule->rule_flags & IPFW_RULE_F_GENTRACK)
4627 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp;
4629 if (fwmsg->cross_rules != NULL) {
4630 /* Save rules for later use. */
4631 fwmsg->cross_rules[mycpuid] = rule;
4634 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4638 ipfw_crossref_rule_dispatch(netmsg_t nmsg)
4640 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
4641 struct ip_fw *rule = fwmsg->sibling;
4642 int sz = sizeof(struct ip_fw *) * netisr_ncpus;
4644 ASSERT_NETISR_NCPUS(mycpuid);
4645 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF,
4646 ("not crossref rule"));
4648 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK);
4649 memcpy(rule->cross_rules, fwmsg->cross_rules, sz);
4651 fwmsg->sibling = rule->sibling;
4652 netisr_forwardmsg(&fwmsg->base, mycpuid + 1);
4656 * Add a new rule to the list. Copy the rule into a malloc'ed area,
4657 * then possibly create a rule number and add the rule to the list.
4658 * Update the rule_number in the input struct so the caller knows
/*
 * NOTE(review): subsampled listing — the return type line, braces and
 * several statements are elided below.
 */
4662 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
4664 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4665 struct netmsg_ipfw fwmsg;
4666 struct ip_fw *f, *prev, *rule;
4671 * If rulenum is 0, find highest numbered rule before the
4672 * default rule, and add rule number incremental step.
4674 if (ioc_rule->rulenum == 0) {
4675 int step = autoinc_step;
4677 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN &&
4678 step <= IPFW_AUTOINC_STEP_MAX);
4681 * Locate the highest numbered rule before default
4683 for (f = ctx->ipfw_layer3_chain; f; f = f->next) {
4684 if (f->rulenum == IPFW_DEFAULT_RULE)
4686 ioc_rule->rulenum = f->rulenum;
/* Only auto-increment if it cannot collide with the default rule. */
4688 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step)
4689 ioc_rule->rulenum += step;
4691 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE &&
4692 ioc_rule->rulenum != 0,
4693 ("invalid rule num %d", ioc_rule->rulenum));
4696 * Now find the right place for the new rule in the sorted list.
4698 for (prev = NULL, f = ctx->ipfw_layer3_chain; f;
4699 prev = f, f = f->next) {
4700 if (f->rulenum > ioc_rule->rulenum) {
4701 /* Found the location */
4705 KASSERT(f != NULL, ("no default rule?!"));
4708 * Duplicate the rule onto each CPU.
4709 * The rule duplicated on CPU0 will be returned.
4711 bzero(&fwmsg, sizeof(fwmsg));
4712 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4713 ipfw_add_rule_dispatch);
4714 fwmsg.ioc_rule = ioc_rule;
4715 fwmsg.prev_rule = prev;
4716 fwmsg.next_rule = prev == NULL ? NULL : f;
4717 fwmsg.rule_flags = rule_flags;
/* CROSSREF rules need the per-CPU pointer array for a second pass. */
4718 if (rule_flags & IPFW_RULE_F_CROSSREF) {
4719 fwmsg.cross_rules = kmalloc(
4720 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP,
4724 netisr_domsg_global(&fwmsg.base);
4725 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL);
4727 rule = fwmsg.base.lmsg.u.ms_resultp;
4728 KKASSERT(rule != NULL && rule->cpuid == mycpuid);
4730 if (fwmsg.cross_rules != NULL) {
/* Second pass: hand every CPU duplicate the cross_rules array. */
4731 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport,
4732 MSGF_PRIORITY, ipfw_crossref_rule_dispatch);
4733 fwmsg.sibling = rule;
4734 netisr_domsg_global(&fwmsg.base);
4735 KKASSERT(fwmsg.sibling == NULL);
4737 kfree(fwmsg.cross_rules, M_TEMP);
4740 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
4744 DPRINTF("++ installed rule %d, static count now %d\n",
4745 rule->rulenum, static_count);
4749 * Free storage associated with a static rule (including derived
4751 * The caller is in charge of clearing rule pointers to avoid
4752 * dangling pointers.
4753 * @return a pointer to the next entry.
4754 * Arguments are not checked, so they better be correct.
/*
 * NOTE(review): subsampled listing — braces, the 'n = rule->next'
 * assignment and the prev-unlink branch are elided.
 */
4756 static struct ip_fw *
4757 ipfw_delete_rule(struct ipfw_context *ctx,
4758 struct ip_fw *prev, struct ip_fw *rule)
4764 ctx->ipfw_layer3_chain = n;
4768 /* Mark the rule as invalid */
4769 rule->rule_flags |= IPFW_RULE_F_INVALID;
4770 rule->next_rule = NULL;
4771 rule->sibling = NULL;
4773 /* Don't reset cpuid here; keep various assertion working */
4777 /* Statistics only need to be updated once */
4779 ipfw_dec_static_count(rule);
4781 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) {
4782 /* Try to free this rule */
4783 ipfw_free_rule(rule);
/* CROSSREF rules are parked on a global free list instead. */
4785 /* TODO: check staging area. */
4787 rule->next = ipfw_gd.ipfw_crossref_free;
4788 ipfw_gd.ipfw_crossref_free = rule;
4792 /* Return the next rule */
/*
 * Per-CPU netisr handler for ipfw_flush(): drop all per-CPU states and
 * tracks, then delete every rule (keeping the default rule unless
 * kill_default is set), and forward to the next CPU.
 * NOTE(review): subsampled listing — braces and local declarations elided.
 */
4797 ipfw_flush_dispatch(netmsg_t nmsg)
4799 int kill_default = nmsg->lmsg.u.ms_result;
4800 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4803 ASSERT_NETISR_NCPUS(mycpuid);
/* NULL rule: flush states/tracks regardless of generating rule. */
4808 ipfw_state_flush(ctx, NULL);
4809 KASSERT(ctx->ipfw_state_cnt == 0,
4810 ("%d pcpu states remain", ctx->ipfw_state_cnt));
4811 ctx->ipfw_state_loosecnt = 0;
4812 ctx->ipfw_state_lastexp = 0;
4817 ipfw_track_flush(ctx, NULL);
4818 ctx->ipfw_track_lastexp = 0;
4819 if (ctx->ipfw_trkcnt_spare != NULL) {
4820 kfree(ctx->ipfw_trkcnt_spare, M_IPFW);
4821 ctx->ipfw_trkcnt_spare = NULL;
4824 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */
4826 while ((rule = ctx->ipfw_layer3_chain) != NULL &&
4827 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE))
4828 ipfw_delete_rule(ctx, NULL, rule);
4830 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4834 * Deletes all rules from a chain (including the default rule
4835 * if the second argument is set).
/*
 * NOTE(review): subsampled listing — braces, #ifdef KASSERT blocks'
 * guards and some declarations are elided.
 */
4838 ipfw_flush(int kill_default)
4840 struct netmsg_base nmsg;
4842 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4849 * If 'kill_default' then caller has done the necessary
4850 * msgport syncing; unnecessary to do it again.
4852 if (!kill_default) {
4854 * Let ipfw_chk() know the rules are going to
4855 * be flushed, so it could jump directly to
4859 /* XXX use priority sync */
4860 netmsg_service_sync();
4864 * Press the 'flush' button
4866 bzero(&nmsg, sizeof(nmsg));
4867 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4868 ipfw_flush_dispatch);
4869 nmsg.lmsg.u.ms_result = kill_default;
4870 netisr_domsg_global(&nmsg);
/* Reset global (cross-CPU) state bookkeeping after the flush. */
4871 ipfw_gd.ipfw_state_loosecnt = 0;
4872 ipfw_gd.ipfw_state_globexp = 0;
4873 ipfw_gd.ipfw_track_globexp = 0;
4876 state_cnt = ipfw_state_cntcoll();
4877 KASSERT(state_cnt == 0, ("%d states remain", state_cnt));
4879 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0,
4880 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt));
/* With kill_default, nothing survives; otherwise only the default rule. */
4883 KASSERT(static_count == 0,
4884 ("%u static rules remain", static_count));
4885 KASSERT(static_ioc_len == 0,
4886 ("%u bytes of static rules remain", static_ioc_len));
4888 KASSERT(static_count == 1,
4889 ("%u static rules remain", static_count));
4890 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule),
4891 ("%u bytes of static rules remain, should be %lu",
4893 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule)));
/*
 * Per-CPU netisr handler: delete all duplicates of rule number
 * dmsg->rulenum on this CPU, then forward to the next CPU.
 * NOTE(review): subsampled listing — braces and blank lines elided.
 */
4902 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg)
4904 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4905 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4906 struct ip_fw *rule, *prev;
4908 ASSERT_NETISR_NCPUS(mycpuid);
4910 rule = dmsg->start_rule;
4911 KKASSERT(rule->cpuid == mycpuid);
/* Advance the start rule to this rule's duplicate on the next CPU. */
4912 dmsg->start_rule = rule->sibling;
4914 prev = dmsg->prev_rule;
4916 KKASSERT(prev->cpuid == mycpuid);
4919 * Move to the position on the next CPU
4920 * before the msg is forwarded.
4922 dmsg->prev_rule = prev->sibling;
4926 * flush pointers outside the loop, then delete all matching
4927 * rules. 'prev' remains the same throughout the cycle.
4929 ipfw_flush_rule_ptrs(ctx);
4930 while (rule && rule->rulenum == dmsg->rulenum) {
4931 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4932 /* Flush states generated by this rule. */
4933 ipfw_state_flush(ctx, rule);
4935 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
4936 /* Flush tracks generated by this rule. */
4937 ipfw_track_flush(ctx, rule);
4939 rule = ipfw_delete_rule(ctx, prev, rule);
4942 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Delete all rules with number 'rulenum' on every CPU.
 * NOTE(review): subsampled listing — the return type, error returns and
 * closing braces are elided.
 */
4946 ipfw_alt_delete_rule(uint16_t rulenum)
4948 struct ip_fw *prev, *rule;
4949 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4950 struct netmsg_del dmsg;
4955 * Locate first rule to delete
4957 for (prev = NULL, rule = ctx->ipfw_layer3_chain;
4958 rule && rule->rulenum < rulenum;
4959 prev = rule, rule = rule->next)
4961 if (rule->rulenum != rulenum)
4965 * Get rid of the rule duplications on all CPUs
4967 bzero(&dmsg, sizeof(dmsg));
4968 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4969 ipfw_alt_delete_rule_dispatch);
4970 dmsg.prev_rule = prev;
4971 dmsg.start_rule = rule;
4972 dmsg.rulenum = rulenum;
4974 netisr_domsg_global(&dmsg.base);
/* Both pointers must have been walked off the end of the sibling chains. */
4975 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL);
/*
 * Per-CPU netisr handler: delete every rule belonging to set
 * dmsg->from_set on this CPU, then forward to the next CPU.
 * NOTE(review): subsampled listing — 'prev'/'del' bookkeeping lines and
 * the else-branch advancing prev/rule are elided.
 */
4980 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg)
4982 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4983 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4984 struct ip_fw *prev, *rule;
4989 ASSERT_NETISR_NCPUS(mycpuid);
4991 ipfw_flush_rule_ptrs(ctx);
4994 rule = ctx->ipfw_layer3_chain;
4995 while (rule != NULL) {
4996 if (rule->set == dmsg->from_set) {
4997 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4998 /* Flush states generated by this rule. */
4999 ipfw_state_flush(ctx, rule);
5001 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
5002 /* Flush tracks generated by this rule. */
5003 ipfw_track_flush(ctx, rule);
5005 rule = ipfw_delete_rule(ctx, prev, rule);
5014 KASSERT(del, ("no match set?!"));
5016 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Delete every rule in 'set' on all CPUs; returns 0 if the set is empty.
 * NOTE(review): subsampled listing — return type, braces and the final
 * return are elided.
 */
5020 ipfw_alt_delete_ruleset(uint8_t set)
5022 struct netmsg_del dmsg;
5025 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5030 * Check whether the 'set' exists. If it exists,
5031 * then check whether any rules within the set will
5032 * try to create states.
5035 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5036 if (rule->set == set)
5040 return 0; /* XXX EINVAL? */
5045 bzero(&dmsg, sizeof(dmsg));
5046 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5047 ipfw_alt_delete_ruleset_dispatch);
5048 dmsg.from_set = set;
5049 netisr_domsg_global(&dmsg.base);
/*
 * Per-CPU netisr handler: move all rules numbered dmsg->rulenum into set
 * dmsg->to_set on this CPU, then forward to the next CPU.
 * NOTE(review): subsampled listing — braces and the loop's rule
 * advancement are elided.
 */
5055 ipfw_alt_move_rule_dispatch(netmsg_t nmsg)
5057 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5060 ASSERT_NETISR_NCPUS(mycpuid);
5062 rule = dmsg->start_rule;
5063 KKASSERT(rule->cpuid == mycpuid);
5066 * Move to the position on the next CPU
5067 * before the msg is forwarded.
5069 dmsg->start_rule = rule->sibling;
5071 while (rule && rule->rulenum <= dmsg->rulenum) {
5072 if (rule->rulenum == dmsg->rulenum)
5073 rule->set = dmsg->to_set;
5076 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Move all rules numbered 'rulenum' into 'set' on every CPU.
 * NOTE(review): subsampled listing — the 'nmsg = &dmsg.base;' binding,
 * to_set assignment and return are elided.
 */
5080 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set)
5082 struct netmsg_del dmsg;
5083 struct netmsg_base *nmsg;
5085 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5090 * Locate first rule to move
5092 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum;
5093 rule = rule->next) {
5094 if (rule->rulenum == rulenum && rule->set != set)
5097 if (rule == NULL || rule->rulenum > rulenum)
5098 return 0; /* XXX error? */
5100 bzero(&dmsg, sizeof(dmsg));
5102 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5103 ipfw_alt_move_rule_dispatch);
5104 dmsg.start_rule = rule;
5105 dmsg.rulenum = rulenum;
5108 netisr_domsg_global(nmsg);
5109 KKASSERT(dmsg.start_rule == NULL);
5114 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg)
5116 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5117 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5120 ASSERT_NETISR_NCPUS(mycpuid);
5122 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5123 if (rule->set == dmsg->from_set)
5124 rule->set = dmsg->to_set;
5126 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5130 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set)
5132 struct netmsg_del dmsg;
5133 struct netmsg_base *nmsg;
5137 bzero(&dmsg, sizeof(dmsg));
5139 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5140 ipfw_alt_move_ruleset_dispatch);
5141 dmsg.from_set = from_set;
5142 dmsg.to_set = to_set;
5144 netisr_domsg_global(nmsg);
5149 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg)
5151 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5152 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5155 ASSERT_NETISR_NCPUS(mycpuid);
5157 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5158 if (rule->set == dmsg->from_set)
5159 rule->set = dmsg->to_set;
5160 else if (rule->set == dmsg->to_set)
5161 rule->set = dmsg->from_set;
5163 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5167 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2)
5169 struct netmsg_del dmsg;
5170 struct netmsg_base *nmsg;
5174 bzero(&dmsg, sizeof(dmsg));
5176 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5177 ipfw_alt_swap_ruleset_dispatch);
5178 dmsg.from_set = set1;
5181 netisr_domsg_global(nmsg);
5186 * Remove all rules with given number, and also do set manipulation.
5188 * The argument is an uint32_t. The low 16 bit are the rule or set number,
5189 * the next 8 bits are the new set, the top 8 bits are the command:
5191 * 0 delete rules with given number
5192 * 1 delete rules with given set number
5193 * 2 move rules with given number to new set
5194 * 3 move rules with given set number to new set
5195 * 4 swap sets with given numbers
/*
 * NOTE(review): subsampled listing — validation branches' error returns,
 * the default case and the final return are elided.
 */
5198 ipfw_ctl_alter(uint32_t arg)
5201 uint8_t cmd, new_set;
5206 rulenum = arg & 0xffff;
5207 cmd = (arg >> 24) & 0xff;
5208 new_set = (arg >> 16) & 0xff;
/* Validate command-specific operand ranges before dispatching. */
5212 if (new_set >= IPFW_DEFAULT_SET)
5214 if (cmd == 0 || cmd == 2) {
5215 if (rulenum == IPFW_DEFAULT_RULE)
5218 if (rulenum >= IPFW_DEFAULT_SET)
5223 case 0: /* delete rules with given number */
5224 error = ipfw_alt_delete_rule(rulenum);
5227 case 1: /* delete all rules with given set number */
5228 error = ipfw_alt_delete_ruleset(rulenum);
5231 case 2: /* move rules with given number to new set */
5232 error = ipfw_alt_move_rule(rulenum, new_set);
5235 case 3: /* move rules with given set number to new set */
5236 error = ipfw_alt_move_ruleset(rulenum, new_set);
5239 case 4: /* swap two sets */
5240 error = ipfw_alt_swap_ruleset(rulenum, new_set);
5247 * Clear counters for a specific rule.
5250 clear_counters(struct ip_fw *rule, int log_only)
5252 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
5254 if (log_only == 0) {
5255 rule->bcnt = rule->pcnt = 0;
5256 rule->timestamp = 0;
5258 if (l->o.opcode == O_LOG)
5259 l->log_left = l->max_log;
/*
 * Per-CPU netisr handler: clear counters for every rule (rulenum == 0)
 * or for all duplicates of one rule number, then forward to the next CPU.
 * NOTE(review): subsampled listing — braces and the loop's 'rule =
 * rule->next' advancement are elided.
 */
5263 ipfw_zero_entry_dispatch(netmsg_t nmsg)
5265 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
5266 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5269 ASSERT_NETISR_NCPUS(mycpuid);
5271 if (zmsg->rulenum == 0) {
5272 KKASSERT(zmsg->start_rule == NULL);
5274 ctx->ipfw_norule_counter = 0;
5275 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5276 clear_counters(rule, zmsg->log_only);
5278 struct ip_fw *start = zmsg->start_rule;
5280 KKASSERT(start->cpuid == mycpuid);
5281 KKASSERT(start->rulenum == zmsg->rulenum);
5284 * We can have multiple rules with the same number, so we
5285 * need to clear them all.
5287 for (rule = start; rule && rule->rulenum == zmsg->rulenum;
5289 clear_counters(rule, zmsg->log_only);
5292 * Move to the position on the next CPU
5293 * before the msg is forwarded.
5295 zmsg->start_rule = start->sibling;
5297 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5301 * Reset some or all counters on firewall rules.
5302 * @arg frwl is null to clear all entries, or contains a specific
5304 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
/*
 * NOTE(review): subsampled listing — return type, braces, 'nmsg =
 * &zmsg.base;' binding, the EINVAL return and final return are elided.
 */
5307 ipfw_ctl_zero_entry(int rulenum, int log_only)
5309 struct netmsg_zent zmsg;
5310 struct netmsg_base *nmsg;
5312 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5316 bzero(&zmsg, sizeof(zmsg));
5318 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5319 ipfw_zero_entry_dispatch);
5320 zmsg.log_only = log_only;
/* rulenum == 0 clears everything; otherwise target one rule number. */
5323 msg = log_only ? "ipfw: All logging counts reset.\n"
5324 : "ipfw: Accounting cleared.\n";
5329 * Locate the first rule with 'rulenum'
5331 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5332 if (rule->rulenum == rulenum)
5335 if (rule == NULL) /* we did not find any matching rules */
5337 zmsg.start_rule = rule;
5338 zmsg.rulenum = rulenum;
5340 msg = log_only ? "ipfw: Entry %d logging count reset.\n"
5341 : "ipfw: Entry %d cleared.\n";
5343 netisr_domsg_global(nmsg);
5344 KKASSERT(zmsg.start_rule == NULL);
5347 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
5352 * Check validity of the structure before insert.
5353 * Fortunately rules are simple, so this mostly need to check rule sizes.
/*
 * Validates the userland rule image and derives rule_flags
 * (GENSTATE/GENTRACK/CROSSREF/DYNIFADDR) from the opcodes present.
 * NOTE(review): heavily subsampled listing — most case labels of the big
 * opcode switch, error returns and braces are elided; code kept verbatim.
 */
5356 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags)
5359 int have_action = 0;
5364 /* Check for valid size */
5365 if (size < sizeof(*rule)) {
5366 kprintf("ipfw: rule too short\n");
5369 l = IOC_RULESIZE(rule);
5371 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l);
5375 /* Check rule number */
5376 if (rule->rulenum == IPFW_DEFAULT_RULE) {
5377 kprintf("ipfw: invalid rule number\n");
5382 * Now go for the individual checks. Very simple ones, basically only
5383 * instruction sizes.
5385 for (l = rule->cmd_len, cmd = rule->cmd; l > 0;
5386 l -= cmdlen, cmd += cmdlen) {
5387 cmdlen = F_LEN(cmd);
5389 kprintf("ipfw: opcode %d size truncated\n",
5394 DPRINTF("ipfw: opcode %d\n", cmd->opcode);
/* Derive rule flags from opcodes before the per-opcode size checks. */
5396 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT ||
5397 IPFW_ISXLAT(cmd->opcode)) {
5398 /* This rule will generate states. */
5399 *rule_flags |= IPFW_RULE_F_GENSTATE;
5400 if (cmd->opcode == O_LIMIT)
5401 *rule_flags |= IPFW_RULE_F_GENTRACK;
5403 if (cmd->opcode == O_DEFRAG || IPFW_ISXLAT(cmd->opcode))
5404 *rule_flags |= IPFW_RULE_F_CROSSREF;
5405 if (cmd->opcode == O_IP_SRC_IFIP ||
5406 cmd->opcode == O_IP_DST_IFIP) {
5407 *rule_flags |= IPFW_RULE_F_DYNIFADDR;
5408 cmd->arg1 &= IPFW_IFIP_SETTINGS;
/* Per-opcode instruction-size validation. */
5411 switch (cmd->opcode) {
5426 case O_IPPRECEDENCE:
5433 if (cmdlen != F_INSN_SIZE(ipfw_insn))
5437 case O_IP_SRC_TABLE:
5438 case O_IP_DST_TABLE:
5439 if (cmdlen != F_INSN_SIZE(ipfw_insn))
5441 if (cmd->arg1 >= ipfw_table_max) {
5442 kprintf("ipfw: invalid table id %u, max %d\n",
5443 cmd->arg1, ipfw_table_max);
5450 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip))
5456 if (cmdlen < F_INSN_SIZE(ipfw_insn_u32))
5467 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
5472 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
5476 if (cmdlen != F_INSN_SIZE(ipfw_insn_rdr))
5481 if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
/* Replenish the in-kernel logging budget on (re)install. */
5484 ((ipfw_insn_log *)cmd)->log_left =
5485 ((ipfw_insn_log *)cmd)->max_log;
5491 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip))
5493 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) {
5494 kprintf("ipfw: opcode %d, useless rule\n",
5502 if (cmd->arg1 == 0 || cmd->arg1 > 256) {
5503 kprintf("ipfw: invalid set size %d\n",
5507 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
5513 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
5519 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
5520 if (cmdlen < 2 || cmdlen > 31)
5527 if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
5533 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
5538 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) {
5543 fwd_addr = ((ipfw_insn_sa *)cmd)->
5545 if (IN_MULTICAST(ntohl(fwd_addr))) {
5546 kprintf("ipfw: try forwarding to "
5547 "multicast address\n");
5553 case O_FORWARD_MAC: /* XXX not implemented yet */
5563 if (cmdlen != F_INSN_SIZE(ipfw_insn))
5567 kprintf("ipfw: opcode %d, multiple actions"
5574 kprintf("ipfw: opcode %d, action must be"
5581 kprintf("ipfw: opcode %d, unknown opcode\n",
5586 if (have_action == 0) {
5587 kprintf("ipfw: missing action\n");
5593 kprintf("ipfw: opcode %d size %d wrong\n",
5594 cmd->opcode, cmdlen);
/*
 * sockopt handler: validate the userland rule image and install it on
 * all CPUs; the value buffer is grown to the maximum rule size so the
 * installed rule can be copied back to the caller.
 * NOTE(review): subsampled listing — return type, braces, EINVAL return
 * and final 'return error;' are elided.
 */
5599 ipfw_ctl_add_rule(struct sockopt *sopt)
5601 struct ipfw_ioc_rule *ioc_rule;
5603 uint32_t rule_flags;
5608 size = sopt->sopt_valsize;
5609 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) ||
5610 size < sizeof(*ioc_rule)) {
5613 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) {
5614 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) *
5615 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK);
5617 ioc_rule = sopt->sopt_val;
5619 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags);
5623 ipfw_add_rule(ioc_rule, rule_flags);
5625 if (sopt->sopt_dir == SOPT_GET)
5626 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule);
/*
 * Export one CPU0 rule to the userland ioctl format, summing the
 * pcnt/bcnt statistics of its per-CPU duplicates; returns the buffer
 * position after the emitted record.
 * NOTE(review): subsampled listing — return type, braces and the loop
 * counter 'i' bookkeeping are elided.
 */
5631 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule,
5632 struct ipfw_ioc_rule *ioc_rule)
5634 const struct ip_fw *sibling;
5640 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0"));
5642 ioc_rule->act_ofs = rule->act_ofs;
5643 ioc_rule->cmd_len = rule->cmd_len;
5644 ioc_rule->rulenum = rule->rulenum;
5645 ioc_rule->set = rule->set;
5646 ioc_rule->usr_flags = rule->usr_flags;
5648 ioc_rule->set_disable = ctx->ipfw_set_disable;
5649 ioc_rule->static_count = static_count;
5650 ioc_rule->static_len = static_ioc_len;
5653 * Visit (read-only) all of the rule's duplications to get
5654 * the necessary statistics
5661 ioc_rule->timestamp = 0;
/* Sum stats across the sibling chain; keep the newest timestamp. */
5662 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) {
5663 ioc_rule->pcnt += sibling->pcnt;
5664 ioc_rule->bcnt += sibling->bcnt;
5665 if (sibling->timestamp > ioc_rule->timestamp)
5666 ioc_rule->timestamp = sibling->timestamp;
5671 KASSERT(i == netisr_ncpus,
5672 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus));
5674 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */);
5676 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule));
/*
 * Export one track counter as a userland ipfw_ioc_state (tracks are
 * presented as O_LIMIT_PARENT pseudo-states for userland compatibility).
 * NOTE(review): subsampled listing — return type, braces and the
 * success/skip return statements are elided.
 */
5680 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state)
5682 struct ipfw_ioc_flowid *ioc_id;
5684 if (trk->tc_expire == 0) {
5685 /* Not a scanned one. */
/* Report remaining lifetime, clamped at zero for already-expired tracks. */
5689 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ?
5690 0 : trk->tc_expire - time_uptime;
5691 ioc_state->pcnt = 0;
5692 ioc_state->bcnt = 0;
5694 ioc_state->dyn_type = O_LIMIT_PARENT;
5695 ioc_state->count = trk->tc_count;
5697 ioc_state->rulenum = trk->tc_rulenum;
5699 ioc_id = &ioc_state->id;
5700 ioc_id->type = ETHERTYPE_IP;
5701 ioc_id->u.ip.proto = trk->tc_proto;
5702 ioc_id->u.ip.src_ip = trk->tc_saddr;
5703 ioc_id->u.ip.dst_ip = trk->tc_daddr;
5704 ioc_id->u.ip.src_port = trk->tc_sport;
5705 ioc_id->u.ip.dst_port = trk->tc_dport;
/*
 * Export one per-CPU state as a userland ipfw_ioc_state.  For NAT (xlat)
 * states the translation address/port and the paired state's counters
 * are folded in.
 * NOTE(review): subsampled listing — return type, braces and return
 * statements are elided.
 */
5711 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state)
5713 struct ipfw_ioc_flowid *ioc_id;
5715 if (IPFW_STATE_SCANSKIP(s))
5718 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ?
5719 0 : s->st_expire - time_uptime;
5720 ioc_state->pcnt = s->st_pcnt;
5721 ioc_state->bcnt = s->st_bcnt;
5723 ioc_state->dyn_type = s->st_type;
5724 ioc_state->count = 0;
5726 ioc_state->rulenum = s->st_rule->rulenum;
5728 ioc_id = &ioc_state->id;
5729 ioc_id->type = ETHERTYPE_IP;
5730 ioc_id->u.ip.proto = s->st_proto;
5731 ipfw_key_4tuple(&s->st_key,
5732 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port,
5733 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port);
5735 if (IPFW_ISXLAT(s->st_type)) {
5736 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s;
/* Port 0 means "no port rewrite": report the original dst port. */
5738 if (x->xlat_port == 0)
5739 ioc_state->xlat_port = ioc_id->u.ip.dst_port;
5741 ioc_state->xlat_port = ntohs(x->xlat_port);
5742 ioc_state->xlat_addr = ntohl(x->xlat_addr);
/* Include the paired (other-direction) xlat state's traffic too. */
5744 ioc_state->pcnt += x->xlat_pair->xlat_pcnt;
5745 ioc_state->bcnt += x->xlat_pair->xlat_bcnt;
/*
 * Per-CPU netisr handler: copy this CPU's states into the userland
 * buffer, refresh global track expirations, and on the last CPU also
 * copy the global track tree.  Stops early when the buffer is full.
 * NOTE(review): subsampled listing — braces, buffer-advance statements
 * and 'trk = t->t_count;' style assignments are elided.
 */
5752 ipfw_state_copy_dispatch(netmsg_t nmsg)
5754 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg;
5755 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5756 const struct ipfw_state *s;
5757 const struct ipfw_track *t;
5759 ASSERT_NETISR_NCPUS(mycpuid);
5760 KASSERT(nm->state_cnt < nm->state_cntmax,
5761 ("invalid state count %d, max %d",
5762 nm->state_cnt, nm->state_cntmax));
5764 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) {
5765 if (ipfw_state_copy(s, nm->ioc_state)) {
5768 if (nm->state_cnt == nm->state_cntmax)
5774 * Prepare tracks in the global track tree for userland.
5776 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) {
5777 struct ipfw_trkcnt *trk;
5779 if (t->t_count == NULL) /* anchor */
5784 * Only one netisr can run this function at
5785 * any time, and only this function accesses
5786 * trkcnt's tc_expire, so this is safe w/o
5787 * ipfw_gd.ipfw_trkcnt_token.
5789 if (trk->tc_expire > t->t_expire)
5791 trk->tc_expire = t->t_expire;
5795 * Copy tracks in the global track tree to userland in
5798 if (mycpuid == netisr_ncpus - 1) {
5799 struct ipfw_trkcnt *trk;
5801 KASSERT(nm->state_cnt < nm->state_cntmax,
5802 ("invalid state count %d, max %d",
5803 nm->state_cnt, nm->state_cntmax));
5806 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) {
5807 if (ipfw_track_copy(trk, nm->ioc_state)) {
5810 if (nm->state_cnt == nm->state_cntmax) {
5819 if (nm->state_cnt == nm->state_cntmax) {
5820 /* No more space; done. */
5821 netisr_replymsg(&nm->base, 0);
5823 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * IP_FW_GET handler: copy static rules followed by dynamic states
 * (tracks included for userland compat) into the sockopt buffer.
 * NOTE(review): extract is missing interleaved lines (declarations,
 * return statements, braces); see caveats inline.
 */
5828 ipfw_ctl_get_rules(struct sockopt *sopt)
5830 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5839 * pass up a copy of the current rules. Static rules
5840 * come first (the last of which has number IPFW_DEFAULT_RULE),
5841 * followed by a possibly empty list of states.
5844 size = static_ioc_len; /* size of static rules */
5847 * Size of the states.
5848 * XXX take tracks as state for userland compat.
5850 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt;
/* Oversize by 25% so states created during the snapshot still fit. */
5851 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */
5852 size += state_cnt * sizeof(struct ipfw_ioc_state);
5854 if (sopt->sopt_valsize < size) {
5855 /* short length, no need to return incomplete rules */
5856 /* XXX: if superuser, no need to zero buffer */
5857 bzero(sopt->sopt_val, sopt->sopt_valsize);
5860 bp = sopt->sopt_val;
/* Serialize the static rule chain first. */
5862 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5863 bp = ipfw_copy_rule(ctx, rule, bp);
5866 struct netmsg_cpstate nm;
5868 size_t old_size = size;
/* Run the state-copy dispatch across all netisr CPUs. */
5871 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5872 MSGF_PRIORITY, ipfw_state_copy_dispatch);
5874 nm.state_cntmax = state_cnt;
5876 netisr_domsg_global(&nm.base);
5879 * The # of states may be shrinked after the snapshot
5880 * of the state count was taken. To give user a correct
5881 * state count, nm->state_cnt is used to recalculate
5884 size = static_ioc_len +
5885 (nm.state_cnt * sizeof(struct ipfw_ioc_state));
5886 KKASSERT(size <= old_size);
5889 sopt->sopt_valsize = size;
/*
 * Per-netisr handler: install the new set-disable mask in this CPU's
 * context and forward to the next CPU.
 */
5894 ipfw_set_disable_dispatch(netmsg_t nmsg)
5896 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5898 ASSERT_NETISR_NCPUS(mycpuid);
5900 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32;
5901 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Compute the new set-disable mask from the caller's disable/enable
 * bits and propagate it to every netisr CPU synchronously.
 */
5905 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable)
5907 struct netmsg_base nmsg;
5908 uint32_t set_disable;
5912 /* IPFW_DEFAULT_SET is always enabled */
5913 enable |= (1 << IPFW_DEFAULT_SET);
/* enable wins over disable for overlapping bits. */
5914 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable;
5916 bzero(&nmsg, sizeof(nmsg));
5917 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5918 ipfw_set_disable_dispatch);
5919 nmsg.lmsg.u.ms_result32 = set_disable;
5921 netisr_domsg_global(&nmsg);
/*
 * Per-netisr handler: allocate the per-CPU radix tree backing the new
 * lookup table, then forward to the next CPU.
 */
5925 ipfw_table_create_dispatch(netmsg_t nm)
5927 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5928 int tblid = nm->lmsg.u.ms_result;
5930 ASSERT_NETISR_NCPUS(mycpuid);
5932 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid],
5933 rn_cpumaskhead(mycpuid), 32))
5934 panic("ipfw: create table%d failed", tblid);
5936 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * IP_FW_TBL_CREATE handler: validate the ioctl argument and create the
 * table on every netisr CPU.
 * NOTE(review): the error-return statements between the visible checks
 * are elided in this extract.
 */
5940 ipfw_table_create(struct sockopt *sopt)
5942 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5943 struct ipfw_ioc_table *tbl;
5944 struct netmsg_base nm;
5948 if (sopt->sopt_valsize != sizeof(*tbl))
5951 tbl = sopt->sopt_val;
5952 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
/* Reject creation if the table already exists on this CPU. */
5955 if (ctx->ipfw_tables[tbl->tableid] != NULL)
5958 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5959 ipfw_table_create_dispatch);
5960 nm.lmsg.u.ms_result = tbl->tableid;
5961 netisr_domsg_global(&nm);
/*
 * Remove a single radix node from a table; panic if the radix code
 * deleted a different entry than the one requested.
 */
5967 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn)
5969 struct radix_node *ret;
5971 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
5973 panic("deleted other table entry");
/* rnh_walktree callback: delete every visited entry (xrnh is the head). */
5978 ipfw_table_killent(struct radix_node *rn, void *xrnh)
5981 ipfw_table_killrn(xrnh, rn);
/*
 * Empty one table on the current CPU; with the (elided) destroy flag the
 * table head itself is released and the slot NULLed.
 */
5986 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid,
5989 struct radix_node_head *rnh;
5991 ASSERT_NETISR_NCPUS(mycpuid);
5993 rnh = ctx->ipfw_tables[tableid];
5994 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh);
5997 ctx->ipfw_tables[tableid] = NULL;
/* Per-netisr handler for flushing/destroying a single table. */
6002 ipfw_table_flush_dispatch(netmsg_t nmsg)
6004 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg;
6005 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6007 ASSERT_NETISR_NCPUS(mycpuid);
6009 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy);
6010 netisr_forwardmsg(&nm->base, mycpuid + 1);
/* Flush every existing table on the current CPU. */
6014 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy)
6018 ASSERT_NETISR_NCPUS(mycpuid);
6020 for (i = 0; i < ipfw_table_max; ++i) {
6021 if (ctx->ipfw_tables[i] != NULL)
6022 ipfw_table_flush_oncpu(ctx, i, destroy);
/* Per-netisr handler for flushing all tables (never destroys heads). */
6027 ipfw_table_flushall_dispatch(netmsg_t nmsg)
6029 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6031 ASSERT_NETISR_NCPUS(mycpuid);
6033 ipfw_table_flushall_oncpu(ctx, 0);
6034 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * IP_FW_TBL_FLUSH / IP_FW_TBL_DESTROY handler.  A negative tableid with
 * FLUSH means "flush all tables"; DESTROY additionally frees the table
 * (the line setting nm.destroy is elided in this extract).
 */
6038 ipfw_table_flush(struct sockopt *sopt)
6040 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6041 struct ipfw_ioc_table *tbl;
6042 struct netmsg_tblflush nm;
6046 if (sopt->sopt_valsize != sizeof(*tbl))
6049 tbl = sopt->sopt_val;
/* Flush-all path: negative tableid selects every table. */
6050 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) {
6051 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6052 MSGF_PRIORITY, ipfw_table_flushall_dispatch);
6053 netisr_domsg_global(&nm.base);
6057 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
6060 if (ctx->ipfw_tables[tbl->tableid] == NULL)
6063 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6064 ipfw_table_flush_dispatch);
6065 nm.tableid = tbl->tableid;
6067 if (sopt->sopt_name == IP_FW_TBL_DESTROY)
6069 netisr_domsg_global(&nm.base);
/* rnh_walktree callback: count entries (body elided in this extract). */
6075 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt)
/*
 * rnh_walktree callback: copy one table entry into the ioctl buffer and
 * aggregate per-CPU use counters from the entry's CPU siblings.
 */
6084 ipfw_table_cpent(struct radix_node *rn, void *xcp)
6086 struct ipfw_table_cp *cp = xcp;
6087 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
6088 struct ipfw_ioc_tblent *ioc_te;
6093 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d",
6094 cp->te_idx, cp->te_cnt));
6095 ioc_te = &cp->te[cp->te_idx];
/* Radix masks are length-prefixed; first byte is the copy length. */
6097 if (te->te_nodes->rn_mask != NULL) {
6098 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask,
6099 *te->te_nodes->rn_mask);
6101 ioc_te->netmask.sin_len = 0;
6103 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key));
6105 ioc_te->use = te->te_use;
6106 ioc_te->last_used = te->te_lastuse;
/* Sum usage across the sibling entries on the other CPUs. */
6111 while ((te = te->te_sibling) != NULL) {
6115 ioc_te->use += te->te_use;
6116 if (te->te_lastuse > ioc_te->last_used)
6117 ioc_te->last_used = te->te_lastuse;
6119 KASSERT(cnt == netisr_ncpus,
6120 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus));
/*
 * IP_FW_TBL_GET handler.  tableid < 0 lists the ids of all existing
 * tables; otherwise the entries of one table are copied out.
 * NOTE(review): several declarations and return statements are elided
 * in this extract.
 */
6128 ipfw_table_get(struct sockopt *sopt)
6130 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6131 struct radix_node_head *rnh;
6132 struct ipfw_ioc_table *tbl;
6133 struct ipfw_ioc_tblcont *cont;
6134 struct ipfw_table_cp cp;
6139 if (sopt->sopt_valsize < sizeof(*tbl))
6142 tbl = sopt->sopt_val;
6143 if (tbl->tableid < 0) {
6144 struct ipfw_ioc_tbllist *list;
6148 * List available table ids.
6150 for (i = 0; i < ipfw_table_max; ++i) {
6151 if (ctx->ipfw_tables[i] != NULL)
6155 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]);
/* Short userland buffer: zero it and (elided) bail out. */
6156 if (sopt->sopt_valsize < sz) {
6157 bzero(sopt->sopt_val, sopt->sopt_valsize);
6160 list = sopt->sopt_val;
6161 list->tablecnt = cnt;
6164 for (i = 0; i < ipfw_table_max; ++i) {
6165 if (ctx->ipfw_tables[i] != NULL) {
6166 KASSERT(cnt < list->tablecnt,
6167 ("invalid idx %d, cnt %d",
6168 cnt, list->tablecnt));
6169 list->tables[cnt++] = i;
6172 sopt->sopt_valsize = sz;
6174 } else if (tbl->tableid >= ipfw_table_max) {
6178 rnh = ctx->ipfw_tables[tbl->tableid];
/* First pass counts entries to size the output buffer. */
6181 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt);
6183 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]);
6184 if (sopt->sopt_valsize < sz) {
6185 bzero(sopt->sopt_val, sopt->sopt_valsize);
6188 cont = sopt->sopt_val;
/* Second pass copies the entries out. */
6194 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp);
6196 sopt->sopt_valsize = sz;
/*
 * Per-netisr handler: insert one entry into this CPU's copy of the
 * table, link it to its sibling on the previous CPU, and forward the
 * message.  On CPU0 a duplicate key replies EEXIST; on later CPUs the
 * (elided) branch panics since CPU copies must stay in sync.
 */
6201 ipfw_table_add_dispatch(netmsg_t nmsg)
6203 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
6204 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6205 struct radix_node_head *rnh;
6206 struct ipfw_tblent *te;
6208 ASSERT_NETISR_NCPUS(mycpuid);
6210 rnh = ctx->ipfw_tables[nm->tableid];
6212 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO);
6213 te->te_nodes->rn_key = (char *)&te->te_key;
6214 memcpy(&te->te_key, nm->key, sizeof(te->te_key));
6216 if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh,
6217 te->te_nodes) == NULL) {
6220 netisr_replymsg(&nm->base, EEXIST);
6223 panic("rnh_addaddr failed");
6226 /* Link siblings. */
6227 if (nm->sibling != NULL)
6228 nm->sibling->te_sibling = te;
6231 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * Per-netisr handler: delete one entry from this CPU's copy of the
 * table.  Missing key replies ESRCH on CPU0; later CPUs (elided) panic.
 */
6235 ipfw_table_del_dispatch(netmsg_t nmsg)
6237 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
6238 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6239 struct radix_node_head *rnh;
6240 struct radix_node *rn;
6242 ASSERT_NETISR_NCPUS(mycpuid);
6244 rnh = ctx->ipfw_tables[nm->tableid];
6245 rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh);
6248 netisr_replymsg(&nm->base, ESRCH);
6251 panic("rnh_deladdr failed");
6255 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * IP_FW_TBL_ADD / IP_FW_TBL_DEL handler: validate the single entry in
 * the ioctl payload and run the add/del dispatch across all CPUs.
 * NOTE(review): error-return statements between the checks are elided.
 */
6259 ipfw_table_alt(struct sockopt *sopt)
6261 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6262 struct ipfw_ioc_tblcont *tbl;
6263 struct ipfw_ioc_tblent *te;
6264 struct sockaddr_in key0;
6265 struct sockaddr *netmask = NULL, *key;
6266 struct netmsg_tblent nm;
6270 if (sopt->sopt_valsize != sizeof(*tbl))
6272 tbl = sopt->sopt_val;
6274 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
/* Exactly one entry per request. */
6276 if (tbl->entcnt != 1)
6279 if (ctx->ipfw_tables[tbl->tableid] == NULL)
/* Key must be a well-formed AF_INET sockaddr with no port. */
6283 if (te->key.sin_family != AF_INET ||
6284 te->key.sin_port != 0 ||
6285 te->key.sin_len != sizeof(struct sockaddr_in))
6287 key = (struct sockaddr *)&te->key;
6289 if (te->netmask.sin_len != 0) {
6290 if (te->netmask.sin_port != 0 ||
6291 te->netmask.sin_len > sizeof(struct sockaddr_in))
6293 netmask = (struct sockaddr *)&te->netmask;
/* Canonicalize: mask the key so host bits are cleared. */
6294 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask);
6295 key = (struct sockaddr *)&key0;
6298 if (sopt->sopt_name == IP_FW_TBL_ADD) {
6299 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6300 MSGF_PRIORITY, ipfw_table_add_dispatch);
6302 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6303 MSGF_PRIORITY, ipfw_table_del_dispatch);
6306 nm.netmask = netmask;
6307 nm.tableid = tbl->tableid;
6309 return (netisr_domsg_global(&nm.base));
/* rnh_walktree callback: reset one entry's counters (body elided). */
6313 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused)
6315 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
/* Per-netisr handler: zero the counters of a single table. */
6323 ipfw_table_zero_dispatch(netmsg_t nmsg)
6325 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6326 struct radix_node_head *rnh;
6328 ASSERT_NETISR_NCPUS(mycpuid);
6330 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result];
6331 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
6333 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/* Per-netisr handler: zero the counters of every existing table. */
6337 ipfw_table_zeroall_dispatch(netmsg_t nmsg)
6339 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6342 ASSERT_NETISR_NCPUS(mycpuid);
6344 for (i = 0; i < ipfw_table_max; ++i) {
6345 struct radix_node_head *rnh = ctx->ipfw_tables[i];
6348 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
6350 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * IP_FW_TBL_ZERO handler: zero the use counters of one table, or of all
 * tables when tableid < 0.  Error returns between checks are elided.
 */
6354 ipfw_table_zero(struct sockopt *sopt)
6356 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6357 struct netmsg_base nm;
6358 struct ipfw_ioc_table *tbl;
6362 if (sopt->sopt_valsize != sizeof(*tbl))
6364 tbl = sopt->sopt_val;
6366 if (tbl->tableid < 0) {
6367 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6368 ipfw_table_zeroall_dispatch);
6369 netisr_domsg_global(&nm);
6371 } else if (tbl->tableid >= ipfw_table_max) {
6373 } else if (ctx->ipfw_tables[tbl->tableid] == NULL) {
6377 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6378 ipfw_table_zero_dispatch);
6379 nm.lmsg.u.ms_result = tbl->tableid;
6380 netisr_domsg_global(&nm);
/*
 * rnh_walktree callback: delete entries previously marked expired by
 * ipfw_table_markexp (the expcnt accounting line is elided).
 */
6386 ipfw_table_killexp(struct radix_node *rn, void *xnm)
6388 struct netmsg_tblexp *nm = xnm;
6389 struct ipfw_tblent *te = (struct ipfw_tblent *)rn;
6391 if (te->te_expired) {
6392 ipfw_table_killrn(nm->rnh, rn);
/*
 * Per-netisr handler: reap marked-expired entries from one table.  Each
 * CPU must delete exactly nm->cnt entries, asserted cumulatively.
 */
6399 ipfw_table_expire_dispatch(netmsg_t nmsg)
6401 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
6402 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6403 struct radix_node_head *rnh;
6405 ASSERT_NETISR_NCPUS(mycpuid);
6407 rnh = ctx->ipfw_tables[nm->tableid];
6409 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
6411 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
6412 ("not all expired addresses (%d) were deleted (%d)",
6413 nm->cnt * (mycpuid + 1), nm->expcnt));
6415 netisr_forwardmsg(&nm->base, mycpuid + 1);
/* Per-netisr handler: reap marked-expired entries from all tables. */
6419 ipfw_table_expireall_dispatch(netmsg_t nmsg)
6421 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
6422 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6425 ASSERT_NETISR_NCPUS(mycpuid);
6427 for (i = 0; i < ipfw_table_max; ++i) {
6428 struct radix_node_head *rnh = ctx->ipfw_tables[i];
6433 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
6436 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
6437 ("not all expired addresses (%d) were deleted (%d)",
6438 nm->cnt * (mycpuid + 1), nm->expcnt));
6440 netisr_forwardmsg(&nm->base, mycpuid + 1);
/*
 * rnh_walktree callback: mark an entry expired if the newest last-use
 * time across all CPU siblings is older than nm->expire seconds (wall
 * clock, time_second).  The lines that actually set te_expired on the
 * entry and its siblings are elided in this extract.
 */
6444 ipfw_table_markexp(struct radix_node *rn, void *xnm)
6446 struct netmsg_tblexp *nm = xnm;
6447 struct ipfw_tblent *te;
6450 te = (struct ipfw_tblent *)rn;
6451 lastuse = te->te_lastuse;
/* Use the most recent last-use among all per-CPU siblings. */
6453 while ((te = te->te_sibling) != NULL) {
6454 if (te->te_lastuse > lastuse)
6455 lastuse = te->te_lastuse;
/* Still fresh: not expired, (elided) early return. */
6457 if (!TIME_LEQ(lastuse + nm->expire, time_second)) {
6462 te = (struct ipfw_tblent *)rn;
6464 while ((te = te->te_sibling) != NULL)
/*
 * IP_FW_TBL_EXPIRE handler: mark entries idle for more than tbl->expire
 * seconds on the current CPU, then reap the marked entries on every CPU
 * via the expire dispatchers.  Works on one table or (tableid < 0) all.
 */
6472 ipfw_table_expire(struct sockopt *sopt)
6474 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6475 struct netmsg_tblexp nm;
6476 struct ipfw_ioc_tblexp *tbl;
6477 struct radix_node_head *rnh;
6481 if (sopt->sopt_valsize != sizeof(*tbl))
6483 tbl = sopt->sopt_val;
6488 nm.expire = tbl->expire;
6490 if (tbl->tableid < 0) {
/* Mark pass over every table on this CPU. */
6493 for (i = 0; i < ipfw_table_max; ++i) {
6494 rnh = ctx->ipfw_tables[i];
6497 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
6500 /* No addresses can be expired. */
6503 tbl->expcnt = nm.cnt;
6505 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
6506 MSGF_PRIORITY, ipfw_table_expireall_dispatch);
6508 netisr_domsg_global(&nm.base);
6509 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
6510 ("not all expired addresses (%d) were deleted (%d)",
6511 nm.cnt * netisr_ncpus, nm.expcnt));
6514 } else if (tbl->tableid >= ipfw_table_max) {
6518 rnh = ctx->ipfw_tables[tbl->tableid];
/* Single-table mark pass, then global reap. */
6521 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
6523 /* No addresses can be expired. */
6526 tbl->expcnt = nm.cnt;
6528 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
6529 ipfw_table_expire_dispatch);
6530 nm.tableid = tbl->tableid;
6531 netisr_domsg_global(&nm.base);
6532 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
6533 ("not all expired addresses (%d) were deleted (%d)",
6534 nm.cnt * netisr_ncpus, nm.expcnt));
/*
 * Dispatched on a remote CPU to free that CPU's sibling of an invalid
 * cross-referenced rule.
 */
6539 ipfw_crossref_free_dispatch(netmsg_t nmsg)
6541 struct ip_fw *rule = nmsg->lmsg.u.ms_resultp;
6543 KKASSERT((rule->rule_flags &
6544 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
6545 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
6546 ipfw_free_rule(rule);
6548 netisr_replymsg(&nmsg->base, 0);
/*
 * Walk the pending crossref-free list and free every rule whose
 * per-CPU siblings are no longer referenced by in-flight packets.
 * Rules still in flight are kept on the list and retried via the
 * (rearmed) crossref callout.
 * NOTE(review): list unlinking and the CPU0 free are elided here.
 */
6552 ipfw_crossref_reap(void)
6554 struct ip_fw *rule, *prev = NULL;
6558 rule = ipfw_gd.ipfw_crossref_free;
6559 while (rule != NULL) {
6560 uint64_t inflight = 0;
/* Sum cross_refs over all CPU siblings; zero means safe to free. */
6563 for (i = 0; i < netisr_ncpus; ++i)
6564 inflight += rule->cross_rules[i]->cross_refs;
6565 if (inflight == 0) {
6566 struct ip_fw *f = rule;
6575 ipfw_gd.ipfw_crossref_free = rule;
/* Free remote-CPU siblings synchronously on their own CPUs. */
6580 for (i = 1; i < netisr_ncpus; ++i) {
6581 struct netmsg_base nm;
6583 netmsg_init(&nm, NULL, &curthread->td_msgport,
6584 MSGF_PRIORITY, ipfw_crossref_free_dispatch);
6585 nm.lmsg.u.ms_resultp = f->cross_rules[i];
6586 netisr_domsg(&nm, i);
6588 KKASSERT((f->rule_flags &
6589 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
6590 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
/* Anything left in flight: retry after ~1s. */
6598 if (ipfw_gd.ipfw_crossref_free != NULL) {
6599 callout_reset(&ipfw_gd.ipfw_crossref_ch, hz,
6600 ipfw_crossref_timeo, NULL);
 * {set|get}sockopt parser.
 * Top-level dispatcher for all IP_FW_* socket options; routes each
 * option to its handler and reaps freed crossref rules on the way out.
6608 ipfw_ctl(struct sockopt *sopt)
6618 switch (sopt->sopt_name) {
6620 error = ipfw_ctl_get_rules(sopt);
6624 ipfw_flush(0 /* keep default rule */);
6628 error = ipfw_ctl_add_rule(sopt);
6633 * IP_FW_DEL is used for deleting single rules or sets,
6634 * and (ab)used to atomically manipulate sets.
6635 * Argument size is used to distinguish between the two:
6637 * delete single rule or set of rules,
6638 * or reassign rules (or sets) to a different set.
6639 * 2 * sizeof(uint32_t)
6640 * atomic disable/enable sets.
6641 * first uint32_t contains sets to be disabled,
6642 * second uint32_t contains sets to be enabled.
6644 masks = sopt->sopt_val;
6645 size = sopt->sopt_valsize;
6646 if (size == sizeof(*masks)) {
6648 * Delete or reassign static rule
6650 error = ipfw_ctl_alter(masks[0]);
6651 } else if (size == (2 * sizeof(*masks))) {
6653 * Set enable/disable
6655 ipfw_ctl_set_disable(masks[0], masks[1]);
6662 case IP_FW_RESETLOG: /* argument is an int, the rule number */
/* rulenum 0 (no argument supplied) means "all rules". */
6665 if (sopt->sopt_val != 0) {
6666 error = soopt_to_kbuf(sopt, &rulenum,
6667 sizeof(int), sizeof(int));
6671 error = ipfw_ctl_zero_entry(rulenum,
6672 sopt->sopt_name == IP_FW_RESETLOG);
6675 case IP_FW_TBL_CREATE:
6676 error = ipfw_table_create(sopt);
6681 error = ipfw_table_alt(sopt);
6684 case IP_FW_TBL_FLUSH:
6685 case IP_FW_TBL_DESTROY:
6686 error = ipfw_table_flush(sopt);
6690 error = ipfw_table_get(sopt);
6693 case IP_FW_TBL_ZERO:
6694 error = ipfw_table_zero(sopt);
6697 case IP_FW_TBL_EXPIRE:
6698 error = ipfw_table_expire(sopt);
6702 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
/* Opportunistically free rules whose cross refs have drained. */
6706 ipfw_crossref_reap();
/*
 * Finish a keepalive sweep: clear the in-progress flag and re-arm the
 * periodic keepalive callout.
 */
6711 ipfw_keepalive_done(struct ipfw_context *ctx)
6714 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6715 ("keepalive is not in progress"));
6716 ctx->ipfw_flags &= ~IPFW_FLAG_KEEPALIVE;
6717 callout_reset(&ctx->ipfw_keepalive_ch, dyn_keepalive_period * hz,
6718 ipfw_keepalive, NULL);
/*
 * Yield: self-send the "more" netmsg so the sweep continues on this
 * CPU after other netisr work has had a chance to run.
 */
6722 ipfw_keepalive_more(struct ipfw_context *ctx)
6724 struct netmsg_base *nm = &ctx->ipfw_keepalive_more;
6726 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6727 ("keepalive is not in progress"));
6728 KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
6729 ("keepalive more did not finish"));
6730 netisr_sendmsg_oncpu(nm);
/*
 * One bounded batch of the keepalive sweep.  Walks the state list past
 * the moving anchor, removing dead states and sending TCP keepalive
 * probes for idle established connections.  Yields via
 * ipfw_keepalive_more() whenever a scan/expire/send budget is hit.
 */
6734 ipfw_keepalive_loop(struct ipfw_context *ctx, struct ipfw_state *anchor)
6736 struct ipfw_state *s;
6737 int scanned = 0, expired = 0, kept = 0;
6739 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6740 ("keepalive is not in progress"));
6742 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
6743 uint32_t ack_rev, ack_fwd;
6744 struct ipfw_flow_id id;
/* Scan budget exhausted: yield and resume later. */
6747 if (scanned++ >= ipfw_state_scan_max) {
6748 ipfw_keepalive_more(ctx);
/* Advance the anchor past the state just examined. */
6752 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
6753 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
6757 * Don't use IPFW_STATE_SCANSKIP; need to perform keepalive
6760 if (s->st_type == O_ANCHOR)
6763 if (IPFW_STATE_ISDEAD(s)) {
6764 ipfw_state_remove(ctx, s);
6765 if (++expired >= ipfw_state_expire_max) {
6766 ipfw_keepalive_more(ctx);
6773 * Keep alive processing
/* Only established (SYN seen both ways) TCP states get probes. */
6776 if (s->st_proto != IPPROTO_TCP)
6778 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN)
6780 if (TIME_LEQ(time_uptime + dyn_keepalive_interval,
6782 continue; /* too early */
6784 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port,
6785 &id.dst_ip, &id.dst_port);
6786 ack_rev = s->st_ack_rev;
6787 ack_fwd = s->st_ack_fwd;
6789 #define SEND_FWD 0x1
6790 #define SEND_REV 0x2
/* For xlat states probe only the translated direction. */
6792 if (IPFW_ISXLAT(s->st_type)) {
6793 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s;
6795 if (x->xlat_dir == MATCH_FORWARD)
6796 send_dir = SEND_FWD;
6798 send_dir = SEND_REV;
6800 send_dir = SEND_FWD | SEND_REV;
/* Probe: seq = ack-1 elicits an ACK from a live peer. */
6803 if (send_dir & SEND_REV)
6804 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN);
6805 if (send_dir & SEND_FWD)
6806 send_pkt(&id, ack_fwd - 1, ack_rev, 0);
6811 if (++kept >= ipfw_keepalive_max) {
6812 ipfw_keepalive_more(ctx);
/* Whole list scanned: remove the anchor and finish the sweep. */
6816 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
6817 ipfw_keepalive_done(ctx);
/*
 * Continuation handler: resume an in-progress keepalive sweep from the
 * anchor left in the state list, or finish early if keepalive was
 * disabled / no states remain.
 */
6821 ipfw_keepalive_more_dispatch(netmsg_t nm)
6823 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6824 struct ipfw_state *anchor;
6826 ASSERT_NETISR_NCPUS(mycpuid);
6827 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
6828 ("keepalive is not in progress"));
/* Reply ASAP so the dropable netmsg can be reused for the next yield. */
6831 netisr_replymsg(&nm->base, 0);
6833 anchor = &ctx->ipfw_keepalive_anch;
6834 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
6835 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
6836 ipfw_keepalive_done(ctx);
6839 ipfw_keepalive_loop(ctx, anchor);
6843 * This procedure is only used to handle keepalives. It is invoked
6844 * every dyn_keepalive_period
6847 ipfw_keepalive_dispatch(netmsg_t nm)
6849 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6850 struct ipfw_state *anchor;
6852 ASSERT_NETISR_NCPUS(mycpuid);
6853 KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0,
6854 ("keepalive is in progress"));
6855 ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE;
6859 netisr_replymsg(&nm->base, 0);
6862 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
6863 ipfw_keepalive_done(ctx);
/* Start the sweep with the anchor at the head of the state list. */
6867 anchor = &ctx->ipfw_keepalive_anch;
6868 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
6869 ipfw_keepalive_loop(ctx, anchor);
 * This procedure is only used to handle keepalives. It is invoked
 * every dyn_keepalive_period
 * (callout handler; sends the per-CPU keepalive netmsg if it is idle)
6877 ipfw_keepalive(void *dummy __unused)
6879 struct netmsg_base *msg;
6881 KKASSERT(mycpuid < netisr_ncpus);
6882 msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm;
/* Only send if the previous dispatch already completed (MSGF_DONE). */
6885 if (msg->lmsg.ms_flags & MSGF_DONE)
6886 netisr_sendmsg_oncpu(msg);
/*
 * Target-CPU handler for a packet redispatched by ipfw_defrag_redispatch:
 * stash the continuation rule in the per-CPU context and re-run the
 * packet through ip_input (call elided in this extract).
 */
6891 ipfw_ip_input_dispatch(netmsg_t nmsg)
6893 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
6894 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6895 struct mbuf *m = nm->m;
6896 struct ip_fw *rule = nm->arg1;
6898 ASSERT_NETISR_NCPUS(mycpuid);
6899 KASSERT(rule->cpuid == mycpuid,
6900 ("rule does not belong to cpu%d", mycpuid));
6901 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
6902 ("mbuf does not have ipfw continue rule"));
6904 KASSERT(ctx->ipfw_cont_rule == NULL,
6905 ("pending ipfw continue rule"));
6906 ctx->ipfw_cont_rule = rule;
6909 /* May not be cleared, if ipfw was unload/disabled. */
6910 ctx->ipfw_cont_rule = NULL;
6913 * This rule is no longer used; decrement its cross_refs,
6914 * so this rule can be deleted.
/*
 * Hand a reassembled packet to the CPU that owns its flow, resuming
 * rule processing there at the target CPU's sibling of `rule`.
 */
6920 ipfw_defrag_redispatch(struct mbuf *m, int cpuid, struct ip_fw *rule)
6922 struct netmsg_genpkt *nm;
6924 KASSERT(cpuid != mycpuid, ("continue on the same cpu%d", cpuid));
6928 * Bump cross_refs to prevent this rule and its siblings
6929 * from being deleted, while this mbuf is inflight. The
6930 * cross_refs of the sibling rule on the target cpu will
6931 * be decremented, once this mbuf is going to be filtered
6932 * on the target cpu.
6935 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
/* Reuse the mbuf-embedded generic netmsg to avoid allocation. */
6937 nm = &m->m_hdr.mh_genmsg;
6938 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
6939 ipfw_ip_input_dispatch);
6941 nm->arg1 = rule->cross_rules[cpuid];
6942 netisr_sendmsg(&nm->base, cpuid);
/*
 * Initialize ip_fw_args for a packet entering ipfw_chk: recover the
 * matching rule from a dummynet tag or from a pending cross-CPU
 * "continue" handoff, clearing the corresponding mbuf flags.
 */
6946 ipfw_init_args(struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif)
/* Packet re-injected by dummynet: resume at the rule saved in its tag. */
6953 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
6956 /* Extract info from dummynet tag */
6957 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
6958 KKASSERT(mtag != NULL);
6959 args->rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
6960 KKASSERT(args->rule != NULL);
6962 m_tag_delete(m, mtag);
6963 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
/* Packet redispatched from another CPU: pick up the continue rule/xlat. */
6964 } else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
6965 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
6967 KKASSERT(ctx->ipfw_cont_rule != NULL);
6968 args->rule = ctx->ipfw_cont_rule;
6969 ctx->ipfw_cont_rule = NULL;
6971 if (ctx->ipfw_cont_xlat != NULL) {
6972 args->xlat = ctx->ipfw_cont_xlat;
6973 ctx->ipfw_cont_xlat = NULL;
6974 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATINS) {
6975 args->flags |= IP_FWARG_F_XLATINS;
6976 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATINS;
6978 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATFWD) {
6979 args->flags |= IP_FWARG_F_XLATFWD;
6980 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATFWD;
/* Without a pending xlat the xlat flags must not be set. */
6983 KKASSERT((m->m_pkthdr.fw_flags &
6984 (IPFW_MBUF_XLATINS | IPFW_MBUF_XLATFWD)) == 0);
6986 args->flags |= IP_FWARG_F_CONT;
6987 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE;
/*
 * pfil(9) input hook: run the inbound packet through ipfw_chk and act
 * on the verdict (pass/deny/dummynet/divert/tee).
 * NOTE(review): several case labels and the cleanup/return lines are
 * elided in this extract.
 */
6996 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
6998 struct ip_fw_args args;
6999 struct mbuf *m = *m0;
7000 int tee = 0, error = 0, ret;
7002 ipfw_init_args(&args, m, NULL);
7004 ret = ipfw_chk(&args);
7007 if (ret != IP_FW_REDISPATCH)
7022 case IP_FW_DUMMYNET:
7023 /* Send packet to the appropriate pipe */
7024 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args);
7033 * Must clear bridge tag when changing
7035 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
/* Divert only if a divert listener is loaded. */
7036 if (ip_divert_p != NULL) {
7037 m = ip_divert_p(m, tee, 1);
7041 /* not sure this is the right error msg */
7047 panic("unknown ipfw return value: %d", ret);
/*
 * pfil(9) output hook: same verdict handling as ipfw_check_in, for
 * outbound packets (DN_TO_IP_OUT, divert incoming=0).
 */
7055 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
7057 struct ip_fw_args args;
7058 struct mbuf *m = *m0;
7059 int tee = 0, error = 0, ret;
7061 ipfw_init_args(&args, m, ifp);
7063 ret = ipfw_chk(&args);
7066 if (ret != IP_FW_REDISPATCH)
7081 case IP_FW_DUMMYNET:
7082 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args);
7090 if (ip_divert_p != NULL) {
7091 m = ip_divert_p(m, tee, 0);
7095 /* not sure this is the right error msg */
7101 panic("unknown ipfw return value: %d", ret);
/*
 * NOTE(review): the enclosing function signatures are elided in this
 * extract; these fragments register (first) and unregister (second)
 * the ipfw pfil(9) hooks on the AF_INET pfil head.
 */
struct pfil_head *pfh;
7115 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
7119 pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
7120 pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
struct pfil_head *pfh;
7130 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
7134 pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
7135 pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
/* Sysctl: report total dynamic count = states + tracks (read-only). */
7139 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS)
7143 dyn_cnt = ipfw_state_cntcoll();
7144 dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt;
7146 return (sysctl_handle_int(oidp, &dyn_cnt, 0, req));
/* Sysctl: report the collected per-CPU state count (read-only). */
7150 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS)
7154 state_cnt = ipfw_state_cntcoll();
7155 return (sysctl_handle_int(oidp, &state_cnt, 0, req));
/* Sysctl: get/set the maximum number of states (validation elided). */
7159 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS)
7161 int state_max, error;
7163 state_max = ipfw_state_max;
7164 error = sysctl_handle_int(oidp, &state_max, 0, req);
7165 if (error || req->newptr == NULL)
7171 ipfw_state_max_set(state_max);
/*
 * Sysctl (compat): get/set the combined dyn max; the new value is split
 * evenly between state max and track max.
 */
7176 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS)
7180 dyn_max = ipfw_state_max + ipfw_track_max;
7182 error = sysctl_handle_int(oidp, &dyn_max, 0, req);
7183 if (error || req->newptr == NULL)
7189 ipfw_state_max_set(dyn_max / 2);
7190 ipfw_track_max = dyn_max / 2;
/*
 * Netisr0 handler: hook/dehook ipfw when the enable state actually
 * changes (hook/dehook calls elided in this extract).
 */
7195 ipfw_sysctl_enable_dispatch(netmsg_t nmsg)
7197 int enable = nmsg->lmsg.u.ms_result;
/* No change: nothing to do. */
7201 if (fw_enable == enable)
7210 netisr_replymsg(&nmsg->base, 0);
/* Sysctl: enable/disable the firewall, applied on netisr0. */
7214 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS)
7216 struct netmsg_base nmsg;
7220 error = sysctl_handle_int(oidp, &enable, 0, req);
7221 if (error || req->newptr == NULL)
7224 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7225 ipfw_sysctl_enable_dispatch);
7226 nmsg.lmsg.u.ms_result = enable;
7228 return netisr_domsg(&nmsg, 0);
/* Sysctl: clamp autoinc step to [IPFW_AUTOINC_STEP_MIN, MAX]. */
7232 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS)
7234 return sysctl_int_range(oidp, arg1, arg2, req,
7235 IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX);
/* Sysctl: positive-int scan-count knobs (range [1, INT_MAX]). */
7239 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS)
7242 return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX);
/*
 * Sysctl: sum a per-CPU u_long statistic located at byte offset arg2
 * within each CPU's ipfw_context; writing any value zeroes it on all
 * CPUs.
 */
7246 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS)
7251 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
7252 stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2));
7254 error = sysctl_handle_long(oidp, &stat, 0, req);
7255 if (error || req->newptr == NULL)
7258 /* Zero out this stat. */
7259 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
7260 *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0;
/*
 * Per-netisr initialization: allocate this CPU's ipfw context (with the
 * variable-size table array), set up its callouts/netmsgs, install the
 * default rule, and forward to the next CPU.
 */
7265 ipfw_ctx_init_dispatch(netmsg_t nmsg)
7267 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
7268 struct ipfw_context *ctx;
7269 struct ip_fw *def_rule;
7271 ASSERT_NETISR_NCPUS(mycpuid);
7273 ctx = kmalloc(__offsetof(struct ipfw_context,
7274 ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO);
7276 RB_INIT(&ctx->ipfw_state_tree);
7277 TAILQ_INIT(&ctx->ipfw_state_list);
7279 RB_INIT(&ctx->ipfw_track_tree);
7280 TAILQ_INIT(&ctx->ipfw_track_list);
/* State expiry machinery: callout + dispatch and continuation msgs. */
7282 callout_init_mp(&ctx->ipfw_stateto_ch);
7283 netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport,
7284 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch);
7285 ctx->ipfw_stateexp_anch.st_type = O_ANCHOR;
7286 netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport,
7287 MSGF_DROPABLE, ipfw_state_expire_more_dispatch);
/* Track expiry machinery. */
7289 callout_init_mp(&ctx->ipfw_trackto_ch);
7290 netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport,
7291 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch);
7292 netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport,
7293 MSGF_DROPABLE, ipfw_track_expire_more_dispatch);
/* TCP keepalive machinery. */
7295 callout_init_mp(&ctx->ipfw_keepalive_ch);
7296 netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport,
7297 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch);
7298 ctx->ipfw_keepalive_anch.st_type = O_ANCHOR;
7299 netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport,
7300 MSGF_DROPABLE, ipfw_keepalive_more_dispatch);
/* xlat reaping machinery. */
7302 callout_init_mp(&ctx->ipfw_xlatreap_ch);
7303 netmsg_init(&ctx->ipfw_xlatreap_nm, NULL, &netisr_adone_rport,
7304 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_xlat_reap_dispatch);
7305 TAILQ_INIT(&ctx->ipfw_xlatreap);
7307 ipfw_ctx[mycpuid] = ctx;
/* Build the always-present default rule (number IPFW_DEFAULT_RULE). */
7309 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO);
7311 def_rule->act_ofs = 0;
7312 def_rule->rulenum = IPFW_DEFAULT_RULE;
7313 def_rule->cmd_len = 1;
7314 def_rule->set = IPFW_DEFAULT_SET;
7316 def_rule->cmd[0].len = 1;
7317 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
7318 def_rule->cmd[0].opcode = O_ACCEPT;
7320 if (filters_default_to_accept)
7321 def_rule->cmd[0].opcode = O_ACCEPT;
7323 def_rule->cmd[0].opcode = O_DENY;
7326 def_rule->refcnt = 1;
7327 def_rule->cpuid = mycpuid;
7329 /* Install the default rule */
7330 ctx->ipfw_default_rule = def_rule;
7331 ctx->ipfw_layer3_chain = def_rule;
7333 /* Link rule CPU sibling */
7334 ipfw_link_sibling(fwmsg, def_rule);
7336 /* Statistics only need to be updated once */
7338 ipfw_inc_static_count(def_rule);
7340 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/* Netmsg handler: reply first, then run the crossref reaper. */
7344 ipfw_crossref_reap_dispatch(netmsg_t nmsg)
7349 netisr_replymsg(&nmsg->base, 0);
7351 ipfw_crossref_reap();
/* Callout (CPU0): kick the crossref-reap netmsg if it is idle. */
7355 ipfw_crossref_timeo(void *dummy __unused)
7357 struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm;
7359 KKASSERT(mycpuid == 0);
7362 if (msg->lmsg.ms_flags & MSGF_DONE)
7363 netisr_sendmsg_oncpu(msg);
/*
 * Per-netisr handler: an address changed on ifp, so invalidate the
 * cached ifaddr state (IPFW_IFIP_VALID) of every O_IP_SRC_IFIP /
 * O_IP_DST_IFIP instruction that names this interface.
 */
7368 ipfw_ifaddr_dispatch(netmsg_t nmsg)
7370 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
7371 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
7374 ASSERT_NETISR_NCPUS(mycpuid);
7376 for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) {
/* Only rules flagged as using dynamic ifaddr matching. */
7380 if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0)
7383 for (l = f->cmd_len, cmd = f->cmd; l > 0;
7384 l -= cmdlen, cmd += cmdlen) {
7385 cmdlen = F_LEN(cmd);
7386 if (cmd->opcode == O_IP_SRC_IFIP ||
7387 cmd->opcode == O_IP_DST_IFIP) {
7388 if (strncmp(ifp->if_xname,
7389 ((ipfw_insn_ifip *)cmd)->ifname,
7391 cmd->arg1 &= ~IPFW_IFIP_VALID;
7395 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/* ifaddr_event handler: propagate the invalidation to all CPUs. */
7399 ipfw_ifaddr(void *arg __unused, struct ifnet *ifp,
7400 enum ifaddr_event event __unused, struct ifaddr *ifa __unused)
7402 struct netmsg_base nm;
7404 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7405 ipfw_ifaddr_dispatch);
7406 nm.lmsg.u.ms_resultp = ifp;
7407 netisr_domsg_global(&nm);
/*
 * One-shot module initialization, run in the netisr thread:
 * set up global state, create the per-CPU contexts (each with its default
 * rule), hook the firewall entry points, print the configuration banner,
 * and start the periodic state/track/keepalive callouts on every CPU.
 * NOTE(review): several lines (error setup, #else branches, braces) are
 * elided from this listing.
 */
7411 ipfw_init_dispatch(netmsg_t nmsg)
7413 struct netmsg_ipfw fwmsg;
/* Presumably guarded by an IPFW_LOADED-style check (elided above). */
7419 kprintf("IP firewall already loaded\n");
/* Clamp the table count tunable to a sane 1..UINT16_MAX range. */
7424 if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0)
7425 ipfw_table_max = UINT16_MAX;
7427 /* Initialize global track tree. */
7428 RB_INIT(&ipfw_gd.ipfw_trkcnt_tree);
7429 IPFW_TRKCNT_TOKINIT;
7431 /* GC for freed crossref rules. */
7432 callout_init_mp(&ipfw_gd.ipfw_crossref_ch);
7433 netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport,
7434 MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch);
7436 ipfw_state_max_set(ipfw_state_max);
7437 ipfw_state_headroom = 8 * netisr_ncpus;
/* Create the per-CPU contexts and default rules on all netisr CPUs. */
7439 bzero(&fwmsg, sizeof(fwmsg));
7440 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7441 ipfw_ctx_init_dispatch);
7442 netisr_domsg_global(&fwmsg.base);
/* Hook the firewall into the IP input/output and dummynet paths. */
7444 ip_fw_chk_ptr = ipfw_chk;
7445 ip_fw_ctl_ptr = ipfw_ctl;
7446 ip_fw_dn_io_ptr = ipfw_dummynet_io;
7448 kprintf("ipfw2 initialized, default to %s, logging ",
7449 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode ==
7450 O_ACCEPT ? "accept" : "deny");
7452 #ifdef IPFIREWALL_VERBOSE
7455 #ifdef IPFIREWALL_VERBOSE_LIMIT
7456 verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
/* Finish the banner: report the logging configuration. */
7458 if (fw_verbose == 0) {
7459 kprintf("disabled\n");
7460 } else if (verbose_limit == 0) {
7461 kprintf("unlimited\n");
7463 kprintf("limited to %d packets/entry by default\n",
/* Kick off the periodic expiry/keepalive callouts on each CPU. */
7468 for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
7469 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz,
7470 ipfw_state_expire_ipifunc, NULL, cpu);
7471 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz,
7472 ipfw_track_expire_ipifunc, NULL, cpu);
7473 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz,
7474 ipfw_keepalive, NULL, cpu);
/* Registration failure is non-fatal: rules just lose ifaddr tracking. */
7480 ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr,
7481 NULL, EVENTHANDLER_PRI_ANY);
7482 if (ipfw_ifaddr_event == NULL)
7483 kprintf("ipfw: ifaddr_event register failed\n");
7486 netisr_replymsg(&nmsg->base, error);
/*
 * NOTE(review): function signature elided from this listing — presumably
 * the module-load entry point.  It simply dispatches ipfw_init_dispatch()
 * into the netisr thread and returns its error code.
 */
7492 struct netmsg_base smsg;
7494 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7495 ipfw_init_dispatch);
7496 return netisr_domsg(&smsg, 0);
/*
 * Per-netisr teardown: cancel this CPU's periodic callouts, drop all of
 * its pending dropable async netmsgs, flush its tables, and forward the
 * teardown message to the next netisr CPU.
 */
7502 ipfw_ctx_fini_dispatch(netmsg_t nmsg)
7504 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
7506 ASSERT_NETISR_NCPUS(mycpuid);
/* Stop the periodic expiry/keepalive/xlat-reap callouts. */
7508 callout_cancel(&ctx->ipfw_stateto_ch);
7509 callout_cancel(&ctx->ipfw_trackto_ch);
7510 callout_cancel(&ctx->ipfw_keepalive_ch);
7511 callout_cancel(&ctx->ipfw_xlatreap_ch);
/* Discard any in-flight dropable messages owned by this context. */
7514 netisr_dropmsg(&ctx->ipfw_stateexp_more);
7515 netisr_dropmsg(&ctx->ipfw_stateexp_nm);
7516 netisr_dropmsg(&ctx->ipfw_trackexp_more);
7517 netisr_dropmsg(&ctx->ipfw_trackexp_nm);
7518 netisr_dropmsg(&ctx->ipfw_keepalive_more);
7519 netisr_dropmsg(&ctx->ipfw_keepalive_nm);
7520 netisr_dropmsg(&ctx->ipfw_xlatreap_nm);
7523 ipfw_table_flushall_oncpu(ctx, 1);
7525 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
/*
 * Module-unload work, run in the netisr thread: refuse (presumably with
 * EBUSY — body elided) while crossref references remain, then tear down
 * every per-CPU context, unhook the firewall entry points, free all rules
 * including the default one, and release the contexts themselves.
 */
7529 ipfw_fini_dispatch(netmsg_t nmsg)
7531 struct netmsg_base nm;
/* Last-chance GC before checking the global reference count. */
7536 ipfw_crossref_reap();
/* NOTE(review): failure branch elided — verify it replies with EBUSY. */
7538 if (ipfw_gd.ipfw_refcnt != 0) {
7546 /* Synchronize any inflight state/track expire IPIs. */
7547 lwkt_synchronize_ipiqs("ipfwfini");
/* Run the per-CPU teardown on all netisr CPUs, in order. */
7549 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7550 ipfw_ctx_fini_dispatch);
7551 netisr_domsg_global(&nm);
/* Stop the global crossref GC callout and its message. */
7553 callout_cancel(&ipfw_gd.ipfw_crossref_ch);
7555 netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm);
7558 if (ipfw_ifaddr_event != NULL)
7559 EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event);
/* Unhook from the IP paths before freeing any rule memory. */
7561 ip_fw_chk_ptr = NULL;
7562 ip_fw_ctl_ptr = NULL;
7563 ip_fw_dn_io_ptr = NULL;
7564 ipfw_flush(1 /* kill default rule */);
7566 /* Free pre-cpu context */
7567 for (cpu = 0; cpu < netisr_ncpus; ++cpu)
7568 kfree(ipfw_ctx[cpu], M_IPFW);
7570 kprintf("IP firewall unloaded\n");
7572 netisr_replymsg(&nmsg->base, error);
/*
 * Netisr handler used during unload: flush all rules except the default
 * one and reap any crossref-freed rules, so the global refcount can
 * drain before the final teardown.
 */
7576 ipfw_fflush_dispatch(netmsg_t nmsg)
7579 ipfw_flush(0 /* keep default rule */);
7580 ipfw_crossref_reap();
7581 netisr_replymsg(&nmsg->base, 0);
/*
 * NOTE(review): function signature elided from this listing — presumably
 * the module-unload entry point.  Flushes the rules, then polls (loop
 * construct partially elided) until the crossref refcount drains, and
 * finally dispatches ipfw_fini_dispatch() for the real teardown.
 */
7587 struct netmsg_base smsg;
7591 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7592 ipfw_fflush_dispatch);
7593 netisr_domsg(&smsg, 0);
/* Wait for outstanding references to disappear before tearing down. */
7595 if (ipfw_gd.ipfw_refcnt == 0)
7597 kprintf("ipfw: flush pending %d\n", ++i);
7598 tsleep(&smsg, 0, "ipfwff", (3 * hz) / 2);
7601 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
7602 ipfw_fini_dispatch);
7603 return netisr_domsg(&smsg, 0);
7606 #endif /* KLD_MODULE */
/*
 * Module event handler (MOD_LOAD / MOD_UNLOAD dispatch).
 * NOTE(review): nearly all of the body is elided from this listing; the
 * visible message suggests unload is rejected for static builds.
 */
7609 ipfw_modevent(module_t mod, int type, void *unused)
7620 kprintf("ipfw statically compiled, cannot unload\n");
/*
 * Kernel module registration: wire ipfw_modevent into the module system
 * late in protocol initialization (SI_SUB_PROTO_END).
 * NOTE(review): the initializer's interior lines are elided here.
 */
7632 static moduledata_t ipfwmod = {
7637 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY);
7638 MODULE_VERSION(ipfw, 1);