| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa | |
| 3 | * | |
| 4 | * Redistribution and use in source and binary forms, with or without | |
| 5 | * modification, are permitted provided that the following conditions | |
| 6 | * are met: | |
| 7 | * 1. Redistributions of source code must retain the above copyright | |
| 8 | * notice, this list of conditions and the following disclaimer. | |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 10 | * notice, this list of conditions and the following disclaimer in the | |
| 11 | * documentation and/or other materials provided with the distribution. | |
| 12 | * | |
| 13 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
| 14 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 16 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
| 17 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 18 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 19 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 20 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 21 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 23 | * SUCH DAMAGE. | |
| 24 | * | |
| 25 | * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ | |
| b95665d8 | 26 | * $DragonFly: src/sys/net/ipfw/ip_fw2.c,v 1.100 2008/11/22 11:03:35 sephe Exp $ |
| 984263bc MD |
27 | */ |
| 28 | ||
| 984263bc MD |
29 | /* |
| 30 | * Implement IP packet firewall (new version) | |
| 31 | */ | |
| 32 | ||
| 984263bc | 33 | #include "opt_ipfw.h" |
| 984263bc MD |
34 | #include "opt_inet.h" |
| 35 | #ifndef INET | |
| 36 | #error IPFIREWALL requires INET. | |
| 37 | #endif /* INET */ | |
| 984263bc | 38 | |
| 984263bc MD |
39 | #include <sys/param.h> |
| 40 | #include <sys/systm.h> | |
| 41 | #include <sys/malloc.h> | |
| 42 | #include <sys/mbuf.h> | |
| 43 | #include <sys/kernel.h> | |
| 44 | #include <sys/proc.h> | |
| 45 | #include <sys/socket.h> | |
| 46 | #include <sys/socketvar.h> | |
| 47 | #include <sys/sysctl.h> | |
| 48 | #include <sys/syslog.h> | |
| 5e33db2e | 49 | #include <sys/thread2.h> |
| 984263bc | 50 | #include <sys/ucred.h> |
| 3f9db7f8 | 51 | #include <sys/in_cksum.h> |
| 997a0e9a | 52 | #include <sys/lock.h> |
| 0049e46a | 53 | |
| 984263bc MD |
54 | #include <net/if.h> |
| 55 | #include <net/route.h> | |
| 0049e46a | 56 | #include <net/netmsg2.h> |
| 8c6081b9 | 57 | #include <net/pfil.h> |
| b6d66a39 | 58 | #include <net/dummynet/ip_dummynet.h> |
| 0049e46a | 59 | |
| 984263bc MD |
60 | #include <netinet/in.h> |
| 61 | #include <netinet/in_systm.h> | |
| 62 | #include <netinet/in_var.h> | |
| 63 | #include <netinet/in_pcb.h> | |
| 64 | #include <netinet/ip.h> | |
| 65 | #include <netinet/ip_var.h> | |
| 66 | #include <netinet/ip_icmp.h> | |
| 984263bc MD |
67 | #include <netinet/tcp.h> |
| 68 | #include <netinet/tcp_timer.h> | |
| 69 | #include <netinet/tcp_var.h> | |
| 70 | #include <netinet/tcpip.h> | |
| 71 | #include <netinet/udp.h> | |
| 72 | #include <netinet/udp_var.h> | |
| 68edaf54 | 73 | #include <netinet/ip_divert.h> |
| 984263bc MD |
74 | #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ |
| 75 | ||
| b6d66a39 SZ |
76 | #include <net/ipfw/ip_fw2.h> |
| 77 | ||
/*
 * Debug output helper.
 *
 * Without IPFIREWALL_DEBUG the macro is a no-op expression and its
 * arguments are not evaluated.  With IPFIREWALL_DEBUG it forwards the
 * format and arguments to kprintf(), but only while fw_debug is positive.
 */
#ifndef IPFIREWALL_DEBUG
#define DPRINTF(fmt, ...)	((void)0)
#else
#define DPRINTF(fmt, ...) \
do { \
	if (fw_debug > 0) \
		kprintf(fmt, __VA_ARGS__); \
} while (0)
#endif
| 87 | ||
| ca12e259 SZ |
88 | /* |
| 89 | * Description about per-CPU rule duplication: | |
| 90 | * | |
| 91 | * Module loading/unloading and all ioctl operations are serialized | |
| 92 | * by netisr0, so we don't have any ordering or locking problems. | |
| 93 | * | |
| 94 | * Following graph shows how operation on per-CPU rule list is | |
| 95 | * performed [2 CPU case]: | |
| 96 | * | |
| 97 | * CPU0 CPU1 | |
| 98 | * | |
| 99 | * netisr0 <------------------------------------+ | |
| 100 | * domsg | | |
| 101 | * | | | |
| 102 | * | netmsg | | |
| 103 | * | | | |
| 104 | * V | | |
| 105 | * ifnet0 | | |
| 106 | * : | netmsg | |
| 107 | * :(delete/add...) | | |
| 108 | * : | | |
| 109 | * : netmsg | | |
| 110 | * forwardmsg---------->ifnet1 | | |
| 111 | * : | | |
| 112 | * :(delete/add...) | | |
| 113 | * : | | |
| 114 | * : | | |
| 115 | * replymsg--------------+ | |
| 116 | * | |
| 117 | * | |
| 118 | * | |
| 119 | * | |
| 120 | * Rules which will not create states (dyn rules) [2 CPU case] | |
| 121 | * | |
| 122 | * CPU0 CPU1 | |
| 123 | * layer3_chain layer3_chain | |
| 124 | * | | | |
| 125 | * V V | |
| 126 | * +-------+ sibling +-------+ sibling | |
| 127 | * | rule1 |--------->| rule1 |--------->NULL | |
| 128 | * +-------+ +-------+ | |
| 129 | * | | | |
| 130 | * |next |next | |
| 131 | * V V | |
| 132 | * +-------+ sibling +-------+ sibling | |
| 133 | * | rule2 |--------->| rule2 |--------->NULL | |
| 134 | * +-------+ +-------+ | |
| 135 | * | |
| 136 | * ip_fw.sibling: | |
| 137 | * 1) Ease statistics calculation during IP_FW_GET. We only need to | |
| 138 | * iterate layer3_chain on CPU0; the current rule's duplication on | |
| 139 | * the other CPUs could safely be read-only accessed by using | |
| 140 | * ip_fw.sibling | |
| 141 | * 2) Accelerate rule insertion and deletion, e.g. rule insertion: | |
| 142 | * a) In netisr0 (on CPU0) rule3 is determined to be inserted between | |
| 143 | * rule1 and rule2. To make this decision we need to iterate the | |
| 144 | * layer3_chain on CPU0. The netmsg, which is used to insert the | |
| 145 | * rule, will contain rule1 on CPU0 as prev_rule and rule2 on CPU0 | |
| 146 | * as next_rule | |
| 147 | * b) After the insertion on CPU0 is done, we will move on to CPU1. | |
| 148 | * But instead of relocating the rule3's position on CPU1 by | |
| 149 | * iterating the layer3_chain on CPU1, we set the netmsg's prev_rule | |
| 150 | * to rule1->sibling and next_rule to rule2->sibling before the | |
| 151 | * netmsg is forwarded to CPU1 from CPU0 | |
| 152 | * | |
| 153 | * | |
| 154 | * | |
| 155 | * Rules which will create states (dyn rules) [2 CPU case] | |
| 156 | * (unnecessary parts are omitted; they are same as in the previous figure) | |
| 157 | * | |
| 158 | * CPU0 CPU1 | |
| 159 | * | |
| 160 | * +-------+ +-------+ | |
| 161 | * | rule1 | | rule1 | | |
| 162 | * +-------+ +-------+ | |
| 163 | * ^ | | ^ | |
| 164 | * | |stub stub| | | |
| 165 | * | | | | | |
| 166 | * | +----+ +----+ | | |
| 167 | * | | | | | |
| 168 | * | V V | | |
| 169 | * | +--------------------+ | | |
| 170 | * | | rule_stub | | | |
| 171 | * | | (read-only shared) | | | |
| 172 | * | | | | | |
| 173 | * | | back pointer array | | | |
| 174 | * | | (indexed by cpuid) | | | |
| 175 | * | | | | | |
| 176 | * +----|---------[0] | | | |
| 177 | * | [1]--------|----+ | |
| 178 | * | | | |
| 179 | * +--------------------+ | |
| 180 | * ^ ^ | |
| 181 | * | | | |
| 182 | * ........|............|............ | |
| 183 | * : | | : | |
| 184 | * : |stub |stub : | |
| 185 | * : | | : | |
| 186 | * : +---------+ +---------+ : | |
| 187 | * : | state1a | | state1b | .... : | |
| 188 | * : +---------+ +---------+ : | |
| 189 | * : : | |
| 190 | * : states table : | |
| 191 | * : (shared) : | |
| 192 | * : (protected by dyn_lock) : | |
| 193 | * .................................. | |
| 194 | * | |
| 195 | * [state1a and state1b are states created by rule1] | |
| 196 | * | |
| 197 | * ip_fw_stub: | |
| 198 | * This structure is introduced so that shared (locked) state table could | |
| 199 | * work with per-CPU (duplicated) static rules. It mainly bridges states | |
| 200 | * and static rules and serves as static rule's place holder (a read-only | |
| 201 | * shared part of duplicated rules) from states point of view. | |
| 202 | * | |
| 203 | * IPFW_RULE_F_STATE (only for rules which create states): | |
| 204 | * o During rule installation, this flag is turned on after rule's | |
| 205 | * duplications reach all CPUs, to avoid at least following race: | |
| 206 | * 1) rule1 is duplicated on CPU0 and is not duplicated on CPU1 yet | |
| 207 | * 2) rule1 creates state1 | |
| 208 | * 3) state1 is located on CPU1 by check-state | |
| 209 | * But rule1 is not duplicated on CPU1 yet | |
| 210 | * o During rule deletion, this flag is turned off before deleting states | |
| 211 | * created by the rule and before deleting the rule itself, so no | |
| 212 | * more states will be created by the to-be-deleted rule even when its | |
| 213 | * duplications on certain CPUs are not eliminated yet. | |
| 214 | */ | |
| 215 | ||
/* Lower bound, upper bound and default of the rule number auto-increment step */
#define IPFW_AUTOINC_STEP_MIN	1
#define IPFW_AUTOINC_STEP_MAX	1000
#define IPFW_AUTOINC_STEP_DEF	100

/* Identity of the default rule */
#define IPFW_DEFAULT_RULE	65535	/* rulenum for the default rule */
#define IPFW_DEFAULT_SET	31	/* set number for the default rule */
| 984263bc | 222 | |
| ca12e259 SZ |
223 | struct netmsg_ipfw { |
| 224 | struct netmsg nmsg; | |
| 225 | const struct ipfw_ioc_rule *ioc_rule; | |
| 226 | struct ip_fw *next_rule; | |
| 227 | struct ip_fw *prev_rule; | |
| 228 | struct ip_fw *sibling; | |
| 229 | struct ip_fw_stub *stub; | |
| 230 | }; | |
| 231 | ||
| 232 | struct netmsg_del { | |
| 233 | struct netmsg nmsg; | |
| 234 | struct ip_fw *start_rule; | |
| 235 | struct ip_fw *prev_rule; | |
| 236 | uint16_t rulenum; | |
| 237 | uint8_t from_set; | |
| 238 | uint8_t to_set; | |
| 239 | }; | |
| 240 | ||
| 241 | struct netmsg_zent { | |
| 242 | struct netmsg nmsg; | |
| 243 | struct ip_fw *start_rule; | |
| 244 | uint16_t rulenum; | |
| 245 | uint16_t log_only; | |
| 246 | }; | |
| 247 | ||
/*
 * Firewall state kept once per CPU (see the ipfw_ctx[] array).
 */
struct ipfw_context {
	struct ip_fw	*ipfw_layer3_chain;	/* list of rules for layer3 */
	struct ip_fw	*ipfw_default_rule;	/* default rule */
	uint64_t	ipfw_norule_counter;	/* counter for ipfw_log(NULL) */

	/*
	 * ipfw_set_disable holds one bit per set value (0..31).  When a
	 * bit is set, every rule belonging to the corresponding set is
	 * disabled.  Set IPFW_DEFAULT_SET is reserved for the default
	 * rule and CANNOT be disabled.
	 */
	uint32_t	ipfw_set_disable;
	uint32_t	ipfw_gen;		/* generation of rule list */
};
| 262 | ||
| 263 | static struct ipfw_context *ipfw_ctx[MAXCPU]; | |
| 984263bc | 264 | |
| 84a3e25a | 265 | #ifdef KLD_MODULE |
| ca12e259 SZ |
266 | /* |
| 267 | * Module can not be unloaded, if there are references to | |
| 268 | * certains rules of ipfw(4), e.g. dummynet(4) | |
| 269 | */ | |
| 84a3e25a SZ |
270 | static int ipfw_refcnt; |
| 271 | #endif | |
| 272 | ||
| ca12e259 | 273 | MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); |
| 984263bc MD |
274 | |
| 275 | /* | |
| ca12e259 SZ |
276 | * Following two global variables are accessed and |
| 277 | * updated only on CPU0 | |
| 984263bc | 278 | */ |
| ca12e259 SZ |
279 | static uint32_t static_count; /* # of static rules */ |
| 280 | static uint32_t static_ioc_len; /* bytes of static rules */ | |
| 984263bc | 281 | |
| ca12e259 SZ |
282 | /* |
| 283 | * If 1, then ipfw static rules are being flushed, | |
| 284 | * ipfw_chk() will skip to the default rule. | |
| 285 | */ | |
| 286 | static int ipfw_flushing; | |
| 287 | ||
| 288 | static int fw_verbose; | |
| 289 | static int verbose_limit; | |
| 984263bc | 290 | |
| 8d0865c8 | 291 | static int fw_debug; |
| 9fabc2ac | 292 | static int autoinc_step = IPFW_AUTOINC_STEP_DEF; |
| 984263bc | 293 | |
| 2803ec4a | 294 | static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS); |
| c1aa76bb | 295 | static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS); |
| 997a0e9a SZ |
296 | static int ipfw_sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS); |
| 297 | static int ipfw_sysctl_dyn_fin(SYSCTL_HANDLER_ARGS); | |
| 298 | static int ipfw_sysctl_dyn_rst(SYSCTL_HANDLER_ARGS); | |
| c1aa76bb | 299 | |
| 984263bc | 300 | SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); |
| 2803ec4a SZ |
301 | SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, |
| 302 | &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw"); | |
| c1aa76bb SZ |
303 | SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW, |
| 304 | &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I", | |
| 305 | "Rule number autincrement step"); | |
| 984263bc MD |
306 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW, |
| 307 | &fw_one_pass, 0, | |
| 308 | "Only do a single pass through ipfw when using dummynet(4)"); | |
| 309 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, | |
| 310 | &fw_debug, 0, "Enable printing of debug ip_fw statements"); | |
| 311 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, | |
| 312 | &fw_verbose, 0, "Log matches to ipfw rules"); | |
| 313 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, | |
| 314 | &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); | |
| 315 | ||
| 316 | /* | |
| 317 | * Description of dynamic rules. | |
| 318 | * | |
| 319 | * Dynamic rules are stored in lists accessed through a hash table | |
| 320 | * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can | |
| 321 | * be modified through the sysctl variable dyn_buckets which is | |
| 322 | * updated when the table becomes empty. | |
| 323 | * | |
| 324 | * XXX currently there is only one list, ipfw_dyn. | |
| 325 | * | |
| 326 | * When a packet is received, its address fields are first masked | |
| 327 | * with the mask defined for the rule, then hashed, then matched | |
| 328 | * against the entries in the corresponding list. | |
| 329 | * Dynamic rules can be used for different purposes: | |
| 330 | * + stateful rules; | |
| 331 | * + enforcing limits on the number of sessions; | |
| 332 | * + in-kernel NAT (not implemented yet) | |
| 333 | * | |
| 334 | * The lifetime of dynamic rules is regulated by dyn_*_lifetime, | |
| 335 | * measured in seconds and depending on the flags. | |
| 336 | * | |
| 337 | * The total number of dynamic rules is stored in dyn_count. | |
| 338 | * The max number of dynamic rules is dyn_max. When we reach | |
| 339 | * the maximum number of rules we do not create anymore. This is | |
| 340 | * done to avoid consuming too much memory, but also too much | |
| 341 | * time when searching on each packet (ideally, we should try instead | |
| 342 | * to put a limit on the length of the list on each bucket...). | |
| 343 | * | |
| 344 | * Each dynamic rule holds a pointer to the parent ipfw rule so | |
| 345 | * we know what action to perform. Dynamic rules are removed when | |
| 346 | * the parent rule is deleted. XXX we should make them survive. | |
| 347 | * | |
| 348 | * There are some limitations with dynamic rules -- we do not | |
| 349 | * obey the 'randomized match', and we do not do multiple | |
| 350 | * passes through the firewall. XXX check the latter!!! | |
| 997a0e9a SZ |
351 | * |
| 352 | * NOTE about the SHARED LOCKMGR LOCK during dynamic rule looking up: | |
| 353 | * Only TCP state transition will change dynamic rule's state and ack | |
| 354 | * sequences, while all packets of one TCP connection only go through | |
| 355 | * one TCP thread, so it is safe to use shared lockmgr lock during dynamic | |
| 356 | * rule looking up. The keep alive callout uses exclusive lockmgr lock | |
| 357 | * when it tries to find suitable dynamic rules to send keep alive, so | |
| 358 | * it will not see half updated state and ack sequences. Though the expire | |
| 359 | * field updating looks racy for other protocols, the resolution (second) | |
| 360 | * of expire field makes this kind of race harmless. | |
| 361 | * XXX statistics' updating is _not_ MPsafe!!! | |
| 362 | * XXX once UDP output path is fixed, we could use lockless dynamic rule | |
| 363 | * hash table | |
| 984263bc MD |
364 | */ |
| 365 | static ipfw_dyn_rule **ipfw_dyn_v = NULL; | |
| a998c492 SZ |
366 | static uint32_t dyn_buckets = 256; /* must be power of 2 */ |
| 367 | static uint32_t curr_dyn_buckets = 256; /* must be power of 2 */ | |
| c31665e4 | 368 | static uint32_t dyn_buckets_gen; /* generation of dyn buckets array */ |
| 997a0e9a | 369 | static struct lock dyn_lock; /* dynamic rules' hash table lock */ |
| ac5988d6 SZ |
370 | |
| 371 | static struct netmsg ipfw_timeout_netmsg; /* schedule ipfw timeout */ | |
| ca12e259 | 372 | static struct callout ipfw_timeout_h; |
| 984263bc MD |
373 | |
| 374 | /* | |
| 375 | * Timeouts for various events in handing dynamic rules. | |
| 376 | */ | |
| a998c492 SZ |
377 | static uint32_t dyn_ack_lifetime = 300; |
| 378 | static uint32_t dyn_syn_lifetime = 20; | |
| 379 | static uint32_t dyn_fin_lifetime = 1; | |
| 380 | static uint32_t dyn_rst_lifetime = 1; | |
| 381 | static uint32_t dyn_udp_lifetime = 10; | |
| 382 | static uint32_t dyn_short_lifetime = 5; | |
| 984263bc MD |
383 | |
| 384 | /* | |
| 385 | * Keepalives are sent if dyn_keepalive is set. They are sent every | |
| 386 | * dyn_keepalive_period seconds, in the last dyn_keepalive_interval | |
| 387 | * seconds of lifetime of a rule. | |
| 388 | * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower | |
| 389 | * than dyn_keepalive_period. | |
| 390 | */ | |
| 391 | ||
| a998c492 SZ |
392 | static uint32_t dyn_keepalive_interval = 20; |
| 393 | static uint32_t dyn_keepalive_period = 5; | |
| 394 | static uint32_t dyn_keepalive = 1; /* do send keepalives */ | |
| 984263bc | 395 | |
| a998c492 | 396 | static uint32_t dyn_count; /* # of dynamic rules */ |
| 997a0e9a | 397 | static uint32_t dyn_max = 4096; /* max # of dynamic rules */ |
| 984263bc | 398 | |
| 997a0e9a SZ |
399 | SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLTYPE_INT | CTLFLAG_RW, |
| 400 | &dyn_buckets, 0, ipfw_sysctl_dyn_buckets, "I", "Number of dyn. buckets"); | |
| 984263bc MD |
401 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, |
| 402 | &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); | |
| 403 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, | |
| 404 | &dyn_count, 0, "Number of dyn. rules"); | |
| 405 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, | |
| 406 | &dyn_max, 0, "Max number of dyn. rules"); | |
| 407 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, | |
| 408 | &static_count, 0, "Number of static rules"); | |
| 409 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, | |
| 410 | &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); | |
| 411 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, | |
| 412 | &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); | |
| 997a0e9a SZ |
413 | SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, |
| 414 | CTLTYPE_INT | CTLFLAG_RW, &dyn_fin_lifetime, 0, ipfw_sysctl_dyn_fin, "I", | |
| 415 | "Lifetime of dyn. rules for fin"); | |
| 416 | SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, | |
| 417 | CTLTYPE_INT | CTLFLAG_RW, &dyn_rst_lifetime, 0, ipfw_sysctl_dyn_rst, "I", | |
| 418 | "Lifetime of dyn. rules for rst"); | |
| 984263bc MD |
419 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, |
| 420 | &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); | |
| 421 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, | |
| 422 | &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); | |
| 423 | SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, | |
| 424 | &dyn_keepalive, 0, "Enable keepalives for dyn. rules"); | |
| 425 | ||
| 984263bc | 426 | static ip_fw_chk_t ipfw_chk; |
| ac5988d6 | 427 | static void ipfw_tick(void *); |
| 984263bc | 428 | |
| 84a3e25a SZ |
429 | static __inline int |
| 430 | ipfw_free_rule(struct ip_fw *rule) | |
| 431 | { | |
| ca12e259 | 432 | KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d\n", mycpuid)); |
| 84a3e25a | 433 | KASSERT(rule->refcnt > 0, ("invalid refcnt %u\n", rule->refcnt)); |
| ca12e259 SZ |
434 | rule->refcnt--; |
| 435 | if (rule->refcnt == 0) { | |
| 84a3e25a SZ |
436 | kfree(rule, M_IPFW); |
| 437 | return 1; | |
| 438 | } | |
| 439 | return 0; | |
| 440 | } | |
| 441 | ||
/*
 * Untyped counterpart of ipfw_ref_rule(): drops the rule reference held
 * through 'priv' and, for the loadable module build, the matching
 * module-wide reference.
 */
static void
ipfw_unref_rule(void *priv)
{
	struct ip_fw *rule = priv;

	(void)ipfw_free_rule(rule);
#ifdef KLD_MODULE
	atomic_subtract_int(&ipfw_refcnt, 1);
#endif
}
| 450 | ||
| 451 | static __inline void | |
| 452 | ipfw_ref_rule(struct ip_fw *rule) | |
| 453 | { | |
| ca12e259 | 454 | KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d\n", mycpuid)); |
| 84a3e25a SZ |
455 | #ifdef KLD_MODULE |
| 456 | atomic_add_int(&ipfw_refcnt, 1); | |
| 457 | #endif | |
| ca12e259 | 458 | rule->refcnt++; |
| 84a3e25a | 459 | } |
| 984263bc MD |
460 | |
/*
 * Given a pointer to an IP header, yield a pointer of type T to the
 * data located ip_hl 32-bit words past the start of that header.
 */
#define L3HDR(T, ip)	((T *)&((uint32_t *)(ip))[(ip)->ip_hl])
| 984263bc MD |
465 | |
| 466 | static __inline int | |
| 467 | icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) | |
| 468 | { | |
| 469 | int type = L3HDR(struct icmp,ip)->icmp_type; | |
| 470 | ||
| 368024c3 | 471 | return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1 << type))); |
| 984263bc MD |
472 | } |
| 473 | ||
| 368024c3 SZ |
474 | #define TT ((1 << ICMP_ECHO) | \ |
| 475 | (1 << ICMP_ROUTERSOLICIT) | \ | |
| 476 | (1 << ICMP_TSTAMP) | \ | |
| 477 | (1 << ICMP_IREQ) | \ | |
| 478 | (1 << ICMP_MASKREQ)) | |
| 984263bc MD |
479 | |
| 480 | static int | |
| 481 | is_icmp_query(struct ip *ip) | |
| 482 | { | |
| 483 | int type = L3HDR(struct icmp, ip)->icmp_type; | |
| 368024c3 SZ |
484 | |
| 485 | return (type <= ICMP_MAXTYPE && (TT & (1 << type))); | |
| 984263bc | 486 | } |
| 368024c3 | 487 | |
| 984263bc MD |
488 | #undef TT |
| 489 | ||
| 490 | /* | |
| 491 | * The following checks use two arrays of 8 or 16 bits to store the | |
| 492 | * bits that we want set or clear, respectively. They are in the | |
| 493 | * low and high half of cmd->arg1 or cmd->d[0]. | |
| 494 | * | |
| 495 | * We scan options and store the bits we find set. We succeed if | |
| 496 | * | |
| 497 | * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear | |
| 498 | * | |
| 499 | * The code is sometimes optimized not to store additional variables. | |
| 500 | */ | |
| 501 | ||
| 502 | static int | |
| a998c492 | 503 | flags_match(ipfw_insn *cmd, uint8_t bits) |
| 984263bc MD |
504 | { |
| 505 | u_char want_clear; | |
| 506 | bits = ~bits; | |
| 507 | ||
| 368024c3 | 508 | if (((cmd->arg1 & 0xff) & bits) != 0) |
| 984263bc | 509 | return 0; /* some bits we want set were clear */ |
| 368024c3 | 510 | |
| 984263bc | 511 | want_clear = (cmd->arg1 >> 8) & 0xff; |
| 368024c3 | 512 | if ((want_clear & bits) != want_clear) |
| 984263bc MD |
513 | return 0; /* some bits we want clear were set */ |
| 514 | return 1; | |
| 515 | } | |
| 516 | ||
| 517 | static int | |
| 518 | ipopts_match(struct ip *ip, ipfw_insn *cmd) | |
| 519 | { | |
| 520 | int optlen, bits = 0; | |
| 521 | u_char *cp = (u_char *)(ip + 1); | |
| 368024c3 | 522 | int x = (ip->ip_hl << 2) - sizeof(struct ip); |
| 984263bc MD |
523 | |
| 524 | for (; x > 0; x -= optlen, cp += optlen) { | |
| 525 | int opt = cp[IPOPT_OPTVAL]; | |
| 526 | ||
| 527 | if (opt == IPOPT_EOL) | |
| 528 | break; | |
| 368024c3 SZ |
529 | |
| 530 | if (opt == IPOPT_NOP) { | |
| 984263bc | 531 | optlen = 1; |
| 368024c3 | 532 | } else { |
| 984263bc MD |
533 | optlen = cp[IPOPT_OLEN]; |
| 534 | if (optlen <= 0 || optlen > x) | |
| 535 | return 0; /* invalid or truncated */ | |
| 536 | } | |
| 984263bc | 537 | |
| 368024c3 | 538 | switch (opt) { |
| 984263bc MD |
539 | case IPOPT_LSRR: |
| 540 | bits |= IP_FW_IPOPT_LSRR; | |
| 541 | break; | |
| 542 | ||
| 543 | case IPOPT_SSRR: | |
| 544 | bits |= IP_FW_IPOPT_SSRR; | |
| 545 | break; | |
| 546 | ||
| 547 | case IPOPT_RR: | |
| 548 | bits |= IP_FW_IPOPT_RR; | |
| 549 | break; | |
| 550 | ||
| 551 | case IPOPT_TS: | |
| 552 | bits |= IP_FW_IPOPT_TS; | |
| 553 | break; | |
| 368024c3 SZ |
554 | |
| 555 | default: | |
| 556 | break; | |
| 984263bc MD |
557 | } |
| 558 | } | |
| 559 | return (flags_match(cmd, bits)); | |
| 560 | } | |
| 561 | ||
| 562 | static int | |
| 563 | tcpopts_match(struct ip *ip, ipfw_insn *cmd) | |
| 564 | { | |
| 565 | int optlen, bits = 0; | |
| 566 | struct tcphdr *tcp = L3HDR(struct tcphdr,ip); | |
| 567 | u_char *cp = (u_char *)(tcp + 1); | |
| 568 | int x = (tcp->th_off << 2) - sizeof(struct tcphdr); | |
| 569 | ||
| 570 | for (; x > 0; x -= optlen, cp += optlen) { | |
| 571 | int opt = cp[0]; | |
| 368024c3 | 572 | |
| 984263bc MD |
573 | if (opt == TCPOPT_EOL) |
| 574 | break; | |
| 368024c3 SZ |
575 | |
| 576 | if (opt == TCPOPT_NOP) { | |
| 984263bc | 577 | optlen = 1; |
| 368024c3 | 578 | } else { |
| 984263bc MD |
579 | optlen = cp[1]; |
| 580 | if (optlen <= 0) | |
| 581 | break; | |
| 582 | } | |
| 583 | ||
| 584 | switch (opt) { | |
| 984263bc MD |
585 | case TCPOPT_MAXSEG: |
| 586 | bits |= IP_FW_TCPOPT_MSS; | |
| 587 | break; | |
| 588 | ||
| 589 | case TCPOPT_WINDOW: | |
| 590 | bits |= IP_FW_TCPOPT_WINDOW; | |
| 591 | break; | |
| 592 | ||
| 593 | case TCPOPT_SACK_PERMITTED: | |
| 594 | case TCPOPT_SACK: | |
| 595 | bits |= IP_FW_TCPOPT_SACK; | |
| 596 | break; | |
| 597 | ||
| 598 | case TCPOPT_TIMESTAMP: | |
| 599 | bits |= IP_FW_TCPOPT_TS; | |
| 600 | break; | |
| 601 | ||
| 602 | case TCPOPT_CC: | |
| 603 | case TCPOPT_CCNEW: | |
| 604 | case TCPOPT_CCECHO: | |
| 605 | bits |= IP_FW_TCPOPT_CC; | |
| 606 | break; | |
| 368024c3 SZ |
607 | |
| 608 | default: | |
| 609 | break; | |
| 984263bc MD |
610 | } |
| 611 | } | |
| 612 | return (flags_match(cmd, bits)); | |
| 613 | } | |
| 614 | ||
| 615 | static int | |
| 616 | iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) | |
| 617 | { | |
| 618 | if (ifp == NULL) /* no iface with this packet, match fails */ | |
| 619 | return 0; | |
| 368024c3 | 620 | |
| 984263bc MD |
621 | /* Check by name or by IP address */ |
| 622 | if (cmd->name[0] != '\0') { /* match by name */ | |
| 984263bc | 623 | /* Check name */ |
| 3e4a09e7 | 624 | if (cmd->p.glob) { |
| e93690c2 | 625 | if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0) |
| 3e4a09e7 MD |
626 | return(1); |
| 627 | } else { | |
| 628 | if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) | |
| 629 | return(1); | |
| 630 | } | |
| 984263bc | 631 | } else { |
| b2632176 SZ |
632 | struct ifaddr_container *ifac; |
| 633 | ||
| 634 | TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { | |
| 635 | struct ifaddr *ia = ifac->ifa; | |
| 984263bc | 636 | |
| 984263bc MD |
637 | if (ia->ifa_addr == NULL) |
| 638 | continue; | |
| 639 | if (ia->ifa_addr->sa_family != AF_INET) | |
| 640 | continue; | |
| 641 | if (cmd->p.ip.s_addr == ((struct sockaddr_in *) | |
| 642 | (ia->ifa_addr))->sin_addr.s_addr) | |
| 643 | return(1); /* match */ | |
| 644 | } | |
| 645 | } | |
| 646 | return(0); /* no match, fail ... */ | |
| 647 | } | |
| 648 | ||
/*
 * Expand to the two leading arguments of ksnprintf() for appending to
 * the character ARRAY 'buf' at offset 'len': the write position and
 * the space left (0 once 'len' reaches the size of the array).
 * 'buf' must be an array, not a pointer, because sizeof() is applied.
 * Arguments are parenthesized so expressions such as 'a + b' may be
 * passed as 'len'.
 */
#define SNPARGS(buf, len) \
	(buf) + (len), sizeof(buf) > (len) ? sizeof(buf) - (len) : 0
| 984263bc MD |
650 | |
| 651 | /* | |
| 652 | * We enter here when we have a rule with O_LOG. | |
| 653 | * XXX this function alone takes about 2Kbytes of code! | |
| 654 | */ | |
| 655 | static void | |
| 656 | ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh, | |
| 26ef90a3 | 657 | struct mbuf *m, struct ifnet *oif) |
| 984263bc MD |
658 | { |
| 659 | char *action; | |
| 660 | int limit_reached = 0; | |
| 661 | char action2[40], proto[48], fragment[28]; | |
| 662 | ||
| 663 | fragment[0] = '\0'; | |
| 664 | proto[0] = '\0'; | |
| 665 | ||
| 666 | if (f == NULL) { /* bogus pkt */ | |
| ca12e259 SZ |
667 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 668 | ||
| 669 | if (verbose_limit != 0 && | |
| 670 | ctx->ipfw_norule_counter >= verbose_limit) | |
| 984263bc | 671 | return; |
| ca12e259 SZ |
672 | ctx->ipfw_norule_counter++; |
| 673 | if (ctx->ipfw_norule_counter == verbose_limit) | |
| 984263bc MD |
674 | limit_reached = verbose_limit; |
| 675 | action = "Refuse"; | |
| 676 | } else { /* O_LOG is the first action, find the real one */ | |
| 677 | ipfw_insn *cmd = ACTION_PTR(f); | |
| 678 | ipfw_insn_log *l = (ipfw_insn_log *)cmd; | |
| 679 | ||
| 680 | if (l->max_log != 0 && l->log_left == 0) | |
| 681 | return; | |
| 682 | l->log_left--; | |
| 683 | if (l->log_left == 0) | |
| 684 | limit_reached = l->max_log; | |
| 685 | cmd += F_LEN(cmd); /* point to first action */ | |
| 686 | if (cmd->opcode == O_PROB) | |
| 687 | cmd += F_LEN(cmd); | |
| 688 | ||
| 689 | action = action2; | |
| 690 | switch (cmd->opcode) { | |
| 691 | case O_DENY: | |
| 692 | action = "Deny"; | |
| 693 | break; | |
| 694 | ||
| 695 | case O_REJECT: | |
| 26ef90a3 | 696 | if (cmd->arg1==ICMP_REJECT_RST) { |
| 984263bc | 697 | action = "Reset"; |
| 26ef90a3 | 698 | } else if (cmd->arg1==ICMP_UNREACH_HOST) { |
| 984263bc | 699 | action = "Reject"; |
| 26ef90a3 | 700 | } else { |
| f8c7a42d | 701 | ksnprintf(SNPARGS(action2, 0), "Unreach %d", |
| 26ef90a3 SZ |
702 | cmd->arg1); |
| 703 | } | |
| 984263bc MD |
704 | break; |
| 705 | ||
| 706 | case O_ACCEPT: | |
| 707 | action = "Accept"; | |
| 708 | break; | |
| 26ef90a3 | 709 | |
| 984263bc MD |
710 | case O_COUNT: |
| 711 | action = "Count"; | |
| 712 | break; | |
| 26ef90a3 | 713 | |
| 984263bc | 714 | case O_DIVERT: |
| 26ef90a3 | 715 | ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1); |
| 984263bc | 716 | break; |
| 26ef90a3 | 717 | |
| 984263bc | 718 | case O_TEE: |
| 26ef90a3 | 719 | ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); |
| 984263bc | 720 | break; |
| 26ef90a3 | 721 | |
| 984263bc | 722 | case O_SKIPTO: |
| 26ef90a3 | 723 | ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); |
| 984263bc | 724 | break; |
| 26ef90a3 | 725 | |
| 984263bc | 726 | case O_PIPE: |
| 26ef90a3 | 727 | ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1); |
| 984263bc | 728 | break; |
| 26ef90a3 | 729 | |
| 984263bc | 730 | case O_QUEUE: |
| 26ef90a3 | 731 | ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1); |
| 984263bc | 732 | break; |
| 26ef90a3 SZ |
733 | |
| 734 | case O_FORWARD_IP: | |
| 735 | { | |
| 736 | ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; | |
| 737 | int len; | |
| 738 | ||
| 739 | len = ksnprintf(SNPARGS(action2, 0), | |
| 740 | "Forward to %s", | |
| 741 | inet_ntoa(sa->sa.sin_addr)); | |
| 742 | if (sa->sa.sin_port) { | |
| 743 | ksnprintf(SNPARGS(action2, len), ":%d", | |
| 744 | sa->sa.sin_port); | |
| 745 | } | |
| 984263bc MD |
746 | } |
| 747 | break; | |
| 26ef90a3 | 748 | |
| 984263bc MD |
749 | default: |
| 750 | action = "UNKNOWN"; | |
| 751 | break; | |
| 752 | } | |
| 753 | } | |
| 754 | ||
| 755 | if (hlen == 0) { /* non-ip */ | |
| f8c7a42d | 756 | ksnprintf(SNPARGS(proto, 0), "MAC"); |
| 984263bc MD |
757 | } else { |
| 758 | struct ip *ip = mtod(m, struct ip *); | |
| 759 | /* these three are all aliases to the same thing */ | |
| 760 | struct icmp *const icmp = L3HDR(struct icmp, ip); | |
| 761 | struct tcphdr *const tcp = (struct tcphdr *)icmp; | |
| 762 | struct udphdr *const udp = (struct udphdr *)icmp; | |
| 763 | ||
| 764 | int ip_off, offset, ip_len; | |
| 984263bc MD |
765 | int len; |
| 766 | ||
| 767 | if (eh != NULL) { /* layer 2 packets are as on the wire */ | |
| 768 | ip_off = ntohs(ip->ip_off); | |
| 769 | ip_len = ntohs(ip->ip_len); | |
| 770 | } else { | |
| 771 | ip_off = ip->ip_off; | |
| 772 | ip_len = ip->ip_len; | |
| 773 | } | |
| 774 | offset = ip_off & IP_OFFMASK; | |
| 775 | switch (ip->ip_p) { | |
| 776 | case IPPROTO_TCP: | |
| f8c7a42d | 777 | len = ksnprintf(SNPARGS(proto, 0), "TCP %s", |
| 26ef90a3 SZ |
778 | inet_ntoa(ip->ip_src)); |
| 779 | if (offset == 0) { | |
| f8c7a42d | 780 | ksnprintf(SNPARGS(proto, len), ":%d %s:%d", |
| 26ef90a3 SZ |
781 | ntohs(tcp->th_sport), |
| 782 | inet_ntoa(ip->ip_dst), | |
| 783 | ntohs(tcp->th_dport)); | |
| 784 | } else { | |
| f8c7a42d | 785 | ksnprintf(SNPARGS(proto, len), " %s", |
| 26ef90a3 SZ |
786 | inet_ntoa(ip->ip_dst)); |
| 787 | } | |
| 984263bc MD |
788 | break; |
| 789 | ||
| 790 | case IPPROTO_UDP: | |
| f8c7a42d | 791 | len = ksnprintf(SNPARGS(proto, 0), "UDP %s", |
| 26ef90a3 SZ |
792 | inet_ntoa(ip->ip_src)); |
| 793 | if (offset == 0) { | |
| f8c7a42d | 794 | ksnprintf(SNPARGS(proto, len), ":%d %s:%d", |
| 26ef90a3 SZ |
795 | ntohs(udp->uh_sport), |
| 796 | inet_ntoa(ip->ip_dst), | |
| 797 | ntohs(udp->uh_dport)); | |
| 798 | } else { | |
| f8c7a42d | 799 | ksnprintf(SNPARGS(proto, len), " %s", |
| 26ef90a3 SZ |
800 | inet_ntoa(ip->ip_dst)); |
| 801 | } | |
| 984263bc MD |
802 | break; |
| 803 | ||
| 804 | case IPPROTO_ICMP: | |
| 26ef90a3 | 805 | if (offset == 0) { |
| f8c7a42d | 806 | len = ksnprintf(SNPARGS(proto, 0), |
| 26ef90a3 SZ |
807 | "ICMP:%u.%u ", |
| 808 | icmp->icmp_type, | |
| 809 | icmp->icmp_code); | |
| 810 | } else { | |
| f8c7a42d | 811 | len = ksnprintf(SNPARGS(proto, 0), "ICMP "); |
| 26ef90a3 | 812 | } |
| f8c7a42d | 813 | len += ksnprintf(SNPARGS(proto, len), "%s", |
| 26ef90a3 | 814 | inet_ntoa(ip->ip_src)); |
| f8c7a42d | 815 | ksnprintf(SNPARGS(proto, len), " %s", |
| 26ef90a3 | 816 | inet_ntoa(ip->ip_dst)); |
| 984263bc MD |
817 | break; |
| 818 | ||
| 819 | default: | |
| f8c7a42d | 820 | len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, |
| 26ef90a3 | 821 | inet_ntoa(ip->ip_src)); |
| f8c7a42d | 822 | ksnprintf(SNPARGS(proto, len), " %s", |
| 26ef90a3 | 823 | inet_ntoa(ip->ip_dst)); |
| 984263bc MD |
824 | break; |
| 825 | } | |
| 826 | ||
| 26ef90a3 | 827 | if (ip_off & (IP_MF | IP_OFFMASK)) { |
| f8c7a42d | 828 | ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", |
| 26ef90a3 SZ |
829 | ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), |
| 830 | offset << 3, (ip_off & IP_MF) ? "+" : ""); | |
| 831 | } | |
| 984263bc | 832 | } |
| 26ef90a3 SZ |
833 | |
| 834 | if (oif || m->m_pkthdr.rcvif) { | |
| 984263bc | 835 | log(LOG_SECURITY | LOG_INFO, |
| 3e4a09e7 | 836 | "ipfw: %d %s %s %s via %s%s\n", |
| 984263bc MD |
837 | f ? f->rulenum : -1, |
| 838 | action, proto, oif ? "out" : "in", | |
| 3e4a09e7 | 839 | oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, |
| 984263bc | 840 | fragment); |
| 26ef90a3 | 841 | } else { |
| 984263bc MD |
842 | log(LOG_SECURITY | LOG_INFO, |
| 843 | "ipfw: %d %s %s [no if info]%s\n", | |
| 844 | f ? f->rulenum : -1, | |
| 845 | action, proto, fragment); | |
| 26ef90a3 SZ |
846 | } |
| 847 | ||
| 848 | if (limit_reached) { | |
| 984263bc MD |
849 | log(LOG_SECURITY | LOG_NOTICE, |
| 850 | "ipfw: limit %d reached on entry %d\n", | |
| 851 | limit_reached, f ? f->rulenum : -1); | |
| 26ef90a3 | 852 | } |
| 984263bc MD |
853 | } |
| 854 | ||
| f2c88f94 SZ |
855 | #undef SNPARGS |
| 856 | ||
| 984263bc MD |
857 | /* |
| 858 | * IMPORTANT: the hash function for dynamic rules must be commutative | |
| 859 | * in source and destination (ip,port), because rules are bidirectional | |
| 860 | * and we want to find both in the same bucket. | |
| 861 | */ | |
| 862 | static __inline int | |
| 863 | hash_packet(struct ipfw_flow_id *id) | |
| 864 | { | |
| a998c492 | 865 | uint32_t i; |
| 984263bc MD |
866 | |
| 867 | i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); | |
| 868 | i &= (curr_dyn_buckets - 1); | |
| 869 | return i; | |
| 870 | } | |
| 871 | ||
/**
 * Unlink a dynamic rule from a hash chain and free it.
 *
 * prev is a pointer to the previous entry of the chain (NULL if q is the
 * first entry), head is the head of the chain and q is a pointer to the
 * rule to delete.
 *
 * Modifies q (it is advanced to the entry following the removed one) and,
 * when prev is NULL, also head.  An O_LIMIT entry gives back the count it
 * holds on its parent.  dyn_count is decremented.
 *
 * NOTE: head and q are assigned to, so they must be lvalues, and every
 * argument is evaluated more than once, so none of them may have side
 * effects.
 */
#define UNLINK_DYN_RULE(prev, head, q)					\
do {									\
	ipfw_dyn_rule *old_q = (q);					\
									\
	/* remove a refcount to the parent */				\
	if ((q)->dyn_type == O_LIMIT)					\
		(q)->parent->count--;					\
	DPRINTF("-- unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",	\
		(q)->id.src_ip, (q)->id.src_port,			\
		(q)->id.dst_ip, (q)->id.dst_port, dyn_count - 1);	\
	if ((prev) != NULL)						\
		(prev)->next = (q) = (q)->next;				\
	else								\
		(head) = (q) = (q)->next;				\
	KASSERT(dyn_count > 0, ("invalid dyn count %u\n", dyn_count));	\
	dyn_count--;							\
	kfree(old_q, M_IPFW);						\
} while (0)
| 984263bc | 896 | |
/*
 * True if time 'a' is less than or equal to time 'b'.  The difference
 * (a) - (b) is computed first and then converted to int, and the sign of
 * that int decides the result.
 */
#define TIME_LEQ(a, b)	(0 >= (int)((a) - (b)))
| 984263bc MD |
898 | |
| 899 | /** | |
| 900 | * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. | |
| 901 | * | |
| 902 | * If keep_me == NULL, rules are deleted even if not expired, | |
| 903 | * otherwise only expired rules are removed. | |
| 904 | * | |
| 905 | * The value of the second parameter is also used to point to identify | |
| 906 | * a rule we absolutely do not want to remove (e.g. because we are | |
| 907 | * holding a reference to it -- this is the case with O_LIMIT_PARENT | |
| 908 | * rules). The pointer is only used for comparison, so any non-null | |
| 909 | * value will do. | |
| 910 | */ | |
| 911 | static void | |
| 997a0e9a | 912 | remove_dyn_rule_locked(struct ip_fw *rule, ipfw_dyn_rule *keep_me) |
| 984263bc | 913 | { |
| 997a0e9a | 914 | static uint32_t last_remove = 0; /* XXX */ |
| 984263bc | 915 | |
| e26039aa | 916 | #define FORCE (keep_me == NULL) |
| 984263bc MD |
917 | |
| 918 | ipfw_dyn_rule *prev, *q; | |
| c31665e4 | 919 | int i, pass = 0, max_pass = 0, unlinked = 0; |
| 984263bc MD |
920 | |
| 921 | if (ipfw_dyn_v == NULL || dyn_count == 0) | |
| 922 | return; | |
| 923 | /* do not expire more than once per second, it is useless */ | |
| 924 | if (!FORCE && last_remove == time_second) | |
| 925 | return; | |
| 926 | last_remove = time_second; | |
| 927 | ||
| 928 | /* | |
| 929 | * because O_LIMIT refer to parent rules, during the first pass only | |
| 930 | * remove child and mark any pending LIMIT_PARENT, and remove | |
| 931 | * them in a second pass. | |
| 932 | */ | |
| 933 | next_pass: | |
| 0dbcbe32 SZ |
934 | for (i = 0; i < curr_dyn_buckets; i++) { |
| 935 | for (prev = NULL, q = ipfw_dyn_v[i]; q;) { | |
| 984263bc MD |
936 | /* |
| 937 | * Logic can become complex here, so we split tests. | |
| 938 | */ | |
| 939 | if (q == keep_me) | |
| 940 | goto next; | |
| ca12e259 | 941 | if (rule != NULL && rule->stub != q->stub) |
| 984263bc MD |
942 | goto next; /* not the one we are looking for */ |
| 943 | if (q->dyn_type == O_LIMIT_PARENT) { | |
| 944 | /* | |
| 945 | * handle parent in the second pass, | |
| 946 | * record we need one. | |
| 947 | */ | |
| 948 | max_pass = 1; | |
| 949 | if (pass == 0) | |
| 950 | goto next; | |
| 0dbcbe32 | 951 | if (FORCE && q->count != 0) { |
| 984263bc | 952 | /* XXX should not happen! */ |
| 0dbcbe32 SZ |
953 | kprintf("OUCH! cannot remove rule, " |
| 954 | "count %d\n", q->count); | |
| 984263bc MD |
955 | } |
| 956 | } else { | |
| 0dbcbe32 | 957 | if (!FORCE && !TIME_LEQ(q->expire, time_second)) |
| 984263bc MD |
958 | goto next; |
| 959 | } | |
| c31665e4 | 960 | unlinked = 1; |
| 984263bc MD |
961 | UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); |
| 962 | continue; | |
| 963 | next: | |
| 0dbcbe32 SZ |
964 | prev = q; |
| 965 | q = q->next; | |
| 984263bc MD |
966 | } |
| 967 | } | |
| 968 | if (pass++ < max_pass) | |
| 969 | goto next_pass; | |
| e26039aa | 970 | |
| c31665e4 SZ |
971 | if (unlinked) |
| 972 | ++dyn_buckets_gen; | |
| 973 | ||
| e26039aa | 974 | #undef FORCE |
| 984263bc MD |
975 | } |
| 976 | ||
| 984263bc MD |
977 | /** |
| 978 | * lookup a dynamic rule. | |
| 979 | */ | |
| 980 | static ipfw_dyn_rule * | |
| 981 | lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, | |
| 0dbcbe32 | 982 | struct tcphdr *tcp) |
| 984263bc MD |
983 | { |
| 984 | /* | |
| 985 | * stateful ipfw extensions. | |
| 986 | * Lookup into dynamic session queue | |
| 987 | */ | |
| 988 | #define MATCH_REVERSE 0 | |
| 989 | #define MATCH_FORWARD 1 | |
| 990 | #define MATCH_NONE 2 | |
| 991 | #define MATCH_UNKNOWN 3 | |
| 997a0e9a | 992 | int i, dir = MATCH_NONE; |
| 984263bc MD |
993 | ipfw_dyn_rule *prev, *q=NULL; |
| 994 | ||
| 995 | if (ipfw_dyn_v == NULL) | |
| 996 | goto done; /* not found */ | |
| 0dbcbe32 SZ |
997 | |
| 998 | i = hash_packet(pkt); | |
| 999 | for (prev = NULL, q = ipfw_dyn_v[i]; q != NULL;) { | |
| 984263bc MD |
1000 | if (q->dyn_type == O_LIMIT_PARENT) |
| 1001 | goto next; | |
| 0dbcbe32 | 1002 | |
| 997a0e9a SZ |
1003 | if (TIME_LEQ(q->expire, time_second)) { |
| 1004 | /* | |
| 1005 | * Entry expired; skip. | |
| 1006 | * Let ipfw_tick() take care of it | |
| 1007 | */ | |
| 1008 | goto next; | |
| 984263bc | 1009 | } |
| 997a0e9a | 1010 | |
| 0dbcbe32 | 1011 | if (pkt->proto == q->id.proto) { |
| 984263bc MD |
1012 | if (pkt->src_ip == q->id.src_ip && |
| 1013 | pkt->dst_ip == q->id.dst_ip && | |
| 1014 | pkt->src_port == q->id.src_port && | |
| 0dbcbe32 | 1015 | pkt->dst_port == q->id.dst_port) { |
| 984263bc MD |
1016 | dir = MATCH_FORWARD; |
| 1017 | break; | |
| 1018 | } | |
| 1019 | if (pkt->src_ip == q->id.dst_ip && | |
| 1020 | pkt->dst_ip == q->id.src_ip && | |
| 1021 | pkt->src_port == q->id.dst_port && | |
| 0dbcbe32 | 1022 | pkt->dst_port == q->id.src_port) { |
| 984263bc MD |
1023 | dir = MATCH_REVERSE; |
| 1024 | break; | |
| 1025 | } | |
| 1026 | } | |
| 1027 | next: | |
| 1028 | prev = q; | |
| 1029 | q = q->next; | |
| 1030 | } | |
| 1031 | if (q == NULL) | |
| 1032 | goto done; /* q = NULL, not found */ | |
| 1033 | ||
| 984263bc MD |
1034 | if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ |
| 1035 | u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); | |
| 1036 | ||
| 1037 | #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) | |
| 1038 | #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) | |
| 0dbcbe32 | 1039 | |
| 984263bc MD |
1040 | q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); |
| 1041 | switch (q->state) { | |
| 1042 | case TH_SYN: /* opening */ | |
| 1043 | q->expire = time_second + dyn_syn_lifetime; | |
| 1044 | break; | |
| 1045 | ||
| 1046 | case BOTH_SYN: /* move to established */ | |
| 1047 | case BOTH_SYN | TH_FIN : /* one side tries to close */ | |
| 1048 | case BOTH_SYN | (TH_FIN << 8) : | |
| 1049 | if (tcp) { | |
| 0dbcbe32 SZ |
1050 | uint32_t ack = ntohl(tcp->th_ack); |
| 1051 | ||
| 1052 | #define _SEQ_GE(a, b) ((int)(a) - (int)(b) >= 0) | |
| 1053 | ||
| 1054 | if (dir == MATCH_FORWARD) { | |
| 1055 | if (q->ack_fwd == 0 || | |
| 1056 | _SEQ_GE(ack, q->ack_fwd)) | |
| 1057 | q->ack_fwd = ack; | |
| 1058 | else /* ignore out-of-sequence */ | |
| 1059 | break; | |
| 1060 | } else { | |
| 1061 | if (q->ack_rev == 0 || | |
| 1062 | _SEQ_GE(ack, q->ack_rev)) | |
| 1063 | q->ack_rev = ack; | |
| 1064 | else /* ignore out-of-sequence */ | |
| 1065 | break; | |
| 984263bc | 1066 | } |
| 0dbcbe32 | 1067 | #undef _SEQ_GE |
| 984263bc MD |
1068 | } |
| 1069 | q->expire = time_second + dyn_ack_lifetime; | |
| 1070 | break; | |
| 1071 | ||
| 1072 | case BOTH_SYN | BOTH_FIN: /* both sides closed */ | |
| 997a0e9a | 1073 | KKASSERT(dyn_fin_lifetime < dyn_keepalive_period); |
| 984263bc MD |
1074 | q->expire = time_second + dyn_fin_lifetime; |
| 1075 | break; | |
| 1076 | ||
| 1077 | default: | |
| 1078 | #if 0 | |
| 1079 | /* | |
| 1080 | * reset or some invalid combination, but can also | |
| 1081 | * occur if we use keep-state the wrong way. | |
| 1082 | */ | |
| 0dbcbe32 | 1083 | if ((q->state & ((TH_RST << 8) | TH_RST)) == 0) |
| 4b1cf444 | 1084 | kprintf("invalid state: 0x%x\n", q->state); |
| 984263bc | 1085 | #endif |
| 997a0e9a | 1086 | KKASSERT(dyn_rst_lifetime < dyn_keepalive_period); |
| 984263bc MD |
1087 | q->expire = time_second + dyn_rst_lifetime; |
| 1088 | break; | |
| 1089 | } | |
| 1090 | } else if (pkt->proto == IPPROTO_UDP) { | |
| 1091 | q->expire = time_second + dyn_udp_lifetime; | |
| 1092 | } else { | |
| 1093 | /* other protocols */ | |
| 1094 | q->expire = time_second + dyn_short_lifetime; | |
| 1095 | } | |
| 1096 | done: | |
| 1097 | if (match_direction) | |
| 1098 | *match_direction = dir; | |
| 1099 | return q; | |
| 1100 | } | |
| 1101 | ||
| 997a0e9a SZ |
1102 | static struct ip_fw * |
| 1103 | lookup_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp, | |
| 1104 | uint16_t len, int *deny) | |
| 1105 | { | |
| 1106 | struct ip_fw *rule = NULL; | |
| 1107 | ipfw_dyn_rule *q; | |
| ca12e259 | 1108 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 997a0e9a SZ |
1109 | uint32_t gen; |
| 1110 | ||
| 1111 | *deny = 0; | |
| ca12e259 | 1112 | gen = ctx->ipfw_gen; |
| 997a0e9a SZ |
1113 | |
| 1114 | lockmgr(&dyn_lock, LK_SHARED); | |
| 1115 | ||
| ca12e259 | 1116 | if (ctx->ipfw_gen != gen) { |
| 997a0e9a SZ |
1117 | /* |
| 1118 | * Static rules had been change when we were waiting | |
| 1119 | * for the dynamic hash table lock; deny this packet, | |
| 1120 | * since it is _not_ known whether it is safe to keep | |
| 1121 | * iterating the static rules. | |
| 1122 | */ | |
| 1123 | *deny = 1; | |
| 1124 | goto back; | |
| 1125 | } | |
| 1126 | ||
| 1127 | q = lookup_dyn_rule(pkt, match_direction, tcp); | |
| 1128 | if (q == NULL) { | |
| 1129 | rule = NULL; | |
| 1130 | } else { | |
| ca12e259 SZ |
1131 | rule = q->stub->rule[mycpuid]; |
| 1132 | KKASSERT(rule->stub == q->stub && rule->cpuid == mycpuid); | |
| 997a0e9a SZ |
1133 | |
| 1134 | /* XXX */ | |
| 1135 | q->pcnt++; | |
| 1136 | q->bcnt += len; | |
| 1137 | } | |
| 1138 | back: | |
| 1139 | lockmgr(&dyn_lock, LK_RELEASE); | |
| 1140 | return rule; | |
| 1141 | } | |
| 1142 | ||
| 984263bc MD |
1143 | static void |
| 1144 | realloc_dynamic_table(void) | |
| 1145 | { | |
| af162095 SZ |
1146 | ipfw_dyn_rule **old_dyn_v; |
| 1147 | uint32_t old_curr_dyn_buckets; | |
| 1148 | ||
| 997a0e9a SZ |
1149 | KASSERT(dyn_buckets <= 65536 && (dyn_buckets & (dyn_buckets - 1)) == 0, |
| 1150 | ("invalid dyn_buckets %d\n", dyn_buckets)); | |
| 0dbcbe32 | 1151 | |
| af162095 SZ |
1152 | /* Save the current buckets array for later error recovery */ |
| 1153 | old_dyn_v = ipfw_dyn_v; | |
| 1154 | old_curr_dyn_buckets = curr_dyn_buckets; | |
| 0dbcbe32 | 1155 | |
| af162095 | 1156 | curr_dyn_buckets = dyn_buckets; |
| 984263bc | 1157 | for (;;) { |
| 77652cad | 1158 | ipfw_dyn_v = kmalloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), |
| af162095 | 1159 | M_IPFW, M_NOWAIT | M_ZERO); |
| 984263bc MD |
1160 | if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) |
| 1161 | break; | |
| af162095 | 1162 | |
| 984263bc | 1163 | curr_dyn_buckets /= 2; |
| af162095 SZ |
1164 | if (curr_dyn_buckets <= old_curr_dyn_buckets && |
| 1165 | old_dyn_v != NULL) { | |
| 1166 | /* | |
| 1167 | * Don't try allocating smaller buckets array, reuse | |
| 1168 | * the old one, which alreay contains enough buckets | |
| 1169 | */ | |
| 1170 | break; | |
| 1171 | } | |
| 1172 | } | |
| 1173 | ||
| 1174 | if (ipfw_dyn_v != NULL) { | |
| 1175 | if (old_dyn_v != NULL) | |
| 1176 | kfree(old_dyn_v, M_IPFW); | |
| 1177 | } else { | |
| 1178 | /* Allocation failed, restore old buckets array */ | |
| 1179 | ipfw_dyn_v = old_dyn_v; | |
| 1180 | curr_dyn_buckets = old_curr_dyn_buckets; | |
| 984263bc | 1181 | } |
| c31665e4 SZ |
1182 | |
| 1183 | if (ipfw_dyn_v != NULL) | |
| 1184 | ++dyn_buckets_gen; | |
| 984263bc MD |
1185 | } |
| 1186 | ||
| 1187 | /** | |
| 1188 | * Install state of type 'type' for a dynamic session. | |
| 1189 | * The hash table contains two type of rules: | |
| 1190 | * - regular rules (O_KEEP_STATE) | |
| 1191 | * - rules for sessions with limited number of sess per user | |
| 1192 | * (O_LIMIT). When they are created, the parent is | |
| 1193 | * increased by 1, and decreased on delete. In this case, | |
| 1194 | * the third parameter is the parent rule and not the chain. | |
| 1195 | * - "parent" rules for the above (O_LIMIT_PARENT). | |
| 1196 | */ | |
| 1197 | static ipfw_dyn_rule * | |
| a998c492 | 1198 | add_dyn_rule(struct ipfw_flow_id *id, uint8_t dyn_type, struct ip_fw *rule) |
| 984263bc MD |
1199 | { |
| 1200 | ipfw_dyn_rule *r; | |
| 1201 | int i; | |
| 1202 | ||
| 1203 | if (ipfw_dyn_v == NULL || | |
| 1204 | (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) { | |
| 1205 | realloc_dynamic_table(); | |
| 1206 | if (ipfw_dyn_v == NULL) | |
| 1207 | return NULL; /* failed ! */ | |
| 1208 | } | |
| 1209 | i = hash_packet(id); | |
| 1210 | ||
| af162095 | 1211 | r = kmalloc(sizeof(*r), M_IPFW, M_NOWAIT | M_ZERO); |
| 984263bc | 1212 | if (r == NULL) { |
| 4b1cf444 | 1213 | kprintf ("sorry cannot allocate state\n"); |
| 984263bc MD |
1214 | return NULL; |
| 1215 | } | |
| 1216 | ||
| 1217 | /* increase refcount on parent, and set pointer */ | |
| 1218 | if (dyn_type == O_LIMIT) { | |
| 1219 | ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; | |
| 0dbcbe32 SZ |
1220 | |
| 1221 | if (parent->dyn_type != O_LIMIT_PARENT) | |
| 984263bc MD |
1222 | panic("invalid parent"); |
| 1223 | parent->count++; | |
| 1224 | r->parent = parent; | |
| ca12e259 SZ |
1225 | rule = parent->stub->rule[mycpuid]; |
| 1226 | KKASSERT(rule->stub == parent->stub); | |
| 984263bc | 1227 | } |
| ca12e259 | 1228 | KKASSERT(rule->cpuid == mycpuid && rule->stub != NULL); |
| 984263bc MD |
1229 | |
| 1230 | r->id = *id; | |
| 1231 | r->expire = time_second + dyn_syn_lifetime; | |
| ca12e259 | 1232 | r->stub = rule->stub; |
| 984263bc MD |
1233 | r->dyn_type = dyn_type; |
| 1234 | r->pcnt = r->bcnt = 0; | |
| 1235 | r->count = 0; | |
| 1236 | ||
| 1237 | r->bucket = i; | |
| 1238 | r->next = ipfw_dyn_v[i]; | |
| 1239 | ipfw_dyn_v[i] = r; | |
| 1240 | dyn_count++; | |
| c31665e4 | 1241 | dyn_buckets_gen++; |
| 8d0865c8 SZ |
1242 | DPRINTF("-- add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n", |
| 1243 | dyn_type, | |
| 1244 | r->id.src_ip, r->id.src_port, | |
| 1245 | r->id.dst_ip, r->id.dst_port, dyn_count); | |
| 984263bc MD |
1246 | return r; |
| 1247 | } | |
| 1248 | ||
| 1249 | /** | |
| 1250 | * lookup dynamic parent rule using pkt and rule as search keys. | |
| 1251 | * If the lookup fails, then install one. | |
| 1252 | */ | |
| 1253 | static ipfw_dyn_rule * | |
| 1254 | lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) | |
| 1255 | { | |
| 1256 | ipfw_dyn_rule *q; | |
| 1257 | int i; | |
| 1258 | ||
| 1259 | if (ipfw_dyn_v) { | |
| 0dbcbe32 SZ |
1260 | i = hash_packet(pkt); |
| 1261 | for (q = ipfw_dyn_v[i]; q != NULL; q = q->next) { | |
| 984263bc | 1262 | if (q->dyn_type == O_LIMIT_PARENT && |
| ca12e259 | 1263 | rule->stub == q->stub && |
| 984263bc MD |
1264 | pkt->proto == q->id.proto && |
| 1265 | pkt->src_ip == q->id.src_ip && | |
| 1266 | pkt->dst_ip == q->id.dst_ip && | |
| 1267 | pkt->src_port == q->id.src_port && | |
| 1268 | pkt->dst_port == q->id.dst_port) { | |
| 1269 | q->expire = time_second + dyn_short_lifetime; | |
| 8d0865c8 | 1270 | DPRINTF("lookup_dyn_parent found 0x%p\n", q); |
| 984263bc MD |
1271 | return q; |
| 1272 | } | |
| 0dbcbe32 | 1273 | } |
| 984263bc MD |
1274 | } |
| 1275 | return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); | |
| 1276 | } | |
| 1277 | ||
| 1278 | /** | |
| 1279 | * Install dynamic state for rule type cmd->o.opcode | |
| 1280 | * | |
| 1281 | * Returns 1 (failure) if state is not installed because of errors or because | |
| 1282 | * session limitations are enforced. | |
| 1283 | */ | |
| 1284 | static int | |
| 997a0e9a SZ |
1285 | install_state_locked(struct ip_fw *rule, ipfw_insn_limit *cmd, |
| 1286 | struct ip_fw_args *args) | |
| 984263bc | 1287 | { |
| 997a0e9a | 1288 | static int last_log; /* XXX */ |
| 984263bc MD |
1289 | |
| 1290 | ipfw_dyn_rule *q; | |
| 1291 | ||
| 8d0865c8 SZ |
1292 | DPRINTF("-- install state type %d 0x%08x %u -> 0x%08x %u\n", |
| 1293 | cmd->o.opcode, | |
| 1294 | args->f_id.src_ip, args->f_id.src_port, | |
| 1295 | args->f_id.dst_ip, args->f_id.dst_port); | |
| 984263bc MD |
1296 | |
| 1297 | q = lookup_dyn_rule(&args->f_id, NULL, NULL); | |
| 984263bc MD |
1298 | if (q != NULL) { /* should never occur */ |
| 1299 | if (last_log != time_second) { | |
| 1300 | last_log = time_second; | |
| 4b1cf444 | 1301 | kprintf(" install_state: entry already present, done\n"); |
| 984263bc MD |
1302 | } |
| 1303 | return 0; | |
| 1304 | } | |
| 1305 | ||
| 0dbcbe32 | 1306 | if (dyn_count >= dyn_max) { |
| 984263bc MD |
1307 | /* |
| 1308 | * Run out of slots, try to remove any expired rule. | |
| 1309 | */ | |
| 997a0e9a SZ |
1310 | remove_dyn_rule_locked(NULL, (ipfw_dyn_rule *)1); |
| 1311 | if (dyn_count >= dyn_max) { | |
| 1312 | if (last_log != time_second) { | |
| 1313 | last_log = time_second; | |
| 1314 | kprintf("install_state: " | |
| 1315 | "Too many dynamic rules\n"); | |
| 1316 | } | |
| 1317 | return 1; /* cannot install, notify caller */ | |
| 984263bc | 1318 | } |
| 984263bc MD |
1319 | } |
| 1320 | ||
| 1321 | switch (cmd->o.opcode) { | |
| 1322 | case O_KEEP_STATE: /* bidir rule */ | |
| af162095 SZ |
1323 | if (add_dyn_rule(&args->f_id, O_KEEP_STATE, rule) == NULL) |
| 1324 | return 1; | |
| 984263bc MD |
1325 | break; |
| 1326 | ||
| 1327 | case O_LIMIT: /* limit number of sessions */ | |
| 0dbcbe32 SZ |
1328 | { |
| 1329 | uint16_t limit_mask = cmd->limit_mask; | |
| 1330 | struct ipfw_flow_id id; | |
| 1331 | ipfw_dyn_rule *parent; | |
| 1332 | ||
| 8d0865c8 SZ |
1333 | DPRINTF("installing dyn-limit rule %d\n", |
| 1334 | cmd->conn_limit); | |
| 0dbcbe32 SZ |
1335 | |
| 1336 | id.dst_ip = id.src_ip = 0; | |
| 1337 | id.dst_port = id.src_port = 0; | |
| 1338 | id.proto = args->f_id.proto; | |
| 1339 | ||
| 1340 | if (limit_mask & DYN_SRC_ADDR) | |
| 1341 | id.src_ip = args->f_id.src_ip; | |
| 1342 | if (limit_mask & DYN_DST_ADDR) | |
| 1343 | id.dst_ip = args->f_id.dst_ip; | |
| 1344 | if (limit_mask & DYN_SRC_PORT) | |
| 1345 | id.src_port = args->f_id.src_port; | |
| 1346 | if (limit_mask & DYN_DST_PORT) | |
| 1347 | id.dst_port = args->f_id.dst_port; | |
| 1348 | ||
| 1349 | parent = lookup_dyn_parent(&id, rule); | |
| 1350 | if (parent == NULL) { | |
| 1351 | kprintf("add parent failed\n"); | |
| 1352 | return 1; | |
| 1353 | } | |
| 1354 | ||
| 984263bc | 1355 | if (parent->count >= cmd->conn_limit) { |
| 0dbcbe32 SZ |
1356 | /* |
| 1357 | * See if we can remove some expired rule. | |
| 1358 | */ | |
| 997a0e9a | 1359 | remove_dyn_rule_locked(rule, parent); |
| 0dbcbe32 SZ |
1360 | if (parent->count >= cmd->conn_limit) { |
| 1361 | if (fw_verbose && | |
| 1362 | last_log != time_second) { | |
| 1363 | last_log = time_second; | |
| 1364 | log(LOG_SECURITY | LOG_DEBUG, | |
| 1365 | "drop session, " | |
| 1366 | "too many entries\n"); | |
| 1367 | } | |
| 1368 | return 1; | |
| 984263bc | 1369 | } |
| 984263bc | 1370 | } |
| af162095 SZ |
1371 | if (add_dyn_rule(&args->f_id, O_LIMIT, |
| 1372 | (struct ip_fw *)parent) == NULL) | |
| 1373 | return 1; | |
| 984263bc | 1374 | } |
| 984263bc MD |
1375 | break; |
| 1376 | default: | |
| 4b1cf444 | 1377 | kprintf("unknown dynamic rule type %u\n", cmd->o.opcode); |
| 984263bc MD |
1378 | return 1; |
| 1379 | } | |
| 1380 | lookup_dyn_rule(&args->f_id, NULL, NULL); /* XXX just set lifetime */ | |
| 1381 | return 0; | |
| 1382 | } | |
| 1383 | ||
| 997a0e9a SZ |
1384 | static int |
| 1385 | install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, | |
| 1386 | struct ip_fw_args *args, int *deny) | |
| 1387 | { | |
| ca12e259 | 1388 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 997a0e9a SZ |
1389 | uint32_t gen; |
| 1390 | int ret = 0; | |
| 1391 | ||
| 1392 | *deny = 0; | |
| ca12e259 | 1393 | gen = ctx->ipfw_gen; |
| 997a0e9a SZ |
1394 | |
| 1395 | lockmgr(&dyn_lock, LK_EXCLUSIVE); | |
| ca12e259 | 1396 | if (ctx->ipfw_gen != gen) { |
| 997a0e9a SZ |
1397 | /* See the comment in lookup_rule() */ |
| 1398 | *deny = 1; | |
| 1399 | } else { | |
| 1400 | ret = install_state_locked(rule, cmd, args); | |
| 1401 | } | |
| 1402 | lockmgr(&dyn_lock, LK_RELEASE); | |
| 1403 | ||
| 1404 | return ret; | |
| 1405 | } | |
| 1406 | ||
| 984263bc MD |
1407 | /* |
| 1408 | * Transmit a TCP packet, containing either a RST or a keepalive. | |
| 1409 | * When flags & TH_RST, we are sending a RST packet, because of a | |
| 1410 | * "reset" action matched the packet. | |
| 1411 | * Otherwise we are sending a keepalive, and flags & TH_ | |
| 1412 | */ | |
| 1413 | static void | |
| a998c492 | 1414 | send_pkt(struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) |
| 984263bc MD |
1415 | { |
| 1416 | struct mbuf *m; | |
| 1417 | struct ip *ip; | |
| 1418 | struct tcphdr *tcp; | |
| 1419 | struct route sro; /* fake route */ | |
| 1420 | ||
| 74f1caca | 1421 | MGETHDR(m, MB_DONTWAIT, MT_HEADER); |
| 3f944588 | 1422 | if (m == NULL) |
| 984263bc | 1423 | return; |
| 6aabd1a4 | 1424 | m->m_pkthdr.rcvif = NULL; |
| 984263bc MD |
1425 | m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); |
| 1426 | m->m_data += max_linkhdr; | |
| 1427 | ||
| 1428 | ip = mtod(m, struct ip *); | |
| 1429 | bzero(ip, m->m_len); | |
| 1430 | tcp = (struct tcphdr *)(ip + 1); /* no IP options */ | |
| 1431 | ip->ip_p = IPPROTO_TCP; | |
| 1432 | tcp->th_off = 5; | |
| 6aabd1a4 | 1433 | |
| 984263bc MD |
1434 | /* |
| 1435 | * Assume we are sending a RST (or a keepalive in the reverse | |
| 1436 | * direction), swap src and destination addresses and ports. | |
| 1437 | */ | |
| 1438 | ip->ip_src.s_addr = htonl(id->dst_ip); | |
| 1439 | ip->ip_dst.s_addr = htonl(id->src_ip); | |
| 1440 | tcp->th_sport = htons(id->dst_port); | |
| 1441 | tcp->th_dport = htons(id->src_port); | |
| 1442 | if (flags & TH_RST) { /* we are sending a RST */ | |
| 1443 | if (flags & TH_ACK) { | |
| 1444 | tcp->th_seq = htonl(ack); | |
| 1445 | tcp->th_ack = htonl(0); | |
| 1446 | tcp->th_flags = TH_RST; | |
| 1447 | } else { | |
| 1448 | if (flags & TH_SYN) | |
| 1449 | seq++; | |
| 1450 | tcp->th_seq = htonl(0); | |
| 1451 | tcp->th_ack = htonl(seq); | |
| 1452 | tcp->th_flags = TH_RST | TH_ACK; | |
| 1453 | } | |
| 1454 | } else { | |
| 1455 | /* | |
| 1456 | * We are sending a keepalive. flags & TH_SYN determines | |
| 1457 | * the direction, forward if set, reverse if clear. | |
| 1458 | * NOTE: seq and ack are always assumed to be correct | |
| 1459 | * as set by the caller. This may be confusing... | |
| 1460 | */ | |
| 1461 | if (flags & TH_SYN) { | |
| 1462 | /* | |
| 1463 | * we have to rewrite the correct addresses! | |
| 1464 | */ | |
| 1465 | ip->ip_dst.s_addr = htonl(id->dst_ip); | |
| 1466 | ip->ip_src.s_addr = htonl(id->src_ip); | |
| 1467 | tcp->th_dport = htons(id->dst_port); | |
| 1468 | tcp->th_sport = htons(id->src_port); | |
| 1469 | } | |
| 1470 | tcp->th_seq = htonl(seq); | |
| 1471 | tcp->th_ack = htonl(ack); | |
| 1472 | tcp->th_flags = TH_ACK; | |
| 1473 | } | |
| 6aabd1a4 | 1474 | |
| 984263bc MD |
1475 | /* |
| 1476 | * set ip_len to the payload size so we can compute | |
| 1477 | * the tcp checksum on the pseudoheader | |
| 1478 | * XXX check this, could save a couple of words ? | |
| 1479 | */ | |
| 1480 | ip->ip_len = htons(sizeof(struct tcphdr)); | |
| 1481 | tcp->th_sum = in_cksum(m, m->m_pkthdr.len); | |
| 6aabd1a4 | 1482 | |
| 984263bc MD |
1483 | /* |
| 1484 | * now fill fields left out earlier | |
| 1485 | */ | |
| 1486 | ip->ip_ttl = ip_defttl; | |
| 1487 | ip->ip_len = m->m_pkthdr.len; | |
| 6aabd1a4 SZ |
1488 | |
| 1489 | bzero(&sro, sizeof(sro)); | |
| 984263bc | 1490 | ip_rtaddr(ip->ip_dst, &sro); |
| 6aabd1a4 | 1491 | |
| f2c2ec09 | 1492 | m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; |
| 984263bc MD |
1493 | ip_output(m, NULL, &sro, 0, NULL, NULL); |
| 1494 | if (sro.ro_rt) | |
| 1495 | RTFREE(sro.ro_rt); | |
| 1496 | } | |
| 1497 | ||
| 1498 | /* | |
| 1499 | * sends a reject message, consuming the mbuf passed as an argument. | |
| 1500 | */ | |
| 1501 | static void | |
| 1502 | send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) | |
| 1503 | { | |
| 984263bc MD |
1504 | if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ |
| 1505 | /* We need the IP header in host order for icmp_error(). */ | |
| 1506 | if (args->eh != NULL) { | |
| 1507 | struct ip *ip = mtod(args->m, struct ip *); | |
| 48fabf32 | 1508 | |
| 984263bc MD |
1509 | ip->ip_len = ntohs(ip->ip_len); |
| 1510 | ip->ip_off = ntohs(ip->ip_off); | |
| 1511 | } | |
| 1512 | icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); | |
| 1513 | } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { | |
| 1514 | struct tcphdr *const tcp = | |
| 1515 | L3HDR(struct tcphdr, mtod(args->m, struct ip *)); | |
| 48fabf32 SZ |
1516 | |
| 1517 | if ((tcp->th_flags & TH_RST) == 0) { | |
| 1518 | send_pkt(&args->f_id, ntohl(tcp->th_seq), | |
| 1519 | ntohl(tcp->th_ack), tcp->th_flags | TH_RST); | |
| 1520 | } | |
| 984263bc | 1521 | m_freem(args->m); |
| 48fabf32 | 1522 | } else { |
| 984263bc | 1523 | m_freem(args->m); |
| 48fabf32 | 1524 | } |
| 984263bc MD |
1525 | args->m = NULL; |
| 1526 | } | |
| 1527 | ||
| 1528 | /** | |
| 1529 | * | |
| 1530 | * Given an ip_fw *, lookup_next_rule will return a pointer | |
| 1531 | * to the next rule, which can be either the jump | |
| 1532 | * target (for skipto instructions) or the next one in the list (in | |
| 1533 | * all other cases including a missing jump target). | |
| 1534 | * The result is also written in the "next_rule" field of the rule. | |
| 1535 | * Backward jumps are not allowed, so start looking from the next | |
| 1536 | * rule... | |
| 1537 | * | |
| 1538 | * This never returns NULL -- in case we do not have an exact match, | |
| 1539 | * the next rule is returned. When the ruleset is changed, | |
| 1540 | * pointers are flushed so we are always correct. | |
| 1541 | */ | |
| 1542 | ||
| 1543 | static struct ip_fw * | |
| 1544 | lookup_next_rule(struct ip_fw *me) | |
| 1545 | { | |
| 1546 | struct ip_fw *rule = NULL; | |
| 1547 | ipfw_insn *cmd; | |
| 1548 | ||
| 1549 | /* look for action, in case it is a skipto */ | |
| 1550 | cmd = ACTION_PTR(me); | |
| 1551 | if (cmd->opcode == O_LOG) | |
| 1552 | cmd += F_LEN(cmd); | |
| 48fabf32 SZ |
1553 | if (cmd->opcode == O_SKIPTO) { |
| 1554 | for (rule = me->next; rule; rule = rule->next) { | |
| 984263bc MD |
1555 | if (rule->rulenum >= cmd->arg1) |
| 1556 | break; | |
| 48fabf32 SZ |
1557 | } |
| 1558 | } | |
| 984263bc MD |
1559 | if (rule == NULL) /* failure or not a skipto */ |
| 1560 | rule = me->next; | |
| 1561 | me->next_rule = rule; | |
| 1562 | return rule; | |
| 1563 | } | |
| 1564 | ||
| e5f2be89 SZ |
1565 | static int |
| 1566 | _ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, | |
| 1567 | enum ipfw_opcodes opcode, uid_t uid) | |
| 1568 | { | |
| 1569 | struct in_addr src_ip, dst_ip; | |
| 1570 | struct inpcbinfo *pi; | |
| 1571 | int wildcard; | |
| 1572 | struct inpcb *pcb; | |
| 1573 | ||
| 1574 | if (fid->proto == IPPROTO_TCP) { | |
| 1575 | wildcard = 0; | |
| 1576 | pi = &tcbinfo[mycpuid]; | |
| 1577 | } else if (fid->proto == IPPROTO_UDP) { | |
| 1578 | wildcard = 1; | |
| 1579 | pi = &udbinfo; | |
| 1580 | } else { | |
| 1581 | return 0; | |
| 1582 | } | |
| 1583 | ||
| 1584 | /* | |
| 1585 | * Values in 'fid' are in host byte order | |
| 1586 | */ | |
| 1587 | dst_ip.s_addr = htonl(fid->dst_ip); | |
| 1588 | src_ip.s_addr = htonl(fid->src_ip); | |
| 1589 | if (oif) { | |
| 1590 | pcb = in_pcblookup_hash(pi, | |
| 1591 | dst_ip, htons(fid->dst_port), | |
| 1592 | src_ip, htons(fid->src_port), | |
| 1593 | wildcard, oif); | |
| 1594 | } else { | |
| 1595 | pcb = in_pcblookup_hash(pi, | |
| 1596 | src_ip, htons(fid->src_port), | |
| 1597 | dst_ip, htons(fid->dst_port), | |
| 1598 | wildcard, NULL); | |
| 1599 | } | |
| 1600 | if (pcb == NULL || pcb->inp_socket == NULL) | |
| 1601 | return 0; | |
| 1602 | ||
| 1603 | if (opcode == O_UID) { | |
| 1604 | #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) | |
| 1605 | return !socheckuid(pcb->inp_socket, uid); | |
| 1606 | #undef socheckuid | |
| 1607 | } else { | |
| 1608 | return groupmember(uid, pcb->inp_socket->so_cred); | |
| 1609 | } | |
| 1610 | } | |
| 1611 | ||
| 1612 | static int | |
| 1613 | ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, | |
| e3f9aece | 1614 | enum ipfw_opcodes opcode, uid_t uid, int *deny) |
| e5f2be89 | 1615 | { |
| e3f9aece SZ |
1616 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 1617 | uint32_t gen; | |
| 1618 | int match = 0; | |
| 1619 | ||
| 1620 | *deny = 0; | |
| 1621 | gen = ctx->ipfw_gen; | |
| e5f2be89 SZ |
1622 | |
| 1623 | get_mplock(); | |
| e3f9aece SZ |
1624 | if (gen != ctx->ipfw_gen) { |
| 1625 | /* See the comment in lookup_rule() */ | |
| 1626 | *deny = 1; | |
| 1627 | } else { | |
| 1628 | match = _ipfw_match_uid(fid, oif, opcode, uid); | |
| 1629 | } | |
| e5f2be89 SZ |
1630 | rel_mplock(); |
| 1631 | return match; | |
| 1632 | } | |
| 1633 | ||
| 984263bc MD |
1634 | /* |
| 1635 | * The main check routine for the firewall. | |
| 1636 | * | |
| 1637 | * All arguments are in args so we can modify them and return them | |
| 1638 | * back to the caller. | |
| 1639 | * | |
| 1640 | * Parameters: | |
| 1641 | * | |
| 1642 | * args->m (in/out) The packet; we set to NULL when/if we nuke it. | |
| 1643 | * Starts with the IP header. | |
| 1644 | * args->eh (in) Mac header if present, or NULL for layer3 packet. | |
| 1645 | * args->oif Outgoing interface, or NULL if packet is incoming. | |
| 1646 | * The incoming interface is in the mbuf. (in) | |
| 984263bc MD |
1647 | * |
| 1648 | * args->rule Pointer to the last matching rule (in/out) | |
| 984263bc MD |
1649 | * args->f_id Addresses grabbed from the packet (out) |
| 1650 | * | |
| 1651 | * Return value: | |
| 1652 | * | |
| a237ddbd SZ |
1653 | * If the packet was denied/rejected and has been dropped, *m is equal |
| 1654 | * to NULL upon return. | |
| 984263bc | 1655 | * |
| a237ddbd SZ |
1656 | * IP_FW_DENY the packet must be dropped. |
| 1657 | * IP_FW_PASS The packet is to be accepted and routed normally. | |
| 1658 | * IP_FW_DIVERT Divert the packet to port (args->cookie) | |
| 1659 | * IP_FW_TEE Tee the packet to port (args->cookie) | |
| 1660 | * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie) | |
| 984263bc MD |
1661 | */ |
| 1662 | ||
| 1663 | static int | |
| 1664 | ipfw_chk(struct ip_fw_args *args) | |
| 1665 | { | |
| 1666 | /* | |
| 1667 | * Local variables hold state during the processing of a packet. | |
| 1668 | * | |
| 1669 | * IMPORTANT NOTE: to speed up the processing of rules, there | |
| 1670 | * are some assumption on the values of the variables, which | |
| 1671 | * are documented here. Should you change them, please check | |
| 1672 | * the implementation of the various instructions to make sure | |
| 1673 | * that they still work. | |
| 1674 | * | |
| 1675 | * args->eh The MAC header. It is non-null for a layer2 | |
| 1676 | * packet, it is NULL for a layer-3 packet. | |
| 1677 | * | |
| 1678 | * m | args->m Pointer to the mbuf, as received from the caller. | |
| 1679 | * It may change if ipfw_chk() does an m_pullup, or if it | |
| 1680 | * consumes the packet because it calls send_reject(). | |
| 1681 | * XXX This has to change, so that ipfw_chk() never modifies | |
| 1682 | * or consumes the buffer. | |
| 1683 | * ip is simply an alias of the value of m, and it is kept | |
| 1684 | * in sync with it (the packet is supposed to start with | |
| 1685 | * the ip header). | |
| 1686 | */ | |
| 1687 | struct mbuf *m = args->m; | |
| 1688 | struct ip *ip = mtod(m, struct ip *); | |
| 1689 | ||
| 1690 | /* | |
| 1691 | * oif | args->oif If NULL, ipfw_chk has been called on the | |
| a8d45119 | 1692 | * inbound path (ether_input, ip_input). |
| 984263bc MD |
1693 | * If non-NULL, ipfw_chk has been called on the outbound path |
| 1694 | * (ether_output, ip_output). | |
| 1695 | */ | |
| 1696 | struct ifnet *oif = args->oif; | |
| 1697 | ||
| 1698 | struct ip_fw *f = NULL; /* matching rule */ | |
| 29b27cb7 | 1699 | int retval = IP_FW_PASS; |
| e5ecc832 | 1700 | struct m_tag *mtag; |
| 68edaf54 | 1701 | struct divert_info *divinfo; |
| 984263bc MD |
1702 | |
| 1703 | /* | |
| 1704 | * hlen The length of the IPv4 header. | |
| 1705 | * hlen >0 means we have an IPv4 packet. | |
| 1706 | */ | |
| 1707 | u_int hlen = 0; /* hlen >0 means we have an IP pkt */ | |
| 1708 | ||
| 1709 | /* | |
| 1710 | * offset The offset of a fragment. offset != 0 means that | |
| 1711 | * we have a fragment at this offset of an IPv4 packet. | |
| 1712 | * offset == 0 means that (if this is an IPv4 packet) | |
| 1713 | * this is the first or only fragment. | |
| 1714 | */ | |
| 1715 | u_short offset = 0; | |
| 1716 | ||
| 1717 | /* | |
| 1718 | * Local copies of addresses. They are only valid if we have | |
| 1719 | * an IP packet. | |
| 1720 | * | |
| 1721 | * proto The protocol. Set to 0 for non-ip packets, | |
| 1722 | * or to the protocol read from the packet otherwise. | |
| 1723 | * proto != 0 means that we have an IPv4 packet. | |
| 1724 | * | |
| 1725 | * src_port, dst_port port numbers, in HOST format. Only | |
| 1726 | * valid for TCP and UDP packets. | |
| 1727 | * | |
| 1728 | * src_ip, dst_ip ip addresses, in NETWORK format. | |
| 1729 | * Only valid for IPv4 packets. | |
| 1730 | */ | |
| a998c492 SZ |
1731 | uint8_t proto; |
| 1732 | uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ | |
| 984263bc | 1733 | struct in_addr src_ip, dst_ip; /* NOTE: network format */ |
| 50050193 | 1734 | uint16_t ip_len = 0; |
| 99216103 SZ |
1735 | |
| 1736 | /* | |
| 1737 | * dyn_dir = MATCH_UNKNOWN when rules unchecked, | |
| 1738 | * MATCH_NONE when checked and not matched (dyn_f = NULL), | |
| 1739 | * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL) | |
| 1740 | */ | |
| 984263bc | 1741 | int dyn_dir = MATCH_UNKNOWN; |
| 99216103 | 1742 | struct ip_fw *dyn_f = NULL; |
| ca12e259 | 1743 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 984263bc | 1744 | |
| f2c2ec09 | 1745 | if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED) |
| 29b27cb7 | 1746 | return IP_FW_PASS; /* accept */ |
| 984263bc MD |
1747 | |
| 1748 | if (args->eh == NULL || /* layer 3 packet */ | |
| 50050193 SZ |
1749 | (m->m_pkthdr.len >= sizeof(struct ip) && |
| 1750 | ntohs(args->eh->ether_type) == ETHERTYPE_IP)) | |
| 1751 | hlen = ip->ip_hl << 2; | |
| 984263bc MD |
1752 | |
| 1753 | /* | |
| 1754 | * Collect parameters into local variables for faster matching. | |
| 1755 | */ | |
| 1756 | if (hlen == 0) { /* do not grab addresses for non-ip pkts */ | |
| 1757 | proto = args->f_id.proto = 0; /* mark f_id invalid */ | |
| 1758 | goto after_ip_checks; | |
| 1759 | } | |
| 1760 | ||
| 1761 | proto = args->f_id.proto = ip->ip_p; | |
| 1762 | src_ip = ip->ip_src; | |
| 1763 | dst_ip = ip->ip_dst; | |
| 1764 | if (args->eh != NULL) { /* layer 2 packets are as on the wire */ | |
| 1765 | offset = ntohs(ip->ip_off) & IP_OFFMASK; | |
| 1766 | ip_len = ntohs(ip->ip_len); | |
| 1767 | } else { | |
| 1768 | offset = ip->ip_off & IP_OFFMASK; | |
| 1769 | ip_len = ip->ip_len; | |
| 1770 | } | |
| 1771 | ||
| 50050193 SZ |
1772 | #define PULLUP_TO(len) \ |
| 1773 | do { \ | |
| 1774 | if (m->m_len < (len)) { \ | |
| 1775 | args->m = m = m_pullup(m, (len));\ | |
| 1776 | if (m == NULL) \ | |
| 1777 | goto pullup_failed; \ | |
| 1778 | ip = mtod(m, struct ip *); \ | |
| 1779 | } \ | |
| 1780 | } while (0) | |
| 984263bc MD |
1781 | |
| 1782 | if (offset == 0) { | |
| 1783 | switch (proto) { | |
| 1784 | case IPPROTO_TCP: | |
| 50050193 SZ |
1785 | { |
| 1786 | struct tcphdr *tcp; | |
| 1787 | ||
| 1788 | PULLUP_TO(hlen + sizeof(struct tcphdr)); | |
| 1789 | tcp = L3HDR(struct tcphdr, ip); | |
| 1790 | dst_port = tcp->th_dport; | |
| 1791 | src_port = tcp->th_sport; | |
| 1792 | args->f_id.flags = tcp->th_flags; | |
| 984263bc MD |
1793 | } |
| 1794 | break; | |
| 1795 | ||
| 1796 | case IPPROTO_UDP: | |
| 50050193 SZ |
1797 | { |
| 1798 | struct udphdr *udp; | |
| 984263bc | 1799 | |
| 50050193 SZ |
1800 | PULLUP_TO(hlen + sizeof(struct udphdr)); |
| 1801 | udp = L3HDR(struct udphdr, ip); | |
| 1802 | dst_port = udp->uh_dport; | |
| 1803 | src_port = udp->uh_sport; | |
| 984263bc MD |
1804 | } |
| 1805 | break; | |
| 1806 | ||
| 1807 | case IPPROTO_ICMP: | |
| 1808 | PULLUP_TO(hlen + 4); /* type, code and checksum. */ | |
| 1809 | args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; | |
| 1810 | break; | |
| 1811 | ||
| 1812 | default: | |
| 1813 | break; | |
| 1814 | } | |
| 984263bc MD |
1815 | } |
| 1816 | ||
| 50050193 SZ |
1817 | #undef PULLUP_TO |
| 1818 | ||
| 984263bc MD |
1819 | args->f_id.src_ip = ntohl(src_ip.s_addr); |
| 1820 | args->f_id.dst_ip = ntohl(dst_ip.s_addr); | |
| 1821 | args->f_id.src_port = src_port = ntohs(src_port); | |
| 1822 | args->f_id.dst_port = dst_port = ntohs(dst_port); | |
| 1823 | ||
| 1824 | after_ip_checks: | |
| 1825 | if (args->rule) { | |
| 1826 | /* | |
| 1827 | * Packet has already been tagged. Look for the next rule | |
| 1828 | * to restart processing. | |
| 1829 | * | |
| 1830 | * If fw_one_pass != 0 then just accept it. | |
| 1831 | * XXX should not happen here, but optimized out in | |
| 1832 | * the caller. | |
| 1833 | */ | |
| 1834 | if (fw_one_pass) | |
| 29b27cb7 | 1835 | return IP_FW_PASS; |
| 984263bc | 1836 | |
| ca12e259 SZ |
1837 | /* This rule is being/has been flushed */ |
| 1838 | if (ipfw_flushing) | |
| 29b27cb7 | 1839 | return IP_FW_DENY; |
| ca12e259 SZ |
1840 | |
| 1841 | KASSERT(args->rule->cpuid == mycpuid, | |
| 1842 | ("rule used on cpu%d\n", mycpuid)); | |
| 1843 | ||
| 84a3e25a SZ |
1844 | /* This rule was deleted */ |
| 1845 | if (args->rule->rule_flags & IPFW_RULE_F_INVALID) | |
| 29b27cb7 | 1846 | return IP_FW_DENY; |
| 84a3e25a | 1847 | |
| 984263bc MD |
1848 | f = args->rule->next_rule; |
| 1849 | if (f == NULL) | |
| 1850 | f = lookup_next_rule(args->rule); | |
| 1851 | } else { | |
| 1852 | /* | |
| 1853 | * Find the starting rule. It can be either the first | |
| 1854 | * one, or the one after divert_rule if asked so. | |
| 1855 | */ | |
| e5ecc832 JS |
1856 | int skipto; |
| 1857 | ||
| 1858 | mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); | |
| 68edaf54 SZ |
1859 | if (mtag != NULL) { |
| 1860 | divinfo = m_tag_data(mtag); | |
| 1861 | skipto = divinfo->skipto; | |
| 1862 | } else { | |
| e5ecc832 | 1863 | skipto = 0; |
| 68edaf54 | 1864 | } |
| 984263bc | 1865 | |
| ca12e259 | 1866 | f = ctx->ipfw_layer3_chain; |
| 984263bc | 1867 | if (args->eh == NULL && skipto != 0) { |
| ca12e259 SZ |
1868 | /* No skipto during rule flushing */ |
| 1869 | if (ipfw_flushing) | |
| 29b27cb7 | 1870 | return IP_FW_DENY; |
| ca12e259 | 1871 | |
| 984263bc | 1872 | if (skipto >= IPFW_DEFAULT_RULE) |
| 29b27cb7 | 1873 | return IP_FW_DENY; /* invalid */ |
| ca12e259 | 1874 | |
| 984263bc MD |
1875 | while (f && f->rulenum <= skipto) |
| 1876 | f = f->next; | |
| 1877 | if (f == NULL) /* drop packet */ | |
| 29b27cb7 | 1878 | return IP_FW_DENY; |
| ca12e259 SZ |
1879 | } else if (ipfw_flushing) { |
| 1880 | /* Rules are being flushed; skip to default rule */ | |
| 1881 | f = ctx->ipfw_default_rule; | |
| 984263bc MD |
1882 | } |
| 1883 | } | |
| e5ecc832 JS |
1884 | if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) |
| 1885 | m_tag_delete(m, mtag); | |
| 984263bc MD |
1886 | |
| 1887 | /* | |
| 1888 | * Now scan the rules, and parse microinstructions for each rule. | |
| 1889 | */ | |
| 1890 | for (; f; f = f->next) { | |
| 1891 | int l, cmdlen; | |
| 1892 | ipfw_insn *cmd; | |
| 1893 | int skip_or; /* skip rest of OR block */ | |
| 1894 | ||
| 1895 | again: | |
| ca12e259 | 1896 | if (ctx->ipfw_set_disable & (1 << f->set)) |
| 984263bc MD |
1897 | continue; |
| 1898 | ||
| 1899 | skip_or = 0; | |
| 50050193 SZ |
1900 | for (l = f->cmd_len, cmd = f->cmd; l > 0; |
| 1901 | l -= cmdlen, cmd += cmdlen) { | |
| 997a0e9a | 1902 | int match, deny; |
| 984263bc MD |
1903 | |
| 1904 | /* | |
| 1905 | * check_body is a jump target used when we find a | |
| 1906 | * CHECK_STATE, and need to jump to the body of | |
| 1907 | * the target rule. | |
| 1908 | */ | |
| 1909 | ||
| 1910 | check_body: | |
| 1911 | cmdlen = F_LEN(cmd); | |
| 1912 | /* | |
| 1913 | * An OR block (insn_1 || .. || insn_n) has the | |
| 1914 | * F_OR bit set in all but the last instruction. | |
| 1915 | * The first match will set "skip_or", and cause | |
| 1916 | * the following instructions to be skipped until | |
| 1917 | * past the one with the F_OR bit clear. | |
| 1918 | */ | |
| 1919 | if (skip_or) { /* skip this instruction */ | |
| 1920 | if ((cmd->len & F_OR) == 0) | |
| 1921 | skip_or = 0; /* next one is good */ | |
| 1922 | continue; | |
| 1923 | } | |
| 1924 | match = 0; /* set to 1 if we succeed */ | |
| 1925 | ||
| 1926 | switch (cmd->opcode) { | |
| 1927 | /* | |
| 1928 | * The first set of opcodes compares the packet's | |
| 1929 | * fields with some pattern, setting 'match' if a | |
| 1930 | * match is found. At the end of the loop there is | |
| 1931 | * logic to deal with F_NOT and F_OR flags associated | |
| 1932 | * with the opcode. | |
| 1933 | */ | |
| 1934 | case O_NOP: | |
| 1935 | match = 1; | |
| 1936 | break; | |
| 1937 | ||
| 1938 | case O_FORWARD_MAC: | |
| 4b1cf444 | 1939 | kprintf("ipfw: opcode %d unimplemented\n", |
| 50050193 | 1940 | cmd->opcode); |
| 984263bc MD |
1941 | break; |
| 1942 | ||
| 1943 | case O_GID: | |
| 1944 | case O_UID: | |
| 1945 | /* | |
| 1946 | * We only check offset == 0 && proto != 0, | |
| 1947 | * as this ensures that we have an IPv4 | |
| 1948 | * packet with the ports info. | |
| 1949 | */ | |
| 1950 | if (offset!=0) | |
| 1951 | break; | |
| 50050193 | 1952 | |
| e5f2be89 SZ |
1953 | match = ipfw_match_uid(&args->f_id, oif, |
| 1954 | cmd->opcode, | |
| e3f9aece SZ |
1955 | (uid_t)((ipfw_insn_u32 *)cmd)->d[0], |
| 1956 | &deny); | |
| 1957 | if (deny) | |
| 1958 | return IP_FW_DENY; | |
| 984263bc MD |
1959 | break; |
| 1960 | ||
| 1961 | case O_RECV: | |
| 1962 | match = iface_match(m->m_pkthdr.rcvif, | |
| 1963 | (ipfw_insn_if *)cmd); | |
| 1964 | break; | |
| 1965 | ||
| 1966 | case O_XMIT: | |
| 1967 | match = iface_match(oif, (ipfw_insn_if *)cmd); | |
| 1968 | break; | |
| 1969 | ||
| 1970 | case O_VIA: | |
| 1971 | match = iface_match(oif ? oif : | |
| 1972 | m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); | |
| 1973 | break; | |
| 1974 | ||
| 1975 | case O_MACADDR2: | |
| 1976 | if (args->eh != NULL) { /* have MAC header */ | |
| a998c492 | 1977 | uint32_t *want = (uint32_t *) |
| 984263bc | 1978 | ((ipfw_insn_mac *)cmd)->addr; |
| a998c492 | 1979 | uint32_t *mask = (uint32_t *) |
| 984263bc | 1980 | ((ipfw_insn_mac *)cmd)->mask; |
| a998c492 | 1981 | uint32_t *hdr = (uint32_t *)args->eh; |
| 984263bc MD |
1982 | |
| 1983 | match = | |
| 50050193 SZ |
1984 | (want[0] == (hdr[0] & mask[0]) && |
| 1985 | want[1] == (hdr[1] & mask[1]) && | |
| 1986 | want[2] == (hdr[2] & mask[2])); | |
| 984263bc MD |
1987 | } |
| 1988 | break; | |
| 1989 | ||
| 1990 | case O_MAC_TYPE: | |
| 1991 | if (args->eh != NULL) { | |
| a998c492 | 1992 | uint16_t t = |
| 984263bc | 1993 | ntohs(args->eh->ether_type); |
| a998c492 | 1994 | uint16_t *p = |
| 984263bc MD |
1995 | ((ipfw_insn_u16 *)cmd)->ports; |
| 1996 | int i; | |
| 1997 | ||
| 60f3eea1 SZ |
1998 | /* Special vlan handling */ |
| 1999 | if (m->m_flags & M_VLANTAG) | |
| 2000 | t = ETHERTYPE_VLAN; | |
| 2001 | ||
| 50050193 SZ |
2002 | for (i = cmdlen - 1; !match && i > 0; |
| 2003 | i--, p += 2) { | |
| 2004 | match = | |
| 2005 | (t >= p[0] && t <= p[1]); | |
| 2006 | } | |
| 984263bc MD |
2007 | } |
| 2008 | break; | |
| 2009 | ||
| 2010 | case O_FRAG: | |
| 2011 | match = (hlen > 0 && offset != 0); | |
| 2012 | break; | |
| 2013 | ||
| 2014 | case O_IN: /* "out" is "not in" */ | |
| 2015 | match = (oif == NULL); | |
| 2016 | break; | |
| 2017 | ||
| 2018 | case O_LAYER2: | |
| 2019 | match = (args->eh != NULL); | |
| 2020 | break; | |
| 2021 | ||
| 2022 | case O_PROTO: | |
| 2023 | /* | |
| 2024 | * We do not allow an arg of 0 so the | |
| 2025 | * check of "proto" only suffices. | |
| 2026 | */ | |
| 2027 | match = (proto == cmd->arg1); | |
| 2028 | break; | |
| 2029 | ||
| 2030 | case O_IP_SRC: | |
| 2031 | match = (hlen > 0 && | |
| 2032 | ((ipfw_insn_ip *)cmd)->addr.s_addr == | |
| 2033 | src_ip.s_addr); | |
| 2034 | break; | |
| 2035 | ||
| 2036 | case O_IP_SRC_MASK: | |
| 2037 | match = (hlen > 0 && | |
| 2038 | ((ipfw_insn_ip *)cmd)->addr.s_addr == | |
| 2039 | (src_ip.s_addr & | |
| 2040 | ((ipfw_insn_ip *)cmd)->mask.s_addr)); | |
| 2041 | break; | |
| 2042 | ||
| 2043 | case O_IP_SRC_ME: | |
| 2044 | if (hlen > 0) { | |
| 2045 | struct ifnet *tif; | |
| 2046 | ||
| f8983475 | 2047 | tif = INADDR_TO_IFP(&src_ip); |
| 984263bc MD |
2048 | match = (tif != NULL); |
| 2049 | } | |
| 2050 | break; | |
| 2051 | ||
| 2052 | case O_IP_DST_SET: | |
| 2053 | case O_IP_SRC_SET: | |
| 2054 | if (hlen > 0) { | |
| 3f944588 | 2055 | uint32_t *d = (uint32_t *)(cmd + 1); |
| a998c492 | 2056 | uint32_t addr = |
| 984263bc MD |
2057 | cmd->opcode == O_IP_DST_SET ? |
| 2058 | args->f_id.dst_ip : | |
| 2059 | args->f_id.src_ip; | |
| 2060 | ||
| 50050193 SZ |
2061 | if (addr < d[0]) |
| 2062 | break; | |
| 2063 | addr -= d[0]; /* subtract base */ | |
| 2064 | match = | |
| 2065 | (addr < cmd->arg1) && | |
| 2066 | (d[1 + (addr >> 5)] & | |
| 2067 | (1 << (addr & 0x1f))); | |
| 984263bc MD |
2068 | } |
| 2069 | break; | |
| 2070 | ||
| 2071 | case O_IP_DST: | |
| 2072 | match = (hlen > 0 && | |
| 2073 | ((ipfw_insn_ip *)cmd)->addr.s_addr == | |
| 2074 | dst_ip.s_addr); | |
| 2075 | break; | |
| 2076 | ||
| 2077 | case O_IP_DST_MASK: | |
| 2078 | match = (hlen > 0) && | |
| 2079 | (((ipfw_insn_ip *)cmd)->addr.s_addr == | |
| 2080 | (dst_ip.s_addr & | |
| 2081 | ((ipfw_insn_ip *)cmd)->mask.s_addr)); | |
| 2082 | break; | |
| 2083 | ||
| 2084 | case O_IP_DST_ME: | |
| 2085 | if (hlen > 0) { | |
| 2086 | struct ifnet *tif; | |
| 2087 | ||
| f8983475 | 2088 | tif = INADDR_TO_IFP(&dst_ip); |
| 984263bc MD |
2089 | match = (tif != NULL); |
| 2090 | } | |
| 2091 | break; | |
| 2092 | ||
| 2093 | case O_IP_SRCPORT: | |
| 2094 | case O_IP_DSTPORT: | |
| 2095 | /* | |
| 2096 | * offset == 0 && proto != 0 is enough | |
| 2097 | * to guarantee that we have an IPv4 | |
| 2098 | * packet with port info. | |
| 2099 | */ | |
| 2100 | if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) | |
| 2101 | && offset == 0) { | |
| a998c492 | 2102 | uint16_t x = |
| 984263bc MD |
2103 | (cmd->opcode == O_IP_SRCPORT) ? |
| 2104 | src_port : dst_port ; | |
| a998c492 | 2105 | uint16_t *p = |
| 984263bc MD |
2106 | ((ipfw_insn_u16 *)cmd)->ports; |
| 2107 | int i; | |
| 2108 | ||
| 50050193 SZ |
2109 | for (i = cmdlen - 1; !match && i > 0; |
| 2110 | i--, p += 2) { | |
| 2111 | match = | |
| 2112 | (x >= p[0] && x <= p[1]); | |
| 2113 | } | |
| 984263bc MD |
2114 | } |
| 2115 | break; | |
| 2116 | ||
| 2117 | case O_ICMPTYPE: | |
| 2118 | match = (offset == 0 && proto==IPPROTO_ICMP && | |
| 50050193 | 2119 | icmptype_match(ip, (ipfw_insn_u32 *)cmd)); |
| 984263bc MD |
2120 | break; |
| 2121 | ||
| 2122 | case O_IPOPT: | |
| 50050193 | 2123 | match = (hlen > 0 && ipopts_match(ip, cmd)); |
| 984263bc MD |
2124 | break; |
| 2125 | ||
| 2126 | case O_IPVER: | |
| 2127 | match = (hlen > 0 && cmd->arg1 == ip->ip_v); | |
| 2128 | break; | |
| 2129 | ||
| 2130 | case O_IPTTL: | |
| 2131 | match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); | |
| 2132 | break; | |
| 2133 | ||
| 2134 | case O_IPID: | |
| 2135 | match = (hlen > 0 && | |
| 2136 | cmd->arg1 == ntohs(ip->ip_id)); | |
| 2137 | break; | |
| 2138 | ||
| 2139 | case O_IPLEN: | |
| 2140 | match = (hlen > 0 && cmd->arg1 == ip_len); | |
| 2141 | break; | |
| 2142 | ||
| 2143 | case O_IPPRECEDENCE: | |
| 2144 | match = (hlen > 0 && | |
| 50050193 | 2145 | (cmd->arg1 == (ip->ip_tos & 0xe0))); |
| 984263bc MD |
2146 | break; |
| 2147 | ||
| 2148 | case O_IPTOS: | |
| 2149 | match = (hlen > 0 && | |
| 2150 | flags_match(cmd, ip->ip_tos)); | |
| 2151 | break; | |
| 2152 | ||
| 2153 | case O_TCPFLAGS: | |
| 2154 | match = (proto == IPPROTO_TCP && offset == 0 && | |
| 2155 | flags_match(cmd, | |
| 2156 | L3HDR(struct tcphdr,ip)->th_flags)); | |
| 2157 | break; | |
| 2158 | ||
| 2159 | case O_TCPOPTS: | |
| 2160 | match = (proto == IPPROTO_TCP && offset == 0 && | |
| 2161 | tcpopts_match(ip, cmd)); | |
| 2162 | break; | |
| 2163 | ||
| 2164 | case O_TCPSEQ: | |
| 2165 | match = (proto == IPPROTO_TCP && offset == 0 && | |
| 2166 | ((ipfw_insn_u32 *)cmd)->d[0] == | |
| 2167 | L3HDR(struct tcphdr,ip)->th_seq); | |
| 2168 | break; | |
| 2169 | ||
| 2170 | case O_TCPACK: | |
| 2171 | match = (proto == IPPROTO_TCP && offset == 0 && | |
| 2172 | ((ipfw_insn_u32 *)cmd)->d[0] == | |
| 2173 | L3HDR(struct tcphdr,ip)->th_ack); | |
| 2174 | break; | |
| 2175 | ||
| 2176 | case O_TCPWIN: | |
| 2177 | match = (proto == IPPROTO_TCP && offset == 0 && | |
| 2178 | cmd->arg1 == | |
| 2179 | L3HDR(struct tcphdr,ip)->th_win); | |
| 2180 | break; | |
| 2181 | ||
| 2182 | case O_ESTAB: | |
| 2183 | /* reject packets which have SYN only */ | |
| 2184 | /* XXX should i also check for TH_ACK ? */ | |
| 2185 | match = (proto == IPPROTO_TCP && offset == 0 && | |
| 2186 | (L3HDR(struct tcphdr,ip)->th_flags & | |
| 2187 | (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); | |
| 2188 | break; | |
| 2189 | ||
| 2190 | case O_LOG: | |
| 2191 | if (fw_verbose) | |
| 2192 | ipfw_log(f, hlen, args->eh, m, oif); | |
| 2193 | match = 1; | |
| 2194 | break; | |
| 2195 | ||
| 2196 | case O_PROB: | |
| cddfb7bb MD |
2197 | match = (krandom() < |
| 2198 | ((ipfw_insn_u32 *)cmd)->d[0]); | |
| 984263bc MD |
2199 | break; |
| 2200 | ||
| 2201 | /* | |
| 2202 | * The second set of opcodes represents 'actions', | |
| 2203 | * i.e. the terminal part of a rule once the packet | |
| 2204 | * matches all previous patterns. | |
| 2205 | * Typically there is only one action for each rule, | |
| 2206 | * and the opcode is stored at the end of the rule | |
| 2207 | * (but there are exceptions -- see below). | |
| 2208 | * | |
| 2209 | * In general, here we set retval and terminate the | |
| 2210 | * outer loop (would be a 'break 3' in some language, | |
| 2211 | * but we need to do a 'goto done'). | |
| 2212 | * | |
| 2213 | * Exceptions: | |
| 2214 | * O_COUNT and O_SKIPTO actions: | |
| 2215 | * instead of terminating, we jump to the next rule | |
| 2216 | * ('goto next_rule', equivalent to a 'break 2'), | |
| 2217 | * or to the SKIPTO target ('goto again' after | |
| 2218 | * having set f, cmd and l), respectively. | |
| 2219 | * | |
| 2220 | * O_LIMIT and O_KEEP_STATE: these opcodes are | |
| 2221 | * not real 'actions', and are stored right | |
| 2222 | * before the 'action' part of the rule. | |
| 2223 | * These opcodes try to install an entry in the | |
| 2224 | * state tables; if successful, we continue with | |
| 2225 | * the next opcode (match=1; break;), otherwise | |
| f5670523 SZ |
2226 | * the packet must be dropped ('goto done' after |
| 2227 | * setting retval). If static rules are changed | |
| 2228 | * during the state installation, the packet will | |
| 29b27cb7 SZ |
2229 | * be dropped and rule's stats will not beupdated |
| 2230 | * ('return IP_FW_DENY'). | |
| 984263bc MD |
2231 | * |
| 2232 | * O_PROBE_STATE and O_CHECK_STATE: these opcodes | |
| 2233 | * cause a lookup of the state table, and a jump | |
| 2234 | * to the 'action' part of the parent rule | |
| 2235 | * ('goto check_body') if an entry is found, or | |
| 2236 | * (CHECK_STATE only) a jump to the next rule if | |
| 2237 | * the entry is not found ('goto next_rule'). | |
| 2238 | * The result of the lookup is cached to make | |
| 2239 | * further instances of these opcodes are | |
| f5670523 SZ |
2240 | * effectively NOPs. If static rules are changed |
| 2241 | * during the state looking up, the packet will | |
| 29b27cb7 SZ |
2242 | * be dropped and rule's stats will not be updated |
| 2243 | * ('return IP_FW_DENY'). | |
| 984263bc MD |
2244 | */ |
| 2245 | case O_LIMIT: | |
| 2246 | case O_KEEP_STATE: | |
| ca12e259 SZ |
2247 | if (!(f->rule_flags & IPFW_RULE_F_STATE)) { |
| 2248 | kprintf("%s rule (%d) is not ready " | |
| 2249 | "on cpu%d\n", | |
| 2250 | cmd->opcode == O_LIMIT ? | |
| 2251 | "limit" : "keep state", | |
| 2252 | f->rulenum, f->cpuid); | |
| 2253 | goto next_rule; | |
| 2254 | } | |
| 984263bc | 2255 | if (install_state(f, |
| 997a0e9a SZ |
2256 | (ipfw_insn_limit *)cmd, args, &deny)) { |
| 2257 | if (deny) | |
| 29b27cb7 | 2258 | return IP_FW_DENY; |
| 997a0e9a | 2259 | |
| 29b27cb7 | 2260 | retval = IP_FW_DENY; |
| 984263bc MD |
2261 | goto done; /* error/limit violation */ |
| 2262 | } | |
| 997a0e9a | 2263 | if (deny) |
| 29b27cb7 | 2264 | return IP_FW_DENY; |
| 984263bc MD |
2265 | match = 1; |
| 2266 | break; | |
| 2267 | ||
| 2268 | case O_PROBE_STATE: | |
| 2269 | case O_CHECK_STATE: | |
| 2270 | /* | |
| 2271 | * dynamic rules are checked at the first | |
| 2272 | * keep-state or check-state occurrence, | |
| 2273 | * with the result being stored in dyn_dir. | |
| 2274 | * The compiler introduces a PROBE_STATE | |
| 2275 | * instruction for us when we have a | |
| 2276 | * KEEP_STATE (because PROBE_STATE needs | |
| 2277 | * to be run first). | |
| 2278 | */ | |
| 997a0e9a | 2279 | if (dyn_dir == MATCH_UNKNOWN) { |
| 997a0e9a SZ |
2280 | dyn_f = lookup_rule(&args->f_id, |
| 2281 | &dyn_dir, | |
| 2282 | proto == IPPROTO_TCP ? | |
| 2283 | L3HDR(struct tcphdr, ip) : NULL, | |
| 2284 | ip_len, &deny); | |
| 2285 | if (deny) | |
| 29b27cb7 | 2286 | return IP_FW_DENY; |
| 997a0e9a SZ |
2287 | if (dyn_f != NULL) { |
| 2288 | /* | |
| 2289 | * Found a rule from a dynamic | |
| 2290 | * entry; jump to the 'action' | |
| 2291 | * part of the rule. | |
| 2292 | */ | |
| 2293 | f = dyn_f; | |
| 2294 | cmd = ACTION_PTR(f); | |
| 2295 | l = f->cmd_len - f->act_ofs; | |
| 2296 | goto check_body; | |
| 2297 | } | |
| 984263bc MD |
2298 | } |
| 2299 | /* | |
| 2300 | * Dynamic entry not found. If CHECK_STATE, | |
| 2301 | * skip to next rule, if PROBE_STATE just | |
| 2302 | * ignore and continue with next opcode. | |
| 2303 | */ | |
| 2304 | if (cmd->opcode == O_CHECK_STATE) | |
| 2305 | goto next_rule; | |
| ca12e259 SZ |
2306 | else if (!(f->rule_flags & IPFW_RULE_F_STATE)) |
| 2307 | goto next_rule; /* not ready yet */ | |
| 984263bc MD |
2308 | match = 1; |
| 2309 | break; | |
| 2310 | ||
| 2311 | case O_ACCEPT: | |
| 29b27cb7 | 2312 | retval = IP_FW_PASS; /* accept */ |
| 984263bc MD |
2313 | goto done; |
| 2314 | ||
| 2315 | case O_PIPE: | |
| 2316 | case O_QUEUE: | |
| 2317 | args->rule = f; /* report matching rule */ | |
| 29b27cb7 SZ |
2318 | args->cookie = cmd->arg1; |
| 2319 | retval = IP_FW_DUMMYNET; | |
| 984263bc MD |
2320 | goto done; |
| 2321 | ||
| 2322 | case O_DIVERT: | |
| 2323 | case O_TEE: | |
| 2324 | if (args->eh) /* not on layer 2 */ | |
| 2325 | break; | |
| e5ecc832 JS |
2326 | |
| 2327 | mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, | |
| 68edaf54 | 2328 | sizeof(*divinfo), MB_DONTWAIT); |
| e5ecc832 | 2329 | if (mtag == NULL) { |
| 29b27cb7 | 2330 | retval = IP_FW_DENY; |
| e5ecc832 JS |
2331 | goto done; |
| 2332 | } | |
| 68edaf54 SZ |
2333 | divinfo = m_tag_data(mtag); |
| 2334 | ||
| 2335 | divinfo->skipto = f->rulenum; | |
| 2336 | divinfo->port = cmd->arg1; | |
| 2337 | divinfo->tee = (cmd->opcode == O_TEE); | |
| e5ecc832 | 2338 | m_tag_prepend(m, mtag); |
| 68edaf54 | 2339 | |
| 29b27cb7 | 2340 | args->cookie = cmd->arg1; |
| 984263bc | 2341 | retval = (cmd->opcode == O_DIVERT) ? |
| 29b27cb7 | 2342 | IP_FW_DIVERT : IP_FW_TEE; |
| 984263bc MD |
2343 | goto done; |
| 2344 | ||
| 2345 | case O_COUNT: | |
| 2346 | case O_SKIPTO: | |
| 2347 | f->pcnt++; /* update stats */ | |
| 2348 | f->bcnt += ip_len; | |
| 2349 | f->timestamp = time_second; | |
| 2350 | if (cmd->opcode == O_COUNT) | |
| 2351 | goto next_rule; | |
| 2352 | /* handle skipto */ | |
| 2353 | if (f->next_rule == NULL) | |
| 2354 | lookup_next_rule(f); | |
| 2355 | f = f->next_rule; | |
| 2356 | goto again; | |
| 2357 | ||
| 2358 | case O_REJECT: | |
| 2359 | /* | |
| 2360 | * Drop the packet and send a reject notice | |
| 2361 | * if the packet is not ICMP (or is an ICMP | |
| 2362 | * query), and it is not multicast/broadcast. | |
| 2363 | */ | |
| 2364 | if (hlen > 0 && | |
| 2365 | (proto != IPPROTO_ICMP || | |
| 2366 | is_icmp_query(ip)) && | |
| 2367 | !(m->m_flags & (M_BCAST|M_MCAST)) && | |
| d0d5be30 | 2368 | !IN_MULTICAST(ntohl(dst_ip.s_addr))) { |
| ca12e259 SZ |
2369 | /* |
| 2370 | * Update statistics before the possible | |
| 2371 | * blocking 'send_reject' | |
| 2372 | */ | |
| 2373 | f->pcnt++; | |
| 2374 | f->bcnt += ip_len; | |
| 2375 | f->timestamp = time_second; | |
| 2376 | ||
| 984263bc MD |
2377 | send_reject(args, cmd->arg1, |
| 2378 | offset,ip_len); | |
| 2379 | m = args->m; | |
| ca12e259 SZ |
2380 | |
| 2381 | /* | |
| 2382 | * Return directly here, rule stats | |
| 2383 | * have been updated above. | |
| 2384 | */ | |
| 29b27cb7 | 2385 | return IP_FW_DENY; |
| 984263bc MD |
2386 | } |
| 2387 | /* FALLTHROUGH */ | |
| 2388 | case O_DENY: | |
| 29b27cb7 | 2389 | retval = IP_FW_DENY; |
| 984263bc MD |
2390 | goto done; |
| 2391 | ||
| 2392 | case O_FORWARD_IP: | |
| 2393 | if (args->eh) /* not valid on layer2 pkts */ | |
| 2394 | break; | |
| 99216103 | 2395 | if (!dyn_f || dyn_dir == MATCH_FORWARD) { |
| 5de23090 SZ |
2396 | struct sockaddr_in *sin; |
| 2397 | ||
| 2398 | mtag = m_tag_get(PACKET_TAG_IPFORWARD, | |
| 2399 | sizeof(*sin), MB_DONTWAIT); | |
| 2400 | if (mtag == NULL) { | |
| 29b27cb7 | 2401 | retval = IP_FW_DENY; |
| 5de23090 SZ |
2402 | goto done; |
| 2403 | } | |
| 2404 | sin = m_tag_data(mtag); | |
| 2405 | ||
| 2406 | /* Structure copy */ | |
| 2407 | *sin = ((ipfw_insn_sa *)cmd)->sa; | |
| 2408 | ||
| 2409 | m_tag_prepend(m, mtag); | |
| 2410 | m->m_pkthdr.fw_flags |= | |
| 2411 | IPFORWARD_MBUF_TAGGED; | |
| 50050193 | 2412 | } |
| 29b27cb7 | 2413 | retval = IP_FW_PASS; |
| 984263bc MD |
2414 | goto done; |
| 2415 | ||
| 2416 | default: | |
| 2417 | panic("-- unknown opcode %d\n", cmd->opcode); | |
| 2418 | } /* end of switch() on opcodes */ | |
| 2419 | ||
| 2420 | if (cmd->len & F_NOT) | |
| 2421 | match = !match; | |
| 2422 | ||
| 2423 | if (match) { | |
| 2424 | if (cmd->len & F_OR) | |
| 2425 | skip_or = 1; | |
| 2426 | } else { | |
| 2427 | if (!(cmd->len & F_OR)) /* not an OR block, */ | |
| 2428 | break; /* try next rule */ | |
| 2429 | } | |
| 2430 | ||
| 2431 | } /* end of inner for, scan opcodes */ | |
| 2432 | ||
| 2433 | next_rule:; /* try next rule */ | |
| 2434 | ||
| 2435 | } /* end of outer for, scan rules */ | |
| 4b1cf444 | 2436 | kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); |
| 29b27cb7 | 2437 | return IP_FW_DENY; |
| 984263bc MD |
2438 | |
| 2439 | done: | |
| 2440 | /* Update statistics */ | |
| 2441 | f->pcnt++; | |
| 2442 | f->bcnt += ip_len; | |
| 2443 | f->timestamp = time_second; | |
| 2444 | return retval; | |
| 2445 | ||
| 2446 | pullup_failed: | |
| 2447 | if (fw_verbose) | |
| 4b1cf444 | 2448 | kprintf("pullup failed\n"); |
| 29b27cb7 | 2449 | return IP_FW_DENY; |
| 984263bc MD |
2450 | } |
| 2451 | ||
| 84a3e25a SZ |
2452 | static void |
| 2453 | ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) | |
| 2454 | { | |
| 2455 | struct m_tag *mtag; | |
| 2456 | struct dn_pkt *pkt; | |
| 2457 | ipfw_insn *cmd; | |
| 2458 | const struct ipfw_flow_id *id; | |
| 2459 | struct dn_flow_id *fid; | |
| 2460 | ||
| f849e7f7 SZ |
2461 | M_ASSERTPKTHDR(m); |
| 2462 | ||
| 84a3e25a SZ |
2463 | mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), MB_DONTWAIT); |
| 2464 | if (mtag == NULL) { | |
| 2465 | m_freem(m); | |
| 2466 | return; | |
| 2467 | } | |
| 2468 | m_tag_prepend(m, mtag); | |
| 2469 | ||
| 2470 | pkt = m_tag_data(mtag); | |
| 2471 | bzero(pkt, sizeof(*pkt)); | |
| 2472 | ||
| 2473 | cmd = fwa->rule->cmd + fwa->rule->act_ofs; | |
| 2474 | if (cmd->opcode == O_LOG) | |
| 2475 | cmd += F_LEN(cmd); | |
| 2476 | KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, | |
| 2477 | ("Rule is not PIPE or QUEUE, opcode %d\n", cmd->opcode)); | |
| 2478 | ||
| 2479 | pkt->dn_m = m; | |
| 2480 | pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); | |
| 2481 | pkt->ifp = fwa->oif; | |
| 84a3e25a SZ |
2482 | pkt->pipe_nr = pipe_nr; |
| 2483 | ||
| e5d90c37 | 2484 | pkt->cpuid = mycpuid; |
| c244d613 | 2485 | pkt->msgport = curnetport; |
| e5d90c37 | 2486 | |
| 84a3e25a SZ |
2487 | id = &fwa->f_id; |
| 2488 | fid = &pkt->id; | |
| 2489 | fid->fid_dst_ip = id->dst_ip; | |
| 2490 | fid->fid_src_ip = id->src_ip; | |
| 2491 | fid->fid_dst_port = id->dst_port; | |
| 2492 | fid->fid_src_port = id->src_port; | |
| 2493 | fid->fid_proto = id->proto; | |
| 2494 | fid->fid_flags = id->flags; | |
| 2495 | ||
| 2496 | ipfw_ref_rule(fwa->rule); | |
| 2497 | pkt->dn_priv = fwa->rule; | |
| 2498 | pkt->dn_unref_priv = ipfw_unref_rule; | |
| 2499 | ||
| 2500 | if (cmd->opcode == O_PIPE) | |
| 2501 | pkt->dn_flags |= DN_FLAGS_IS_PIPE; | |
| 2502 | ||
| 84a3e25a | 2503 | m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; |
| 84a3e25a SZ |
2504 | } |
| 2505 | ||
| 984263bc MD |
2506 | /* |
| 2507 | * When a rule is added/deleted, clear the next_rule pointers in all rules. | |
| 2508 | * These will be reconstructed on the fly as packets are matched. | |
| 2509 | * Must be called at splimp(). | |
| 2510 | */ | |
| 2511 | static void | |
| ca12e259 | 2512 | ipfw_flush_rule_ptrs(struct ipfw_context *ctx) |
| 984263bc MD |
2513 | { |
| 2514 | struct ip_fw *rule; | |
| 2515 | ||
| ca12e259 | 2516 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) |
| 984263bc MD |
2517 | rule->next_rule = NULL; |
| 2518 | } | |
| 2519 | ||
| b30401b3 SZ |
2520 | static __inline void |
| 2521 | ipfw_inc_static_count(struct ip_fw *rule) | |
| 2522 | { | |
| ac5988d6 | 2523 | /* Static rule's counts are updated only on CPU0 */ |
| ca12e259 | 2524 | KKASSERT(mycpuid == 0); |
| 61224eb9 | 2525 | |
| b30401b3 | 2526 | static_count++; |
| b78533e2 | 2527 | static_ioc_len += IOC_RULESIZE(rule); |
| b30401b3 SZ |
2528 | } |
| 2529 | ||
| 9c4d5568 SZ |
2530 | static __inline void |
| 2531 | ipfw_dec_static_count(struct ip_fw *rule) | |
| 2532 | { | |
| b78533e2 | 2533 | int l = IOC_RULESIZE(rule); |
| 9c4d5568 | 2534 | |
| ac5988d6 | 2535 | /* Static rule's counts are updated only on CPU0 */ |
| ca12e259 | 2536 | KKASSERT(mycpuid == 0); |
| 61224eb9 | 2537 | |
| 9c4d5568 SZ |
2538 | KASSERT(static_count > 0, ("invalid static count %u\n", static_count)); |
| 2539 | static_count--; | |
| 2540 | ||
| b78533e2 SZ |
2541 | KASSERT(static_ioc_len >= l, |
| 2542 | ("invalid static len %u\n", static_ioc_len)); | |
| 2543 | static_ioc_len -= l; | |
| 2544 | } | |
| 2545 | ||
| ca12e259 SZ |
2546 | static void |
| 2547 | ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) | |
| 2548 | { | |
| 2549 | if (fwmsg->sibling != NULL) { | |
| 2550 | KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); | |
| 2551 | fwmsg->sibling->sibling = rule; | |
| 2552 | } | |
| 2553 | fwmsg->sibling = rule; | |
| 2554 | } | |
| 2555 | ||
| b78533e2 | 2556 | static struct ip_fw * |
| ca12e259 | 2557 | ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, struct ip_fw_stub *stub) |
| b78533e2 SZ |
2558 | { |
| 2559 | struct ip_fw *rule; | |
| 2560 | ||
| 2561 | rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); | |
| 2562 | ||
| 2563 | rule->act_ofs = ioc_rule->act_ofs; | |
| 2564 | rule->cmd_len = ioc_rule->cmd_len; | |
| 2565 | rule->rulenum = ioc_rule->rulenum; | |
| 2566 | rule->set = ioc_rule->set; | |
| 2567 | rule->usr_flags = ioc_rule->usr_flags; | |
| 2568 | ||
| 2569 | bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); | |
| 2570 | ||
| 84a3e25a | 2571 | rule->refcnt = 1; |
| ca12e259 SZ |
2572 | rule->cpuid = mycpuid; |
| 2573 | ||
| 2574 | rule->stub = stub; | |
| 2575 | if (stub != NULL) | |
| 2576 | stub->rule[mycpuid] = rule; | |
| 84a3e25a | 2577 | |
| b78533e2 | 2578 | return rule; |
| 9c4d5568 SZ |
2579 | } |
| 2580 | ||
| ca12e259 SZ |
2581 | static void |
| 2582 | ipfw_add_rule_dispatch(struct netmsg *nmsg) | |
| 2583 | { | |
| 2584 | struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; | |
| 2585 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 2586 | struct ip_fw *rule; | |
| 2587 | ||
| 2588 | rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->stub); | |
| 2589 | ||
| 2590 | /* | |
| 2591 | * Bump generation after ipfw_create_rule(), | |
| 2592 | * since this function is blocking | |
| 2593 | */ | |
| 2594 | ctx->ipfw_gen++; | |
| 2595 | ||
| 2596 | /* | |
| 2597 | * Insert rule into the pre-determined position | |
| 2598 | */ | |
| 2599 | if (fwmsg->prev_rule != NULL) { | |
| 2600 | struct ip_fw *prev, *next; | |
| 2601 | ||
| 2602 | prev = fwmsg->prev_rule; | |
| 2603 | KKASSERT(prev->cpuid == mycpuid); | |
| 2604 | ||
| 2605 | next = fwmsg->next_rule; | |
| 2606 | KKASSERT(next->cpuid == mycpuid); | |
| 2607 | ||
| 2608 | rule->next = next; | |
| 2609 | prev->next = rule; | |
| 2610 | ||
| 2611 | /* | |
| 2612 | * Move to the position on the next CPU | |
| 2613 | * before the msg is forwarded. | |
| 2614 | */ | |
| 2615 | fwmsg->prev_rule = prev->sibling; | |
| 2616 | fwmsg->next_rule = next->sibling; | |
| 2617 | } else { | |
| 2618 | KKASSERT(fwmsg->next_rule == NULL); | |
| 2619 | rule->next = ctx->ipfw_layer3_chain; | |
| 2620 | ctx->ipfw_layer3_chain = rule; | |
| 2621 | } | |
| 2622 | ||
| 2623 | /* Link rule CPU sibling */ | |
| 2624 | ipfw_link_sibling(fwmsg, rule); | |
| 2625 | ||
| 2626 | ipfw_flush_rule_ptrs(ctx); | |
| 2627 | ||
| 2628 | if (mycpuid == 0) { | |
| 2629 | /* Statistics only need to be updated once */ | |
| 2630 | ipfw_inc_static_count(rule); | |
| 2631 | ||
| 2632 | /* Return the rule on CPU0 */ | |
| 2633 | nmsg->nm_lmsg.u.ms_resultp = rule; | |
| 2634 | } | |
| 2635 | ||
| c4882b7e | 2636 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
2637 | } |
| 2638 | ||
| 2639 | static void | |
| 2640 | ipfw_enable_state_dispatch(struct netmsg *nmsg) | |
| 2641 | { | |
| 2642 | struct lwkt_msg *lmsg = &nmsg->nm_lmsg; | |
| 2643 | struct ip_fw *rule = lmsg->u.ms_resultp; | |
| 153e0956 SZ |
2644 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 2645 | ||
| 2646 | ctx->ipfw_gen++; | |
| ca12e259 SZ |
2647 | |
| 2648 | KKASSERT(rule->cpuid == mycpuid); | |
| 2649 | KKASSERT(rule->stub != NULL && rule->stub->rule[mycpuid] == rule); | |
| 2650 | KKASSERT(!(rule->rule_flags & IPFW_RULE_F_STATE)); | |
| 2651 | rule->rule_flags |= IPFW_RULE_F_STATE; | |
| 2652 | lmsg->u.ms_resultp = rule->sibling; | |
| 2653 | ||
| c4882b7e | 2654 | ifnet_forwardmsg(lmsg, mycpuid + 1); |
| ca12e259 SZ |
2655 | } |
| 2656 | ||
| 984263bc | 2657 | /* |
| 9fabc2ac SZ |
2658 | * Add a new rule to the list. Copy the rule into a malloc'ed area, |
| 2659 | * then possibly create a rule number and add the rule to the list. | |
| 2660 | * Update the rule_number in the input struct so the caller knows | |
| 2661 | * it as well. | |
| 984263bc | 2662 | */ |
| 9fabc2ac | 2663 | static void |
| ca12e259 | 2664 | ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) |
| 984263bc | 2665 | { |
| ca12e259 SZ |
2666 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 2667 | struct netmsg_ipfw fwmsg; | |
| 2668 | struct netmsg *nmsg; | |
| 2669 | struct ip_fw *f, *prev, *rule; | |
| 2670 | struct ip_fw_stub *stub; | |
| 984263bc | 2671 | |
| 82040d23 | 2672 | IPFW_ASSERT_CFGPORT(&curthread->td_msgport); |
| 984263bc | 2673 | |
| 984263bc MD |
2674 | /* |
| 2675 | * If rulenum is 0, find highest numbered rule before the | |
| ca12e259 | 2676 | * default rule, and add rule number incremental step. |
| 984263bc | 2677 | */ |
| ca12e259 | 2678 | if (ioc_rule->rulenum == 0) { |
| 9fabc2ac SZ |
2679 | int step = autoinc_step; |
| 2680 | ||
| c1aa76bb SZ |
2681 | KKASSERT(step >= IPFW_AUTOINC_STEP_MIN && |
| 2682 | step <= IPFW_AUTOINC_STEP_MAX); | |
| 9fabc2ac | 2683 | |
| 984263bc | 2684 | /* |
| 9fabc2ac | 2685 | * Locate the highest numbered rule before default |
| 984263bc | 2686 | */ |
| ca12e259 | 2687 | for (f = ctx->ipfw_layer3_chain; f; f = f->next) { |
| 984263bc MD |
2688 | if (f->rulenum == IPFW_DEFAULT_RULE) |
| 2689 | break; | |
| ca12e259 | 2690 | ioc_rule->rulenum = f->rulenum; |
| 984263bc | 2691 | } |
| ca12e259 SZ |
2692 | if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step) |
| 2693 | ioc_rule->rulenum += step; | |
| 984263bc | 2694 | } |
| ca12e259 SZ |
2695 | KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE && |
| 2696 | ioc_rule->rulenum != 0, | |
| 2697 | ("invalid rule num %d\n", ioc_rule->rulenum)); | |
| 984263bc MD |
2698 | |
| 2699 | /* | |
| ca12e259 | 2700 | * Now find the right place for the new rule in the sorted list. |
| 984263bc | 2701 | */ |
| ca12e259 SZ |
2702 | for (prev = NULL, f = ctx->ipfw_layer3_chain; f; |
| 2703 | prev = f, f = f->next) { | |
| 2704 | if (f->rulenum > ioc_rule->rulenum) { | |
| 9fabc2ac | 2705 | /* Found the location */ |
| 984263bc MD |
2706 | break; |
| 2707 | } | |
| 2708 | } | |
| ca12e259 SZ |
2709 | KASSERT(f != NULL, ("no default rule?!\n")); |
| 2710 | ||
| 2711 | if (rule_flags & IPFW_RULE_F_STATE) { | |
| 2712 | int size; | |
| 2713 | ||
| 2714 | /* | |
| 2715 | * If the new rule will create states, then allocate | |
| 2716 | * a rule stub, which will be referenced by states | |
| 2717 | * (dyn rules) | |
| 2718 | */ | |
| 2719 | size = sizeof(*stub) + ((ncpus - 1) * sizeof(struct ip_fw *)); | |
| 2720 | stub = kmalloc(size, M_IPFW, M_WAITOK | M_ZERO); | |
| 2721 | } else { | |
| 2722 | stub = NULL; | |
| 2723 | } | |
| 2724 | ||
| 2725 | /* | |
| 2726 | * Duplicate the rule onto each CPU. | |
| 2727 | * The rule duplicated on CPU0 will be returned. | |
| 2728 | */ | |
| 2729 | bzero(&fwmsg, sizeof(fwmsg)); | |
| 2730 | nmsg = &fwmsg.nmsg; | |
| 2731 | netmsg_init(nmsg, &curthread->td_msgport, 0, ipfw_add_rule_dispatch); | |
| 2732 | fwmsg.ioc_rule = ioc_rule; | |
| 2733 | fwmsg.prev_rule = prev; | |
| 2734 | fwmsg.next_rule = prev == NULL ? NULL : f; | |
| 2735 | fwmsg.stub = stub; | |
| 2736 | ||
| c4882b7e | 2737 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 | 2738 | KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL); |
| b30401b3 | 2739 | |
| ca12e259 SZ |
2740 | rule = nmsg->nm_lmsg.u.ms_resultp; |
| 2741 | KKASSERT(rule != NULL && rule->cpuid == mycpuid); | |
| 2742 | ||
| 2743 | if (rule_flags & IPFW_RULE_F_STATE) { | |
| 2744 | /* | |
| 2745 | * Turn on state flag, _after_ everything on all | |
| 2746 | * CPUs have been setup. | |
| 2747 | */ | |
| 2748 | bzero(nmsg, sizeof(*nmsg)); | |
| 2749 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 2750 | ipfw_enable_state_dispatch); | |
| 2751 | nmsg->nm_lmsg.u.ms_resultp = rule; | |
| 2752 | ||
| c4882b7e | 2753 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
2754 | KKASSERT(nmsg->nm_lmsg.u.ms_resultp == NULL); |
| 2755 | } | |
| b78533e2 | 2756 | |
| 8d0865c8 SZ |
2757 | DPRINTF("++ installed rule %d, static count now %d\n", |
| 2758 | rule->rulenum, static_count); | |
| 984263bc MD |
2759 | } |
| 2760 | ||
| 2761 | /** | |
| 2762 | * Free storage associated with a static rule (including derived | |
| 2763 | * dynamic rules). | |
| 2764 | * The caller is in charge of clearing rule pointers to avoid | |
| 2765 | * dangling pointers. | |
| 2766 | * @return a pointer to the next entry. | |
| 2767 | * Arguments are not checked, so they better be correct. | |
| 2768 | * Must be called at splimp(). | |
| 2769 | */ | |
| 2770 | static struct ip_fw * | |
| ca12e259 SZ |
2771 | ipfw_delete_rule(struct ipfw_context *ctx, |
| 2772 | struct ip_fw *prev, struct ip_fw *rule) | |
| 984263bc MD |
2773 | { |
| 2774 | struct ip_fw *n; | |
| ca12e259 SZ |
2775 | struct ip_fw_stub *stub; |
| 2776 | ||
| 2777 | ctx->ipfw_gen++; | |
| 984263bc | 2778 | |
| ca12e259 SZ |
2779 | /* STATE flag should have been cleared before we reach here */ |
| 2780 | KKASSERT((rule->rule_flags & IPFW_RULE_F_STATE) == 0); | |
| 997a0e9a | 2781 | |
| ca12e259 | 2782 | stub = rule->stub; |
| 984263bc | 2783 | n = rule->next; |
| 984263bc | 2784 | if (prev == NULL) |
| ca12e259 | 2785 | ctx->ipfw_layer3_chain = n; |
| 984263bc MD |
2786 | else |
| 2787 | prev->next = n; | |
| 984263bc | 2788 | |
| 84a3e25a SZ |
2789 | /* Mark the rule as invalid */ |
| 2790 | rule->rule_flags |= IPFW_RULE_F_INVALID; | |
| 2791 | rule->next_rule = NULL; | |
| ca12e259 SZ |
2792 | rule->sibling = NULL; |
| 2793 | rule->stub = NULL; | |
| 2794 | #ifdef foo | |
| 2795 | /* Don't reset cpuid here; keep various assertion working */ | |
| 2796 | rule->cpuid = -1; | |
| 2797 | #endif | |
| 2798 | ||
| 2799 | /* Statistics only need to be updated once */ | |
| 2800 | if (mycpuid == 0) | |
| 2801 | ipfw_dec_static_count(rule); | |
| 2802 | ||
| 2803 | /* Free 'stub' on the last CPU */ | |
| 2804 | if (stub != NULL && mycpuid == ncpus - 1) | |
| 2805 | kfree(stub, M_IPFW); | |
| 84a3e25a SZ |
2806 | |
| 2807 | /* Try to free this rule */ | |
| 2808 | ipfw_free_rule(rule); | |
| 2809 | ||
| ca12e259 | 2810 | /* Return the next rule */ |
| 984263bc MD |
2811 | return n; |
| 2812 | } | |
| 2813 | ||
| ca12e259 SZ |
2814 | static void |
| 2815 | ipfw_flush_dispatch(struct netmsg *nmsg) | |
| 2816 | { | |
| 2817 | struct lwkt_msg *lmsg = &nmsg->nm_lmsg; | |
| 2818 | int kill_default = lmsg->u.ms_result; | |
| 2819 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 2820 | struct ip_fw *rule; | |
| 2821 | ||
| 2822 | ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */ | |
| 2823 | ||
| 2824 | while ((rule = ctx->ipfw_layer3_chain) != NULL && | |
| 2825 | (kill_default || rule->rulenum != IPFW_DEFAULT_RULE)) | |
| 2826 | ipfw_delete_rule(ctx, NULL, rule); | |
| 2827 | ||
| c4882b7e | 2828 | ifnet_forwardmsg(lmsg, mycpuid + 1); |
| ca12e259 SZ |
2829 | } |
| 2830 | ||
| 2831 | static void | |
| 2832 | ipfw_disable_rule_state_dispatch(struct netmsg *nmsg) | |
| 2833 | { | |
| 2834 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 153e0956 | 2835 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| ca12e259 SZ |
2836 | struct ip_fw *rule; |
| 2837 | ||
| 153e0956 SZ |
2838 | ctx->ipfw_gen++; |
| 2839 | ||
| ca12e259 SZ |
2840 | rule = dmsg->start_rule; |
| 2841 | if (rule != NULL) { | |
| 2842 | KKASSERT(rule->cpuid == mycpuid); | |
| 2843 | ||
| 2844 | /* | |
| 2845 | * Move to the position on the next CPU | |
| 2846 | * before the msg is forwarded. | |
| 2847 | */ | |
| 2848 | dmsg->start_rule = rule->sibling; | |
| 2849 | } else { | |
| ca12e259 SZ |
2850 | KKASSERT(dmsg->rulenum == 0); |
| 2851 | rule = ctx->ipfw_layer3_chain; | |
| 2852 | } | |
| 2853 | ||
| 2854 | while (rule != NULL) { | |
| 2855 | if (dmsg->rulenum && rule->rulenum != dmsg->rulenum) | |
| 2856 | break; | |
| 2857 | rule->rule_flags &= ~IPFW_RULE_F_STATE; | |
| 2858 | rule = rule->next; | |
| 2859 | } | |
| 2860 | ||
| c4882b7e | 2861 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
2862 | } |
| 2863 | ||
| 984263bc MD |
2864 | /* |
| 2865 | * Deletes all rules from a chain (including the default rule | |
| 2866 | * if the second argument is set). | |
| 2867 | * Must be called at splimp(). | |
| 2868 | */ | |
| 2869 | static void | |
| ca12e259 | 2870 | ipfw_flush(int kill_default) |
| 984263bc | 2871 | { |
| ca12e259 SZ |
2872 | struct netmsg_del dmsg; |
| 2873 | struct netmsg nmsg; | |
| 2874 | struct lwkt_msg *lmsg; | |
| 984263bc | 2875 | struct ip_fw *rule; |
| ca12e259 | 2876 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; |
| 984263bc | 2877 | |
| ca12e259 | 2878 | IPFW_ASSERT_CFGPORT(&curthread->td_msgport); |
| 984263bc | 2879 | |
| ca12e259 SZ |
2880 | /* |
| 2881 | * If 'kill_default' then caller has done the necessary | |
| 2882 | * msgport syncing; unnecessary to do it again. | |
| 2883 | */ | |
| 2884 | if (!kill_default) { | |
| 2885 | /* | |
| 2886 | * Let ipfw_chk() know the rules are going to | |
| 2887 | * be flushed, so it could jump directly to | |
| 2888 | * the default rule. | |
| 2889 | */ | |
| 2890 | ipfw_flushing = 1; | |
| 2891 | netmsg_service_sync(); | |
| 2892 | } | |
| 2893 | ||
| 2894 | /* | |
| 2895 | * Clear STATE flag on rules, so no more states (dyn rules) | |
| 2896 | * will be created. | |
| 2897 | */ | |
| 2898 | bzero(&dmsg, sizeof(dmsg)); | |
| 2899 | netmsg_init(&dmsg.nmsg, &curthread->td_msgport, 0, | |
| 2900 | ipfw_disable_rule_state_dispatch); | |
| c4882b7e | 2901 | ifnet_domsg(&dmsg.nmsg.nm_lmsg, 0); |
| ca12e259 SZ |
2902 | |
| 2903 | /* | |
| 2904 | * This actually nukes all states (dyn rules) | |
| 2905 | */ | |
| 2906 | lockmgr(&dyn_lock, LK_EXCLUSIVE); | |
| 2907 | for (rule = ctx->ipfw_layer3_chain; rule != NULL; rule = rule->next) { | |
| 2908 | /* | |
| 2909 | * Can't check IPFW_RULE_F_STATE here, | |
| 2910 | * since it has been cleared previously. | |
| 2911 | * Check 'stub' instead. | |
| 2912 | */ | |
| 2913 | if (rule->stub != NULL) { | |
| 2914 | /* Force removal */ | |
| 2915 | remove_dyn_rule_locked(rule, NULL); | |
| 2916 | } | |
| 2917 | } | |
| 2918 | lockmgr(&dyn_lock, LK_RELEASE); | |
| 2919 | ||
| 2920 | /* | |
| 2921 | * Press the 'flush' button | |
| 2922 | */ | |
| 2923 | bzero(&nmsg, sizeof(nmsg)); | |
| 2924 | netmsg_init(&nmsg, &curthread->td_msgport, 0, ipfw_flush_dispatch); | |
| 2925 | lmsg = &nmsg.nm_lmsg; | |
| 2926 | lmsg->u.ms_result = kill_default; | |
| c4882b7e | 2927 | ifnet_domsg(lmsg, 0); |
| b78533e2 | 2928 | |
| dbf1ea5d SZ |
2929 | KASSERT(dyn_count == 0, ("%u dyn rule remains\n", dyn_count)); |
| 2930 | ||
| b78533e2 | 2931 | if (kill_default) { |
| dbf1ea5d SZ |
2932 | if (ipfw_dyn_v != NULL) { |
| 2933 | /* | |
| 2934 | * Free dynamic rules(state) hash table | |
| 2935 | */ | |
| 2936 | kfree(ipfw_dyn_v, M_IPFW); | |
| 2937 | ipfw_dyn_v = NULL; | |
| 2938 | } | |
| 2939 | ||
| b78533e2 SZ |
2940 | KASSERT(static_count == 0, |
| 2941 | ("%u static rules remains\n", static_count)); | |
| 2942 | KASSERT(static_ioc_len == 0, | |
| 2943 | ("%u bytes of static rules remains\n", static_ioc_len)); | |
| 2944 | } else { | |
| 2945 | KASSERT(static_count == 1, | |
| 2946 | ("%u static rules remains\n", static_count)); | |
| ca12e259 | 2947 | KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), |
| 973c11b9 | 2948 | ("%u bytes of static rules remains, should be %lu\n", |
| ca12e259 | 2949 | static_ioc_len, IOC_RULESIZE(ctx->ipfw_default_rule))); |
| b78533e2 | 2950 | } |
| ca12e259 SZ |
2951 | |
| 2952 | /* Flush is done */ | |
| 2953 | ipfw_flushing = 0; | |
| 2954 | } | |
| 2955 | ||
| 2956 | static void | |
| 2957 | ipfw_alt_delete_rule_dispatch(struct netmsg *nmsg) | |
| 2958 | { | |
| 2959 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 2960 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 2961 | struct ip_fw *rule, *prev; | |
| 2962 | ||
| 2963 | rule = dmsg->start_rule; | |
| 2964 | KKASSERT(rule->cpuid == mycpuid); | |
| 2965 | dmsg->start_rule = rule->sibling; | |
| 2966 | ||
| 2967 | prev = dmsg->prev_rule; | |
| 2968 | if (prev != NULL) { | |
| 2969 | KKASSERT(prev->cpuid == mycpuid); | |
| 2970 | ||
| 2971 | /* | |
| 2972 | * Move to the position on the next CPU | |
| 2973 | * before the msg is forwarded. | |
| 2974 | */ | |
| 2975 | dmsg->prev_rule = prev->sibling; | |
| 2976 | } | |
| 2977 | ||
| 2978 | /* | |
| 2979 | * flush pointers outside the loop, then delete all matching | |
| 2980 | * rules. 'prev' remains the same throughout the cycle. | |
| 2981 | */ | |
| 2982 | ipfw_flush_rule_ptrs(ctx); | |
| 2983 | while (rule && rule->rulenum == dmsg->rulenum) | |
| 2984 | rule = ipfw_delete_rule(ctx, prev, rule); | |
| 2985 | ||
| c4882b7e | 2986 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
2987 | } |
| 2988 | ||
| 2989 | static int | |
| 2990 | ipfw_alt_delete_rule(uint16_t rulenum) | |
| 2991 | { | |
| 2992 | struct ip_fw *prev, *rule, *f; | |
| 2993 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 2994 | struct netmsg_del dmsg; | |
| 2995 | struct netmsg *nmsg; | |
| 2996 | int state; | |
| 2997 | ||
| 2998 | /* | |
| 2999 | * Locate first rule to delete | |
| 3000 | */ | |
| 3001 | for (prev = NULL, rule = ctx->ipfw_layer3_chain; | |
| 3002 | rule && rule->rulenum < rulenum; | |
| 3003 | prev = rule, rule = rule->next) | |
| 3004 | ; /* EMPTY */ | |
| 3005 | if (rule->rulenum != rulenum) | |
| 3006 | return EINVAL; | |
| 3007 | ||
| 3008 | /* | |
| 3009 | * Check whether any rules with the given number will | |
| 3010 | * create states. | |
| 3011 | */ | |
| 3012 | state = 0; | |
| 3013 | for (f = rule; f && f->rulenum == rulenum; f = f->next) { | |
| 3014 | if (f->rule_flags & IPFW_RULE_F_STATE) { | |
| 3015 | state = 1; | |
| 3016 | break; | |
| 3017 | } | |
| 3018 | } | |
| 3019 | ||
| 3020 | if (state) { | |
| 3021 | /* | |
| 3022 | * Clear the STATE flag, so no more states will be | |
| 3023 | * created based the rules numbered 'rulenum'. | |
| 3024 | */ | |
| 3025 | bzero(&dmsg, sizeof(dmsg)); | |
| 3026 | nmsg = &dmsg.nmsg; | |
| 3027 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3028 | ipfw_disable_rule_state_dispatch); | |
| 3029 | dmsg.start_rule = rule; | |
| 3030 | dmsg.rulenum = rulenum; | |
| 3031 | ||
| c4882b7e | 3032 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3033 | KKASSERT(dmsg.start_rule == NULL); |
| 3034 | ||
| 3035 | /* | |
| 3036 | * Nuke all related states | |
| 3037 | */ | |
| 3038 | lockmgr(&dyn_lock, LK_EXCLUSIVE); | |
| 3039 | for (f = rule; f && f->rulenum == rulenum; f = f->next) { | |
| 3040 | /* | |
| 3041 | * Can't check IPFW_RULE_F_STATE here, | |
| 3042 | * since it has been cleared previously. | |
| 3043 | * Check 'stub' instead. | |
| 3044 | */ | |
| 3045 | if (f->stub != NULL) { | |
| 3046 | /* Force removal */ | |
| 3047 | remove_dyn_rule_locked(f, NULL); | |
| 3048 | } | |
| 3049 | } | |
| 3050 | lockmgr(&dyn_lock, LK_RELEASE); | |
| 3051 | } | |
| 3052 | ||
| 3053 | /* | |
| 3054 | * Get rid of the rule duplications on all CPUs | |
| 3055 | */ | |
| 3056 | bzero(&dmsg, sizeof(dmsg)); | |
| 3057 | nmsg = &dmsg.nmsg; | |
| 3058 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3059 | ipfw_alt_delete_rule_dispatch); | |
| 3060 | dmsg.prev_rule = prev; | |
| 3061 | dmsg.start_rule = rule; | |
| 3062 | dmsg.rulenum = rulenum; | |
| 3063 | ||
| c4882b7e | 3064 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3065 | KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL); |
| 3066 | return 0; | |
| 3067 | } | |
| 3068 | ||
| 3069 | static void | |
| 3070 | ipfw_alt_delete_ruleset_dispatch(struct netmsg *nmsg) | |
| 3071 | { | |
| 3072 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 3073 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3074 | struct ip_fw *prev, *rule; | |
| 3075 | #ifdef INVARIANTS | |
| 3076 | int del = 0; | |
| 3077 | #endif | |
| 3078 | ||
| 3079 | ipfw_flush_rule_ptrs(ctx); | |
| 3080 | ||
| 3081 | prev = NULL; | |
| 3082 | rule = ctx->ipfw_layer3_chain; | |
| 3083 | while (rule != NULL) { | |
| 3084 | if (rule->set == dmsg->from_set) { | |
| 3085 | rule = ipfw_delete_rule(ctx, prev, rule); | |
| 3086 | #ifdef INVARIANTS | |
| 3087 | del = 1; | |
| 3088 | #endif | |
| 3089 | } else { | |
| 3090 | prev = rule; | |
| 3091 | rule = rule->next; | |
| 3092 | } | |
| 3093 | } | |
| 3094 | KASSERT(del, ("no match set?!\n")); | |
| 3095 | ||
| c4882b7e | 3096 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
3097 | } |
| 3098 | ||
| 3099 | static void | |
| 3100 | ipfw_disable_ruleset_state_dispatch(struct netmsg *nmsg) | |
| 3101 | { | |
| 3102 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 3103 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3104 | struct ip_fw *rule; | |
| 3105 | #ifdef INVARIANTS | |
| 3106 | int cleared = 0; | |
| 3107 | #endif | |
| 3108 | ||
| 153e0956 SZ |
3109 | ctx->ipfw_gen++; |
| 3110 | ||
| ca12e259 SZ |
3111 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { |
| 3112 | if (rule->set == dmsg->from_set) { | |
| 3113 | #ifdef INVARIANTS | |
| 3114 | cleared = 1; | |
| 3115 | #endif | |
| 3116 | rule->rule_flags &= ~IPFW_RULE_F_STATE; | |
| 3117 | } | |
| 3118 | } | |
| 3119 | KASSERT(cleared, ("no match set?!\n")); | |
| 3120 | ||
| c4882b7e | 3121 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
3122 | } |
| 3123 | ||
| 3124 | static int | |
| 3125 | ipfw_alt_delete_ruleset(uint8_t set) | |
| 3126 | { | |
| 3127 | struct netmsg_del dmsg; | |
| 3128 | struct netmsg *nmsg; | |
| 3129 | int state, del; | |
| 3130 | struct ip_fw *rule; | |
| 3131 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3132 | ||
| 3133 | /* | |
| 3134 | * Check whether the 'set' exists. If it exists, | |
| 3135 | * then check whether any rules within the set will | |
| 3136 | * try to create states. | |
| 3137 | */ | |
| 3138 | state = 0; | |
| 3139 | del = 0; | |
| 3140 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { | |
| 3141 | if (rule->set == set) { | |
| 3142 | del = 1; | |
| 3143 | if (rule->rule_flags & IPFW_RULE_F_STATE) { | |
| 3144 | state = 1; | |
| 3145 | break; | |
| 3146 | } | |
| 3147 | } | |
| 3148 | } | |
| 3149 | if (!del) | |
| 3150 | return 0; /* XXX EINVAL? */ | |
| 3151 | ||
| 3152 | if (state) { | |
| 3153 | /* | |
| 3154 | * Clear the STATE flag, so no more states will be | |
| 3155 | * created based the rules in this set. | |
| 3156 | */ | |
| 3157 | bzero(&dmsg, sizeof(dmsg)); | |
| 3158 | nmsg = &dmsg.nmsg; | |
| 3159 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3160 | ipfw_disable_ruleset_state_dispatch); | |
| 3161 | dmsg.from_set = set; | |
| 3162 | ||
| c4882b7e | 3163 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3164 | |
| 3165 | /* | |
| 3166 | * Nuke all related states | |
| 3167 | */ | |
| 3168 | lockmgr(&dyn_lock, LK_EXCLUSIVE); | |
| 3169 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { | |
| 3170 | if (rule->set != set) | |
| 3171 | continue; | |
| 3172 | ||
| 3173 | /* | |
| 3174 | * Can't check IPFW_RULE_F_STATE here, | |
| 3175 | * since it has been cleared previously. | |
| 3176 | * Check 'stub' instead. | |
| 3177 | */ | |
| 3178 | if (rule->stub != NULL) { | |
| 3179 | /* Force removal */ | |
| 3180 | remove_dyn_rule_locked(rule, NULL); | |
| 3181 | } | |
| 3182 | } | |
| 3183 | lockmgr(&dyn_lock, LK_RELEASE); | |
| 3184 | } | |
| 3185 | ||
| 3186 | /* | |
| 3187 | * Delete this set | |
| 3188 | */ | |
| 3189 | bzero(&dmsg, sizeof(dmsg)); | |
| 3190 | nmsg = &dmsg.nmsg; | |
| 3191 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3192 | ipfw_alt_delete_ruleset_dispatch); | |
| 3193 | dmsg.from_set = set; | |
| 3194 | ||
| c4882b7e | 3195 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3196 | return 0; |
| 3197 | } | |
| 3198 | ||
| 3199 | static void | |
| 3200 | ipfw_alt_move_rule_dispatch(struct netmsg *nmsg) | |
| 3201 | { | |
| 3202 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 3203 | struct ip_fw *rule; | |
| 3204 | ||
| 3205 | rule = dmsg->start_rule; | |
| 3206 | KKASSERT(rule->cpuid == mycpuid); | |
| 3207 | ||
| 3208 | /* | |
| 3209 | * Move to the position on the next CPU | |
| 3210 | * before the msg is forwarded. | |
| 3211 | */ | |
| 3212 | dmsg->start_rule = rule->sibling; | |
| 3213 | ||
| 3214 | while (rule && rule->rulenum <= dmsg->rulenum) { | |
| 3215 | if (rule->rulenum == dmsg->rulenum) | |
| 3216 | rule->set = dmsg->to_set; | |
| 3217 | rule = rule->next; | |
| 3218 | } | |
| c4882b7e | 3219 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
3220 | } |
| 3221 | ||
| 3222 | static int | |
| 3223 | ipfw_alt_move_rule(uint16_t rulenum, uint8_t set) | |
| 3224 | { | |
| 3225 | struct netmsg_del dmsg; | |
| 3226 | struct netmsg *nmsg; | |
| 3227 | struct ip_fw *rule; | |
| 3228 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3229 | ||
| 3230 | /* | |
| 3231 | * Locate first rule to move | |
| 3232 | */ | |
| 3233 | for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum; | |
| 3234 | rule = rule->next) { | |
| 3235 | if (rule->rulenum == rulenum && rule->set != set) | |
| 3236 | break; | |
| 3237 | } | |
| 3238 | if (rule == NULL || rule->rulenum > rulenum) | |
| 3239 | return 0; /* XXX error? */ | |
| 3240 | ||
| 3241 | bzero(&dmsg, sizeof(dmsg)); | |
| 3242 | nmsg = &dmsg.nmsg; | |
| 3243 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3244 | ipfw_alt_move_rule_dispatch); | |
| 3245 | dmsg.start_rule = rule; | |
| 3246 | dmsg.rulenum = rulenum; | |
| 3247 | dmsg.to_set = set; | |
| 3248 | ||
| c4882b7e | 3249 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3250 | KKASSERT(dmsg.start_rule == NULL); |
| 3251 | return 0; | |
| 3252 | } | |
| 3253 | ||
| 3254 | static void | |
| 3255 | ipfw_alt_move_ruleset_dispatch(struct netmsg *nmsg) | |
| 3256 | { | |
| 3257 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 3258 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3259 | struct ip_fw *rule; | |
| 3260 | ||
| 3261 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { | |
| 3262 | if (rule->set == dmsg->from_set) | |
| 3263 | rule->set = dmsg->to_set; | |
| 3264 | } | |
| c4882b7e | 3265 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
3266 | } |
| 3267 | ||
| 3268 | static int | |
| 3269 | ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set) | |
| 3270 | { | |
| 3271 | struct netmsg_del dmsg; | |
| 3272 | struct netmsg *nmsg; | |
| 3273 | ||
| 3274 | bzero(&dmsg, sizeof(dmsg)); | |
| 3275 | nmsg = &dmsg.nmsg; | |
| 3276 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3277 | ipfw_alt_move_ruleset_dispatch); | |
| 3278 | dmsg.from_set = from_set; | |
| 3279 | dmsg.to_set = to_set; | |
| 3280 | ||
| c4882b7e | 3281 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3282 | return 0; |
| 3283 | } | |
| 3284 | ||
| 3285 | static void | |
| 3286 | ipfw_alt_swap_ruleset_dispatch(struct netmsg *nmsg) | |
| 3287 | { | |
| 3288 | struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; | |
| 3289 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3290 | struct ip_fw *rule; | |
| 3291 | ||
| 3292 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { | |
| 3293 | if (rule->set == dmsg->from_set) | |
| 3294 | rule->set = dmsg->to_set; | |
| 3295 | else if (rule->set == dmsg->to_set) | |
| 3296 | rule->set = dmsg->from_set; | |
| 3297 | } | |
| c4882b7e | 3298 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
3299 | } |
| 3300 | ||
| 3301 | static int | |
| 3302 | ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2) | |
| 3303 | { | |
| 3304 | struct netmsg_del dmsg; | |
| 3305 | struct netmsg *nmsg; | |
| 3306 | ||
| 3307 | bzero(&dmsg, sizeof(dmsg)); | |
| 3308 | nmsg = &dmsg.nmsg; | |
| 3309 | netmsg_init(nmsg, &curthread->td_msgport, 0, | |
| 3310 | ipfw_alt_swap_ruleset_dispatch); | |
| 3311 | dmsg.from_set = set1; | |
| 3312 | dmsg.to_set = set2; | |
| 3313 | ||
| c4882b7e | 3314 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 | 3315 | return 0; |
| 984263bc MD |
3316 | } |
| 3317 | ||
| 3318 | /** | |
| 3319 | * Remove all rules with given number, and also do set manipulation. | |
| 3320 | * | |
| a998c492 | 3321 | * The argument is an uint32_t. The low 16 bit are the rule or set number, |
| 984263bc MD |
3322 | * the next 8 bits are the new set, the top 8 bits are the command: |
| 3323 | * | |
| 3324 | * 0 delete rules with given number | |
| 3325 | * 1 delete rules with given set number | |
| 3326 | * 2 move rules with given number to new set | |
| 3327 | * 3 move rules with given set number to new set | |
| 3328 | * 4 swap sets with given numbers | |
| 3329 | */ | |
| 3330 | static int | |
| ca12e259 | 3331 | ipfw_ctl_alter(uint32_t arg) |
| 984263bc | 3332 | { |
| a998c492 SZ |
3333 | uint16_t rulenum; |
| 3334 | uint8_t cmd, new_set; | |
| ca12e259 | 3335 | int error = 0; |
| 984263bc MD |
3336 | |
| 3337 | rulenum = arg & 0xffff; | |
| 3338 | cmd = (arg >> 24) & 0xff; | |
| 3339 | new_set = (arg >> 16) & 0xff; | |
| 3340 | ||
| 3341 | if (cmd > 4) | |
| 3342 | return EINVAL; | |
| ca12e259 | 3343 | if (new_set >= IPFW_DEFAULT_SET) |
| 984263bc MD |
3344 | return EINVAL; |
| 3345 | if (cmd == 0 || cmd == 2) { | |
| 3346 | if (rulenum == IPFW_DEFAULT_RULE) | |
| 3347 | return EINVAL; | |
| 3348 | } else { | |
| ca12e259 | 3349 | if (rulenum >= IPFW_DEFAULT_SET) |
| 984263bc MD |
3350 | return EINVAL; |
| 3351 | } | |
| 3352 | ||
| 3353 | switch (cmd) { | |
| 3354 | case 0: /* delete rules with given number */ | |
| ca12e259 | 3355 | error = ipfw_alt_delete_rule(rulenum); |
| 984263bc MD |
3356 | break; |
| 3357 | ||
| 3358 | case 1: /* delete all rules with given set number */ | |
| ca12e259 | 3359 | error = ipfw_alt_delete_ruleset(rulenum); |
| 984263bc MD |
3360 | break; |
| 3361 | ||
| 3362 | case 2: /* move rules with given number to new set */ | |
| ca12e259 | 3363 | error = ipfw_alt_move_rule(rulenum, new_set); |
| 984263bc MD |
3364 | break; |
| 3365 | ||
| 3366 | case 3: /* move rules with given set number to new set */ | |
| ca12e259 | 3367 | error = ipfw_alt_move_ruleset(rulenum, new_set); |
| 984263bc MD |
3368 | break; |
| 3369 | ||
| 3370 | case 4: /* swap two sets */ | |
| ca12e259 | 3371 | error = ipfw_alt_swap_ruleset(rulenum, new_set); |
| 984263bc MD |
3372 | break; |
| 3373 | } | |
| ca12e259 | 3374 | return error; |
| 984263bc MD |
3375 | } |
| 3376 | ||
| 3377 | /* | |
| 3378 | * Clear counters for a specific rule. | |
| 3379 | */ | |
| 3380 | static void | |
| 3381 | clear_counters(struct ip_fw *rule, int log_only) | |
| 3382 | { | |
| 3383 | ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); | |
| 3384 | ||
| 3385 | if (log_only == 0) { | |
| 3386 | rule->bcnt = rule->pcnt = 0; | |
| 3387 | rule->timestamp = 0; | |
| 3388 | } | |
| 3389 | if (l->o.opcode == O_LOG) | |
| 3390 | l->log_left = l->max_log; | |
| 3391 | } | |
| 3392 | ||
| ca12e259 SZ |
3393 | static void |
| 3394 | ipfw_zero_entry_dispatch(struct netmsg *nmsg) | |
| 3395 | { | |
| 3396 | struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg; | |
| 3397 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3398 | struct ip_fw *rule; | |
| 3399 | ||
| 3400 | if (zmsg->rulenum == 0) { | |
| 3401 | KKASSERT(zmsg->start_rule == NULL); | |
| 3402 | ||
| 3403 | ctx->ipfw_norule_counter = 0; | |
| 3404 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) | |
| 3405 | clear_counters(rule, zmsg->log_only); | |
| 3406 | } else { | |
| 3407 | struct ip_fw *start = zmsg->start_rule; | |
| 3408 | ||
| 3409 | KKASSERT(start->cpuid == mycpuid); | |
| 3410 | KKASSERT(start->rulenum == zmsg->rulenum); | |
| 3411 | ||
| 3412 | /* | |
| 3413 | * We can have multiple rules with the same number, so we | |
| 3414 | * need to clear them all. | |
| 3415 | */ | |
| 3416 | for (rule = start; rule && rule->rulenum == zmsg->rulenum; | |
| 3417 | rule = rule->next) | |
| 3418 | clear_counters(rule, zmsg->log_only); | |
| 3419 | ||
| 3420 | /* | |
| 3421 | * Move to the position on the next CPU | |
| 3422 | * before the msg is forwarded. | |
| 3423 | */ | |
| 3424 | zmsg->start_rule = start->sibling; | |
| 3425 | } | |
| c4882b7e | 3426 | ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); |
| ca12e259 SZ |
3427 | } |
| 3428 | ||
| 984263bc MD |
3429 | /** |
| 3430 | * Reset some or all counters on firewall rules. | |
| 3431 | * @arg frwl is null to clear all entries, or contains a specific | |
| 3432 | * rule number. | |
| 3433 | * @arg log_only is 1 if we only want to reset logs, zero otherwise. | |
| 3434 | */ | |
| 3435 | static int | |
| ca12e259 | 3436 | ipfw_ctl_zero_entry(int rulenum, int log_only) |
| 984263bc | 3437 | { |
| ca12e259 SZ |
3438 | struct netmsg_zent zmsg; |
| 3439 | struct netmsg *nmsg; | |
| 3440 | const char *msg; | |
| 3441 | struct ipfw_context *ctx = ipfw_ctx[mycpuid]; | |
| 3442 | ||
| 3443 | bzero(&zmsg, sizeof(zmsg)); | |
| 3444 | nmsg = &zmsg.nmsg; | |
| 3445 | netmsg_init(nmsg, &curthread->td_msgport, 0, ipfw_zero_entry_dispatch); | |
| 3446 | zmsg.log_only = log_only; | |
| 984263bc MD |
3447 | |
| 3448 | if (rulenum == 0) { | |
| 2420f1d6 SZ |
3449 | msg = log_only ? "ipfw: All logging counts reset.\n" |
| 3450 | : "ipfw: Accounting cleared.\n"; | |
| 984263bc | 3451 | } else { |
| ca12e259 | 3452 | struct ip_fw *rule; |
| 2420f1d6 | 3453 | |
| 984263bc | 3454 | /* |
| ca12e259 | 3455 | * Locate the first rule with 'rulenum' |
| 984263bc | 3456 | */ |
| ca12e259 SZ |
3457 | for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { |
| 3458 | if (rule->rulenum == rulenum) | |
| 984263bc | 3459 | break; |
| 2420f1d6 | 3460 | } |
| ca12e259 | 3461 | if (rule == NULL) /* we did not find any matching rules */ |
| 984263bc | 3462 | return (EINVAL); |
| ca12e259 SZ |
3463 | zmsg.start_rule = rule; |
| 3464 | zmsg.rulenum = rulenum; | |
| 3465 | ||
| 2420f1d6 SZ |
3466 | msg = log_only ? "ipfw: Entry %d logging count reset.\n" |
| 3467 | : "ipfw: Entry %d cleared.\n"; | |
| 984263bc | 3468 | } |
| c4882b7e | 3469 | ifnet_domsg(&nmsg->nm_lmsg, 0); |
| ca12e259 SZ |
3470 | KKASSERT(zmsg.start_rule == NULL); |
| 3471 | ||
| 984263bc MD |
3472 | if (fw_verbose) |
| 3473 | log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); | |
| 3474 | return (0); | |
| 3475 | } | |
| 3476 | ||
| 3477 | /* | |
| 3478 | * Check validity of the structure before insert. | |
| 3479 | * Fortunately rules are simple, so this mostly needs to check rule sizes. | |
| 3480 | */ | |
| 3481 | static int | |
| ca12e259 | 3482 | ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) |
| 984263bc MD |
3483 | { |
| 3484 | int l, cmdlen = 0; | |
| b78533e2 | 3485 | int have_action = 0; |
| 984263bc MD |
3486 | ipfw_insn *cmd; |
| 3487 | ||
| ca12e259 SZ |
3488 | *rule_flags = 0; |
| 3489 | ||
| b78533e2 | 3490 | /* Check for valid size */ |
| 984263bc | 3491 | if (size < sizeof(*rule)) { |
| 4b1cf444 | 3492 | kprintf("ipfw: rule too short\n"); |
| b78533e2 | 3493 | return EINVAL; |
| 984263bc | 3494 | } |
| b78533e2 | 3495 | l = IOC_RULESIZE(rule); |
| 984263bc | 3496 | if (l != size) { |
| 4b1cf444 | 3497 | kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); |
| b78533e2 | 3498 | return EINVAL; |
| 984263bc | 3499 | } |
| b78533e2 | 3500 | |
| ca12e259 SZ |
3501 | /* Check rule number */ |
| 3502 | if (rule->rulenum == IPFW_DEFAULT_RULE) { | |
| 3503 | kprintf("ipfw: invalid rule number\n"); | |
| 3504 | return EINVAL; | |
| 3505 | } | |
| 3506 | ||
| 984263bc MD |
3507 | /* |
| 3508 | * Now go for the individual checks. Very simple ones, basically only | |
| 3509 | * instruction sizes. | |
| 3510 | */ | |
| b78533e2 SZ |
3511 | for (l = rule->cmd_len, cmd = rule->cmd; l > 0; |
| 3512 | l -= cmdlen, cmd += cmdlen) { | |
| 984263bc MD |
3513 | cmdlen = F_LEN(cmd); |
| 3514 | if (cmdlen > l) { | |
| 4b1cf444 | 3515 | kprintf("ipfw: opcode %d size truncated\n", |
| 2420f1d6 | 3516 | cmd->opcode); |
| 984263bc MD |
3517 | return EINVAL; |
| 3518 | } | |
| ca12e259 | 3519 | |
| 8d0865c8 | 3520 | DPRINTF("ipfw: opcode %d\n", cmd->opcode); |
| ca12e259 SZ |
3521 | |
| 3522 | if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT) { | |
| 3523 | /* This rule will create states */ | |
| 3524 | *rule_flags |= IPFW_RULE_F_STATE; | |
| 3525 | } | |
| 3526 | ||
| 984263bc MD |
3527 | switch (cmd->opcode) { |
| 3528 | case O_NOP: | |
| 3529 | case O_PROBE_STATE: | |
| 3530 | case O_KEEP_STATE: | |
| 3531 | case O_PROTO: | |
| 3532 | case O_IP_SRC_ME: | |
| 3533 | case O_IP_DST_ME: | |
| 3534 | case O_LAYER2: | |
| 3535 | case O_IN: | |
| 3536 | case O_FRAG: | |
| 3537 | case O_IPOPT: | |
| 3538 | case O_IPLEN: | |
| 3539 | case O_IPID: | |
| 3540 | case O_IPTOS: | |
| 3541 | case O_IPPRECEDENCE: | |
| 3542 | case O_IPTTL: | |
| 3543 | case O_IPVER: | |
| 3544 | case O_TCPWIN: | |
| 3545 | case O_TCPFLAGS: | |
| 3546 | case O_TCPOPTS: | |
| 3547 | case O_ESTAB: | |
| 3548 | if (cmdlen != F_INSN_SIZE(ipfw_insn)) | |
| 3549 | goto bad_size; | |
| 3550 | break; | |
| 3551 | ||
| 3552 | case O_UID: | |
| 3553 | case O_GID: | |
| 3554 | case O_IP_SRC: | |
| 3555 | case O_IP_DST: | |
| 3556 | case O_TCPSEQ: | |
| 3557 | case O_TCPACK: | |
| 3558 | case O_PROB: | |
| 3559 | case O_ICMPTYPE: | |
| 3560 | if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) | |
| 3561 | goto bad_size; | |
| 3562 | break; | |
| 3563 | ||
| 3564 | case O_LIMIT: | |
| 3565 | if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) | |
| 3566 | goto bad_size; | |
| 3567 | break; | |
| 3568 | ||
| 3569 | case O_LOG: | |
| 3570 | if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) | |
| 3571 | goto bad_size; | |
| 3572 | ||
| 3573 | ((ipfw_insn_log *)cmd)->log_left = | |
| 3574 | ((ipfw_insn_log *)cmd)->max_log; | |
| 3575 | ||
| 3576 | break; | |
| 3577 | ||
| 3578 | case O_IP_SRC_MASK: | |
| 3579 | case O_IP_DST_MASK: | |
| 3580 | if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) | |
| 3581 | goto bad_size; | |
| 3582 | if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { | |
| 4b1cf444 | 3583 | kprintf("ipfw: opcode %d, useless rule\n", |
| 984263bc MD |
3584 | cmd->opcode); |
| 3585 | return EINVAL; | |
| 3586 | } | |
| 3587 | break; | |
| 3588 | ||
| 3589 | case O_IP_SRC_SET: | |
| 3590 | case O_IP_DST_SET: | |
| 3591 | if (cmd->arg1 == 0 || cmd->arg1 > 256) { | |
| 4b1cf444 | 3592 | kprintf("ipfw: invalid set size %d\n", |
| 984263bc MD |
3593 | cmd->arg1); |
| 3594 | return EINVAL; | |
| 3595 | } | |
| 3596 | if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + | |
| 3597 | (cmd->arg1+31)/32 ) | |
| 3598 | goto bad_size; | |
| 3599 | break; | |
| 3600 | ||
| 3601 | case O_MACADDR2: | |
| 3602 | if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) | |
| 3603 | goto bad_size; | |
| 3604 | break; | |
| 3605 | ||
| 3606 | case O_MAC_TYPE: | |
| 3607 | case O_IP_SRCPORT: | |
| 3608 | case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ | |
| 3609 | if (cmdlen < 2 || cmdlen > 31) | |
| 3610 | goto bad_size; | |
| 3611 | break; | |
| 3612 | ||
| 3613 | case O_RECV: | |
| 3614 | case O_XMIT: | |
| 3615 | case O_VIA: | |
| 3616 | if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) | |
| 3617 | goto bad_size; | |
| 3618 | break; | |
| 3619 | ||
| 3620 | case O_PIPE: | |
| 3621 | case O_QUEUE: | |
| 3622 | if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) | |
| 3623 | goto bad_size; | |
| 3624 | goto check_action; | |
| 3625 | ||
| 3626 | case O_FORWARD_IP: | |
| d5db91c4 | 3627 | if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { |
| 984263bc | 3628 | goto bad_size; |
| d5db91c4 SZ |