ed1f0be2 1/* $OpenBSD: pf.c,v 1.614 2008/08/02 12:34:37 henning Exp $ */
2
3/*
4 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
5 *
6 * Copyright (c) 2001 Daniel Hartmeier
ed1f0be2 7 * Copyright (c) 2002 - 2008 Henning Brauer
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * - Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials provided
19 * with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Effort sponsored in part by the Defense Advanced Research Projects
35 * Agency (DARPA) and Air Force Research Laboratory, Air Force
36 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
37 *
38 */
39
40#include "opt_inet.h"
41#include "opt_inet6.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/filio.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/kernel.h>
51#include <sys/time.h>
52#include <sys/sysctl.h>
53#include <sys/endian.h>
54#include <sys/proc.h>
55#include <sys/kthread.h>
56
57#include <machine/inttypes.h>
58
59#include <sys/md5.h>
60
61#include <net/if.h>
62#include <net/if_types.h>
63#include <net/bpf.h>
4599cf19 64#include <net/netisr.h>
65#include <net/route.h>
66
67#include <netinet/in.h>
68#include <netinet/in_var.h>
69#include <netinet/in_systm.h>
70#include <netinet/ip.h>
71#include <netinet/ip_var.h>
72#include <netinet/tcp.h>
73#include <netinet/tcp_seq.h>
74#include <netinet/udp.h>
75#include <netinet/ip_icmp.h>
76#include <netinet/in_pcb.h>
77#include <netinet/tcp_timer.h>
78#include <netinet/tcp_var.h>
79#include <netinet/udp_var.h>
80#include <netinet/icmp_var.h>
70224baa 81#include <netinet/if_ether.h>
82
83#include <net/pf/pfvar.h>
84#include <net/pf/if_pflog.h>
85
02742ec6 86#include <net/pf/if_pfsync.h>
87
88#ifdef INET6
89#include <netinet/ip6.h>
90#include <netinet/in_pcb.h>
91#include <netinet/icmp6.h>
92#include <netinet6/nd6.h>
93#include <netinet6/ip6_var.h>
94#include <netinet6/in6_pcb.h>
95#endif /* INET6 */
96
97#include <sys/in_cksum.h>
4599cf19 98#include <sys/ucred.h>
99#include <machine/limits.h>
100#include <sys/msgport2.h>
4599cf19 101#include <net/netmsg2.h>
102
103extern int ip_optcopy(struct ip *, struct ip *);
70224baa 104extern int debug_pfugidhack;
02742ec6 105
a3c18566 106struct lwkt_token pf_token = LWKT_TOKEN_INITIALIZER(pf_token);
2a7a2b1c 107
4b1cf444 108#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x
109
110/*
111 * Global variables
112 */
113
114/* mask radix tree */
115struct radix_node_head *pf_maskhead;
116
315a7da3 117/* state tables */
ed1f0be2 118struct pf_state_tree pf_statetbl;
315a7da3 119
120struct pf_altqqueue pf_altqs[2];
121struct pf_palist pf_pabuf;
122struct pf_altqqueue *pf_altqs_active;
123struct pf_altqqueue *pf_altqs_inactive;
124struct pf_status pf_status;
125
126u_int32_t ticket_altqs_active;
127u_int32_t ticket_altqs_inactive;
128int altqs_inactive_open;
129u_int32_t ticket_pabuf;
130
131MD5_CTX pf_tcp_secret_ctx;
132u_char pf_tcp_secret[16];
133int pf_tcp_secret_init;
134int pf_tcp_iss_off;
135
136struct pf_anchor_stackframe {
137 struct pf_ruleset *rs;
138 struct pf_rule *r;
139 struct pf_anchor_node *parent;
140 struct pf_anchor *child;
141} pf_anchor_stack[64];
02742ec6 142
143struct malloc_type *pf_src_tree_pl, *pf_rule_pl, *pf_pooladdr_pl;
144struct malloc_type *pf_state_pl, *pf_state_key_pl, *pf_state_item_pl;
145struct malloc_type *pf_altq_pl;
146
147void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
02742ec6 148
149void pf_init_threshold(struct pf_threshold *, u_int32_t,
150 u_int32_t);
151void pf_add_threshold(struct pf_threshold *);
152int pf_check_threshold(struct pf_threshold *);
153
154void pf_change_ap(struct pf_addr *, u_int16_t *,
155 u_int16_t *, u_int16_t *, struct pf_addr *,
156 u_int16_t, u_int8_t, sa_family_t);
157int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
158 struct tcphdr *, struct pf_state_peer *);
159#ifdef INET6
160void pf_change_a6(struct pf_addr *, u_int16_t *,
161 struct pf_addr *, u_int8_t);
162#endif /* INET6 */
163void pf_change_icmp(struct pf_addr *, u_int16_t *,
164 struct pf_addr *, struct pf_addr *, u_int16_t,
165 u_int16_t *, u_int16_t *, u_int16_t *,
166 u_int16_t *, u_int8_t, sa_family_t);
167void pf_send_tcp(const struct pf_rule *, sa_family_t,
168 const struct pf_addr *, const struct pf_addr *,
169 u_int16_t, u_int16_t, u_int32_t, u_int32_t,
170 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
171 u_int16_t, struct ether_header *, struct ifnet *);
172void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
173 sa_family_t, struct pf_rule *);
174struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
175 int, int, struct pfi_kif *,
176 struct pf_addr *, u_int16_t, struct pf_addr *,
177 u_int16_t, int);
178struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
179 int, int, struct pfi_kif *, struct pf_src_node **,
180 struct pf_state_key **, struct pf_state_key **,
181 struct pf_state_key **, struct pf_state_key **,
182 struct pf_addr *, struct pf_addr *,
183 u_int16_t, u_int16_t);
184void pf_detach_state(struct pf_state *);
185int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *,
186 struct pf_state_key **, struct pf_state_key **,
187 struct pf_state_key **, struct pf_state_key **,
188 struct pf_addr *, struct pf_addr *,
189 u_int16_t, u_int16_t);
190void pf_state_key_detach(struct pf_state *, int);
191u_int32_t pf_tcp_iss(struct pf_pdesc *);
315a7da3 192int pf_test_rule(struct pf_rule **, struct pf_state **,
193 int, struct pfi_kif *, struct mbuf *, int,
194 void *, struct pf_pdesc *, struct pf_rule **,
70224baa 195 struct pf_ruleset **, struct ifqueue *, struct inpcb *);
196static __inline int pf_create_state(struct pf_rule *, struct pf_rule *,
197 struct pf_rule *, struct pf_pdesc *,
198 struct pf_src_node *, struct pf_state_key *,
199 struct pf_state_key *, struct pf_state_key *,
200 struct pf_state_key *, struct mbuf *, int,
201 u_int16_t, u_int16_t, int *, struct pfi_kif *,
202 struct pf_state **, int, u_int16_t, u_int16_t,
203 int);
204int pf_test_fragment(struct pf_rule **, int,
205 struct pfi_kif *, struct mbuf *, void *,
206 struct pf_pdesc *, struct pf_rule **,
207 struct pf_ruleset **);
208int pf_tcp_track_full(struct pf_state_peer *,
209 struct pf_state_peer *, struct pf_state **,
210 struct pfi_kif *, struct mbuf *, int,
211 struct pf_pdesc *, u_short *, int *);
212int pf_tcp_track_sloppy(struct pf_state_peer *,
213 struct pf_state_peer *, struct pf_state **,
214 struct pf_pdesc *, u_short *);
215int pf_test_state_tcp(struct pf_state **, int,
216 struct pfi_kif *, struct mbuf *, int,
217 void *, struct pf_pdesc *, u_short *);
218int pf_test_state_udp(struct pf_state **, int,
219 struct pfi_kif *, struct mbuf *, int,
220 void *, struct pf_pdesc *);
221int pf_test_state_icmp(struct pf_state **, int,
222 struct pfi_kif *, struct mbuf *, int,
70224baa 223 void *, struct pf_pdesc *, u_short *);
02742ec6 224int pf_test_state_other(struct pf_state **, int,
ed1f0be2 225 struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
315a7da3 226void pf_step_into_anchor(int *, struct pf_ruleset **, int,
ed1f0be2 227 struct pf_rule **, struct pf_rule **, int *);
228int pf_step_out_of_anchor(int *, struct pf_ruleset **,
229 int, struct pf_rule **, struct pf_rule **,
230 int *);
231void pf_hash(struct pf_addr *, struct pf_addr *,
232 struct pf_poolhashkey *, sa_family_t);
233int pf_map_addr(u_int8_t, struct pf_rule *,
234 struct pf_addr *, struct pf_addr *,
235 struct pf_addr *, struct pf_src_node **);
236int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
237 struct pf_addr *, struct pf_addr *, u_int16_t,
238 struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
239 struct pf_src_node **);
240void pf_route(struct mbuf **, struct pf_rule *, int,
241 struct ifnet *, struct pf_state *,
242 struct pf_pdesc *);
02742ec6 243void pf_route6(struct mbuf **, struct pf_rule *, int,
244 struct ifnet *, struct pf_state *,
245 struct pf_pdesc *);
246u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
247 sa_family_t);
248u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
249 sa_family_t);
250u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
251 u_int16_t);
252void pf_set_rt_ifp(struct pf_state *,
253 struct pf_addr *);
254int pf_check_proto_cksum(struct mbuf *, int, int,
255 u_int8_t, sa_family_t);
256struct pf_divert *pf_get_divert(struct mbuf *);
257void pf_print_state_parts(struct pf_state *,
258 struct pf_state_key *, struct pf_state_key *);
259int pf_addr_wrap_neq(struct pf_addr_wrap *,
260 struct pf_addr_wrap *);
315a7da3 261struct pf_state *pf_find_state(struct pfi_kif *,
ed1f0be2 262 struct pf_state_key_cmp *, u_int, struct mbuf *);
263int pf_src_connlimit(struct pf_state **);
264int pf_check_congestion(struct ifqueue *);
265
266extern int pf_end_threads;
267
268struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
269 { &pf_state_pl, PFSTATE_HIWAT },
270 { &pf_src_tree_pl, PFSNODE_HIWAT },
271 { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
272 { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
273 { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
274};
02742ec6 275
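/*
 * Descriptive note on the macro below: STATE_LOOKUP(i, k, d, s, m) looks up
 * the state for key 'k' on kif 'i' in direction 'd' and stores it in 's'.
 * The packet is dropped when no state exists or the state is already marked
 * PFTM_PURGE.  Outbound packets whose state was created by a route-to rule
 * (or an inbound reply-to rule) and whose rt_kif differs from the current
 * kif are simply passed here without further inspection.
 */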
ed1f0be2 276#define STATE_LOOKUP(i, k, d, s, m) \
02742ec6 277 do { \
278 s = pf_find_state(i, k, d, m); \
279 if (s == NULL || (s)->timeout == PFTM_PURGE) \
02742ec6 280 return (PF_DROP); \
281 if (d == PF_OUT && \
282 (((s)->rule.ptr->rt == PF_ROUTETO && \
283 (s)->rule.ptr->direction == PF_OUT) || \
284 ((s)->rule.ptr->rt == PF_REPLYTO && \
285 (s)->rule.ptr->direction == PF_IN)) && \
286 (s)->rt_kif != NULL && \
287 (s)->rt_kif != i) \
288 return (PF_PASS); \
289 } while (0)
290
291#define BOUND_IFACE(r, k) \
292 ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
293
294#define STATE_INC_COUNTERS(s) \
295 do { \
296 s->rule.ptr->states_cur++; \
297 s->rule.ptr->states_tot++; \
298 if (s->anchor.ptr != NULL) { \
299 s->anchor.ptr->states_cur++; \
300 s->anchor.ptr->states_tot++; \
301 } \
302 if (s->nat_rule.ptr != NULL) { \
303 s->nat_rule.ptr->states_cur++; \
304 s->nat_rule.ptr->states_tot++; \
305 } \
306 } while (0)
307
308#define STATE_DEC_COUNTERS(s) \
309 do { \
310 if (s->nat_rule.ptr != NULL) \
ed1f0be2 311 s->nat_rule.ptr->states_cur--; \
70224baa 312 if (s->anchor.ptr != NULL) \
313 s->anchor.ptr->states_cur--; \
314 s->rule.ptr->states_cur--; \
70224baa 315 } while (0)
02742ec6 316
317static MALLOC_DEFINE(M_PFSTATEPL, "pfstatepl", "pf state pool list");
318static MALLOC_DEFINE(M_PFSRCTREEPL, "pfsrctpl", "pf source tree pool list");
319static MALLOC_DEFINE(M_PFSTATEKEYPL, "pfstatekeypl", "pf state key pool list");
320static MALLOC_DEFINE(M_PFSTATEITEMPL, "pfstateitempl", "pf state item pool list");
321
70224baa 322static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
ed1f0be2 323static __inline int pf_state_compare_key(struct pf_state_key *,
315a7da3 324 struct pf_state_key *);
70224baa 325static __inline int pf_state_compare_id(struct pf_state *,
326 struct pf_state *);
327
328struct pf_src_tree tree_src_tracking;
329
330struct pf_state_tree_id tree_id;
70224baa 331struct pf_state_queue state_list;
332
333RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
ed1f0be2 334RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
02742ec6 335RB_GENERATE(pf_state_tree_id, pf_state,
336 entry_id, pf_state_compare_id);
337
70224baa 338static __inline int
339pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
340{
341 int diff;
342
343 if (a->rule.ptr > b->rule.ptr)
344 return (1);
345 if (a->rule.ptr < b->rule.ptr)
346 return (-1);
347 if ((diff = a->af - b->af) != 0)
348 return (diff);
349 switch (a->af) {
350#ifdef INET
351 case AF_INET:
352 if (a->addr.addr32[0] > b->addr.addr32[0])
353 return (1);
354 if (a->addr.addr32[0] < b->addr.addr32[0])
355 return (-1);
356 break;
357#endif /* INET */
358#ifdef INET6
359 case AF_INET6:
360 if (a->addr.addr32[3] > b->addr.addr32[3])
361 return (1);
362 if (a->addr.addr32[3] < b->addr.addr32[3])
363 return (-1);
364 if (a->addr.addr32[2] > b->addr.addr32[2])
365 return (1);
366 if (a->addr.addr32[2] < b->addr.addr32[2])
367 return (-1);
368 if (a->addr.addr32[1] > b->addr.addr32[1])
369 return (1);
370 if (a->addr.addr32[1] < b->addr.addr32[1])
371 return (-1);
372 if (a->addr.addr32[0] > b->addr.addr32[0])
373 return (1);
374 if (a->addr.addr32[0] < b->addr.addr32[0])
375 return (-1);
376 break;
377#endif /* INET6 */
378 }
379 return (0);
380}
381
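/*
 * The helper below derives a non-zero 32-bit hash from the state key
 * pointer itself; the key's address is stable for its lifetime, so the
 * hash is too.  A computed value of 0 is mapped to 1 because 0 is reserved.
 */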
a814431a 382u_int32_t
315a7da3 383pf_state_hash(struct pf_state_key *sk)
5950bf01 384{
ed1f0be2 385 u_int32_t hv = (u_int32_t)(((intptr_t)sk >> 6) ^ ((intptr_t)sk >> 15));
386 if (hv == 0) /* disallow 0 */
387 hv = 1;
388 return(hv);
389}
390
391#ifdef INET6
392void
393pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
394{
395 switch (af) {
396#ifdef INET
397 case AF_INET:
398 dst->addr32[0] = src->addr32[0];
399 break;
400#endif /* INET */
401 case AF_INET6:
402 dst->addr32[0] = src->addr32[0];
403 dst->addr32[1] = src->addr32[1];
404 dst->addr32[2] = src->addr32[2];
405 dst->addr32[3] = src->addr32[3];
406 break;
407 }
408}
70224baa 409#endif /* INET6 */
02742ec6 410
411void
412pf_init_threshold(struct pf_threshold *threshold,
413 u_int32_t limit, u_int32_t seconds)
414{
415 threshold->limit = limit * PF_THRESHOLD_MULT;
416 threshold->seconds = seconds;
417 threshold->count = 0;
418 threshold->last = time_second;
419}
420
421void
422pf_add_threshold(struct pf_threshold *threshold)
423{
424 u_int32_t t = time_second, diff = t - threshold->last;
425
426 if (diff >= threshold->seconds)
427 threshold->count = 0;
428 else
429 threshold->count -= threshold->count * diff /
430 threshold->seconds;
431 threshold->count += PF_THRESHOLD_MULT;
432 threshold->last = t;
433}
434
435int
436pf_check_threshold(struct pf_threshold *threshold)
437{
438 return (threshold->count > threshold->limit);
439}
440
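/*
 * pf_src_connlimit() enforces per-source connection limits once a TCP
 * connection is established.  If max-src-conn or max-src-conn-rate is
 * exceeded, the offending address may be inserted into the rule's overload
 * table and, when the rule requests a flush, that source's states (only
 * those of the same rule unless PF_FLUSH_GLOBAL is set) are scheduled for
 * purging.  Returns 1 when the current state itself has been marked for
 * purge.
 */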
441int
442pf_src_connlimit(struct pf_state **state)
443{
444 int bad = 0;
445
446 (*state)->src_node->conn++;
447 (*state)->src.tcp_est = 1;
448 pf_add_threshold(&(*state)->src_node->conn_rate);
449
450 if ((*state)->rule.ptr->max_src_conn &&
451 (*state)->rule.ptr->max_src_conn <
452 (*state)->src_node->conn) {
453 pf_status.lcounters[LCNT_SRCCONN]++;
454 bad++;
455 }
456
457 if ((*state)->rule.ptr->max_src_conn_rate.limit &&
458 pf_check_threshold(&(*state)->src_node->conn_rate)) {
459 pf_status.lcounters[LCNT_SRCCONNRATE]++;
460 bad++;
461 }
462
463 if (!bad)
464 return (0);
465
466 if ((*state)->rule.ptr->overload_tbl) {
467 struct pfr_addr p;
468 u_int32_t killed = 0;
469
470 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
471 if (pf_status.debug >= PF_DEBUG_MISC) {
472 kprintf("pf_src_connlimit: blocking address ");
473 pf_print_host(&(*state)->src_node->addr, 0,
ed1f0be2 474 (*state)->key[PF_SK_WIRE]->af);
475 }
476
477 bzero(&p, sizeof(p));
478 p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
479 switch ((*state)->key[PF_SK_WIRE]->af) {
480#ifdef INET
481 case AF_INET:
482 p.pfra_net = 32;
483 p.pfra_ip4addr = (*state)->src_node->addr.v4;
484 break;
485#endif /* INET */
486#ifdef INET6
487 case AF_INET6:
488 p.pfra_net = 128;
489 p.pfra_ip6addr = (*state)->src_node->addr.v6;
490 break;
491#endif /* INET6 */
492 }
493
494 pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
495 &p, time_second);
496
497 /* kill existing states if that's required. */
498 if ((*state)->rule.ptr->flush) {
499 struct pf_state_key *sk;
500 struct pf_state *st;
70224baa 501
502 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
503 RB_FOREACH(st, pf_state_tree_id, &tree_id) {
ed1f0be2 504 sk = st->key[PF_SK_WIRE];
505 /*
506 * Kill states from this source. (Only those
507 * from the same rule if PF_FLUSH_GLOBAL is not
508 * set)
509 */
315a7da3 510 if (sk->af ==
511 (*state)->key[PF_SK_WIRE]->af &&
512 (((*state)->direction == PF_OUT &&
70224baa 513 PF_AEQ(&(*state)->src_node->addr,
514 &sk->addr[0], sk->af)) ||
515 ((*state)->direction == PF_IN &&
70224baa 516 PF_AEQ(&(*state)->src_node->addr,
ed1f0be2 517 &sk->addr[1], sk->af))) &&
518 ((*state)->rule.ptr->flush &
519 PF_FLUSH_GLOBAL ||
520 (*state)->rule.ptr == st->rule.ptr)) {
521 st->timeout = PFTM_PURGE;
522 st->src.state = st->dst.state =
523 TCPS_CLOSED;
524 killed++;
525 }
526 }
527 if (pf_status.debug >= PF_DEBUG_MISC)
528 kprintf(", %u states killed", killed);
529 }
530 if (pf_status.debug >= PF_DEBUG_MISC)
531 kprintf("\n");
532 }
533
534 /* kill this state */
535 (*state)->timeout = PFTM_PURGE;
536 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
537 return (1);
538}
539
540int
541pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
542 struct pf_addr *src, sa_family_t af)
543{
544 struct pf_src_node k;
545
546 if (*sn == NULL) {
547 k.af = af;
548 PF_ACPY(&k.addr, src, af);
549 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
550 rule->rpool.opts & PF_POOL_STICKYADDR)
551 k.rule.ptr = rule;
552 else
553 k.rule.ptr = NULL;
554 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
555 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
556 }
557 if (*sn == NULL) {
558 if (!rule->max_src_nodes ||
559 rule->src_nodes < rule->max_src_nodes)
1186cbc0 560 (*sn) = kmalloc(sizeof(struct pf_src_node), M_PFSRCTREEPL, M_NOWAIT|M_ZERO);
561 else
562 pf_status.lcounters[LCNT_SRCNODES]++;
563 if ((*sn) == NULL)
564 return (-1);
565
566 pf_init_threshold(&(*sn)->conn_rate,
567 rule->max_src_conn_rate.limit,
568 rule->max_src_conn_rate.seconds);
569
570 (*sn)->af = af;
571 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
572 rule->rpool.opts & PF_POOL_STICKYADDR)
573 (*sn)->rule.ptr = rule;
574 else
575 (*sn)->rule.ptr = NULL;
576 PF_ACPY(&(*sn)->addr, src, af);
577 if (RB_INSERT(pf_src_tree,
578 &tree_src_tracking, *sn) != NULL) {
579 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 580 kprintf("pf: src_tree insert failed: ");
02742ec6 581 pf_print_host(&(*sn)->addr, 0, af);
4b1cf444 582 kprintf("\n");
02742ec6 583 }
1186cbc0 584 kfree(*sn, M_PFSRCTREEPL);
585 return (-1);
586 }
587 (*sn)->creation = time_second;
588 (*sn)->ruletype = rule->action;
589 if ((*sn)->rule.ptr != NULL)
590 (*sn)->rule.ptr->src_nodes++;
591 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
592 pf_status.src_nodes++;
593 } else {
594 if (rule->max_src_states &&
595 (*sn)->states >= rule->max_src_states) {
596 pf_status.lcounters[LCNT_SRCSTATES]++;
02742ec6 597 return (-1);
70224baa 598 }
599 }
600 return (0);
601}
602
603/* state table stuff */
604
605static __inline int
606pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
315a7da3 607{
608 int diff;
609
610 if ((diff = a->proto - b->proto) != 0)
611 return (diff);
612 if ((diff = a->af - b->af) != 0)
613 return (diff);
614 switch (a->af) {
615#ifdef INET
616 case AF_INET:
617 if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
618 return (1);
619 if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
620 return (-1);
621 if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
622 return (1);
623 if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
624 return (-1);
625 break;
626#endif /* INET */
627#ifdef INET6
628 case AF_INET6:
629 if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
630 return (1);
631 if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
632 return (-1);
633 if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
634 return (1);
635 if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
636 return (-1);
637 if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
638 return (1);
639 if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
640 return (-1);
641 if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
642 return (1);
643 if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
644 return (-1);
645 if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
646 return (1);
647 if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
648 return (-1);
649 if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
650 return (1);
651 if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
652 return (-1);
653 if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
654 return (1);
655 if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
656 return (-1);
657 if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
658 return (1);
659 if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
660 return (-1);
661 break;
662#endif /* INET6 */
315a7da3 663 }
664
665 if ((diff = a->port[0] - b->port[0]) != 0)
666 return (diff);
667 if ((diff = a->port[1] - b->port[1]) != 0)
668 return (diff);
669
670 return (0);
671}
672
673static __inline int
674pf_state_compare_id(struct pf_state *a, struct pf_state *b)
675{
676 if (a->id > b->id)
677 return (1);
678 if (a->id < b->id)
679 return (-1);
680 if (a->creatorid > b->creatorid)
681 return (1);
682 if (a->creatorid < b->creatorid)
683 return (-1);
684
685 return (0);
686}
687
02742ec6 688int
ed1f0be2 689pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
02742ec6 690{
691 struct pf_state_item *si;
692 struct pf_state_key *cur;
315a7da3 693
ed1f0be2 694 KKASSERT(s->key[idx] == NULL); /* XXX handle this? */
315a7da3 695
ed1f0be2 696 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
315a7da3 697 /* key exists. check for same kif, if none, add to key */
698 TAILQ_FOREACH(si, &cur->states, entry)
699 if (si->s->kif == s->kif &&
700 si->s->direction == s->direction) {
701 if (pf_status.debug >= PF_DEBUG_MISC) {
702 kprintf(
703 "pf: %s key attach failed on %s: ",
704 (idx == PF_SK_WIRE) ?
705 "wire" : "stack",
706 s->kif->pfik_name);
707 pf_print_state_parts(s,
708 (idx == PF_SK_WIRE) ? sk : NULL,
709 (idx == PF_SK_STACK) ? sk : NULL);
710 kprintf("\n");
711 }
1186cbc0 712 kfree(sk, M_PFSTATEKEYPL);
ed1f0be2 713 return (-1); /* collision! */
315a7da3 714 }
715 kfree(sk, M_PFSTATEKEYPL);
716
717 s->key[idx] = cur;
718 } else
719 s->key[idx] = sk;
02742ec6 720
1186cbc0 721 if ((si = kmalloc(sizeof(struct pf_state_item), M_PFSTATEITEMPL, M_NOWAIT)) == NULL) {
ed1f0be2 722 pf_state_key_detach(s, idx);
723 return (-1);
724 }
ed1f0be2 725 si->s = s;
02742ec6 726
727 /* list is sorted, if-bound states before floating */
728 if (s->kif == pfi_all)
729 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
730 else
731 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
732 return (0);
733}
734
735void
ed1f0be2 736pf_detach_state(struct pf_state *s)
02742ec6 737{
738 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
739 s->key[PF_SK_WIRE] = NULL;
70224baa 740
741 if (s->key[PF_SK_STACK] != NULL)
742 pf_state_key_detach(s, PF_SK_STACK);
743
744 if (s->key[PF_SK_WIRE] != NULL)
745 pf_state_key_detach(s, PF_SK_WIRE);
746}
747
748void
749pf_state_key_detach(struct pf_state *s, int idx)
750{
751 struct pf_state_item *si;
752 si = TAILQ_FIRST(&s->key[idx]->states);
753 while (si && si->s != s)
754 si = TAILQ_NEXT(si, entry);
755
756 if (si) {
757 TAILQ_REMOVE(&s->key[idx]->states, si, entry);
1186cbc0 758 kfree(si, M_PFSTATEITEMPL);
759 }
760
761 if (TAILQ_EMPTY(&s->key[idx]->states)) {
762 RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]);
763 if (s->key[idx]->reverse)
764 s->key[idx]->reverse->reverse = NULL;
765 if (s->key[idx]->inp)
766 s->key[idx]->inp->inp_pf_sk = NULL;
1186cbc0 767 kfree(s->key[idx], M_PFSTATEKEYPL);
768 }
769 s->key[idx] = NULL;
770}
771
772struct pf_state_key *
773pf_alloc_state_key(int pool_flags)
774{
775 struct pf_state_key *sk;
776
777 if ((sk = kmalloc(sizeof(struct pf_state_key), M_PFSTATEKEYPL, pool_flags)) == NULL)
778 return (NULL);
779 TAILQ_INIT(&sk->states);
780
781 return (sk);
782}
783
784int
785pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
786 struct pf_state_key **skw, struct pf_state_key **sks,
787 struct pf_state_key **skp, struct pf_state_key **nkp,
788 struct pf_addr *saddr, struct pf_addr *daddr,
789 u_int16_t sport, u_int16_t dport)
790{
791 KKASSERT((*skp == NULL && *nkp == NULL));
792
1186cbc0 793 if ((*skp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
794 return (ENOMEM);
795
796 PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
797 PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
798 (*skp)->port[pd->sidx] = sport;
799 (*skp)->port[pd->didx] = dport;
800 (*skp)->proto = pd->proto;
801 (*skp)->af = pd->af;
802
803 if (nr != NULL) {
1186cbc0 804 if ((*nkp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
805 return (ENOMEM); /* caller must handle cleanup */
806
807 /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
808 PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
809 PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
810 (*nkp)->port[0] = (*skp)->port[0];
811 (*nkp)->port[1] = (*skp)->port[1];
812 (*nkp)->proto = pd->proto;
813 (*nkp)->af = pd->af;
814 } else
815 *nkp = *skp;
816
817 if (pd->dir == PF_IN) {
818 *skw = *skp;
819 *sks = *nkp;
820 } else {
821 *sks = *skp;
822 *skw = *nkp;
823 }
824 return (0);
825}
826
827
828int
829pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
830 struct pf_state_key *sks, struct pf_state *s)
831{
832 s->kif = kif;
833
834 if (skw == sks) {
835 if (pf_state_key_attach(skw, s, PF_SK_WIRE))
836 return (-1);
837 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
838 } else {
839 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
1186cbc0 840 kfree(sks, M_PFSTATEKEYPL);
841 return (-1);
842 }
843 if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
844 pf_state_key_detach(s, PF_SK_WIRE);
845 return (-1);
846 }
847 }
848
849 if (s->id == 0 && s->creatorid == 0) {
850 s->id = htobe64(pf_status.stateid++);
851 s->creatorid = pf_status.hostid;
852 }
853
854 /*
855 * Calculate hash code for altq
856 */
857 s->hash = crc32(s->key[PF_SK_WIRE], sizeof(*sks));
858
859 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
860 if (pf_status.debug >= PF_DEBUG_MISC) {
861 kprintf("pf: state insert failed: "
862 "id: %016jx creatorid: %08x",
863 (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
864 if (s->sync_flags & PFSTATE_FROMSYNC)
865 kprintf(" (from sync)");
866 kprintf("\n");
867 }
868 pf_detach_state(s);
869 return (-1);
870 }
871 TAILQ_INSERT_TAIL(&state_list, s, entry_list);
872 pf_status.fcounters[FCNT_STATE_INSERT]++;
873 pf_status.states++;
874 pfi_kif_ref(kif, PFI_KIF_REF_STATE);
875 pfsync_insert_state(s);
876 return (0);
877}
878
879struct pf_state *
880pf_find_state_byid(struct pf_state_cmp *key)
881{
882 pf_status.fcounters[FCNT_STATE_SEARCH]++;
883
884 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
885}
886
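/*
 * pf_find_state() looks up the state for a packet.  For outbound packets
 * the mbuf may already carry a state key set up earlier in the stack; if
 * that key has a cached 'reverse' pointer the RB-tree lookup is skipped and
 * the reverse key is used directly, otherwise a normal tree lookup is done
 * and the reverse link is established for subsequent packets.
 */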
887struct pf_state *
888pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
889 struct mbuf *m)
890{
891 struct pf_state_key *sk;
892 struct pf_state_item *si;
893
894 pf_status.fcounters[FCNT_STATE_SEARCH]++;
895
896 if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
897 ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
898 sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
899 else {
900 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
901 (struct pf_state_key *)key)) == NULL)
902 return (NULL);
903 if (dir == PF_OUT && m->m_pkthdr.pf.statekey) {
904 ((struct pf_state_key *)
905 m->m_pkthdr.pf.statekey)->reverse = sk;
906 sk->reverse = m->m_pkthdr.pf.statekey;
907 }
908 }
909
910 if (dir == PF_OUT)
911 m->m_pkthdr.pf.statekey = NULL;
912
913 /* list is sorted, if-bound states before floating ones */
914 TAILQ_FOREACH(si, &sk->states, entry)
915 if ((si->s->kif == pfi_all || si->s->kif == kif) &&
916 sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
917 si->s->key[PF_SK_STACK]))
918 return (si->s);
919
920 return (NULL);
921}
922
923struct pf_state *
924pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
925{
926 struct pf_state_key *sk;
927 struct pf_state_item *si, *ret = NULL;
928
929 pf_status.fcounters[FCNT_STATE_SEARCH]++;
930
931 sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
932
933 if (sk != NULL) {
934 TAILQ_FOREACH(si, &sk->states, entry)
935 if (dir == PF_INOUT ||
936 (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
937 si->s->key[PF_SK_STACK]))) {
938 if (more == NULL)
939 return (si->s);
940
941 if (ret)
942 (*more)++;
943 else
944 ret = si;
945 }
946 }
947 return (ret ? ret->s : NULL);
948}
949
950/* END state table stuff */
951
952
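/*
 * Purge thread: wakes up roughly once per second and expires a fraction of
 * the state table (1 + states / PFTM_INTERVAL entries per pass), so the
 * whole table is scanned about once every PFTM_INTERVAL seconds.  Expired
 * fragments and source nodes are purged every PFTM_INTERVAL seconds as well.
 */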
953void
954pf_purge_thread(void *v)
955{
956 int nloops = 0;
957 int locked = 0;
958
959 lwkt_gettoken(&pf_token);
960 for (;;) {
961 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
962
963 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
964
965 if (pf_end_threads) {
1e37b5df 966 pf_purge_expired_states(pf_status.states, 0);
967 pf_purge_expired_fragments();
968 pf_purge_expired_src_nodes(1);
969 pf_end_threads++;
970
971 lockmgr(&pf_consistency_lock, LK_RELEASE);
972 wakeup(pf_purge_thread);
973 kthread_exit();
974 }
975 crit_enter();
976
977 /* process a fraction of the state table every second */
978 if(!pf_purge_expired_states(1 + (pf_status.states
979 / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
02742ec6 980
981 pf_purge_expired_states(1 + (pf_status.states
982 / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
983 }
02742ec6 984
985 /* purge other expired types every PFTM_INTERVAL seconds */
986 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
987 pf_purge_expired_fragments();
988 if (!pf_purge_expired_src_nodes(locked)) {
989 pf_purge_expired_src_nodes(1);
990 }
991 nloops = 0;
992 }
993 crit_exit();
994 lockmgr(&pf_consistency_lock, LK_RELEASE);
995 }
aa1da187 996 lwkt_reltoken(&pf_token);
997}
998
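/*
 * pf_state_expires() returns the absolute time (in seconds) at which a
 * state expires.  With adaptive timeouts configured, the base timeout is
 * scaled down linearly as the state count grows from ADAPTIVE_START
 * towards ADAPTIVE_END:
 *
 *	expire + timeout * (end - states) / (end - start)
 *
 * and once the count reaches 'end' the state is expired immediately.
 */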
999u_int32_t
1000pf_state_expires(const struct pf_state *state)
1001{
1002 u_int32_t timeout;
1003 u_int32_t start;
1004 u_int32_t end;
1005 u_int32_t states;
1006
1007 /* handle all PFTM_* > PFTM_MAX here */
1008 if (state->timeout == PFTM_PURGE)
1009 return (time_second);
1010 if (state->timeout == PFTM_UNTIL_PACKET)
1011 return (0);
70224baa 1012 KKASSERT(state->timeout != PFTM_UNLINKED);
ed1f0be2 1013 KKASSERT(state->timeout < PFTM_MAX);
1014 timeout = state->rule.ptr->timeout[state->timeout];
1015 if (!timeout)
1016 timeout = pf_default_rule.timeout[state->timeout];
1017 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1018 if (start) {
1019 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
ed1f0be2 1020 states = state->rule.ptr->states_cur;
1021 } else {
1022 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1023 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1024 states = pf_status.states;
1025 }
1026 if (end && states > start && start < end) {
1027 if (states < end)
1028 return (state->expire + timeout * (end - states) /
1029 (end - start));
1030 else
1031 return (time_second);
1032 }
1033 return (state->expire + timeout);
1034}
1035
1036int
1037pf_purge_expired_src_nodes(int waslocked)
1038{
1039 struct pf_src_node *cur, *next;
70224baa 1040 int locked = waslocked;
1041
1042 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1043 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1044
1045 if (cur->states <= 0 && cur->expire <= time_second) {
1046 if (! locked) {
1047 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1048 next = RB_NEXT(pf_src_tree,
1049 &tree_src_tracking, cur);
1050 locked = 1;
1051 }
1052 if (cur->rule.ptr != NULL) {
1053 cur->rule.ptr->src_nodes--;
ed1f0be2 1054 if (cur->rule.ptr->states_cur <= 0 &&
1055 cur->rule.ptr->max_src_nodes <= 0)
1056 pf_rm_rule(NULL, cur->rule.ptr);
1057 }
1058 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1059 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1060 pf_status.src_nodes--;
1186cbc0 1061 kfree(cur, M_PFSRCTREEPL);
1062 }
1063 }
1064
1065 if (locked && !waslocked)
1066 lockmgr(&pf_consistency_lock, LK_RELEASE);
1067 return(1);
1068}
1069
1070void
1071pf_src_tree_remove_state(struct pf_state *s)
1072{
1073 u_int32_t timeout;
1074
1075 if (s->src_node != NULL) {
1076 if (s->src.tcp_est)
1077 --s->src_node->conn;
1078 if (--s->src_node->states <= 0) {
1079 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1080 if (!timeout)
1081 timeout =
1082 pf_default_rule.timeout[PFTM_SRC_NODE];
1083 s->src_node->expire = time_second + timeout;
1084 }
1085 }
1086 if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1087 if (--s->nat_src_node->states <= 0) {
1088 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1089 if (!timeout)
1090 timeout =
1091 pf_default_rule.timeout[PFTM_SRC_NODE];
1092 s->nat_src_node->expire = time_second + timeout;
1093 }
1094 }
1095 s->src_node = s->nat_src_node = NULL;
1096}
1097
1098/* callers should be at crit_enter() */
1099void
1100pf_unlink_state(struct pf_state *cur)
02742ec6 1101{
70224baa 1102 if (cur->src.state == PF_TCPS_PROXY_DST) {
1103 /* XXX wire key the right one? */
1104 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
1105 &cur->key[PF_SK_WIRE]->addr[1],
1106 &cur->key[PF_SK_WIRE]->addr[0],
1107 cur->key[PF_SK_WIRE]->port[1],
1108 cur->key[PF_SK_WIRE]->port[0],
1109 cur->src.seqhi, cur->src.seqlo + 1,
1110 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1111 }
70224baa 1112 RB_REMOVE(pf_state_tree_id, &tree_id, cur);
70224baa 1113 if (cur->creatorid == pf_status.hostid)
f0ea6854 1114 pfsync_delete_state(cur);
1115 cur->timeout = PFTM_UNLINKED;
1116 pf_src_tree_remove_state(cur);
ed1f0be2 1117 pf_detach_state(cur);
1118}
1119
1120static struct pf_state *purge_cur;
1121
1122/* callers should be at crit_enter() and hold the
1123 * write_lock on pf_consistency_lock */
f0ea6854 1124void
70224baa 1125pf_free_state(struct pf_state *cur)
f0ea6854 1126{
1127 if (pfsyncif != NULL &&
1128 (pfsyncif->sc_bulk_send_next == cur ||
1129 pfsyncif->sc_bulk_terminator == cur))
1130 return;
70224baa 1131 KKASSERT(cur->timeout == PFTM_UNLINKED);
ed1f0be2 1132 if (--cur->rule.ptr->states_cur <= 0 &&
1133 cur->rule.ptr->src_nodes <= 0)
1134 pf_rm_rule(NULL, cur->rule.ptr);
1135 if (cur->nat_rule.ptr != NULL)
ed1f0be2 1136 if (--cur->nat_rule.ptr->states_cur <= 0 &&
1137 cur->nat_rule.ptr->src_nodes <= 0)
1138 pf_rm_rule(NULL, cur->nat_rule.ptr);
1139 if (cur->anchor.ptr != NULL)
ed1f0be2 1140 if (--cur->anchor.ptr->states_cur <= 0)
1141 pf_rm_rule(NULL, cur->anchor.ptr);
1142 pf_normalize_tcp_cleanup(cur);
315a7da3 1143 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1144
1145 /*
1146 * We may be freeing pf_purge_expired_states()'s saved scan entry;
1147 * adjust it if necessary.
1148 */
1149 if (purge_cur == cur) {
1150 kprintf("PURGE CONFLICT\n");
1151 purge_cur = TAILQ_NEXT(purge_cur, entry_list);
1152 }
315a7da3 1153 TAILQ_REMOVE(&state_list, cur, entry_list);
1154 if (cur->tag)
1155 pf_tag_unref(cur->tag);
1186cbc0 1156 kfree(cur, M_PFSTATEPL);
1157 pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1158 pf_status.states--;
1159}
1160
1161int
1162pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1163{
aa1da187 1164 struct pf_state *cur;
1165 int locked = waslocked;
1166
1167 while (maxcheck--) {
1168 /*
1169 * Wrap to start of list when we hit the end
1170 */
1171 cur = purge_cur;
1172 if (cur == NULL) {
1173 cur = TAILQ_FIRST(&state_list);
1174 if (cur == NULL)
1175 break; /* list empty */
1176 }
1177
1178 /*
1179 * Set up next (purge_cur) while we process this one. If we block and
1180 * something else deletes purge_cur, pf_free_state() will adjust it further
1181 * ahead.
1182 */
1183 purge_cur = TAILQ_NEXT(cur, entry_list);
1184
1185 if (cur->timeout == PFTM_UNLINKED) {
1186 /* free unlinked state */
1187 if (! locked) {
1188 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1189 locked = 1;
1190 }
1191 pf_free_state(cur);
1192 } else if (pf_state_expires(cur) <= time_second) {
1193 /* unlink and free expired state */
1194 pf_unlink_state(cur);
1195 if (! locked) {
1196 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
1197 return (0);
1198 locked = 1;
1199 }
1200 pf_free_state(cur);
1201 }
1202 }
1203
1204 if (locked)
1205 lockmgr(&pf_consistency_lock, LK_RELEASE);
1206 return (1);
1207}
f0ea6854 1208
1209int
1210pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1211{
1212 if (aw->type != PF_ADDR_TABLE)
1213 return (0);
1214 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1215 return (1);
1216 return (0);
1217}
1218
1219void
1220pf_tbladdr_remove(struct pf_addr_wrap *aw)
1221{
1222 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1223 return;
1224 pfr_detach_table(aw->p.tbl);
1225 aw->p.tbl = NULL;
1226}
1227
1228void
1229pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1230{
1231 struct pfr_ktable *kt = aw->p.tbl;
1232
1233 if (aw->type != PF_ADDR_TABLE || kt == NULL)
1234 return;
1235 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1236 kt = kt->pfrkt_root;
1237 aw->p.tbl = NULL;
1238 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1239 kt->pfrkt_cnt : -1;
1240}
1241
1242void
1243pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1244{
1245 switch (af) {
1246#ifdef INET
1247 case AF_INET: {
1248 u_int32_t a = ntohl(addr->addr32[0]);
4b1cf444 1249 kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1250 (a>>8)&255, a&255);
1251 if (p) {
1252 p = ntohs(p);
4b1cf444 1253 kprintf(":%u", p);
1254 }
1255 break;
1256 }
1257#endif /* INET */
1258#ifdef INET6
1259 case AF_INET6: {
1260 u_int16_t b;
1261 u_int8_t i, curstart = 255, curend = 0,
1262 maxstart = 0, maxend = 0;
1263 for (i = 0; i < 8; i++) {
1264 if (!addr->addr16[i]) {
1265 if (curstart == 255)
1266 curstart = i;
1267 else
1268 curend = i;
1269 } else {
1270 if (curstart) {
1271 if ((curend - curstart) >
1272 (maxend - maxstart)) {
1273 maxstart = curstart;
1274 maxend = curend;
1275 curstart = 255;
1276 }
1277 }
1278 }
1279 }
1280 for (i = 0; i < 8; i++) {
1281 if (i >= maxstart && i <= maxend) {
1282 if (maxend != 7) {
1283 if (i == maxstart)
4b1cf444 1284 kprintf(":");
1285 } else {
1286 if (i == maxend)
4b1cf444 1287 kprintf(":");
1288 }
1289 } else {
1290 b = ntohs(addr->addr16[i]);
4b1cf444 1291 kprintf("%x", b);
02742ec6 1292 if (i < 7)
4b1cf444 1293 kprintf(":");
1294 }
1295 }
1296 if (p) {
1297 p = ntohs(p);
4b1cf444 1298 kprintf("[%u]", p);
1299 }
1300 break;
1301 }
1302#endif /* INET6 */
1303 }
1304}
1305
1306void
1307pf_print_state(struct pf_state *s)
1308{
1309 pf_print_state_parts(s, NULL, NULL);
1310}
1311
1312void
1313pf_print_state_parts(struct pf_state *s,
1314 struct pf_state_key *skwp, struct pf_state_key *sksp)
1315{
1316 struct pf_state_key *skw, *sks;
1317 u_int8_t proto, dir;
1318
1319 /* Do our best to fill these, but they're skipped if NULL */
1320 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1321 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1322 proto = skw ? skw->proto : (sks ? sks->proto : 0);
1323 dir = s ? s->direction : 0;
1324
1325 switch (proto) {
02742ec6 1326 case IPPROTO_TCP:
4b1cf444 1327 kprintf("TCP ");
1328 break;
1329 case IPPROTO_UDP:
4b1cf444 1330 kprintf("UDP ");
1331 break;
1332 case IPPROTO_ICMP:
4b1cf444 1333 kprintf("ICMP ");
1334 break;
1335 case IPPROTO_ICMPV6:
4b1cf444 1336 kprintf("ICMPV6 ");
1337 break;
1338 default:
1339 kprintf("%u ", skw->proto);
1340 break;
1341 }
1342 switch (dir) {
1343 case PF_IN:
1344 kprintf(" in");
1345 break;
1346 case PF_OUT:
1347 kprintf(" out");
1348 break;
1349 }
1350 if (skw) {
1351 kprintf(" wire: ");
1352 pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1353 kprintf(" ");
1354 pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1355 }
1356 if (sks) {
1357 kprintf(" stack: ");
1358 if (sks != skw) {
1359 pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1360 kprintf(" ");
1361 pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1362 } else
1363 kprintf("-");
1364 }
1365 if (s) {
1366 if (proto == IPPROTO_TCP) {
1367 kprintf(" [lo=%u high=%u win=%u modulator=%u",
1368 s->src.seqlo, s->src.seqhi,
1369 s->src.max_win, s->src.seqdiff);
1370 if (s->src.wscale && s->dst.wscale)
1371 kprintf(" wscale=%u",
1372 s->src.wscale & PF_WSCALE_MASK);
1373 kprintf("]");
1374 kprintf(" [lo=%u high=%u win=%u modulator=%u",
1375 s->dst.seqlo, s->dst.seqhi,
1376 s->dst.max_win, s->dst.seqdiff);
1377 if (s->src.wscale && s->dst.wscale)
1378 kprintf(" wscale=%u",
1379 s->dst.wscale & PF_WSCALE_MASK);
1380 kprintf("]");
1381 }
1382 kprintf(" %u:%u", s->src.state, s->dst.state);
1383 }
1384}
1385
1386void
1387pf_print_flags(u_int8_t f)
1388{
1389 if (f)
4b1cf444 1390 kprintf(" ");
02742ec6 1391 if (f & TH_FIN)
4b1cf444 1392 kprintf("F");
02742ec6 1393 if (f & TH_SYN)
4b1cf444 1394 kprintf("S");
02742ec6 1395 if (f & TH_RST)
4b1cf444 1396 kprintf("R");
02742ec6 1397 if (f & TH_PUSH)
4b1cf444 1398 kprintf("P");
02742ec6 1399 if (f & TH_ACK)
4b1cf444 1400 kprintf("A");
02742ec6 1401 if (f & TH_URG)
4b1cf444 1402 kprintf("U");
02742ec6 1403 if (f & TH_ECE)
4b1cf444 1404 kprintf("E");
02742ec6 1405 if (f & TH_CWR)
4b1cf444 1406 kprintf("W");
1407}
1408
1409#define PF_SET_SKIP_STEPS(i) \
1410 do { \
1411 while (head[i] != cur) { \
1412 head[i]->skip[i].ptr = cur; \
1413 head[i] = TAILQ_NEXT(head[i], entries); \
1414 } \
1415 } while (0)
1416
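/*
 * pf_calc_skip_steps() computes pf's "skip steps": for every rule and each
 * of the PF_SKIP_COUNT fields (interface, direction, af, proto, src/dst
 * address and port), it records a pointer to the next rule that differs in
 * that field.  A mismatch during evaluation then lets the rule loop jump
 * over the whole run of rules that would fail for the same reason.
 */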
1417void
1418pf_calc_skip_steps(struct pf_rulequeue *rules)
1419{
1420 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1421 int i;
1422
1423 cur = TAILQ_FIRST(rules);
1424 prev = cur;
1425 for (i = 0; i < PF_SKIP_COUNT; ++i)
1426 head[i] = cur;
1427 while (cur != NULL) {
1428
1429 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1430 PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1431 if (cur->direction != prev->direction)
1432 PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1433 if (cur->af != prev->af)
1434 PF_SET_SKIP_STEPS(PF_SKIP_AF);
1435 if (cur->proto != prev->proto)
1436 PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
70224baa 1437 if (cur->src.neg != prev->src.neg ||
1438 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1439 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1440 if (cur->src.port[0] != prev->src.port[0] ||
1441 cur->src.port[1] != prev->src.port[1] ||
1442 cur->src.port_op != prev->src.port_op)
1443 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
70224baa 1444 if (cur->dst.neg != prev->dst.neg ||
1445 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1446 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1447 if (cur->dst.port[0] != prev->dst.port[0] ||
1448 cur->dst.port[1] != prev->dst.port[1] ||
1449 cur->dst.port_op != prev->dst.port_op)
1450 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1451
1452 prev = cur;
1453 cur = TAILQ_NEXT(cur, entries);
1454 }
1455 for (i = 0; i < PF_SKIP_COUNT; ++i)
1456 PF_SET_SKIP_STEPS(i);
1457}
1458
1459int
1460pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1461{
1462 if (aw1->type != aw2->type)
1463 return (1);
1464 switch (aw1->type) {
1465 case PF_ADDR_ADDRMASK:
ed1f0be2 1466 case PF_ADDR_RANGE:
1467 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1468 return (1);
1469 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1470 return (1);
1471 return (0);
1472 case PF_ADDR_DYNIFTL:
1473 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1474 case PF_ADDR_NOROUTE:
70224baa 1475 case PF_ADDR_URPFFAILED:
1476 return (0);
1477 case PF_ADDR_TABLE:
1478 return (aw1->p.tbl != aw2->p.tbl);
1479 case PF_ADDR_RTLABEL:
1480 return (aw1->v.rtlabel != aw2->v.rtlabel);
02742ec6 1481 default:
4b1cf444 1482 kprintf("invalid address type: %d\n", aw1->type);
1483 return (1);
1484 }
1485}
1486
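/*
 * pf_cksum_fixup() incrementally patches a 16-bit ones-complement checksum
 * when one 16-bit word of the covered data changes from 'old' to 'new'
 * (RFC 1624 style): the difference is added and the carry folded back in.
 * For example, rewriting a port from 0x1234 to 0xabcd adjusts the checksum
 * by (0x1234 - 0xabcd) with end-around carry.  UDP is special-cased: a
 * checksum of 0 means "no checksum" and is preserved, and a computed 0 is
 * transmitted as 0xffff.
 */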
1487u_int16_t
1488pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
1489{
1490 u_int32_t l;
1491
1492 if (udp && !cksum)
1493 return (0x0000);
1494 l = cksum + old - new;
1495 l = (l >> 16) + (l & 65535);
1496 l = l & 65535;
1497 if (udp && !l)
1498 return (0xFFFF);
1499 return (l);
1500}
1501
1502void
1503pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1504 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1505{
1506 struct pf_addr ao;
1507 u_int16_t po = *p;
1508
1509 PF_ACPY(&ao, a, af);
1510 PF_ACPY(a, an, af);
1511
1512 *p = pn;
1513
1514 switch (af) {
1515#ifdef INET
1516 case AF_INET:
1517 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1518 ao.addr16[0], an->addr16[0], 0),
1519 ao.addr16[1], an->addr16[1], 0);
1520 *p = pn;
1521 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1522 ao.addr16[0], an->addr16[0], u),
1523 ao.addr16[1], an->addr16[1], u),
1524 po, pn, u);
1525 break;
1526#endif /* INET */
1527#ifdef INET6
1528 case AF_INET6:
1529 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1530 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1531 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1532 ao.addr16[0], an->addr16[0], u),
1533 ao.addr16[1], an->addr16[1], u),
1534 ao.addr16[2], an->addr16[2], u),
1535 ao.addr16[3], an->addr16[3], u),
1536 ao.addr16[4], an->addr16[4], u),
1537 ao.addr16[5], an->addr16[5], u),
1538 ao.addr16[6], an->addr16[6], u),
1539 ao.addr16[7], an->addr16[7], u),
1540 po, pn, u);
1541 break;
1542#endif /* INET6 */
1543 }
1544}
1545
1546
1547/* Changes a u_int32_t. Uses a void * so there are no align restrictions */
1548void
1549pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
1550{
1551 u_int32_t ao;
1552
1553 memcpy(&ao, a, sizeof(ao));
1554 memcpy(a, &an, sizeof(u_int32_t));
1555 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
1556 ao % 65536, an % 65536, u);
1557}
1558
1559#ifdef INET6
1560void
1561pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1562{
1563 struct pf_addr ao;
1564
1565 PF_ACPY(&ao, a, AF_INET6);
1566 PF_ACPY(a, an, AF_INET6);
1567
1568 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1569 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1570 pf_cksum_fixup(pf_cksum_fixup(*c,
1571 ao.addr16[0], an->addr16[0], u),
1572 ao.addr16[1], an->addr16[1], u),
1573 ao.addr16[2], an->addr16[2], u),
1574 ao.addr16[3], an->addr16[3], u),
1575 ao.addr16[4], an->addr16[4], u),
1576 ao.addr16[5], an->addr16[5], u),
1577 ao.addr16[6], an->addr16[6], u),
1578 ao.addr16[7], an->addr16[7], u);
1579}
1580#endif /* INET6 */
1581
1582void
1583pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1584 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1585 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1586{
1587 struct pf_addr oia, ooa;
1588
1589 PF_ACPY(&oia, ia, af);
1590 if (oa)
1591 PF_ACPY(&ooa, oa, af);
1592
1593 /* Change inner protocol port, fix inner protocol checksum. */
1594 if (ip != NULL) {
1595 u_int16_t oip = *ip;
1596 u_int32_t opc = 0;
1597
1598 if (pc != NULL)
1599 opc = *pc;
1600 *ip = np;
1601 if (pc != NULL)
1602 *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1603 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1604 if (pc != NULL)
1605 *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1606 }
1607 /* Change inner ip address, fix inner ip and icmp checksums. */
1608 PF_ACPY(ia, na, af);
1609 switch (af) {
1610#ifdef INET
1611 case AF_INET: {
1612 u_int32_t oh2c = *h2c;
1613
1614 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1615 oia.addr16[0], ia->addr16[0], 0),
1616 oia.addr16[1], ia->addr16[1], 0);
1617 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1618 oia.addr16[0], ia->addr16[0], 0),
1619 oia.addr16[1], ia->addr16[1], 0);
1620 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1621 break;
1622 }
1623#endif /* INET */
1624#ifdef INET6
1625 case AF_INET6:
1626 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1627 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1628 pf_cksum_fixup(pf_cksum_fixup(*ic,
1629 oia.addr16[0], ia->addr16[0], u),
1630 oia.addr16[1], ia->addr16[1], u),
1631 oia.addr16[2], ia->addr16[2], u),
1632 oia.addr16[3], ia->addr16[3], u),
1633 oia.addr16[4], ia->addr16[4], u),
1634 oia.addr16[5], ia->addr16[5], u),
1635 oia.addr16[6], ia->addr16[6], u),
1636 oia.addr16[7], ia->addr16[7], u);
1637 break;
1638#endif /* INET6 */
1639 }
1640 /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
1641 if (oa) {
1642 PF_ACPY(oa, na, af);
1643 switch (af) {
02742ec6 1644#ifdef INET
1645 case AF_INET:
1646 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1647 ooa.addr16[0], oa->addr16[0], 0),
1648 ooa.addr16[1], oa->addr16[1], 0);
1649 break;
1650#endif /* INET */
1651#ifdef INET6
1652 case AF_INET6:
1653 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1654 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1655 pf_cksum_fixup(pf_cksum_fixup(*ic,
1656 ooa.addr16[0], oa->addr16[0], u),
1657 ooa.addr16[1], oa->addr16[1], u),
1658 ooa.addr16[2], oa->addr16[2], u),
1659 ooa.addr16[3], oa->addr16[3], u),
1660 ooa.addr16[4], oa->addr16[4], u),
1661 ooa.addr16[5], oa->addr16[5], u),
1662 ooa.addr16[6], oa->addr16[6], u),
1663 ooa.addr16[7], oa->addr16[7], u);
1664 break;
02742ec6 1665#endif /* INET6 */
ed1f0be2 1666 }
1667 }
1668}
1669
1670
1671/*
1672 * Need to modulate the sequence numbers in the TCP SACK option
1673 * (credits to Krzysztof Pfaff for report and patch)
1674 */
1675int
1676pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
1677 struct tcphdr *th, struct pf_state_peer *dst)
1678{
1679 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
1680 u_int8_t opts[TCP_MAXOLEN], *opt = opts;
1681 int copyback = 0, i, olen;
1682 struct raw_sackblock sack;
1683
1684#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
1685 if (hlen < TCPOLEN_SACKLEN ||
1686 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
1687 return 0;
1688
1689 while (hlen >= TCPOLEN_SACKLEN) {
1690 olen = opt[1];
1691 switch (*opt) {
1692 case TCPOPT_EOL: /* FALLTHROUGH */
1693 case TCPOPT_NOP:
1694 opt++;
1695 hlen--;
1696 break;
1697 case TCPOPT_SACK:
1698 if (olen > hlen)
1699 olen = hlen;
1700 if (olen >= TCPOLEN_SACKLEN) {
1701 for (i = 2; i + TCPOLEN_SACK <= olen;
1702 i += TCPOLEN_SACK) {
1703 memcpy(&sack, &opt[i], sizeof(sack));
1704 pf_change_a(&sack.rblk_start, &th->th_sum,
4fc5aa1c 1705 htonl(ntohl(sack.rblk_start) -
1706 dst->seqdiff), 0);
1707 pf_change_a(&sack.rblk_end, &th->th_sum,
4fc5aa1c 1708 htonl(ntohl(sack.rblk_end) -
1709 dst->seqdiff), 0);
1710 memcpy(&opt[i], &sack, sizeof(sack));
1711 }
1712 copyback = 1;
1713 }
1714 /* FALLTHROUGH */
1715 default:
1716 if (olen < 2)
1717 olen = 2;
1718 hlen -= olen;
1719 opt += olen;
1720 }
1721 }
1722
1723 if (copyback)
1724 m_copyback(m, off + sizeof(*th), thoptlen, opts);
1725 return (copyback);
1726}
1727
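/*
 * pf_send_tcp() builds and transmits a bare TCP segment (optionally
 * carrying an MSS option) from scratch; pf uses this e.g. for RSTs on
 * blocked or purged connections and for the synproxy handshake.  When an
 * ethernet header and interface are supplied, the frame is sent back out
 * that interface with the source and destination MAC addresses swapped.
 */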
1728void
1729pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1730 const struct pf_addr *saddr, const struct pf_addr *daddr,
1731 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
70224baa
JL
1732 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1733 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
02742ec6
JS
1734{
1735 struct mbuf *m;
1736 int len = 0, tlen;
1737#ifdef INET
1738 struct ip *h = NULL;
1739#endif /* INET */
1740#ifdef INET6
1741 struct ip6_hdr *h6 = NULL;
1742#endif /* INET6 */
1743 struct tcphdr *th = NULL;
70224baa 1744 char *opt;
02742ec6 1745
1746 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1747
1748 /* maximum segment size tcp option */
1749 tlen = sizeof(struct tcphdr);
1750 if (mss)
1751 tlen += 4;
1752
1753 switch (af) {
1754#ifdef INET
1755 case AF_INET:
1756 len = sizeof(struct ip) + tlen;
1757 break;
1758#endif /* INET */
1759#ifdef INET6
1760 case AF_INET6:
1761 len = sizeof(struct ip6_hdr) + tlen;
1762 break;
1763#endif /* INET6 */
1764 }
1765
1766 /*
1767 * Create outgoing mbuf.
1768 *
1769 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
1770 * so make sure pf.flags is clear.
1771 */
02742ec6 1772 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
2a7a2b1c 1773 if (m == NULL) {
02742ec6 1774 return;
2a7a2b1c 1775 }
70224baa 1776 if (tag)
1777 m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1778 m->m_pkthdr.pf.flags = 0;
315a7da3 1779 m->m_pkthdr.pf.tag = rtag;
1780 /* XXX Recheck when upgrading to > 4.4 */
1781 m->m_pkthdr.pf.statekey = NULL;
70224baa 1782 if (r != NULL && r->rtableid >= 0)
02dd99a9 1783 m->m_pkthdr.pf.rtableid = r->rtableid;
70224baa 1784
1785#ifdef ALTQ
1786 if (r != NULL && r->qid) {
1787 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1788 m->m_pkthdr.pf.qid = r->qid;
1789 m->m_pkthdr.pf.ecn_af = af;
1790 m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
02742ec6 1791 }
70224baa 1792#endif /* ALTQ */
1793 m->m_data += max_linkhdr;
1794 m->m_pkthdr.len = m->m_len = len;
1795 m->m_pkthdr.rcvif = NULL;
1796 bzero(m->m_data, len);
1797 switch (af) {
1798#ifdef INET
1799 case AF_INET:
1800 h = mtod(m, struct ip *);
1801
1802 /* IP header fields included in the TCP checksum */
1803 h->ip_p = IPPROTO_TCP;
1804 h->ip_len = tlen;
1805 h->ip_src.s_addr = saddr->v4.s_addr;
1806 h->ip_dst.s_addr = daddr->v4.s_addr;
1807
1808 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1809 break;
1810#endif /* INET */
1811#ifdef INET6
1812 case AF_INET6:
1813 h6 = mtod(m, struct ip6_hdr *);
1814
1815 /* IP header fields included in the TCP checksum */
1816 h6->ip6_nxt = IPPROTO_TCP;
1817 h6->ip6_plen = htons(tlen);
1818 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1819 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1820
1821 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1822 break;
1823#endif /* INET6 */
1824 }
1825
1826 /* TCP header */
1827 th->th_sport = sport;
1828 th->th_dport = dport;
1829 th->th_seq = htonl(seq);
1830 th->th_ack = htonl(ack);
1831 th->th_off = tlen >> 2;
1832 th->th_flags = flags;
1833 th->th_win = htons(win);
1834
1835 if (mss) {
1836 opt = (char *)(th + 1);
1837 opt[0] = TCPOPT_MAXSEG;
1838 opt[1] = 4;
1839 mss = htons(mss);
1840 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1841 }
1842
1843 switch (af) {
1844#ifdef INET
1845 case AF_INET:
1846 /* TCP checksum */
1847 th->th_sum = in_cksum(m, len);
1848
1849 /* Finish the IP header */
1850 h->ip_v = 4;
1851 h->ip_hl = sizeof(*h) >> 2;
1852 h->ip_tos = IPTOS_LOWDELAY;
1853 h->ip_len = len;
1854 h->ip_off = path_mtu_discovery ? IP_DF : 0;
1855 h->ip_ttl = ttl ? ttl : ip_defttl;
1856 h->ip_sum = 0;
70224baa 1857 if (eh == NULL) {
2a7a2b1c 1858 lwkt_reltoken(&pf_token);
70224baa 1859 ip_output(m, NULL, NULL, 0, NULL, NULL);
2a7a2b1c 1860 lwkt_gettoken(&pf_token);
70224baa
JL
1861 } else {
1862 struct route ro;
1863 struct rtentry rt;
1864 struct ether_header *e = (void *)ro.ro_dst.sa_data;
1865
1866 if (ifp == NULL) {
1867 m_freem(m);
1868 return;
1869 }
1870 rt.rt_ifp = ifp;
1871 ro.ro_rt = &rt;
1872 ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1873 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1874 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1875 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1876 e->ether_type = eh->ether_type;
1877 /* XXX_IMPORT: later */
2a7a2b1c 1878 lwkt_reltoken(&pf_token);
a60655aa 1879 ip_output(m, NULL, &ro, 0, NULL, NULL);
2a7a2b1c 1880 lwkt_gettoken(&pf_token);
70224baa 1881 }
02742ec6
JS
1882 break;
1883#endif /* INET */
1884#ifdef INET6
1885 case AF_INET6:
1886 /* TCP checksum */
1887 th->th_sum = in6_cksum(m, IPPROTO_TCP,
1888 sizeof(struct ip6_hdr), tlen);
1889
1890 h6->ip6_vfc |= IPV6_VERSION;
1891 h6->ip6_hlim = IPV6_DEFHLIM;
1892
2a7a2b1c 1893 lwkt_reltoken(&pf_token);
02742ec6 1894 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
2a7a2b1c 1895 lwkt_gettoken(&pf_token);
02742ec6
JS
1896 break;
1897#endif /* INET6 */
1898 }
1899}
1900
1901void
1902pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1903 struct pf_rule *r)
1904{
1905 struct mbuf *m0;
1906
aa1da187
MD
1907 /*
1908 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
1909 * so make sure pf.flags is clear.
1910 */
ed1f0be2
JL
1911 if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
1912 return;
1913
aa1da187
MD
1914 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1915 m0->m_pkthdr.pf.flags = 0;
ed1f0be2
JL
1916 /* XXX Recheck when upgrading to > 4.4 */
1917 m0->m_pkthdr.pf.statekey = NULL;
70224baa
JL
1918
1919 if (r->rtableid >= 0)
315a7da3 1920 m0->m_pkthdr.pf.rtableid = r->rtableid;
02742ec6
JS
1921
1922#ifdef ALTQ
1923 if (r->qid) {
315a7da3
JL
1924 m0->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1925 m0->m_pkthdr.pf.qid = r->qid;
1926 m0->m_pkthdr.pf.ecn_af = af;
1927 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
02742ec6 1928 }
70224baa 1929#endif /* ALTQ */
02742ec6
JS
1930
1931 switch (af) {
1932#ifdef INET
1933 case AF_INET:
745a4a5d 1934 icmp_error(m0, type, code, 0, 0);
02742ec6
JS
1935 break;
1936#endif /* INET */
1937#ifdef INET6
1938 case AF_INET6:
1939 icmp6_error(m0, type, code, 0);
1940 break;
1941#endif /* INET6 */
1942 }
1943}
1944
1945/*
1946 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1947 * If n is 0, they match if they are equal. If n is != 0, they match if they
1948 * are different.
1949 */
1950int
1951pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1952 struct pf_addr *b, sa_family_t af)
1953{
1954 int match = 0;
1955
1956 switch (af) {
1957#ifdef INET
1958 case AF_INET:
1959 if ((a->addr32[0] & m->addr32[0]) ==
1960 (b->addr32[0] & m->addr32[0]))
1961 match++;
1962 break;
1963#endif /* INET */
1964#ifdef INET6
1965 case AF_INET6:
1966 if (((a->addr32[0] & m->addr32[0]) ==
1967 (b->addr32[0] & m->addr32[0])) &&
1968 ((a->addr32[1] & m->addr32[1]) ==
1969 (b->addr32[1] & m->addr32[1])) &&
1970 ((a->addr32[2] & m->addr32[2]) ==
1971 (b->addr32[2] & m->addr32[2])) &&
1972 ((a->addr32[3] & m->addr32[3]) ==
1973 (b->addr32[3] & m->addr32[3])))
1974 match++;
1975 break;
1976#endif /* INET6 */
1977 }
1978 if (match) {
1979 if (n)
1980 return (0);
1981 else
1982 return (1);
1983 } else {
1984 if (n)
1985 return (1);
1986 else
1987 return (0);
1988 }
1989}
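
As a minimal standalone sketch (userland C, IPv4 only, names hypothetical, not part of pf.c), the masked compare-and-negate that pf_match_addr() performs above reduces to:

#include <stdint.h>

/*
 * Illustrative only: compare one 32-bit address word under a mask,
 * inverting the result when 'neg' is set, as pf_match_addr() does
 * for AF_INET.
 */
static int
match_addr4(int neg, uint32_t a, uint32_t m, uint32_t b)
{
	int match = ((a & m) == (b & m));

	return (neg ? !match : match);
}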
1990
ed1f0be2
JL
1991/*
1992 * Return 1 if b <= a <= e, otherwise return 0.
1993 */
1994int
1995pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
1996 struct pf_addr *a, sa_family_t af)
1997{
1998 switch (af) {
1999#ifdef INET
2000 case AF_INET:
2001 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
2002 (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
2003 return (0);
2004 break;
2005#endif /* INET */
2006#ifdef INET6
2007 case AF_INET6: {
2008 int i;
2009
2010 /* check a >= b */
2011 for (i = 0; i < 4; ++i)
2012 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
2013 break;
2014 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
2015 return (0);
2016 /* check a <= e */
2017 for (i = 0; i < 4; ++i)
2018 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
2019 break;
2020 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
2021 return (0);
2022 break;
2023 }
2024#endif /* INET6 */
2025 }
2026 return (1);
2027}
2028
02742ec6
JS
2029int
2030pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2031{
2032 switch (op) {
2033 case PF_OP_IRG:
2034 return ((p > a1) && (p < a2));
2035 case PF_OP_XRG:
2036 return ((p < a1) || (p > a2));
2037 case PF_OP_RRG:
2038 return ((p >= a1) && (p <= a2));
2039 case PF_OP_EQ:
2040 return (p == a1);
2041 case PF_OP_NE:
2042 return (p != a1);
2043 case PF_OP_LT:
2044 return (p < a1);
2045 case PF_OP_LE:
2046 return (p <= a1);
2047 case PF_OP_GT:
2048 return (p > a1);
2049 case PF_OP_GE:
2050 return (p >= a1);
2051 }
2052 return (0); /* never reached */
2053}
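
A small hypothetical userland check (not pf code; the OP_* constants are stand-ins for PF_OP_*) makes the range-operator semantics of pf_match() above easy to verify:

#include <assert.h>
#include <stdint.h>

enum { OP_IRG, OP_XRG, OP_RRG };	/* stand-ins for PF_OP_IRG/XRG/RRG */

static int
match_range(int op, uint32_t a1, uint32_t a2, uint32_t p)
{
	switch (op) {
	case OP_IRG:	/* exclusive range: a1 < p < a2 */
		return (p > a1 && p < a2);
	case OP_XRG:	/* outside the range */
		return (p < a1 || p > a2);
	case OP_RRG:	/* inclusive range: a1 <= p <= a2 */
		return (p >= a1 && p <= a2);
	}
	return (0);
}

int
main(void)
{
	assert(!match_range(OP_IRG, 1000, 2000, 1000));	/* endpoint excluded */
	assert( match_range(OP_RRG, 1000, 2000, 1000));	/* endpoint included */
	assert( match_range(OP_XRG, 1000, 2000, 999));	/* below the range */
	return (0);
}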
2054
2055int
2056pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
2057{
2058 a1 = ntohs(a1);
2059 a2 = ntohs(a2);
2060 p = ntohs(p);
2061 return (pf_match(op, a1, a2, p));
2062}
2063
2064int
2065pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2066{
2067 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2068 return (0);
2069 return (pf_match(op, a1, a2, u));
2070}
2071
2072int
2073pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2074{
2075 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2076 return (0);
2077 return (pf_match(op, a1, a2, g));
2078}
2079
70224baa 2080int
315a7da3 2081pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
70224baa
JL
2082{
2083 if (*tag == -1)
315a7da3 2084 *tag = m->m_pkthdr.pf.tag;
70224baa 2085
02742ec6
JS
2086 return ((!r->match_tag_not && r->match_tag == *tag) ||
2087 (r->match_tag_not && r->match_tag != *tag));
2088}
2089
70224baa 2090int
315a7da3 2091pf_tag_packet(struct mbuf *m, int tag, int rtableid)
02742ec6 2092{
70224baa
JL
2093 if (tag <= 0 && rtableid < 0)
2094 return (0);
2095
70224baa 2096 if (tag > 0)
315a7da3 2097 m->m_pkthdr.pf.tag = tag;
70224baa 2098 if (rtableid >= 0)
315a7da3 2099 m->m_pkthdr.pf.rtableid = rtableid;
02742ec6 2100
70224baa 2101 return (0);
02742ec6
JS
2102}
2103
315a7da3 2104void
70224baa 2105pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
ed1f0be2 2106 struct pf_rule **r, struct pf_rule **a, int *match)
70224baa
JL
2107{
2108 struct pf_anchor_stackframe *f;
2109
2110 (*r)->anchor->match = 0;
2111 if (match)
2112 *match = 0;
b370aff7 2113 if (*depth >= NELEM(pf_anchor_stack)) {
70224baa
JL
2114 kprintf("pf_step_into_anchor: stack overflow\n");
2115 *r = TAILQ_NEXT(*r, entries);
2116 return;
2117 } else if (*depth == 0 && a != NULL)
2118 *a = *r;
2119 f = pf_anchor_stack + (*depth)++;
2120 f->rs = *rs;
2121 f->r = *r;
2122 if ((*r)->anchor_wildcard) {
2123 f->parent = &(*r)->anchor->children;
2124 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2125 NULL) {
2126 *r = NULL;
2127 return;
2128 }
2129 *rs = &f->child->ruleset;
2130 } else {
2131 f->parent = NULL;
2132 f->child = NULL;
2133 *rs = &(*r)->anchor->ruleset;
2134 }
2135 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2136}
02742ec6 2137
70224baa
JL
2138int
2139pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2140 struct pf_rule **r, struct pf_rule **a, int *match)
2141{
2142 struct pf_anchor_stackframe *f;
2143 int quick = 0;
2144
2145 do {
2146 if (*depth <= 0)
2147 break;
2148 f = pf_anchor_stack + *depth - 1;
2149 if (f->parent != NULL && f->child != NULL) {
2150 if (f->child->match ||
2151 (match != NULL && *match)) {
2152 f->r->anchor->match = 1;
2153 *match = 0;
2154 }
2155 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2156 if (f->child != NULL) {
2157 *rs = &f->child->ruleset;
2158 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2159 if (*r == NULL)
2160 continue;
2161 else
2162 break;
2163 }
2164 }
2165 (*depth)--;
2166 if (*depth == 0 && a != NULL)
2167 *a = NULL;
2168 *rs = f->rs;
ed1f0be2 2169 if (f->r->anchor->match || (match != NULL && *match))
70224baa
JL
2170 quick = f->r->quick;
2171 *r = TAILQ_NEXT(f->r, entries);
2172 } while (*r == NULL);
2173
2174 return (quick);
2175}
02742ec6
JS
2176
2177#ifdef INET6
2178void
2179pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2180 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2181{
2182 switch (af) {
2183#ifdef INET
2184 case AF_INET:
2185 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2186 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2187 break;
2188#endif /* INET */
2189 case AF_INET6:
2190 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2191 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2192 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2193 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2194 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2195 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2196 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2197 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2198 break;
2199 }
2200}
2201
2202void
2203pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2204{
2205 switch (af) {
2206#ifdef INET
2207 case AF_INET:
2208 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2209 break;
2210#endif /* INET */
2211 case AF_INET6:
2212 if (addr->addr32[3] == 0xffffffff) {
2213 addr->addr32[3] = 0;
2214 if (addr->addr32[2] == 0xffffffff) {
2215 addr->addr32[2] = 0;
2216 if (addr->addr32[1] == 0xffffffff) {
2217 addr->addr32[1] = 0;
2218 addr->addr32[0] =
2219 htonl(ntohl(addr->addr32[0]) + 1);
2220 } else
2221 addr->addr32[1] =
2222 htonl(ntohl(addr->addr32[1]) + 1);
2223 } else
2224 addr->addr32[2] =
2225 htonl(ntohl(addr->addr32[2]) + 1);
2226 } else
2227 addr->addr32[3] =
2228 htonl(ntohl(addr->addr32[3]) + 1);
2229 break;
2230 }
2231}
2232#endif /* INET6 */
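
The nested carry propagation in pf_addr_inc() above can also be written as a loop over the four network-order words; a minimal userland sketch, assuming the same addr32[4] layout as struct pf_addr:

#include <arpa/inet.h>
#include <stdint.h>

/*
 * Increment a 128-bit address held as four network-order 32-bit words,
 * carrying into the next word whenever a word wraps to zero.
 */
static void
addr6_inc(uint32_t addr32[4])
{
	int i;

	for (i = 3; i >= 0; i--) {
		addr32[i] = htonl(ntohl(addr32[i]) + 1);
		if (addr32[i] != 0)	/* no wrap, so no carry */
			break;
	}
}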
2233
2234#define mix(a,b,c) \
2235 do { \
2236 a -= b; a -= c; a ^= (c >> 13); \
2237 b -= c; b -= a; b ^= (a << 8); \
2238 c -= a; c -= b; c ^= (b >> 13); \
2239 a -= b; a -= c; a ^= (c >> 12); \
2240 b -= c; b -= a; b ^= (a << 16); \
2241 c -= a; c -= b; c ^= (b >> 5); \
2242 a -= b; a -= c; a ^= (c >> 3); \
2243 b -= c; b -= a; b ^= (a << 10); \
2244 c -= a; c -= b; c ^= (b >> 15); \
2245 } while (0)
2246
2247/*
2248 * hash function based on bridge_hash in if_bridge.c
2249 */
2250void
2251pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2252 struct pf_poolhashkey *key, sa_family_t af)
2253{
2254 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2255
2256 switch (af) {
2257#ifdef INET
2258 case AF_INET:
2259 a += inaddr->addr32[0];
2260 b += key->key32[1];
2261 mix(a, b, c);
2262 hash->addr32[0] = c + key->key32[2];
2263 break;
2264#endif /* INET */
2265#ifdef INET6
2266 case AF_INET6:
2267 a += inaddr->addr32[0];
2268 b += inaddr->addr32[2];
2269 mix(a, b, c);
2270 hash->addr32[0] = c;
2271 a += inaddr->addr32[1];
2272 b += inaddr->addr32[3];
2273 c += key->key32[1];
2274 mix(a, b, c);
2275 hash->addr32[1] = c;
2276 a += inaddr->addr32[2];
2277 b += inaddr->addr32[1];
2278 c += key->key32[2];
2279 mix(a, b, c);
2280 hash->addr32[2] = c;
2281 a += inaddr->addr32[3];
2282 b += inaddr->addr32[0];
2283 c += key->key32[3];
2284 mix(a, b, c);
2285 hash->addr32[3] = c;
2286 break;
2287#endif /* INET6 */
2288 }
2289}
2290
2291int
2292pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2293 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2294{
2295 unsigned char hash[16];
2296 struct pf_pool *rpool = &r->rpool;
2297 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
2298 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
2299 struct pf_pooladdr *acur = rpool->cur;
2300 struct pf_src_node k;
2301
2302 if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2303 (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2304 k.af = af;
2305 PF_ACPY(&k.addr, saddr, af);
2306 if (r->rule_flag & PFRULE_RULESRCTRACK ||
2307 r->rpool.opts & PF_POOL_STICKYADDR)
2308 k.rule.ptr = r;
2309 else
2310 k.rule.ptr = NULL;
2311 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2312 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2313 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2314 PF_ACPY(naddr, &(*sn)->raddr, af);
2315 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 2316 kprintf("pf_map_addr: src tracking maps ");
02742ec6 2317 pf_print_host(&k.addr, 0, af);
4b1cf444 2318 kprintf(" to ");
02742ec6 2319 pf_print_host(naddr, 0, af);
4b1cf444 2320 kprintf("\n");
02742ec6
JS
2321 }
2322 return (0);
2323 }
2324 }
2325
2326 if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2327 return (1);
2328 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
70224baa
JL
2329 switch (af) {
2330#ifdef INET
2331 case AF_INET:
02742ec6
JS
2332 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2333 (rpool->opts & PF_POOL_TYPEMASK) !=
2334 PF_POOL_ROUNDROBIN)
2335 return (1);
8f706258
SW
2336 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2337 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
70224baa
JL
2338 break;
2339#endif /* INET */
2340#ifdef INET6
2341 case AF_INET6:
02742ec6
JS
2342 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2343 (rpool->opts & PF_POOL_TYPEMASK) !=
2344 PF_POOL_ROUNDROBIN)
2345 return (1);
2346 raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2347 rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
70224baa
JL
2348 break;
2349#endif /* INET6 */
02742ec6
JS
2350 }
2351 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2352 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2353 return (1); /* unsupported */
2354 } else {
2355 raddr = &rpool->cur->addr.v.a.addr;
2356 rmask = &rpool->cur->addr.v.a.mask;
2357 }
2358
2359 switch (rpool->opts & PF_POOL_TYPEMASK) {
2360 case PF_POOL_NONE:
2361 PF_ACPY(naddr, raddr, af);
2362 break;
2363 case PF_POOL_BITMASK:
2364 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2365 break;
2366 case PF_POOL_RANDOM:
2367 if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2368 switch (af) {
2369#ifdef INET
2370 case AF_INET:
4fc5aa1c 2371 rpool->counter.addr32[0] = htonl(karc4random());
02742ec6
JS
2372 break;
2373#endif /* INET */
2374#ifdef INET6
2375 case AF_INET6:
2376 if (rmask->addr32[3] != 0xffffffff)
70224baa 2377 rpool->counter.addr32[3] =
4fc5aa1c 2378 htonl(karc4random());
02742ec6
JS
2379 else
2380 break;
2381 if (rmask->addr32[2] != 0xffffffff)
70224baa 2382 rpool->counter.addr32[2] =
4fc5aa1c 2383 htonl(karc4random());
02742ec6
JS
2384 else
2385 break;
2386 if (rmask->addr32[1] != 0xffffffff)
70224baa 2387 rpool->counter.addr32[1] =
4fc5aa1c 2388 htonl(karc4random());
02742ec6
JS
2389 else
2390 break;
2391 if (rmask->addr32[0] != 0xffffffff)
70224baa 2392 rpool->counter.addr32[0] =
4fc5aa1c 2393 htonl(karc4random());
02742ec6
JS
2394 break;
2395#endif /* INET6 */
2396 }
2397 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2398 PF_ACPY(init_addr, naddr, af);
2399
2400 } else {
2401 PF_AINC(&rpool->counter, af);
2402 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2403 }
2404 break;
2405 case PF_POOL_SRCHASH:
2406 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2407 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2408 break;
2409 case PF_POOL_ROUNDROBIN:
2410 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2411 if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2412 &rpool->tblidx, &rpool->counter,
2413 &raddr, &rmask, af))
2414 goto get_addr;
2415 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2416 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2417 &rpool->tblidx, &rpool->counter,
2418 &raddr, &rmask, af))
2419 goto get_addr;
2420 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2421 goto get_addr;
2422
2423 try_next:
2424 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2425 rpool->cur = TAILQ_FIRST(&rpool->list);
2426 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2427 rpool->tblidx = -1;
2428 if (pfr_pool_get(rpool->cur->addr.p.tbl,
2429 &rpool->tblidx, &rpool->counter,
2430 &raddr, &rmask, af)) {
2431 /* table contains no address of type 'af' */
2432 if (rpool->cur != acur)
2433 goto try_next;
2434 return (1);
2435 }
2436 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2437 rpool->tblidx = -1;
2438 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2439 &rpool->tblidx, &rpool->counter,
2440 &raddr, &rmask, af)) {
2441 /* table contains no address of type 'af' */
2442 if (rpool->cur != acur)
2443 goto try_next;
2444 return (1);
2445 }
2446 } else {
2447 raddr = &rpool->cur->addr.v.a.addr;
2448 rmask = &rpool->cur->addr.v.a.mask;
2449 PF_ACPY(&rpool->counter, raddr, af);
2450 }
2451
2452 get_addr:
2453 PF_ACPY(naddr, &rpool->counter, af);
70224baa
JL
2454 if (init_addr != NULL && PF_AZERO(init_addr, af))
2455 PF_ACPY(init_addr, naddr, af);
02742ec6
JS
2456 PF_AINC(&rpool->counter, af);
2457 break;
2458 }
2459 if (*sn != NULL)
2460 PF_ACPY(&(*sn)->raddr, naddr, af);
2461
2462 if (pf_status.debug >= PF_DEBUG_MISC &&
2463 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
4b1cf444 2464 kprintf("pf_map_addr: selected address ");
02742ec6 2465 pf_print_host(naddr, 0, af);
4b1cf444 2466 kprintf("\n");
02742ec6
JS
2467 }
2468
2469 return (0);
2470}
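
The PF_POOL_BITMASK case above relies on PF_POOLMASK(), which splices the pool's network bits with the packet's host bits; for a single IPv4 word the operation is just the following (illustrative sketch, not kernel code):

#include <stdint.h>

/* Network bits come from the pool address, host bits from the packet. */
static uint32_t
poolmask4(uint32_t raddr, uint32_t rmask, uint32_t saddr)
{
	return ((raddr & rmask) | (saddr & ~rmask));
}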
2471
2472int
2473pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2474 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2475 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2476 struct pf_src_node **sn)
2477{
315a7da3 2478 struct pf_state_key_cmp key;
02742ec6
JS
2479 struct pf_addr init_addr;
2480 u_int16_t cut;
2481
2482 bzero(&init_addr, sizeof(init_addr));
2483 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2484 return (1);
2485
70224baa
JL
2486 if (proto == IPPROTO_ICMP) {
2487 low = 1;
2488 high = 65535;
2489 }
2490
02742ec6
JS
2491 do {
2492 key.af = af;
2493 key.proto = proto;
ed1f0be2
JL
2494 PF_ACPY(&key.addr[1], daddr, key.af);
2495 PF_ACPY(&key.addr[0], naddr, key.af);
2496 key.port[1] = dport;
02742ec6
JS
2497
2498 /*
2499 * port search; start at a random cut point and step through the range,
2500 * similar to the port loop in in_pcbbind (see the sketch after this function)
2501 */
70224baa
JL
2502 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2503 proto == IPPROTO_ICMP)) {
ed1f0be2
JL
2504 key.port[0] = dport;
2505 if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
02742ec6
JS
2506 return (0);
2507 } else if (low == 0 && high == 0) {
ed1f0be2
JL
2508 key.port[0] = *nport;
2509 if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
02742ec6
JS
2510 return (0);
2511 } else if (low == high) {
ed1f0be2
JL
2512 key.port[0] = htons(low);
2513 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
02742ec6
JS
2514 *nport = htons(low);
2515 return (0);
2516 }
2517 } else {
2518 u_int16_t tmp;
2519
2520 if (low > high) {
2521 tmp = low;
2522 low = high;
2523 high = tmp;
2524 }
2525 /* low < high */
4fc5aa1c 2526 cut = htonl(karc4random()) % (1 + high - low) + low;
02742ec6
JS
2527 /* low <= cut <= high */
2528 for (tmp = cut; tmp <= high; ++(tmp)) {
ed1f0be2
JL
2529 key.port[0] = htons(tmp);
2530 if (pf_find_state_all(&key, PF_IN, NULL) ==
2531 NULL && !in_baddynamic(tmp, proto)) {
02742ec6
JS
2532 *nport = htons(tmp);
2533 return (0);
2534 }
2535 }
2536 for (tmp = cut - 1; tmp >= low; --(tmp)) {
ed1f0be2
JL
2537 key.port[0] = htons(tmp);
2538 if (pf_find_state_all(&key, PF_IN, NULL) ==
2539 NULL && !in_baddynamic(tmp, proto)) {
02742ec6
JS
2540 *nport = htons(tmp);
2541 return (0);
2542 }
2543 }
2544 }
2545
2546 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2547 case PF_POOL_RANDOM:
2548 case PF_POOL_ROUNDROBIN:
2549 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2550 return (1);
2551 break;
2552 case PF_POOL_NONE:
2553 case PF_POOL_SRCHASH:
2554 case PF_POOL_BITMASK:
2555 default:
2556 return (1);
2557 }
2558 } while (!PF_AEQ(&init_addr, naddr, af));
02742ec6
JS
2559 return (1); /* none available */
2560}
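
The search strategy of pf_get_sport() above (pick a random cut in [low, high], scan upward, then downward) is easier to read in isolation. A hedged userland sketch follows; port_in_use() is a hypothetical predicate standing in for the pf_find_state_all()/in_baddynamic() checks, and arc4random() is assumed available from the BSD libc:

#include <stdint.h>
#include <stdlib.h>

extern int port_in_use(uint16_t port);	/* hypothetical availability check */

/* Return a free port in [low, high] (host order), or 0 if none is free. */
static uint16_t
pick_port(uint16_t low, uint16_t high)
{
	uint32_t cut, p;

	if (low > high) {
		uint16_t tmp = low;

		low = high;
		high = tmp;
	}
	/* low <= cut <= high */
	cut = low + arc4random() % (uint32_t)(high - low + 1);
	for (p = cut; p <= high; p++)		/* scan upward from the cut */
		if (!port_in_use((uint16_t)p))
			return ((uint16_t)p);
	for (p = cut; p-- > low; )		/* then downward below the cut */
		if (!port_in_use((uint16_t)p))
			return ((uint16_t)p);
	return (0);				/* none available */
}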
2561
2562struct pf_rule *
2563pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2564 int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2565 struct pf_addr *daddr, u_int16_t dport, int rs_num)
2566{
70224baa 2567 struct pf_rule *r, *rm = NULL;
02742ec6 2568 struct pf_ruleset *ruleset = NULL;
70224baa
JL
2569 int tag = -1;
2570 int rtableid = -1;
2571 int asd = 0;
02742ec6
JS
2572
2573 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2574 while (r && rm == NULL) {
2575 struct pf_rule_addr *src = NULL, *dst = NULL;
2576 struct pf_addr_wrap *xdst = NULL;
2577
2578 if (r->action == PF_BINAT && direction == PF_IN) {
2579 src = &r->dst;
2580 if (r->rpool.cur != NULL)
2581 xdst = &r->rpool.cur->addr;
2582 } else {
2583 src = &r->src;
2584 dst = &r->dst;
2585 }
2586
2587 r->evaluations++;
70224baa 2588 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
2589 r = r->skip[PF_SKIP_IFP].ptr;
2590 else if (r->direction && r->direction != direction)
2591 r = r->skip[PF_SKIP_DIR].ptr;
2592 else if (r->af && r->af != pd->af)
2593 r = r->skip[PF_SKIP_AF].ptr;
2594 else if (r->proto && r->proto != pd->proto)
2595 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
2596 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2597 src->neg, kif))
02742ec6
JS
2598 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2599 PF_SKIP_DST_ADDR].ptr;
2600 else if (src->port_op && !pf_match_port(src->port_op,
2601 src->port[0], src->port[1], sport))
2602 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2603 PF_SKIP_DST_PORT].ptr;
2604 else if (dst != NULL &&
70224baa 2605 PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
02742ec6 2606 r = r->skip[PF_SKIP_DST_ADDR].ptr;
70224baa
JL
2607 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2608 0, NULL))
02742ec6
JS
2609 r = TAILQ_NEXT(r, entries);
2610 else if (dst != NULL && dst->port_op &&
2611 !pf_match_port(dst->port_op, dst->port[0],
2612 dst->port[1], dport))
2613 r = r->skip[PF_SKIP_DST_PORT].ptr;
315a7da3 2614 else if (r->match_tag && !pf_match_tag(m, r, &tag))
70224baa 2615 r = TAILQ_NEXT(r, entries);
02742ec6
JS
2616 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2617 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2618 off, pd->hdr.tcp), r->os_fingerprint)))
2619 r = TAILQ_NEXT(r, entries);
70224baa
JL
2620 else {
2621 if (r->tag)
2622 tag = r->tag;
2623 if (r->rtableid >= 0)
2624 rtableid = r->rtableid;
2625 if (r->anchor == NULL) {
02742ec6 2626 rm = r;
70224baa
JL
2627 } else
2628 pf_step_into_anchor(&asd, &ruleset, rs_num,
2629 &r, NULL, NULL);
2630 }
2631 if (r == NULL)
2632 pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2633 NULL, NULL);
02742ec6 2634 }
315a7da3 2635 if (pf_tag_packet(m, tag, rtableid))
70224baa 2636 return (NULL);
02742ec6
JS
2637 if (rm != NULL && (rm->action == PF_NONAT ||
2638 rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2639 return (NULL);
2640 return (rm);
2641}
2642
2643struct pf_rule *
2644pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2645 struct pfi_kif *kif, struct pf_src_node **sn,
ed1f0be2
JL
2646 struct pf_state_key **skw, struct pf_state_key **sks,
2647 struct pf_state_key **skp, struct pf_state_key **nkp,
2648 struct pf_addr *saddr, struct pf_addr *daddr,
2649 u_int16_t sport, u_int16_t dport)
02742ec6
JS
2650{
2651 struct pf_rule *r = NULL;
2652
ed1f0be2 2653
02742ec6
JS
2654 if (direction == PF_OUT) {
2655 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2656 sport, daddr, dport, PF_RULESET_BINAT);
2657 if (r == NULL)
2658 r = pf_match_translation(pd, m, off, direction, kif,
2659 saddr, sport, daddr, dport, PF_RULESET_NAT);
2660 } else {
2661 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2662 sport, daddr, dport, PF_RULESET_RDR);
2663 if (r == NULL)
2664 r = pf_match_translation(pd, m, off, direction, kif,
2665 saddr, sport, daddr, dport, PF_RULESET_BINAT);
2666 }
2667
2668 if (r != NULL) {
ed1f0be2
JL
2669 struct pf_addr *naddr;
2670 u_int16_t *nport;
2671
2672 if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
2673 saddr, daddr, sport, dport))
2674 return r;
2675
2676 /* XXX We only modify one side for now. */
2677 naddr = &(*nkp)->addr[1];
2678 nport = &(*nkp)->port[1];
2679
be02a6a0
MD
2680 /*
2681 * NOTE: Currently all translations will clear
2682 * BRIDGE_MBUF_TAGGED, telling the bridge to
2683 * ignore the original input encapsulation.
2684 */
02742ec6
JS
2685 switch (r->action) {
2686 case PF_NONAT:
2687 case PF_NOBINAT:
2688 case PF_NORDR:
2689 return (NULL);
2690 case PF_NAT:
be02a6a0 2691 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
02742ec6
JS
2692 if (pf_get_sport(pd->af, pd->proto, r, saddr,
2693 daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2694 r->rpool.proxy_port[1], sn)) {
2695 DPFPRINTF(PF_DEBUG_MISC,
2696 ("pf: NAT proxy port allocation "
2697 "(%u-%u) failed\n",
2698 r->rpool.proxy_port[0],
2699 r->rpool.proxy_port[1]));
2700 return (NULL);
2701 }
2702 break;
2703 case PF_BINAT:
be02a6a0 2704 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
02742ec6
JS
2705 switch (direction) {
2706 case PF_OUT:
2707 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
70224baa
JL
2708 switch (pd->af) {
2709#ifdef INET
2710 case AF_INET:
02742ec6
JS
2711 if (r->rpool.cur->addr.p.dyn->
2712 pfid_acnt4 < 1)
2713 return (NULL);
2714 PF_POOLMASK(naddr,
2715 &r->rpool.cur->addr.p.dyn->
2716 pfid_addr4,
2717 &r->rpool.cur->addr.p.dyn->
2718 pfid_mask4,
2719 saddr, AF_INET);
70224baa
JL
2720 break;
2721#endif /* INET */
2722#ifdef INET6
2723 case AF_INET6:
02742ec6
JS
2724 if (r->rpool.cur->addr.p.dyn->
2725 pfid_acnt6 < 1)
2726 return (NULL);
2727 PF_POOLMASK(naddr,
2728 &r->rpool.cur->addr.p.dyn->
2729 pfid_addr6,
2730 &r->rpool.cur->addr.p.dyn->
2731 pfid_mask6,
2732 saddr, AF_INET6);
70224baa
JL
2733 break;
2734#endif /* INET6 */
02742ec6
JS
2735 }
2736 } else
2737 PF_POOLMASK(naddr,
2738 &r->rpool.cur->addr.v.a.addr,
2739 &r->rpool.cur->addr.v.a.mask,
2740 saddr, pd->af);
2741 break;
2742 case PF_IN:
70224baa
JL
2743 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2744 switch (pd->af) {
2745#ifdef INET
2746 case AF_INET:
02742ec6
JS
2747 if (r->src.addr.p.dyn->
2748 pfid_acnt4 < 1)
2749 return (NULL);
2750 PF_POOLMASK(naddr,
2751 &r->src.addr.p.dyn->
2752 pfid_addr4,
2753 &r->src.addr.p.dyn->
2754 pfid_mask4,
2755 daddr, AF_INET);
70224baa
JL
2756 break;
2757#endif /* INET */
2758#ifdef INET6
2759 case AF_INET6:
02742ec6
JS
2760 if (r->src.addr.p.dyn->
2761 pfid_acnt6 < 1)
2762 return (NULL);
2763 PF_POOLMASK(naddr,
2764 &r->src.addr.p.dyn->
2765 pfid_addr6,
2766 &r->src.addr.p.dyn->
2767 pfid_mask6,
2768 daddr, AF_INET6);
70224baa
JL
2769 break;
2770#endif /* INET6 */
02742ec6
JS
2771 }
2772 } else
2773 PF_POOLMASK(naddr,
2774 &r->src.addr.v.a.addr,
2775 &r->src.addr.v.a.mask, daddr,
2776 pd->af);
2777 break;
2778 }
2779 break;
2780 case PF_RDR: {
be02a6a0 2781 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
70224baa 2782 if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
02742ec6 2783 return (NULL);
70224baa
JL
2784 if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2785 PF_POOL_BITMASK)
2786 PF_POOLMASK(naddr, naddr,
2787 &r->rpool.cur->addr.v.a.mask, daddr,
2788 pd->af);
315a7da3 2789
02742ec6
JS
2790 if (r->rpool.proxy_port[1]) {
2791 u_int32_t tmp_nport;
2792
2793 tmp_nport = ((ntohs(dport) -
2794 ntohs(r->dst.port[0])) %
2795 (r->rpool.proxy_port[1] -
2796 r->rpool.proxy_port[0] + 1)) +
2797 r->rpool.proxy_port[0];
2798
2799 /* wrap around if necessary */
2800 if (tmp_nport > 65535)
2801 tmp_nport -= 65535;
2802 *nport = htons((u_int16_t)tmp_nport);
2803 } else if (r->rpool.proxy_port[0])
2804 *nport = htons(r->rpool.proxy_port[0]);
2805 break;
2806 }
2807 default:
2808 return (NULL);
2809 }
2810 }
2811
2812 return (r);
2813}
2814
02742ec6 2815struct netmsg_hashlookup {
002c1265 2816 struct netmsg_base base;
02742ec6
JS
2817 struct inpcb **nm_pinp;
2818 struct inpcbinfo *nm_pcbinfo;
2819 struct pf_addr *nm_saddr;
2820 struct pf_addr *nm_daddr;
2821 uint16_t nm_sport;
2822 uint16_t nm_dport;
2823 sa_family_t nm_af;
2824};
2825
0379d9fd 2826#ifdef PF_SOCKET_LOOKUP_DOMSG
4599cf19 2827static void
002c1265 2828in_pcblookup_hash_handler(netmsg_t msg)
02742ec6 2829{
002c1265 2830 struct netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg;
02742ec6 2831
002c1265
MD
2832 if (rmsg->nm_af == AF_INET)
2833 *rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo,
2834 rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4,
2835 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
02742ec6
JS
2836#ifdef INET6
2837 else
002c1265
MD
2838 *rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo,
2839 &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6,
2840 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
02742ec6 2841#endif /* INET6 */
002c1265 2842 lwkt_replymsg(&rmsg->base.lmsg, 0);
02742ec6 2843}
0379d9fd
SZ
2844#endif /* PF_SOCKET_LOOKUP_DOMSG */
2845
02742ec6 2846int
315a7da3 2847pf_socket_lookup(int direction, struct pf_pdesc *pd)
02742ec6
JS
2848{
2849 struct pf_addr *saddr, *daddr;
2850 u_int16_t sport, dport;
2851 struct inpcbinfo *pi;
2852 struct inpcb *inp;
02742ec6 2853 struct netmsg_hashlookup *msg = NULL;
0379d9fd 2854#ifdef PF_SOCKET_LOOKUP_DOMSG
d9663f05 2855 struct netmsg_hashlookup msg0;
02742ec6
JS
2856#endif
2857 int pi_cpu = 0;
2858
70224baa
JL
2859 if (pd == NULL)
2860 return (-1);
2861 pd->lookup.uid = UID_MAX;
2862 pd->lookup.gid = GID_MAX;
2863 pd->lookup.pid = NO_PID;
02742ec6
JS
2864 if (direction == PF_IN) {
2865 saddr = pd->src;
2866 daddr = pd->dst;
2867 } else {
2868 saddr = pd->dst;
2869 daddr = pd->src;
2870 }
2871 switch (pd->proto) {
2872 case IPPROTO_TCP:
315a7da3
JL
2873 if (pd->hdr.tcp == NULL)
2874 return (-1);
02742ec6
JS
2875 sport = pd->hdr.tcp->th_sport;
2876 dport = pd->hdr.tcp->th_dport;
2877
2878 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
2879 pi = &tcbinfo[pi_cpu];
02742ec6
JS
2880 /*
2881 * Our netstack runs lockless on MP systems
2882 * (only for TCP connections at the moment).
2883 *
2884 * As we are not allowed to read another CPU's tcbinfo,
2885 * we have to ask that CPU via remote call to search the
2886 * table for us.
2887 *
2888 * Prepare a msg iff data belongs to another CPU.
2889 */
2890 if (pi_cpu != mycpu->gd_cpuid) {
0379d9fd
SZ
2891#ifdef PF_SOCKET_LOOKUP_DOMSG
2892 /*
2893 * NOTE:
2894 *
2895 * Following lwkt_domsg() is dangerous and could
2896 * lockup the network system, e.g.
2897 *
2898 * On 2 CPU system:
2899 * netisr0 domsg to netisr1 (due to lookup)
2900 * netisr1 domsg to netisr0 (due to lookup)
2901 *
2902 * We simply return -1 here, since we are probably
2903 * called before NAT, so the TCP packet should
2904 * already be on the correct CPU.
2905 */
d9663f05
SZ
2906 msg = &msg0;
2907 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
48e7b118 2908 0, in_pcblookup_hash_handler);
02742ec6
JS
2909 msg->nm_pinp = &inp;
2910 msg->nm_pcbinfo = pi;
2911 msg->nm_saddr = saddr;
2912 msg->nm_sport = sport;
2913 msg->nm_daddr = daddr;
2914 msg->nm_dport = dport;
2915 msg->nm_af = pd->af;
0379d9fd
SZ
2916#else /* !PF_SOCKET_LOOKUP_DOMSG */
2917 kprintf("pf_socket_lookup: tcp packet not on the "
2918 "correct cpu %d, cur cpu %d\n",
2919 pi_cpu, mycpuid);
2920 print_backtrace(-1);
2921 return -1;
2922#endif /* PF_SOCKET_LOOKUP_DOMSG */
02742ec6 2923 }
02742ec6
JS
2924 break;
2925 case IPPROTO_UDP:
315a7da3
JL
2926 if (pd->hdr.udp == NULL)
2927 return (-1);
02742ec6
JS
2928 sport = pd->hdr.udp->uh_sport;
2929 dport = pd->hdr.udp->uh_dport;
2930 pi = &udbinfo;
2931 break;
2932 default:
315a7da3 2933 return (-1);
02742ec6
JS
2934 }
2935 if (direction != PF_IN) {
2936 u_int16_t p;
2937
2938 p = sport;
2939 sport = dport;
2940 dport = p;
2941 }
2942 switch (pd->af) {
2943#ifdef INET6
2944 case AF_INET6:
02742ec6
JS
2945 /*
2946 * Query other CPU, second part
2947 *
2948 * msg only gets initialized when:
2949 * 1) packet is TCP
2950 * 2) the info belongs to another CPU
2951 *
2952 * Use some switch/case magic to avoid code duplication.
2953 */
1918fc5c 2954 if (msg == NULL) {
02742ec6
JS
2955 inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2956 &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2957
2958 if (inp == NULL)
70224baa 2959 return (-1);
02742ec6
JS
2960 break;
2961 }
2962 /* FALLTHROUGH if SMP and on other CPU */
2963#endif /* INET6 */
2964 case AF_INET:
02742ec6 2965 if (msg != NULL) {
3abced87 2966 lwkt_domsg(netisr_portfn(pi_cpu),
002c1265 2967 &msg->base.lmsg, 0);
02742ec6 2968 } else
02742ec6
JS
2969 {
2970 inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2971 dport, INPLOOKUP_WILDCARD, NULL);
2972 }
2973 if (inp == NULL)
315a7da3 2974 return (-1);
02742ec6
JS
2975 break;
2976
2977 default:
70224baa 2978 return (-1);
02742ec6 2979 }
70224baa
JL
2980 pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
2981 pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
02742ec6
JS
2982 return (1);
2983}
2984
2985u_int8_t
2986pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2987{
2988 int hlen;
2989 u_int8_t hdr[60];
2990 u_int8_t *opt, optlen;
2991 u_int8_t wscale = 0;
2992
2993 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
2994 if (hlen <= sizeof(struct tcphdr))
2995 return (0);
2996 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2997 return (0);
2998 opt = hdr + sizeof(struct tcphdr);
2999 hlen -= sizeof(struct tcphdr);
3000 while (hlen >= 3) {
3001 switch (*opt) {
3002 case TCPOPT_EOL:
3003 case TCPOPT_NOP:
3004 ++opt;
3005 --hlen;
3006 break;
3007 case TCPOPT_WINDOW:
3008 wscale = opt[2];
3009 if (wscale > TCP_MAX_WINSHIFT)
3010 wscale = TCP_MAX_WINSHIFT;
3011 wscale |= PF_WSCALE_FLAG;
3012 /* FALLTHROUGH */
3013 default:
3014 optlen = opt[1];
3015 if (optlen < 2)
3016 optlen = 2;
3017 hlen -= optlen;
3018 opt += optlen;
3019 break;
3020 }
3021 }
3022 return (wscale);
3023}
3024
3025u_int16_t
3026pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3027{
3028 int hlen;
3029 u_int8_t hdr[60];
3030 u_int8_t *opt, optlen;
3031 u_int16_t mss = tcp_mssdflt;
3032
3033 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
3034 if (hlen <= sizeof(struct tcphdr))
3035 return (0);
3036 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3037 return (0);
3038 opt = hdr + sizeof(struct tcphdr);
3039 hlen -= sizeof(struct tcphdr);
3040 while (hlen >= TCPOLEN_MAXSEG) {
3041 switch (*opt) {
3042 case TCPOPT_EOL:
3043 case TCPOPT_NOP:
3044 ++opt;
3045 --hlen;
3046 break;
3047 case TCPOPT_MAXSEG:
3048 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3049 /* FALLTHROUGH */
3050 default:
3051 optlen = opt[1];
3052 if (optlen < 2)
3053 optlen = 2;
3054 hlen -= optlen;
3055 opt += optlen;
3056 break;
3057 }
3058 }
3059 return (mss);
3060}
3061
3062u_int16_t
3063pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
3064{
3065#ifdef INET
3066 struct sockaddr_in *dst;
3067 struct route ro;
3068#endif /* INET */
3069#ifdef INET6
3070 struct sockaddr_in6 *dst6;
3071 struct route_in6 ro6;
3072#endif /* INET6 */
3073 struct rtentry *rt = NULL;
3074 int hlen = 0;
3075 u_int16_t mss = tcp_mssdflt;
3076
3077 switch (af) {
3078#ifdef INET
3079 case AF_INET:
3080 hlen = sizeof(struct ip);
3081 bzero(&ro, sizeof(ro));
3082 dst = (struct sockaddr_in *)&ro.ro_dst;
3083 dst->sin_family = AF_INET;
3084 dst->sin_len = sizeof(*dst);
3085 dst->sin_addr = addr->v4;
3086 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
3087 rt = ro.ro_rt;
3088 break;
3089#endif /* INET */
3090#ifdef INET6
3091 case AF_INET6:
3092 hlen = sizeof(struct ip6_hdr);
3093 bzero(&ro6, sizeof(ro6));
3094 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
3095 dst6->sin6_family = AF_INET6;
3096 dst6->sin6_len = sizeof(*dst6);
3097 dst6->sin6_addr = addr->v6;
3098 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
3099 rt = ro6.ro_rt;
3100 break;
3101#endif /* INET6 */
3102 }
3103
3104 if (rt && rt->rt_ifp) {
3105 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
3106 mss = max(tcp_mssdflt, mss);
3107 RTFREE(rt);
3108 }
3109 mss = min(mss, offer);
3110 mss = max(mss, 64); /* sanity - at least max opt space */
3111 return (mss);
3112}
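
pf_calc_mss() above boils down to clamping the route MTU minus headers between tcp_mssdflt and the peer's offer. A minimal arithmetic sketch (userland; MSS_DFLT is a hypothetical stand-in for tcp_mssdflt, and 20 is the fixed TCP header size):

#include <stdint.h>

#define MSS_DFLT	512	/* hypothetical stand-in for tcp_mssdflt */

static uint16_t
calc_mss(uint32_t mtu, uint32_t hlen, uint16_t offer)
{
	uint32_t mss = MSS_DFLT;

	if (mtu > hlen + 20)
		mss = mtu - hlen - 20;		/* if_mtu - hlen - tcphdr */
	if (mss < MSS_DFLT)
		mss = MSS_DFLT;			/* mss = max(tcp_mssdflt, mss) */
	if (mss > offer)
		mss = offer;			/* mss = min(mss, offer) */
	if (mss < 64)
		mss = 64;			/* sanity: room for options */
	return ((uint16_t)mss);
}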
3113
3114void
3115pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
3116{
3117 struct pf_rule *r = s->rule.ptr;
3118
3119 s->rt_kif = NULL;
3120 if (!r->rt || r->rt == PF_FASTROUTE)
3121 return;
ed1f0be2 3122 switch (s->key[PF_SK_WIRE]->af) {
02742ec6
JS
3123#ifdef INET
3124 case AF_INET:
3125 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
3126 &s->nat_src_node);
3127 s->rt_kif = r->rpool.cur->kif;
3128 break;
3129#endif /* INET */
3130#ifdef INET6
3131 case AF_INET6:
3132 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
3133 &s->nat_src_node);
3134 s->rt_kif = r->rpool.cur->kif;
3135 break;
3136#endif /* INET6 */
3137 }
3138}
3139
ed1f0be2
JL
3140u_int32_t
3141pf_tcp_iss(struct pf_pdesc *pd)
315a7da3 3142{
ed1f0be2
JL
3143 MD5_CTX ctx;
3144 u_int32_t digest[4];
3145
3146 if (pf_tcp_secret_init == 0) {
3147 karc4rand(pf_tcp_secret, sizeof(pf_tcp_secret));
3148 MD5Init(&pf_tcp_secret_ctx);
3149 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
3150 sizeof(pf_tcp_secret));
3151 pf_tcp_secret_init = 1;
3152 }
3153 ctx = pf_tcp_secret_ctx;
3154
3155 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
3156 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
3157 if (pd->af == AF_INET6) {
3158 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3159 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3160 } else {
3161 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3162 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
315a7da3 3163 }
ed1f0be2
JL
3164 MD5Final((u_char *)digest, &ctx);
3165 pf_tcp_iss_off += 4096;
3166 return (digest[0] + pd->hdr.tcp->th_seq + pf_tcp_iss_off);
315a7da3
JL
3167}
3168
02742ec6 3169int
315a7da3 3170pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
02742ec6 3171 struct pfi_kif *kif, struct mbuf *m, int off, void *h,
70224baa
JL
3172 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3173 struct ifqueue *ifq, struct inpcb *inp)
02742ec6
JS
3174{
3175 struct pf_rule *nr = NULL;
3176 struct pf_addr *saddr = pd->src, *daddr = pd->dst;
02742ec6 3177 sa_family_t af = pd->af;
02742ec6
JS
3178 struct pf_rule *r, *a = NULL;
3179 struct pf_ruleset *ruleset = NULL;
3180 struct pf_src_node *nsn = NULL;
315a7da3 3181 struct tcphdr *th = pd->hdr.tcp;
ed1f0be2
JL
3182 struct pf_state_key *skw = NULL, *sks = NULL;
3183 struct pf_state_key *sk = NULL, *nk = NULL;
02742ec6 3184 u_short reason;
315a7da3 3185 int rewrite = 0, hdrlen = 0;
70224baa 3186 int tag = -1, rtableid = -1;
70224baa
JL
3187 int asd = 0;
3188 int match = 0;
315a7da3 3189 int state_icmp = 0;
ed1f0be2
JL
3190 u_int16_t sport = 0, dport = 0;
3191 u_int16_t nport = 0, bport = 0;
3192 u_int16_t bproto_sum = 0, bip_sum = 0;
315a7da3 3193 u_int8_t icmptype = 0, icmpcode = 0;
70224baa 3194
ed1f0be2 3195
315a7da3 3196 if (direction == PF_IN && pf_check_congestion(ifq)) {
70224baa
JL
3197 REASON_SET(&reason, PFRES_CONGEST);
3198 return (PF_DROP);
3199 }
3200
3201 if (inp != NULL)
315a7da3
JL
3202 pd->lookup.done = pf_socket_lookup(direction, pd);
3203 else if (debug_pfugidhack) {
70224baa 3204 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
315a7da3 3205 pd->lookup.done = pf_socket_lookup(direction, pd);
70224baa 3206 }
315a7da3 3207
315a7da3
JL
3208 switch (pd->proto) {
3209 case IPPROTO_TCP:
3210 sport = th->th_sport;
3211 dport = th->th_dport;
3212 hdrlen = sizeof(*th);
3213 break;
3214 case IPPROTO_UDP:
3215 sport = pd->hdr.udp->uh_sport;
3216 dport = pd->hdr.udp->uh_dport;
3217 hdrlen = sizeof(*pd->hdr.udp);
3218 break;
3219#ifdef INET
3220 case IPPROTO_ICMP:
3221 if (pd->af != AF_INET)
3222 break;
3223 sport = dport = pd->hdr.icmp->icmp_id;
ed1f0be2 3224 hdrlen = sizeof(*pd->hdr.icmp);
315a7da3
JL
3225 icmptype = pd->hdr.icmp->icmp_type;
3226 icmpcode = pd->hdr.icmp->icmp_code;
3227
3228 if (icmptype == ICMP_UNREACH ||
3229 icmptype == ICMP_SOURCEQUENCH ||
3230 icmptype == ICMP_REDIRECT ||
3231 icmptype == ICMP_TIMXCEED ||
3232 icmptype == ICMP_PARAMPROB)
3233 state_icmp++;
3234 break;
3235#endif /* INET */
3236#ifdef INET6
3237 case IPPROTO_ICMPV6:
ed1f0be2 3238 if (af != AF_INET6)
315a7da3
JL
3239 break;
3240 sport = dport = pd->hdr.icmp6->icmp6_id;
3241 hdrlen = sizeof(*pd->hdr.icmp6);
3242 icmptype = pd->hdr.icmp6->icmp6_type;
3243 icmpcode = pd->hdr.icmp6->icmp6_code;
3244
3245 if (icmptype == ICMP6_DST_UNREACH ||
3246 icmptype == ICMP6_PACKET_TOO_BIG ||
3247 icmptype == ICMP6_TIME_EXCEEDED ||
3248 icmptype == ICMP6_PARAM_PROB)
3249 state_icmp++;
3250 break;
3251#endif /* INET6 */
ed1f0be2
JL
3252 default:
3253 sport = dport = hdrlen = 0;
3254 break;
315a7da3 3255 }
02742ec6
JS
3256
3257 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3258
ed1f0be2
JL
3259 bport = nport = sport;
3260 /* check packet for BINAT/NAT/RDR */
3261 if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
3262 &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
3263 if (nk == NULL || sk == NULL) {
3264 REASON_SET(&reason, PFRES_MEMORY);
3265 goto cleanup;
3266 }
3267
3268 if (pd->ip_sum)
3269 bip_sum = *pd->ip_sum;
3270
3271 switch (pd->proto) {
3272 case IPPROTO_TCP:
3273 bproto_sum = th->th_sum;
3274 pd->proto_sum = &th->th_sum;
3275
3276 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3277 nk->port[pd->sidx] != sport) {
315a7da3 3278 pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
ed1f0be2
JL
3279 &th->th_sum, &nk->addr[pd->sidx],
3280 nk->port[pd->sidx], 0, af);
3281 pd->sport = &th->th_sport;
315a7da3 3282 sport = th->th_sport;
ed1f0be2
JL
3283 }
3284
3285 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3286 nk->port[pd->didx] != dport) {
3287 pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3288 &th->th_sum, &nk->addr[pd->didx],
3289 nk->port[pd->didx], 0, af);
3290 dport = th->th_dport;
3291 pd->dport = &th->th_dport;
3292 }
3293 rewrite++;
3294 break;
3295 case IPPROTO_UDP:
3296 bproto_sum = pd->hdr.udp->uh_sum;
3297 pd->proto_sum = &pd->hdr.udp->uh_sum;
3298
3299 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3300 nk->port[pd->sidx] != sport) {
315a7da3
JL
3301 pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3302 pd->ip_sum, &pd->hdr.udp->uh_sum,
ed1f0be2
JL
3303 &nk->addr[pd->sidx],
3304 nk->port[pd->sidx], 1, af);
315a7da3 3305 sport = pd->hdr.udp->uh_sport;
ed1f0be2
JL
3306 pd->sport = &pd->hdr.udp->uh_sport;
3307 }
3308
3309 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3310 nk->port[pd->didx] != dport) {
3311 pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3312 pd->ip_sum, &pd->hdr.udp->uh_sum,
3313 &nk->addr[pd->didx],
3314 nk->port[pd->didx], 1, af);
3315 dport = pd->hdr.udp->uh_dport;
3316 pd->dport = &pd->hdr.udp->uh_dport;
3317 }
3318 rewrite++;
3319 break;
315a7da3 3320#ifdef INET
ed1f0be2
JL
3321 case IPPROTO_ICMP:
3322 nk->port[0] = nk->port[1];
3323 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
315a7da3 3324 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
ed1f0be2
JL
3325 nk->addr[pd->sidx].v4.s_addr, 0);
3326
3327 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3328 pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3329 nk->addr[pd->didx].v4.s_addr, 0);
3330
3331 if (nk->port[1] != pd->hdr.icmp->icmp_id) {
315a7da3 3332 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
ed1f0be2
JL
3333 pd->hdr.icmp->icmp_cksum, sport,
3334 nk->port[1], 0);
3335 pd->hdr.icmp->icmp_id = nk->port[1];
3336 pd->sport = &pd->hdr.icmp->icmp_id;
3337 }
3338 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3339 break;
315a7da3
JL
3340#endif /* INET */
3341#ifdef INET6
ed1f0be2
JL
3342 case IPPROTO_ICMPV6:
3343 nk->port[0] = nk->port[1];
3344 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
315a7da3 3345 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
ed1f0be2
JL
3346 &nk->addr[pd->sidx], 0);
3347
3348 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3349 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3350 &nk->addr[pd->didx], 0);
3351 rewrite++;
3352 break;
315a7da3 3353#endif /* INET6 */
ed1f0be2
JL
3354 default:
3355 switch (af) {
315a7da3 3356#ifdef INET
ed1f0be2
JL
3357 case AF_INET:
3358 if (PF_ANEQ(saddr,
3359 &nk->addr[pd->sidx], AF_INET))
315a7da3 3360 pf_change_a(&saddr->v4.s_addr,
ed1f0be2
JL
3361 pd->ip_sum,
3362 nk->addr[pd->sidx].v4.s_addr, 0);
3363
3364 if (PF_ANEQ(daddr,
3365 &nk->addr[pd->didx], AF_INET))
3366 pf_change_a(&daddr->v4.s_addr,
3367 pd->ip_sum,
3368 nk->addr[pd->didx].v4.s_addr, 0);
3369 break;
315a7da3
JL
3370#endif /* INET */
3371#ifdef INET6
ed1f0be2
JL
3372 case AF_INET6:
3373 if (PF_ANEQ(saddr,
3374 &nk->addr[pd->sidx], AF_INET6))
3375 PF_ACPY(saddr, &nk->addr[pd->sidx], af);
315a7da3 3376
ed1f0be2
JL
3377 if (PF_ANEQ(daddr,
3378 &nk->addr[pd->didx], AF_INET6))
3379 PF_ACPY(daddr, &nk->addr[pd->didx], af);
315a7da3 3380 break;
315a7da3 3381#endif /* INET6 */
315a7da3 3382 }
ed1f0be2 3383 break;
02742ec6 3384 }
ed1f0be2
JL
3385 if (nr->natpass)
3386 r = NULL;
3387 pd->nat_rule = nr;
02742ec6
JS
3388 }
3389
3390 while (r != NULL) {
3391 r->evaluations++;
70224baa 3392 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
3393 r = r->skip[PF_SKIP_IFP].ptr;
3394 else if (r->direction && r->direction != direction)
3395 r = r->skip[PF_SKIP_DIR].ptr;
3396 else if (r->af && r->af != af)
3397 r = r->skip[PF_SKIP_AF].ptr;
315a7da3 3398 else if (r->proto && r->proto != pd->proto)
02742ec6 3399 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
3400 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3401 r->src.neg, kif))
02742ec6 3402 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
315a7da3 3403 /* tcp/udp only. port_op always 0 in other cases */
02742ec6 3404 else if (r->src.port_op && !pf_match_port(r->src.port_op,
315a7da3 3405 r->src.port[0], r->src.port[1], sport))
02742ec6 3406 r = r->skip[PF_SKIP_SRC_PORT].ptr;
70224baa
JL
3407 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3408 r->dst.neg, NULL))
02742ec6 3409 r = r->skip[PF_SKIP_DST_ADDR].ptr;
315a7da3 3410 /* tcp/udp only. port_op always 0 in other cases */
02742ec6 3411 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
315a7da3 3412 r->dst.port[0], r->dst.port[1], dport))
02742ec6 3413 r = r->skip[PF_SKIP_DST_PORT].ptr;
315a7da3
JL
3414 /* icmp only. type always 0 in other cases */
3415 else if (r->type && r->type != icmptype + 1)
3416 r = TAILQ_NEXT(r, entries);
3417 /* icmp only. type always 0 in other cases */
3418 else if (r->code && r->code != icmpcode + 1)
3419 r = TAILQ_NEXT(r, entries);
70224baa 3420 else if (r->tos && !(r->tos == pd->tos))
02742ec6
JS
3421 r = TAILQ_NEXT(r, entries);
3422 else if (r->rule_flag & PFRULE_FRAGMENT)
3423 r = TAILQ_NEXT(r, entries);
315a7da3
JL
3424 else if (pd->proto == IPPROTO_TCP &&
3425 (r->flagset & th->th_flags) != r->flags)
02742ec6 3426 r = TAILQ_NEXT(r, entries);
315a7da3 3427 /* tcp/udp only. uid.op always 0 in other cases */
70224baa 3428 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
315a7da3 3429 pf_socket_lookup(direction, pd), 1)) &&
02742ec6 3430 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
70224baa 3431 pd->lookup.uid))
02742ec6 3432 r = TAILQ_NEXT(r, entries);
315a7da3 3433 /* tcp/udp only. gid.op always 0 in other cases */
70224baa 3434 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
315a7da3 3435 pf_socket_lookup(direction, pd), 1)) &&
02742ec6 3436 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
70224baa 3437 pd->lookup.gid))
02742ec6 3438 r = TAILQ_NEXT(r, entries);
ed1f0be2
JL
3439 else if (r->prob &&
3440 r->prob <= karc4random())
75fda04a 3441 r = TAILQ_NEXT(r, entries);
315a7da3 3442 else if (r->match_tag && !pf_match_tag(m, r, &tag))
02742ec6 3443 r = TAILQ_NEXT(r, entries);
315a7da3
JL
3444 else if (r->os_fingerprint != PF_OSFP_ANY &&
3445 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3446 pf_osfp_fingerprint(pd, m, off, th),
3447 r->os_fingerprint)))
02742ec6
JS
3448 r = TAILQ_NEXT(r, entries);
3449 else {
3450 if (r->tag)
3451 tag = r->tag;
70224baa
JL
3452 if (r->rtableid >= 0)
3453 rtableid = r->rtableid;
02742ec6 3454 if (r->anchor == NULL) {
70224baa 3455 match = 1;
02742ec6
JS
3456 *rm = r;
3457 *am = a;
3458 *rsm = ruleset;
3459 if ((*rm)->quick)
3460 break;
3461 r = TAILQ_NEXT(r, entries);
3462 } else
70224baa
JL
3463 pf_step_into_anchor(&asd, &ruleset,
3464 PF_RULESET_FILTER, &r, &a, &match);
02742ec6 3465 }
70224baa
JL
3466 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3467 PF_RULESET_FILTER, &r, &a, &match))
3468 break;
02742ec6
JS
3469 }
3470 r = *rm;
3471 a = *am;
3472 ruleset = *rsm;
3473
3474 REASON_SET(&reason, PFRES_MATCH);
3475
315a7da3 3476 if (r->log || (nr != NULL && nr->log)) {
02742ec6 3477 if (rewrite)
315a7da3 3478 m_copyback(m, off, hdrlen, pd->hdr.any);
70224baa
JL
3479 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3480 a, ruleset, pd);
02742ec6
JS
3481 }
3482
3483 if ((r->action == PF_DROP) &&
3484 ((r->rule_flag & PFRULE_RETURNRST) ||
3485 (r->rule_flag & PFRULE_RETURNICMP) ||
3486 (r->rule_flag & PFRULE_RETURN))) {
3487 /* undo NAT changes, if they have taken place */
3488 if (nr != NULL) {
ed1f0be2
JL
3489 PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3490 PF_ACPY(daddr, &sk->addr[pd->didx], af);
3491 if (pd->sport)
3492 *pd->sport = sk->port[pd->sidx];
3493 if (pd->dport)
3494 *pd->dport = sk->port[pd->didx];
3495 if (pd->proto_sum)
3496 *pd->proto_sum = bproto_sum;
3497 if (pd->ip_sum)
3498 *pd->ip_sum = bip_sum;
3499 m_copyback(m, off, hdrlen, pd->hdr.any);
02742ec6 3500 }
315a7da3
JL
3501 if (pd->proto == IPPROTO_TCP &&
3502 ((r->rule_flag & PFRULE_RETURNRST) ||
02742ec6
JS
3503 (r->rule_flag & PFRULE_RETURN)) &&
3504 !(th->th_flags & TH_RST)) {
315a7da3 3505 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
ed1f0be2
JL
3506 int len = 0;
3507 struct ip *h4;
66ddb4d5 3508#ifdef INET6
ed1f0be2 3509 struct ip6_hdr *h6;
66ddb4d5 3510#endif
ed1f0be2
JL
3511 switch (af) {
3512 case AF_INET:
3513 h4 = mtod(m, struct ip *);
3514 len = h4->ip_len - off;
3515 break;
2dba2225 3516#ifdef INET6
ed1f0be2
JL
3517 case AF_INET6:
3518 h6 = mtod(m, struct ip6_hdr *);
3519 len = h6->ip6_plen - (off - sizeof(*h6));
3520 break;
2dba2225 3521#endif
ed1f0be2 3522 }
02742ec6 3523
ed1f0be2 3524 if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
315a7da3
JL
3525 REASON_SET(&reason, PFRES_PROTCKSUM);
3526 else {
3527 if (th->th_flags & TH_SYN)
3528 ack++;
3529 if (th->th_flags & TH_FIN)
3530 ack++;
3531 pf_send_tcp(r, af, pd->dst,
3532 pd->src, th->th_dport, th->th_sport,
3533 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3534 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3535 }
ed1f0be2
JL
3536 } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3537 r->return_icmp)
02742ec6
JS
3538 pf_send_icmp(m, r->return_icmp >> 8,
3539 r->return_icmp & 255, af, r);
ed1f0be2
JL
3540 else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3541 r->return_icmp6)
02742ec6
JS
3542 pf_send_icmp(m, r->return_icmp6 >> 8,
3543 r->return_icmp6 & 255, af, r);
3544 }
3545
315a7da3 3546 if (r->action == PF_DROP)
ed1f0be2 3547 goto cleanup;
02742ec6 3548
315a7da3 3549 if (pf_tag_packet(m, tag, rtableid)) {
70224baa 3550 REASON_SET(&reason, PFRES_MEMORY);
ed1f0be2 3551 goto cleanup;
70224baa 3552 }
02742ec6 3553
315a7da3
JL
3554 if (!state_icmp && (r->keep_state || nr != NULL ||
3555 (pd->flags & PFDESC_TCP_NORM))) {
ed1f0be2
JL
3556 int action;
3557 action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
3558 off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
3559 bip_sum, hdrlen);
3560 if (action != PF_PASS)
3561 return (action);
3562 }
3563
3564 /* copy back packet headers if we performed NAT operations */
3565 if (rewrite)
3566 m_copyback(m, off, hdrlen, pd->hdr.any);
3567
3568 return (PF_PASS);
3569
02742ec6 3570cleanup:
ed1f0be2 3571 if (sk != NULL)
1186cbc0 3572 kfree(sk, M_PFSTATEKEYPL);
ed1f0be2 3573 if (nk != NULL)
1186cbc0 3574 kfree(nk, M_PFSTATEKEYPL);
ed1f0be2
JL
3575 return (PF_DROP);
3576}
02742ec6 3577
ed1f0be2
JL
3578static __inline int
3579pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
3580 struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
3581 struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
3582 struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
3583 struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
3584 u_int16_t bip_sum, int hdrlen)
3585{
3586 struct pf_state *s = NULL;
3587 struct pf_src_node *sn = NULL;
3588 struct tcphdr *th = pd->hdr.tcp;
3589 u_int16_t mss = tcp_mssdflt;
3590 u_short reason;
315a7da3 3591
ed1f0be2
JL
3592 /* check maximums */
3593 if (r->max_states && (r->states_cur >= r->max_states)) {
3594 pf_status.lcounters[LCNT_STATES]++;
3595 REASON_SET(&reason, PFRES_MAXSTATES);
3596 return (PF_DROP);
3597 }
3598 /* src node for filter rule */
3599 if ((r->rule_flag & PFRULE_SRCTRACK ||
3600 r->rpool.opts & PF_POOL_STICKYADDR) &&
3601 pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
3602 REASON_SET(&reason, PFRES_SRCLIMIT);
3603 goto csfailed;
3604 }
3605 /* src node for translation rule */
3606 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3607 pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
3608 REASON_SET(&reason, PFRES_SRCLIMIT);
3609 goto csfailed;
3610 }
1186cbc0 3611 s = kmalloc(sizeof(struct pf_state), M_PFSTATEPL, M_NOWAIT|M_ZERO);
ed1f0be2
JL
3612 if (s == NULL) {
3613 REASON_SET(&reason, PFRES_MEMORY);
3614 goto csfailed;
3615 }
3616 s->id = 0; /* XXX Do we really need that? not in OpenBSD */
3617 s->creatorid = 0;
3618 s->rule.ptr = r;
3619 s->nat_rule.ptr = nr;
3620 s->anchor.ptr = a;
3621 STATE_INC_COUNTERS(s);
3622 if (r->allow_opts)
3623 s->state_flags |= PFSTATE_ALLOWOPTS;
3624 if (r->rule_flag & PFRULE_STATESLOPPY)
3625 s->state_flags |= PFSTATE_SLOPPY;
3626 s->log = r->log & PF_LOG_ALL;
3627 if (nr != NULL)
3628 s->log |= nr->log & PF_LOG_ALL;
3629 switch (pd->proto) {
3630 case IPPROTO_TCP:
3631 s->src.seqlo = ntohl(th->th_seq);
3632 s->src.seqhi = s->src.seqlo + pd->p_len + 1;
3633 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3634 r->keep_state == PF_STATE_MODULATE) {
3635 /* Generate sequence number modulator */
3636 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
3637 0)
3638 s->src.seqdiff = 1;
3639 pf_change_a(&th->th_seq, &th->th_sum,
3640 htonl(s->src.seqlo + s->src.seqdiff), 0);
3641 *rewrite = 1;
3642 } else
3643 s->src.seqdiff = 0;
3644 if (th->th_flags & TH_SYN) {
3645 s->src.seqhi++;
3646 s->src.wscale = pf_get_wscale(m, off,
3647 th->th_off, pd->af);
315a7da3 3648 }
ed1f0be2
JL
3649 s->src.max_win = MAX(ntohs(th->th_win), 1);
3650 if (s->src.wscale & PF_WSCALE_MASK) {
3651 /* Remove scale factor from initial window */
3652 int win = s->src.max_win;
3653 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3654 s->src.max_win = (win - 1) >>
3655 (s->src.wscale & PF_WSCALE_MASK);
315a7da3 3656 }
ed1f0be2
JL
3657 if (th->th_flags & TH_FIN)
3658 s->src.seqhi++;
3659 s->dst.seqhi = 1;
3660 s->dst.max_win = 1;
3661 s->src.state = TCPS_SYN_SENT;
3662 s->dst.state = TCPS_CLOSED;
3663 s->timeout = PFTM_TCP_FIRST_PACKET;
3664 break;
3665 case IPPROTO_UDP:
3666 s->src.state = PFUDPS_SINGLE;
3667 s->dst.state = PFUDPS_NO_TRAFFIC;
3668 s->timeout = PFTM_UDP_FIRST_PACKET;
3669 break;
3670 case IPPROTO_ICMP:
315a7da3 3671#ifdef INET6
ed1f0be2 3672 case IPPROTO_ICMPV6:
315a7da3 3673#endif
ed1f0be2
JL
3674 s->timeout = PFTM_ICMP_FIRST_PACKET;
3675 break;
3676 default:
3677 s->src.state = PFOTHERS_SINGLE;
3678 s->dst.state = PFOTHERS_NO_TRAFFIC;
3679 s->timeout = PFTM_OTHER_FIRST_PACKET;
3680 }
315a7da3 3681
ed1f0be2
JL
3682 s->creation = time_second;
3683 s->expire = time_second;
315a7da3 3684
ed1f0be2
JL
3685 if (sn != NULL) {
3686 s->src_node = sn;
3687 s->src_node->states++;
3688 }
3689 if (nsn != NULL) {
3690 /* XXX We only modify one side for now. */
3691 PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
3692 s->nat_src_node = nsn;
3693 s->nat_src_node->states++;
3694 }
3695 if (pd->proto == IPPROTO_TCP) {
3696 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3697 off, pd, th, &s->src, &s->dst)) {
3698 REASON_SET(&reason, PFRES_MEMORY);
02742ec6 3699 pf_src_tree_remove_state(s);
70224baa 3700 STATE_DEC_COUNTERS(s);
1186cbc0 3701 kfree(s, M_PFSTATEPL);
02742ec6 3702 return (PF_DROP);
70224baa 3703 }
ed1f0be2
JL
3704 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3705 pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3706 &s->src, &s->dst, rewrite)) {
3707 /* This really shouldn't happen!!! */
3708 DPFPRINTF(PF_DEBUG_URGENT,
3709 ("pf_normalize_tcp_stateful failed on first pkt"));
3710 pf_normalize_tcp_cleanup(s);
3711 pf_src_tree_remove_state(s);
3712 STATE_DEC_COUNTERS(s);
1186cbc0 3713 kfree(s, M_PFSTATEPL);
ed1f0be2 3714 return (PF_DROP);
315a7da3 3715 }
02742ec6 3716 }
ed1f0be2 3717 s->direction = pd->dir;
02742ec6 3718
ed1f0be2
JL
3719 if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
3720 pd->src, pd->dst, sport, dport))
3721 goto csfailed;
3722
3723 if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
3724 if (pd->proto == IPPROTO_TCP)
3725 pf_normalize_tcp_cleanup(s);
3726 REASON_SET(&reason, PFRES_STATEINS);
3727 pf_src_tree_remove_state(s);
3728 STATE_DEC_COUNTERS(s);
1186cbc0 3729 kfree(s, M_PFSTATEPL);
3730 return (PF_DROP);
3731 } else
3732 *sm = s;
3733
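	/*
	 * Editor's note (not part of pf.c): a state carries two keys because
	 * NAT can make the addresses and ports on the wire differ from what
	 * the local stack sees; skw indexes the wire side and sks the stack
	 * side (they are the same object when no translation applies).
	 * Later lookups pick the key by packet direction, roughly:
	 *
	 *	idx = (direction == PF_IN) ? PF_SK_WIRE : PF_SK_STACK;
	 *	... match the packet against s->key[idx] ...
	 */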
3734 pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */
3735 if (tag > 0) {
3736 pf_tag_ref(tag);
3737 s->tag = tag;
3738 }
3739 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
3740 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
3741 s->src.state = PF_TCPS_PROXY_SRC;
3742 /* undo NAT changes, if they have taken place */
3743 if (nr != NULL) {
3744 struct pf_state_key *skt = s->key[PF_SK_WIRE];
3745 if (pd->dir == PF_OUT)
3746 skt = s->key[PF_SK_STACK];
3747 PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
3748 PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
3749 if (pd->sport)
3750 *pd->sport = skt->port[pd->sidx];
3751 if (pd->dport)
3752 *pd->dport = skt->port[pd->didx];
3753 if (pd->proto_sum)
3754 *pd->proto_sum = bproto_sum;
3755 if (pd->ip_sum)
3756 *pd->ip_sum = bip_sum;
3757 m_copyback(m, off, hdrlen, pd->hdr.any);
3758 }
3759 s->src.seqhi = htonl(karc4random());
3760 /* Find mss option */
3761 mss = pf_get_mss(m, off, th->th_off, pd->af);
3762 mss = pf_calc_mss(pd->src, pd->af, mss);
3763 mss = pf_calc_mss(pd->dst, pd->af, mss);
3764 s->src.mss = mss;
3765 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
3766 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3767 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3768 REASON_SET(&reason, PFRES_SYNPROXY);
3769 return (PF_SYNPROXY_DROP);
3770 }
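	/*
	 * Editor's note (not part of pf.c): with "synproxy state" pf
	 * completes the client's handshake itself before the server ever
	 * sees a SYN; any NAT rewrites are undone above so the proxied
	 * SYN+ACK leaves with the original addresses, and the advertised MSS
	 * is clamped to the MTUs of the routes toward both endpoints.
	 * Rough packet flow:
	 *
	 *	client                  pf                      server
	 *	 SYN  ---------------->  (PF_TCPS_PROXY_SRC)
	 *	      <----------------  SYN+ACK, pf's own ISN
	 *	 ACK  ---------------->  (PF_TCPS_PROXY_DST)
	 *	                          SYN  --------------->
	 *	                               <---------------  SYN+ACK
	 *	                          ACK  --------------->
	 *	      ... sequence numbers translated for the lifetime
	 *	          of the connection ...
	 */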
315a7da3 3771
02742ec6 3772 return (PF_PASS);
3773
3774csfailed:
3775 if (sk != NULL)
1186cbc0 3776 kfree(sk, M_PFSTATEKEYPL);
ed1f0be2 3777 if (nk != NULL)
1186cbc0 3778 kfree(nk, M_PFSTATEKEYPL);
3779
3780 if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3781 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3782 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3783 pf_status.src_nodes--;
1186cbc0 3784 kfree(sn, M_PFSRCTREEPL);
3785 }
3786 if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
3787 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3788 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3789 pf_status.src_nodes--;
1186cbc0 3790 kfree(nsn, M_PFSRCTREEPL);
3791 }
3792 return (PF_DROP);
3793}
3794
3795int
3796pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3797 struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3798 struct pf_ruleset **rsm)
3799{
3800 struct pf_rule *r, *a = NULL;
3801 struct pf_ruleset *ruleset = NULL;
3802 sa_family_t af = pd->af;
3803 u_short reason;
3804 int tag = -1;
3805 int asd = 0;
3806 int match = 0;
3807
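	/*
	 * Editor's note (not part of pf.c): the r->skip[] pointers are
	 * precomputed by pf_calc_skip_steps() so that one failed comparison
	 * jumps over the whole run of consecutive rules sharing that value
	 * (same interface, direction, af, proto, ...), instead of re-testing
	 * every rule.  Because this path handles fragments for which no port,
	 * flag or fingerprint information is available, rules that need
	 * ports, TCP flags or OS fingerprints cannot match and are simply
	 * stepped past below.
	 */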
3808 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3809 while (r != NULL) {
3810 r->evaluations++;
70224baa 3811 if (pfi_kif_match(r->kif, kif) == r->ifnot)
3812 r = r->skip[PF_SKIP_IFP].ptr;
3813 else if (r->direction && r->direction != direction)
3814 r = r->skip[PF_SKIP_DIR].ptr;
3815 else if (r->af && r->af != af)
3816 r = r->skip[PF_SKIP_AF].ptr;
3817 else if (r->proto && r->proto != pd->proto)
3818 r = r->skip[PF_SKIP_PROTO].ptr;
3819 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3820 r->src.neg, kif))
02742ec6 3821 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3822 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3823 r->dst.neg, NULL))
02742ec6 3824 r = r->skip[PF_SKIP_DST_ADDR].ptr;
70224baa 3825 else if (r->tos && !(r->tos == pd->tos))
02742ec6 3826 r = TAILQ_NEXT(r, entries);
3827 else if (r->os_fingerprint != PF_OSFP_ANY)
3828 r = TAILQ_NEXT(r, entries);
3829 else if (pd->proto == IPPROTO_UDP &&
3830 (r->src.port_op || r->dst.port_op))
3831 r = TAILQ_NEXT(r, entries);
3832 else if (pd->proto == IPPROTO_TCP &&
3833 (r->src.port_op || r->dst.port_op || r->flagset))
3834 r = TAILQ_NEXT(r, entries);
3835 else if ((pd->proto == IPPROTO_ICMP ||
3836 pd->proto == IPPROTO_ICMPV6) &&
3837 (r->type || r->code))
02742ec6 3838 r = TAILQ_NEXT(r, entries);
3839 else if (r->prob && r->prob <= karc4random())
3840 r = TAILQ_NEXT(r, entries);
315a7da3 3841 else if (r->match_tag && !pf_match_tag(m, r, &tag))
02742ec6 3842 r = TAILQ_NEXT(r, entries);
3843 else {
3844 if (r->anchor == NULL) {
70224baa 3845 match = 1;
3846 *rm = r;
3847 *am = a;
3848 *rsm = ruleset;
3849 if ((*rm)->quick)
3850 break;
3851 r = TAILQ_NEXT(r, entries);
3852 } else
3853 pf_step_into_anchor(&asd, &ruleset,
3854 PF_RULESET_FILTER, &r, &a, &match);
02742ec6 3855 }