pf: fix another double lock release
[dragonfly.git] / sys / net / pf / pf.c
CommitLineData
ed1f0be2 1/* $OpenBSD: pf.c,v 1.614 2008/08/02 12:34:37 henning Exp $ */
02742ec6
JS
2
3/*
4 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
5 *
6 * Copyright (c) 2001 Daniel Hartmeier
ed1f0be2 7 * Copyright (c) 2002 - 2008 Henning Brauer
02742ec6
JS
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * - Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials provided
19 * with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 *
34 * Effort sponsored in part by the Defense Advanced Research Projects
35 * Agency (DARPA) and Air Force Research Laboratory, Air Force
36 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
37 *
38 */
39
40#include "opt_inet.h"
41#include "opt_inet6.h"
02742ec6
JS
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/filio.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/kernel.h>
51#include <sys/time.h>
52#include <sys/sysctl.h>
53#include <sys/endian.h>
70224baa
JL
54#include <sys/proc.h>
55#include <sys/kthread.h>
02742ec6
JS
56
57#include <machine/inttypes.h>
58
ed1f0be2
JL
59#include <sys/md5.h>
60
02742ec6
JS
61#include <net/if.h>
62#include <net/if_types.h>
63#include <net/bpf.h>
4599cf19 64#include <net/netisr.h>
02742ec6
JS
65#include <net/route.h>
66
67#include <netinet/in.h>
68#include <netinet/in_var.h>
69#include <netinet/in_systm.h>
70#include <netinet/ip.h>
71#include <netinet/ip_var.h>
72#include <netinet/tcp.h>
73#include <netinet/tcp_seq.h>
74#include <netinet/udp.h>
75#include <netinet/ip_icmp.h>
76#include <netinet/in_pcb.h>
77#include <netinet/tcp_timer.h>
78#include <netinet/tcp_var.h>
79#include <netinet/udp_var.h>
80#include <netinet/icmp_var.h>
70224baa 81#include <netinet/if_ether.h>
02742ec6
JS
82
83#include <net/pf/pfvar.h>
84#include <net/pf/if_pflog.h>
85
02742ec6 86#include <net/pf/if_pfsync.h>
02742ec6
JS
87
88#ifdef INET6
89#include <netinet/ip6.h>
90#include <netinet/in_pcb.h>
91#include <netinet/icmp6.h>
92#include <netinet6/nd6.h>
93#include <netinet6/ip6_var.h>
94#include <netinet6/in6_pcb.h>
95#endif /* INET6 */
96
97#include <sys/in_cksum.h>
4599cf19 98#include <sys/ucred.h>
02742ec6
JS
99#include <machine/limits.h>
100#include <sys/msgport2.h>
4599cf19 101#include <net/netmsg2.h>
02742ec6
JS
102
103extern int ip_optcopy(struct ip *, struct ip *);
70224baa 104extern int debug_pfugidhack;
02742ec6 105
a3c18566 106struct lwkt_token pf_token = LWKT_TOKEN_INITIALIZER(pf_token);
2a7a2b1c 107
/*
 * Debug printf: passes the parenthesized argument list `x` to kprintf()
 * when pf_status.debug is at least level `n`.
 *
 * The body is wrapped in do { } while (0) so the expansion is a single
 * statement; a bare `if` here would capture an `else` that follows the
 * macro invocation.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			kprintf x;					\
	} while (0)
02742ec6
JS
109
110/*
111 * Global variables
112 */
113
b4628cf9
SZ
114/* mask radix tree */
115struct radix_node_head *pf_maskhead;
116
315a7da3 117/* state tables */
ed1f0be2 118struct pf_state_tree pf_statetbl;
315a7da3 119
02742ec6
JS
120struct pf_altqqueue pf_altqs[2];
121struct pf_palist pf_pabuf;
122struct pf_altqqueue *pf_altqs_active;
123struct pf_altqqueue *pf_altqs_inactive;
124struct pf_status pf_status;
125
126u_int32_t ticket_altqs_active;
127u_int32_t ticket_altqs_inactive;
128int altqs_inactive_open;
129u_int32_t ticket_pabuf;
130
ed1f0be2
JL
131MD5_CTX pf_tcp_secret_ctx;
132u_char pf_tcp_secret[16];
133int pf_tcp_secret_init;
134int pf_tcp_iss_off;
135
70224baa
JL
136struct pf_anchor_stackframe {
137 struct pf_ruleset *rs;
138 struct pf_rule *r;
139 struct pf_anchor_node *parent;
140 struct pf_anchor *child;
141} pf_anchor_stack[64];
02742ec6 142
1186cbc0
JL
143struct malloc_type *pf_src_tree_pl, *pf_rule_pl, *pf_pooladdr_pl;
144struct malloc_type *pf_state_pl, *pf_state_key_pl, *pf_state_item_pl;
145struct malloc_type *pf_altq_pl;
02742ec6
JS
146
147void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
02742ec6 148
70224baa
JL
149void pf_init_threshold(struct pf_threshold *, u_int32_t,
150 u_int32_t);
151void pf_add_threshold(struct pf_threshold *);
152int pf_check_threshold(struct pf_threshold *);
153
02742ec6
JS
154void pf_change_ap(struct pf_addr *, u_int16_t *,
155 u_int16_t *, u_int16_t *, struct pf_addr *,
156 u_int16_t, u_int8_t, sa_family_t);
70224baa
JL
157int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
158 struct tcphdr *, struct pf_state_peer *);
02742ec6
JS
159#ifdef INET6
160void pf_change_a6(struct pf_addr *, u_int16_t *,
161 struct pf_addr *, u_int8_t);
162#endif /* INET6 */
163void pf_change_icmp(struct pf_addr *, u_int16_t *,
164 struct pf_addr *, struct pf_addr *, u_int16_t,
165 u_int16_t *, u_int16_t *, u_int16_t *,
166 u_int16_t *, u_int8_t, sa_family_t);
167void pf_send_tcp(const struct pf_rule *, sa_family_t,
168 const struct pf_addr *, const struct pf_addr *,
169 u_int16_t, u_int16_t, u_int32_t, u_int32_t,
70224baa
JL
170 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
171 u_int16_t, struct ether_header *, struct ifnet *);
02742ec6
JS
172void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
173 sa_family_t, struct pf_rule *);
174struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
175 int, int, struct pfi_kif *,
176 struct pf_addr *, u_int16_t, struct pf_addr *,
177 u_int16_t, int);
178struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
179 int, int, struct pfi_kif *, struct pf_src_node **,
ed1f0be2
JL
180 struct pf_state_key **, struct pf_state_key **,
181 struct pf_state_key **, struct pf_state_key **,
182 struct pf_addr *, struct pf_addr *,
183 u_int16_t, u_int16_t);
184void pf_detach_state(struct pf_state *);
185int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *,
186 struct pf_state_key **, struct pf_state_key **,
187 struct pf_state_key **, struct pf_state_key **,
188 struct pf_addr *, struct pf_addr *,
189 u_int16_t, u_int16_t);
190void pf_state_key_detach(struct pf_state *, int);
191u_int32_t pf_tcp_iss(struct pf_pdesc *);
315a7da3 192int pf_test_rule(struct pf_rule **, struct pf_state **,
02742ec6
JS
193 int, struct pfi_kif *, struct mbuf *, int,
194 void *, struct pf_pdesc *, struct pf_rule **,
70224baa 195 struct pf_ruleset **, struct ifqueue *, struct inpcb *);
ed1f0be2
JL
196static __inline int pf_create_state(struct pf_rule *, struct pf_rule *,
197 struct pf_rule *, struct pf_pdesc *,
198 struct pf_src_node *, struct pf_state_key *,
199 struct pf_state_key *, struct pf_state_key *,
200 struct pf_state_key *, struct mbuf *, int,
201 u_int16_t, u_int16_t, int *, struct pfi_kif *,
202 struct pf_state **, int, u_int16_t, u_int16_t,
203 int);
02742ec6
JS
204int pf_test_fragment(struct pf_rule **, int,
205 struct pfi_kif *, struct mbuf *, void *,
206 struct pf_pdesc *, struct pf_rule **,
207 struct pf_ruleset **);
ed1f0be2
JL
208int pf_tcp_track_full(struct pf_state_peer *,
209 struct pf_state_peer *, struct pf_state **,
210 struct pfi_kif *, struct mbuf *, int,
211 struct pf_pdesc *, u_short *, int *);
212int pf_tcp_track_sloppy(struct pf_state_peer *,
213 struct pf_state_peer *, struct pf_state **,
214 struct pf_pdesc *, u_short *);
02742ec6
JS
215int pf_test_state_tcp(struct pf_state **, int,
216 struct pfi_kif *, struct mbuf *, int,
217 void *, struct pf_pdesc *, u_short *);
218int pf_test_state_udp(struct pf_state **, int,
219 struct pfi_kif *, struct mbuf *, int,
220 void *, struct pf_pdesc *);
221int pf_test_state_icmp(struct pf_state **, int,
222 struct pfi_kif *, struct mbuf *, int,
70224baa 223 void *, struct pf_pdesc *, u_short *);
02742ec6 224int pf_test_state_other(struct pf_state **, int,
ed1f0be2 225 struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
315a7da3 226void pf_step_into_anchor(int *, struct pf_ruleset **, int,
ed1f0be2 227 struct pf_rule **, struct pf_rule **, int *);
70224baa
JL
228int pf_step_out_of_anchor(int *, struct pf_ruleset **,
229 int, struct pf_rule **, struct pf_rule **,
230 int *);
02742ec6
JS
231void pf_hash(struct pf_addr *, struct pf_addr *,
232 struct pf_poolhashkey *, sa_family_t);
233int pf_map_addr(u_int8_t, struct pf_rule *,
234 struct pf_addr *, struct pf_addr *,
235 struct pf_addr *, struct pf_src_node **);
236int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
237 struct pf_addr *, struct pf_addr *, u_int16_t,
238 struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
239 struct pf_src_node **);
240void pf_route(struct mbuf **, struct pf_rule *, int,
70224baa
JL
241 struct ifnet *, struct pf_state *,
242 struct pf_pdesc *);
02742ec6 243void pf_route6(struct mbuf **, struct pf_rule *, int,
70224baa
JL
244 struct ifnet *, struct pf_state *,
245 struct pf_pdesc *);
02742ec6
JS
246u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
247 sa_family_t);
248u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
249 sa_family_t);
250u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
251 u_int16_t);
252void pf_set_rt_ifp(struct pf_state *,
253 struct pf_addr *);
254int pf_check_proto_cksum(struct mbuf *, int, int,
255 u_int8_t, sa_family_t);
ed1f0be2
JL
256struct pf_divert *pf_get_divert(struct mbuf *);
257void pf_print_state_parts(struct pf_state *,
258 struct pf_state_key *, struct pf_state_key *);
02742ec6
JS
259int pf_addr_wrap_neq(struct pf_addr_wrap *,
260 struct pf_addr_wrap *);
315a7da3 261struct pf_state *pf_find_state(struct pfi_kif *,
ed1f0be2 262 struct pf_state_key_cmp *, u_int, struct mbuf *);
70224baa
JL
263int pf_src_connlimit(struct pf_state **);
264int pf_check_congestion(struct ifqueue *);
265
266extern int pf_end_threads;
267
268struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
269 { &pf_state_pl, PFSTATE_HIWAT },
270 { &pf_src_tree_pl, PFSNODE_HIWAT },
271 { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
272 { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
273 { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
274};
02742ec6 275
/*
 * Look up the state matching key `k` for interface `i`, direction `d` and
 * mbuf `m`, storing the result in `s`.
 *
 * NOTE: this macro returns from the *calling* function:
 *   - PF_DROP when no state is found or the state is marked PFTM_PURGE;
 *   - PF_PASS for an outbound packet whose state was created by a
 *     route-to (outbound rule) or reply-to (inbound rule) rule and whose
 *     rt_kif is set and differs from `i`.
 *
 * Every argument is parenthesized so that expression arguments expand
 * with the intended precedence.  `i`, `d` and `s` are evaluated more than
 * once; do not pass expressions with side effects.
 */
#define STATE_LOOKUP(i, k, d, s, m)					\
	do {								\
		(s) = pf_find_state((i), (k), (d), (m));		\
		if ((s) == NULL || (s)->timeout == PFTM_PURGE)		\
			return (PF_DROP);				\
		if ((d) == PF_OUT &&					\
		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
		    (s)->rule.ptr->direction == PF_OUT) ||		\
		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
		    (s)->rule.ptr->direction == PF_IN)) &&		\
		    (s)->rt_kif != NULL &&				\
		    (s)->rt_kif != (i))					\
			return (PF_PASS);				\
	} while (0)
290
70224baa
JL
/*
 * Select the interface a state created by rule `r` is bound to: `k` when
 * the rule has PFRULE_IFBOUND set, otherwise pfi_all.
 *
 * The whole conditional is parenthesized so the macro can be used safely
 * inside a larger expression.
 */
#define BOUND_IFACE(r, k)						\
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
293
/*
 * Bump the current and total state counters of the rule that created
 * state `s`, and of its anchor and NAT rule when those are set.
 *
 * `s` is parenthesized so that pointer expressions (e.g. `p + 1`) expand
 * correctly; it is evaluated several times.
 */
#define STATE_INC_COUNTERS(s)						\
	do {								\
		(s)->rule.ptr->states_cur++;				\
		(s)->rule.ptr->states_tot++;				\
		if ((s)->anchor.ptr != NULL) {				\
			(s)->anchor.ptr->states_cur++;			\
			(s)->anchor.ptr->states_tot++;			\
		}							\
		if ((s)->nat_rule.ptr != NULL) {			\
			(s)->nat_rule.ptr->states_cur++;		\
			(s)->nat_rule.ptr->states_tot++;		\
		}							\
	} while (0)
307
/*
 * Drop the current-state counter of the rule that created state `s`, and
 * of its NAT rule and anchor when those are set.  The total counters are
 * left alone.
 *
 * `s` is parenthesized so that pointer expressions expand correctly; it
 * is evaluated several times.
 */
#define STATE_DEC_COUNTERS(s)						\
	do {								\
		if ((s)->nat_rule.ptr != NULL)				\
			(s)->nat_rule.ptr->states_cur--;		\
		if ((s)->anchor.ptr != NULL)				\
			(s)->anchor.ptr->states_cur--;			\
		(s)->rule.ptr->states_cur--;				\
	} while (0)
02742ec6 316
1186cbc0
JL
317static MALLOC_DEFINE(M_PFSTATEPL, "pfstatepl", "pf state pool list");
318static MALLOC_DEFINE(M_PFSRCTREEPL, "pfsrctpl", "pf source tree pool list");
319static MALLOC_DEFINE(M_PFSTATEKEYPL, "pfstatekeypl", "pf state key pool list");
320static MALLOC_DEFINE(M_PFSTATEITEMPL, "pfstateitempl", "pf state item pool list");
321
70224baa 322static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
ed1f0be2 323static __inline int pf_state_compare_key(struct pf_state_key *,
315a7da3 324 struct pf_state_key *);
70224baa 325static __inline int pf_state_compare_id(struct pf_state *,
02742ec6
JS
326 struct pf_state *);
327
328struct pf_src_tree tree_src_tracking;
329
330struct pf_state_tree_id tree_id;
70224baa 331struct pf_state_queue state_list;
02742ec6
JS
332
333RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
ed1f0be2 334RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
02742ec6 335RB_GENERATE(pf_state_tree_id, pf_state,
315a7da3
JL
336 entry_id, pf_state_compare_id);
337
70224baa 338static __inline int
02742ec6
JS
339pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
340{
341 int diff;
342
343 if (a->rule.ptr > b->rule.ptr)
344 return (1);
345 if (a->rule.ptr < b->rule.ptr)
346 return (-1);
347 if ((diff = a->af - b->af) != 0)
348 return (diff);
349 switch (a->af) {
350#ifdef INET
351 case AF_INET:
352 if (a->addr.addr32[0] > b->addr.addr32[0])
353 return (1);
354 if (a->addr.addr32[0] < b->addr.addr32[0])
355 return (-1);
356 break;
357#endif /* INET */
358#ifdef INET6
359 case AF_INET6:
360 if (a->addr.addr32[3] > b->addr.addr32[3])
361 return (1);
362 if (a->addr.addr32[3] < b->addr.addr32[3])
363 return (-1);
364 if (a->addr.addr32[2] > b->addr.addr32[2])
365 return (1);
366 if (a->addr.addr32[2] < b->addr.addr32[2])
367 return (-1);
368 if (a->addr.addr32[1] > b->addr.addr32[1])
369 return (1);
370 if (a->addr.addr32[1] < b->addr.addr32[1])
371 return (-1);
372 if (a->addr.addr32[0] > b->addr.addr32[0])
373 return (1);
374 if (a->addr.addr32[0] < b->addr.addr32[0])
375 return (-1);
376 break;
377#endif /* INET6 */
378 }
379 return (0);
380}
381
/*
 * Derive a 32-bit hash value from the address of a state key by folding
 * two right-shifted copies of the pointer together.  Never returns 0.
 */
u_int32_t
pf_state_hash(struct pf_state_key *sk)
{
	intptr_t	addr = (intptr_t)sk;
	u_int32_t	hv;

	hv = (u_int32_t)((addr >> 6) ^ (addr >> 15));

	/* disallow 0 */
	return (hv != 0 ? hv : 1);
}
390
02742ec6
JS
#ifdef INET6
/*
 * Copy an address from src to dst according to the address family:
 * one 32-bit word for AF_INET (when INET is configured), all four words
 * for AF_INET6.  Any other family leaves dst untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		int	word;

		for (word = 0; word < 4; word++)
			dst->addr32[word] = src->addr32[word];
	}
}
#endif /* INET6 */
02742ec6 410
70224baa
JL
411void
412pf_init_threshold(struct pf_threshold *threshold,
413 u_int32_t limit, u_int32_t seconds)
414{
415 threshold->limit = limit * PF_THRESHOLD_MULT;
416 threshold->seconds = seconds;
417 threshold->count = 0;
418 threshold->last = time_second;
419}
420
421void
422pf_add_threshold(struct pf_threshold *threshold)
423{
424 u_int32_t t = time_second, diff = t - threshold->last;
425
426 if (diff >= threshold->seconds)
427 threshold->count = 0;
428 else
429 threshold->count -= threshold->count * diff /
430 threshold->seconds;
431 threshold->count += PF_THRESHOLD_MULT;
432 threshold->last = t;
433}
434
435int
436pf_check_threshold(struct pf_threshold *threshold)
437{
438 return (threshold->count > threshold->limit);
439}
440
441int
442pf_src_connlimit(struct pf_state **state)
443{
70224baa
JL
444 int bad = 0;
445
446 (*state)->src_node->conn++;
447 (*state)->src.tcp_est = 1;
448 pf_add_threshold(&(*state)->src_node->conn_rate);
449
450 if ((*state)->rule.ptr->max_src_conn &&
451 (*state)->rule.ptr->max_src_conn <
452 (*state)->src_node->conn) {
453 pf_status.lcounters[LCNT_SRCCONN]++;
454 bad++;
455 }
456
457 if ((*state)->rule.ptr->max_src_conn_rate.limit &&
458 pf_check_threshold(&(*state)->src_node->conn_rate)) {
459 pf_status.lcounters[LCNT_SRCCONNRATE]++;
460 bad++;
461 }
462
463 if (!bad)
464 return (0);
465
466 if ((*state)->rule.ptr->overload_tbl) {
467 struct pfr_addr p;
468 u_int32_t killed = 0;
469
470 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
471 if (pf_status.debug >= PF_DEBUG_MISC) {
472 kprintf("pf_src_connlimit: blocking address ");
473 pf_print_host(&(*state)->src_node->addr, 0,
ed1f0be2 474 (*state)->key[PF_SK_WIRE]->af);
70224baa
JL
475 }
476
477 bzero(&p, sizeof(p));
ed1f0be2
JL
478 p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
479 switch ((*state)->key[PF_SK_WIRE]->af) {
70224baa
JL
480#ifdef INET
481 case AF_INET:
482 p.pfra_net = 32;
483 p.pfra_ip4addr = (*state)->src_node->addr.v4;
484 break;
485#endif /* INET */
486#ifdef INET6
487 case AF_INET6:
488 p.pfra_net = 128;
489 p.pfra_ip6addr = (*state)->src_node->addr.v6;
490 break;
491#endif /* INET6 */
492 }
493
494 pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
495 &p, time_second);
496
497 /* kill existing states if that's required. */
498 if ((*state)->rule.ptr->flush) {
315a7da3
JL
499 struct pf_state_key *sk;
500 struct pf_state *st;
70224baa 501
315a7da3
JL
502 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
503 RB_FOREACH(st, pf_state_tree_id, &tree_id) {
ed1f0be2 504 sk = st->key[PF_SK_WIRE];
70224baa
JL
505 /*
506 * Kill states from this source. (Only those
507 * from the same rule if PF_FLUSH_GLOBAL is not
508 * set)
509 */
315a7da3 510 if (sk->af ==
ed1f0be2
JL
511 (*state)->key[PF_SK_WIRE]->af &&
512 (((*state)->direction == PF_OUT &&
70224baa 513 PF_AEQ(&(*state)->src_node->addr,
ed1f0be2
JL
514 &sk->addr[0], sk->af)) ||
515 ((*state)->direction == PF_IN &&
70224baa 516 PF_AEQ(&(*state)->src_node->addr,
ed1f0be2 517 &sk->addr[1], sk->af))) &&
70224baa
JL
518 ((*state)->rule.ptr->flush &
519 PF_FLUSH_GLOBAL ||
315a7da3
JL
520 (*state)->rule.ptr == st->rule.ptr)) {
521 st->timeout = PFTM_PURGE;
522 st->src.state = st->dst.state =
70224baa
JL
523 TCPS_CLOSED;
524 killed++;
525 }
526 }
527 if (pf_status.debug >= PF_DEBUG_MISC)
528 kprintf(", %u states killed", killed);
529 }
530 if (pf_status.debug >= PF_DEBUG_MISC)
531 kprintf("\n");
532 }
533
534 /* kill this state */
535 (*state)->timeout = PFTM_PURGE;
536 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
537 return (1);
538}
539
02742ec6
JS
540int
541pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
542 struct pf_addr *src, sa_family_t af)
543{
544 struct pf_src_node k;
545
546 if (*sn == NULL) {
547 k.af = af;
548 PF_ACPY(&k.addr, src, af);
549 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
550 rule->rpool.opts & PF_POOL_STICKYADDR)
551 k.rule.ptr = rule;
552 else
553 k.rule.ptr = NULL;
554 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
555 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
556 }
557 if (*sn == NULL) {
558 if (!rule->max_src_nodes ||
559 rule->src_nodes < rule->max_src_nodes)
1186cbc0 560 (*sn) = kmalloc(sizeof(struct pf_src_node), M_PFSRCTREEPL, M_NOWAIT|M_ZERO);
70224baa
JL
561 else
562 pf_status.lcounters[LCNT_SRCNODES]++;
02742ec6
JS
563 if ((*sn) == NULL)
564 return (-1);
70224baa
JL
565
566 pf_init_threshold(&(*sn)->conn_rate,
567 rule->max_src_conn_rate.limit,
568 rule->max_src_conn_rate.seconds);
569
02742ec6
JS
570 (*sn)->af = af;
571 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
572 rule->rpool.opts & PF_POOL_STICKYADDR)
573 (*sn)->rule.ptr = rule;
574 else
575 (*sn)->rule.ptr = NULL;
576 PF_ACPY(&(*sn)->addr, src, af);
577 if (RB_INSERT(pf_src_tree,
578 &tree_src_tracking, *sn) != NULL) {
579 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 580 kprintf("pf: src_tree insert failed: ");
02742ec6 581 pf_print_host(&(*sn)->addr, 0, af);
4b1cf444 582 kprintf("\n");
02742ec6 583 }
1186cbc0 584 kfree(*sn, M_PFSRCTREEPL);
02742ec6
JS
585 return (-1);
586 }
587 (*sn)->creation = time_second;
588 (*sn)->ruletype = rule->action;
589 if ((*sn)->rule.ptr != NULL)
590 (*sn)->rule.ptr->src_nodes++;
591 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
592 pf_status.src_nodes++;
593 } else {
594 if (rule->max_src_states &&
70224baa
JL
595 (*sn)->states >= rule->max_src_states) {
596 pf_status.lcounters[LCNT_SRCSTATES]++;
02742ec6 597 return (-1);
70224baa 598 }
02742ec6
JS
599 }
600 return (0);
601}
602
ed1f0be2
JL
603/* state table stuff */
604
605static __inline int
606pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
315a7da3 607{
ed1f0be2
JL
608 int diff;
609
610 if ((diff = a->proto - b->proto) != 0)
611 return (diff);
612 if ((diff = a->af - b->af) != 0)
613 return (diff);
614 switch (a->af) {
615#ifdef INET
616 case AF_INET:
617 if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
618 return (1);
619 if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
620 return (-1);
621 if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
622 return (1);
623 if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
624 return (-1);
625 break;
626#endif /* INET */
627#ifdef INET6
628 case AF_INET6:
629 if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
630 return (1);
631 if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
632 return (-1);
633 if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
634 return (1);
635 if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
636 return (-1);
637 if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
638 return (1);
639 if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
640 return (-1);
641 if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
642 return (1);
643 if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
644 return (-1);
645 if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
646 return (1);
647 if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
648 return (-1);
649 if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
650 return (1);
651 if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
652 return (-1);
653 if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
654 return (1);
655 if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
656 return (-1);
657 if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
658 return (1);
659 if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
660 return (-1);
661 break;
662#endif /* INET6 */
315a7da3 663 }
ed1f0be2
JL
664
665 if ((diff = a->port[0] - b->port[0]) != 0)
666 return (diff);
667 if ((diff = a->port[1] - b->port[1]) != 0)
668 return (diff);
669
670 return (0);
671}
672
673static __inline int
674pf_state_compare_id(struct pf_state *a, struct pf_state *b)
675{
676 if (a->id > b->id)
677 return (1);
678 if (a->id < b->id)
679 return (-1);
680 if (a->creatorid > b->creatorid)
681 return (1);
682 if (a->creatorid < b->creatorid)
683 return (-1);
684
685 return (0);
315a7da3
JL
686}
687
02742ec6 688int
ed1f0be2 689pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
02742ec6 690{
ed1f0be2
JL
691 struct pf_state_item *si;
692 struct pf_state_key *cur;
315a7da3 693
ed1f0be2 694 KKASSERT(s->key[idx] == NULL); /* XXX handle this? */
315a7da3 695
ed1f0be2 696 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
315a7da3 697 /* key exists. check for same kif, if none, add to key */
ed1f0be2
JL
698 TAILQ_FOREACH(si, &cur->states, entry)
699 if (si->s->kif == s->kif &&
700 si->s->direction == s->direction) {
701 if (pf_status.debug >= PF_DEBUG_MISC) {
702 kprintf(
703 "pf: %s key attach failed on %s: ",
704 (idx == PF_SK_WIRE) ?
705 "wire" : "stack",
706 s->kif->pfik_name);
707 pf_print_state_parts(s,
708 (idx == PF_SK_WIRE) ? sk : NULL,
709 (idx == PF_SK_STACK) ? sk : NULL);
710 kprintf("\n");
711 }
1186cbc0 712 kfree(sk, M_PFSTATEKEYPL);
ed1f0be2 713 return (-1); /* collision! */
315a7da3 714 }
1186cbc0
JL
715 kfree(sk, M_PFSTATEKEYPL);
716
ed1f0be2
JL
717 s->key[idx] = cur;
718 } else
719 s->key[idx] = sk;
02742ec6 720
1186cbc0 721 if ((si = kmalloc(sizeof(struct pf_state_item), M_PFSTATEITEMPL, M_NOWAIT)) == NULL) {
ed1f0be2 722 pf_state_key_detach(s, idx);
02742ec6
JS
723 return (-1);
724 }
ed1f0be2 725 si->s = s;
02742ec6 726
ed1f0be2
JL
727 /* list is sorted, if-bound states before floating */
728 if (s->kif == pfi_all)
729 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
730 else
731 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);
02742ec6
JS
732 return (0);
733}
734
735void
ed1f0be2 736pf_detach_state(struct pf_state *s)
02742ec6 737{
ed1f0be2
JL
738 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
739 s->key[PF_SK_WIRE] = NULL;
70224baa 740
ed1f0be2
JL
741 if (s->key[PF_SK_STACK] != NULL)
742 pf_state_key_detach(s, PF_SK_STACK);
743
744 if (s->key[PF_SK_WIRE] != NULL)
745 pf_state_key_detach(s, PF_SK_WIRE);
746}
747
748void
749pf_state_key_detach(struct pf_state *s, int idx)
750{
751 struct pf_state_item *si;
ed1f0be2
JL
752 si = TAILQ_FIRST(&s->key[idx]->states);
753 while (si && si->s != s)
754 si = TAILQ_NEXT(si, entry);
755
756 if (si) {
757 TAILQ_REMOVE(&s->key[idx]->states, si, entry);
1186cbc0 758 kfree(si, M_PFSTATEITEMPL);
ed1f0be2
JL
759 }
760
761 if (TAILQ_EMPTY(&s->key[idx]->states)) {
762 RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]);
763 if (s->key[idx]->reverse)
764 s->key[idx]->reverse->reverse = NULL;
765 if (s->key[idx]->inp)
766 s->key[idx]->inp->inp_pf_sk = NULL;
1186cbc0 767 kfree(s->key[idx], M_PFSTATEKEYPL);
ed1f0be2
JL
768 }
769 s->key[idx] = NULL;
770}
771
772struct pf_state_key *
773pf_alloc_state_key(int pool_flags)
774{
775 struct pf_state_key *sk;
776
1186cbc0
JL
777 if ((sk = kmalloc(sizeof(struct pf_state_key), M_PFSTATEKEYPL, pool_flags)) == NULL)
778 return (NULL);
ed1f0be2
JL
779 TAILQ_INIT(&sk->states);
780
781 return (sk);
782}
783
784int
785pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
786 struct pf_state_key **skw, struct pf_state_key **sks,
787 struct pf_state_key **skp, struct pf_state_key **nkp,
788 struct pf_addr *saddr, struct pf_addr *daddr,
789 u_int16_t sport, u_int16_t dport)
790{
791 KKASSERT((*skp == NULL && *nkp == NULL));
792
1186cbc0 793 if ((*skp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
ed1f0be2
JL
794 return (ENOMEM);
795
796 PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
797 PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
798 (*skp)->port[pd->sidx] = sport;
799 (*skp)->port[pd->didx] = dport;
800 (*skp)->proto = pd->proto;
801 (*skp)->af = pd->af;
802
803 if (nr != NULL) {
1186cbc0 804 if ((*nkp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
ed1f0be2
JL
805 return (ENOMEM); /* caller must handle cleanup */
806
807 /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
808 PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
809 PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
810 (*nkp)->port[0] = (*skp)->port[0];
811 (*nkp)->port[1] = (*skp)->port[1];
812 (*nkp)->proto = pd->proto;
813 (*nkp)->af = pd->af;
814 } else
815 *nkp = *skp;
816
817 if (pd->dir == PF_IN) {
818 *skw = *skp;
819 *sks = *nkp;
820 } else {
821 *sks = *skp;
822 *skw = *nkp;
823 }
824 return (0);
825}
826
827
828int
829pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
830 struct pf_state_key *sks, struct pf_state *s)
831{
832 s->kif = kif;
833
834 if (skw == sks) {
835 if (pf_state_key_attach(skw, s, PF_SK_WIRE))
836 return (-1);
837 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
838 } else {
839 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
1186cbc0 840 kfree(sks, M_PFSTATEKEYPL);
ed1f0be2
JL
841 return (-1);
842 }
843 if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
844 pf_state_key_detach(s, PF_SK_WIRE);
845 return (-1);
846 }
847 }
848
849 if (s->id == 0 && s->creatorid == 0) {
850 s->id = htobe64(pf_status.stateid++);
851 s->creatorid = pf_status.hostid;
852 }
14dd43dc
MD
853
854 /*
855 * Calculate hash code for altq
856 */
857 s->hash = crc32(s->key[PF_SK_WIRE], sizeof(*sks));
858
ed1f0be2
JL
859 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
860 if (pf_status.debug >= PF_DEBUG_MISC) {
861 kprintf("pf: state insert failed: "
862 "id: %016jx creatorid: %08x",
863 (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
864 if (s->sync_flags & PFSTATE_FROMSYNC)
865 kprintf(" (from sync)");
866 kprintf("\n");
867 }
868 pf_detach_state(s);
869 return (-1);
870 }
871 TAILQ_INSERT_TAIL(&state_list, s, entry_list);
872 pf_status.fcounters[FCNT_STATE_INSERT]++;
873 pf_status.states++;
874 pfi_kif_ref(kif, PFI_KIF_REF_STATE);
875 pfsync_insert_state(s);
876 return (0);
877}
878
879struct pf_state *
880pf_find_state_byid(struct pf_state_cmp *key)
881{
882 pf_status.fcounters[FCNT_STATE_SEARCH]++;
883
884 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
885}
886
887struct pf_state *
888pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
889 struct mbuf *m)
890{
891 struct pf_state_key *sk;
892 struct pf_state_item *si;
893
894 pf_status.fcounters[FCNT_STATE_SEARCH]++;
895
896 if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
897 ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse)
898 sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
899 else {
900 if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
901 (struct pf_state_key *)key)) == NULL)
902 return (NULL);
903 if (dir == PF_OUT && m->m_pkthdr.pf.statekey) {
904 ((struct pf_state_key *)
905 m->m_pkthdr.pf.statekey)->reverse = sk;
906 sk->reverse = m->m_pkthdr.pf.statekey;
907 }
908 }
909
910 if (dir == PF_OUT)
911 m->m_pkthdr.pf.statekey = NULL;
912
913 /* list is sorted, if-bound states before floating ones */
914 TAILQ_FOREACH(si, &sk->states, entry)
915 if ((si->s->kif == pfi_all || si->s->kif == kif) &&
916 sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
917 si->s->key[PF_SK_STACK]))
918 return (si->s);
919
920 return (NULL);
921}
922
923struct pf_state *
924pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
925{
926 struct pf_state_key *sk;
927 struct pf_state_item *si, *ret = NULL;
928
929 pf_status.fcounters[FCNT_STATE_SEARCH]++;
930
931 sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
932
933 if (sk != NULL) {
934 TAILQ_FOREACH(si, &sk->states, entry)
935 if (dir == PF_INOUT ||
936 (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
937 si->s->key[PF_SK_STACK]))) {
938 if (more == NULL)
939 return (si->s);
940
941 if (ret)
942 (*more)++;
943 else
944 ret = si;
945 }
946 }
947 return (ret ? ret->s : NULL);
948}
949
950/* END state table stuff */
951
952
953void
954pf_purge_thread(void *v)
955{
956 int nloops = 0;
957 int locked = 0;
958
959 lwkt_gettoken(&pf_token);
960 for (;;) {
961 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
70224baa
JL
962
963 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
964
965 if (pf_end_threads) {
1e37b5df 966 pf_purge_expired_states(pf_status.states, 0);
70224baa
JL
967 pf_purge_expired_fragments();
968 pf_purge_expired_src_nodes(1);
969 pf_end_threads++;
970
971 lockmgr(&pf_consistency_lock, LK_RELEASE);
972 wakeup(pf_purge_thread);
973 kthread_exit();
974 }
975 crit_enter();
976
977 /* process a fraction of the state table every second */
978 if(!pf_purge_expired_states(1 + (pf_status.states
979 / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
02742ec6 980
70224baa
JL
981 pf_purge_expired_states(1 + (pf_status.states
982 / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
983 }
02742ec6 984
70224baa
JL
985 /* purge other expired types every PFTM_INTERVAL seconds */
986 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
987 pf_purge_expired_fragments();
988 if (!pf_purge_expired_src_nodes(locked)) {
989 pf_purge_expired_src_nodes(1);
990 }
991 nloops = 0;
992 }
993 crit_exit();
994 lockmgr(&pf_consistency_lock, LK_RELEASE);
995 }
aa1da187 996 lwkt_reltoken(&pf_token);
02742ec6
JS
997}
998
999u_int32_t
1000pf_state_expires(const struct pf_state *state)
1001{
1002 u_int32_t timeout;
1003 u_int32_t start;
1004 u_int32_t end;
1005 u_int32_t states;
1006
1007 /* handle all PFTM_* > PFTM_MAX here */
1008 if (state->timeout == PFTM_PURGE)
1009 return (time_second);
1010 if (state->timeout == PFTM_UNTIL_PACKET)
1011 return (0);
70224baa 1012 KKASSERT(state->timeout != PFTM_UNLINKED);
ed1f0be2 1013 KKASSERT(state->timeout < PFTM_MAX);
02742ec6
JS
1014 timeout = state->rule.ptr->timeout[state->timeout];
1015 if (!timeout)
1016 timeout = pf_default_rule.timeout[state->timeout];
1017 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1018 if (start) {
1019 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
ed1f0be2 1020 states = state->rule.ptr->states_cur;
02742ec6
JS
1021 } else {
1022 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1023 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1024 states = pf_status.states;
1025 }
1026 if (end && states > start && start < end) {
1027 if (states < end)
1028 return (state->expire + timeout * (end - states) /
1029 (end - start));
1030 else
1031 return (time_second);
1032 }
1033 return (state->expire + timeout);
1034}
1035
70224baa
JL
1036int
1037pf_purge_expired_src_nodes(int waslocked)
02742ec6
JS
1038{
1039 struct pf_src_node *cur, *next;
70224baa 1040 int locked = waslocked;
02742ec6
JS
1041
1042 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1043 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1044
1045 if (cur->states <= 0 && cur->expire <= time_second) {
70224baa
JL
1046 if (! locked) {
1047 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1048 next = RB_NEXT(pf_src_tree,
1049 &tree_src_tracking, cur);
1050 locked = 1;
1051 }
02742ec6
JS
1052 if (cur->rule.ptr != NULL) {
1053 cur->rule.ptr->src_nodes--;
ed1f0be2 1054 if (cur->rule.ptr->states_cur <= 0 &&
02742ec6
JS
1055 cur->rule.ptr->max_src_nodes <= 0)
1056 pf_rm_rule(NULL, cur->rule.ptr);
1057 }
1058 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1059 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1060 pf_status.src_nodes--;
1186cbc0 1061 kfree(cur, M_PFSRCTREEPL);
02742ec6
JS
1062 }
1063 }
70224baa
JL
1064
1065 if (locked && !waslocked)
1066 lockmgr(&pf_consistency_lock, LK_RELEASE);
1067 return(1);
02742ec6
JS
1068}
1069
1070void
1071pf_src_tree_remove_state(struct pf_state *s)
1072{
1073 u_int32_t timeout;
1074
1075 if (s->src_node != NULL) {
05ac5751
JL
1076 if (s->src.tcp_est)
1077 --s->src_node->conn;
02742ec6
JS
1078 if (--s->src_node->states <= 0) {
1079 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1080 if (!timeout)
1081 timeout =
1082 pf_default_rule.timeout[PFTM_SRC_NODE];
1083 s->src_node->expire = time_second + timeout;
1084 }
1085 }
1086 if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1087 if (--s->nat_src_node->states <= 0) {
1088 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1089 if (!timeout)
1090 timeout =
1091 pf_default_rule.timeout[PFTM_SRC_NODE];
1092 s->nat_src_node->expire = time_second + timeout;
1093 }
1094 }
1095 s->src_node = s->nat_src_node = NULL;
1096}
1097
70224baa
JL
1098/* callers should be at crit_enter() */
1099void
1100pf_unlink_state(struct pf_state *cur)
02742ec6 1101{
70224baa 1102 if (cur->src.state == PF_TCPS_PROXY_DST) {
ed1f0be2
JL
1103 /* XXX wire key the right one? */
1104 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
1105 &cur->key[PF_SK_WIRE]->addr[1],
1106 &cur->key[PF_SK_WIRE]->addr[0],
1107 cur->key[PF_SK_WIRE]->port[1],
1108 cur->key[PF_SK_WIRE]->port[0],
70224baa
JL
1109 cur->src.seqhi, cur->src.seqlo + 1,
1110 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1111 }
70224baa 1112 RB_REMOVE(pf_state_tree_id, &tree_id, cur);
70224baa 1113 if (cur->creatorid == pf_status.hostid)
f0ea6854 1114 pfsync_delete_state(cur);
70224baa
JL
1115 cur->timeout = PFTM_UNLINKED;
1116 pf_src_tree_remove_state(cur);
ed1f0be2 1117 pf_detach_state(cur);
02742ec6
JS
1118}
1119
aa1da187
MD
1120static struct pf_state *purge_cur;
1121
70224baa
JL
1122/* callers should be at crit_enter() and hold the
1123 * write_lock on pf_consistency_lock */
f0ea6854 1124void
70224baa 1125pf_free_state(struct pf_state *cur)
f0ea6854 1126{
70224baa
JL
1127 if (pfsyncif != NULL &&
1128 (pfsyncif->sc_bulk_send_next == cur ||
1129 pfsyncif->sc_bulk_terminator == cur))
1130 return;
70224baa 1131 KKASSERT(cur->timeout == PFTM_UNLINKED);
ed1f0be2 1132 if (--cur->rule.ptr->states_cur <= 0 &&
70224baa
JL
1133 cur->rule.ptr->src_nodes <= 0)
1134 pf_rm_rule(NULL, cur->rule.ptr);
1135 if (cur->nat_rule.ptr != NULL)
ed1f0be2 1136 if (--cur->nat_rule.ptr->states_cur <= 0 &&
70224baa
JL
1137 cur->nat_rule.ptr->src_nodes <= 0)
1138 pf_rm_rule(NULL, cur->nat_rule.ptr);
1139 if (cur->anchor.ptr != NULL)
ed1f0be2 1140 if (--cur->anchor.ptr->states_cur <= 0)
70224baa
JL
1141 pf_rm_rule(NULL, cur->anchor.ptr);
1142 pf_normalize_tcp_cleanup(cur);
315a7da3 1143 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
aa1da187
MD
1144
1145 /*
1146 * We may be freeing pf_purge_expired_states()'s saved scan entry,
1147 * adjust it if necessary.
1148 */
1149 if (purge_cur == cur) {
1150 kprintf("PURGE CONFLICT\n");
1151 purge_cur = TAILQ_NEXT(purge_cur, entry_list);
1152 }
315a7da3 1153 TAILQ_REMOVE(&state_list, cur, entry_list);
70224baa
JL
1154 if (cur->tag)
1155 pf_tag_unref(cur->tag);
1186cbc0 1156 kfree(cur, M_PFSTATEPL);
70224baa
JL
1157 pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1158 pf_status.states--;
f0ea6854
MD
1159}
1160
70224baa
JL
1161int
1162pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1163{
aa1da187 1164 struct pf_state *cur;
70224baa
JL
1165 int locked = waslocked;
1166
1167 while (maxcheck--) {
aa1da187
MD
1168 /*
1169 * Wrap to start of list when we hit the end
1170 */
1171 cur = purge_cur;
70224baa
JL
1172 if (cur == NULL) {
1173 cur = TAILQ_FIRST(&state_list);
1174 if (cur == NULL)
1175 break; /* list empty */
1176 }
1177
aa1da187
MD
1178 /*
1179 * Setup next (purge_cur) while we process this one. If we block and
1180 * something else deletes purge_cur, pf_free_state() will adjust it further
1181 * ahead.
1182 */
1183 purge_cur = TAILQ_NEXT(cur, entry_list);
70224baa
JL
1184
1185 if (cur->timeout == PFTM_UNLINKED) {
1186 /* free unlinked state */
1187 if (! locked) {
1188 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1189 locked = 1;
1190 }
1191 pf_free_state(cur);
1192 } else if (pf_state_expires(cur) <= time_second) {
1193 /* unlink and free expired state */
1194 pf_unlink_state(cur);
1195 if (! locked) {
1196 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
1197 return (0);
1198 locked = 1;
1199 }
1200 pf_free_state(cur);
1201 }
70224baa
JL
1202 }
1203
1204 if (locked)
1205 lockmgr(&pf_consistency_lock, LK_RELEASE);
1206 return (1);
1207}
f0ea6854 1208
02742ec6
JS
1209int
1210pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1211{
1212 if (aw->type != PF_ADDR_TABLE)
1213 return (0);
1214 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1215 return (1);
1216 return (0);
1217}
1218
1219void
1220pf_tbladdr_remove(struct pf_addr_wrap *aw)
1221{
1222 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1223 return;
1224 pfr_detach_table(aw->p.tbl);
1225 aw->p.tbl = NULL;
1226}
1227
1228void
1229pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1230{
1231 struct pfr_ktable *kt = aw->p.tbl;
1232
1233 if (aw->type != PF_ADDR_TABLE || kt == NULL)
1234 return;
1235 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1236 kt = kt->pfrkt_root;
1237 aw->p.tbl = NULL;
1238 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1239 kt->pfrkt_cnt : -1;
1240}
1241
/*
 * Print an address and optional port with kprintf().
 *
 * addr: address whose words are converted with ntohl()/ntohs() before
 *       printing.
 * p:    port; printed only when non-zero, as ":port" for IPv4 and
 *       "[port]" for IPv6.
 * af:   address family selecting the format; other families print nothing.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t host = ntohl(addr->addr32[0]);

		kprintf("%u.%u.%u.%u", (host>>24)&255, (host>>16)&255,
		    (host>>8)&255, host&255);
		if (p) {
			p = ntohs(p);
			kprintf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t word;
		u_int8_t idx;
		u_int8_t run_lo = 255, run_hi = 0;	/* current zero run */
		u_int8_t best_lo = 0, best_hi = 0;	/* run to abbreviate */

		/* look for the run of zero words to abbreviate */
		for (idx = 0; idx < 8; idx++) {
			if (addr->addr16[idx] == 0) {
				if (run_lo == 255)
					run_lo = idx;
				else
					run_hi = idx;
			} else if (run_lo &&
			    (run_hi - run_lo) > (best_hi - best_lo)) {
				best_lo = run_lo;
				best_hi = run_hi;
				run_lo = 255;
			}
		}

		for (idx = 0; idx < 8; idx++) {
			if (idx < best_lo || idx > best_hi) {
				/* outside the abbreviated run: print the word */
				word = ntohs(addr->addr16[idx]);
				kprintf("%x", word);
				if (idx < 7)
					kprintf(":");
			} else if (idx == (best_hi != 7 ? best_lo : best_hi)) {
				/* one extra colon stands in for the whole run */
				kprintf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			kprintf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
1305
1306void
1307pf_print_state(struct pf_state *s)
1308{
ed1f0be2
JL
1309 pf_print_state_parts(s, NULL, NULL);
1310}
1311
1312void
1313pf_print_state_parts(struct pf_state *s,
1314 struct pf_state_key *skwp, struct pf_state_key *sksp)
1315{
1316 struct pf_state_key *skw, *sks;
1317 u_int8_t proto, dir;
1318
1319 /* Do our best to fill these, but they're skipped if NULL */
1320 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
1321 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
1322 proto = skw ? skw->proto : (sks ? sks->proto : 0);
1323 dir = s ? s->direction : 0;
1324
1325 switch (proto) {
02742ec6 1326 case IPPROTO_TCP:
4b1cf444 1327 kprintf("TCP ");
02742ec6
JS
1328 break;
1329 case IPPROTO_UDP:
4b1cf444 1330 kprintf("UDP ");
02742ec6
JS
1331 break;
1332 case IPPROTO_ICMP:
4b1cf444 1333 kprintf("ICMP ");
02742ec6
JS
1334 break;
1335 case IPPROTO_ICMPV6:
4b1cf444 1336 kprintf("ICMPV6 ");
02742ec6
JS
1337 break;
1338 default:
ed1f0be2
JL
1339 kprintf("%u ", skw->proto);
1340 break;
1341 }
1342 switch (dir) {
1343 case PF_IN:
1344 kprintf(" in");
1345 break;
1346 case PF_OUT:
1347 kprintf(" out");
02742ec6
JS
1348 break;
1349 }
ed1f0be2
JL
1350 if (skw) {
1351 kprintf(" wire: ");
1352 pf_print_host(&skw->addr[0], skw->port[0], skw->af);
1353 kprintf(" ");
1354 pf_print_host(&skw->addr[1], skw->port[1], skw->af);
1355 }
1356 if (sks) {
1357 kprintf(" stack: ");
1358 if (sks != skw) {
1359 pf_print_host(&sks->addr[0], sks->port[0], sks->af);
1360 kprintf(" ");
1361 pf_print_host(&sks->addr[1], sks->port[1], sks->af);
1362 } else
1363 kprintf("-");
1364 }
1365 if (s) {
1366 if (proto == IPPROTO_TCP) {
1367 kprintf(" [lo=%u high=%u win=%u modulator=%u",
1368 s->src.seqlo, s->src.seqhi,
1369 s->src.max_win, s->src.seqdiff);
1370 if (s->src.wscale && s->dst.wscale)
1371 kprintf(" wscale=%u",
1372 s->src.wscale & PF_WSCALE_MASK);
1373 kprintf("]");
1374 kprintf(" [lo=%u high=%u win=%u modulator=%u",
1375 s->dst.seqlo, s->dst.seqhi,
1376 s->dst.max_win, s->dst.seqdiff);
1377 if (s->src.wscale && s->dst.wscale)
1378 kprintf(" wscale=%u",
1379 s->dst.wscale & PF_WSCALE_MASK);
1380 kprintf("]");
1381 }
1382 kprintf(" %u:%u", s->src.state, s->dst.state);
1383 }
02742ec6
JS
1384}
1385
1386void
1387pf_print_flags(u_int8_t f)
1388{
1389 if (f)
4b1cf444 1390 kprintf(" ");
02742ec6 1391 if (f & TH_FIN)
4b1cf444 1392 kprintf("F");
02742ec6 1393 if (f & TH_SYN)
4b1cf444 1394 kprintf("S");
02742ec6 1395 if (f & TH_RST)
4b1cf444 1396 kprintf("R");
02742ec6 1397 if (f & TH_PUSH)
4b1cf444 1398 kprintf("P");
02742ec6 1399 if (f & TH_ACK)
4b1cf444 1400 kprintf("A");
02742ec6 1401 if (f & TH_URG)
4b1cf444 1402 kprintf("U");
02742ec6 1403 if (f & TH_ECE)
4b1cf444 1404 kprintf("E");
02742ec6 1405 if (f & TH_CWR)
4b1cf444 1406 kprintf("W");
02742ec6
JS
1407}
1408
1409#define PF_SET_SKIP_STEPS(i) \
1410 do { \
1411 while (head[i] != cur) { \
1412 head[i]->skip[i].ptr = cur; \
1413 head[i] = TAILQ_NEXT(head[i], entries); \
1414 } \
1415 } while (0)
1416
1417void
1418pf_calc_skip_steps(struct pf_rulequeue *rules)
1419{
1420 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1421 int i;
1422
1423 cur = TAILQ_FIRST(rules);
1424 prev = cur;
1425 for (i = 0; i < PF_SKIP_COUNT; ++i)
1426 head[i] = cur;
1427 while (cur != NULL) {
1428
1429 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1430 PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1431 if (cur->direction != prev->direction)
1432 PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1433 if (cur->af != prev->af)
1434 PF_SET_SKIP_STEPS(PF_SKIP_AF);
1435 if (cur->proto != prev->proto)
1436 PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
70224baa 1437 if (cur->src.neg != prev->src.neg ||
02742ec6
JS
1438 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1439 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1440 if (cur->src.port[0] != prev->src.port[0] ||
1441 cur->src.port[1] != prev->src.port[1] ||
1442 cur->src.port_op != prev->src.port_op)
1443 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
70224baa 1444 if (cur->dst.neg != prev->dst.neg ||
02742ec6
JS
1445 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1446 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1447 if (cur->dst.port[0] != prev->dst.port[0] ||
1448 cur->dst.port[1] != prev->dst.port[1] ||
1449 cur->dst.port_op != prev->dst.port_op)
1450 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1451
1452 prev = cur;
1453 cur = TAILQ_NEXT(cur, entries);
1454 }
1455 for (i = 0; i < PF_SKIP_COUNT; ++i)
1456 PF_SET_SKIP_STEPS(i);
1457}
1458
1459int
1460pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1461{
1462 if (aw1->type != aw2->type)
1463 return (1);
1464 switch (aw1->type) {
1465 case PF_ADDR_ADDRMASK:
ed1f0be2 1466 case PF_ADDR_RANGE:
02742ec6
JS
1467 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1468 return (1);
1469 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1470 return (1);
1471 return (0);
1472 case PF_ADDR_DYNIFTL:
1473 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1474 case PF_ADDR_NOROUTE:
70224baa 1475 case PF_ADDR_URPFFAILED:
02742ec6
JS
1476 return (0);
1477 case PF_ADDR_TABLE:
1478 return (aw1->p.tbl != aw2->p.tbl);
70224baa
JL
1479 case PF_ADDR_RTLABEL:
1480 return (aw1->v.rtlabel != aw2->v.rtlabel);
02742ec6 1481 default:
4b1cf444 1482 kprintf("invalid address type: %d\n", aw1->type);
02742ec6
JS
1483 return (1);
1484 }
1485}
1486
02742ec6
JS
/*
 * Incrementally update a 16-bit checksum after one 16-bit word of the
 * covered data changed from "old" to "new".
 *
 * When udp is non-zero, a checksum of 0 is passed through unchanged and
 * a computed result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t sum;

	if (udp && cksum == 0)
		return (0x0000);

	sum = cksum + old - new;
	/* fold the upper half back into the low 16 bits */
	sum = ((sum >> 16) + (sum & 65535)) & 65535;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1501
1502void
1503pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1504 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1505{
1506 struct pf_addr ao;
1507 u_int16_t po = *p;
1508
1509 PF_ACPY(&ao, a, af);
1510 PF_ACPY(a, an, af);
1511
1512 *p = pn;
1513
1514 switch (af) {
1515#ifdef INET
1516 case AF_INET:
1517 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1518 ao.addr16[0], an->addr16[0], 0),
1519 ao.addr16[1], an->addr16[1], 0);
1520 *p = pn;
1521 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1522 ao.addr16[0], an->addr16[0], u),
1523 ao.addr16[1], an->addr16[1], u),
1524 po, pn, u);
1525 break;
1526#endif /* INET */
1527#ifdef INET6
1528 case AF_INET6:
1529 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1530 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1531 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1532 ao.addr16[0], an->addr16[0], u),
1533 ao.addr16[1], an->addr16[1], u),
1534 ao.addr16[2], an->addr16[2], u),
1535 ao.addr16[3], an->addr16[3], u),
1536 ao.addr16[4], an->addr16[4], u),
1537 ao.addr16[5], an->addr16[5], u),
1538 ao.addr16[6], an->addr16[6], u),
1539 ao.addr16[7], an->addr16[7], u),
1540 po, pn, u);
1541 break;
1542#endif /* INET6 */
1543 }
1544}
1545
1546
/*
 * Changes a u_int32_t and fixes up the checksum *c for it.  Uses a void *
 * so there are no align restrictions: the value is read and written with
 * memcpy().  The upper and lower 16-bit halves are fed to
 * pf_cksum_fixup() in turn.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t prev;
	u_int16_t sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
}
1558
#ifdef INET6
/*
 * Replace an IPv6 address with *an and fix up the checksum *c for each
 * of the eight 16-bit words of the address.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	old_addr;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&old_addr, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, old_addr.addr16[w],
		    an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
1581
1582void
1583pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1584 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1585 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1586{
1587 struct pf_addr oia, ooa;
1588
1589 PF_ACPY(&oia, ia, af);
ed1f0be2
JL
1590 if (oa)
1591 PF_ACPY(&ooa, oa, af);
02742ec6
JS
1592
1593 /* Change inner protocol port, fix inner protocol checksum. */
1594 if (ip != NULL) {
1595 u_int16_t oip = *ip;
1596 u_int32_t opc = 0;
1597
1598 if (pc != NULL)
1599 opc = *pc;
1600 *ip = np;
1601 if (pc != NULL)
1602 *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1603 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1604 if (pc != NULL)
1605 *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1606 }
1607 /* Change inner ip address, fix inner ip and icmp checksums. */
1608 PF_ACPY(ia, na, af);
1609 switch (af) {
1610#ifdef INET
1611 case AF_INET: {
1612 u_int32_t oh2c = *h2c;
1613
1614 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1615 oia.addr16[0], ia->addr16[0], 0),
1616 oia.addr16[1], ia->addr16[1], 0);
1617 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1618 oia.addr16[0], ia->addr16[0], 0),
1619 oia.addr16[1], ia->addr16[1], 0);
1620 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1621 break;
1622 }
1623#endif /* INET */
1624#ifdef INET6
1625 case AF_INET6:
1626 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1627 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1628 pf_cksum_fixup(pf_cksum_fixup(*ic,
1629 oia.addr16[0], ia->addr16[0], u),
1630 oia.addr16[1], ia->addr16[1], u),
1631 oia.addr16[2], ia->addr16[2], u),
1632 oia.addr16[3], ia->addr16[3], u),
1633 oia.addr16[4], ia->addr16[4], u),
1634 oia.addr16[5], ia->addr16[5], u),
1635 oia.addr16[6], ia->addr16[6], u),
1636 oia.addr16[7], ia->addr16[7], u);
1637 break;
1638#endif /* INET6 */
1639 }
ed1f0be2
JL
1640 /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
1641 if (oa) {
1642 PF_ACPY(oa, na, af);
1643 switch (af) {
02742ec6 1644#ifdef INET
ed1f0be2
JL
1645 case AF_INET:
1646 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1647 ooa.addr16[0], oa->addr16[0], 0),
1648 ooa.addr16[1], oa->addr16[1], 0);
1649 break;
02742ec6
JS
1650#endif /* INET */
1651#ifdef INET6
ed1f0be2
JL
1652 case AF_INET6:
1653 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1654 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1655 pf_cksum_fixup(pf_cksum_fixup(*ic,
1656 ooa.addr16[0], oa->addr16[0], u),
1657 ooa.addr16[1], oa->addr16[1], u),
1658 ooa.addr16[2], oa->addr16[2], u),
1659 ooa.addr16[3], oa->addr16[3], u),
1660 ooa.addr16[4], oa->addr16[4], u),
1661 ooa.addr16[5], oa->addr16[5], u),
1662 ooa.addr16[6], oa->addr16[6], u),
1663 ooa.addr16[7], oa->addr16[7], u);
1664 break;
02742ec6 1665#endif /* INET6 */
ed1f0be2 1666 }
02742ec6
JS
1667 }
1668}
1669
70224baa
JL
1670
1671/*
1672 * Need to modulate the sequence numbers in the TCP SACK option
1673 * (credits to Krzysztof Pfaff for report and patch)
1674 */
1675int
1676pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
1677 struct tcphdr *th, struct pf_state_peer *dst)
1678{
1679 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
1680 u_int8_t opts[TCP_MAXOLEN], *opt = opts;
1681 int copyback = 0, i, olen;
1682 struct raw_sackblock sack;
1683
1684#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
1685 if (hlen < TCPOLEN_SACKLEN ||
1686 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
1687 return 0;
1688
1689 while (hlen >= TCPOLEN_SACKLEN) {
1690 olen = opt[1];
1691 switch (*opt) {
1692 case TCPOPT_EOL: /* FALLTHROUGH */
1693 case TCPOPT_NOP:
1694 opt++;
1695 hlen--;
1696 break;
1697 case TCPOPT_SACK:
1698 if (olen > hlen)
1699 olen = hlen;
1700 if (olen >= TCPOLEN_SACKLEN) {
1701 for (i = 2; i + TCPOLEN_SACK <= olen;
1702 i += TCPOLEN_SACK) {
1703 memcpy(&sack, &opt[i], sizeof(sack));
1704 pf_change_a(&sack.rblk_start, &th->th_sum,
4fc5aa1c 1705 htonl(ntohl(sack.rblk_start) -
70224baa
JL
1706 dst->seqdiff), 0);
1707 pf_change_a(&sack.rblk_end, &th->th_sum,
4fc5aa1c 1708 htonl(ntohl(sack.rblk_end) -
70224baa
JL
1709 dst->seqdiff), 0);
1710 memcpy(&opt[i], &sack, sizeof(sack));
1711 }
1712 copyback = 1;
1713 }
1714 /* FALLTHROUGH */
1715 default:
1716 if (olen < 2)
1717 olen = 2;
1718 hlen -= olen;
1719 opt += olen;
1720 }
1721 }
1722
1723 if (copyback)
1724 m_copyback(m, off + sizeof(*th), thoptlen, opts);
1725 return (copyback);
1726}
1727
02742ec6
JS
1728void
1729pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1730 const struct pf_addr *saddr, const struct pf_addr *daddr,
1731 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
70224baa
JL
1732 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1733 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
02742ec6
JS
1734{
1735 struct mbuf *m;
1736 int len = 0, tlen;
1737#ifdef INET
1738 struct ip *h = NULL;
1739#endif /* INET */
1740#ifdef INET6
1741 struct ip6_hdr *h6 = NULL;
1742#endif /* INET6 */
1743 struct tcphdr *th = NULL;
70224baa 1744 char *opt;
02742ec6 1745
2a7a2b1c
JL
1746 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1747
02742ec6
JS
1748 /* maximum segment size tcp option */
1749 tlen = sizeof(struct tcphdr);
1750 if (mss)
1751 tlen += 4;
1752
1753 switch (af) {
1754#ifdef INET
1755 case AF_INET:
1756 len = sizeof(struct ip) + tlen;
1757 break;
1758#endif /* INET */
1759#ifdef INET6
1760 case AF_INET6:
1761 len = sizeof(struct ip6_hdr) + tlen;
1762 break;
1763#endif /* INET6 */
1764 }
1765
aa1da187
MD
1766 /*
1767 * Create outgoing mbuf.
1768 *
1769 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
1770 * so make sure pf.flags is clear.
1771 */
02742ec6 1772 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
2a7a2b1c 1773 if (m == NULL) {
02742ec6 1774 return;
2a7a2b1c 1775 }
70224baa 1776 if (tag)
aa1da187
MD
1777 m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1778 m->m_pkthdr.pf.flags = 0;
315a7da3 1779 m->m_pkthdr.pf.tag = rtag;
ed1f0be2
JL
1780 /* XXX Recheck when upgrading to > 4.4 */
1781 m->m_pkthdr.pf.statekey = NULL;
70224baa 1782 if (r != NULL && r->rtableid >= 0)
02dd99a9 1783 m->m_pkthdr.pf.rtableid = r->rtableid;
70224baa 1784
02742ec6
JS
1785#ifdef ALTQ
1786 if (r != NULL && r->qid) {
315a7da3
JL
1787 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1788 m->m_pkthdr.pf.qid = r->qid;
1789 m->m_pkthdr.pf.ecn_af = af;
1790 m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
02742ec6 1791 }
70224baa 1792#endif /* ALTQ */
02742ec6
JS
1793 m->m_data += max_linkhdr;
1794 m->m_pkthdr.len = m->m_len = len;
1795 m->m_pkthdr.rcvif = NULL;
1796 bzero(m->m_data, len);
1797 switch (af) {
1798#ifdef INET
1799 case AF_INET:
1800 h = mtod(m, struct ip *);
1801
1802 /* IP header fields included in the TCP checksum */
1803 h->ip_p = IPPROTO_TCP;
1804 h->ip_len = tlen;
1805 h->ip_src.s_addr = saddr->v4.s_addr;
1806 h->ip_dst.s_addr = daddr->v4.s_addr;
1807
1808 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1809 break;
1810#endif /* INET */
1811#ifdef INET6
1812 case AF_INET6:
1813 h6 = mtod(m, struct ip6_hdr *);
1814
1815 /* IP header fields included in the TCP checksum */
1816 h6->ip6_nxt = IPPROTO_TCP;
1817 h6->ip6_plen = htons(tlen);
1818 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1819 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1820
1821 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1822 break;
1823#endif /* INET6 */
1824 }
1825
1826 /* TCP header */
1827 th->th_sport = sport;
1828 th->th_dport = dport;
1829 th->th_seq = htonl(seq);
1830 th->th_ack = htonl(ack);
1831 th->th_off = tlen >> 2;
1832 th->th_flags = flags;
1833 th->th_win = htons(win);
1834
1835 if (mss) {
1836 opt = (char *)(th + 1);
1837 opt[0] = TCPOPT_MAXSEG;
1838 opt[1] = 4;
1839 mss = htons(mss);
1840 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1841 }
1842
1843 switch (af) {
1844#ifdef INET
1845 case AF_INET:
1846 /* TCP checksum */
1847 th->th_sum = in_cksum(m, len);
1848
1849 /* Finish the IP header */
1850 h->ip_v = 4;
1851 h->ip_hl = sizeof(*h) >> 2;
1852 h->ip_tos = IPTOS_LOWDELAY;
1853 h->ip_len = len;
1854 h->ip_off = path_mtu_discovery ? IP_DF : 0;
1855 h->ip_ttl = ttl ? ttl : ip_defttl;
1856 h->ip_sum = 0;
70224baa 1857 if (eh == NULL) {
2a7a2b1c 1858 lwkt_reltoken(&pf_token);
70224baa 1859 ip_output(m, NULL, NULL, 0, NULL, NULL);
2a7a2b1c 1860 lwkt_gettoken(&pf_token);
70224baa
JL
1861 } else {
1862 struct route ro;
1863 struct rtentry rt;
1864 struct ether_header *e = (void *)ro.ro_dst.sa_data;
1865
1866 if (ifp == NULL) {
1867 m_freem(m);
1868 return;
1869 }
1870 rt.rt_ifp = ifp;
1871 ro.ro_rt = &rt;
1872 ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1873 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1874 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1875 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1876 e->ether_type = eh->ether_type;
1877 /* XXX_IMPORT: later */
2a7a2b1c 1878 lwkt_reltoken(&pf_token);
70224baa
JL
1879 ip_output(m, (void *)NULL, &ro, 0,
1880 (void *)NULL, (void *)NULL);
2a7a2b1c 1881 lwkt_gettoken(&pf_token);
70224baa 1882 }
02742ec6
JS
1883 break;
1884#endif /* INET */
1885#ifdef INET6
1886 case AF_INET6:
1887 /* TCP checksum */
1888 th->th_sum = in6_cksum(m, IPPROTO_TCP,
1889 sizeof(struct ip6_hdr), tlen);
1890
1891 h6->ip6_vfc |= IPV6_VERSION;
1892 h6->ip6_hlim = IPV6_DEFHLIM;
1893
2a7a2b1c 1894 lwkt_reltoken(&pf_token);
02742ec6 1895 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
2a7a2b1c 1896 lwkt_gettoken(&pf_token);
02742ec6
JS
1897 break;
1898#endif /* INET6 */
1899 }
1900}
1901
1902void
1903pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1904 struct pf_rule *r)
1905{
1906 struct mbuf *m0;
1907
aa1da187
MD
1908 /*
1909 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
1910 * so make sure pf.flags is clear.
1911 */
ed1f0be2
JL
1912 if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL)
1913 return;
1914
aa1da187
MD
1915 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1916 m0->m_pkthdr.pf.flags = 0;
ed1f0be2
JL
1917 /* XXX Re-Check when Upgrading to > 4.4 */
1918 m0->m_pkthdr.pf.statekey = NULL;
70224baa
JL
1919
1920 if (r->rtableid >= 0)
315a7da3 1921 m0->m_pkthdr.pf.rtableid = r->rtableid;
02742ec6
JS
1922
1923#ifdef ALTQ
1924 if (r->qid) {
315a7da3
JL
1925 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1926 m0->m_pkthdr.pf.qid = r->qid;
1927 m0->m_pkthdr.pf.ecn_af = af;
1928 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
02742ec6 1929 }
70224baa 1930#endif /* ALTQ */
02742ec6
JS
1931
1932 switch (af) {
1933#ifdef INET
1934 case AF_INET:
745a4a5d 1935 icmp_error(m0, type, code, 0, 0);
02742ec6
JS
1936 break;
1937#endif /* INET */
1938#ifdef INET6
1939 case AF_INET6:
1940 icmp6_error(m0, type, code, 0);
1941 break;
1942#endif /* INET6 */
1943 }
1944}
1945
1946/*
1947 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1948 * If n is 0, they match if they are equal. If n is != 0, they match if they
1949 * are different.
1950 */
1951int
1952pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1953 struct pf_addr *b, sa_family_t af)
1954{
1955 int match = 0;
1956
1957 switch (af) {
1958#ifdef INET
1959 case AF_INET:
1960 if ((a->addr32[0] & m->addr32[0]) ==
1961 (b->addr32[0] & m->addr32[0]))
1962 match++;
1963 break;
1964#endif /* INET */
1965#ifdef INET6
1966 case AF_INET6:
1967 if (((a->addr32[0] & m->addr32[0]) ==
1968 (b->addr32[0] & m->addr32[0])) &&
1969 ((a->addr32[1] & m->addr32[1]) ==
1970 (b->addr32[1] & m->addr32[1])) &&
1971 ((a->addr32[2] & m->addr32[2]) ==
1972 (b->addr32[2] & m->addr32[2])) &&
1973 ((a->addr32[3] & m->addr32[3]) ==
1974 (b->addr32[3] & m->addr32[3])))
1975 match++;
1976 break;
1977#endif /* INET6 */
1978 }
1979 if (match) {
1980 if (n)
1981 return (0);
1982 else
1983 return (1);
1984 } else {
1985 if (n)
1986 return (1);
1987 else
1988 return (0);
1989 }
1990}
1991
ed1f0be2
JL
/*
 * Return 1 if b <= a <= e, otherwise return 0.
 *
 * The address words are converted with ntohl() before being compared, as
 * pf_print_host() does before printing them.  Comparing the unconverted
 * words orders addresses by their byte-swapped value on little-endian
 * hosts, which gives wrong range results there.
 *
 * For an address family handled by no case the function returns 1.
 */
int
pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
    struct pf_addr *a, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t av = ntohl(a->addr32[0]);

		if (av < ntohl(b->addr32[0]) || av > ntohl(e->addr32[0]))
			return (0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int i;

		/* check a >= b, most significant word first */
		for (i = 0; i < 4; ++i) {
			u_int32_t av = ntohl(a->addr32[i]);
			u_int32_t bv = ntohl(b->addr32[i]);

			if (av > bv)
				break;
			else if (av < bv)
				return (0);
		}
		/* check a <= e, most significant word first */
		for (i = 0; i < 4; ++i) {
			u_int32_t av = ntohl(a->addr32[i]);
			u_int32_t ev = ntohl(e->addr32[i]);

			if (av < ev)
				break;
			else if (av > ev)
				return (0);
		}
		break;
	}
#endif /* INET6 */
	}
	return (1);
}
2029
02742ec6
JS
2030int
2031pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2032{
2033 switch (op) {
2034 case PF_OP_IRG:
2035 return ((p > a1) && (p < a2));
2036 case PF_OP_XRG:
2037 return ((p < a1) || (p > a2));
2038 case PF_OP_RRG:
2039 return ((p >= a1) && (p <= a2));
2040 case PF_OP_EQ:
2041 return (p == a1);
2042 case PF_OP_NE:
2043 return (p != a1);
2044 case PF_OP_LT:
2045 return (p < a1);
2046 case PF_OP_LE:
2047 return (p <= a1);
2048 case PF_OP_GT:
2049 return (p > a1);
2050 case PF_OP_GE:
2051 return (p >= a1);
2052 }
2053 return (0); /* never reached */
2054}
2055
/*
 * Port variant of pf_match(): all three port values are converted with
 * ntohs() before the comparison.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t lo = ntohs(a1);
	u_int16_t hi = ntohs(a2);
	u_int16_t port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
2064
2065int
2066pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2067{
2068 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2069 return (0);
2070 return (pf_match(op, a1, a2, u));
2071}
2072
2073int
2074pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2075{
2076 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
2077 return (0);
2078 return (pf_match(op, a1, a2, g));
2079}
2080
70224baa 2081int
315a7da3 2082pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
70224baa
JL
2083{
2084 if (*tag == -1)
315a7da3 2085 *tag = m->m_pkthdr.pf.tag;
70224baa 2086
02742ec6
JS
2087 return ((!r->match_tag_not && r->match_tag == *tag) ||
2088 (r->match_tag_not && r->match_tag != *tag));
2089}
2090
70224baa 2091int
315a7da3 2092pf_tag_packet(struct mbuf *m, int tag, int rtableid)
02742ec6 2093{
70224baa
JL
2094 if (tag <= 0 && rtableid < 0)
2095 return (0);
2096
70224baa 2097 if (tag > 0)
315a7da3 2098 m->m_pkthdr.pf.tag = tag;
70224baa 2099 if (rtableid >= 0)
315a7da3 2100 m->m_pkthdr.pf.rtableid = rtableid;
02742ec6 2101
70224baa 2102 return (0);
02742ec6
JS
2103}
2104
315a7da3 2105void
70224baa 2106pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
ed1f0be2 2107 struct pf_rule **r, struct pf_rule **a, int *match)
70224baa
JL
2108{
2109 struct pf_anchor_stackframe *f;
2110
2111 (*r)->anchor->match = 0;
2112 if (match)
2113 *match = 0;
2114 if (*depth >= sizeof(pf_anchor_stack) /
2115 sizeof(pf_anchor_stack[0])) {
2116 kprintf("pf_step_into_anchor: stack overflow\n");
2117 *r = TAILQ_NEXT(*r, entries);
2118 return;
2119 } else if (*depth == 0 && a != NULL)
2120 *a = *r;
2121 f = pf_anchor_stack + (*depth)++;
2122 f->rs = *rs;
2123 f->r = *r;
2124 if ((*r)->anchor_wildcard) {
2125 f->parent = &(*r)->anchor->children;
2126 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
2127 NULL) {
2128 *r = NULL;
2129 return;
2130 }
2131 *rs = &f->child->ruleset;
2132 } else {
2133 f->parent = NULL;
2134 f->child = NULL;
2135 *rs = &(*r)->anchor->ruleset;
2136 }
2137 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2138}
02742ec6 2139
70224baa
JL
2140int
2141pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2142 struct pf_rule **r, struct pf_rule **a, int *match)
2143{
2144 struct pf_anchor_stackframe *f;
2145 int quick = 0;
2146
2147 do {
2148 if (*depth <= 0)
2149 break;
2150 f = pf_anchor_stack + *depth - 1;
2151 if (f->parent != NULL && f->child != NULL) {
2152 if (f->child->match ||
2153 (match != NULL && *match)) {
2154 f->r->anchor->match = 1;
2155 *match = 0;
2156 }
2157 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2158 if (f->child != NULL) {
2159 *rs = &f->child->ruleset;
2160 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2161 if (*r == NULL)
2162 continue;
2163 else
2164 break;
2165 }
2166 }
2167 (*depth)--;
2168 if (*depth == 0 && a != NULL)
2169 *a = NULL;
2170 *rs = f->rs;
ed1f0be2 2171 if (f->r->anchor->match || (match != NULL && *match))
70224baa
JL
2172 quick = f->r->quick;
2173 *r = TAILQ_NEXT(f->r, entries);
2174 } while (*r == NULL);
2175
2176 return (quick);
2177}
02742ec6
JS
2178
2179#ifdef INET6
2180void
2181pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2182 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2183{
2184 switch (af) {
2185#ifdef INET
2186 case AF_INET:
2187 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2188 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2189 break;
2190#endif /* INET */
2191 case AF_INET6:
2192 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2193 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2194 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2195 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2196 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2197 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2198 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2199 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2200 break;
2201 }
2202}
2203
2204void
2205pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2206{
2207 switch (af) {
2208#ifdef INET
2209 case AF_INET:
2210 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2211 break;
2212#endif /* INET */
2213 case AF_INET6:
2214 if (addr->addr32[3] == 0xffffffff) {
2215 addr->addr32[3] = 0;
2216 if (addr->addr32[2] == 0xffffffff) {
2217 addr->addr32[2] = 0;
2218 if (addr->addr32[1] == 0xffffffff) {
2219 addr->addr32[1] = 0;
2220 addr->addr32[0] =
2221 htonl(ntohl(addr->addr32[0]) + 1);
2222 } else
2223 addr->addr32[1] =
2224 htonl(ntohl(addr->addr32[1]) + 1);
2225 } else
2226 addr->addr32[2] =
2227 htonl(ntohl(addr->addr32[2]) + 1);
2228 } else
2229 addr->addr32[3] =
2230 htonl(ntohl(addr->addr32[3]) + 1);
2231 break;
2232 }
2233}
2234#endif /* INET6 */
2235
/*
 * Mixing step used by pf_hash(): nine subtract/xor-shift rounds over
 * three words.  All three arguments must be modifiable lvalues; each one
 * is read and written several times.
 */
#define mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
2248
2249/*
2250 * hash function based on bridge_hash in if_bridge.c
2251 */
2252void
2253pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2254 struct pf_poolhashkey *key, sa_family_t af)
2255{
2256 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2257
2258 switch (af) {
2259#ifdef INET
2260 case AF_INET:
2261 a += inaddr->addr32[0];
2262 b += key->key32[1];
2263 mix(a, b, c);
2264 hash->addr32[0] = c + key->key32[2];
2265 break;
2266#endif /* INET */
2267#ifdef INET6
2268 case AF_INET6:
2269 a += inaddr->addr32[0];
2270 b += inaddr->addr32[2];
2271 mix(a, b, c);
2272 hash->addr32[0] = c;
2273 a += inaddr->addr32[1];
2274 b += inaddr->addr32[3];
2275 c += key->key32[1];
2276 mix(a, b, c);
2277 hash->addr32[1] = c;
2278 a += inaddr->addr32[2];
2279 b += inaddr->addr32[1];
2280 c += key->key32[2];
2281 mix(a, b, c);
2282 hash->addr32[2] = c;
2283 a += inaddr->addr32[3];
2284 b += inaddr->addr32[0];
2285 c += key->key32[3];
2286 mix(a, b, c);
2287 hash->addr32[3] = c;
2288 break;
2289#endif /* INET6 */
2290 }
2291}
2292
2293int
2294pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2295 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2296{
2297 unsigned char hash[16];
2298 struct pf_pool *rpool = &r->rpool;
2299 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
2300 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
2301 struct pf_pooladdr *acur = rpool->cur;
2302 struct pf_src_node k;
2303
2304 if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2305 (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2306 k.af = af;
2307 PF_ACPY(&k.addr, saddr, af);
2308 if (r->rule_flag & PFRULE_RULESRCTRACK ||
2309 r->rpool.opts & PF_POOL_STICKYADDR)
2310 k.rule.ptr = r;
2311 else
2312 k.rule.ptr = NULL;
2313 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2314 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2315 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2316 PF_ACPY(naddr, &(*sn)->raddr, af);
2317 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 2318 kprintf("pf_map_addr: src tracking maps ");
02742ec6 2319 pf_print_host(&k.addr, 0, af);
4b1cf444 2320 kprintf(" to ");
02742ec6 2321 pf_print_host(naddr, 0, af);
4b1cf444 2322 kprintf("\n");
02742ec6
JS
2323 }
2324 return (0);
2325 }
2326 }
2327
2328 if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2329 return (1);
2330 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
70224baa
JL
2331 switch (af) {
2332#ifdef INET
2333 case AF_INET:
02742ec6
JS
2334 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2335 (rpool->opts & PF_POOL_TYPEMASK) !=
2336 PF_POOL_ROUNDROBIN)
2337 return (1);
2338 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2339 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
70224baa
JL
2340 break;
2341#endif /* INET */
2342#ifdef INET6
2343 case AF_INET6:
02742ec6
JS
2344 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2345 (rpool->opts & PF_POOL_TYPEMASK) !=
2346 PF_POOL_ROUNDROBIN)
2347 return (1);
2348 raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2349 rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
70224baa
JL
2350 break;
2351#endif /* INET6 */
02742ec6
JS
2352 }
2353 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2354 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2355 return (1); /* unsupported */
2356 } else {
2357 raddr = &rpool->cur->addr.v.a.addr;
2358 rmask = &rpool->cur->addr.v.a.mask;
2359 }
2360
2361 switch (rpool->opts & PF_POOL_TYPEMASK) {
2362 case PF_POOL_NONE:
2363 PF_ACPY(naddr, raddr, af);
2364 break;
2365 case PF_POOL_BITMASK:
2366 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2367 break;
2368 case PF_POOL_RANDOM:
2369 if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2370 switch (af) {
2371#ifdef INET
2372 case AF_INET:
4fc5aa1c 2373 rpool->counter.addr32[0] = htonl(karc4random());
02742ec6
JS
2374 break;
2375#endif /* INET */
2376#ifdef INET6
2377 case AF_INET6:
2378 if (rmask->addr32[3] != 0xffffffff)
70224baa 2379 rpool->counter.addr32[3] =
4fc5aa1c 2380 htonl(karc4random());
02742ec6
JS
2381 else
2382 break;
2383 if (rmask->addr32[2] != 0xffffffff)
70224baa 2384 rpool->counter.addr32[2] =
4fc5aa1c 2385 htonl(karc4random());
02742ec6
JS
2386 else
2387 break;
2388 if (rmask->addr32[1] != 0xffffffff)
70224baa 2389 rpool->counter.addr32[1] =
4fc5aa1c 2390 htonl(karc4random());
02742ec6
JS
2391 else
2392 break;
2393 if (rmask->addr32[0] != 0xffffffff)
70224baa 2394 rpool->counter.addr32[0] =
4fc5aa1c 2395 htonl(karc4random());
02742ec6
JS
2396 break;
2397#endif /* INET6 */
2398 }
2399 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2400 PF_ACPY(init_addr, naddr, af);
2401
2402 } else {
2403 PF_AINC(&rpool->counter, af);
2404 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2405 }
2406 break;
2407 case PF_POOL_SRCHASH:
2408 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2409 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2410 break;
2411 case PF_POOL_ROUNDROBIN:
2412 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2413 if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2414 &rpool->tblidx, &rpool->counter,
2415 &raddr, &rmask, af))
2416 goto get_addr;
2417 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2418 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2419 &rpool->tblidx, &rpool->counter,
2420 &raddr, &rmask, af))
2421 goto get_addr;
2422 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2423 goto get_addr;
2424
2425 try_next:
2426 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2427 rpool->cur = TAILQ_FIRST(&rpool->list);
2428 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2429 rpool->tblidx = -1;
2430 if (pfr_pool_get(rpool->cur->addr.p.tbl,
2431 &rpool->tblidx, &rpool->counter,
2432 &raddr, &rmask, af)) {
2433 /* table contains no address of type 'af' */
2434 if (rpool->cur != acur)
2435 goto try_next;
2436 return (1);
2437 }
2438 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2439 rpool->tblidx = -1;
2440 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2441 &rpool->tblidx, &rpool->counter,
2442 &raddr, &rmask, af)) {
2443 /* table contains no address of type 'af' */
2444 if (rpool->cur != acur)
2445 goto try_next;
2446 return (1);
2447 }
2448 } else {
2449 raddr = &rpool->cur->addr.v.a.addr;
2450 rmask = &rpool->cur->addr.v.a.mask;
2451 PF_ACPY(&rpool->counter, raddr, af);
2452 }
2453
2454 get_addr:
2455 PF_ACPY(naddr, &rpool->counter, af);
70224baa
JL
2456 if (init_addr != NULL && PF_AZERO(init_addr, af))
2457 PF_ACPY(init_addr, naddr, af);
02742ec6
JS
2458 PF_AINC(&rpool->counter, af);
2459 break;
2460 }
2461 if (*sn != NULL)
2462 PF_ACPY(&(*sn)->raddr, naddr, af);
2463
2464 if (pf_status.debug >= PF_DEBUG_MISC &&
2465 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
4b1cf444 2466 kprintf("pf_map_addr: selected address ");
02742ec6 2467 pf_print_host(naddr, 0, af);
4b1cf444 2468 kprintf("\n");
02742ec6
JS
2469 }
2470
2471 return (0);
2472}
2473
2474int
2475pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2476 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2477 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2478 struct pf_src_node **sn)
2479{
315a7da3 2480 struct pf_state_key_cmp key;
02742ec6
JS
2481 struct pf_addr init_addr;
2482 u_int16_t cut;
2483
2484 bzero(&init_addr, sizeof(init_addr));
2485 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2486 return (1);
2487
70224baa
JL
2488 if (proto == IPPROTO_ICMP) {
2489 low = 1;
2490 high = 65535;
2491 }
2492
02742ec6
JS
2493 do {
2494 key.af = af;
2495 key.proto = proto;
ed1f0be2
JL
2496 PF_ACPY(&key.addr[1], daddr, key.af);
2497 PF_ACPY(&key.addr[0], naddr, key.af);
2498 key.port[1] = dport;
02742ec6
JS
2499
2500 /*
2501 * port search; start random, step;
2502 * similar 2 portloop in in_pcbbind
2503 */
70224baa
JL
2504 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2505 proto == IPPROTO_ICMP)) {
ed1f0be2
JL
2506 key.port[0] = dport;
2507 if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
02742ec6
JS
2508 return (0);
2509 } else if (low == 0 && high == 0) {
ed1f0be2
JL
2510 key.port[0] = *nport;
2511 if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
02742ec6
JS
2512 return (0);
2513 } else if (low == high) {
ed1f0be2
JL
2514 key.port[0] = htons(low);
2515 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
02742ec6
JS
2516 *nport = htons(low);
2517 return (0);
2518 }
2519 } else {
2520 u_int16_t tmp;
2521
2522 if (low > high) {
2523 tmp = low;
2524 low = high;
2525 high = tmp;
2526 }
2527 /* low < high */
4fc5aa1c 2528 cut = htonl(karc4random()) % (1 + high - low) + low;
02742ec6
JS
2529 /* low <= cut <= high */
2530 for (tmp = cut; tmp <= high; ++(tmp)) {
ed1f0be2
JL
2531 key.port[0] = htons(tmp);
2532 if (pf_find_state_all(&key, PF_IN, NULL) ==
2533 NULL && !in_baddynamic(tmp, proto)) {
02742ec6
JS
2534 *nport = htons(tmp);
2535 return (0);
2536 }
2537 }
2538 for (tmp = cut - 1; tmp >= low; --(tmp)) {
ed1f0be2
JL
2539 key.port[0] = htons(tmp);
2540 if (pf_find_state_all(&key, PF_IN, NULL) ==
2541 NULL && !in_baddynamic(tmp, proto)) {
02742ec6
JS
2542 *nport = htons(tmp);
2543 return (0);
2544 }
2545 }
2546 }
2547
2548 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2549 case PF_POOL_RANDOM:
2550 case PF_POOL_ROUNDROBIN:
2551 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2552 return (1);
2553 break;
2554 case PF_POOL_NONE:
2555 case PF_POOL_SRCHASH:
2556 case PF_POOL_BITMASK:
2557 default:
2558 return (1);
2559 }
2560 } while (! PF_AEQ(&init_addr, naddr, af) );
02742ec6
JS
2561 return (1); /* none available */
2562}
2563
2564struct pf_rule *
2565pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2566 int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2567 struct pf_addr *daddr, u_int16_t dport, int rs_num)
2568{
70224baa 2569 struct pf_rule *r, *rm = NULL;
02742ec6 2570 struct pf_ruleset *ruleset = NULL;
70224baa
JL
2571 int tag = -1;
2572 int rtableid = -1;
2573 int asd = 0;
02742ec6
JS
2574
2575 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2576 while (r && rm == NULL) {
2577 struct pf_rule_addr *src = NULL, *dst = NULL;
2578 struct pf_addr_wrap *xdst = NULL;
2579
2580 if (r->action == PF_BINAT && direction == PF_IN) {
2581 src = &r->dst;
2582 if (r->rpool.cur != NULL)
2583 xdst = &r->rpool.cur->addr;
2584 } else {
2585 src = &r->src;
2586 dst = &r->dst;
2587 }
2588
2589 r->evaluations++;
70224baa 2590 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
2591 r = r->skip[PF_SKIP_IFP].ptr;
2592 else if (r->direction && r->direction != direction)
2593 r = r->skip[PF_SKIP_DIR].ptr;
2594 else if (r->af && r->af != pd->af)
2595 r = r->skip[PF_SKIP_AF].ptr;
2596 else if (r->proto && r->proto != pd->proto)
2597 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
2598 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2599 src->neg, kif))
02742ec6
JS
2600 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2601 PF_SKIP_DST_ADDR].ptr;
2602 else if (src->port_op && !pf_match_port(src->port_op,
2603 src->port[0], src->port[1], sport))
2604 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2605 PF_SKIP_DST_PORT].ptr;
2606 else if (dst != NULL &&
70224baa 2607 PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
02742ec6 2608 r = r->skip[PF_SKIP_DST_ADDR].ptr;
70224baa
JL
2609 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2610 0, NULL))
02742ec6
JS
2611 r = TAILQ_NEXT(r, entries);
2612 else if (dst != NULL && dst->port_op &&
2613 !pf_match_port(dst->port_op, dst->port[0],
2614 dst->port[1], dport))
2615 r = r->skip[PF_SKIP_DST_PORT].ptr;
315a7da3 2616 else if (r->match_tag && !pf_match_tag(m, r, &tag))
70224baa 2617 r = TAILQ_NEXT(r, entries);
02742ec6
JS
2618 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2619 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2620 off, pd->hdr.tcp), r->os_fingerprint)))
2621 r = TAILQ_NEXT(r, entries);
70224baa
JL
2622 else {
2623 if (r->tag)
2624 tag = r->tag;
2625 if (r->rtableid >= 0)
2626 rtableid = r->rtableid;
2627 if (r->anchor == NULL) {
02742ec6 2628 rm = r;
70224baa
JL
2629 } else
2630 pf_step_into_anchor(&asd, &ruleset, rs_num,
2631 &r, NULL, NULL);
2632 }
2633 if (r == NULL)
2634 pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2635 NULL, NULL);
02742ec6 2636 }
315a7da3 2637 if (pf_tag_packet(m, tag, rtableid))
70224baa 2638 return (NULL);
02742ec6
JS
2639 if (rm != NULL && (rm->action == PF_NONAT ||
2640 rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2641 return (NULL);
2642 return (rm);
2643}
2644
2645struct pf_rule *
2646pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2647 struct pfi_kif *kif, struct pf_src_node **sn,
ed1f0be2
JL
2648 struct pf_state_key **skw, struct pf_state_key **sks,
2649 struct pf_state_key **skp, struct pf_state_key **nkp,
2650 struct pf_addr *saddr, struct pf_addr *daddr,
2651 u_int16_t sport, u_int16_t dport)
02742ec6
JS
2652{
2653 struct pf_rule *r = NULL;
2654
ed1f0be2 2655
02742ec6
JS
2656 if (direction == PF_OUT) {
2657 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2658 sport, daddr, dport, PF_RULESET_BINAT);
2659 if (r == NULL)
2660 r = pf_match_translation(pd, m, off, direction, kif,
2661 saddr, sport, daddr, dport, PF_RULESET_NAT);
2662 } else {
2663 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2664 sport, daddr, dport, PF_RULESET_RDR);
2665 if (r == NULL)
2666 r = pf_match_translation(pd, m, off, direction, kif,
2667 saddr, sport, daddr, dport, PF_RULESET_BINAT);
2668 }
2669
2670 if (r != NULL) {
ed1f0be2
JL
2671 struct pf_addr *naddr;
2672 u_int16_t *nport;
2673
2674 if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
2675 saddr, daddr, sport, dport))
2676 return r;
2677
2678 /* XXX We only modify one side for now. */
2679 naddr = &(*nkp)->addr[1];
2680 nport = &(*nkp)->port[1];
2681
be02a6a0
MD
2682 /*
2683 * NOTE: Currently all translations will clear
2684 * BRIDGE_MBUF_TAGGED, telling the bridge to
2685 * ignore the original input encapsulation.
2686 */
02742ec6
JS
2687 switch (r->action) {
2688 case PF_NONAT:
2689 case PF_NOBINAT:
2690 case PF_NORDR:
2691 return (NULL);
2692 case PF_NAT:
be02a6a0 2693 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
02742ec6
JS
2694 if (pf_get_sport(pd->af, pd->proto, r, saddr,
2695 daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2696 r->rpool.proxy_port[1], sn)) {
2697 DPFPRINTF(PF_DEBUG_MISC,
2698 ("pf: NAT proxy port allocation "
2699 "(%u-%u) failed\n",
2700 r->rpool.proxy_port[0],
2701 r->rpool.proxy_port[1]));
2702 return (NULL);
2703 }
2704 break;
2705 case PF_BINAT:
be02a6a0 2706 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
02742ec6
JS
2707 switch (direction) {
2708 case PF_OUT:
2709 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
70224baa
JL
2710 switch (pd->af) {
2711#ifdef INET
2712 case AF_INET:
02742ec6
JS
2713 if (r->rpool.cur->addr.p.dyn->
2714 pfid_acnt4 < 1)
2715 return (NULL);
2716 PF_POOLMASK(naddr,
2717 &r->rpool.cur->addr.p.dyn->
2718 pfid_addr4,
2719 &r->rpool.cur->addr.p.dyn->
2720 pfid_mask4,
2721 saddr, AF_INET);
70224baa
JL
2722 break;
2723#endif /* INET */
2724#ifdef INET6
2725 case AF_INET6:
02742ec6
JS
2726 if (r->rpool.cur->addr.p.dyn->
2727 pfid_acnt6 < 1)
2728 return (NULL);
2729 PF_POOLMASK(naddr,
2730 &r->rpool.cur->addr.p.dyn->
2731 pfid_addr6,
2732 &r->rpool.cur->addr.p.dyn->
2733 pfid_mask6,
2734 saddr, AF_INET6);
70224baa
JL
2735 break;
2736#endif /* INET6 */
02742ec6
JS
2737 }
2738 } else
2739 PF_POOLMASK(naddr,
2740 &r->rpool.cur->addr.v.a.addr,
2741 &r->rpool.cur->addr.v.a.mask,
2742 saddr, pd->af);
2743 break;
2744 case PF_IN:
70224baa
JL
2745 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2746 switch (pd->af) {
2747#ifdef INET
2748 case AF_INET:
02742ec6
JS
2749 if (r->src.addr.p.dyn->
2750 pfid_acnt4 < 1)
2751 return (NULL);
2752 PF_POOLMASK(naddr,
2753 &r->src.addr.p.dyn->
2754 pfid_addr4,
2755 &r->src.addr.p.dyn->
2756 pfid_mask4,
2757 daddr, AF_INET);
70224baa
JL
2758 break;
2759#endif /* INET */
2760#ifdef INET6
2761 case AF_INET6:
02742ec6
JS
2762 if (r->src.addr.p.dyn->
2763 pfid_acnt6 < 1)
2764 return (NULL);
2765 PF_POOLMASK(naddr,
2766 &r->src.addr.p.dyn->
2767 pfid_addr6,
2768 &r->src.addr.p.dyn->
2769 pfid_mask6,
2770 daddr, AF_INET6);
70224baa
JL
2771 break;
2772#endif /* INET6 */
02742ec6
JS
2773 }
2774 } else
2775 PF_POOLMASK(naddr,
2776 &r->src.addr.v.a.addr,
2777 &r->src.addr.v.a.mask, daddr,
2778 pd->af);
2779 break;
2780 }
2781 break;
2782 case PF_RDR: {
be02a6a0 2783 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
70224baa 2784 if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
02742ec6 2785 return (NULL);
70224baa
JL
2786 if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2787 PF_POOL_BITMASK)
2788 PF_POOLMASK(naddr, naddr,
2789 &r->rpool.cur->addr.v.a.mask, daddr,
2790 pd->af);
315a7da3 2791
02742ec6
JS
2792 if (r->rpool.proxy_port[1]) {
2793 u_int32_t tmp_nport;
2794
2795 tmp_nport = ((ntohs(dport) -
2796 ntohs(r->dst.port[0])) %
2797 (r->rpool.proxy_port[1] -
2798 r->rpool.proxy_port[0] + 1)) +
2799 r->rpool.proxy_port[0];
2800
2801 /* wrap around if necessary */
2802 if (tmp_nport > 65535)
2803 tmp_nport -= 65535;
2804 *nport = htons((u_int16_t)tmp_nport);
2805 } else if (r->rpool.proxy_port[0])
2806 *nport = htons(r->rpool.proxy_port[0]);
2807 break;
2808 }
2809 default:
2810 return (NULL);
2811 }
2812 }
2813
2814 return (r);
2815}
2816
2817#ifdef SMP
2818struct netmsg_hashlookup {
002c1265 2819 struct netmsg_base base;
02742ec6
JS
2820 struct inpcb **nm_pinp;
2821 struct inpcbinfo *nm_pcbinfo;
2822 struct pf_addr *nm_saddr;
2823 struct pf_addr *nm_daddr;
2824 uint16_t nm_sport;
2825 uint16_t nm_dport;
2826 sa_family_t nm_af;
2827};
2828
0379d9fd 2829#ifdef PF_SOCKET_LOOKUP_DOMSG
4599cf19 2830static void
002c1265 2831in_pcblookup_hash_handler(netmsg_t msg)
02742ec6 2832{
002c1265 2833 struct netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg;
02742ec6 2834
002c1265
MD
2835 if (rmsg->nm_af == AF_INET)
2836 *rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo,
2837 rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4,
2838 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
02742ec6
JS
2839#ifdef INET6
2840 else
002c1265
MD
2841 *rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo,
2842 &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6,
2843 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
02742ec6 2844#endif /* INET6 */
002c1265 2845 lwkt_replymsg(&rmsg->base.lmsg, 0);
02742ec6 2846}
0379d9fd
SZ
2847#endif /* PF_SOCKET_LOOKUP_DOMSG */
2848
02742ec6
JS
2849#endif /* SMP */
2850
2851int
315a7da3 2852pf_socket_lookup(int direction, struct pf_pdesc *pd)
02742ec6
JS
2853{
2854 struct pf_addr *saddr, *daddr;
2855 u_int16_t sport, dport;
2856 struct inpcbinfo *pi;
2857 struct inpcb *inp;
2858#ifdef SMP
2859 struct netmsg_hashlookup *msg = NULL;
0379d9fd 2860#ifdef PF_SOCKET_LOOKUP_DOMSG
d9663f05 2861 struct netmsg_hashlookup msg0;
02742ec6 2862#endif
0379d9fd 2863#endif
02742ec6
JS
2864 int pi_cpu = 0;
2865
70224baa
JL
2866 if (pd == NULL)
2867 return (-1);
2868 pd->lookup.uid = UID_MAX;
2869 pd->lookup.gid = GID_MAX;
2870 pd->lookup.pid = NO_PID;
02742ec6
JS
2871 if (direction == PF_IN) {
2872 saddr = pd->src;
2873 daddr = pd->dst;
2874 } else {
2875 saddr = pd->dst;
2876 daddr = pd->src;
2877 }
2878 switch (pd->proto) {
2879 case IPPROTO_TCP:
315a7da3
JL
2880 if (pd->hdr.tcp == NULL)
2881 return (-1);
02742ec6
JS
2882 sport = pd->hdr.tcp->th_sport;
2883 dport = pd->hdr.tcp->th_dport;
2884
2885 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
2886 pi = &tcbinfo[pi_cpu];
2887#ifdef SMP
2888 /*
2889 * Our netstack runs lockless on MP systems
2890 * (only for TCP connections at the moment).
2891 *
2892 * As we are not allowed to read another CPU's tcbinfo,
2893 * we have to ask that CPU via remote call to search the
2894 * table for us.
2895 *
2896 * Prepare a msg iff data belongs to another CPU.
2897 */
2898 if (pi_cpu != mycpu->gd_cpuid) {
0379d9fd
SZ
2899#ifdef PF_SOCKET_LOOKUP_DOMSG
2900 /*
2901 * NOTE:
2902 *
2903 * Following lwkt_domsg() is dangerous and could
2904 * lockup the network system, e.g.
2905 *
2906 * On 2 CPU system:
2907 * netisr0 domsg to netisr1 (due to lookup)
2908 * netisr1 domsg to netisr0 (due to lookup)
2909 *
2910 * We simply return -1 here, since we are probably
2911 * called before NAT, so the TCP packet should
2912 * already be on the correct CPU.
2913 */
d9663f05
SZ
2914 msg = &msg0;
2915 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
48e7b118 2916 0, in_pcblookup_hash_handler);
02742ec6
JS
2917 msg->nm_pinp = &inp;
2918 msg->nm_pcbinfo = pi;
2919 msg->nm_saddr = saddr;
2920 msg->nm_sport = sport;
2921 msg->nm_daddr = daddr;
2922 msg->nm_dport = dport;
2923 msg->nm_af = pd->af;
0379d9fd
SZ
2924#else /* !PF_SOCKET_LOOKUP_DOMSG */
2925 kprintf("pf_socket_lookup: tcp packet not on the "
2926 "correct cpu %d, cur cpu %d\n",
2927 pi_cpu, mycpuid);
2928 print_backtrace(-1);
2929 return -1;
2930#endif /* PF_SOCKET_LOOKUP_DOMSG */
02742ec6
JS
2931 }
2932#endif /* SMP */
2933 break;
2934 case IPPROTO_UDP:
315a7da3
JL
2935 if (pd->hdr.udp == NULL)
2936 return (-1);
02742ec6
JS
2937 sport = pd->hdr.udp->uh_sport;
2938 dport = pd->hdr.udp->uh_dport;
2939 pi = &udbinfo;
2940 break;
2941 default:
315a7da3 2942 return (-1);
02742ec6
JS
2943 }
2944 if (direction != PF_IN) {
2945 u_int16_t p;
2946
2947 p = sport;
2948 sport = dport;
2949 dport = p;
2950 }
2951 switch (pd->af) {
2952#ifdef INET6
2953 case AF_INET6:
2954#ifdef SMP
2955 /*
2956 * Query other CPU, second part
2957 *
2958 * msg only gets initialized when:
2959 * 1) packet is TCP
2960 * 2) the info belongs to another CPU
2961 *
2962 * Use some switch/case magic to avoid code duplication.
2963 */
2964 if (msg == NULL)
2965#endif /* SMP */
2966 {
2967 inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2968 &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2969
2970 if (inp == NULL)
70224baa 2971 return (-1);
02742ec6
JS
2972 break;
2973 }
2974 /* FALLTHROUGH if SMP and on other CPU */
2975#endif /* INET6 */
2976 case AF_INET:
2977#ifdef SMP
2978 if (msg != NULL) {
2a7a2b1c 2979 lwkt_domsg(cpu_portfn(pi_cpu),
002c1265 2980 &msg->base.lmsg, 0);
02742ec6
JS
2981 } else
2982#endif /* SMP */
2983 {
2984 inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2985 dport, INPLOOKUP_WILDCARD, NULL);
2986 }
2987 if (inp == NULL)
315a7da3 2988 return (-1);
02742ec6
JS
2989 break;
2990
2991 default:
70224baa 2992 return (-1);
02742ec6 2993 }
70224baa
JL
2994 pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
2995 pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
02742ec6
JS
2996 return (1);
2997}
2998
2999u_int8_t
3000pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3001{
3002 int hlen;
3003 u_int8_t hdr[60];
3004 u_int8_t *opt, optlen;
3005 u_int8_t wscale = 0;
3006
3007 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
3008 if (hlen <= sizeof(struct tcphdr))
3009 return (0);
3010 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3011 return (0);
3012 opt = hdr + sizeof(struct tcphdr);
3013 hlen -= sizeof(struct tcphdr);
3014 while (hlen >= 3) {
3015 switch (*opt) {
3016 case TCPOPT_EOL:
3017 case TCPOPT_NOP:
3018 ++opt;
3019 --hlen;
3020 break;
3021 case TCPOPT_WINDOW:
3022 wscale = opt[2];
3023 if (wscale > TCP_MAX_WINSHIFT)
3024 wscale = TCP_MAX_WINSHIFT;
3025 wscale |= PF_WSCALE_FLAG;
3026 /* FALLTHROUGH */
3027 default:
3028 optlen = opt[1];
3029 if (optlen < 2)
3030 optlen = 2;
3031 hlen -= optlen;
3032 opt += optlen;
3033 break;
3034 }
3035 }
3036 return (wscale);
3037}
3038
3039u_int16_t
3040pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
3041{
3042 int hlen;
3043 u_int8_t hdr[60];
3044 u_int8_t *opt, optlen;
3045 u_int16_t mss = tcp_mssdflt;
3046
3047 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
3048 if (hlen <= sizeof(struct tcphdr))
3049 return (0);
3050 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
3051 return (0);
3052 opt = hdr + sizeof(struct tcphdr);
3053 hlen -= sizeof(struct tcphdr);
3054 while (hlen >= TCPOLEN_MAXSEG) {
3055 switch (*opt) {
3056 case TCPOPT_EOL:
3057 case TCPOPT_NOP:
3058 ++opt;
3059 --hlen;
3060 break;
3061 case TCPOPT_MAXSEG:
3062 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
3063 /* FALLTHROUGH */
3064 default:
3065 optlen = opt[1];
3066 if (optlen < 2)
3067 optlen = 2;
3068 hlen -= optlen;
3069 opt += optlen;
3070 break;
3071 }
3072 }
3073 return (mss);
3074}
3075
3076u_int16_t
3077pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
3078{
3079#ifdef INET
3080 struct sockaddr_in *dst;
3081 struct route ro;
3082#endif /* INET */
3083#ifdef INET6
3084 struct sockaddr_in6 *dst6;
3085 struct route_in6 ro6;
3086#endif /* INET6 */
3087 struct rtentry *rt = NULL;
3088 int hlen = 0;
3089 u_int16_t mss = tcp_mssdflt;
3090
3091 switch (af) {
3092#ifdef INET
3093 case AF_INET:
3094 hlen = sizeof(struct ip);
3095 bzero(&ro, sizeof(ro));
3096 dst = (struct sockaddr_in *)&ro.ro_dst;
3097 dst->sin_family = AF_INET;
3098 dst->sin_len = sizeof(*dst);
3099 dst->sin_addr = addr->v4;
3100 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
3101 rt = ro.ro_rt;
3102 break;
3103#endif /* INET */
3104#ifdef INET6
3105 case AF_INET6:
3106 hlen = sizeof(struct ip6_hdr);
3107 bzero(&ro6, sizeof(ro6));
3108 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
3109 dst6->sin6_family = AF_INET6;
3110 dst6->sin6_len = sizeof(*dst6);
3111 dst6->sin6_addr = addr->v6;
3112 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
3113 rt = ro6.ro_rt;
3114 break;
3115#endif /* INET6 */
3116 }
3117
3118 if (rt && rt->rt_ifp) {
3119 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
3120 mss = max(tcp_mssdflt, mss);
3121 RTFREE(rt);
3122 }
3123 mss = min(mss, offer);
3124 mss = max(mss, 64); /* sanity - at least max opt space */
3125 return (mss);
3126}
3127
3128void
3129pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
3130{
3131 struct pf_rule *r = s->rule.ptr;
3132
3133 s->rt_kif = NULL;
3134 if (!r->rt || r->rt == PF_FASTROUTE)
3135 return;
ed1f0be2 3136 switch (s->key[PF_SK_WIRE]->af) {
02742ec6
JS
3137#ifdef INET
3138 case AF_INET:
3139 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
3140 &s->nat_src_node);
3141 s->rt_kif = r->rpool.cur->kif;
3142 break;
3143#endif /* INET */
3144#ifdef INET6
3145 case AF_INET6:
3146 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
3147 &s->nat_src_node);
3148 s->rt_kif = r->rpool.cur->kif;
3149 break;
3150#endif /* INET6 */
3151 }
3152}
3153
ed1f0be2
JL
3154u_int32_t
3155pf_tcp_iss(struct pf_pdesc *pd)
315a7da3 3156{
ed1f0be2
JL
3157 MD5_CTX ctx;
3158 u_int32_t digest[4];
3159
3160 if (pf_tcp_secret_init == 0) {
3161 karc4rand(pf_tcp_secret, sizeof(pf_tcp_secret));
3162 MD5Init(&pf_tcp_secret_ctx);
3163 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
3164 sizeof(pf_tcp_secret));
3165 pf_tcp_secret_init = 1;
3166 }
3167 ctx = pf_tcp_secret_ctx;
3168
3169 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
3170 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
3171 if (pd->af == AF_INET6) {
3172 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
3173 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
3174 } else {
3175 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
3176 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
315a7da3 3177 }
ed1f0be2
JL
3178 MD5Final((u_char *)digest, &ctx);
3179 pf_tcp_iss_off += 4096;
3180 return (digest[0] + pd->hdr.tcp->th_seq + pf_tcp_iss_off);
315a7da3
JL
3181}
3182
02742ec6 3183int
315a7da3 3184pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
02742ec6 3185 struct pfi_kif *kif, struct mbuf *m, int off, void *h,
70224baa
JL
3186 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3187 struct ifqueue *ifq, struct inpcb *inp)
02742ec6
JS
3188{
3189 struct pf_rule *nr = NULL;
3190 struct pf_addr *saddr = pd->src, *daddr = pd->dst;
02742ec6 3191 sa_family_t af = pd->af;
02742ec6
JS
3192 struct pf_rule *r, *a = NULL;
3193 struct pf_ruleset *ruleset = NULL;
3194 struct pf_src_node *nsn = NULL;
315a7da3 3195 struct tcphdr *th = pd->hdr.tcp;
ed1f0be2
JL
3196 struct pf_state_key *skw = NULL, *sks = NULL;
3197 struct pf_state_key *sk = NULL, *nk = NULL;
02742ec6 3198 u_short reason;
315a7da3 3199 int rewrite = 0, hdrlen = 0;
70224baa 3200 int tag = -1, rtableid = -1;
70224baa
JL
3201 int asd = 0;
3202 int match = 0;
315a7da3 3203 int state_icmp = 0;
ed1f0be2
JL
3204 u_int16_t sport = 0, dport = 0;
3205 u_int16_t nport = 0, bport = 0;
3206 u_int16_t bproto_sum = 0, bip_sum = 0;
315a7da3 3207 u_int8_t icmptype = 0, icmpcode = 0;
70224baa 3208
ed1f0be2 3209
315a7da3 3210 if (direction == PF_IN && pf_check_congestion(ifq)) {
70224baa
JL
3211 REASON_SET(&reason, PFRES_CONGEST);
3212 return (PF_DROP);
3213 }
3214
3215 if (inp != NULL)
315a7da3
JL
3216 pd->lookup.done = pf_socket_lookup(direction, pd);
3217 else if (debug_pfugidhack) {
70224baa 3218 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
315a7da3 3219 pd->lookup.done = pf_socket_lookup(direction, pd);
70224baa 3220 }
315a7da3 3221
315a7da3
JL
3222 switch (pd->proto) {
3223 case IPPROTO_TCP:
3224 sport = th->th_sport;
3225 dport = th->th_dport;
3226 hdrlen = sizeof(*th);
3227 break;
3228 case IPPROTO_UDP:
3229 sport = pd->hdr.udp->uh_sport;
3230 dport = pd->hdr.udp->uh_dport;
3231 hdrlen = sizeof(*pd->hdr.udp);
3232 break;
3233#ifdef INET
3234 case IPPROTO_ICMP:
3235 if (pd->af != AF_INET)
3236 break;
3237 sport = dport = pd->hdr.icmp->icmp_id;
ed1f0be2 3238 hdrlen = sizeof(*pd->hdr.icmp);
315a7da3
JL
3239 icmptype = pd->hdr.icmp->icmp_type;
3240 icmpcode = pd->hdr.icmp->icmp_code;
3241
3242 if (icmptype == ICMP_UNREACH ||
3243 icmptype == ICMP_SOURCEQUENCH ||
3244 icmptype == ICMP_REDIRECT ||
3245 icmptype == ICMP_TIMXCEED ||
3246 icmptype == ICMP_PARAMPROB)
3247 state_icmp++;
3248 break;
3249#endif /* INET */
3250#ifdef INET6
3251 case IPPROTO_ICMPV6:
ed1f0be2 3252 if (af != AF_INET6)
315a7da3
JL
3253 break;
3254 sport = dport = pd->hdr.icmp6->icmp6_id;
3255 hdrlen = sizeof(*pd->hdr.icmp6);
3256 icmptype = pd->hdr.icmp6->icmp6_type;
3257 icmpcode = pd->hdr.icmp6->icmp6_code;
3258
3259 if (icmptype == ICMP6_DST_UNREACH ||
3260 icmptype == ICMP6_PACKET_TOO_BIG ||
3261 icmptype == ICMP6_TIME_EXCEEDED ||
3262 icmptype == ICMP6_PARAM_PROB)
3263 state_icmp++;
3264 break;
3265#endif /* INET6 */
ed1f0be2
JL
3266 default:
3267 sport = dport = hdrlen = 0;
3268 break;
315a7da3 3269 }
02742ec6
JS
3270
3271 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3272
ed1f0be2
JL
3273 bport = nport = sport;
3274 /* check packet for BINAT/NAT/RDR */
3275 if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn,
3276 &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) {
3277 if (nk == NULL || sk == NULL) {
3278 REASON_SET(&reason, PFRES_MEMORY);
3279 goto cleanup;
3280 }
3281
3282 if (pd->ip_sum)
3283 bip_sum = *pd->ip_sum;
3284
3285 switch (pd->proto) {
3286 case IPPROTO_TCP:
3287 bproto_sum = th->th_sum;
3288 pd->proto_sum = &th->th_sum;
3289
3290 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3291 nk->port[pd->sidx] != sport) {
315a7da3 3292 pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
ed1f0be2
JL
3293 &th->th_sum, &nk->addr[pd->sidx],
3294 nk->port[pd->sidx], 0, af);
3295 pd->sport = &th->th_sport;
315a7da3 3296 sport = th->th_sport;
ed1f0be2
JL
3297 }
3298
3299 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3300 nk->port[pd->didx] != dport) {
3301 pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3302 &th->th_sum, &nk->addr[pd->didx],
3303 nk->port[pd->didx], 0, af);
3304 dport = th->th_dport;
3305 pd->dport = &th->th_dport;
3306 }
3307 rewrite++;
3308 break;
3309 case IPPROTO_UDP:
3310 bproto_sum = pd->hdr.udp->uh_sum;
3311 pd->proto_sum = &pd->hdr.udp->uh_sum;
3312
3313 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
3314 nk->port[pd->sidx] != sport) {
315a7da3
JL
3315 pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3316 pd->ip_sum, &pd->hdr.udp->uh_sum,
ed1f0be2
JL
3317 &nk->addr[pd->sidx],
3318 nk->port[pd->sidx], 1, af);
315a7da3 3319 sport = pd->hdr.udp->uh_sport;
ed1f0be2
JL
3320 pd->sport = &pd->hdr.udp->uh_sport;
3321 }
3322
3323 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
3324 nk->port[pd->didx] != dport) {
3325 pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3326 pd->ip_sum, &pd->hdr.udp->uh_sum,
3327 &nk->addr[pd->didx],
3328 nk->port[pd->didx], 1, af);
3329 dport = pd->hdr.udp->uh_dport;
3330 pd->dport = &pd->hdr.udp->uh_dport;
3331 }
3332 rewrite++;
3333 break;
315a7da3 3334#ifdef INET
ed1f0be2
JL
3335 case IPPROTO_ICMP:
3336 nk->port[0] = nk->port[1];
3337 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
315a7da3 3338 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
ed1f0be2
JL
3339 nk->addr[pd->sidx].v4.s_addr, 0);
3340
3341 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
3342 pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3343 nk->addr[pd->didx].v4.s_addr, 0);
3344
3345 if (nk->port[1] != pd->hdr.icmp->icmp_id) {
315a7da3 3346 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
ed1f0be2
JL
3347 pd->hdr.icmp->icmp_cksum, sport,
3348 nk->port[1], 0);
3349 pd->hdr.icmp->icmp_id = nk->port[1];
3350 pd->sport = &pd->hdr.icmp->icmp_id;
3351 }
3352 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3353 break;
315a7da3
JL
3354#endif /* INET */
3355#ifdef INET6
ed1f0be2
JL
3356 case IPPROTO_ICMPV6:
3357 nk->port[0] = nk->port[1];
3358 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
315a7da3 3359 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
ed1f0be2
JL
3360 &nk->addr[pd->sidx], 0);
3361
3362 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
3363 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3364 &nk->addr[pd->didx], 0);
3365 rewrite++;
3366 break;
315a7da3 3367#endif /* INET */
ed1f0be2
JL
3368 default:
3369 switch (af) {
315a7da3 3370#ifdef INET
ed1f0be2
JL
3371 case AF_INET:
3372 if (PF_ANEQ(saddr,
3373 &nk->addr[pd->sidx], AF_INET))
315a7da3 3374 pf_change_a(&saddr->v4.s_addr,
ed1f0be2
JL
3375 pd->ip_sum,
3376 nk->addr[pd->sidx].v4.s_addr, 0);
3377
3378 if (PF_ANEQ(daddr,
3379 &nk->addr[pd->didx], AF_INET))
3380 pf_change_a(&daddr->v4.s_addr,
3381 pd->ip_sum,
3382 nk->addr[pd->didx].v4.s_addr, 0);
3383 break;
315a7da3
JL
3384#endif /* INET */
3385#ifdef INET6
ed1f0be2
JL
3386 case AF_INET6:
3387 if (PF_ANEQ(saddr,
3388 &nk->addr[pd->sidx], AF_INET6))
3389 PF_ACPY(saddr, &nk->addr[pd->sidx], af);
315a7da3 3390
ed1f0be2
JL
3391 if (PF_ANEQ(daddr,
3392 &nk->addr[pd->didx], AF_INET6))
3393 PF_ACPY(saddr, &nk->addr[pd->didx], af);
315a7da3 3394 break;
315a7da3 3395#endif /* INET */
315a7da3 3396 }
ed1f0be2 3397 break;
02742ec6 3398 }
ed1f0be2
JL
3399 if (nr->natpass)
3400 r = NULL;
3401 pd->nat_rule = nr;
02742ec6
JS
3402 }
3403
3404 while (r != NULL) {
3405 r->evaluations++;
70224baa 3406 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
3407 r = r->skip[PF_SKIP_IFP].ptr;
3408 else if (r->direction && r->direction != direction)
3409 r = r->skip[PF_SKIP_DIR].ptr;
3410 else if (r->af && r->af != af)
3411 r = r->skip[PF_SKIP_AF].ptr;
315a7da3 3412 else if (r->proto && r->proto != pd->proto)
02742ec6 3413 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
3414 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3415 r->src.neg, kif))
02742ec6 3416 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
315a7da3 3417 /* tcp/udp only. port_op always 0 in other cases */
02742ec6 3418 else if (r->src.port_op && !pf_match_port(r->src.port_op,
315a7da3 3419 r->src.port[0], r->src.port[1], sport))
02742ec6 3420 r = r->skip[PF_SKIP_SRC_PORT].ptr;
70224baa
JL
3421 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3422 r->dst.neg, NULL))
02742ec6 3423 r = r->skip[PF_SKIP_DST_ADDR].ptr;
315a7da3 3424 /* tcp/udp only. port_op always 0 in other cases */
02742ec6 3425 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
315a7da3 3426 r->dst.port[0], r->dst.port[1], dport))
02742ec6 3427 r = r->skip[PF_SKIP_DST_PORT].ptr;
315a7da3
JL
3428 /* icmp only. type always 0 in other cases */
3429 else if (r->type && r->type != icmptype + 1)
3430 r = TAILQ_NEXT(r, entries);
3431 /* icmp only. type always 0 in other cases */
3432 else if (r->code && r->code != icmpcode + 1)
3433 r = TAILQ_NEXT(r, entries);
70224baa 3434 else if (r->tos && !(r->tos == pd->tos))
02742ec6
JS
3435 r = TAILQ_NEXT(r, entries);
3436 else if (r->rule_flag & PFRULE_FRAGMENT)
3437 r = TAILQ_NEXT(r, entries);
315a7da3
JL
3438 else if (pd->proto == IPPROTO_TCP &&
3439 (r->flagset & th->th_flags) != r->flags)
02742ec6 3440 r = TAILQ_NEXT(r, entries);
315a7da3 3441 /* tcp/udp only. uid.op always 0 in other cases */
70224baa 3442 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
315a7da3 3443 pf_socket_lookup(direction, pd), 1)) &&
02742ec6 3444 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
70224baa 3445 pd->lookup.uid))
02742ec6 3446 r = TAILQ_NEXT(r, entries);
315a7da3 3447 /* tcp/udp only. gid.op always 0 in other cases */
70224baa 3448 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
315a7da3 3449 pf_socket_lookup(direction, pd), 1)) &&
02742ec6 3450 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
70224baa 3451 pd->lookup.gid))
02742ec6 3452 r = TAILQ_NEXT(r, entries);
ed1f0be2
JL
3453 else if (r->prob &&
3454 r->prob <= karc4random())
75fda04a 3455 r = TAILQ_NEXT(r, entries);
315a7da3 3456 else if (r->match_tag && !pf_match_tag(m, r, &tag))
02742ec6 3457 r = TAILQ_NEXT(r, entries);
315a7da3
JL
3458 else if (r->os_fingerprint != PF_OSFP_ANY &&
3459 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3460 pf_osfp_fingerprint(pd, m, off, th),
3461 r->os_fingerprint)))
02742ec6
JS
3462 r = TAILQ_NEXT(r, entries);
3463 else {
3464 if (r->tag)
3465 tag = r->tag;
70224baa
JL
3466 if (r->rtableid >= 0)
3467 rtableid = r->rtableid;
02742ec6 3468 if (r->anchor == NULL) {
70224baa 3469 match = 1;
02742ec6
JS
3470 *rm = r;
3471 *am = a;
3472 *rsm = ruleset;
3473 if ((*rm)->quick)
3474 break;
3475 r = TAILQ_NEXT(r, entries);
3476 } else
70224baa
JL
3477 pf_step_into_anchor(&asd, &ruleset,
3478 PF_RULESET_FILTER, &r, &a, &match);
02742ec6 3479 }
70224baa
JL
3480 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3481 PF_RULESET_FILTER, &r, &a, &match))
3482 break;
02742ec6
JS
3483 }
3484 r = *rm;
3485 a = *am;
3486 ruleset = *rsm;
3487
3488 REASON_SET(&reason, PFRES_MATCH);
3489
315a7da3 3490 if (r->log || (nr != NULL && nr->log)) {
02742ec6 3491 if (rewrite)
315a7da3 3492 m_copyback(m, off, hdrlen, pd->hdr.any);
70224baa
JL
3493 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3494 a, ruleset, pd);
02742ec6
JS
3495 }
3496
3497 if ((r->action == PF_DROP) &&
3498 ((r->rule_flag & PFRULE_RETURNRST) ||
3499 (r->rule_flag & PFRULE_RETURNICMP) ||
3500 (r->rule_flag & PFRULE_RETURN))) {
3501 /* undo NAT changes, if they have taken place */
3502 if (nr != NULL) {
ed1f0be2
JL
3503 PF_ACPY(saddr, &sk->addr[pd->sidx], af);
3504 PF_ACPY(daddr, &sk->addr[pd->didx], af);
3505 if (pd->sport)
3506 *pd->sport = sk->port[pd->sidx];
3507 if (pd->dport)
3508 *pd->dport = sk->port[pd->didx];
3509 if (pd->proto_sum)
3510 *pd->proto_sum = bproto_sum;
3511 if (pd->ip_sum)
3512 *pd->ip_sum = bip_sum;
3513 m_copyback(m, off, hdrlen, pd->hdr.any);
02742ec6 3514 }
315a7da3
JL
3515 if (pd->proto == IPPROTO_TCP &&
3516 ((r->rule_flag & PFRULE_RETURNRST) ||
02742ec6
JS
3517 (r->rule_flag & PFRULE_RETURN)) &&
3518 !(th->th_flags & TH_RST)) {
315a7da3 3519 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
ed1f0be2
JL
3520 int len = 0;
3521 struct ip *h4;
66ddb4d5 3522#ifdef INET6
ed1f0be2 3523 struct ip6_hdr *h6;
66ddb4d5 3524#endif
ed1f0be2
JL
3525 switch (af) {
3526 case AF_INET:
3527 h4 = mtod(m, struct ip *);
3528 len = h4->ip_len - off;
3529 break;
2dba2225 3530#ifdef INET6
ed1f0be2
JL
3531 case AF_INET6:
3532 h6 = mtod(m, struct ip6_hdr *);
3533 len = h6->ip6_plen - (off - sizeof(*h6));
3534 break;
2dba2225 3535#endif
ed1f0be2 3536 }
02742ec6 3537
ed1f0be2 3538 if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
315a7da3
JL
3539 REASON_SET(&reason, PFRES_PROTCKSUM);
3540 else {
3541 if (th->th_flags & TH_SYN)
3542 ack++;
3543 if (th->th_flags & TH_FIN)
3544 ack++;
3545 pf_send_tcp(r, af, pd->dst,
3546 pd->src, th->th_dport, th->th_sport,
3547 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3548 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3549 }
ed1f0be2
JL
3550 } else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
3551 r->return_icmp)
02742ec6
JS
3552 pf_send_icmp(m, r->return_icmp >> 8,
3553 r->return_icmp & 255, af, r);
ed1f0be2
JL
3554 else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
3555 r->return_icmp6)
02742ec6
JS
3556 pf_send_icmp(m, r->return_icmp6 >> 8,
3557 r->return_icmp6 & 255, af, r);
3558 }
3559
315a7da3 3560 if (r->action == PF_DROP)
ed1f0be2 3561 goto cleanup;
02742ec6 3562
315a7da3 3563 if (pf_tag_packet(m, tag, rtableid)) {
70224baa 3564 REASON_SET(&reason, PFRES_MEMORY);
ed1f0be2 3565 goto cleanup;
70224baa 3566 }
02742ec6 3567
315a7da3
JL
3568 if (!state_icmp && (r->keep_state || nr != NULL ||
3569 (pd->flags & PFDESC_TCP_NORM))) {
ed1f0be2
JL
3570 int action;
3571 action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m,
3572 off, sport, dport, &rewrite, kif, sm, tag, bproto_sum,
3573 bip_sum, hdrlen);
3574 if (action != PF_PASS)
3575 return (action);
3576 }
3577
3578 /* copy back packet headers if we performed NAT operations */
3579 if (rewrite)
3580 m_copyback(m, off, hdrlen, pd->hdr.any);
3581
3582 return (PF_PASS);
3583
02742ec6 3584cleanup:
ed1f0be2 3585 if (sk != NULL)
1186cbc0 3586 kfree(sk, M_PFSTATEKEYPL);
ed1f0be2 3587 if (nk != NULL)
1186cbc0 3588 kfree(nk, M_PFSTATEKEYPL);
ed1f0be2
JL
3589 return (PF_DROP);
3590}
02742ec6 3591
ed1f0be2
JL
3592static __inline int
3593pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
3594 struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw,
3595 struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk,
3596 struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite,
3597 struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum,
3598 u_int16_t bip_sum, int hdrlen)
3599{
3600 struct pf_state *s = NULL;
3601 struct pf_src_node *sn = NULL;
3602 struct tcphdr *th = pd->hdr.tcp;
3603 u_int16_t mss = tcp_mssdflt;
3604 u_short reason;
315a7da3 3605
ed1f0be2
JL
3606 /* check maximums */
3607 if (r->max_states && (r->states_cur >= r->max_states)) {
3608 pf_status.lcounters[LCNT_STATES]++;
3609 REASON_SET(&reason, PFRES_MAXSTATES);
3610 return (PF_DROP);
3611 }
3612 /* src node for filter rule */
3613 if ((r->rule_flag & PFRULE_SRCTRACK ||
3614 r->rpool.opts & PF_POOL_STICKYADDR) &&
3615 pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
3616 REASON_SET(&reason, PFRES_SRCLIMIT);
3617 goto csfailed;
3618 }
3619 /* src node for translation rule */
3620 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3621 pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
3622 REASON_SET(&reason, PFRES_SRCLIMIT);
3623 goto csfailed;
3624 }
1186cbc0 3625 s = kmalloc(sizeof(struct pf_state), M_PFSTATEPL, M_NOWAIT|M_ZERO);
ed1f0be2
JL
3626 if (s == NULL) {
3627 REASON_SET(&reason, PFRES_MEMORY);
3628 goto csfailed;
3629 }
3630 s->id = 0; /* XXX Do we really need that? not in OpenBSD */
3631 s->creatorid = 0;
3632 s->rule.ptr = r;
3633 s->nat_rule.ptr = nr;
3634 s->anchor.ptr = a;
3635 STATE_INC_COUNTERS(s);
3636 if (r->allow_opts)
3637 s->state_flags |= PFSTATE_ALLOWOPTS;
3638 if (r->rule_flag & PFRULE_STATESLOPPY)
3639 s->state_flags |= PFSTATE_SLOPPY;
3640 s->log = r->log & PF_LOG_ALL;
3641 if (nr != NULL)
3642 s->log |= nr->log & PF_LOG_ALL;
3643 switch (pd->proto) {
3644 case IPPROTO_TCP:
3645 s->src.seqlo = ntohl(th->th_seq);
3646 s->src.seqhi = s->src.seqlo + pd->p_len + 1;
3647 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3648 r->keep_state == PF_STATE_MODULATE) {
3649 /* Generate sequence number modulator */
3650 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
3651 0)
3652 s->src.seqdiff = 1;
3653 pf_change_a(&th->th_seq, &th->th_sum,
3654 htonl(s->src.seqlo + s->src.seqdiff), 0);
3655 *rewrite = 1;
3656 } else
3657 s->src.seqdiff = 0;
3658 if (th->th_flags & TH_SYN) {
3659 s->src.seqhi++;
3660 s->src.wscale = pf_get_wscale(m, off,
3661 th->th_off, pd->af);
315a7da3 3662 }
ed1f0be2
JL
3663 s->src.max_win = MAX(ntohs(th->th_win), 1);
3664 if (s->src.wscale & PF_WSCALE_MASK) {
3665 /* Remove scale factor from initial window */
3666 int win = s->src.max_win;
3667 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3668 s->src.max_win = (win - 1) >>
3669 (s->src.wscale & PF_WSCALE_MASK);
315a7da3 3670 }
ed1f0be2
JL
3671 if (th->th_flags & TH_FIN)
3672 s->src.seqhi++;
3673 s->dst.seqhi = 1;
3674 s->dst.max_win = 1;
3675 s->src.state = TCPS_SYN_SENT;
3676 s->dst.state = TCPS_CLOSED;
3677 s->timeout = PFTM_TCP_FIRST_PACKET;
3678 break;
3679 case IPPROTO_UDP:
3680 s->src.state = PFUDPS_SINGLE;
3681 s->dst.state = PFUDPS_NO_TRAFFIC;
3682 s->timeout = PFTM_UDP_FIRST_PACKET;
3683 break;
3684 case IPPROTO_ICMP:
315a7da3 3685#ifdef INET6
ed1f0be2 3686 case IPPROTO_ICMPV6:
315a7da3 3687#endif
ed1f0be2
JL
3688 s->timeout = PFTM_ICMP_FIRST_PACKET;
3689 break;
3690 default:
3691 s->src.state = PFOTHERS_SINGLE;
3692 s->dst.state = PFOTHERS_NO_TRAFFIC;
3693 s->timeout = PFTM_OTHER_FIRST_PACKET;
3694 }
315a7da3 3695
ed1f0be2
JL
3696 s->creation = time_second;
3697 s->expire = time_second;
315a7da3 3698
ed1f0be2
JL
3699 if (sn != NULL) {
3700 s->src_node = sn;
3701 s->src_node->states++;
3702 }
3703 if (nsn != NULL) {
3704 /* XXX We only modify one side for now. */
3705 PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
3706 s->nat_src_node = nsn;
3707 s->nat_src_node->states++;
3708 }
3709 if (pd->proto == IPPROTO_TCP) {
3710 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3711 off, pd, th, &s->src, &s->dst)) {
3712 REASON_SET(&reason, PFRES_MEMORY);
02742ec6 3713 pf_src_tree_remove_state(s);
70224baa 3714 STATE_DEC_COUNTERS(s);
1186cbc0 3715 kfree(s, M_PFSTATEPL);
02742ec6 3716 return (PF_DROP);
70224baa 3717 }
ed1f0be2
JL
3718 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3719 pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3720 &s->src, &s->dst, rewrite)) {
3721 /* This really shouldn't happen!!! */
3722 DPFPRINTF(PF_DEBUG_URGENT,
3723 ("pf_normalize_tcp_stateful failed on first pkt"));
3724 pf_normalize_tcp_cleanup(s);
3725 pf_src_tree_remove_state(s);
3726 STATE_DEC_COUNTERS(s);
1186cbc0 3727 kfree(s, M_PFSTATEPL);
ed1f0be2 3728 return (PF_DROP);
315a7da3 3729 }
02742ec6 3730 }
ed1f0be2 3731 s->direction = pd->dir;
02742ec6 3732
ed1f0be2
JL
3733 if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
3734 pd->src, pd->dst, sport, dport))
3735 goto csfailed;
3736
3737 if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
3738 if (pd->proto == IPPROTO_TCP)
3739 pf_normalize_tcp_cleanup(s);
3740 REASON_SET(&reason, PFRES_STATEINS);
3741 pf_src_tree_remove_state(s);
3742 STATE_DEC_COUNTERS(s);
1186cbc0 3743 kfree(s, M_PFSTATEPL);
ed1f0be2
JL
3744 return (PF_DROP);
3745 } else
3746 *sm = s;
3747
3748 pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */
3749 if (tag > 0) {
3750 pf_tag_ref(tag);
3751 s->tag = tag;
3752 }
3753 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
3754 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
3755 s->src.state = PF_TCPS_PROXY_SRC;
3756 /* undo NAT changes, if they have taken place */
3757 if (nr != NULL) {
3758 struct pf_state_key *skt = s->key[PF_SK_WIRE];
3759 if (pd->dir == PF_OUT)
3760 skt = s->key[PF_SK_STACK];
3761 PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
3762 PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
3763 if (pd->sport)
3764 *pd->sport = skt->port[pd->sidx];
3765 if (pd->dport)
3766 *pd->dport = skt->port[pd->didx];
3767 if (pd->proto_sum)
3768 *pd->proto_sum = bproto_sum;
3769 if (pd->ip_sum)
3770 *pd->ip_sum = bip_sum;
3771 m_copyback(m, off, hdrlen, pd->hdr.any);
3772 }
3773 s->src.seqhi = htonl(karc4random());
3774 /* Find mss option */
3775 mss = pf_get_mss(m, off, th->th_off, pd->af);
3776 mss = pf_calc_mss(pd->src, pd->af, mss);
3777 mss = pf_calc_mss(pd->dst, pd->af, mss);
3778 s->src.mss = mss;
3779 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
3780 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3781 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3782 REASON_SET(&reason, PFRES_SYNPROXY);
3783 return (PF_SYNPROXY_DROP);
3784 }
315a7da3 3785
02742ec6 3786 return (PF_PASS);
ed1f0be2
JL
3787
3788csfailed:
3789 if (sk != NULL)
1186cbc0 3790 kfree(sk, M_PFSTATEKEYPL);
ed1f0be2 3791 if (nk != NULL)
1186cbc0 3792 kfree(nk, M_PFSTATEKEYPL);
ed1f0be2
JL
3793
3794 if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3795 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3796 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3797 pf_status.src_nodes--;
1186cbc0 3798 kfree(sn, M_PFSRCTREEPL);
ed1f0be2
JL
3799 }
3800 if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
3801 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3802 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3803 pf_status.src_nodes--;
1186cbc0 3804 kfree(nsn, M_PFSRCTREEPL);
ed1f0be2
JL
3805 }
3806 return (PF_DROP);
02742ec6
JS
3807}
3808
3809int
3810pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3811 struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3812 struct pf_ruleset **rsm)
3813{
3814 struct pf_rule *r, *a = NULL;
3815 struct pf_ruleset *ruleset = NULL;
3816 sa_family_t af = pd->af;
3817 u_short reason;
3818 int tag = -1;
70224baa
JL
3819 int asd = 0;
3820 int match = 0;
02742ec6
JS
3821
3822 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3823 while (r != NULL) {
3824 r->evaluations++;
70224baa 3825 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
3826 r = r->skip[PF_SKIP_IFP].ptr;
3827 else if (r->direction && r->direction != direction)
3828 r = r->skip[PF_SKIP_DIR].ptr;
3829 else if (r->af && r->af != af)
3830 r = r->skip[PF_SKIP_AF].ptr;
3831 else if (r->proto && r->proto != pd->proto)
3832 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
3833 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3834 r->src.neg, kif))
02742ec6 3835 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
70224baa
JL
3836 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3837 r->dst.neg, NULL))
02742ec6 3838 r = r->skip[PF_SKIP_DST_ADDR].ptr;
70224baa 3839 else if (r->tos && !(r->tos == pd->tos))
02742ec6 3840 r = TAILQ_NEXT(r, entries);
a814431a
MD
3841 else if (r->os_fingerprint != PF_OSFP_ANY)
3842 r = TAILQ_NEXT(r, entries);
3843 else if (pd->proto == IPPROTO_UDP &&
3844 (r->src.port_op || r->dst.port_op))
3845 r = TAILQ_NEXT(r, entries);
3846 else if (pd->proto == IPPROTO_TCP &&
3847 (r->src.port_op || r->dst.port_op || r->flagset))
3848 r = TAILQ_NEXT(r, entries);
3849 else if ((pd->proto == IPPROTO_ICMP ||
3850 pd->proto == IPPROTO_ICMPV6) &&
3851 (r->type || r->code))
02742ec6 3852 r = TAILQ_NEXT(r, entries);
75fda04a
MD
3853 else if (r->prob && r->prob <= karc4random())
3854 r = TAILQ_NEXT(r, entries);
315a7da3 3855 else if (r->match_tag && !pf_match_tag(m, r, &tag))
02742ec6 3856 r = TAILQ_NEXT(r, entries);
02742ec6
JS
3857 else {
3858 if (r->anchor == NULL) {
70224baa 3859 match = 1;
02742ec6
JS
3860 *rm = r;
3861 *am = a;
3862 *rsm = ruleset;
3863 if ((*rm)->quick)
3864 break;
3865 r = TAILQ_NEXT(r, entries);
3866 } else