apic: Clear all entries in int table
[dragonfly.git] / sys / net / pf / pf.c
CommitLineData
05ac5751 1/* $OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */
21fe214d
JL
2/* add $OpenBSD: pf.c,v 1.553 2007/08/23 11:15:49 dhartmei Exp $ */
3/* add $OpenBSD: pf.c,v 1.554 2007/08/28 16:09:12 henning Exp $ */
4
02742ec6
JS
5
6/*
7 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
8 *
9 * Copyright (c) 2001 Daniel Hartmeier
10 * Copyright (c) 2002,2003 Henning Brauer
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials provided
22 * with the distribution.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Effort sponsored in part by the Defense Advanced Research Projects
38 * Agency (DARPA) and Air Force Research Laboratory, Air Force
39 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
40 *
41 */
42
43#include "opt_inet.h"
44#include "opt_inet6.h"
45#include "use_pfsync.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/filio.h>
52#include <sys/socket.h>
53#include <sys/socketvar.h>
54#include <sys/kernel.h>
55#include <sys/time.h>
56#include <sys/sysctl.h>
57#include <sys/endian.h>
58#include <vm/vm_zone.h>
70224baa
JL
59#include <sys/proc.h>
60#include <sys/kthread.h>
02742ec6
JS
61
62#include <machine/inttypes.h>
63
64#include <net/if.h>
65#include <net/if_types.h>
66#include <net/bpf.h>
4599cf19 67#include <net/netisr.h>
02742ec6
JS
68#include <net/route.h>
69
70#include <netinet/in.h>
71#include <netinet/in_var.h>
72#include <netinet/in_systm.h>
73#include <netinet/ip.h>
74#include <netinet/ip_var.h>
75#include <netinet/tcp.h>
76#include <netinet/tcp_seq.h>
77#include <netinet/udp.h>
78#include <netinet/ip_icmp.h>
79#include <netinet/in_pcb.h>
80#include <netinet/tcp_timer.h>
81#include <netinet/tcp_var.h>
82#include <netinet/udp_var.h>
83#include <netinet/icmp_var.h>
70224baa 84#include <netinet/if_ether.h>
02742ec6
JS
85
86#include <net/pf/pfvar.h>
87#include <net/pf/if_pflog.h>
88
89#if NPFSYNC > 0
90#include <net/pf/if_pfsync.h>
91#endif /* NPFSYNC > 0 */
92
93#ifdef INET6
94#include <netinet/ip6.h>
95#include <netinet/in_pcb.h>
96#include <netinet/icmp6.h>
97#include <netinet6/nd6.h>
98#include <netinet6/ip6_var.h>
99#include <netinet6/in6_pcb.h>
100#endif /* INET6 */
101
102#include <sys/in_cksum.h>
4599cf19 103#include <sys/ucred.h>
02742ec6
JS
104#include <machine/limits.h>
105#include <sys/msgport2.h>
4599cf19 106#include <net/netmsg2.h>
02742ec6
JS
107
108extern int ip_optcopy(struct ip *, struct ip *);
70224baa 109extern int debug_pfugidhack;
02742ec6 110
2a7a2b1c
JL
111struct lwkt_token pf_token = LWKT_TOKEN_MP_INITIALIZER(pf_token);
112
/*
 * Debug printf: emits the parenthesized kprintf argument list `x' when the
 * configured pf debug level is at least `n'.  Wrapped in do/while(0) so the
 * macro behaves as a single statement and cannot capture a following `else'.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			kprintf x;					\
	} while (0)
02742ec6
JS
114
115/*
116 * Global variables
117 */
118
315a7da3
JL
119/* state tables */
120struct pf_state_tree_lan_ext pf_statetbl_lan_ext;
121struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy;
122
02742ec6
JS
123struct pf_altqqueue pf_altqs[2];
124struct pf_palist pf_pabuf;
125struct pf_altqqueue *pf_altqs_active;
126struct pf_altqqueue *pf_altqs_inactive;
127struct pf_status pf_status;
128
129u_int32_t ticket_altqs_active;
130u_int32_t ticket_altqs_inactive;
131int altqs_inactive_open;
132u_int32_t ticket_pabuf;
133
70224baa
JL
134struct pf_anchor_stackframe {
135 struct pf_ruleset *rs;
136 struct pf_rule *r;
137 struct pf_anchor_node *parent;
138 struct pf_anchor *child;
139} pf_anchor_stack[64];
02742ec6 140
315a7da3
JL
141vm_zone_t pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
142vm_zone_t pf_state_pl, pf_state_key_pl;
143vm_zone_t pf_altq_pl;
02742ec6
JS
144
145void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
02742ec6 146
70224baa
JL
147void pf_init_threshold(struct pf_threshold *, u_int32_t,
148 u_int32_t);
149void pf_add_threshold(struct pf_threshold *);
150int pf_check_threshold(struct pf_threshold *);
151
02742ec6
JS
152void pf_change_ap(struct pf_addr *, u_int16_t *,
153 u_int16_t *, u_int16_t *, struct pf_addr *,
154 u_int16_t, u_int8_t, sa_family_t);
70224baa
JL
155int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
156 struct tcphdr *, struct pf_state_peer *);
02742ec6
JS
157#ifdef INET6
158void pf_change_a6(struct pf_addr *, u_int16_t *,
159 struct pf_addr *, u_int8_t);
160#endif /* INET6 */
161void pf_change_icmp(struct pf_addr *, u_int16_t *,
162 struct pf_addr *, struct pf_addr *, u_int16_t,
163 u_int16_t *, u_int16_t *, u_int16_t *,
164 u_int16_t *, u_int8_t, sa_family_t);
165void pf_send_tcp(const struct pf_rule *, sa_family_t,
166 const struct pf_addr *, const struct pf_addr *,
167 u_int16_t, u_int16_t, u_int32_t, u_int32_t,
70224baa
JL
168 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
169 u_int16_t, struct ether_header *, struct ifnet *);
02742ec6
JS
170void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
171 sa_family_t, struct pf_rule *);
172struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *,
173 int, int, struct pfi_kif *,
174 struct pf_addr *, u_int16_t, struct pf_addr *,
175 u_int16_t, int);
176struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *,
177 int, int, struct pfi_kif *, struct pf_src_node **,
178 struct pf_addr *, u_int16_t,
179 struct pf_addr *, u_int16_t,
180 struct pf_addr *, u_int16_t *);
315a7da3
JL
181void pf_attach_state(struct pf_state_key *,
182 struct pf_state *, int);
183void pf_detach_state(struct pf_state *, int);
184int pf_test_rule(struct pf_rule **, struct pf_state **,
02742ec6
JS
185 int, struct pfi_kif *, struct mbuf *, int,
186 void *, struct pf_pdesc *, struct pf_rule **,
70224baa 187 struct pf_ruleset **, struct ifqueue *, struct inpcb *);
02742ec6
JS
188int pf_test_fragment(struct pf_rule **, int,
189 struct pfi_kif *, struct mbuf *, void *,
190 struct pf_pdesc *, struct pf_rule **,
191 struct pf_ruleset **);
192int pf_test_state_tcp(struct pf_state **, int,
193 struct pfi_kif *, struct mbuf *, int,
194 void *, struct pf_pdesc *, u_short *);
195int pf_test_state_udp(struct pf_state **, int,
196 struct pfi_kif *, struct mbuf *, int,
197 void *, struct pf_pdesc *);
198int pf_test_state_icmp(struct pf_state **, int,
199 struct pfi_kif *, struct mbuf *, int,
70224baa 200 void *, struct pf_pdesc *, u_short *);
02742ec6
JS
201int pf_test_state_other(struct pf_state **, int,
202 struct pfi_kif *, struct pf_pdesc *);
315a7da3
JL
203int pf_match_tag(struct mbuf *, struct pf_rule *, int *);
204void pf_step_into_anchor(int *, struct pf_ruleset **, int,
205 struct pf_rule **, struct pf_rule **, int *);
70224baa
JL
206int pf_step_out_of_anchor(int *, struct pf_ruleset **,
207 int, struct pf_rule **, struct pf_rule **,
208 int *);
02742ec6
JS
209void pf_hash(struct pf_addr *, struct pf_addr *,
210 struct pf_poolhashkey *, sa_family_t);
211int pf_map_addr(u_int8_t, struct pf_rule *,
212 struct pf_addr *, struct pf_addr *,
213 struct pf_addr *, struct pf_src_node **);
214int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
215 struct pf_addr *, struct pf_addr *, u_int16_t,
216 struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
217 struct pf_src_node **);
218void pf_route(struct mbuf **, struct pf_rule *, int,
70224baa
JL
219 struct ifnet *, struct pf_state *,
220 struct pf_pdesc *);
02742ec6 221void pf_route6(struct mbuf **, struct pf_rule *, int,
70224baa
JL
222 struct ifnet *, struct pf_state *,
223 struct pf_pdesc *);
02742ec6
JS
224u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t,
225 sa_family_t);
226u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t,
227 sa_family_t);
228u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
229 u_int16_t);
230void pf_set_rt_ifp(struct pf_state *,
231 struct pf_addr *);
232int pf_check_proto_cksum(struct mbuf *, int, int,
233 u_int8_t, sa_family_t);
234int pf_addr_wrap_neq(struct pf_addr_wrap *,
235 struct pf_addr_wrap *);
315a7da3
JL
236struct pf_state *pf_find_state(struct pfi_kif *,
237 struct pf_state_key_cmp *, u_int8_t);
70224baa 238int pf_src_connlimit(struct pf_state **);
315a7da3
JL
239void pf_stateins_err(const char *, struct pf_state *,
240 struct pfi_kif *);
70224baa
JL
241int pf_check_congestion(struct ifqueue *);
242
243extern int pf_end_threads;
244
245struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
246 { &pf_state_pl, PFSTATE_HIWAT },
247 { &pf_src_tree_pl, PFSNODE_HIWAT },
248 { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
249 { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
250 { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
251};
02742ec6
JS
252
/*
 * Locate the state matching the caller's local `key' and store it in
 * `*state'.  Inbound packets search the ext/gwy tree, all others the
 * lan/ext tree.  Expands to statements that return from the enclosing
 * function: PF_DROP when no usable state exists, PF_PASS when an outbound
 * packet belongs to a route-to/reply-to state bound to another interface.
 * Relies on `state', `key', `kif' and `direction' in the caller's scope.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state(kif, &key,			\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL || (*state)->timeout == PFTM_PURGE)	\
			return (PF_DROP);				\
		if (direction == PF_OUT &&				\
		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
		    (*state)->rule.ptr->direction == PF_OUT) ||		\
		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
		    (*state)->rule.ptr->direction == PF_IN)) &&		\
		    (*state)->rt_kif != NULL &&				\
		    (*state)->rt_kif != kif)				\
			return (PF_PASS);				\
	} while (0)
270
315a7da3
JL
/*
 * True when the lan and gwy endpoints of state key `sk' differ in address
 * or port.  Only word 0 of the address is compared unless the key is
 * AF_INET6.  The whole expansion is parenthesized so the macro can be
 * negated or combined with other operators safely.
 */
#define STATE_TRANSLATE(sk)						\
	((sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||	\
	((sk)->af == AF_INET6 &&					\
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||	\
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||		\
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) ||	\
	(sk)->lan.port != (sk)->gwy.port)
02742ec6 278
70224baa
JL
/*
 * Interface a new state is bound to: `k' when rule `r' carries
 * PFRULE_IFBOUND, otherwise pfi_all.  The conditional is fully
 * parenthesized so it composes correctly inside larger expressions.
 */
#define BOUND_IFACE(r, k) \
	((((r)->rule_flag & PFRULE_IFBOUND) != 0) ? (k) : pfi_all)
281
/*
 * Bump the per-rule state counters for state `s': always the matching
 * rule, plus the anchor and NAT rule when present.  `s' is evaluated
 * more than once; it is parenthesized so any pointer expression works.
 */
#define STATE_INC_COUNTERS(s)					\
	do {							\
		(s)->rule.ptr->states++;			\
		if ((s)->anchor.ptr != NULL)			\
			(s)->anchor.ptr->states++;		\
		if ((s)->nat_rule.ptr != NULL)			\
			(s)->nat_rule.ptr->states++;		\
	} while (0)

/*
 * Inverse of STATE_INC_COUNTERS: drop the per-rule state counters for
 * state `s' (NAT rule and anchor when present, then the matching rule).
 */
#define STATE_DEC_COUNTERS(s)					\
	do {							\
		if ((s)->nat_rule.ptr != NULL)			\
			(s)->nat_rule.ptr->states--;		\
		if ((s)->anchor.ptr != NULL)			\
			(s)->anchor.ptr->states--;		\
		(s)->rule.ptr->states--;			\
	} while (0)
02742ec6 299
70224baa 300static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
315a7da3
JL
301static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
302 struct pf_state_key *);
303static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
304 struct pf_state_key *);
70224baa 305static __inline int pf_state_compare_id(struct pf_state *,
02742ec6
JS
306 struct pf_state *);
307
308struct pf_src_tree tree_src_tracking;
309
310struct pf_state_tree_id tree_id;
70224baa 311struct pf_state_queue state_list;
02742ec6
JS
312
313RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
315a7da3
JL
314RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
315 entry_lan_ext, pf_state_compare_lan_ext);
316RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
317 entry_ext_gwy, pf_state_compare_ext_gwy);
02742ec6 318RB_GENERATE(pf_state_tree_id, pf_state,
315a7da3
JL
319 entry_id, pf_state_compare_id);
320
321#define PF_DT_SKIP_LANEXT 0x01
322#define PF_DT_SKIP_EXTGWY 0x02
02742ec6 323
70224baa 324static __inline int
02742ec6
JS
325pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
326{
327 int diff;
328
329 if (a->rule.ptr > b->rule.ptr)
330 return (1);
331 if (a->rule.ptr < b->rule.ptr)
332 return (-1);
333 if ((diff = a->af - b->af) != 0)
334 return (diff);
335 switch (a->af) {
336#ifdef INET
337 case AF_INET:
338 if (a->addr.addr32[0] > b->addr.addr32[0])
339 return (1);
340 if (a->addr.addr32[0] < b->addr.addr32[0])
341 return (-1);
342 break;
343#endif /* INET */
344#ifdef INET6
345 case AF_INET6:
346 if (a->addr.addr32[3] > b->addr.addr32[3])
347 return (1);
348 if (a->addr.addr32[3] < b->addr.addr32[3])
349 return (-1);
350 if (a->addr.addr32[2] > b->addr.addr32[2])
351 return (1);
352 if (a->addr.addr32[2] < b->addr.addr32[2])
353 return (-1);
354 if (a->addr.addr32[1] > b->addr.addr32[1])
355 return (1);
356 if (a->addr.addr32[1] < b->addr.addr32[1])
357 return (-1);
358 if (a->addr.addr32[0] > b->addr.addr32[0])
359 return (1);
360 if (a->addr.addr32[0] < b->addr.addr32[0])
361 return (-1);
362 break;
363#endif /* INET6 */
364 }
365 return (0);
366}
367
a814431a 368u_int32_t
315a7da3 369pf_state_hash(struct pf_state_key *sk)
5950bf01 370{
315a7da3 371 u_int32_t hv = (intptr_t)sk / sizeof(*sk);
5950bf01 372
315a7da3
JL
373 hv ^= crc32(&sk->lan, sizeof(sk->lan));
374 hv ^= crc32(&sk->gwy, sizeof(sk->gwy));
375 hv ^= crc32(&sk->ext, sizeof(sk->ext));
a814431a
MD
376 if (hv == 0) /* disallow 0 */
377 hv = 1;
5950bf01
MD
378 return(hv);
379}
380
70224baa 381static __inline int
315a7da3 382pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
02742ec6
JS
383{
384 int diff;
385
386 if ((diff = a->proto - b->proto) != 0)
387 return (diff);
388 if ((diff = a->af - b->af) != 0)
389 return (diff);
390 switch (a->af) {
391#ifdef INET
392 case AF_INET:
393 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
394 return (1);
395 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
396 return (-1);
397 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
398 return (1);
399 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
400 return (-1);
401 break;
402#endif /* INET */
403#ifdef INET6
404 case AF_INET6:
405 if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
406 return (1);
407 if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
408 return (-1);
409 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
410 return (1);
411 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
412 return (-1);
413 if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
414 return (1);
415 if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
416 return (-1);
417 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
418 return (1);
419 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
420 return (-1);
421 if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
422 return (1);
423 if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
424 return (-1);
425 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
426 return (1);
427 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
428 return (-1);
429 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
430 return (1);
431 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
432 return (-1);
433 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
434 return (1);
435 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
436 return (-1);
437 break;
438#endif /* INET6 */
439 }
440
441 if ((diff = a->lan.port - b->lan.port) != 0)
442 return (diff);
443 if ((diff = a->ext.port - b->ext.port) != 0)
444 return (diff);
445
446 return (0);
447}
448
70224baa 449static __inline int
315a7da3 450pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
02742ec6
JS
451{
452 int diff;
453
454 if ((diff = a->proto - b->proto) != 0)
455 return (diff);
456 if ((diff = a->af - b->af) != 0)
457 return (diff);
458 switch (a->af) {
459#ifdef INET
460 case AF_INET:
461 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
462 return (1);
463 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
464 return (-1);
465 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
466 return (1);
467 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
468 return (-1);
469 break;
470#endif /* INET */
471#ifdef INET6
472 case AF_INET6:
473 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
474 return (1);
475 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
476 return (-1);
477 if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
478 return (1);
479 if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
480 return (-1);
481 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
482 return (1);
483 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
484 return (-1);
485 if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
486 return (1);
487 if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
488 return (-1);
489 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
490 return (1);
491 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
492 return (-1);
493 if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
494 return (1);
495 if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
496 return (-1);
497 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
498 return (1);
499 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
500 return (-1);
501 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
502 return (1);
503 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
504 return (-1);
505 break;
506#endif /* INET6 */
507 }
508
509 if ((diff = a->ext.port - b->ext.port) != 0)
510 return (diff);
511 if ((diff = a->gwy.port - b->gwy.port) != 0)
512 return (diff);
513
514 return (0);
515}
516
70224baa 517static __inline int
02742ec6
JS
518pf_state_compare_id(struct pf_state *a, struct pf_state *b)
519{
520 if (a->id > b->id)
521 return (1);
522 if (a->id < b->id)
523 return (-1);
524 if (a->creatorid > b->creatorid)
525 return (1);
526 if (a->creatorid < b->creatorid)
527 return (-1);
528
529 return (0);
530}
531
#ifdef INET6
/*
 * Copy an address of family `af' from `src' to `dst': one 32-bit word for
 * AF_INET (when INET is compiled in), four words for AF_INET6.  Any other
 * family leaves `dst' untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		int	w;

		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
	}
}
#endif /* INET6 */
02742ec6
JS
551
552struct pf_state *
70224baa 553pf_find_state_byid(struct pf_state_cmp *key)
02742ec6
JS
554{
555 pf_status.fcounters[FCNT_STATE_SEARCH]++;
315a7da3 556
70224baa 557 return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
02742ec6
JS
558}
559
560struct pf_state *
315a7da3 561pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int8_t tree)
02742ec6 562{
315a7da3
JL
563 struct pf_state_key *sk;
564 struct pf_state *s;
02742ec6
JS
565
566 pf_status.fcounters[FCNT_STATE_SEARCH]++;
567
568 switch (tree) {
569 case PF_LAN_EXT:
315a7da3
JL
570 sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
571 (struct pf_state_key *)key);
572 break;
02742ec6 573 case PF_EXT_GWY:
315a7da3
JL
574 sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
575 (struct pf_state_key *)key);
576 break;
02742ec6 577 default:
315a7da3 578 panic("pf_find_state");
02742ec6 579 }
315a7da3
JL
580
581 /* list is sorted, if-bound states before floating ones */
aa1da187
MD
582 if (sk != NULL) {
583 TAILQ_FOREACH(s, &sk->states, next) {
315a7da3
JL
584 if (s->kif == pfi_all || s->kif == kif)
585 return (s);
aa1da187
MD
586 }
587 }
315a7da3
JL
588
589 return (NULL);
02742ec6
JS
590}
591
592struct pf_state *
315a7da3 593pf_find_state_all(struct pf_state_key_cmp *key, u_int8_t tree, int *more)
02742ec6 594{
315a7da3
JL
595 struct pf_state_key *sk;
596 struct pf_state *s, *ret = NULL;
02742ec6
JS
597
598 pf_status.fcounters[FCNT_STATE_SEARCH]++;
599
600 switch (tree) {
601 case PF_LAN_EXT:
315a7da3
JL
602 sk = RB_FIND(pf_state_tree_lan_ext,
603 &pf_statetbl_lan_ext, (struct pf_state_key *)key);
604 break;
02742ec6 605 case PF_EXT_GWY:
315a7da3
JL
606 sk = RB_FIND(pf_state_tree_ext_gwy,
607 &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
608 break;
02742ec6
JS
609 default:
610 panic("pf_find_state_all");
611 }
315a7da3
JL
612
613 if (sk != NULL) {
614 ret = TAILQ_FIRST(&sk->states);
615 if (more == NULL)
616 return (ret);
617
618 TAILQ_FOREACH(s, &sk->states, next)
619 (*more)++;
620 }
621
622 return (ret);
02742ec6
JS
623}
624
70224baa
JL
625void
626pf_init_threshold(struct pf_threshold *threshold,
627 u_int32_t limit, u_int32_t seconds)
628{
629 threshold->limit = limit * PF_THRESHOLD_MULT;
630 threshold->seconds = seconds;
631 threshold->count = 0;
632 threshold->last = time_second;
633}
634
635void
636pf_add_threshold(struct pf_threshold *threshold)
637{
638 u_int32_t t = time_second, diff = t - threshold->last;
639
640 if (diff >= threshold->seconds)
641 threshold->count = 0;
642 else
643 threshold->count -= threshold->count * diff /
644 threshold->seconds;
645 threshold->count += PF_THRESHOLD_MULT;
646 threshold->last = t;
647}
648
649int
650pf_check_threshold(struct pf_threshold *threshold)
651{
652 return (threshold->count > threshold->limit);
653}
654
655int
656pf_src_connlimit(struct pf_state **state)
657{
70224baa
JL
658 int bad = 0;
659
660 (*state)->src_node->conn++;
661 (*state)->src.tcp_est = 1;
662 pf_add_threshold(&(*state)->src_node->conn_rate);
663
664 if ((*state)->rule.ptr->max_src_conn &&
665 (*state)->rule.ptr->max_src_conn <
666 (*state)->src_node->conn) {
667 pf_status.lcounters[LCNT_SRCCONN]++;
668 bad++;
669 }
670
671 if ((*state)->rule.ptr->max_src_conn_rate.limit &&
672 pf_check_threshold(&(*state)->src_node->conn_rate)) {
673 pf_status.lcounters[LCNT_SRCCONNRATE]++;
674 bad++;
675 }
676
677 if (!bad)
678 return (0);
679
680 if ((*state)->rule.ptr->overload_tbl) {
681 struct pfr_addr p;
682 u_int32_t killed = 0;
683
684 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
685 if (pf_status.debug >= PF_DEBUG_MISC) {
686 kprintf("pf_src_connlimit: blocking address ");
687 pf_print_host(&(*state)->src_node->addr, 0,
315a7da3 688 (*state)->state_key->af);
70224baa
JL
689 }
690
691 bzero(&p, sizeof(p));
315a7da3
JL
692 p.pfra_af = (*state)->state_key->af;
693 switch ((*state)->state_key->af) {
70224baa
JL
694#ifdef INET
695 case AF_INET:
696 p.pfra_net = 32;
697 p.pfra_ip4addr = (*state)->src_node->addr.v4;
698 break;
699#endif /* INET */
700#ifdef INET6
701 case AF_INET6:
702 p.pfra_net = 128;
703 p.pfra_ip6addr = (*state)->src_node->addr.v6;
704 break;
705#endif /* INET6 */
706 }
707
708 pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
709 &p, time_second);
710
711 /* kill existing states if that's required. */
712 if ((*state)->rule.ptr->flush) {
315a7da3
JL
713 struct pf_state_key *sk;
714 struct pf_state *st;
70224baa 715
315a7da3
JL
716 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
717 RB_FOREACH(st, pf_state_tree_id, &tree_id) {
718 sk = st->state_key;
70224baa
JL
719 /*
720 * Kill states from this source. (Only those
721 * from the same rule if PF_FLUSH_GLOBAL is not
722 * set)
723 */
315a7da3
JL
724 if (sk->af ==
725 (*state)->state_key->af &&
726 (((*state)->state_key->direction ==
727 PF_OUT &&
70224baa 728 PF_AEQ(&(*state)->src_node->addr,
315a7da3
JL
729 &sk->lan.addr, sk->af)) ||
730 ((*state)->state_key->direction == PF_IN &&
70224baa 731 PF_AEQ(&(*state)->src_node->addr,
315a7da3 732 &sk->ext.addr, sk->af))) &&
70224baa
JL
733 ((*state)->rule.ptr->flush &
734 PF_FLUSH_GLOBAL ||
315a7da3
JL
735 (*state)->rule.ptr == st->rule.ptr)) {
736 st->timeout = PFTM_PURGE;
737 st->src.state = st->dst.state =
70224baa
JL
738 TCPS_CLOSED;
739 killed++;
740 }
741 }
742 if (pf_status.debug >= PF_DEBUG_MISC)
743 kprintf(", %u states killed", killed);
744 }
745 if (pf_status.debug >= PF_DEBUG_MISC)
746 kprintf("\n");
747 }
748
749 /* kill this state */
750 (*state)->timeout = PFTM_PURGE;
751 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
752 return (1);
753}
754
02742ec6
JS
755int
756pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
757 struct pf_addr *src, sa_family_t af)
758{
759 struct pf_src_node k;
760
761 if (*sn == NULL) {
762 k.af = af;
763 PF_ACPY(&k.addr, src, af);
764 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
765 rule->rpool.opts & PF_POOL_STICKYADDR)
766 k.rule.ptr = rule;
767 else
768 k.rule.ptr = NULL;
769 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
770 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
771 }
772 if (*sn == NULL) {
773 if (!rule->max_src_nodes ||
774 rule->src_nodes < rule->max_src_nodes)
775 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
70224baa
JL
776 else
777 pf_status.lcounters[LCNT_SRCNODES]++;
02742ec6
JS
778 if ((*sn) == NULL)
779 return (-1);
780 bzero(*sn, sizeof(struct pf_src_node));
70224baa
JL
781
782 pf_init_threshold(&(*sn)->conn_rate,
783 rule->max_src_conn_rate.limit,
784 rule->max_src_conn_rate.seconds);
785
02742ec6
JS
786 (*sn)->af = af;
787 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
788 rule->rpool.opts & PF_POOL_STICKYADDR)
789 (*sn)->rule.ptr = rule;
790 else
791 (*sn)->rule.ptr = NULL;
792 PF_ACPY(&(*sn)->addr, src, af);
793 if (RB_INSERT(pf_src_tree,
794 &tree_src_tracking, *sn) != NULL) {
795 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 796 kprintf("pf: src_tree insert failed: ");
02742ec6 797 pf_print_host(&(*sn)->addr, 0, af);
4b1cf444 798 kprintf("\n");
02742ec6
JS
799 }
800 pool_put(&pf_src_tree_pl, *sn);
801 return (-1);
802 }
803 (*sn)->creation = time_second;
804 (*sn)->ruletype = rule->action;
805 if ((*sn)->rule.ptr != NULL)
806 (*sn)->rule.ptr->src_nodes++;
807 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
808 pf_status.src_nodes++;
809 } else {
810 if (rule->max_src_states &&
70224baa
JL
811 (*sn)->states >= rule->max_src_states) {
812 pf_status.lcounters[LCNT_SRCSTATES]++;
02742ec6 813 return (-1);
70224baa 814 }
02742ec6
JS
815 }
816 return (0);
817}
818
315a7da3
JL
819void
820pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
821{
822 struct pf_state_key *sk = s->state_key;
823
824 if (pf_status.debug >= PF_DEBUG_MISC) {
825 kprintf("pf: state insert failed: %s %s", tree, kif->pfik_name);
826 kprintf(" lan: ");
827 pf_print_host(&sk->lan.addr, sk->lan.port,
828 sk->af);
829 kprintf(" gwy: ");
830 pf_print_host(&sk->gwy.addr, sk->gwy.port,
831 sk->af);
832 kprintf(" ext: ");
833 pf_print_host(&sk->ext.addr, sk->ext.port,
834 sk->af);
835 if (s->sync_flags & PFSTATE_FROMSYNC)
836 kprintf(" (from sync)");
837 kprintf("\n");
838 }
839}
840
02742ec6 841int
315a7da3 842pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
02742ec6 843{
315a7da3
JL
844 struct pf_state_key *cur;
845 struct pf_state *sp;
846
847 KKASSERT(s->state_key != NULL);
848 s->kif = kif;
849
850 if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
851 s->state_key)) != NULL) {
852 /* key exists. check for same kif, if none, add to key */
853 TAILQ_FOREACH(sp, &cur->states, next)
854 if (sp->kif == kif) { /* collision! */
855 pf_stateins_err("tree_lan_ext", s, kif);
05ac5751
JL
856 pf_detach_state(s,
857 PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
315a7da3
JL
858 return (-1);
859 }
860 pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
861 pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
02742ec6
JS
862 }
863
315a7da3
JL
864 /* if cur != NULL, we already found a state key and attached to it */
865 if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
866 &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
867 /* must not happen. we must have found the sk above! */
868 pf_stateins_err("tree_ext_gwy", s, kif);
869 pf_detach_state(s, PF_DT_SKIP_EXTGWY);
02742ec6
JS
870 return (-1);
871 }
872
315a7da3
JL
873 if (s->id == 0 && s->creatorid == 0) {
874 s->id = htobe64(pf_status.stateid++);
875 s->creatorid = pf_status.hostid;
02742ec6 876 }
315a7da3 877 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
02742ec6 878 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 879 kprintf("pf: state insert failed: "
2e5b1311
SW
880 "id: %016jx creatorid: %08x",
881 (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
315a7da3 882 if (s->sync_flags & PFSTATE_FROMSYNC)
4b1cf444
SW
883 kprintf(" (from sync)");
884 kprintf("\n");
02742ec6 885 }
315a7da3 886 pf_detach_state(s, 0);
02742ec6
JS
887 return (-1);
888 }
315a7da3 889 TAILQ_INSERT_TAIL(&state_list, s, entry_list);
02742ec6
JS
890 pf_status.fcounters[FCNT_STATE_INSERT]++;
891 pf_status.states++;
70224baa 892 pfi_kif_ref(kif, PFI_KIF_REF_STATE);
02742ec6 893#if NPFSYNC
315a7da3 894 pfsync_insert_state(s);
02742ec6
JS
895#endif
896 return (0);
897}
898
899void
70224baa 900pf_purge_thread(void *v)
02742ec6 901{
70224baa
JL
902 int nloops = 0;
903 int locked = 0;
904
aa1da187 905 lwkt_gettoken(&pf_token);
70224baa
JL
906 for (;;) {
907 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
908
909 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
910
911 if (pf_end_threads) {
912 pf_purge_expired_states(pf_status.states, 1);
913 pf_purge_expired_fragments();
914 pf_purge_expired_src_nodes(1);
915 pf_end_threads++;
916
917 lockmgr(&pf_consistency_lock, LK_RELEASE);
918 wakeup(pf_purge_thread);
919 kthread_exit();
920 }
921 crit_enter();
922
923 /* process a fraction of the state table every second */
924 if(!pf_purge_expired_states(1 + (pf_status.states
925 / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
02742ec6 926
70224baa
JL
927 pf_purge_expired_states(1 + (pf_status.states
928 / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
929 }
02742ec6 930
70224baa
JL
931 /* purge other expired types every PFTM_INTERVAL seconds */
932 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
933 pf_purge_expired_fragments();
934 if (!pf_purge_expired_src_nodes(locked)) {
935 pf_purge_expired_src_nodes(1);
936 }
937 nloops = 0;
938 }
939 crit_exit();
940 lockmgr(&pf_consistency_lock, LK_RELEASE);
941 }
aa1da187 942 lwkt_reltoken(&pf_token);
02742ec6
JS
943}
944
945u_int32_t
946pf_state_expires(const struct pf_state *state)
947{
948 u_int32_t timeout;
949 u_int32_t start;
950 u_int32_t end;
951 u_int32_t states;
952
953 /* handle all PFTM_* > PFTM_MAX here */
954 if (state->timeout == PFTM_PURGE)
955 return (time_second);
956 if (state->timeout == PFTM_UNTIL_PACKET)
957 return (0);
70224baa
JL
958 KKASSERT(state->timeout != PFTM_UNLINKED);
959 KASSERT((state->timeout < PFTM_MAX),
960 ("pf_state_expires: timeout > PFTM_MAX"));
02742ec6
JS
961 timeout = state->rule.ptr->timeout[state->timeout];
962 if (!timeout)
963 timeout = pf_default_rule.timeout[state->timeout];
964 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
965 if (start) {
966 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
967 states = state->rule.ptr->states;
968 } else {
969 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
970 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
971 states = pf_status.states;
972 }
973 if (end && states > start && start < end) {
974 if (states < end)
975 return (state->expire + timeout * (end - states) /
976 (end - start));
977 else
978 return (time_second);
979 }
980 return (state->expire + timeout);
981}
982
70224baa
JL
983int
984pf_purge_expired_src_nodes(int waslocked)
02742ec6
JS
985{
986 struct pf_src_node *cur, *next;
70224baa 987 int locked = waslocked;
02742ec6
JS
988
989 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
990 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
991
992 if (cur->states <= 0 && cur->expire <= time_second) {
70224baa
JL
993 if (! locked) {
994 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
995 next = RB_NEXT(pf_src_tree,
996 &tree_src_tracking, cur);
997 locked = 1;
998 }
02742ec6
JS
999 if (cur->rule.ptr != NULL) {
1000 cur->rule.ptr->src_nodes--;
1001 if (cur->rule.ptr->states <= 0 &&
1002 cur->rule.ptr->max_src_nodes <= 0)
1003 pf_rm_rule(NULL, cur->rule.ptr);
1004 }
1005 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1006 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1007 pf_status.src_nodes--;
1008 pool_put(&pf_src_tree_pl, cur);
1009 }
1010 }
70224baa
JL
1011
1012 if (locked && !waslocked)
1013 lockmgr(&pf_consistency_lock, LK_RELEASE);
1014 return(1);
02742ec6
JS
1015}
1016
1017void
1018pf_src_tree_remove_state(struct pf_state *s)
1019{
1020 u_int32_t timeout;
1021
1022 if (s->src_node != NULL) {
05ac5751
JL
1023 if (s->src.tcp_est)
1024 --s->src_node->conn;
02742ec6
JS
1025 if (--s->src_node->states <= 0) {
1026 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1027 if (!timeout)
1028 timeout =
1029 pf_default_rule.timeout[PFTM_SRC_NODE];
1030 s->src_node->expire = time_second + timeout;
1031 }
1032 }
1033 if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1034 if (--s->nat_src_node->states <= 0) {
1035 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1036 if (!timeout)
1037 timeout =
1038 pf_default_rule.timeout[PFTM_SRC_NODE];
1039 s->nat_src_node->expire = time_second + timeout;
1040 }
1041 }
1042 s->src_node = s->nat_src_node = NULL;
1043}
1044
70224baa
JL
1045/* callers should be at crit_enter() */
1046void
1047pf_unlink_state(struct pf_state *cur)
02742ec6 1048{
70224baa 1049 if (cur->src.state == PF_TCPS_PROXY_DST) {
315a7da3
JL
1050 pf_send_tcp(cur->rule.ptr, cur->state_key->af,
1051 &cur->state_key->ext.addr, &cur->state_key->lan.addr,
1052 cur->state_key->ext.port, cur->state_key->lan.port,
70224baa
JL
1053 cur->src.seqhi, cur->src.seqlo + 1,
1054 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1055 }
70224baa 1056 RB_REMOVE(pf_state_tree_id, &tree_id, cur);
02742ec6 1057#if NPFSYNC
70224baa 1058 if (cur->creatorid == pf_status.hostid)
f0ea6854 1059 pfsync_delete_state(cur);
02742ec6 1060#endif
70224baa
JL
1061 cur->timeout = PFTM_UNLINKED;
1062 pf_src_tree_remove_state(cur);
315a7da3 1063 pf_detach_state(cur, 0);
02742ec6
JS
1064}
1065
aa1da187
MD
1066static struct pf_state *purge_cur;
1067
70224baa
JL
1068/* callers should be at crit_enter() and hold the
1069 * write_lock on pf_consistency_lock */
f0ea6854 1070void
70224baa 1071pf_free_state(struct pf_state *cur)
f0ea6854 1072{
70224baa
JL
1073#if NPFSYNC
1074 if (pfsyncif != NULL &&
1075 (pfsyncif->sc_bulk_send_next == cur ||
1076 pfsyncif->sc_bulk_terminator == cur))
1077 return;
1078#endif
1079 KKASSERT(cur->timeout == PFTM_UNLINKED);
1080 if (--cur->rule.ptr->states <= 0 &&
1081 cur->rule.ptr->src_nodes <= 0)
1082 pf_rm_rule(NULL, cur->rule.ptr);
1083 if (cur->nat_rule.ptr != NULL)
1084 if (--cur->nat_rule.ptr->states <= 0 &&
1085 cur->nat_rule.ptr->src_nodes <= 0)
1086 pf_rm_rule(NULL, cur->nat_rule.ptr);
1087 if (cur->anchor.ptr != NULL)
1088 if (--cur->anchor.ptr->states <= 0)
1089 pf_rm_rule(NULL, cur->anchor.ptr);
1090 pf_normalize_tcp_cleanup(cur);
315a7da3 1091 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
aa1da187
MD
1092
1093 /*
1094 * We may be freeing pf_purge_expired_states()'s saved scan entry,
1095 * adjust it if necessary.
1096 */
1097 if (purge_cur == cur) {
1098 kprintf("PURGE CONFLICT\n");
1099 purge_cur = TAILQ_NEXT(purge_cur, entry_list);
1100 }
315a7da3 1101 TAILQ_REMOVE(&state_list, cur, entry_list);
aa1da187 1102
70224baa
JL
1103 if (cur->tag)
1104 pf_tag_unref(cur->tag);
1105 pool_put(&pf_state_pl, cur);
1106 pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1107 pf_status.states--;
f0ea6854
MD
1108}
1109
70224baa
JL
1110int
1111pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1112{
aa1da187 1113 struct pf_state *cur;
70224baa
JL
1114 int locked = waslocked;
1115
1116 while (maxcheck--) {
aa1da187
MD
1117 /*
1118 * Wrap to start of list when we hit the end
1119 */
1120 cur = purge_cur;
70224baa
JL
1121 if (cur == NULL) {
1122 cur = TAILQ_FIRST(&state_list);
1123 if (cur == NULL)
1124 break; /* list empty */
1125 }
1126
aa1da187
MD
1127 /*
1128 * Setup next (purge_cur) while we process this one. If we block and
1129 * something else deletes purge_cur, pf_free_state() will adjust it further
1130 * ahead.
1131 */
1132 purge_cur = TAILQ_NEXT(cur, entry_list);
70224baa
JL
1133
1134 if (cur->timeout == PFTM_UNLINKED) {
1135 /* free unlinked state */
1136 if (! locked) {
1137 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1138 locked = 1;
1139 }
1140 pf_free_state(cur);
1141 } else if (pf_state_expires(cur) <= time_second) {
1142 /* unlink and free expired state */
1143 pf_unlink_state(cur);
1144 if (! locked) {
1145 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
1146 return (0);
1147 locked = 1;
1148 }
1149 pf_free_state(cur);
1150 }
70224baa
JL
1151 }
1152
1153 if (locked)
1154 lockmgr(&pf_consistency_lock, LK_RELEASE);
1155 return (1);
1156}
f0ea6854 1157
02742ec6
JS
1158int
1159pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1160{
1161 if (aw->type != PF_ADDR_TABLE)
1162 return (0);
1163 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1164 return (1);
1165 return (0);
1166}
1167
1168void
1169pf_tbladdr_remove(struct pf_addr_wrap *aw)
1170{
1171 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1172 return;
1173 pfr_detach_table(aw->p.tbl);
1174 aw->p.tbl = NULL;
1175}
1176
1177void
1178pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1179{
1180 struct pfr_ktable *kt = aw->p.tbl;
1181
1182 if (aw->type != PF_ADDR_TABLE || kt == NULL)
1183 return;
1184 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1185 kt = kt->pfrkt_root;
1186 aw->p.tbl = NULL;
1187 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1188 kt->pfrkt_cnt : -1;
1189}
1190
1191void
1192pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1193{
1194 switch (af) {
1195#ifdef INET
1196 case AF_INET: {
1197 u_int32_t a = ntohl(addr->addr32[0]);
4b1cf444 1198 kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
02742ec6
JS
1199 (a>>8)&255, a&255);
1200 if (p) {
1201 p = ntohs(p);
4b1cf444 1202 kprintf(":%u", p);
02742ec6
JS
1203 }
1204 break;
1205 }
1206#endif /* INET */
1207#ifdef INET6
1208 case AF_INET6: {
1209 u_int16_t b;
1210 u_int8_t i, curstart = 255, curend = 0,
1211 maxstart = 0, maxend = 0;
1212 for (i = 0; i < 8; i++) {
1213 if (!addr->addr16[i]) {
1214 if (curstart == 255)
1215 curstart = i;
1216 else
1217 curend = i;
1218 } else {
1219 if (curstart) {
1220 if ((curend - curstart) >
1221 (maxend - maxstart)) {
1222 maxstart = curstart;
1223 maxend = curend;
1224 curstart = 255;
1225 }
1226 }
1227 }
1228 }
1229 for (i = 0; i < 8; i++) {
1230 if (i >= maxstart && i <= maxend) {
1231 if (maxend != 7) {
1232 if (i == maxstart)
4b1cf444 1233 kprintf(":");
02742ec6
JS
1234 } else {
1235 if (i == maxend)
4b1cf444 1236 kprintf(":");
02742ec6
JS
1237 }
1238 } else {
1239 b = ntohs(addr->addr16[i]);
4b1cf444 1240 kprintf("%x", b);
02742ec6 1241 if (i < 7)
4b1cf444 1242 kprintf(":");
02742ec6
JS
1243 }
1244 }
1245 if (p) {
1246 p = ntohs(p);
4b1cf444 1247 kprintf("[%u]", p);
02742ec6
JS
1248 }
1249 break;
1250 }
1251#endif /* INET6 */
1252 }
1253}
1254
1255void
1256pf_print_state(struct pf_state *s)
1257{
315a7da3
JL
1258 struct pf_state_key *sk = s->state_key;
1259 switch (sk->proto) {
02742ec6 1260 case IPPROTO_TCP:
4b1cf444 1261 kprintf("TCP ");
02742ec6
JS
1262 break;
1263 case IPPROTO_UDP:
4b1cf444 1264 kprintf("UDP ");
02742ec6
JS
1265 break;
1266 case IPPROTO_ICMP:
4b1cf444 1267 kprintf("ICMP ");
02742ec6
JS
1268 break;
1269 case IPPROTO_ICMPV6:
4b1cf444 1270 kprintf("ICMPV6 ");
02742ec6
JS
1271 break;
1272 default:
315a7da3 1273 kprintf("%u ", sk->proto);
02742ec6
JS
1274 break;
1275 }
315a7da3 1276 pf_print_host(&sk->lan.addr, sk->lan.port, sk->af);
4b1cf444 1277 kprintf(" ");
315a7da3 1278 pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af);
4b1cf444 1279 kprintf(" ");
315a7da3 1280 pf_print_host(&sk->ext.addr, sk->ext.port, sk->af);
4b1cf444 1281 kprintf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
02742ec6
JS
1282 s->src.seqhi, s->src.max_win, s->src.seqdiff);
1283 if (s->src.wscale && s->dst.wscale)
4b1cf444
SW
1284 kprintf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1285 kprintf("]");
1286 kprintf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
02742ec6
JS
1287 s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1288 if (s->src.wscale && s->dst.wscale)
4b1cf444
SW
1289 kprintf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1290 kprintf("]");
1291 kprintf(" %u:%u", s->src.state, s->dst.state);
02742ec6
JS
1292}
1293
1294void
1295pf_print_flags(u_int8_t f)
1296{
1297 if (f)
4b1cf444 1298 kprintf(" ");
02742ec6 1299 if (f & TH_FIN)
4b1cf444 1300 kprintf("F");
02742ec6 1301 if (f & TH_SYN)
4b1cf444 1302 kprintf("S");
02742ec6 1303 if (f & TH_RST)
4b1cf444 1304 kprintf("R");
02742ec6 1305 if (f & TH_PUSH)
4b1cf444 1306 kprintf("P");
02742ec6 1307 if (f & TH_ACK)
4b1cf444 1308 kprintf("A");
02742ec6 1309 if (f & TH_URG)
4b1cf444 1310 kprintf("U");
02742ec6 1311 if (f & TH_ECE)
4b1cf444 1312 kprintf("E");
02742ec6 1313 if (f & TH_CWR)
4b1cf444 1314 kprintf("W");
02742ec6
JS
1315}
1316
1317#define PF_SET_SKIP_STEPS(i) \
1318 do { \
1319 while (head[i] != cur) { \
1320 head[i]->skip[i].ptr = cur; \
1321 head[i] = TAILQ_NEXT(head[i], entries); \
1322 } \
1323 } while (0)
1324
1325void
1326pf_calc_skip_steps(struct pf_rulequeue *rules)
1327{
1328 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1329 int i;
1330
1331 cur = TAILQ_FIRST(rules);
1332 prev = cur;
1333 for (i = 0; i < PF_SKIP_COUNT; ++i)
1334 head[i] = cur;
1335 while (cur != NULL) {
1336
1337 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1338 PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1339 if (cur->direction != prev->direction)
1340 PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1341 if (cur->af != prev->af)
1342 PF_SET_SKIP_STEPS(PF_SKIP_AF);
1343 if (cur->proto != prev->proto)
1344 PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
70224baa 1345 if (cur->src.neg != prev->src.neg ||
02742ec6
JS
1346 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1347 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1348 if (cur->src.port[0] != prev->src.port[0] ||
1349 cur->src.port[1] != prev->src.port[1] ||
1350 cur->src.port_op != prev->src.port_op)
1351 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
70224baa 1352 if (cur->dst.neg != prev->dst.neg ||
02742ec6
JS
1353 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1354 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1355 if (cur->dst.port[0] != prev->dst.port[0] ||
1356 cur->dst.port[1] != prev->dst.port[1] ||
1357 cur->dst.port_op != prev->dst.port_op)
1358 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1359
1360 prev = cur;
1361 cur = TAILQ_NEXT(cur, entries);
1362 }
1363 for (i = 0; i < PF_SKIP_COUNT; ++i)
1364 PF_SET_SKIP_STEPS(i);
1365}
1366
1367int
1368pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1369{
1370 if (aw1->type != aw2->type)
1371 return (1);
1372 switch (aw1->type) {
1373 case PF_ADDR_ADDRMASK:
1374 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1375 return (1);
1376 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1377 return (1);
1378 return (0);
1379 case PF_ADDR_DYNIFTL:
1380 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1381 case PF_ADDR_NOROUTE:
70224baa 1382 case PF_ADDR_URPFFAILED:
02742ec6
JS
1383 return (0);
1384 case PF_ADDR_TABLE:
1385 return (aw1->p.tbl != aw2->p.tbl);
70224baa
JL
1386 case PF_ADDR_RTLABEL:
1387 return (aw1->v.rtlabel != aw2->v.rtlabel);
02742ec6 1388 default:
4b1cf444 1389 kprintf("invalid address type: %d\n", aw1->type);
02742ec6
JS
1390 return (1);
1391 }
1392}
1393
02742ec6
JS
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word of the
 * covered data changed from `old' to `new'.
 *
 * When `udp' is non-zero, a zero input checksum is passed through as 0
 * and a zero result is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	acc;

	if (udp && cksum == 0)
		return (0x0000);

	acc = cksum;
	acc += old;
	acc -= new;
	/* fold the carry/borrow in the upper half back into 16 bits */
	acc = (acc & 0xffff) + (acc >> 16);
	acc &= 0xffff;

	if (udp && acc == 0)
		return (0xFFFF);
	return (acc);
}
1408
1409void
1410pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1411 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1412{
1413 struct pf_addr ao;
1414 u_int16_t po = *p;
1415
1416 PF_ACPY(&ao, a, af);
1417 PF_ACPY(a, an, af);
1418
1419 *p = pn;
1420
1421 switch (af) {
1422#ifdef INET
1423 case AF_INET:
1424 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1425 ao.addr16[0], an->addr16[0], 0),
1426 ao.addr16[1], an->addr16[1], 0);
1427 *p = pn;
1428 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1429 ao.addr16[0], an->addr16[0], u),
1430 ao.addr16[1], an->addr16[1], u),
1431 po, pn, u);
1432 break;
1433#endif /* INET */
1434#ifdef INET6
1435 case AF_INET6:
1436 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1437 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1438 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1439 ao.addr16[0], an->addr16[0], u),
1440 ao.addr16[1], an->addr16[1], u),
1441 ao.addr16[2], an->addr16[2], u),
1442 ao.addr16[3], an->addr16[3], u),
1443 ao.addr16[4], an->addr16[4], u),
1444 ao.addr16[5], an->addr16[5], u),
1445 ao.addr16[6], an->addr16[6], u),
1446 ao.addr16[7], an->addr16[7], u),
1447 po, pn, u);
1448 break;
1449#endif /* INET6 */
1450 }
1451}
1452
1453
1454/* Changes a u_int32_t. Uses a void * so there are no align restrictions */
1455void
1456pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
1457{
1458 u_int32_t ao;
1459
1460 memcpy(&ao, a, sizeof(ao));
1461 memcpy(a, &an, sizeof(u_int32_t));
1462 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
1463 ao % 65536, an % 65536, u);
1464}
1465
#ifdef INET6
/*
 * Overwrite IPv6 address *a with *an and adjust checksum *c for each of
 * the eight 16-bit words.  `u' is forwarded to pf_cksum_fixup() as its
 * `udp' flag.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	prev;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&prev, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, prev.addr16[w], an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
1488
1489void
1490pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1491 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1492 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1493{
1494 struct pf_addr oia, ooa;
1495
1496 PF_ACPY(&oia, ia, af);
1497 PF_ACPY(&ooa, oa, af);
1498
1499 /* Change inner protocol port, fix inner protocol checksum. */
1500 if (ip != NULL) {
1501 u_int16_t oip = *ip;
1502 u_int32_t opc = 0;
1503
1504 if (pc != NULL)
1505 opc = *pc;
1506 *ip = np;
1507 if (pc != NULL)
1508 *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1509 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1510 if (pc != NULL)
1511 *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1512 }
1513 /* Change inner ip address, fix inner ip and icmp checksums. */
1514 PF_ACPY(ia, na, af);
1515 switch (af) {
1516#ifdef INET
1517 case AF_INET: {
1518 u_int32_t oh2c = *h2c;
1519
1520 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1521 oia.addr16[0], ia->addr16[0], 0),
1522 oia.addr16[1], ia->addr16[1], 0);
1523 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1524 oia.addr16[0], ia->addr16[0], 0),
1525 oia.addr16[1], ia->addr16[1], 0);
1526 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1527 break;
1528 }
1529#endif /* INET */
1530#ifdef INET6
1531 case AF_INET6:
1532 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1533 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1534 pf_cksum_fixup(pf_cksum_fixup(*ic,
1535 oia.addr16[0], ia->addr16[0], u),
1536 oia.addr16[1], ia->addr16[1], u),
1537 oia.addr16[2], ia->addr16[2], u),
1538 oia.addr16[3], ia->addr16[3], u),
1539 oia.addr16[4], ia->addr16[4], u),
1540 oia.addr16[5], ia->addr16[5], u),
1541 oia.addr16[6], ia->addr16[6], u),
1542 oia.addr16[7], ia->addr16[7], u);
1543 break;
1544#endif /* INET6 */
1545 }
1546 /* Change outer ip address, fix outer ip or icmpv6 checksum. */
1547 PF_ACPY(oa, na, af);
1548 switch (af) {
1549#ifdef INET
1550 case AF_INET:
1551 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1552 ooa.addr16[0], oa->addr16[0], 0),
1553 ooa.addr16[1], oa->addr16[1], 0);
1554 break;
1555#endif /* INET */
1556#ifdef INET6
1557 case AF_INET6:
1558 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1559 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1560 pf_cksum_fixup(pf_cksum_fixup(*ic,
1561 ooa.addr16[0], oa->addr16[0], u),
1562 ooa.addr16[1], oa->addr16[1], u),
1563 ooa.addr16[2], oa->addr16[2], u),
1564 ooa.addr16[3], oa->addr16[3], u),
1565 ooa.addr16[4], oa->addr16[4], u),
1566 ooa.addr16[5], oa->addr16[5], u),
1567 ooa.addr16[6], oa->addr16[6], u),
1568 ooa.addr16[7], oa->addr16[7], u);
1569 break;
1570#endif /* INET6 */
1571 }
1572}
1573
70224baa
JL
1574
1575/*
1576 * Need to modulate the sequence numbers in the TCP SACK option
1577 * (credits to Krzysztof Pfaff for report and patch)
1578 */
1579int
1580pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
1581 struct tcphdr *th, struct pf_state_peer *dst)
1582{
1583 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
1584 u_int8_t opts[TCP_MAXOLEN], *opt = opts;
1585 int copyback = 0, i, olen;
1586 struct raw_sackblock sack;
1587
1588#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
1589 if (hlen < TCPOLEN_SACKLEN ||
1590 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
1591 return 0;
1592
1593 while (hlen >= TCPOLEN_SACKLEN) {
1594 olen = opt[1];
1595 switch (*opt) {
1596 case TCPOPT_EOL: /* FALLTHROUGH */
1597 case TCPOPT_NOP:
1598 opt++;
1599 hlen--;
1600 break;
1601 case TCPOPT_SACK:
1602 if (olen > hlen)
1603 olen = hlen;
1604 if (olen >= TCPOLEN_SACKLEN) {
1605 for (i = 2; i + TCPOLEN_SACK <= olen;
1606 i += TCPOLEN_SACK) {
1607 memcpy(&sack, &opt[i], sizeof(sack));
1608 pf_change_a(&sack.rblk_start, &th->th_sum,
4fc5aa1c 1609 htonl(ntohl(sack.rblk_start) -
70224baa
JL
1610 dst->seqdiff), 0);
1611 pf_change_a(&sack.rblk_end, &th->th_sum,
4fc5aa1c 1612 htonl(ntohl(sack.rblk_end) -
70224baa
JL
1613 dst->seqdiff), 0);
1614 memcpy(&opt[i], &sack, sizeof(sack));
1615 }
1616 copyback = 1;
1617 }
1618 /* FALLTHROUGH */
1619 default:
1620 if (olen < 2)
1621 olen = 2;
1622 hlen -= olen;
1623 opt += olen;
1624 }
1625 }
1626
1627 if (copyback)
1628 m_copyback(m, off + sizeof(*th), thoptlen, opts);
1629 return (copyback);
1630}
1631
02742ec6
JS
1632void
1633pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1634 const struct pf_addr *saddr, const struct pf_addr *daddr,
1635 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
70224baa
JL
1636 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1637 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
02742ec6
JS
1638{
1639 struct mbuf *m;
1640 int len = 0, tlen;
1641#ifdef INET
1642 struct ip *h = NULL;
1643#endif /* INET */
1644#ifdef INET6
1645 struct ip6_hdr *h6 = NULL;
1646#endif /* INET6 */
1647 struct tcphdr *th = NULL;
70224baa 1648 char *opt;
02742ec6 1649
2a7a2b1c
JL
1650 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1651
02742ec6
JS
1652 /* maximum segment size tcp option */
1653 tlen = sizeof(struct tcphdr);
1654 if (mss)
1655 tlen += 4;
1656
1657 switch (af) {
1658#ifdef INET
1659 case AF_INET:
1660 len = sizeof(struct ip) + tlen;
1661 break;
1662#endif /* INET */
1663#ifdef INET6
1664 case AF_INET6:
1665 len = sizeof(struct ip6_hdr) + tlen;
1666 break;
1667#endif /* INET6 */
1668 }
1669
aa1da187
MD
1670 /*
1671 * Create outgoing mbuf.
1672 *
1673 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
1674 * so make sure pf.flags is clear.
1675 */
02742ec6 1676 m = m_gethdr(MB_DONTWAIT, MT_HEADER);
2a7a2b1c 1677 if (m == NULL) {
02742ec6 1678 return;
2a7a2b1c 1679 }
70224baa 1680 if (tag)
aa1da187
MD
1681 m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1682 m->m_pkthdr.pf.flags = 0;
315a7da3 1683 m->m_pkthdr.pf.tag = rtag;
70224baa
JL
1684
1685 if (r != NULL && r->rtableid >= 0)
02dd99a9 1686 m->m_pkthdr.pf.rtableid = r->rtableid;
70224baa 1687
02742ec6
JS
1688#ifdef ALTQ
1689 if (r != NULL && r->qid) {
315a7da3
JL
1690 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1691 m->m_pkthdr.pf.qid = r->qid;
1692 m->m_pkthdr.pf.ecn_af = af;
1693 m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
02742ec6 1694 }
70224baa 1695#endif /* ALTQ */
02742ec6
JS
1696 m->m_data += max_linkhdr;
1697 m->m_pkthdr.len = m->m_len = len;
1698 m->m_pkthdr.rcvif = NULL;
1699 bzero(m->m_data, len);
1700 switch (af) {
1701#ifdef INET
1702 case AF_INET:
1703 h = mtod(m, struct ip *);
1704
1705 /* IP header fields included in the TCP checksum */
1706 h->ip_p = IPPROTO_TCP;
1707 h->ip_len = tlen;
1708 h->ip_src.s_addr = saddr->v4.s_addr;
1709 h->ip_dst.s_addr = daddr->v4.s_addr;
1710
1711 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1712 break;
1713#endif /* INET */
1714#ifdef INET6
1715 case AF_INET6:
1716 h6 = mtod(m, struct ip6_hdr *);
1717
1718 /* IP header fields included in the TCP checksum */
1719 h6->ip6_nxt = IPPROTO_TCP;
1720 h6->ip6_plen = htons(tlen);
1721 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1722 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1723
1724 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1725 break;
1726#endif /* INET6 */
1727 }
1728
1729 /* TCP header */
1730 th->th_sport = sport;
1731 th->th_dport = dport;
1732 th->th_seq = htonl(seq);
1733 th->th_ack = htonl(ack);
1734 th->th_off = tlen >> 2;
1735 th->th_flags = flags;
1736 th->th_win = htons(win);
1737
1738 if (mss) {
1739 opt = (char *)(th + 1);
1740 opt[0] = TCPOPT_MAXSEG;
1741 opt[1] = 4;
1742 mss = htons(mss);
1743 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1744 }
1745
1746 switch (af) {
1747#ifdef INET
1748 case AF_INET:
1749 /* TCP checksum */
1750 th->th_sum = in_cksum(m, len);
1751
1752 /* Finish the IP header */
1753 h->ip_v = 4;
1754 h->ip_hl = sizeof(*h) >> 2;
1755 h->ip_tos = IPTOS_LOWDELAY;
1756 h->ip_len = len;
1757 h->ip_off = path_mtu_discovery ? IP_DF : 0;
1758 h->ip_ttl = ttl ? ttl : ip_defttl;
1759 h->ip_sum = 0;
70224baa 1760 if (eh == NULL) {
2a7a2b1c 1761 lwkt_reltoken(&pf_token);
70224baa 1762 ip_output(m, NULL, NULL, 0, NULL, NULL);
2a7a2b1c 1763 lwkt_gettoken(&pf_token);
70224baa
JL
1764 } else {
1765 struct route ro;
1766 struct rtentry rt;
1767 struct ether_header *e = (void *)ro.ro_dst.sa_data;
1768
1769 if (ifp == NULL) {
1770 m_freem(m);
1771 return;
1772 }
1773 rt.rt_ifp = ifp;
1774 ro.ro_rt = &rt;
1775 ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1776 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1777 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1778 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1779 e->ether_type = eh->ether_type;
1780 /* XXX_IMPORT: later */
2a7a2b1c 1781 lwkt_reltoken(&pf_token);
70224baa
JL
1782 ip_output(m, (void *)NULL, &ro, 0,
1783 (void *)NULL, (void *)NULL);
2a7a2b1c 1784 lwkt_gettoken(&pf_token);
70224baa 1785 }
02742ec6
JS
1786 break;
1787#endif /* INET */
1788#ifdef INET6
1789 case AF_INET6:
1790 /* TCP checksum */
1791 th->th_sum = in6_cksum(m, IPPROTO_TCP,
1792 sizeof(struct ip6_hdr), tlen);
1793
1794 h6->ip6_vfc |= IPV6_VERSION;
1795 h6->ip6_hlim = IPV6_DEFHLIM;
1796
2a7a2b1c 1797 lwkt_reltoken(&pf_token);
02742ec6 1798 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
2a7a2b1c 1799 lwkt_gettoken(&pf_token);
02742ec6
JS
1800 break;
1801#endif /* INET6 */
1802 }
1803}
1804
1805void
1806pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1807 struct pf_rule *r)
1808{
1809 struct mbuf *m0;
1810
aa1da187
MD
1811 /*
1812 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
1813 * so make sure pf.flags is clear.
1814 */
70224baa 1815 m0 = m_copy(m, 0, M_COPYALL);
aa1da187
MD
1816 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1817 m0->m_pkthdr.pf.flags = 0;
70224baa
JL
1818
1819 if (r->rtableid >= 0)
315a7da3 1820 m0->m_pkthdr.pf.rtableid = r->rtableid;
02742ec6
JS
1821
1822#ifdef ALTQ
1823 if (r->qid) {
315a7da3
JL
1824 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1825 m0->m_pkthdr.pf.qid = r->qid;
1826 m0->m_pkthdr.pf.ecn_af = af;
1827 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
02742ec6 1828 }
70224baa 1829#endif /* ALTQ */
02742ec6
JS
1830
1831 switch (af) {
1832#ifdef INET
1833 case AF_INET:
745a4a5d 1834 icmp_error(m0, type, code, 0, 0);
02742ec6
JS
1835 break;
1836#endif /* INET */
1837#ifdef INET6
1838 case AF_INET6:
1839 icmp6_error(m0, type, code, 0);
1840 break;
1841#endif /* INET6 */
1842 }
1843}
1844
1845/*
1846 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1847 * If n is 0, they match if they are equal. If n is != 0, they match if they
1848 * are different.
1849 */
1850int
1851pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1852 struct pf_addr *b, sa_family_t af)
1853{
1854 int match = 0;
1855
1856 switch (af) {
1857#ifdef INET
1858 case AF_INET:
1859 if ((a->addr32[0] & m->addr32[0]) ==
1860 (b->addr32[0] & m->addr32[0]))
1861 match++;
1862 break;
1863#endif /* INET */
1864#ifdef INET6
1865 case AF_INET6:
1866 if (((a->addr32[0] & m->addr32[0]) ==
1867 (b->addr32[0] & m->addr32[0])) &&
1868 ((a->addr32[1] & m->addr32[1]) ==
1869 (b->addr32[1] & m->addr32[1])) &&
1870 ((a->addr32[2] & m->addr32[2]) ==
1871 (b->addr32[2] & m->addr32[2])) &&
1872 ((a->addr32[3] & m->addr32[3]) ==
1873 (b->addr32[3] & m->addr32[3])))
1874 match++;
1875 break;
1876#endif /* INET6 */
1877 }
1878 if (match) {
1879 if (n)
1880 return (0);
1881 else
1882 return (1);
1883 } else {
1884 if (n)
1885 return (1);
1886 else
1887 return (0);
1888 }
1889}
1890
1891int
1892pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1893{
1894 switch (op) {
1895 case PF_OP_IRG:
1896 return ((p > a1) && (p < a2));
1897 case PF_OP_XRG:
1898 return ((p < a1) || (p > a2));
1899 case PF_OP_RRG:
1900 return ((p >= a1) && (p <= a2));
1901 case PF_OP_EQ:
1902 return (p == a1);
1903 case PF_OP_NE:
1904 return (p != a1);
1905 case PF_OP_LT:
1906 return (p < a1);
1907 case PF_OP_LE:
1908 return (p <= a1);
1909 case PF_OP_GT:
1910 return (p > a1);
1911 case PF_OP_GE:
1912 return (p >= a1);
1913 }
1914 return (0); /* never reached */
1915}
1916
1917int
1918pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
1919{
1920 a1 = ntohs(a1);
1921 a2 = ntohs(a2);
1922 p = ntohs(p);
1923 return (pf_match(op, a1, a2, p));
1924}
1925
1926int
1927pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1928{
1929 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1930 return (0);
1931 return (pf_match(op, a1, a2, u));
1932}
1933
1934int
1935pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1936{
1937 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1938 return (0);
1939 return (pf_match(op, a1, a2, g));
1940}
1941
70224baa 1942int
315a7da3 1943pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
70224baa
JL
1944{
1945 if (*tag == -1)
315a7da3 1946 *tag = m->m_pkthdr.pf.tag;
70224baa 1947
02742ec6
JS
1948 return ((!r->match_tag_not && r->match_tag == *tag) ||
1949 (r->match_tag_not && r->match_tag != *tag));
1950}
1951
70224baa 1952int
315a7da3 1953pf_tag_packet(struct mbuf *m, int tag, int rtableid)
02742ec6 1954{
70224baa
JL
1955 if (tag <= 0 && rtableid < 0)
1956 return (0);
1957
70224baa 1958 if (tag > 0)
315a7da3 1959 m->m_pkthdr.pf.tag = tag;
70224baa 1960 if (rtableid >= 0)
315a7da3 1961 m->m_pkthdr.pf.rtableid = rtableid;
02742ec6 1962
70224baa 1963 return (0);
02742ec6
JS
1964}
1965
315a7da3 1966void
70224baa
JL
1967pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
1968 struct pf_rule **r, struct pf_rule **a, int *match)
1969{
1970 struct pf_anchor_stackframe *f;
1971
1972 (*r)->anchor->match = 0;
1973 if (match)
1974 *match = 0;
1975 if (*depth >= sizeof(pf_anchor_stack) /
1976 sizeof(pf_anchor_stack[0])) {
1977 kprintf("pf_step_into_anchor: stack overflow\n");
1978 *r = TAILQ_NEXT(*r, entries);
1979 return;
1980 } else if (*depth == 0 && a != NULL)
1981 *a = *r;
1982 f = pf_anchor_stack + (*depth)++;
1983 f->rs = *rs;
1984 f->r = *r;
1985 if ((*r)->anchor_wildcard) {
1986 f->parent = &(*r)->anchor->children;
1987 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
1988 NULL) {
1989 *r = NULL;
1990 return;
1991 }
1992 *rs = &f->child->ruleset;
1993 } else {
1994 f->parent = NULL;
1995 f->child = NULL;
1996 *rs = &(*r)->anchor->ruleset;
1997 }
1998 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
1999}
02742ec6 2000
70224baa
JL
2001int
2002pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2003 struct pf_rule **r, struct pf_rule **a, int *match)
2004{
2005 struct pf_anchor_stackframe *f;
2006 int quick = 0;
2007
2008 do {
2009 if (*depth <= 0)
2010 break;
2011 f = pf_anchor_stack + *depth - 1;
2012 if (f->parent != NULL && f->child != NULL) {
2013 if (f->child->match ||
2014 (match != NULL && *match)) {
2015 f->r->anchor->match = 1;
2016 *match = 0;
2017 }
2018 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2019 if (f->child != NULL) {
2020 *rs = &f->child->ruleset;
2021 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2022 if (*r == NULL)
2023 continue;
2024 else
2025 break;
2026 }
2027 }
2028 (*depth)--;
2029 if (*depth == 0 && a != NULL)
2030 *a = NULL;
2031 *rs = f->rs;
2032 if (f->r->anchor->match || (match != NULL && *match))
2033 quick = f->r->quick;
2034 *r = TAILQ_NEXT(f->r, entries);
2035 } while (*r == NULL);
2036
2037 return (quick);
2038}
02742ec6
JS
2039
2040#ifdef INET6
2041void
2042pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2043 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2044{
2045 switch (af) {
2046#ifdef INET
2047 case AF_INET:
2048 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2049 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2050 break;
2051#endif /* INET */
2052 case AF_INET6:
2053 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2054 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2055 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2056 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2057 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2058 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2059 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2060 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2061 break;
2062 }
2063}
2064
2065void
2066pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2067{
2068 switch (af) {
2069#ifdef INET
2070 case AF_INET:
2071 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2072 break;
2073#endif /* INET */
2074 case AF_INET6:
2075 if (addr->addr32[3] == 0xffffffff) {
2076 addr->addr32[3] = 0;
2077 if (addr->addr32[2] == 0xffffffff) {
2078 addr->addr32[2] = 0;
2079 if (addr->addr32[1] == 0xffffffff) {
2080 addr->addr32[1] = 0;
2081 addr->addr32[0] =
2082 htonl(ntohl(addr->addr32[0]) + 1);
2083 } else
2084 addr->addr32[1] =
2085 htonl(ntohl(addr->addr32[1]) + 1);
2086 } else
2087 addr->addr32[2] =
2088 htonl(ntohl(addr->addr32[2]) + 1);
2089 } else
2090 addr->addr32[3] =
2091 htonl(ntohl(addr->addr32[3]) + 1);
2092 break;
2093 }
2094}
2095#endif /* INET6 */
2096
2097#define mix(a,b,c) \
2098 do { \
2099 a -= b; a -= c; a ^= (c >> 13); \
2100 b -= c; b -= a; b ^= (a << 8); \
2101 c -= a; c -= b; c ^= (b >> 13); \
2102 a -= b; a -= c; a ^= (c >> 12); \
2103 b -= c; b -= a; b ^= (a << 16); \
2104 c -= a; c -= b; c ^= (b >> 5); \
2105 a -= b; a -= c; a ^= (c >> 3); \
2106 b -= c; b -= a; b ^= (a << 10); \
2107 c -= a; c -= b; c ^= (b >> 15); \
2108 } while (0)
2109
2110/*
2111 * hash function based on bridge_hash in if_bridge.c
2112 */
2113void
2114pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2115 struct pf_poolhashkey *key, sa_family_t af)
2116{
2117 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2118
2119 switch (af) {
2120#ifdef INET
2121 case AF_INET:
2122 a += inaddr->addr32[0];
2123 b += key->key32[1];
2124 mix(a, b, c);
2125 hash->addr32[0] = c + key->key32[2];
2126 break;
2127#endif /* INET */
2128#ifdef INET6
2129 case AF_INET6:
2130 a += inaddr->addr32[0];
2131 b += inaddr->addr32[2];
2132 mix(a, b, c);
2133 hash->addr32[0] = c;
2134 a += inaddr->addr32[1];
2135 b += inaddr->addr32[3];
2136 c += key->key32[1];
2137 mix(a, b, c);
2138 hash->addr32[1] = c;
2139 a += inaddr->addr32[2];
2140 b += inaddr->addr32[1];
2141 c += key->key32[2];
2142 mix(a, b, c);
2143 hash->addr32[2] = c;
2144 a += inaddr->addr32[3];
2145 b += inaddr->addr32[0];
2146 c += key->key32[3];
2147 mix(a, b, c);
2148 hash->addr32[3] = c;
2149 break;
2150#endif /* INET6 */
2151 }
2152}
2153
/*
 * Choose the translation address 'naddr' for source address 'saddr' from
 * the address pool of rule 'r', according to the pool type found in
 * r->rpool.opts (none, bitmask, random, source-hash or round-robin).
 *
 * 'init_addr', when non-NULL and still all-zero, receives the first
 * address picked by the random and round-robin types.
 * 'sn' points at the caller's source-node pointer: when *sn is NULL and
 * sticky-address is enabled the source tracking tree is searched, and a
 * previously recorded address is reused if one is found; when *sn is
 * non-NULL on exit the chosen address is recorded in it.
 *
 * Returns 0 when 'naddr' was filled in, 1 when no address could be chosen.
 */
2154int
2155pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2156 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2157{
2158 unsigned char hash[16];
2159 struct pf_pool *rpool = &r->rpool;
2160 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
2161 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
2162 struct pf_pooladdr *acur = rpool->cur;
2163 struct pf_src_node k;
2164
 /* Sticky address: reuse the mapping recorded for this source, if any. */
2165 if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2166 (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2167 k.af = af;
2168 PF_ACPY(&k.addr, saddr, af);
2169 if (r->rule_flag & PFRULE_RULESRCTRACK ||
2170 r->rpool.opts & PF_POOL_STICKYADDR)
2171 k.rule.ptr = r;
2172 else
2173 k.rule.ptr = NULL;
2174 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2175 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2176 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2177 PF_ACPY(naddr, &(*sn)->raddr, af);
2178 if (pf_status.debug >= PF_DEBUG_MISC) {
4b1cf444 2179 kprintf("pf_map_addr: src tracking maps ");
02742ec6 2180 pf_print_host(&k.addr, 0, af);
4b1cf444 2181 kprintf(" to ");
02742ec6 2182 pf_print_host(naddr, 0, af);
4b1cf444 2183 kprintf("\n");
02742ec6
JS
2184 }
2185 return (0);
2186 }
2187 }
2188
 /*
  * Work out the base address and mask of the current pool entry.
  * Dynamic interface entries use the per-family address and mask kept in
  * the dyn structure and fail when the per-family address count is below
  * one, unless the pool is round-robin.  Table entries are only accepted
  * for round-robin pools.
  */
2189 if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2190 return (1);
2191 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
70224baa
JL
2192 switch (af) {
2193#ifdef INET
2194 case AF_INET:
02742ec6
JS
2195 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2196 (rpool->opts & PF_POOL_TYPEMASK) !=
2197 PF_POOL_ROUNDROBIN)
2198 return (1);
2199 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2200 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
70224baa
JL
2201 break;
2202#endif /* INET */
2203#ifdef INET6
2204 case AF_INET6:
02742ec6
JS
2205 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2206 (rpool->opts & PF_POOL_TYPEMASK) !=
2207 PF_POOL_ROUNDROBIN)
2208 return (1);
2209 raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2210 rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
70224baa
JL
2211 break;
2212#endif /* INET6 */
02742ec6
JS
2213 }
2214 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2215 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2216 return (1); /* unsupported */
2217 } else {
2218 raddr = &rpool->cur->addr.v.a.addr;
2219 rmask = &rpool->cur->addr.v.a.mask;
2220 }
2221
2222 switch (rpool->opts & PF_POOL_TYPEMASK) {
2223 case PF_POOL_NONE:
2224 PF_ACPY(naddr, raddr, af);
2225 break;
2226 case PF_POOL_BITMASK:
2227 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2228 break;
2229 case PF_POOL_RANDOM:
 /*
  * First pick (init_addr still zero): seed the pool counter with
  * random bits.  For IPv6 the words are seeded from the last one
  * backwards, stopping at the first word whose mask is all-ones.
  * Later picks (or a NULL init_addr) just advance the counter.
  */
2230 if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2231 switch (af) {
2232#ifdef INET
2233 case AF_INET:
4fc5aa1c 2234 rpool->counter.addr32[0] = htonl(karc4random());
02742ec6
JS
2235 break;
2236#endif /* INET */
2237#ifdef INET6
2238 case AF_INET6:
2239 if (rmask->addr32[3] != 0xffffffff)
70224baa 2240 rpool->counter.addr32[3] =
4fc5aa1c 2241 htonl(karc4random());
02742ec6
JS
2242 else
2243 break;
2244 if (rmask->addr32[2] != 0xffffffff)
70224baa 2245 rpool->counter.addr32[2] =
4fc5aa1c 2246 htonl(karc4random());
02742ec6
JS
2247 else
2248 break;
2249 if (rmask->addr32[1] != 0xffffffff)
70224baa 2250 rpool->counter.addr32[1] =
4fc5aa1c 2251 htonl(karc4random());
02742ec6
JS
2252 else
2253 break;
2254 if (rmask->addr32[0] != 0xffffffff)
70224baa 2255 rpool->counter.addr32[0] =
4fc5aa1c 2256 htonl(karc4random());
02742ec6
JS
2257 break;
2258#endif /* INET6 */
2259 }
2260 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2261 PF_ACPY(init_addr, naddr, af);
2262
2263 } else {
2264 PF_AINC(&rpool->counter, af);
2265 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2266 }
2267 break;
2268 case PF_POOL_SRCHASH:
 /* Host bits come from a keyed hash of the source address. */
2269 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2270 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2271 break;
2272 case PF_POOL_ROUNDROBIN:
 /*
  * Stay on the current pool entry while it can still supply an
  * address (pfr_pool_get() returning zero is treated as success
  * here; a plain entry qualifies while the counter still matches
  * raddr/rmask).  Otherwise fall into try_next.
  */
2273 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2274 if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2275 &rpool->tblidx, &rpool->counter,
2276 &raddr, &rmask, af))
2277 goto get_addr;
2278 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2279 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2280 &rpool->tblidx, &rpool->counter,
2281 &raddr, &rmask, af))
2282 goto get_addr;
2283 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2284 goto get_addr;
2285
 /*
  * Advance to the next pool entry, wrapping to the head of the list.
  * Table-backed entries that yield nothing are skipped until the walk
  * is back at the entry it started from (acur), at which point the
  * lookup fails.
  */
2286 try_next:
2287 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2288 rpool->cur = TAILQ_FIRST(&rpool->list);
2289 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2290 rpool->tblidx = -1;
2291 if (pfr_pool_get(rpool->cur->addr.p.tbl,
2292 &rpool->tblidx, &rpool->counter,
2293 &raddr, &rmask, af)) {
2294 /* table contains no address of type 'af' */
2295 if (rpool->cur != acur)
2296 goto try_next;
2297 return (1);
2298 }
2299 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2300 rpool->tblidx = -1;
2301 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2302 &rpool->tblidx, &rpool->counter,
2303 &raddr, &rmask, af)) {
2304 /* table contains no address of type 'af' */
2305 if (rpool->cur != acur)
2306 goto try_next;
2307 return (1);
2308 }
2309 } else {
2310 raddr = &rpool->cur->addr.v.a.addr;
2311 rmask = &rpool->cur->addr.v.a.mask;
2312 PF_ACPY(&rpool->counter, raddr, af);
2313 }
2314
 /* Hand out the counter value, then step it for the next caller. */
2315 get_addr:
2316 PF_ACPY(naddr, &rpool->counter, af);
70224baa
JL
2317 if (init_addr != NULL && PF_AZERO(init_addr, af))
2318 PF_ACPY(init_addr, naddr, af);
02742ec6
JS
2319 PF_AINC(&rpool->counter, af);
2320 break;
2321 }
 /* Remember the choice in the source node so later lookups can reuse it. */
2322 if (*sn != NULL)
2323 PF_ACPY(&(*sn)->raddr, naddr, af);
2324
2325 if (pf_status.debug >= PF_DEBUG_MISC &&
2326 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
4b1cf444 2327 kprintf("pf_map_addr: selected address ");
02742ec6 2328 pf_print_host(naddr, 0, af);
4b1cf444 2329 kprintf("\n");
02742ec6
JS
2330 }
2331
2332 return (0);
2333}
2334
2335int
2336pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2337 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2338 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2339 struct pf_src_node **sn)
2340{
315a7da3 2341 struct pf_state_key_cmp key;
02742ec6
JS
2342 struct pf_addr init_addr;
2343 u_int16_t cut;
2344
2345 bzero(&init_addr, sizeof(init_addr));
2346 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2347 return (1);
2348
70224baa
JL
2349 if (proto == IPPROTO_ICMP) {
2350 low = 1;
2351 high = 65535;
2352 }
2353
02742ec6
JS
2354 do {
2355 key.af = af;
2356 key.proto = proto;
2357 PF_ACPY(&key.ext.addr, daddr, key.af);
2358 PF_ACPY(&key.gwy.addr, naddr, key.af);
2359 key.ext.port = dport;
2360
2361 /*
2362 * port search; start random, step;
2363 * similar 2 portloop in in_pcbbind
2364 */
70224baa
JL
2365 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2366 proto == IPPROTO_ICMP)) {
2367 key.gwy.port = dport;
02742ec6
JS
2368 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2369 return (0);
2370 } else if (low == 0 && high == 0) {
2371 key.gwy.port = *nport;
2372 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2373 return (0);
2374 } else if (low == high) {
2375 key.gwy.port = htons(low);
2376 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2377 *nport = htons(low);
2378 return (0);
2379 }
2380 } else {
2381 u_int16_t tmp;
2382
2383 if (low > high) {
2384 tmp = low;
2385 low = high;
2386 high = tmp;
2387 }
2388 /* low < high */
4fc5aa1c 2389 cut = htonl(karc4random()) % (1 + high - low) + low;
02742ec6
JS
2390 /* low <= cut <= high */
2391 for (tmp = cut; tmp <= high; ++(tmp)) {
2392 key.gwy.port = htons(tmp);
2393 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2394 NULL) {
2395 *nport = htons(tmp);
2396 return (0);
2397 }
2398 }
2399 for (tmp = cut - 1; tmp >= low; --(tmp)) {
2400 key.gwy.port = htons(tmp);
2401 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2402 NULL) {
2403 *nport = htons(tmp);
2404 return (0);
2405 }
2406 }
2407 }
2408
2409 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2410 case PF_POOL_RANDOM:
2411 case PF_POOL_ROUNDROBIN:
2412 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2413 return (1);
2414 break;
2415 case PF_POOL_NONE:
2416 case PF_POOL_SRCHASH:
2417 case PF_POOL_BITMASK:
2418 default:
2419 return (1);
2420 }
2421 } while (! PF_AEQ(&init_addr, naddr, af) );
2422
2423 return (1); /* none available */
2424}
2425
/*
 * Find the translation rule matching a packet.
 *
 * Walks the active rules of main-ruleset slot 'rs_num', descending into
 * anchors, and stops at the first non-anchor rule whose interface,
 * direction, address family, protocol, addresses, ports, tag and OS
 * fingerprint criteria all match.  Mismatches on the leading criteria
 * jump ahead through the rule's skip pointers; the others step to the
 * next rule.
 *
 * For a binat rule seen on an inbound packet the roles are reversed:
 * the rule's destination spec is checked against the packet source, and
 * the current pool address (if any) against the packet destination.
 *
 * After the walk the packet is tagged with the last tag / routing table
 * id collected from matching rules; NULL is returned when
 * pf_tag_packet() reports non-zero.  NULL is also returned when the
 * matched rule is a "no nat", "no rdr" or "no binat" rule, or when
 * nothing matched; otherwise the matching rule is returned.
 */
2426struct pf_rule *
2427pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2428 int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2429 struct pf_addr *daddr, u_int16_t dport, int rs_num)
2430{
70224baa 2431 struct pf_rule *r, *rm = NULL;
02742ec6 2432 struct pf_ruleset *ruleset = NULL;
70224baa
JL
2433 int tag = -1;
2434 int rtableid = -1;
2435 int asd = 0;
02742ec6
JS
2436
2437 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2438 while (r && rm == NULL) {
2439 struct pf_rule_addr *src = NULL, *dst = NULL;
2440 struct pf_addr_wrap *xdst = NULL;
2441
 /* Inbound binat: match in the reverse sense (see header comment). */
2442 if (r->action == PF_BINAT && direction == PF_IN) {
2443 src = &r->dst;
2444 if (r->rpool.cur != NULL)
2445 xdst = &r->rpool.cur->addr;
2446 } else {
2447 src = &r->src;
2448 dst = &r->dst;
2449 }
2450
2451 r->evaluations++;
70224baa 2452 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
2453 r = r->skip[PF_SKIP_IFP].ptr;
2454 else if (r->direction && r->direction != direction)
2455 r = r->skip[PF_SKIP_DIR].ptr;
2456 else if (r->af && r->af != pd->af)
2457 r = r->skip[PF_SKIP_AF].ptr;
2458 else if (r->proto && r->proto != pd->proto)
2459 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
2460 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2461 src->neg, kif))
02742ec6
JS
 /* 'src' may alias r->dst, so choose the skip list that matches it. */
2462 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2463 PF_SKIP_DST_ADDR].ptr;
2464 else if (src->port_op && !pf_match_port(src->port_op,
2465 src->port[0], src->port[1], sport))
2466 r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2467 PF_SKIP_DST_PORT].ptr;
2468 else if (dst != NULL &&
70224baa 2469 PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
02742ec6 2470 r = r->skip[PF_SKIP_DST_ADDR].ptr;
70224baa
JL
2471 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2472 0, NULL))
02742ec6
JS
2473 r = TAILQ_NEXT(r, entries);
2474 else if (dst != NULL && dst->port_op &&
2475 !pf_match_port(dst->port_op, dst->port[0],
2476 dst->port[1], dport))
2477 r = r->skip[PF_SKIP_DST_PORT].ptr;
315a7da3 2478 else if (r->match_tag && !pf_match_tag(m, r, &tag))
70224baa 2479 r = TAILQ_NEXT(r, entries);
02742ec6
JS
2480 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2481 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2482 off, pd->hdr.tcp), r->os_fingerprint)))
2483 r = TAILQ_NEXT(r, entries);
70224baa
JL
2484 else {
 /* Every criterion matched: record tag/rtable, then stop or descend. */
2485 if (r->tag)
2486 tag = r->tag;
2487 if (r->rtableid >= 0)
2488 rtableid = r->rtableid;
2489 if (r->anchor == NULL) {
02742ec6 2490 rm = r;
70224baa
JL
2491 } else
2492 pf_step_into_anchor(&asd, &ruleset, rs_num,
2493 &r, NULL, NULL);
2494 }
 /* Ran off the end of the current rule list: resume in the parent. */
2495 if (r == NULL)
2496 pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2497 NULL, NULL);
02742ec6 2498 }
315a7da3 2499 if (pf_tag_packet(m, tag, rtableid))
70224baa 2500 return (NULL);
02742ec6
JS
2501 if (rm != NULL && (rm->action == PF_NONAT ||
2502 rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2503 return (NULL);
2504 return (rm);
2505}
2506
/*
 * Look up the translation rule for a packet and compute the translated
 * address (and port, where applicable).
 *
 * Outbound packets are matched against the binat ruleset first and then
 * the nat ruleset; inbound packets against the rdr ruleset first and
 * then the binat ruleset.
 *
 * On a match the result depends on the rule action:
 *   - nat:   pf_get_sport() picks 'naddr' and '*nport';
 *   - binat: 'naddr' is built with PF_POOLMASK from the pool address
 *            (outbound, using saddr) or from the rule's source address
 *            (inbound, using daddr); dynamic interface addresses use
 *            their per-family address/mask and fail when the per-family
 *            address count is below one;
 *   - rdr:   pf_map_addr() picks 'naddr'; '*nport' is derived from the
 *            rule's proxy port(s) when set.
 *
 * Returns the matching rule, or NULL when there is no match, the match
 * is a "no nat/binat/rdr" rule, or the address/port selection failed.
 */
2507struct pf_rule *
2508pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2509 struct pfi_kif *kif, struct pf_src_node **sn,
2510 struct pf_addr *saddr, u_int16_t sport,
2511 struct pf_addr *daddr, u_int16_t dport,
2512 struct pf_addr *naddr, u_int16_t *nport)
2513{
2514 struct pf_rule *r = NULL;
2515
2516 if (direction == PF_OUT) {
2517 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2518 sport, daddr, dport, PF_RULESET_BINAT);
2519 if (r == NULL)
2520 r = pf_match_translation(pd, m, off, direction, kif,
2521 saddr, sport, daddr, dport, PF_RULESET_NAT);
2522 } else {
2523 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2524 sport, daddr, dport, PF_RULESET_RDR);
2525 if (r == NULL)
2526 r = pf_match_translation(pd, m, off, direction, kif,
2527 saddr, sport, daddr, dport, PF_RULESET_BINAT);
2528 }
2529
2530 if (r != NULL) {
2531 switch (r->action) {
2532 case PF_NONAT:
2533 case PF_NOBINAT:
2534 case PF_NORDR:
2535 return (NULL);
2536 case PF_NAT:
2537 if (pf_get_sport(pd->af, pd->proto, r, saddr,
2538 daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2539 r->rpool.proxy_port[1], sn)) {
2540 DPFPRINTF(PF_DEBUG_MISC,
2541 ("pf: NAT proxy port allocation "
2542 "(%u-%u) failed\n",
2543 r->rpool.proxy_port[0],
2544 r->rpool.proxy_port[1]));
2545 return (NULL);
2546 }
2547 break;
2548 case PF_BINAT:
2549 switch (direction) {
2550 case PF_OUT:
 /* Outbound binat: combine the pool address/mask with saddr. */
2551 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
70224baa
JL
2552 switch (pd->af) {
2553#ifdef INET
2554 case AF_INET:
02742ec6
JS
2555 if (r->rpool.cur->addr.p.dyn->
2556 pfid_acnt4 < 1)
2557 return (NULL);
2558 PF_POOLMASK(naddr,
2559 &r->rpool.cur->addr.p.dyn->
2560 pfid_addr4,
2561 &r->rpool.cur->addr.p.dyn->
2562 pfid_mask4,
2563 saddr, AF_INET);
70224baa
JL
2564 break;
2565#endif /* INET */
2566#ifdef INET6
2567 case AF_INET6:
02742ec6
JS
2568 if (r->rpool.cur->addr.p.dyn->
2569 pfid_acnt6 < 1)
2570 return (NULL);
2571 PF_POOLMASK(naddr,
2572 &r->rpool.cur->addr.p.dyn->
2573 pfid_addr6,
2574 &r->rpool.cur->addr.p.dyn->
2575 pfid_mask6,
2576 saddr, AF_INET6);
70224baa
JL
2577 break;
2578#endif /* INET6 */
02742ec6
JS
2579 }
2580 } else
2581 PF_POOLMASK(naddr,
2582 &r->rpool.cur->addr.v.a.addr,
2583 &r->rpool.cur->addr.v.a.mask,
2584 saddr, pd->af);
2585 break;
2586 case PF_IN:
70224baa
JL
 /* Inbound binat: combine the rule's source address/mask with daddr. */
2587 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2588 switch (pd->af) {
2589#ifdef INET
2590 case AF_INET:
02742ec6
JS
2591 if (r->src.addr.p.dyn->
2592 pfid_acnt4 < 1)
2593 return (NULL);
2594 PF_POOLMASK(naddr,
2595 &r->src.addr.p.dyn->
2596 pfid_addr4,
2597 &r->src.addr.p.dyn->
2598 pfid_mask4,
2599 daddr, AF_INET);
70224baa
JL
2600 break;
2601#endif /* INET */
2602#ifdef INET6
2603 case AF_INET6:
02742ec6
JS
2604 if (r->src.addr.p.dyn->
2605 pfid_acnt6 < 1)
2606 return (NULL);
2607 PF_POOLMASK(naddr,
2608 &r->src.addr.p.dyn->
2609 pfid_addr6,
2610 &r->src.addr.p.dyn->
2611 pfid_mask6,
2612 daddr, AF_INET6);
70224baa
JL
2613 break;
2614#endif /* INET6 */
02742ec6
JS
2615 }
2616 } else
2617 PF_POOLMASK(naddr,
2618 &r->src.addr.v.a.addr,
2619 &r->src.addr.v.a.mask, daddr,
2620 pd->af);
2621 break;
2622 }
2623 break;
2624 case PF_RDR: {
70224baa 2625 if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
02742ec6 2626 return (NULL);
70224baa
JL
 /* Bitmask pools: re-apply the mask using daddr as the host part. */
2627 if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2628 PF_POOL_BITMASK)
2629 PF_POOLMASK(naddr, naddr,
2630 &r->rpool.cur->addr.v.a.mask, daddr,
2631 pd->af);
315a7da3 2632
02742ec6
JS
 /*
  * Port range: take dport's offset from the rule's first destination
  * port modulo the size of the proxy range and add it to the first
  * proxy port.  A single proxy port is used as is; with neither set
  * *nport is left unchanged.
  */
2633 if (r->rpool.proxy_port[1]) {
2634 u_int32_t tmp_nport;
2635
2636 tmp_nport = ((ntohs(dport) -
2637 ntohs(r->dst.port[0])) %
2638 (r->rpool.proxy_port[1] -
2639 r->rpool.proxy_port[0] + 1)) +
2640 r->rpool.proxy_port[0];
2641
2642 /* wrap around if necessary */
2643 if (tmp_nport > 65535)
2644 tmp_nport -= 65535;
2645 *nport = htons((u_int16_t)tmp_nport);
2646 } else if (r->rpool.proxy_port[0])
2647 *nport = htons(r->rpool.proxy_port[0]);
2648 break;
2649 }
2650 default:
2651 return (NULL);
2652 }
2653 }
2654
2655 return (r);
2656}
2657
2658#ifdef SMP
/*
 * Message carrying the arguments of a PCB hash lookup together with the
 * location where the result is to be stored.  It is filled in by
 * pf_socket_lookup() and consumed by in_pcblookup_hash_handler().
 */
2659struct netmsg_hashlookup {
002c1265 2660 struct netmsg_base base;
02742ec6
JS
2661 struct inpcb **nm_pinp; /* where the handler stores the result */
2662 struct inpcbinfo *nm_pcbinfo; /* PCB table to search */
2663 struct pf_addr *nm_saddr;
2664 struct pf_addr *nm_daddr;
2665 uint16_t nm_sport;
2666 uint16_t nm_dport;
2667 sa_family_t nm_af; /* selects the IPv4 or IPv6 lookup */
2668};
2669
/*
 * Message handler: run the PCB hash lookup described by the
 * netmsg_hashlookup message, store the resulting inpcb pointer through
 * nm_pinp and reply to the message.
 *
 * AF_INET uses in_pcblookup_hash(); with INET6 compiled in, every other
 * family uses in6_pcblookup_hash().  Without INET6 a non-AF_INET message
 * is replied to without *nm_pinp being written.
 */
4599cf19 2670static void
002c1265 2671in_pcblookup_hash_handler(netmsg_t msg)
02742ec6 2672{
002c1265 2673 struct netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg;
02742ec6 2674
002c1265
MD
2675 if (rmsg->nm_af == AF_INET)
2676 *rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo,
2677 rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4,
2678 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
02742ec6
JS
2679#ifdef INET6
2680 else
002c1265
MD
2681 *rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo,
2682 &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6,
2683 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
02742ec6 2684#endif /* INET6 */
002c1265 2685 lwkt_replymsg(&rmsg->base.lmsg, 0);
02742ec6
JS
2686}
2687#endif /* SMP */
2688
/*
 * Look up the local socket a TCP or UDP packet belongs to and record the
 * owning credentials in pd->lookup.
 *
 * pd->lookup.uid/gid/pid are first reset to UID_MAX/GID_MAX/NO_PID.  For
 * packets that are not inbound the addresses and ports are swapped
 * before the lookup.  UDP is looked up in udbinfo; TCP in the tcbinfo
 * slot selected by tcp_addrcpu(), through a message to that CPU on SMP
 * kernels when the slot is not the current CPU's.
 *
 * Returns 1 when a PCB was found (uid and gid are then filled in from
 * the socket's credentials), -1 otherwise: NULL pd, missing protocol
 * header, protocol other than TCP/UDP, unsupported address family, or
 * no matching PCB.
 */
2689int
315a7da3 2690pf_socket_lookup(int direction, struct pf_pdesc *pd)
02742ec6
JS
2691{
2692 struct pf_addr *saddr, *daddr;
2693 u_int16_t sport, dport;
2694 struct inpcbinfo *pi;
2695 struct inpcb *inp;
2696#ifdef SMP
2697 struct netmsg_hashlookup *msg = NULL;
2698#endif
2699 int pi_cpu = 0;
2700
70224baa
JL
2701 if (pd == NULL)
2702 return (-1);
2703 pd->lookup.uid = UID_MAX;
2704 pd->lookup.gid = GID_MAX;
2705 pd->lookup.pid = NO_PID;
02742ec6
JS
2706 if (direction == PF_IN) {
2707 saddr = pd->src;
2708 daddr = pd->dst;
2709 } else {
2710 saddr = pd->dst;
2711 daddr = pd->src;
2712 }
2713 switch (pd->proto) {
2714 case IPPROTO_TCP:
315a7da3
JL
2715 if (pd->hdr.tcp == NULL)
2716 return (-1);
02742ec6
JS
2717 sport = pd->hdr.tcp->th_sport;
2718 dport = pd->hdr.tcp->th_dport;
2719
 /* NOTE(review): the v4 address words are used here even when pd->af is AF_INET6 - confirm intended. */
2720 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
2721 pi = &tcbinfo[pi_cpu];
2722#ifdef SMP
2723 /*
2724 * Our netstack runs lockless on MP systems
2725 * (only for TCP connections at the moment).
2726 *
2727 * As we are not allowed to read another CPU's tcbinfo,
2728 * we have to ask that CPU via remote call to search the
2729 * table for us.
2730 *
2731 * Prepare a msg iff data belongs to another CPU.
2732 */
2733 if (pi_cpu != mycpu->gd_cpuid) {
efda3bd0 2734 msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_INTWAIT);
002c1265 2735 netmsg_init(&msg->base, NULL, &netisr_afree_rport,
48e7b118 2736 0, in_pcblookup_hash_handler);
02742ec6
JS
2737 msg->nm_pinp = &inp;
2738 msg->nm_pcbinfo = pi;
2739 msg->nm_saddr = saddr;
2740 msg->nm_sport = sport;
2741 msg->nm_daddr = daddr;
2742 msg->nm_dport = dport;
2743 msg->nm_af = pd->af;
2744 }
2745#endif /* SMP */
2746 break;
2747 case IPPROTO_UDP:
315a7da3
JL
2748 if (pd->hdr.udp == NULL)
2749 return (-1);
02742ec6
JS
2750 sport = pd->hdr.udp->uh_sport;
2751 dport = pd->hdr.udp->uh_dport;
2752 pi = &udbinfo;
2753 break;
2754 default:
315a7da3 2755 return (-1);
02742ec6
JS
2756 }
 /*
  * Not inbound: the addresses were already swapped above, swap the
  * ports to match.  A message prepared above keeps the port values
  * it copied before this swap.
  */
2757 if (direction != PF_IN) {
2758 u_int16_t p;
2759
2760 p = sport;
2761 sport = dport;
2762 dport = p;
2763 }
2764 switch (pd->af) {
2765#ifdef INET6
2766 case AF_INET6:
2767#ifdef SMP
2768 /*
2769 * Query other CPU, second part
2770 *
2771 * msg only gets initialized when:
2772 * 1) packet is TCP
2773 * 2) the info belongs to another CPU
2774 *
2775 * Use some switch/case magic to avoid code duplication.
2776 */
2777 if (msg == NULL)
2778#endif /* SMP */
2779 {
2780 inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2781 &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2782
2783 if (inp == NULL)
70224baa 2784 return (-1);
02742ec6
JS
2785 break;
2786 }
2787 /* FALLTHROUGH if SMP and on other CPU */
2788#endif /* INET6 */
2789 case AF_INET:
2790#ifdef SMP
 /*
  * Remote lookup: send the prepared message to the owning CPU's
  * port; the handler stores its result in 'inp' through nm_pinp.
  */
2791 if (msg != NULL) {
2a7a2b1c 2792 lwkt_domsg(cpu_portfn(pi_cpu),
002c1265 2793 &msg->base.lmsg, 0);
02742ec6
JS
2794 } else
2795#endif /* SMP */
2796 {
2797 inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2798 dport, INPLOOKUP_WILDCARD, NULL);
2799 }
2800 if (inp == NULL)
315a7da3 2801 return (-1);
02742ec6
JS
2802 break;
2803
2804 default:
70224baa 2805 return (-1);
02742ec6 2806 }
70224baa
JL
2807 pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
2808 pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
02742ec6
JS
2809 return (1);
2810}
2811
2812u_int8_t
2813pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2814{
2815 int hlen;
2816 u_int8_t hdr[60];
2817 u_int8_t *opt, optlen;
2818 u_int8_t wscale = 0;
2819
2820 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
2821 if (hlen <= sizeof(struct tcphdr))
2822 return (0);
2823 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2824 return (0);
2825 opt = hdr + sizeof(struct tcphdr);
2826 hlen -= sizeof(struct tcphdr);
2827 while (hlen >= 3) {
2828 switch (*opt) {
2829 case TCPOPT_EOL:
2830 case TCPOPT_NOP:
2831 ++opt;
2832 --hlen;
2833 break;
2834 case TCPOPT_WINDOW:
2835 wscale = opt[2];
2836 if (wscale > TCP_MAX_WINSHIFT)
2837 wscale = TCP_MAX_WINSHIFT;
2838 wscale |= PF_WSCALE_FLAG;
2839 /* FALLTHROUGH */
2840 default:
2841 optlen = opt[1];
2842 if (optlen < 2)
2843 optlen = 2;
2844 hlen -= optlen;
2845 opt += optlen;
2846 break;
2847 }
2848 }
2849 return (wscale);
2850}
2851
2852u_int16_t
2853pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2854{
2855 int hlen;
2856 u_int8_t hdr[60];
2857 u_int8_t *opt, optlen;
2858 u_int16_t mss = tcp_mssdflt;
2859
2860 hlen = th_off << 2; /* hlen <= sizeof(hdr) */
2861 if (hlen <= sizeof(struct tcphdr))
2862 return (0);
2863 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2864 return (0);
2865 opt = hdr + sizeof(struct tcphdr);
2866 hlen -= sizeof(struct tcphdr);
2867 while (hlen >= TCPOLEN_MAXSEG) {
2868 switch (*opt) {
2869 case TCPOPT_EOL:
2870 case TCPOPT_NOP:
2871 ++opt;
2872 --hlen;
2873 break;
2874 case TCPOPT_MAXSEG:
2875 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2876 /* FALLTHROUGH */
2877 default:
2878 optlen = opt[1];
2879 if (optlen < 2)
2880 optlen = 2;
2881 hlen -= optlen;
2882 opt += optlen;
2883 break;
2884 }
2885 }
2886 return (mss);
2887}
2888
2889u_int16_t
2890pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2891{
2892#ifdef INET
2893 struct sockaddr_in *dst;
2894 struct route ro;
2895#endif /* INET */
2896#ifdef INET6
2897 struct sockaddr_in6 *dst6;
2898 struct route_in6 ro6;
2899#endif /* INET6 */
2900 struct rtentry *rt = NULL;
2901 int hlen = 0;
2902 u_int16_t mss = tcp_mssdflt;
2903
2904 switch (af) {
2905#ifdef INET
2906 case AF_INET:
2907 hlen = sizeof(struct ip);
2908 bzero(&ro, sizeof(ro));
2909 dst = (struct sockaddr_in *)&ro.ro_dst;
2910 dst->sin_family = AF_INET;
2911 dst->sin_len = sizeof(*dst);
2912 dst->sin_addr = addr->v4;
2913 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2914 rt = ro.ro_rt;
2915 break;
2916#endif /* INET */
2917#ifdef INET6
2918 case AF_INET6:
2919 hlen = sizeof(struct ip6_hdr);
2920 bzero(&ro6, sizeof(ro6));
2921 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2922 dst6->sin6_family = AF_INET6;
2923 dst6->sin6_len = sizeof(*dst6);
2924 dst6->sin6_addr = addr->v6;
2925 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
2926 rt = ro6.ro_rt;
2927 break;
2928#endif /* INET6 */
2929 }
2930
2931 if (rt && rt->rt_ifp) {
2932 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2933 mss = max(tcp_mssdflt, mss);
2934 RTFREE(rt);
2935 }
2936 mss = min(mss, offer);
2937 mss = max(mss, 64); /* sanity - at least max opt space */
2938 return (mss);
2939}
2940
2941void
2942pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2943{
2944 struct pf_rule *r = s->rule.ptr;
2945
2946 s->rt_kif = NULL;
2947 if (!r->rt || r->rt == PF_FASTROUTE)
2948 return;
315a7da3 2949 switch (s->state_key->af) {
02742ec6
JS
2950#ifdef INET
2951 case AF_INET:
2952 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2953 &s->nat_src_node);
2954 s->rt_kif = r->rpool.cur->kif;
2955 break;
2956#endif /* INET */
2957#ifdef INET6
2958 case AF_INET6:
2959 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2960 &s->nat_src_node);
2961 s->rt_kif = r->rpool.cur->kif;
2962 break;
2963#endif /* INET6 */
2964 }
2965}
2966
315a7da3
JL
2967void
2968pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
2969{
2970 s->state_key = sk;
2971 sk->refcnt++;
2972
2973 /* list is sorted, if-bound states before floating */
2974 if (tail)
2975 TAILQ_INSERT_TAIL(&sk->states, s, next);
2976 else
2977 TAILQ_INSERT_HEAD(&sk->states, s, next);
2978}
2979
2980void
2981pf_detach_state(struct pf_state *s, int flags)
2982{
2983 struct pf_state_key *sk = s->state_key;
2984
2985 if (sk == NULL)
2986 return;
2987
2988 s->state_key = NULL;
2989 TAILQ_REMOVE(&sk->states, s, next);
2990 if (--sk->refcnt == 0) {
2991 if (!(flags & PF_DT_SKIP_EXTGWY))
2992 RB_REMOVE(pf_state_tree_ext_gwy,
2993 &pf_statetbl_ext_gwy, sk);
2994 if (!(flags & PF_DT_SKIP_LANEXT))
2995 RB_REMOVE(pf_state_tree_lan_ext,
2996 &pf_statetbl_lan_ext, sk);
2997 pool_put(&pf_state_key_pl, sk);
2998 }
2999}
3000
3001struct pf_state_key *
3002pf_alloc_state_key(struct pf_state *s)
3003{
3004 struct pf_state_key *sk;
3005
3006 if ((sk = pool_get(&pf_state_key_pl, PR_NOWAIT)) == NULL)
3007 return (NULL);
3008 bzero(sk, sizeof(*sk));
3009 TAILQ_INIT(&sk->states);
3010 pf_attach_state(sk, s, 0);
3011
3012 return (sk);
3013}
3014
02742ec6 3015int
315a7da3 3016pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
02742ec6 3017 struct pfi_kif *kif, struct mbuf *m, int off, void *h,
70224baa
JL
3018 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3019 struct ifqueue *ifq, struct inpcb *inp)
02742ec6
JS
3020{
3021 struct pf_rule *nr = NULL;
3022 struct pf_addr *saddr = pd->src, *daddr = pd->dst;
02742ec6
JS
3023 u_int16_t bport, nport = 0;
3024 sa_family_t af = pd->af;
02742ec6
JS
3025 struct pf_rule *r, *a = NULL;
3026 struct pf_ruleset *ruleset = NULL;
3027 struct pf_src_node *nsn = NULL;
315a7da3 3028 struct tcphdr *th = pd->hdr.tcp;
02742ec6 3029 u_short reason;
315a7da3 3030 int rewrite = 0, hdrlen = 0;
70224baa 3031 int tag = -1, rtableid = -1;
70224baa
JL
3032 int asd = 0;
3033 int match = 0;
315a7da3
JL
3034 int state_icmp = 0;
3035 u_int16_t mss = tcp_mssdflt;
3036 u_int16_t sport, dport;
3037 u_int8_t icmptype = 0, icmpcode = 0;
70224baa 3038
315a7da3 3039 if (direction == PF_IN && pf_check_congestion(ifq)) {
70224baa
JL
3040 REASON_SET(&reason, PFRES_CONGEST);
3041 return (PF_DROP);
3042 }
3043
3044 if (inp != NULL)
315a7da3
JL
3045 pd->lookup.done = pf_socket_lookup(direction, pd);
3046 else if (debug_pfugidhack) {
70224baa 3047 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
315a7da3 3048 pd->lookup.done = pf_socket_lookup(direction, pd);
70224baa 3049 }
315a7da3
JL
3050
3051 sport = dport = hdrlen = 0;
3052
3053 switch (pd->proto) {
3054 case IPPROTO_TCP:
3055 sport = th->th_sport;
3056 dport = th->th_dport;
3057 hdrlen = sizeof(*th);
3058 break;
3059 case IPPROTO_UDP:
3060 sport = pd->hdr.udp->uh_sport;
3061 dport = pd->hdr.udp->uh_dport;
3062 hdrlen = sizeof(*pd->hdr.udp);
3063 break;
3064#ifdef INET
3065 case IPPROTO_ICMP:
3066 if (pd->af != AF_INET)
3067 break;
3068 sport = dport = pd->hdr.icmp->icmp_id;
3069 icmptype = pd->hdr.icmp->icmp_type;
3070 icmpcode = pd->hdr.icmp->icmp_code;
3071
3072 if (icmptype == ICMP_UNREACH ||
3073 icmptype == ICMP_SOURCEQUENCH ||
3074 icmptype == ICMP_REDIRECT ||
3075 icmptype == ICMP_TIMXCEED ||
3076 icmptype == ICMP_PARAMPROB)
3077 state_icmp++;
3078 break;
3079#endif /* INET */
3080#ifdef INET6
3081 case IPPROTO_ICMPV6:
3082 if (pd->af != AF_INET6)
3083 break;
3084 sport = dport = pd->hdr.icmp6->icmp6_id;
3085 hdrlen = sizeof(*pd->hdr.icmp6);
3086 icmptype = pd->hdr.icmp6->icmp6_type;
3087 icmpcode = pd->hdr.icmp6->icmp6_code;
3088
3089 if (icmptype == ICMP6_DST_UNREACH ||
3090 icmptype == ICMP6_PACKET_TOO_BIG ||
3091 icmptype == ICMP6_TIME_EXCEEDED ||
3092 icmptype == ICMP6_PARAM_PROB)
3093 state_icmp++;
3094 break;
3095#endif /* INET6 */
3096 }
02742ec6
JS
3097
3098 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3099
3100 if (direction == PF_OUT) {
315a7da3 3101 bport = nport = sport;
02742ec6
JS
3102 /* check outgoing packet for BINAT/NAT */
3103 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
315a7da3 3104 saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
02742ec6 3105 PF_ACPY(&pd->baddr, saddr, af);
315a7da3
JL
3106 switch (pd->proto) {
3107 case IPPROTO_TCP:
3108 pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3109 &th->th_sum, &pd->naddr, nport, 0, af);
3110 sport = th->th_sport;
3111 rewrite++;
3112 break;
3113 case IPPROTO_UDP:
3114 pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3115 pd->ip_sum, &pd->hdr.udp->uh_sum,
3116 &pd->naddr, nport, 1, af);
3117 sport = pd->hdr.udp->uh_sport;
3118 rewrite++;
3119 break;
3120#ifdef INET
3121 case IPPROTO_ICMP:
3122 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3123 pd->naddr.v4.s_addr, 0);
3124 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3125 pd->hdr.icmp->icmp_cksum, sport, nport, 0);
3126 pd->hdr.icmp->icmp_id = nport;
3127 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3128 break;
3129#endif /* INET */
3130#ifdef INET6
3131 case IPPROTO_ICMPV6:
3132 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3133 &pd->naddr, 0);
3134 rewrite++;
3135 break;
3136#endif /* INET */
3137 default:
3138 switch (af) {
3139#ifdef INET
3140 case AF_INET:
3141 pf_change_a(&saddr->v4.s_addr,
3142 pd->ip_sum, pd->naddr.v4.s_addr, 0);
3143 break;
3144#endif /* INET */
3145#ifdef INET6
3146 case AF_INET6:
3147 PF_ACPY(saddr, &pd->naddr, af);
3148 break;
3149#endif /* INET */
3150 }
3151 break;
3152 }
3153
02742ec6
JS
3154 if (nr->natpass)
3155 r = NULL;
3156 pd->nat_rule = nr;
3157 }
3158 } else {
315a7da3 3159 bport = nport = dport;
02742ec6
JS
3160 /* check incoming packet for BINAT/RDR */
3161 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
315a7da3 3162 saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
02742ec6 3163 PF_ACPY(&pd->baddr, daddr, af);
315a7da3
JL
3164 switch (pd->proto) {
3165 case IPPROTO_TCP:
3166 pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3167 &th->th_sum, &pd->naddr, nport, 0, af);
3168 dport = th->th_dport;
3169 rewrite++;
3170 break;
3171 case IPPROTO_UDP:
3172 pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3173 pd->ip_sum, &pd->hdr.udp->uh_sum,
3174 &pd->naddr, nport, 1, af);
3175 dport = pd->hdr.udp->uh_dport;
3176 rewrite++;
3177 break;
3178#ifdef INET
3179 case IPPROTO_ICMP:
3180 pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3181 pd->naddr.v4.s_addr, 0);
3182 break;
3183#endif /* INET */
3184#ifdef INET6
3185 case IPPROTO_ICMPV6:
3186 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3187 &pd->naddr, 0);
3188 rewrite++;
3189 break;
3190#endif /* INET6 */
3191 default:
3192 switch (af) {
3193#ifdef INET
3194 case AF_INET:
3195 pf_change_a(&daddr->v4.s_addr,
3196 pd->ip_sum, pd->naddr.v4.s_addr, 0);
3197 break;
3198#endif /* INET */
3199#ifdef INET6
3200 case AF_INET6:
3201 PF_ACPY(daddr, &pd->naddr, af);
3202 break;
3203#endif /* INET */
3204 }
3205 break;
3206 }
3207
02742ec6
JS
3208 if (nr->natpass)
3209 r = NULL;
3210 pd->nat_rule = nr;
3211 }
3212 }
3213
3214 while (r != NULL) {
3215 r->evaluations++;
70224baa 3216 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
3217 r = r->skip[PF_SKIP_IFP].ptr;
3218 else if (r->direction && r->direction != direction)
3219 r = r->skip[PF_SKIP_DIR].ptr;
3220 else if (r->af && r->af != af)
3221 r = r->skip[PF_SKIP_AF].ptr;
315a7da3 3222 else if (r->proto && r->proto != pd->proto)
02742ec6 3223 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
3224 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3225 r->src.neg, kif))
02742ec6 3226 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
315a7da3 3227 /* tcp/udp only. port_op always 0 in other cases */
02742ec6 3228 else if (r->src.port_op && !pf_match_port(r->src.port_op,
315a7da3 3229 r->src.port[0], r->src.port[1], sport))
02742ec6 3230 r = r->skip[PF_SKIP_SRC_PORT].ptr;
70224baa
JL
3231 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3232 r->dst.neg, NULL))
02742ec6 3233 r = r->skip[PF_SKIP_DST_ADDR].ptr;
315a7da3 3234 /* tcp/udp only. port_op always 0 in other cases */
02742ec6 3235 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
315a7da3 3236 r->dst.port[0], r->dst.port[1], dport))
02742ec6 3237 r = r->skip[PF_SKIP_DST_PORT].ptr;
315a7da3
JL
3238 /* icmp only. type always 0 in other cases */
3239 else if (r->type && r->type != icmptype + 1)
3240 r = TAILQ_NEXT(r, entries);
3241 /* icmp only. type always 0 in other cases */
3242 else if (r->code && r->code != icmpcode + 1)
3243 r = TAILQ_NEXT(r, entries);
70224baa 3244 else if (r->tos && !(r->tos == pd->tos))
02742ec6
JS
3245 r = TAILQ_NEXT(r, entries);
3246 else if (r->rule_flag & PFRULE_FRAGMENT)
3247 r = TAILQ_NEXT(r, entries);
315a7da3
JL
3248 else if (pd->proto == IPPROTO_TCP &&
3249 (r->flagset & th->th_flags) != r->flags)
02742ec6 3250 r = TAILQ_NEXT(r, entries);
315a7da3 3251 /* tcp/udp only. uid.op always 0 in other cases */
70224baa 3252 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
315a7da3 3253 pf_socket_lookup(direction, pd), 1)) &&
02742ec6 3254 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
70224baa 3255 pd->lookup.uid))
02742ec6 3256 r = TAILQ_NEXT(r, entries);
315a7da3 3257 /* tcp/udp only. gid.op always 0 in other cases */
70224baa 3258 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
315a7da3 3259 pf_socket_lookup(direction, pd), 1)) &&
02742ec6 3260 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
70224baa 3261 pd->lookup.gid))
02742ec6 3262 r = TAILQ_NEXT(r, entries);
75fda04a
MD
3263 else if (r->prob && r->prob <= karc4random())
3264 r = TAILQ_NEXT(r, entries);
315a7da3 3265 else if (r->match_tag && !pf_match_tag(m, r, &tag))
02742ec6 3266 r = TAILQ_NEXT(r, entries);
315a7da3
JL
3267 else if (r->os_fingerprint != PF_OSFP_ANY &&
3268 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3269 pf_osfp_fingerprint(pd, m, off, th),
3270 r->os_fingerprint)))
02742ec6
JS
3271 r = TAILQ_NEXT(r, entries);
3272 else {
3273 if (r->tag)
3274 tag = r->tag;
70224baa
JL
3275 if (r->rtableid >= 0)
3276 rtableid = r->rtableid;
02742ec6 3277 if (r->anchor == NULL) {
70224baa 3278 match = 1;
02742ec6
JS
3279 *rm = r;
3280 *am = a;
3281 *rsm = ruleset;
3282 if ((*rm)->quick)
3283 break;
3284 r = TAILQ_NEXT(r, entries);
3285 } else
70224baa
JL
3286 pf_step_into_anchor(&asd, &ruleset,
3287 PF_RULESET_FILTER, &r, &a, &match);
02742ec6 3288 }
70224baa
JL
3289 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3290 PF_RULESET_FILTER, &r, &a, &match))
3291 break;
02742ec6
JS
3292 }
3293 r = *rm;
3294 a = *am;
3295 ruleset = *rsm;
3296
3297 REASON_SET(&reason, PFRES_MATCH);
3298
315a7da3 3299 if (r->log || (nr != NULL && nr->log)) {
02742ec6 3300 if (rewrite)
315a7da3 3301 m_copyback(m, off, hdrlen, pd->hdr.any);
70224baa
JL
3302 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3303 a, ruleset, pd);
02742ec6
JS
3304 }
3305
3306 if ((r->action == PF_DROP) &&
3307 ((r->rule_flag & PFRULE_RETURNRST) ||
3308 (r->rule_flag & PFRULE_RETURNICMP) ||
3309 (r->rule_flag & PFRULE_RETURN))) {
3310 /* undo NAT changes, if they have taken place */
3311 if (nr != NULL) {
3312 if (direction == PF_OUT) {
315a7da3
JL
3313 switch (pd->proto) {
3314 case IPPROTO_TCP:
3315 pf_change_ap(saddr, &th->th_sport,
3316 pd->ip_sum, &th->th_sum,
3317 &pd->baddr, bport, 0, af);
3318 sport = th->th_sport;
3319 rewrite++;
3320 break;
3321 case IPPROTO_UDP:
3322 pf_change_ap(saddr,
3323 &pd->hdr.udp->uh_sport, pd->ip_sum,
3324 &pd->hdr.udp->uh_sum, &pd->baddr,
3325 bport, 1, af);
3326 sport = pd->hdr.udp->uh_sport;
3327 rewrite++;
3328 break;
3329 case IPPROTO_ICMP:
3330#ifdef INET6
3331 case IPPROTO_ICMPV6:
3332#endif
3333 /* nothing! */
3334 break;
3335 default:
3336 switch (af) {
3337 case AF_INET:
3338 pf_change_a(&saddr->v4.s_addr,
3339 pd->ip_sum,
3340 pd->baddr.v4.s_addr, 0);
3341 break;
3342 case AF_INET6:
3343 PF_ACPY(saddr, &pd->baddr, af);
3344 break;
3345 }
3346 }
02742ec6 3347 } else {
315a7da3
JL
3348 switch (pd->proto) {
3349 case IPPROTO_TCP:
3350 pf_change_ap(daddr, &th->th_dport,
3351 pd->ip_sum, &th->th_sum,
3352 &pd->baddr, bport, 0, af);
3353 dport = th->th_dport;
3354 rewrite++;
3355 break;
3356 case IPPROTO_UDP:
3357 pf_change_ap(daddr,
3358 &pd->hdr.udp->uh_dport, pd->ip_sum,
3359 &pd->hdr.udp->uh_sum, &pd->baddr,
3360 bport, 1, af);
3361 dport = pd->hdr.udp->uh_dport;
3362 rewrite++;
3363 break;
3364 case IPPROTO_ICMP:
3365#ifdef INET6
3366 case IPPROTO_ICMPV6:
3367#endif
3368 /* nothing! */
3369 break;
3370 default:
3371 switch (af) {
3372 case AF_INET:
3373 pf_change_a(&daddr->v4.s_addr,
3374 pd->ip_sum,
3375 pd->baddr.v4.s_addr, 0);
3376 break;
3377 case AF_INET6:
3378 PF_ACPY(daddr, &pd->baddr, af);
3379 break;
3380 }
3381 }
02742ec6
JS
3382 }
3383 }
315a7da3
JL
3384 if (pd->proto == IPPROTO_TCP &&
3385 ((r->rule_flag & PFRULE_RETURNRST) ||
02742ec6
JS
3386 (r->rule_flag & PFRULE_RETURN)) &&
3387 !(th->th_flags & TH_RST)) {
315a7da3
JL
3388 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3389 struct ip *h = mtod(m, struct ip *);
02742ec6 3390
315a7da3
JL
3391 if (pf_check_proto_cksum(m, off,
3392 h->ip_len - off, IPPROTO_TCP, AF_INET))
3393 REASON_SET(&reason, PFRES_PROTCKSUM);
3394 else {
3395 if (th->th_flags & TH_SYN)
3396 ack++;
3397 if (th->th_flags & TH_FIN)
3398 ack++;
3399 pf_send_tcp(r, af, pd->dst,
3400 pd->src, th->th_dport, th->th_sport,
3401 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3402 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3403 }
02742ec6
JS
3404 } else if ((af == AF_INET) && r->return_icmp)
3405 pf_send_icmp(m, r->return_icmp >> 8,
3406 r->return_icmp & 255, af, r);
3407 else if ((af == AF_INET6) && r->return_icmp6)
3408 pf_send_icmp(m, r->return_icmp6 >> 8,
3409 r->return_icmp6 & 255, af, r);
3410 }
3411
315a7da3 3412 if (r->action == PF_DROP)
02742ec6
JS
3413 return (PF_DROP);
3414
315a7da3 3415 if (pf_tag_packet(m, tag, rtableid)) {
70224baa
JL
3416 REASON_SET(&reason, PFRES_MEMORY);
3417 return (PF_DROP);
3418 }
02742ec6 3419
315a7da3
JL
3420 if (!state_icmp && (r->keep_state || nr != NULL ||
3421 (pd->flags & PFDESC_TCP_NORM))) {
02742ec6 3422 /* create new state */
315a7da3 3423 u_int16_t len = 0;
02742ec6 3424 struct pf_state *s = NULL;
315a7da3 3425 struct pf_state_key *sk = NULL;
02742ec6
JS
3426 struct pf_src_node *sn = NULL;
3427
02742ec6 3428 /* check maximums */
70224baa
JL
3429 if (r->max_states && (r->states >= r->max_states)) {
3430 pf_status.lcounters[LCNT_STATES]++;
3431 REASON_SET(&reason, PFRES_MAXSTATES);
02742ec6 3432 goto cleanup;
70224baa
JL
3433 }
3434 /* src node for filter rule */
02742ec6
JS
3435 if ((r->rule_flag & PFRULE_SRCTRACK ||
3436 r->rpool.opts & PF_POOL_STICKYADDR) &&
70224baa
JL
3437 pf_insert_src_node(&sn, r, saddr, af) != 0) {
3438 REASON_SET(&reason, PFRES_SRCLIMIT);
02742ec6 3439 goto cleanup;
70224baa 3440 }
02742ec6
JS
3441 /* src node for translation rule */
3442 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3443 ((direction == PF_OUT &&
3444 pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
70224baa
JL
3445 (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3446 REASON_SET(&reason, PFRES_SRCLIMIT);
02742ec6 3447 goto cleanup;
70224baa 3448 }
02742ec6
JS
3449 s = pool_get(&pf_state_pl, PR_NOWAIT);
3450 if (s == NULL) {
70224baa 3451 REASON_SET(&reason, PFRES_MEMORY);
02742ec6
JS
3452cleanup:
3453 if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3454 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3455 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3456 pf_status.src_nodes--;
3457 pool_put(&pf_src_tree_pl, sn);
3458 }
3459 if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3460 nsn->expire == 0) {
3461 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3462 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3463 pf_status.src_nodes--;
3464 pool_put(&pf_src_tree_pl, nsn);
3465 }
315a7da3
JL
3466 if (sk != NULL) {
3467 pool_put(&pf_state_key_pl, sk);
3468 }
02742ec6
JS
3469 return (PF_DROP);
3470 }
3471 bzero(s, sizeof(*s));
02742ec6
JS
3472 s->rule.ptr = r;
3473 s->nat_rule.ptr = nr;
02742ec6 3474 s->anchor.ptr = a;
70224baa 3475 STATE_INC_COUNTERS(s);
02742ec6 3476 s->allow_opts = r->allow_opts;
70224baa
JL
3477 s->log = r->log & PF_LOG_ALL;
3478 if (nr != NULL)
3479 s->log |= nr->log & PF_LOG_ALL;
315a7da3
JL
3480 switch (pd->proto) {
3481 case IPPROTO_TCP:
3482 len = pd->tot_len - off - (th->th_off << 2);
3483 s->src.seqlo = ntohl(th->th_seq);
3484 s->src.seqhi = s->src.seqlo + len + 1;
3485 if ((th->th_flags & (TH_SYN|TH_ACK)) ==
3486 TH_SYN && r->keep_state == PF_STATE_MODULATE) {
3487 /* Generate sequence number modulator */
3488 while ((s->src.seqdiff =
3489 pf_new_isn(sk) - s->src.seqlo) == 0)
3490 ;
3491 pf_change_a(&th->th_seq, &th->th_sum,
3492 htonl(s->src.seqlo + s->src.seqdiff), 0);
3493 rewrite = 1;
3494 } else
3495 s->src.seqdiff = 0;
3496 if (th->th_flags & TH_SYN) {
3497 s->src.seqhi++;
3498 s->src.wscale = pf_get_wscale(m, off,
3499 th->th_off, af);
02742ec6 3500 }
315a7da3
JL
3501 s->src.max_win = MAX(ntohs(th->th_win), 1);
3502 if (s->src.wscale & PF_WSCALE_MASK) {
3503 /* Remove scale factor from initial window */
3504 int win = s->src.max_win;
3505 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3506 s->src.max_win = (win - 1) >>
3507 (s->src.wscale & PF_WSCALE_MASK);
02742ec6 3508 }
315a7da3
JL
3509 if (th->th_flags & TH_FIN)
3510 s->src.seqhi++;
3511 s->dst.seqhi = 1;
3512 s->dst.max_win = 1;
3513 s->src.state = TCPS_SYN_SENT;
3514 s->dst.state = TCPS_CLOSED;
3515 s->timeout = PFTM_TCP_FIRST_PACKET;
70224baa 3516 break;
315a7da3
JL
3517 case IPPROTO_UDP:
3518 s->src.state = PFUDPS_SINGLE;
3519 s->dst.state = PFUDPS_NO_TRAFFIC;
3520 s->timeout = PFTM_UDP_FIRST_PACKET;
3521 break;
3522 case IPPROTO_ICMP:
02742ec6 3523#ifdef INET6
315a7da3
JL
3524 case IPPROTO_ICMPV6:
3525#endif
3526 s->timeout = PFTM_ICMP_FIRST_PACKET;
3527 break;
3528 default:
3529 s->src.state = PFOTHERS_SINGLE;
3530 s->dst.state = PFOTHERS_NO_TRAFFIC;
3531 s->timeout = PFTM_OTHER_FIRST_PACKET;
02742ec6 3532 }
02742ec6 3533
02742ec6
JS
3534 s->creation = time_second;
3535 s->expire = time_second;
315a7da3 3536
02742ec6
JS
3537 if (sn != NULL) {
3538 s->src_node = sn;
3539 s->src_node->states++;
3540 }
3541 if (nsn != NULL) {
3542 PF_ACPY(&nsn->raddr, &pd->naddr, af);
3543 s->nat_src_node = nsn;
3544 s->nat_src_node->states++;
3545 }
315a7da3
JL
3546 if (pd->proto == IPPROTO_TCP) {
3547 if ((pd->flags & PFDESC_TCP_NORM) &&
3548 pf_normalize_tcp_init(m, off, pd, th, &s->src,
3549 &s->dst)) {
3550 REASON_SET(&reason, PFRES_MEMORY);
3551 pf_src_tree_remove_state(s);
3552 STATE_DEC_COUNTERS(s);
3553 pool_put(&pf_state_pl, s);
3554 return (PF_DROP);
3555 }
3556 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3557 pf_normalize_tcp_stateful(m, off, pd, &reason,
3558 th, s, &s->src, &s->dst, &rewrite)) {
3559 /* This really shouldn't happen!!! */
3560 DPFPRINTF(PF_DEBUG_URGENT,
3561 ("pf_normalize_tcp_stateful failed on "
3562 "first pkt"));
3563 pf_normalize_tcp_cleanup(s);
3564 pf_src_tree_remove_state(s);
3565 STATE_DEC_COUNTERS(s);
3566 pool_put(&pf_state_pl, s);
3567 return (PF_DROP);
3568 }
3569 }
3570
3571 if ((sk = pf_alloc_state_key(s)) == NULL) {
3572 REASON_SET(&reason, PFRES_MEMORY);
3573 goto cleanup;
3574 }
3575
3576 sk->proto = pd->proto;
3577 sk->direction = direction;
3578 sk->af = af;
3579 if (direction == PF_OUT) {
3580 PF_ACPY(&sk->gwy.addr, saddr, af);
3581 PF_ACPY(&sk->ext.addr, daddr, af);
3582 switch (pd->proto) {
3583 case IPPROTO_ICMP:
3584#ifdef INET6
3585 case IPPROTO_ICMPV6:
3586#endif
3587 sk->gwy.port = nport;
3588 sk->ext.port = 0;
3589 break;
3590 default:
3591 sk->gwy.port = sport;
3592 sk->ext.port = dport;
3593 }
3594 if (nr != NULL) {
3595 PF_ACPY(&sk->lan.addr, &pd->baddr, af);
3596 sk->lan.port = bport;
3597 } else {
3598 PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af);
3599 sk->lan.port = sk->gwy.port;
3600 }
3601 } else {
3602 PF_ACPY(&sk->lan.addr, daddr, af);
3603 PF_ACPY(&sk->ext.addr, saddr, af);
3604 switch (pd->proto) {
3605 case IPPROTO_ICMP:
3606#ifdef INET6
3607 case IPPROTO_ICMPV6:
3608#endif
3609 sk->lan.port = nport;
3610 sk->ext.port = 0;
3611 break;
3612 default:
3613 sk->lan.port = dport;
3614 sk->ext.port = sport;
3615 }
3616 if (nr != NULL) {
3617 PF_ACPY(&sk->gwy.addr, &pd->baddr, af);
3618 sk->gwy.port = bport;
3619 } else {
3620 PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af);
3621 sk->gwy.port = sk->lan.port;
3622 }
3623 }
3624
3625 s->hash = pf_state_hash(sk);
3626 s->pickup_mode = r->pickup_mode;
3627
3628 pf_set_rt_ifp(s, saddr); /* needs s->state_key set */
3629
02742ec6 3630 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
315a7da3
JL
3631 if (pd->proto == IPPROTO_TCP)
3632 pf_normalize_tcp_cleanup(s);
70224baa 3633 REASON_SET(&reason, PFRES_STATEINS);
02742ec6 3634 pf_src_tree_remove_state(s);
70224baa 3635 STATE_DEC_COUNTERS(s);
02742ec6
JS
3636 pool_put(&pf_state_pl, s);
3637 return (PF_DROP);
3638 } else
3639 *sm = s;
70224baa
JL
3640 if (tag > 0) {
3641 pf_tag_ref(tag);
3642 s->tag = tag;
3643 }
315a7da3
JL
3644 if (pd->proto == IPPROTO_TCP &&
3645 (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3646 r->keep_state == PF_STATE_SYNPROXY) {
3647 s->src.state = PF_TCPS_PROXY_SRC;
3648 if (nr != NULL) {
3649 if (direction == PF_OUT) {
3650 pf_change_ap(saddr, &th->th_sport,
3651 pd->ip_sum, &th->th_sum, &pd->baddr,
3652 bport, 0, af);
3653 sport = th->th_sport;
3654 } else {
3655 pf_change_ap(daddr, &th->th_dport,
3656 pd->ip_sum, &th->th_sum, &pd->baddr,
3657 bport, 0, af);
3658 sport = th->th_dport;
3659 }
3660 }
4fc5aa1c 3661 s->src.seqhi = htonl(karc4random());
315a7da3
JL
3662 /* Find mss option */
3663 mss = pf_get_mss(m, off, th->th_off, af);
3664 mss = pf_calc_mss(saddr, af, mss);
3665 mss = pf_calc_mss(daddr, af, mss);
3666 s->src.mss = mss;
3667 pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3668 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3669 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3670 REASON_SET(&reason, PFRES_SYNPROXY);
3671 return (PF_SYNPROXY_DROP);
3672 }
02742ec6
JS
3673 }
3674
315a7da3
JL
3675 /* copy back packet headers if we performed NAT operations */
3676 if (rewrite)
3677 m_copyback(m, off, hdrlen, pd->hdr.any);
3678
02742ec6
JS
3679 return (PF_PASS);
3680}
3681
3682int
3683pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3684 struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3685 struct pf_ruleset **rsm)
3686{
3687 struct pf_rule *r, *a = NULL;
3688 struct pf_ruleset *ruleset = NULL;
3689 sa_family_t af = pd->af;
3690 u_short reason;
3691 int tag = -1;
70224baa
JL
3692 int asd = 0;
3693 int match = 0;
02742ec6
JS
3694
3695 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3696 while (r != NULL) {
3697 r->evaluations++;
70224baa 3698 if (pfi_kif_match(r->kif, kif) == r->ifnot)
02742ec6
JS
3699 r = r->skip[PF_SKIP_IFP].ptr;
3700 else if (r->direction && r->direction != direction)
3701 r = r->skip[PF_SKIP_DIR].ptr;
3702 else if (r->af && r->af != af)
3703 r = r->skip[PF_SKIP_AF].ptr;
3704 else if (r->proto && r->proto != pd->proto)
3705 r = r->skip[PF_SKIP_PROTO].ptr;
70224baa
JL
3706 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3707 r->src.neg, kif))
02742ec6 3708 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
70224baa
JL
3709 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3710 r->dst.neg, NULL))
02742ec6 3711 r = r->skip[PF_SKIP_DST_ADDR].ptr;
70224baa 3712 else if (r->tos && !(r->tos == pd->tos))
02742ec6 3713 r = TAILQ_NEXT(r, entries);
a814431a
MD
3714 else if (r->os_fingerprint != PF_OSFP_ANY)
3715 r = TAILQ_NEXT(r, entries);
3716 else if (pd->proto == IPPROTO_UDP &&
3717 (r->src.port_op || r->dst.port_op))
3718 r = TAILQ_NEXT(r, entries);
3719 else if (pd->proto == IPPROTO_TCP &&
3720 (r->src.port_op || r->dst.port_op || r->flagset))
3721 r = TAILQ_NEXT(r, entries);
3722 else if ((pd->proto == IPPROTO_ICMP ||
3723 pd->proto == IPPROTO_ICMPV6) &&
3724 (r->type || r->code))
02742ec6 3725 r = TAILQ_NEXT(r, entries);
75fda04a
MD
3726 else if (r->prob && r->prob <= karc4random())
3727 r = TAILQ_NEXT(r, entries);
315a7da3 3728 else if (r->match_tag && !pf_match_tag(m, r, &tag))
02742ec6 3729 r = TAILQ_NEXT(r, entries);
02742ec6
JS
3730 else {
3731 if (r->anchor == NULL) {
70224baa 3732 match = 1;
02742ec6
JS
3733 *rm = r;
3734 *am = a;
3735 *rsm = ruleset;
3736 if ((*rm)->quick)
3737 break;
3738 r = TAILQ_NEXT(r, entries);
3739 } else
70224baa
JL
3740 pf_step_into_anchor(&asd, &ruleset,
3741 PF_RULESET_FILTER, &r, &a, &match);
02742ec6 3742 }
70224baa
JL
3743 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3744 PF_RULESET_FILTER, &r, &a, &match))
3745 break;
02742ec6
JS
3746 }
3747 r = *rm;
3748 a = *am;
3749 ruleset = *rsm;
3750
3751 REASON_SET(&reason, PFRES_MATCH);
3752
3753 if (r->log)
70224baa
JL
3754 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
3755 pd);
02742ec6
JS
3756
3757 if (r->action != PF_PASS)
3758 return (PF_DROP);
3759
315a7da3 3760 if (pf_tag_packet(m, tag, -1)) {
70224baa
JL
3761 REASON_SET(&reason, PFRES_MEMORY);
3762 return (PF_DROP);
3763 }
02742ec6
JS
3764
3765 return (PF_PASS);
3766}
3767
3768int
3769pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3770 struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3771 u_short *reason)
3772{
315a7da3 3773 struct pf_state_key_cmp key;
02742ec6
JS
3774 struct tcphdr *th = pd->hdr.tcp;
3775 u_int16_t win = ntohs(th->th_win);
70224baa 3776 u_int32_t ack, end, seq, orig_seq;
02742ec6
JS
3777 u_int8_t sws, dws;
3778 int ackskew;
3779 int copyback = 0;
3780 struct pf_state_peer *src, *dst;
3781
3782 key.af = pd->af;
3783 key.proto = IPPROTO_TCP;
3784 if (direction == PF_IN) {
3785 PF_ACPY(&key.ext.addr, pd->src, key.af);
3786 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3787 key.ext.port = th->th_sport;
3788 key.gwy.port = th->th_dport;
3789 } else {
3790 PF_ACPY(&key.lan.addr, pd->src, key.af);
3791 PF_ACPY(&key.ext.addr, pd->dst, key.af);
3792 key.lan.port = th->th_sport;
3793 key.ext.port = th->th_dport;
3794 }
3795
3796 STATE_LOOKUP();
3797
315a7da3 3798 if (direction == (*state)->state_key->direction) {
02742ec6
JS
3799 src = &(*state)->src;
3800 dst = &(*state)->dst;
3801 } else {
3802 src = &(*state)->dst;
3803 dst = &(*state)->src;
3804 }
3805
3806 if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
315a7da3 3807 if (direction != (*state)->state_key->direction) {
70224baa 3808 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6 3809 return (PF_SYNPROXY_DROP);
70224baa 3810 }
02742ec6 3811 if (th->th_flags & TH_SYN) {
70224baa
JL
3812 if (ntohl(th->th_seq) != (*state)->src.seqlo) {
3813 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6 3814 return (PF_DROP);
70224baa 3815 }
02742ec6
JS
3816 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3817 pd->src, th->th_dport, th->th_sport,
3818 (*state)->src.seqhi, ntohl(th->th_seq) + 1,
70224baa
JL
3819 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
3820 0, NULL, NULL);
3821 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6
JS
3822 return (PF_SYNPROXY_DROP);
3823 } else if (!(th->th_flags & TH_ACK) ||
3824 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
70224baa
JL
3825 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3826 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6 3827 return (PF_DROP);
70224baa
JL
3828 } else if ((*state)->src_node != NULL &&
3829 pf_src_connlimit(state)) {
3830 REASON_SET(reason, PFRES_SRCLIMIT);
3831 return (PF_DROP);
3832 } else
02742ec6
JS
3833 (*state)->src.state = PF_TCPS_PROXY_DST;
3834 }
3835 if ((*state)->src.state == PF_TCPS_PROXY_DST) {
3836 struct pf_state_host *src, *dst;
3837
3838 if (direction == PF_OUT) {
315a7da3
JL
3839 src = &(*state)->state_key->gwy;
3840 dst = &(*state)->state_key->ext;
02742ec6 3841 } else {
315a7da3
JL
3842 src = &(*state)->state_key->ext;
3843 dst = &(*state)->state_key->lan;
02742ec6 3844 }
315a7da3 3845 if (direction == (*state)->state_key->direction) {
02742ec6
JS
3846 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3847 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
70224baa
JL
3848 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3849 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6 3850 return (PF_DROP);
70224baa 3851 }
02742ec6
JS
3852 (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
3853 if ((*state)->dst.seqhi == 1)
4fc5aa1c 3854 (*state)->dst.seqhi = htonl(karc4random());
02742ec6
JS
3855 pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3856 &dst->addr, src->port, dst->port,
3857 (*state)->dst.seqhi, 0, TH_SYN, 0,
70224baa
JL
3858 (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
3859 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6
JS
3860 return (PF_SYNPROXY_DROP);
3861 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3862 (TH_SYN|TH_ACK)) ||
70224baa
JL
3863 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
3864 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6 3865 return (PF_DROP);
70224baa 3866 } else {
02742ec6
JS
3867 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3868 (*state)->dst.seqlo = ntohl(th->th_seq);
3869 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3870 pd->src, th->th_dport, th->th_sport,
3871 ntohl(th->th_ack), ntohl(th->th_seq) + 1,
70224baa
JL
3872 TH_ACK, (*state)->src.max_win, 0, 0, 0,
3873 (*state)->tag, NULL, NULL);
02742ec6
JS
3874 pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3875 &dst->addr, src->port, dst->port,
3876 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
70224baa
JL
3877 TH_ACK, (*state)->dst.max_win, 0, 0, 1,
3878 0, NULL, NULL);
02742ec6
JS
3879 (*state)->src.seqdiff = (*state)->dst.seqhi -
3880 (*state)->src.seqlo;
3881 (*state)->dst.seqdiff = (*state)->src.seqhi -
3882 (*state)->dst.seqlo;
3883 (*state)->src.seqhi = (*state)->src.seqlo +
02742ec6 3884 (*state)->dst.max_win;
9aa13ad5
MD
3885 (*state)->dst.seqhi = (*state)->dst.seqlo +
3886 (*state)->src.max_win;
02742ec6
JS
3887 (*state)->src.wscale = (*state)->dst.wscale = 0;
3888 (*state)->src.state = (*state)->dst.state =
3889 TCPS_ESTABLISHED;
70224baa 3890 REASON_SET(reason, PFRES_SYNPROXY);
02742ec6
JS
3891 return (PF_SYNPROXY_DROP);
3892 }
3893 }