Attempt to fix an occasional panic in pf_purge_expired_states() by
[dragonfly.git] / sys / net / pf / pf.c
1 /*      $FreeBSD: src/sys/contrib/pf/net/pf.c,v 1.19 2004/09/11 11:18:25 mlaier Exp $   */
2 /*      $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */
3 /* add  $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */
4 /*      $DragonFly: src/sys/net/pf/pf.c,v 1.9 2006/07/16 22:42:23 dillon Exp $ */
5
6 /*
7  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
8  *
9  * Copyright (c) 2001 Daniel Hartmeier
10  * Copyright (c) 2002,2003 Henning Brauer
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  *
17  *    - Redistributions of source code must retain the above copyright
18  *      notice, this list of conditions and the following disclaimer.
19  *    - Redistributions in binary form must reproduce the above
20  *      copyright notice, this list of conditions and the following
21  *      disclaimer in the documentation and/or other materials provided
22  *      with the distribution.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Effort sponsored in part by the Defense Advanced Research Projects
38  * Agency (DARPA) and Air Force Research Laboratory, Air Force
39  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
40  *
41  */
42
43 #include "opt_inet.h"
44 #include "opt_inet6.h"
45 #include "use_pfsync.h"
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/filio.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/kernel.h>
55 #include <sys/time.h>
56 #include <sys/sysctl.h>
57 #include <sys/endian.h>
58 #include <vm/vm_zone.h>
59
60 #include <machine/inttypes.h>
61
62 #include <net/if.h>
63 #include <net/if_types.h>
64 #include <net/bpf.h>
65 #include <net/route.h>
66
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/tcp.h>
73 #include <netinet/tcp_seq.h>
74 #include <netinet/udp.h>
75 #include <netinet/ip_icmp.h>
76 #include <netinet/in_pcb.h>
77 #include <netinet/tcp_timer.h>
78 #include <netinet/tcp_var.h>
79 #include <netinet/udp_var.h>
80 #include <netinet/icmp_var.h>
81
82 #include <net/pf/pfvar.h>
83 #include <net/pf/if_pflog.h>
84
85 #if NPFSYNC > 0
86 #include <net/pf/if_pfsync.h>
87 #endif /* NPFSYNC > 0 */
88
89 #ifdef INET6
90 #include <netinet/ip6.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/icmp6.h>
93 #include <netinet6/nd6.h>
94 #include <netinet6/ip6_var.h>
95 #include <netinet6/in6_pcb.h>
96 #endif /* INET6 */
97
98 #include <sys/in_cksum.h>
99 #include <machine/limits.h>
100 #include <sys/msgport2.h>
101 #include <sys/ucred.h>
102
/* Defined outside this file; declared here by hand. */
extern int ip_optcopy(struct ip *, struct ip *);

/*
 * Debug printf: passes the parenthesised argument list `x' to printf()
 * when pf_status.debug is at least `n'.  Typical use:
 *
 *      DPFPRINTF(PF_DEBUG_MISC, ("pf: something odd: %d\n", val));
 *
 * The body is wrapped in do { } while (0) so the macro behaves as one
 * statement and cannot capture the `else' of an enclosing if.
 */
#define DPFPRINTF(n, x)                                                 \
        do {                                                            \
                if (pf_status.debug >= (n))                             \
                        printf x;                                       \
        } while (0)
106
107 /*
108  * Global variables (file-scope state; none of these are static)
109  */
110
111 struct pf_anchorqueue    pf_anchors;
112 struct pf_ruleset        pf_main_ruleset;
/* NOTE(review): presumably the storage behind the active/inactive pointers below -- confirm */
113 struct pf_altqqueue      pf_altqs[2];
114 struct pf_palist         pf_pabuf;
115 struct pf_altqqueue     *pf_altqs_active;
116 struct pf_altqqueue     *pf_altqs_inactive;
/* counters (fcounters, scounters, states, src_nodes), debug level, stateid and hostid */
117 struct pf_status         pf_status;
118
119 u_int32_t                ticket_altqs_active;
120 u_int32_t                ticket_altqs_inactive;
121 int                      altqs_inactive_open;
122 u_int32_t                ticket_pabuf;
123
124 struct callout           pf_expire_to;                  /* expire timeout */
125
/* allocation zones; pf_src_tree_pl supplies the pf_src_node entries used by pf_insert_src_node() */
126 vm_zone_t                pf_src_tree_pl, pf_rule_pl;
127 vm_zone_t                pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
128
/*
 * Prototypes for routines used in this file.  Only pf_match_tag() is
 * declared static here; the rest have external linkage.
 */
129 void                     pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
130 void                     pf_print_state(struct pf_state *);
131 void                     pf_print_flags(u_int8_t);
132
133 u_int16_t                pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
134                             u_int8_t);
135 void                     pf_change_ap(struct pf_addr *, u_int16_t *,
136                             u_int16_t *, u_int16_t *, struct pf_addr *,
137                             u_int16_t, u_int8_t, sa_family_t);
138 #ifdef INET6
139 void                     pf_change_a6(struct pf_addr *, u_int16_t *,
140                             struct pf_addr *, u_int8_t);
141 #endif /* INET6 */
142 void                     pf_change_icmp(struct pf_addr *, u_int16_t *,
143                             struct pf_addr *, struct pf_addr *, u_int16_t,
144                             u_int16_t *, u_int16_t *, u_int16_t *,
145                             u_int16_t *, u_int8_t, sa_family_t);
146 void                     pf_send_tcp(const struct pf_rule *, sa_family_t,
147                             const struct pf_addr *, const struct pf_addr *,
148                             u_int16_t, u_int16_t, u_int32_t, u_int32_t,
149                             u_int8_t, u_int16_t, u_int16_t, u_int8_t);
150 void                     pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
151                             sa_family_t, struct pf_rule *);
152 struct pf_rule          *pf_match_translation(struct pf_pdesc *, struct mbuf *,
153                             int, int, struct pfi_kif *,
154                             struct pf_addr *, u_int16_t, struct pf_addr *,
155                             u_int16_t, int);
156 struct pf_rule          *pf_get_translation(struct pf_pdesc *, struct mbuf *,
157                             int, int, struct pfi_kif *, struct pf_src_node **,
158                             struct pf_addr *, u_int16_t,
159                             struct pf_addr *, u_int16_t,
160                             struct pf_addr *, u_int16_t *);
161 int                      pf_test_tcp(struct pf_rule **, struct pf_state **,
162                             int, struct pfi_kif *, struct mbuf *, int,
163                             void *, struct pf_pdesc *, struct pf_rule **,
164                             struct pf_ruleset **);
165 int                      pf_test_udp(struct pf_rule **, struct pf_state **,
166                             int, struct pfi_kif *, struct mbuf *, int,
167                             void *, struct pf_pdesc *, struct pf_rule **,
168                             struct pf_ruleset **);
169 int                      pf_test_icmp(struct pf_rule **, struct pf_state **,
170                             int, struct pfi_kif *, struct mbuf *, int,
171                             void *, struct pf_pdesc *, struct pf_rule **,
172                             struct pf_ruleset **);
173 int                      pf_test_other(struct pf_rule **, struct pf_state **,
174                             int, struct pfi_kif *, struct mbuf *, int, void *,
175                             struct pf_pdesc *, struct pf_rule **,
176                             struct pf_ruleset **);
177 int                      pf_test_fragment(struct pf_rule **, int,
178                             struct pfi_kif *, struct mbuf *, void *,
179                             struct pf_pdesc *, struct pf_rule **,
180                             struct pf_ruleset **);
181 int                      pf_test_state_tcp(struct pf_state **, int,
182                             struct pfi_kif *, struct mbuf *, int,
183                             void *, struct pf_pdesc *, u_short *);
184 int                      pf_test_state_udp(struct pf_state **, int,
185                             struct pfi_kif *, struct mbuf *, int,
186                             void *, struct pf_pdesc *);
187 int                      pf_test_state_icmp(struct pf_state **, int,
188                             struct pfi_kif *, struct mbuf *, int,
189                             void *, struct pf_pdesc *);
190 int                      pf_test_state_other(struct pf_state **, int,
191                             struct pfi_kif *, struct pf_pdesc *);
192 static int               pf_match_tag(struct mbuf *, struct pf_rule *,
193                                       struct pf_rule *, int *);
194 void                     pf_hash(struct pf_addr *, struct pf_addr *,
195                             struct pf_poolhashkey *, sa_family_t);
196 int                      pf_map_addr(u_int8_t, struct pf_rule *,
197                             struct pf_addr *, struct pf_addr *,
198                             struct pf_addr *, struct pf_src_node **);
199 int                      pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
200                             struct pf_addr *, struct pf_addr *, u_int16_t,
201                             struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
202                             struct pf_src_node **);
203 void                     pf_route(struct mbuf **, struct pf_rule *, int,
204                             struct ifnet *, struct pf_state *);
205 void                     pf_route6(struct mbuf **, struct pf_rule *, int,
206                             struct ifnet *, struct pf_state *);
207 int                      pf_socket_lookup(uid_t *, gid_t *,
208                             int, struct pf_pdesc *);
209 u_int8_t                 pf_get_wscale(struct mbuf *, int, u_int16_t,
210                             sa_family_t);
211 u_int16_t                pf_get_mss(struct mbuf *, int, u_int16_t,
212                             sa_family_t);
213 u_int16_t                pf_calc_mss(struct pf_addr *, sa_family_t,
214                                 u_int16_t);
215 void                     pf_set_rt_ifp(struct pf_state *,
216                             struct pf_addr *);
217 int                      pf_check_proto_cksum(struct mbuf *, int, int,
218                             u_int8_t, sa_family_t);
219 int                      pf_addr_wrap_neq(struct pf_addr_wrap *,
220                             struct pf_addr_wrap *);
/* Walks a kif and its pfik_parent chain looking for a state; defined below. */
221 struct pf_state         *pf_find_state_recurse(struct pfi_kif *,
222                             struct pf_state *, u_int8_t);
223
/* PF_LIMIT_MAX entries; NOTE(review): presumably indexed by the PF_LIMIT_* constants -- confirm */
224 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
225
/*
 * Looks up the state for the current packet and stores it in *state.
 * Relies on the expanding function having `direction', `state', `kif'
 * and `key' in scope.  Inbound packets search the ext/gwy trees, all
 * others the lan/ext trees.
 *
 * NOTE: this macro returns from the enclosing function:
 *   - PF_DROP when no state is found;
 *   - PF_PASS for an outbound packet whose state was created by a
 *     route-to (out) or reply-to (in) rule and is bound to a different,
 *     non-NULL rt_kif.
 */
#define STATE_LOOKUP()                                                  \
        do {                                                            \
                *state = pf_find_state_recurse(kif, &key,               \
                    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);    \
                if (*state == NULL)                                     \
                        return (PF_DROP);                               \
                if (direction == PF_OUT &&                              \
                    (((*state)->rule.ptr->rt == PF_ROUTETO &&           \
                    (*state)->rule.ptr->direction == PF_OUT) ||         \
                    ((*state)->rule.ptr->rt == PF_REPLYTO &&            \
                    (*state)->rule.ptr->direction == PF_IN)) &&         \
                    (*state)->rt_kif != NULL &&                         \
                    (*state)->rt_kif != kif)                            \
                        return (PF_PASS);                               \
        } while (0)
245
/*
 * Evaluates to non-zero when the lan and gwy endpoints of state `s'
 * differ, i.e. the first address word, (for AF_INET6) any of the
 * remaining three address words, or the port is not the same.
 *
 * The whole expansion is parenthesised so the macro can be negated or
 * combined with other operators without changing its meaning.
 */
#define STATE_TRANSLATE(s) \
        ((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
        ((s)->af == AF_INET6 && \
        ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
        (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
        (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
        (s)->lan.port != (s)->gwy.port)
253
/*
 * Selects the interface a state created by rule `r' on kif `k' is bound
 * to: `k' itself for PFRULE_IFBOUND rules, its parent for PFRULE_GRBOUND
 * rules, and its grandparent otherwise.
 */
#define BOUND_IFACE(r, k)                                               \
        ((((r)->rule_flag & PFRULE_IFBOUND) != 0) ? (k) :              \
            ((((r)->rule_flag & PFRULE_GRBOUND) != 0) ?                \
                (k)->pfik_parent : (k)->pfik_parent->pfik_parent))
257
/* Comparison callbacks handed to the RB_GENERATE() invocations below. */
258 static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
259 static int pf_state_compare_lan_ext(struct pf_state *,
260         struct pf_state *);
261 static int pf_state_compare_ext_gwy(struct pf_state *,
262         struct pf_state *);
263 static int pf_state_compare_id(struct pf_state *,
264         struct pf_state *);
265
/* Tree of source-tracking nodes, ordered by pf_src_compare(). */
266 struct pf_src_tree tree_src_tracking;
267
/* Tree of all states keyed by (id, creatorid); state_updates is a TAILQ that pf_insert_state() adds to. */
268 struct pf_state_tree_id tree_id;
269 struct pf_state_queue state_updates;
270
/*
 * Generate the red-black tree routines.  The lan_ext and ext_gwy trees
 * are kept per interface (kif->pfik_lan_ext / kif->pfik_ext_gwy).
 */
271 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
272 RB_GENERATE(pf_state_tree_lan_ext, pf_state,
273     u.s.entry_lan_ext, pf_state_compare_lan_ext);
274 RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
275     u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
276 RB_GENERATE(pf_state_tree_id, pf_state,
277     u.s.entry_id, pf_state_compare_id);
278
279 static int
280 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
281 {
282         int     diff;
283
284         if (a->rule.ptr > b->rule.ptr)
285                 return (1);
286         if (a->rule.ptr < b->rule.ptr)
287                 return (-1);
288         if ((diff = a->af - b->af) != 0)
289                 return (diff);
290         switch (a->af) {
291 #ifdef INET
292         case AF_INET:
293                 if (a->addr.addr32[0] > b->addr.addr32[0])
294                         return (1);
295                 if (a->addr.addr32[0] < b->addr.addr32[0])
296                         return (-1);
297                 break;
298 #endif /* INET */
299 #ifdef INET6
300         case AF_INET6:
301                 if (a->addr.addr32[3] > b->addr.addr32[3])
302                         return (1);
303                 if (a->addr.addr32[3] < b->addr.addr32[3])
304                         return (-1);
305                 if (a->addr.addr32[2] > b->addr.addr32[2])
306                         return (1);
307                 if (a->addr.addr32[2] < b->addr.addr32[2])
308                         return (-1);
309                 if (a->addr.addr32[1] > b->addr.addr32[1])
310                         return (1);
311                 if (a->addr.addr32[1] < b->addr.addr32[1])
312                         return (-1);
313                 if (a->addr.addr32[0] > b->addr.addr32[0])
314                         return (1);
315                 if (a->addr.addr32[0] < b->addr.addr32[0])
316                         return (-1);
317                 break;
318 #endif /* INET6 */
319         }
320         return (0);
321 }
322
323 static int
324 pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
325 {
326         int     diff;
327
328         if ((diff = a->proto - b->proto) != 0)
329                 return (diff);
330         if ((diff = a->af - b->af) != 0)
331                 return (diff);
332         switch (a->af) {
333 #ifdef INET
334         case AF_INET:
335                 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
336                         return (1);
337                 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
338                         return (-1);
339                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
340                         return (1);
341                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
342                         return (-1);
343                 break;
344 #endif /* INET */
345 #ifdef INET6
346         case AF_INET6:
347                 if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
348                         return (1);
349                 if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
350                         return (-1);
351                 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
352                         return (1);
353                 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
354                         return (-1);
355                 if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
356                         return (1);
357                 if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
358                         return (-1);
359                 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
360                         return (1);
361                 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
362                         return (-1);
363                 if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
364                         return (1);
365                 if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
366                         return (-1);
367                 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
368                         return (1);
369                 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
370                         return (-1);
371                 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
372                         return (1);
373                 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
374                         return (-1);
375                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
376                         return (1);
377                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
378                         return (-1);
379                 break;
380 #endif /* INET6 */
381         }
382
383         if ((diff = a->lan.port - b->lan.port) != 0)
384                 return (diff);
385         if ((diff = a->ext.port - b->ext.port) != 0)
386                 return (diff);
387
388         return (0);
389 }
390
391 static int
392 pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
393 {
394         int     diff;
395
396         if ((diff = a->proto - b->proto) != 0)
397                 return (diff);
398         if ((diff = a->af - b->af) != 0)
399                 return (diff);
400         switch (a->af) {
401 #ifdef INET
402         case AF_INET:
403                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
404                         return (1);
405                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
406                         return (-1);
407                 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
408                         return (1);
409                 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
410                         return (-1);
411                 break;
412 #endif /* INET */
413 #ifdef INET6
414         case AF_INET6:
415                 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
416                         return (1);
417                 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
418                         return (-1);
419                 if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
420                         return (1);
421                 if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
422                         return (-1);
423                 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
424                         return (1);
425                 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
426                         return (-1);
427                 if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
428                         return (1);
429                 if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
430                         return (-1);
431                 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
432                         return (1);
433                 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
434                         return (-1);
435                 if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
436                         return (1);
437                 if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
438                         return (-1);
439                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
440                         return (1);
441                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
442                         return (-1);
443                 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
444                         return (1);
445                 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
446                         return (-1);
447                 break;
448 #endif /* INET6 */
449         }
450
451         if ((diff = a->ext.port - b->ext.port) != 0)
452                 return (diff);
453         if ((diff = a->gwy.port - b->gwy.port) != 0)
454                 return (diff);
455
456         return (0);
457 }
458
459 static int
460 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
461 {
462         if (a->id > b->id)
463                 return (1);
464         if (a->id < b->id)
465                 return (-1);
466         if (a->creatorid > b->creatorid)
467                 return (1);
468         if (a->creatorid < b->creatorid)
469                 return (-1);
470
471         return (0);
472 }
473
#ifdef INET6
/*
 * Copies an address from src to dst: all four 32-bit words for AF_INET6,
 * only the first word for AF_INET (when INET is configured).  Any other
 * address family leaves dst untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
        int     w;

        if (af == AF_INET6) {
                for (w = 0; w < 4; w++)
                        dst->addr32[w] = src->addr32[w];
                return;
        }
#ifdef INET
        if (af == AF_INET)
                dst->addr32[0] = src->addr32[0];
#endif /* INET */
}
#endif
493
494 struct pf_state *
495 pf_find_state_byid(struct pf_state *key)
496 {
497         pf_status.fcounters[FCNT_STATE_SEARCH]++;
498         return (RB_FIND(pf_state_tree_id, &tree_id, key));
499 }
500
501 struct pf_state *
502 pf_find_state_recurse(struct pfi_kif *kif, struct pf_state *key, u_int8_t tree)
503 {
504         struct pf_state *s;
505
506         pf_status.fcounters[FCNT_STATE_SEARCH]++;
507
508         switch (tree) {
509         case PF_LAN_EXT:
510                 for (; kif != NULL; kif = kif->pfik_parent) {
511                         s = RB_FIND(pf_state_tree_lan_ext,
512                             &kif->pfik_lan_ext, key);
513                         if (s != NULL)
514                                 return (s);
515                 }
516                 return (NULL);
517         case PF_EXT_GWY:
518                 for (; kif != NULL; kif = kif->pfik_parent) {
519                         s = RB_FIND(pf_state_tree_ext_gwy,
520                             &kif->pfik_ext_gwy, key);
521                         if (s != NULL)
522                                 return (s);
523                 }
524                 return (NULL);
525         default:
526                 panic("pf_find_state_recurse");
527         }
528 }
529
530 struct pf_state *
531 pf_find_state_all(struct pf_state *key, u_int8_t tree, int *more)
532 {
533         struct pf_state *s, *ss = NULL;
534         struct pfi_kif  *kif;
535
536         pf_status.fcounters[FCNT_STATE_SEARCH]++;
537
538         switch (tree) {
539         case PF_LAN_EXT:
540                 TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
541                         s = RB_FIND(pf_state_tree_lan_ext,
542                             &kif->pfik_lan_ext, key);
543                         if (s == NULL)
544                                 continue;
545                         if (more == NULL)
546                                 return (s);
547                         ss = s;
548                         (*more)++;
549                 }
550                 return (ss);
551         case PF_EXT_GWY:
552                 TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
553                         s = RB_FIND(pf_state_tree_ext_gwy,
554                             &kif->pfik_ext_gwy, key);
555                         if (s == NULL)
556                                 continue;
557                         if (more == NULL)
558                                 return (s);
559                         ss = s;
560                         (*more)++;
561                 }
562                 return (ss);
563         default:
564                 panic("pf_find_state_all");
565         }
566 }
567
568 int
569 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
570     struct pf_addr *src, sa_family_t af)
571 {
572         struct pf_src_node      k;
573
574         if (*sn == NULL) {
575                 k.af = af;
576                 PF_ACPY(&k.addr, src, af);
577                 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
578                     rule->rpool.opts & PF_POOL_STICKYADDR)
579                         k.rule.ptr = rule;
580                 else
581                         k.rule.ptr = NULL;
582                 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
583                 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
584         }
585         if (*sn == NULL) {
586                 if (!rule->max_src_nodes ||
587                     rule->src_nodes < rule->max_src_nodes)
588                         (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
589                 if ((*sn) == NULL)
590                         return (-1);
591                 bzero(*sn, sizeof(struct pf_src_node));
592                 (*sn)->af = af;
593                 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
594                     rule->rpool.opts & PF_POOL_STICKYADDR)
595                         (*sn)->rule.ptr = rule;
596                 else
597                         (*sn)->rule.ptr = NULL;
598                 PF_ACPY(&(*sn)->addr, src, af);
599                 if (RB_INSERT(pf_src_tree,
600                     &tree_src_tracking, *sn) != NULL) {
601                         if (pf_status.debug >= PF_DEBUG_MISC) {
602                                 printf("pf: src_tree insert failed: ");
603                                 pf_print_host(&(*sn)->addr, 0, af);
604                                 printf("\n");
605                         }
606                         pool_put(&pf_src_tree_pl, *sn);
607                         return (-1);
608                 }
609                 (*sn)->creation = time_second;
610                 (*sn)->ruletype = rule->action;
611                 if ((*sn)->rule.ptr != NULL)
612                         (*sn)->rule.ptr->src_nodes++;
613                 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
614                 pf_status.src_nodes++;
615         } else {
616                 if (rule->max_src_states &&
617                     (*sn)->states >= rule->max_src_states)
618                         return (-1);
619         }
620         return (0);
621 }
622
623 int
624 pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
625 {
626         /* Thou MUST NOT insert multiple duplicate keys */
627         state->u.s.kif = kif;
628         if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
629                 if (pf_status.debug >= PF_DEBUG_MISC) {
630                         printf("pf: state insert failed: tree_lan_ext");
631                         printf(" lan: ");
632                         pf_print_host(&state->lan.addr, state->lan.port,
633                             state->af);
634                         printf(" gwy: ");
635                         pf_print_host(&state->gwy.addr, state->gwy.port,
636                             state->af);
637                         printf(" ext: ");
638                         pf_print_host(&state->ext.addr, state->ext.port,
639                             state->af);
640                         if (state->sync_flags & PFSTATE_FROMSYNC)
641                                 printf(" (from sync)");
642                         printf("\n");
643                 }
644                 return (-1);
645         }
646
647         if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
648                 if (pf_status.debug >= PF_DEBUG_MISC) {
649                         printf("pf: state insert failed: tree_ext_gwy");
650                         printf(" lan: ");
651                         pf_print_host(&state->lan.addr, state->lan.port,
652                             state->af);
653                         printf(" gwy: ");
654                         pf_print_host(&state->gwy.addr, state->gwy.port,
655                             state->af);
656                         printf(" ext: ");
657                         pf_print_host(&state->ext.addr, state->ext.port,
658                             state->af);
659                         if (state->sync_flags & PFSTATE_FROMSYNC)
660                                 printf(" (from sync)");
661                         printf("\n");
662                 }
663                 RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
664                 return (-1);
665         }
666
667         if (state->id == 0 && state->creatorid == 0) {
668                 state->id = htobe64(pf_status.stateid++);
669                 state->creatorid = pf_status.hostid;
670         }
671         if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
672                 if (pf_status.debug >= PF_DEBUG_MISC) {
673                         printf("pf: state insert failed: "
674                             "id: %016" PRIx64 " creatorid: %08" PRIx32,
675                             be64toh(state->id), ntohl(state->creatorid));
676                         if (state->sync_flags & PFSTATE_FROMSYNC)
677                                 printf(" (from sync)");
678                         printf("\n");
679                 }
680                 RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
681                 RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
682                 return (-1);
683         }
684         TAILQ_INSERT_HEAD(&state_updates, state, u.s.entry_updates);
685
686         pf_status.fcounters[FCNT_STATE_INSERT]++;
687         pf_status.states++;
688         pfi_attach_state(kif);
689 #if NPFSYNC
690         pfsync_insert_state(state);
691 #endif
692         return (0);
693 }
694
695 void
696 pf_purge_timeout(void *arg)
697 {
698         struct callout  *to = arg;
699
700         crit_enter();
701         pf_purge_expired_states();
702         pf_purge_expired_fragments();
703         pf_purge_expired_src_nodes();
704         crit_exit();
705
706         callout_reset(to, pf_default_rule.timeout[PFTM_INTERVAL] * hz,
707             pf_purge_timeout, to);
708 }
709
710 u_int32_t
711 pf_state_expires(const struct pf_state *state)
712 {
713         u_int32_t       timeout;
714         u_int32_t       start;
715         u_int32_t       end;
716         u_int32_t       states;
717
718         /* handle all PFTM_* > PFTM_MAX here */
719         if (state->timeout == PFTM_PURGE)
720                 return (time_second);
721         if (state->timeout == PFTM_UNTIL_PACKET)
722                 return (0);
723         KASSERT((state->timeout < PFTM_MAX), 
724             ("pf_state_expires: timeout > PFTM_MAX"));
725         timeout = state->rule.ptr->timeout[state->timeout];
726         if (!timeout)
727                 timeout = pf_default_rule.timeout[state->timeout];
728         start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
729         if (start) {
730                 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
731                 states = state->rule.ptr->states;
732         } else {
733                 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
734                 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
735                 states = pf_status.states;
736         }
737         if (end && states > start && start < end) {
738                 if (states < end)
739                         return (state->expire + timeout * (end - states) /
740                             (end - start));
741                 else
742                         return (time_second);
743         }
744         return (state->expire + timeout);
745 }
746
747 void
748 pf_purge_expired_src_nodes(void)
749 {
750          struct pf_src_node             *cur, *next;
751
752          for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
753                  next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
754
755                  if (cur->states <= 0 && cur->expire <= time_second) {
756                          if (cur->rule.ptr != NULL) {
757                                  cur->rule.ptr->src_nodes--;
758                                  if (cur->rule.ptr->states <= 0 &&
759                                      cur->rule.ptr->max_src_nodes <= 0)
760                                          pf_rm_rule(NULL, cur->rule.ptr);
761                          }
762                          RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
763                          pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
764                          pf_status.src_nodes--;
765                          pool_put(&pf_src_tree_pl, cur);
766                  }
767          }
768 }
769
770 void
771 pf_src_tree_remove_state(struct pf_state *s)
772 {
773         u_int32_t timeout;
774
775         if (s->src_node != NULL) {
776                 if (--s->src_node->states <= 0) {
777                         timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
778                         if (!timeout)
779                                 timeout =
780                                     pf_default_rule.timeout[PFTM_SRC_NODE];
781                         s->src_node->expire = time_second + timeout;
782                 }
783         }
784         if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
785                 if (--s->nat_src_node->states <= 0) {
786                         timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
787                         if (!timeout)
788                                 timeout =
789                                     pf_default_rule.timeout[PFTM_SRC_NODE];
790                         s->nat_src_node->expire = time_second + timeout;
791                 }
792         }
793         s->src_node = s->nat_src_node = NULL;
794 }
795
796 static int
797 pf_purge_expired_states_callback(struct pf_state *cur, void *data __unused)
798 {
799         if (pf_state_expires(cur) <= time_second) {
800                 RB_REMOVE(pf_state_tree_ext_gwy,
801                     &cur->u.s.kif->pfik_ext_gwy, cur);
802                 RB_REMOVE(pf_state_tree_lan_ext,
803                     &cur->u.s.kif->pfik_lan_ext, cur);
804                 RB_REMOVE(pf_state_tree_id, &tree_id, cur);
805                 if (cur->src.state == PF_TCPS_PROXY_DST) {
806                         pf_send_tcp(cur->rule.ptr, cur->af,
807                             &cur->ext.addr, &cur->lan.addr,
808                             cur->ext.port, cur->lan.port,
809                             cur->src.seqhi, cur->src.seqlo + 1, 0,
810                             TH_RST|TH_ACK, 0, 0);
811                 }
812 #if NPFSYNC
813                 pfsync_delete_state(cur);
814 #endif
815                 pf_src_tree_remove_state(cur);
816                 if (--cur->rule.ptr->states <= 0 &&
817                     cur->rule.ptr->src_nodes <= 0)
818                         pf_rm_rule(NULL, cur->rule.ptr);
819                 if (cur->nat_rule.ptr != NULL)
820                         if (--cur->nat_rule.ptr->states <= 0 &&
821                                 cur->nat_rule.ptr->src_nodes <= 0)
822                                 pf_rm_rule(NULL, cur->nat_rule.ptr);
823                 if (cur->anchor.ptr != NULL)
824                         if (--cur->anchor.ptr->states <= 0)
825                                 pf_rm_rule(NULL, cur->anchor.ptr);
826                 pf_normalize_tcp_cleanup(cur);
827                 pfi_detach_state(cur->u.s.kif);
828                 TAILQ_REMOVE(&state_updates, cur, u.s.entry_updates);
829                 pool_put(&pf_state_pl, cur);
830                 pf_status.fcounters[FCNT_STATE_REMOVALS]++;
831                 pf_status.states--;
832         }
833         return(0);
834 }
835
836 void
837 pf_purge_expired_states(void)
838 {
839         RB_SCAN(pf_state_tree_id, &tree_id, NULL,
840                 pf_purge_expired_states_callback, NULL);
841 }
842
843
844 int
845 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
846 {
847         if (aw->type != PF_ADDR_TABLE)
848                 return (0);
849         if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
850                 return (1);
851         return (0);
852 }
853
854 void
855 pf_tbladdr_remove(struct pf_addr_wrap *aw)
856 {
857         if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
858                 return;
859         pfr_detach_table(aw->p.tbl);
860         aw->p.tbl = NULL;
861 }
862
863 void
864 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
865 {
866         struct pfr_ktable *kt = aw->p.tbl;
867
868         if (aw->type != PF_ADDR_TABLE || kt == NULL)
869                 return;
870         if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
871                 kt = kt->pfrkt_root;
872         aw->p.tbl = NULL;
873         aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
874                 kt->pfrkt_cnt : -1;
875 }
876
877 void
878 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
879 {
880         switch (af) {
881 #ifdef INET
882         case AF_INET: {
883                 u_int32_t a = ntohl(addr->addr32[0]);
884                 printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
885                     (a>>8)&255, a&255);
886                 if (p) {
887                         p = ntohs(p);
888                         printf(":%u", p);
889                 }
890                 break;
891         }
892 #endif /* INET */
893 #ifdef INET6
894         case AF_INET6: {
895                 u_int16_t b;
896                 u_int8_t i, curstart = 255, curend = 0,
897                     maxstart = 0, maxend = 0;
898                 for (i = 0; i < 8; i++) {
899                         if (!addr->addr16[i]) {
900                                 if (curstart == 255)
901                                         curstart = i;
902                                 else
903                                         curend = i;
904                         } else {
905                                 if (curstart) {
906                                         if ((curend - curstart) >
907                                             (maxend - maxstart)) {
908                                                 maxstart = curstart;
909                                                 maxend = curend;
910                                                 curstart = 255;
911                                         }
912                                 }
913                         }
914                 }
915                 for (i = 0; i < 8; i++) {
916                         if (i >= maxstart && i <= maxend) {
917                                 if (maxend != 7) {
918                                         if (i == maxstart)
919                                                 printf(":");
920                                 } else {
921                                         if (i == maxend)
922                                                 printf(":");
923                                 }
924                         } else {
925                                 b = ntohs(addr->addr16[i]);
926                                 printf("%x", b);
927                                 if (i < 7)
928                                         printf(":");
929                         }
930                 }
931                 if (p) {
932                         p = ntohs(p);
933                         printf("[%u]", p);
934                 }
935                 break;
936         }
937 #endif /* INET6 */
938         }
939 }
940
941 void
942 pf_print_state(struct pf_state *s)
943 {
944         switch (s->proto) {
945         case IPPROTO_TCP:
946                 printf("TCP ");
947                 break;
948         case IPPROTO_UDP:
949                 printf("UDP ");
950                 break;
951         case IPPROTO_ICMP:
952                 printf("ICMP ");
953                 break;
954         case IPPROTO_ICMPV6:
955                 printf("ICMPV6 ");
956                 break;
957         default:
958                 printf("%u ", s->proto);
959                 break;
960         }
961         pf_print_host(&s->lan.addr, s->lan.port, s->af);
962         printf(" ");
963         pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
964         printf(" ");
965         pf_print_host(&s->ext.addr, s->ext.port, s->af);
966         printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
967             s->src.seqhi, s->src.max_win, s->src.seqdiff);
968         if (s->src.wscale && s->dst.wscale)
969                 printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
970         printf("]");
971         printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
972             s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
973         if (s->src.wscale && s->dst.wscale)
974                 printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
975         printf("]");
976         printf(" %u:%u", s->src.state, s->dst.state);
977 }
978
979 void
980 pf_print_flags(u_int8_t f)
981 {
982         if (f)
983                 printf(" ");
984         if (f & TH_FIN)
985                 printf("F");
986         if (f & TH_SYN)
987                 printf("S");
988         if (f & TH_RST)
989                 printf("R");
990         if (f & TH_PUSH)
991                 printf("P");
992         if (f & TH_ACK)
993                 printf("A");
994         if (f & TH_URG)
995                 printf("U");
996         if (f & TH_ECE)
997                 printf("E");
998         if (f & TH_CWR)
999                 printf("W");
1000 }
1001
/*
 * For skip criterion i, make every rule from head[i] up to (but not
 * including) cur point at cur through skip[i], leaving head[i] == cur.
 * Expects `head` and `cur` to be in scope where the macro is expanded.
 */
#define PF_SET_SKIP_STEPS(i)						\
	do {								\
		for (; head[(i)] != cur;				\
		    head[(i)] = TAILQ_NEXT(head[(i)], entries))		\
			head[(i)]->skip[(i)].ptr = cur;			\
	} while (0)
1009
1010 void
1011 pf_calc_skip_steps(struct pf_rulequeue *rules)
1012 {
1013         struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1014         int i;
1015
1016         cur = TAILQ_FIRST(rules);
1017         prev = cur;
1018         for (i = 0; i < PF_SKIP_COUNT; ++i)
1019                 head[i] = cur;
1020         while (cur != NULL) {
1021
1022                 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1023                         PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1024                 if (cur->direction != prev->direction)
1025                         PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1026                 if (cur->af != prev->af)
1027                         PF_SET_SKIP_STEPS(PF_SKIP_AF);
1028                 if (cur->proto != prev->proto)
1029                         PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1030                 if (cur->src.not != prev->src.not ||
1031                     pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1032                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1033                 if (cur->src.port[0] != prev->src.port[0] ||
1034                     cur->src.port[1] != prev->src.port[1] ||
1035                     cur->src.port_op != prev->src.port_op)
1036                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1037                 if (cur->dst.not != prev->dst.not ||
1038                     pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1039                         PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1040                 if (cur->dst.port[0] != prev->dst.port[0] ||
1041                     cur->dst.port[1] != prev->dst.port[1] ||
1042                     cur->dst.port_op != prev->dst.port_op)
1043                         PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1044
1045                 prev = cur;
1046                 cur = TAILQ_NEXT(cur, entries);
1047         }
1048         for (i = 0; i < PF_SKIP_COUNT; ++i)
1049                 PF_SET_SKIP_STEPS(i);
1050 }
1051
1052 int
1053 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1054 {
1055         if (aw1->type != aw2->type)
1056                 return (1);
1057         switch (aw1->type) {
1058         case PF_ADDR_ADDRMASK:
1059                 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1060                         return (1);
1061                 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1062                         return (1);
1063                 return (0);
1064         case PF_ADDR_DYNIFTL:
1065                 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1066         case PF_ADDR_NOROUTE:
1067                 return (0);
1068         case PF_ADDR_TABLE:
1069                 return (aw1->p.tbl != aw2->p.tbl);
1070         default:
1071                 printf("invalid address type: %d\n", aw1->type);
1072                 return (1);
1073         }
1074 }
1075
1076 void
1077 pf_update_anchor_rules(void)
1078 {
1079         struct pf_rule  *rule;
1080         int              i;
1081
1082         for (i = 0; i < PF_RULESET_MAX; ++i)
1083                 TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr,
1084                     entries)
1085                         if (rule->anchorname[0])
1086                                 rule->anchor = pf_find_anchor(rule->anchorname);
1087                         else
1088                                 rule->anchor = NULL;
1089 }
1090
/*
 * Incrementally adjust a 16-bit checksum for one 16-bit word that
 * changed from old to new, and return the adjusted checksum.
 *
 * The sum cksum + old - new is folded back into 16 bits by adding the
 * upper half to the lower half.  When udp is non-zero, a stored checksum
 * of 0 is returned untouched and a computed result of 0 is returned as
 * 0xFFFF (presumably because 0 means "no checksum" for UDP).
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	/* Unsigned arithmetic: a negative difference wraps and is folded. */
	sum = (u_int32_t)cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
1105
1106 void
1107 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1108     struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1109 {
1110         struct pf_addr  ao;
1111         u_int16_t       po = *p;
1112
1113         PF_ACPY(&ao, a, af);
1114         PF_ACPY(a, an, af);
1115
1116         *p = pn;
1117
1118         switch (af) {
1119 #ifdef INET
1120         case AF_INET:
1121                 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1122                     ao.addr16[0], an->addr16[0], 0),
1123                     ao.addr16[1], an->addr16[1], 0);
1124                 *p = pn;
1125                 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1126                     ao.addr16[0], an->addr16[0], u),
1127                     ao.addr16[1], an->addr16[1], u),
1128                     po, pn, u);
1129                 break;
1130 #endif /* INET */
1131 #ifdef INET6
1132         case AF_INET6:
1133                 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1134                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1135                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1136                     ao.addr16[0], an->addr16[0], u),
1137                     ao.addr16[1], an->addr16[1], u),
1138                     ao.addr16[2], an->addr16[2], u),
1139                     ao.addr16[3], an->addr16[3], u),
1140                     ao.addr16[4], an->addr16[4], u),
1141                     ao.addr16[5], an->addr16[5], u),
1142                     ao.addr16[6], an->addr16[6], u),
1143                     ao.addr16[7], an->addr16[7], u),
1144                     po, pn, u);
1145                 break;
1146 #endif /* INET6 */
1147         }
1148 }
1149
1150
1151 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
1152 void
1153 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
1154 {
1155         u_int32_t       ao;
1156
1157         memcpy(&ao, a, sizeof(ao));
1158         memcpy(a, &an, sizeof(u_int32_t));
1159         *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
1160             ao % 65536, an % 65536, u);
1161 }
1162
#ifdef INET6
/*
 * Replace the IPv6 address at a with an and adjust checksum *c for each
 * of the eight 16-bit words of the address.  u is passed through to
 * pf_cksum_fixup() as its udp flag.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	old_addr;
	u_int16_t	sum;
	int		w;

	PF_ACPY(&old_addr, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (w = 0; w < 8; w++)
		sum = pf_cksum_fixup(sum, old_addr.addr16[w],
		    an->addr16[w], u);
	*c = sum;
}
#endif /* INET6 */
1185
1186 void
1187 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1188     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1189     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1190 {
1191         struct pf_addr  oia, ooa;
1192
1193         PF_ACPY(&oia, ia, af);
1194         PF_ACPY(&ooa, oa, af);
1195
1196         /* Change inner protocol port, fix inner protocol checksum. */
1197         if (ip != NULL) {
1198                 u_int16_t       oip = *ip;
1199                 u_int32_t       opc = 0;
1200
1201                 if (pc != NULL)
1202                         opc = *pc;
1203                 *ip = np;
1204                 if (pc != NULL)
1205                         *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1206                 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1207                 if (pc != NULL)
1208                         *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1209         }
1210         /* Change inner ip address, fix inner ip and icmp checksums. */
1211         PF_ACPY(ia, na, af);
1212         switch (af) {
1213 #ifdef INET
1214         case AF_INET: {
1215                 u_int32_t        oh2c = *h2c;
1216
1217                 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1218                     oia.addr16[0], ia->addr16[0], 0),
1219                     oia.addr16[1], ia->addr16[1], 0);
1220                 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1221                     oia.addr16[0], ia->addr16[0], 0),
1222                     oia.addr16[1], ia->addr16[1], 0);
1223                 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1224                 break;
1225         }
1226 #endif /* INET */
1227 #ifdef INET6
1228         case AF_INET6:
1229                 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1230                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1231                     pf_cksum_fixup(pf_cksum_fixup(*ic,
1232                     oia.addr16[0], ia->addr16[0], u),
1233                     oia.addr16[1], ia->addr16[1], u),
1234                     oia.addr16[2], ia->addr16[2], u),
1235                     oia.addr16[3], ia->addr16[3], u),
1236                     oia.addr16[4], ia->addr16[4], u),
1237                     oia.addr16[5], ia->addr16[5], u),
1238                     oia.addr16[6], ia->addr16[6], u),
1239                     oia.addr16[7], ia->addr16[7], u);
1240                 break;
1241 #endif /* INET6 */
1242         }
1243         /* Change outer ip address, fix outer ip or icmpv6 checksum. */
1244         PF_ACPY(oa, na, af);
1245         switch (af) {
1246 #ifdef INET
1247         case AF_INET:
1248                 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1249                     ooa.addr16[0], oa->addr16[0], 0),
1250                     ooa.addr16[1], oa->addr16[1], 0);
1251                 break;
1252 #endif /* INET */
1253 #ifdef INET6
1254         case AF_INET6:
1255                 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1256                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1257                     pf_cksum_fixup(pf_cksum_fixup(*ic,
1258                     ooa.addr16[0], oa->addr16[0], u),
1259                     ooa.addr16[1], oa->addr16[1], u),
1260                     ooa.addr16[2], oa->addr16[2], u),
1261                     ooa.addr16[3], oa->addr16[3], u),
1262                     ooa.addr16[4], oa->addr16[4], u),
1263                     ooa.addr16[5], oa->addr16[5], u),
1264                     ooa.addr16[6], oa->addr16[6], u),
1265                     ooa.addr16[7], oa->addr16[7], u);
1266                 break;
1267 #endif /* INET6 */
1268         }
1269 }
1270
1271 void
1272 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1273     const struct pf_addr *saddr, const struct pf_addr *daddr,
1274     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1275     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl)
1276 {
1277         struct mbuf     *m;
1278         int              len = 0, tlen;
1279 #ifdef INET
1280         struct ip       *h = NULL;
1281 #endif /* INET */
1282 #ifdef INET6
1283         struct ip6_hdr  *h6 = NULL;
1284 #endif /* INET6 */
1285         struct tcphdr   *th = NULL;
1286         char *opt;
1287
1288         /* maximum segment size tcp option */
1289         tlen = sizeof(struct tcphdr);
1290         if (mss)
1291                 tlen += 4;
1292
1293         switch (af) {
1294 #ifdef INET
1295         case AF_INET:
1296                 len = sizeof(struct ip) + tlen;
1297                 break;
1298 #endif /* INET */
1299 #ifdef INET6
1300         case AF_INET6:
1301                 len = sizeof(struct ip6_hdr) + tlen;
1302                 break;
1303 #endif /* INET6 */
1304         }
1305
1306         /* create outgoing mbuf */
1307         m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1308         if (m == NULL)
1309                 return;
1310         m->m_pkthdr.fw_flags |= PF_MBUF_GENERATED;
1311 #ifdef ALTQ
1312         if (r != NULL && r->qid) {
1313                 m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED;
1314                 m->m_pkthdr.altq_qid = r->qid;
1315                 m->m_pkthdr.ecn_af = af;
1316                 m->m_pkthdr.header = mtod(m, struct ip *);
1317         }
1318 #endif
1319         m->m_data += max_linkhdr;
1320         m->m_pkthdr.len = m->m_len = len;
1321         m->m_pkthdr.rcvif = NULL;
1322         bzero(m->m_data, len);
1323         switch (af) {
1324 #ifdef INET
1325         case AF_INET:
1326                 h = mtod(m, struct ip *);
1327
1328                 /* IP header fields included in the TCP checksum */
1329                 h->ip_p = IPPROTO_TCP;
1330                 h->ip_len = tlen;
1331                 h->ip_src.s_addr = saddr->v4.s_addr;
1332                 h->ip_dst.s_addr = daddr->v4.s_addr;
1333
1334                 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1335                 break;
1336 #endif /* INET */
1337 #ifdef INET6
1338         case AF_INET6:
1339                 h6 = mtod(m, struct ip6_hdr *);
1340
1341                 /* IP header fields included in the TCP checksum */
1342                 h6->ip6_nxt = IPPROTO_TCP;
1343                 h6->ip6_plen = htons(tlen);
1344                 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1345                 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1346
1347                 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1348                 break;
1349 #endif /* INET6 */
1350         }
1351
1352         /* TCP header */
1353         th->th_sport = sport;
1354         th->th_dport = dport;
1355         th->th_seq = htonl(seq);
1356         th->th_ack = htonl(ack);
1357         th->th_off = tlen >> 2;
1358         th->th_flags = flags;
1359         th->th_win = htons(win);
1360
1361         if (mss) {
1362                 opt = (char *)(th + 1);
1363                 opt[0] = TCPOPT_MAXSEG;
1364                 opt[1] = 4;
1365                 mss = htons(mss);
1366                 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1367         }
1368
1369         switch (af) {
1370 #ifdef INET
1371         case AF_INET:
1372                 /* TCP checksum */
1373                 th->th_sum = in_cksum(m, len);
1374
1375                 /* Finish the IP header */
1376                 h->ip_v = 4;
1377                 h->ip_hl = sizeof(*h) >> 2;
1378                 h->ip_tos = IPTOS_LOWDELAY;
1379                 h->ip_len = len;
1380                 h->ip_off = path_mtu_discovery ? IP_DF : 0;
1381                 h->ip_ttl = ttl ? ttl : ip_defttl;
1382                 h->ip_sum = 0;
1383                 ip_output(m, (void *)NULL, (void *)NULL, 0, (void *)NULL,
1384                     (void *)NULL);
1385                 break;
1386 #endif /* INET */
1387 #ifdef INET6
1388         case AF_INET6:
1389                 /* TCP checksum */
1390                 th->th_sum = in6_cksum(m, IPPROTO_TCP,
1391                     sizeof(struct ip6_hdr), tlen);
1392
1393                 h6->ip6_vfc |= IPV6_VERSION;
1394                 h6->ip6_hlim = IPV6_DEFHLIM;
1395
1396                 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1397                 break;
1398 #endif /* INET6 */
1399         }
1400 }
1401
1402 void
1403 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1404     struct pf_rule *r)
1405 {
1406         struct mbuf     *m0;
1407
1408         m0 = m_copypacket(m, MB_DONTWAIT);
1409         if (m0 == NULL)
1410                 return;
1411         m0->m_pkthdr.fw_flags |= PF_MBUF_GENERATED;
1412
1413 #ifdef ALTQ
1414         if (r->qid) {
1415                 m->m_pkthdr.fw_flags |= ALTQ_MBUF_TAGGED;
1416                 m->m_pkthdr.altq_qid = r->qid;
1417                 m->m_pkthdr.ecn_af = af;
1418                 m->m_pkthdr.header = mtod(m0, struct ip *);
1419         }
1420 #endif
1421
1422         switch (af) {
1423 #ifdef INET
1424         case AF_INET:
1425                 icmp_error(m0, type, code, 0, 0);
1426                 break;
1427 #endif /* INET */
1428 #ifdef INET6
1429         case AF_INET6:
1430                 icmp6_error(m0, type, code, 0);
1431                 break;
1432 #endif /* INET6 */
1433         }
1434 }
1435
1436 /*
1437  * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
1438  * If n is 0, they match if they are equal. If n is != 0, they match if they
1439  * are different.
1440  */
1441 int
1442 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
1443     struct pf_addr *b, sa_family_t af)
1444 {
1445         int     match = 0;
1446
1447         switch (af) {
1448 #ifdef INET
1449         case AF_INET:
1450                 if ((a->addr32[0] & m->addr32[0]) ==
1451                     (b->addr32[0] & m->addr32[0]))
1452                         match++;
1453                 break;
1454 #endif /* INET */
1455 #ifdef INET6
1456         case AF_INET6:
1457                 if (((a->addr32[0] & m->addr32[0]) ==
1458                      (b->addr32[0] & m->addr32[0])) &&
1459                     ((a->addr32[1] & m->addr32[1]) ==
1460                      (b->addr32[1] & m->addr32[1])) &&
1461                     ((a->addr32[2] & m->addr32[2]) ==
1462                      (b->addr32[2] & m->addr32[2])) &&
1463                     ((a->addr32[3] & m->addr32[3]) ==
1464                      (b->addr32[3] & m->addr32[3])))
1465                         match++;
1466                 break;
1467 #endif /* INET6 */
1468         }
1469         if (match) {
1470                 if (n)
1471                         return (0);
1472                 else
1473                         return (1);
1474         } else {
1475                 if (n)
1476                         return (1);
1477                 else
1478                         return (0);
1479         }
1480 }
1481
1482 int
1483 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1484 {
1485         switch (op) {
1486         case PF_OP_IRG:
1487                 return ((p > a1) && (p < a2));
1488         case PF_OP_XRG:
1489                 return ((p < a1) || (p > a2));
1490         case PF_OP_RRG:
1491                 return ((p >= a1) && (p <= a2));
1492         case PF_OP_EQ:
1493                 return (p == a1);
1494         case PF_OP_NE:
1495                 return (p != a1);
1496         case PF_OP_LT:
1497                 return (p < a1);
1498         case PF_OP_LE:
1499                 return (p <= a1);
1500         case PF_OP_GT:
1501                 return (p > a1);
1502         case PF_OP_GE:
1503                 return (p >= a1);
1504         }
1505         return (0); /* never reached */
1506 }
1507
1508 int
1509 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
1510 {
1511         a1 = ntohs(a1);
1512         a2 = ntohs(a2);
1513         p = ntohs(p);
1514         return (pf_match(op, a1, a2, p));
1515 }
1516
1517 int
1518 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1519 {
1520         if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1521                 return (0);
1522         return (pf_match(op, a1, a2, u));
1523 }
1524
1525 int
1526 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1527 {
1528         if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1529                 return (0);
1530         return (pf_match(op, a1, a2, g));
1531 }
1532
1533 static int
1534 pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_rule *nat_rule,
1535              int *tag)
1536 {
1537         if (*tag == -1) {       /* find mbuf tag */
1538                 if (nat_rule != NULL && nat_rule->tag)
1539                         *tag = nat_rule->tag;
1540                 else if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
1541                         *tag = m->m_pkthdr.pf_tag;
1542                 else
1543                         *tag = 0;
1544         }
1545
1546         return ((!r->match_tag_not && r->match_tag == *tag) ||
1547             (r->match_tag_not && r->match_tag != *tag));
1548 }
1549
1550 void
1551 pf_tag_packet(struct mbuf *m, int tag)
1552 {
1553         if (tag <= 0)
1554                 return;
1555
1556         m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1557         m->m_pkthdr.pf_tag = tag;
1558 }
1559
/*
 * Descend from anchor rule r into the rulesets of its anchor.
 *
 * r - current rule; must be non-NULL and have an anchor
 * a - receives the anchor rule; must be NULL on entry
 * s - receives the ruleset being walked; must be NULL on entry
 * n - index into the rules[] array of each ruleset
 *
 * Violating the entry conditions panics.  On exit r is the first active
 * rule of the first ruleset that has one.  If no ruleset has any, r
 * becomes the rule following the anchor rule and a is reset to NULL.
 */
#define PF_STEP_INTO_ANCHOR(r, a, s, n)					\
	do {								\
		if ((r) == NULL || (r)->anchor == NULL ||		\
		    (s) != NULL || (a) != NULL)				\
			panic("PF_STEP_INTO_ANCHOR");			\
		(a) = (r);						\
		(r) = NULL;						\
		for ((s) = TAILQ_FIRST(&(a)->anchor->rulesets);		\
		    (s) != NULL; (s) = TAILQ_NEXT((s), entries)) {	\
			(r) = TAILQ_FIRST((s)->rules[(n)].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1576
/*
 * Continue after the rules of ruleset (s) inside anchor rule (a) have been
 * exhausted ((r) == NULL).
 *
 * Advances (s) to the next ruleset of the anchor that has an active rule
 * of type n and points (r) at that rule.  If no such ruleset remains, (r)
 * becomes the rule following the anchor rule and (a) is reset to NULL
 * ((s) is NULL then).
 *
 * Panics if (r) is non-NULL or if (a) or (s) is NULL.
 */
#define PF_STEP_OUT_OF_ANCHOR(r, a, s, n)				\
	do {								\
		if ((r) != NULL || (a) == NULL || (s) == NULL)		\
			panic("PF_STEP_OUT_OF_ANCHOR");			\
		for ((s) = TAILQ_NEXT((s), entries);			\
		    (s) != NULL; (s) = TAILQ_NEXT((s), entries)) {	\
			(r) = TAILQ_FIRST((s)->rules[n].active.ptr);	\
			if ((r) != NULL)				\
				break;					\
		}							\
		if ((r) == NULL) {					\
			(r) = TAILQ_NEXT((a), entries);			\
			(a) = NULL;					\
		}							\
	} while (0)
1590
1591 #ifdef INET6
1592 void
1593 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
1594     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
1595 {
1596         switch (af) {
1597 #ifdef INET
1598         case AF_INET:
1599                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1600                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1601                 break;
1602 #endif /* INET */
1603         case AF_INET6:
1604                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
1605                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
1606                 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
1607                 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
1608                 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
1609                 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
1610                 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
1611                 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
1612                 break;
1613         }
1614 }
1615
1616 void
1617 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
1618 {
1619         switch (af) {
1620 #ifdef INET
1621         case AF_INET:
1622                 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
1623                 break;
1624 #endif /* INET */
1625         case AF_INET6:
1626                 if (addr->addr32[3] == 0xffffffff) {
1627                         addr->addr32[3] = 0;
1628                         if (addr->addr32[2] == 0xffffffff) {
1629                                 addr->addr32[2] = 0;
1630                                 if (addr->addr32[1] == 0xffffffff) {
1631                                         addr->addr32[1] = 0;
1632                                         addr->addr32[0] =
1633                                             htonl(ntohl(addr->addr32[0]) + 1);
1634                                 } else
1635                                         addr->addr32[1] =
1636                                             htonl(ntohl(addr->addr32[1]) + 1);
1637                         } else
1638                                 addr->addr32[2] =
1639                                     htonl(ntohl(addr->addr32[2]) + 1);
1640                 } else
1641                         addr->addr32[3] =
1642                             htonl(ntohl(addr->addr32[3]) + 1);
1643                 break;
1644         }
1645 }
1646 #endif /* INET6 */
1647
/*
 * Scramble the three accumulators a, b and c in place.  All three
 * arguments must be modifiable lvalues and are evaluated several times;
 * pf_hash() invokes this on u_int32_t values.
 */
#define mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
1660
1661 /*
1662  * hash function based on bridge_hash in if_bridge.c
1663  */
1664 void
1665 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
1666     struct pf_poolhashkey *key, sa_family_t af)
1667 {
1668         u_int32_t       a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
1669
1670         switch (af) {
1671 #ifdef INET
1672         case AF_INET:
1673                 a += inaddr->addr32[0];
1674                 b += key->key32[1];
1675                 mix(a, b, c);
1676                 hash->addr32[0] = c + key->key32[2];
1677                 break;
1678 #endif /* INET */
1679 #ifdef INET6
1680         case AF_INET6:
1681                 a += inaddr->addr32[0];
1682                 b += inaddr->addr32[2];
1683                 mix(a, b, c);
1684                 hash->addr32[0] = c;
1685                 a += inaddr->addr32[1];
1686                 b += inaddr->addr32[3];
1687                 c += key->key32[1];
1688                 mix(a, b, c);
1689                 hash->addr32[1] = c;
1690                 a += inaddr->addr32[2];
1691                 b += inaddr->addr32[1];
1692                 c += key->key32[2];
1693                 mix(a, b, c);
1694                 hash->addr32[2] = c;
1695                 a += inaddr->addr32[3];
1696                 b += inaddr->addr32[0];
1697                 c += key->key32[3];
1698                 mix(a, b, c);
1699                 hash->addr32[3] = c;
1700                 break;
1701 #endif /* INET6 */
1702         }
1703 }
1704
1705 int
1706 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
1707     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
1708 {
1709         unsigned char            hash[16];
1710         struct pf_pool          *rpool = &r->rpool;
1711         struct pf_addr          *raddr = &rpool->cur->addr.v.a.addr;
1712         struct pf_addr          *rmask = &rpool->cur->addr.v.a.mask;
1713         struct pf_pooladdr      *acur = rpool->cur;
1714         struct pf_src_node       k;
1715
1716         if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
1717             (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1718                 k.af = af;
1719                 PF_ACPY(&k.addr, saddr, af);
1720                 if (r->rule_flag & PFRULE_RULESRCTRACK ||
1721                     r->rpool.opts & PF_POOL_STICKYADDR)
1722                         k.rule.ptr = r;
1723                 else
1724                         k.rule.ptr = NULL;
1725                 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
1726                 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
1727                 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
1728                         PF_ACPY(naddr, &(*sn)->raddr, af);
1729                         if (pf_status.debug >= PF_DEBUG_MISC) {
1730                                 printf("pf_map_addr: src tracking maps ");
1731                                 pf_print_host(&k.addr, 0, af);
1732                                 printf(" to ");
1733                                 pf_print_host(naddr, 0, af);
1734                                 printf("\n");
1735                         }
1736                         return (0);
1737                 }
1738         }
1739
1740         if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
1741                 return (1);
1742         if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
1743                 if (af == AF_INET) {
1744                         if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
1745                             (rpool->opts & PF_POOL_TYPEMASK) !=
1746                             PF_POOL_ROUNDROBIN)
1747                                 return (1);
1748                          raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
1749                          rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
1750                 } else {
1751                         if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
1752                             (rpool->opts & PF_POOL_TYPEMASK) !=
1753                             PF_POOL_ROUNDROBIN)
1754                                 return (1);
1755                         raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
1756                         rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
1757                 }
1758         } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1759                 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
1760                         return (1); /* unsupported */
1761         } else {
1762                 raddr = &rpool->cur->addr.v.a.addr;
1763                 rmask = &rpool->cur->addr.v.a.mask;
1764         }
1765
1766         switch (rpool->opts & PF_POOL_TYPEMASK) {
1767         case PF_POOL_NONE:
1768                 PF_ACPY(naddr, raddr, af);
1769                 break;
1770         case PF_POOL_BITMASK:
1771                 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
1772                 break;
1773         case PF_POOL_RANDOM:
1774                 if (init_addr != NULL && PF_AZERO(init_addr, af)) {
1775                         switch (af) {
1776 #ifdef INET
1777                         case AF_INET:
1778                                 rpool->counter.addr32[0] = arc4random();
1779                                 break;
1780 #endif /* INET */
1781 #ifdef INET6
1782                         case AF_INET6:
1783                                 if (rmask->addr32[3] != 0xffffffff)
1784                                         rpool->counter.addr32[3] = arc4random();
1785                                 else
1786                                         break;
1787                                 if (rmask->addr32[2] != 0xffffffff)
1788                                         rpool->counter.addr32[2] = arc4random();
1789                                 else
1790                                         break;
1791                                 if (rmask->addr32[1] != 0xffffffff)
1792                                         rpool->counter.addr32[1] = arc4random();
1793                                 else
1794                                         break;
1795                                 if (rmask->addr32[0] != 0xffffffff)
1796                                         rpool->counter.addr32[0] = arc4random();
1797                                 break;
1798 #endif /* INET6 */
1799                         }
1800                         PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
1801                         PF_ACPY(init_addr, naddr, af);
1802
1803                 } else {
1804                         PF_AINC(&rpool->counter, af);
1805                         PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
1806                 }
1807                 break;
1808         case PF_POOL_SRCHASH:
1809                 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
1810                 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
1811                 break;
1812         case PF_POOL_ROUNDROBIN:
1813                 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1814                         if (!pfr_pool_get(rpool->cur->addr.p.tbl,
1815                             &rpool->tblidx, &rpool->counter,
1816                             &raddr, &rmask, af))
1817                                 goto get_addr;
1818                 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
1819                         if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
1820                             &rpool->tblidx, &rpool->counter,
1821                             &raddr, &rmask, af))
1822                                 goto get_addr;
1823                 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
1824                         goto get_addr;
1825
1826         try_next:
1827                 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
1828                         rpool->cur = TAILQ_FIRST(&rpool->list);
1829                 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
1830                         rpool->tblidx = -1;
1831                         if (pfr_pool_get(rpool->cur->addr.p.tbl,
1832                             &rpool->tblidx, &rpool->counter,
1833                             &raddr, &rmask, af)) {
1834                                 /* table contains no address of type 'af' */
1835                                 if (rpool->cur != acur)
1836                                         goto try_next;
1837                                 return (1);
1838                         }
1839                 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
1840                         rpool->tblidx = -1;
1841                         if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
1842                             &rpool->tblidx, &rpool->counter,
1843                             &raddr, &rmask, af)) {
1844                                 /* table contains no address of type 'af' */
1845                                 if (rpool->cur != acur)
1846                                         goto try_next;
1847                                 return (1);
1848                         }
1849                 } else {
1850                         raddr = &rpool->cur->addr.v.a.addr;
1851                         rmask = &rpool->cur->addr.v.a.mask;
1852                         PF_ACPY(&rpool->counter, raddr, af);
1853                 }
1854
1855         get_addr:
1856                 PF_ACPY(naddr, &rpool->counter, af);
1857                 PF_AINC(&rpool->counter, af);
1858                 break;
1859         }
1860         if (*sn != NULL)
1861                 PF_ACPY(&(*sn)->raddr, naddr, af);
1862
1863         if (pf_status.debug >= PF_DEBUG_MISC &&
1864             (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
1865                 printf("pf_map_addr: selected address ");
1866                 pf_print_host(naddr, 0, af);
1867                 printf("\n");
1868         }
1869
1870         return (0);
1871 }
1872
1873 int
1874 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
1875     struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
1876     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
1877     struct pf_src_node **sn)
1878 {
1879         struct pf_state         key;
1880         struct pf_addr          init_addr;
1881         u_int16_t               cut;
1882
1883         bzero(&init_addr, sizeof(init_addr));
1884         if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
1885                 return (1);
1886
1887         do {
1888                 key.af = af;
1889                 key.proto = proto;
1890                 PF_ACPY(&key.ext.addr, daddr, key.af);
1891                 PF_ACPY(&key.gwy.addr, naddr, key.af);
1892                 key.ext.port = dport;
1893
1894                 /*
1895                  * port search; start random, step;
1896                  * similar 2 portloop in in_pcbbind
1897                  */
1898                 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
1899                         key.gwy.port = 0;
1900                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
1901                                 return (0);
1902                 } else if (low == 0 && high == 0) {
1903                         key.gwy.port = *nport;
1904                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
1905                                 return (0);
1906                 } else if (low == high) {
1907                         key.gwy.port = htons(low);
1908                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
1909                                 *nport = htons(low);
1910                                 return (0);
1911                         }
1912                 } else {
1913                         u_int16_t tmp;
1914
1915                         if (low > high) {
1916                                 tmp = low;
1917                                 low = high;
1918                                 high = tmp;
1919                         }
1920                         /* low < high */
1921                         cut = arc4random() % (1 + high - low) + low;
1922                         /* low <= cut <= high */
1923                         for (tmp = cut; tmp <= high; ++(tmp)) {
1924                                 key.gwy.port = htons(tmp);
1925                                 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
1926                                     NULL) {
1927                                         *nport = htons(tmp);
1928                                         return (0);
1929                                 }
1930                         }
1931                         for (tmp = cut - 1; tmp >= low; --(tmp)) {
1932                                 key.gwy.port = htons(tmp);
1933                                 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
1934                                     NULL) {
1935                                         *nport = htons(tmp);
1936                                         return (0);
1937                                 }
1938                         }
1939                 }
1940
1941                 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
1942                 case PF_POOL_RANDOM:
1943                 case PF_POOL_ROUNDROBIN:
1944                         if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
1945                                 return (1);
1946                         break;
1947                 case PF_POOL_NONE:
1948                 case PF_POOL_SRCHASH:
1949                 case PF_POOL_BITMASK:
1950                 default:
1951                         return (1);
1952                 }
1953         } while (! PF_AEQ(&init_addr, naddr, af) );
1954
1955         return (1);                                     /* none available */
1956 }
1957
1958 struct pf_rule *
1959 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
1960     int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
1961     struct pf_addr *daddr, u_int16_t dport, int rs_num)
1962 {
1963         struct pf_rule          *r, *rm = NULL, *anchorrule = NULL;
1964         struct pf_ruleset       *ruleset = NULL;
1965
1966         r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
1967         while (r && rm == NULL) {
1968                 struct pf_rule_addr     *src = NULL, *dst = NULL;
1969                 struct pf_addr_wrap     *xdst = NULL;
1970
1971                 if (r->action == PF_BINAT && direction == PF_IN) {
1972                         src = &r->dst;
1973                         if (r->rpool.cur != NULL)
1974                                 xdst = &r->rpool.cur->addr;
1975                 } else {
1976                         src = &r->src;
1977                         dst = &r->dst;
1978                 }
1979
1980                 r->evaluations++;
1981                 if (r->kif != NULL &&
1982                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
1983                         r = r->skip[PF_SKIP_IFP].ptr;
1984                 else if (r->direction && r->direction != direction)
1985                         r = r->skip[PF_SKIP_DIR].ptr;
1986                 else if (r->af && r->af != pd->af)
1987                         r = r->skip[PF_SKIP_AF].ptr;
1988                 else if (r->proto && r->proto != pd->proto)
1989                         r = r->skip[PF_SKIP_PROTO].ptr;
1990                 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, src->not))
1991                         r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
1992                             PF_SKIP_DST_ADDR].ptr;
1993                 else if (src->port_op && !pf_match_port(src->port_op,
1994                     src->port[0], src->port[1], sport))
1995                         r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
1996                             PF_SKIP_DST_PORT].ptr;
1997                 else if (dst != NULL &&
1998                     PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->not))
1999                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
2000                 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 0))
2001                         r = TAILQ_NEXT(r, entries);
2002                 else if (dst != NULL && dst->port_op &&
2003                     !pf_match_port(dst->port_op, dst->port[0],
2004                     dst->port[1], dport))
2005                         r = r->skip[PF_SKIP_DST_PORT].ptr;
2006                 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2007                     IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2008                     off, pd->hdr.tcp), r->os_fingerprint)))
2009                         r = TAILQ_NEXT(r, entries);
2010                 else if (r->anchorname[0] && r->anchor == NULL)
2011                         r = TAILQ_NEXT(r, entries);
2012                 else if (r->anchor == NULL)
2013                                 rm = r;
2014                 else
2015                         PF_STEP_INTO_ANCHOR(r, anchorrule, ruleset, rs_num);
2016                 if (r == NULL && anchorrule != NULL)
2017                         PF_STEP_OUT_OF_ANCHOR(r, anchorrule, ruleset,
2018                             rs_num);
2019         }
2020         if (rm != NULL && (rm->action == PF_NONAT ||
2021             rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2022                 return (NULL);
2023         return (rm);
2024 }
2025
2026 struct pf_rule *
2027 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2028     struct pfi_kif *kif, struct pf_src_node **sn,
2029     struct pf_addr *saddr, u_int16_t sport,
2030     struct pf_addr *daddr, u_int16_t dport,
2031     struct pf_addr *naddr, u_int16_t *nport)
2032 {
2033         struct pf_rule  *r = NULL;
2034
2035         if (direction == PF_OUT) {
2036                 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2037                     sport, daddr, dport, PF_RULESET_BINAT);
2038                 if (r == NULL)
2039                         r = pf_match_translation(pd, m, off, direction, kif,
2040                             saddr, sport, daddr, dport, PF_RULESET_NAT);
2041         } else {
2042                 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2043                     sport, daddr, dport, PF_RULESET_RDR);
2044                 if (r == NULL)
2045                         r = pf_match_translation(pd, m, off, direction, kif,
2046                             saddr, sport, daddr, dport, PF_RULESET_BINAT);
2047         }
2048
2049         if (r != NULL) {
2050                 switch (r->action) {
2051                 case PF_NONAT:
2052                 case PF_NOBINAT:
2053                 case PF_NORDR:
2054                         return (NULL);
2055                 case PF_NAT:
2056                         if (pf_get_sport(pd->af, pd->proto, r, saddr,
2057                             daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2058                             r->rpool.proxy_port[1], sn)) {
2059                                 DPFPRINTF(PF_DEBUG_MISC,
2060                                     ("pf: NAT proxy port allocation "
2061                                     "(%u-%u) failed\n",
2062                                     r->rpool.proxy_port[0],
2063                                     r->rpool.proxy_port[1]));
2064                                 return (NULL);
2065                         }
2066                         break;
2067                 case PF_BINAT:
2068                         switch (direction) {
2069                         case PF_OUT:
2070                                 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2071                                         if (pd->af == AF_INET) {
2072                                                 if (r->rpool.cur->addr.p.dyn->
2073                                                     pfid_acnt4 < 1)
2074                                                         return (NULL);
2075                                                 PF_POOLMASK(naddr,
2076                                                     &r->rpool.cur->addr.p.dyn->
2077                                                     pfid_addr4,
2078                                                     &r->rpool.cur->addr.p.dyn->
2079                                                     pfid_mask4,
2080                                                     saddr, AF_INET);
2081                                         } else {
2082                                                 if (r->rpool.cur->addr.p.dyn->
2083                                                     pfid_acnt6 < 1)
2084                                                         return (NULL);
2085                                                 PF_POOLMASK(naddr,
2086                                                     &r->rpool.cur->addr.p.dyn->
2087                                                     pfid_addr6,
2088                                                     &r->rpool.cur->addr.p.dyn->
2089                                                     pfid_mask6,
2090                                                     saddr, AF_INET6);
2091                                         }
2092                                 } else
2093                                         PF_POOLMASK(naddr,
2094                                             &r->rpool.cur->addr.v.a.addr,
2095                                             &r->rpool.cur->addr.v.a.mask,
2096                                             saddr, pd->af);
2097                                 break;
2098                         case PF_IN:
2099                                 if (r->src.addr.type == PF_ADDR_DYNIFTL){
2100                                         if (pd->af == AF_INET) {
2101                                                 if (r->src.addr.p.dyn->
2102                                                     pfid_acnt4 < 1)
2103                                                         return (NULL);
2104                                                 PF_POOLMASK(naddr,
2105                                                     &r->src.addr.p.dyn->
2106                                                     pfid_addr4,
2107                                                     &r->src.addr.p.dyn->
2108                                                     pfid_mask4,
2109                                                     daddr, AF_INET);
2110                                         } else {
2111                                                 if (r->src.addr.p.dyn->
2112                                                     pfid_acnt6 < 1)
2113                                                         return (NULL);
2114                                                 PF_POOLMASK(naddr,
2115                                                     &r->src.addr.p.dyn->
2116                                                     pfid_addr6,
2117                                                     &r->src.addr.p.dyn->
2118                                                     pfid_mask6,
2119                                                     daddr, AF_INET6);
2120                                         }
2121                                 } else
2122                                         PF_POOLMASK(naddr,
2123                                             &r->src.addr.v.a.addr,
2124                                             &r->src.addr.v.a.mask, daddr,
2125                                             pd->af);
2126                                 break;
2127                         }
2128                         break;
2129                 case PF_RDR: {
2130                         if (pf_map_addr(r->af, r, saddr, naddr, NULL, sn))
2131                                 return (NULL);
2132
2133                         if (r->rpool.proxy_port[1]) {
2134                                 u_int32_t       tmp_nport;
2135
2136                                 tmp_nport = ((ntohs(dport) -
2137                                     ntohs(r->dst.port[0])) %
2138                                     (r->rpool.proxy_port[1] -
2139                                     r->rpool.proxy_port[0] + 1)) +
2140                                     r->rpool.proxy_port[0];
2141
2142                                 /* wrap around if necessary */
2143                                 if (tmp_nport > 65535)
2144                                         tmp_nport -= 65535;
2145                                 *nport = htons((u_int16_t)tmp_nport);
2146                         } else if (r->rpool.proxy_port[0])
2147                                 *nport = htons(r->rpool.proxy_port[0]);
2148                         break;
2149                 }
2150                 default:
2151                         return (NULL);
2152                 }
2153         }
2154
2155         return (r);
2156 }
2157
2158 #ifdef SMP
2159 struct netmsg_hashlookup {
2160         struct lwkt_msg         nm_lmsg;
2161         struct inpcb            **nm_pinp;
2162         struct inpcbinfo        *nm_pcbinfo;
2163         struct pf_addr          *nm_saddr;
2164         struct pf_addr          *nm_daddr;
2165         uint16_t                nm_sport;
2166         uint16_t                nm_dport;
2167         sa_family_t             nm_af;
2168 };
2169
2170 static int
2171 in_pcblookup_hash_handler(struct lwkt_msg *msg0)
2172 {
2173         struct netmsg_hashlookup *msg = (struct netmsg_hashlookup *)msg0;
2174
2175         if (msg->nm_af == AF_INET)
2176                 *msg->nm_pinp = in_pcblookup_hash(msg->nm_pcbinfo,
2177                     msg->nm_saddr->v4, msg->nm_sport, msg->nm_daddr->v4,
2178                     msg->nm_dport, INPLOOKUP_WILDCARD, NULL);
2179 #ifdef INET6
2180         else
2181                 *msg->nm_pinp = in6_pcblookup_hash(msg->nm_pcbinfo,
2182                     &msg->nm_saddr->v6, msg->nm_sport, &msg->nm_daddr->v6,
2183                     msg->nm_dport, INPLOOKUP_WILDCARD, NULL);
2184 #endif /* INET6 */
2185         lwkt_replymsg(&msg->nm_lmsg, 0);
2186         return (EASYNC);
2187 }
2188 #endif /* SMP */
2189
2190 int
2191 pf_socket_lookup(uid_t *uid, gid_t *gid, int direction, struct pf_pdesc *pd)
2192 {
2193         struct pf_addr          *saddr, *daddr;
2194         u_int16_t                sport, dport;
2195         struct inpcbinfo        *pi;
2196         struct inpcb            *inp;
2197 #ifdef SMP
2198         struct netmsg_hashlookup *msg = NULL;
2199 #endif
2200         int                      pi_cpu = 0;
2201
2202         *uid = UID_MAX;
2203         *gid = GID_MAX;
2204         if (direction == PF_IN) {
2205                 saddr = pd->src;
2206                 daddr = pd->dst;
2207         } else {
2208                 saddr = pd->dst;
2209                 daddr = pd->src;
2210         }
2211         switch (pd->proto) {
2212         case IPPROTO_TCP:
2213                 sport = pd->hdr.tcp->th_sport;
2214                 dport = pd->hdr.tcp->th_dport;
2215
2216                 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
2217                 pi = &tcbinfo[pi_cpu];
2218 #ifdef SMP
2219                 /*
2220                  * Our netstack runs lockless on MP systems
2221                  * (only for TCP connections at the moment).
2222                  * 
2223                  * As we are not allowed to read another CPU's tcbinfo,
2224                  * we have to ask that CPU via remote call to search the
2225                  * table for us.
2226                  * 
2227                  * Prepare a msg iff data belongs to another CPU.
2228                  */
2229                 if (pi_cpu != mycpu->gd_cpuid) {
2230                         msg = malloc(sizeof(*msg), M_LWKTMSG, M_INTWAIT);
2231                         lwkt_initmsg(&msg->nm_lmsg, &netisr_afree_rport, 0,
2232                             lwkt_cmd_func(in_pcblookup_hash_handler),
2233                             lwkt_cmd_op_none);
2234                         msg->nm_pinp = &inp;
2235                         msg->nm_pcbinfo = pi;
2236                         msg->nm_saddr = saddr;
2237                         msg->nm_sport = sport;
2238                         msg->nm_daddr = daddr;
2239                         msg->nm_dport = dport;
2240                         msg->nm_af = pd->af;
2241                 }
2242 #endif /* SMP */
2243                 break;
2244         case IPPROTO_UDP:
2245                 sport = pd->hdr.udp->uh_sport;
2246                 dport = pd->hdr.udp->uh_dport;
2247                 pi = &udbinfo;
2248                 break;
2249         default:
2250                 return (0);
2251         }
2252         if (direction != PF_IN) {
2253                 u_int16_t       p;
2254
2255                 p = sport;
2256                 sport = dport;
2257                 dport = p;
2258         }
2259         switch (pd->af) {
2260 #ifdef INET6
2261         case AF_INET6:
2262 #ifdef SMP
2263                 /*
2264                  * Query other CPU, second part
2265                  * 
2266                  * msg only gets initialized when:
2267                  * 1) packet is TCP
2268                  * 2) the info belongs to another CPU
2269                  *
2270                  * Use some switch/case magic to avoid code duplication.
2271                  */
2272                 if (msg == NULL)
2273 #endif /* SMP */
2274                 {
2275                         inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2276                             &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2277
2278                         if (inp == NULL)
2279                                 return (0);
2280                         break;
2281                 }
2282                 /* FALLTHROUGH if SMP and on other CPU */
2283 #endif /* INET6 */
2284         case AF_INET:
2285 #ifdef SMP
2286                 if (msg != NULL) {
2287                         lwkt_sendmsg(tcp_cport(pi_cpu), &msg->nm_lmsg);
2288                 } else
2289 #endif /* SMP */
2290                 {
2291                         inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2292                             dport, INPLOOKUP_WILDCARD, NULL);
2293                 }
2294                 if (inp == NULL)
2295                         return (0);
2296                 break;
2297
2298         default:
2299                 return (0);
2300         }
2301         *uid = inp->inp_socket->so_cred->cr_uid;
2302         *gid = inp->inp_socket->so_cred->cr_groups[0];
2303         return (1);
2304 }
2305
2306 u_int8_t
2307 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2308 {
2309         int              hlen;
2310         u_int8_t         hdr[60];
2311         u_int8_t        *opt, optlen;
2312         u_int8_t         wscale = 0;
2313
2314         hlen = th_off << 2;             /* hlen <= sizeof(hdr) */
2315         if (hlen <= sizeof(struct tcphdr))
2316                 return (0);
2317         if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2318                 return (0);
2319         opt = hdr + sizeof(struct tcphdr);
2320         hlen -= sizeof(struct tcphdr);
2321         while (hlen >= 3) {
2322                 switch (*opt) {
2323                 case TCPOPT_EOL:
2324                 case TCPOPT_NOP:
2325                         ++opt;
2326                         --hlen;
2327                         break;
2328                 case TCPOPT_WINDOW:
2329                         wscale = opt[2];
2330                         if (wscale > TCP_MAX_WINSHIFT)
2331                                 wscale = TCP_MAX_WINSHIFT;
2332                         wscale |= PF_WSCALE_FLAG;
2333                         /* FALLTHROUGH */
2334                 default:
2335                         optlen = opt[1];
2336                         if (optlen < 2)
2337                                 optlen = 2;
2338                         hlen -= optlen;
2339                         opt += optlen;
2340                         break;
2341                 }
2342         }
2343         return (wscale);
2344 }
2345
2346 u_int16_t
2347 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2348 {
2349         int              hlen;
2350         u_int8_t         hdr[60];
2351         u_int8_t        *opt, optlen;
2352         u_int16_t        mss = tcp_mssdflt;
2353
2354         hlen = th_off << 2;     /* hlen <= sizeof(hdr) */
2355         if (hlen <= sizeof(struct tcphdr))
2356                 return (0);
2357         if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2358                 return (0);
2359         opt = hdr + sizeof(struct tcphdr);
2360         hlen -= sizeof(struct tcphdr);
2361         while (hlen >= TCPOLEN_MAXSEG) {
2362                 switch (*opt) {
2363                 case TCPOPT_EOL:
2364                 case TCPOPT_NOP:
2365                         ++opt;
2366                         --hlen;
2367                         break;
2368                 case TCPOPT_MAXSEG:
2369                         bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2370                         /* FALLTHROUGH */
2371                 default:
2372                         optlen = opt[1];
2373                         if (optlen < 2)
2374                                 optlen = 2;
2375                         hlen -= optlen;
2376                         opt += optlen;
2377                         break;
2378                 }
2379         }
2380         return (mss);
2381 }
2382
2383 u_int16_t
2384 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2385 {
2386 #ifdef INET
2387         struct sockaddr_in      *dst;
2388         struct route             ro;
2389 #endif /* INET */
2390 #ifdef INET6
2391         struct sockaddr_in6     *dst6;
2392         struct route_in6         ro6;
2393 #endif /* INET6 */
2394         struct rtentry          *rt = NULL;
2395         int                      hlen = 0;
2396         u_int16_t                mss = tcp_mssdflt;
2397
2398         switch (af) {
2399 #ifdef INET
2400         case AF_INET:
2401                 hlen = sizeof(struct ip);
2402                 bzero(&ro, sizeof(ro));
2403                 dst = (struct sockaddr_in *)&ro.ro_dst;
2404                 dst->sin_family = AF_INET;
2405                 dst->sin_len = sizeof(*dst);
2406                 dst->sin_addr = addr->v4;
2407                 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2408                 rt = ro.ro_rt;
2409                 break;
2410 #endif /* INET */
2411 #ifdef INET6
2412         case AF_INET6:
2413                 hlen = sizeof(struct ip6_hdr);
2414                 bzero(&ro6, sizeof(ro6));
2415                 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2416                 dst6->sin6_family = AF_INET6;
2417                 dst6->sin6_len = sizeof(*dst6);
2418                 dst6->sin6_addr = addr->v6;
2419                 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
2420                 rt = ro6.ro_rt;
2421                 break;
2422 #endif /* INET6 */
2423         }
2424
2425         if (rt && rt->rt_ifp) {
2426                 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2427                 mss = max(tcp_mssdflt, mss);
2428                 RTFREE(rt);
2429         }
2430         mss = min(mss, offer);
2431         mss = max(mss, 64);             /* sanity - at least max opt space */
2432         return (mss);
2433 }
2434
2435 void
2436 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2437 {
2438         struct pf_rule *r = s->rule.ptr;
2439
2440         s->rt_kif = NULL;
2441         if (!r->rt || r->rt == PF_FASTROUTE)
2442                 return;
2443         switch (s->af) {
2444 #ifdef INET
2445         case AF_INET:
2446                 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2447                     &s->nat_src_node);
2448                 s->rt_kif = r->rpool.cur->kif;
2449                 break;
2450 #endif /* INET */
2451 #ifdef INET6
2452         case AF_INET6:
2453                 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2454                     &s->nat_src_node);
2455                 s->rt_kif = r->rpool.cur->kif;
2456                 break;
2457 #endif /* INET6 */
2458         }
2459 }
2460
2461 int
2462 pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2463     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2464     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2465 {
2466         struct pf_rule          *nr = NULL;
2467         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
2468         struct tcphdr           *th = pd->hdr.tcp;
2469         u_int16_t                bport, nport = 0;
2470         sa_family_t              af = pd->af;
2471         int                      lookup = -1;
2472         uid_t                    uid;
2473         gid_t                    gid;
2474         struct pf_rule          *r, *a = NULL;
2475         struct pf_ruleset       *ruleset = NULL;
2476         struct pf_src_node      *nsn = NULL;
2477         u_short                  reason;
2478         int                      rewrite = 0;
2479         int                      tag = -1;
2480         u_int16_t                mss = tcp_mssdflt;
2481
2482         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2483
2484         if (direction == PF_OUT) {
2485                 bport = nport = th->th_sport;
2486                 /* check outgoing packet for BINAT/NAT */
2487                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2488                     saddr, th->th_sport, daddr, th->th_dport,
2489                     &pd->naddr, &nport)) != NULL) {
2490                         PF_ACPY(&pd->baddr, saddr, af);
2491                         pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2492                             &th->th_sum, &pd->naddr, nport, 0, af);
2493                         rewrite++;
2494                         if (nr->natpass)
2495                                 r = NULL;
2496                         pd->nat_rule = nr;
2497                 }
2498         } else {
2499                 bport = nport = th->th_dport;
2500                 /* check incoming packet for BINAT/RDR */
2501                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
2502                     saddr, th->th_sport, daddr, th->th_dport,
2503                     &pd->naddr, &nport)) != NULL) {
2504                         PF_ACPY(&pd->baddr, daddr, af);
2505                         pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2506                             &th->th_sum, &pd->naddr, nport, 0, af);
2507                         rewrite++;
2508                         if (nr->natpass)
2509                                 r = NULL;
2510                         pd->nat_rule = nr;
2511                 }
2512         }
2513
2514         while (r != NULL) {
2515                 r->evaluations++;
2516                 if (r->kif != NULL &&
2517                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2518                         r = r->skip[PF_SKIP_IFP].ptr;
2519                 else if (r->direction && r->direction != direction)
2520                         r = r->skip[PF_SKIP_DIR].ptr;
2521                 else if (r->af && r->af != af)
2522                         r = r->skip[PF_SKIP_AF].ptr;
2523                 else if (r->proto && r->proto != IPPROTO_TCP)
2524                         r = r->skip[PF_SKIP_PROTO].ptr;
2525                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2526                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2527                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
2528                     r->src.port[0], r->src.port[1], th->th_sport))
2529                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
2530                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2531                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
2532                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2533                     r->dst.port[0], r->dst.port[1], th->th_dport))
2534                         r = r->skip[PF_SKIP_DST_PORT].ptr;
2535                 else if (r->tos && !(r->tos & pd->tos))
2536                         r = TAILQ_NEXT(r, entries);
2537                 else if (r->rule_flag & PFRULE_FRAGMENT)
2538                         r = TAILQ_NEXT(r, entries);
2539                 else if ((r->flagset & th->th_flags) != r->flags)
2540                         r = TAILQ_NEXT(r, entries);
2541                 else if (r->uid.op && (lookup != -1 || (lookup =
2542                     pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2543                     !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2544                     uid))
2545                         r = TAILQ_NEXT(r, entries);
2546                 else if (r->gid.op && (lookup != -1 || (lookup =
2547                     pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2548                     !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2549                     gid))
2550                         r = TAILQ_NEXT(r, entries);
2551                 else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
2552                         r = TAILQ_NEXT(r, entries);
2553                 else if (r->anchorname[0] && r->anchor == NULL)
2554                         r = TAILQ_NEXT(r, entries);
2555                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
2556                     pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
2557                         r = TAILQ_NEXT(r, entries);
2558                 else {
2559                         if (r->tag)
2560                                 tag = r->tag;
2561                         if (r->anchor == NULL) {
2562                                 *rm = r;
2563                                 *am = a;
2564                                 *rsm = ruleset;
2565                                 if ((*rm)->quick)
2566                                         break;
2567                                 r = TAILQ_NEXT(r, entries);
2568                         } else
2569                                 PF_STEP_INTO_ANCHOR(r, a, ruleset,
2570                                     PF_RULESET_FILTER);
2571                 }
2572                 if (r == NULL && a != NULL)
2573                         PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2574                             PF_RULESET_FILTER);
2575         }
2576         r = *rm;
2577         a = *am;
2578         ruleset = *rsm;
2579
2580         REASON_SET(&reason, PFRES_MATCH);
2581
2582         if (r->log) {
2583                 if (rewrite)
2584                         m_copyback(m, off, sizeof(*th), (caddr_t)th);
2585                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
2586         }
2587
2588         if ((r->action == PF_DROP) &&
2589             ((r->rule_flag & PFRULE_RETURNRST) ||
2590             (r->rule_flag & PFRULE_RETURNICMP) ||
2591             (r->rule_flag & PFRULE_RETURN))) {
2592                 /* undo NAT changes, if they have taken place */
2593                 if (nr != NULL) {
2594                         if (direction == PF_OUT) {
2595                                 pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
2596                                     &th->th_sum, &pd->baddr, bport, 0, af);
2597                                 rewrite++;
2598                         } else {
2599                                 pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
2600                                     &th->th_sum, &pd->baddr, bport, 0, af);
2601                                 rewrite++;
2602                         }
2603                 }
2604                 if (((r->rule_flag & PFRULE_RETURNRST) ||
2605                     (r->rule_flag & PFRULE_RETURN)) &&
2606                     !(th->th_flags & TH_RST)) {
2607                         u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
2608
2609                         if (th->th_flags & TH_SYN)
2610                                 ack++;
2611                         if (th->th_flags & TH_FIN)
2612                                 ack++;
2613                         pf_send_tcp(r, af, pd->dst,
2614                             pd->src, th->th_dport, th->th_sport,
2615                             ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
2616                             r->return_ttl);
2617                 } else if ((af == AF_INET) && r->return_icmp)
2618                         pf_send_icmp(m, r->return_icmp >> 8,
2619                             r->return_icmp & 255, af, r);
2620                 else if ((af == AF_INET6) && r->return_icmp6)
2621                         pf_send_icmp(m, r->return_icmp6 >> 8,
2622                             r->return_icmp6 & 255, af, r);
2623         }
2624
2625         if (r->action == PF_DROP)
2626                 return (PF_DROP);
2627
2628         pf_tag_packet(m, tag);
2629
2630         if (r->keep_state || nr != NULL ||
2631             (pd->flags & PFDESC_TCP_NORM)) {
2632                 /* create new state */
2633                 u_int16_t        len;
2634                 struct pf_state *s = NULL;
2635                 struct pf_src_node *sn = NULL;
2636
2637                 len = pd->tot_len - off - (th->th_off << 2);
2638
2639                 /* check maximums */
2640                 if (r->max_states && (r->states >= r->max_states))
2641                         goto cleanup;
2642                 /* src node for flter rule */
2643                 if ((r->rule_flag & PFRULE_SRCTRACK ||
2644                     r->rpool.opts & PF_POOL_STICKYADDR) &&
2645                     pf_insert_src_node(&sn, r, saddr, af) != 0)
2646                         goto cleanup;
2647                 /* src node for translation rule */
2648                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
2649                     ((direction == PF_OUT &&
2650                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
2651                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
2652                         goto cleanup;
2653                 s = pool_get(&pf_state_pl, PR_NOWAIT);
2654                 if (s == NULL) {
2655 cleanup:
2656                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
2657                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
2658                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2659                                 pf_status.src_nodes--;
2660                                 pool_put(&pf_src_tree_pl, sn);
2661                         }
2662                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
2663                             nsn->expire == 0) {
2664                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
2665                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2666                                 pf_status.src_nodes--;
2667                                 pool_put(&pf_src_tree_pl, nsn);
2668                         }
2669                         REASON_SET(&reason, PFRES_MEMORY);
2670                         return (PF_DROP);
2671                 }
2672                 bzero(s, sizeof(*s));
2673                 r->states++;
2674                 if (a != NULL)
2675                         a->states++;
2676                 s->rule.ptr = r;
2677                 s->nat_rule.ptr = nr;
2678                 if (s->nat_rule.ptr != NULL)
2679                         s->nat_rule.ptr->states++;
2680                 s->anchor.ptr = a;
2681                 s->allow_opts = r->allow_opts;
2682                 s->log = r->log & 2;
2683                 s->proto = IPPROTO_TCP;
2684                 s->direction = direction;
2685                 s->af = af;
2686                 if (direction == PF_OUT) {
2687                         PF_ACPY(&s->gwy.addr, saddr, af);
2688                         s->gwy.port = th->th_sport;             /* sport */
2689                         PF_ACPY(&s->ext.addr, daddr, af);
2690                         s->ext.port = th->th_dport;
2691                         if (nr != NULL) {
2692                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
2693                                 s->lan.port = bport;
2694                         } else {
2695                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
2696                                 s->lan.port = s->gwy.port;
2697                         }
2698                 } else {
2699                         PF_ACPY(&s->lan.addr, daddr, af);
2700                         s->lan.port = th->th_dport;
2701                         PF_ACPY(&s->ext.addr, saddr, af);
2702                         s->ext.port = th->th_sport;
2703                         if (nr != NULL) {
2704                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
2705                                 s->gwy.port = bport;
2706                         } else {
2707                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
2708                                 s->gwy.port = s->lan.port;
2709                         }
2710                 }
2711
2712                 s->src.seqlo = ntohl(th->th_seq);
2713                 s->src.seqhi = s->src.seqlo + len + 1;
2714                 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2715                     r->keep_state == PF_STATE_MODULATE) {
2716                         /* Generate sequence number modulator */
2717                         while ((s->src.seqdiff = arc4random()) == 0)
2718                                 ;
2719                         pf_change_a(&th->th_seq, &th->th_sum,
2720                             htonl(s->src.seqlo + s->src.seqdiff), 0);
2721                         rewrite = 1;
2722                 } else
2723                         s->src.seqdiff = 0;
2724                 if (th->th_flags & TH_SYN) {
2725                         s->src.seqhi++;
2726                         s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
2727                 }
2728                 s->src.max_win = MAX(ntohs(th->th_win), 1);
2729                 if (s->src.wscale & PF_WSCALE_MASK) {
2730                         /* Remove scale factor from initial window */
2731                         int win = s->src.max_win;
2732                         win += 1 << (s->src.wscale & PF_WSCALE_MASK);
2733                         s->src.max_win = (win - 1) >>
2734                             (s->src.wscale & PF_WSCALE_MASK);
2735                 }
2736                 if (th->th_flags & TH_FIN)
2737                         s->src.seqhi++;
2738                 s->dst.seqhi = 1;
2739                 s->dst.max_win = 1;
2740                 s->src.state = TCPS_SYN_SENT;
2741                 s->dst.state = TCPS_CLOSED;
2742                 s->creation = time_second;
2743                 s->expire = time_second;
2744                 s->timeout = PFTM_TCP_FIRST_PACKET;
2745                 pf_set_rt_ifp(s, saddr);
2746                 if (sn != NULL) {
2747                         s->src_node = sn;
2748                         s->src_node->states++;
2749                 }
2750                 if (nsn != NULL) {
2751                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
2752                         s->nat_src_node = nsn;
2753                         s->nat_src_node->states++;
2754                 }
2755                 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
2756                     off, pd, th, &s->src, &s->dst)) {
2757                         REASON_SET(&reason, PFRES_MEMORY);
2758                         pf_src_tree_remove_state(s);
2759                         pool_put(&pf_state_pl, s);
2760                         return (PF_DROP);
2761                 }
2762                 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
2763                     pf_normalize_tcp_stateful(m, off, pd, &reason, th, &s->src,
2764                     &s->dst, &rewrite)) {
2765                         pf_normalize_tcp_cleanup(s);
2766                         pf_src_tree_remove_state(s);
2767                         pool_put(&pf_state_pl, s);
2768                         return (PF_DROP);
2769                 }
2770                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
2771                         pf_normalize_tcp_cleanup(s);
2772                         REASON_SET(&reason, PFRES_MEMORY);
2773                         pf_src_tree_remove_state(s);
2774                         pool_put(&pf_state_pl, s);
2775                         return (PF_DROP);
2776                 } else
2777                         *sm = s;
2778                 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
2779                     r->keep_state == PF_STATE_SYNPROXY) {
2780                         s->src.state = PF_TCPS_PROXY_SRC;
2781                         if (nr != NULL) {
2782                                 if (direction == PF_OUT) {
2783                                         pf_change_ap(saddr, &th->th_sport,
2784                                             pd->ip_sum, &th->th_sum, &pd->baddr,
2785                                             bport, 0, af);
2786                                 } else {
2787                                         pf_change_ap(daddr, &th->th_dport,
2788                                             pd->ip_sum, &th->th_sum, &pd->baddr,
2789                                             bport, 0, af);
2790                                 }
2791                         }
2792                         s->src.seqhi = arc4random();
2793                         /* Find mss option */
2794                         mss = pf_get_mss(m, off, th->th_off, af);
2795                         mss = pf_calc_mss(saddr, af, mss);
2796                         mss = pf_calc_mss(daddr, af, mss);
2797                         s->src.mss = mss;
2798                         pf_send_tcp(r, af, daddr, saddr, th->th_dport,
2799                             th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
2800                             TH_SYN|TH_ACK, 0, s->src.mss, 0);
2801                         return (PF_SYNPROXY_DROP);
2802                 }
2803         }
2804
2805         /* copy back packet headers if we performed NAT operations */
2806         if (rewrite)
2807                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
2808
2809         return (PF_PASS);
2810 }
2811
2812 int
2813 pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
2814     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2815     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
2816 {
2817         struct pf_rule          *nr = NULL;
2818         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
2819         struct udphdr           *uh = pd->hdr.udp;
2820         u_int16_t                bport, nport = 0;
2821         sa_family_t              af = pd->af;
2822         int                      lookup = -1;
2823         uid_t                    uid;
2824         gid_t                    gid;
2825         struct pf_rule          *r, *a = NULL;
2826         struct pf_ruleset       *ruleset = NULL;
2827         struct pf_src_node      *nsn = NULL;
2828         u_short                  reason;
2829         int                      rewrite = 0;
2830         int                      tag = -1;
2831
2832         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2833
2834         if (direction == PF_OUT) {
2835                 bport = nport = uh->uh_sport;
2836                 /* check outgoing packet for BINAT/NAT */
2837                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2838                     saddr, uh->uh_sport, daddr, uh->uh_dport,
2839                     &pd->naddr, &nport)) != NULL) {
2840                         PF_ACPY(&pd->baddr, saddr, af);
2841                         pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
2842                             &uh->uh_sum, &pd->naddr, nport, 1, af);
2843                         rewrite++;
2844                         if (nr->natpass)
2845                                 r = NULL;
2846                         pd->nat_rule = nr;
2847                 }
2848         } else {
2849                 bport = nport = uh->uh_dport;
2850                 /* check incoming packet for BINAT/RDR */
2851                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
2852                     saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
2853                     &nport)) != NULL) {
2854                         PF_ACPY(&pd->baddr, daddr, af);
2855                         pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
2856                             &uh->uh_sum, &pd->naddr, nport, 1, af);
2857                         rewrite++;
2858                         if (nr->natpass)
2859                                 r = NULL;
2860                         pd->nat_rule = nr;
2861                 }
2862         }
2863
2864         while (r != NULL) {
2865                 r->evaluations++;
2866                 if (r->kif != NULL &&
2867                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
2868                         r = r->skip[PF_SKIP_IFP].ptr;
2869                 else if (r->direction && r->direction != direction)
2870                         r = r->skip[PF_SKIP_DIR].ptr;
2871                 else if (r->af && r->af != af)
2872                         r = r->skip[PF_SKIP_AF].ptr;
2873                 else if (r->proto && r->proto != IPPROTO_UDP)
2874                         r = r->skip[PF_SKIP_PROTO].ptr;
2875                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
2876                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2877                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
2878                     r->src.port[0], r->src.port[1], uh->uh_sport))
2879                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
2880                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
2881                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
2882                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
2883                     r->dst.port[0], r->dst.port[1], uh->uh_dport))
2884                         r = r->skip[PF_SKIP_DST_PORT].ptr;
2885                 else if (r->tos && !(r->tos & pd->tos))
2886                         r = TAILQ_NEXT(r, entries);
2887                 else if (r->rule_flag & PFRULE_FRAGMENT)
2888                         r = TAILQ_NEXT(r, entries);
2889                 else if (r->uid.op && (lookup != -1 || (lookup =
2890                     pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2891                     !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
2892                     uid))
2893                         r = TAILQ_NEXT(r, entries);
2894                 else if (r->gid.op && (lookup != -1 || (lookup =
2895                     pf_socket_lookup(&uid, &gid, direction, pd), 1)) &&
2896                     !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
2897                     gid))
2898                         r = TAILQ_NEXT(r, entries);
2899                 else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
2900                         r = TAILQ_NEXT(r, entries);
2901                 else if (r->anchorname[0] && r->anchor == NULL)
2902                         r = TAILQ_NEXT(r, entries);
2903                 else if (r->os_fingerprint != PF_OSFP_ANY)
2904                         r = TAILQ_NEXT(r, entries);
2905                 else {
2906                         if (r->tag)
2907                                 tag = r->tag;
2908                         if (r->anchor == NULL) {
2909                                 *rm = r;
2910                                 *am = a;
2911                                 *rsm = ruleset;
2912                                 if ((*rm)->quick)
2913                                         break;
2914                                 r = TAILQ_NEXT(r, entries);
2915                         } else
2916                                 PF_STEP_INTO_ANCHOR(r, a, ruleset,
2917                                     PF_RULESET_FILTER);
2918                 }
2919                 if (r == NULL && a != NULL)
2920                         PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
2921                             PF_RULESET_FILTER);
2922         }
2923         r = *rm;
2924         a = *am;
2925         ruleset = *rsm;
2926
2927         REASON_SET(&reason, PFRES_MATCH);
2928
2929         if (r->log) {
2930                 if (rewrite)
2931                         m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
2932                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
2933         }
2934
2935         if ((r->action == PF_DROP) &&
2936             ((r->rule_flag & PFRULE_RETURNICMP) ||
2937             (r->rule_flag & PFRULE_RETURN))) {
2938                 /* undo NAT changes, if they have taken place */
2939                 if (nr != NULL) {
2940                         if (direction == PF_OUT) {
2941                                 pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
2942                                     &uh->uh_sum, &pd->baddr, bport, 1, af);
2943                                 rewrite++;
2944                         } else {
2945                                 pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
2946                                     &uh->uh_sum, &pd->baddr, bport, 1, af);
2947                                 rewrite++;
2948                         }
2949                 }
2950                 if ((af == AF_INET) && r->return_icmp)
2951                         pf_send_icmp(m, r->return_icmp >> 8,
2952                             r->return_icmp & 255, af, r);
2953                 else if ((af == AF_INET6) && r->return_icmp6)
2954                         pf_send_icmp(m, r->return_icmp6 >> 8,
2955                             r->return_icmp6 & 255, af, r);
2956         }
2957
2958         if (r->action == PF_DROP)
2959                 return (PF_DROP);
2960
2961         pf_tag_packet(m, tag);
2962
2963         if (r->keep_state || nr != NULL) {
2964                 /* create new state */
2965                 struct pf_state *s = NULL;
2966                 struct pf_src_node *sn = NULL;
2967
2968                 /* check maximums */
2969                 if (r->max_states && (r->states >= r->max_states))
2970                         goto cleanup;
2971                 /* src node for flter rule */
2972                 if ((r->rule_flag & PFRULE_SRCTRACK ||
2973                     r->rpool.opts & PF_POOL_STICKYADDR) &&
2974                     pf_insert_src_node(&sn, r, saddr, af) != 0)
2975                         goto cleanup;
2976                 /* src node for translation rule */
2977                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
2978                     ((direction == PF_OUT &&
2979                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
2980                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
2981                         goto cleanup;
2982                 s = pool_get(&pf_state_pl, PR_NOWAIT);
2983                 if (s == NULL) {
2984 cleanup:
2985                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
2986                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
2987                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2988                                 pf_status.src_nodes--;
2989                                 pool_put(&pf_src_tree_pl, sn);
2990                         }
2991                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
2992                             nsn->expire == 0) {
2993                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
2994                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
2995                                 pf_status.src_nodes--;
2996                                 pool_put(&pf_src_tree_pl, nsn);
2997                         }
2998                         REASON_SET(&reason, PFRES_MEMORY);
2999                         return (PF_DROP);
3000                 }
3001                 bzero(s, sizeof(*s));
3002                 r->states++;
3003                 if (a != NULL)
3004                         a->states++;
3005                 s->rule.ptr = r;
3006                 s->nat_rule.ptr = nr;
3007                 if (s->nat_rule.ptr != NULL)
3008                         s->nat_rule.ptr->states++;
3009                 s->anchor.ptr = a;
3010                 s->allow_opts = r->allow_opts;
3011                 s->log = r->log & 2;
3012                 s->proto = IPPROTO_UDP;
3013                 s->direction = direction;
3014                 s->af = af;
3015                 if (direction == PF_OUT) {
3016                         PF_ACPY(&s->gwy.addr, saddr, af);
3017                         s->gwy.port = uh->uh_sport;
3018                         PF_ACPY(&s->ext.addr, daddr, af);
3019                         s->ext.port = uh->uh_dport;
3020                         if (nr != NULL) {
3021                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
3022                                 s->lan.port = bport;
3023                         } else {
3024                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3025                                 s->lan.port = s->gwy.port;
3026                         }
3027                 } else {
3028                         PF_ACPY(&s->lan.addr, daddr, af);
3029                         s->lan.port = uh->uh_dport;
3030                         PF_ACPY(&s->ext.addr, saddr, af);
3031                         s->ext.port = uh->uh_sport;
3032                         if (nr != NULL) {
3033                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3034                                 s->gwy.port = bport;
3035                         } else {
3036                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3037                                 s->gwy.port = s->lan.port;
3038                         }
3039                 }
3040                 s->src.state = PFUDPS_SINGLE;
3041                 s->dst.state = PFUDPS_NO_TRAFFIC;
3042                 s->creation = time_second;
3043                 s->expire = time_second;
3044                 s->timeout = PFTM_UDP_FIRST_PACKET;
3045                 pf_set_rt_ifp(s, saddr);
3046                 if (sn != NULL) {
3047                         s->src_node = sn;
3048                         s->src_node->states++;
3049                 }
3050                 if (nsn != NULL) {
3051                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3052                         s->nat_src_node = nsn;
3053                         s->nat_src_node->states++;
3054                 }
3055                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3056                         REASON_SET(&reason, PFRES_MEMORY);
3057                         pf_src_tree_remove_state(s);
3058                         pool_put(&pf_state_pl, s);
3059                         return (PF_DROP);
3060                 } else
3061                         *sm = s;
3062         }
3063
3064         /* copy back packet headers if we performed NAT operations */
3065         if (rewrite)
3066                 m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3067
3068         return (PF_PASS);
3069 }
3070
3071 int
3072 pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3073     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3074     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm)
3075 {
3076         struct pf_rule          *nr = NULL;
3077         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
3078         struct pf_rule          *r, *a = NULL;
3079         struct pf_ruleset       *ruleset = NULL;
3080         struct pf_src_node      *nsn = NULL;
3081         u_short                  reason;
3082         u_int16_t                icmpid = 0;
3083         sa_family_t              af = pd->af;
3084         u_int8_t                 icmptype = 0, icmpcode = 0;
3085         int                      state_icmp = 0;
3086         int                      tag = -1;
3087 #ifdef INET6
3088         int                      rewrite = 0;
3089 #endif /* INET6 */
3090
3091         switch (pd->proto) {
3092 #ifdef INET
3093         case IPPROTO_ICMP:
3094                 icmptype = pd->hdr.icmp->icmp_type;
3095                 icmpcode = pd->hdr.icmp->icmp_code;
3096                 icmpid = pd->hdr.icmp->icmp_id;
3097
3098                 if (icmptype == ICMP_UNREACH ||
3099                     icmptype == ICMP_SOURCEQUENCH ||
3100                     icmptype == ICMP_REDIRECT ||
3101                     icmptype == ICMP_TIMXCEED ||
3102                     icmptype == ICMP_PARAMPROB)
3103                         state_icmp++;
3104                 break;
3105 #endif /* INET */
3106 #ifdef INET6
3107         case IPPROTO_ICMPV6:
3108                 icmptype = pd->hdr.icmp6->icmp6_type;
3109                 icmpcode = pd->hdr.icmp6->icmp6_code;
3110                 icmpid = pd->hdr.icmp6->icmp6_id;
3111
3112                 if (icmptype == ICMP6_DST_UNREACH ||
3113                     icmptype == ICMP6_PACKET_TOO_BIG ||
3114                     icmptype == ICMP6_TIME_EXCEEDED ||
3115                     icmptype == ICMP6_PARAM_PROB)
3116                         state_icmp++;
3117                 break;
3118 #endif /* INET6 */
3119         }
3120
3121         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3122
3123         if (direction == PF_OUT) {
3124                 /* check outgoing packet for BINAT/NAT */
3125                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3126                     saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3127                         PF_ACPY(&pd->baddr, saddr, af);
3128                         switch (af) {
3129 #ifdef INET
3130                         case AF_INET:
3131                                 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3132                                     pd->naddr.v4.s_addr, 0);
3133                                 break;
3134 #endif /* INET */
3135 #ifdef INET6
3136                         case AF_INET6:
3137                                 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3138                                     &pd->naddr, 0);
3139                                 rewrite++;
3140                                 break;
3141 #endif /* INET6 */
3142                         }
3143                         if (nr->natpass)
3144                                 r = NULL;
3145                         pd->nat_rule = nr;
3146                 }
3147         } else {
3148                 /* check incoming packet for BINAT/RDR */
3149                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3150                     saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3151                         PF_ACPY(&pd->baddr, daddr, af);
3152                         switch (af) {
3153 #ifdef INET
3154                         case AF_INET:
3155                                 pf_change_a(&daddr->v4.s_addr,
3156                                     pd->ip_sum, pd->naddr.v4.s_addr, 0);
3157                                 break;
3158 #endif /* INET */
3159 #ifdef INET6
3160                         case AF_INET6:
3161                                 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3162                                     &pd->naddr, 0);
3163                                 rewrite++;
3164                                 break;
3165 #endif /* INET6 */
3166                         }
3167                         if (nr->natpass)
3168                                 r = NULL;
3169                         pd->nat_rule = nr;
3170                 }
3171         }
3172
3173         while (r != NULL) {
3174                 r->evaluations++;
3175                 if (r->kif != NULL &&
3176                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3177                         r = r->skip[PF_SKIP_IFP].ptr;
3178                 else if (r->direction && r->direction != direction)
3179                         r = r->skip[PF_SKIP_DIR].ptr;
3180                 else if (r->af && r->af != af)
3181                         r = r->skip[PF_SKIP_AF].ptr;
3182                 else if (r->proto && r->proto != pd->proto)
3183                         r = r->skip[PF_SKIP_PROTO].ptr;
3184                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.not))
3185                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3186                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.not))
3187                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3188                 else if (r->type && r->type != icmptype + 1)
3189                         r = TAILQ_NEXT(r, entries);
3190                 else if (r->code && r->code != icmpcode + 1)
3191                         r = TAILQ_NEXT(r, entries);
3192                 else if (r->tos && !(r->tos & pd->tos))
3193                         r = TAILQ_NEXT(r, entries);
3194                 else if (r->rule_flag & PFRULE_FRAGMENT)
3195                         r = TAILQ_NEXT(r, entries);
3196                 else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
3197                         r = TAILQ_NEXT(r, entries);
3198                 else if (r->anchorname[0] && r->anchor == NULL)
3199                         r = TAILQ_NEXT(r, entries);
3200                 else if (r->os_fingerprint != PF_OSFP_ANY)
3201                         r = TAILQ_NEXT(r, entries);
3202                 else {
3203                         if (r->tag)
3204                                 tag = r->tag;
3205                         if (r->anchor == NULL) {
3206                                 *rm = r;
3207                                 *am = a;
3208                                 *rsm = ruleset;
3209                                 if ((*rm)->quick)
3210                                         break;
3211                                 r = TAILQ_NEXT(r, entries);
3212                         } else
3213                                 PF_STEP_INTO_ANCHOR(r, a, ruleset,
3214                                     PF_RULESET_FILTER);
3215                 }
3216                 if (r == NULL && a != NULL)
3217                         PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3218                             PF_RULESET_FILTER);
3219         }
3220         r = *rm;
3221         a = *am;
3222         ruleset = *rsm;
3223
3224         REASON_SET(&reason, PFRES_MATCH);
3225
3226         if (r->log) {
3227 #ifdef INET6
3228                 if (rewrite)
3229                         m_copyback(m, off, sizeof(struct icmp6_hdr),
3230                             (caddr_t)pd->hdr.icmp6);
3231 #endif /* INET6 */
3232                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3233         }
3234
3235         if (r->action != PF_PASS)
3236                 return (PF_DROP);
3237
3238         pf_tag_packet(m, tag);
3239
3240         if (!state_icmp && (r->keep_state || nr != NULL)) {
3241                 /* create new state */
3242                 struct pf_state *s = NULL;
3243                 struct pf_src_node *sn = NULL;
3244
3245                 /* check maximums */
3246                 if (r->max_states && (r->states >= r->max_states))
3247                         goto cleanup;
3248                 /* src node for flter rule */
3249                 if ((r->rule_flag & PFRULE_SRCTRACK ||
3250                     r->rpool.opts & PF_POOL_STICKYADDR) &&
3251                     pf_insert_src_node(&sn, r, saddr, af) != 0)
3252                         goto cleanup;
3253                 /* src node for translation rule */
3254                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3255                     ((direction == PF_OUT &&
3256                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3257                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3258                         goto cleanup;
3259                 s = pool_get(&pf_state_pl, PR_NOWAIT);
3260                 if (s == NULL) {
3261 cleanup:
3262                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3263                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3264                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3265                                 pf_status.src_nodes--;
3266                                 pool_put(&pf_src_tree_pl, sn);
3267                         }
3268                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3269                             nsn->expire == 0) {
3270                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3271                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3272                                 pf_status.src_nodes--;
3273                                 pool_put(&pf_src_tree_pl, nsn);
3274                         }
3275                         REASON_SET(&reason, PFRES_MEMORY);
3276                         return (PF_DROP);
3277                 }
3278                 bzero(s, sizeof(*s));
3279                 r->states++;
3280                 if (a != NULL)
3281                         a->states++;
3282                 s->rule.ptr = r;
3283                 s->nat_rule.ptr = nr;
3284                 if (s->nat_rule.ptr != NULL)
3285                         s->nat_rule.ptr->states++;
3286                 s->anchor.ptr = a;
3287                 s->allow_opts = r->allow_opts;
3288                 s->log = r->log & 2;
3289                 s->proto = pd->proto;
3290                 s->direction = direction;
3291                 s->af = af;
3292                 if (direction == PF_OUT) {
3293                         PF_ACPY(&s->gwy.addr, saddr, af);
3294                         s->gwy.port = icmpid;
3295                         PF_ACPY(&s->ext.addr, daddr, af);
3296                         s->ext.port = icmpid;
3297                         if (nr != NULL)
3298                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
3299                         else
3300                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3301                         s->lan.port = icmpid;
3302                 } else {
3303                         PF_ACPY(&s->lan.addr, daddr, af);
3304                         s->lan.port = icmpid;
3305                         PF_ACPY(&s->ext.addr, saddr, af);
3306                         s->ext.port = icmpid;
3307                         if (nr != NULL)
3308                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3309                         else
3310                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3311                         s->gwy.port = icmpid;
3312                 }
3313                 s->creation = time_second;
3314                 s->expire = time_second;
3315                 s->timeout = PFTM_ICMP_FIRST_PACKET;
3316                 pf_set_rt_ifp(s, saddr);
3317                 if (sn != NULL) {
3318                         s->src_node = sn;
3319                         s->src_node->states++;
3320                 }
3321                 if (nsn != NULL) {
3322                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3323                         s->nat_src_node = nsn;
3324                         s->nat_src_node->states++;
3325                 }
3326                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3327                         REASON_SET(&reason, PFRES_MEMORY);
3328                         pf_src_tree_remove_state(s);
3329                         pool_put(&pf_state_pl, s);
3330                         return (PF_DROP);
3331                 } else
3332                         *sm = s;
3333         }
3334
3335 #ifdef INET6
3336         /* copy back packet headers if we performed IPv6 NAT operations */
3337         if (rewrite)
3338                 m_copyback(m, off, sizeof(struct icmp6_hdr),
3339                     (caddr_t)pd->hdr.icmp6);
3340 #endif /* INET6 */
3341
3342         return (PF_PASS);
3343 }
3344
3345 int
3346 pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3347     struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3348     struct pf_rule **am, struct pf_ruleset **rsm)
3349 {
3350         struct pf_rule          *nr = NULL;
3351         struct pf_rule          *r, *a = NULL;
3352         struct pf_ruleset       *ruleset = NULL;
3353         struct pf_src_node      *nsn = NULL;
3354         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
3355         sa_family_t              af = pd->af;
3356         u_short                  reason;
3357         int                      tag = -1;
3358
3359         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3360
3361         if (direction == PF_OUT) {
3362                 /* check outgoing packet for BINAT/NAT */
3363                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3364                     saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3365                         PF_ACPY(&pd->baddr, saddr, af);
3366                         switch (af) {
3367 #ifdef INET
3368                         case AF_INET:
3369                                 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3370                                     pd->naddr.v4.s_addr, 0);
3371                                 break;
3372 #endif /* INET */
3373 #ifdef INET6
3374                         case AF_INET6:
3375                                 PF_ACPY(saddr, &pd->naddr, af);
3376                                 break;
3377 #endif /* INET6 */
3378                         }
3379                         if (nr->natpass)
3380                                 r = NULL;
3381                         pd->nat_rule = nr;
3382                 }
3383         } else {
3384                 /* check incoming packet for BINAT/RDR */
3385                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3386                     saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3387                         PF_ACPY(&pd->baddr, daddr, af);
3388                         switch (af) {
3389 #ifdef INET
3390                         case AF_INET:
3391                                 pf_change_a(&daddr->v4.s_addr,
3392                                     pd->ip_sum, pd->naddr.v4.s_addr, 0);
3393                                 break;
3394 #endif /* INET */
3395 #ifdef INET6
3396                         case AF_INET6:
3397                                 PF_ACPY(daddr, &pd->naddr, af);
3398                                 break;
3399 #endif /* INET6 */
3400                         }
3401                         if (nr->natpass)
3402                                 r = NULL;
3403                         pd->nat_rule = nr;
3404                 }
3405         }
3406
3407         while (r != NULL) {
3408                 r->evaluations++;
3409                 if (r->kif != NULL &&
3410                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3411                         r = r->skip[PF_SKIP_IFP].ptr;
3412                 else if (r->direction && r->direction != direction)
3413                         r = r->skip[PF_SKIP_DIR].ptr;
3414                 else if (r->af && r->af != af)
3415                         r = r->skip[PF_SKIP_AF].ptr;
3416                 else if (r->proto && r->proto != pd->proto)
3417                         r = r->skip[PF_SKIP_PROTO].ptr;
3418                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3419                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3420                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3421                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3422                 else if (r->tos && !(r->tos & pd->tos))
3423                         r = TAILQ_NEXT(r, entries);
3424                 else if (r->rule_flag & PFRULE_FRAGMENT)
3425                         r = TAILQ_NEXT(r, entries);
3426                 else if (r->match_tag && !pf_match_tag(m, r, nr, &tag))
3427                         r = TAILQ_NEXT(r, entries);
3428                 else if (r->anchorname[0] && r->anchor == NULL)
3429                         r = TAILQ_NEXT(r, entries);
3430                 else if (r->os_fingerprint != PF_OSFP_ANY)
3431                         r = TAILQ_NEXT(r, entries);
3432                 else {
3433                         if (r->tag)
3434                                 tag = r->tag;
3435                         if (r->anchor == NULL) {
3436                                 *rm = r;
3437                                 *am = a;
3438                                 *rsm = ruleset;
3439                                 if ((*rm)->quick)
3440                                         break;
3441                                 r = TAILQ_NEXT(r, entries);
3442                         } else
3443                                 PF_STEP_INTO_ANCHOR(r, a, ruleset,
3444                                     PF_RULESET_FILTER);
3445                 }
3446                 if (r == NULL && a != NULL)
3447                         PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3448                             PF_RULESET_FILTER);
3449         }
3450         r = *rm;
3451         a = *am;
3452         ruleset = *rsm;
3453
3454         REASON_SET(&reason, PFRES_MATCH);
3455
3456         if (r->log)
3457                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3458
3459         if ((r->action == PF_DROP) &&
3460             ((r->rule_flag & PFRULE_RETURNICMP) ||
3461             (r->rule_flag & PFRULE_RETURN))) {
3462                 struct pf_addr *a = NULL;
3463
3464                 if (nr != NULL) {
3465                         if (direction == PF_OUT)
3466                                 a = saddr;
3467                         else
3468                                 a = daddr;
3469                 }
3470                 if (a != NULL) {
3471                         switch (af) {
3472 #ifdef INET
3473                         case AF_INET:
3474                                 pf_change_a(&a->v4.s_addr, pd->ip_sum,
3475                                     pd->baddr.v4.s_addr, 0);
3476                                 break;
3477 #endif /* INET */
3478 #ifdef INET6
3479                         case AF_INET6:
3480                                 PF_ACPY(a, &pd->baddr, af);
3481                                 break;
3482 #endif /* INET6 */
3483                         }
3484                 }
3485                 if ((af == AF_INET) && r->return_icmp)
3486                         pf_send_icmp(m, r->return_icmp >> 8,
3487                             r->return_icmp & 255, af, r);
3488                 else if ((af == AF_INET6) && r->return_icmp6)
3489                         pf_send_icmp(m, r->return_icmp6 >> 8,
3490                             r->return_icmp6 & 255, af, r);
3491         }
3492
3493         if (r->action != PF_PASS)
3494                 return (PF_DROP);
3495
3496         pf_tag_packet(m, tag);
3497
3498         if (r->keep_state || nr != NULL) {
3499                 /* create new state */
3500                 struct pf_state *s = NULL;
3501                 struct pf_src_node *sn = NULL;
3502
3503                 /* check maximums */
3504                 if (r->max_states && (r->states >= r->max_states))
3505                         goto cleanup;
3506                 /* src node for flter rule */
3507                 if ((r->rule_flag & PFRULE_SRCTRACK ||
3508                     r->rpool.opts & PF_POOL_STICKYADDR) &&
3509                     pf_insert_src_node(&sn, r, saddr, af) != 0)
3510                         goto cleanup;
3511                 /* src node for translation rule */
3512                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3513                     ((direction == PF_OUT &&
3514                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3515                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0)))
3516                         goto cleanup;
3517                 s = pool_get(&pf_state_pl, PR_NOWAIT);
3518                 if (s == NULL) {
3519 cleanup:
3520                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3521                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3522                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3523                                 pf_status.src_nodes--;
3524                                 pool_put(&pf_src_tree_pl, sn);
3525                         }
3526                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3527                             nsn->expire == 0) {
3528                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3529                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3530                                 pf_status.src_nodes--;
3531                                 pool_put(&pf_src_tree_pl, nsn);
3532                         }
3533                         REASON_SET(&reason, PFRES_MEMORY);
3534                         return (PF_DROP);
3535                 }
3536                 bzero(s, sizeof(*s));
3537                 r->states++;
3538                 if (a != NULL)
3539                         a->states++;
3540                 s->rule.ptr = r;
3541                 s->nat_rule.ptr = nr;
3542                 if (s->nat_rule.ptr != NULL)
3543                         s->nat_rule.ptr->states++;
3544                 s->anchor.ptr = a;
3545                 s->allow_opts = r->allow_opts;
3546                 s->log = r->log & 2;
3547                 s->proto = pd->proto;
3548                 s->direction = direction;
3549                 s->af = af;
3550                 if (direction == PF_OUT) {
3551                         PF_ACPY(&s->gwy.addr, saddr, af);
3552                         PF_ACPY(&s->ext.addr, daddr, af);
3553                         if (nr != NULL)
3554                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
3555                         else
3556                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3557                 } else {
3558                         PF_ACPY(&s->lan.addr, daddr, af);
3559                         PF_ACPY(&s->ext.addr, saddr, af);
3560                         if (nr != NULL)
3561                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3562                         else
3563                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3564                 }
3565                 s->src.state = PFOTHERS_SINGLE;
3566                 s->dst.state = PFOTHERS_NO_TRAFFIC;
3567                 s->creation = time_second;
3568                 s->expire = time_second;
3569                 s->timeout = PFTM_OTHER_FIRST_PACKET;
3570                 pf_set_rt_ifp(s, saddr);
3571                 if (sn != NULL) {
3572                         s->src_node = sn;
3573                         s->src_node->states++;
3574                 }
3575                 if (nsn != NULL) {
3576                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3577                         s->nat_src_node = nsn;
3578                         s->nat_src_node->states++;
3579                 }
3580                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3581                         REASON_SET(&reason, PFRES_MEMORY);
3582                         pf_src_tree_remove_state(s);
3583                         pool_put(&pf_state_pl, s);
3584                         return (PF_DROP);
3585                 } else
3586                         *sm = s;
3587         }
3588
3589         return (PF_PASS);
3590 }
3591
3592 int
3593 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3594     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3595     struct pf_ruleset **rsm)
3596 {
3597         struct pf_rule          *r, *a = NULL;
3598         struct pf_ruleset       *ruleset = NULL;
3599         sa_family_t              af = pd->af;
3600         u_short                  reason;
3601         int                      tag = -1;
3602
3603         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3604         while (r != NULL) {
3605                 r->evaluations++;
3606                 if (r->kif != NULL &&
3607                     (r->kif != kif && r->kif != kif->pfik_parent) == !r->ifnot)
3608                         r = r->skip[PF_SKIP_IFP].ptr;
3609                 else if (r->direction && r->direction != direction)
3610                         r = r->skip[PF_SKIP_DIR].ptr;
3611                 else if (r->af && r->af != af)
3612                         r = r->skip[PF_SKIP_AF].ptr;
3613                 else if (r->proto && r->proto != pd->proto)
3614                         r = r->skip[PF_SKIP_PROTO].ptr;
3615                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.not))
3616                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3617                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.not))
3618                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3619                 else if (r->tos && !(r->tos & pd->tos))
3620                         r = TAILQ_NEXT(r, entries);
3621                 else if (r->src.port_op || r->dst.port_op ||
3622                     r->flagset || r->type || r->code ||
3623                     r->os_fingerprint != PF_OSFP_ANY)
3624                         r = TAILQ_NEXT(r, entries);
3625                 else if (r->match_tag && !pf_match_tag(m, r, NULL, &tag))
3626                         r = TAILQ_NEXT(r, entries);
3627                 else if (r->anchorname[0] && r->anchor == NULL)
3628                         r = TAILQ_NEXT(r, entries);
3629                 else {
3630                         if (r->anchor == NULL) {
3631                                 *rm = r;
3632                                 *am = a;
3633                                 *rsm = ruleset;
3634                                 if ((*rm)->quick)
3635                                         break;
3636                                 r = TAILQ_NEXT(r, entries);
3637                         } else
3638                                 PF_STEP_INTO_ANCHOR(r, a, ruleset,
3639                                     PF_RULESET_FILTER);
3640                 }
3641                 if (r == NULL && a != NULL)
3642                         PF_STEP_OUT_OF_ANCHOR(r, a, ruleset,
3643                             PF_RULESET_FILTER);
3644         }
3645         r = *rm;
3646         a = *am;
3647         ruleset = *rsm;
3648
3649         REASON_SET(&reason, PFRES_MATCH);
3650
3651         if (r->log)
3652                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset);
3653
3654         if (r->action != PF_PASS)
3655                 return (PF_DROP);
3656
3657   &