pf: Update packet filter to the version that comes with OpenBSD 4.1
[dragonfly.git] / sys / net / pf / pf.c
1 /*      $FreeBSD: src/sys/contrib/pf/net/pf.c,v 1.19 2004/09/11 11:18:25 mlaier Exp $   */
2 /*      $OpenBSD: pf.c,v 1.433.2.2 2004/07/17 03:22:34 brad Exp $ */
3 /* add  $OpenBSD: pf.c,v 1.448 2004/05/11 07:34:11 dhartmei Exp $ */
4 /*      $DragonFly: src/sys/net/pf/pf.c,v 1.20 2008/06/05 18:06:32 swildner Exp $ */
5 /*      $OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */
6
7 /*
8  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
9  *
10  * Copyright (c) 2001 Daniel Hartmeier
11  * Copyright (c) 2002,2003 Henning Brauer
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  *
18  *    - Redistributions of source code must retain the above copyright
19  *      notice, this list of conditions and the following disclaimer.
20  *    - Redistributions in binary form must reproduce the above
21  *      copyright notice, this list of conditions and the following
22  *      disclaimer in the documentation and/or other materials provided
23  *      with the distribution.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  *
38  * Effort sponsored in part by the Defense Advanced Research Projects
39  * Agency (DARPA) and Air Force Research Laboratory, Air Force
40  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
41  *
42  */
43
44 #include "opt_inet.h"
45 #include "opt_inet6.h"
46 #include "use_pfsync.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/filio.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/kernel.h>
56 #include <sys/time.h>
57 #include <sys/sysctl.h>
58 #include <sys/endian.h>
59 #include <vm/vm_zone.h>
60 #include <sys/proc.h>
61 #include <sys/kthread.h>
62
63 #include <machine/inttypes.h>
64
65 #include <net/if.h>
66 #include <net/if_types.h>
67 #include <net/bpf.h>
68 #include <net/netisr.h>
69 #include <net/route.h>
70
71 #include <netinet/in.h>
72 #include <netinet/in_var.h>
73 #include <netinet/in_systm.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip_var.h>
76 #include <netinet/tcp.h>
77 #include <netinet/tcp_seq.h>
78 #include <netinet/udp.h>
79 #include <netinet/ip_icmp.h>
80 #include <netinet/in_pcb.h>
81 #include <netinet/tcp_timer.h>
82 #include <netinet/tcp_var.h>
83 #include <netinet/udp_var.h>
84 #include <netinet/icmp_var.h>
85 #include <netinet/if_ether.h>
86
87 #include <net/pf/pfvar.h>
88 #include <net/pf/if_pflog.h>
89
90 #if NPFSYNC > 0
91 #include <net/pf/if_pfsync.h>
92 #endif /* NPFSYNC > 0 */
93
94 #ifdef INET6
95 #include <netinet/ip6.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/icmp6.h>
98 #include <netinet6/nd6.h>
99 #include <netinet6/ip6_var.h>
100 #include <netinet6/in6_pcb.h>
101 #endif /* INET6 */
102
103 #include <sys/in_cksum.h>
104 #include <sys/ucred.h>
105 #include <machine/limits.h>
106 #include <sys/msgport2.h>
107 #include <net/netmsg2.h>
108
109 extern int ip_optcopy(struct ip *, struct ip *);
110 extern int debug_pfugidhack;
111
/*
 * Debug printf.  `x` is a complete, parenthesised kprintf() argument list;
 * it is printed only when pf_status.debug is at least `n`.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement.  With a bare `if`, an `else` written after an unbraced
 * `if (cond) DPFPRINTF(...);` would silently attach to the macro's hidden
 * `if` instead of the caller's.
 */
#define DPFPRINTF(n, x)                                                 \
        do {                                                            \
                if (pf_status.debug >= (n))                             \
                        kprintf x;                                      \
        } while (0)
113
114 /*
115  * Global variables
116  */
117
118 struct pf_altqqueue      pf_altqs[2];
119 struct pf_palist         pf_pabuf;
120 struct pf_altqqueue     *pf_altqs_active;
121 struct pf_altqqueue     *pf_altqs_inactive;
122 struct pf_status         pf_status;
123
124 u_int32_t                ticket_altqs_active;
125 u_int32_t                ticket_altqs_inactive;
126 int                      altqs_inactive_open;
127 u_int32_t                ticket_pabuf;
128
/*
 * One frame of the anchor stack: a ruleset, a rule within it, and the
 * parent node / child anchor recorded alongside them.
 * NOTE(review): presumably pushed/popped while stepping into and out of
 * anchors (see pf_step_out_of_anchor) -- confirm against the users.
 */
struct pf_anchor_stackframe {
        struct pf_ruleset       *rs;
        struct pf_rule          *r;
        struct pf_anchor_node   *parent;
        struct pf_anchor        *child;
};

/* Fixed-size stack of 64 frames. */
struct pf_anchor_stackframe      pf_anchor_stack[64];
135
136 vm_zone_t                pf_src_tree_pl, pf_rule_pl;
137 vm_zone_t                pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
138
139 void                     pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
140
141 void                     pf_init_threshold(struct pf_threshold *, u_int32_t,
142                             u_int32_t);
143 void                     pf_add_threshold(struct pf_threshold *);
144 int                      pf_check_threshold(struct pf_threshold *);
145
146 void                     pf_change_ap(struct pf_addr *, u_int16_t *,
147                             u_int16_t *, u_int16_t *, struct pf_addr *,
148                             u_int16_t, u_int8_t, sa_family_t);
149 int                      pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
150                             struct tcphdr *, struct pf_state_peer *);
151 #ifdef INET6
152 void                     pf_change_a6(struct pf_addr *, u_int16_t *,
153                             struct pf_addr *, u_int8_t);
154 #endif /* INET6 */
155 void                     pf_change_icmp(struct pf_addr *, u_int16_t *,
156                             struct pf_addr *, struct pf_addr *, u_int16_t,
157                             u_int16_t *, u_int16_t *, u_int16_t *,
158                             u_int16_t *, u_int8_t, sa_family_t);
159 void                     pf_send_tcp(const struct pf_rule *, sa_family_t,
160                             const struct pf_addr *, const struct pf_addr *,
161                             u_int16_t, u_int16_t, u_int32_t, u_int32_t,
162                             u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
163                             u_int16_t, struct ether_header *, struct ifnet *);
164 void                     pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
165                             sa_family_t, struct pf_rule *);
166 struct pf_rule          *pf_match_translation(struct pf_pdesc *, struct mbuf *,
167                             int, int, struct pfi_kif *,
168                             struct pf_addr *, u_int16_t, struct pf_addr *,
169                             u_int16_t, int);
170 struct pf_rule          *pf_get_translation(struct pf_pdesc *, struct mbuf *,
171                             int, int, struct pfi_kif *, struct pf_src_node **,
172                             struct pf_addr *, u_int16_t,
173                             struct pf_addr *, u_int16_t,
174                             struct pf_addr *, u_int16_t *);
175 int                      pf_test_tcp(struct pf_rule **, struct pf_state **,
176                             int, struct pfi_kif *, struct mbuf *, int,
177                             void *, struct pf_pdesc *, struct pf_rule **,
178                             struct pf_ruleset **, struct ifqueue *, struct inpcb *);
179 int                      pf_test_udp(struct pf_rule **, struct pf_state **,
180                             int, struct pfi_kif *, struct mbuf *, int,
181                             void *, struct pf_pdesc *, struct pf_rule **,
182                             struct pf_ruleset **, struct ifqueue *, struct inpcb *);
183 int                      pf_test_icmp(struct pf_rule **, struct pf_state **,
184                             int, struct pfi_kif *, struct mbuf *, int,
185                             void *, struct pf_pdesc *, struct pf_rule **,
186                             struct pf_ruleset **, struct ifqueue *);
187 int                      pf_test_other(struct pf_rule **, struct pf_state **,
188                             int, struct pfi_kif *, struct mbuf *, int, void *,
189                             struct pf_pdesc *, struct pf_rule **,
190                             struct pf_ruleset **, struct ifqueue *);
191 int                      pf_test_fragment(struct pf_rule **, int,
192                             struct pfi_kif *, struct mbuf *, void *,
193                             struct pf_pdesc *, struct pf_rule **,
194                             struct pf_ruleset **);
195 int                      pf_test_state_tcp(struct pf_state **, int,
196                             struct pfi_kif *, struct mbuf *, int,
197                             void *, struct pf_pdesc *, u_short *);
198 int                      pf_test_state_udp(struct pf_state **, int,
199                             struct pfi_kif *, struct mbuf *, int,
200                             void *, struct pf_pdesc *);
201 int                      pf_test_state_icmp(struct pf_state **, int,
202                             struct pfi_kif *, struct mbuf *, int,
203                             void *, struct pf_pdesc *, u_short *);
204 int                      pf_test_state_other(struct pf_state **, int,
205                             struct pfi_kif *, struct pf_pdesc *);
206 int                      pf_match_tag(struct mbuf *, struct pf_rule *,
207                              struct pf_mtag *, int *);
208 int                      pf_step_out_of_anchor(int *, struct pf_ruleset **,
209                              int, struct pf_rule **, struct pf_rule **,
210                              int *);
211 void                     pf_hash(struct pf_addr *, struct pf_addr *,
212                             struct pf_poolhashkey *, sa_family_t);
213 int                      pf_map_addr(u_int8_t, struct pf_rule *,
214                             struct pf_addr *, struct pf_addr *,
215                             struct pf_addr *, struct pf_src_node **);
216 int                      pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
217                             struct pf_addr *, struct pf_addr *, u_int16_t,
218                             struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
219                             struct pf_src_node **);
220 void                     pf_route(struct mbuf **, struct pf_rule *, int,
221                             struct ifnet *, struct pf_state *,
222                             struct pf_pdesc *);
223 void                     pf_route6(struct mbuf **, struct pf_rule *, int,
224                             struct ifnet *, struct pf_state *,
225                             struct pf_pdesc *);
226 u_int8_t                 pf_get_wscale(struct mbuf *, int, u_int16_t,
227                             sa_family_t);
228 u_int16_t                pf_get_mss(struct mbuf *, int, u_int16_t,
229                             sa_family_t);
230 u_int16_t                pf_calc_mss(struct pf_addr *, sa_family_t,
231                                 u_int16_t);
232 void                     pf_set_rt_ifp(struct pf_state *,
233                             struct pf_addr *);
234 int                      pf_check_proto_cksum(struct mbuf *, int, int,
235                             u_int8_t, sa_family_t);
236 int                      pf_addr_wrap_neq(struct pf_addr_wrap *,
237                             struct pf_addr_wrap *);
238 struct pf_state         *pf_find_state_recurse(struct pfi_kif *,
239                             struct pf_state_cmp *, u_int8_t);
240 int                      pf_src_connlimit(struct pf_state **);
241 int                      pf_check_congestion(struct ifqueue *);
242
243 extern int pf_end_threads;
244
245 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
246         { &pf_state_pl, PFSTATE_HIWAT },
247         { &pf_src_tree_pl, PFSNODE_HIWAT },
248         { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
249         { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
250         { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
251 };
252
/*
 * Look up the state matching `key` and store it in *state.
 *
 * Relies on the caller having `direction`, `state`, `kif` and `key` in
 * scope, and RETURNS FROM THE CALLING FUNCTION in two cases:
 *   - PF_DROP when no state is found or the state is marked PFTM_PURGE;
 *   - PF_PASS for an outbound packet whose state carries a route-to
 *     (out rule) or reply-to (in rule) binding to a different interface.
 * Inbound packets are searched in the ext/gwy tree, all others in the
 * lan/ext tree.
 */
#define STATE_LOOKUP()                                                  \
        do {                                                            \
                *state = pf_find_state_recurse(kif, &key,               \
                    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);    \
                if (*state == NULL || (*state)->timeout == PFTM_PURGE)  \
                        return (PF_DROP);                               \
                if (direction == PF_OUT &&                              \
                    (*state)->rt_kif != NULL &&                         \
                    (*state)->rt_kif != kif &&                          \
                    (((*state)->rule.ptr->rt == PF_ROUTETO &&           \
                    (*state)->rule.ptr->direction == PF_OUT) ||         \
                    ((*state)->rule.ptr->rt == PF_REPLYTO &&            \
                    (*state)->rule.ptr->direction == PF_IN)))           \
                        return (PF_PASS);                               \
        } while (0)
272
/*
 * Non-zero when state `s` involves address or port translation, i.e. its
 * lan and gwy endpoints differ.  The first address word is always
 * compared; words 1..3 are compared only for AF_INET6.
 *
 * The whole expansion is parenthesised so the macro can be negated or
 * combined with other operators without changing its meaning.  Note that
 * `s` is evaluated several times; pass an expression without side effects.
 */
#define STATE_TRANSLATE(s) \
        ((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
        ((s)->af == AF_INET6 && \
        ((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
        (s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
        (s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
        (s)->lan.port != (s)->gwy.port)
280
/*
 * Interface a new state is attached to: `k` when rule `r` has
 * PFRULE_IFBOUND set, otherwise the global pfi_all.
 * Fully parenthesised so it can be embedded in a larger expression.
 */
#define BOUND_IFACE(r, k) \
        (((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
283
/*
 * Account for a new state `s`: bump the `states` counter of its rule and,
 * when set, of its anchor and NAT rule.
 * `s` is parenthesised at every use so any pointer expression (for
 * example `*statep`) may be passed; it is evaluated more than once.
 */
#define STATE_INC_COUNTERS(s)                           \
        do {                                            \
                (s)->rule.ptr->states++;                \
                if ((s)->anchor.ptr != NULL)            \
                        (s)->anchor.ptr->states++;      \
                if ((s)->nat_rule.ptr != NULL)          \
                        (s)->nat_rule.ptr->states++;    \
        } while (0)
292
/*
 * Undo the accounting for state `s`: decrement the `states` counter of
 * its NAT rule and anchor when set, then of its rule.
 * `s` is parenthesised at every use so any pointer expression (for
 * example `*statep`) may be passed; it is evaluated more than once.
 */
#define STATE_DEC_COUNTERS(s)                           \
        do {                                            \
                if ((s)->nat_rule.ptr != NULL)          \
                        (s)->nat_rule.ptr->states--;    \
                if ((s)->anchor.ptr != NULL)            \
                        (s)->anchor.ptr->states--;      \
                (s)->rule.ptr->states--;                \
        } while (0)
301
302 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
303 static __inline int pf_state_compare_lan_ext(struct pf_state *,
304         struct pf_state *);
305 static __inline int pf_state_compare_ext_gwy(struct pf_state *,
306         struct pf_state *);
307 static __inline int pf_state_compare_id(struct pf_state *,
308         struct pf_state *);
309
310 struct pf_src_tree tree_src_tracking;
311
312 struct pf_state_tree_id tree_id;
313 struct pf_state_queue state_list;
314
315 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
316 RB_GENERATE(pf_state_tree_lan_ext, pf_state,
317     u.s.entry_lan_ext, pf_state_compare_lan_ext);
318 RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
319     u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
320 RB_GENERATE(pf_state_tree_id, pf_state,
321     u.s.entry_id, pf_state_compare_id);
322
323 static __inline int
324 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
325 {
326         int     diff;
327
328         if (a->rule.ptr > b->rule.ptr)
329                 return (1);
330         if (a->rule.ptr < b->rule.ptr)
331                 return (-1);
332         if ((diff = a->af - b->af) != 0)
333                 return (diff);
334         switch (a->af) {
335 #ifdef INET
336         case AF_INET:
337                 if (a->addr.addr32[0] > b->addr.addr32[0])
338                         return (1);
339                 if (a->addr.addr32[0] < b->addr.addr32[0])
340                         return (-1);
341                 break;
342 #endif /* INET */
343 #ifdef INET6
344         case AF_INET6:
345                 if (a->addr.addr32[3] > b->addr.addr32[3])
346                         return (1);
347                 if (a->addr.addr32[3] < b->addr.addr32[3])
348                         return (-1);
349                 if (a->addr.addr32[2] > b->addr.addr32[2])
350                         return (1);
351                 if (a->addr.addr32[2] < b->addr.addr32[2])
352                         return (-1);
353                 if (a->addr.addr32[1] > b->addr.addr32[1])
354                         return (1);
355                 if (a->addr.addr32[1] < b->addr.addr32[1])
356                         return (-1);
357                 if (a->addr.addr32[0] > b->addr.addr32[0])
358                         return (1);
359                 if (a->addr.addr32[0] < b->addr.addr32[0])
360                         return (-1);
361                 break;
362 #endif /* INET6 */
363         }
364         return (0);
365 }
366
367 u_int32_t
368 pf_state_hash(struct pf_state *s)
369 {
370         u_int32_t hv = (intptr_t)s / sizeof(*s);
371
372         hv ^= crc32(&s->lan, sizeof(s->lan));
373         hv ^= crc32(&s->gwy, sizeof(s->gwy));
374         hv ^= crc32(&s->ext, sizeof(s->ext));
375         if (hv == 0)    /* disallow 0 */
376                 hv = 1;
377         return(hv);
378 }
379
380 static __inline int
381 pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
382 {
383         int     diff;
384
385         if ((diff = a->proto - b->proto) != 0)
386                 return (diff);
387         if ((diff = a->af - b->af) != 0)
388                 return (diff);
389         switch (a->af) {
390 #ifdef INET
391         case AF_INET:
392                 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
393                         return (1);
394                 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
395                         return (-1);
396                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
397                         return (1);
398                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
399                         return (-1);
400                 break;
401 #endif /* INET */
402 #ifdef INET6
403         case AF_INET6:
404                 if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
405                         return (1);
406                 if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
407                         return (-1);
408                 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
409                         return (1);
410                 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
411                         return (-1);
412                 if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
413                         return (1);
414                 if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
415                         return (-1);
416                 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
417                         return (1);
418                 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
419                         return (-1);
420                 if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
421                         return (1);
422                 if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
423                         return (-1);
424                 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
425                         return (1);
426                 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
427                         return (-1);
428                 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
429                         return (1);
430                 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
431                         return (-1);
432                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
433                         return (1);
434                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
435                         return (-1);
436                 break;
437 #endif /* INET6 */
438         }
439
440         if ((diff = a->lan.port - b->lan.port) != 0)
441                 return (diff);
442         if ((diff = a->ext.port - b->ext.port) != 0)
443                 return (diff);
444
445         return (0);
446 }
447
448 static __inline int
449 pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
450 {
451         int     diff;
452
453         if ((diff = a->proto - b->proto) != 0)
454                 return (diff);
455         if ((diff = a->af - b->af) != 0)
456                 return (diff);
457         switch (a->af) {
458 #ifdef INET
459         case AF_INET:
460                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
461                         return (1);
462                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
463                         return (-1);
464                 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
465                         return (1);
466                 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
467                         return (-1);
468                 break;
469 #endif /* INET */
470 #ifdef INET6
471         case AF_INET6:
472                 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
473                         return (1);
474                 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
475                         return (-1);
476                 if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
477                         return (1);
478                 if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
479                         return (-1);
480                 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
481                         return (1);
482                 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
483                         return (-1);
484                 if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
485                         return (1);
486                 if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
487                         return (-1);
488                 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
489                         return (1);
490                 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
491                         return (-1);
492                 if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
493                         return (1);
494                 if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
495                         return (-1);
496                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
497                         return (1);
498                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
499                         return (-1);
500                 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
501                         return (1);
502                 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
503                         return (-1);
504                 break;
505 #endif /* INET6 */
506         }
507
508         if ((diff = a->ext.port - b->ext.port) != 0)
509                 return (diff);
510         if ((diff = a->gwy.port - b->gwy.port) != 0)
511                 return (diff);
512
513         return (0);
514 }
515
516 static __inline int
517 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
518 {
519         if (a->id > b->id)
520                 return (1);
521         if (a->id < b->id)
522                 return (-1);
523         if (a->creatorid > b->creatorid)
524                 return (1);
525         if (a->creatorid < b->creatorid)
526                 return (-1);
527
528         return (0);
529 }
530
#ifdef INET6
/*
 * Copy an address of family `af` from `src` to `dst`: one 32-bit word for
 * AF_INET (when INET is configured), four words for AF_INET6.  Any other
 * family leaves `dst` untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
        int     w;

#ifdef INET
        if (af == AF_INET) {
                dst->addr32[0] = src->addr32[0];
                return;
        }
#endif /* INET */
        if (af != AF_INET6)
                return;

        for (w = 0; w < 4; w++)
                dst->addr32[w] = src->addr32[w];
}
#endif /* INET6 */
550
551 struct pf_state *
552 pf_find_state_byid(struct pf_state_cmp *key)
553 {
554         pf_status.fcounters[FCNT_STATE_SEARCH]++;
555         return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
556 }
557
558 struct pf_state *
559 pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree)
560 {
561         struct pf_state *s;
562
563         pf_status.fcounters[FCNT_STATE_SEARCH]++;
564
565         switch (tree) {
566         case PF_LAN_EXT:
567                 if ((s = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext,
568                     (struct pf_state *)key)) != NULL)
569                         return (s);
570                 if ((s = RB_FIND(pf_state_tree_lan_ext, &pfi_all->pfik_lan_ext,
571                     (struct pf_state *)key)) != NULL)
572                         return (s);
573                 return (NULL);
574         case PF_EXT_GWY:
575                 if ((s = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy,
576                     (struct pf_state *)key)) != NULL)
577                         return (s);
578                 if ((s = RB_FIND(pf_state_tree_ext_gwy, &pfi_all->pfik_ext_gwy,
579                     (struct pf_state *)key)) != NULL)
580                         return (s);
581                 return (NULL);
582         default:
583                 panic("pf_find_state_recurse");
584         }
585 }
586
587 struct pf_state *
588 pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more)
589 {
590         struct pf_state *s, *ss = NULL;
591         struct pfi_kif  *kif;
592
593         pf_status.fcounters[FCNT_STATE_SEARCH]++;
594
595         switch (tree) {
596         case PF_LAN_EXT:
597                 TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
598                         s = RB_FIND(pf_state_tree_lan_ext,
599                             &kif->pfik_lan_ext, (struct pf_state *)key);
600                         if (s == NULL)
601                                 continue;
602                         if (more == NULL)
603                                 return (s);
604                         ss = s;
605                         (*more)++;
606                 }
607                 return (ss);
608         case PF_EXT_GWY:
609                 TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
610                         s = RB_FIND(pf_state_tree_ext_gwy,
611                             &kif->pfik_ext_gwy, (struct pf_state *)key);
612                         if (s == NULL)
613                                 continue;
614                         if (more == NULL)
615                                 return (s);
616                         ss = s;
617                         (*more)++;
618                 }
619                 return (ss);
620         default:
621                 panic("pf_find_state_all");
622         }
623 }
624
625 void
626 pf_init_threshold(struct pf_threshold *threshold,
627     u_int32_t limit, u_int32_t seconds)
628 {
629         threshold->limit = limit * PF_THRESHOLD_MULT;
630         threshold->seconds = seconds;
631         threshold->count = 0;
632         threshold->last = time_second;
633 }
634
635 void
636 pf_add_threshold(struct pf_threshold *threshold)
637 {
638         u_int32_t t = time_second, diff = t - threshold->last;
639
640         if (diff >= threshold->seconds)
641                 threshold->count = 0;
642         else
643                 threshold->count -= threshold->count * diff /
644                     threshold->seconds;
645         threshold->count += PF_THRESHOLD_MULT;
646         threshold->last = t;
647 }
648
649 int
650 pf_check_threshold(struct pf_threshold *threshold)
651 {
652         return (threshold->count > threshold->limit);
653 }
654
655 int
656 pf_src_connlimit(struct pf_state **state)
657 {
658         struct pf_state *s;
659         int bad = 0;
660
661         (*state)->src_node->conn++;
662         (*state)->src.tcp_est = 1;
663         pf_add_threshold(&(*state)->src_node->conn_rate);
664
665         if ((*state)->rule.ptr->max_src_conn &&
666             (*state)->rule.ptr->max_src_conn <
667             (*state)->src_node->conn) {
668                 pf_status.lcounters[LCNT_SRCCONN]++;
669                 bad++;
670         }
671
672         if ((*state)->rule.ptr->max_src_conn_rate.limit &&
673             pf_check_threshold(&(*state)->src_node->conn_rate)) {
674                 pf_status.lcounters[LCNT_SRCCONNRATE]++;
675                 bad++;
676         }
677
678         if (!bad)
679                 return (0);
680
681         if ((*state)->rule.ptr->overload_tbl) {
682                 struct pfr_addr p;
683                 u_int32_t       killed = 0;
684
685                 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
686                 if (pf_status.debug >= PF_DEBUG_MISC) {
687                         kprintf("pf_src_connlimit: blocking address ");
688                         pf_print_host(&(*state)->src_node->addr, 0,
689                             (*state)->af);
690                 }
691
692                 bzero(&p, sizeof(p));
693                 p.pfra_af = (*state)->af;
694                 switch ((*state)->af) {
695 #ifdef INET
696                 case AF_INET:
697                         p.pfra_net = 32;
698                         p.pfra_ip4addr = (*state)->src_node->addr.v4;
699                         break;
700 #endif /* INET */
701 #ifdef INET6
702                 case AF_INET6:
703                         p.pfra_net = 128;
704                         p.pfra_ip6addr = (*state)->src_node->addr.v6;
705                         break;
706 #endif /* INET6 */
707                 }
708
709                 pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
710                     &p, time_second);
711
712                 /* kill existing states if that's required. */
713                 if ((*state)->rule.ptr->flush) {
714                         pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
715
716                         RB_FOREACH(s, pf_state_tree_id, &tree_id) {
717                                 /*
718                                  * Kill states from this source.  (Only those
719                                  * from the same rule if PF_FLUSH_GLOBAL is not
720                                  * set)
721                                  */
722                                 if (s->af == (*state)->af &&
723                                     (((*state)->direction == PF_OUT &&
724                                     PF_AEQ(&(*state)->src_node->addr,
725                                     &s->lan.addr, s->af)) ||
726                                     ((*state)->direction == PF_IN &&
727                                     PF_AEQ(&(*state)->src_node->addr,
728                                     &s->ext.addr, s->af))) &&
729                                     ((*state)->rule.ptr->flush &
730                                     PF_FLUSH_GLOBAL ||
731                                     (*state)->rule.ptr == s->rule.ptr)) {
732                                         s->timeout = PFTM_PURGE;
733                                         s->src.state = s->dst.state =
734                                             TCPS_CLOSED;
735                                         killed++;
736                                 }
737                         }
738                         if (pf_status.debug >= PF_DEBUG_MISC)
739                                 kprintf(", %u states killed", killed);
740                 }
741                 if (pf_status.debug >= PF_DEBUG_MISC)
742                         kprintf("\n");
743         }
744
745         /* kill this state */
746         (*state)->timeout = PFTM_PURGE;
747         (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
748         return (1);
749 }
750
751 int
752 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
753     struct pf_addr *src, sa_family_t af)
754 {
755         struct pf_src_node      k;
756
757         if (*sn == NULL) {
758                 k.af = af;
759                 PF_ACPY(&k.addr, src, af);
760                 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
761                     rule->rpool.opts & PF_POOL_STICKYADDR)
762                         k.rule.ptr = rule;
763                 else
764                         k.rule.ptr = NULL;
765                 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
766                 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
767         }
768         if (*sn == NULL) {
769                 if (!rule->max_src_nodes ||
770                     rule->src_nodes < rule->max_src_nodes)
771                         (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
772                 else
773                         pf_status.lcounters[LCNT_SRCNODES]++;
774                 if ((*sn) == NULL)
775                         return (-1);
776                 bzero(*sn, sizeof(struct pf_src_node));
777
778                 pf_init_threshold(&(*sn)->conn_rate,
779                     rule->max_src_conn_rate.limit,
780                     rule->max_src_conn_rate.seconds);
781
782                 (*sn)->af = af;
783                 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
784                     rule->rpool.opts & PF_POOL_STICKYADDR)
785                         (*sn)->rule.ptr = rule;
786                 else
787                         (*sn)->rule.ptr = NULL;
788                 PF_ACPY(&(*sn)->addr, src, af);
789                 if (RB_INSERT(pf_src_tree,
790                     &tree_src_tracking, *sn) != NULL) {
791                         if (pf_status.debug >= PF_DEBUG_MISC) {
792                                 kprintf("pf: src_tree insert failed: ");
793                                 pf_print_host(&(*sn)->addr, 0, af);
794                                 kprintf("\n");
795                         }
796                         pool_put(&pf_src_tree_pl, *sn);
797                         return (-1);
798                 }
799                 (*sn)->creation = time_second;
800                 (*sn)->ruletype = rule->action;
801                 if ((*sn)->rule.ptr != NULL)
802                         (*sn)->rule.ptr->src_nodes++;
803                 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
804                 pf_status.src_nodes++;
805         } else {
806                 if (rule->max_src_states &&
807                     (*sn)->states >= rule->max_src_states) {
808                         pf_status.lcounters[LCNT_SRCSTATES]++;
809                         return (-1);
810                 }
811         }
812         return (0);
813 }
814
815 int
816 pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
817 {
818         /* Thou MUST NOT insert multiple duplicate keys */
819         state->u.s.kif = kif;
820         if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
821                 if (pf_status.debug >= PF_DEBUG_MISC) {
822                         kprintf("pf: state insert failed: tree_lan_ext");
823                         kprintf(" lan: ");
824                         pf_print_host(&state->lan.addr, state->lan.port,
825                             state->af);
826                         kprintf(" gwy: ");
827                         pf_print_host(&state->gwy.addr, state->gwy.port,
828                             state->af);
829                         kprintf(" ext: ");
830                         pf_print_host(&state->ext.addr, state->ext.port,
831                             state->af);
832                         if (state->sync_flags & PFSTATE_FROMSYNC)
833                                 kprintf(" (from sync)");
834                         kprintf("\n");
835                 }
836                 return (-1);
837         }
838
839         if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
840                 if (pf_status.debug >= PF_DEBUG_MISC) {
841                         kprintf("pf: state insert failed: tree_ext_gwy");
842                         kprintf(" lan: ");
843                         pf_print_host(&state->lan.addr, state->lan.port,
844                             state->af);
845                         kprintf(" gwy: ");
846                         pf_print_host(&state->gwy.addr, state->gwy.port,
847                             state->af);
848                         kprintf(" ext: ");
849                         pf_print_host(&state->ext.addr, state->ext.port,
850                             state->af);
851                         if (state->sync_flags & PFSTATE_FROMSYNC)
852                                 kprintf(" (from sync)");
853                         kprintf("\n");
854                 }
855                 RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
856                 return (-1);
857         }
858
859         if (state->id == 0 && state->creatorid == 0) {
860                 state->id = htobe64(pf_status.stateid++);
861                 state->creatorid = pf_status.hostid;
862         }
863         if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
864                 if (pf_status.debug >= PF_DEBUG_MISC) {
865                         kprintf("pf: state insert failed: "
866                             "id: %016" PRIx64 " creatorid: %08" PRIx32,
867                             be64toh(state->id), ntohl(state->creatorid));
868                         if (state->sync_flags & PFSTATE_FROMSYNC)
869                                 kprintf(" (from sync)");
870                         kprintf("\n");
871                 }
872                 RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
873                 RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
874                 return (-1);
875         }
876         TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
877         pf_status.fcounters[FCNT_STATE_INSERT]++;
878         pf_status.states++;
879         pfi_kif_ref(kif, PFI_KIF_REF_STATE);
880 #if NPFSYNC
881         pfsync_insert_state(state);
882 #endif
883         return (0);
884 }
885
/*
 * Purge thread body.  Loops forever: sleeps on tsleep() with a timeout of
 * 1 * hz, takes pf_consistency_lock exclusively, purges a slice of the
 * state table and, every pf_default_rule.timeout[PFTM_INTERVAL]
 * iterations, also purges expired fragments and source nodes.
 *
 * When pf_end_threads is non-zero the thread purges everything it can,
 * increments pf_end_threads, wakes sleepers on pf_purge_thread and exits.
 *
 * The argument `v` is not used.
 */
886 void
887 pf_purge_thread(void *v)
888 {
889         int nloops = 0;
        /* never modified below, so the first src-node purge gets 0 */
890         int locked = 0;
891
892         for (;;) {
893                 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
894
895                 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
896
897                 if (pf_end_threads) {
                        /*
                         * NOTE(review): pf_purge_expired_states() releases
                         * pf_consistency_lock on exit whenever its `locked`
                         * flag is set, which includes waslocked == 1; the
                         * lock is then released again below.  Confirm the
                         * acquire/release counts balance on this path.
                         */
898                         pf_purge_expired_states(pf_status.states, 1);
899                         pf_purge_expired_fragments();
900                         pf_purge_expired_src_nodes(1);
901                         pf_end_threads++;
902
903                         lockmgr(&pf_consistency_lock, LK_RELEASE);
904                         wakeup(pf_purge_thread);
905                         kthread_exit();
906                 }
907                 crit_enter();
908
909                 /* process a fraction of the state table every second */
                /*
                 * The second call is made only when the first returns 0.
                 * NOTE(review): both divide by timeout[PFTM_INTERVAL];
                 * confirm that value can never be 0.
                 */
910                 if(!pf_purge_expired_states(1 + (pf_status.states
911                     / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
912
913                         pf_purge_expired_states(1 + (pf_status.states
914                             / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
915                 }
916
917                 /* purge other expired types every PFTM_INTERVAL seconds */
918                 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
919                         pf_purge_expired_fragments();
                        /*
                         * pf_purge_expired_src_nodes() as defined in this
                         * file always returns 1, so the retry below is not
                         * reached at present.
                         */
920                         if (!pf_purge_expired_src_nodes(locked)) {
921                                 pf_purge_expired_src_nodes(1);
922                         }
923                         nloops = 0;
924                 }
925                 crit_exit();
926                 lockmgr(&pf_consistency_lock, LK_RELEASE);
927         }
928 }
929
930 u_int32_t
931 pf_state_expires(const struct pf_state *state)
932 {
933         u_int32_t       timeout;
934         u_int32_t       start;
935         u_int32_t       end;
936         u_int32_t       states;
937
938         /* handle all PFTM_* > PFTM_MAX here */
939         if (state->timeout == PFTM_PURGE)
940                 return (time_second);
941         if (state->timeout == PFTM_UNTIL_PACKET)
942                 return (0);
943         KKASSERT(state->timeout != PFTM_UNLINKED);
944         KASSERT((state->timeout < PFTM_MAX),
945                 ("pf_state_expires: timeout > PFTM_MAX"));
946         timeout = state->rule.ptr->timeout[state->timeout];
947         if (!timeout)
948                 timeout = pf_default_rule.timeout[state->timeout];
949         start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
950         if (start) {
951                 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
952                 states = state->rule.ptr->states;
953         } else {
954                 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
955                 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
956                 states = pf_status.states;
957         }
958         if (end && states > start && start < end) {
959                 if (states < end)
960                         return (state->expire + timeout * (end - states) /
961                             (end - start));
962                 else
963                         return (time_second);
964         }
965         return (state->expire + timeout);
966 }
967
968 int
969 pf_purge_expired_src_nodes(int waslocked)
970 {
971          struct pf_src_node             *cur, *next;
972          int                             locked = waslocked;
973
974          for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
975                  next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
976
977                  if (cur->states <= 0 && cur->expire <= time_second) {
978                          if (! locked) {
979                                  lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
980                                  next = RB_NEXT(pf_src_tree,
981                                      &tree_src_tracking, cur);
982                                  locked = 1;
983                          }
984                          if (cur->rule.ptr != NULL) {
985                                  cur->rule.ptr->src_nodes--;
986                                  if (cur->rule.ptr->states <= 0 &&
987                                      cur->rule.ptr->max_src_nodes <= 0)
988                                          pf_rm_rule(NULL, cur->rule.ptr);
989                          }
990                          RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
991                          pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
992                          pf_status.src_nodes--;
993                          pool_put(&pf_src_tree_pl, cur);
994                  }
995          }
996
997          if (locked && !waslocked)
998                 lockmgr(&pf_consistency_lock, LK_RELEASE);
999         return(1);
1000 }
1001
1002 void
1003 pf_src_tree_remove_state(struct pf_state *s)
1004 {
1005         u_int32_t timeout;
1006
1007         if (s->src_node != NULL) {
1008                 if (s->proto == IPPROTO_TCP) {
1009                         if (s->src.tcp_est)
1010                                 --s->src_node->conn;
1011                 }
1012                 if (--s->src_node->states <= 0) {
1013                         timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1014                         if (!timeout)
1015                                 timeout =
1016                                     pf_default_rule.timeout[PFTM_SRC_NODE];
1017                         s->src_node->expire = time_second + timeout;
1018                 }
1019         }
1020         if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1021                 if (--s->nat_src_node->states <= 0) {
1022                         timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1023                         if (!timeout)
1024                                 timeout =
1025                                     pf_default_rule.timeout[PFTM_SRC_NODE];
1026                         s->nat_src_node->expire = time_second + timeout;
1027                 }
1028         }
1029         s->src_node = s->nat_src_node = NULL;
1030 }
1031
1032 /* callers should be at crit_enter() */
1033 void
1034 pf_unlink_state(struct pf_state *cur)
1035 {
1036         if (cur->src.state == PF_TCPS_PROXY_DST) {
1037                 pf_send_tcp(cur->rule.ptr, cur->af,
1038                     &cur->ext.addr, &cur->lan.addr,
1039                     cur->ext.port, cur->lan.port,
1040                     cur->src.seqhi, cur->src.seqlo + 1,
1041                     TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1042         }
1043         RB_REMOVE(pf_state_tree_ext_gwy,
1044             &cur->u.s.kif->pfik_ext_gwy, cur);
1045         RB_REMOVE(pf_state_tree_lan_ext,
1046             &cur->u.s.kif->pfik_lan_ext, cur);
1047         RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1048 #if NPFSYNC
1049         if (cur->creatorid == pf_status.hostid)
1050                 pfsync_delete_state(cur);
1051 #endif
1052         cur->timeout = PFTM_UNLINKED;
1053         pf_src_tree_remove_state(cur);
1054 }
1055
1056 /* callers should be at crit_enter() and hold the
1057  * write_lock on pf_consistency_lock */
1058 void
1059 pf_free_state(struct pf_state *cur)
1060 {
1061 #if NPFSYNC
1062         if (pfsyncif != NULL &&
1063             (pfsyncif->sc_bulk_send_next == cur ||
1064             pfsyncif->sc_bulk_terminator == cur))
1065                 return;
1066 #endif
1067         KKASSERT(cur->timeout == PFTM_UNLINKED);
1068         if (--cur->rule.ptr->states <= 0 &&
1069             cur->rule.ptr->src_nodes <= 0)
1070                 pf_rm_rule(NULL, cur->rule.ptr);
1071         if (cur->nat_rule.ptr != NULL)
1072                 if (--cur->nat_rule.ptr->states <= 0 &&
1073                         cur->nat_rule.ptr->src_nodes <= 0)
1074                         pf_rm_rule(NULL, cur->nat_rule.ptr);
1075         if (cur->anchor.ptr != NULL)
1076                 if (--cur->anchor.ptr->states <= 0)
1077                         pf_rm_rule(NULL, cur->anchor.ptr);
1078         pf_normalize_tcp_cleanup(cur);
1079         pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
1080         TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
1081         if (cur->tag)
1082                 pf_tag_unref(cur->tag);
1083         pool_put(&pf_state_pl, cur);
1084         pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1085         pf_status.states--;
1086 }
1087
/*
 * Examine up to `maxcheck` entries of state_list, freeing states that are
 * already PFTM_UNLINKED and unlinking + freeing states whose expiry time
 * (pf_state_expires()) is not later than time_second.
 *
 * The scan position `cur` is static, so each call resumes where the
 * previous one stopped and wraps to the head of the list at the end.
 *
 * `waslocked` is the caller's statement that pf_consistency_lock is
 * already held; when it is 0 the lock is taken before the first free.
 *
 * Returns 0 only from the early return in the expired-state branch,
 * otherwise 1.
 */
1088 int
1089 pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1090 {
1091         static struct pf_state  *cur = NULL;
1092         struct pf_state         *next;
1093         int                      locked = waslocked;
1094
1095         while (maxcheck--) {
1096                 /* wrap to start of list when we hit the end */
1097                 if (cur == NULL) {
1098                         cur = TAILQ_FIRST(&state_list);
1099                         if (cur == NULL)
1100                                 break;  /* list empty */
1101                 }
1102
1103                 /* get next state, as cur may get deleted */
1104                 next = TAILQ_NEXT(cur, u.s.entry_list);
1105
1106                 if (cur->timeout == PFTM_UNLINKED) {
1107                         /* free unlinked state */
1108                         if (! locked) {
1109                                 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1110                                 locked = 1;
1111                         }
1112                         pf_free_state(cur);
1113                 } else if (pf_state_expires(cur) <= time_second) {
1114                         /* unlink and free expired state */
1115                         pf_unlink_state(cur);
1116                         if (! locked) {
                                /*
                                 * NOTE(review): this returns 0 when
                                 * lockmgr() returns 0.  If 0 means the lock
                                 * was obtained (confirm against lockmgr's
                                 * return convention), the function leaves
                                 * with the state unlinked but not freed,
                                 * `cur` not advanced and the lock not
                                 * released here.  pf_purge_thread() reacts
                                 * to a 0 return by calling again with
                                 * waslocked = 1.
                                 */
1117                                 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
1118                                         return (0);
1119                                 locked = 1;
1120                         }
1121                         pf_free_state(cur);
1122                 }
1123                 cur = next;
1124         }
1125
        /*
         * NOTE(review): the lock is released whenever `locked` is set,
         * including when the caller passed waslocked != 0.
         * pf_purge_expired_src_nodes() releases only when
         * `locked && !waslocked`.  Confirm which is intended before
         * changing either side; the callers depend on the current counts.
         */
1126         if (locked)
1127                 lockmgr(&pf_consistency_lock, LK_RELEASE);
1128         return (1);
1129 }
1130
1131 int
1132 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1133 {
1134         if (aw->type != PF_ADDR_TABLE)
1135                 return (0);
1136         if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1137                 return (1);
1138         return (0);
1139 }
1140
1141 void
1142 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1143 {
1144         if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1145                 return;
1146         pfr_detach_table(aw->p.tbl);
1147         aw->p.tbl = NULL;
1148 }
1149
1150 void
1151 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1152 {
1153         struct pfr_ktable *kt = aw->p.tbl;
1154
1155         if (aw->type != PF_ADDR_TABLE || kt == NULL)
1156                 return;
1157         if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1158                 kt = kt->pfrkt_root;
1159         aw->p.tbl = NULL;
1160         aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1161                 kt->pfrkt_cnt : -1;
1162 }
1163
1164 void
1165 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1166 {
1167         switch (af) {
1168 #ifdef INET
1169         case AF_INET: {
1170                 u_int32_t a = ntohl(addr->addr32[0]);
1171                 kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
1172                     (a>>8)&255, a&255);
1173                 if (p) {
1174                         p = ntohs(p);
1175                         kprintf(":%u", p);
1176                 }
1177                 break;
1178         }
1179 #endif /* INET */
1180 #ifdef INET6
1181         case AF_INET6: {
1182                 u_int16_t b;
1183                 u_int8_t i, curstart = 255, curend = 0,
1184                     maxstart = 0, maxend = 0;
1185                 for (i = 0; i < 8; i++) {
1186                         if (!addr->addr16[i]) {
1187                                 if (curstart == 255)
1188                                         curstart = i;
1189                                 else
1190                                         curend = i;
1191                         } else {
1192                                 if (curstart) {
1193                                         if ((curend - curstart) >
1194                                             (maxend - maxstart)) {
1195                                                 maxstart = curstart;
1196                                                 maxend = curend;
1197                                                 curstart = 255;
1198                                         }
1199                                 }
1200                         }
1201                 }
1202                 for (i = 0; i < 8; i++) {
1203                         if (i >= maxstart && i <= maxend) {
1204                                 if (maxend != 7) {
1205                                         if (i == maxstart)
1206                                                 kprintf(":");
1207                                 } else {
1208                                         if (i == maxend)
1209                                                 kprintf(":");
1210                                 }
1211                         } else {
1212                                 b = ntohs(addr->addr16[i]);
1213                                 kprintf("%x", b);
1214                                 if (i < 7)
1215                                         kprintf(":");
1216                         }
1217                 }
1218                 if (p) {
1219                         p = ntohs(p);
1220                         kprintf("[%u]", p);
1221                 }
1222                 break;
1223         }
1224 #endif /* INET6 */
1225         }
1226 }
1227
1228 void
1229 pf_print_state(struct pf_state *s)
1230 {
1231         switch (s->proto) {
1232         case IPPROTO_TCP:
1233                 kprintf("TCP ");
1234                 break;
1235         case IPPROTO_UDP:
1236                 kprintf("UDP ");
1237                 break;
1238         case IPPROTO_ICMP:
1239                 kprintf("ICMP ");
1240                 break;
1241         case IPPROTO_ICMPV6:
1242                 kprintf("ICMPV6 ");
1243                 break;
1244         default:
1245                 kprintf("%u ", s->proto);
1246                 break;
1247         }
1248         pf_print_host(&s->lan.addr, s->lan.port, s->af);
1249         kprintf(" ");
1250         pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
1251         kprintf(" ");
1252         pf_print_host(&s->ext.addr, s->ext.port, s->af);
1253         kprintf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1254             s->src.seqhi, s->src.max_win, s->src.seqdiff);
1255         if (s->src.wscale && s->dst.wscale)
1256                 kprintf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1257         kprintf("]");
1258         kprintf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1259             s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1260         if (s->src.wscale && s->dst.wscale)
1261                 kprintf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1262         kprintf("]");
1263         kprintf(" %u:%u", s->src.state, s->dst.state);
1264 }
1265
1266 void
1267 pf_print_flags(u_int8_t f)
1268 {
1269         if (f)
1270                 kprintf(" ");
1271         if (f & TH_FIN)
1272                 kprintf("F");
1273         if (f & TH_SYN)
1274                 kprintf("S");
1275         if (f & TH_RST)
1276                 kprintf("R");
1277         if (f & TH_PUSH)
1278                 kprintf("P");
1279         if (f & TH_ACK)
1280                 kprintf("A");
1281         if (f & TH_URG)
1282                 kprintf("U");
1283         if (f & TH_ECE)
1284                 kprintf("E");
1285         if (f & TH_CWR)
1286                 kprintf("W");
1287 }
1288
/*
 * Helper for pf_calc_skip_steps().  For skip criterion `i`, walk head[i]
 * forward along the rule list until it reaches `cur`, pointing the
 * skip[i] entry of every rule passed on the way at `cur`.
 *
 * Expands in place and relies on the caller having locals named `head`
 * and `cur`.  The argument `i` is evaluated more than once.
 */
1289 #define PF_SET_SKIP_STEPS(i)                                    \
1290         do {                                                    \
1291                 while (head[i] != cur) {                        \
1292                         head[i]->skip[i].ptr = cur;             \
1293                         head[i] = TAILQ_NEXT(head[i], entries); \
1294                 }                                               \
1295         } while (0)
1296
1297 void
1298 pf_calc_skip_steps(struct pf_rulequeue *rules)
1299 {
1300         struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1301         int i;
1302
1303         cur = TAILQ_FIRST(rules);
1304         prev = cur;
1305         for (i = 0; i < PF_SKIP_COUNT; ++i)
1306                 head[i] = cur;
1307         while (cur != NULL) {
1308
1309                 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1310                         PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1311                 if (cur->direction != prev->direction)
1312                         PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1313                 if (cur->af != prev->af)
1314                         PF_SET_SKIP_STEPS(PF_SKIP_AF);
1315                 if (cur->proto != prev->proto)
1316                         PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1317                 if (cur->src.neg != prev->src.neg ||
1318                     pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1319                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1320                 if (cur->src.port[0] != prev->src.port[0] ||
1321                     cur->src.port[1] != prev->src.port[1] ||
1322                     cur->src.port_op != prev->src.port_op)
1323                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1324                 if (cur->dst.neg != prev->dst.neg ||
1325                     pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1326                         PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1327                 if (cur->dst.port[0] != prev->dst.port[0] ||
1328                     cur->dst.port[1] != prev->dst.port[1] ||
1329                     cur->dst.port_op != prev->dst.port_op)
1330                         PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1331
1332                 prev = cur;
1333                 cur = TAILQ_NEXT(cur, entries);
1334         }
1335         for (i = 0; i < PF_SKIP_COUNT; ++i)
1336                 PF_SET_SKIP_STEPS(i);
1337 }
1338
1339 int
1340 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1341 {
1342         if (aw1->type != aw2->type)
1343                 return (1);
1344         switch (aw1->type) {
1345         case PF_ADDR_ADDRMASK:
1346                 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1347                         return (1);
1348                 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1349                         return (1);
1350                 return (0);
1351         case PF_ADDR_DYNIFTL:
1352                 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1353         case PF_ADDR_NOROUTE:
1354         case PF_ADDR_URPFFAILED:
1355                 return (0);
1356         case PF_ADDR_TABLE:
1357                 return (aw1->p.tbl != aw2->p.tbl);
1358         case PF_ADDR_RTLABEL:
1359                 return (aw1->v.rtlabel != aw2->v.rtlabel);
1360         default:
1361                 kprintf("invalid address type: %d\n", aw1->type);
1362                 return (1);
1363         }
1364 }
1365
/*
 * Incrementally update a 16-bit checksum after one 16-bit word of the
 * covered data changed from `old` to `new`.
 *
 * When `udp` is non-zero, a checksum of 0 is returned unchanged as 0,
 * and a computed result of 0 is returned as 0xFFFF.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	sum;

	if (udp && cksum == 0)
		return (0x0000);

	/* add the carry/borrow in the upper half back into the low 16 bits */
	sum = (u_int32_t)cksum + old - new;
	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;

	if (udp && sum == 0)
		return (0xFFFF);
	return ((u_int16_t)sum);
}
1380
1381 void
1382 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1383     struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1384 {
1385         struct pf_addr  ao;
1386         u_int16_t       po = *p;
1387
1388         PF_ACPY(&ao, a, af);
1389         PF_ACPY(a, an, af);
1390
1391         *p = pn;
1392
1393         switch (af) {
1394 #ifdef INET
1395         case AF_INET:
1396                 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1397                     ao.addr16[0], an->addr16[0], 0),
1398                     ao.addr16[1], an->addr16[1], 0);
1399                 *p = pn;
1400                 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1401                     ao.addr16[0], an->addr16[0], u),
1402                     ao.addr16[1], an->addr16[1], u),
1403                     po, pn, u);
1404                 break;
1405 #endif /* INET */
1406 #ifdef INET6
1407         case AF_INET6:
1408                 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1409                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1410                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1411                     ao.addr16[0], an->addr16[0], u),
1412                     ao.addr16[1], an->addr16[1], u),
1413                     ao.addr16[2], an->addr16[2], u),
1414                     ao.addr16[3], an->addr16[3], u),
1415                     ao.addr16[4], an->addr16[4], u),
1416                     ao.addr16[5], an->addr16[5], u),
1417                     ao.addr16[6], an->addr16[6], u),
1418                     ao.addr16[7], an->addr16[7], u),
1419                     po, pn, u);
1420                 break;
1421 #endif /* INET6 */
1422         }
1423 }
1424
1425
/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
 *
 * The 32-bit value at `a` is replaced by `an` and checksum *c is patched
 * for the upper and then the lower 16 bits of the change.  `u` is passed
 * to pf_cksum_fixup() as its `udp` flag.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;
	u_int16_t	sum;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	sum = pf_cksum_fixup(*c, ao >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, ao & 0xffff, an & 0xffff, u);
}
1437
#ifdef INET6
/*
 * Replace the IPv6 address at `a` with `an` and patch checksum *c for
 * each of the eight 16-bit address words.  `u` is passed to
 * pf_cksum_fixup() as its `udp` flag.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;
	u_int16_t	sum;
	int		i;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	sum = *c;
	for (i = 0; i < 8; i++)
		sum = pf_cksum_fixup(sum, ao.addr16[i], an->addr16[i], u);
	*c = sum;
}
#endif /* INET6 */
1460
1461 void
1462 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1463     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1464     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1465 {
1466         struct pf_addr  oia, ooa;
1467
1468         PF_ACPY(&oia, ia, af);
1469         PF_ACPY(&ooa, oa, af);
1470
1471         /* Change inner protocol port, fix inner protocol checksum. */
1472         if (ip != NULL) {
1473                 u_int16_t       oip = *ip;
1474                 u_int32_t       opc = 0;
1475
1476                 if (pc != NULL)
1477                         opc = *pc;
1478                 *ip = np;
1479                 if (pc != NULL)
1480                         *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1481                 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1482                 if (pc != NULL)
1483                         *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1484         }
1485         /* Change inner ip address, fix inner ip and icmp checksums. */
1486         PF_ACPY(ia, na, af);
1487         switch (af) {
1488 #ifdef INET
1489         case AF_INET: {
1490                 u_int32_t        oh2c = *h2c;
1491
1492                 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1493                     oia.addr16[0], ia->addr16[0], 0),
1494                     oia.addr16[1], ia->addr16[1], 0);
1495                 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1496                     oia.addr16[0], ia->addr16[0], 0),
1497                     oia.addr16[1], ia->addr16[1], 0);
1498                 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1499                 break;
1500         }
1501 #endif /* INET */
1502 #ifdef INET6
1503         case AF_INET6:
1504                 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1505                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1506                     pf_cksum_fixup(pf_cksum_fixup(*ic,
1507                     oia.addr16[0], ia->addr16[0], u),
1508                     oia.addr16[1], ia->addr16[1], u),
1509                     oia.addr16[2], ia->addr16[2], u),
1510                     oia.addr16[3], ia->addr16[3], u),
1511                     oia.addr16[4], ia->addr16[4], u),
1512                     oia.addr16[5], ia->addr16[5], u),
1513                     oia.addr16[6], ia->addr16[6], u),
1514                     oia.addr16[7], ia->addr16[7], u);
1515                 break;
1516 #endif /* INET6 */
1517         }
1518         /* Change outer ip address, fix outer ip or icmpv6 checksum. */
1519         PF_ACPY(oa, na, af);
1520         switch (af) {
1521 #ifdef INET
1522         case AF_INET:
1523                 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1524                     ooa.addr16[0], oa->addr16[0], 0),
1525                     ooa.addr16[1], oa->addr16[1], 0);
1526                 break;
1527 #endif /* INET */
1528 #ifdef INET6
1529         case AF_INET6:
1530                 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1531                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1532                     pf_cksum_fixup(pf_cksum_fixup(*ic,
1533                     ooa.addr16[0], oa->addr16[0], u),
1534                     ooa.addr16[1], oa->addr16[1], u),
1535                     ooa.addr16[2], oa->addr16[2], u),
1536                     ooa.addr16[3], oa->addr16[3], u),
1537                     ooa.addr16[4], oa->addr16[4], u),
1538                     ooa.addr16[5], oa->addr16[5], u),
1539                     ooa.addr16[6], oa->addr16[6], u),
1540                     ooa.addr16[7], oa->addr16[7], u);
1541                 break;
1542 #endif /* INET6 */
1543         }
1544 }
1545
1546
/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 *
 * Walks the TCP options that follow the header at offset 'off' in 'm'.
 * For every SACK option found, dst->seqdiff is subtracted from the start
 * and end of each block and th->th_sum is adjusted through pf_change_a().
 *
 * Returns 1 when at least one SACK option was processed and the option
 * bytes were written back into the mbuf, 0 otherwise (including when the
 * options are too short or cannot be pulled out of the mbuf).
 */
int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
        /* hlen: option bytes still to parse; thoptlen: total option bytes */
        int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
        u_int8_t opts[TCP_MAXOLEN], *opt = opts;
        int copyback = 0, i, olen;
        struct raw_sackblock sack;

        /* smallest useful SACK option: kind + length + one block */
#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
        if (hlen < TCPOLEN_SACKLEN ||
            !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
                return 0;

        /* stop as soon as the remainder cannot hold a SACK option */
        while (hlen >= TCPOLEN_SACKLEN) {
                olen = opt[1];
                switch (*opt) {
                case TCPOPT_EOL:        /* FALLTHROUGH */
                case TCPOPT_NOP:
                        /* single-byte options carry no length field */
                        opt++;
                        hlen--;
                        break;
                case TCPOPT_SACK:
                        /* never trust a length that overruns the options */
                        if (olen > hlen)
                                olen = hlen;
                        if (olen >= TCPOLEN_SACKLEN) {
                                /* blocks start after the kind/length bytes */
                                for (i = 2; i + TCPOLEN_SACK <= olen;
                                    i += TCPOLEN_SACK) {
                                        /* copy out: opt[i] may be unaligned */
                                        memcpy(&sack, &opt[i], sizeof(sack));
                                        pf_change_a(&sack.rblk_start, &th->th_sum,
                                            htonl(ntohl(sack.rblk_start) -
                                            dst->seqdiff), 0);
                                        pf_change_a(&sack.rblk_end, &th->th_sum,
                                            htonl(ntohl(sack.rblk_end) -
                                            dst->seqdiff), 0);
                                        memcpy(&opt[i], &sack, sizeof(sack));
                                }
                                copyback = 1;
                        }
                        /* FALLTHROUGH */
                default:
                        /* force progress even on a bogus length of 0 or 1 */
                        if (olen < 2)
                                olen = 2;
                        hlen -= olen;
                        opt += olen;
                }
        }

        /* write the modified option bytes back into the packet */
        if (copyback)
                m_copyback(m, off + sizeof(*th), thoptlen, opts);
        return (copyback);
}
1603
1604 void
1605 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1606     const struct pf_addr *saddr, const struct pf_addr *daddr,
1607     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1608     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1609     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
1610 {
1611         struct mbuf     *m;
1612         int              len = 0, tlen;
1613 #ifdef INET
1614         struct ip       *h = NULL;
1615 #endif /* INET */
1616 #ifdef INET6
1617         struct ip6_hdr  *h6 = NULL;
1618 #endif /* INET6 */
1619         struct tcphdr   *th = NULL;
1620         char            *opt;
1621         struct pf_mtag  *pf_mtag;
1622
1623         /* maximum segment size tcp option */
1624         tlen = sizeof(struct tcphdr);
1625         if (mss)
1626                 tlen += 4;
1627
1628         switch (af) {
1629 #ifdef INET
1630         case AF_INET:
1631                 len = sizeof(struct ip) + tlen;
1632                 break;
1633 #endif /* INET */
1634 #ifdef INET6
1635         case AF_INET6:
1636                 len = sizeof(struct ip6_hdr) + tlen;
1637                 break;
1638 #endif /* INET6 */
1639         }
1640
1641         /* create outgoing mbuf */
1642         m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1643         if (m == NULL)
1644                 return;
1645         if ((pf_mtag = pf_get_mtag(m)) == NULL) {
1646                 m_freem(m);
1647                 return;
1648         }
1649         if (tag)
1650                 pf_mtag->flags |= PF_TAG_GENERATED;
1651
1652         pf_mtag->tag = rtag;
1653
1654         if (r != NULL && r->rtableid >= 0)
1655                 pf_mtag->rtableid = r->rtableid;
1656
1657 #ifdef ALTQ
1658         if (r != NULL && r->qid) {
1659                 pf_mtag->qid = r->qid;
1660                 /* add hints for ecn */
1661                 pf_mtag->af = af;
1662                 pf_mtag->hdr = mtod(m, struct ip *);
1663         }
1664 #endif /* ALTQ */
1665         m->m_data += max_linkhdr;
1666         m->m_pkthdr.len = m->m_len = len;
1667         m->m_pkthdr.rcvif = NULL;
1668         bzero(m->m_data, len);
1669         switch (af) {
1670 #ifdef INET
1671         case AF_INET:
1672                 h = mtod(m, struct ip *);
1673
1674                 /* IP header fields included in the TCP checksum */
1675                 h->ip_p = IPPROTO_TCP;
1676                 h->ip_len = tlen;
1677                 h->ip_src.s_addr = saddr->v4.s_addr;
1678                 h->ip_dst.s_addr = daddr->v4.s_addr;
1679
1680                 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1681                 break;
1682 #endif /* INET */
1683 #ifdef INET6
1684         case AF_INET6:
1685                 h6 = mtod(m, struct ip6_hdr *);
1686
1687                 /* IP header fields included in the TCP checksum */
1688                 h6->ip6_nxt = IPPROTO_TCP;
1689                 h6->ip6_plen = htons(tlen);
1690                 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1691                 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1692
1693                 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1694                 break;
1695 #endif /* INET6 */
1696         }
1697
1698         /* TCP header */
1699         th->th_sport = sport;
1700         th->th_dport = dport;
1701         th->th_seq = htonl(seq);
1702         th->th_ack = htonl(ack);
1703         th->th_off = tlen >> 2;
1704         th->th_flags = flags;
1705         th->th_win = htons(win);
1706
1707         if (mss) {
1708                 opt = (char *)(th + 1);
1709                 opt[0] = TCPOPT_MAXSEG;
1710                 opt[1] = 4;
1711                 mss = htons(mss);
1712                 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1713         }
1714
1715         switch (af) {
1716 #ifdef INET
1717         case AF_INET:
1718                 /* TCP checksum */
1719                 th->th_sum = in_cksum(m, len);
1720
1721                 /* Finish the IP header */
1722                 h->ip_v = 4;
1723                 h->ip_hl = sizeof(*h) >> 2;
1724                 h->ip_tos = IPTOS_LOWDELAY;
1725                 h->ip_len = len;
1726                 h->ip_off = path_mtu_discovery ? IP_DF : 0;
1727                 h->ip_ttl = ttl ? ttl : ip_defttl;
1728                 h->ip_sum = 0;
1729                 if (eh == NULL) {
1730                         ip_output(m, NULL, NULL, 0, NULL, NULL);
1731                 } else {
1732                         struct route             ro;
1733                         struct rtentry           rt;
1734                         struct ether_header     *e = (void *)ro.ro_dst.sa_data;
1735
1736                         if (ifp == NULL) {
1737                                 m_freem(m);
1738                                 return;
1739                         }
1740                         rt.rt_ifp = ifp;
1741                         ro.ro_rt = &rt;
1742                         ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1743                         ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1744                         bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1745                         bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1746                         e->ether_type = eh->ether_type;
1747                         /* XXX_IMPORT: later */
1748                         ip_output(m, (void *)NULL, &ro, 0,
1749                             (void *)NULL, (void *)NULL);
1750                 }
1751                 break;
1752 #endif /* INET */
1753 #ifdef INET6
1754         case AF_INET6:
1755                 /* TCP checksum */
1756                 th->th_sum = in6_cksum(m, IPPROTO_TCP,
1757                     sizeof(struct ip6_hdr), tlen);
1758
1759                 h6->ip6_vfc |= IPV6_VERSION;
1760                 h6->ip6_hlim = IPV6_DEFHLIM;
1761
1762                 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1763                 break;
1764 #endif /* INET6 */
1765         }
1766 }
1767
1768 void
1769 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1770     struct pf_rule *r)
1771 {
1772         struct pf_mtag  *pf_mtag;
1773         struct mbuf     *m0;
1774
1775         m0 = m_copy(m, 0, M_COPYALL);
1776
1777         if ((pf_mtag = pf_get_mtag(m0)) == NULL)
1778                 return;
1779         pf_mtag->flags |= PF_TAG_GENERATED;
1780
1781         if (r->rtableid >= 0)
1782                 pf_mtag->rtableid = r->rtableid;
1783
1784 #ifdef ALTQ
1785         if (r->qid) {
1786                 pf_mtag->qid = r->qid;
1787                 /* add hints for ecn */
1788                 pf_mtag->af = af;
1789                 pf_mtag->hdr = mtod(m0, struct ip *);
1790         }
1791 #endif /* ALTQ */
1792
1793         switch (af) {
1794 #ifdef INET
1795         case AF_INET:
1796                 icmp_error(m0, type, code, 0, 0);
1797                 break;
1798 #endif /* INET */
1799 #ifdef INET6
1800         case AF_INET6:
1801                 icmp6_error(m0, type, code, 0);
1802                 break;
1803 #endif /* INET6 */
1804         }
1805 }
1806
/*
 * Compare addresses a and b under mask m for address family af.
 * With n == 0 the result is 1 when the masked addresses are equal;
 * with n != 0 the sense is inverted and 1 means they differ.
 * An address family that is not handled counts as "not equal".
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
        int     equal = 0;

        switch (af) {
#ifdef INET
        case AF_INET:
                equal = ((a->addr32[0] & m->addr32[0]) ==
                    (b->addr32[0] & m->addr32[0]));
                break;
#endif /* INET */
#ifdef INET6
        case AF_INET6: {
                int     w;

                /* all four 32 bit words have to agree under the mask */
                equal = 1;
                for (w = 0; w < 4; w++) {
                        if ((a->addr32[w] & m->addr32[w]) !=
                            (b->addr32[w] & m->addr32[w])) {
                                equal = 0;
                                break;
                        }
                }
                break;
        }
#endif /* INET6 */
        }

        if (n)
                return (equal ? 0 : 1);
        return (equal ? 1 : 0);
}
1852
1853 int
1854 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1855 {
1856         switch (op) {
1857         case PF_OP_IRG:
1858                 return ((p > a1) && (p < a2));
1859         case PF_OP_XRG:
1860                 return ((p < a1) || (p > a2));
1861         case PF_OP_RRG:
1862                 return ((p >= a1) && (p <= a2));
1863         case PF_OP_EQ:
1864                 return (p == a1);
1865         case PF_OP_NE:
1866                 return (p != a1);
1867         case PF_OP_LT:
1868                 return (p < a1);
1869         case PF_OP_LE:
1870                 return (p <= a1);
1871         case PF_OP_GT:
1872                 return (p > a1);
1873         case PF_OP_GE:
1874                 return (p >= a1);
1875         }
1876         return (0); /* never reached */
1877 }
1878
/*
 * Port flavour of pf_match(): a1, a2 and p are converted with ntohs()
 * before the comparison selected by 'op' is applied.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
        const u_int16_t lo = ntohs(a1);
        const u_int16_t hi = ntohs(a2);
        const u_int16_t port = ntohs(p);

        return (pf_match(op, lo, hi, port));
}
1887
1888 int
1889 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1890 {
1891         if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1892                 return (0);
1893         return (pf_match(op, a1, a2, u));
1894 }
1895
1896 int
1897 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1898 {
1899         if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1900                 return (0);
1901         return (pf_match(op, a1, a2, g));
1902 }
1903
1904 struct pf_mtag *
1905 pf_find_mtag(struct mbuf *m)
1906 {
1907         struct m_tag    *mtag;
1908
1909         if ((mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL)) == NULL)
1910                 return (NULL);
1911
1912         return ((struct pf_mtag *)(mtag + 1));
1913 }
1914
1915 struct pf_mtag *
1916 pf_get_mtag(struct mbuf *m)
1917 {
1918         struct m_tag    *mtag;
1919
1920         if ((mtag = m_tag_find(m, PF_MBUF_TAGGED, NULL)) == NULL) {
1921                 mtag = m_tag_get(PF_MBUF_TAGGED, sizeof(struct pf_mtag),
1922                     M_NOWAIT);
1923                 if (mtag == NULL)
1924                         return (NULL);
1925                 bzero(mtag + 1, sizeof(struct pf_mtag));
1926                 m_tag_prepend(m, mtag);
1927         }
1928
1929         return ((struct pf_mtag *)(mtag + 1));
1930 }
1931
1932 int
1933 pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
1934     int *tag)
1935 {
1936         if (*tag == -1)
1937                 *tag = pf_mtag->tag;
1938
1939         return ((!r->match_tag_not && r->match_tag == *tag) ||
1940             (r->match_tag_not && r->match_tag != *tag));
1941 }
1942
1943 int
1944 pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
1945 {
1946         if (tag <= 0 && rtableid < 0)
1947                 return (0);
1948
1949         if (pf_mtag == NULL)
1950                 if ((pf_mtag = pf_get_mtag(m)) == NULL)
1951                         return (1);
1952         if (tag > 0)
1953                 pf_mtag->tag = tag;
1954         if (rtableid >= 0)
1955                 pf_mtag->rtableid = rtableid;
1956
1957         return (0);
1958 }
1959
1960 static void
1961 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
1962     struct pf_rule **r, struct pf_rule **a,  int *match)
1963 {
1964         struct pf_anchor_stackframe     *f;
1965
1966         (*r)->anchor->match = 0;
1967         if (match)
1968                 *match = 0;
1969         if (*depth >= sizeof(pf_anchor_stack) /
1970             sizeof(pf_anchor_stack[0])) {
1971                 kprintf("pf_step_into_anchor: stack overflow\n");
1972                 *r = TAILQ_NEXT(*r, entries);
1973                 return;
1974         } else if (*depth == 0 && a != NULL)
1975                 *a = *r;
1976         f = pf_anchor_stack + (*depth)++;
1977         f->rs = *rs;
1978         f->r = *r;
1979         if ((*r)->anchor_wildcard) {
1980                 f->parent = &(*r)->anchor->children;
1981                 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
1982                     NULL) {
1983                         *r = NULL;
1984                         return;
1985                 }
1986                 *rs = &f->child->ruleset;
1987         } else {
1988                 f->parent = NULL;
1989                 f->child = NULL;
1990                 *rs = &(*r)->anchor->ruleset;
1991         }
1992         *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
1993 }
1994
1995 int
1996 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
1997     struct pf_rule **r, struct pf_rule **a, int *match)
1998 {
1999         struct pf_anchor_stackframe     *f;
2000         int quick = 0;
2001
2002         do {
2003                 if (*depth <= 0)
2004                         break;
2005                 f = pf_anchor_stack + *depth - 1;
2006                 if (f->parent != NULL && f->child != NULL) {
2007                         if (f->child->match ||
2008                             (match != NULL && *match)) {
2009                                 f->r->anchor->match = 1;
2010                                 *match = 0;
2011                         }
2012                         f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2013                         if (f->child != NULL) {
2014                                 *rs = &f->child->ruleset;
2015                                 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2016                                 if (*r == NULL)
2017                                         continue;
2018                                 else
2019                                         break;
2020                         }
2021                 }
2022                 (*depth)--;
2023                 if (*depth == 0 && a != NULL)
2024                         *a = NULL;
2025                 *rs = f->rs;
2026                 if (f->r->anchor->match || (match  != NULL && *match))
2027                         quick = f->r->quick;
2028                 *r = TAILQ_NEXT(f->r, entries);
2029         } while (*r == NULL);
2030
2031         return (quick);
2032 }
2033
2034 #ifdef INET6
2035 void
2036 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2037     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2038 {
2039         switch (af) {
2040 #ifdef INET
2041         case AF_INET:
2042                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2043                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2044                 break;
2045 #endif /* INET */
2046         case AF_INET6:
2047                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2048                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2049                 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2050                 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2051                 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2052                 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2053                 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2054                 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2055                 break;
2056         }
2057 }
2058
2059 void
2060 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2061 {
2062         switch (af) {
2063 #ifdef INET
2064         case AF_INET:
2065                 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2066                 break;
2067 #endif /* INET */
2068         case AF_INET6:
2069                 if (addr->addr32[3] == 0xffffffff) {
2070                         addr->addr32[3] = 0;
2071                         if (addr->addr32[2] == 0xffffffff) {
2072                                 addr->addr32[2] = 0;
2073                                 if (addr->addr32[1] == 0xffffffff) {
2074                                         addr->addr32[1] = 0;
2075                                         addr->addr32[0] =
2076                                             htonl(ntohl(addr->addr32[0]) + 1);
2077                                 } else
2078                                         addr->addr32[1] =
2079                                             htonl(ntohl(addr->addr32[1]) + 1);
2080                         } else
2081                                 addr->addr32[2] =
2082                                     htonl(ntohl(addr->addr32[2]) + 1);
2083                 } else
2084                         addr->addr32[3] =
2085                             htonl(ntohl(addr->addr32[3]) + 1);
2086                 break;
2087         }
2088 }
2089 #endif /* INET6 */
2090
/*
 * Mix the three values a, b and c in place using subtract, xor and
 * shift steps.  Used by pf_hash() below.
 *
 * All three arguments are assigned to and expanded several times, so
 * they must be plain modifiable lvalues without side effects.
 */
#define mix(a,b,c) \
        do {                                    \
                a -= b; a -= c; a ^= (c >> 13); \
                b -= c; b -= a; b ^= (a << 8);  \
                c -= a; c -= b; c ^= (b >> 13); \
                a -= b; a -= c; a ^= (c >> 12); \
                b -= c; b -= a; b ^= (a << 16); \
                c -= a; c -= b; c ^= (b >> 5);  \
                a -= b; a -= c; a ^= (c >> 3);  \
                b -= c; b -= a; b ^= (a << 10); \
                c -= a; c -= b; c ^= (b >> 15); \
        } while (0)
2103
2104 /*
2105  * hash function based on bridge_hash in if_bridge.c
2106  */
2107 void
2108 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2109     struct pf_poolhashkey *key, sa_family_t af)
2110 {
2111         u_int32_t       a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2112
2113         switch (af) {
2114 #ifdef INET
2115         case AF_INET:
2116                 a += inaddr->addr32[0];
2117                 b += key->key32[1];
2118                 mix(a, b, c);
2119                 hash->addr32[0] = c + key->key32[2];
2120                 break;
2121 #endif /* INET */
2122 #ifdef INET6
2123         case AF_INET6:
2124                 a += inaddr->addr32[0];
2125                 b += inaddr->addr32[2];
2126                 mix(a, b, c);
2127                 hash->addr32[0] = c;
2128                 a += inaddr->addr32[1];
2129                 b += inaddr->addr32[3];
2130                 c += key->key32[1];
2131                 mix(a, b, c);
2132                 hash->addr32[1] = c;
2133                 a += inaddr->addr32[2];
2134                 b += inaddr->addr32[1];
2135                 c += key->key32[2];
2136                 mix(a, b, c);
2137                 hash->addr32[2] = c;
2138                 a += inaddr->addr32[3];
2139                 b += inaddr->addr32[0];
2140                 c += key->key32[3];
2141                 mix(a, b, c);
2142                 hash->addr32[3] = c;
2143                 break;
2144 #endif /* INET6 */
2145         }
2146 }
2147
/*
 * Select a translation/redirection address from the address pool of
 * rule r and store it in naddr.
 *
 *   af         address family of the addresses involved
 *   r          rule whose rpool is consulted (and advanced)
 *   saddr      source address; used for sticky lookups, bitmask and
 *              source-hash pools
 *   naddr      receives the selected address
 *   init_addr  when not NULL and still zero, receives the first address
 *              picked by the random and round-robin pool types
 *   sn         in/out source tracking node; with sticky-address an
 *              existing node's address is reused, and the chosen address
 *              is recorded in *sn when it is not NULL
 *
 * Returns 0 on success, 1 when no address can be selected.
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
        unsigned char            hash[16];
        struct pf_pool          *rpool = &r->rpool;
        struct pf_addr          *raddr = &rpool->cur->addr.v.a.addr;
        struct pf_addr          *rmask = &rpool->cur->addr.v.a.mask;
        /* remembers where round-robin started, to detect a full cycle */
        struct pf_pooladdr      *acur = rpool->cur;
        struct pf_src_node       k;

        /*
         * Sticky address: if this source already has a tracking node with
         * a recorded address, reuse that address and skip the pool.
         */
        if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
            (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
                k.af = af;
                PF_ACPY(&k.addr, saddr, af);
                if (r->rule_flag & PFRULE_RULESRCTRACK ||
                    r->rpool.opts & PF_POOL_STICKYADDR)
                        k.rule.ptr = r;
                else
                        k.rule.ptr = NULL;
                pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
                *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
                if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
                        PF_ACPY(naddr, &(*sn)->raddr, af);
                        if (pf_status.debug >= PF_DEBUG_MISC) {
                                kprintf("pf_map_addr: src tracking maps ");
                                pf_print_host(&k.addr, 0, af);
                                kprintf(" to ");
                                pf_print_host(naddr, 0, af);
                                kprintf("\n");
                        }
                        return (0);
                }
        }

        /* Work out the address/mask pair of the current pool entry. */
        if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
                return (1);
        if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
                /* dynamic interface address: use the per-family values */
                switch (af) {
#ifdef INET
                case AF_INET:
                        if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
                            (rpool->opts & PF_POOL_TYPEMASK) !=
                            PF_POOL_ROUNDROBIN)
                                return (1);
                         raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
                         rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
                        break;
#endif /* INET */
#ifdef INET6
                case AF_INET6:
                        if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
                            (rpool->opts & PF_POOL_TYPEMASK) !=
                            PF_POOL_ROUNDROBIN)
                                return (1);
                        raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
                        rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
                        break;
#endif /* INET6 */
                }
        } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
                if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
                        return (1); /* unsupported */
        } else {
                raddr = &rpool->cur->addr.v.a.addr;
                rmask = &rpool->cur->addr.v.a.mask;
        }

        /* Pick the address according to the pool type. */
        switch (rpool->opts & PF_POOL_TYPEMASK) {
        case PF_POOL_NONE:
                PF_ACPY(naddr, raddr, af);
                break;
        case PF_POOL_BITMASK:
                PF_POOLMASK(naddr, raddr, rmask, saddr, af);
                break;
        case PF_POOL_RANDOM:
                if (init_addr != NULL && PF_AZERO(init_addr, af)) {
                        /* first pick: seed the counter with random bits */
                        switch (af) {
#ifdef INET
                        case AF_INET:
                                rpool->counter.addr32[0] = htonl(karc4random());
                                break;
#endif /* INET */
#ifdef INET6
                        case AF_INET6:
                                /*
                                 * Randomize from the last word towards the
                                 * first, stopping at the first word that is
                                 * fully covered by the mask.
                                 */
                                if (rmask->addr32[3] != 0xffffffff)
                                        rpool->counter.addr32[3] =
                                            htonl(karc4random());
                                else
                                        break;
                                if (rmask->addr32[2] != 0xffffffff)
                                        rpool->counter.addr32[2] =
                                            htonl(karc4random());
                                else
                                        break;
                                if (rmask->addr32[1] != 0xffffffff)
                                        rpool->counter.addr32[1] =
                                            htonl(karc4random());
                                else
                                        break;
                                if (rmask->addr32[0] != 0xffffffff)
                                        rpool->counter.addr32[0] =
                                            htonl(karc4random());
                                break;
#endif /* INET6 */
                        }
                        PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
                        PF_ACPY(init_addr, naddr, af);

                } else {
                        /* later picks: just step the counter */
                        PF_AINC(&rpool->counter, af);
                        PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
                }
                break;
        case PF_POOL_SRCHASH:
                /* the host part is derived from a hash of the source */
                pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
                PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
                break;
        case PF_POOL_ROUNDROBIN:
                /*
                 * If the counter still falls inside the current entry,
                 * use it; otherwise advance to the next pool entry.
                 */
                if (rpool->cur->addr.type == PF_ADDR_TABLE) {
                        if (!pfr_pool_get(rpool->cur->addr.p.tbl,
                            &rpool->tblidx, &rpool->counter,
                            &raddr, &rmask, af))
                                goto get_addr;
                } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
                        if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
                            &rpool->tblidx, &rpool->counter,
                            &raddr, &rmask, af))
                                goto get_addr;
                } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
                        goto get_addr;

        try_next:
                /* move to the next entry, wrapping to the list head */
                if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
                        rpool->cur = TAILQ_FIRST(&rpool->list);
                if (rpool->cur->addr.type == PF_ADDR_TABLE) {
                        rpool->tblidx = -1;
                        if (pfr_pool_get(rpool->cur->addr.p.tbl,
                            &rpool->tblidx, &rpool->counter,
                            &raddr, &rmask, af)) {
                                /* table contains no address of type 'af' */
                                if (rpool->cur != acur)
                                        goto try_next;
                                /* back at the starting entry: give up */
                                return (1);
                        }
                } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
                        rpool->tblidx = -1;
                        if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
                            &rpool->tblidx, &rpool->counter,
                            &raddr, &rmask, af)) {
                                /* table contains no address of type 'af' */
                                if (rpool->cur != acur)
                                        goto try_next;
                                /* back at the starting entry: give up */
                                return (1);
                        }
                } else {
                        /* plain entry: restart the counter at its address */
                        raddr = &rpool->cur->addr.v.a.addr;
                        rmask = &rpool->cur->addr.v.a.mask;
                        PF_ACPY(&rpool->counter, raddr, af);
                }

        get_addr:
                /* hand out the counter value, then step it for next time */
                PF_ACPY(naddr, &rpool->counter, af);
                if (init_addr != NULL && PF_AZERO(init_addr, af))
                        PF_ACPY(init_addr, naddr, af);
                PF_AINC(&rpool->counter, af);
                break;
        }
        /* remember the choice in the source tracking node, if any */
        if (*sn != NULL)
                PF_ACPY(&(*sn)->raddr, naddr, af);

        if (pf_status.debug >= PF_DEBUG_MISC &&
            (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
                kprintf("pf_map_addr: selected address ");
                pf_print_host(naddr, 0, af);
                kprintf("\n");
        }

        return (0);
}
2328
2329 int
2330 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2331     struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2332     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2333     struct pf_src_node **sn)
2334 {
2335         struct pf_state_cmp     key;
2336         struct pf_addr          init_addr;
2337         u_int16_t               cut;
2338
2339         bzero(&init_addr, sizeof(init_addr));
2340         if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2341                 return (1);
2342
2343         if (proto == IPPROTO_ICMP) {
2344                 low = 1;
2345                 high = 65535;
2346         }
2347
2348         do {
2349                 key.af = af;
2350                 key.proto = proto;
2351                 PF_ACPY(&key.ext.addr, daddr, key.af);
2352                 PF_ACPY(&key.gwy.addr, naddr, key.af);
2353                 key.ext.port = dport;
2354
2355                 /*
2356                  * port search; start random, step;
2357                  * similar 2 portloop in in_pcbbind
2358                  */
2359                 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2360                     proto == IPPROTO_ICMP)) {
2361                         key.gwy.port = dport;
2362                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2363                                 return (0);
2364                 } else if (low == 0 && high == 0) {
2365                         key.gwy.port = *nport;
2366                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2367                                 return (0);
2368                 } else if (low == high) {
2369                         key.gwy.port = htons(low);
2370                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2371                                 *nport = htons(low);
2372                                 return (0);
2373                         }
2374                 } else {
2375                         u_int16_t tmp;
2376
2377                         if (low > high) {
2378                                 tmp = low;
2379                                 low = high;
2380                                 high = tmp;
2381                         }
2382                         /* low < high */
2383                         cut = htonl(karc4random()) % (1 + high - low) + low;
2384                         /* low <= cut <= high */
2385                         for (tmp = cut; tmp <= high; ++(tmp)) {
2386                                 key.gwy.port = htons(tmp);
2387                                 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2388                                     NULL) {
2389                                         *nport = htons(tmp);
2390                                         return (0);
2391                                 }
2392                         }
2393                         for (tmp = cut - 1; tmp >= low; --(tmp)) {
2394                                 key.gwy.port = htons(tmp);
2395                                 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2396                                     NULL) {
2397                                         *nport = htons(tmp);
2398                                         return (0);
2399                                 }
2400                         }
2401                 }
2402
2403                 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2404                 case PF_POOL_RANDOM:
2405                 case PF_POOL_ROUNDROBIN:
2406                         if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2407                                 return (1);
2408                         break;
2409                 case PF_POOL_NONE:
2410                 case PF_POOL_SRCHASH:
2411                 case PF_POOL_BITMASK:
2412                 default:
2413                         return (1);
2414                 }
2415         } while (! PF_AEQ(&init_addr, naddr, af) );
2416
2417         return (1);                                     /* none available */
2418 }
2419
2420 struct pf_rule *
2421 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2422     int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2423     struct pf_addr *daddr, u_int16_t dport, int rs_num)
2424 {
2425         struct pf_rule          *r, *rm = NULL;
2426         struct pf_ruleset       *ruleset = NULL;
2427         int                      tag = -1;
2428         int                      rtableid = -1;
2429         int                      asd = 0;
2430
2431         r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2432         while (r && rm == NULL) {
2433                 struct pf_rule_addr     *src = NULL, *dst = NULL;
2434                 struct pf_addr_wrap     *xdst = NULL;
2435
2436                 if (r->action == PF_BINAT && direction == PF_IN) {
2437                         src = &r->dst;
2438                         if (r->rpool.cur != NULL)
2439                                 xdst = &r->rpool.cur->addr;
2440                 } else {
2441                         src = &r->src;
2442                         dst = &r->dst;
2443                 }
2444
2445                 r->evaluations++;
2446                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2447                         r = r->skip[PF_SKIP_IFP].ptr;
2448                 else if (r->direction && r->direction != direction)
2449                         r = r->skip[PF_SKIP_DIR].ptr;
2450                 else if (r->af && r->af != pd->af)
2451                         r = r->skip[PF_SKIP_AF].ptr;
2452                 else if (r->proto && r->proto != pd->proto)
2453                         r = r->skip[PF_SKIP_PROTO].ptr;
2454                 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2455                     src->neg, kif))
2456                         r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2457                             PF_SKIP_DST_ADDR].ptr;
2458                 else if (src->port_op && !pf_match_port(src->port_op,
2459                     src->port[0], src->port[1], sport))
2460                         r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2461                             PF_SKIP_DST_PORT].ptr;
2462                 else if (dst != NULL &&
2463                     PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
2464                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
2465                 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2466                     0, NULL))
2467                         r = TAILQ_NEXT(r, entries);
2468                 else if (dst != NULL && dst->port_op &&
2469                     !pf_match_port(dst->port_op, dst->port[0],
2470                     dst->port[1], dport))
2471                         r = r->skip[PF_SKIP_DST_PORT].ptr;
2472                 else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
2473                         r = TAILQ_NEXT(r, entries);
2474                 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2475                     IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2476                     off, pd->hdr.tcp), r->os_fingerprint)))
2477                         r = TAILQ_NEXT(r, entries);
2478                 else {
2479                         if (r->tag)
2480                                 tag = r->tag;
2481                         if (r->rtableid >= 0)
2482                                 rtableid = r->rtableid;
2483                         if (r->anchor == NULL) {
2484                                 rm = r;
2485                         } else
2486                                 pf_step_into_anchor(&asd, &ruleset, rs_num,
2487                                     &r, NULL, NULL);
2488                 }
2489                 if (r == NULL)
2490                         pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2491                             NULL, NULL);
2492         }
2493         if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
2494                 return (NULL);
2495         if (rm != NULL && (rm->action == PF_NONAT ||
2496             rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2497                 return (NULL);
2498         return (rm);
2499 }
2500
2501 struct pf_rule *
2502 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2503     struct pfi_kif *kif, struct pf_src_node **sn,
2504     struct pf_addr *saddr, u_int16_t sport,
2505     struct pf_addr *daddr, u_int16_t dport,
2506     struct pf_addr *naddr, u_int16_t *nport)
2507 {
2508         struct pf_rule  *r = NULL;
2509
2510         if (direction == PF_OUT) {
2511                 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2512                     sport, daddr, dport, PF_RULESET_BINAT);
2513                 if (r == NULL)
2514                         r = pf_match_translation(pd, m, off, direction, kif,
2515                             saddr, sport, daddr, dport, PF_RULESET_NAT);
2516         } else {
2517                 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2518                     sport, daddr, dport, PF_RULESET_RDR);
2519                 if (r == NULL)
2520                         r = pf_match_translation(pd, m, off, direction, kif,
2521                             saddr, sport, daddr, dport, PF_RULESET_BINAT);
2522         }
2523
2524         if (r != NULL) {
2525                 switch (r->action) {
2526                 case PF_NONAT:
2527                 case PF_NOBINAT:
2528                 case PF_NORDR:
2529                         return (NULL);
2530                 case PF_NAT:
2531                         if (pf_get_sport(pd->af, pd->proto, r, saddr,
2532                             daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2533                             r->rpool.proxy_port[1], sn)) {
2534                                 DPFPRINTF(PF_DEBUG_MISC,
2535                                     ("pf: NAT proxy port allocation "
2536                                     "(%u-%u) failed\n",
2537                                     r->rpool.proxy_port[0],
2538                                     r->rpool.proxy_port[1]));
2539                                 return (NULL);
2540                         }
2541                         break;
2542                 case PF_BINAT:
2543                         switch (direction) {
2544                         case PF_OUT:
2545                                 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2546                                         switch (pd->af) {
2547 #ifdef INET
2548                                         case AF_INET:
2549                                                 if (r->rpool.cur->addr.p.dyn->
2550                                                     pfid_acnt4 < 1)
2551                                                         return (NULL);
2552                                                 PF_POOLMASK(naddr,
2553                                                     &r->rpool.cur->addr.p.dyn->
2554                                                     pfid_addr4,
2555                                                     &r->rpool.cur->addr.p.dyn->
2556                                                     pfid_mask4,
2557                                                     saddr, AF_INET);
2558                                                 break;
2559 #endif /* INET */
2560 #ifdef INET6
2561                                         case AF_INET6:
2562                                                 if (r->rpool.cur->addr.p.dyn->
2563                                                     pfid_acnt6 < 1)
2564                                                         return (NULL);
2565                                                 PF_POOLMASK(naddr,
2566                                                     &r->rpool.cur->addr.p.dyn->
2567                                                     pfid_addr6,
2568                                                     &r->rpool.cur->addr.p.dyn->
2569                                                     pfid_mask6,
2570                                                     saddr, AF_INET6);
2571                                                 break;
2572 #endif /* INET6 */
2573                                         }
2574                                 } else
2575                                         PF_POOLMASK(naddr,
2576                                             &r->rpool.cur->addr.v.a.addr,
2577                                             &r->rpool.cur->addr.v.a.mask,
2578                                             saddr, pd->af);
2579                                 break;
2580                         case PF_IN:
2581                                 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2582                                         switch (pd->af) {
2583 #ifdef INET
2584                                         case AF_INET:
2585                                                 if (r->src.addr.p.dyn->
2586                                                     pfid_acnt4 < 1)
2587                                                         return (NULL);
2588                                                 PF_POOLMASK(naddr,
2589                                                     &r->src.addr.p.dyn->
2590                                                     pfid_addr4,
2591                                                     &r->src.addr.p.dyn->
2592                                                     pfid_mask4,
2593                                                     daddr, AF_INET);
2594                                                 break;
2595 #endif /* INET */
2596 #ifdef INET6
2597                                         case AF_INET6:
2598                                                 if (r->src.addr.p.dyn->
2599                                                     pfid_acnt6 < 1)
2600                                                         return (NULL);
2601                                                 PF_POOLMASK(naddr,
2602                                                     &r->src.addr.p.dyn->
2603                                                     pfid_addr6,
2604                                                     &r->src.addr.p.dyn->
2605                                                     pfid_mask6,
2606                                                     daddr, AF_INET6);
2607                                                 break;
2608 #endif /* INET6 */
2609                                         }
2610                                 } else
2611                                         PF_POOLMASK(naddr,
2612                                             &r->src.addr.v.a.addr,
2613                                             &r->src.addr.v.a.mask, daddr,
2614                                             pd->af);
2615                                 break;
2616                         }
2617                         break;
2618                 case PF_RDR: {
2619                         if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2620                                 return (NULL);
2621                         if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2622                             PF_POOL_BITMASK)
2623                                 PF_POOLMASK(naddr, naddr,
2624                                     &r->rpool.cur->addr.v.a.mask, daddr,
2625                                     pd->af);
2626                         if (r->rpool.proxy_port[1]) {
2627                                 u_int32_t       tmp_nport;
2628
2629                                 tmp_nport = ((ntohs(dport) -
2630                                     ntohs(r->dst.port[0])) %
2631                                     (r->rpool.proxy_port[1] -
2632                                     r->rpool.proxy_port[0] + 1)) +
2633                                     r->rpool.proxy_port[0];
2634
2635                                 /* wrap around if necessary */
2636                                 if (tmp_nport > 65535)
2637                                         tmp_nport -= 65535;
2638                                 *nport = htons((u_int16_t)tmp_nport);
2639                         } else if (r->rpool.proxy_port[0])
2640                                 *nport = htons(r->rpool.proxy_port[0]);
2641                         break;
2642                 }
2643                 default:
2644                         return (NULL);
2645                 }
2646         }
2647
2648         return (r);
2649 }
2650
#ifdef SMP
/*
 * Message used to have a pcb hash lookup carried out by a message
 * handler: it carries the lookup arguments plus the location where the
 * handler stores the resulting inpcb pointer.
 */
struct netmsg_hashlookup {
	struct netmsg		nm_netmsg;	/* message header */
	struct inpcb		**nm_pinp;	/* where to store the result */
	struct inpcbinfo	*nm_pcbinfo;	/* pcb table to search */
	struct pf_addr		*nm_saddr;
	struct pf_addr		*nm_daddr;
	uint16_t		nm_sport;
	uint16_t		nm_dport;
	sa_family_t		nm_af;		/* selects the v4/v6 lookup */
};

/*
 * Handler for a netmsg_hashlookup message: run the wildcard pcb hash
 * lookup described by the message, store the result through nm_pinp and
 * reply to the message.  Anything other than AF_INET is treated as IPv6
 * when INET6 is compiled in; without INET6 nothing is stored for it.
 */
static void
in_pcblookup_hash_handler(struct netmsg *msg0)
{
	struct netmsg_hashlookup *lookup = (struct netmsg_hashlookup *)msg0;

	if (lookup->nm_af == AF_INET) {
		*lookup->nm_pinp = in_pcblookup_hash(lookup->nm_pcbinfo,
		    lookup->nm_saddr->v4, lookup->nm_sport,
		    lookup->nm_daddr->v4, lookup->nm_dport,
		    INPLOOKUP_WILDCARD, NULL);
	}
#ifdef INET6
	else {
		*lookup->nm_pinp = in6_pcblookup_hash(lookup->nm_pcbinfo,
		    &lookup->nm_saddr->v6, lookup->nm_sport,
		    &lookup->nm_daddr->v6, lookup->nm_dport,
		    INPLOOKUP_WILDCARD, NULL);
	}
#endif /* INET6 */
	lwkt_replymsg(&lookup->nm_netmsg.nm_lmsg, 0);
}
#endif /* SMP */
2681
2682 int
2683 pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
2684 {
2685         struct pf_addr          *saddr, *daddr;
2686         u_int16_t                sport, dport;
2687         struct inpcbinfo        *pi;
2688         struct inpcb            *inp;
2689 #ifdef SMP
2690         struct netmsg_hashlookup *msg = NULL;
2691 #endif
2692         int                      pi_cpu = 0;
2693
2694         if (pd == NULL)
2695                 return (-1);
2696         pd->lookup.uid = UID_MAX;
2697         pd->lookup.gid = GID_MAX;
2698         pd->lookup.pid = NO_PID;
2699         if (direction == PF_IN) {
2700                 saddr = pd->src;
2701                 daddr = pd->dst;
2702         } else {
2703                 saddr = pd->dst;
2704                 daddr = pd->src;
2705         }
2706         switch (pd->proto) {
2707         case IPPROTO_TCP:
2708                 sport = pd->hdr.tcp->th_sport;
2709                 dport = pd->hdr.tcp->th_dport;
2710
2711                 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
2712                 pi = &tcbinfo[pi_cpu];
2713 #ifdef SMP
2714                 /*
2715                  * Our netstack runs lockless on MP systems
2716                  * (only for TCP connections at the moment).
2717                  * 
2718                  * As we are not allowed to read another CPU's tcbinfo,
2719                  * we have to ask that CPU via remote call to search the
2720                  * table for us.
2721                  * 
2722                  * Prepare a msg iff data belongs to another CPU.
2723                  */
2724                 if (pi_cpu != mycpu->gd_cpuid) {
2725                         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_INTWAIT);
2726                         netmsg_init(&msg->nm_netmsg, NULL, &netisr_afree_rport,
2727                                     0, in_pcblookup_hash_handler);
2728                         msg->nm_pinp = &inp;
2729                         msg->nm_pcbinfo = pi;
2730                         msg->nm_saddr = saddr;
2731                         msg->nm_sport = sport;
2732                         msg->nm_daddr = daddr;
2733                         msg->nm_dport = dport;
2734                         msg->nm_af = pd->af;
2735                 }
2736 #endif /* SMP */
2737                 break;
2738         case IPPROTO_UDP:
2739                 sport = pd->hdr.udp->uh_sport;
2740                 dport = pd->hdr.udp->uh_dport;
2741                 pi = &udbinfo;
2742                 break;
2743         default:
2744                 return (0);
2745         }
2746         if (direction != PF_IN) {
2747                 u_int16_t       p;
2748
2749                 p = sport;
2750                 sport = dport;
2751                 dport = p;
2752         }
2753         switch (pd->af) {
2754 #ifdef INET6
2755         case AF_INET6:
2756 #ifdef SMP
2757                 /*
2758                  * Query other CPU, second part
2759                  * 
2760                  * msg only gets initialized when:
2761                  * 1) packet is TCP
2762                  * 2) the info belongs to another CPU
2763                  *
2764                  * Use some switch/case magic to avoid code duplication.
2765                  */
2766                 if (msg == NULL)
2767 #endif /* SMP */
2768                 {
2769                         inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2770                             &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2771
2772                         if (inp == NULL)
2773                                 return (-1);
2774                         break;
2775                 }
2776                 /* FALLTHROUGH if SMP and on other CPU */
2777 #endif /* INET6 */
2778         case AF_INET:
2779 #ifdef SMP
2780                 if (msg != NULL) {
2781                         lwkt_sendmsg(tcp_cport(pi_cpu),
2782                                      &msg->nm_netmsg.nm_lmsg);
2783                 } else
2784 #endif /* SMP */
2785                 {
2786                         inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2787                             dport, INPLOOKUP_WILDCARD, NULL);
2788                 }
2789                 if (inp == NULL)
2790                         return (0);
2791                 break;
2792
2793         default:
2794                 return (-1);
2795         }
2796         pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
2797         pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
2798         return (1);
2799 }
2800
2801 u_int8_t
2802 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2803 {
2804         int              hlen;
2805         u_int8_t         hdr[60];
2806         u_int8_t        *opt, optlen;
2807         u_int8_t         wscale = 0;
2808
2809         hlen = th_off << 2;             /* hlen <= sizeof(hdr) */
2810         if (hlen <= sizeof(struct tcphdr))
2811                 return (0);
2812         if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2813                 return (0);
2814         opt = hdr + sizeof(struct tcphdr);
2815         hlen -= sizeof(struct tcphdr);
2816         while (hlen >= 3) {
2817                 switch (*opt) {
2818                 case TCPOPT_EOL:
2819                 case TCPOPT_NOP:
2820                         ++opt;
2821                         --hlen;
2822                         break;
2823                 case TCPOPT_WINDOW:
2824                         wscale = opt[2];
2825                         if (wscale > TCP_MAX_WINSHIFT)
2826                                 wscale = TCP_MAX_WINSHIFT;
2827                         wscale |= PF_WSCALE_FLAG;
2828                         /* FALLTHROUGH */
2829                 default:
2830                         optlen = opt[1];
2831                         if (optlen < 2)
2832                                 optlen = 2;
2833                         hlen -= optlen;
2834                         opt += optlen;
2835                         break;
2836                 }
2837         }
2838         return (wscale);
2839 }
2840
2841 u_int16_t
2842 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2843 {
2844         int              hlen;
2845         u_int8_t         hdr[60];
2846         u_int8_t        *opt, optlen;
2847         u_int16_t        mss = tcp_mssdflt;
2848
2849         hlen = th_off << 2;     /* hlen <= sizeof(hdr) */
2850         if (hlen <= sizeof(struct tcphdr))
2851                 return (0);
2852         if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2853                 return (0);
2854         opt = hdr + sizeof(struct tcphdr);
2855         hlen -= sizeof(struct tcphdr);
2856         while (hlen >= TCPOLEN_MAXSEG) {
2857                 switch (*opt) {
2858                 case TCPOPT_EOL:
2859                 case TCPOPT_NOP:
2860                         ++opt;
2861                         --hlen;
2862                         break;
2863                 case TCPOPT_MAXSEG:
2864                         bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2865                         NTOHS(mss);
2866                         /* FALLTHROUGH */
2867                 default:
2868                         optlen = opt[1];
2869                         if (optlen < 2)
2870                                 optlen = 2;
2871                         hlen -= optlen;
2872                         opt += optlen;
2873                         break;
2874                 }
2875         }
2876         return (mss);
2877 }
2878
2879 u_int16_t
2880 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2881 {
2882 #ifdef INET
2883         struct sockaddr_in      *dst;
2884         struct route             ro;
2885 #endif /* INET */
2886 #ifdef INET6
2887         struct sockaddr_in6     *dst6;
2888         struct route_in6         ro6;
2889 #endif /* INET6 */
2890         struct rtentry          *rt = NULL;
2891         int                      hlen = 0;
2892         u_int16_t                mss = tcp_mssdflt;
2893
2894         switch (af) {
2895 #ifdef INET
2896         case AF_INET:
2897                 hlen = sizeof(struct ip);
2898                 bzero(&ro, sizeof(ro));
2899                 dst = (struct sockaddr_in *)&ro.ro_dst;
2900                 dst->sin_family = AF_INET;
2901                 dst->sin_len = sizeof(*dst);
2902                 dst->sin_addr = addr->v4;
2903                 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2904                 rt = ro.ro_rt;
2905                 break;
2906 #endif /* INET */
2907 #ifdef INET6
2908         case AF_INET6:
2909                 hlen = sizeof(struct ip6_hdr);
2910                 bzero(&ro6, sizeof(ro6));
2911                 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2912                 dst6->sin6_family = AF_INET6;
2913                 dst6->sin6_len = sizeof(*dst6);
2914                 dst6->sin6_addr = addr->v6;
2915                 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
2916                 rt = ro6.ro_rt;
2917                 break;
2918 #endif /* INET6 */
2919         }
2920
2921         if (rt && rt->rt_ifp) {
2922                 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2923                 mss = max(tcp_mssdflt, mss);
2924                 RTFREE(rt);
2925         }
2926         mss = min(mss, offer);
2927         mss = max(mss, 64);             /* sanity - at least max opt space */
2928         return (mss);
2929 }
2930
2931 void
2932 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2933 {
2934         struct pf_rule *r = s->rule.ptr;
2935
2936         s->rt_kif = NULL;
2937         if (!r->rt || r->rt == PF_FASTROUTE)
2938                 return;
2939         switch (s->af) {
2940 #ifdef INET
2941         case AF_INET:
2942                 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2943                     &s->nat_src_node);
2944                 s->rt_kif = r->rpool.cur->kif;
2945                 break;
2946 #endif /* INET */
2947 #ifdef INET6
2948         case AF_INET6:
2949                 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2950                     &s->nat_src_node);
2951                 s->rt_kif = r->rpool.cur->kif;
2952                 break;
2953 #endif /* INET6 */
2954         }
2955 }
2956
2957 int
2958 pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
2959     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
2960     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
2961     struct ifqueue *ifq, struct inpcb *inp)
2962 {
2963         struct pf_rule          *nr = NULL;
2964         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
2965         struct tcphdr           *th = pd->hdr.tcp;
2966         u_int16_t                bport, nport = 0;
2967         sa_family_t              af = pd->af;
2968         struct pf_rule          *r, *a = NULL;
2969         struct pf_ruleset       *ruleset = NULL;
2970         struct pf_src_node      *nsn = NULL;
2971         u_short                  reason;
2972         int                      rewrite = 0;
2973         int                      tag = -1, rtableid = -1;
2974         u_int16_t                mss = tcp_mssdflt;
2975         int                      asd = 0;
2976         int                      match = 0;
2977
2978         if (pf_check_congestion(ifq)) {
2979                 REASON_SET(&reason, PFRES_CONGEST);
2980                 return (PF_DROP);
2981         }
2982
2983         if (inp != NULL)
2984                 pd->lookup.done = pf_socket_lookup(direction, pd, inp);
2985         else if (debug_pfugidhack) {
2986                 crit_exit();
2987                 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
2988                 pd->lookup.done = pf_socket_lookup(direction, pd, inp);
2989                 crit_enter();
2990         }
2991         
2992
2993         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
2994
2995         if (direction == PF_OUT) {
2996                 bport = nport = th->th_sport;
2997                 /* check outgoing packet for BINAT/NAT */
2998                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
2999                     saddr, th->th_sport, daddr, th->th_dport,
3000                     &pd->naddr, &nport)) != NULL) {
3001                         PF_ACPY(&pd->baddr, saddr, af);
3002                         pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3003                             &th->th_sum, &pd->naddr, nport, 0, af);
3004                         rewrite++;
3005                         if (nr->natpass)
3006                                 r = NULL;
3007                         pd->nat_rule = nr;
3008                 }
3009         } else {
3010                 bport = nport = th->th_dport;
3011                 /* check incoming packet for BINAT/RDR */
3012                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3013                     saddr, th->th_sport, daddr, th->th_dport,
3014                     &pd->naddr, &nport)) != NULL) {
3015                         PF_ACPY(&pd->baddr, daddr, af);
3016                         pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3017                             &th->th_sum, &pd->naddr, nport, 0, af);
3018                         rewrite++;
3019                         if (nr->natpass)
3020                                 r = NULL;
3021                         pd->nat_rule = nr;
3022                 }
3023         }
3024
3025         while (r != NULL) {
3026                 r->evaluations++;
3027                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
3028                         r = r->skip[PF_SKIP_IFP].ptr;
3029                 else if (r->direction && r->direction != direction)
3030                         r = r->skip[PF_SKIP_DIR].ptr;
3031                 else if (r->af && r->af != af)
3032                         r = r->skip[PF_SKIP_AF].ptr;
3033                 else if (r->proto && r->proto != IPPROTO_TCP)
3034                         r = r->skip[PF_SKIP_PROTO].ptr;
3035                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3036                     r->src.neg, kif))
3037                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3038                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
3039                     r->src.port[0], r->src.port[1], th->th_sport))
3040                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
3041                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3042                     r->dst.neg, NULL))
3043                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3044                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3045                     r->dst.port[0], r->dst.port[1], th->th_dport))
3046                         r = r->skip[PF_SKIP_DST_PORT].ptr;
3047                 else if (r->tos && !(r->tos == pd->tos))
3048                         r = TAILQ_NEXT(r, entries);
3049                 else if (r->rule_flag & PFRULE_FRAGMENT)
3050                         r = TAILQ_NEXT(r, entries);
3051                 else if ((r->flagset & th->th_flags) != r->flags)
3052                         r = TAILQ_NEXT(r, entries);
3053                 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3054                     pf_socket_lookup(direction, pd, inp), 1)) &&
3055                     !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3056                     pd->lookup.uid))
3057                         r = TAILQ_NEXT(r, entries);
3058                 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3059                     pf_socket_lookup(direction, pd, inp), 1)) &&
3060                     !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3061                     pd->lookup.gid))
3062                         r = TAILQ_NEXT(r, entries);
3063                 else if (r->prob && r->prob <= karc4random())
3064                         r = TAILQ_NEXT(r, entries);
3065                 else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
3066                         r = TAILQ_NEXT(r, entries);
3067                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
3068                     pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
3069                         r = TAILQ_NEXT(r, entries);
3070                 else {
3071                         if (r->tag)
3072                                 tag = r->tag;
3073                         if (r->rtableid >= 0)
3074                                 rtableid = r->rtableid;
3075                         if (r->anchor == NULL) {
3076                                 match = 1;
3077                                 *rm = r;
3078                                 *am = a;
3079                                 *rsm = ruleset;
3080                                 if ((*rm)->quick)
3081                                         break;
3082                                 r = TAILQ_NEXT(r, entries);
3083                         } else
3084                                 pf_step_into_anchor(&asd, &ruleset,
3085                                     PF_RULESET_FILTER, &r, &a, &match);
3086                 }
3087                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3088                     PF_RULESET_FILTER, &r, &a, &match))
3089                         break;
3090         }
3091         r = *rm;
3092         a = *am;
3093         ruleset = *rsm;
3094
3095         REASON_SET(&reason, PFRES_MATCH);
3096
3097         if (r->log || (nr != NULL && nr->natpass && nr->log)) {
3098                 if (rewrite)
3099                         m_copyback(m, off, sizeof(*th), (caddr_t)th);
3100                 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3101                     a, ruleset, pd);
3102         }
3103
3104         if ((r->action == PF_DROP) &&
3105             ((r->rule_flag & PFRULE_RETURNRST) ||
3106             (r->rule_flag & PFRULE_RETURNICMP) ||
3107             (r->rule_flag & PFRULE_RETURN))) {
3108                 /* undo NAT changes, if they have taken place */
3109                 if (nr != NULL) {
3110                         if (direction == PF_OUT) {
3111                                 pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3112                                     &th->th_sum, &pd->baddr, bport, 0, af);
3113                                 rewrite++;
3114                         } else {
3115                                 pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3116                                     &th->th_sum, &pd->baddr, bport, 0, af);
3117                                 rewrite++;
3118                         }
3119                 }
3120                 if (((r->rule_flag & PFRULE_RETURNRST) ||
3121                     (r->rule_flag & PFRULE_RETURN)) &&
3122                     !(th->th_flags & TH_RST)) {
3123                         u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
3124
3125                         if (th->th_flags & TH_SYN)
3126                                 ack++;
3127                         if (th->th_flags & TH_FIN)
3128                                 ack++;
3129                         pf_send_tcp(r, af, pd->dst,
3130                             pd->src, th->th_dport, th->th_sport,
3131                             ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3132                             r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3133                 } else if ((af == AF_INET) && r->return_icmp)
3134                         pf_send_icmp(m, r->return_icmp >> 8,
3135                             r->return_icmp & 255, af, r);
3136                 else if ((af == AF_INET6) && r->return_icmp6)
3137                         pf_send_icmp(m, r->return_icmp6 >> 8,
3138                             r->return_icmp6 & 255, af, r);
3139         }
3140
3141         if (r->action == PF_DROP) {
3142                 return (PF_DROP);
3143         }
3144
3145         if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
3146                 REASON_SET(&reason, PFRES_MEMORY);
3147                 return (PF_DROP);
3148         }
3149
3150         if (r->keep_state || nr != NULL ||
3151             (pd->flags & PFDESC_TCP_NORM)) {
3152                 /* create new state */
3153                 u_int16_t        len;
3154                 struct pf_state *s = NULL;
3155                 struct pf_src_node *sn = NULL;
3156
3157                 len = pd->tot_len - off - (th->th_off << 2);
3158
3159                 /* check maximums */
3160                 if (r->max_states && (r->states >= r->max_states)) {
3161                         pf_status.lcounters[LCNT_STATES]++;
3162                         REASON_SET(&reason, PFRES_MAXSTATES);
3163                         goto cleanup;
3164                 }
3165                 /* src node for filter rule */
3166                 if ((r->rule_flag & PFRULE_SRCTRACK ||
3167                     r->rpool.opts & PF_POOL_STICKYADDR) &&
3168                     pf_insert_src_node(&sn, r, saddr, af) != 0) {
3169                         REASON_SET(&reason, PFRES_SRCLIMIT);
3170                         goto cleanup;
3171                 }
3172                 /* src node for translation rule */
3173                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3174                     ((direction == PF_OUT &&
3175                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3176                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3177                         REASON_SET(&reason, PFRES_SRCLIMIT);
3178                         goto cleanup;
3179                 }
3180                 s = pool_get(&pf_state_pl, PR_NOWAIT);
3181                 if (s == NULL) {
3182                         REASON_SET(&reason, PFRES_MEMORY);
3183 cleanup:
3184                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3185                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3186                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3187                                 pf_status.src_nodes--;
3188                                 pool_put(&pf_src_tree_pl, sn);
3189                         }
3190                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3191                             nsn->expire == 0) {
3192                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3193                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3194                                 pf_status.src_nodes--;
3195                                 pool_put(&pf_src_tree_pl, nsn);
3196                         }
3197                         return (PF_DROP);
3198                 }
3199                 bzero(s, sizeof(*s));
3200                 s->rule.ptr = r;
3201                 s->nat_rule.ptr = nr;
3202                 s->anchor.ptr = a;
3203                 STATE_INC_COUNTERS(s);
3204                 s->allow_opts = r->allow_opts;
3205                 s->log = r->log & PF_LOG_ALL;
3206                 if (nr != NULL)
3207                         s->log |= nr->log & PF_LOG_ALL;
3208                 s->proto = IPPROTO_TCP;
3209                 s->direction = direction;
3210                 s->af = af;
3211                 if (direction == PF_OUT) {
3212                         PF_ACPY(&s->gwy.addr, saddr, af);
3213                         s->gwy.port = th->th_sport;             /* sport */
3214                         PF_ACPY(&s->ext.addr, daddr, af);
3215                         s->ext.port = th->th_dport;
3216                         if (nr != NULL) {
3217                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
3218                                 s->lan.port = bport;
3219                         } else {
3220                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3221                                 s->lan.port = s->gwy.port;
3222                         }
3223                 } else {
3224                         PF_ACPY(&s->lan.addr, daddr, af);
3225                         s->lan.port = th->th_dport;
3226                         PF_ACPY(&s->ext.addr, saddr, af);
3227                         s->ext.port = th->th_sport;
3228                         if (nr != NULL) {
3229                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3230                                 s->gwy.port = bport;
3231                         } else {
3232                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3233                                 s->gwy.port = s->lan.port;
3234                         }
3235                 }
3236
3237                 s->hash = pf_state_hash(s);
3238                 s->src.seqlo = ntohl(th->th_seq);
3239                 s->src.seqhi = s->src.seqlo + len + 1;
3240                 s->pickup_mode = r->pickup_mode;
3241
3242                 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3243                     r->keep_state == PF_STATE_MODULATE) {
3244                         /* Generate sequence number modulator */
3245                         while ((s->src.seqdiff =
3246                             pf_new_isn(s) - s->src.seqlo) == 0)
3247                                 ;
3248                         pf_change_a(&th->th_seq, &th->th_sum,
3249                             htonl(s->src.seqlo + s->src.seqdiff), 0);
3250                         rewrite = 1;
3251                 } else
3252                         s->src.seqdiff = 0;
3253                 if (th->th_flags & TH_SYN) {
3254                         s->src.seqhi++;
3255                         s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
3256                         s->sync_flags |= PFSTATE_GOT_SYN1;
3257                 }
3258                 s->src.max_win = MAX(ntohs(th->th_win), 1);
3259                 if (s->src.wscale & PF_WSCALE_MASK) {
3260                         /* Remove scale factor from initial window */
3261                         u_int win = s->src.max_win;
3262                         win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3263                         s->src.max_win = (win - 1) >>
3264                             (s->src.wscale & PF_WSCALE_MASK);
3265                 }
3266                 if (th->th_flags & TH_FIN)
3267                         s->src.seqhi++;
3268                 s->dst.seqhi = 1;
3269                 s->dst.max_win = 1;
3270                 s->src.state = TCPS_SYN_SENT;
3271                 s->dst.state = TCPS_CLOSED;
3272                 s->creation = time_second;
3273                 s->expire = time_second;
3274                 s->timeout = PFTM_TCP_FIRST_PACKET;
3275                 pf_set_rt_ifp(s, saddr);
3276                 if (sn != NULL) {
3277                         s->src_node = sn;
3278                         s->src_node->states++;
3279                 }
3280                 if (nsn != NULL) {
3281                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3282                         s->nat_src_node = nsn;
3283                         s->nat_src_node->states++;
3284                 }
3285                 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
3286                     off, pd, th, &s->src, &s->dst)) {
3287                         REASON_SET(&reason, PFRES_MEMORY);
3288                         pf_src_tree_remove_state(s);
3289                         STATE_DEC_COUNTERS(s);
3290                         pool_put(&pf_state_pl, s);
3291                         return (PF_DROP);
3292                 }
3293                 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3294                     pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
3295                     &s->src, &s->dst, &rewrite)) {
3296                         /* This really shouldn't happen!!! */
3297                         DPFPRINTF(PF_DEBUG_URGENT,
3298                             ("pf_normalize_tcp_stateful failed on first pkt"));
3299                         pf_normalize_tcp_cleanup(s);
3300                         pf_src_tree_remove_state(s);
3301                         STATE_DEC_COUNTERS(s);
3302                         pool_put(&pf_state_pl, s);
3303                         return (PF_DROP);
3304                 }
3305                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3306                         pf_normalize_tcp_cleanup(s);
3307                         REASON_SET(&reason, PFRES_STATEINS);
3308                         pf_src_tree_remove_state(s);
3309                         STATE_DEC_COUNTERS(s);
3310                         pool_put(&pf_state_pl, s);
3311                         return (PF_DROP);
3312                 } else
3313                         *sm = s;
3314                 if (tag > 0) {
3315                         pf_tag_ref(tag);
3316                         s->tag = tag;
3317                 }
3318                 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3319                     r->keep_state == PF_STATE_SYNPROXY) {
3320                         s->src.state = PF_TCPS_PROXY_SRC;
3321                         if (nr != NULL) {
3322                                 if (direction == PF_OUT) {
3323                                         pf_change_ap(saddr, &th->th_sport,
3324                                             pd->ip_sum, &th->th_sum, &pd->baddr,
3325                                             bport, 0, af);
3326                                 } else {
3327                                         pf_change_ap(daddr, &th->th_dport,
3328                                             pd->ip_sum, &th->th_sum, &pd->baddr,
3329                                             bport, 0, af);
3330                                 }
3331                         }
3332                         s->src.seqhi = htonl(karc4random());
3333                         /* Find mss option */
3334                         mss = pf_get_mss(m, off, th->th_off, af);
3335                         mss = pf_calc_mss(saddr, af, mss);
3336                         mss = pf_calc_mss(daddr, af, mss);
3337                         s->src.mss = mss;
3338                         pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3339                             th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3340                             TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3341                         REASON_SET(&reason, PFRES_SYNPROXY);
3342                         return (PF_SYNPROXY_DROP);
3343                 }
3344         }
3345
3346         /* copy back packet headers if we performed NAT operations */
3347         if (rewrite)
3348                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
3349
3350         return (PF_PASS);
3351 }
3352
/*
 * pf_test_udp() - run a UDP packet through translation and the active
 * filter rules, and create a state entry when the result calls for one.
 *
 *   rm, am, rsm  in/out: the matching rule, its anchor rule and its
 *                ruleset are stored here; they are read back after the
 *                rule walk even when no rule matched, so the caller must
 *                pass in valid initial values (presumably the default
 *                rule -- confirm against the caller).
 *   sm           out: set to the newly inserted state, if one is created.
 *   direction    PF_OUT selects the source side for translation;
 *                anything else is treated as inbound.
 *   kif          interface the rules are matched against.
 *   m, off       mbuf holding the packet and the offset at which the UDP
 *                header is copied back into it.
 *   h            passed through to PFLOG_PACKET.
 *   pd           packet descriptor (addresses, UDP header, af, tos, ...).
 *   ifq          passed to pf_check_congestion().
 *   inp          passed to pf_socket_lookup() for uid/gid rule matching.
 *
 * Returns PF_PASS, or PF_DROP on congestion, a drop rule, or any failure
 * while tagging the packet or creating the state.
 */
3353 int
3354 pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
3355     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3356     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3357     struct ifqueue *ifq, struct inpcb *inp)
3358 {
3359         struct pf_rule          *nr = NULL;
3360         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
3361         struct udphdr           *uh = pd->hdr.udp;
3362         u_int16_t                bport, nport = 0;
3363         sa_family_t              af = pd->af;
3364         struct pf_rule          *r, *a = NULL;
3365         struct pf_ruleset       *ruleset = NULL;
3366         struct pf_src_node      *nsn = NULL;
3367         u_short                  reason;
3368         int                      rewrite = 0;
3369         int                      tag = -1, rtableid = -1;
3370         int                      asd = 0;
3371         int                      match = 0;
3372
             /* bail out before any rule work if pf_check_congestion() fires */
3373         if (pf_check_congestion(ifq)) {
3374                 REASON_SET(&reason, PFRES_CONGEST);
3375                 return (PF_DROP);
3376         }
3377
             /* rule walk starts at the head of the main ruleset's active filter rules */
3378         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3379
             /*
              * Translation.  bport remembers the port as it was before
              * translation (used later to undo it and to fill in the state);
              * nport receives the translated port.  pd->baddr is saved before
              * pf_change_ap() is applied.  A translation rule with natpass set
              * clears r, so the filter rule walk below is skipped entirely.
              */
3380         if (direction == PF_OUT) {
3381                 bport = nport = uh->uh_sport;
3382                 /* check outgoing packet for BINAT/NAT */
3383                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3384                     saddr, uh->uh_sport, daddr, uh->uh_dport,
3385                     &pd->naddr, &nport)) != NULL) {
3386                         PF_ACPY(&pd->baddr, saddr, af);
3387                         pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3388                             &uh->uh_sum, &pd->naddr, nport, 1, af);
3389                         rewrite++;
3390                         if (nr->natpass)
3391                                 r = NULL;
3392                         pd->nat_rule = nr;
3393                 }
3394         } else {
3395                 bport = nport = uh->uh_dport;
3396                 /* check incoming packet for BINAT/RDR */
3397                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3398                     saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
3399                     &nport)) != NULL) {
3400                         PF_ACPY(&pd->baddr, daddr, af);
3401                         pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3402                             &uh->uh_sum, &pd->naddr, nport, 1, af);
3403                         rewrite++;
3404                         if (nr->natpass)
3405                                 r = NULL;
3406                         pd->nat_rule = nr;
3407                 }
3408         }
3409
             /*
              * Filter rule walk.  A mismatch on interface, direction, address
              * family, protocol, address or port advances through the rule's
              * skip[] pointer for that field; every other mismatch advances to
              * the next list entry.
              */
3410         while (r != NULL) {
3411                 r->evaluations++;
3412                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
3413                         r = r->skip[PF_SKIP_IFP].ptr;
3414                 else if (r->direction && r->direction != direction)
3415                         r = r->skip[PF_SKIP_DIR].ptr;
3416                 else if (r->af && r->af != af)
3417                         r = r->skip[PF_SKIP_AF].ptr;
3418                 else if (r->proto && r->proto != IPPROTO_UDP)
3419                         r = r->skip[PF_SKIP_PROTO].ptr;
3420                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3421                     r->src.neg, kif))
3422                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3423                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
3424                     r->src.port[0], r->src.port[1], uh->uh_sport))
3425                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
3426                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3427                     r->dst.neg, NULL))
3428                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3429                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3430                     r->dst.port[0], r->dst.port[1], uh->uh_dport))
3431                         r = r->skip[PF_SKIP_DST_PORT].ptr;
3432                 else if (r->tos && !(r->tos == pd->tos))
3433                         r = TAILQ_NEXT(r, entries);
3434                 else if (r->rule_flag & PFRULE_FRAGMENT)
3435                         r = TAILQ_NEXT(r, entries);
                     /*
                      * uid/gid rules: the socket lookup is only issued when
                      * pd->lookup.done is not already set; the ", 1" makes the
                      * parenthesised term true whatever the lookup returned.
                      */
3436                 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3437                     pf_socket_lookup(direction, pd, inp), 1)) &&
3438                     !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3439                     pd->lookup.uid))
3440                         r = TAILQ_NEXT(r, entries);
3441                 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3442                     pf_socket_lookup(direction, pd, inp), 1)) &&
3443                     !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3444                     pd->lookup.gid))
3445                         r = TAILQ_NEXT(r, entries);
3446                 else if (r->prob && r->prob <= karc4random())
3447                         r = TAILQ_NEXT(r, entries);
3448                 else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
3449                         r = TAILQ_NEXT(r, entries);
                     /* a rule that asks for a specific OS fingerprint never matches here */
3450                 else if (r->os_fingerprint != PF_OSFP_ANY)
3451                         r = TAILQ_NEXT(r, entries);
3452                 else {
                             /*
                              * Every criterion matched.  tag and rtableid are
                              * taken from each matching rule, anchors included.
                              * A non-anchor rule is recorded through the out
                              * parameters; a later match overwrites it unless
                              * the rule is "quick", which ends the walk.
                              */
3453                         if (r->tag)
3454                                 tag = r->tag;
3455                         if (r->rtableid >= 0)
3456                                 rtableid = r->rtableid;
3457                         if (r->anchor == NULL) {
3458                                 match = 1;
3459                                 *rm = r;
3460                                 *am = a;
3461                                 *rsm = ruleset;
3462                                 if ((*rm)->quick)
3463                                         break;
3464                                 r = TAILQ_NEXT(r, entries);
3465                         } else
3466                                 pf_step_into_anchor(&asd, &ruleset,
3467                                     PF_RULESET_FILTER, &r, &a, &match);
3468                 }
                     /* end of the current list: stop if pf_step_out_of_anchor() says so */
3469                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3470                     PF_RULESET_FILTER, &r, &a, &match))
3471                         break;
3472         }
             /*
              * Reload the result from the out parameters.  Nothing above
              * writes them unless a rule matched, so without a match these are
              * the values the caller passed in.
              */
3473         r = *rm;
3474         a = *am;
3475         ruleset = *rsm;
3476
3477         REASON_SET(&reason, PFRES_MATCH);
3478
             /*
              * Log when the rule asks for it, or when a natpass translation
              * rule does.  A rewritten header is copied into the mbuf first.
              */
3479         if (r->log || (nr != NULL && nr->natpass && nr->log)) {
3480                 if (rewrite)
3481                         m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3482                 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3483                     a, ruleset, pd);
3484         }
3485
             /* drop rule that also wants an ICMP error sent back */
3486         if ((r->action == PF_DROP) &&
3487             ((r->rule_flag & PFRULE_RETURNICMP) ||
3488             (r->rule_flag & PFRULE_RETURN))) {
3489                 /* undo NAT changes, if they have taken place */
3490                 if (nr != NULL) {
3491                         if (direction == PF_OUT) {
3492                                 pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
3493                                     &uh->uh_sum, &pd->baddr, bport, 1, af);
3494                                 rewrite++;
3495                         } else {
3496                                 pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
3497                                     &uh->uh_sum, &pd->baddr, bport, 1, af);
3498                                 rewrite++;
3499                         }
3500                 }
                     /* return_icmp / return_icmp6 pack two values: high byte and low byte */
3501                 if ((af == AF_INET) && r->return_icmp)
3502                         pf_send_icmp(m, r->return_icmp >> 8,
3503                             r->return_icmp & 255, af, r);
3504                 else if ((af == AF_INET6) && r->return_icmp6)
3505                         pf_send_icmp(m, r->return_icmp6 >> 8,
3506                             r->return_icmp6 & 255, af, r);
3507         }
3508
3509         if (r->action == PF_DROP)
3510                 return (PF_DROP);
3511
3512         if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
3513                 REASON_SET(&reason, PFRES_MEMORY);
3514                 return (PF_DROP);
3515         }
3516
             /* a state is created for keep-state rules and whenever translation applied */
3517         if (r->keep_state || nr != NULL) {
3518                 /* create new state */
3519                 struct pf_state *s = NULL;
3520                 struct pf_src_node *sn = NULL;
3521
3522                 /* check maximums */
3523                 if (r->max_states && (r->states >= r->max_states)) {
3524                         pf_status.lcounters[LCNT_STATES]++;
3525                         REASON_SET(&reason, PFRES_MAXSTATES);
3526                         goto cleanup;
3527                 }
3528                 /* src node for filter rule */
3529                 if ((r->rule_flag & PFRULE_SRCTRACK ||
3530                     r->rpool.opts & PF_POOL_STICKYADDR) &&
3531                     pf_insert_src_node(&sn, r, saddr, af) != 0) {
3532                         REASON_SET(&reason, PFRES_SRCLIMIT);
3533                         goto cleanup;
3534                 }
3535                 /* src node for translation rule */
                     /*
                      * NOTE(review): for PF_OUT, when the first call (keyed on
                      * pd->baddr) returns 0, the || falls through and the second
                      * call (keyed on saddr) is evaluated as well, with nsn
                      * already set -- confirm this double call is intended.
                      */
3536                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3537                     ((direction == PF_OUT &&
3538                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3539                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3540                         REASON_SET(&reason, PFRES_SRCLIMIT);
3541                         goto cleanup;
3542                 }
3543                 s = pool_get(&pf_state_pl, PR_NOWAIT);
3544                 if (s == NULL) {
3545                         REASON_SET(&reason, PFRES_MEMORY);
                             /*
                              * Also entered by goto from the limit checks above.
                              * Source nodes that carry no states and no expiry
                              * time are removed from the tree and returned to
                              * their pool; nsn is skipped when it is the same
                              * node as sn.
                              */
3546 cleanup:
3547                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3548                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3549                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3550                                 pf_status.src_nodes--;
3551                                 pool_put(&pf_src_tree_pl, sn);
3552                         }
3553                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3554                             nsn->expire == 0) {
3555                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3556                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3557                                 pf_status.src_nodes--;
3558                                 pool_put(&pf_src_tree_pl, nsn);
3559                         }
3560                         return (PF_DROP);
3561                 }
3562                 bzero(s, sizeof(*s));
3563                 s->rule.ptr = r;
3564                 s->nat_rule.ptr = nr;
3565                 s->anchor.ptr = a;
3566                 STATE_INC_COUNTERS(s);
3567                 s->allow_opts = r->allow_opts;
3568                 s->log = r->log & PF_LOG_ALL;
3569                 if (nr != NULL)
3570                         s->log |= nr->log & PF_LOG_ALL;
3571                 s->proto = IPPROTO_UDP;
3572                 s->direction = direction;
3573                 s->af = af;
                     /*
                      * Fill in the three state endpoints.  Outbound: gwy takes the
                      * current source, ext the destination, and lan the saved
                      * pre-translation address/port (or a copy of gwy when no
                      * translation applied).  Inbound: lan takes the current
                      * destination, ext the source, and gwy the saved
                      * pre-translation address/port (or a copy of lan).
                      */
3574                 if (direction == PF_OUT) {
3575                         PF_ACPY(&s->gwy.addr, saddr, af);
3576                         s->gwy.port = uh->uh_sport;
3577                         PF_ACPY(&s->ext.addr, daddr, af);
3578                         s->ext.port = uh->uh_dport;
3579                         if (nr != NULL) {
3580                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
3581                                 s->lan.port = bport;
3582                         } else {
3583                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3584                                 s->lan.port = s->gwy.port;
3585                         }
3586                 } else {
3587                         PF_ACPY(&s->lan.addr, daddr, af);
3588                         s->lan.port = uh->uh_dport;
3589                         PF_ACPY(&s->ext.addr, saddr, af);
3590                         s->ext.port = uh->uh_sport;
3591                         if (nr != NULL) {
3592                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3593                                 s->gwy.port = bport;
3594                         } else {
3595                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3596                                 s->gwy.port = s->lan.port;
3597                         }
3598                 }
3599                 s->hash = pf_state_hash(s);
3600                 s->src.state = PFUDPS_SINGLE;
3601                 s->dst.state = PFUDPS_NO_TRAFFIC;
3602                 s->creation = time_second;
3603                 s->expire = time_second;
3604                 s->timeout = PFTM_UDP_FIRST_PACKET;
3605                 pf_set_rt_ifp(s, saddr);
                     /* attach the source nodes and count this state against them */
3606                 if (sn != NULL) {
3607                         s->src_node = sn;
3608                         s->src_node->states++;
3609                 }
3610                 if (nsn != NULL) {
3611                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3612                         s->nat_src_node = nsn;
3613                         s->nat_src_node->states++;
3614                 }
                     /* on insert failure, undo the bookkeeping done above and free the state */
3615                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3616                         REASON_SET(&reason, PFRES_STATEINS);
3617                         pf_src_tree_remove_state(s);
3618                         STATE_DEC_COUNTERS(s);
3619                         pool_put(&pf_state_pl, s);
3620                         return (PF_DROP);
3621                 } else
3622                         *sm = s;
                     /* the state takes its own reference on the tag */
3623                 if (tag > 0) {
3624                         pf_tag_ref(tag);
3625                         s->tag = tag;
3626                 }
3627         }
3628
3629         /* copy back packet headers if we performed NAT operations */
3630         if (rewrite)
3631                 m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
3632
3633         return (PF_PASS);
3634 }
3635
/*
 * pf_test_icmp() - run an ICMP/ICMPv6 packet through the active filter
 * rules and, when the verdict is pass, optionally create a state for it.
 *
 * rm, am, rsm  in/out: overwritten with the last matching rule, its anchor
 *              and its ruleset.  They are read back unconditionally after
 *              the rule walk, so *rm must already point at a valid rule on
 *              entry in case nothing matches.
 * sm           out: set to the new state once it has been inserted.
 * direction    PF_OUT selects the outgoing BINAT/NAT lookup; any other
 *              value takes the incoming BINAT/RDR lookup.
 * kif          interface the rules are matched against; also handed to
 *              BOUND_IFACE() when the state is inserted.
 * m, off       mbuf and the offset at which the rewritten ICMP header is
 *              copied back.
 * h            only forwarded to PFLOG_PACKET().
 * pd           packet descriptor; baddr, naddr and nat_rule are written
 *              when a translation rule applies.
 * ifq          only forwarded to pf_check_congestion().
 *
 * Returns PF_PASS or PF_DROP.
 */
3636 int
3637 pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
3638     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3639     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3640     struct ifqueue *ifq)
3641 {
3642         struct pf_rule          *nr = NULL;
3643         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
3644         struct pf_rule          *r, *a = NULL;
3645         struct pf_ruleset       *ruleset = NULL;
3646         struct pf_src_node      *nsn = NULL;
             /* reason is local; besides REASON_SET() it is only passed to PFLOG_PACKET() */
3647         u_short                  reason;
3648         u_int16_t                icmpid = 0, bport, nport = 0;
3649         sa_family_t              af = pd->af;
3650         u_int8_t                 icmptype = 0, icmpcode = 0;
3651         int                      state_icmp = 0;
3652         int                      tag = -1, rtableid = -1;
3653 #ifdef INET6
3654         int                      rewrite = 0;
3655 #endif /* INET6 */
3656         int                      asd = 0;
3657         int                      match = 0;
3658
3659         if (pf_check_congestion(ifq)) {
3660                 REASON_SET(&reason, PFRES_CONGEST);
3661                 return (PF_DROP);
3662         }
3663
             /*
              * Pull type, code and id out of the header.  The message types
              * listed in each case bump state_icmp, which suppresses state
              * creation further down.
              */
3664         switch (pd->proto) {
3665 #ifdef INET
3666         case IPPROTO_ICMP:
3667                 icmptype = pd->hdr.icmp->icmp_type;
3668                 icmpcode = pd->hdr.icmp->icmp_code;
3669                 icmpid = pd->hdr.icmp->icmp_id;
3670
3671                 if (icmptype == ICMP_UNREACH ||
3672                     icmptype == ICMP_SOURCEQUENCH ||
3673                     icmptype == ICMP_REDIRECT ||
3674                     icmptype == ICMP_TIMXCEED ||
3675                     icmptype == ICMP_PARAMPROB)
3676                         state_icmp++;
3677                 break;
3678 #endif /* INET */
3679 #ifdef INET6
3680         case IPPROTO_ICMPV6:
3681                 icmptype = pd->hdr.icmp6->icmp6_type;
3682                 icmpcode = pd->hdr.icmp6->icmp6_code;
3683                 icmpid = pd->hdr.icmp6->icmp6_id;
3684
3685                 if (icmptype == ICMP6_DST_UNREACH ||
3686                     icmptype == ICMP6_PACKET_TOO_BIG ||
3687                     icmptype == ICMP6_TIME_EXCEEDED ||
3688                     icmptype == ICMP6_PARAM_PROB)
3689                         state_icmp++;
3690                 break;
3691 #endif /* INET6 */
3692         }
3693
3694         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3695
             /*
              * Translation lookup.  The ICMP id is passed in the port slots of
              * pf_get_translation(); bport keeps the original id, nport may be
              * updated by the lookup.  The address about to be rewritten is
              * saved in pd->baddr first.
              */
3696         if (direction == PF_OUT) {
3697                 bport = nport = icmpid;
3698                 /* check outgoing packet for BINAT/NAT */
3699                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3700                     saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
3701                     NULL) {
3702                         PF_ACPY(&pd->baddr, saddr, af);
3703                         switch (af) {
3704 #ifdef INET
3705                         case AF_INET:
                                     /* IPv4: id and checksum are patched and copied back right away */
3706                                 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3707                                     pd->naddr.v4.s_addr, 0);
3708                                 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3709                                     pd->hdr.icmp->icmp_cksum, icmpid, nport, 0);
3710                                 pd->hdr.icmp->icmp_id = nport;
3711                                 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3712                                 break;
3713 #endif /* INET */
3714 #ifdef INET6
3715                         case AF_INET6:
                                     /* IPv6: copy-back is deferred; rewrite marks it as pending */
3716                                 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3717                                     &pd->naddr, 0);
3718                                 rewrite++;
3719                                 break;
3720 #endif /* INET6 */
3721                         }
                             /* natpass: skip the filter walk; the rule preset in *rm is used */
3722                         if (nr->natpass)
3723                                 r = NULL;
3724                         pd->nat_rule = nr;
3725                 }
3726         } else {
3727                 bport = nport = icmpid;
3728                 /* check incoming packet for BINAT/RDR */
3729                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3730                     saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
3731                     NULL) {
3732                         PF_ACPY(&pd->baddr, daddr, af);
3733                         switch (af) {
3734 #ifdef INET
3735                         case AF_INET:
3736                                 pf_change_a(&daddr->v4.s_addr,
3737                                     pd->ip_sum, pd->naddr.v4.s_addr, 0);
3738                                 break;
3739 #endif /* INET */
3740 #ifdef INET6
3741                         case AF_INET6:
3742                                 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3743                                     &pd->naddr, 0);
3744                                 rewrite++;
3745                                 break;
3746 #endif /* INET6 */
3747                         }
3748                         if (nr->natpass)
3749                                 r = NULL;
3750                         pd->nat_rule = nr;
3751                 }
3752         }
3753
             /*
              * Rule walk.  The first six tests jump through the rule's skip[]
              * pointers; the remaining tests advance one rule at a time.
              */
3754         while (r != NULL) {
3755                 r->evaluations++;
3756                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
3757                         r = r->skip[PF_SKIP_IFP].ptr;
3758                 else if (r->direction && r->direction != direction)
3759                         r = r->skip[PF_SKIP_DIR].ptr;
3760                 else if (r->af && r->af != af)
3761                         r = r->skip[PF_SKIP_AF].ptr;
3762                 else if (r->proto && r->proto != pd->proto)
3763                         r = r->skip[PF_SKIP_PROTO].ptr;
3764                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3765                     r->src.neg, kif))
3766                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3767                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3768                     r->dst.neg, NULL))
3769                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
                     /* zero r->type / r->code match anything; otherwise compared to value + 1 */
3770                 else if (r->type && r->type != icmptype + 1)
3771                         r = TAILQ_NEXT(r, entries);
3772                 else if (r->code && r->code != icmpcode + 1)
3773                         r = TAILQ_NEXT(r, entries);
3774                 else if (r->tos && !(r->tos == pd->tos))
3775                         r = TAILQ_NEXT(r, entries);
                     /* fragment-only rules never match in this function */
3776                 else if (r->rule_flag & PFRULE_FRAGMENT)
3777                         r = TAILQ_NEXT(r, entries);
3778                 else if (r->prob && r->prob <= karc4random())
3779                         r = TAILQ_NEXT(r, entries);
3780                 else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
3781                         r = TAILQ_NEXT(r, entries);
                     /* rules carrying an OS fingerprint never match in this function */
3782                 else if (r->os_fingerprint != PF_OSFP_ANY)
3783                         r = TAILQ_NEXT(r, entries);
3784                 else {
                             /* rule matched: remember its tag/rtable, then record it or descend */
3785                         if (r->tag)
3786                                 tag = r->tag;
3787                         if (r->rtableid >= 0)
3788                                 rtableid = r->rtableid;
3789                         if (r->anchor == NULL) {
3790                                 match = 1;
3791                                 *rm = r;
3792                                 *am = a;
3793                                 *rsm = ruleset;
                                     /* a quick rule ends the walk; otherwise a later match overrides */
3794                                 if ((*rm)->quick)
3795                                         break;
3796                                 r = TAILQ_NEXT(r, entries);
3797                         } else
3798                                 pf_step_into_anchor(&asd, &ruleset,
3799                                     PF_RULESET_FILTER, &r, &a, &match);
3800                 }
3801                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3802                     PF_RULESET_FILTER, &r, &a, &match))
3803                         break;
3804         }
             /* from here on r, a and ruleset describe the recorded match */
3805         r = *rm;
3806         a = *am;
3807         ruleset = *rsm;
3808
3809         REASON_SET(&reason, PFRES_MATCH);
3810
3811         if (r->log || (nr != NULL && nr->natpass && nr->log)) {
3812 #ifdef INET6
                     /* flush the pending ICMPv6 header rewrite before logging */
3813                 if (rewrite)
3814                         m_copyback(m, off, sizeof(struct icmp6_hdr),
3815                             (caddr_t)pd->hdr.icmp6);
3816 #endif /* INET6 */
3817                 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3818                     a, ruleset, pd);
3819         }
3820
3821         if (r->action != PF_PASS)
3822                 return (PF_DROP);
3823
3824         if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
3825                 REASON_SET(&reason, PFRES_MEMORY);
3826                 return (PF_DROP);
3827         }
3828
             /* no state for the error-type messages flagged in state_icmp */
3829         if (!state_icmp && (r->keep_state || nr != NULL)) {
3830                 /* create new state */
3831                 struct pf_state *s = NULL;
3832                 struct pf_src_node *sn = NULL;
3833
3834                 /* check maximums */
3835                 if (r->max_states && (r->states >= r->max_states)) {
3836                         pf_status.lcounters[LCNT_STATES]++;
3837                         REASON_SET(&reason, PFRES_MAXSTATES);
3838                         goto cleanup;
3839                 }
3840                 /* src node for filter rule */
3841                 if ((r->rule_flag & PFRULE_SRCTRACK ||
3842                     r->rpool.opts & PF_POOL_STICKYADDR) &&
3843                     pf_insert_src_node(&sn, r, saddr, af) != 0) {
3844                         REASON_SET(&reason, PFRES_SRCLIMIT);
3845                         goto cleanup;
3846                 }
3847                 /* src node for translation rule */
                     /*
                      * NOTE(review): for PF_OUT a successful first call (return 0)
                      * falls through to the second pf_insert_src_node() call as
                      * well -- confirm this is intended.
                      */
3848                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3849                     ((direction == PF_OUT &&
3850                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3851                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3852                         REASON_SET(&reason, PFRES_SRCLIMIT);
3853                         goto cleanup;
3854                 }
3855                 s = pool_get(&pf_state_pl, PR_NOWAIT);
3856                 if (s == NULL) {
3857                         REASON_SET(&reason, PFRES_MEMORY);
                     /*
                      * cleanup is reached by falling through on allocation failure
                      * and by goto from the checks above.  Source nodes that hold no
                      * states and have expire == 0 are removed from the tree and
                      * returned to their pool.
                      */
3858 cleanup:
3859                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3860                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3861                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3862                                 pf_status.src_nodes--;
3863                                 pool_put(&pf_src_tree_pl, sn);
3864                         }
3865                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3866                             nsn->expire == 0) {
3867                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3868                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3869                                 pf_status.src_nodes--;
3870                                 pool_put(&pf_src_tree_pl, nsn);
3871                         }
3872                         return (PF_DROP);
3873                 }
3874                 bzero(s, sizeof(*s));
3875                 s->rule.ptr = r;
3876                 s->nat_rule.ptr = nr;
3877                 s->anchor.ptr = a;
3878                 STATE_INC_COUNTERS(s);
3879                 s->allow_opts = r->allow_opts;
3880                 s->log = r->log & PF_LOG_ALL;
3881                 if (nr != NULL)
3882                         s->log |= nr->log & PF_LOG_ALL;
3883                 s->proto = pd->proto;
3884                 s->direction = direction;
3885                 s->af = af;
                     /*
                      * Fill the three endpoints.  The ICMP id takes the place of a
                      * port on the lan/gwy side; ext.port is always 0.  Without a
                      * translation rule lan and gwy are identical.
                      */
3886                 if (direction == PF_OUT) {
3887                         PF_ACPY(&s->gwy.addr, saddr, af);
3888                         s->gwy.port = nport;
3889                         PF_ACPY(&s->ext.addr, daddr, af);
3890                         s->ext.port = 0;
3891                         if (nr != NULL) {
3892                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
3893                                 s->lan.port = bport;
3894                         } else {
3895                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
3896                                 s->lan.port = s->gwy.port;
3897                         }
3898                 } else {
3899                         PF_ACPY(&s->lan.addr, daddr, af);
3900                         s->lan.port = nport;
3901                         PF_ACPY(&s->ext.addr, saddr, af);
3902                         s->ext.port = 0; 
3903                         if (nr != NULL) {
3904                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
3905                                 s->gwy.port = bport;
3906                         } else {
3907                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
3908                                 s->gwy.port = s->lan.port;
3909                         }
3910                 }
3911                 s->hash = pf_state_hash(s);
3912                 s->creation = time_second;
3913                 s->expire = time_second;
3914                 s->timeout = PFTM_ICMP_FIRST_PACKET;
3915                 pf_set_rt_ifp(s, saddr);
3916                 if (sn != NULL) {
3917                         s->src_node = sn;
3918                         s->src_node->states++;
3919                 }
3920                 if (nsn != NULL) {
3921                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3922                         s->nat_src_node = nsn;
3923                         s->nat_src_node->states++;
3924                 }
                     /* a failed insert undoes the counters and frees the state */
3925                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3926                         REASON_SET(&reason, PFRES_STATEINS);
3927                         pf_src_tree_remove_state(s);
3928                         STATE_DEC_COUNTERS(s);
3929                         pool_put(&pf_state_pl, s);
3930                         return (PF_DROP);
3931                 } else
3932                         *sm = s;
3933                 if (tag > 0) {
3934                         pf_tag_ref(tag);
3935                         s->tag = tag;
3936                 }
3937         }
3938
3939 #ifdef INET6
3940         /* copy back packet headers if we performed IPv6 NAT operations */
3941         if (rewrite)
3942                 m_copyback(m, off, sizeof(struct icmp6_hdr),
3943                     (caddr_t)pd->hdr.icmp6);
3944 #endif /* INET6 */
3945
3946         return (PF_PASS);
3947 }
3948
3949 int
3950 pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
3951     struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3952     struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
3953 {
3954         struct pf_rule          *nr = NULL;
3955         struct pf_rule          *r, *a = NULL;
3956         struct pf_ruleset       *ruleset = NULL;
3957         struct pf_src_node      *nsn = NULL;
3958         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
3959         sa_family_t              af = pd->af;
3960         u_short                  reason;
3961         int                      tag = -1, rtableid = -1;
3962         int                      asd = 0;
3963         int                      match = 0;
3964
3965         if (pf_check_congestion(ifq)) {
3966                 REASON_SET(&reason, PFRES_CONGEST);
3967                 return (PF_DROP);
3968         }
3969
3970         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3971
3972         if (direction == PF_OUT) {
3973                 /* check outgoing packet for BINAT/NAT */
3974                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3975                     saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3976                         PF_ACPY(&pd->baddr, saddr, af);
3977                         switch (af) {
3978 #ifdef INET
3979                         case AF_INET:
3980                                 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3981                                     pd->naddr.v4.s_addr, 0);
3982                                 break;
3983 #endif /* INET */
3984 #ifdef INET6
3985                         case AF_INET6:
3986                                 PF_ACPY(saddr, &pd->naddr, af);
3987                                 break;
3988 #endif /* INET6 */
3989                         }
3990                         if (nr->natpass)
3991                                 r = NULL;
3992                         pd->nat_rule = nr;
3993                 }
3994         } else {
3995                 /* check incoming packet for BINAT/RDR */
3996                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3997                     saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
3998                         PF_ACPY(&pd->baddr, daddr, af);
3999                         switch (af) {
4000 #ifdef INET
4001                         case AF_INET:
4002                                 pf_change_a(&daddr->v4.s_addr,
4003                                     pd->ip_sum, pd->naddr.v4.s_addr, 0);
4004                                 break;
4005 #endif /* INET */
4006 #ifdef INET6
4007                         case AF_INET6:
4008                                 PF_ACPY(daddr, &pd->naddr, af);
4009                                 break;
4010 #endif /* INET6 */
4011                         }
4012                         if (nr->natpass)
4013                                 r = NULL;
4014                         pd->nat_rule = nr;
4015                 }
4016         }
4017
4018         while (r != NULL) {
4019                 r->evaluations++;
4020                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
4021                         r = r->skip[PF_SKIP_IFP].ptr;
4022                 else if (r->direction && r->direction != direction)
4023                         r = r->skip[PF_SKIP_DIR].ptr;
4024                 else if (r->af && r->af != af)
4025                         r = r->skip[PF_SKIP_AF].ptr;
4026                 else if (r->proto && r->proto != pd->proto)
4027                         r = r->skip[PF_SKIP_PROTO].ptr;
4028                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4029                     r->src.neg, kif))
4030                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4031                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4032                     r->dst.neg, NULL))
4033                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
4034                 else if (r->tos && !(r->tos == pd->tos))
4035                         r = TAILQ_NEXT(r, entries);
4036                 else if (r->rule_flag & PFRULE_FRAGMENT)
4037                         r = TAILQ_NEXT(r, entries);
4038                 else if (r->prob && r->prob <= karc4random())
4039                         r = TAILQ_NEXT(r, entries);
4040                 else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
4041                         r = TAILQ_NEXT(r, entries);
4042                 else if (r->os_fingerprint != PF_OSFP_ANY)
4043                         r = TAILQ_NEXT(r, entries);
4044                 else {
4045                         if (r->tag)
4046                                 tag = r->tag;
4047                         if (r->rtableid >= 0)
4048                                 rtableid = r->rtableid;
4049                         if (r->anchor == NULL) {
4050                                 match = 1;
4051                                 *rm = r;
4052                                 *am = a;
4053                                 *rsm = ruleset;
4054                                 if ((*rm)->quick)
4055                                         break;
4056                                 r = TAILQ_NEXT(r, entries);
4057                         } else
4058                                 pf_step_into_anchor(&asd, &ruleset,
4059                                     PF_RULESET_FILTER, &r, &a, &match);
4060                 }
4061                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4062                     PF_RULESET_FILTER, &r, &a, &match))
4063                         break;
4064         }
4065         r = *rm;
4066         a = *am;
4067         ruleset = *rsm;
4068
4069         REASON_SET(&reason, PFRES_MATCH);
4070
4071         if (r->log || (nr != NULL && nr->natpass && nr->log))
4072                 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
4073                     a, ruleset, pd);
4074
4075         if ((r->action == PF_DROP) &&
4076             ((r->rule_flag & PFRULE_RETURNICMP) ||
4077             (r->rule_flag & PFRULE_RETURN))) {
4078                 struct pf_addr *a = NULL;
4079
4080                 if (nr != NULL) {
4081                         if (direction == PF_OUT)
4082                                 a = saddr;
4083                         else
4084                                 a = daddr;
4085                 }
4086                 if (a != NULL) {
4087                         switch (af) {
4088 #ifdef INET
4089                         case AF_INET:
4090                                 pf_change_a(&a->v4.s_addr, pd->ip_sum,
4091                                     pd->baddr.v4.s_addr, 0);
4092                                 break;
4093 #endif /* INET */
4094 #ifdef INET6
4095                         case AF_INET6:
4096                                 PF_ACPY(a, &pd->baddr, af);
4097                                 break;
4098 #endif /* INET6 */
4099                         }
4100                 }
4101                 if ((af == AF_INET) && r->return_icmp)
4102                         pf_send_icmp(m, r->return_icmp >> 8,
4103                             r->return_icmp & 255, af, r);
4104                 else if ((af == AF_INET6) && r->return_icmp6)
4105                         pf_send_icmp(m, r->return_icmp6 >> 8,
4106                             r->return_icmp6 & 255, af, r);
4107         }
4108
4109         if (r->action != PF_PASS)
4110                 return (PF_DROP);
4111
4112         if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
4113                 REASON_SET(&reason, PFRES_MEMORY);
4114                 return (PF_DROP);
4115         }
4116
4117         if (r->keep_state || nr != NULL) {
4118                 /* create new state */
4119                 struct pf_state *s = NULL;
4120                 struct pf_src_node *sn = NULL;
4121
4122                 /* check maximums */
4123                 if (r->max_states && (r->states >= r->max_states)) {
4124                         pf_status.lcounters[LCNT_STATES]++;
4125                         REASON_SET(&reason, PFRES_MAXSTATES);
4126                         goto cleanup;
4127                 }
4128                 /* src node for filter rule */
4129                 if ((r->rule_flag & PFRULE_SRCTRACK ||
4130                     r->rpool.opts & PF_POOL_STICKYADDR) &&
4131                     pf_insert_src_node(&sn, r, saddr, af) != 0) {
4132                         REASON_SET(&reason, PFRES_SRCLIMIT);
4133                         goto cleanup;
4134                 }
4135                 /* src node for translation rule */
4136                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
4137                     ((direction == PF_OUT &&
4138                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
4139                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
4140                         REASON_SET(&reason, PFRES_SRCLIMIT);
4141                         goto cleanup;
4142                 }
4143                 s = pool_get(&pf_state_pl, PR_NOWAIT);
4144                 if (s == NULL) {
4145                         REASON_SET(&reason, PFRES_MEMORY);
4146 cleanup:
4147                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
4148                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
4149                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4150                                 pf_status.src_nodes--;
4151                                 pool_put(&pf_src_tree_pl, sn);
4152                         }
4153                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
4154                             nsn->expire == 0) {
4155                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
4156                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
4157                                 pf_status.src_nodes--;
4158                                 pool_put(&pf_src_tree_pl, nsn);
4159                         }
4160                         return (PF_DROP);
4161                 }
4162                 bzero(s, sizeof(*s));
4163                 s->rule.ptr = r;
4164                 s->nat_rule.ptr = nr;
4165                 s->anchor.ptr = a;
4166                 STATE_INC_COUNTERS(s);
4167                 s->allow_opts = r->allow_opts;
4168                 s->log = r->log & PF_LOG_ALL;
4169                 if (nr != NULL)
4170                         s->log |= nr->log & PF_LOG_ALL;
4171                 s->proto = pd->proto;
4172                 s->direction = direction;
4173                 s->af = af;
4174                 if (direction == PF_OUT) {
4175                         PF_ACPY(&s->gwy.addr, saddr, af);
4176                         PF_ACPY(&s->ext.addr, daddr, af);
4177                         if (nr != NULL)
4178                                 PF_ACPY(&s->lan.addr, &pd->baddr, af);
4179                         else
4180                                 PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
4181                 } else {
4182                         PF_ACPY(&s->lan.addr, daddr, af);
4183                         PF_ACPY(&s->ext.addr, saddr, af);
4184                         if (nr != NULL)
4185                                 PF_ACPY(&s->gwy.addr, &pd->baddr, af);
4186                         else
4187                                 PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
4188                 }
4189                 s->hash = pf_state_hash(s);
4190                 s->src.state = PFOTHERS_SINGLE;
4191                 s->dst.state = PFOTHERS_NO_TRAFFIC;
4192                 s->creation = time_second;
4193                 s->expire = time_second;
4194                 s->timeout = PFTM_OTHER_FIRST_PACKET;
4195                 pf_set_rt_ifp(s, saddr);
4196                 if (sn != NULL) {
4197                         s->src_node = sn;
4198                         s->src_node->states++;
4199                 }
4200                 if (nsn != NULL) {
4201                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
4202                         s->nat_src_node = nsn;
4203                         s->nat_src_node->states++;
4204                 }
4205                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
4206                         REASON_SET(&reason, PFRES_STATEINS);
4207                         pf_src_tree_remove_state(s);
4208                         STATE_DEC_COUNTERS(s);
4209                         pool_put(&pf_state_pl, s);
4210                         return (PF_DROP);
4211                 } else
4212                         *sm = s;
4213                 if (tag > 0) {
4214                         pf_tag_ref(tag);
4215                         s->tag = tag;
4216                 }
4217         }
4218
4219         return (PF_PASS);
4220 }
4221
/*
 * pf_test_fragment() - run a packet through the active filter rules
 * without any translation lookup or state creation.
 *
 * Rules that test something this function does not look at are passed
 * over: port operators for UDP, port operators or TCP flags for TCP,
 * ICMP type/code for ICMP and ICMPv6, and any OS fingerprint.
 *
 * rm, am, rsm  in/out: overwritten with the last matching rule, its anchor
 *              and its ruleset.  They are read back unconditionally after
 *              the rule walk, so *rm must already point at a valid rule on
 *              entry in case nothing matches.
 * direction    compared against each rule's direction.
 * kif          interface the rules are matched against.
 * m            mbuf, forwarded to tagging and logging.
 * h            only forwarded to PFLOG_PACKET().
 * pd           packet descriptor (af, proto, src, dst, tos, pf_mtag).
 *
 * Returns PF_PASS or PF_DROP.
 */
4222 int
4223 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
4224     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
4225     struct pf_ruleset **rsm)
4226 {
4227         struct pf_rule          *r, *a = NULL;
4228         struct pf_ruleset       *ruleset = NULL;
4229         sa_family_t              af = pd->af;
             /* reason is local; besides REASON_SET() it is only passed to PFLOG_PACKET() */
4230         u_short                  reason;
4231         int                      tag = -1;
4232         int                      asd = 0;
4233         int                      match = 0;
4234
4235         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
             /*
              * Rule walk.  The first six tests jump through the rule's skip[]
              * pointers; the remaining tests advance one rule at a time.
              */
4236         while (r != NULL) {
4237                 r->evaluations++;
4238                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
4239                         r = r->skip[PF_SKIP_IFP].ptr;
4240                 else if (r->direction && r->direction != direction)
4241                         r = r->skip[PF_SKIP_DIR].ptr;
4242                 else if (r->af && r->af != af)
4243                         r = r->skip[PF_SKIP_AF].ptr;
4244                 else if (r->proto && r->proto != pd->proto)
4245                         r = r->skip[PF_SKIP_PROTO].ptr;
4246                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
4247                     r->src.neg, kif))
4248                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
4249                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
4250                     r->dst.neg, NULL))
4251                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
4252                 else if (r->tos && !(r->tos == pd->tos))
4253                         r = TAILQ_NEXT(r, entries);
4254                 else if (r->os_fingerprint != PF_OSFP_ANY)
4255                         r = TAILQ_NEXT(r, entries);
4256                 else if (pd->proto == IPPROTO_UDP &&
4257                     (r->src.port_op || r->dst.port_op))
4258                         r = TAILQ_NEXT(r, entries);
4259                 else if (pd->proto == IPPROTO_TCP &&
4260                     (r->src.port_op || r->dst.port_op || r->flagset))
4261                         r = TAILQ_NEXT(r, entries);
4262                 else if ((pd->proto == IPPROTO_ICMP ||
4263                     pd->proto == IPPROTO_ICMPV6) &&
4264                     (r->type || r->code))
4265                         r = TAILQ_NEXT(r, entries);
4266                 else if (r->prob && r->prob <= karc4random())
4267                         r = TAILQ_NEXT(r, entries);
                     /*
                      * Hand pf_match_tag() the packet's pf_mtag, as the other
                      * pf_test_*() routines do and as pf_tag_packet() below
                      * receives it, instead of a NULL pointer.
                      */
4268                 else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
4269                         r = TAILQ_NEXT(r, entries);
4270                 else {
4271                         if (r->anchor == NULL) {
4272                                 match = 1;
4273                                 *rm = r;
4274                                 *am = a;
4275                                 *rsm = ruleset;
                                     /* a quick rule ends the walk; otherwise a later match overrides */
4276                                 if ((*rm)->quick)
4277                                         break;
4278                                 r = TAILQ_NEXT(r, entries);
4279                         } else
4280                                 pf_step_into_anchor(&asd, &ruleset,
4281                                     PF_RULESET_FILTER, &r, &a, &match);
4282                 }
4283                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
4284                     PF_RULESET_FILTER, &r, &a, &match))
4285                         break;
4286         }
             /* from here on r, a and ruleset describe the recorded match */
4287         r = *rm;
4288         a = *am;
4289         ruleset = *rsm;
4290
4291         REASON_SET(&reason, PFRES_MATCH);
4292
4293         if (r->log)
4294                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
4295                     pd);
4296
4297         if (r->action != PF_PASS)
4298                 return (PF_DROP);
4299
             /* no routing table is selected here: rtableid is passed as -1 */
4300         if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) {
4301                 REASON_SET(&reason, PFRES_MEMORY);
4302                 return (PF_DROP);
4303         }
4304
4305         return (PF_PASS);
4306 }
4307
/*
 * pf_test_state_tcp() -- track one TCP packet against an existing pf state.
 *
 * Steps performed by the body below, in order:
 *   1. build a lookup key from the packet and run STATE_LOOKUP();
 *   2. while the state is still in a PF_TCPS_PROXY_* phase, drive the
 *      synproxy handshake (these paths always return without passing);
 *   3. validate sequence/ack numbers against the windows tracked for both
 *      peers, applying sequence number modulation where seqdiff is set;
 *   4. advance the per-peer TCP state and choose the state timeout;
 *   5. apply address/port translation if STATE_TRANSLATE() says so and
 *      copy the (possibly modified) TCP header back into the mbuf.
 *
 * Parameters:
 *   state      in/out; *state is dereferenced right after STATE_LOOKUP(),
 *              which is defined elsewhere and presumably fills it in
 *   direction  PF_IN or PF_OUT
 *   kif        interface; kif->pfik_ifp is handed to pf_send_tcp() on the
 *              handshake-mismatch RST path
 *   m, off     mbuf holding the packet and the offset used for the TCP
 *              header in pf_get_wscale(), pf_normalize_tcp_*(),
 *              pf_modulate_sack() and m_copyback()
 *   h          not referenced directly in this function body
 *   pd         packet descriptor; pd->hdr.tcp is the header that is read
 *              and rewritten here
 *   reason     out; set through REASON_SET() on the drop paths
 *
 * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.
 * NOTE(review): STATE_LOOKUP() is a macro defined outside this function;
 * it presumably returns from this function itself when no usable state is
 * found -- confirm against its definition.
 */
4308 int
4309 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
4310     struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
4311     u_short *reason)
4312 {
4313         struct pf_state_cmp      key;
4314         struct tcphdr           *th = pd->hdr.tcp;
4315         u_int16_t                win = ntohs(th->th_win);
4316         u_int32_t                ack, end, seq, orig_seq;
4317         u_int8_t                 sws, dws;
4318         int                      ackskew;
4319         int                      copyback = 0;
4320         struct pf_state_peer    *src, *dst;
4321
             /*
              * Lookup key: inbound packets fill in the (ext, gwy) pair,
              * outbound packets the (lan, ext) pair; the third endpoint of
              * the key is left unset in either case.
              */
4322         key.af = pd->af;
4323         key.proto = IPPROTO_TCP;
4324         if (direction == PF_IN) {
4325                 PF_ACPY(&key.ext.addr, pd->src, key.af);
4326                 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4327                 key.ext.port = th->th_sport;
4328                 key.gwy.port = th->th_dport;
4329         } else {
4330                 PF_ACPY(&key.lan.addr, pd->src, key.af);
4331                 PF_ACPY(&key.ext.addr, pd->dst, key.af);
4332                 key.lan.port = th->th_sport;
4333                 key.ext.port = th->th_dport;
4334         }
4335
4336         STATE_LOOKUP();
4337
             /*
              * src is the peer that sent this packet and dst the other one:
              * a packet travelling in the direction recorded in the state maps
              * to state->src/state->dst, otherwise the roles are swapped.
              */
4338         if (direction == (*state)->direction) {
4339                 src = &(*state)->src;
4340                 dst = &(*state)->dst;
4341         } else {
4342                 src = &(*state)->dst;
4343                 dst = &(*state)->src;
4344         }
4345
             /*
              * Synproxy, first phase (PF_TCPS_PROXY_SRC).  Reverse-direction
              * packets are refused.  A SYN whose sequence number equals
              * src.seqlo makes pf_send_tcp() get called with TH_SYN|TH_ACK and
              * the packet's addresses/ports swapped.  Anything else must be an
              * ACK with ack == src.seqhi + 1 and seq == src.seqlo + 1; if it
              * also passes pf_src_connlimit() (only checked when a src_node
              * exists) the state advances to PF_TCPS_PROXY_DST and control
              * continues straight into the next block.
              */
4346         if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
4347                 if (direction != (*state)->direction) {
4348                         REASON_SET(reason, PFRES_SYNPROXY);
4349                         return (PF_SYNPROXY_DROP);
4350                 }
4351                 if (th->th_flags & TH_SYN) {
4352                         if (ntohl(th->th_seq) != (*state)->src.seqlo) {
4353                                 REASON_SET(reason, PFRES_SYNPROXY);
4354                                 return (PF_DROP);
4355                         }
4356                         pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4357                             pd->src, th->th_dport, th->th_sport,
4358                             (*state)->src.seqhi, ntohl(th->th_seq) + 1,
4359                             TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
4360                             0, NULL, NULL);
4361                         REASON_SET(reason, PFRES_SYNPROXY);
4362                         return (PF_SYNPROXY_DROP);
4363                 } else if (!(th->th_flags & TH_ACK) ||
4364                     (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4365                     (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4366                         REASON_SET(reason, PFRES_SYNPROXY);
4367                         return (PF_DROP);
4368                 } else if ((*state)->src_node != NULL &&
4369                     pf_src_connlimit(state)) {
4370                         REASON_SET(reason, PFRES_SRCLIMIT);
4371                         return (PF_DROP);
4372                 } else
4373                         (*state)->src.state = PF_TCPS_PROXY_DST;
4374         }
             /*
              * Synproxy, second phase (PF_TCPS_PROXY_DST).  Every path through
              * this block returns PF_DROP or PF_SYNPROXY_DROP.
              *  - Same direction as the state: the packet must be an ACK
              *    without SYN matching src.seqhi + 1 / src.seqlo + 1;
              *    pf_send_tcp() is then called with TH_SYN for the host pair
              *    selected below.
              *  - Other direction: the packet must be a SYN|ACK acknowledging
              *    dst.seqhi + 1; pf_send_tcp() is called twice with TH_ACK, the
              *    seqdiff values of both peers are derived and both peers are
              *    set to TCPS_ESTABLISHED.
              */
4375         if ((*state)->src.state == PF_TCPS_PROXY_DST) {
                     /*
                      * NOTE: these two locals shadow the function-scope src/dst
                      * (struct pf_state_peer *) for the rest of this block; here
                      * they are struct pf_state_host pointers (address + port).
                      */
4376                 struct pf_state_host *src, *dst;
4377
4378                 if (direction == PF_OUT) {
4379                         src = &(*state)->gwy;
4380                         dst = &(*state)->ext;
4381                 } else {
4382                         src = &(*state)->ext;
4383                         dst = &(*state)->lan;
4384                 }
4385                 if (direction == (*state)->direction) {
4386                         if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
4387                             (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
4388                             (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
4389                                 REASON_SET(reason, PFRES_SYNPROXY);
4390                                 return (PF_DROP);
4391                         }
4392                         (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
4393                         if ((*state)->dst.seqhi == 1)
4394                                 (*state)->dst.seqhi = htonl(karc4random());
4395                         pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4396                             &dst->addr, src->port, dst->port,
4397                             (*state)->dst.seqhi, 0, TH_SYN, 0,
4398                             (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
4399                         REASON_SET(reason, PFRES_SYNPROXY);
4400                         return (PF_SYNPROXY_DROP);
4401                 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
4402                     (TH_SYN|TH_ACK)) ||
4403                     (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
4404                         REASON_SET(reason, PFRES_SYNPROXY);
4405                         return (PF_DROP);
4406                 } else {
4407                         (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
4408                         (*state)->dst.seqlo = ntohl(th->th_seq);
4409                         pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
4410                             pd->src, th->th_dport, th->th_sport,
4411                             ntohl(th->th_ack), ntohl(th->th_seq) + 1,
4412                             TH_ACK, (*state)->src.max_win, 0, 0, 0,
4413                             (*state)->tag, NULL, NULL);
4414                         pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
4415                             &dst->addr, src->port, dst->port,
4416                             (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
4417                             TH_ACK, (*state)->dst.max_win, 0, 0, 1,
4418                             0, NULL, NULL);
4419                         (*state)->src.seqdiff = (*state)->dst.seqhi -
4420                             (*state)->src.seqlo;
4421                         (*state)->dst.seqdiff = (*state)->src.seqhi -
4422                             (*state)->dst.seqlo;
4423                         (*state)->src.seqhi = (*state)->src.seqlo +
4424                             (*state)->dst.max_win;
4425                         (*state)->dst.seqhi = (*state)->dst.seqlo +
4426                             (*state)->src.max_win;
4427                         (*state)->src.wscale = (*state)->dst.wscale = 0;
4428                         (*state)->src.state = (*state)->dst.state =
4429                             TCPS_ESTABLISHED;
4430                         REASON_SET(reason, PFRES_SYNPROXY);
4431                         return (PF_SYNPROXY_DROP);
4432                 }
4433         }
4434
             /*
              * Window scale shifts are only used when both peers have a wscale
              * value recorded and this packet is not a SYN; otherwise both
              * shifts are zero.
              */
4435         if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
4436                 sws = src->wscale & PF_WSCALE_MASK;
4437                 dws = dst->wscale & PF_WSCALE_MASK;
4438         } else
4439                 sws = dws = 0;
4440
4441         /*
4442          * Sequence tracking algorithm from Guido van Rooij's paper:
4443          *   http://www.madison-gurkha.com/publications/tcp_filtering/
4444          *      tcp_filtering.ps
4445          */
4446
4447         orig_seq = seq = ntohl(th->th_seq);
4448         if (src->seqlo == 0) {
4449                 /* First packet from this end. Set its state */
4450
4451                 if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
4452                     src->scrub == NULL) {
4453                         if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
4454                                 REASON_SET(reason, PFRES_MEMORY);
4455                                 return (PF_DROP);
4456                         }
4457                 }
4458
4459                 /* Deferred generation of sequence number modulator */
4460                 if (dst->seqdiff && !src->seqdiff) {
4461                         while ((src->seqdiff = pf_new_isn(*state) - seq) == 0)
4462                                 ;
4463                         ack = ntohl(th->th_ack) - dst->seqdiff;
4464                         pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4465                             src->seqdiff), 0);
4466                         pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4467                         copyback = 1;
4468                 } else {
4469                         ack = ntohl(th->th_ack);
4470                 }
4471
4472                 end = seq + pd->p_len;
4473                 if (th->th_flags & TH_SYN) {
4474                         end++;
4475                         (*state)->sync_flags |= PFSTATE_GOT_SYN2;
4476                         if (dst->wscale & PF_WSCALE_FLAG) {
4477                                 src->wscale = pf_get_wscale(m, off, th->th_off,
4478                                     pd->af);
4479                                 if (src->wscale & PF_WSCALE_FLAG) {
4480                                         /* Remove scale factor from initial
4481                                          * window */
4482                                         sws = src->wscale & PF_WSCALE_MASK;
4483                                         win = ((u_int32_t)win + (1 << sws) - 1)
4484                                             >> sws;
4485                                         dws = dst->wscale & PF_WSCALE_MASK;
4486                                 } else {
4487                                         /* fixup other window */
4488                                         dst->max_win <<= dst->wscale &
4489                                             PF_WSCALE_MASK;
4490                                         /* in case of a retrans SYN|ACK */
4491                                         dst->wscale = 0;
4492                                 }
4493                         }
4494                 }
4495                 if (th->th_flags & TH_FIN)
4496                         end++;
4497
4498                 src->seqlo = seq;
4499                 if (src->state < TCPS_SYN_SENT)
4500                         src->state = TCPS_SYN_SENT;
4501
4502                 /*
4503                  * May need to slide the window (seqhi may have been set by
4504                  * the crappy stack check or if we picked up the connection
4505                  * after establishment)
4506                  */
4507                 if (src->seqhi == 1 ||
4508                     SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
4509                         src->seqhi = end + MAX(1, dst->max_win << dws);
4510                 if (win > src->max_win)
4511                         src->max_win = win;
4512
4513         } else {
4514                 ack = ntohl(th->th_ack) - dst->seqdiff;
4515                 if (src->seqdiff) {
4516                         /* Modulate sequence numbers */
4517                         pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
4518                             src->seqdiff), 0);
4519                         pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
4520                         copyback = 1;
4521                 }
4522                 end = seq + pd->p_len;
4523                 if (th->th_flags & TH_SYN)
4524                         end++;
4525                 if (th->th_flags & TH_FIN)
4526                         end++;
4527         }
4528
4529         if ((th->th_flags & TH_ACK) == 0) {
4530                 /* Let it pass through the ack skew check */
4531                 ack = dst->seqlo;
4532         } else if ((ack == 0 &&
4533             (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
4534             /* broken tcp stacks do not set ack */
4535             (dst->state < TCPS_SYN_SENT)) {
4536                 /*
4537                  * Many stacks (ours included) will set the ACK number in an
4538                  * FIN|ACK if the SYN times out -- no sequence to ACK.
4539                  */
4540                 ack = dst->seqlo;
4541         }
4542
4543         if (seq == end) {
4544                 /* Ease sequencing restrictions on no data packets */
4545                 seq = src->seqlo;
4546                 end = seq;
4547         }
4548
4549         ackskew = dst->seqlo - ack;
4550
4551
4552         /*
4553          * Need to demodulate the sequence numbers in any TCP SACK options
4554          * (Selective ACK). We could optionally validate the SACK values
4555          * against the current ACK window, either forwards or backwards, but
4556          * I'm not confident that SACK has been implemented properly
4557          * everywhere. It wouldn't surprise me if several stacks accidentally
4558          * SACK too far backwards of previously ACKed data. There really aren't
4559          * any security implications of bad SACKing unless the target stack
4560          * doesn't validate the option length correctly. Someone trying to
4561          * spoof into a TCP connection won't bother blindly sending SACK
4562          * options anyway.
4563          */
4564         if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4565                 if (pf_modulate_sack(m, off, pd, th, dst))
4566                         copyback = 1;
4567         }
4568
4569
             /*
              * NOTE: in the condition below each comment describes the test on
              * the line(s) directly ABOVE it, not the one that follows.
              * MAXACKWINDOW is defined here, mid-function, and is not
              * #undef'd anywhere in this function.
              */
4570 #define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
4571         if (SEQ_GEQ(src->seqhi, end) &&
4572             /* Last octet inside other's window space */
4573             SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4574             /* Retrans: not more than one window back */
4575             (ackskew >= -MAXACKWINDOW) &&
4576             /* Acking not more than one reassembled fragment backwards */
4577             (ackskew <= (MAXACKWINDOW << sws)) &&
4578             /* Acking not more than one window forward */
4579             ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4580             (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
4581             /* Require an exact/+1 sequence match on resets when possible */
4582
4583                 if (dst->scrub || src->scrub) {
4584                         if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4585                             *state, src, dst, &copyback))
4586                                 return (PF_DROP);
4587                 }
4588
4589                 /* update max window */
4590                 if (src->max_win < win)
4591                         src->max_win = win;
4592                 /* synchronize sequencing */
4593                 if (SEQ_GT(end, src->seqlo))
4594                         src->seqlo = end;
4595                 /* slide the window of what the other end can send */
4596                 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4597                         dst->seqhi = ack + MAX((win << sws), 1);
4598
4599
4600                 /* update states */
4601                 if (th->th_flags & TH_SYN)
4602                         if (src->state < TCPS_SYN_SENT)
4603                                 src->state = TCPS_SYN_SENT;
4604                 if (th->th_flags & TH_FIN)
4605                         if (src->state < TCPS_CLOSING)
4606                                 src->state = TCPS_CLOSING;
4607                 if (th->th_flags & TH_ACK) {
4608                         if (dst->state == TCPS_SYN_SENT) {
4609                                 dst->state = TCPS_ESTABLISHED;
4610                                 if (src->state == TCPS_ESTABLISHED &&
4611                                     (*state)->src_node != NULL &&
4612                                     pf_src_connlimit(state)) {
4613                                         REASON_SET(reason, PFRES_SRCLIMIT);
4614                                         return (PF_DROP);
4615                                 }
4616                         } else if (dst->state == TCPS_CLOSING)
4617                                 dst->state = TCPS_FIN_WAIT_2;
4618                 }
4619                 if (th->th_flags & TH_RST)
4620                         src->state = dst->state = TCPS_TIME_WAIT;
4621
4622                 /* update expire time */
4623                 (*state)->expire = time_second;
4624                 if (src->state >= TCPS_FIN_WAIT_2 &&
4625                     dst->state >= TCPS_FIN_WAIT_2)
4626                         (*state)->timeout = PFTM_TCP_CLOSED;
4627                 else if (src->state >= TCPS_CLOSING &&
4628                     dst->state >= TCPS_CLOSING)
4629                         (*state)->timeout = PFTM_TCP_FIN_WAIT;
4630                 else if (src->state < TCPS_ESTABLISHED ||
4631                     dst->state < TCPS_ESTABLISHED)
4632                         (*state)->timeout = PFTM_TCP_OPENING;
4633                 else if (src->state >= TCPS_CLOSING ||
4634                     dst->state >= TCPS_CLOSING)
4635                         (*state)->timeout = PFTM_TCP_CLOSING;
4636                 else
4637                         (*state)->timeout = PFTM_TCP_ESTABLISHED;
4638
4639                 /* Fall through to PASS packet */
4640
4641         } else if ((dst->state < TCPS_SYN_SENT ||
4642                 dst->state >= TCPS_FIN_WAIT_2 ||
4643                 src->state >= TCPS_FIN_WAIT_2) &&
4644             SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4645             /* Within a window forward of the originating packet */
4646             SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4647             /* Within a window backward of the originating packet */
4648
4649                 /*
4650                  * This currently handles three situations:
4651                  *  1) Stupid stacks will shotgun SYNs before their peer
4652                  *     replies.
4653                  *  2) When PF catches an already established stream (the
4654                  *     firewall rebooted, the state table was flushed, routes
4655                  *     changed...)
4656                  *  3) Packets get funky immediately after the connection
4657                  *     closes (this should catch Solaris spurious ACK|FINs
4658                  *     that web servers like to spew after a close)
4659                  *
4660                  * This must be a little more careful than the above code
4661                  * since packet floods will also be caught here. We don't
4662                  * update the TTL here to mitigate the damage of a packet
4663                  * flood and so the same code can handle awkward establishment
4664                  * and a loosened connection close.
4665                  * In the establishment case, a correct peer response will
4666                  * validate the connection, go through the normal state code
4667                  * and keep updating the state TTL.
4668                  */
4669
4670                 if (pf_status.debug >= PF_DEBUG_MISC) {
4671                         kprintf("pf: loose state match: ");
4672                         pf_print_state(*state);
4673                         pf_print_flags(th->th_flags);
4674                         kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4675                             "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
4676                             ackskew, (unsigned long long)(*state)->packets[0],
4677                             (unsigned long long)(*state)->packets[1]);
4678                 }
4679
4680                 if (dst->scrub || src->scrub) {
4681                         if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4682                             *state, src, dst, &copyback))
4683                                 return (PF_DROP);
4684                 }
4685
4686                 /* update max window */
4687                 if (src->max_win < win)
4688                         src->max_win = win;
4689                 /* synchronize sequencing */
4690                 if (SEQ_GT(end, src->seqlo))
4691                         src->seqlo = end;
4692                 /* slide the window of what the other end can send */
4693                 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4694                         dst->seqhi = ack + MAX((win << sws), 1);
4695
4696                 /*
4697                  * Cannot set dst->seqhi here since this could be a shotgunned
4698                  * SYN and not an already established connection.
4699                  */
                     /*
                      * NOTE(review): the statement just above does update
                      * dst->seqhi, which appears to contradict the preceding
                      * comment -- confirm which of the two is intended.
                      */
4700
4701                 if (th->th_flags & TH_FIN)
4702                         if (src->state < TCPS_CLOSING)
4703                                 src->state = TCPS_CLOSING;
4704                 if (th->th_flags & TH_RST)
4705                         src->state = dst->state = TCPS_TIME_WAIT;
4706
4707                 /* Fall through to PASS packet */
4708
4709         } else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY ||
4710                     ((*state)->pickup_mode == PF_PICKUPS_ENABLED &&
4711                      ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) !=
4712                       PFSTATE_GOT_SYN_MASK)) {
4713                 /*
4714                  * If pickup mode is hash only, do not fail on sequence checks.
4715                  *
4716                  * If pickup mode is enabled and we did not see the SYN in
4717                  * both direction, do not fail on sequence checks because
4718                  * we do not have complete information on window scale.
4719                  *
4720                  * Adjust expiration and fall through to PASS packet.
4721                  * XXX Add a FIN check to reduce timeout?
4722                  */
4723                 (*state)->expire = time_second;
4724         } else  {
4725                 /*
4726                  * Failure processing
4727                  */
4728                 if ((*state)->dst.state == TCPS_SYN_SENT &&
4729                     (*state)->src.state == TCPS_SYN_SENT) {
4730                         /* Send RST for state mismatches during handshake */
4731                         if (!(th->th_flags & TH_RST))
4732                                 pf_send_tcp((*state)->rule.ptr, pd->af,
4733                                     pd->dst, pd->src, th->th_dport,
4734                                     th->th_sport, ntohl(th->th_ack), 0,
4735                                     TH_RST, 0, 0,
4736                                     (*state)->rule.ptr->return_ttl, 1, 0,
4737                                     pd->eh, kif->pfik_ifp);
                             /* reset the sender's tracking to its "unseen" values */
4738                         src->seqlo = 0;
4739                         src->seqhi = 1;
4740                         src->max_win = 1;
4741                 } else if (pf_status.debug >= PF_DEBUG_MISC) {
4742                         kprintf("pf: BAD state: ");
4743                         pf_print_state(*state);
4744                         pf_print_flags(th->th_flags);
4745                         kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4746                             "pkts=%llu:%llu dir=%s,%s\n",
4747                             seq, orig_seq, ack, pd->p_len, ackskew,
4748                             (unsigned long long)(*state)->packets[0],
4749                                 (unsigned long long)(*state)->packets[1],
4750                             direction == PF_IN ? "in" : "out",
4751                             direction == (*state)->direction ? "fwd" : "rev");
                             /*
                              * One column per window test used above: a digit
                              * is printed for each test that failed, a blank
                              * for each one that held.
                              */
4752                         kprintf("pf: State failure on: %c %c %c %c | %c %c\n",
4753                             SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4754                             SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4755                             ' ': '2',
4756                             (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4757                             (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4758                             SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4759                             SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4760                 }
4761                 REASON_SET(reason, PFRES_BADSTATE);
4762                 return (PF_DROP);
4763         }
4764
4765         /* Any packets which have gotten here are to be passed */
4766
4767         /* translate source/destination address, if necessary */
4768         if (STATE_TRANSLATE(*state)) {
4769                 if (direction == PF_OUT) {
4770                         pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4771                             &th->th_sum, &(*state)->gwy.addr,
4772                             (*state)->gwy.port, 0, pd->af);
4773                 } else {
4774                         /*
4775                          * If we don't redispatch the packet will go into
4776                          * the protocol stack on the wrong cpu for the
4777                          * post-translated address.
4778                          */
4779                         m->m_pkthdr.fw_flags |= FW_MBUF_REDISPATCH;
4780                         pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4781                             &th->th_sum, &(*state)->lan.addr,
4782                             (*state)->lan.port, 0, pd->af);
4783                 }
4784                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
4785         } else if (copyback) {
4786                 /* Copyback sequence modulation or stateful scrub changes */
4787                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
4788         }
4789
4790         return (PF_PASS);
4791 }
4792
4793 int
4794 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4795     struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4796 {
4797         struct pf_state_peer    *src, *dst;
4798         struct pf_state_cmp      key;
4799         struct udphdr           *uh = pd->hdr.udp;
4800
4801         key.af = pd->af;
4802         key.proto = IPPROTO_UDP;
4803         if (direction == PF_IN) {
4804                 PF_ACPY(&key.ext.addr, pd->src, key.af);
4805                 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4806                 key.ext.port = uh->uh_sport;
4807                 key.gwy.port = uh->uh_dport;
4808         } else {
4809                 PF_ACPY(&key.lan.addr, pd->src, key.af);
4810                 PF_ACPY(&key.ext.addr, pd->dst, key.af);
4811                 key.lan.port = uh->uh_sport;
4812                 key.ext.port = uh->uh_dport;
4813         }
4814
4815         STATE_LOOKUP();
4816
4817         if (direction == (*state)->direction) {
4818                 src = &(*state)->src;
4819                 dst = &(*state)->dst;
4820         } else {
4821                 src = &(*state)->dst;
4822                 dst = &(*state)->src;
4823         }
4824
4825         /* update states */
4826         if (src->state < PFUDPS_SINGLE)
4827                 src->state = PFUDPS_SINGLE;
4828         if (dst->state == PFUDPS_SINGLE)
4829                 dst->state = PFUDPS_MULTIPLE;
4830
4831         /* update expire time */
4832         (*state)->expire = time_second;
4833         if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4834                 (*state)->timeout = PFTM_UDP_MULTIPLE;
4835         else
4836                 (*state)->timeout = PFTM_UDP_SINGLE;
4837
4838         /* translate source/destination address, if necessary */
4839         if (STATE_TRANSLATE(*state)) {
4840                 if (direction == PF_OUT) {
4841                         pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4842                             &uh->uh_sum, &(*state)->gwy.addr,
4843                             (*state)->gwy.port, 1, pd->af);
4844                 } else {
4845                         /*
4846                          * If we don't redispatch the packet will go into
4847                          * the protocol stack on the wrong cpu for the
4848                          * post-translated address.
4849                          */
4850                         m->m_pkthdr.fw_flags |= FW_MBUF_REDISPATCH;
4851                         pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4852                             &uh->uh_sum, &(*state)->lan.addr,
4853                             (*state)->lan.port, 1, pd->af);
4854                 }
4855                 m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4856         }
4857
4858         return (PF_PASS);
4859 }
4860
4861 int
4862 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4863     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4864 {
4865         struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
4866         u_int16_t        icmpid = 0, *icmpsum;
4867         u_int8_t         icmptype;
4868         int              state_icmp = 0;
4869         struct pf_state_cmp key;
4870
4871         switch (pd->proto) {
4872 #ifdef INET
4873         case IPPROTO_ICMP:
4874                 icmptype = pd->hdr.icmp->icmp_type;
4875                 icmpid = pd->hdr.icmp->icmp_id;
4876                 icmpsum = &pd->hdr.icmp->icmp_cksum;
4877
4878                 if (icmptype == ICMP_UNREACH ||
4879                     icmptype == ICMP_SOURCEQUENCH ||
4880                     icmptype == ICMP_REDIRECT ||
4881                     icmptype == ICMP_TIMXCEED ||
4882                     icmptype == ICMP_PARAMPROB)
4883                         state_icmp++;
4884                 break;
4885 #endif /* INET */
4886 #ifdef INET6
4887         case IPPROTO_ICMPV6:
4888                 icmptype = pd->hdr.icmp6->icmp6_type;
4889                 icmpid = pd->hdr.icmp6->icmp6_id;
4890                 icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4891
4892                 if (icmptype == ICMP6_DST_UNREACH ||
4893                     icmptype == ICMP6_PACKET_TOO_BIG ||
4894                     icmptype == ICMP6_TIME_EXCEEDED ||
4895                     icmptype == ICMP6_PARAM_PROB)
4896                         state_icmp++;
4897                 break;
4898 #endif /* INET6 */
4899         }
4900
4901         if (!state_icmp) {
4902
4903                 /*
4904                  * ICMP query/reply message not related to a TCP/UDP packet.
4905                  * Search for an ICMP state.
4906                  */
4907                 key.af = pd->af;
4908                 key.proto = pd->proto;
4909                 if (direction == PF_IN) {
4910                         PF_ACPY(&key.ext.addr, pd->src, key.af);
4911                         PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4912                         key.ext.port = 0;
4913                         key.gwy.port = icmpid;
4914                 } else {
4915                         PF_ACPY(&key.lan.addr, pd->src, key.af);
4916                         PF_ACPY(&key.ext.addr, pd->dst, key.af);
4917                         key.lan.port = icmpid;
4918                         key.ext.port = 0;
4919                 }
4920
4921                 STATE_LOOKUP();
4922
4923                 (*state)->expire = time_second;
4924                 (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4925
4926                 /* translate source/destination address, if necessary */
4927                 if (STATE_TRANSLATE(*state)) {
4928                         if (direction == PF_OUT) {
4929                                 switch (pd->af) {
4930 #ifdef INET
4931                                 case AF_INET:
4932                                         pf_change_a(&saddr->v4.s_addr,
4933                                             pd->ip_sum,
4934                                             (*state)->gwy.addr.v4.s_addr, 0);
4935                                         pd->hdr.icmp->icmp_cksum =
4936                                             pf_cksum_fixup(
4937                                             pd->hdr.icmp->icmp_cksum, icmpid,
4938                                             (*state)->gwy.port, 0);
4939                                         pd->hdr.icmp->icmp_id =
4940                                             (*state)->gwy.port;
4941                                         m_copyback(m, off, ICMP_MINLEN,
4942                                             (caddr_t)pd->hdr.icmp);
4943                                         break;
4944 #endif /* INET */
4945 #ifdef INET6
4946                                 case AF_INET6:
4947                                         pf_change_a6(saddr,
4948                                             &pd->hdr.icmp6->icmp6_cksum,
4949                                             &(*state)->gwy.addr, 0);
4950                                         m_copyback(m, off,
4951                                             sizeof(struct icmp6_hdr),
4952                                             (caddr_t)pd->hdr.icmp6);
4953                                         break;
4954 #endif /* INET6 */
4955                                 }
4956                         } else {
4957                                 switch (pd->af) {
4958 #ifdef INET
4959                                 case AF_INET:
4960                                         pf_change_a(&daddr->v4.s_addr,
4961                                             pd->ip_sum,
4962                                             (*state)->lan.addr.v4.s_addr, 0);
4963                                         pd->hdr.icmp->icmp_cksum =
4964                                             pf_cksum_fixup(
4965                                             pd->hdr.icmp->icmp_cksum, icmpid,
4966                                             (*state)->lan.port, 0);
4967                                         pd->hdr.icmp->icmp_id =
4968                                             (*state)->lan.port;
4969                                         m_copyback(m, off, ICMP_MINLEN,
4970                                             (caddr_t)pd->hdr.icmp);
4971                                         break;
4972 #endif /* INET */
4973 #ifdef INET6
4974                                 case AF_INET6:
4975                                         pf_change_a6(daddr,
4976                                             &pd->hdr.icmp6->icmp6_cksum,
4977                                             &(*state)->lan.addr, 0);
4978                                         m_copyback(m, off,
4979                                             sizeof(struct icmp6_hdr),
4980                                             (caddr_t)pd->hdr.icmp6);
4981                                         break;
4982 #endif /* INET6 */
4983                                 }
4984                         }
4985                 }
4986
4987                 return (PF_PASS);
4988
4989         } else {
4990                 /*
4991                  * ICMP error message in response to a TCP/UDP packet.
4992                  * Extract the inner TCP/UDP header and search for that state.
4993                  */
4994
4995                 struct pf_pdesc pd2;
4996 #ifdef INET
4997                 struct ip       h2;
4998 #endif /* INET */
4999 #ifdef INET6
5000                 struct ip6_hdr  h2_6;
5001                 int             terminal = 0;
5002 #endif /* INET6 */
5003                 int             ipoff2;
5004                 int             off2;
5005
5006                 pd2.af = pd->af;
5007                 switch (pd->af) {
5008 #ifdef INET
5009                 case AF_INET:
5010                         /* offset of h2 in mbuf chain */
5011                         ipoff2 = off + ICMP_MINLEN;
5012
5013                         if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
5014                             NULL, reason, pd2.af)) {
5015                                 DPFPRINTF(PF_DEBUG_MISC,
5016                                     ("pf: ICMP error message too short "
5017                                     "(ip)\n"));
5018                                 return (PF_DROP);
5019                         }
5020                         /*
5021                          * ICMP error messages don't refer to non-first
5022                          * fragments
5023                          */
5024                         if (h2.ip_off & htons(IP_OFFMASK)) {
5025                                 REASON_SET(reason, PFRES_FRAG);
5026                                 return (PF_DROP);
5027                         }
5028
5029                         /* offset of protocol header that follows h2 */
5030                         off2 = ipoff2 + (h2.ip_hl << 2);
5031
5032                         pd2.proto = h2.ip_p;
5033                         pd2.src = (struct pf_addr *)&h2.ip_src;
5034                         pd2.dst = (struct pf_addr *)&h2.ip_dst;
5035                         pd2.ip_sum = &h2.ip_sum;
5036                         break;
5037 #endif /* INET */
5038 #ifdef INET6
5039                 case AF_INET6:
5040                         ipoff2 = off + sizeof(struct icmp6_hdr);
5041
5042                         if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
5043                             NULL, reason, pd2.af)) {
5044                                 DPFPRINTF(PF_DEBUG_MISC,
5045                                     ("pf: ICMP error message too short "
5046                                     "(ip6)\n"));
5047                                 return (PF_DROP);
5048                         }
5049                         pd2.proto = h2_6.ip6_nxt;
5050                         pd2.src = (struct pf_addr *)&h2_6.ip6_src;
5051                         pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
5052                         pd2.ip_sum = NULL;
5053                         off2 = ipoff2 + sizeof(h2_6);
5054                         do {
5055                                 switch (pd2.proto) {
5056                                 case IPPROTO_FRAGMENT:
5057                                         /*
5058                                          * ICMPv6 error messages for
5059                                          * non-first fragments
5060                                          */
5061                                         REASON_SET(reason, PFRES_FRAG);
5062                                         return (PF_DROP);
5063                                 case IPPROTO_AH:
5064                                 case IPPROTO_HOPOPTS:
5065                                 case IPPROTO_ROUTING:
5066                                 case IPPROTO_DSTOPTS: {
5067                                         /* get next header and header length */
5068                                         struct ip6_ext opt6;
5069
5070                                         if (!pf_pull_hdr(m, off2, &opt6,
5071                                             sizeof(opt6), NULL, reason,
5072                                             pd2.af)) {
5073                                                 DPFPRINTF(PF_DEBUG_MISC,
5074                                                     ("pf: ICMPv6 short opt\n"));
5075                                                 return (PF_DROP);
5076                                         }
5077                                         if (pd2.proto == IPPROTO_AH)
5078                                                 off2 += (opt6.ip6e_len + 2) * 4;
5079                                         else
5080                                                 off2 += (opt6.ip6e_len + 1) * 8;
5081                                         pd2.proto = opt6.ip6e_nxt;
5082                                         /* goto the next header */
5083                                         break;
5084                                 }
5085                                 default:
5086                                         terminal++;
5087                                         break;
5088                                 }
5089                         } while (!terminal);
5090                         break;
5091 #endif /* INET6 */
5092                 default:
5093                         DPFPRINTF(PF_DEBUG_MISC,
5094                             ("pf: ICMP AF %d unknown (ip6)\n", pd->af));
5095                         return (PF_DROP);
5096                         break;
5097                 }
5098
5099                 switch (pd2.proto) {
5100                 case IPPROTO_TCP: {
5101                         struct tcphdr            th;
5102                         u_int32_t                seq;
5103                         struct pf_state_cmp              key;
5104                         struct pf_state_peer    *src, *dst;
5105                         u_int8_t                 dws;
5106                         int                      copyback = 0;
5107
5108                         /*
5109                          * Only the first 8 bytes of the TCP header can be
5110                          * expected. Don't access any TCP header fields after
5111                          * th_seq, an ackskew test is not possible.
5112                          */
5113                         if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
5114                             pd2.af)) {
5115                                 DPFPRINTF(PF_DEBUG_MISC,
5116                                     ("pf: ICMP error message too short "
5117                                     "(tcp)\n"));
5118                                 return (PF_DROP);
5119                         }
5120
5121                         key.af = pd2.af;
5122                         key.proto = IPPROTO_TCP;
5123                         if (direction == PF_IN) {
5124                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5125                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5126                                 key.ext.port = th.th_dport;
5127                                 key.gwy.port = th.th_sport;
5128                         } else {
5129                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5130                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
5131                                 key.lan.port = th.th_dport;
5132                                 key.ext.port = th.th_sport;
5133                         }
5134
5135                         STATE_LOOKUP();
5136
5137                         if (direction == (*state)->direction) {
5138                                 src = &(*state)->dst;
5139                                 dst = &(*state)->src;
5140                         } else {
5141                                 src = &(*state)->src;
5142                                 dst = &(*state)->dst;
5143                         }
5144
5145                         if (src->wscale && dst->wscale &&
5146                             !(th.th_flags & TH_SYN))
5147                                 dws = dst->wscale & PF_WSCALE_MASK;
5148                         else
5149                                 dws = 0;
5150
5151                         /* Demodulate sequence number */
5152                         seq = ntohl(th.th_seq) - src->seqdiff;
5153                         if (src->seqdiff) {
5154                                 pf_change_a(&th.th_seq, icmpsum,
5155                                     htonl(seq), 0);
5156                                 copyback = 1;
5157                         }
5158
5159                         if (!SEQ_GEQ(src->seqhi, seq) ||
5160                             !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
5161                                 if (pf_status.debug >= PF_DEBUG_MISC) {
5162                                         kprintf("pf: BAD ICMP %d:%d ",
5163                                             icmptype, pd->hdr.icmp->icmp_code);
5164                                         pf_print_host(pd->src, 0, pd->af);
5165                                         kprintf(" -> ");
5166                                         pf_print_host(pd->dst, 0, pd->af);
5167                                         kprintf(" state: ");
5168                                         pf_print_state(*state);
5169                                         kprintf(" seq=%u\n", seq);
5170                                 }
5171                                 REASON_SET(reason, PFRES_BADSTATE);
5172                                 return (PF_DROP);
5173                         }
5174
5175                         if (STATE_TRANSLATE(*state)) {
5176                                 if (direction == PF_IN) {
5177                                         pf_change_icmp(pd2.src, &th.th_sport,
5178                                             daddr, &(*state)->lan.addr,
5179                                             (*state)->lan.port, NULL,
5180                                             pd2.ip_sum, icmpsum,
5181                                             pd->ip_sum, 0, pd2.af);
5182                                 } else {
5183                                         pf_change_icmp(pd2.dst, &th.th_dport,
5184                                             saddr, &(*state)->gwy.addr,
5185                                             (*state)->gwy.port, NULL,
5186                                             pd2.ip_sum, icmpsum,
5187                                             pd->ip_sum, 0, pd2.af);
5188                                 }
5189                                 copyback = 1;
5190                         }
5191
5192                         if (copyback) {
5193                                 switch (pd2.af) {
5194 #ifdef INET
5195                                 case AF_INET:
5196                                         m_copyback(m, off, ICMP_MINLEN,
5197                                             (caddr_t)pd->hdr.icmp);
5198                                         m_copyback(m, ipoff2, sizeof(h2),
5199                                             (caddr_t)&h2);
5200                                         break;
5201 #endif /* INET */
5202 #ifdef INET6
5203                                 case AF_INET6:
5204                                         m_copyback(m, off,
5205                                             sizeof(struct icmp6_hdr),
5206                                             (caddr_t)pd->hdr.icmp6);
5207                                         m_copyback(m, ipoff2, sizeof(h2_6),
5208                                             (caddr_t)&h2_6);
5209                                         break;
5210 #endif /* INET6 */
5211                                 }
5212                                 m_copyback(m, off2, 8, (caddr_t)&th);
5213                         }
5214
5215                         return (PF_PASS);
5216                         break;
5217                 }
5218                 case IPPROTO_UDP: {
5219                         struct udphdr           uh;
5220
5221                         if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
5222                             NULL, reason, pd2.af)) {
5223                                 DPFPRINTF(PF_DEBUG_MISC,
5224                                     ("pf: ICMP error message too short "
5225                                     "(udp)\n"));
5226                                 return (PF_DROP);
5227                         }
5228
5229                         key.af = pd2.af;
5230                         key.proto = IPPROTO_UDP;
5231                         if (direction == PF_IN) {
5232                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5233                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5234                                 key.ext.port = uh.uh_dport;
5235                                 key.gwy.port = uh.uh_sport;
5236                         } else {
5237                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5238                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
5239                                 key.lan.port = uh.uh_dport;
5240                                 key.ext.port = uh.uh_sport;
5241                         }
5242
5243                         STATE_LOOKUP();
5244
5245                         if (STATE_TRANSLATE(*state)) {
5246                                 if (direction == PF_IN) {
5247                                         pf_change_icmp(pd2.src, &uh.uh_sport,
5248                                             daddr, &(*state)->lan.addr,
5249                                             (*state)->lan.port, &uh.uh_sum,
5250                                             pd2.ip_sum, icmpsum,
5251                                             pd->ip_sum, 1, pd2.af);
5252                                 } else {
5253                                         pf_change_icmp(pd2.dst, &uh.uh_dport,
5254                                             saddr, &(*state)->gwy.addr,
5255                                             (*state)->gwy.port, &uh.uh_sum,
5256                                             pd2.ip_sum, icmpsum,
5257                                             pd->ip_sum, 1, pd2.af);
5258                                 }
5259                                 switch (pd2.af) {
5260 #ifdef INET
5261                                 case AF_INET:
5262                                         m_copyback(m, off, ICMP_MINLEN,
5263                                             (caddr_t)pd->hdr.icmp);
5264                                         m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5265                                         break;
5266 #endif /* INET */
5267 #ifdef INET6
5268                                 case AF_INET6:
5269                                         m_copyback(m, off,
5270                                             sizeof(struct icmp6_hdr),
5271                                             (caddr_t)pd->hdr.icmp6);
5272                                         m_copyback(m, ipoff2, sizeof(h2_6),
5273                                             (caddr_t)&h2_6);
5274                                         break;
5275 #endif /* INET6 */
5276                                 }
5277                                 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
5278                         }
5279
5280                         return (PF_PASS);
5281                         break;
5282                 }
5283 #ifdef INET
5284                 case IPPROTO_ICMP: {
5285                         struct icmp             iih;
5286
5287                         if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
5288                             NULL, reason, pd2.af)) {
5289                                 DPFPRINTF(PF_DEBUG_MISC,
5290                                     ("pf: ICMP error message too short i"
5291                                     "(icmp)\n"));
5292                                 return (PF_DROP);
5293                         }
5294
5295                         key.af = pd2.af;
5296                         key.proto = IPPROTO_ICMP;
5297                         if (direction == PF_IN) {
5298                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5299                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5300                                 key.ext.port = 0;
5301                                 key.gwy.port = iih.icmp_id;
5302                         } else {
5303                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5304                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
5305                                 key.lan.port = iih.icmp_id;
5306                                 key.ext.port = 0;
5307                         }
5308
5309                         STATE_LOOKUP();
5310
5311                         if (STATE_TRANSLATE(*state)) {
5312                                 if (direction == PF_IN) {
5313                                         pf_change_icmp(pd2.src, &iih.icmp_id,
5314                                             daddr, &(*state)->lan.addr,
5315                                             (*state)->lan.port, NULL,
5316                                             pd2.ip_sum, icmpsum,
5317                                             pd->ip_sum, 0, AF_INET);
5318                                 } else {
5319                                         pf_change_icmp(pd2.dst, &iih.icmp_id,
5320                                             saddr, &(*state)->gwy.addr,
5321                                             (*state)->gwy.port, NULL,
5322                                             pd2.ip_sum, icmpsum,
5323                                             pd->ip_sum, 0, AF_INET);
5324                                 }
5325                                 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
5326                                 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5327                                 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
5328                         }
5329
5330                         return (PF_PASS);
5331                         break;
5332                 }
5333 #endif /* INET */
5334 #ifdef INET6
5335                 case IPPROTO_ICMPV6: {
5336                         struct icmp6_hdr        iih;
5337
5338                         if (!pf_pull_hdr(m, off2, &iih,
5339                             sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
5340                                 DPFPRINTF(PF_DEBUG_MISC,
5341                                     ("pf: ICMP error message too short "
5342                                     "(icmp6)\n"));
5343                                 return (PF_DROP);
5344                         }
5345
5346                         key.af = pd2.af;
5347                         key.proto = IPPROTO_ICMPV6;
5348                         if (direction == PF_IN) {
5349                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5350                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5351                                 key.ext.port = 0;
5352                                 key.gwy.port = iih.icmp6_id;
5353                         } else {
5354                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5355                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
5356                                 key.lan.port = iih.icmp6_id;
5357                                 key.ext.port = 0;
5358                         }
5359
5360                         STATE_LOOKUP();
5361
5362                         if (STATE_TRANSLATE(*state)) {
5363                                 if (direction == PF_IN) {
5364                                         pf_change_icmp(pd2.src, &iih.icmp6_id,
5365                                             daddr, &(*state)->lan.addr,
5366                                             (*state)->lan.port, NULL,
5367                                             pd2.ip_sum, icmpsum,
5368                                             pd->ip_sum, 0, AF_INET6);
5369                                 } else {
5370                                         pf_change_icmp(pd2.dst, &iih.icmp6_id,
5371                                             saddr, &(*state)->gwy.addr,
5372                                             (*state)->gwy.port, NULL,
5373                                             pd2.ip_sum, icmpsum,
5374                                             pd->ip_sum, 0, AF_INET6);
5375                                 }
5376                                 m_copyback(m, off, sizeof(struct icmp6_hdr),
5377                                     (caddr_t)pd->hdr.icmp6);
5378                                 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
5379                                 m_copyback(m, off2, sizeof(struct icmp6_hdr),
5380                                     (caddr_t)&iih);
5381                         }
5382
5383                         return (PF_PASS);
5384                         break;
5385                 }
5386 #endif /* INET6 */
5387                 default: {
5388                         key.af = pd2.af;
5389                         key.proto = pd2.proto;
5390                         if (direction == PF_IN) {
5391                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
5392                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
5393                                 key.ext.port = 0;
5394                                 key.gwy.port = 0;
5395                         } else {
5396                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
5397                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
5398                                 key.lan.port = 0;
5399                                 key.ext.port = 0;
5400                         }
5401
5402                         STATE_LOOKUP();
5403
5404                         if (STATE_TRANSLATE(*state)) {
5405                                 if (direction == PF_IN) {
5406                                         pf_change_icmp(pd2.src, NULL,
5407                                             daddr, &(*state)->lan.addr,
5408                                             0, NULL,
5409                                             pd2.ip_sum, icmpsum,
5410                                             pd->ip_sum, 0, pd2.af);
5411                                 } else {
5412                                         pf_change_icmp(pd2.dst, NULL,
5413                                             saddr, &(*state)->gwy.addr,
5414                                             0, NULL,
5415                                             pd2.ip_sum, icmpsum,
5416                                             pd->ip_sum, 0, pd2.af);
5417                                 }
5418                                 switch (pd2.af) {
5419 #ifdef INET
5420                                 case AF_INET:
5421                                         m_copyback(m, off, ICMP_MINLEN,
5422                                             (caddr_t)pd->hdr.icmp);
5423                                         m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
5424                                         break;
5425 #endif /* INET */
5426 #ifdef INET6
5427                                 case AF_INET6:
5428                                         m_copyback(m, off,
5429                                             sizeof(struct icmp6_hdr),
5430                                             (caddr_t)pd->hdr.icmp6);
5431                                         m_copyback(m, ipoff2, sizeof(h2_6),
5432                                             (caddr_t)&h2_6);
5433                                         break;
5434 #endif /* INET6 */
5435                                 }
5436                         }
5437
5438                         return (PF_PASS);
5439                         break;
5440                 }
5441                 }
5442         }
5443 }
5444
5445 int
5446 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
5447     struct pf_pdesc *pd)
5448 {
5449         struct pf_state_peer    *src, *dst;
5450         struct pf_state_cmp      key;
5451
5452         key.af = pd->af;
5453         key.proto = pd->proto;
5454         if (direction == PF_IN) {
5455                 PF_ACPY(&key.ext.addr, pd->src, key.af);
5456                 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
5457                 key.ext.port = 0;
5458                 key.gwy.port = 0;
5459         } else {
5460                 PF_ACPY(&key.lan.addr, pd->src, key.af);
5461                 PF_ACPY(&key.ext.addr, pd->dst, key.af);
5462                 key.lan.port = 0;
5463                 key.ext.port = 0;
5464         }
5465
5466         STATE_LOOKUP();
5467
5468         if (direction == (*state)->direction) {
5469                 src = &(*state)->src;
5470                 dst = &(*state)->dst;
5471         } else {
5472                 src = &(*state)->dst;
5473                 dst = &(*state)->src;
5474         }
5475
5476         /* update states */
5477         if (src->state < PFOTHERS_SINGLE)
5478                 src->state = PFOTHERS_SINGLE;
5479         if (dst->state == PFOTHERS_SINGLE)
5480                 dst->state = PFOTHERS_MULTIPLE;
5481
5482         /* update expire time */
5483         (*state)->expire = time_second;
5484         if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
5485                 (*state)->timeout = PFTM_OTHER_MULTIPLE;
5486         else
5487                 (*state)->timeout = PFTM_OTHER_SINGLE;
5488
5489         /* translate source/destination address, if necessary */
5490         if (STATE_TRANSLATE(*state)) {
5491                 if (direction == PF_OUT)
5492                         switch (pd->af) {
5493 #ifdef INET
5494                         case AF_INET:
5495                                 pf_change_a(&pd->src->v4.s_addr,
5496                                     pd->ip_sum, (*state)->gwy.addr.v4.s_addr,
5497                                     0);
5498                                 break;
5499 #endif /* INET */
5500 #ifdef INET6
5501                         case AF_INET6:
5502                                 PF_ACPY(pd->src, &(*state)->gwy.addr, pd->af);
5503                                 break;
5504 #endif /* INET6 */
5505                         }
5506                 else
5507                         switch (pd->af) {
5508 #ifdef INET
5509                         case AF_INET:
5510                                 pf_change_a(&pd->dst->v4.s_addr,
5511                                     pd->ip_sum, (*state)->lan.addr.v4.s_addr,
5512                                     0);
5513                                 break;
5514 #endif /* INET */
5515 #ifdef INET6
5516                         case AF_INET6:
5517                                 PF_ACPY(pd->dst, &(*state)->lan.addr, pd->af);
5518                                 break;
5519 #endif /* INET6 */
5520                         }
5521         }
5522
5523         return (PF_PASS);
5524 }
5525
5526 /*
5527  * ipoff and off are measured from the start of the mbuf chain.
5528  * h must be at "ipoff" on the mbuf chain.
5529  */
5530 void *
5531 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5532     u_short *actionp, u_short *reasonp, sa_family_t af)
5533 {
5534         switch (af) {
5535 #ifdef INET
5536         case AF_INET: {
5537                 struct ip       *h = mtod(m, struct ip *);
5538                 u_int16_t        fragoff = (h->ip_off & IP_OFFMASK) << 3;
5539
5540                 if (fragoff) {
5541                         if (fragoff >= len)
5542                                 ACTION_SET(actionp, PF_PASS);
5543                         else {
5544                                 ACTION_SET(actionp, PF_DROP);
5545                                 REASON_SET(reasonp, PFRES_FRAG);
5546                         }
5547                         return (NULL);
5548                 }
5549                 if (m->m_pkthdr.len < off + len ||
5550                     h->ip_len < off + len) {
5551                         ACTION_SET(actionp, PF_DROP);
5552                         REASON_SET(reasonp, PFRES_SHORT);
5553                         return (NULL);
5554                 }
5555                 break;
5556         }
5557 #endif /* INET */
5558 #ifdef INET6
5559         case AF_INET6: {
5560                 struct ip6_hdr  *h = mtod(m, struct ip6_hdr *);
5561
5562                 if (m->m_pkthdr.len < off + len ||
5563                     (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5564                     (unsigned)(off + len)) {
5565                         ACTION_SET(actionp, PF_DROP);
5566                         REASON_SET(reasonp, PFRES_SHORT);
5567                         return (NULL);
5568                 }
5569                 break;
5570         }
5571 #endif /* INET6 */
5572         }
5573         m_copydata(m, off, len, p);
5574         return (p);
5575 }
5576
5577 int
5578 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5579 {
5580         struct sockaddr_in      *dst;
5581         int                      ret = 1;
5582         int                      check_mpath;
5583 #ifdef INET6
5584         struct sockaddr_in6     *dst6;
5585         struct route_in6         ro;
5586 #else
5587         struct route             ro;
5588 #endif
5589         struct radix_node       *rn;
5590         struct rtentry          *rt;
5591         struct ifnet            *ifp;
5592
5593         check_mpath = 0;
5594         bzero(&ro, sizeof(ro));
5595         switch (af) {
5596         case AF_INET:
5597                 dst = satosin(&ro.ro_dst);
5598                 dst->sin_family = AF_INET;
5599                 dst->sin_len = sizeof(*dst);
5600                 dst->sin_addr = addr->v4;
5601                 break;
5602 #ifdef INET6
5603         case AF_INET6:
5604                 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5605                 dst6->sin6_family = AF_INET6;
5606                 dst6->sin6_len = sizeof(*dst6);
5607                 dst6->sin6_addr = addr->v6;
5608                 break;
5609 #endif /* INET6 */
5610         default:
5611                 return (0);
5612         }
5613
5614         /* Skip checks for ipsec interfaces */
5615         if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5616                 goto out;
5617
5618         rtalloc_ign((struct route *)&ro, 0);
5619
5620         if (ro.ro_rt != NULL) {
5621                 /* No interface given, this is a no-route check */
5622                 if (kif == NULL)
5623                         goto out;
5624
5625                 if (kif->pfik_ifp == NULL) {
5626                         ret = 0;
5627                         goto out;
5628                 }
5629
5630                 /* Perform uRPF check if passed input interface */
5631                 ret = 0;
5632                 rn = (struct radix_node *)ro.ro_rt;
5633                 do {
5634                         rt = (struct rtentry *)rn;
5635                         ifp = rt->rt_ifp;
5636
5637                         if (kif->pfik_ifp == ifp)
5638                                 ret = 1;
5639                         rn = NULL;
5640                 } while (check_mpath == 1 && rn != NULL && ret == 0);
5641         } else
5642                 ret = 0;
5643 out:
5644         if (ro.ro_rt != NULL)
5645                 RTFREE(ro.ro_rt);
5646         return (ret);
5647 }
5648
5649 int
5650 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5651 {
5652         struct sockaddr_in      *dst;
5653 #ifdef INET6
5654         struct sockaddr_in6     *dst6;
5655         struct route_in6         ro;
5656 #else
5657         struct route             ro;
5658 #endif
5659         int                      ret = 0;
5660
5661         bzero(&ro, sizeof(ro));
5662         switch (af) {
5663         case AF_INET:
5664                 dst = satosin(&ro.ro_dst);
5665                 dst->sin_family = AF_INET;
5666                 dst->sin_len = sizeof(*dst);
5667                 dst->sin_addr = addr->v4;
5668                 break;
5669 #ifdef INET6
5670         case AF_INET6:
5671                 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5672                 dst6->sin6_family = AF_INET6;
5673                 dst6->sin6_len = sizeof(*dst6);
5674                 dst6->sin6_addr = addr->v6;
5675                 break;
5676 #endif /* INET6 */
5677         default:
5678                 return (0);
5679         }
5680
5681 rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING));
5682
5683         if (ro.ro_rt != NULL) {
5684                 RTFREE(ro.ro_rt);
5685         }
5686
5687         return (ret);
5688 }
5689
#ifdef INET
/*
 * Send an IPv4 packet out according to the routing option of rule "r"
 * (r->rt).
 *
 *   m     in/out packet; set to NULL whenever the packet has been consumed
 *         here (always, except for PF_DUPTO where a copy is sent instead)
 *   r     rule carrying the routing option and the address pool
 *   dir   PF_IN or PF_OUT
 *   oifp  interface the packet is currently being filtered on
 *   s     state, or NULL; when present its rt_addr/rt_kif select the
 *         next hop and interface instead of the rule's pool
 *   pd    not used by this function
 *
 * A per-packet counter (m_pkthdr.pf_routed) limits how often one packet
 * may pass through here; once it exceeds 3 the packet is dropped.
 * For every rt other than PF_DUPTO, the function returns without touching
 * the packet when (r->rt == PF_REPLYTO) == (r->direction == dir).
 *
 * The critical section is left around calls into pf_test(), the interface
 * output routine and icmp_error(), and re-entered right afterwards.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct route		 iproute;
	struct route		*ro = NULL;
	struct mbuf		*mb, *mnext;
	struct sockaddr_in	*gw;
	struct ip		*iph;
	struct ifnet		*rifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 err = 0;
	int			 sw_csum;
	int			 verdict;

	if (m == NULL || *m == NULL || r == NULL || oifp == NULL ||
	    (dir != PF_IN && dir != PF_OUT))
		panic("pf_route: invalid parameters");

	/* loop protection: count the passes of this packet through here */
	if ((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) {
		if ((*m)->m_pkthdr.pf_routed > 3) {
			mb = *m;
			*m = NULL;
			goto bad;
		}
		(*m)->m_pkthdr.pf_routed++;
	} else {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf_routed = 1;
	}

	if (r->rt == PF_DUPTO) {
		/* work on a copy, the original continues untouched */
		mb = m_dup(*m, MB_DONTWAIT);
		if (mb == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		mb = *m;
	}

	if (mb->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}
	iph = mtod(mb, struct ip *);

	/* start out with the packet's own destination as next hop */
	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	gw = satosin(&ro->ro_dst);
	gw->sin_family = AF_INET;
	gw->sin_len = sizeof(*gw);
	gw->sin_addr = iph->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		rtalloc(ro);
		if (ro->ro_rt == NULL) {
			ipstat.ips_noroute++;
			goto bad;
		}

		rifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			gw = satosin(ro->ro_rt->rt_gateway);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s != NULL) {
			/* next hop and interface remembered in the state */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				gw->sin_addr.s_addr = s->rt_addr.v4.s_addr;
			rifp = (s->rt_kif != NULL) ?
			    s->rt_kif->pfik_ifp : NULL;
		} else {
			/* no state: pick an address from the rule's pool */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&iph->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				gw->sin_addr.s_addr = naddr.v4.s_addr;
			rifp = (r->rpool.cur->kif != NULL) ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		}
	}
	if (rifp == NULL)
		goto bad;

	if (oifp != rifp) {
		/* the packet changes interface: filter it outbound there */
		crit_exit();
		verdict = pf_test(PF_OUT, rifp, &mb, NULL, NULL);
		crit_enter();
		if (verdict != PF_PASS)
			goto bad;
		if (mb == NULL)
			goto done;
		if (mb->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		/* pf_test() may have replaced the mbuf */
		iph = mtod(mb, struct ip *);
	}

	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	mb->m_pkthdr.csum_flags |= CSUM_IP;
	sw_csum = mb->m_pkthdr.csum_flags & ~rifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		/*
		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
		 */
		NTOHS(iph->ip_len);
		NTOHS(iph->ip_off);	/* XXX: needed? */
		in_delayed_cksum(mb);
		HTONS(iph->ip_len);
		HTONS(iph->ip_off);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
	mb->m_pkthdr.csum_flags &= rifp->if_hwassist;

	if (ntohs(iph->ip_len) <= rifp->if_mtu ||
	    (rifp->if_hwassist & CSUM_FRAGMENT &&
	    ((iph->ip_off & htons(IP_DF)) == 0))) {
		/*
		 * Fits (or the hardware fragments for us): send as is.
		 *
		 * ip->ip_len = htons(ip->ip_len);
		 * ip->ip_off = htons(ip->ip_off);
		 */
		iph->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* From KAME */
			if (iph->ip_v == IPVERSION &&
			    (iph->ip_hl << 2) == sizeof(*iph))
				iph->ip_sum = in_cksum_hdr(iph);
			else
				iph->ip_sum = in_cksum(mb, iph->ip_hl << 2);
		}
		crit_exit();
		err = (*rifp->if_output)(rifp, mb, sintosa(gw), ro->ro_rt);
		crit_enter();
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (iph->ip_off & htons(IP_DF)) {
		ipstat.ips_cantfrag++;
		if (r->rt == PF_DUPTO)
			goto bad;
		/* icmp_error() expects host byte ordering */
		NTOHS(iph->ip_len);
		NTOHS(iph->ip_off);
		crit_exit();
		icmp_error(mb, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
		    rifp->if_mtu);
		crit_enter();
		goto done;
	}

	mnext = mb;
	/*
	 * XXX: is cheaper + less error prone than own function
	 */
	NTOHS(iph->ip_len);
	NTOHS(iph->ip_off);
	err = ip_fragment(iph, &mb, rifp->if_mtu, rifp->if_hwassist, sw_csum);
	if (err)
		goto bad;

	/*
	 * Walk the m_nextpkt list: send fragments until the first output
	 * error, free whatever is left after that.
	 */
	mb = mnext;
	while (mb != NULL) {
		mnext = mb->m_nextpkt;
		mb->m_nextpkt = NULL;
		if (err != 0) {
			m_freem(mb);
		} else {
			crit_exit();
			err = (*rifp->if_output)(rifp, mb, sintosa(gw),
			    NULL);
			crit_enter();
		}
		mb = mnext;
	}

	if (err == 0)
		ipstat.ips_fragmented++;

done:
	/* the caller keeps its packet only when a duplicate was routed */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(mb);
	goto done;
}
#endif /* INET */
5898
#ifdef INET6
/*
 * Send an IPv6 packet out according to the routing option of rule "r"
 * (r->rt).
 *
 *   m     in/out packet; set to NULL whenever the packet has been consumed
 *         here (always, except for PF_DUPTO where a copy is sent instead)
 *   r     rule carrying the routing option and the address pool
 *   dir   PF_IN or PF_OUT
 *   oifp  interface the packet is currently being filtered on
 *   s     state, or NULL; when present its rt_addr/rt_kif select the
 *         next hop and interface instead of the rule's pool
 *   pd    packet descriptor; its pf_mtag is flagged PF_TAG_GENERATED
 *         before a PF_FASTROUTE packet is handed to ip6_output()
 *
 * A per-packet counter (m_pkthdr.pf_routed) limits how often one packet
 * may pass through here; once it exceeds 3 the packet is dropped.
 * Packets larger than the outgoing interface's MTU are not fragmented:
 * an ICMP6_PACKET_TOO_BIG error is generated instead (or, for PF_DUPTO,
 * the copy is simply freed).
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/* loop protection: count the passes of this packet through here */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf_routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf_routed > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
		(*m)->m_pkthdr.pf_routed++;
	}

	if (r->rt == PF_DUPTO) {
		/* work on a copy, the original continues untouched */
		if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	/* start out with the packet's own destination as next hop */
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6 case??? */
	if (r->rt == PF_FASTROUTE) {
		pd->pf_mtag->flags |= PF_TAG_GENERATED;
		crit_exit();
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		crit_enter();
		/*
		 * m0 is the caller's packet here and has just been handed
		 * to ip6_output(); leave through "done" so that *m is
		 * cleared like on every other path that consumes it.
		 */
		goto done;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* no state: pick an address from the rule's pool */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* next hop and interface remembered in the state */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/* the packet changes interface: filter it outbound there */
		crit_exit();
		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			crit_enter();
			goto bad;
		} else if (m0 == NULL) {
			crit_enter();
			goto done;
		}
		crit_enter();
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
			goto bad;
		}
		/* pf_test6() may have replaced the mbuf */
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		crit_exit();
		error = nd6_output(ifp, ifp, m0, dst, NULL);
		crit_enter();
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO) {
			crit_exit();
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
			crit_enter();
		} else
			goto bad;
	}

done:
	/* the caller keeps its packet only when a duplicate was routed */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad:
	m_freem(m0);
	goto done;
}

#endif /* INET6 */
6032
6033
6034 /*
6035  * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
6036  *   off is the offset where the protocol header starts
6037  *   len is the total length of protocol header plus payload
6038  * returns 0 when the checksum is valid, otherwise returns 1.
6039  */
6040 /*
6041  * XXX
6042  * FreeBSD supports cksum offload for the following drivers.
6043  * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
6044  * If we can make full use of it we would outperform ipfw/ipfilter in
6045  * very heavy traffic. 
6046  * I have not tested 'cause I don't have NICs that supports cksum offload.
6047  * (There might be problems. Typical phenomena would be
6048  *   1. No route message for UDP packet.
6049  *   2. No connection acceptance from external hosts regardless of rule set.)
6050  */
6051 int
6052 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
6053     sa_family_t af)
6054 {
6055         u_int16_t sum = 0;
6056         int hw_assist = 0;
6057         struct ip *ip;
6058
6059         if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
6060                 return (1);
6061         if (m->m_pkthdr.len < off + len)
6062                 return (1);
6063
6064         switch (p) {
6065         case IPPROTO_TCP:
6066         case IPPROTO_UDP:
6067                 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
6068                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
6069                                 sum = m->m_pkthdr.csum_data;
6070                         } else {
6071                                 ip = mtod(m, struct ip *);      
6072                                 sum = in_pseudo(ip->ip_src.s_addr,
6073                                         ip->ip_dst.s_addr, htonl((u_short)len +
6074                                         m->m_pkthdr.csum_data + p));
6075                         }
6076                         sum ^= 0xffff;
6077                         ++hw_assist;
6078                 }
6079                 break;
6080         case IPPROTO_ICMP:
6081 #ifdef INET6
6082         case IPPROTO_ICMPV6:
6083 #endif /* INET6 */
6084                 break;
6085         default:
6086                 return (1);
6087         }
6088
6089         if (!hw_assist) {
6090                 switch (af) {
6091                 case AF_INET:
6092                         if (p == IPPROTO_ICMP) {
6093                                 if (m->m_len < off)
6094                                         return (1);
6095                                 m->m_data += off;
6096                                 m->m_len -= off;
6097                                 sum = in_cksum(m, len);
6098                                 m->m_data -= off;
6099                                 m->m_len += off;
6100                         } else {
6101                                 if (m->m_len < sizeof(struct ip))
6102                                         return (1);
6103                                 sum = in_cksum_range(m, p, off, len);
6104                                 if (sum == 0) {
6105                                         m->m_pkthdr.csum_flags |=
6106                                             (CSUM_DATA_VALID |
6107                                              CSUM_PSEUDO_HDR);
6108                                         m->m_pkthdr.csum_data = 0xffff;
6109                                 }
6110                         }
6111                         break;
6112 #ifdef INET6
6113                 case AF_INET6:
6114                         if (m->m_len < sizeof(struct ip6_hdr))
6115                                 return (1);
6116                         sum = in6_cksum(m, p, off, len);
6117                         /*
6118                          * XXX
6119                          * IPv6 H/W cksum off-load not supported yet!
6120                          *
6121                          * if (sum == 0) {
6122                          *      m->m_pkthdr.csum_flags |=
6123                          *          (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
6124                          *      m->m_pkthdr.csum_data = 0xffff;
6125                          *}
6126                          */
6127                         break;
6128 #endif /* INET6 */
6129                 default:
6130                         return (1);
6131                 }
6132         }
6133         if (sum) {
6134                 switch (p) {
6135                 case IPPROTO_TCP:
6136                         tcpstat.tcps_rcvbadsum++;
6137                         break;
6138                 case IPPROTO_UDP:
6139                         udpstat.udps_badsum++;
6140                         break;
6141                 case IPPROTO_ICMP:
6142                         icmpstat.icps_checksum++;
6143                         break;
6144 #ifdef INET6
6145                 case IPPROTO_ICMPV6:
6146                         icmp6stat.icp6s_checksum++;
6147                         break;
6148 #endif /* INET6 */
6149                 }
6150                 return (1);
6151         }
6152         return (0);
6153 }
6154
6155 #ifdef INET
6156 int
6157 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
6158     struct ether_header *eh, struct inpcb *inp)
6159 {
6160         struct pfi_kif          *kif;
6161         u_short                  action, reason = 0, log = 0;
6162         struct mbuf             *m = *m0;
6163         struct ip               *h = NULL;
6164         struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
6165         struct pf_state         *s = NULL;
6166         struct pf_ruleset       *ruleset = NULL;
6167         struct pf_pdesc          pd;
6168         int                      off, dirndx, pqid = 0;
6169         
6170         if (!pf_status.running)
6171                 return (PF_PASS);
6172
6173         memset(&pd, 0, sizeof(pd));
6174         if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
6175                 DPFPRINTF(PF_DEBUG_URGENT,
6176                     ("pf_test: pf_get_mtag returned NULL\n"));
6177                 return (PF_DROP);
6178         }
6179         if (pd.pf_mtag->flags & PF_TAG_GENERATED)
6180                 return (PF_PASS);
6181         kif = (struct pfi_kif *)ifp->if_pf_kif;
6182         if (kif == NULL) {
6183                 DPFPRINTF(PF_DEBUG_URGENT,
6184                     ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
6185                 return (PF_DROP);
6186         }
6187         if (kif->pfik_flags & PFI_IFLAG_SKIP)
6188                 return (PF_PASS);
6189
6190 #ifdef DIAGNOSTIC
6191         if ((m->m_flags & M_PKTHDR) == 0)
6192                 panic("non-M_PKTHDR is passed to pf_test");
6193 #endif /* DIAGNOSTIC */
6194
6195         if (m->m_pkthdr.len < (int)sizeof(*h)) {
6196                 action = PF_DROP;
6197                 REASON_SET(&reason, PFRES_SHORT);
6198                 log = 1;
6199                 panic("debug");
6200                 goto done;
6201         }
6202
6203         /* We do IP header normalization and packet reassembly here */
6204         if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
6205                 action = PF_DROP;
6206                 goto done;
6207         }
6208         m = *m0;
6209         h = mtod(m, struct ip *);
6210
6211         off = h->ip_hl << 2;
6212         if (off < (int)sizeof(*h)) {
6213                 action = PF_DROP;
6214                 REASON_SET(&reason, PFRES_SHORT);
6215                 log = 1;
6216                 panic("debug");
6217                 goto done;
6218         }
6219
6220         pd.src = (struct pf_addr *)&h->ip_src;
6221         pd.dst = (struct pf_addr *)&h->ip_dst;
6222         PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
6223         pd.ip_sum = &h->ip_sum;
6224         pd.proto = h->ip_p;
6225         pd.af = AF_INET;
6226         pd.tos = h->ip_tos;
6227         pd.tot_len = h->ip_len;
6228         pd.eh = eh;
6229
6230         /* handle fragments that didn't get reassembled by normalization */
6231         if (h->ip_off & (IP_MF | IP_OFFMASK)) {
6232                 action = pf_test_fragment(&r, dir, kif, m, h,
6233                     &pd, &a, &ruleset);
6234                 goto done;
6235         }
6236         switch (h->ip_p) {
6237
6238         case IPPROTO_TCP: {
6239                 struct tcphdr   th;
6240
6241                 pd.hdr.tcp = &th;
6242                 if (!pf_pull_hdr(m, off, &th, sizeof(th),
6243                     &action, &reason, AF_INET)) {
6244                         log = action != PF_PASS;
6245                         goto done;
6246                 }
6247                 if (dir == PF_IN && pf_check_proto_cksum(m, off,
6248                     h->ip_len - off, IPPROTO_TCP, AF_INET)) {
6249                         REASON_SET(&reason, PFRES_PROTCKSUM);
6250                         action = PF_DROP;
6251                         goto done;
6252                 }
6253                 pd.p_len = pd.tot_len - off - (th.th_off << 2);
6254                 if ((th.th_flags & TH_ACK) && pd.p_len == 0)
6255                         pqid = 1;
6256                 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6257                 if (action == PF_DROP)
6258                         goto done;
6259                 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6260                     &reason);
6261                 if (action == PF_PASS) {
6262 #if NPFSYNC
6263                         pfsync_update_state(s);
6264 #endif /* NPFSYNC */
6265                         r = s->rule.ptr;
6266                         a = s->anchor.ptr;
6267                         log = s->log;
6268                 } else if (s == NULL) {
6269                         action = pf_test_tcp(&r, &s, dir, kif,
6270                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6271                         }
6272                 break;
6273         }
6274
6275         case IPPROTO_UDP: {
6276                 struct udphdr   uh;
6277
6278                 pd.hdr.udp = &uh;
6279                 if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6280                     &action, &reason, AF_INET)) {
6281                         log = action != PF_PASS;
6282                         goto done;
6283                 }
6284                 if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
6285                     off, h->ip_len - off, IPPROTO_UDP, AF_INET)) {
6286                         action = PF_DROP;
6287                         REASON_SET(&reason, PFRES_PROTCKSUM);
6288                         goto done;
6289                 }
6290                 if (uh.uh_dport == 0 ||
6291                     ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6292                     ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6293                         action = PF_DROP;
6294                         REASON_SET(&reason, PFRES_SHORT);
6295                         goto done;
6296                 }
6297                 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6298                 if (action == PF_PASS) {
6299 #if NPFSYNC
6300                         pfsync_update_state(s);
6301 #endif /* NPFSYNC */
6302                         r = s->rule.ptr;
6303                         a = s->anchor.ptr;
6304                         log = s->log;
6305                 } else if (s == NULL)
6306                         action = pf_test_udp(&r, &s, dir, kif,
6307                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6308                 break;
6309         }
6310
6311         case IPPROTO_ICMP: {
6312                 struct icmp     ih;
6313
6314                 pd.hdr.icmp = &ih;
6315                 if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
6316                     &action, &reason, AF_INET)) {
6317                         log = action != PF_PASS;
6318                         goto done;
6319                 }
6320                 if (dir == PF_IN && pf_check_proto_cksum(m, off,
6321                     h->ip_len - off, IPPROTO_ICMP, AF_INET)) {
6322                         action = PF_DROP;
6323                         REASON_SET(&reason, PFRES_PROTCKSUM);
6324                         goto done;
6325                 }
6326                 action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
6327                     &reason);
6328                 if (action == PF_PASS) {
6329 #if NPFSYNC
6330                         pfsync_update_state(s);
6331 #endif /* NPFSYNC */
6332                         r = s->rule.ptr;
6333                         a = s->anchor.ptr;
6334                         log = s->log;
6335                 } else if (s == NULL)
6336                         action = pf_test_icmp(&r, &s, dir, kif,
6337                             m, off, h, &pd, &a, &ruleset, NULL);
6338                 break;
6339         }
6340
6341         default:
6342                 action = pf_test_state_other(&s, dir, kif, &pd);
6343                 if (action == PF_PASS) {
6344 #if NPFSYNC
6345                         pfsync_update_state(s);
6346 #endif /* NPFSYNC */
6347                         r = s->rule.ptr;
6348                         a = s->anchor.ptr;
6349                         log = s->log;
6350                 } else if (s == NULL)
6351                         action = pf_test_other(&r, &s, dir, kif, m, off, h,
6352                             &pd, &a, &ruleset, NULL);
6353                 break;
6354         }
6355
6356 done:
6357         if (action == PF_PASS && h->ip_hl > 5 &&
6358             !((s && s->allow_opts) || r->allow_opts)) {
6359                 action = PF_DROP;
6360                 REASON_SET(&reason, PFRES_IPOPTIONS);
6361                 log = 1;
6362                 DPFPRINTF(PF_DEBUG_MISC,
6363                     ("pf: dropping packet with ip options\n"));
6364         }
6365
6366         if ((s && s->tag) || r->rtableid)
6367                 pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
6368
6369 #ifdef ALTQ
6370         if (action == PF_PASS && r->qid) {
6371                 if (pqid || (pd.tos & IPTOS_LOWDELAY))
6372                         pd.pf_mtag->qid = r->pqid;
6373                 else
6374                         pd.pf_mtag->qid = r->qid;
6375                 /* add hints for ecn */
6376                 pd.pf_mtag->af = AF_INET;
6377                 pd.pf_mtag->hdr = h;
6378                 /* add connection hash for fairq */
6379                 if (s) {
6380                         KKASSERT(s->hash != 0);
6381                         pd.pf_mtag->flags |= PF_TAG_STATE_HASHED;
6382                         pd.pf_mtag->state_hash = s->hash;
6383                 }
6384         }
6385 #endif /* ALTQ */
6386
6387         /*
6388          * connections redirected to loopback should not match sockets
6389          * bound specifically to loopback due to security implications,
6390          * see tcp_input() and in_pcblookup_listen().
6391          */
6392         if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6393             pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6394             (s->nat_rule.ptr->action == PF_RDR ||
6395             s->nat_rule.ptr->action == PF_BINAT) &&
6396             (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
6397                 pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
6398
6399         if (log) {
6400                 struct pf_rule *lr;
6401
6402                 if (s != NULL && s->nat_rule.ptr != NULL &&
6403                     s->nat_rule.ptr->log & PF_LOG_ALL)
6404                         lr = s->nat_rule.ptr;
6405                 else
6406                         lr = r;
6407                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
6408                     &pd);
6409         }
6410
6411         kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6412         kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
6413
6414         if (action == PF_PASS || r->action == PF_DROP) {
6415                 dirndx = (dir == PF_OUT);
6416                 r->packets[dirndx]++;
6417                 r->bytes[dirndx] += pd.tot_len;
6418                 if (a != NULL) {
6419                         a->packets[dirndx]++;
6420                         a->bytes[dirndx] += pd.tot_len;
6421                 }
6422                 if (s != NULL) {
6423                         if (s->nat_rule.ptr != NULL) {
6424                                 s->nat_rule.ptr->packets[dirndx]++;
6425                                 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6426                         }
6427                         if (s->src_node != NULL) {
6428                                 s->src_node->packets[dirndx]++;
6429                                 s->src_node->bytes[dirndx] += pd.tot_len;
6430                         }
6431                         if (s->nat_src_node != NULL) {
6432                                 s->nat_src_node->packets[dirndx]++;
6433                                 s->nat_src_node->bytes[dirndx] += pd.tot_len;
6434                         }
6435                         dirndx = (dir == s->direction) ? 0 : 1;
6436                         s->packets[dirndx]++;
6437                         s->bytes[dirndx] += pd.tot_len;
6438                 }
6439                 tr = r;
6440                 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6441                 if (nr != NULL) {
6442                         struct pf_addr *x;
6443                         /*
6444                          * XXX: we need to make sure that the addresses
6445                          * passed to pfr_update_stats() are the same than
6446                          * the addresses used during matching (pfr_match)
6447                          */
6448                         if (r == &pf_default_rule) {
6449                                 tr = nr;
6450                                 x = (s == NULL || s->direction == dir) ?
6451                                     &pd.baddr : &pd.naddr;
6452                         } else
6453                                 x = (s == NULL || s->direction == dir) ?
6454                                     &pd.naddr : &pd.baddr;
6455                         if (x == &pd.baddr || s == NULL) {
6456                                 /* we need to change the address */
6457                                 if (dir == PF_OUT)
6458                                         pd.src = x;
6459                                 else
6460                                         pd.dst = x;
6461                         }
6462                 }
6463                 if (tr->src.addr.type == PF_ADDR_TABLE)
6464                         pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6465                             s->direction == dir) ? pd.src : pd.dst, pd.af,
6466                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6467                             tr->src.neg);
6468                 if (tr->dst.addr.type == PF_ADDR_TABLE)
6469                         pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6470                             s->direction == dir) ? pd.dst : pd.src, pd.af,
6471                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6472                             tr->dst.neg);
6473         }
6474
6475
6476         if (action == PF_SYNPROXY_DROP) {
6477                 m_freem(*m0);
6478                 *m0 = NULL;
6479                 action = PF_PASS;
6480         } else if (r->rt)
6481                 /* pf_route can free the mbuf causing *m0 to become NULL */
6482                 pf_route(m0, r, dir, ifp, s, &pd);
6483         return (action);
6484 }
6485 #endif /* INET */
6486
6487 #ifdef INET6
/*
 * pf_test6: filter one IPv6 packet seen on interface ifp in direction
 * dir (compared against PF_IN / PF_OUT below).
 *
 * m0   in/out pointer to the packet.  It is handed to pf_normalize_ip6()
 *      and re-read afterwards; it is freed and set to NULL here on
 *      PF_SYNPROXY_DROP, and pf_route6() may free it as well.
 * eh   stored in the packet descriptor (pd.eh).
 * inp  passed through to pf_test_tcp() / pf_test_udp().
 *
 * Returns PF_PASS without filtering when pf is not running, when the
 * packet's tag has PF_TAG_GENERATED set, or when the interface has
 * PFI_IFLAG_SKIP set.  Returns PF_DROP when no pf mtag or no kif is
 * available.  Otherwise returns the action produced by the state / rule
 * evaluation, with PF_SYNPROXY_DROP reported to the caller as PF_PASS.
 */
6488 int
6489 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
6490     struct ether_header *eh, struct inpcb *inp)
6491 {
6492         struct pfi_kif          *kif;
6493         u_short                  action, reason = 0, log = 0;
             /* n is the mbuf used for checksumming; here it is only ever NULL or m */
6494         struct mbuf             *m = *m0, *n = NULL;
6495         struct ip6_hdr          *h = NULL;
             /* r starts as the default rule and is replaced by the matching rule */
6496         struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
6497         struct pf_state         *s = NULL;
6498         struct pf_ruleset       *ruleset = NULL;
6499         struct pf_pdesc          pd;
             /* terminal becomes non-zero once the extension-header walk is finished */
6500         int                      off, terminal = 0, dirndx;
6501
6502         if (!pf_status.running)
6503                 return (PF_PASS);
6504
6505         memset(&pd, 0, sizeof(pd));
6506         if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
6507                 DPFPRINTF(PF_DEBUG_URGENT,
6508                     ("pf_test6: pf_get_mtag returned NULL\n"));
6509                 return (PF_DROP);
6510         }
             /* packets tagged PF_TAG_GENERATED bypass filtering entirely */
6511         if (pd.pf_mtag->flags & PF_TAG_GENERATED)
6512                 return (PF_PASS);
6513
6514         kif = (struct pfi_kif *)ifp->if_pf_kif;
6515         if (kif == NULL) {
6516                 DPFPRINTF(PF_DEBUG_URGENT,
6517                     ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
6518                 return (PF_DROP);
6519         }
             /* interfaces flagged PFI_IFLAG_SKIP are not filtered */
6520         if (kif->pfik_flags & PFI_IFLAG_SKIP)
6521                 return (PF_PASS);
6522
6523 #ifdef DIAGNOSTIC
6524         if ((m->m_flags & M_PKTHDR) == 0)
6525                 panic("non-M_PKTHDR is passed to pf_test6");
6526 #endif /* DIAGNOSTIC */
6527
             /*
              * Packet shorter than the fixed IPv6 header: drop and log.
              * Note that h is still NULL and pd.tot_len still 0 when this
              * path reaches the code after "done".
              */
6528         if (m->m_pkthdr.len < (int)sizeof(*h)) {
6529                 action = PF_DROP;
6530                 REASON_SET(&reason, PFRES_SHORT);
6531                 log = 1;
6532                 goto done;
6533         }
6534
6535         /* We do IP header normalization and packet reassembly here */
6536         if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
6537                 action = PF_DROP;
6538                 goto done;
6539         }
             /* reload the packet pointer from *m0 after normalization */
6540         m = *m0;
6541         h = mtod(m, struct ip6_hdr *);
6542
6543 #if 1
6544         /*
6545          * we do not support jumbogram yet.  if we keep going, zero ip6_plen
6546          * will do something bad, so drop the packet for now.
              *
              * (htons() is used where ntohs() would be conventional; for a
              * comparison against zero the result is the same.)
6547          */
6548         if (htons(h->ip6_plen) == 0) {
6549                 action = PF_DROP;
6550                 REASON_SET(&reason, PFRES_NORM);        /*XXX*/
6551                 goto done;
6552         }
6553 #endif
6554
             /*
              * Fill in the packet descriptor.  src/dst point into the IPv6
              * header itself; baddr keeps a copy of the source address for
              * outbound packets and of the destination address otherwise.
              */
6555         pd.src = (struct pf_addr *)&h->ip6_src;
6556         pd.dst = (struct pf_addr *)&h->ip6_dst;
6557         PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6558         pd.ip_sum = NULL;
6559         pd.af = AF_INET6;
6560         pd.tos = 0;
             /* total length = payload length field plus the fixed header */
6561         pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6562         pd.eh = eh;
6563
             /* off: offset from m->m_data of the header following the IPv6 header */
6564         off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6565         pd.proto = h->ip6_nxt;
             /*
              * Walk the extension header chain.  Each pass either finishes
              * the function via "goto done", advances off / pd.proto to the
              * next header, or sets terminal when pd.proto is not one of the
              * extension headers handled here.
              */
6566         do {
6567                 switch (pd.proto) {
6568                 case IPPROTO_FRAGMENT:
                             /* fragments get their own rule evaluation; no further parsing */
6569                         action = pf_test_fragment(&r, dir, kif, m, h,
6570                             &pd, &a, &ruleset);
6571                         if (action == PF_DROP)
6572                                 REASON_SET(&reason, PFRES_FRAG);
6573                         goto done;
6574                 case IPPROTO_ROUTING: {
6575                         struct ip6_rthdr rthdr;
6576
6577                         if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
6578                             &reason, pd.af)) {
6579                                 DPFPRINTF(PF_DEBUG_MISC,
6580                                     ("pf: IPv6 short rthdr\n"));
6581                                 action = PF_DROP;
6582                                 log = 1;
6583                                 goto done;
6584                         }
                             /* type 0 routing headers are always dropped and logged */
6585                         if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6586                                 DPFPRINTF(PF_DEBUG_MISC,
6587                                     ("pf: IPv6 rthdr0\n"));
6588                                 action = PF_DROP;
6589                                 REASON_SET(&reason, PFRES_IPOPTIONS);
6590                                 log = 1;
6591                                 goto done;
6592                         }
                             /* other routing header types are skipped like any option header */
6593                         /* FALLTHROUGH */
6594                 }
6595                 case IPPROTO_AH:
6596                 case IPPROTO_HOPOPTS:
6597                 case IPPROTO_DSTOPTS: {
6598                         /* get next header and header length */
6599                         struct ip6_ext  opt6;
6600
6601                         if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6602                             NULL, &reason, pd.af)) {
6603                                 DPFPRINTF(PF_DEBUG_MISC,
6604                                     ("pf: IPv6 short opt\n"));
6605                                 action = PF_DROP;
6606                                 log = 1;
6607                                 goto done;
6608                         }
                             /*
                              * AH is advanced by (len + 2) 4-byte units, the
                              * other headers by (len + 1) 8-byte units.
                              */
6609                         if (pd.proto == IPPROTO_AH)
6610                                 off += (opt6.ip6e_len + 2) * 4;
6611                         else
6612                                 off += (opt6.ip6e_len + 1) * 8;
6613                         pd.proto = opt6.ip6e_nxt;
6614                         /* goto the next header */
6615                         break;
6616                 }
6617                 default:
                             /* not an extension header handled above: stop walking */
6618                         terminal++;
6619                         break;
6620                 }
6621         } while (!terminal);
6622
6623         /* if there's no routing header, use unmodified mbuf for checksumming */
             /* (nothing above assigns n, so this always selects m) */
6624         if (!n)
6625                 n = m;
6626
             /*
              * Per-protocol handling.  Each case first tries the state path
              * (pf_test_state_*); on PF_PASS the rule, anchor and log setting
              * are taken from the state s.  If no state came back (s == NULL)
              * the rule path (pf_test_*) is used instead.  A non-PASS result
              * with s != NULL is kept as it is.
              */
6627         switch (pd.proto) {
6628
6629         case IPPROTO_TCP: {
6630                 struct tcphdr   th;
6631
6632                 pd.hdr.tcp = &th;
                     /* pf_pull_hdr() is given &action and &reason for the failure case */
6633                 if (!pf_pull_hdr(m, off, &th, sizeof(th),
6634                     &action, &reason, AF_INET6)) {
6635                         log = action != PF_PASS;
6636                         goto done;
6637                 }
                     /* the TCP checksum is only verified for inbound packets */
6638                 if (dir == PF_IN && pf_check_proto_cksum(n, off,
6639                     ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6640                     IPPROTO_TCP, AF_INET6)) {
6641                         action = PF_DROP;
6642                         REASON_SET(&reason, PFRES_PROTCKSUM);
6643                         goto done;
6644                 }
                     /* p_len: total length minus offset and TCP header (th_off words) */
6645                 pd.p_len = pd.tot_len - off - (th.th_off << 2);
6646                 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6647                 if (action == PF_DROP)
6648                         goto done;
6649                 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6650                     &reason);
6651                 if (action == PF_PASS) {
6652 #if NPFSYNC
6653                         pfsync_update_state(s);
6654 #endif /* NPFSYNC */
6655                         r = s->rule.ptr;
6656                         a = s->anchor.ptr;
6657                         log = s->log;
6658                 } else if (s == NULL)
6659                         action = pf_test_tcp(&r, &s, dir, kif,
6660                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6661                 break;
6662         }
6663
6664         case IPPROTO_UDP: {
6665                 struct udphdr   uh;
6666
6667                 pd.hdr.udp = &uh;
6668                 if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6669                     &action, &reason, AF_INET6)) {
6670                         log = action != PF_PASS;
6671                         goto done;
6672                 }
                     /* checksum verified only inbound and only when uh_sum is non-zero */
6673                 if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n,
6674                     off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6675                     IPPROTO_UDP, AF_INET6)) {
6676                         action = PF_DROP;
6677                         REASON_SET(&reason, PFRES_PROTCKSUM);
6678                         goto done;
6679                 }
                     /*
                      * Drop datagrams with destination port 0, or whose UDP
                      * length exceeds what is left in the packet or is
                      * smaller than the UDP header.
                      */
6680                 if (uh.uh_dport == 0 ||
6681                     ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6682                     ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6683                         action = PF_DROP;
6684                         REASON_SET(&reason, PFRES_SHORT);
6685                         goto done;
6686                 }
6687                 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6688                 if (action == PF_PASS) {
6689 #if NPFSYNC
6690                         pfsync_update_state(s);
6691 #endif /* NPFSYNC */
6692                         r = s->rule.ptr;
6693                         a = s->anchor.ptr;
6694                         log = s->log;
6695                 } else if (s == NULL)
6696                         action = pf_test_udp(&r, &s, dir, kif,
6697                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6698                 break;
6699         }
6700
6701         case IPPROTO_ICMPV6: {
6702                 struct icmp6_hdr        ih;
6703
6704                 pd.hdr.icmp6 = &ih;
6705                 if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6706                     &action, &reason, AF_INET6)) {
6707                         log = action != PF_PASS;
6708                         goto done;
6709                 }
                     /* the ICMPv6 checksum is only verified for inbound packets */
6710                 if (dir == PF_IN && pf_check_proto_cksum(n, off,
6711                     ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
6712                     IPPROTO_ICMPV6, AF_INET6)) {
6713                         action = PF_DROP;
6714                         REASON_SET(&reason, PFRES_PROTCKSUM);
6715                         goto done;
6716                 }
6717                 action = pf_test_state_icmp(&s, dir, kif,
6718                     m, off, h, &pd, &reason);
6719                 if (action == PF_PASS) {
6720 #if NPFSYNC
6721                         pfsync_update_state(s);
6722 #endif /* NPFSYNC */
6723                         r = s->rule.ptr;
6724                         a = s->anchor.ptr;
6725                         log = s->log;
6726                 } else if (s == NULL)
6727                         action = pf_test_icmp(&r, &s, dir, kif,
6728                             m, off, h, &pd, &a, &ruleset, NULL);
6729                 break;
6730         }
6731
6732         default:
                     /* every other upper-layer protocol */
6733                 action = pf_test_state_other(&s, dir, kif, &pd);
6734                 if (action == PF_PASS) {
6735 #if NPFSYNC
6736                         pfsync_update_state(s);
6737 #endif /* NPFSYNC */
6738                         r = s->rule.ptr;
6739                         a = s->anchor.ptr;
6740                         log = s->log;
6741                 } else if (s == NULL)
6742                         action = pf_test_other(&r, &s, dir, kif, m, off, h,
6743                             &pd, &a, &ruleset, NULL);
6744                 break;
6745         }
6746
6747 done:
             /*
              * NOTE(review): on the early "goto done" paths n is still NULL,
              * so this calls m_freem(NULL); confirm m_freem() tolerates a
              * NULL argument.  On the normal path n == m and nothing is freed.
              */
6748         if (n != m) {
6749                 m_freem(n);
6750                 n = NULL;
6751         }
6752
6753         /* XXX handle IPv6 options, if not allowed.  not implemented. */
6754
             /* tag the packet when the state carries a tag or the rule a routing table id */
6755         if ((s && s->tag) || r->rtableid)
6756                 pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
6757
6758 #ifdef ALTQ
             /*
              * Queue assignment for passed packets: r->pqid when pd.tos has
              * IPTOS_LOWDELAY set, r->qid otherwise.  pd.tos was set to 0
              * above and is not changed again in this function itself.
              */
6759         if (action == PF_PASS && r->qid) {
6760                 if (pd.tos & IPTOS_LOWDELAY)
6761                         pd.pf_mtag->qid = r->pqid;
6762                 else
6763                         pd.pf_mtag->qid = r->qid;
6764                 /* add hints for ecn */
6765                 pd.pf_mtag->af = AF_INET6;
6766                 pd.pf_mtag->hdr = h;
6767                 /* add connection hash for fairq */
6768                 if (s) {
6769                         KKASSERT(s->hash != 0);
6770                         pd.pf_mtag->flags |= PF_TAG_STATE_HASHED;
6771                         pd.pf_mtag->state_hash = s->hash;
6772                 }
6773         }
6774 #endif /* ALTQ */
6775
             /*
              * Mark inbound TCP/UDP packets that passed via a state whose NAT
              * rule is an rdr or binat rule and whose destination is the IPv6
              * loopback address.
              */
6776         if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6777             pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6778             (s->nat_rule.ptr->action == PF_RDR ||
6779             s->nat_rule.ptr->action == PF_BINAT) &&
6780             IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
6781                 pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
6782
6783         if (log) {
6784                 struct pf_rule *lr;
6785
                     /* log against the state's NAT rule if it has PF_LOG_ALL, else against r */
6786                 if (s != NULL && s->nat_rule.ptr != NULL &&
6787                     s->nat_rule.ptr->log & PF_LOG_ALL)
6788                         lr = s->nat_rule.ptr;
6789                 else
6790                         lr = r;
6791                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
6792                     &pd);
6793         }
6794
             /* interface counters, indexed [1][is outbound][was not passed] */
6795         kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6796         kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6797
             /*
              * Rule, anchor, state and source-node counters are updated when
              * the packet passed or when the rule's own action is PF_DROP.
              */
6798         if (action == PF_PASS || r->action == PF_DROP) {
                     /* dirndx: 0 for inbound, 1 for outbound */
6799                 dirndx = (dir == PF_OUT);
6800                 r->packets[dirndx]++;
6801                 r->bytes[dirndx] += pd.tot_len;
6802                 if (a != NULL) {
6803                         a->packets[dirndx]++;
6804                         a->bytes[dirndx] += pd.tot_len;
6805                 }
6806                 if (s != NULL) {
6807                         if (s->nat_rule.ptr != NULL) {
6808                                 s->nat_rule.ptr->packets[dirndx]++;
6809                                 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6810                         }
6811                         if (s->src_node != NULL) {
6812                                 s->src_node->packets[dirndx]++;
6813                                 s->src_node->bytes[dirndx] += pd.tot_len;
6814                         }
6815                         if (s->nat_src_node != NULL) {
6816                                 s->nat_src_node->packets[dirndx]++;
6817                                 s->nat_src_node->bytes[dirndx] += pd.tot_len;
6818                         }
                             /* state counters: 0 = same direction as the state, 1 = reverse */
6819                         dirndx = (dir == s->direction) ? 0 : 1;
6820                         s->packets[dirndx]++;
6821                         s->bytes[dirndx] += pd.tot_len;
6822                 }
                     /* tr: rule whose address tables get their statistics updated */
6823                 tr = r;
6824                 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6825                 if (nr != NULL) {
6826                         struct pf_addr *x;
6827                         /*
6828                          * XXX: we need to make sure that the addresses
6829                          * passed to pfr_update_stats() are the same as
6830                          * the addresses used during matching (pfr_match)
6831                          */
6832                         if (r == &pf_default_rule) {
                                     /* only the NAT rule matched: account against it */
6833                                 tr = nr;
6834                                 x = (s == NULL || s->direction == dir) ?
6835                                     &pd.baddr : &pd.naddr;
6836                         } else {
6837                                 x = (s == NULL || s->direction == dir) ?
6838                                     &pd.naddr : &pd.baddr;
6839                         }
                             /* we need to change the address */
6840                         if (x == &pd.baddr || s == NULL) {
6841                                 if (dir == PF_OUT)
6842                                         pd.src = x;
6843                                 else
6844                                         pd.dst = x;
6845                         }
6846                 }
                     /*
                      * For table-type addresses in tr, update the table
                      * statistics.  src and dst are swapped when the packet
                      * travels against the state's direction.
                      */
6847                 if (tr->src.addr.type == PF_ADDR_TABLE)
6848                         pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
6849                             s->direction == dir) ? pd.src : pd.dst, pd.af,
6850                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6851                             tr->src.neg);
6852                 if (tr->dst.addr.type == PF_ADDR_TABLE)
6853                         pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
6854                             s->direction == dir) ? pd.dst : pd.src, pd.af,
6855                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6856                             tr->dst.neg);
6857         }
6858
6859
             /*
              * PF_SYNPROXY_DROP: the packet is consumed here (freed, *m0 set
              * to NULL) and PF_PASS is reported to the caller.
              */
6860         if (action == PF_SYNPROXY_DROP) {
6861                 m_freem(*m0);
6862                 *m0 = NULL;
6863                 action = PF_PASS;
6864         } else if (r->rt)
6865                 /* pf_route6 can free the mbuf causing *m0 to become NULL */
6866                 pf_route6(m0, r, dir, ifp, s, &pd);
6867
6868         return (action);
6869 }
6870 #endif /* INET6 */
6871
/*
 * pf_check_congestion: stub that always returns 0.  The ifq argument is
 * accepted but never examined.
 */
6872 int
6873 pf_check_congestion(struct ifqueue *ifq)
6874 {
6875                 return (0);
6876 }