Merge branch 'vendor/TNFTP'
[dragonfly.git] / sys / net / pf / pf.c
1 /*      $OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */
2 /* add $OpenBSD: pf.c,v 1.553 2007/08/23 11:15:49 dhartmei Exp $ */
3 /* add $OpenBSD: pf.c,v 1.554 2007/08/28 16:09:12 henning Exp $ */
4
5
6 /*
7  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
8  *
9  * Copyright (c) 2001 Daniel Hartmeier
10  * Copyright (c) 2002,2003 Henning Brauer
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  *
17  *    - Redistributions of source code must retain the above copyright
18  *      notice, this list of conditions and the following disclaimer.
19  *    - Redistributions in binary form must reproduce the above
20  *      copyright notice, this list of conditions and the following
21  *      disclaimer in the documentation and/or other materials provided
22  *      with the distribution.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Effort sponsored in part by the Defense Advanced Research Projects
38  * Agency (DARPA) and Air Force Research Laboratory, Air Force
39  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
40  *
41  */
42
43 #include "opt_inet.h"
44 #include "opt_inet6.h"
45 #include "use_pfsync.h"
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/filio.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/kernel.h>
55 #include <sys/time.h>
56 #include <sys/sysctl.h>
57 #include <sys/endian.h>
58 #include <vm/vm_zone.h>
59 #include <sys/proc.h>
60 #include <sys/kthread.h>
61
62 #include <machine/inttypes.h>
63
64 #include <net/if.h>
65 #include <net/if_types.h>
66 #include <net/bpf.h>
67 #include <net/netisr.h>
68 #include <net/route.h>
69
70 #include <netinet/in.h>
71 #include <netinet/in_var.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/tcp.h>
76 #include <netinet/tcp_seq.h>
77 #include <netinet/udp.h>
78 #include <netinet/ip_icmp.h>
79 #include <netinet/in_pcb.h>
80 #include <netinet/tcp_timer.h>
81 #include <netinet/tcp_var.h>
82 #include <netinet/udp_var.h>
83 #include <netinet/icmp_var.h>
84 #include <netinet/if_ether.h>
85
86 #include <net/pf/pfvar.h>
87 #include <net/pf/if_pflog.h>
88
89 #if NPFSYNC > 0
90 #include <net/pf/if_pfsync.h>
91 #endif /* NPFSYNC > 0 */
92
93 #ifdef INET6
94 #include <netinet/ip6.h>
95 #include <netinet/in_pcb.h>
96 #include <netinet/icmp6.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/ip6_var.h>
99 #include <netinet6/in6_pcb.h>
100 #endif /* INET6 */
101
102 #include <sys/in_cksum.h>
103 #include <sys/ucred.h>
104 #include <machine/limits.h>
105 #include <sys/msgport2.h>
106 #include <net/netmsg2.h>
107
108 extern int ip_optcopy(struct ip *, struct ip *);
109 extern int debug_pfugidhack;
110
111 struct lwkt_token pf_token = LWKT_TOKEN_MP_INITIALIZER(pf_token);
112
/*
 * Debug printf: passes the parenthesised argument list `x' to kprintf
 * only when pf_status.debug is at least `n', e.g.
 *
 *	DPFPRINTF(PF_DEBUG_MISC, ("pf: value %d\n", v));
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement; a bare `if' here would silently capture an `else' that
 * follows the macro invocation.
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			kprintf x;					\
	} while (0)
114
115 /*
116  * Global variables
117  */
118
119 /* state tables */
120 struct pf_state_tree_lan_ext     pf_statetbl_lan_ext;
121 struct pf_state_tree_ext_gwy     pf_statetbl_ext_gwy;
122
123 struct pf_altqqueue      pf_altqs[2];
124 struct pf_palist         pf_pabuf;
125 struct pf_altqqueue     *pf_altqs_active;
126 struct pf_altqqueue     *pf_altqs_inactive;
127 struct pf_status         pf_status;
128
129 u_int32_t                ticket_altqs_active;
130 u_int32_t                ticket_altqs_inactive;
131 int                      altqs_inactive_open;
132 u_int32_t                ticket_pabuf;
133
/*
 * One saved frame of anchor traversal: a ruleset pointer, a rule pointer
 * and the parent-node/child-anchor pair.
 * NOTE(review): the exact role of each field is defined by the
 * pf_step_into_anchor()/pf_step_out_of_anchor() code, which is not in
 * view here -- confirm before relying on this description.
 */
struct pf_anchor_stackframe {
	struct pf_ruleset	*rs;
	struct pf_rule		*r;
	struct pf_anchor_node	*parent;
	struct pf_anchor	*child;
};

/* Fixed-size frame storage: 64 frames. */
struct pf_anchor_stackframe	pf_anchor_stack[64];
140
141 vm_zone_t                pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
142 vm_zone_t                pf_state_pl, pf_state_key_pl;
143 vm_zone_t                pf_altq_pl;
144
145 void                     pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
146
147 void                     pf_init_threshold(struct pf_threshold *, u_int32_t,
148                             u_int32_t);
149 void                     pf_add_threshold(struct pf_threshold *);
150 int                      pf_check_threshold(struct pf_threshold *);
151
152 void                     pf_change_ap(struct pf_addr *, u_int16_t *,
153                             u_int16_t *, u_int16_t *, struct pf_addr *,
154                             u_int16_t, u_int8_t, sa_family_t);
155 int                      pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
156                             struct tcphdr *, struct pf_state_peer *);
157 #ifdef INET6
158 void                     pf_change_a6(struct pf_addr *, u_int16_t *,
159                             struct pf_addr *, u_int8_t);
160 #endif /* INET6 */
161 void                     pf_change_icmp(struct pf_addr *, u_int16_t *,
162                             struct pf_addr *, struct pf_addr *, u_int16_t,
163                             u_int16_t *, u_int16_t *, u_int16_t *,
164                             u_int16_t *, u_int8_t, sa_family_t);
165 void                     pf_send_tcp(const struct pf_rule *, sa_family_t,
166                             const struct pf_addr *, const struct pf_addr *,
167                             u_int16_t, u_int16_t, u_int32_t, u_int32_t,
168                             u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
169                             u_int16_t, struct ether_header *, struct ifnet *);
170 void                     pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
171                             sa_family_t, struct pf_rule *);
172 struct pf_rule          *pf_match_translation(struct pf_pdesc *, struct mbuf *,
173                             int, int, struct pfi_kif *,
174                             struct pf_addr *, u_int16_t, struct pf_addr *,
175                             u_int16_t, int);
176 struct pf_rule          *pf_get_translation(struct pf_pdesc *, struct mbuf *,
177                             int, int, struct pfi_kif *, struct pf_src_node **,
178                             struct pf_addr *, u_int16_t,
179                             struct pf_addr *, u_int16_t,
180                             struct pf_addr *, u_int16_t *);
181 void                     pf_attach_state(struct pf_state_key *,
182                             struct pf_state *, int);
183 void                     pf_detach_state(struct pf_state *, int);
184 int                      pf_test_rule(struct pf_rule **, struct pf_state **,
185                             int, struct pfi_kif *, struct mbuf *, int,
186                             void *, struct pf_pdesc *, struct pf_rule **,
187                             struct pf_ruleset **, struct ifqueue *, struct inpcb *);
188 int                      pf_test_fragment(struct pf_rule **, int,
189                             struct pfi_kif *, struct mbuf *, void *,
190                             struct pf_pdesc *, struct pf_rule **,
191                             struct pf_ruleset **);
192 int                      pf_test_state_tcp(struct pf_state **, int,
193                             struct pfi_kif *, struct mbuf *, int,
194                             void *, struct pf_pdesc *, u_short *);
195 int                      pf_test_state_udp(struct pf_state **, int,
196                             struct pfi_kif *, struct mbuf *, int,
197                             void *, struct pf_pdesc *);
198 int                      pf_test_state_icmp(struct pf_state **, int,
199                             struct pfi_kif *, struct mbuf *, int,
200                             void *, struct pf_pdesc *, u_short *);
201 int                      pf_test_state_other(struct pf_state **, int,
202                             struct pfi_kif *, struct pf_pdesc *);
203 int                      pf_match_tag(struct mbuf *, struct pf_rule *, int *);
204 void                     pf_step_into_anchor(int *, struct pf_ruleset **, int,
205                             struct pf_rule **, struct pf_rule **,  int *);
206 int                      pf_step_out_of_anchor(int *, struct pf_ruleset **,
207                              int, struct pf_rule **, struct pf_rule **,
208                              int *);
209 void                     pf_hash(struct pf_addr *, struct pf_addr *,
210                             struct pf_poolhashkey *, sa_family_t);
211 int                      pf_map_addr(u_int8_t, struct pf_rule *,
212                             struct pf_addr *, struct pf_addr *,
213                             struct pf_addr *, struct pf_src_node **);
214 int                      pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
215                             struct pf_addr *, struct pf_addr *, u_int16_t,
216                             struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
217                             struct pf_src_node **);
218 void                     pf_route(struct mbuf **, struct pf_rule *, int,
219                             struct ifnet *, struct pf_state *,
220                             struct pf_pdesc *);
221 void                     pf_route6(struct mbuf **, struct pf_rule *, int,
222                             struct ifnet *, struct pf_state *,
223                             struct pf_pdesc *);
224 u_int8_t                 pf_get_wscale(struct mbuf *, int, u_int16_t,
225                             sa_family_t);
226 u_int16_t                pf_get_mss(struct mbuf *, int, u_int16_t,
227                             sa_family_t);
228 u_int16_t                pf_calc_mss(struct pf_addr *, sa_family_t,
229                                 u_int16_t);
230 void                     pf_set_rt_ifp(struct pf_state *,
231                             struct pf_addr *);
232 int                      pf_check_proto_cksum(struct mbuf *, int, int,
233                             u_int8_t, sa_family_t);
234 int                      pf_addr_wrap_neq(struct pf_addr_wrap *,
235                             struct pf_addr_wrap *);
236 struct pf_state         *pf_find_state(struct pfi_kif *,
237                             struct pf_state_key_cmp *, u_int8_t);
238 int                      pf_src_connlimit(struct pf_state **);
239 void                     pf_stateins_err(const char *, struct pf_state *,
240                             struct pfi_kif *);
241 int                      pf_check_congestion(struct ifqueue *);
242
243 extern int pf_end_threads;
244
245 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
246         { &pf_state_pl, PFSTATE_HIWAT },
247         { &pf_src_tree_pl, PFSNODE_HIWAT },
248         { &pf_frent_pl, PFFRAG_FRENT_HIWAT },
249         { &pfr_ktable_pl, PFR_KTABLE_HIWAT },
250         { &pfr_kentry_pl, PFR_KENTRY_HIWAT }
251 };
252
/*
 * State lookup helper for the pf_test_state_*() style callers.
 *
 * Relies on the enclosing function providing `direction', `kif', `key'
 * and `state' (a struct pf_state **).  Inbound packets are looked up in
 * the ext/gwy tree, everything else in the lan/ext tree, and the result
 * is stored in *state.
 *
 * This macro RETURNS from the enclosing function:
 *   - PF_DROP when no state is found or the state is marked PFTM_PURGE;
 *   - PF_PASS when the packet is outbound, the state's rule is a
 *     route-to rule for PF_OUT or a reply-to rule for PF_IN, and the
 *     state has an rt_kif that differs from `kif'.
 */
#define STATE_LOOKUP()							\
	do {								\
		*state = pf_find_state(kif, &key,			\
		    (direction == PF_IN) ? PF_EXT_GWY : PF_LAN_EXT);	\
		if (*state == NULL)					\
			return (PF_DROP);				\
		if ((*state)->timeout == PFTM_PURGE)			\
			return (PF_DROP);				\
		if (direction == PF_OUT) {				\
			if ((((*state)->rule.ptr->rt == PF_ROUTETO &&	\
			    (*state)->rule.ptr->direction == PF_OUT) ||	\
			    ((*state)->rule.ptr->rt == PF_REPLYTO &&	\
			    (*state)->rule.ptr->direction == PF_IN)) &&	\
			    (*state)->rt_kif != NULL &&			\
			    (*state)->rt_kif != kif)			\
				return (PF_PASS);			\
		}							\
	} while (0)
270
/*
 * Evaluates to non-zero when the state key's lan and gwy endpoints differ:
 * the first address word differs, or (for AF_INET6 only) any of the
 * remaining three address words differ, or the ports differ.
 *
 * The whole expansion is parenthesised so the macro behaves as a single
 * operand; without the outer parentheses `!STATE_TRANSLATE(sk)' or
 * `x && STATE_TRANSLATE(sk)' would bind to the first comparison only.
 * `sk' is evaluated several times, so it must be free of side effects.
 */
#define STATE_TRANSLATE(sk) \
	((sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
	((sk)->af == AF_INET6 && \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) || \
	(sk)->lan.port != (sk)->gwy.port)
278
/*
 * Selects the interface a state is bound to: `k' when rule `r' has
 * PFRULE_IFBOUND set in rule_flag, otherwise pfi_all.
 *
 * The conditional expression is fully parenthesised so it can be used as
 * an operand (e.g. compared with ==) without the surrounding operator
 * being absorbed into the third operand of ?:.
 */
#define BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
281
/*
 * Increments the `states' counter of the rule referenced by state `s',
 * and of its anchor and nat rule when those pointers are non-NULL.
 *
 * The argument is parenthesised at every use so that any pointer
 * expression (not just a plain identifier) can be passed.  It is
 * evaluated several times, so it must be free of side effects.
 */
#define STATE_INC_COUNTERS(s)				\
	do {						\
		(s)->rule.ptr->states++;		\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states++;	\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states++;	\
	} while (0)
290
/*
 * Decrements the `states' counters touched by STATE_INC_COUNTERS, in the
 * opposite order: nat rule and anchor (when non-NULL), then the rule.
 *
 * The argument is parenthesised at every use so that any pointer
 * expression (not just a plain identifier) can be passed.  It is
 * evaluated several times, so it must be free of side effects.
 */
#define STATE_DEC_COUNTERS(s)				\
	do {						\
		if ((s)->nat_rule.ptr != NULL)		\
			(s)->nat_rule.ptr->states--;	\
		if ((s)->anchor.ptr != NULL)		\
			(s)->anchor.ptr->states--;	\
		(s)->rule.ptr->states--;		\
	} while (0)
299
300 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
301 static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
302         struct pf_state_key *);
303 static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
304         struct pf_state_key *);
305 static __inline int pf_state_compare_id(struct pf_state *,
306         struct pf_state *);
307
308 struct pf_src_tree tree_src_tracking;
309
310 struct pf_state_tree_id tree_id;
311 struct pf_state_queue state_list;
312
313 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
314 RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
315     entry_lan_ext, pf_state_compare_lan_ext);
316 RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
317     entry_ext_gwy, pf_state_compare_ext_gwy);
318 RB_GENERATE(pf_state_tree_id, pf_state,
319     entry_id, pf_state_compare_id);
320
321 #define PF_DT_SKIP_LANEXT       0x01
322 #define PF_DT_SKIP_EXTGWY       0x02
323
324 static __inline int
325 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
326 {
327         int     diff;
328
329         if (a->rule.ptr > b->rule.ptr)
330                 return (1);
331         if (a->rule.ptr < b->rule.ptr)
332                 return (-1);
333         if ((diff = a->af - b->af) != 0)
334                 return (diff);
335         switch (a->af) {
336 #ifdef INET
337         case AF_INET:
338                 if (a->addr.addr32[0] > b->addr.addr32[0])
339                         return (1);
340                 if (a->addr.addr32[0] < b->addr.addr32[0])
341                         return (-1);
342                 break;
343 #endif /* INET */
344 #ifdef INET6
345         case AF_INET6:
346                 if (a->addr.addr32[3] > b->addr.addr32[3])
347                         return (1);
348                 if (a->addr.addr32[3] < b->addr.addr32[3])
349                         return (-1);
350                 if (a->addr.addr32[2] > b->addr.addr32[2])
351                         return (1);
352                 if (a->addr.addr32[2] < b->addr.addr32[2])
353                         return (-1);
354                 if (a->addr.addr32[1] > b->addr.addr32[1])
355                         return (1);
356                 if (a->addr.addr32[1] < b->addr.addr32[1])
357                         return (-1);
358                 if (a->addr.addr32[0] > b->addr.addr32[0])
359                         return (1);
360                 if (a->addr.addr32[0] < b->addr.addr32[0])
361                         return (-1);
362                 break;
363 #endif /* INET6 */
364         }
365         return (0);
366 }
367
368 u_int32_t
369 pf_state_hash(struct pf_state_key *sk)
370 {
371         u_int32_t hv = (intptr_t)sk / sizeof(*sk);
372
373         hv ^= crc32(&sk->lan, sizeof(sk->lan));
374         hv ^= crc32(&sk->gwy, sizeof(sk->gwy));
375         hv ^= crc32(&sk->ext, sizeof(sk->ext));
376         if (hv == 0)    /* disallow 0 */
377                 hv = 1;
378         return(hv);
379 }
380
381 static __inline int
382 pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
383 {
384         int     diff;
385
386         if ((diff = a->proto - b->proto) != 0)
387                 return (diff);
388         if ((diff = a->af - b->af) != 0)
389                 return (diff);
390         switch (a->af) {
391 #ifdef INET
392         case AF_INET:
393                 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
394                         return (1);
395                 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
396                         return (-1);
397                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
398                         return (1);
399                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
400                         return (-1);
401                 break;
402 #endif /* INET */
403 #ifdef INET6
404         case AF_INET6:
405                 if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3])
406                         return (1);
407                 if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3])
408                         return (-1);
409                 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
410                         return (1);
411                 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
412                         return (-1);
413                 if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2])
414                         return (1);
415                 if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2])
416                         return (-1);
417                 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
418                         return (1);
419                 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
420                         return (-1);
421                 if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1])
422                         return (1);
423                 if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1])
424                         return (-1);
425                 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
426                         return (1);
427                 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
428                         return (-1);
429                 if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0])
430                         return (1);
431                 if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0])
432                         return (-1);
433                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
434                         return (1);
435                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
436                         return (-1);
437                 break;
438 #endif /* INET6 */
439         }
440
441         if ((diff = a->lan.port - b->lan.port) != 0)
442                 return (diff);
443         if ((diff = a->ext.port - b->ext.port) != 0)
444                 return (diff);
445
446         return (0);
447 }
448
449 static __inline int
450 pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
451 {
452         int     diff;
453
454         if ((diff = a->proto - b->proto) != 0)
455                 return (diff);
456         if ((diff = a->af - b->af) != 0)
457                 return (diff);
458         switch (a->af) {
459 #ifdef INET
460         case AF_INET:
461                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
462                         return (1);
463                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
464                         return (-1);
465                 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
466                         return (1);
467                 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
468                         return (-1);
469                 break;
470 #endif /* INET */
471 #ifdef INET6
472         case AF_INET6:
473                 if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3])
474                         return (1);
475                 if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3])
476                         return (-1);
477                 if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3])
478                         return (1);
479                 if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3])
480                         return (-1);
481                 if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2])
482                         return (1);
483                 if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2])
484                         return (-1);
485                 if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2])
486                         return (1);
487                 if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2])
488                         return (-1);
489                 if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1])
490                         return (1);
491                 if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1])
492                         return (-1);
493                 if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1])
494                         return (1);
495                 if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1])
496                         return (-1);
497                 if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0])
498                         return (1);
499                 if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0])
500                         return (-1);
501                 if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0])
502                         return (1);
503                 if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0])
504                         return (-1);
505                 break;
506 #endif /* INET6 */
507         }
508
509         if ((diff = a->ext.port - b->ext.port) != 0)
510                 return (diff);
511         if ((diff = a->gwy.port - b->gwy.port) != 0)
512                 return (diff);
513
514         return (0);
515 }
516
517 static __inline int
518 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
519 {
520         if (a->id > b->id)
521                 return (1);
522         if (a->id < b->id)
523                 return (-1);
524         if (a->creatorid > b->creatorid)
525                 return (1);
526         if (a->creatorid < b->creatorid)
527                 return (-1);
528
529         return (0);
530 }
531
#ifdef INET6
/*
 * Copies an address from `src' to `dst' according to the address family:
 * one 32-bit word for AF_INET (when INET is compiled in), all four words
 * for AF_INET6.  Any other family leaves `dst' untouched.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	int	w;

#ifdef INET
	if (af == AF_INET) {
		dst->addr32[0] = src->addr32[0];
		return;
	}
#endif /* INET */
	if (af == AF_INET6) {
		for (w = 0; w < 4; w++)
			dst->addr32[w] = src->addr32[w];
	}
}
#endif /* INET6 */
551
552 struct pf_state *
553 pf_find_state_byid(struct pf_state_cmp *key)
554 {
555         pf_status.fcounters[FCNT_STATE_SEARCH]++;
556         
557         return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
558 }
559
560 struct pf_state *
561 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int8_t tree)
562 {
563         struct pf_state_key     *sk;
564         struct pf_state         *s;
565
566         pf_status.fcounters[FCNT_STATE_SEARCH]++;
567
568         switch (tree) {
569         case PF_LAN_EXT:
570                 sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
571                     (struct pf_state_key *)key);
572                 break;
573         case PF_EXT_GWY:
574                 sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
575                     (struct pf_state_key *)key);
576                 break;
577         default:
578                 panic("pf_find_state");
579         }
580
581         /* list is sorted, if-bound states before floating ones */
582         if (sk != NULL) {
583                 TAILQ_FOREACH(s, &sk->states, next) {
584                         if (s->kif == pfi_all || s->kif == kif)
585                                 return (s);
586                 }
587         }
588
589         return (NULL);
590 }
591
592 struct pf_state *
593 pf_find_state_all(struct pf_state_key_cmp *key, u_int8_t tree, int *more)
594 {
595         struct pf_state_key     *sk;
596         struct pf_state         *s, *ret = NULL;
597
598         pf_status.fcounters[FCNT_STATE_SEARCH]++;
599
600         switch (tree) {
601         case PF_LAN_EXT:
602                 sk = RB_FIND(pf_state_tree_lan_ext,
603                     &pf_statetbl_lan_ext, (struct pf_state_key *)key);
604                 break;
605         case PF_EXT_GWY:
606                 sk = RB_FIND(pf_state_tree_ext_gwy,
607                     &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
608                 break;
609         default:
610                 panic("pf_find_state_all");
611         }
612
613         if (sk != NULL) {
614                 ret = TAILQ_FIRST(&sk->states);
615                 if (more == NULL)
616                         return (ret);
617
618                 TAILQ_FOREACH(s, &sk->states, next)
619                         (*more)++;
620         }
621
622         return (ret);
623 }
624
625 void
626 pf_init_threshold(struct pf_threshold *threshold,
627     u_int32_t limit, u_int32_t seconds)
628 {
629         threshold->limit = limit * PF_THRESHOLD_MULT;
630         threshold->seconds = seconds;
631         threshold->count = 0;
632         threshold->last = time_second;
633 }
634
635 void
636 pf_add_threshold(struct pf_threshold *threshold)
637 {
638         u_int32_t t = time_second, diff = t - threshold->last;
639
640         if (diff >= threshold->seconds)
641                 threshold->count = 0;
642         else
643                 threshold->count -= threshold->count * diff /
644                     threshold->seconds;
645         threshold->count += PF_THRESHOLD_MULT;
646         threshold->last = t;
647 }
648
649 int
650 pf_check_threshold(struct pf_threshold *threshold)
651 {
652         return (threshold->count > threshold->limit);
653 }
654
655 int
656 pf_src_connlimit(struct pf_state **state)
657 {
658         int bad = 0;
659
660         (*state)->src_node->conn++;
661         (*state)->src.tcp_est = 1;
662         pf_add_threshold(&(*state)->src_node->conn_rate);
663
664         if ((*state)->rule.ptr->max_src_conn &&
665             (*state)->rule.ptr->max_src_conn <
666             (*state)->src_node->conn) {
667                 pf_status.lcounters[LCNT_SRCCONN]++;
668                 bad++;
669         }
670
671         if ((*state)->rule.ptr->max_src_conn_rate.limit &&
672             pf_check_threshold(&(*state)->src_node->conn_rate)) {
673                 pf_status.lcounters[LCNT_SRCCONNRATE]++;
674                 bad++;
675         }
676
677         if (!bad)
678                 return (0);
679
680         if ((*state)->rule.ptr->overload_tbl) {
681                 struct pfr_addr p;
682                 u_int32_t       killed = 0;
683
684                 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
685                 if (pf_status.debug >= PF_DEBUG_MISC) {
686                         kprintf("pf_src_connlimit: blocking address ");
687                         pf_print_host(&(*state)->src_node->addr, 0,
688                             (*state)->state_key->af);
689                 }
690
691                 bzero(&p, sizeof(p));
692                 p.pfra_af = (*state)->state_key->af;
693                 switch ((*state)->state_key->af) {
694 #ifdef INET
695                 case AF_INET:
696                         p.pfra_net = 32;
697                         p.pfra_ip4addr = (*state)->src_node->addr.v4;
698                         break;
699 #endif /* INET */
700 #ifdef INET6
701                 case AF_INET6:
702                         p.pfra_net = 128;
703                         p.pfra_ip6addr = (*state)->src_node->addr.v6;
704                         break;
705 #endif /* INET6 */
706                 }
707
708                 pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
709                     &p, time_second);
710
711                 /* kill existing states if that's required. */
712                 if ((*state)->rule.ptr->flush) {
713                         struct pf_state_key *sk;
714                         struct pf_state *st;
715
716                         pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
717                         RB_FOREACH(st, pf_state_tree_id, &tree_id) {
718                                 sk = st->state_key;
719                                 /*
720                                  * Kill states from this source.  (Only those
721                                  * from the same rule if PF_FLUSH_GLOBAL is not
722                                  * set)
723                                  */
724                                 if (sk->af ==
725                                     (*state)->state_key->af &&
726                                     (((*state)->state_key->direction ==
727                                         PF_OUT &&
728                                     PF_AEQ(&(*state)->src_node->addr,
729                                         &sk->lan.addr, sk->af)) ||
730                                     ((*state)->state_key->direction == PF_IN &&
731                                     PF_AEQ(&(*state)->src_node->addr,
732                                         &sk->ext.addr, sk->af))) &&
733                                     ((*state)->rule.ptr->flush &
734                                     PF_FLUSH_GLOBAL ||
735                                     (*state)->rule.ptr == st->rule.ptr)) {
736                                         st->timeout = PFTM_PURGE;
737                                         st->src.state = st->dst.state =
738                                             TCPS_CLOSED;
739                                         killed++;
740                                 }
741                         }
742                         if (pf_status.debug >= PF_DEBUG_MISC)
743                                 kprintf(", %u states killed", killed);
744                 }
745                 if (pf_status.debug >= PF_DEBUG_MISC)
746                         kprintf("\n");
747         }
748
749         /* kill this state */
750         (*state)->timeout = PFTM_PURGE;
751         (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
752         return (1);
753 }
754
755 int
756 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
757     struct pf_addr *src, sa_family_t af)
758 {
759         struct pf_src_node      k;
760
761         if (*sn == NULL) {
762                 k.af = af;
763                 PF_ACPY(&k.addr, src, af);
764                 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
765                     rule->rpool.opts & PF_POOL_STICKYADDR)
766                         k.rule.ptr = rule;
767                 else
768                         k.rule.ptr = NULL;
769                 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
770                 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
771         }
772         if (*sn == NULL) {
773                 if (!rule->max_src_nodes ||
774                     rule->src_nodes < rule->max_src_nodes)
775                         (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
776                 else
777                         pf_status.lcounters[LCNT_SRCNODES]++;
778                 if ((*sn) == NULL)
779                         return (-1);
780                 bzero(*sn, sizeof(struct pf_src_node));
781
782                 pf_init_threshold(&(*sn)->conn_rate,
783                     rule->max_src_conn_rate.limit,
784                     rule->max_src_conn_rate.seconds);
785
786                 (*sn)->af = af;
787                 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
788                     rule->rpool.opts & PF_POOL_STICKYADDR)
789                         (*sn)->rule.ptr = rule;
790                 else
791                         (*sn)->rule.ptr = NULL;
792                 PF_ACPY(&(*sn)->addr, src, af);
793                 if (RB_INSERT(pf_src_tree,
794                     &tree_src_tracking, *sn) != NULL) {
795                         if (pf_status.debug >= PF_DEBUG_MISC) {
796                                 kprintf("pf: src_tree insert failed: ");
797                                 pf_print_host(&(*sn)->addr, 0, af);
798                                 kprintf("\n");
799                         }
800                         pool_put(&pf_src_tree_pl, *sn);
801                         return (-1);
802                 }
803                 (*sn)->creation = time_second;
804                 (*sn)->ruletype = rule->action;
805                 if ((*sn)->rule.ptr != NULL)
806                         (*sn)->rule.ptr->src_nodes++;
807                 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
808                 pf_status.src_nodes++;
809         } else {
810                 if (rule->max_src_states &&
811                     (*sn)->states >= rule->max_src_states) {
812                         pf_status.lcounters[LCNT_SRCSTATES]++;
813                         return (-1);
814                 }
815         }
816         return (0);
817 }
818
819 void
820 pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
821 {
822         struct pf_state_key     *sk = s->state_key;
823
824         if (pf_status.debug >= PF_DEBUG_MISC) {
825                 kprintf("pf: state insert failed: %s %s", tree, kif->pfik_name);
826                 kprintf(" lan: ");
827                 pf_print_host(&sk->lan.addr, sk->lan.port,
828                     sk->af);
829                 kprintf(" gwy: ");
830                 pf_print_host(&sk->gwy.addr, sk->gwy.port,
831                     sk->af);
832                 kprintf(" ext: ");
833                 pf_print_host(&sk->ext.addr, sk->ext.port,
834                     sk->af);
835                 if (s->sync_flags & PFSTATE_FROMSYNC)
836                         kprintf(" (from sync)");
837                 kprintf("\n");
838         }
839 }
840
841 int
842 pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
843 {
844         struct pf_state_key     *cur;
845         struct pf_state         *sp;
846
847         KKASSERT(s->state_key != NULL);
848         s->kif = kif;
849
850         if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
851             s->state_key)) != NULL) {
852                 /* key exists. check for same kif, if none, add to key */
853                 TAILQ_FOREACH(sp, &cur->states, next)
854                         if (sp->kif == kif) {   /* collision! */
855                                 pf_stateins_err("tree_lan_ext", s, kif);
856                                 pf_detach_state(s,
857                                     PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
858                                 return (-1);
859                         }
860                 pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY);
861                 pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
862         }
863
864         /* if cur != NULL, we already found a state key and attached to it */
865         if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
866             &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
867                 /* must not happen. we must have found the sk above! */
868                 pf_stateins_err("tree_ext_gwy", s, kif);
869                 pf_detach_state(s, PF_DT_SKIP_EXTGWY);
870                 return (-1);
871         }
872
873         if (s->id == 0 && s->creatorid == 0) {
874                 s->id = htobe64(pf_status.stateid++);
875                 s->creatorid = pf_status.hostid;
876         }
877         if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
878                 if (pf_status.debug >= PF_DEBUG_MISC) {
879                         kprintf("pf: state insert failed: "
880                             "id: %016jx creatorid: %08x",
881                             (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
882                         if (s->sync_flags & PFSTATE_FROMSYNC)
883                                 kprintf(" (from sync)");
884                         kprintf("\n");
885                 }
886                 pf_detach_state(s, 0);
887                 return (-1);
888         }
889         TAILQ_INSERT_TAIL(&state_list, s, entry_list);
890         pf_status.fcounters[FCNT_STATE_INSERT]++;
891         pf_status.states++;
892         pfi_kif_ref(kif, PFI_KIF_REF_STATE);
893 #if NPFSYNC
894         pfsync_insert_state(s);
895 #endif
896         return (0);
897 }
898
899 void
900 pf_purge_thread(void *v)
901 {
902         int nloops = 0;
903         int locked = 0;
904
905         lwkt_gettoken(&pf_token);
906         for (;;) {
907                 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
908
909                 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
910
911                 if (pf_end_threads) {
912                         pf_purge_expired_states(pf_status.states, 1);
913                         pf_purge_expired_fragments();
914                         pf_purge_expired_src_nodes(1);
915                         pf_end_threads++;
916
917                         lockmgr(&pf_consistency_lock, LK_RELEASE);
918                         wakeup(pf_purge_thread);
919                         kthread_exit();
920                 }
921                 crit_enter();
922
923                 /* process a fraction of the state table every second */
924                 if(!pf_purge_expired_states(1 + (pf_status.states
925                     / pf_default_rule.timeout[PFTM_INTERVAL]), 0)) {
926
927                         pf_purge_expired_states(1 + (pf_status.states
928                             / pf_default_rule.timeout[PFTM_INTERVAL]), 1);
929                 }
930
931                 /* purge other expired types every PFTM_INTERVAL seconds */
932                 if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
933                         pf_purge_expired_fragments();
934                         if (!pf_purge_expired_src_nodes(locked)) {
935                                 pf_purge_expired_src_nodes(1);
936                         }
937                         nloops = 0;
938                 }
939                 crit_exit();
940                 lockmgr(&pf_consistency_lock, LK_RELEASE);
941         }
942         lwkt_reltoken(&pf_token);
943 }
944
945 u_int32_t
946 pf_state_expires(const struct pf_state *state)
947 {
948         u_int32_t       timeout;
949         u_int32_t       start;
950         u_int32_t       end;
951         u_int32_t       states;
952
953         /* handle all PFTM_* > PFTM_MAX here */
954         if (state->timeout == PFTM_PURGE)
955                 return (time_second);
956         if (state->timeout == PFTM_UNTIL_PACKET)
957                 return (0);
958         KKASSERT(state->timeout != PFTM_UNLINKED);
959         KASSERT((state->timeout < PFTM_MAX),
960                 ("pf_state_expires: timeout > PFTM_MAX"));
961         timeout = state->rule.ptr->timeout[state->timeout];
962         if (!timeout)
963                 timeout = pf_default_rule.timeout[state->timeout];
964         start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
965         if (start) {
966                 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
967                 states = state->rule.ptr->states;
968         } else {
969                 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
970                 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
971                 states = pf_status.states;
972         }
973         if (end && states > start && start < end) {
974                 if (states < end)
975                         return (state->expire + timeout * (end - states) /
976                             (end - start));
977                 else
978                         return (time_second);
979         }
980         return (state->expire + timeout);
981 }
982
983 int
984 pf_purge_expired_src_nodes(int waslocked)
985 {
986          struct pf_src_node             *cur, *next;
987          int                             locked = waslocked;
988
989          for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
990                  next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
991
992                  if (cur->states <= 0 && cur->expire <= time_second) {
993                          if (! locked) {
994                                  lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
995                                  next = RB_NEXT(pf_src_tree,
996                                      &tree_src_tracking, cur);
997                                  locked = 1;
998                          }
999                          if (cur->rule.ptr != NULL) {
1000                                  cur->rule.ptr->src_nodes--;
1001                                  if (cur->rule.ptr->states <= 0 &&
1002                                      cur->rule.ptr->max_src_nodes <= 0)
1003                                          pf_rm_rule(NULL, cur->rule.ptr);
1004                          }
1005                          RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1006                          pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1007                          pf_status.src_nodes--;
1008                          pool_put(&pf_src_tree_pl, cur);
1009                  }
1010          }
1011
1012          if (locked && !waslocked)
1013                 lockmgr(&pf_consistency_lock, LK_RELEASE);
1014         return(1);
1015 }
1016
1017 void
1018 pf_src_tree_remove_state(struct pf_state *s)
1019 {
1020         u_int32_t timeout;
1021
1022         if (s->src_node != NULL) {
1023                 if (s->src.tcp_est)
1024                         --s->src_node->conn;
1025                 if (--s->src_node->states <= 0) {
1026                         timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1027                         if (!timeout)
1028                                 timeout =
1029                                     pf_default_rule.timeout[PFTM_SRC_NODE];
1030                         s->src_node->expire = time_second + timeout;
1031                 }
1032         }
1033         if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1034                 if (--s->nat_src_node->states <= 0) {
1035                         timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
1036                         if (!timeout)
1037                                 timeout =
1038                                     pf_default_rule.timeout[PFTM_SRC_NODE];
1039                         s->nat_src_node->expire = time_second + timeout;
1040                 }
1041         }
1042         s->src_node = s->nat_src_node = NULL;
1043 }
1044
1045 /* callers should be at crit_enter() */
1046 void
1047 pf_unlink_state(struct pf_state *cur)
1048 {
1049         if (cur->src.state == PF_TCPS_PROXY_DST) {
1050                 pf_send_tcp(cur->rule.ptr, cur->state_key->af,
1051                     &cur->state_key->ext.addr, &cur->state_key->lan.addr,
1052                     cur->state_key->ext.port, cur->state_key->lan.port,
1053                     cur->src.seqhi, cur->src.seqlo + 1,
1054                     TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1055         }
1056         RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1057 #if NPFSYNC
1058         if (cur->creatorid == pf_status.hostid)
1059                 pfsync_delete_state(cur);
1060 #endif
1061         cur->timeout = PFTM_UNLINKED;
1062         pf_src_tree_remove_state(cur);
1063         pf_detach_state(cur, 0);
1064 }
1065
1066 static struct pf_state  *purge_cur;
1067
1068 /* callers should be at crit_enter() and hold the
1069  * write_lock on pf_consistency_lock */
1070 void
1071 pf_free_state(struct pf_state *cur)
1072 {
1073 #if NPFSYNC
1074         if (pfsyncif != NULL &&
1075             (pfsyncif->sc_bulk_send_next == cur ||
1076             pfsyncif->sc_bulk_terminator == cur))
1077                 return;
1078 #endif
1079         KKASSERT(cur->timeout == PFTM_UNLINKED);
1080         if (--cur->rule.ptr->states <= 0 &&
1081             cur->rule.ptr->src_nodes <= 0)
1082                 pf_rm_rule(NULL, cur->rule.ptr);
1083         if (cur->nat_rule.ptr != NULL)
1084                 if (--cur->nat_rule.ptr->states <= 0 &&
1085                         cur->nat_rule.ptr->src_nodes <= 0)
1086                         pf_rm_rule(NULL, cur->nat_rule.ptr);
1087         if (cur->anchor.ptr != NULL)
1088                 if (--cur->anchor.ptr->states <= 0)
1089                         pf_rm_rule(NULL, cur->anchor.ptr);
1090         pf_normalize_tcp_cleanup(cur);
1091         pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1092
1093         /*
1094          * We may be freeing pf_purge_expired_states()'s saved scan entry,
1095          * adjust it if necessary.
1096          */
1097         if (purge_cur == cur) {
1098                 kprintf("PURGE CONFLICT\n");
1099                 purge_cur = TAILQ_NEXT(purge_cur, entry_list);
1100         }
1101         TAILQ_REMOVE(&state_list, cur, entry_list);
1102
1103         if (cur->tag)
1104                 pf_tag_unref(cur->tag);
1105         pool_put(&pf_state_pl, cur);
1106         pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1107         pf_status.states--;
1108 }
1109
1110 int
1111 pf_purge_expired_states(u_int32_t maxcheck, int waslocked)
1112 {
1113         struct pf_state         *cur;
1114         int                      locked = waslocked;
1115
1116         while (maxcheck--) {
1117                 /*
1118                  * Wrap to start of list when we hit the end
1119                  */
1120                 cur = purge_cur;
1121                 if (cur == NULL) {
1122                         cur = TAILQ_FIRST(&state_list);
1123                         if (cur == NULL)
1124                                 break;  /* list empty */
1125                 }
1126
1127                 /*
1128                  * Setup next (purge_cur) while we process this one.  If we block and
1129                  * something else deletes purge_cur, pf_free_state() will adjust it further
1130                  * ahead.
1131                  */
1132                 purge_cur = TAILQ_NEXT(cur, entry_list);
1133
1134                 if (cur->timeout == PFTM_UNLINKED) {
1135                         /* free unlinked state */
1136                         if (! locked) {
1137                                 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
1138                                 locked = 1;
1139                         }
1140                         pf_free_state(cur);
1141                 } else if (pf_state_expires(cur) <= time_second) {
1142                         /* unlink and free expired state */
1143                         pf_unlink_state(cur);
1144                         if (! locked) {
1145                                 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE))
1146                                         return (0);
1147                                 locked = 1;
1148                         }
1149                         pf_free_state(cur);
1150                 }
1151         }
1152
1153         if (locked)
1154                 lockmgr(&pf_consistency_lock, LK_RELEASE);
1155         return (1);
1156 }
1157
1158 int
1159 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1160 {
1161         if (aw->type != PF_ADDR_TABLE)
1162                 return (0);
1163         if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL)
1164                 return (1);
1165         return (0);
1166 }
1167
1168 void
1169 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1170 {
1171         if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
1172                 return;
1173         pfr_detach_table(aw->p.tbl);
1174         aw->p.tbl = NULL;
1175 }
1176
1177 void
1178 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1179 {
1180         struct pfr_ktable *kt = aw->p.tbl;
1181
1182         if (aw->type != PF_ADDR_TABLE || kt == NULL)
1183                 return;
1184         if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
1185                 kt = kt->pfrkt_root;
1186         aw->p.tbl = NULL;
1187         aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1188                 kt->pfrkt_cnt : -1;
1189 }
1190
/*
 * Print an address, and the port when it is non-zero, with kprintf().
 *
 * addr: address to print, in network byte order
 * p:    port in network byte order; 0 means "do not print a port"
 * af:   AF_INET prints "a.b.c.d" followed by ":port";
 *       AF_INET6 prints the eight 16-bit groups in hex followed by
 *       "[port]".  The longest run of two or more all-zero groups (the
 *       leftmost one on a tie) is written as "::".  Any other address
 *       family prints nothing.
 */
void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);

		kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			kprintf(":%u", p);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		int i;
		int runstart = -1;		/* start of current zero run */
		int runlen = 0;			/* length of current zero run */
		int maxstart = -1, maxlen = 0;	/* longest zero run so far */
		int maxlast;

		/* locate the longest run of zero groups */
		for (i = 0; i < 8; i++) {
			if (addr->addr16[i] != 0) {
				runstart = -1;
				runlen = 0;
				continue;
			}
			if (runstart < 0)
				runstart = i;
			runlen = i - runstart + 1;
			if (runlen > maxlen) {
				maxstart = runstart;
				maxlen = runlen;
			}
		}
		/* a single zero group is printed as "0", not compressed */
		if (maxlen < 2) {
			maxstart = -1;
			maxlen = 0;
		}
		maxlast = maxstart + maxlen - 1;

		for (i = 0; i < 8; i++) {
			if (maxstart >= 0 && i >= maxstart && i <= maxlast) {
				/*
				 * Inside the compressed run.  A preceding
				 * group has already printed one ':'; a run
				 * starting at group 0 has to supply it.
				 */
				if (i == 0)
					kprintf(":");
				if (i == maxlast)
					kprintf(":");
			} else {
				b = ntohs(addr->addr16[i]);
				kprintf("%x", b);
				if (i < 7)
					kprintf(":");
			}
		}
		if (p) {
			p = ntohs(p);
			kprintf("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
1254
1255 void
1256 pf_print_state(struct pf_state *s)
1257 {
1258         struct pf_state_key *sk = s->state_key;
1259         switch (sk->proto) {
1260         case IPPROTO_TCP:
1261                 kprintf("TCP ");
1262                 break;
1263         case IPPROTO_UDP:
1264                 kprintf("UDP ");
1265                 break;
1266         case IPPROTO_ICMP:
1267                 kprintf("ICMP ");
1268                 break;
1269         case IPPROTO_ICMPV6:
1270                 kprintf("ICMPV6 ");
1271                 break;
1272         default:
1273                 kprintf("%u ", sk->proto);
1274                 break;
1275         }
1276         pf_print_host(&sk->lan.addr, sk->lan.port, sk->af);
1277         kprintf(" ");
1278         pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af);
1279         kprintf(" ");
1280         pf_print_host(&sk->ext.addr, sk->ext.port, sk->af);
1281         kprintf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
1282             s->src.seqhi, s->src.max_win, s->src.seqdiff);
1283         if (s->src.wscale && s->dst.wscale)
1284                 kprintf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
1285         kprintf("]");
1286         kprintf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
1287             s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
1288         if (s->src.wscale && s->dst.wscale)
1289                 kprintf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
1290         kprintf("]");
1291         kprintf(" %u:%u", s->src.state, s->dst.state);
1292 }
1293
1294 void
1295 pf_print_flags(u_int8_t f)
1296 {
1297         if (f)
1298                 kprintf(" ");
1299         if (f & TH_FIN)
1300                 kprintf("F");
1301         if (f & TH_SYN)
1302                 kprintf("S");
1303         if (f & TH_RST)
1304                 kprintf("R");
1305         if (f & TH_PUSH)
1306                 kprintf("P");
1307         if (f & TH_ACK)
1308                 kprintf("A");
1309         if (f & TH_URG)
1310                 kprintf("U");
1311         if (f & TH_ECE)
1312                 kprintf("E");
1313         if (f & TH_CWR)
1314                 kprintf("W");
1315 }
1316
/*
 * Helper for pf_calc_skip_steps(); relies on that function's locals
 * `head' and `cur'.  Every rule from head[i] up to, but not including,
 * cur gets cur as its skip target for criterion i, and head[i] ends up
 * equal to cur.
 */
#define PF_SET_SKIP_STEPS(i)						\
	do {								\
		for (; head[i] != cur;					\
		    head[i] = TAILQ_NEXT(head[i], entries))		\
			head[i]->skip[i].ptr = cur;			\
	} while (0)
1324
1325 void
1326 pf_calc_skip_steps(struct pf_rulequeue *rules)
1327 {
1328         struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
1329         int i;
1330
1331         cur = TAILQ_FIRST(rules);
1332         prev = cur;
1333         for (i = 0; i < PF_SKIP_COUNT; ++i)
1334                 head[i] = cur;
1335         while (cur != NULL) {
1336
1337                 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
1338                         PF_SET_SKIP_STEPS(PF_SKIP_IFP);
1339                 if (cur->direction != prev->direction)
1340                         PF_SET_SKIP_STEPS(PF_SKIP_DIR);
1341                 if (cur->af != prev->af)
1342                         PF_SET_SKIP_STEPS(PF_SKIP_AF);
1343                 if (cur->proto != prev->proto)
1344                         PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
1345                 if (cur->src.neg != prev->src.neg ||
1346                     pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
1347                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
1348                 if (cur->src.port[0] != prev->src.port[0] ||
1349                     cur->src.port[1] != prev->src.port[1] ||
1350                     cur->src.port_op != prev->src.port_op)
1351                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
1352                 if (cur->dst.neg != prev->dst.neg ||
1353                     pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
1354                         PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
1355                 if (cur->dst.port[0] != prev->dst.port[0] ||
1356                     cur->dst.port[1] != prev->dst.port[1] ||
1357                     cur->dst.port_op != prev->dst.port_op)
1358                         PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
1359
1360                 prev = cur;
1361                 cur = TAILQ_NEXT(cur, entries);
1362         }
1363         for (i = 0; i < PF_SKIP_COUNT; ++i)
1364                 PF_SET_SKIP_STEPS(i);
1365 }
1366
1367 int
1368 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
1369 {
1370         if (aw1->type != aw2->type)
1371                 return (1);
1372         switch (aw1->type) {
1373         case PF_ADDR_ADDRMASK:
1374                 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
1375                         return (1);
1376                 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
1377                         return (1);
1378                 return (0);
1379         case PF_ADDR_DYNIFTL:
1380                 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
1381         case PF_ADDR_NOROUTE:
1382         case PF_ADDR_URPFFAILED:
1383                 return (0);
1384         case PF_ADDR_TABLE:
1385                 return (aw1->p.tbl != aw2->p.tbl);
1386         case PF_ADDR_RTLABEL:
1387                 return (aw1->v.rtlabel != aw2->v.rtlabel);
1388         default:
1389                 kprintf("invalid address type: %d\n", aw1->type);
1390                 return (1);
1391         }
1392 }
1393
/*
 * Incrementally adjust the 16-bit checksum `cksum' for one 16-bit word of
 * the covered data changing from `old' to `new', and return the adjusted
 * checksum.
 *
 * When `udp' is non-zero, a checksum of 0 is passed through unchanged and
 * a result of 0 is returned as 0xFFFF instead.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	acc;

	if (udp && cksum == 0)
		return (0x0000);

	acc = cksum + old - new;
	/* fold the carry (or borrow) back into the low 16 bits */
	acc = ((acc >> 16) + (acc & 65535)) & 65535;

	if (udp && acc == 0)
		return (0xFFFF);
	return (acc);
}
1408
1409 void
1410 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
1411     struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
1412 {
1413         struct pf_addr  ao;
1414         u_int16_t       po = *p;
1415
1416         PF_ACPY(&ao, a, af);
1417         PF_ACPY(a, an, af);
1418
1419         *p = pn;
1420
1421         switch (af) {
1422 #ifdef INET
1423         case AF_INET:
1424                 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1425                     ao.addr16[0], an->addr16[0], 0),
1426                     ao.addr16[1], an->addr16[1], 0);
1427                 *p = pn;
1428                 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1429                     ao.addr16[0], an->addr16[0], u),
1430                     ao.addr16[1], an->addr16[1], u),
1431                     po, pn, u);
1432                 break;
1433 #endif /* INET */
1434 #ifdef INET6
1435         case AF_INET6:
1436                 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1437                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1438                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
1439                     ao.addr16[0], an->addr16[0], u),
1440                     ao.addr16[1], an->addr16[1], u),
1441                     ao.addr16[2], an->addr16[2], u),
1442                     ao.addr16[3], an->addr16[3], u),
1443                     ao.addr16[4], an->addr16[4], u),
1444                     ao.addr16[5], an->addr16[5], u),
1445                     ao.addr16[6], an->addr16[6], u),
1446                     ao.addr16[7], an->addr16[7], u),
1447                     po, pn, u);
1448                 break;
1449 #endif /* INET6 */
1450         }
1451 }
1452
1453
/*
 * Replace the 32-bit value at `a' with `an' and adjust the checksum `*c'
 * for the change, upper 16 bits first, then lower 16 bits.  `u' is the
 * udp flag handed to pf_cksum_fixup().
 *
 * `a' is a void * and is accessed with memcpy(), so it has no alignment
 * restrictions.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	prev;
	u_int16_t	sum;

	memcpy(&prev, a, sizeof(prev));
	memcpy(a, &an, sizeof(an));

	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
}
1465
1466 #ifdef INET6
1467 void
1468 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
1469 {
1470         struct pf_addr  ao;
1471
1472         PF_ACPY(&ao, a, AF_INET6);
1473         PF_ACPY(a, an, AF_INET6);
1474
1475         *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1476             pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1477             pf_cksum_fixup(pf_cksum_fixup(*c,
1478             ao.addr16[0], an->addr16[0], u),
1479             ao.addr16[1], an->addr16[1], u),
1480             ao.addr16[2], an->addr16[2], u),
1481             ao.addr16[3], an->addr16[3], u),
1482             ao.addr16[4], an->addr16[4], u),
1483             ao.addr16[5], an->addr16[5], u),
1484             ao.addr16[6], an->addr16[6], u),
1485             ao.addr16[7], an->addr16[7], u);
1486 }
1487 #endif /* INET6 */
1488
1489 void
1490 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
1491     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
1492     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
1493 {
1494         struct pf_addr  oia, ooa;
1495
1496         PF_ACPY(&oia, ia, af);
1497         PF_ACPY(&ooa, oa, af);
1498
1499         /* Change inner protocol port, fix inner protocol checksum. */
1500         if (ip != NULL) {
1501                 u_int16_t       oip = *ip;
1502                 u_int32_t       opc = 0;
1503
1504                 if (pc != NULL)
1505                         opc = *pc;
1506                 *ip = np;
1507                 if (pc != NULL)
1508                         *pc = pf_cksum_fixup(*pc, oip, *ip, u);
1509                 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
1510                 if (pc != NULL)
1511                         *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
1512         }
1513         /* Change inner ip address, fix inner ip and icmp checksums. */
1514         PF_ACPY(ia, na, af);
1515         switch (af) {
1516 #ifdef INET
1517         case AF_INET: {
1518                 u_int32_t        oh2c = *h2c;
1519
1520                 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
1521                     oia.addr16[0], ia->addr16[0], 0),
1522                     oia.addr16[1], ia->addr16[1], 0);
1523                 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
1524                     oia.addr16[0], ia->addr16[0], 0),
1525                     oia.addr16[1], ia->addr16[1], 0);
1526                 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
1527                 break;
1528         }
1529 #endif /* INET */
1530 #ifdef INET6
1531         case AF_INET6:
1532                 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1533                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1534                     pf_cksum_fixup(pf_cksum_fixup(*ic,
1535                     oia.addr16[0], ia->addr16[0], u),
1536                     oia.addr16[1], ia->addr16[1], u),
1537                     oia.addr16[2], ia->addr16[2], u),
1538                     oia.addr16[3], ia->addr16[3], u),
1539                     oia.addr16[4], ia->addr16[4], u),
1540                     oia.addr16[5], ia->addr16[5], u),
1541                     oia.addr16[6], ia->addr16[6], u),
1542                     oia.addr16[7], ia->addr16[7], u);
1543                 break;
1544 #endif /* INET6 */
1545         }
1546         /* Change outer ip address, fix outer ip or icmpv6 checksum. */
1547         PF_ACPY(oa, na, af);
1548         switch (af) {
1549 #ifdef INET
1550         case AF_INET:
1551                 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
1552                     ooa.addr16[0], oa->addr16[0], 0),
1553                     ooa.addr16[1], oa->addr16[1], 0);
1554                 break;
1555 #endif /* INET */
1556 #ifdef INET6
1557         case AF_INET6:
1558                 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1559                     pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
1560                     pf_cksum_fixup(pf_cksum_fixup(*ic,
1561                     ooa.addr16[0], oa->addr16[0], u),
1562                     ooa.addr16[1], oa->addr16[1], u),
1563                     ooa.addr16[2], oa->addr16[2], u),
1564                     ooa.addr16[3], oa->addr16[3], u),
1565                     ooa.addr16[4], oa->addr16[4], u),
1566                     ooa.addr16[5], oa->addr16[5], u),
1567                     ooa.addr16[6], oa->addr16[6], u),
1568                     ooa.addr16[7], oa->addr16[7], u);
1569                 break;
1570 #endif /* INET6 */
1571         }
1572 }
1573
1574
1575 /*
1576  * Need to modulate the sequence numbers in the TCP SACK option
1577  * (credits to Krzysztof Pfaff for report and patch)
1578  */
1579 int
1580 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
1581     struct tcphdr *th, struct pf_state_peer *dst)
1582 {
1583         int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
1584         u_int8_t opts[TCP_MAXOLEN], *opt = opts;
1585         int copyback = 0, i, olen;
1586         struct raw_sackblock sack;
1587
1588 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
1589         if (hlen < TCPOLEN_SACKLEN ||
1590             !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
1591                 return 0;
1592
1593         while (hlen >= TCPOLEN_SACKLEN) {
1594                 olen = opt[1];
1595                 switch (*opt) {
1596                 case TCPOPT_EOL:        /* FALLTHROUGH */
1597                 case TCPOPT_NOP:
1598                         opt++;
1599                         hlen--;
1600                         break;
1601                 case TCPOPT_SACK:
1602                         if (olen > hlen)
1603                                 olen = hlen;
1604                         if (olen >= TCPOLEN_SACKLEN) {
1605                                 for (i = 2; i + TCPOLEN_SACK <= olen;
1606                                     i += TCPOLEN_SACK) {
1607                                         memcpy(&sack, &opt[i], sizeof(sack));
1608                                         pf_change_a(&sack.rblk_start, &th->th_sum,
1609                                             htonl(ntohl(sack.rblk_start) -
1610                                             dst->seqdiff), 0);
1611                                         pf_change_a(&sack.rblk_end, &th->th_sum,
1612                                             htonl(ntohl(sack.rblk_end) -
1613                                             dst->seqdiff), 0);
1614                                         memcpy(&opt[i], &sack, sizeof(sack));
1615                                 }
1616                                 copyback = 1;
1617                         }
1618                         /* FALLTHROUGH */
1619                 default:
1620                         if (olen < 2)
1621                                 olen = 2;
1622                         hlen -= olen;
1623                         opt += olen;
1624                 }
1625         }
1626
1627         if (copyback)
1628                 m_copyback(m, off + sizeof(*th), thoptlen, opts);
1629         return (copyback);
1630 }
1631
1632 void
1633 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
1634     const struct pf_addr *saddr, const struct pf_addr *daddr,
1635     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
1636     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
1637     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
1638 {
1639         struct mbuf     *m;
1640         int              len = 0, tlen;
1641 #ifdef INET
1642         struct ip       *h = NULL;
1643 #endif /* INET */
1644 #ifdef INET6
1645         struct ip6_hdr  *h6 = NULL;
1646 #endif /* INET6 */
1647         struct tcphdr   *th = NULL;
1648         char            *opt;
1649
1650         ASSERT_LWKT_TOKEN_HELD(&pf_token);
1651
1652         /* maximum segment size tcp option */
1653         tlen = sizeof(struct tcphdr);
1654         if (mss)
1655                 tlen += 4;
1656
1657         switch (af) {
1658 #ifdef INET
1659         case AF_INET:
1660                 len = sizeof(struct ip) + tlen;
1661                 break;
1662 #endif /* INET */
1663 #ifdef INET6
1664         case AF_INET6:
1665                 len = sizeof(struct ip6_hdr) + tlen;
1666                 break;
1667 #endif /* INET6 */
1668         }
1669
1670         /*
1671          * Create outgoing mbuf.
1672          *
1673          * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
1674          * so make sure pf.flags is clear.
1675          */
1676         m = m_gethdr(MB_DONTWAIT, MT_HEADER);
1677         if (m == NULL) {
1678                 return;
1679         }
1680         if (tag)
1681                 m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1682         m->m_pkthdr.pf.flags = 0;
1683         m->m_pkthdr.pf.tag = rtag;
1684
1685         if (r != NULL && r->rtableid >= 0)
1686                 m->m_pkthdr.pf.rtableid = r->rtableid;
1687
1688 #ifdef ALTQ
1689         if (r != NULL && r->qid) {
1690                 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1691                 m->m_pkthdr.pf.qid = r->qid;
1692                 m->m_pkthdr.pf.ecn_af = af;
1693                 m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
1694         }
1695 #endif /* ALTQ */
1696         m->m_data += max_linkhdr;
1697         m->m_pkthdr.len = m->m_len = len;
1698         m->m_pkthdr.rcvif = NULL;
1699         bzero(m->m_data, len);
1700         switch (af) {
1701 #ifdef INET
1702         case AF_INET:
1703                 h = mtod(m, struct ip *);
1704
1705                 /* IP header fields included in the TCP checksum */
1706                 h->ip_p = IPPROTO_TCP;
1707                 h->ip_len = tlen;
1708                 h->ip_src.s_addr = saddr->v4.s_addr;
1709                 h->ip_dst.s_addr = daddr->v4.s_addr;
1710
1711                 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
1712                 break;
1713 #endif /* INET */
1714 #ifdef INET6
1715         case AF_INET6:
1716                 h6 = mtod(m, struct ip6_hdr *);
1717
1718                 /* IP header fields included in the TCP checksum */
1719                 h6->ip6_nxt = IPPROTO_TCP;
1720                 h6->ip6_plen = htons(tlen);
1721                 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
1722                 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
1723
1724                 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
1725                 break;
1726 #endif /* INET6 */
1727         }
1728
1729         /* TCP header */
1730         th->th_sport = sport;
1731         th->th_dport = dport;
1732         th->th_seq = htonl(seq);
1733         th->th_ack = htonl(ack);
1734         th->th_off = tlen >> 2;
1735         th->th_flags = flags;
1736         th->th_win = htons(win);
1737
1738         if (mss) {
1739                 opt = (char *)(th + 1);
1740                 opt[0] = TCPOPT_MAXSEG;
1741                 opt[1] = 4;
1742                 mss = htons(mss);
1743                 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
1744         }
1745
1746         switch (af) {
1747 #ifdef INET
1748         case AF_INET:
1749                 /* TCP checksum */
1750                 th->th_sum = in_cksum(m, len);
1751
1752                 /* Finish the IP header */
1753                 h->ip_v = 4;
1754                 h->ip_hl = sizeof(*h) >> 2;
1755                 h->ip_tos = IPTOS_LOWDELAY;
1756                 h->ip_len = len;
1757                 h->ip_off = path_mtu_discovery ? IP_DF : 0;
1758                 h->ip_ttl = ttl ? ttl : ip_defttl;
1759                 h->ip_sum = 0;
1760                 if (eh == NULL) {
1761                         lwkt_reltoken(&pf_token);
1762                         ip_output(m, NULL, NULL, 0, NULL, NULL);
1763                         lwkt_gettoken(&pf_token);
1764                 } else {
1765                         struct route             ro;
1766                         struct rtentry           rt;
1767                         struct ether_header     *e = (void *)ro.ro_dst.sa_data;
1768
1769                         if (ifp == NULL) {
1770                                 m_freem(m);
1771                                 return;
1772                         }
1773                         rt.rt_ifp = ifp;
1774                         ro.ro_rt = &rt;
1775                         ro.ro_dst.sa_len = sizeof(ro.ro_dst);
1776                         ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
1777                         bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
1778                         bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
1779                         e->ether_type = eh->ether_type;
1780                         /* XXX_IMPORT: later */
1781                         lwkt_reltoken(&pf_token);
1782                         ip_output(m, (void *)NULL, &ro, 0,
1783                             (void *)NULL, (void *)NULL);
1784                         lwkt_gettoken(&pf_token);
1785                 }
1786                 break;
1787 #endif /* INET */
1788 #ifdef INET6
1789         case AF_INET6:
1790                 /* TCP checksum */
1791                 th->th_sum = in6_cksum(m, IPPROTO_TCP,
1792                     sizeof(struct ip6_hdr), tlen);
1793
1794                 h6->ip6_vfc |= IPV6_VERSION;
1795                 h6->ip6_hlim = IPV6_DEFHLIM;
1796
1797                 lwkt_reltoken(&pf_token);
1798                 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
1799                 lwkt_gettoken(&pf_token);
1800                 break;
1801 #endif /* INET6 */
1802         }
1803 }
1804
1805 void
1806 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
1807     struct pf_rule *r)
1808 {
1809         struct mbuf     *m0;
1810
1811         /*
1812          * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
1813          * so make sure pf.flags is clear.
1814          */
1815         m0 = m_copy(m, 0, M_COPYALL);
1816         m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
1817         m0->m_pkthdr.pf.flags = 0;
1818
1819         if (r->rtableid >= 0)
1820                 m0->m_pkthdr.pf.rtableid = r->rtableid;
1821
1822 #ifdef ALTQ
1823         if (r->qid) {
1824                 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
1825                 m0->m_pkthdr.pf.qid = r->qid;
1826                 m0->m_pkthdr.pf.ecn_af = af;
1827                 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *);
1828         }
1829 #endif /* ALTQ */
1830
1831         switch (af) {
1832 #ifdef INET
1833         case AF_INET:
1834                 icmp_error(m0, type, code, 0, 0);
1835                 break;
1836 #endif /* INET */
1837 #ifdef INET6
1838         case AF_INET6:
1839                 icmp6_error(m0, type, code, 0);
1840                 break;
1841 #endif /* INET6 */
1842         }
1843 }
1844
/*
 * Compare the addresses a and b under mask m.
 *
 * Returns 1 if they match and n is 0, or if they differ and n is non-zero;
 * returns 0 otherwise.  In other words n negates the comparison.  For an
 * address family that is not compiled in, the addresses are treated as
 * different.
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
	int	equal = 0;

	switch (af) {
#ifdef INET
	case AF_INET:
		equal = ((a->addr32[0] & m->addr32[0]) ==
		    (b->addr32[0] & m->addr32[0]));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		int	w;

		/* Stop at the first masked word that differs. */
		for (w = 0; w < 4; w++) {
			if ((a->addr32[w] & m->addr32[w]) !=
			    (b->addr32[w] & m->addr32[w]))
				break;
		}
		equal = (w == 4);
		break;
	}
#endif /* INET6 */
	}

	return (n ? !equal : equal);
}
1890
1891 int
1892 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
1893 {
1894         switch (op) {
1895         case PF_OP_IRG:
1896                 return ((p > a1) && (p < a2));
1897         case PF_OP_XRG:
1898                 return ((p < a1) || (p > a2));
1899         case PF_OP_RRG:
1900                 return ((p >= a1) && (p <= a2));
1901         case PF_OP_EQ:
1902                 return (p == a1);
1903         case PF_OP_NE:
1904                 return (p != a1);
1905         case PF_OP_LT:
1906                 return (p < a1);
1907         case PF_OP_LE:
1908                 return (p <= a1);
1909         case PF_OP_GT:
1910                 return (p > a1);
1911         case PF_OP_GE:
1912                 return (p >= a1);
1913         }
1914         return (0); /* never reached */
1915 }
1916
/*
 * Port flavour of pf_match(): a1, a2 and p are passed through ntohs()
 * before the comparison is made.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	const u_int16_t	lo = ntohs(a1);
	const u_int16_t	hi = ntohs(a2);
	const u_int16_t	port = ntohs(p);

	return (pf_match(op, lo, hi, port));
}
1925
1926 int
1927 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
1928 {
1929         if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1930                 return (0);
1931         return (pf_match(op, a1, a2, u));
1932 }
1933
1934 int
1935 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
1936 {
1937         if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE)
1938                 return (0);
1939         return (pf_match(op, a1, a2, g));
1940 }
1941
1942 int
1943 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
1944 {
1945         if (*tag == -1)
1946                 *tag = m->m_pkthdr.pf.tag;
1947
1948         return ((!r->match_tag_not && r->match_tag == *tag) ||
1949             (r->match_tag_not && r->match_tag != *tag));
1950 }
1951
1952 int
1953 pf_tag_packet(struct mbuf *m, int tag, int rtableid)
1954 {
1955         if (tag <= 0 && rtableid < 0)
1956                 return (0);
1957
1958         if (tag > 0)
1959                 m->m_pkthdr.pf.tag = tag;
1960         if (rtableid >= 0)
1961                 m->m_pkthdr.pf.rtableid = rtableid;
1962
1963         return (0);
1964 }
1965
1966 void
1967 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
1968     struct pf_rule **r, struct pf_rule **a,  int *match)
1969 {
1970         struct pf_anchor_stackframe     *f;
1971
1972         (*r)->anchor->match = 0;
1973         if (match)
1974                 *match = 0;
1975         if (*depth >= sizeof(pf_anchor_stack) /
1976             sizeof(pf_anchor_stack[0])) {
1977                 kprintf("pf_step_into_anchor: stack overflow\n");
1978                 *r = TAILQ_NEXT(*r, entries);
1979                 return;
1980         } else if (*depth == 0 && a != NULL)
1981                 *a = *r;
1982         f = pf_anchor_stack + (*depth)++;
1983         f->rs = *rs;
1984         f->r = *r;
1985         if ((*r)->anchor_wildcard) {
1986                 f->parent = &(*r)->anchor->children;
1987                 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
1988                     NULL) {
1989                         *r = NULL;
1990                         return;
1991                 }
1992                 *rs = &f->child->ruleset;
1993         } else {
1994                 f->parent = NULL;
1995                 f->child = NULL;
1996                 *rs = &(*r)->anchor->ruleset;
1997         }
1998         *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
1999 }
2000
2001 int
2002 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
2003     struct pf_rule **r, struct pf_rule **a, int *match)
2004 {
2005         struct pf_anchor_stackframe     *f;
2006         int quick = 0;
2007
2008         do {
2009                 if (*depth <= 0)
2010                         break;
2011                 f = pf_anchor_stack + *depth - 1;
2012                 if (f->parent != NULL && f->child != NULL) {
2013                         if (f->child->match ||
2014                             (match != NULL && *match)) {
2015                                 f->r->anchor->match = 1;
2016                                 *match = 0;
2017                         }
2018                         f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
2019                         if (f->child != NULL) {
2020                                 *rs = &f->child->ruleset;
2021                                 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
2022                                 if (*r == NULL)
2023                                         continue;
2024                                 else
2025                                         break;
2026                         }
2027                 }
2028                 (*depth)--;
2029                 if (*depth == 0 && a != NULL)
2030                         *a = NULL;
2031                 *rs = f->rs;
2032                 if (f->r->anchor->match || (match  != NULL && *match))
2033                         quick = f->r->quick;
2034                 *r = TAILQ_NEXT(f->r, entries);
2035         } while (*r == NULL);
2036
2037         return (quick);
2038 }
2039
2040 #ifdef INET6
2041 void
2042 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
2043     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
2044 {
2045         switch (af) {
2046 #ifdef INET
2047         case AF_INET:
2048                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2049                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2050                 break;
2051 #endif /* INET */
2052         case AF_INET6:
2053                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
2054                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
2055                 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
2056                 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
2057                 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
2058                 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
2059                 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
2060                 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
2061                 break;
2062         }
2063 }
2064
2065 void
2066 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
2067 {
2068         switch (af) {
2069 #ifdef INET
2070         case AF_INET:
2071                 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
2072                 break;
2073 #endif /* INET */
2074         case AF_INET6:
2075                 if (addr->addr32[3] == 0xffffffff) {
2076                         addr->addr32[3] = 0;
2077                         if (addr->addr32[2] == 0xffffffff) {
2078                                 addr->addr32[2] = 0;
2079                                 if (addr->addr32[1] == 0xffffffff) {
2080                                         addr->addr32[1] = 0;
2081                                         addr->addr32[0] =
2082                                             htonl(ntohl(addr->addr32[0]) + 1);
2083                                 } else
2084                                         addr->addr32[1] =
2085                                             htonl(ntohl(addr->addr32[1]) + 1);
2086                         } else
2087                                 addr->addr32[2] =
2088                                     htonl(ntohl(addr->addr32[2]) + 1);
2089                 } else
2090                         addr->addr32[3] =
2091                             htonl(ntohl(addr->addr32[3]) + 1);
2092                 break;
2093         }
2094 }
2095 #endif /* INET6 */
2096
2097 #define mix(a,b,c) \
2098         do {                                    \
2099                 a -= b; a -= c; a ^= (c >> 13); \
2100                 b -= c; b -= a; b ^= (a << 8);  \
2101                 c -= a; c -= b; c ^= (b >> 13); \
2102                 a -= b; a -= c; a ^= (c >> 12); \
2103                 b -= c; b -= a; b ^= (a << 16); \
2104                 c -= a; c -= b; c ^= (b >> 5);  \
2105                 a -= b; a -= c; a ^= (c >> 3);  \
2106                 b -= c; b -= a; b ^= (a << 10); \
2107                 c -= a; c -= b; c ^= (b >> 15); \
2108         } while (0)
2109
2110 /*
2111  * hash function based on bridge_hash in if_bridge.c
2112  */
2113 void
2114 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
2115     struct pf_poolhashkey *key, sa_family_t af)
2116 {
2117         u_int32_t       a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
2118
2119         switch (af) {
2120 #ifdef INET
2121         case AF_INET:
2122                 a += inaddr->addr32[0];
2123                 b += key->key32[1];
2124                 mix(a, b, c);
2125                 hash->addr32[0] = c + key->key32[2];
2126                 break;
2127 #endif /* INET */
2128 #ifdef INET6
2129         case AF_INET6:
2130                 a += inaddr->addr32[0];
2131                 b += inaddr->addr32[2];
2132                 mix(a, b, c);
2133                 hash->addr32[0] = c;
2134                 a += inaddr->addr32[1];
2135                 b += inaddr->addr32[3];
2136                 c += key->key32[1];
2137                 mix(a, b, c);
2138                 hash->addr32[1] = c;
2139                 a += inaddr->addr32[2];
2140                 b += inaddr->addr32[1];
2141                 c += key->key32[2];
2142                 mix(a, b, c);
2143                 hash->addr32[2] = c;
2144                 a += inaddr->addr32[3];
2145                 b += inaddr->addr32[0];
2146                 c += key->key32[3];
2147                 mix(a, b, c);
2148                 hash->addr32[3] = c;
2149                 break;
2150 #endif /* INET6 */
2151         }
2152 }
2153
2154 int
2155 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
2156     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
2157 {
2158         unsigned char            hash[16];
2159         struct pf_pool          *rpool = &r->rpool;
2160         struct pf_addr          *raddr = &rpool->cur->addr.v.a.addr;
2161         struct pf_addr          *rmask = &rpool->cur->addr.v.a.mask;
2162         struct pf_pooladdr      *acur = rpool->cur;
2163         struct pf_src_node       k;
2164
2165         if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
2166             (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2167                 k.af = af;
2168                 PF_ACPY(&k.addr, saddr, af);
2169                 if (r->rule_flag & PFRULE_RULESRCTRACK ||
2170                     r->rpool.opts & PF_POOL_STICKYADDR)
2171                         k.rule.ptr = r;
2172                 else
2173                         k.rule.ptr = NULL;
2174                 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
2175                 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
2176                 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
2177                         PF_ACPY(naddr, &(*sn)->raddr, af);
2178                         if (pf_status.debug >= PF_DEBUG_MISC) {
2179                                 kprintf("pf_map_addr: src tracking maps ");
2180                                 pf_print_host(&k.addr, 0, af);
2181                                 kprintf(" to ");
2182                                 pf_print_host(naddr, 0, af);
2183                                 kprintf("\n");
2184                         }
2185                         return (0);
2186                 }
2187         }
2188
2189         if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
2190                 return (1);
2191         if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2192                 switch (af) {
2193 #ifdef INET
2194                 case AF_INET:
2195                         if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
2196                             (rpool->opts & PF_POOL_TYPEMASK) !=
2197                             PF_POOL_ROUNDROBIN)
2198                                 return (1);
2199                          raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
2200                          rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
2201                         break;
2202 #endif /* INET */
2203 #ifdef INET6
2204                 case AF_INET6:
2205                         if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
2206                             (rpool->opts & PF_POOL_TYPEMASK) !=
2207                             PF_POOL_ROUNDROBIN)
2208                                 return (1);
2209                         raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
2210                         rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
2211                         break;
2212 #endif /* INET6 */
2213                 }
2214         } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2215                 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
2216                         return (1); /* unsupported */
2217         } else {
2218                 raddr = &rpool->cur->addr.v.a.addr;
2219                 rmask = &rpool->cur->addr.v.a.mask;
2220         }
2221
2222         switch (rpool->opts & PF_POOL_TYPEMASK) {
2223         case PF_POOL_NONE:
2224                 PF_ACPY(naddr, raddr, af);
2225                 break;
2226         case PF_POOL_BITMASK:
2227                 PF_POOLMASK(naddr, raddr, rmask, saddr, af);
2228                 break;
2229         case PF_POOL_RANDOM:
2230                 if (init_addr != NULL && PF_AZERO(init_addr, af)) {
2231                         switch (af) {
2232 #ifdef INET
2233                         case AF_INET:
2234                                 rpool->counter.addr32[0] = htonl(karc4random());
2235                                 break;
2236 #endif /* INET */
2237 #ifdef INET6
2238                         case AF_INET6:
2239                                 if (rmask->addr32[3] != 0xffffffff)
2240                                         rpool->counter.addr32[3] =
2241                                             htonl(karc4random());
2242                                 else
2243                                         break;
2244                                 if (rmask->addr32[2] != 0xffffffff)
2245                                         rpool->counter.addr32[2] =
2246                                             htonl(karc4random());
2247                                 else
2248                                         break;
2249                                 if (rmask->addr32[1] != 0xffffffff)
2250                                         rpool->counter.addr32[1] =
2251                                             htonl(karc4random());
2252                                 else
2253                                         break;
2254                                 if (rmask->addr32[0] != 0xffffffff)
2255                                         rpool->counter.addr32[0] =
2256                                             htonl(karc4random());
2257                                 break;
2258 #endif /* INET6 */
2259                         }
2260                         PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2261                         PF_ACPY(init_addr, naddr, af);
2262
2263                 } else {
2264                         PF_AINC(&rpool->counter, af);
2265                         PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
2266                 }
2267                 break;
2268         case PF_POOL_SRCHASH:
2269                 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
2270                 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
2271                 break;
2272         case PF_POOL_ROUNDROBIN:
2273                 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2274                         if (!pfr_pool_get(rpool->cur->addr.p.tbl,
2275                             &rpool->tblidx, &rpool->counter,
2276                             &raddr, &rmask, af))
2277                                 goto get_addr;
2278                 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2279                         if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2280                             &rpool->tblidx, &rpool->counter,
2281                             &raddr, &rmask, af))
2282                                 goto get_addr;
2283                 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
2284                         goto get_addr;
2285
2286         try_next:
2287                 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
2288                         rpool->cur = TAILQ_FIRST(&rpool->list);
2289                 if (rpool->cur->addr.type == PF_ADDR_TABLE) {
2290                         rpool->tblidx = -1;
2291                         if (pfr_pool_get(rpool->cur->addr.p.tbl,
2292                             &rpool->tblidx, &rpool->counter,
2293                             &raddr, &rmask, af)) {
2294                                 /* table contains no address of type 'af' */
2295                                 if (rpool->cur != acur)
2296                                         goto try_next;
2297                                 return (1);
2298                         }
2299                 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
2300                         rpool->tblidx = -1;
2301                         if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
2302                             &rpool->tblidx, &rpool->counter,
2303                             &raddr, &rmask, af)) {
2304                                 /* table contains no address of type 'af' */
2305                                 if (rpool->cur != acur)
2306                                         goto try_next;
2307                                 return (1);
2308                         }
2309                 } else {
2310                         raddr = &rpool->cur->addr.v.a.addr;
2311                         rmask = &rpool->cur->addr.v.a.mask;
2312                         PF_ACPY(&rpool->counter, raddr, af);
2313                 }
2314
2315         get_addr:
2316                 PF_ACPY(naddr, &rpool->counter, af);
2317                 if (init_addr != NULL && PF_AZERO(init_addr, af))
2318                         PF_ACPY(init_addr, naddr, af);
2319                 PF_AINC(&rpool->counter, af);
2320                 break;
2321         }
2322         if (*sn != NULL)
2323                 PF_ACPY(&(*sn)->raddr, naddr, af);
2324
2325         if (pf_status.debug >= PF_DEBUG_MISC &&
2326             (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
2327                 kprintf("pf_map_addr: selected address ");
2328                 pf_print_host(naddr, 0, af);
2329                 kprintf("\n");
2330         }
2331
2332         return (0);
2333 }
2334
2335 int
2336 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
2337     struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
2338     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
2339     struct pf_src_node **sn)
2340 {
2341         struct pf_state_key_cmp key;
2342         struct pf_addr          init_addr;
2343         u_int16_t               cut;
2344
2345         bzero(&init_addr, sizeof(init_addr));
2346         if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2347                 return (1);
2348
2349         if (proto == IPPROTO_ICMP) {
2350                 low = 1;
2351                 high = 65535;
2352         }
2353
2354         do {
2355                 key.af = af;
2356                 key.proto = proto;
2357                 PF_ACPY(&key.ext.addr, daddr, key.af);
2358                 PF_ACPY(&key.gwy.addr, naddr, key.af);
2359                 key.ext.port = dport;
2360
2361                 /*
2362                  * port search; start random, step;
2363                  * similar 2 portloop in in_pcbbind
2364                  */
2365                 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
2366                     proto == IPPROTO_ICMP)) {
2367                         key.gwy.port = dport;
2368                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2369                                 return (0);
2370                 } else if (low == 0 && high == 0) {
2371                         key.gwy.port = *nport;
2372                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
2373                                 return (0);
2374                 } else if (low == high) {
2375                         key.gwy.port = htons(low);
2376                         if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
2377                                 *nport = htons(low);
2378                                 return (0);
2379                         }
2380                 } else {
2381                         u_int16_t tmp;
2382
2383                         if (low > high) {
2384                                 tmp = low;
2385                                 low = high;
2386                                 high = tmp;
2387                         }
2388                         /* low < high */
2389                         cut = htonl(karc4random()) % (1 + high - low) + low;
2390                         /* low <= cut <= high */
2391                         for (tmp = cut; tmp <= high; ++(tmp)) {
2392                                 key.gwy.port = htons(tmp);
2393                                 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2394                                     NULL) {
2395                                         *nport = htons(tmp);
2396                                         return (0);
2397                                 }
2398                         }
2399                         for (tmp = cut - 1; tmp >= low; --(tmp)) {
2400                                 key.gwy.port = htons(tmp);
2401                                 if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
2402                                     NULL) {
2403                                         *nport = htons(tmp);
2404                                         return (0);
2405                                 }
2406                         }
2407                 }
2408
2409                 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
2410                 case PF_POOL_RANDOM:
2411                 case PF_POOL_ROUNDROBIN:
2412                         if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
2413                                 return (1);
2414                         break;
2415                 case PF_POOL_NONE:
2416                 case PF_POOL_SRCHASH:
2417                 case PF_POOL_BITMASK:
2418                 default:
2419                         return (1);
2420                 }
2421         } while (! PF_AEQ(&init_addr, naddr, af) );
2422
2423         return (1);                                     /* none available */
2424 }
2425
2426 struct pf_rule *
2427 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
2428     int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
2429     struct pf_addr *daddr, u_int16_t dport, int rs_num)
2430 {
2431         struct pf_rule          *r, *rm = NULL;
2432         struct pf_ruleset       *ruleset = NULL;
2433         int                      tag = -1;
2434         int                      rtableid = -1;
2435         int                      asd = 0;
2436
2437         r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
2438         while (r && rm == NULL) {
2439                 struct pf_rule_addr     *src = NULL, *dst = NULL;
2440                 struct pf_addr_wrap     *xdst = NULL;
2441
2442                 if (r->action == PF_BINAT && direction == PF_IN) {
2443                         src = &r->dst;
2444                         if (r->rpool.cur != NULL)
2445                                 xdst = &r->rpool.cur->addr;
2446                 } else {
2447                         src = &r->src;
2448                         dst = &r->dst;
2449                 }
2450
2451                 r->evaluations++;
2452                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
2453                         r = r->skip[PF_SKIP_IFP].ptr;
2454                 else if (r->direction && r->direction != direction)
2455                         r = r->skip[PF_SKIP_DIR].ptr;
2456                 else if (r->af && r->af != pd->af)
2457                         r = r->skip[PF_SKIP_AF].ptr;
2458                 else if (r->proto && r->proto != pd->proto)
2459                         r = r->skip[PF_SKIP_PROTO].ptr;
2460                 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
2461                     src->neg, kif))
2462                         r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
2463                             PF_SKIP_DST_ADDR].ptr;
2464                 else if (src->port_op && !pf_match_port(src->port_op,
2465                     src->port[0], src->port[1], sport))
2466                         r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
2467                             PF_SKIP_DST_PORT].ptr;
2468                 else if (dst != NULL &&
2469                     PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
2470                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
2471                 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
2472                     0, NULL))
2473                         r = TAILQ_NEXT(r, entries);
2474                 else if (dst != NULL && dst->port_op &&
2475                     !pf_match_port(dst->port_op, dst->port[0],
2476                     dst->port[1], dport))
2477                         r = r->skip[PF_SKIP_DST_PORT].ptr;
2478                 else if (r->match_tag && !pf_match_tag(m, r, &tag))
2479                         r = TAILQ_NEXT(r, entries);
2480                 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
2481                     IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
2482                     off, pd->hdr.tcp), r->os_fingerprint)))
2483                         r = TAILQ_NEXT(r, entries);
2484                 else {
2485                         if (r->tag)
2486                                 tag = r->tag;
2487                         if (r->rtableid >= 0)
2488                                 rtableid = r->rtableid;
2489                         if (r->anchor == NULL) {
2490                                 rm = r;
2491                         } else
2492                                 pf_step_into_anchor(&asd, &ruleset, rs_num,
2493                                     &r, NULL, NULL);
2494                 }
2495                 if (r == NULL)
2496                         pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
2497                             NULL, NULL);
2498         }
2499         if (pf_tag_packet(m, tag, rtableid))
2500                 return (NULL);
2501         if (rm != NULL && (rm->action == PF_NONAT ||
2502             rm->action == PF_NORDR || rm->action == PF_NOBINAT))
2503                 return (NULL);
2504         return (rm);
2505 }
2506
2507 struct pf_rule *
2508 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
2509     struct pfi_kif *kif, struct pf_src_node **sn,
2510     struct pf_addr *saddr, u_int16_t sport,
2511     struct pf_addr *daddr, u_int16_t dport,
2512     struct pf_addr *naddr, u_int16_t *nport)
2513 {
2514         struct pf_rule  *r = NULL;
2515
2516         if (direction == PF_OUT) {
2517                 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2518                     sport, daddr, dport, PF_RULESET_BINAT);
2519                 if (r == NULL)
2520                         r = pf_match_translation(pd, m, off, direction, kif,
2521                             saddr, sport, daddr, dport, PF_RULESET_NAT);
2522         } else {
2523                 r = pf_match_translation(pd, m, off, direction, kif, saddr,
2524                     sport, daddr, dport, PF_RULESET_RDR);
2525                 if (r == NULL)
2526                         r = pf_match_translation(pd, m, off, direction, kif,
2527                             saddr, sport, daddr, dport, PF_RULESET_BINAT);
2528         }
2529
2530         if (r != NULL) {
2531                 switch (r->action) {
2532                 case PF_NONAT:
2533                 case PF_NOBINAT:
2534                 case PF_NORDR:
2535                         return (NULL);
2536                 case PF_NAT:
2537                         if (pf_get_sport(pd->af, pd->proto, r, saddr,
2538                             daddr, dport, naddr, nport, r->rpool.proxy_port[0],
2539                             r->rpool.proxy_port[1], sn)) {
2540                                 DPFPRINTF(PF_DEBUG_MISC,
2541                                     ("pf: NAT proxy port allocation "
2542                                     "(%u-%u) failed\n",
2543                                     r->rpool.proxy_port[0],
2544                                     r->rpool.proxy_port[1]));
2545                                 return (NULL);
2546                         }
2547                         break;
2548                 case PF_BINAT:
2549                         switch (direction) {
2550                         case PF_OUT:
2551                                 if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
2552                                         switch (pd->af) {
2553 #ifdef INET
2554                                         case AF_INET:
2555                                                 if (r->rpool.cur->addr.p.dyn->
2556                                                     pfid_acnt4 < 1)
2557                                                         return (NULL);
2558                                                 PF_POOLMASK(naddr,
2559                                                     &r->rpool.cur->addr.p.dyn->
2560                                                     pfid_addr4,
2561                                                     &r->rpool.cur->addr.p.dyn->
2562                                                     pfid_mask4,
2563                                                     saddr, AF_INET);
2564                                                 break;
2565 #endif /* INET */
2566 #ifdef INET6
2567                                         case AF_INET6:
2568                                                 if (r->rpool.cur->addr.p.dyn->
2569                                                     pfid_acnt6 < 1)
2570                                                         return (NULL);
2571                                                 PF_POOLMASK(naddr,
2572                                                     &r->rpool.cur->addr.p.dyn->
2573                                                     pfid_addr6,
2574                                                     &r->rpool.cur->addr.p.dyn->
2575                                                     pfid_mask6,
2576                                                     saddr, AF_INET6);
2577                                                 break;
2578 #endif /* INET6 */
2579                                         }
2580                                 } else
2581                                         PF_POOLMASK(naddr,
2582                                             &r->rpool.cur->addr.v.a.addr,
2583                                             &r->rpool.cur->addr.v.a.mask,
2584                                             saddr, pd->af);
2585                                 break;
2586                         case PF_IN:
2587                                 if (r->src.addr.type == PF_ADDR_DYNIFTL) {
2588                                         switch (pd->af) {
2589 #ifdef INET
2590                                         case AF_INET:
2591                                                 if (r->src.addr.p.dyn->
2592                                                     pfid_acnt4 < 1)
2593                                                         return (NULL);
2594                                                 PF_POOLMASK(naddr,
2595                                                     &r->src.addr.p.dyn->
2596                                                     pfid_addr4,
2597                                                     &r->src.addr.p.dyn->
2598                                                     pfid_mask4,
2599                                                     daddr, AF_INET);
2600                                                 break;
2601 #endif /* INET */
2602 #ifdef INET6
2603                                         case AF_INET6:
2604                                                 if (r->src.addr.p.dyn->
2605                                                     pfid_acnt6 < 1)
2606                                                         return (NULL);
2607                                                 PF_POOLMASK(naddr,
2608                                                     &r->src.addr.p.dyn->
2609                                                     pfid_addr6,
2610                                                     &r->src.addr.p.dyn->
2611                                                     pfid_mask6,
2612                                                     daddr, AF_INET6);
2613                                                 break;
2614 #endif /* INET6 */
2615                                         }
2616                                 } else
2617                                         PF_POOLMASK(naddr,
2618                                             &r->src.addr.v.a.addr,
2619                                             &r->src.addr.v.a.mask, daddr,
2620                                             pd->af);
2621                                 break;
2622                         }
2623                         break;
2624                 case PF_RDR: {
2625                         if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
2626                                 return (NULL);
2627                         if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
2628                             PF_POOL_BITMASK)
2629                                 PF_POOLMASK(naddr, naddr,
2630                                     &r->rpool.cur->addr.v.a.mask, daddr,
2631                                     pd->af);
2632
2633                         if (r->rpool.proxy_port[1]) {
2634                                 u_int32_t       tmp_nport;
2635
2636                                 tmp_nport = ((ntohs(dport) -
2637                                     ntohs(r->dst.port[0])) %
2638                                     (r->rpool.proxy_port[1] -
2639                                     r->rpool.proxy_port[0] + 1)) +
2640                                     r->rpool.proxy_port[0];
2641
2642                                 /* wrap around if necessary */
2643                                 if (tmp_nport > 65535)
2644                                         tmp_nport -= 65535;
2645                                 *nport = htons((u_int16_t)tmp_nport);
2646                         } else if (r->rpool.proxy_port[0])
2647                                 *nport = htons(r->rpool.proxy_port[0]);
2648                         break;
2649                 }
2650                 default:
2651                         return (NULL);
2652                 }
2653         }
2654
2655         return (r);
2656 }
2657
#ifdef SMP
/*
 * Message used by pf_socket_lookup() to have a PCB hash lookup carried
 * out by a message handler; the result is stored through nm_pinp.
 */
struct netmsg_hashlookup {
	struct netmsg_base	base;
	struct inpcb		**nm_pinp;	/* where to store the result */
	struct inpcbinfo	*nm_pcbinfo;	/* table to search */
	struct pf_addr		*nm_saddr;
	struct pf_addr		*nm_daddr;
	uint16_t		nm_sport;
	uint16_t		nm_dport;
	sa_family_t		nm_af;
};

/*
 * Handler for netmsg_hashlookup: run the IPv4 or IPv6 PCB hash lookup
 * described by the message, store the result and reply to the sender.
 */
static void
in_pcblookup_hash_handler(netmsg_t msg)
{
	struct netmsg_hashlookup *req = (struct netmsg_hashlookup *)msg;

	if (req->nm_af == AF_INET) {
		*req->nm_pinp = in_pcblookup_hash(req->nm_pcbinfo,
		    req->nm_saddr->v4, req->nm_sport,
		    req->nm_daddr->v4, req->nm_dport,
		    INPLOOKUP_WILDCARD, NULL);
	}
#ifdef INET6
	else {
		*req->nm_pinp = in6_pcblookup_hash(req->nm_pcbinfo,
		    &req->nm_saddr->v6, req->nm_sport,
		    &req->nm_daddr->v6, req->nm_dport,
		    INPLOOKUP_WILDCARD, NULL);
	}
#endif /* INET6 */

	lwkt_replymsg(&req->base.lmsg, 0);
}
#endif /* SMP */
2688
2689 int
2690 pf_socket_lookup(int direction, struct pf_pdesc *pd)
2691 {
2692         struct pf_addr          *saddr, *daddr;
2693         u_int16_t                sport, dport;
2694         struct inpcbinfo        *pi;
2695         struct inpcb            *inp;
2696 #ifdef SMP
2697         struct netmsg_hashlookup *msg = NULL;
2698 #endif
2699         int                      pi_cpu = 0;
2700
2701         if (pd == NULL)
2702                 return (-1);
2703         pd->lookup.uid = UID_MAX;
2704         pd->lookup.gid = GID_MAX;
2705         pd->lookup.pid = NO_PID;
2706         if (direction == PF_IN) {
2707                 saddr = pd->src;
2708                 daddr = pd->dst;
2709         } else {
2710                 saddr = pd->dst;
2711                 daddr = pd->src;
2712         }
2713         switch (pd->proto) {
2714         case IPPROTO_TCP:
2715                 if (pd->hdr.tcp == NULL)
2716                         return (-1);
2717                 sport = pd->hdr.tcp->th_sport;
2718                 dport = pd->hdr.tcp->th_dport;
2719
2720                 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
2721                 pi = &tcbinfo[pi_cpu];
2722 #ifdef SMP
2723                 /*
2724                  * Our netstack runs lockless on MP systems
2725                  * (only for TCP connections at the moment).
2726                  * 
2727                  * As we are not allowed to read another CPU's tcbinfo,
2728                  * we have to ask that CPU via remote call to search the
2729                  * table for us.
2730                  * 
2731                  * Prepare a msg iff data belongs to another CPU.
2732                  */
2733                 if (pi_cpu != mycpu->gd_cpuid) {
2734                         msg = kmalloc(sizeof(*msg), M_LWKTMSG, M_INTWAIT);
2735                         netmsg_init(&msg->base, NULL, &netisr_afree_rport,
2736                                     0, in_pcblookup_hash_handler);
2737                         msg->nm_pinp = &inp;
2738                         msg->nm_pcbinfo = pi;
2739                         msg->nm_saddr = saddr;
2740                         msg->nm_sport = sport;
2741                         msg->nm_daddr = daddr;
2742                         msg->nm_dport = dport;
2743                         msg->nm_af = pd->af;
2744                 }
2745 #endif /* SMP */
2746                 break;
2747         case IPPROTO_UDP:
2748                 if (pd->hdr.udp == NULL)
2749                         return (-1);
2750                 sport = pd->hdr.udp->uh_sport;
2751                 dport = pd->hdr.udp->uh_dport;
2752                 pi = &udbinfo;
2753                 break;
2754         default:
2755                 return (-1);
2756         }
2757         if (direction != PF_IN) {
2758                 u_int16_t       p;
2759
2760                 p = sport;
2761                 sport = dport;
2762                 dport = p;
2763         }
2764         switch (pd->af) {
2765 #ifdef INET6
2766         case AF_INET6:
2767 #ifdef SMP
2768                 /*
2769                  * Query other CPU, second part
2770                  * 
2771                  * msg only gets initialized when:
2772                  * 1) packet is TCP
2773                  * 2) the info belongs to another CPU
2774                  *
2775                  * Use some switch/case magic to avoid code duplication.
2776                  */
2777                 if (msg == NULL)
2778 #endif /* SMP */
2779                 {
2780                         inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
2781                             &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
2782
2783                         if (inp == NULL)
2784                                 return (-1);
2785                         break;
2786                 }
2787                 /* FALLTHROUGH if SMP and on other CPU */
2788 #endif /* INET6 */
2789         case AF_INET:
2790 #ifdef SMP
2791                 if (msg != NULL) {
2792                         lwkt_domsg(cpu_portfn(pi_cpu),
2793                                      &msg->base.lmsg, 0);
2794                 } else
2795 #endif /* SMP */
2796                 {
2797                         inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
2798                             dport, INPLOOKUP_WILDCARD, NULL);
2799                 }
2800                 if (inp == NULL)
2801                         return (-1);
2802                 break;
2803
2804         default:
2805                 return (-1);
2806         }
2807         pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
2808         pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
2809         return (1);
2810 }
2811
2812 u_int8_t
2813 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2814 {
2815         int              hlen;
2816         u_int8_t         hdr[60];
2817         u_int8_t        *opt, optlen;
2818         u_int8_t         wscale = 0;
2819
2820         hlen = th_off << 2;             /* hlen <= sizeof(hdr) */
2821         if (hlen <= sizeof(struct tcphdr))
2822                 return (0);
2823         if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2824                 return (0);
2825         opt = hdr + sizeof(struct tcphdr);
2826         hlen -= sizeof(struct tcphdr);
2827         while (hlen >= 3) {
2828                 switch (*opt) {
2829                 case TCPOPT_EOL:
2830                 case TCPOPT_NOP:
2831                         ++opt;
2832                         --hlen;
2833                         break;
2834                 case TCPOPT_WINDOW:
2835                         wscale = opt[2];
2836                         if (wscale > TCP_MAX_WINSHIFT)
2837                                 wscale = TCP_MAX_WINSHIFT;
2838                         wscale |= PF_WSCALE_FLAG;
2839                         /* FALLTHROUGH */
2840                 default:
2841                         optlen = opt[1];
2842                         if (optlen < 2)
2843                                 optlen = 2;
2844                         hlen -= optlen;
2845                         opt += optlen;
2846                         break;
2847                 }
2848         }
2849         return (wscale);
2850 }
2851
2852 u_int16_t
2853 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
2854 {
2855         int              hlen;
2856         u_int8_t         hdr[60];
2857         u_int8_t        *opt, optlen;
2858         u_int16_t        mss = tcp_mssdflt;
2859
2860         hlen = th_off << 2;     /* hlen <= sizeof(hdr) */
2861         if (hlen <= sizeof(struct tcphdr))
2862                 return (0);
2863         if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
2864                 return (0);
2865         opt = hdr + sizeof(struct tcphdr);
2866         hlen -= sizeof(struct tcphdr);
2867         while (hlen >= TCPOLEN_MAXSEG) {
2868                 switch (*opt) {
2869                 case TCPOPT_EOL:
2870                 case TCPOPT_NOP:
2871                         ++opt;
2872                         --hlen;
2873                         break;
2874                 case TCPOPT_MAXSEG:
2875                         bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
2876                         /* FALLTHROUGH */
2877                 default:
2878                         optlen = opt[1];
2879                         if (optlen < 2)
2880                                 optlen = 2;
2881                         hlen -= optlen;
2882                         opt += optlen;
2883                         break;
2884                 }
2885         }
2886         return (mss);
2887 }
2888
2889 u_int16_t
2890 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
2891 {
2892 #ifdef INET
2893         struct sockaddr_in      *dst;
2894         struct route             ro;
2895 #endif /* INET */
2896 #ifdef INET6
2897         struct sockaddr_in6     *dst6;
2898         struct route_in6         ro6;
2899 #endif /* INET6 */
2900         struct rtentry          *rt = NULL;
2901         int                      hlen = 0;
2902         u_int16_t                mss = tcp_mssdflt;
2903
2904         switch (af) {
2905 #ifdef INET
2906         case AF_INET:
2907                 hlen = sizeof(struct ip);
2908                 bzero(&ro, sizeof(ro));
2909                 dst = (struct sockaddr_in *)&ro.ro_dst;
2910                 dst->sin_family = AF_INET;
2911                 dst->sin_len = sizeof(*dst);
2912                 dst->sin_addr = addr->v4;
2913                 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
2914                 rt = ro.ro_rt;
2915                 break;
2916 #endif /* INET */
2917 #ifdef INET6
2918         case AF_INET6:
2919                 hlen = sizeof(struct ip6_hdr);
2920                 bzero(&ro6, sizeof(ro6));
2921                 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
2922                 dst6->sin6_family = AF_INET6;
2923                 dst6->sin6_len = sizeof(*dst6);
2924                 dst6->sin6_addr = addr->v6;
2925                 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING));
2926                 rt = ro6.ro_rt;
2927                 break;
2928 #endif /* INET6 */
2929         }
2930
2931         if (rt && rt->rt_ifp) {
2932                 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
2933                 mss = max(tcp_mssdflt, mss);
2934                 RTFREE(rt);
2935         }
2936         mss = min(mss, offer);
2937         mss = max(mss, 64);             /* sanity - at least max opt space */
2938         return (mss);
2939 }
2940
2941 void
2942 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
2943 {
2944         struct pf_rule *r = s->rule.ptr;
2945
2946         s->rt_kif = NULL;
2947         if (!r->rt || r->rt == PF_FASTROUTE)
2948                 return;
2949         switch (s->state_key->af) {
2950 #ifdef INET
2951         case AF_INET:
2952                 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL,
2953                     &s->nat_src_node);
2954                 s->rt_kif = r->rpool.cur->kif;
2955                 break;
2956 #endif /* INET */
2957 #ifdef INET6
2958         case AF_INET6:
2959                 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL,
2960                     &s->nat_src_node);
2961                 s->rt_kif = r->rpool.cur->kif;
2962                 break;
2963 #endif /* INET6 */
2964         }
2965 }
2966
2967 void
2968 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
2969 {
2970         s->state_key = sk;
2971         sk->refcnt++;
2972
2973         /* list is sorted, if-bound states before floating */
2974         if (tail)
2975                 TAILQ_INSERT_TAIL(&sk->states, s, next);
2976         else
2977                 TAILQ_INSERT_HEAD(&sk->states, s, next);
2978 }
2979
2980 void
2981 pf_detach_state(struct pf_state *s, int flags)
2982 {
2983         struct pf_state_key     *sk = s->state_key;
2984
2985         if (sk == NULL)
2986                 return;
2987
2988         s->state_key = NULL;
2989         TAILQ_REMOVE(&sk->states, s, next);
2990         if (--sk->refcnt == 0) {
2991                 if (!(flags & PF_DT_SKIP_EXTGWY))
2992                         RB_REMOVE(pf_state_tree_ext_gwy,
2993                             &pf_statetbl_ext_gwy, sk);
2994                 if (!(flags & PF_DT_SKIP_LANEXT))
2995                         RB_REMOVE(pf_state_tree_lan_ext,
2996                             &pf_statetbl_lan_ext, sk);
2997                 pool_put(&pf_state_key_pl, sk);
2998         }
2999 }
3000
3001 struct pf_state_key *
3002 pf_alloc_state_key(struct pf_state *s)
3003 {
3004         struct pf_state_key     *sk;
3005
3006         if ((sk = pool_get(&pf_state_key_pl, PR_NOWAIT)) == NULL)
3007                 return (NULL);
3008         bzero(sk, sizeof(*sk));
3009         TAILQ_INIT(&sk->states);
3010         pf_attach_state(sk, s, 0);
3011
3012         return (sk);
3013 }
3014
3015 int
3016 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
3017     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
3018     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
3019     struct ifqueue *ifq, struct inpcb *inp)
3020 {
3021         struct pf_rule          *nr = NULL;
3022         struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
3023         u_int16_t                bport, nport = 0;
3024         sa_family_t              af = pd->af;
3025         struct pf_rule          *r, *a = NULL;
3026         struct pf_ruleset       *ruleset = NULL;
3027         struct pf_src_node      *nsn = NULL;
3028         struct tcphdr           *th = pd->hdr.tcp;
3029         u_short                  reason;
3030         int                      rewrite = 0, hdrlen = 0;
3031         int                      tag = -1, rtableid = -1;
3032         int                      asd = 0;
3033         int                      match = 0;
3034         int                      state_icmp = 0;
3035         u_int16_t                mss = tcp_mssdflt;
3036         u_int16_t                sport, dport;
3037         u_int8_t                 icmptype = 0, icmpcode = 0;
3038
3039         if (direction == PF_IN && pf_check_congestion(ifq)) {
3040                 REASON_SET(&reason, PFRES_CONGEST);
3041                 return (PF_DROP);
3042         }
3043
3044         if (inp != NULL)
3045                 pd->lookup.done = pf_socket_lookup(direction, pd);
3046         else if (debug_pfugidhack) { 
3047                 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
3048                 pd->lookup.done = pf_socket_lookup(direction, pd);
3049         }
3050
3051         sport = dport = hdrlen = 0;
3052
3053         switch (pd->proto) {
3054         case IPPROTO_TCP:
3055                 sport = th->th_sport;
3056                 dport = th->th_dport;
3057                 hdrlen = sizeof(*th);
3058                 break;
3059         case IPPROTO_UDP:
3060                 sport = pd->hdr.udp->uh_sport;
3061                 dport = pd->hdr.udp->uh_dport;
3062                 hdrlen = sizeof(*pd->hdr.udp);
3063                 break;
3064 #ifdef INET
3065         case IPPROTO_ICMP:
3066                 if (pd->af != AF_INET)
3067                         break;
3068                 sport = dport = pd->hdr.icmp->icmp_id;
3069                 icmptype = pd->hdr.icmp->icmp_type;
3070                 icmpcode = pd->hdr.icmp->icmp_code;
3071
3072                 if (icmptype == ICMP_UNREACH ||
3073                     icmptype == ICMP_SOURCEQUENCH ||
3074                     icmptype == ICMP_REDIRECT ||
3075                     icmptype == ICMP_TIMXCEED ||
3076                     icmptype == ICMP_PARAMPROB)
3077                         state_icmp++;
3078                 break;
3079 #endif /* INET */
3080 #ifdef INET6
3081         case IPPROTO_ICMPV6:
3082                 if (pd->af != AF_INET6)
3083                         break;
3084                 sport = dport = pd->hdr.icmp6->icmp6_id;
3085                 hdrlen = sizeof(*pd->hdr.icmp6);
3086                 icmptype = pd->hdr.icmp6->icmp6_type;
3087                 icmpcode = pd->hdr.icmp6->icmp6_code;
3088
3089                 if (icmptype == ICMP6_DST_UNREACH ||
3090                     icmptype == ICMP6_PACKET_TOO_BIG ||
3091                     icmptype == ICMP6_TIME_EXCEEDED ||
3092                     icmptype == ICMP6_PARAM_PROB)
3093                         state_icmp++;
3094                 break;
3095 #endif /* INET6 */
3096         }
3097
3098         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3099
3100         if (direction == PF_OUT) {
3101                 bport = nport = sport;
3102                 /* check outgoing packet for BINAT/NAT */
3103                 if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
3104                     saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
3105                         PF_ACPY(&pd->baddr, saddr, af);
3106                         switch (pd->proto) {
3107                         case IPPROTO_TCP:
3108                                 pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
3109                                     &th->th_sum, &pd->naddr, nport, 0, af);
3110                                 sport = th->th_sport;
3111                                 rewrite++;
3112                                 break;
3113                         case IPPROTO_UDP:
3114                                 pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
3115                                     pd->ip_sum, &pd->hdr.udp->uh_sum,
3116                                     &pd->naddr, nport, 1, af);
3117                                 sport = pd->hdr.udp->uh_sport;
3118                                 rewrite++;
3119                                 break;
3120 #ifdef INET
3121                         case IPPROTO_ICMP:
3122                                 pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
3123                                     pd->naddr.v4.s_addr, 0);
3124                                 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
3125                                     pd->hdr.icmp->icmp_cksum, sport, nport, 0);
3126                                 pd->hdr.icmp->icmp_id = nport;
3127                                 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
3128                                 break;
3129 #endif /* INET */
3130 #ifdef INET6
3131                         case IPPROTO_ICMPV6:
3132                                 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
3133                                     &pd->naddr, 0);
3134                                 rewrite++;
3135                                 break;
3136 #endif /* INET */
3137                         default:
3138                                 switch (af) {
3139 #ifdef INET
3140                                 case AF_INET:
3141                                         pf_change_a(&saddr->v4.s_addr,
3142                                             pd->ip_sum, pd->naddr.v4.s_addr, 0);
3143                                         break;
3144 #endif /* INET */
3145 #ifdef INET6
3146                                 case AF_INET6:
3147                                         PF_ACPY(saddr, &pd->naddr, af);
3148                                         break;
3149 #endif /* INET */
3150                                 }
3151                                 break;
3152                         }
3153
3154                         if (nr->natpass)
3155                                 r = NULL;
3156                         pd->nat_rule = nr;
3157                 }
3158         } else {
3159                 bport = nport = dport;
3160                 /* check incoming packet for BINAT/RDR */
3161                 if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
3162                     saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) {
3163                         PF_ACPY(&pd->baddr, daddr, af);
3164                         switch (pd->proto) {
3165                         case IPPROTO_TCP:
3166                                 pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
3167                                     &th->th_sum, &pd->naddr, nport, 0, af);
3168                                 dport = th->th_dport;
3169                                 rewrite++;
3170                                 break;
3171                         case IPPROTO_UDP:
3172                                 pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
3173                                     pd->ip_sum, &pd->hdr.udp->uh_sum,
3174                                     &pd->naddr, nport, 1, af);
3175                                 dport = pd->hdr.udp->uh_dport;
3176                                 rewrite++;
3177                                 break;
3178 #ifdef INET
3179                         case IPPROTO_ICMP:
3180                                 pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
3181                                     pd->naddr.v4.s_addr, 0);
3182                                 break;
3183 #endif /* INET */
3184 #ifdef INET6
3185                         case IPPROTO_ICMPV6:
3186                                 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
3187                                     &pd->naddr, 0);
3188                                 rewrite++;
3189                                 break;
3190 #endif /* INET6 */
3191                         default:
3192                                 switch (af) {
3193 #ifdef INET
3194                                 case AF_INET:
3195                                         pf_change_a(&daddr->v4.s_addr,
3196                                             pd->ip_sum, pd->naddr.v4.s_addr, 0);
3197                                         break;
3198 #endif /* INET */
3199 #ifdef INET6
3200                                 case AF_INET6:
3201                                         PF_ACPY(daddr, &pd->naddr, af);
3202                                         break;
3203 #endif /* INET */
3204                                 }
3205                                 break;
3206                         }
3207
3208                         if (nr->natpass)
3209                                 r = NULL;
3210                         pd->nat_rule = nr;
3211                 }
3212         }
3213
3214         while (r != NULL) {
3215                 r->evaluations++;
3216                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
3217                         r = r->skip[PF_SKIP_IFP].ptr;
3218                 else if (r->direction && r->direction != direction)
3219                         r = r->skip[PF_SKIP_DIR].ptr;
3220                 else if (r->af && r->af != af)
3221                         r = r->skip[PF_SKIP_AF].ptr;
3222                 else if (r->proto && r->proto != pd->proto)
3223                         r = r->skip[PF_SKIP_PROTO].ptr;
3224                 else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
3225                     r->src.neg, kif))
3226                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3227                 /* tcp/udp only. port_op always 0 in other cases */
3228                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
3229                     r->src.port[0], r->src.port[1], sport))
3230                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
3231                 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
3232                     r->dst.neg, NULL))
3233                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3234                 /* tcp/udp only. port_op always 0 in other cases */
3235                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
3236                     r->dst.port[0], r->dst.port[1], dport))
3237                         r = r->skip[PF_SKIP_DST_PORT].ptr;
3238                 /* icmp only. type always 0 in other cases */
3239                 else if (r->type && r->type != icmptype + 1)
3240                         r = TAILQ_NEXT(r, entries);
3241                 /* icmp only. type always 0 in other cases */
3242                 else if (r->code && r->code != icmpcode + 1)
3243                         r = TAILQ_NEXT(r, entries);
3244                 else if (r->tos && !(r->tos == pd->tos))
3245                         r = TAILQ_NEXT(r, entries);
3246                 else if (r->rule_flag & PFRULE_FRAGMENT)
3247                         r = TAILQ_NEXT(r, entries);
3248                 else if (pd->proto == IPPROTO_TCP &&
3249                     (r->flagset & th->th_flags) != r->flags)
3250                         r = TAILQ_NEXT(r, entries);
3251                 /* tcp/udp only. uid.op always 0 in other cases */
3252                 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
3253                     pf_socket_lookup(direction, pd), 1)) &&
3254                     !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
3255                     pd->lookup.uid))
3256                         r = TAILQ_NEXT(r, entries);
3257                 /* tcp/udp only. gid.op always 0 in other cases */
3258                 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
3259                     pf_socket_lookup(direction, pd), 1)) &&
3260                     !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
3261                     pd->lookup.gid))
3262                         r = TAILQ_NEXT(r, entries);
3263                 else if (r->prob && r->prob <= karc4random())
3264                         r = TAILQ_NEXT(r, entries);
3265                 else if (r->match_tag && !pf_match_tag(m, r, &tag))
3266                         r = TAILQ_NEXT(r, entries);
3267                 else if (r->os_fingerprint != PF_OSFP_ANY &&
3268                     (pd->proto != IPPROTO_TCP || !pf_osfp_match(
3269                     pf_osfp_fingerprint(pd, m, off, th),
3270                     r->os_fingerprint)))
3271                         r = TAILQ_NEXT(r, entries);
3272                 else {
3273                         if (r->tag)
3274                                 tag = r->tag;
3275                         if (r->rtableid >= 0)
3276                                 rtableid = r->rtableid;
3277                         if (r->anchor == NULL) {
3278                                 match = 1;
3279                                 *rm = r;
3280                                 *am = a;
3281                                 *rsm = ruleset;
3282                                 if ((*rm)->quick)
3283                                         break;
3284                                 r = TAILQ_NEXT(r, entries);
3285                         } else
3286                                 pf_step_into_anchor(&asd, &ruleset,
3287                                     PF_RULESET_FILTER, &r, &a, &match);
3288                 }
3289                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3290                     PF_RULESET_FILTER, &r, &a, &match))
3291                         break;
3292         }
3293         r = *rm;
3294         a = *am;
3295         ruleset = *rsm;
3296
3297         REASON_SET(&reason, PFRES_MATCH);
3298
3299         if (r->log || (nr != NULL && nr->log)) {
3300                 if (rewrite)
3301                         m_copyback(m, off, hdrlen, pd->hdr.any);
3302                 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
3303                     a, ruleset, pd);
3304         }
3305
3306         if ((r->action == PF_DROP) &&
3307             ((r->rule_flag & PFRULE_RETURNRST) ||
3308             (r->rule_flag & PFRULE_RETURNICMP) ||
3309             (r->rule_flag & PFRULE_RETURN))) {
3310                 /* undo NAT changes, if they have taken place */
3311                 if (nr != NULL) {
3312                         if (direction == PF_OUT) {
3313                                 switch (pd->proto) {
3314                                 case IPPROTO_TCP:
3315                                         pf_change_ap(saddr, &th->th_sport,
3316                                             pd->ip_sum, &th->th_sum,
3317                                             &pd->baddr, bport, 0, af);
3318                                         sport = th->th_sport;
3319                                         rewrite++;
3320                                         break;
3321                                 case IPPROTO_UDP:
3322                                         pf_change_ap(saddr,
3323                                             &pd->hdr.udp->uh_sport, pd->ip_sum,
3324                                             &pd->hdr.udp->uh_sum, &pd->baddr,
3325                                             bport, 1, af);
3326                                         sport = pd->hdr.udp->uh_sport;
3327                                         rewrite++;
3328                                         break;
3329                                 case IPPROTO_ICMP:
3330 #ifdef INET6
3331                                 case IPPROTO_ICMPV6:
3332 #endif
3333                                         /* nothing! */
3334                                         break;
3335                                 default:
3336                                         switch (af) {
3337                                         case AF_INET:
3338                                                 pf_change_a(&saddr->v4.s_addr,
3339                                                     pd->ip_sum,
3340                                                     pd->baddr.v4.s_addr, 0);
3341                                                 break;
3342                                         case AF_INET6:
3343                                                 PF_ACPY(saddr, &pd->baddr, af);
3344                                                 break;
3345                                         }
3346                                 }
3347                         } else {
3348                                 switch (pd->proto) {
3349                                 case IPPROTO_TCP:
3350                                         pf_change_ap(daddr, &th->th_dport,
3351                                             pd->ip_sum, &th->th_sum,
3352                                             &pd->baddr, bport, 0, af);
3353                                         dport = th->th_dport;
3354                                         rewrite++;
3355                                         break;
3356                                 case IPPROTO_UDP:
3357                                         pf_change_ap(daddr,
3358                                             &pd->hdr.udp->uh_dport, pd->ip_sum,
3359                                             &pd->hdr.udp->uh_sum, &pd->baddr,
3360                                             bport, 1, af);
3361                                         dport = pd->hdr.udp->uh_dport;
3362                                         rewrite++;
3363                                         break;
3364                                 case IPPROTO_ICMP:
3365 #ifdef INET6
3366                                 case IPPROTO_ICMPV6:
3367 #endif
3368                                         /* nothing! */
3369                                         break;
3370                                 default:
3371                                         switch (af) {
3372                                         case AF_INET:
3373                                                 pf_change_a(&daddr->v4.s_addr,
3374                                                     pd->ip_sum,
3375                                                     pd->baddr.v4.s_addr, 0);
3376                                                 break;
3377                                         case AF_INET6:
3378                                                 PF_ACPY(daddr, &pd->baddr, af);
3379                                                 break;
3380                                         }
3381                                 }
3382                         }
3383                 }
3384                 if (pd->proto == IPPROTO_TCP &&
3385                     ((r->rule_flag & PFRULE_RETURNRST) ||
3386                     (r->rule_flag & PFRULE_RETURN)) &&
3387                     !(th->th_flags & TH_RST)) {
3388                         u_int32_t        ack = ntohl(th->th_seq) + pd->p_len;
3389                         struct ip       *h = mtod(m, struct ip *);
3390
3391                         if (pf_check_proto_cksum(m, off,
3392                             h->ip_len - off, IPPROTO_TCP, AF_INET))
3393                                 REASON_SET(&reason, PFRES_PROTCKSUM);
3394                         else {
3395                                 if (th->th_flags & TH_SYN)
3396                                         ack++;
3397                                 if (th->th_flags & TH_FIN)
3398                                         ack++;
3399                                 pf_send_tcp(r, af, pd->dst,
3400                                     pd->src, th->th_dport, th->th_sport,
3401                                     ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
3402                                     r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
3403                         }
3404                 } else if ((af == AF_INET) && r->return_icmp)
3405                         pf_send_icmp(m, r->return_icmp >> 8,
3406                             r->return_icmp & 255, af, r);
3407                 else if ((af == AF_INET6) && r->return_icmp6)
3408                         pf_send_icmp(m, r->return_icmp6 >> 8,
3409                             r->return_icmp6 & 255, af, r);
3410         }
3411
3412         if (r->action == PF_DROP)
3413                 return (PF_DROP);
3414
3415         if (pf_tag_packet(m, tag, rtableid)) {
3416                 REASON_SET(&reason, PFRES_MEMORY);
3417                 return (PF_DROP);
3418         }
3419
3420         if (!state_icmp && (r->keep_state || nr != NULL ||
3421             (pd->flags & PFDESC_TCP_NORM))) {
3422                 /* create new state */
3423                 u_int16_t        len = 0;
3424                 struct pf_state *s = NULL;
3425                 struct pf_state_key *sk = NULL;
3426                 struct pf_src_node *sn = NULL;
3427
3428                 /* check maximums */
3429                 if (r->max_states && (r->states >= r->max_states)) {
3430                         pf_status.lcounters[LCNT_STATES]++;
3431                         REASON_SET(&reason, PFRES_MAXSTATES);
3432                         goto cleanup;
3433                 }
3434                 /* src node for filter rule */
3435                 if ((r->rule_flag & PFRULE_SRCTRACK ||
3436                     r->rpool.opts & PF_POOL_STICKYADDR) &&
3437                     pf_insert_src_node(&sn, r, saddr, af) != 0) {
3438                         REASON_SET(&reason, PFRES_SRCLIMIT);
3439                         goto cleanup;
3440                 }
3441                 /* src node for translation rule */
3442                 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
3443                     ((direction == PF_OUT &&
3444                     pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
3445                     (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
3446                         REASON_SET(&reason, PFRES_SRCLIMIT);
3447                         goto cleanup;
3448                 }
3449                 s = pool_get(&pf_state_pl, PR_NOWAIT);
3450                 if (s == NULL) {
3451                         REASON_SET(&reason, PFRES_MEMORY);
3452 cleanup:
3453                         if (sn != NULL && sn->states == 0 && sn->expire == 0) {
3454                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
3455                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3456                                 pf_status.src_nodes--;
3457                                 pool_put(&pf_src_tree_pl, sn);
3458                         }
3459                         if (nsn != sn && nsn != NULL && nsn->states == 0 &&
3460                             nsn->expire == 0) {
3461                                 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
3462                                 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
3463                                 pf_status.src_nodes--;
3464                                 pool_put(&pf_src_tree_pl, nsn);
3465                         }
3466                         if (sk != NULL) {
3467                                 pool_put(&pf_state_key_pl, sk);
3468                         }
3469                         return (PF_DROP);
3470                 }
3471                 bzero(s, sizeof(*s));
3472                 s->rule.ptr = r;
3473                 s->nat_rule.ptr = nr;
3474                 s->anchor.ptr = a;
3475                 STATE_INC_COUNTERS(s);
3476                 s->allow_opts = r->allow_opts;
3477                 s->log = r->log & PF_LOG_ALL;
3478                 if (nr != NULL)
3479                         s->log |= nr->log & PF_LOG_ALL;
3480                 switch (pd->proto) {
3481                 case IPPROTO_TCP:
3482                         len = pd->tot_len - off - (th->th_off << 2);
3483                         s->src.seqlo = ntohl(th->th_seq);
3484                         s->src.seqhi = s->src.seqlo + len + 1;
3485                         if ((th->th_flags & (TH_SYN|TH_ACK)) ==
3486                         TH_SYN && r->keep_state == PF_STATE_MODULATE) {
3487                                 /* Generate sequence number modulator */
3488                                 while ((s->src.seqdiff =
3489                                     pf_new_isn(sk) - s->src.seqlo) == 0)
3490                                         ;
3491                                 pf_change_a(&th->th_seq, &th->th_sum,
3492                                     htonl(s->src.seqlo + s->src.seqdiff), 0);
3493                                 rewrite = 1;
3494                         } else
3495                                 s->src.seqdiff = 0;
3496                         if (th->th_flags & TH_SYN) {
3497                                 s->src.seqhi++;
3498                                 s->src.wscale = pf_get_wscale(m, off,
3499                                     th->th_off, af);
3500                         }
3501                         s->src.max_win = MAX(ntohs(th->th_win), 1);
3502                         if (s->src.wscale & PF_WSCALE_MASK) {
3503                                 /* Remove scale factor from initial window */
3504                                 int win = s->src.max_win;
3505                                 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
3506                                 s->src.max_win = (win - 1) >>
3507                                     (s->src.wscale & PF_WSCALE_MASK);
3508                         }
3509                         if (th->th_flags & TH_FIN)
3510                                 s->src.seqhi++;
3511                         s->dst.seqhi = 1;
3512                         s->dst.max_win = 1;
3513                         s->src.state = TCPS_SYN_SENT;
3514                         s->dst.state = TCPS_CLOSED;
3515                         s->timeout = PFTM_TCP_FIRST_PACKET;
3516                         break;
3517                 case IPPROTO_UDP:
3518                         s->src.state = PFUDPS_SINGLE;
3519                         s->dst.state = PFUDPS_NO_TRAFFIC;
3520                         s->timeout = PFTM_UDP_FIRST_PACKET;
3521                         break;
3522                 case IPPROTO_ICMP:
3523 #ifdef INET6
3524                 case IPPROTO_ICMPV6:
3525 #endif
3526                         s->timeout = PFTM_ICMP_FIRST_PACKET;
3527                         break;
3528                 default:
3529                         s->src.state = PFOTHERS_SINGLE;
3530                         s->dst.state = PFOTHERS_NO_TRAFFIC;
3531                         s->timeout = PFTM_OTHER_FIRST_PACKET;
3532                 }
3533
3534                 s->creation = time_second;
3535                 s->expire = time_second;
3536
3537                 if (sn != NULL) {
3538                         s->src_node = sn;
3539                         s->src_node->states++;
3540                 }
3541                 if (nsn != NULL) {
3542                         PF_ACPY(&nsn->raddr, &pd->naddr, af);
3543                         s->nat_src_node = nsn;
3544                         s->nat_src_node->states++;
3545                 }
3546                 if (pd->proto == IPPROTO_TCP) {
3547                         if ((pd->flags & PFDESC_TCP_NORM) &&
3548                             pf_normalize_tcp_init(m, off, pd, th, &s->src,
3549                             &s->dst)) {
3550                                 REASON_SET(&reason, PFRES_MEMORY);
3551                                 pf_src_tree_remove_state(s);
3552                                 STATE_DEC_COUNTERS(s);
3553                                 pool_put(&pf_state_pl, s);
3554                                 return (PF_DROP);
3555                         }
3556                         if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
3557                             pf_normalize_tcp_stateful(m, off, pd, &reason,
3558                             th, s, &s->src, &s->dst, &rewrite)) {
3559                                 /* This really shouldn't happen!!! */
3560                                 DPFPRINTF(PF_DEBUG_URGENT,
3561                                     ("pf_normalize_tcp_stateful failed on "
3562                                     "first pkt"));
3563                                 pf_normalize_tcp_cleanup(s);
3564                                 pf_src_tree_remove_state(s);
3565                                 STATE_DEC_COUNTERS(s);
3566                                 pool_put(&pf_state_pl, s);
3567                                 return (PF_DROP);
3568                         }
3569                 }
3570
3571                 if ((sk = pf_alloc_state_key(s)) == NULL) {
3572                         REASON_SET(&reason, PFRES_MEMORY);
3573                         goto cleanup;
3574                 }
3575
3576                 sk->proto = pd->proto;
3577                 sk->direction = direction;
3578                 sk->af = af;
3579                 if (direction == PF_OUT) {
3580                         PF_ACPY(&sk->gwy.addr, saddr, af);
3581                         PF_ACPY(&sk->ext.addr, daddr, af);
3582                         switch (pd->proto) {
3583                         case IPPROTO_ICMP:
3584 #ifdef INET6
3585                         case IPPROTO_ICMPV6:
3586 #endif
3587                                 sk->gwy.port = nport;
3588                                 sk->ext.port = 0;
3589                                 break;
3590                         default:
3591                                 sk->gwy.port = sport;
3592                                 sk->ext.port = dport;
3593                         }
3594                         if (nr != NULL) {
3595                                 PF_ACPY(&sk->lan.addr, &pd->baddr, af);
3596                                 sk->lan.port = bport;
3597                         } else {
3598                                 PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af);
3599                                 sk->lan.port = sk->gwy.port;
3600                         }
3601                 } else {
3602                         PF_ACPY(&sk->lan.addr, daddr, af);
3603                         PF_ACPY(&sk->ext.addr, saddr, af);
3604                         switch (pd->proto) {
3605                         case IPPROTO_ICMP:
3606 #ifdef INET6
3607                         case IPPROTO_ICMPV6:
3608 #endif
3609                                 sk->lan.port = nport;
3610                                 sk->ext.port = 0;
3611                                 break;
3612                         default:
3613                                 sk->lan.port = dport;
3614                                 sk->ext.port = sport;
3615                         }
3616                         if (nr != NULL) {
3617                                 PF_ACPY(&sk->gwy.addr, &pd->baddr, af);
3618                                 sk->gwy.port = bport;
3619                         } else {
3620                                 PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af);
3621                                 sk->gwy.port = sk->lan.port;
3622                         }
3623                 }
3624
3625                 s->hash = pf_state_hash(sk);
3626                 s->pickup_mode = r->pickup_mode;
3627
3628                 pf_set_rt_ifp(s, saddr);        /* needs s->state_key set */
3629
3630                 if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
3631                         if (pd->proto == IPPROTO_TCP)
3632                                 pf_normalize_tcp_cleanup(s);
3633                         REASON_SET(&reason, PFRES_STATEINS);
3634                         pf_src_tree_remove_state(s);
3635                         STATE_DEC_COUNTERS(s);
3636                         pool_put(&pf_state_pl, s);
3637                         return (PF_DROP);
3638                 } else
3639                         *sm = s;
3640                 if (tag > 0) {
3641                         pf_tag_ref(tag);
3642                         s->tag = tag;
3643                 }
3644                 if (pd->proto == IPPROTO_TCP &&
3645                     (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
3646                     r->keep_state == PF_STATE_SYNPROXY) {
3647                         s->src.state = PF_TCPS_PROXY_SRC;
3648                         if (nr != NULL) {
3649                                 if (direction == PF_OUT) {
3650                                         pf_change_ap(saddr, &th->th_sport,
3651                                             pd->ip_sum, &th->th_sum, &pd->baddr,
3652                                             bport, 0, af);
3653                                         sport = th->th_sport;
3654                                 } else {
3655                                         pf_change_ap(daddr, &th->th_dport,
3656                                             pd->ip_sum, &th->th_sum, &pd->baddr,
3657                                             bport, 0, af);
3658                                         sport = th->th_dport;
3659                                 }
3660                         }
3661                         s->src.seqhi = htonl(karc4random());
3662                         /* Find mss option */
3663                         mss = pf_get_mss(m, off, th->th_off, af);
3664                         mss = pf_calc_mss(saddr, af, mss);
3665                         mss = pf_calc_mss(daddr, af, mss);
3666                         s->src.mss = mss;
3667                         pf_send_tcp(r, af, daddr, saddr, th->th_dport,
3668                             th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
3669                             TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
3670                         REASON_SET(&reason, PFRES_SYNPROXY);
3671                         return (PF_SYNPROXY_DROP);
3672                 }
3673         }
3674
3675         /* copy back packet headers if we performed NAT operations */
3676         if (rewrite)
3677                 m_copyback(m, off, hdrlen, pd->hdr.any);
3678
3679         return (PF_PASS);
3680 }
3681
3682 int
3683 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
3684     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
3685     struct pf_ruleset **rsm)
3686 {
3687         struct pf_rule          *r, *a = NULL;
3688         struct pf_ruleset       *ruleset = NULL;
3689         sa_family_t              af = pd->af;
3690         u_short                  reason;
3691         int                      tag = -1;
3692         int                      asd = 0;
3693         int                      match = 0;
3694
3695         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
3696         while (r != NULL) {
3697                 r->evaluations++;
3698                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
3699                         r = r->skip[PF_SKIP_IFP].ptr;
3700                 else if (r->direction && r->direction != direction)
3701                         r = r->skip[PF_SKIP_DIR].ptr;
3702                 else if (r->af && r->af != af)
3703                         r = r->skip[PF_SKIP_AF].ptr;
3704                 else if (r->proto && r->proto != pd->proto)
3705                         r = r->skip[PF_SKIP_PROTO].ptr;
3706                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
3707                     r->src.neg, kif))
3708                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
3709                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
3710                     r->dst.neg, NULL))
3711                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
3712                 else if (r->tos && !(r->tos == pd->tos))
3713                         r = TAILQ_NEXT(r, entries);
3714                 else if (r->os_fingerprint != PF_OSFP_ANY)
3715                         r = TAILQ_NEXT(r, entries);
3716                 else if (pd->proto == IPPROTO_UDP &&
3717                     (r->src.port_op || r->dst.port_op))
3718                         r = TAILQ_NEXT(r, entries);
3719                 else if (pd->proto == IPPROTO_TCP &&
3720                     (r->src.port_op || r->dst.port_op || r->flagset))
3721                         r = TAILQ_NEXT(r, entries);
3722                 else if ((pd->proto == IPPROTO_ICMP ||
3723                     pd->proto == IPPROTO_ICMPV6) &&
3724                     (r->type || r->code))
3725                         r = TAILQ_NEXT(r, entries);
3726                 else if (r->prob && r->prob <= karc4random())
3727                         r = TAILQ_NEXT(r, entries);
3728                 else if (r->match_tag && !pf_match_tag(m, r, &tag))
3729                         r = TAILQ_NEXT(r, entries);
3730                 else {
3731                         if (r->anchor == NULL) {
3732                                 match = 1;
3733                                 *rm = r;
3734                                 *am = a;
3735                                 *rsm = ruleset;
3736                                 if ((*rm)->quick)
3737                                         break;
3738                                 r = TAILQ_NEXT(r, entries);
3739                         } else
3740                                 pf_step_into_anchor(&asd, &ruleset,
3741                                     PF_RULESET_FILTER, &r, &a, &match);
3742                 }
3743                 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
3744                     PF_RULESET_FILTER, &r, &a, &match))
3745                         break;
3746         }
3747         r = *rm;
3748         a = *am;
3749         ruleset = *rsm;
3750
3751         REASON_SET(&reason, PFRES_MATCH);
3752
3753         if (r->log)
3754                 PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
3755                     pd);
3756
3757         if (r->action != PF_PASS)
3758                 return (PF_DROP);
3759
3760         if (pf_tag_packet(m, tag, -1)) {
3761                 REASON_SET(&reason, PFRES_MEMORY);
3762                 return (PF_DROP);
3763         }
3764
3765         return (PF_PASS);
3766 }
3767
/*
 * Match a TCP packet against an existing state entry and track it.
 *
 * The function builds a lookup key from the packet's addresses and ports,
 * looks the state up with STATE_LOOKUP(), drives the synproxy handshake
 * while the state is in PF_TCPS_PROXY_SRC / PF_TCPS_PROXY_DST, checks the
 * packet's sequence and ack numbers against the tracked windows, updates
 * the per-peer TCP states and the state timeout, and finally rewrites the
 * TCP header for address/port translation and sequence modulation.
 *
 * state     - in/out pointer to the state; dereferenced after
 *             STATE_LOOKUP()
 * direction - PF_IN or PF_OUT
 * kif       - interface the packet was seen on
 * m, off    - mbuf holding the packet and the offset used for option
 *             parsing and for copying the TCP header back
 * h         - not referenced directly in this function body
 * pd        - packet descriptor (addresses, TCP header pointer, p_len,
 *             flags)
 * reason    - receives the drop/synproxy reason through REASON_SET()
 *
 * Returns PF_PASS when the packet is accepted, PF_DROP when a check fails,
 * and PF_SYNPROXY_DROP when the packet was consumed by the synproxy
 * handshake.
 */
3768 int
3769 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
3770     struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
3771     u_short *reason)
3772 {
3773         struct pf_state_key_cmp  key;
3774         struct tcphdr           *th = pd->hdr.tcp;
3775         u_int16_t                win = ntohs(th->th_win);
3776         u_int32_t                ack, end, seq, orig_seq;
3777         u_int8_t                 sws, dws;
3778         int                      ackskew;
3779         int                      copyback = 0;
3780         struct pf_state_peer    *src, *dst;
3781
             /*
              * Build the lookup key.  Inbound packets fill in ext/gwy,
              * outbound packets fill in lan/ext; the remaining host of the
              * key is left untouched (key is not zeroed in this function).
              */
3782         key.af = pd->af;
3783         key.proto = IPPROTO_TCP;
3784         if (direction == PF_IN) {
3785                 PF_ACPY(&key.ext.addr, pd->src, key.af);
3786                 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
3787                 key.ext.port = th->th_sport;
3788                 key.gwy.port = th->th_dport;
3789         } else {
3790                 PF_ACPY(&key.lan.addr, pd->src, key.af);
3791                 PF_ACPY(&key.ext.addr, pd->dst, key.af);
3792                 key.lan.port = th->th_sport;
3793                 key.ext.port = th->th_dport;
3794         }
3795
             /*
              * NOTE(review): STATE_LOOKUP() is a macro defined outside this
              * chunk and takes no arguments; presumably it uses the locals
              * above to set *state and may return from this function when
              * no state is found -- confirm against its definition.
              */
3796         STATE_LOOKUP();
3797
             /*
              * Packets travelling in the state's own direction use the
              * state's src peer as src; packets in the opposite direction
              * swap the two peers.
              */
3798         if (direction == (*state)->state_key->direction) {
3799                 src = &(*state)->src;
3800                 dst = &(*state)->dst;
3801         } else {
3802                 src = &(*state)->dst;
3803                 dst = &(*state)->src;
3804         }
3805
             /*
              * Synproxy, first stage.  Packets against the state's direction
              * are dropped.  A SYN carrying the recorded sequence number is
              * answered with a SYN|ACK.  Any other packet must be an ACK
              * whose ack/seq equal src.seqhi + 1 / src.seqlo + 1, and must
              * pass the source connection limit, before the state advances
              * to PF_TCPS_PROXY_DST.
              */
3806         if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
3807                 if (direction != (*state)->state_key->direction) {
3808                         REASON_SET(reason, PFRES_SYNPROXY);
3809                         return (PF_SYNPROXY_DROP);
3810                 }
3811                 if (th->th_flags & TH_SYN) {
3812                         if (ntohl(th->th_seq) != (*state)->src.seqlo) {
3813                                 REASON_SET(reason, PFRES_SYNPROXY);
3814                                 return (PF_DROP);
3815                         }
3816                         pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3817                             pd->src, th->th_dport, th->th_sport,
3818                             (*state)->src.seqhi, ntohl(th->th_seq) + 1,
3819                             TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
3820                             0, NULL, NULL);
3821                         REASON_SET(reason, PFRES_SYNPROXY);
3822                         return (PF_SYNPROXY_DROP);
3823                 } else if (!(th->th_flags & TH_ACK) ||
3824                     (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3825                     (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3826                         REASON_SET(reason, PFRES_SYNPROXY);
3827                         return (PF_DROP);
3828                 } else if ((*state)->src_node != NULL &&
3829                     pf_src_connlimit(state)) {
3830                         REASON_SET(reason, PFRES_SRCLIMIT);
3831                         return (PF_DROP);
3832                 } else
3833                         (*state)->src.state = PF_TCPS_PROXY_DST;
3834         }
             /*
              * Synproxy, second stage.  In the state's own direction a bare
              * ACK matching the first stage causes a SYN to be sent between
              * the state key hosts selected below.  In the opposite
              * direction a SYN|ACK acknowledging dst.seqhi + 1 is answered
              * with two ACKs (one back to the packet's sender, one between
              * the state key hosts), the sequence offsets are recorded and
              * both peers become TCPS_ESTABLISHED.  Every path through this
              * block returns.
              */
3835         if ((*state)->src.state == PF_TCPS_PROXY_DST) {
                     /*
                      * These locals shadow the pf_state_peer src/dst declared
                      * at function scope; here they are state key hosts.
                      */
3836                 struct pf_state_host *src, *dst;
3837
3838                 if (direction == PF_OUT) {
3839                         src = &(*state)->state_key->gwy;
3840                         dst = &(*state)->state_key->ext;
3841                 } else {
3842                         src = &(*state)->state_key->ext;
3843                         dst = &(*state)->state_key->lan;
3844                 }
3845                 if (direction == (*state)->state_key->direction) {
3846                         if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
3847                             (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
3848                             (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
3849                                 REASON_SET(reason, PFRES_SYNPROXY);
3850                                 return (PF_DROP);
3851                         }
3852                         (*state)->src.max_win = MAX(ntohs(th->th_win), 1);
                             /*
                              * NOTE(review): dst.seqhi == 1 appears to mean
                              * "no initial sequence number chosen yet" --
                              * confirm where the state is created.
                              */
3853                         if ((*state)->dst.seqhi == 1)
3854                                 (*state)->dst.seqhi = htonl(karc4random());
3855                         pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3856                             &dst->addr, src->port, dst->port,
3857                             (*state)->dst.seqhi, 0, TH_SYN, 0,
3858                             (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
3859                         REASON_SET(reason, PFRES_SYNPROXY);
3860                         return (PF_SYNPROXY_DROP);
3861                 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
3862                     (TH_SYN|TH_ACK)) ||
3863                     (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
3864                         REASON_SET(reason, PFRES_SYNPROXY);
3865                         return (PF_DROP);
3866                 } else {
3867                         (*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
3868                         (*state)->dst.seqlo = ntohl(th->th_seq);
3869                         pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
3870                             pd->src, th->th_dport, th->th_sport,
3871                             ntohl(th->th_ack), ntohl(th->th_seq) + 1,
3872                             TH_ACK, (*state)->src.max_win, 0, 0, 0,
3873                             (*state)->tag, NULL, NULL);
3874                         pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
3875                             &dst->addr, src->port, dst->port,
3876                             (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
3877                             TH_ACK, (*state)->dst.max_win, 0, 0, 1,
3878                             0, NULL, NULL);
                             /*
                              * Record the offsets between the two handshakes
                              * and seed each peer's seqhi from the other
                              * peer's advertised window.
                              */
3879                         (*state)->src.seqdiff = (*state)->dst.seqhi -
3880                             (*state)->src.seqlo;
3881                         (*state)->dst.seqdiff = (*state)->src.seqhi -
3882                             (*state)->dst.seqlo;
3883                         (*state)->src.seqhi = (*state)->src.seqlo +
3884                             (*state)->dst.max_win;
3885                         (*state)->dst.seqhi = (*state)->dst.seqlo +
3886                             (*state)->src.max_win;
3887                         (*state)->src.wscale = (*state)->dst.wscale = 0;
3888                         (*state)->src.state = (*state)->dst.state =
3889                             TCPS_ESTABLISHED;
3890                         REASON_SET(reason, PFRES_SYNPROXY);
3891                         return (PF_SYNPROXY_DROP);
3892                 }
3893         }
3894
             /*
              * Window scale shifts are applied only when both peers have a
              * non-zero wscale and the packet is not a SYN; otherwise both
              * shifts are zero.
              */
3895         if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
3896                 sws = src->wscale & PF_WSCALE_MASK;
3897                 dws = dst->wscale & PF_WSCALE_MASK;
3898         } else
3899                 sws = dws = 0;
3900
3901         /*
3902          * Sequence tracking algorithm from Guido van Rooij's paper:
3903          *   http://www.madison-gurkha.com/publications/tcp_filtering/
3904          *      tcp_filtering.ps
3905          */
3906
             /* orig_seq keeps the packet's own seq; seq may be eased below. */
3907         orig_seq = seq = ntohl(th->th_seq);
3908         if (src->seqlo == 0) {
3909                 /* First packet from this end. Set its state */
3910
3911                 if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
3912                     src->scrub == NULL) {
3913                         if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
3914                                 REASON_SET(reason, PFRES_MEMORY);
3915                                 return (PF_DROP);
3916                         }
3917                 }
3918
3919                 /* Deferred generation of sequence number modulator */
3920                 if (dst->seqdiff && !src->seqdiff) {
3921                         
                             /*
                              * Loop until the modulator is non-zero.
                              * NOTE(review): key is a pf_state_key_cmp with
                              * only two of its hosts filled in above, cast
                              * to pf_state_key -- confirm pf_new_isn() reads
                              * only initialized fields.
                              */
3922                         while ((src->seqdiff = pf_new_isn((struct pf_state_key *)&key) - seq) == 0)
3923                                 ;
3924                         ack = ntohl(th->th_ack) - dst->seqdiff;
3925                         pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3926                             src->seqdiff), 0);
3927                         pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3928                         copyback = 1;
3929                 } else {
3930                         ack = ntohl(th->th_ack);
3931                 }
3932
                     /* end is one past the last sequence number consumed. */
3933                 end = seq + pd->p_len;
3934                 if (th->th_flags & TH_SYN) {
3935                         end++;
3936                         (*state)->sync_flags |= PFSTATE_GOT_SYN2;
3937                         if (dst->wscale & PF_WSCALE_FLAG) {
3938                                 src->wscale = pf_get_wscale(m, off, th->th_off,
3939                                     pd->af);
3940                                 if (src->wscale & PF_WSCALE_FLAG) {
3941                                         /* Remove scale factor from initial
3942                                          * window */
3943                                         sws = src->wscale & PF_WSCALE_MASK;
3944                                         win = ((u_int32_t)win + (1 << sws) - 1)
3945                                             >> sws;
3946                                         dws = dst->wscale & PF_WSCALE_MASK;
3947                                 } else {
3948                                         /* fixup other window */
3949                                         dst->max_win <<= dst->wscale &
3950                                             PF_WSCALE_MASK;
3951                                         /* in case of a retrans SYN|ACK */
3952                                         dst->wscale = 0;
3953                                 }
3954                         }
3955                 }
3956                 if (th->th_flags & TH_FIN)
3957                         end++;
3958
3959                 src->seqlo = seq;
3960                 if (src->state < TCPS_SYN_SENT)
3961                         src->state = TCPS_SYN_SENT;
3962
3963                 /*
3964                  * May need to slide the window (seqhi may have been set by
3965                  * the crappy stack check or if we picked up the connection
3966                  * after establishment)
3967                  */
3968                 if (src->seqhi == 1 ||
3969                     SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
3970                         src->seqhi = end + MAX(1, dst->max_win << dws);
3971                 if (win > src->max_win)
3972                         src->max_win = win;
3973
3974         } else {
                     /* Subsequent packet: undo the peer's modulation on ack. */
3975                 ack = ntohl(th->th_ack) - dst->seqdiff;
3976                 if (src->seqdiff) {
3977                         /* Modulate sequence numbers */
3978                         pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
3979                             src->seqdiff), 0);
3980                         pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
3981                         copyback = 1;
3982                 }
3983                 end = seq + pd->p_len;
3984                 if (th->th_flags & TH_SYN)
3985                         end++;
3986                 if (th->th_flags & TH_FIN)
3987                         end++;
3988         }
3989
3990         if ((th->th_flags & TH_ACK) == 0) {
3991                 /* Let it pass through the ack skew check */
3992                 ack = dst->seqlo;
3993         } else if ((ack == 0 &&
3994             (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
3995             /* broken tcp stacks do not set ack */
3996             (dst->state < TCPS_SYN_SENT)) {
3997                 /*
3998                  * Many stacks (ours included) will set the ACK number in an
3999                  * FIN|ACK if the SYN times out -- no sequence to ACK.
4000                  */
4001                 ack = dst->seqlo;
4002         }
4003
4004         if (seq == end) {
4005                 /* Ease sequencing restrictions on no data packets */
4006                 seq = src->seqlo;
4007                 end = seq;
4008         }
4009
             /* Signed distance from this packet's ack to the peer's seqlo. */
4010         ackskew = dst->seqlo - ack;
4011
4012
4013         /*
4014          * Need to demodulate the sequence numbers in any TCP SACK options
4015          * (Selective ACK). We could optionally validate the SACK values
4016          * against the current ACK window, either forwards or backwards, but
4017          * I'm not confident that SACK has been implemented properly
4018          * everywhere. It wouldn't surprise me if several stacks accidentally
4019          * SACK too far backwards of previously ACKed data. There really aren't
4020          * any security implications of bad SACKing unless the target stack
4021          * doesn't validate the option length correctly. Someone trying to
4022          * spoof into a TCP connection won't bother blindly sending SACK
4023          * options anyway.
4024          */
4025         if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
4026                 if (pf_modulate_sack(m, off, pd, th, dst))
4027                         copyback = 1;
4028         }
4029
4030
             /*
              * MAXACKWINDOW is not #undef'd within this function.  The first
              * branch below is the strict match: every window, ack-skew and
              * RST test must hold.
              */
4031 #define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
4032         if (SEQ_GEQ(src->seqhi, end) &&
4033             /* Last octet inside other's window space */
4034             SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
4035             /* Retrans: not more than one window back */
4036             (ackskew >= -MAXACKWINDOW) &&
4037             /* Acking not more than one reassembled fragment backwards */
4038             (ackskew <= (MAXACKWINDOW << sws)) &&
4039             /* Acking not more than one window forward */
4040             ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
4041             (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
4042             (pd->flags & PFDESC_IP_REAS) == 0)) {
4043             /* Require an exact/+1 sequence match on resets when possible */
4044
4045                 if (dst->scrub || src->scrub) {
4046                         if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4047                             *state, src, dst, &copyback))
4048                                 return (PF_DROP);
4049                 }
4050
4051                 /* update max window */
4052                 if (src->max_win < win)
4053                         src->max_win = win;
4054                 /* synchronize sequencing */
4055                 if (SEQ_GT(end, src->seqlo))
4056                         src->seqlo = end;
4057                 /* slide the window of what the other end can send */
4058                 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4059                         dst->seqhi = ack + MAX((win << sws), 1);
4060
4061
4062                 /* update states */
4063                 if (th->th_flags & TH_SYN)
4064                         if (src->state < TCPS_SYN_SENT)
4065                                 src->state = TCPS_SYN_SENT;
4066                 if (th->th_flags & TH_FIN)
4067                         if (src->state < TCPS_CLOSING)
4068                                 src->state = TCPS_CLOSING;
4069                 if (th->th_flags & TH_ACK) {
4070                         if (dst->state == TCPS_SYN_SENT) {
4071                                 dst->state = TCPS_ESTABLISHED;
                                     /*
                                      * Both peers established: apply the
                                      * source connection limit if the state
                                      * has a source node.
                                      */
4072                                 if (src->state == TCPS_ESTABLISHED &&
4073                                     (*state)->src_node != NULL &&
4074                                     pf_src_connlimit(state)) {
4075                                         REASON_SET(reason, PFRES_SRCLIMIT);
4076                                         return (PF_DROP);
4077                                 }
4078                         } else if (dst->state == TCPS_CLOSING)
4079                                 dst->state = TCPS_FIN_WAIT_2;
4080                 }
4081                 if (th->th_flags & TH_RST)
4082                         src->state = dst->state = TCPS_TIME_WAIT;
4083
4084                 /* update expire time */
4085                 (*state)->expire = time_second;
                     /* Pick the timeout class from the two peer states. */
4086                 if (src->state >= TCPS_FIN_WAIT_2 &&
4087                     dst->state >= TCPS_FIN_WAIT_2)
4088                         (*state)->timeout = PFTM_TCP_CLOSED;
4089                 else if (src->state >= TCPS_CLOSING &&
4090                     dst->state >= TCPS_CLOSING)
4091                         (*state)->timeout = PFTM_TCP_FIN_WAIT;
4092                 else if (src->state < TCPS_ESTABLISHED ||
4093                     dst->state < TCPS_ESTABLISHED)
4094                         (*state)->timeout = PFTM_TCP_OPENING;
4095                 else if (src->state >= TCPS_CLOSING ||
4096                     dst->state >= TCPS_CLOSING)
4097                         (*state)->timeout = PFTM_TCP_CLOSING;
4098                 else
4099                         (*state)->timeout = PFTM_TCP_ESTABLISHED;
4100
4101                 /* Fall through to PASS packet */
4102
4103         } else if ((dst->state < TCPS_SYN_SENT ||
4104                 dst->state >= TCPS_FIN_WAIT_2 ||
4105                 src->state >= TCPS_FIN_WAIT_2) &&
4106             SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
4107             /* Within a window forward of the originating packet */
4108             SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
4109             /* Within a window backward of the originating packet */
4110
4111                 /*
4112                  * This currently handles three situations:
4113                  *  1) Stupid stacks will shotgun SYNs before their peer
4114                  *     replies.
4115                  *  2) When PF catches an already established stream (the
4116                  *     firewall rebooted, the state table was flushed, routes
4117                  *     changed...)
4118                  *  3) Packets get funky immediately after the connection
4119                  *     closes (this should catch Solaris spurious ACK|FINs
4120                  *     that web servers like to spew after a close)
4121                  *
4122                  * This must be a little more careful than the above code
4123                  * since packet floods will also be caught here. We don't
4124                  * update the TTL here to mitigate the damage of a packet
4125                  * flood and so the same code can handle awkward establishment
4126                  * and a loosened connection close.
4127                  * In the establishment case, a correct peer response will
4128                  * validate the connection, go through the normal state code
4129                  * and keep updating the state TTL.
4130                  */
4131
4132                 if (pf_status.debug >= PF_DEBUG_MISC) {
4133                         kprintf("pf: loose state match: ");
4134                         pf_print_state(*state);
4135                         pf_print_flags(th->th_flags);
4136                         kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4137                             "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len,
4138                             ackskew, (unsigned long long)(*state)->packets[0],
4139                             (unsigned long long)(*state)->packets[1],
4140                             direction == PF_IN ? "in" : "out",
4141                             direction == (*state)->state_key->direction ?
4142                                 "fwd" : "rev");
4143                 }
4144
4145                 if (dst->scrub || src->scrub) {
4146                         if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
4147                             *state, src, dst, &copyback))
4148                                 return (PF_DROP);
4149                 }
4150
4151                 /* update max window */
4152                 if (src->max_win < win)
4153                         src->max_win = win;
4154                 /* synchronize sequencing */
4155                 if (SEQ_GT(end, src->seqlo))
4156                         src->seqlo = end;
4157                 /* slide the window of what the other end can send */
4158                 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
4159                         dst->seqhi = ack + MAX((win << sws), 1);
4160
4161                 /*
4162                  * Cannot set dst->seqhi here since this could be a shotgunned
4163                  * SYN and not an already established connection.
4164                  */
4165
4166                 if (th->th_flags & TH_FIN)
4167                         if (src->state < TCPS_CLOSING)
4168                                 src->state = TCPS_CLOSING;
4169                 if (th->th_flags & TH_RST)
4170                         src->state = dst->state = TCPS_TIME_WAIT;
4171
4172                 /* Fall through to PASS packet */
4173
4174         } else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY ||
4175                     ((*state)->pickup_mode == PF_PICKUPS_ENABLED &&
4176                      ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) !=
4177                       PFSTATE_GOT_SYN_MASK)) {
4178                 /*
4179                  * If pickup mode is hash only, do not fail on sequence checks.
4180                  *
4181                  * If pickup mode is enabled and we did not see the SYN in
4182                  * both directions, do not fail on sequence checks because
4183                  * we do not have complete information on window scale.
4184                  *
4185                  * Adjust expiration and fall through to PASS packet.
4186                  * XXX Add a FIN check to reduce timeout?
4187                  */
4188                 (*state)->expire = time_second;
4189         } else  {
4190                 /*
4191                  * Failure processing
4192                  */
4193                 if ((*state)->dst.state == TCPS_SYN_SENT &&
4194                     (*state)->src.state == TCPS_SYN_SENT) {
4195                         /* Send RST for state mismatches during handshake */
4196                         if (!(th->th_flags & TH_RST))
4197                                 pf_send_tcp((*state)->rule.ptr, pd->af,
4198                                     pd->dst, pd->src, th->th_dport,
4199                                     th->th_sport, ntohl(th->th_ack), 0,
4200                                     TH_RST, 0, 0,
4201                                     (*state)->rule.ptr->return_ttl, 1, 0,
4202                                     pd->eh, kif->pfik_ifp);
                             /*
                              * Reset this peer's tracking; with seqlo == 0
                              * its next packet takes the "first packet from
                              * this end" path above.
                              */
4203                         src->seqlo = 0;
4204                         src->seqhi = 1;
4205                         src->max_win = 1;
4206                 } else if (pf_status.debug >= PF_DEBUG_MISC) {
4207                         kprintf("pf: BAD state: ");
4208                         pf_print_state(*state);
4209                         pf_print_flags(th->th_flags);
4210                         kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
4211                             "pkts=%llu:%llu dir=%s,%s\n",
4212                             seq, orig_seq, ack, pd->p_len, ackskew,
4213                             (unsigned long long)(*state)->packets[0],
4214                                 (unsigned long long)(*state)->packets[1],
4215                             direction == PF_IN ? "in" : "out",
4216                             direction == (*state)->state_key->direction ?
4217                                 "fwd" : "rev");
                             /*
                              * Digits 1-4 mark which strict-match tests
                              * failed, 5-6 which loose-match window tests
                              * failed; a blank means the test passed.
                              */
4218                         kprintf("pf: State failure on: %c %c %c %c | %c %c\n",
4219                             SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
4220                             SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
4221                             ' ': '2',
4222                             (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
4223                             (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
4224                             SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
4225                             SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
4226                 }
4227                 REASON_SET(reason, PFRES_BADSTATE);
4228                 return (PF_DROP);
4229         }
4230
4231         /* Any packets which have gotten here are to be passed */
4232
4233         /* translate source/destination address, if necessary */
4234         if (STATE_TRANSLATE((*state)->state_key)) {
4235                 if (direction == PF_OUT) {
                             /* Outbound: rewrite the source to the gwy host. */
4236                         pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
4237                             &th->th_sum, &(*state)->state_key->gwy.addr,
4238                             (*state)->state_key->gwy.port, 0, pd->af);
4239                 } else {
4240                         /*
4241                          * If we don't redispatch the packet will go into
4242                          * the protocol stack on the wrong cpu for the
4243                          * post-translated address.
4244                          */
4245                         /* m->m_pkthdr.fw_flags |= FW_MBUF_REDISPATCH; */
4246                         m->m_flags &= ~M_HASH;
                             /* Inbound: rewrite the destination to the lan host. */
4247                         pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
4248                             &th->th_sum, &(*state)->state_key->lan.addr,
4249                             (*state)->state_key->lan.port, 0, pd->af);
4250                 }
                     /* The header was rewritten; copy it back into the mbuf. */
4251                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
4252         } else if (copyback) {
4253                 /* Copyback sequence modulation or stateful scrub changes */
4254                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
4255         }
4256
4257         return (PF_PASS);
4258 }
4259
4260 int
4261 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
4262     struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
4263 {
4264         struct pf_state_peer    *src, *dst;
4265         struct pf_state_key_cmp  key;
4266         struct udphdr           *uh = pd->hdr.udp;
4267
4268         key.af = pd->af;
4269         key.proto = IPPROTO_UDP;
4270         if (direction == PF_IN) {
4271                 PF_ACPY(&key.ext.addr, pd->src, key.af);
4272                 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4273                 key.ext.port = uh->uh_sport;
4274                 key.gwy.port = uh->uh_dport;
4275         } else {
4276                 PF_ACPY(&key.lan.addr, pd->src, key.af);
4277                 PF_ACPY(&key.ext.addr, pd->dst, key.af);
4278                 key.lan.port = uh->uh_sport;
4279                 key.ext.port = uh->uh_dport;
4280         }
4281
4282         STATE_LOOKUP();
4283
4284         if (direction == (*state)->state_key->direction) {
4285                 src = &(*state)->src;
4286                 dst = &(*state)->dst;
4287         } else {
4288                 src = &(*state)->dst;
4289                 dst = &(*state)->src;
4290         }
4291
4292         /* update states */
4293         if (src->state < PFUDPS_SINGLE)
4294                 src->state = PFUDPS_SINGLE;
4295         if (dst->state == PFUDPS_SINGLE)
4296                 dst->state = PFUDPS_MULTIPLE;
4297
4298         /* update expire time */
4299         (*state)->expire = time_second;
4300         if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
4301                 (*state)->timeout = PFTM_UDP_MULTIPLE;
4302         else
4303                 (*state)->timeout = PFTM_UDP_SINGLE;
4304
4305         /* translate source/destination address, if necessary */
4306         if (STATE_TRANSLATE((*state)->state_key)) {
4307                 if (direction == PF_OUT) {
4308                         pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
4309                             &uh->uh_sum, &(*state)->state_key->gwy.addr,
4310                             (*state)->state_key->gwy.port, 1, pd->af);
4311                 } else {
4312                         /*
4313                          * If we don't redispatch the packet will go into
4314                          * the protocol stack on the wrong cpu for the
4315                          * post-translated address.
4316                          */
4317                         /* m->m_pkthdr.fw_flags |= FW_MBUF_REDISPATCH; */
4318                         m->m_flags &= ~M_HASH;
4319                         pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
4320                             &uh->uh_sum, &(*state)->state_key->lan.addr,
4321                             (*state)->state_key->lan.port, 1, pd->af);
4322                 }
4323                 m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
4324         }
4325
4326         return (PF_PASS);
4327 }
4328
4329 int
4330 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
4331     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
4332 {
4333         struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
4334         u_int16_t        icmpid = 0, *icmpsum;
4335         u_int8_t         icmptype;
4336         int              state_icmp = 0;
4337         struct pf_state_key_cmp key;
4338
4339         switch (pd->proto) {
4340 #ifdef INET
4341         case IPPROTO_ICMP:
4342                 icmptype = pd->hdr.icmp->icmp_type;
4343                 icmpid = pd->hdr.icmp->icmp_id;
4344                 icmpsum = &pd->hdr.icmp->icmp_cksum;
4345
4346                 if (icmptype == ICMP_UNREACH ||
4347                     icmptype == ICMP_SOURCEQUENCH ||
4348                     icmptype == ICMP_REDIRECT ||
4349                     icmptype == ICMP_TIMXCEED ||
4350                     icmptype == ICMP_PARAMPROB)
4351                         state_icmp++;
4352                 break;
4353 #endif /* INET */
4354 #ifdef INET6
4355         case IPPROTO_ICMPV6:
4356                 icmptype = pd->hdr.icmp6->icmp6_type;
4357                 icmpid = pd->hdr.icmp6->icmp6_id;
4358                 icmpsum = &pd->hdr.icmp6->icmp6_cksum;
4359
4360                 if (icmptype == ICMP6_DST_UNREACH ||
4361                     icmptype == ICMP6_PACKET_TOO_BIG ||
4362                     icmptype == ICMP6_TIME_EXCEEDED ||
4363                     icmptype == ICMP6_PARAM_PROB)
4364                         state_icmp++;
4365                 break;
4366 #endif /* INET6 */
4367         }
4368
4369         if (!state_icmp) {
4370
4371                 /*
4372                  * ICMP query/reply message not related to a TCP/UDP packet.
4373                  * Search for an ICMP state.
4374                  */
4375                 key.af = pd->af;
4376                 key.proto = pd->proto;
4377                 if (direction == PF_IN) {
4378                         PF_ACPY(&key.ext.addr, pd->src, key.af);
4379                         PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4380                         key.ext.port = 0;
4381                         key.gwy.port = icmpid;
4382                 } else {
4383                         PF_ACPY(&key.lan.addr, pd->src, key.af);
4384                         PF_ACPY(&key.ext.addr, pd->dst, key.af);
4385                         key.lan.port = icmpid;
4386                         key.ext.port = 0;
4387                 }
4388
4389                 STATE_LOOKUP();
4390
4391                 (*state)->expire = time_second;
4392                 (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
4393
4394                 /* translate source/destination address, if necessary */
4395                 if (STATE_TRANSLATE((*state)->state_key)) {
4396                         if (direction == PF_OUT) {
4397                                 switch (pd->af) {
4398 #ifdef INET
4399                                 case AF_INET:
4400                                         pf_change_a(&saddr->v4.s_addr,
4401                                             pd->ip_sum,
4402                                             (*state)->state_key->gwy.addr.v4.s_addr, 0);
4403                                         pd->hdr.icmp->icmp_cksum =
4404                                             pf_cksum_fixup(
4405                                             pd->hdr.icmp->icmp_cksum, icmpid,
4406                                             (*state)->state_key->gwy.port, 0);
4407                                         pd->hdr.icmp->icmp_id =
4408                                             (*state)->state_key->gwy.port;
4409                                         m_copyback(m, off, ICMP_MINLEN,
4410                                             (caddr_t)pd->hdr.icmp);
4411                                         break;
4412 #endif /* INET */
4413 #ifdef INET6
4414                                 case AF_INET6:
4415                                         pf_change_a6(saddr,
4416                                             &pd->hdr.icmp6->icmp6_cksum,
4417                                             &(*state)->state_key->gwy.addr, 0);
4418                                         m_copyback(m, off,
4419                                             sizeof(struct icmp6_hdr),
4420                                             (caddr_t)pd->hdr.icmp6);
4421                                         break;
4422 #endif /* INET6 */
4423                                 }
4424                         } else {
4425                                 switch (pd->af) {
4426 #ifdef INET
4427                                 case AF_INET:
4428                                         pf_change_a(&daddr->v4.s_addr,
4429                                             pd->ip_sum,
4430                                             (*state)->state_key->lan.addr.v4.s_addr, 0);
4431                                         pd->hdr.icmp->icmp_cksum =
4432                                             pf_cksum_fixup(
4433                                             pd->hdr.icmp->icmp_cksum, icmpid,
4434                                             (*state)->state_key->lan.port, 0);
4435                                         pd->hdr.icmp->icmp_id =
4436                                             (*state)->state_key->lan.port;
4437                                         m_copyback(m, off, ICMP_MINLEN,
4438                                             (caddr_t)pd->hdr.icmp);
4439                                         break;
4440 #endif /* INET */
4441 #ifdef INET6
4442                                 case AF_INET6:
4443                                         pf_change_a6(daddr,
4444                                             &pd->hdr.icmp6->icmp6_cksum,
4445                                             &(*state)->state_key->lan.addr, 0);
4446                                         m_copyback(m, off,
4447                                             sizeof(struct icmp6_hdr),
4448                                             (caddr_t)pd->hdr.icmp6);
4449                                         break;
4450 #endif /* INET6 */
4451                                 }
4452                         }
4453                 }
4454
4455                 return (PF_PASS);
4456
4457         } else {
4458                 /*
4459                  * ICMP error message in response to a TCP/UDP packet.
4460                  * Extract the inner TCP/UDP header and search for that state.
4461                  */
4462
4463                 struct pf_pdesc pd2;
4464 #ifdef INET
4465                 struct ip       h2;
4466 #endif /* INET */
4467 #ifdef INET6
4468                 struct ip6_hdr  h2_6;
4469                 int             terminal = 0;
4470 #endif /* INET6 */
4471                 int             ipoff2;
4472                 int             off2;
4473
4474                 pd2.af = pd->af;
4475                 switch (pd->af) {
4476 #ifdef INET
4477                 case AF_INET:
4478                         /* offset of h2 in mbuf chain */
4479                         ipoff2 = off + ICMP_MINLEN;
4480
4481                         if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
4482                             NULL, reason, pd2.af)) {
4483                                 DPFPRINTF(PF_DEBUG_MISC,
4484                                     ("pf: ICMP error message too short "
4485                                     "(ip)\n"));
4486                                 return (PF_DROP);
4487                         }
4488                         /*
4489                          * ICMP error messages don't refer to non-first
4490                          * fragments
4491                          */
4492                         if (h2.ip_off & htons(IP_OFFMASK)) {
4493                                 REASON_SET(reason, PFRES_FRAG);
4494                                 return (PF_DROP);
4495                         }
4496
4497                         /* offset of protocol header that follows h2 */
4498                         off2 = ipoff2 + (h2.ip_hl << 2);
4499
4500                         pd2.proto = h2.ip_p;
4501                         pd2.src = (struct pf_addr *)&h2.ip_src;
4502                         pd2.dst = (struct pf_addr *)&h2.ip_dst;
4503                         pd2.ip_sum = &h2.ip_sum;
4504                         break;
4505 #endif /* INET */
4506 #ifdef INET6
4507                 case AF_INET6:
4508                         ipoff2 = off + sizeof(struct icmp6_hdr);
4509
4510                         if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
4511                             NULL, reason, pd2.af)) {
4512                                 DPFPRINTF(PF_DEBUG_MISC,
4513                                     ("pf: ICMP error message too short "
4514                                     "(ip6)\n"));
4515                                 return (PF_DROP);
4516                         }
4517                         pd2.proto = h2_6.ip6_nxt;
4518                         pd2.src = (struct pf_addr *)&h2_6.ip6_src;
4519                         pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
4520                         pd2.ip_sum = NULL;
4521                         off2 = ipoff2 + sizeof(h2_6);
4522                         do {
4523                                 switch (pd2.proto) {
4524                                 case IPPROTO_FRAGMENT:
4525                                         /*
4526                                          * ICMPv6 error messages for
4527                                          * non-first fragments
4528                                          */
4529                                         REASON_SET(reason, PFRES_FRAG);
4530                                         return (PF_DROP);
4531                                 case IPPROTO_AH:
4532                                 case IPPROTO_HOPOPTS:
4533                                 case IPPROTO_ROUTING:
4534                                 case IPPROTO_DSTOPTS: {
4535                                         /* get next header and header length */
4536                                         struct ip6_ext opt6;
4537
4538                                         if (!pf_pull_hdr(m, off2, &opt6,
4539                                             sizeof(opt6), NULL, reason,
4540                                             pd2.af)) {
4541                                                 DPFPRINTF(PF_DEBUG_MISC,
4542                                                     ("pf: ICMPv6 short opt\n"));
4543                                                 return (PF_DROP);
4544                                         }
4545                                         if (pd2.proto == IPPROTO_AH)
4546                                                 off2 += (opt6.ip6e_len + 2) * 4;
4547                                         else
4548                                                 off2 += (opt6.ip6e_len + 1) * 8;
4549                                         pd2.proto = opt6.ip6e_nxt;
4550                                         /* goto the next header */
4551                                         break;
4552                                 }
4553                                 default:
4554                                         terminal++;
4555                                         break;
4556                                 }
4557                         } while (!terminal);
4558                         break;
4559 #endif /* INET6 */
4560                 default:
4561                         DPFPRINTF(PF_DEBUG_MISC,
4562                             ("pf: ICMP AF %d unknown (ip6)\n", pd->af));
4563                         return (PF_DROP);
4564                         break;
4565                 }
4566
4567                 switch (pd2.proto) {
4568                 case IPPROTO_TCP: {
4569                         struct tcphdr            th;
4570                         u_int32_t                seq;
4571                         struct pf_state_peer    *src, *dst;
4572                         u_int8_t                 dws;
4573                         int                      copyback = 0;
4574
4575                         /*
4576                          * Only the first 8 bytes of the TCP header can be
4577                          * expected. Don't access any TCP header fields after
4578                          * th_seq, an ackskew test is not possible.
4579                          */
4580                         if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
4581                             pd2.af)) {
4582                                 DPFPRINTF(PF_DEBUG_MISC,
4583                                     ("pf: ICMP error message too short "
4584                                     "(tcp)\n"));
4585                                 return (PF_DROP);
4586                         }
4587
4588                         key.af = pd2.af;
4589                         key.proto = IPPROTO_TCP;
4590                         if (direction == PF_IN) {
4591                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4592                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4593                                 key.ext.port = th.th_dport;
4594                                 key.gwy.port = th.th_sport;
4595                         } else {
4596                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4597                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
4598                                 key.lan.port = th.th_dport;
4599                                 key.ext.port = th.th_sport;
4600                         }
4601
4602                         STATE_LOOKUP();
4603
4604                         if (direction == (*state)->state_key->direction) {
4605                                 src = &(*state)->dst;
4606                                 dst = &(*state)->src;
4607                         } else {
4608                                 src = &(*state)->src;
4609                                 dst = &(*state)->dst;
4610                         }
4611
4612                         if (src->wscale && dst->wscale)
4613                                 dws = dst->wscale & PF_WSCALE_MASK;
4614                         else
4615                                 dws = 0;
4616
4617                         /* Demodulate sequence number */
4618                         seq = ntohl(th.th_seq) - src->seqdiff;
4619                         if (src->seqdiff) {
4620                                 pf_change_a(&th.th_seq, icmpsum,
4621                                     htonl(seq), 0);
4622                                 copyback = 1;
4623                         }
4624
4625                         if (!SEQ_GEQ(src->seqhi, seq) ||
4626                             !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
4627                                 if (pf_status.debug >= PF_DEBUG_MISC) {
4628                                         kprintf("pf: BAD ICMP %d:%d ",
4629                                             icmptype, pd->hdr.icmp->icmp_code);
4630                                         pf_print_host(pd->src, 0, pd->af);
4631                                         kprintf(" -> ");
4632                                         pf_print_host(pd->dst, 0, pd->af);
4633                                         kprintf(" state: ");
4634                                         pf_print_state(*state);
4635                                         kprintf(" seq=%u\n", seq);
4636                                 }
4637                                 REASON_SET(reason, PFRES_BADSTATE);
4638                                 return (PF_DROP);
4639                         }
4640
4641                         if (STATE_TRANSLATE((*state)->state_key)) {
4642                                 if (direction == PF_IN) {
4643                                         pf_change_icmp(pd2.src, &th.th_sport,
4644                                             daddr, &(*state)->state_key->lan.addr,
4645                                             (*state)->state_key->lan.port, NULL,
4646                                             pd2.ip_sum, icmpsum,
4647                                             pd->ip_sum, 0, pd2.af);
4648                                 } else {
4649                                         pf_change_icmp(pd2.dst, &th.th_dport,
4650                                             saddr, &(*state)->state_key->gwy.addr,
4651                                             (*state)->state_key->gwy.port, NULL,
4652                                             pd2.ip_sum, icmpsum,
4653                                             pd->ip_sum, 0, pd2.af);
4654                                 }
4655                                 copyback = 1;
4656                         }
4657
4658                         if (copyback) {
4659                                 switch (pd2.af) {
4660 #ifdef INET
4661                                 case AF_INET:
4662                                         m_copyback(m, off, ICMP_MINLEN,
4663                                             (caddr_t)pd->hdr.icmp);
4664                                         m_copyback(m, ipoff2, sizeof(h2),
4665                                             (caddr_t)&h2);
4666                                         break;
4667 #endif /* INET */
4668 #ifdef INET6
4669                                 case AF_INET6:
4670                                         m_copyback(m, off,
4671                                             sizeof(struct icmp6_hdr),
4672                                             (caddr_t)pd->hdr.icmp6);
4673                                         m_copyback(m, ipoff2, sizeof(h2_6),
4674                                             (caddr_t)&h2_6);
4675                                         break;
4676 #endif /* INET6 */
4677                                 }
4678                                 m_copyback(m, off2, 8, (caddr_t)&th);
4679                         }
4680
4681                         return (PF_PASS);
4682                         break;
4683                 }
4684                 case IPPROTO_UDP: {
4685                         struct udphdr           uh;
4686
4687                         if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
4688                             NULL, reason, pd2.af)) {
4689                                 DPFPRINTF(PF_DEBUG_MISC,
4690                                     ("pf: ICMP error message too short "
4691                                     "(udp)\n"));
4692                                 return (PF_DROP);
4693                         }
4694
4695                         key.af = pd2.af;
4696                         key.proto = IPPROTO_UDP;
4697                         if (direction == PF_IN) {
4698                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4699                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4700                                 key.ext.port = uh.uh_dport;
4701                                 key.gwy.port = uh.uh_sport;
4702                         } else {
4703                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4704                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
4705                                 key.lan.port = uh.uh_dport;
4706                                 key.ext.port = uh.uh_sport;
4707                         }
4708
4709                         STATE_LOOKUP();
4710
4711                         if (STATE_TRANSLATE((*state)->state_key)) {
4712                                 if (direction == PF_IN) {
4713                                         pf_change_icmp(pd2.src, &uh.uh_sport,
4714                                             daddr,
4715                                             &(*state)->state_key->lan.addr,
4716                                             (*state)->state_key->lan.port,
4717                                             &uh.uh_sum,
4718                                             pd2.ip_sum, icmpsum,
4719                                             pd->ip_sum, 1, pd2.af);
4720                                 } else {
4721                                         pf_change_icmp(pd2.dst, &uh.uh_dport,
4722                                             saddr,
4723                                             &(*state)->state_key->gwy.addr,
4724                                             (*state)->state_key->gwy.port, &uh.uh_sum,
4725                                             pd2.ip_sum, icmpsum,
4726                                             pd->ip_sum, 1, pd2.af);
4727                                 }
4728                                 switch (pd2.af) {
4729 #ifdef INET
4730                                 case AF_INET:
4731                                         m_copyback(m, off, ICMP_MINLEN,
4732                                             (caddr_t)pd->hdr.icmp);
4733                                         m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4734                                         break;
4735 #endif /* INET */
4736 #ifdef INET6
4737                                 case AF_INET6:
4738                                         m_copyback(m, off,
4739                                             sizeof(struct icmp6_hdr),
4740                                             (caddr_t)pd->hdr.icmp6);
4741                                         m_copyback(m, ipoff2, sizeof(h2_6),
4742                                             (caddr_t)&h2_6);
4743                                         break;
4744 #endif /* INET6 */
4745                                 }
4746                                 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
4747                         }
4748
4749                         return (PF_PASS);
4750                         break;
4751                 }
4752 #ifdef INET
4753                 case IPPROTO_ICMP: {
4754                         struct icmp             iih;
4755
4756                         if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
4757                             NULL, reason, pd2.af)) {
4758                                 DPFPRINTF(PF_DEBUG_MISC,
4759                                     ("pf: ICMP error message too short i"
4760                                     "(icmp)\n"));
4761                                 return (PF_DROP);
4762                         }
4763
4764                         key.af = pd2.af;
4765                         key.proto = IPPROTO_ICMP;
4766                         if (direction == PF_IN) {
4767                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4768                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4769                                 key.ext.port = 0;
4770                                 key.gwy.port = iih.icmp_id;
4771                         } else {
4772                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4773                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
4774                                 key.lan.port = iih.icmp_id;
4775                                 key.ext.port = 0;
4776                         }
4777
4778                         STATE_LOOKUP();
4779
4780                         if (STATE_TRANSLATE((*state)->state_key)) {
4781                                 if (direction == PF_IN) {
4782                                         pf_change_icmp(pd2.src, &iih.icmp_id,
4783                                             daddr,
4784                                             &(*state)->state_key->lan.addr,
4785                                             (*state)->state_key->lan.port, NULL,
4786                                             pd2.ip_sum, icmpsum,
4787                                             pd->ip_sum, 0, AF_INET);
4788                                 } else {
4789                                         pf_change_icmp(pd2.dst, &iih.icmp_id,
4790                                             saddr,
4791                                             &(*state)->state_key->gwy.addr,
4792                                             (*state)->state_key->gwy.port, NULL,
4793                                             pd2.ip_sum, icmpsum,
4794                                             pd->ip_sum, 0, AF_INET);
4795                                 }
4796                                 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
4797                                 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4798                                 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
4799                         }
4800
4801                         return (PF_PASS);
4802                         break;
4803                 }
4804 #endif /* INET */
4805 #ifdef INET6
4806                 case IPPROTO_ICMPV6: {
4807                         struct icmp6_hdr        iih;
4808
4809                         if (!pf_pull_hdr(m, off2, &iih,
4810                             sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
4811                                 DPFPRINTF(PF_DEBUG_MISC,
4812                                     ("pf: ICMP error message too short "
4813                                     "(icmp6)\n"));
4814                                 return (PF_DROP);
4815                         }
4816
4817                         key.af = pd2.af;
4818                         key.proto = IPPROTO_ICMPV6;
4819                         if (direction == PF_IN) {
4820                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4821                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4822                                 key.ext.port = 0;
4823                                 key.gwy.port = iih.icmp6_id;
4824                         } else {
4825                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4826                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
4827                                 key.lan.port = iih.icmp6_id;
4828                                 key.ext.port = 0;
4829                         }
4830
4831                         STATE_LOOKUP();
4832
4833                         if (STATE_TRANSLATE((*state)->state_key)) {
4834                                 if (direction == PF_IN) {
4835                                         pf_change_icmp(pd2.src, &iih.icmp6_id,
4836                                             daddr,
4837                                             &(*state)->state_key->lan.addr,
4838                                             (*state)->state_key->lan.port, NULL,
4839                                             pd2.ip_sum, icmpsum,
4840                                             pd->ip_sum, 0, AF_INET6);
4841                                 } else {
4842                                         pf_change_icmp(pd2.dst, &iih.icmp6_id,
4843                                             saddr, &(*state)->state_key->gwy.addr,
4844                                             (*state)->state_key->gwy.port, NULL,
4845                                             pd2.ip_sum, icmpsum,
4846                                             pd->ip_sum, 0, AF_INET6);
4847                                 }
4848                                 m_copyback(m, off, sizeof(struct icmp6_hdr),
4849                                     (caddr_t)pd->hdr.icmp6);
4850                                 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
4851                                 m_copyback(m, off2, sizeof(struct icmp6_hdr),
4852                                     (caddr_t)&iih);
4853                         }
4854
4855                         return (PF_PASS);
4856                         break;
4857                 }
4858 #endif /* INET6 */
4859                 default: {
4860                         key.af = pd2.af;
4861                         key.proto = pd2.proto;
4862                         if (direction == PF_IN) {
4863                                 PF_ACPY(&key.ext.addr, pd2.dst, key.af);
4864                                 PF_ACPY(&key.gwy.addr, pd2.src, key.af);
4865                                 key.ext.port = 0;
4866                                 key.gwy.port = 0;
4867                         } else {
4868                                 PF_ACPY(&key.lan.addr, pd2.dst, key.af);
4869                                 PF_ACPY(&key.ext.addr, pd2.src, key.af);
4870                                 key.lan.port = 0;
4871                                 key.ext.port = 0;
4872                         }
4873
4874                         STATE_LOOKUP();
4875
4876                         if (STATE_TRANSLATE((*state)->state_key)) {
4877                                 if (direction == PF_IN) {
4878                                         pf_change_icmp(pd2.src, NULL,
4879                                             daddr,
4880                                             &(*state)->state_key->lan.addr,
4881                                             0, NULL,
4882                                             pd2.ip_sum, icmpsum,
4883                                             pd->ip_sum, 0, pd2.af);
4884                                 } else {
4885                                         pf_change_icmp(pd2.dst, NULL,
4886                                             saddr,
4887                                             &(*state)->state_key->gwy.addr,
4888                                             0, NULL,
4889                                             pd2.ip_sum, icmpsum,
4890                                             pd->ip_sum, 0, pd2.af);
4891                                 }
4892                                 switch (pd2.af) {
4893 #ifdef INET
4894                                 case AF_INET:
4895                                         m_copyback(m, off, ICMP_MINLEN,
4896                                             (caddr_t)pd->hdr.icmp);
4897                                         m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
4898                                         break;
4899 #endif /* INET */
4900 #ifdef INET6
4901                                 case AF_INET6:
4902                                         m_copyback(m, off,
4903                                             sizeof(struct icmp6_hdr),
4904                                             (caddr_t)pd->hdr.icmp6);
4905                                         m_copyback(m, ipoff2, sizeof(h2_6),
4906                                             (caddr_t)&h2_6);
4907                                         break;
4908 #endif /* INET6 */
4909                                 }
4910                         }
4911
4912                         return (PF_PASS);
4913                         break;
4914                 }
4915                 }
4916         }
4917 }
4918
4919 int
4920 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
4921     struct pf_pdesc *pd)
4922 {
4923         struct pf_state_peer    *src, *dst;
4924         struct pf_state_key_cmp  key;
4925
4926         key.af = pd->af;
4927         key.proto = pd->proto;
4928         if (direction == PF_IN) {
4929                 PF_ACPY(&key.ext.addr, pd->src, key.af);
4930                 PF_ACPY(&key.gwy.addr, pd->dst, key.af);
4931                 key.ext.port = 0;
4932                 key.gwy.port = 0;
4933         } else {
4934                 PF_ACPY(&key.lan.addr, pd->src, key.af);
4935                 PF_ACPY(&key.ext.addr, pd->dst, key.af);
4936                 key.lan.port = 0;
4937                 key.ext.port = 0;
4938         }
4939
4940         STATE_LOOKUP();
4941
4942         if (direction == (*state)->state_key->direction) {
4943                 src = &(*state)->src;
4944                 dst = &(*state)->dst;
4945         } else {
4946                 src = &(*state)->dst;
4947                 dst = &(*state)->src;
4948         }
4949
4950         /* update states */
4951         if (src->state < PFOTHERS_SINGLE)
4952                 src->state = PFOTHERS_SINGLE;
4953         if (dst->state == PFOTHERS_SINGLE)
4954                 dst->state = PFOTHERS_MULTIPLE;
4955
4956         /* update expire time */
4957         (*state)->expire = time_second;
4958         if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
4959                 (*state)->timeout = PFTM_OTHER_MULTIPLE;
4960         else
4961                 (*state)->timeout = PFTM_OTHER_SINGLE;
4962
4963         /* translate source/destination address, if necessary */
4964         if (STATE_TRANSLATE((*state)->state_key)) {
4965                 if (direction == PF_OUT)
4966                         switch (pd->af) {
4967 #ifdef INET
4968                         case AF_INET:
4969                                 pf_change_a(&pd->src->v4.s_addr,
4970                                     pd->ip_sum,
4971                                     (*state)->state_key->gwy.addr.v4.s_addr,
4972                                     0);
4973                                 break;
4974 #endif /* INET */
4975 #ifdef INET6
4976                         case AF_INET6:
4977                                 PF_ACPY(pd->src,
4978                                     &(*state)->state_key->gwy.addr, pd->af);
4979                                 break;
4980 #endif /* INET6 */
4981                         }
4982                 else
4983                         switch (pd->af) {
4984 #ifdef INET
4985                         case AF_INET:
4986                                 pf_change_a(&pd->dst->v4.s_addr,
4987                                     pd->ip_sum,
4988                                     (*state)->state_key->lan.addr.v4.s_addr,
4989                                     0);
4990                                 break;
4991 #endif /* INET */
4992 #ifdef INET6
4993                         case AF_INET6:
4994                                 PF_ACPY(pd->dst,
4995                                     &(*state)->state_key->lan.addr, pd->af);
4996                                 break;
4997 #endif /* INET6 */
4998                         }
4999         }
5000
5001         return (PF_PASS);
5002 }
5003
5004 /*
5005  * ipoff and off are measured from the start of the mbuf chain.
5006  * h must be at "ipoff" on the mbuf chain.
5007  */
5008 void *
5009 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
5010     u_short *actionp, u_short *reasonp, sa_family_t af)
5011 {
5012         switch (af) {
5013 #ifdef INET
5014         case AF_INET: {
5015                 struct ip       *h = mtod(m, struct ip *);
5016                 u_int16_t        fragoff = (h->ip_off & IP_OFFMASK) << 3;
5017
5018                 if (fragoff) {
5019                         if (fragoff >= len)
5020                                 ACTION_SET(actionp, PF_PASS);
5021                         else {
5022                                 ACTION_SET(actionp, PF_DROP);
5023                                 REASON_SET(reasonp, PFRES_FRAG);
5024                         }
5025                         return (NULL);
5026                 }
5027                 if (m->m_pkthdr.len < off + len ||
5028                     h->ip_len < off + len) {
5029                         ACTION_SET(actionp, PF_DROP);
5030                         REASON_SET(reasonp, PFRES_SHORT);
5031                         return (NULL);
5032                 }
5033                 break;
5034         }
5035 #endif /* INET */
5036 #ifdef INET6
5037         case AF_INET6: {
5038                 struct ip6_hdr  *h = mtod(m, struct ip6_hdr *);
5039
5040                 if (m->m_pkthdr.len < off + len ||
5041                     (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
5042                     (unsigned)(off + len)) {
5043                         ACTION_SET(actionp, PF_DROP);
5044                         REASON_SET(reasonp, PFRES_SHORT);
5045                         return (NULL);
5046                 }
5047                 break;
5048         }
5049 #endif /* INET6 */
5050         }
5051         m_copydata(m, off, len, p);
5052         return (p);
5053 }
5054
5055 int
5056 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
5057 {
5058         struct sockaddr_in      *dst;
5059         int                      ret = 1;
5060         int                      check_mpath;
5061 #ifdef INET6
5062         struct sockaddr_in6     *dst6;
5063         struct route_in6         ro;
5064 #else
5065         struct route             ro;
5066 #endif
5067         struct radix_node       *rn;
5068         struct rtentry          *rt;
5069         struct ifnet            *ifp;
5070
5071         check_mpath = 0;
5072         bzero(&ro, sizeof(ro));
5073         switch (af) {
5074         case AF_INET:
5075                 dst = satosin(&ro.ro_dst);
5076                 dst->sin_family = AF_INET;
5077                 dst->sin_len = sizeof(*dst);
5078                 dst->sin_addr = addr->v4;
5079                 break;
5080 #ifdef INET6
5081         case AF_INET6:
5082                 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5083                 dst6->sin6_family = AF_INET6;
5084                 dst6->sin6_len = sizeof(*dst6);
5085                 dst6->sin6_addr = addr->v6;
5086                 break;
5087 #endif /* INET6 */
5088         default:
5089                 return (0);
5090         }
5091
5092         /* Skip checks for ipsec interfaces */
5093         if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
5094                 goto out;
5095
5096         rtalloc_ign((struct route *)&ro, 0);
5097
5098         if (ro.ro_rt != NULL) {
5099                 /* No interface given, this is a no-route check */
5100                 if (kif == NULL)
5101                         goto out;
5102
5103                 if (kif->pfik_ifp == NULL) {
5104                         ret = 0;
5105                         goto out;
5106                 }
5107
5108                 /* Perform uRPF check if passed input interface */
5109                 ret = 0;
5110                 rn = (struct radix_node *)ro.ro_rt;
5111                 do {
5112                         rt = (struct rtentry *)rn;
5113                         ifp = rt->rt_ifp;
5114
5115                         if (kif->pfik_ifp == ifp)
5116                                 ret = 1;
5117                         rn = NULL;
5118                 } while (check_mpath == 1 && rn != NULL && ret == 0);
5119         } else
5120                 ret = 0;
5121 out:
5122         if (ro.ro_rt != NULL)
5123                 RTFREE(ro.ro_rt);
5124         return (ret);
5125 }
5126
5127 int
5128 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
5129 {
5130         struct sockaddr_in      *dst;
5131 #ifdef INET6
5132         struct sockaddr_in6     *dst6;
5133         struct route_in6         ro;
5134 #else
5135         struct route             ro;
5136 #endif
5137         int                      ret = 0;
5138
5139         ASSERT_LWKT_TOKEN_HELD(&pf_token);
5140
5141         bzero(&ro, sizeof(ro));
5142         switch (af) {
5143         case AF_INET:
5144                 dst = satosin(&ro.ro_dst);
5145                 dst->sin_family = AF_INET;
5146                 dst->sin_len = sizeof(*dst);
5147                 dst->sin_addr = addr->v4;
5148                 break;
5149 #ifdef INET6
5150         case AF_INET6:
5151                 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
5152                 dst6->sin6_family = AF_INET6;
5153                 dst6->sin6_len = sizeof(*dst6);
5154                 dst6->sin6_addr = addr->v6;
5155                 break;
5156 #endif /* INET6 */
5157         default:
5158                 return (0);
5159         }
5160
5161 rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING));
5162
5163         if (ro.ro_rt != NULL) {
5164                 RTFREE(ro.ro_rt);
5165         }
5166
5167         return (ret);
5168 }
5169
5170 #ifdef INET
5171 void
5172 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5173     struct pf_state *s, struct pf_pdesc *pd)
5174 {
5175         struct mbuf             *m0, *m1;
5176         struct route             iproute;
5177         struct route            *ro = NULL;
5178         struct sockaddr_in      *dst;
5179         struct ip               *ip;
5180         struct ifnet            *ifp = NULL;
5181         struct pf_addr           naddr;
5182         struct pf_src_node      *sn = NULL;
5183         int                      error = 0;
5184         int sw_csum;
5185 #ifdef IPSEC
5186         struct m_tag            *mtag;
5187 #endif /* IPSEC */
5188
5189         ASSERT_LWKT_TOKEN_HELD(&pf_token);
5190
5191         if (m == NULL || *m == NULL || r == NULL ||
5192             (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5193                 panic("pf_route: invalid parameters");
5194
5195         if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
5196                 (*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
5197                 (*m)->m_pkthdr.pf.routed = 1;
5198         } else {
5199                 if ((*m)->m_pkthdr.pf.routed++ > 3) {
5200                         m0 = *m;
5201                         *m = NULL;
5202                         goto bad;
5203                 }
5204         }
5205
5206         if (r->rt == PF_DUPTO) {
5207                 if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL) {
5208                         return;
5209                 }
5210         } else {
5211                 if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
5212                         return;
5213                 }
5214                 m0 = *m;
5215         }
5216
5217         if (m0->m_len < sizeof(struct ip)) {
5218                 DPFPRINTF(PF_DEBUG_URGENT,
5219                     ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5220                 goto bad;
5221         }
5222
5223         ip = mtod(m0, struct ip *);
5224
5225         ro = &iproute;
5226         bzero((caddr_t)ro, sizeof(*ro));
5227         dst = satosin(&ro->ro_dst);
5228         dst->sin_family = AF_INET;
5229         dst->sin_len = sizeof(*dst);
5230         dst->sin_addr = ip->ip_dst;
5231
5232         if (r->rt == PF_FASTROUTE) {
5233                 rtalloc(ro);
5234                 if (ro->ro_rt == 0) {
5235                         ipstat.ips_noroute++;
5236                         goto bad;
5237                 }
5238
5239                 ifp = ro->ro_rt->rt_ifp;
5240                 ro->ro_rt->rt_use++;
5241
5242                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
5243                         dst = satosin(ro->ro_rt->rt_gateway);
5244         } else {
5245                 if (TAILQ_EMPTY(&r->rpool.list)) {
5246                         DPFPRINTF(PF_DEBUG_URGENT,
5247                             ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
5248                         goto bad;
5249                 }
5250                 if (s == NULL) {
5251                         pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
5252                             &naddr, NULL, &sn);
5253                         if (!PF_AZERO(&naddr, AF_INET))
5254                                 dst->sin_addr.s_addr = naddr.v4.s_addr;
5255                         ifp = r->rpool.cur->kif ?
5256                             r->rpool.cur->kif->pfik_ifp : NULL;
5257                 } else {
5258                         if (!PF_AZERO(&s->rt_addr, AF_INET))
5259                                 dst->sin_addr.s_addr =
5260                                     s->rt_addr.v4.s_addr;
5261                         ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5262                 }
5263         }
5264         if (ifp == NULL)
5265                 goto bad;
5266
5267         if (oifp != ifp) {
5268                 if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
5269                         goto bad;
5270                 } else if (m0 == NULL) {
5271                         goto done;
5272                 }
5273                 if (m0->m_len < sizeof(struct ip)) {
5274                         DPFPRINTF(PF_DEBUG_URGENT,
5275                             ("pf_route: m0->m_len < sizeof(struct ip)\n"));
5276                         goto bad;
5277                 }
5278                 ip = mtod(m0, struct ip *);
5279         }
5280
5281         /* Copied from FreeBSD 5.1-CURRENT ip_output. */
5282         m0->m_pkthdr.csum_flags |= CSUM_IP;
5283         sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
5284         if (sw_csum & CSUM_DELAY_DATA) {
5285                 in_delayed_cksum(m0);
5286                 sw_csum &= ~CSUM_DELAY_DATA;
5287         }
5288         m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
5289
5290         if (ip->ip_len <= ifp->if_mtu ||
5291             (ifp->if_hwassist & CSUM_FRAGMENT &&
5292                 (ip->ip_off & IP_DF) == 0)) {
5293                 ip->ip_len = htons(ip->ip_len);
5294                 ip->ip_off = htons(ip->ip_off);
5295                 ip->ip_sum = 0;
5296                 if (sw_csum & CSUM_DELAY_IP) {
5297                         /* From KAME */
5298                         if (ip->ip_v == IPVERSION &&
5299                             (ip->ip_hl << 2) == sizeof(*ip)) {
5300                                 ip->ip_sum = in_cksum_hdr(ip);
5301                         } else {
5302                                 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
5303                         }
5304                 }
5305                 lwkt_reltoken(&pf_token);
5306                 error = ifp->if_output(ifp, m0, sintosa(dst), ro->ro_rt);
5307                 lwkt_gettoken(&pf_token);
5308                 goto done;
5309         }
5310
5311         /*
5312          * Too large for interface; fragment if possible.
5313          * Must be able to put at least 8 bytes per fragment.
5314          */
5315         if (ip->ip_off & IP_DF) {
5316                 ipstat.ips_cantfrag++;
5317                 if (r->rt != PF_DUPTO) {
5318                         icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
5319                                    ifp->if_mtu);
5320                         goto done;
5321                 } else
5322                         goto bad;
5323         }
5324
5325         m1 = m0;
5326         error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
5327         if (error) {
5328                 goto bad;
5329         }
5330
5331         for (m0 = m1; m0; m0 = m1) {
5332                 m1 = m0->m_nextpkt;
5333                 m0->m_nextpkt = 0;
5334                 if (error == 0) {
5335                         lwkt_reltoken(&pf_token);
5336                         error = (*ifp->if_output)(ifp, m0, sintosa(dst),
5337                                                   NULL);
5338                         lwkt_gettoken(&pf_token);
5339                 } else
5340                         m_freem(m0);
5341         }
5342
5343         if (error == 0)
5344                 ipstat.ips_fragmented++;
5345
5346 done:
5347         if (r->rt != PF_DUPTO)
5348                 *m = NULL;
5349         if (ro == &iproute && ro->ro_rt)
5350                 RTFREE(ro->ro_rt);
5351         return;
5352
5353 bad:
5354         m_freem(m0);
5355         goto done;
5356 }
5357 #endif /* INET */
5358
5359 #ifdef INET6
5360 void
5361 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
5362     struct pf_state *s, struct pf_pdesc *pd)
5363 {
5364         struct mbuf             *m0;
5365         struct route_in6         ip6route;
5366         struct route_in6        *ro;
5367         struct sockaddr_in6     *dst;
5368         struct ip6_hdr          *ip6;
5369         struct ifnet            *ifp = NULL;
5370         struct pf_addr           naddr;
5371         struct pf_src_node      *sn = NULL;
5372         int                      error = 0;
5373
5374         if (m == NULL || *m == NULL || r == NULL ||
5375             (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
5376                 panic("pf_route6: invalid parameters");
5377
5378         if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
5379                 (*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
5380                 (*m)->m_pkthdr.pf.routed = 1;
5381         } else {
5382                 if ((*m)->m_pkthdr.pf.routed++ > 3) {
5383                         m0 = *m;
5384                         *m = NULL;
5385                         goto bad;
5386                 }
5387         }
5388
5389         if (r->rt == PF_DUPTO) {
5390                 if ((m0 = m_dup(*m, MB_DONTWAIT)) == NULL)
5391                         return;
5392         } else {
5393                 if ((r->rt == PF_REPLYTO) == (r->direction == dir))
5394                         return;
5395                 m0 = *m;
5396         }
5397
5398         if (m0->m_len < sizeof(struct ip6_hdr)) {
5399                 DPFPRINTF(PF_DEBUG_URGENT,
5400                     ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5401                 goto bad;
5402         }
5403         ip6 = mtod(m0, struct ip6_hdr *);
5404
5405         ro = &ip6route;
5406         bzero((caddr_t)ro, sizeof(*ro));
5407         dst = (struct sockaddr_in6 *)&ro->ro_dst;
5408         dst->sin6_family = AF_INET6;
5409         dst->sin6_len = sizeof(*dst);
5410         dst->sin6_addr = ip6->ip6_dst;
5411
5412         /*
5413          * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
5414          * so make sure pf.flags is clear.
5415          *
5416          * Cheat. XXX why only in the v6 case???
5417          */
5418         if (r->rt == PF_FASTROUTE) {
5419                 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
5420                 m0->m_pkthdr.pf.flags = 0;
5421                 ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
5422                 return;
5423         }
5424
5425         if (TAILQ_EMPTY(&r->rpool.list)) {
5426                 DPFPRINTF(PF_DEBUG_URGENT,
5427                     ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
5428                 goto bad;
5429         }
5430         if (s == NULL) {
5431                 pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
5432                     &naddr, NULL, &sn);
5433                 if (!PF_AZERO(&naddr, AF_INET6))
5434                         PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5435                             &naddr, AF_INET6);
5436                 ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
5437         } else {
5438                 if (!PF_AZERO(&s->rt_addr, AF_INET6))
5439                         PF_ACPY((struct pf_addr *)&dst->sin6_addr,
5440                             &s->rt_addr, AF_INET6);
5441                 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
5442         }
5443         if (ifp == NULL)
5444                 goto bad;
5445
5446         if (oifp != ifp) {
5447                 if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
5448                         goto bad;
5449                 } else if (m0 == NULL) {
5450                         goto done;
5451                 }
5452                 if (m0->m_len < sizeof(struct ip6_hdr)) {
5453                         DPFPRINTF(PF_DEBUG_URGENT,
5454                             ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
5455                         goto bad;
5456                 }
5457                 ip6 = mtod(m0, struct ip6_hdr *);
5458         }
5459
5460         /*
5461          * If the packet is too large for the outgoing interface,
5462          * send back an icmp6 error.
5463          */
5464         if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
5465                 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
5466         if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
5467                 error = nd6_output(ifp, ifp, m0, dst, NULL);
5468         } else {
5469                 in6_ifstat_inc(ifp, ifs6_in_toobig);
5470                 if (r->rt != PF_DUPTO) {
5471                         icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
5472                  } else
5473                         goto bad;
5474         }
5475
5476 done:
5477         if (r->rt != PF_DUPTO)
5478                 *m = NULL;
5479         return;
5480
5481 bad:
5482         m_freem(m0);
5483         goto done;
5484 }
5485
5486 #endif /* INET6 */
5487
5488
5489 /*
5490  * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
5491  *   off is the offset where the protocol header starts
5492  *   len is the total length of protocol header plus payload
5493  * returns 0 when the checksum is valid, otherwise returns 1.
5494  */
5495 /*
5496  * XXX
5497  * FreeBSD supports cksum offload for the following drivers.
5498  * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
5499  * If we can make full use of it we would outperform ipfw/ipfilter in
5500  * very heavy traffic. 
5501  * I have not tested 'cause I don't have NICs that supports cksum offload.
5502  * (There might be problems. Typical phenomena would be
5503  *   1. No route message for UDP packet.
5504  *   2. No connection acceptance from external hosts regardless of rule set.)
5505  */
5506 int
5507 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
5508     sa_family_t af)
5509 {
5510         u_int16_t sum = 0;
5511         int hw_assist = 0;
5512         struct ip *ip;
5513
5514         if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
5515                 return (1);
5516         if (m->m_pkthdr.len < off + len)
5517                 return (1);
5518
5519         switch (p) {
5520         case IPPROTO_TCP:
5521         case IPPROTO_UDP:
5522                 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
5523                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
5524                                 sum = m->m_pkthdr.csum_data;
5525                         } else {
5526                                 ip = mtod(m, struct ip *);      
5527                                 sum = in_pseudo(ip->ip_src.s_addr,
5528                                         ip->ip_dst.s_addr, htonl((u_short)len +
5529                                         m->m_pkthdr.csum_data + p));
5530                         }
5531                         sum ^= 0xffff;
5532                         ++hw_assist;
5533                 }
5534                 break;
5535         case IPPROTO_ICMP:
5536 #ifdef INET6
5537         case IPPROTO_ICMPV6:
5538 #endif /* INET6 */
5539                 break;
5540         default:
5541                 return (1);
5542         }
5543
5544         if (!hw_assist) {
5545                 switch (af) {
5546                 case AF_INET:
5547                         if (p == IPPROTO_ICMP) {
5548                                 if (m->m_len < off)
5549                                         return (1);
5550                                 m->m_data += off;
5551                                 m->m_len -= off;
5552                                 sum = in_cksum(m, len);
5553                                 m->m_data -= off;
5554                                 m->m_len += off;
5555                         } else {
5556                                 if (m->m_len < sizeof(struct ip))
5557                                         return (1);
5558                                 sum = in_cksum_range(m, p, off, len);
5559                                 if (sum == 0) {
5560                                         m->m_pkthdr.csum_flags |=
5561                                             (CSUM_DATA_VALID |
5562                                              CSUM_PSEUDO_HDR);
5563                                         m->m_pkthdr.csum_data = 0xffff;
5564                                 }
5565                         }
5566                         break;
5567 #ifdef INET6
5568                 case AF_INET6:
5569                         if (m->m_len < sizeof(struct ip6_hdr))
5570                                 return (1);
5571                         sum = in6_cksum(m, p, off, len);
5572                         /*
5573                          * XXX
5574                          * IPv6 H/W cksum off-load not supported yet!
5575                          *
5576                          * if (sum == 0) {
5577                          *      m->m_pkthdr.csum_flags |=
5578                          *          (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
5579                          *      m->m_pkthdr.csum_data = 0xffff;
5580                          *}
5581                          */
5582                         break;
5583 #endif /* INET6 */
5584                 default:
5585                         return (1);
5586                 }
5587         }
5588         if (sum) {
5589                 switch (p) {
5590                 case IPPROTO_TCP:
5591                         tcpstat.tcps_rcvbadsum++;
5592                         break;
5593                 case IPPROTO_UDP:
5594                         udpstat.udps_badsum++;
5595                         break;
5596                 case IPPROTO_ICMP:
5597                         icmpstat.icps_checksum++;
5598                         break;
5599 #ifdef INET6
5600                 case IPPROTO_ICMPV6:
5601                         icmp6stat.icp6s_checksum++;
5602                         break;
5603 #endif /* INET6 */
5604                 }
5605                 return (1);
5606         }
5607         return (0);
5608 }
5609
5610 #ifdef INET
5611 int
5612 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
5613     struct ether_header *eh, struct inpcb *inp)
5614 {
5615         struct pfi_kif          *kif;
5616         u_short                  action, reason = 0, log = 0;
5617         struct mbuf             *m = *m0;
5618         struct ip               *h = NULL;
5619         struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
5620         struct pf_state         *s = NULL;
5621         struct pf_state_key     *sk = NULL;
5622         struct pf_ruleset       *ruleset = NULL;
5623         struct pf_pdesc          pd;
5624         int                      off, dirndx, pqid = 0;
5625
5626         if (!pf_status.running)
5627                 return (PF_PASS);
5628
5629         memset(&pd, 0, sizeof(pd));
5630         if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
5631                 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
5632         else
5633                 kif = (struct pfi_kif *)ifp->if_pf_kif;
5634
5635         if (kif == NULL) {
5636                 DPFPRINTF(PF_DEBUG_URGENT,
5637                     ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
5638                 return (PF_DROP);
5639         }
5640         if (kif->pfik_flags & PFI_IFLAG_SKIP)
5641                 return (PF_PASS);
5642
5643 #ifdef DIAGNOSTIC
5644         if ((m->m_flags & M_PKTHDR) == 0)
5645                 panic("non-M_PKTHDR is passed to pf_test");
5646 #endif /* DIAGNOSTIC */
5647
5648         if (m->m_pkthdr.len < (int)sizeof(*h)) {
5649                 action = PF_DROP;
5650                 REASON_SET(&reason, PFRES_SHORT);
5651                 log = 1;
5652                 goto done;
5653         }
5654
5655         /*
5656          * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
5657          * so make sure pf.flags is clear.
5658          */
5659         if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
5660                 return (PF_PASS);
5661         m->m_pkthdr.pf.flags = 0;
5662
5663         /* We do IP header normalization and packet reassembly here */
5664         if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
5665                 action = PF_DROP;
5666                 goto done;
5667         }
5668         m = *m0;        /* pf_normalize messes with m0 */
5669         h = mtod(m, struct ip *);
5670
5671         off = h->ip_hl << 2;
5672         if (off < (int)sizeof(*h)) {
5673                 action = PF_DROP;
5674                 REASON_SET(&reason, PFRES_SHORT);
5675                 log = 1;
5676                 goto done;
5677         }
5678
5679         pd.src = (struct pf_addr *)&h->ip_src;
5680         pd.dst = (struct pf_addr *)&h->ip_dst;
5681         PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
5682         pd.ip_sum = &h->ip_sum;
5683         pd.proto = h->ip_p;
5684         pd.af = AF_INET;
5685         pd.tos = h->ip_tos;
5686         pd.tot_len = h->ip_len;
5687         pd.eh = eh;
5688
5689         /* handle fragments that didn't get reassembled by normalization */
5690         if (h->ip_off & (IP_MF | IP_OFFMASK)) {
5691                 action = pf_test_fragment(&r, dir, kif, m, h,
5692                     &pd, &a, &ruleset);
5693                 goto done;
5694         }
5695
5696         switch (h->ip_p) {
5697
5698         case IPPROTO_TCP: {
5699                 struct tcphdr   th;
5700
5701                 pd.hdr.tcp = &th;
5702                 if (!pf_pull_hdr(m, off, &th, sizeof(th),
5703                     &action, &reason, AF_INET)) {
5704                         log = action != PF_PASS;
5705                         goto done;
5706                 }
5707                 pd.p_len = pd.tot_len - off - (th.th_off << 2);
5708                 if ((th.th_flags & TH_ACK) && pd.p_len == 0)
5709                         pqid = 1;
5710                 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
5711                 if (action == PF_DROP)
5712                         goto done;
5713                 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
5714                     &reason);
5715                 if (action == PF_PASS) {
5716 #if NPFSYNC
5717                         pfsync_update_state(s);
5718 #endif /* NPFSYNC */
5719                         r = s->rule.ptr;
5720                         a = s->anchor.ptr;
5721                         log = s->log;
5722                 } else if (s == NULL)
5723                         action = pf_test_rule(&r, &s, dir, kif,
5724                             m, off, h, &pd, &a, &ruleset, NULL, inp);
5725                 break;
5726         }
5727
5728         case IPPROTO_UDP: {
5729                 struct udphdr   uh;
5730
5731                 pd.hdr.udp = &uh;
5732                 if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
5733                     &action, &reason, AF_INET)) {
5734                         log = action != PF_PASS;
5735                         goto done;
5736                 }
5737                 if (uh.uh_dport == 0 ||
5738                     ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
5739                     ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
5740                         action = PF_DROP;
5741                         REASON_SET(&reason, PFRES_SHORT);
5742                         goto done;
5743                 }
5744                 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
5745                 if (action == PF_PASS) {
5746 #if NPFSYNC
5747                         pfsync_update_state(s);
5748 #endif /* NPFSYNC */
5749                         r = s->rule.ptr;
5750                         a = s->anchor.ptr;
5751                         log = s->log;
5752                 } else if (s == NULL)
5753                         action = pf_test_rule(&r, &s, dir, kif,
5754                             m, off, h, &pd, &a, &ruleset, NULL, inp);
5755                 break;
5756         }
5757
5758         case IPPROTO_ICMP: {
5759                 struct icmp     ih;
5760
5761                 pd.hdr.icmp = &ih;
5762                 if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
5763                     &action, &reason, AF_INET)) {
5764                         log = action != PF_PASS;
5765                         goto done;
5766                 }
5767                 action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
5768                     &reason);
5769                 if (action == PF_PASS) {
5770 #if NPFSYNC
5771                         pfsync_update_state(s);
5772 #endif /* NPFSYNC */
5773                         r = s->rule.ptr;
5774                         a = s->anchor.ptr;
5775                         log = s->log;
5776                 } else if (s == NULL)
5777                         action = pf_test_rule(&r, &s, dir, kif,
5778                             m, off, h, &pd, &a, &ruleset, NULL, inp);
5779                 break;
5780         }
5781
5782         default:
5783                 action = pf_test_state_other(&s, dir, kif, &pd);
5784                 if (action == PF_PASS) {
5785 #if NPFSYNC
5786                         pfsync_update_state(s);
5787 #endif /* NPFSYNC */
5788                         r = s->rule.ptr;
5789                         a = s->anchor.ptr;
5790                         log = s->log;
5791                 } else if (s == NULL)
5792                         action = pf_test_rule(&r, &s, dir, kif, m, off, h,
5793                             &pd, &a, &ruleset, NULL, inp);
5794                 break;
5795         }
5796
5797 done:
5798         if (action == PF_PASS && h->ip_hl > 5 &&
5799             !((s && s->allow_opts) || r->allow_opts)) {
5800                 action = PF_DROP;
5801                 REASON_SET(&reason, PFRES_IPOPTIONS);
5802                 log = 1;
5803                 DPFPRINTF(PF_DEBUG_MISC,
5804                     ("pf: dropping packet with ip options\n"));
5805         }
5806
5807         if ((s && s->tag) || r->rtableid)
5808                 pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
5809
5810 #ifdef ALTQ
5811         if (action == PF_PASS && r->qid) {
5812                 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
5813                 if (pqid || (pd.tos & IPTOS_LOWDELAY))
5814                         m->m_pkthdr.pf.qid = r->pqid;
5815                 else
5816                         m->m_pkthdr.pf.qid = r->qid;
5817                 m->m_pkthdr.pf.ecn_af = AF_INET;
5818                 m->m_pkthdr.pf.hdr = h;
5819                 /* add connection hash for fairq */
5820                 if (s) {
5821                         /* for fairq */
5822                         m->m_pkthdr.pf.state_hash = s->hash;
5823                         m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
5824                 }
5825         }
5826 #endif /* ALTQ */
5827
5828         /*
5829          * connections redirected to loopback should not match sockets
5830          * bound specifically to loopback due to security implications,
5831          * see tcp_input() and in_pcblookup_listen().
5832          */
5833         if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
5834             pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
5835             (s->nat_rule.ptr->action == PF_RDR ||
5836             s->nat_rule.ptr->action == PF_BINAT) &&
5837             (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
5838                 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
5839
5840         if (log) {
5841                 struct pf_rule *lr;
5842
5843                 if (s != NULL && s->nat_rule.ptr != NULL &&
5844                     s->nat_rule.ptr->log & PF_LOG_ALL)
5845                         lr = s->nat_rule.ptr;
5846                 else
5847                         lr = r;
5848                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
5849                     &pd);
5850         }
5851
5852         kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
5853         kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
5854
5855         if (action == PF_PASS || r->action == PF_DROP) {
5856                 dirndx = (dir == PF_OUT);
5857                 r->packets[dirndx]++;
5858                 r->bytes[dirndx] += pd.tot_len;
5859                 if (a != NULL) {
5860                         a->packets[dirndx]++;
5861                         a->bytes[dirndx] += pd.tot_len;
5862                 }
5863                 if (s != NULL) {
5864                         sk = s->state_key;
5865                         if (s->nat_rule.ptr != NULL) {
5866                                 s->nat_rule.ptr->packets[dirndx]++;
5867                                 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
5868                         }
5869                         if (s->src_node != NULL) {
5870                                 s->src_node->packets[dirndx]++;
5871                                 s->src_node->bytes[dirndx] += pd.tot_len;
5872                         }
5873                         if (s->nat_src_node != NULL) {
5874                                 s->nat_src_node->packets[dirndx]++;
5875                                 s->nat_src_node->bytes[dirndx] += pd.tot_len;
5876                         }
5877                         dirndx = (dir == sk->direction) ? 0 : 1;
5878                         s->packets[dirndx]++;
5879                         s->bytes[dirndx] += pd.tot_len;
5880                 }
5881                 tr = r;
5882                 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
5883                 if (nr != NULL) {
5884                         struct pf_addr *x;
5885                         /*
5886                          * XXX: we need to make sure that the addresses
5887                          * passed to pfr_update_stats() are the same than
5888                          * the addresses used during matching (pfr_match)
5889                          */
5890                         if (r == &pf_default_rule) {
5891                                 tr = nr;
5892                                 x = (sk == NULL || sk->direction == dir) ?
5893                                     &pd.baddr : &pd.naddr;
5894                         } else
5895                                 x = (sk == NULL || sk->direction == dir) ?
5896                                     &pd.naddr : &pd.baddr;
5897                         if (x == &pd.baddr || s == NULL) {
5898                                 /* we need to change the address */
5899                                 if (dir == PF_OUT)
5900                                         pd.src = x;
5901                                 else
5902                                         pd.dst = x;
5903                         }
5904                 }
5905                 if (tr->src.addr.type == PF_ADDR_TABLE)
5906                         pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
5907                             sk->direction == dir) ?
5908                             pd.src : pd.dst, pd.af,
5909                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
5910                             tr->src.neg);
5911                 if (tr->dst.addr.type == PF_ADDR_TABLE)
5912                         pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
5913                             sk->direction == dir) ? pd.dst : pd.src, pd.af,
5914                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
5915                             tr->dst.neg);
5916         }
5917
5918
5919         if (action == PF_SYNPROXY_DROP) {
5920                 m_freem(*m0);
5921                 *m0 = NULL;
5922                 action = PF_PASS;
5923         } else if (r->rt)
5924                 /* pf_route can free the mbuf causing *m0 to become NULL */
5925                 pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
5926
5927         return (action);
5928 }
5929 #endif /* INET */
5930
5931 #ifdef INET6
/*
 * pf_test6: run one IPv6 packet through the packet filter.
 *
 *   dir  - packet direction; compared against PF_IN and PF_OUT below.
 *   ifp  - interface the packet is on; used to locate the pfi_kif.
 *   m0   - in/out pointer to the packet mbuf.  *m0 is re-read after
 *          pf_normalize_ip6(), and is freed and set to NULL when the
 *          verdict is PF_SYNPROXY_DROP.  pf_route6() may also free it.
 *   eh   - stored in pd.eh; not otherwise used in this function.
 *   inp  - handed through unchanged to pf_test_rule().
 *
 * Returns the resulting action.  PF_PASS is returned immediately when pf
 * is not running, when the interface has PFI_IFLAG_SKIP set, or when the
 * mbuf already carries PF_MBUF_TAGGED; PF_DROP is returned when the
 * interface has no kif.  PF_SYNPROXY_DROP is never returned to the
 * caller: it is converted to PF_PASS after the mbuf has been freed.
 */
5932 int
5933 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
5934     struct ether_header *eh, struct inpcb *inp)
5935 {
5936         struct pfi_kif          *kif;
5937         u_short                  action, reason = 0, log = 0;
5938         struct mbuf             *m = *m0, *n = NULL;
5939         struct ip6_hdr          *h = NULL;
5940         struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
5941         struct pf_state         *s = NULL;
5942         struct pf_state_key     *sk = NULL;
5943         struct pf_ruleset       *ruleset = NULL;
5944         struct pf_pdesc          pd;
5945         int                      off, terminal = 0, dirndx, rh_cnt = 0;
5946
             /* Filtering disabled: let everything through untouched. */
5947         if (!pf_status.running)
5948                 return (PF_PASS);
5949
5950         memset(&pd, 0, sizeof(pd));
             /*
              * A CARP interface with an underlying carpdev is accounted
              * against the carpdev's kif instead of its own.
              */
5951         if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
5952                 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
5953         else
5954                 kif = (struct pfi_kif *)ifp->if_pf_kif;
5955
5956         if (kif == NULL) {
5957                 DPFPRINTF(PF_DEBUG_URGENT,
5958                     ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
5959                 return (PF_DROP);
5960         }
             /* Interfaces flagged PFI_IFLAG_SKIP are not filtered at all. */
5961         if (kif->pfik_flags & PFI_IFLAG_SKIP)
5962                 return (PF_PASS);
5963
5964 #ifdef DIAGNOSTIC
5965         if ((m->m_flags & M_PKTHDR) == 0)
5966                 panic("non-M_PKTHDR is passed to pf_test6");
5967 #endif /* DIAGNOSTIC */
5968
             /*
              * Packet is shorter than a fixed IPv6 header.
              * NOTE(review): h is still NULL on this path and, because
              * log is set, it is handed to PFLOG_PACKET() at done --
              * confirm the logger tolerates a NULL header pointer.
              */
5969         if (m->m_pkthdr.len < (int)sizeof(*h)) {
5970                 action = PF_DROP;
5971                 REASON_SET(&reason, PFRES_SHORT);
5972                 log = 1;
5973                 goto done;
5974         }
5975
5976         /*
5977          * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
5978          * so make sure pf.flags is clear.
              *
              * An mbuf that already has PF_MBUF_TAGGED set in fw_flags is
              * passed without any further inspection.
5979          */
5980         if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
5981                 return (PF_PASS);
5982         m->m_pkthdr.pf.flags = 0;
5983
5984         /* We do IP header normalization and packet reassembly here */
             /*
              * NOTE(review): on failure m is not re-read from *m0 before
              * jumping to done, where m may still be used -- confirm that
              * pf_normalize_ip6() leaves the original mbuf valid when it
              * does not return PF_PASS.
              */
5985         if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
5986                 action = PF_DROP;
5987                 goto done;
5988         }
5989         m = *m0;        /* pf_normalize messes with m0 */
5990         h = mtod(m, struct ip6_hdr *);
5991
5992 #if 1
5993         /*
5994          * we do not support jumbogram yet.  if we keep going, zero ip6_plen
5995          * will do something bad, so drop the packet for now.
              *
              * (htons() is used where ntohs() would be expected; for a
              * comparison against zero the two are interchangeable.)
5996          */
5997         if (htons(h->ip6_plen) == 0) {
5998                 action = PF_DROP;
5999                 REASON_SET(&reason, PFRES_NORM);        /*XXX*/
6000                 goto done;
6001         }
6002 #endif
6003
             /*
              * Fill in the packet descriptor.  pd.src/pd.dst point directly
              * into the IPv6 header; pd.baddr keeps a copy of the source
              * (outbound) or destination (inbound) address.
              */
6004         pd.src = (struct pf_addr *)&h->ip6_src;
6005         pd.dst = (struct pf_addr *)&h->ip6_dst;
6006         PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
6007         pd.ip_sum = NULL;
6008         pd.af = AF_INET6;
6009         pd.tos = 0;
             /* total length = payload length field + fixed IPv6 header */
6010         pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
6011         pd.eh = eh;
6012
             /*
              * off is the offset, relative to m_data, of whatever follows
              * the fixed IPv6 header.  The loop below walks the chain of
              * extension headers, advancing off and pd.proto, until it
              * reaches a protocol it does not treat as an extension header
              * (terminal).  A fragment header ends processing immediately
              * via pf_test_fragment().
              */
6013         off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
6014         pd.proto = h->ip6_nxt;
6015         do {
6016                 switch (pd.proto) {
6017                 case IPPROTO_FRAGMENT:
6018                         action = pf_test_fragment(&r, dir, kif, m, h,
6019                             &pd, &a, &ruleset);
6020                         if (action == PF_DROP)
6021                                 REASON_SET(&reason, PFRES_FRAG);
6022                         goto done;
6023                 case IPPROTO_ROUTING: {
6024                         struct ip6_rthdr rthdr;
6025
                             /*
                              * rh_cnt counts routing headers: the first one
                              * is remembered (and checked again at done), a
                              * second one causes an immediate drop.
                              */
6026                         if (rh_cnt++) {
6027                                 DPFPRINTF(PF_DEBUG_MISC,
6028                                     ("pf: IPv6 more than one rthdr\n"));
6029                                 action = PF_DROP;
6030                                 REASON_SET(&reason, PFRES_IPOPTIONS);
6031                                 log = 1;
6032                                 goto done;
6033                         }
6034                         if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
6035                             &reason, pd.af)) {
6036                                 DPFPRINTF(PF_DEBUG_MISC,
6037                                     ("pf: IPv6 short rthdr\n"));
6038                                 action = PF_DROP;
6039                                 REASON_SET(&reason, PFRES_SHORT);
6040                                 log = 1;
6041                                 goto done;
6042                         }
                             /* Type 0 routing headers are always dropped. */
6043                         if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
6044                                 DPFPRINTF(PF_DEBUG_MISC,
6045                                     ("pf: IPv6 rthdr0\n"));
6046                                 action = PF_DROP;
6047                                 REASON_SET(&reason, PFRES_IPOPTIONS);
6048                                 log = 1;
6049                                 goto done;
6050                         }
6051                         /* FALLTHROUGH */
6052                 }
6053                 case IPPROTO_AH:
6054                 case IPPROTO_HOPOPTS:
6055                 case IPPROTO_DSTOPTS: {
6056                         /* get next header and header length */
6057                         struct ip6_ext  opt6;
6058
                             /*
                              * reason is not set explicitly on this failure
                              * path; pf_pull_hdr() is given &reason --
                              * presumably it fills it in (helper not visible
                              * here).
                              */
6059                         if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
6060                             NULL, &reason, pd.af)) {
6061                                 DPFPRINTF(PF_DEBUG_MISC,
6062                                     ("pf: IPv6 short opt\n"));
6063                                 action = PF_DROP;
6064                                 log = 1;
6065                                 goto done;
6066                         }
                             /*
                              * The length field is scaled differently: AH is
                              * advanced in 4-byte units as (len + 2), every
                              * other header here in 8-byte units as (len + 1).
                              */
6067                         if (pd.proto == IPPROTO_AH)
6068                                 off += (opt6.ip6e_len + 2) * 4;
6069                         else
6070                                 off += (opt6.ip6e_len + 1) * 8;
6071                         pd.proto = opt6.ip6e_nxt;
6072                         /* goto the next header */
6073                         break;
6074                 }
6075                 default:
6076                         terminal++;
6077                         break;
6078                 }
6079         } while (!terminal);
6080
6081         /* if there's no routing header, use unmodified mbuf for checksumming */
             /*
              * Within this function n is only ever NULL or m: nothing above
              * assigns it.  The n != m cleanup at done is therefore only
              * taken on paths that jump to done before this point, where n
              * is still NULL.
              */
6082         if (!n)
6083                 n = m;
6084
             /*
              * Per-protocol handling.  Each case follows the same pattern:
              * try to match an existing state first; on PF_PASS adopt the
              * state's rule, anchor and log setting; if no state was found
              * at all (s == NULL) evaluate the ruleset via pf_test_rule().
              * A state that was found but did not yield PF_PASS keeps the
              * action returned by the state test.
              */
6085         switch (pd.proto) {
6086
6087         case IPPROTO_TCP: {
6088                 struct tcphdr   th;
6089
6090                 pd.hdr.tcp = &th;
6091                 if (!pf_pull_hdr(m, off, &th, sizeof(th),
6092                     &action, &reason, AF_INET6)) {
6093                         log = action != PF_PASS;
6094                         goto done;
6095                 }
                     /* payload length = total - headers up to and incl. TCP */
6096                 pd.p_len = pd.tot_len - off - (th.th_off << 2);
6097                 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
6098                 if (action == PF_DROP)
6099                         goto done;
6100                 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
6101                     &reason);
6102                 if (action == PF_PASS) {
6103 #if NPFSYNC
6104                         pfsync_update_state(s);
6105 #endif /* NPFSYNC */
6106                         r = s->rule.ptr;
6107                         a = s->anchor.ptr;
6108                         log = s->log;
6109                 } else if (s == NULL)
6110                         action = pf_test_rule(&r, &s, dir, kif,
6111                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6112                 break;
6113         }
6114
6115         case IPPROTO_UDP: {
6116                 struct udphdr   uh;
6117
6118                 pd.hdr.udp = &uh;
6119                 if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
6120                     &action, &reason, AF_INET6)) {
6121                         log = action != PF_PASS;
6122                         goto done;
6123                 }
                     /*
                      * Drop UDP with destination port 0, or with a length
                      * field that exceeds what is left in the packet or is
                      * smaller than a UDP header.
                      */
6124                 if (uh.uh_dport == 0 ||
6125                     ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
6126                     ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
6127                         action = PF_DROP;
6128                         REASON_SET(&reason, PFRES_SHORT);
6129                         goto done;
6130                 }
6131                 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
6132                 if (action == PF_PASS) {
6133 #if NPFSYNC
6134                         pfsync_update_state(s);
6135 #endif /* NPFSYNC */
6136                         r = s->rule.ptr;
6137                         a = s->anchor.ptr;
6138                         log = s->log;
6139                 } else if (s == NULL)
6140                         action = pf_test_rule(&r, &s, dir, kif,
6141                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6142                 break;
6143         }
6144
6145         case IPPROTO_ICMPV6: {
6146                 struct icmp6_hdr        ih;
6147
6148                 pd.hdr.icmp6 = &ih;
6149                 if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
6150                     &action, &reason, AF_INET6)) {
6151                         log = action != PF_PASS;
6152                         goto done;
6153                 }
6154                 action = pf_test_state_icmp(&s, dir, kif,
6155                     m, off, h, &pd, &reason);
6156                 if (action == PF_PASS) {
6157 #if NPFSYNC
6158                         pfsync_update_state(s);
6159 #endif /* NPFSYNC */
6160                         r = s->rule.ptr;
6161                         a = s->anchor.ptr;
6162                         log = s->log;
6163                 } else if (s == NULL)
6164                         action = pf_test_rule(&r, &s, dir, kif,
6165                             m, off, h, &pd, &a, &ruleset, NULL, inp);
6166                 break;
6167         }
6168
             /* Any other upper-layer protocol: no header is pulled. */
6169         default:
6170                 action = pf_test_state_other(&s, dir, kif, &pd);
6171                 if (action == PF_PASS) {
6172 #if NPFSYNC
6173                         pfsync_update_state(s);
6174 #endif /* NPFSYNC */
6175                         r = s->rule.ptr;
6176                         a = s->anchor.ptr;
6177                         log = s->log;
6178                 } else if (s == NULL)
6179                         action = pf_test_rule(&r, &s, dir, kif, m, off, h,
6180                             &pd, &a, &ruleset, NULL, inp);
6181                 break;
6182         }
6183
6184 done:
             /*
              * NOTE(review): since n is only ever NULL or m (see above),
              * this can only call m_freem(NULL), on the early goto-done
              * paths -- presumably harmless; confirm m_freem() accepts NULL.
              */
6185         if (n != m) {
6186                 m_freem(n);
6187                 n = NULL;
6188         }
6189
6190         /* handle dangerous IPv6 extension headers. */
             /*
              * rh_cnt != 0 means a routing header was seen.  Such a packet
              * is only allowed through if the state or the rule has
              * allow_opts set.
              */
6191         if (action == PF_PASS && rh_cnt &&
6192             !((s && s->allow_opts) || r->allow_opts)) {
6193                 action = PF_DROP;
6194                 REASON_SET(&reason, PFRES_IPOPTIONS);
6195                 log = 1;
6196                 DPFPRINTF(PF_DEBUG_MISC,
6197                     ("pf: dropping packet with dangerous v6 headers\n"));
6198         }
6199
6200         if ((s && s->tag) || r->rtableid)
6201                 pf_tag_packet(m, s ? s->tag : 0, r->rtableid);
6202
6203 #ifdef ALTQ
             /*
              * Record the queue id and header pointer in the mbuf packet
              * header for passed packets whose rule has a queue assigned.
              * NOTE(review): pd.tos is set to 0 above and not changed in
              * this function, so unless a callee updates it through &pd the
              * r->pqid branch is never taken for IPv6 -- confirm.
              */
6204         if (action == PF_PASS && r->qid) {
6205                 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
6206                 if (pd.tos & IPTOS_LOWDELAY)
6207                         m->m_pkthdr.pf.qid = r->pqid;
6208                 else
6209                         m->m_pkthdr.pf.qid = r->qid;
6210                 m->m_pkthdr.pf.ecn_af = AF_INET6;
6211                 m->m_pkthdr.pf.hdr = h;
6212                 if (s) {
6213                         /* for fairq */
6214                         m->m_pkthdr.pf.state_hash = s->hash;
6215                         m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
6216                 }
6217         }
6218 #endif /* ALTQ */
6219
             /*
              * Mark inbound TCP/UDP packets that passed via a state whose
              * NAT rule is an rdr or binat rule and whose destination is
              * the IPv6 loopback address.
              */
6220         if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
6221             pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
6222             (s->nat_rule.ptr->action == PF_RDR ||
6223             s->nat_rule.ptr->action == PF_BINAT) &&
6224             IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
6225                 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
6226
6227         if (log) {
6228                 struct pf_rule *lr;
6229
                     /*
                      * Log against the state's NAT rule when that rule has
                      * PF_LOG_ALL set, otherwise against the matching rule.
                      */
6230                 if (s != NULL && s->nat_rule.ptr != NULL &&
6231                     s->nat_rule.ptr->log & PF_LOG_ALL)
6232                         lr = s->nat_rule.ptr;
6233                 else
6234                         lr = r;
6235                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
6236                     &pd);
6237         }
6238
             /*
              * Interface counters, indexed [1][outbound?][not passed?].
              * The first index is 1 here; the IPv4 path uses 0.
              */
6239         kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
6240         kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
6241
             /*
              * Rule, anchor, state, source-node and table statistics are
              * updated when the packet passed, or when the matching rule is
              * itself a drop rule.
              */
6242         if (action == PF_PASS || r->action == PF_DROP) {
6243                 dirndx = (dir == PF_OUT);
6244                 r->packets[dirndx]++;
6245                 r->bytes[dirndx] += pd.tot_len;
6246                 if (a != NULL) {
6247                         a->packets[dirndx]++;
6248                         a->bytes[dirndx] += pd.tot_len;
6249                 }
6250                 if (s != NULL) {
6251                         sk = s->state_key;
6252                         if (s->nat_rule.ptr != NULL) {
6253                                 s->nat_rule.ptr->packets[dirndx]++;
6254                                 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
6255                         }
6256                         if (s->src_node != NULL) {
6257                                 s->src_node->packets[dirndx]++;
6258                                 s->src_node->bytes[dirndx] += pd.tot_len;
6259                         }
6260                         if (s->nat_src_node != NULL) {
6261                                 s->nat_src_node->packets[dirndx]++;
6262                                 s->nat_src_node->bytes[dirndx] += pd.tot_len;
6263                         }
                             /*
                              * dirndx is re-purposed here: the state's own
                              * counters are indexed by whether the packet
                              * travels in the state key's direction (0) or
                              * against it (1), not by in/out.
                              */
6264                         dirndx = (dir == sk->direction) ? 0 : 1;
6265                         s->packets[dirndx]++;
6266                         s->bytes[dirndx] += pd.tot_len;
6267                 }
                     /*
                      * tr is the rule whose address tables get updated; nr is
                      * the NAT rule taken from the state, or from pd when
                      * there is no state.
                      */
6268                 tr = r;
6269                 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
6270                 if (nr != NULL) {
6271                         struct pf_addr *x;
6272                         /*
6273                          * XXX: we need to make sure that the addresses
6274                          * passed to pfr_update_stats() are the same as
6275                          * the addresses used during matching (pfr_match)
6276                          */
                             /*
                              * sk is non-NULL exactly when s is non-NULL (it
                              * is only assigned from s->state_key above), so
                              * the s == NULL guards below also protect the
                              * sk dereference.
                              */
6277                         if (r == &pf_default_rule) {
6278                                 tr = nr;
6279                                 x = (s == NULL || sk->direction == dir) ?
6280                                     &pd.baddr : &pd.naddr;
6281                         } else {
6282                                 x = (s == NULL || sk->direction == dir) ?
6283                                     &pd.naddr : &pd.baddr;
6284                         }
                             /* substitute the address used for the table update */
6285                         if (x == &pd.baddr || s == NULL) {
6286                                 if (dir == PF_OUT)
6287                                         pd.src = x;
6288                                 else
6289                                         pd.dst = x;
6290                         }
6291                 }
6292                 if (tr->src.addr.type == PF_ADDR_TABLE)
6293                         pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
6294                             sk->direction == dir) ? pd.src : pd.dst, pd.af,
6295                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6296                             tr->src.neg);
6297                 if (tr->dst.addr.type == PF_ADDR_TABLE)
6298                         pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
6299                             sk->direction == dir) ? pd.dst : pd.src, pd.af,
6300                             pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
6301                             tr->dst.neg);
6302         }
6303
6304
             /*
              * PF_SYNPROXY_DROP: the packet is consumed here (freed, *m0
              * cleared) and PF_PASS is reported to the caller.  Otherwise,
              * if the rule has a route option set, hand the packet to
              * pf_route6().
              */
6305         if (action == PF_SYNPROXY_DROP) {
6306                 m_freem(*m0);
6307                 *m0 = NULL;
6308                 action = PF_PASS;
6309         } else if (r->rt)
6310                 /* pf_route6 can free the mbuf causing *m0 to become NULL */
6311                 pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
6312
6313         return (action);
6314 }
6315 #endif /* INET6 */
6316
/*
 * pf_check_congestion: stub implementation.
 *
 * The queue argument is never examined; the function unconditionally
 * returns 0 (presumably meaning "not congested" -- callers are not visible
 * from here).
 */
int
pf_check_congestion(struct ifqueue *ifq)
{
	(void)ifq;	/* intentionally unused */

	return 0;
}