sys/net/pf/pf_norm.c

   1 /*      $FreeBSD: src/sys/contrib/pf/net/pf_norm.c,v 1.10 2004/08/14 15:32:40 dwmalone Exp $    */
   2 /*      $OpenBSD: pf_norm.c,v 1.80.2.1 2004/04/30 21:46:33 brad Exp $ */
   3 /* add  $OpenBSD: pf_norm.c,v 1.87 2004/05/11 07:34:11 dhartmei Exp $ */
   4 /*      $DragonFly: src/sys/net/pf/pf_norm.c,v 1.10 2008/09/04 09:08:22 hasso Exp $ */
   5 /*      $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
   6
   7 /*
   8  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
   9  *
  10  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
  11  * All rights reserved.
  12  *
  13  * Redistribution and use in source and binary forms, with or without
  14  * modification, are permitted provided that the following conditions
  15  * are met:
  16  * 1. Redistributions of source code must retain the above copyright
  17  *    notice, this list of conditions and the following disclaimer.
  18  * 2. Redistributions in binary form must reproduce the above copyright
  19  *    notice, this list of conditions and the following disclaimer in the
  20  *    documentation and/or other materials provided with the distribution.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 #include "opt_inet.h"
  35 #include "opt_inet6.h"
  36
  37 #include <sys/param.h>
  38 #include <sys/systm.h>
  39 #include <sys/mbuf.h>
  40 #include <sys/filio.h>
  41 #include <sys/fcntl.h>
  42 #include <sys/socket.h>
  43 #include <sys/kernel.h>
  44 #include <sys/time.h>
  45 #include <vm/vm_zone.h>
  46
  47 #include <net/if.h>
  48 #include <net/if_types.h>
  49 #include <net/bpf.h>
  50 #include <net/route.h>
  51 #include <net/pf/if_pflog.h>
  52
  53 #include <netinet/in.h>
  54 #include <netinet/in_var.h>
  55 #include <netinet/in_systm.h>
  56 #include <netinet/ip.h>
  57 #include <netinet/ip_var.h>
  58 #include <netinet/tcp.h>
  59 #include <netinet/tcp_seq.h>
  60 #include <netinet/udp.h>
  61 #include <netinet/ip_icmp.h>
  62
  63 #ifdef INET6
  64 #include <netinet/ip6.h>
  65 #endif /* INET6 */
  66
  67 #include <net/pf/pfvar.h>
  68
  69 #define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this */
  70 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
  71 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
  72 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
  73
  74
  75 TAILQ_HEAD(pf_fragqueue, pf_fragment)   pf_fragqueue;
  76 TAILQ_HEAD(pf_cachequeue, pf_fragment)  pf_cachequeue;
  77
  78 static __inline int      pf_frag_compare(struct pf_fragment *,
  79                             struct pf_fragment *);
  80 RB_HEAD(pf_frag_tree, pf_fragment)      pf_frag_tree, pf_cache_tree;
  81 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
  82 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
  83
  84 /* Private prototypes */
  85 void                     pf_ip2key(struct pf_fragment *, struct ip *);
  86 void                     pf_remove_fragment(struct pf_fragment *);
  87 void                     pf_flush_fragments(void);
  88 void                     pf_free_fragment(struct pf_fragment *);
  89 struct pf_fragment      *pf_find_fragment(struct ip *, struct pf_frag_tree *);
  90 struct mbuf             *pf_reassemble(struct mbuf **, struct pf_fragment **,
  91                             struct pf_frent *, int);
  92 struct mbuf             *pf_fragcache(struct mbuf **, struct ip*,
  93                             struct pf_fragment **, int, int, int *);
  94 int                      pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
  95                             struct tcphdr *, int);
  96
  97 #define DPFPRINTF(x) do {                               \
  98         if (pf_status.debug >= PF_DEBUG_MISC) {         \
  99                 kprintf("%s: ", __func__);              \
 100                 kprintf x ;                             \
 101         }                                               \
 102 } while(0)
 103
 104 /* Globals */
 105 vm_zone_t                pf_frent_pl, pf_frag_pl, pf_cache_pl, pf_cent_pl;
 106 vm_zone_t                pf_state_scrub_pl;
 107 int                      pf_nfrents, pf_ncache;
 108
 109 void
 110 pf_normalize_init(void)
 111 {
 112         /* XXX
 113         pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
 114         pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
 115         pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
 116         pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
 117         */
 118
 119         TAILQ_INIT(&pf_fragqueue);
 120         TAILQ_INIT(&pf_cachequeue);
 121 }
 122
 123 static __inline int
 124 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
 125 {
 126         int     diff;
 127
 128         if ((diff = a->fr_id - b->fr_id))
 129                 return (diff);
 130         else if ((diff = a->fr_p - b->fr_p))
 131                 return (diff);
 132         else if (a->fr_src.s_addr < b->fr_src.s_addr)
 133                 return (-1);
 134         else if (a->fr_src.s_addr > b->fr_src.s_addr)
 135                 return (1);
 136         else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
 137                 return (-1);
 138         else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
 139                 return (1);
 140         return (0);
 141 }
 142
 143 void
 144 pf_purge_expired_fragments(void)
 145 {
 146         struct pf_fragment      *frag;
 147         u_int32_t                expire = time_second -
 148                                     pf_default_rule.timeout[PFTM_FRAG];
 149
 150         while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
 151                 KASSERT((BUFFER_FRAGMENTS(frag)),
 152                         ("BUFFER_FRAGMENTS(frag) == 0: %s", __func__));
 153                 if (frag->fr_timeout > expire)
 154                         break;
 155
 156                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 157                 pf_free_fragment(frag);
 158         }
 159
 160         while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
 161                 KASSERT((!BUFFER_FRAGMENTS(frag)),
 162                         ("BUFFER_FRAGMENTS(frag) != 0: %s", __func__));
 163                 if (frag->fr_timeout > expire)
 164                         break;
 165
 166                 DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
 167                 pf_free_fragment(frag);
 168                 KASSERT((TAILQ_EMPTY(&pf_cachequeue) ||
 169                     TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag),
 170                     ("!(TAILQ_EMPTY() || TAILQ_LAST() == farg): %s",
 171                     __func__));
 172         }
 173 }
 174
 175 /*
 176  * Try to flush old fragments to make space for new ones
 177  */
 178
 179 void
 180 pf_flush_fragments(void)
 181 {
 182         struct pf_fragment      *frag;
 183         int                      goal;
 184
 185         goal = pf_nfrents * 9 / 10;
 186         DPFPRINTF(("trying to free > %d frents\n",
 187             pf_nfrents - goal));
 188         while (goal < pf_nfrents) {
 189                 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
 190                 if (frag == NULL)
 191                         break;
 192                 pf_free_fragment(frag);
 193         }
 194
 195
 196         goal = pf_ncache * 9 / 10;
 197         DPFPRINTF(("trying to free > %d cache entries\n",
 198             pf_ncache - goal));
 199         while (goal < pf_ncache) {
 200                 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
 201                 if (frag == NULL)
 202                         break;
 203                 pf_free_fragment(frag);
 204         }
 205 }
 206
 207 /* Frees the fragments and all associated entries */
 208
 209 void
 210 pf_free_fragment(struct pf_fragment *frag)
 211 {
 212         struct pf_frent         *frent;
 213         struct pf_frcache       *frcache;
 214
 215         /* Free all fragments */
 216         if (BUFFER_FRAGMENTS(frag)) {
 217                 for (frent = LIST_FIRST(&frag->fr_queue); frent;
 218                     frent = LIST_FIRST(&frag->fr_queue)) {
 219                         LIST_REMOVE(frent, fr_next);
 220
 221                         m_freem(frent->fr_m);
 222                         pool_put(&pf_frent_pl, frent);
 223                         pf_nfrents--;
 224                 }
 225         } else {
 226                 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
 227                     frcache = LIST_FIRST(&frag->fr_cache)) {
 228                         LIST_REMOVE(frcache, fr_next);
 229
 230                         KASSERT((LIST_EMPTY(&frag->fr_cache) ||
 231                             LIST_FIRST(&frag->fr_cache)->fr_off >
 232                             frcache->fr_end),
 233                             ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
 234                              " frcache->fr_end): %s", __func__));
 235
 236                         pool_put(&pf_cent_pl, frcache);
 237                         pf_ncache--;
 238                 }
 239         }
 240
 241         pf_remove_fragment(frag);
 242 }
 243
 244 void
 245 pf_ip2key(struct pf_fragment *key, struct ip *ip)
 246 {
 247         key->fr_p = ip->ip_p;
 248         key->fr_id = ip->ip_id;
 249         key->fr_src.s_addr = ip->ip_src.s_addr;
 250         key->fr_dst.s_addr = ip->ip_dst.s_addr;
 251 }
 252
 253 struct pf_fragment *
 254 pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
 255 {
 256         struct pf_fragment       key;
 257         struct pf_fragment      *frag;
 258
 259         pf_ip2key(&key, ip);
 260
 261         frag = RB_FIND(pf_frag_tree, tree, &key);
 262         if (frag != NULL) {
 263                 /* XXX Are we sure we want to update the timeout? */
 264                 frag->fr_timeout = time_second;
 265                 if (BUFFER_FRAGMENTS(frag)) {
 266                         TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 267                         TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
 268                 } else {
 269                         TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
 270                         TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
 271                 }
 272         }
 273
 274         return (frag);
 275 }
 276
 277 /* Removes a fragment from the fragment queue and frees the fragment */
 278
 279 void
 280 pf_remove_fragment(struct pf_fragment *frag)
 281 {
 282         if (BUFFER_FRAGMENTS(frag)) {
 283                 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
 284                 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
 285                 pool_put(&pf_frag_pl, frag);
 286         } else {
 287                 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
 288                 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
 289                 pool_put(&pf_cache_pl, frag);
 290         }
 291 }
 292
 293 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
 294 struct mbuf *
 295 pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
 296     struct pf_frent *frent, int mff)
 297 {
 298         struct mbuf     *m = *m0, *m2;
 299         struct pf_frent *frea, *next;
 300         struct pf_frent *frep = NULL;
 301         struct ip       *ip = frent->fr_ip;
 302         int              hlen = ip->ip_hl << 2;
 303         u_int16_t        off = (ip->ip_off & IP_OFFMASK) << 3;
 304         u_int16_t        ip_len = ip->ip_len - ip->ip_hl * 4;
 305         u_int16_t        max = ip_len + off;
 306
 307         KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
 308             ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __func__));
 309
 310         /* Strip off ip header */
 311         m->m_data += hlen;
 312         m->m_len -= hlen;
 313
 314         /* Create a new reassembly queue for this packet */
 315         if (*frag == NULL) {
 316                 *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
 317                 if (*frag == NULL) {
 318                         pf_flush_fragments();
 319                         *frag = pool_get(&pf_frag_pl, PR_NOWAIT);
 320                         if (*frag == NULL)
 321                                 goto drop_fragment;
 322                 }
 323
 324                 (*frag)->fr_flags = 0;
 325                 (*frag)->fr_max = 0;
 326                 (*frag)->fr_src = frent->fr_ip->ip_src;
 327                 (*frag)->fr_dst = frent->fr_ip->ip_dst;
 328                 (*frag)->fr_p = frent->fr_ip->ip_p;
 329                 (*frag)->fr_id = frent->fr_ip->ip_id;
 330                 (*frag)->fr_timeout = time_second;
 331                 LIST_INIT(&(*frag)->fr_queue);
 332
 333                 RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
 334                 TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
 335
 336                 /* We do not have a previous fragment */
 337                 frep = NULL;
 338                 goto insert;
 339         }
 340
 341         /*
 342          * Find a fragment after the current one:
 343          *  - off contains the real shifted offset.
 344          */
 345         LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
 346                 if (FR_IP_OFF(frea) > off)
 347                         break;
 348                 frep = frea;
 349         }
 350
 351         KASSERT((frep != NULL || frea != NULL),
 352             ("!(frep != NULL || frea != NULL): %s", __func__));
 353
 354         if (frep != NULL &&
 355             FR_IP_OFF(frep) + frep->fr_ip->ip_len - frep->fr_ip->ip_hl *
 356             4 > off)
 357         {
 358                 u_int16_t       precut;
 359
 360                 precut = FR_IP_OFF(frep) + frep->fr_ip->ip_len -
 361                     frep->fr_ip->ip_hl * 4 - off;
 362                 if (precut >= ip_len)
 363                         goto drop_fragment;
 364                 m_adj(frent->fr_m, precut);
 365                 DPFPRINTF(("overlap -%d\n", precut));
 366                 /* Enforce 8 byte boundaries */
 367                 ip->ip_off = ip->ip_off + (precut >> 3);
 368                 off = (ip->ip_off & IP_OFFMASK) << 3;
 369                 ip_len -= precut;
 370                 ip->ip_len = ip_len;
 371         }
 372
 373         for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
 374             frea = next)
 375         {
 376                 u_int16_t       aftercut;
 377
 378                 aftercut = ip_len + off - FR_IP_OFF(frea);
 379                 DPFPRINTF(("adjust overlap %d\n", aftercut));
 380                 if (aftercut < frea->fr_ip->ip_len - frea->fr_ip->ip_hl
 381                     * 4)
 382                 {
 383                         frea->fr_ip->ip_len =
 384                             frea->fr_ip->ip_len - aftercut;
 385                         frea->fr_ip->ip_off = frea->fr_ip->ip_off +
 386                             (aftercut >> 3);
 387                         m_adj(frea->fr_m, aftercut);
 388                         break;
 389                 }
 390
 391                 /* This fragment is completely overlapped, lose it */
 392                 next = LIST_NEXT(frea, fr_next);
 393                 m_freem(frea->fr_m);
 394                 LIST_REMOVE(frea, fr_next);
 395                 pool_put(&pf_frent_pl, frea);
 396                 pf_nfrents--;
 397         }
 398
 399  insert:
 400         /* Update maximum data size */
 401         if ((*frag)->fr_max < max)
 402                 (*frag)->fr_max = max;
 403         /* This is the last segment */
 404         if (!mff)
 405                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 406
 407         if (frep == NULL)
 408                 LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
 409         else
 410                 LIST_INSERT_AFTER(frep, frent, fr_next);
 411
 412         /* Check if we are completely reassembled */
 413         if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
 414                 return (NULL);
 415
 416         /* Check if we have all the data */
 417         off = 0;
 418         for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
 419                 next = LIST_NEXT(frep, fr_next);
 420
 421                 off += frep->fr_ip->ip_len - frep->fr_ip->ip_hl * 4;
 422                 if (off < (*frag)->fr_max &&
 423                     (next == NULL || FR_IP_OFF(next) != off))
 424                 {
 425                         DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
 426                             off, next == NULL ? -1 : FR_IP_OFF(next),
 427                             (*frag)->fr_max));
 428                         return (NULL);
 429                 }
 430         }
 431         DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
 432         if (off < (*frag)->fr_max)
 433                 return (NULL);
 434
 435         /* We have all the data */
 436         frent = LIST_FIRST(&(*frag)->fr_queue);
 437         KASSERT((frent != NULL), ("frent == NULL: %s", __func__));
 438         if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
 439                 DPFPRINTF(("drop: too big: %d\n", off));
 440                 pf_free_fragment(*frag);
 441                 *frag = NULL;
 442                 return (NULL);
 443         }
 444         next = LIST_NEXT(frent, fr_next);
 445
 446         /* Magic from ip_input */
 447         ip = frent->fr_ip;
 448         m = frent->fr_m;
 449         m2 = m->m_next;
 450         m->m_next = NULL;
 451         m_cat(m, m2);
 452         pool_put(&pf_frent_pl, frent);
 453         pf_nfrents--;
 454         for (frent = next; frent != NULL; frent = next) {
 455                 next = LIST_NEXT(frent, fr_next);
 456
 457                 m2 = frent->fr_m;
 458                 pool_put(&pf_frent_pl, frent);
 459                 pf_nfrents--;
 460                 m_cat(m, m2);
 461         }
 462
 463         ip->ip_src = (*frag)->fr_src;
 464         ip->ip_dst = (*frag)->fr_dst;
 465
 466         /* Remove from fragment queue */
 467         pf_remove_fragment(*frag);
 468         *frag = NULL;
 469
 470         hlen = ip->ip_hl << 2;
 471         ip->ip_len = off + hlen;
 472         m->m_len += hlen;
 473         m->m_data -= hlen;
 474
 475         /* some debugging cruft by sklower, below, will go away soon */
 476         /* XXX this should be done elsewhere */
 477         if (m->m_flags & M_PKTHDR) {
 478                 int plen = 0;
 479                 for (m2 = m; m2; m2 = m2->m_next)
 480                         plen += m2->m_len;
 481                 m->m_pkthdr.len = plen;
 482         }
 483
 484         DPFPRINTF(("complete: %p(%d)\n", m, ip->ip_len));
 485         return (m);
 486
 487  drop_fragment:
 488         /* Oops - fail safe - drop packet */
 489         pool_put(&pf_frent_pl, frent);
 490         pf_nfrents--;
 491         m_freem(m);
 492         return (NULL);
 493 }
 494
 495 struct mbuf *
 496 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
 497     int drop, int *nomem)
 498 {
 499         struct mbuf             *m = *m0;
 500         struct pf_frcache       *frp, *fra, *cur = NULL;
 501         int                      ip_len = h->ip_len - (h->ip_hl << 2);
 502         u_int16_t                off = h->ip_off << 3;
 503         u_int16_t                max = ip_len + off;
 504         int                      hosed = 0;
 505
 506         KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
 507             ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __func__));
 508
 509         /* Create a new range queue for this packet */
 510         if (*frag == NULL) {
 511                 *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
 512                 if (*frag == NULL) {
 513                         pf_flush_fragments();
 514                         *frag = pool_get(&pf_cache_pl, PR_NOWAIT);
 515                         if (*frag == NULL)
 516                                 goto no_mem;
 517                 }
 518
 519                 /* Get an entry for the queue */
 520                 cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 521                 if (cur == NULL) {
 522                         pool_put(&pf_cache_pl, *frag);
 523                         *frag = NULL;
 524                         goto no_mem;
 525                 }
 526                 pf_ncache++;
 527
 528                 (*frag)->fr_flags = PFFRAG_NOBUFFER;
 529                 (*frag)->fr_max = 0;
 530                 (*frag)->fr_src = h->ip_src;
 531                 (*frag)->fr_dst = h->ip_dst;
 532                 (*frag)->fr_p = h->ip_p;
 533                 (*frag)->fr_id = h->ip_id;
 534                 (*frag)->fr_timeout = time_second;
 535
 536                 cur->fr_off = off;
 537                 cur->fr_end = max;
 538                 LIST_INIT(&(*frag)->fr_cache);
 539                 LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
 540
 541                 RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
 542                 TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
 543
 544                 DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));
 545
 546                 goto pass;
 547         }
 548
 549         /*
 550          * Find a fragment after the current one:
 551          *  - off contains the real shifted offset.
 552          */
 553         frp = NULL;
 554         LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
 555                 if (fra->fr_off > off)
 556                         break;
 557                 frp = fra;
 558         }
 559
 560         KASSERT((frp != NULL || fra != NULL),
 561             ("!(frp != NULL || fra != NULL): %s", __func__));
 562
 563         if (frp != NULL) {
 564                 int     precut;
 565
 566                 precut = frp->fr_end - off;
 567                 if (precut >= ip_len) {
 568                         /* Fragment is entirely a duplicate */
 569                         DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
 570                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
 571                         goto drop_fragment;
 572                 }
 573                 if (precut == 0) {
 574                         /* They are adjacent.  Fixup cache entry */
 575                         DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
 576                             h->ip_id, frp->fr_off, frp->fr_end, off, max));
 577                         frp->fr_end = max;
 578                 } else if (precut > 0) {
 579                         /* The first part of this payload overlaps with a
 580                          * fragment that has already been passed.
 581                          * Need to trim off the first part of the payload.
 582                          * But to do so easily, we need to create another
 583                          * mbuf to throw the original header into.
 584                          */
 585
 586                         DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
 587                             h->ip_id, precut, frp->fr_off, frp->fr_end, off,
 588                             max));
 589
 590                         off += precut;
 591                         max -= precut;
 592                         /* Update the previous frag to encompass this one */
 593                         frp->fr_end = max;
 594
 595                         if (!drop) {
 596                                 /* XXX Optimization opportunity
 597                                  * This is a very heavy way to trim the payload.
 598                                  * we could do it much faster by diddling mbuf
 599                                  * internals but that would be even less legible
 600                                  * than this mbuf magic.  For my next trick,
 601                                  * I'll pull a rabbit out of my laptop.
 602                                  */
 603                                 *m0 = m_dup(m, MB_DONTWAIT);
 604                                 /* From KAME Project : We have missed this! */
 605                                 m_adj(*m0, (h->ip_hl << 2) -
 606                                     (*m0)->m_pkthdr.len);
 607                                 if (*m0 == NULL)
 608                                         goto no_mem;
 609                                 KASSERT(((*m0)->m_next == NULL),
 610                                     ("(*m0)->m_next != NULL: %s",
 611                                     __func__));
 612                                 m_adj(m, precut + (h->ip_hl << 2));
 613                                 m_cat(*m0, m);
 614                                 m = *m0;
 615                                 if (m->m_flags & M_PKTHDR) {
 616                                         int plen = 0;
 617                                         struct mbuf *t;
 618                                         for (t = m; t; t = t->m_next)
 619                                                 plen += t->m_len;
 620                                         m->m_pkthdr.len = plen;
 621                                 }
 622
 623
 624                                 h = mtod(m, struct ip *);
 625
 626                                 KASSERT(((int)m->m_len ==
 627                                     h->ip_len - precut),
 628                                     ("m->m_len != h->ip_len - precut: %s",
 629                                     __func__));
 630                                 h->ip_off = h->ip_off +
 631                                     (precut >> 3);
 632                                 h->ip_len = h->ip_len - precut;
 633                         } else {
 634                                 hosed++;
 635                         }
 636                 } else {
 637                         /* There is a gap between fragments */
 638
 639                         DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
 640                             h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
 641                             max));
 642
 643                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 644                         if (cur == NULL)
 645                                 goto no_mem;
 646                         pf_ncache++;
 647
 648                         cur->fr_off = off;
 649                         cur->fr_end = max;
 650                         LIST_INSERT_AFTER(frp, cur, fr_next);
 651                 }
 652         }
 653
 654         if (fra != NULL) {
 655                 int     aftercut;
 656                 int     merge = 0;
 657
 658                 aftercut = max - fra->fr_off;
 659                 if (aftercut == 0) {
 660                         /* Adjacent fragments */
 661                         DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
 662                             h->ip_id, off, max, fra->fr_off, fra->fr_end));
 663                         fra->fr_off = off;
 664                         merge = 1;
 665                 } else if (aftercut > 0) {
 666                         /* Need to chop off the tail of this fragment */
 667                         DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
 668                             h->ip_id, aftercut, off, max, fra->fr_off,
 669                             fra->fr_end));
 670                         fra->fr_off = off;
 671                         max -= aftercut;
 672
 673                         merge = 1;
 674
 675                         if (!drop) {
 676                                 m_adj(m, -aftercut);
 677                                 if (m->m_flags & M_PKTHDR) {
 678                                         int plen = 0;
 679                                         struct mbuf *t;
 680                                         for (t = m; t; t = t->m_next)
 681                                                 plen += t->m_len;
 682                                         m->m_pkthdr.len = plen;
 683                                 }
 684                                 h = mtod(m, struct ip *);
 685                                 KASSERT(((int)m->m_len == h->ip_len - aftercut),
 686                                     ("m->m_len != h->ip_len - aftercut: %s",
 687                                     __func__));
 688                                 h->ip_len = h->ip_len - aftercut;
 689                         } else {
 690                                 hosed++;
 691                         }
 692                 } else if (frp == NULL) {
 693                         /* There is a gap between fragments */
 694                         DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
 695                             h->ip_id, -aftercut, off, max, fra->fr_off,
 696                             fra->fr_end));
 697
 698                         cur = pool_get(&pf_cent_pl, PR_NOWAIT);
 699                         if (cur == NULL)
 700                                 goto no_mem;
 701                         pf_ncache++;
 702
 703                         cur->fr_off = off;
 704                         cur->fr_end = max;
 705                         LIST_INSERT_BEFORE(fra, cur, fr_next);
 706                 }
 707
 708
 709                 /* Need to glue together two separate fragment descriptors */
 710                 if (merge) {
 711                         if (cur && fra->fr_off <= cur->fr_end) {
 712                                 /* Need to merge in a previous 'cur' */
 713                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
 714                                     "%d-%d) %d-%d (%d-%d)\n",
 715                                     h->ip_id, cur->fr_off, cur->fr_end, off,
 716                                     max, fra->fr_off, fra->fr_end));
 717                                 fra->fr_off = cur->fr_off;
 718                                 LIST_REMOVE(cur, fr_next);
 719                                 pool_put(&pf_cent_pl, cur);
 720                                 pf_ncache--;
 721                                 cur = NULL;
 722
 723                         } else if (frp && fra->fr_off <= frp->fr_end) {
 724                                 /* Need to merge in a modified 'frp' */
 725                                 KASSERT((cur == NULL), ("cur != NULL: %s",
 726                                     __func__));
 727                                 DPFPRINTF(("fragcache[%d]: adjacent(merge "
 728                                     "%d-%d) %d-%d (%d-%d)\n",
 729                                     h->ip_id, frp->fr_off, frp->fr_end, off,
 730                                     max, fra->fr_off, fra->fr_end));
 731                                 fra->fr_off = frp->fr_off;
 732                                 LIST_REMOVE(frp, fr_next);
 733                                 pool_put(&pf_cent_pl, frp);
 734                                 pf_ncache--;
 735                                 frp = NULL;
 736
 737                         }
 738                 }
 739         }
 740
 741         if (hosed) {
 742                 /*
 743                  * We must keep tracking the overall fragment even when
 744                  * we're going to drop it anyway so that we know when to
 745                  * free the overall descriptor.  Thus we drop the frag late.
 746                  */
 747                 goto drop_fragment;
 748         }
 749
 750
 751  pass:
 752         /* Update maximum data size */
 753         if ((*frag)->fr_max < max)
 754                 (*frag)->fr_max = max;
 755
 756         /* This is the last segment */
 757         if (!mff)
 758                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 759
 760         /* Check if we are completely reassembled */
 761         if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
 762             LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
 763             LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
 764                 /* Remove from fragment queue */
 765                 DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
 766                     (*frag)->fr_max));
 767                 pf_free_fragment(*frag);
 768                 *frag = NULL;
 769         }
 770
 771         return (m);
 772
 773  no_mem:
 774         *nomem = 1;
 775
 776         /* Still need to pay attention to !IP_MF */
 777         if (!mff && *frag != NULL)
 778                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 779
 780         m_freem(m);
 781         return (NULL);
 782
 783  drop_fragment:
 784
 785         /* Still need to pay attention to !IP_MF */
 786         if (!mff && *frag != NULL)
 787                 (*frag)->fr_flags |= PFFRAG_SEENLAST;
 788
 789         if (drop) {
 790                 /* This fragment has been deemed bad.  Don't reass */
 791                 if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
 792                         DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
 793                             h->ip_id));
 794                 (*frag)->fr_flags |= PFFRAG_DROP;
 795         }
 796
 797         m_freem(m);
 798         return (NULL);
 799 }
 800
 801 int
 802 pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
 803     struct pf_pdesc *pd)
 804 {
 805         struct mbuf             *m = *m0;
 806         struct pf_rule          *r;
 807         struct pf_frent         *frent;
 808         struct pf_fragment      *frag = NULL;
 809         struct ip               *h = mtod(m, struct ip *);
 810         int                      mff = (h->ip_off & IP_MF);
 811         int                      hlen = h->ip_hl << 2;
 812         u_int16_t                fragoff = (h->ip_off & IP_OFFMASK) << 3;
 813         u_int16_t                max;
 814         int                      ip_len;
 815         int                      ip_off;
 816
 817         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 818         while (r != NULL) {
 819                 r->evaluations++;
 820                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
 821                         r = r->skip[PF_SKIP_IFP].ptr;
 822                 else if (r->direction && r->direction != dir)
 823                         r = r->skip[PF_SKIP_DIR].ptr;
 824                 else if (r->af && r->af != AF_INET)
 825                         r = r->skip[PF_SKIP_AF].ptr;
 826                 else if (r->proto && r->proto != h->ip_p)
 827                         r = r->skip[PF_SKIP_PROTO].ptr;
 828                 else if (PF_MISMATCHAW(&r->src.addr,
 829                     (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
 830                     r->src.neg, kif))
 831                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 832                 else if (PF_MISMATCHAW(&r->dst.addr,
 833                     (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
 834                     r->dst.neg, NULL))
 835                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
 836                 else
 837                         break;
 838         }
 839
 840         if (r == NULL || r->action == PF_NOSCRUB)
 841                 return (PF_PASS);
 842         else {
 843                 r->packets[dir == PF_OUT]++;
 844                 r->bytes[dir == PF_OUT] += pd->tot_len;
 845         }
 846
 847         /* Check for illegal packets */
 848         if (hlen < (int)sizeof(struct ip))
 849                 goto drop;
 850
 851         if (hlen > h->ip_len)
 852                 goto drop;
 853
 854         /* Clear IP_DF if the rule uses the no-df option */
 855         if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
 856                 u_int16_t ip_off = h->ip_off;
 857
 858                 h->ip_off &= ~IP_DF;
 859                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 860         }
 861
 862         /* We will need other tests here */
 863         if (!fragoff && !mff)
 864                 goto no_fragment;
 865
 866         /* We're dealing with a fragment now. Don't allow fragments
 867          * with IP_DF to enter the cache. If the flag was cleared by
 868          * no-df above, fine. Otherwise drop it.
 869          */
 870         if (h->ip_off & IP_DF) {
 871                 DPFPRINTF(("IP_DF\n"));
 872                 goto bad;
 873         }
 874
 875         ip_len = h->ip_len - hlen;
 876         ip_off = (h->ip_off & IP_OFFMASK) << 3;
 877
 878         /* All fragments are 8 byte aligned */
 879         if (mff && (ip_len & 0x7)) {
 880                 DPFPRINTF(("mff and %d\n", ip_len));
 881                 goto bad;
 882         }
 883
 884         /* Respect maximum length */
 885         if (fragoff + ip_len > IP_MAXPACKET) {
 886                 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
 887                 goto bad;
 888         }
 889         max = fragoff + ip_len;
 890
 891         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
 892                 /* Fully buffer all of the fragments */
 893
 894                 frag = pf_find_fragment(h, &pf_frag_tree);
 895
 896                 /* Check if we saw the last fragment already */
 897                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 898                     max > frag->fr_max)
 899                         goto bad;
 900
 901                 /* Get an entry for the fragment queue */
 902                 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
 903                 if (frent == NULL) {
 904                         REASON_SET(reason, PFRES_MEMORY);
 905                         return (PF_DROP);
 906                 }
 907                 pf_nfrents++;
 908                 frent->fr_ip = h;
 909                 frent->fr_m = m;
 910
 911                 /* Might return a completely reassembled mbuf, or NULL */
 912                 DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
 913                 *m0 = m = pf_reassemble(m0, &frag, frent, mff);
 914
 915                 if (m == NULL)
 916                         return (PF_DROP);
 917
 918                 /* use mtag from concatenated mbuf chain */
 919                 pd->pf_mtag = pf_find_mtag(m);
 920 #ifdef DIAGNOSTIC
 921                 if (pd->pf_mtag == NULL) {
 922                         kprintf("%s: pf_find_mtag returned NULL(1)\n", __func__);
 923                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
 924                                 m_freem(m);
 925                                 *m0 = NULL;
 926                                 goto no_mem;
 927                         }
 928                 }
 929 #endif
 930                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 931                         goto drop;
 932
 933                 h = mtod(m, struct ip *);
 934         } else {
 935                 /* non-buffering fragment cache (drops or masks overlaps) */
 936                 int     nomem = 0;
 937
 938                 if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
 939                         /*
 940                          * Already passed the fragment cache in the
 941                          * input direction.  If we continued, it would
 942                          * appear to be a dup and would be dropped.
 943                          */
 944                         goto fragment_pass;
 945                 }
 946
 947                 frag = pf_find_fragment(h, &pf_cache_tree);
 948
 949                 /* Check if we saw the last fragment already */
 950                 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
 951                     max > frag->fr_max) {
 952                         if (r->rule_flag & PFRULE_FRAGDROP)
 953                                 frag->fr_flags |= PFFRAG_DROP;
 954                         goto bad;
 955                 }
 956
 957                 *m0 = m = pf_fragcache(m0, h, &frag, mff,
 958                     (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
 959                 if (m == NULL) {
 960                         if (nomem)
 961                                 goto no_mem;
 962                         goto drop;
 963                 }
 964
 965                 /* use mtag from copied and trimmed mbuf chain */
 966                 pd->pf_mtag = pf_find_mtag(m);
 967 #ifdef DIAGNOSTIC
 968                 if (pd->pf_mtag == NULL) {
 969                         kprintf("%s: pf_find_mtag returned NULL(2)\n", __func__);
 970                         if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
 971                                 m_freem(m);
 972                                 *m0 = NULL;
 973                                 goto no_mem;
 974                         }
 975                 }
 976 #endif
 977                 if (dir == PF_IN)
 978                         pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;
 979
 980                 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
 981                         goto drop;
 982                 goto fragment_pass;
 983         }
 984
 985  no_fragment:
 986         /* At this point, only IP_DF is allowed in ip_off */
 987         if (h->ip_off & IP_DF) {
 988                 u_int16_t ip_off = h->ip_off;
 989
 990                 h->ip_off &= IP_DF;
 991                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
 992         }
 993
 994         /* Enforce a minimum ttl, may cause endless packet loops */
 995         if (r->min_ttl && h->ip_ttl < r->min_ttl) {
 996                 u_int16_t ip_ttl = h->ip_ttl;
 997
 998                 h->ip_ttl = r->min_ttl;
 999                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1000         }
1001
1002         if (r->rule_flag & PFRULE_RANDOMID) {
1003                 u_int16_t ip_id = h->ip_id;
1004
1005                 h->ip_id = ip_randomid();
1006                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
1007         }
1008         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1009                 pd->flags |= PFDESC_IP_REAS;
1010
1011         return (PF_PASS);
1012
1013  fragment_pass:
1014         /* Enforce a minimum ttl, may cause endless packet loops */
1015         if (r->min_ttl && h->ip_ttl < r->min_ttl) {
1016                 u_int16_t ip_ttl = h->ip_ttl;
1017
1018                 h->ip_ttl = r->min_ttl;
1019                 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
1020         }
1021         if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
1022                 pd->flags |= PFDESC_IP_REAS;
1023         return (PF_PASS);
1024
1025  no_mem:
1026         REASON_SET(reason, PFRES_MEMORY);
1027         if (r != NULL && r->log)
1028                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1029         return (PF_DROP);
1030
1031  drop:
1032         REASON_SET(reason, PFRES_NORM);
1033         if (r != NULL && r->log)
1034                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1035         return (PF_DROP);
1036
1037  bad:
1038         DPFPRINTF(("dropping bad fragment\n"));
1039
1040         /* Free associated fragments */
1041         if (frag != NULL)
1042                 pf_free_fragment(frag);
1043
1044         REASON_SET(reason, PFRES_FRAG);
1045         if (r != NULL && r->log)
1046                 PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd);
1047
1048         return (PF_DROP);
1049 }
1050
1051 #ifdef INET6
1052 int
1053 pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
1054     u_short *reason, struct pf_pdesc *pd)
1055 {
1056         struct mbuf             *m = *m0;
1057         struct pf_rule          *r;
1058         struct ip6_hdr          *h = mtod(m, struct ip6_hdr *);
1059         int                      off;
1060         struct ip6_ext           ext;
1061         struct ip6_opt           opt;
1062         struct ip6_opt_jumbo     jumbo;
1063         struct ip6_frag          frag;
1064         u_int32_t                jumbolen = 0, plen;
1065         u_int16_t                fragoff = 0;
1066         int                      optend;
1067         int                      ooff;
1068         u_int8_t                 proto;
1069         int                      terminal;
1070
1071         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1072         while (r != NULL) {
1073                 r->evaluations++;
1074                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1075                         r = r->skip[PF_SKIP_IFP].ptr;
1076                 else if (r->direction && r->direction != dir)
1077                         r = r->skip[PF_SKIP_DIR].ptr;
1078                 else if (r->af && r->af != AF_INET6)
1079                         r = r->skip[PF_SKIP_AF].ptr;
1080 #if 0 /* header chain! */
1081                 else if (r->proto && r->proto != h->ip6_nxt)
1082                         r = r->skip[PF_SKIP_PROTO].ptr;
1083 #endif
1084                 else if (PF_MISMATCHAW(&r->src.addr,
1085                     (struct pf_addr *)&h->ip6_src, AF_INET6,
1086                     r->src.neg, kif))
1087                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1088                 else if (PF_MISMATCHAW(&r->dst.addr,
1089                     (struct pf_addr *)&h->ip6_dst, AF_INET6,
1090                     r->dst.neg, NULL))
1091                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1092                 else
1093                         break;
1094         }
1095
1096         if (r == NULL || r->action == PF_NOSCRUB)
1097                 return (PF_PASS);
1098         else {
1099                 r->packets[dir == PF_OUT]++;
1100                 r->bytes[dir == PF_OUT] += pd->tot_len;
1101         }
1102
1103         /* Check for illegal packets */
1104         if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
1105                 goto drop;
1106
1107         off = sizeof(struct ip6_hdr);
1108         proto = h->ip6_nxt;
1109         terminal = 0;
1110         do {
1111                 switch (proto) {
1112                 case IPPROTO_FRAGMENT:
1113                         goto fragment;
1114                         break;
1115                 case IPPROTO_AH:
1116                 case IPPROTO_ROUTING:
1117                 case IPPROTO_DSTOPTS:
1118                         if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1119                             NULL, AF_INET6))
1120                                 goto shortpkt;
1121                         if (proto == IPPROTO_AH)
1122                                 off += (ext.ip6e_len + 2) * 4;
1123                         else
1124                                 off += (ext.ip6e_len + 1) * 8;
1125                         proto = ext.ip6e_nxt;
1126                         break;
1127                 case IPPROTO_HOPOPTS:
1128                         if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
1129                             NULL, AF_INET6))
1130                                 goto shortpkt;
1131                         optend = off + (ext.ip6e_len + 1) * 8;
1132                         ooff = off + sizeof(ext);
1133                         do {
1134                                 if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
1135                                     sizeof(opt.ip6o_type), NULL, NULL,
1136                                     AF_INET6))
1137                                         goto shortpkt;
1138                                 if (opt.ip6o_type == IP6OPT_PAD1) {
1139                                         ooff++;
1140                                         continue;
1141                                 }
1142                                 if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
1143                                     NULL, NULL, AF_INET6))
1144                                         goto shortpkt;
1145                                 if (ooff + sizeof(opt) + opt.ip6o_len > optend)
1146                                         goto drop;
1147                                 switch (opt.ip6o_type) {
1148                                 case IP6OPT_JUMBO:
1149                                         if (h->ip6_plen != 0)
1150                                                 goto drop;
1151                                         if (!pf_pull_hdr(m, ooff, &jumbo,
1152                                             sizeof(jumbo), NULL, NULL,
1153                                             AF_INET6))
1154                                                 goto shortpkt;
1155                                         memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
1156                                             sizeof(jumbolen));
1157                                         jumbolen = ntohl(jumbolen);
1158                                         if (jumbolen <= IPV6_MAXPACKET)
1159                                                 goto drop;
1160                                         if (sizeof(struct ip6_hdr) + jumbolen !=
1161                                             m->m_pkthdr.len)
1162                                                 goto drop;
1163                                         break;
1164                                 default:
1165                                         break;
1166                                 }
1167                                 ooff += sizeof(opt) + opt.ip6o_len;
1168                         } while (ooff < optend);
1169
1170                         off = optend;
1171                         proto = ext.ip6e_nxt;
1172                         break;
1173                 default:
1174                         terminal = 1;
1175                         break;
1176                 }
1177         } while (!terminal);
1178
1179         /* jumbo payload option must be present, or plen > 0 */
1180         if (ntohs(h->ip6_plen) == 0)
1181                 plen = jumbolen;
1182         else
1183                 plen = ntohs(h->ip6_plen);
1184         if (plen == 0)
1185                 goto drop;
1186         if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
1187                 goto shortpkt;
1188
1189         /* Enforce a minimum ttl, may cause endless packet loops */
1190         if (r->min_ttl && h->ip6_hlim < r->min_ttl)
1191                 h->ip6_hlim = r->min_ttl;
1192
1193         return (PF_PASS);
1194
1195  fragment:
1196         if (ntohs(h->ip6_plen) == 0 || jumbolen)
1197                 goto drop;
1198         plen = ntohs(h->ip6_plen);
1199
1200         if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
1201                 goto shortpkt;
1202         fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
1203         if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
1204                 goto badfrag;
1205
1206         /* do something about it */
1207         /* remember to set pd->flags |= PFDESC_IP_REAS */
1208         return (PF_PASS);
1209
1210  shortpkt:
1211         REASON_SET(reason, PFRES_SHORT);
1212         if (r != NULL && r->log)
1213                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1214         return (PF_DROP);
1215
1216  drop:
1217         REASON_SET(reason, PFRES_NORM);
1218         if (r != NULL && r->log)
1219                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1220         return (PF_DROP);
1221
1222  badfrag:
1223         REASON_SET(reason, PFRES_FRAG);
1224         if (r != NULL && r->log)
1225                 PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, NULL, NULL, pd);
1226         return (PF_DROP);
1227 }
1228 #endif /* INET6 */
1229
1230 int
1231 pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
1232     int off, void *h, struct pf_pdesc *pd)
1233 {
1234         struct pf_rule  *r, *rm = NULL;
1235         struct tcphdr   *th = pd->hdr.tcp;
1236         int              rewrite = 0;
1237         u_short          reason;
1238         u_int8_t         flags;
1239         sa_family_t      af = pd->af;
1240
1241         r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1242         while (r != NULL) {
1243                 r->evaluations++;
1244                 if (pfi_kif_match(r->kif, kif) == r->ifnot)
1245                         r = r->skip[PF_SKIP_IFP].ptr;
1246                 else if (r->direction && r->direction != dir)
1247                         r = r->skip[PF_SKIP_DIR].ptr;
1248                 else if (r->af && r->af != af)
1249                         r = r->skip[PF_SKIP_AF].ptr;
1250                 else if (r->proto && r->proto != pd->proto)
1251                         r = r->skip[PF_SKIP_PROTO].ptr;
1252                 else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
1253                     r->src.neg, kif))
1254                         r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1255                 else if (r->src.port_op && !pf_match_port(r->src.port_op,
1256                             r->src.port[0], r->src.port[1], th->th_sport))
1257                         r = r->skip[PF_SKIP_SRC_PORT].ptr;
1258                 else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
1259                     r->dst.neg, NULL))
1260                         r = r->skip[PF_SKIP_DST_ADDR].ptr;
1261                 else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
1262                             r->dst.port[0], r->dst.port[1], th->th_dport))
1263                         r = r->skip[PF_SKIP_DST_PORT].ptr;
1264                 else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
1265                             pf_osfp_fingerprint(pd, m, off, th),
1266                             r->os_fingerprint))
1267                         r = TAILQ_NEXT(r, entries);
1268                 else {
1269                         rm = r;
1270                         break;
1271                 }
1272         }
1273
1274         if (rm == NULL || rm->action == PF_NOSCRUB)
1275                 return (PF_PASS);
1276         else {
1277                 r->packets[dir == PF_OUT]++;
1278                 r->bytes[dir == PF_OUT] += pd->tot_len;
1279         }
1280
1281         if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
1282                 pd->flags |= PFDESC_TCP_NORM;
1283
1284         flags = th->th_flags;
1285         if (flags & TH_SYN) {
1286                 /* Illegal packet */
1287                 if (flags & TH_RST)
1288                         goto tcp_drop;
1289
1290                 if (flags & TH_FIN)
1291                         flags &= ~TH_FIN;
1292         } else {
1293                 /* Illegal packet */
1294                 if (!(flags & (TH_ACK|TH_RST)))
1295                         goto tcp_drop;
1296         }
1297
1298         if (!(flags & TH_ACK)) {
1299                 /* These flags are only valid if ACK is set */
1300                 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
1301                         goto tcp_drop;
1302         }
1303
1304         /* Check for illegal header length */
1305         if (th->th_off < (sizeof(struct tcphdr) >> 2))
1306                 goto tcp_drop;
1307
1308         /* If flags changed, or reserved data set, then adjust */
1309         if (flags != th->th_flags || th->th_x2 != 0) {
1310                 u_int16_t       ov, nv;
1311
1312                 ov = *(u_int16_t *)(&th->th_ack + 1);
1313                 th->th_flags = flags;
1314                 th->th_x2 = 0;
1315                 nv = *(u_int16_t *)(&th->th_ack + 1);
1316
1317                 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
1318                 rewrite = 1;
1319         }
1320
1321         /* Remove urgent pointer, if TH_URG is not set */
1322         if (!(flags & TH_URG) && th->th_urp) {
1323                 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
1324                 th->th_urp = 0;
1325                 rewrite = 1;
1326         }
1327
1328         /* Process options */
1329         if (r->max_mss && pf_normalize_tcpopt(r, m, th, off))
1330                 rewrite = 1;
1331
1332         /* copy back packet headers if we sanitized */
1333         if (rewrite)
1334                 m_copyback(m, off, sizeof(*th), (caddr_t)th);
1335
1336         return (PF_PASS);
1337
1338  tcp_drop:
1339         REASON_SET(&reason, PFRES_NORM);
1340         if (rm != NULL && r->log)
1341                 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r, NULL, NULL, pd);
1342         return (PF_DROP);
1343 }
1344
1345 int
1346 pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
1347     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
1348 {
1349         u_int32_t tsval, tsecr;
1350         u_int8_t hdr[60];
1351         u_int8_t *opt;
1352
1353         KASSERT((src->scrub == NULL),
1354             ("pf_normalize_tcp_init: src->scrub != NULL"));
1355
1356         src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
1357         if (src->scrub == NULL)
1358                 return (1);
1359         bzero(src->scrub, sizeof(*src->scrub));
1360
1361         switch (pd->af) {
1362 #ifdef INET
1363         case AF_INET: {
1364                 struct ip *h = mtod(m, struct ip *);
1365                 src->scrub->pfss_ttl = h->ip_ttl;
1366                 break;
1367         }
1368 #endif /* INET */
1369 #ifdef INET6
1370         case AF_INET6: {
1371                 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1372                 src->scrub->pfss_ttl = h->ip6_hlim;
1373                 break;
1374         }
1375 #endif /* INET6 */
1376         }
1377
1378
1379         /*
1380          * All normalizations below are only begun if we see the start of
1381          * the connections.  They must all set an enabled bit in pfss_flags
1382          */
1383         if ((th->th_flags & TH_SYN) == 0)
1384                 return (0);
1385
1386
1387         if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
1388             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1389                 /* Diddle with TCP options */
1390                 int hlen;
1391                 opt = hdr + sizeof(struct tcphdr);
1392                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1393                 while (hlen >= TCPOLEN_TIMESTAMP) {
1394                         switch (*opt) {
1395                         case TCPOPT_EOL:        /* FALLTHROUGH */
1396                         case TCPOPT_NOP:
1397                                 opt++;
1398                                 hlen--;
1399                                 break;
1400                         case TCPOPT_TIMESTAMP:
1401                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1402                                         src->scrub->pfss_flags |=
1403                                             PFSS_TIMESTAMP;
1404                                         src->scrub->pfss_ts_mod = karc4random();
1405
1406                                         /* note PFSS_PAWS not set yet */
1407                                         memcpy(&tsval, &opt[2],
1408                                             sizeof(u_int32_t));
1409                                         memcpy(&tsecr, &opt[6],
1410                                             sizeof(u_int32_t));
1411                                         src->scrub->pfss_tsval0 = ntohl(tsval);
1412                                         src->scrub->pfss_tsval = ntohl(tsval);
1413                                         src->scrub->pfss_tsecr = ntohl(tsecr);
1414                                         getmicrouptime(&src->scrub->pfss_last);
1415                                 }
1416                                 /* FALLTHROUGH */
1417                         default:
1418                                 hlen -= MAX(opt[1], 2);
1419                                 opt += MAX(opt[1], 2);
1420                                 break;
1421                         }
1422                 }
1423         }
1424
1425         return (0);
1426 }
1427
1428 void
1429 pf_normalize_tcp_cleanup(struct pf_state *state)
1430 {
1431         if (state->src.scrub)
1432                 pool_put(&pf_state_scrub_pl, state->src.scrub);
1433         if (state->dst.scrub)
1434                 pool_put(&pf_state_scrub_pl, state->dst.scrub);
1435
1436         /* Someday... flush the TCP segment reassembly descriptors. */
1437 }
1438
1439 int
1440 pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
1441     u_short *reason, struct tcphdr *th, struct pf_state *state,
1442     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
1443 {
1444         struct timeval uptime;
1445         u_int32_t tsval, tsecr;
1446         u_int tsval_from_last;
1447         u_int8_t hdr[60];
1448         u_int8_t *opt;
1449         int copyback = 0;
1450         int got_ts = 0;
1451
1452         KASSERT((src->scrub || dst->scrub),
1453             ("pf_normalize_tcp_statefull: src->scrub && dst->scrub!"));
1454
1455         /*
1456          * Enforce the minimum TTL seen for this connection.  Negate a common
1457          * technique to evade an intrusion detection system and confuse
1458          * firewall state code.
1459          */
1460         switch (pd->af) {
1461 #ifdef INET
1462         case AF_INET: {
1463                 if (src->scrub) {
1464                         struct ip *h = mtod(m, struct ip *);
1465                         if (h->ip_ttl > src->scrub->pfss_ttl)
1466                                 src->scrub->pfss_ttl = h->ip_ttl;
1467                         h->ip_ttl = src->scrub->pfss_ttl;
1468                 }
1469                 break;
1470         }
1471 #endif /* INET */
1472 #ifdef INET6
1473         case AF_INET6: {
1474                 if (src->scrub) {
1475                         struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
1476                         if (h->ip6_hlim > src->scrub->pfss_ttl)
1477                                 src->scrub->pfss_ttl = h->ip6_hlim;
1478                         h->ip6_hlim = src->scrub->pfss_ttl;
1479                 }
1480                 break;
1481         }
1482 #endif /* INET6 */
1483         }
1484
1485         if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
1486             ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
1487             (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
1488             pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
1489                 /* Diddle with TCP options */
1490                 int hlen;
1491                 opt = hdr + sizeof(struct tcphdr);
1492                 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
1493                 while (hlen >= TCPOLEN_TIMESTAMP) {
1494                         switch (*opt) {
1495                         case TCPOPT_EOL:        /* FALLTHROUGH */
1496                         case TCPOPT_NOP:
1497                                 opt++;
1498                                 hlen--;
1499                                 break;
1500                         case TCPOPT_TIMESTAMP:
1501                                 /* Modulate the timestamps.  Can be used for
1502                                  * NAT detection, OS uptime determination or
1503                                  * reboot detection.
1504                                  */
1505
1506                                 if (got_ts) {
1507                                         /* Huh?  Multiple timestamps!? */
1508                                         if (pf_status.debug >= PF_DEBUG_MISC) {
1509                                                 DPFPRINTF(("multiple TS??"));
1510                                                 pf_print_state(state);
1511                                                 kprintf("\n");
1512                                         }
1513                                         REASON_SET(reason, PFRES_TS);
1514                                         return (PF_DROP);
1515                                 }
1516                                 if (opt[1] >= TCPOLEN_TIMESTAMP) {
1517                                         memcpy(&tsval, &opt[2],
1518                                             sizeof(u_int32_t));
1519                                         if (tsval && src->scrub &&
1520                                             (src->scrub->pfss_flags &
1521                                             PFSS_TIMESTAMP)) {
1522                                                 tsval = ntohl(tsval);
1523                                                 pf_change_a(&opt[2],
1524                                                     &th->th_sum,
1525                                                     htonl(tsval +
1526                                                     src->scrub->pfss_ts_mod),
1527                                                     0);
1528                                                 copyback = 1;
1529                                         }
1530
1531                                         /* Modulate TS reply iff valid (!0) */
1532                                         memcpy(&tsecr, &opt[6],
1533                                             sizeof(u_int32_t));
1534                                         if (tsecr && dst->scrub &&
1535                                             (dst->scrub->pfss_flags &
1536                                             PFSS_TIMESTAMP)) {
1537                                                 tsecr = ntohl(tsecr)
1538                                                     - dst->scrub->pfss_ts_mod;
1539                                                 pf_change_a(&opt[6],
1540                                                     &th->th_sum, htonl(tsecr),
1541                                                     0);
1542                                                 copyback = 1;
1543                                         }
1544                                         got_ts = 1;
1545                                 }
1546                                 /* FALLTHROUGH */
1547                         default:
1548                                 hlen -= MAX(opt[1], 2);
1549                                 opt += MAX(opt[1], 2);
1550                                 break;
1551                         }
1552                 }
1553                 if (copyback) {
1554                         /* Copyback the options, caller copies back header */
1555                         *writeback = 1;
1556                         m_copyback(m, off + sizeof(struct tcphdr),
1557                             (th->th_off << 2) - sizeof(struct tcphdr), hdr +
1558                             sizeof(struct tcphdr));
1559                 }
1560         }
1561
1562
1563         /*
1564          * Must invalidate PAWS checks on connections idle for too long.
1565          * The fastest allowed timestamp clock is 1ms.  That turns out to
1566          * be about 24 days before it wraps.  XXX Right now our lowerbound
1567          * TS echo check only works for the first 12 days of a connection
1568          * when the TS has exhausted half its 32bit space
1569          */
1570 #define TS_MAX_IDLE     (24*24*60*60)
1571 #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
1572
1573         getmicrouptime(&uptime);
1574         if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
1575             (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
1576             time_second - state->creation > TS_MAX_CONN))  {
1577                 if (pf_status.debug >= PF_DEBUG_MISC) {
1578                         DPFPRINTF(("src idled out of PAWS\n"));
1579                         pf_print_state(state);
1580                         kprintf("\n");
1581                 }
1582                 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
1583                     | PFSS_PAWS_IDLED;
1584         }
1585         if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
1586             uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
1587                 if (pf_status.debug >= PF_DEBUG_MISC) {
1588                         DPFPRINTF(("dst idled out of PAWS\n"));
1589                         pf_print_state(state);
1590                         kprintf("\n");
1591                 }
1592                 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
1593                     | PFSS_PAWS_IDLED;
1594         }
1595
1596         if (got_ts && src->scrub && dst->scrub &&
1597             (src->scrub->pfss_flags & PFSS_PAWS) &&
1598             (dst->scrub->pfss_flags & PFSS_PAWS)) {
1599                 /* Validate that the timestamps are "in-window".
1600                  * RFC1323 describes TCP Timestamp options that allow
1601                  * measurement of RTT (round trip time) and PAWS
1602                  * (protection against wrapped sequence numbers).  PAWS
1603                  * gives us a set of rules for rejecting packets on
1604                  * long fat pipes (packets that were somehow delayed
1605                  * in transit longer than the time it took to send the
1606                  * full TCP sequence space of 4Gb).  We can use these
1607                  * rules and infer a few others that will let us treat
1608                  * the 32bit timestamp and the 32bit echoed timestamp
1609                  * as sequence numbers to prevent a blind attacker from
1610                  * inserting packets into a connection.
1611                  *
1612                  * RFC1323 tells us:
1613                  *  - The timestamp on this packet must be greater than
1614                  *    or equal to the last value echoed by the other
1615                  *    endpoint.  The RFC says those will be discarded
1616                  *    since it is a dup that has already been acked.
1617                  *    This gives us a lowerbound on the timestamp.
1618                  *        timestamp >= other last echoed timestamp
1619                  *  - The timestamp will be less than or equal to
1620                  *    the last timestamp plus the time between the
1621                  *    last packet and now.  The RFC defines the max
1622                  *    clock rate as 1ms.  We will allow clocks to be
1623                  *    up to 10% fast and will allow a total difference
1624                  *    of 30 seconds due to a route change.  And this
1625                  *    gives us an upperbound on the timestamp.
1626                  *        timestamp <= last timestamp + max ticks
1627                  *    We have to be careful here.  Windows will send an
1628                  *    initial timestamp of zero and then initialize it
1629                  *    to a random value after the 3whs; presumably to
1630                  *    avoid a DoS by having to call an expensive RNG
1631                  *    during a SYN flood.  Proof MS has at least one
1632                  *    good security geek.
1633                  *
1634                  *  - The TCP timestamp option must also echo the other
1635                  *    endpoints timestamp.  The timestamp echoed is the
1636                  *    one carried on the earliest unacknowledged segment
1637                  *    on the left edge of the sequence window.  The RFC
1638                  *    states that the host will reject any echoed
1639                  *    timestamps that were larger than any ever sent.
1640                  *    This gives us an upperbound on the TS echo.
1641                  *        tsecr <= largest_tsval
1642                  *  - The lowerbound on the TS echo is a little more
1643                  *    tricky to determine.  The other endpoint's echoed
1644                  *    values will not decrease.  But there may be
1645                  *    network conditions that re-order packets and
1646                  *    cause our view of them to decrease.  For now the
1647                  *    only lowerbound we can safely determine is that
1648                  *    the TS echo will never be less than the original
1649                  *    TS.  XXX There is probably a better lowerbound.
1650                  *    Remove TS_MAX_CONN with better lowerbound check.
1651                  *        tsecr >= other original TS
1652                  *
1653                  * It is also important to note that the fastest
1654                  * timestamp clock of 1ms will wrap its 32bit space in
1655                  * 24 days.  So we just disable TS checking after 24
1656                  * days of idle time.  We actually must use a 12d
1657                  * connection limit until we can come up with a better
1658                  * lowerbound to the TS echo check.
1659                  */
1660                 struct timeval delta_ts;
1661                 int ts_fudge;
1662
1663
1664                 /*
1665                  * PFTM_TS_DIFF is how many seconds of leeway to allow
1666                  * a host's timestamp.  This can happen if the previous
1667                  * packet got delayed in transit for much longer than
1668                  * this packet.
1669                  */
1670                 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
1671                         ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
1672
1673
1674                 /* Calculate max ticks since the last timestamp */
1675 #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
1676 #define TS_MICROSECS    1000000         /* microseconds per second */
1677 #ifndef timersub
1678 #define timersub(tvp, uvp, vvp)                                         \
1679         do {                                                            \
1680                 (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;          \
1681                 (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;       \
1682                 if ((vvp)->tv_usec < 0) {                               \
1683                         (vvp)->tv_sec--;                                \
1684                         (vvp)->tv_usec += 1000000;                      \
1685                 }                                                       \
1686         } while (0)
1687 #endif
1688
1689                 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
1690                 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
1691                 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
1692
1693
1694                 if ((src->state >= TCPS_ESTABLISHED &&
1695                     dst->state >= TCPS_ESTABLISHED) &&
1696                     (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
1697                     SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
1698                     (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
1699                     SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
1700                         /* Bad RFC1323 implementation or an insertion attack.
1701                          *
1702                          * - Solaris 2.6 and 2.7 are known to send another ACK
1703                          *   after the FIN,FIN|ACK,ACK closing that carries
1704                          *   an old timestamp.
1705                          */
1706
1707                         DPFPRINTF(("Timestamp failed %c%c%c%c\n",
1708                             SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
1709                             SEQ_GT(tsval, src->scrub->pfss_tsval +
1710                             tsval_from_last) ? '1' : ' ',
1711                             SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
1712                             SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
1713                         DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
1714                             "idle: %lus %lums\n",
1715                             tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
1716                             delta_ts.tv_usec / 1000));
1717                         DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
1718                             src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
1719                         DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
1720                             "\n", dst->scrub->pfss_tsval,
1721                             dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
1722                         if (pf_status.debug >= PF_DEBUG_MISC) {
1723                                 pf_print_state(state);
1724                                 pf_print_flags(th->th_flags);
1725                                 kprintf("\n");
1726                         }
1727                         REASON_SET(reason, PFRES_TS);
1728                         return (PF_DROP);
1729                 }
1730
1731                 /* XXX I'd really like to require tsecr but it's optional */
1732
1733         } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
1734             ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
1735             || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
1736             src->scrub && dst->scrub &&
1737             (src->scrub->pfss_flags & PFSS_PAWS) &&
1738             (dst->scrub->pfss_flags & PFSS_PAWS)) {
1739                 /* Didn't send a timestamp.  Timestamps aren't really useful
1740                  * when:
1741                  *  - connection opening or closing (often not even sent).
1742                  *    but we must not let an attacker put a FIN on a
1743                  *    data packet to sneak it through our ESTABLISHED check.
1744                  *  - on a TCP reset.  RFC suggests not even looking at TS.
1745                  *  - on an empty ACK.  The TS will not be echoed so it will
1746                  *    probably not help keep the RTT calculation in sync and
1747                  *    there isn't as much danger when the sequence numbers
1748                  *    got wrapped.  So some stacks don't include TS on empty
1749                  *    ACKs :-(
1750                  *
1751                  * To minimize the disruption to mostly RFC1323 conformant
1752                  * stacks, we will only require timestamps on data packets.
1753                  *
1754                  * And what do ya know, we cannot require timestamps on data
1755                  * packets.  There appear to be devices that do legitimate
1756                  * TCP connection hijacking.  There are HTTP devices that allow
1757                  * a 3whs (with timestamps) and then buffer the HTTP request.
1758                  * If the intermediate device has the HTTP response cache, it
1759                  * will spoof the response but not bother timestamping its
1760                  * packets.  So we can look for the presence of a timestamp in
1761                  * the first data packet and if there, require it in all future
1762                  * packets.
1763                  */
1764
1765                 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
1766                         /*
1767                          * Hey!  Someone tried to sneak a packet in.  Or the
1768                          * stack changed its RFC1323 behavior?!?!
1769                          */
1770                         if (pf_status.debug >= PF_DEBUG_MISC) {
1771                                 DPFPRINTF(("Did not receive expected RFC1323 "
1772                                     "timestamp\n"));
1773                                 pf_print_state(state);
1774                                 pf_print_flags(th->th_flags);
1775                                 kprintf("\n");
1776                         }
1777                         REASON_SET(reason, PFRES_TS);
1778                         return (PF_DROP);
1779                 }
1780         }
1781
1782
1783         /*
1784          * We will note if a host sends his data packets with or without
1785          * timestamps.  And require all data packets to contain a timestamp
1786          * if the first does.  PAWS implicitly requires that all data packets be
1787          * timestamped.  But I think there are middle-man devices that hijack
1788          * TCP streams immediately after the 3whs and don't timestamp their
1789          * packets (seen in a WWW accelerator or cache).
1790          */
1791         if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
1792             (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
1793                 if (got_ts)
1794                         src->scrub->pfss_flags |= PFSS_DATA_TS;
1795                 else {
1796                         src->scrub->pfss_flags |= PFSS_DATA_NOTS;
1797                         if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
1798                             (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
1799                                 /* Don't warn if other host rejected RFC1323 */
1800                                 DPFPRINTF(("Broken RFC1323 stack did not "
1801                                     "timestamp data packet. Disabled PAWS "
1802                                     "security.\n"));
1803                                 pf_print_state(state);
1804                                 pf_print_flags(th->th_flags);
1805                                 kprintf("\n");
1806                         }
1807                 }
1808         }
1809
1810
1811         /*
1812          * Update PAWS values
1813          */
1814         if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
1815             (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
1816                 getmicrouptime(&src->scrub->pfss_last);
1817                 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
1818                     (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1819                         src->scrub->pfss_tsval = tsval;
1820
1821                 if (tsecr) {
1822                         if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
1823                             (src->scrub->pfss_flags & PFSS_PAWS) == 0)
1824                                 src->scrub->pfss_tsecr = tsecr;
1825
1826                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
1827                             (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
1828                             src->scrub->pfss_tsval0 == 0)) {
1829                                 /* tsval0 MUST be the lowest timestamp */
1830                                 src->scrub->pfss_tsval0 = tsval;
1831                         }
1832
1833                         /* Only fully initialized after a TS gets echoed */
1834                         if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
1835                                 src->scrub->pfss_flags |= PFSS_PAWS;
1836                 }
1837         }
1838
1839         /* I have a dream....  TCP segment reassembly.... */
1840         return (0);
1841 }
1842
1843 int
1844 pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
1845     int off)
1846 {
1847         u_int16_t       *mss;
1848         int              thoff;
1849         int              opt, cnt, optlen = 0;
1850         int              rewrite = 0;
1851         u_char          *optp;
1852
1853         thoff = th->th_off << 2;
1854         cnt = thoff - sizeof(struct tcphdr);
1855         optp = mtod(m, caddr_t) + off + sizeof(struct tcphdr);
1856
1857         for (; cnt > 0; cnt -= optlen, optp += optlen) {
1858                 opt = optp[0];
1859                 if (opt == TCPOPT_EOL)
1860                         break;
1861                 if (opt == TCPOPT_NOP)
1862                         optlen = 1;
1863                 else {
1864                         if (cnt < 2)
1865                                 break;
1866                         optlen = optp[1];
1867                         if (optlen < 2 || optlen > cnt)
1868                                 break;
1869                 }
1870                 switch (opt) {
1871                 case TCPOPT_MAXSEG:
1872                         mss = (u_int16_t *)(optp + 2);
1873                         if ((ntohs(*mss)) > r->max_mss) {
1874                                 th->th_sum = pf_cksum_fixup(th->th_sum,
1875                                     *mss, htons(r->max_mss), 0);
1876                                 *mss = htons(r->max_mss);
1877                                 rewrite = 1;
1878                         }
1879                         break;
1880                 default:
1881                         break;
1882                 }
1883         }
1884
1885         return (rewrite);
1886 }