2 * Copyright (C) 1993-2001 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * @(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed
7 * @(#)$Id: ip_frag.c,v 2.10.2.24 2002/08/28 12:41:04 darrenr Exp $
8 * $FreeBSD: src/sys/contrib/ipfilter/netinet/ip_frag.c,v 1.15.2.7 2004/07/04 09:24:38 darrenr Exp $
9 * $DragonFly: src/sys/contrib/ipfilter/netinet/ip_frag.c,v 1.9 2005/06/05 12:17:46 corecode Exp $
11 #if defined(KERNEL) && !defined(_KERNEL)
15 #if defined(__sgi) && (IRIX > 602)
16 # include <sys/ptimers.h>
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
23 #if !defined(_KERNEL) && !defined(KERNEL)
28 #if (defined(KERNEL) || defined(_KERNEL)) && (defined(__DragonFly__) || __FreeBSD_version >= 220000)
29 # include <sys/filio.h>
30 # include <sys/fcntl.h>
32 # include <sys/ioctl.h>
35 # include <sys/protosw.h>
37 #include <sys/socket.h>
38 #if defined(_KERNEL) && !defined(linux)
39 # include <sys/systm.h>
41 #if !defined(__SVR4) && !defined(__svr4__)
42 # if defined(_KERNEL) && !defined(__sgi)
43 # include <sys/kernel.h>
46 # include <sys/mbuf.h>
49 # include <sys/byteorder.h>
51 # include <sys/dditypes.h>
53 # include <sys/stream.h>
54 # include <sys/kmem.h>
56 #if defined(__DragonFly__) && defined(_KERNEL)
57 # include <sys/thread2.h>
63 #include <net/route.h>
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/ip.h>
68 # include <netinet/ip_var.h>
70 #include <netinet/tcp.h>
71 #include <netinet/udp.h>
72 #include <netinet/ip_icmp.h>
73 #include "ip_compat.h"
74 #include <netinet/tcpip.h>
80 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
81 # include <sys/malloc.h>
82 # if (defined(KERNEL) || defined(_KERNEL))
84 # include <sys/libkern.h>
85 # include <sys/systm.h>
87 extern struct callout ipfr_slowtimer_ch;
90 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
91 # include <sys/callout.h>
92 extern struct callout ipfr_slowtimer_ch;
94 #if defined(__OpenBSD__)
95 # include <sys/timeout.h>
96 extern struct timeout ipfr_slowtimer_ch;
99 static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed";
/*
 * Fragment cache state.  Two hash tables of IPFT_SIZE buckets hold the
 * per-fragment records: ipfr_heads for filter-rule results and
 * ipfr_nattab for NAT session cross-references.
 */
101 static ipfr_t *ipfr_heads[IPFT_SIZE];
102 static ipfr_t *ipfr_nattab[IPFT_SIZE];
/* Aggregate counters exported via ipfr_fragstats(). */
103 static ipfrstat_t ipfr_stats;
/* Number of entries currently held across the fragment tables. */
104 static int ipfr_inuse = 0;
/*
 * Default ttl for a cache entry, counted in timer ticks; the expiry
 * timer fires twice per second, so 120 ticks is 60 seconds.
 */
106 int fr_ipfrttl = 120; /* 60 seconds */
/* When non-zero, fragment cache updates/lookups are disabled. */
107 int fr_frag_lock = 0;
/* Handle for the rearming ipfr_slowtimer callout (type is per-platform). */
111 extern timeout_id_t ipfr_timer_id;
113 extern int ipfr_timer_id;
/* Locks protecting the fragment/NAT tables on SMP-capable platforms. */
116 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
117 extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex;
119 extern KRWLOCK_T ipf_solaris;
121 KRWLOCK_T ipf_solaris;
123 extern kmutex_t ipf_rw;
/* Internal helpers; callers use the ipfr_*frag() wrappers below. */
127 static ipfr_t *ipfr_new (ip_t *, fr_info_t *, ipfr_t **);
128 static ipfr_t *ipfr_lookup (ip_t *, fr_info_t *, ipfr_t **);
129 static void ipfr_delete (ipfr_t *);
/*
 * Refresh and return the fragment-cache statistics structure.
 * The table pointers and in-use count are copied from the live
 * globals before the stats are handed back to the caller.
 */
132 ipfrstat_t *ipfr_fragstats()
134 ipfr_stats.ifs_table = ipfr_heads;	/* filter fragment table */
135 ipfr_stats.ifs_nattab = ipfr_nattab;	/* NAT fragment table */
136 ipfr_stats.ifs_inuse = ipfr_inuse;	/* current entry count */
142 * add a new entry to the fragment cache, registering it as having come
143 * through this box, with the result of the filter operation.
/*
 * Internal worker for ipfr_newfrag()/ipfr_nat_newfrag(): builds a search
 * key in a stack ipfr_t, hashes it, rejects duplicates, then allocates
 * and links a new entry at the head of the bucket chain.  Caller must
 * hold the appropriate write lock for 'table'.
 * NOTE(review): several interior lines (locals, returns, braces) are
 * elided in this view of the file.
 */
145 static ipfr_t *ipfr_new(ip, fin, table)
150 ipfr_t **fp, *fra, frag;
/* Refuse new entries when the cache is already at capacity. */
153 if (ipfr_inuse >= IPFT_SIZE)
/* Only packets flagged as fragments belong in this cache. */
156 if (!(fin->fin_fi.fi_fl & FI_FRAG))
/*
 * Key fields: protocol, IP id, TOS and both addresses — these are
 * identical across all fragments of one datagram.  The hash index
 * is accumulated from the two addresses.
 */
159 frag.ipfr_p = ip->ip_p;
161 frag.ipfr_id = ip->ip_id;
163 frag.ipfr_tos = ip->ip_tos;
164 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
165 idx += ip->ip_src.s_addr;
166 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
167 idx += ip->ip_dst.s_addr;
168 frag.ipfr_ifp = fin->fin_ifp;
/* Option/security/auth bits also participate in the comparison key. */
172 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
173 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
174 frag.ipfr_auth = fin->fin_fi.fi_auth;
177 * first, make sure it isn't already there...
179 for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next)
180 if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src,
/* Duplicate entry found: count it and bail out. */
182 ATOMIC_INCL(ipfr_stats.ifs_exists);
187 * allocate some memory, if possible, if not, just record that we
190 KMALLOC(fra, ipfr_t *);
192 ATOMIC_INCL(ipfr_stats.ifs_nomem);
/* Take a reference on the matching filter rule so it cannot be freed. */
196 if ((fra->ipfr_rule = fin->fin_fr) != NULL) {
197 ATOMIC_INC32(fin->fin_fr->fr_ref);
202 * Insert the fragment into the fragment table, copy the struct used
203 * in the search using bcopy rather than reassign each field.
204 * Set the ttl to the default.
/* Link at the head of the doubly-linked bucket chain. */
206 if ((fra->ipfr_next = table[idx]))
207 table[idx]->ipfr_prev = fra;
208 fra->ipfr_prev = NULL;
209 fra->ipfr_data = NULL;
211 bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ);
212 fra->ipfr_ttl = fr_ipfrttl;
214 * Compute the offset of the expected start of the next packet.
/* IP fragment offsets are carried in 8-octet units, hence the >> 3. */
216 off = ip->ip_off & IP_OFFMASK;
219 fra->ipfr_off = off + (fin->fin_dlen >> 3);
220 ATOMIC_INCL(ipfr_stats.ifs_new);
221 ATOMIC_INC32(ipfr_inuse);
/*
 * Public entry point: record a new filter-side fragment cache entry for
 * an IPv4 packet.  Takes the fragment table write lock around ipfr_new()
 * and bumps the per-direction new-fragment statistics.
 * NOTE(review): return statements are elided in this view.
 */
226 int ipfr_newfrag(ip, fin)
/* IPv4 only; also a no-op while the cache is administratively locked. */
232 if ((ip->ip_v != 4) || (fr_frag_lock))
234 WRITE_ENTER(&ipf_frag);
235 ipf = ipfr_new(ip, fin, ipfr_heads);
236 RWLOCK_EXIT(&ipf_frag);
/* fr_bnfr / fr_nfr: failed vs. successful new-fragment counters. */
238 ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr);
241 ATOMIC_INCL(frstats[fin->fin_out].fr_nfr);
/*
 * Record a new NAT-side fragment cache entry, associating the fragment
 * with its NAT session so follow-on fragments can reuse the translation.
 * NOTE(review): locals and returns are elided in this view.
 */
246 int ipfr_nat_newfrag(ip, fin, nat)
254 if ((ip->ip_v != 4) || (fr_frag_lock))
/* Reject impossible fragments: past 64K total length, or empty payload. */
259 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
262 WRITE_ENTER(&ipf_natfrag);
263 ipf = ipfr_new(ip, fin, ipfr_nattab);
/* Cross-link the cache entry to the NAT session. */
265 ipf->ipfr_data = nat;
268 RWLOCK_EXIT(&ipf_natfrag);
274 * check the fragment cache to see if there is already a record of this packet
275 * with its filter result known.
/*
 * Internal worker for the knownfrag() wrappers: rebuilds the same search
 * key as ipfr_new(), probes the bucket chain, promotes a hit to the head
 * of its chain, and tracks the expected offset of the next fragment.
 * Caller must hold at least a read lock on the table.
 * NOTE(review): locals, returns and some branches are elided in this view.
 */
277 static ipfr_t *ipfr_lookup(ip, fin, table)
286 * For fragments, we record protocol, packet id, TOS and both IP#'s
287 * (these should all be the same for all fragments of a packet).
289 * build up a hash value to index the table with.
291 frag.ipfr_p = ip->ip_p;
293 frag.ipfr_id = ip->ip_id;
295 frag.ipfr_tos = ip->ip_tos;
296 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
297 idx += ip->ip_src.s_addr;
298 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
299 idx += ip->ip_dst.s_addr;
300 frag.ipfr_ifp = fin->fin_ifp;
304 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
305 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
306 frag.ipfr_auth = fin->fin_fi.fi_auth;
309 * check the table, careful to only compare the right amount of data
311 for (f = table[idx]; f; f = f->ipfr_next)
312 if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
319 * XXX - We really need to be guarding against the
320 * retransmission of (src,dst,id,offset-range) here
321 * because a fragmented packet is never resent with
/* First fragment (offset 0) or a too-short packet: no cache work. */
325 if (!off || (fin->fin_fl & FI_SHORT))
330 if (f != table[idx]) {
332 * move fragment info. to the top of the list
333 * to speed up searches.
335 if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
336 f->ipfr_next->ipfr_prev = f->ipfr_prev;
337 f->ipfr_next = table[idx];
338 table[idx]->ipfr_prev = f;
/* Offset (in 8-octet units) where the following fragment should start. */
342 atoff = off + (fin->fin_dlen >> 3);
344 * If we've follwed the fragments, and this is the
345 * last (in order), shrink expiration time.
347 if (off == f->ipfr_off) {
/* MF clear means this is the final fragment of the datagram. */
348 if (!(ip->ip_off & IP_MF))
353 ATOMIC_INCL(ipfr_stats.ifs_hits);
361 * functional interface for NAT lookups of the NAT fragment cache
/*
 * Look up the NAT session previously associated with this packet's
 * fragment chain.  Returns the nat_t pointer (or, presumably, NULL on
 * miss — return paths are elided in this view).
 */
363 nat_t *ipfr_nat_knownfrag(ip, fin)
371 if ((fin->fin_v != 4) || (fr_frag_lock))
/* Same sanity bounds as ipfr_nat_newfrag(): 64K limit, non-empty data. */
376 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
379 READ_ENTER(&ipf_natfrag);
380 ipf = ipfr_lookup(ip, fin, ipfr_nattab);
382 nat = ipf->ipfr_data;
384 * This is the last fragment for this packet.
/*
 * Entry is about to expire (ttl == 1): break the mutual references
 * between the fragment entry and the NAT session now, so neither
 * side is left pointing at freed memory.
 */
386 if ((ipf->ipfr_ttl == 1) && (nat != NULL)) {
387 nat->nat_data = NULL;
388 ipf->ipfr_data = NULL;
392 RWLOCK_EXIT(&ipf_natfrag);
398 * functional interface for normal lookups of the fragment cache
/*
 * Look up the filter rule previously recorded for this packet's
 * fragment chain; takes the fragment-table read lock around the probe.
 */
400 frentry_t *ipfr_knownfrag(ip, fin)
408 if ((fin->fin_v != 4) || (fr_frag_lock))
/* Reject impossible fragments: past 64K total length, or empty payload. */
413 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
416 READ_ENTER(&ipf_frag);
417 fra = ipfr_lookup(ip, fin, ipfr_heads);
422 RWLOCK_EXIT(&ipf_frag);
428 * forget any references to this external object.
/*
 * Scrub every ipfr_data pointer in the filter fragment table that
 * refers to 'ptr', so the object can be freed by the caller without
 * leaving dangling references behind.
 */
430 void ipfr_forget(ptr)
436 WRITE_ENTER(&ipf_frag);
437 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
438 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
439 if (fr->ipfr_data == ptr)
440 fr->ipfr_data = NULL;
442 RWLOCK_EXIT(&ipf_frag);
447 * forget any references to this external object.
/*
 * NAT-table twin of ipfr_forget(): scrub every ipfr_data pointer in the
 * NAT fragment table that refers to the NAT session being torn down.
 */
449 void ipfr_forgetnat(nat)
455 WRITE_ENTER(&ipf_natfrag);
456 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
457 for (fr = ipfr_nattab[idx]; fr; fr = fr->ipfr_next)
458 if (fr->ipfr_data == nat)
459 fr->ipfr_data = NULL;
461 RWLOCK_EXIT(&ipf_natfrag);
/*
 * Unlink a fragment cache entry from its bucket chain and drop the
 * reference it holds on its filter rule.  Caller holds the table's
 * write lock.  NOTE(review): the rule-lookup lines and the free of the
 * entry itself are elided in this view.
 */
465 static void ipfr_delete(fra)
472 ATOMIC_DEC32(fr->fr_ref);
/* Splice the entry out of the doubly-linked bucket chain. */
477 fra->ipfr_prev->ipfr_next = fra->ipfr_next;
479 fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
485 * Free memory in use by fragment state info. kept.
/*
 * NOTE(review): the function header is elided in this view; judging by
 * the comment above and the body, this drains BOTH fragment tables at
 * module unload time — confirm against the full source.
 */
493 WRITE_ENTER(&ipf_frag);
/* Empty every bucket of the filter fragment table. */
494 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
495 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
496 *fp = fra->ipfr_next;
499 RWLOCK_EXIT(&ipf_frag);
/*
 * The NAT table needs both locks (ipf_nat before ipf_natfrag) because
 * entries may still be cross-linked to live NAT sessions.
 */
501 WRITE_ENTER(&ipf_nat);
502 WRITE_ENTER(&ipf_natfrag);
503 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
504 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
505 *fp = fra->ipfr_next;
506 nat = fra->ipfr_data;
/* Break the NAT session's back-pointer before the entry goes away. */
508 if (nat->nat_data == fra)
509 nat->nat_data = NULL;
513 RWLOCK_EXIT(&ipf_natfrag);
514 RWLOCK_EXIT(&ipf_nat);
/*
 * Age the fragment caches: walk both tables, decrementing each entry's
 * ttl (decrement line elided in this view) and deleting entries that
 * reach zero.  Called periodically from ipfr_slowtimer().
 */
518 void ipfr_fragexpire()
524 # if !SOLARIS && !defined(__DragonFly__)
533 WRITE_ENTER(&ipf_frag);
536 * Go through the entire table, looking for entries to expire,
537 * decreasing the ttl by one for each entry. If it reaches 0,
538 * remove it from the chain and free it.
540 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
541 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
543 if (fra->ipfr_ttl == 0) {
/* Unhook from the bucket before deletion; fp stays put. */
544 *fp = fra->ipfr_next;
546 ATOMIC_INCL(ipfr_stats.ifs_expire);
547 ATOMIC_DEC32(ipfr_inuse);
/* Entry survives this tick: advance to the next chain link. */
549 fp = &fra->ipfr_next;
551 RWLOCK_EXIT(&ipf_frag);
554 * Same again for the NAT table, except that if the structure also
555 * still points to a NAT structure, and the NAT structure points back
556 * at the one to be free'd, NULL the reference from the NAT struct.
557 * NOTE: We need to grab both mutex's early, and in this order so as
558 * to prevent a deadlock if both try to expire at the same time.
560 WRITE_ENTER(&ipf_nat);
561 WRITE_ENTER(&ipf_natfrag);
562 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
563 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
565 if (fra->ipfr_ttl == 0) {
566 ATOMIC_INCL(ipfr_stats.ifs_expire);
567 ATOMIC_DEC32(ipfr_inuse);
568 nat = fra->ipfr_data;
/* Break the NAT session's back-pointer before freeing the entry. */
570 if (nat->nat_data == fra)
571 nat->nat_data = NULL;
573 *fp = fra->ipfr_next;
576 fp = &fra->ipfr_next;
578 RWLOCK_EXIT(&ipf_natfrag);
579 RWLOCK_EXIT(&ipf_nat);
585 * Slowly expire held state for fragments.  Timeouts are set in expectation
586 * of this being called twice per second.
/*
 * Periodic kernel timer: ages the fragment cache and then rearms itself
 * for another tick in half a second (hz / 2), using whichever callout /
 * timeout API the target platform provides.
 */
589 # if (BSD >= 199306) || SOLARIS || defined(__sgi)
590 # if defined(SOLARIS2) && (SOLARIS2 < 7)
591 void ipfr_slowtimer()
/* Solaris 7+ uses the (void *) timeout callback signature. */
593 void ipfr_slowtimer (void *ptr)
599 void ipfr_slowtimer()
602 #if defined(_KERNEL) && SOLARIS
603 extern int fr_running;
607 READ_ENTER(&ipf_solaris);
/* IRIX: resynchronise interface state on each tick. */
610 #if defined(__sgi) && defined(_KERNEL)
611 ipfilter_sgi_intfsync();
/* Rearm: 500000 us = 0.5 s, matching the twice-per-second contract. */
620 ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
621 RWLOCK_EXIT(&ipf_solaris);
623 # if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
624 callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
626 # if defined(__DragonFly__) || __FreeBSD_version >= 300000
627 callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
629 # if defined(__OpenBSD__)
630 timeout_add(&ipfr_slowtimer_ch, hz/2);
/* Legacy BSD fallback: classic timeout(9) interface. */
632 timeout(ipfr_slowtimer, NULL, hz/2);
635 # if (BSD < 199306) && !defined(__sgi)
637 # endif /* FreeBSD */
639 # endif /* SOLARIS */
640 #endif /* defined(_KERNEL) */