2 * Copyright (C) 1993-2001 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 #if defined(KERNEL) && !defined(_KERNEL)
10 #if defined(__sgi) && (IRIX > 602)
11 # include <sys/ptimers.h>
13 #include <sys/errno.h>
14 #include <sys/types.h>
15 #include <sys/param.h>
18 #if !defined(_KERNEL) && !defined(KERNEL)
23 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
24 # include <sys/filio.h>
25 # include <sys/fcntl.h>
27 # include <sys/ioctl.h>
30 # include <sys/protosw.h>
32 #include <sys/socket.h>
33 #if defined(_KERNEL) && !defined(linux)
34 # include <sys/systm.h>
36 #if !defined(__SVR4) && !defined(__svr4__)
37 # if defined(_KERNEL) && !defined(__sgi)
38 # include <sys/kernel.h>
41 # include <sys/mbuf.h>
44 # include <sys/byteorder.h>
46 # include <sys/dditypes.h>
48 # include <sys/stream.h>
49 # include <sys/kmem.h>
55 #include <net/route.h>
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
60 # include <netinet/ip_var.h>
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet/ip_icmp.h>
65 #include "netinet/ip_compat.h"
66 #include <netinet/tcpip.h>
67 #include "netinet/ip_fil.h"
68 #include "netinet/ip_nat.h"
69 #include "netinet/ip_frag.h"
70 #include "netinet/ip_state.h"
71 #include "netinet/ip_auth.h"
72 #if (__FreeBSD_version >= 300000)
73 # include <sys/malloc.h>
74 # if (defined(KERNEL) || defined(_KERNEL))
76 # include <sys/libkern.h>
77 # include <sys/systm.h>
79 extern struct callout_handle ipfr_slowtimer_ch;
82 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
83 # include <sys/callout.h>
84 extern struct callout ipfr_slowtimer_ch;
86 #if defined(__OpenBSD__)
87 # include <sys/timeout.h>
88 extern struct timeout ipfr_slowtimer_ch;
92 static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed";
93 static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.10.2.25 2002/12/06 11:40:21 darrenr Exp $";
97 static ipfr_t *ipfr_heads[IPFT_SIZE];
98 static ipfr_t *ipfr_nattab[IPFT_SIZE];
99 static ipfrstat_t ipfr_stats;
100 static int ipfr_inuse = 0;
102 int fr_ipfrttl = 120; /* 60 seconds */
103 int fr_frag_lock = 0;
107 extern timeout_id_t ipfr_timer_id;
109 extern int ipfr_timer_id;
112 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
113 extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex;
115 extern KRWLOCK_T ipf_solaris;
117 KRWLOCK_T ipf_solaris;
119 extern kmutex_t ipf_rw;
123 static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, ipfr_t **));
124 static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
125 static void ipfr_delete __P((ipfr_t *));
/*
 * ipfr_fragstats - refresh and return the fragment-cache statistics.
 * Publishes pointers to both hash tables and the live entry count into
 * the static ipfr_stats structure.
 * NOTE(review): this listing is an incomplete extraction (original line
 * numbers are fused into each line and interior lines are missing);
 * the braces and the return of &ipfr_stats are not visible here --
 * verify against the complete ip_frag.c.
 */
128 ipfrstat_t *ipfr_fragstats()
130 ipfr_stats.ifs_table = ipfr_heads;	/* plain fragment table */
131 ipfr_stats.ifs_nattab = ipfr_nattab;	/* NAT fragment table */
132 ipfr_stats.ifs_inuse = ipfr_inuse;	/* entries currently cached */
138 * add a new entry to the fragment cache, registering it as having come
139 * through this box, with the result of the filter operation.
/*
 * ipfr_new - add a new entry to the fragment cache table passed in.
 * Builds an on-stack search key (frag) from the IP header and filter
 * info, accumulates a hash index (idx), refuses duplicates, then links
 * a freshly allocated entry at the head of its hash chain with the
 * default ttl and a reference to the matching filter rule.
 * NOTE(review): incomplete extraction -- parameter declarations, braces,
 * return statements and the reduction of idx into table range are on
 * missing lines; confirm all control flow against the full source.
 */
141 static ipfr_t *ipfr_new(ip, fin, table)
146 ipfr_t **fp, *fra, frag;	/* frag is the on-stack search key */
/* refuse to grow past the table size; there is no eviction here */
149 if (ipfr_inuse >= IPFT_SIZE)
/* only fragmented packets belong in this cache */
152 if (!(fin->fin_fl & FI_FRAG))
/*
 * Build the comparison key; idx sums in both addresses (the id and
 * protocol terms are presumably on the dropped lines -- confirm).
 */
155 frag.ipfr_p = ip->ip_p;
157 frag.ipfr_id = ip->ip_id;
159 frag.ipfr_tos = ip->ip_tos;
160 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
161 idx += ip->ip_src.s_addr;
162 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
163 idx += ip->ip_dst.s_addr;
164 frag.ipfr_ifp = fin->fin_ifp;
/* keep only the option bits that identify a packet (IPF_OPTCOPY) */
168 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
169 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
170 frag.ipfr_auth = fin->fin_fi.fi_auth;
173 * first, make sure it isn't already there...
175 for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next)
176 if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src,
/* duplicate entry found: count it (and presumably bail out) */
178 ATOMIC_INCL(ipfr_stats.ifs_exists);
183 * allocate some memory, if possible, if not, just record that we
186 KMALLOC(fra, ipfr_t *);
/* allocation failed: record the shortfall */
188 ATOMIC_INCL(ipfr_stats.ifs_nomem);
/* hold a reference on the rule that matched the first fragment */
192 if ((fra->ipfr_rule = fin->fin_fr) != NULL) {
193 ATOMIC_INC32(fin->fin_fr->fr_ref);
198 * Insert the fragment into the fragment table, copy the struct used
199 * in the search using bcopy rather than reassign each field.
200 * Set the ttl to the default.
202 if ((fra->ipfr_next = table[idx]))
203 table[idx]->ipfr_prev = fra;
204 fra->ipfr_prev = NULL;
205 fra->ipfr_data = NULL;	/* no NAT association yet */
207 bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ);
208 fra->ipfr_ttl = fr_ipfrttl;	/* default ttl, decremented per timer tick */
210 * Compute the offset of the expected start of the next packet.
/* off is in 8-byte fragment units; dlen>>3 converts bytes to units */
212 off = ip->ip_off & IP_OFFMASK;
215 fra->ipfr_off = off + (fin->fin_dlen >> 3);
216 ATOMIC_INCL(ipfr_stats.ifs_new);
217 ATOMIC_INC32(ipfr_inuse);
/*
 * ipfr_newfrag - public wrapper: register a fragment in the normal
 * (filter) fragment table under the ipf_frag write lock.
 * Rejects non-IPv4 packets and does nothing while fr_frag_lock is set.
 * NOTE(review): incomplete extraction -- the declarations/braces/returns
 * are missing; both fr_bnfr and fr_nfr counters appear below, presumably
 * on the failure and success paths of ipfr_new respectively -- confirm.
 */
222 int ipfr_newfrag(ip, fin)
228 if ((ip->ip_v != 4) || (fr_frag_lock))
230 WRITE_ENTER(&ipf_frag);
231 ipf = ipfr_new(ip, fin, ipfr_heads);
232 RWLOCK_EXIT(&ipf_frag);
234 ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr);	/* per-direction stat */
237 ATOMIC_INCL(frstats[fin->fin_out].fr_nfr);	/* per-direction stat */
/*
 * ipfr_nat_newfrag - register a fragment in the NAT fragment table and
 * associate it with the given NAT session (via ipfr_data), under the
 * ipf_natfrag write lock.  Rejects non-IPv4 packets, packets whose
 * offset + data length would exceed 64K, and zero-length payloads.
 * NOTE(review): incomplete extraction -- the computation of `off` and
 * the NULL-check on ipf before the ipfr_data assignment are on missing
 * lines; confirm against the full source.
 */
242 int ipfr_nat_newfrag(ip, fin, nat)
250 if ((ip->ip_v != 4) || (fr_frag_lock))
/* reassembled size must fit in a 16-bit IP total length */
255 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
258 WRITE_ENTER(&ipf_natfrag);
259 ipf = ipfr_new(ip, fin, ipfr_nattab);
261 ipf->ipfr_data = nat;	/* link cache entry -> NAT session */
264 RWLOCK_EXIT(&ipf_natfrag);
270 * check the fragment cache to see if there is already a record of this packet
271 * with its filter result known.
/*
 * ipfr_lookup - find the cache entry for this fragment in the given
 * table.  Rebuilds the same search key as ipfr_new, scans the hash
 * chain, promotes a hit to the head of its chain, and tracks the
 * expected offset of the next in-order fragment (atoff).
 * Returns the matching entry or (presumably) NULL on a miss.
 * NOTE(review): incomplete extraction -- declarations, braces, the idx
 * reduction, the bcmp length argument and several returns are on
 * missing lines; confirm control flow against the full source.
 */
273 static ipfr_t *ipfr_lookup(ip, fin, table)
282 * For fragments, we record protocol, packet id, TOS and both IP#'s
283 * (these should all be the same for all fragments of a packet).
285 * build up a hash value to index the table with.
287 frag.ipfr_p = ip->ip_p;
289 frag.ipfr_id = ip->ip_id;
291 frag.ipfr_tos = ip->ip_tos;
292 frag.ipfr_src.s_addr = ip->ip_src.s_addr;
293 idx += ip->ip_src.s_addr;
294 frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
295 idx += ip->ip_dst.s_addr;
296 frag.ipfr_ifp = fin->fin_ifp;
/* same masking as ipfr_new so the keys compare equal */
300 frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
301 frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
302 frag.ipfr_auth = fin->fin_fi.fi_auth;
305 * check the table, careful to only compare the right amount of data
307 for (f = table[idx]; f; f = f->ipfr_next)
308 if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
315 * XXX - We really need to be guarding against the
316 * retransmission of (src,dst,id,offset-range) here
317 * because a fragmented packet is never resent with
/* first fragment (off == 0) or short packet: special-cased */
321 if (!off || (fin->fin_fl & FI_SHORT))
326 if (f != table[idx]) {
328 * move fragment info. to the top of the list
329 * to speed up searches.
/* unlink f from its current position ... */
331 if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
332 f->ipfr_next->ipfr_prev = f->ipfr_prev;
/* ... and splice it in at the chain head */
333 f->ipfr_next = table[idx];
334 table[idx]->ipfr_prev = f;
/* offset (8-byte units) where the fragment after this one starts */
338 atoff = off + (fin->fin_dlen >> 3);
340 * If we've followed the fragments, and this is the
341 * last (in order), shrink expiration time.
343 if (off == f->ipfr_off) {
/* no MF bit: this is the final fragment of the packet */
344 if (!(ip->ip_off & IP_MF))
349 ATOMIC_INCL(ipfr_stats.ifs_hits);
357 * functional interface for NAT lookups of the NAT fragment cache
/*
 * ipfr_nat_knownfrag - NAT-side lookup in the NAT fragment cache.
 * Returns the NAT session associated with this fragment, or
 * (presumably) NULL when there is no entry.  When the entry is about
 * to expire (ttl == 1), the cross-links between the cache entry and
 * the NAT structure are severed so neither dangles after expiry.
 * NOTE(review): incomplete extraction -- declarations, the computation
 * of `off`, braces and returns are on missing lines.
 */
359 nat_t *ipfr_nat_knownfrag(ip, fin)
367 if ((fin->fin_v != 4) || (fr_frag_lock))
/* same 64K / zero-length sanity check as ipfr_nat_newfrag */
372 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
375 READ_ENTER(&ipf_natfrag);
376 ipf = ipfr_lookup(ip, fin, ipfr_nattab);
378 nat = ipf->ipfr_data;
380 * This is the last fragment for this packet.
382 if ((ipf->ipfr_ttl == 1) && (nat != NULL)) {
383 nat->nat_data = NULL;	/* break NAT -> frag back-pointer */
384 ipf->ipfr_data = NULL;	/* break frag -> NAT pointer */
388 RWLOCK_EXIT(&ipf_natfrag);
394 * functional interface for normal lookups of the fragment cache
/*
 * ipfr_knownfrag - filter-side lookup in the normal fragment cache.
 * Returns the filter rule recorded for this packet's first fragment,
 * or (presumably) NULL on a miss.  Read lock only: lookups do not
 * modify entries beyond the chain promotion done in ipfr_lookup.
 * NOTE(review): incomplete extraction -- declarations, the computation
 * of `off`, the extraction of fra->ipfr_rule and the returns are on
 * missing lines; confirm against the full source.
 */
396 frentry_t *ipfr_knownfrag(ip, fin)
404 if ((fin->fin_v != 4) || (fr_frag_lock))
/* same 64K / zero-length sanity check as the NAT-side lookup */
409 if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
412 READ_ENTER(&ipf_frag);
413 fra = ipfr_lookup(ip, fin, ipfr_heads);
418 RWLOCK_EXIT(&ipf_frag);
424 * forget any references to this external object.
/*
 * ipfr_forget - remove all fragment-cache references to an external
 * object (a NAT session being torn down) so no entry is left holding a
 * dangling ipfr_data pointer.
 * NOTE(review): this walks ipfr_heads while holding the ipf_natfrag
 * lock; the contemporaneous ip_frag.c has a matching ipfr_forgetnat()
 * that walks ipfr_nattab -- verify this lock/table pairing is intended
 * and that a sibling function covers the NAT table.
 */
426 void ipfr_forget(nat)
432 WRITE_ENTER(&ipf_natfrag);
433 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
434 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
435 if (fr->ipfr_data == nat)
436 fr->ipfr_data = NULL;	/* sever the stale reference */
438 RWLOCK_EXIT(&ipf_natfrag);
/*
 * ipfr_delete - unlink one entry from its doubly-linked hash chain and
 * drop the reference it holds on its filter rule.
 * NOTE(review): incomplete extraction -- the declaration of `fr`, the
 * NULL-guards around the rule dereference, the chain-head case and the
 * KFREE of the entry are on missing lines; confirm before relying on
 * this listing.
 */
442 static void ipfr_delete(fra)
/* release the rule reference taken in ipfr_new */
449 ATOMIC_DEC32(fr->fr_ref);
/* splice the entry out of the doubly-linked chain */
454 fra->ipfr_prev->ipfr_next = fra->ipfr_next;
456 fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
462 * Free memory in use by fragment state info. kept.
/*
 * NOTE(review): the definition line of this function is missing from
 * the extraction (it is presumably ipfr_unload(), the module teardown
 * path).  It empties both fragment tables, clearing any NAT back-
 * pointers first so the NAT code is not left pointing at freed memory.
 * The per-entry ipfr_delete/KFREE calls appear to be on missing lines.
 */
470 WRITE_ENTER(&ipf_frag);
471 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
472 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
473 *fp = fra->ipfr_next;	/* unlink from the chain head */
476 RWLOCK_EXIT(&ipf_frag);
/* NAT table: take ipf_nat before ipf_natfrag (lock ordering) */
478 WRITE_ENTER(&ipf_nat);
479 WRITE_ENTER(&ipf_natfrag);
480 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
481 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
482 *fp = fra->ipfr_next;
483 nat = fra->ipfr_data;
/* clear the NAT session's back-pointer if it names this entry */
485 if (nat->nat_data == fra)
486 nat->nat_data = NULL;
490 RWLOCK_EXIT(&ipf_natfrag);
491 RWLOCK_EXIT(&ipf_nat);
/*
 * ipfr_fragexpire - age both fragment tables: each entry's ttl is
 * decremented per call (the decrement itself is on a missing line --
 * confirm) and entries reaching ttl == 0 are unlinked and counted as
 * expired.  NAT-table entries additionally have the NAT back-pointer
 * cleared before removal.
 * NOTE(review): incomplete extraction -- declarations, braces, the ttl
 * decrement and the per-entry free calls are not visible here.
 */
495 void ipfr_fragexpire()
510 WRITE_ENTER(&ipf_frag);
513 * Go through the entire table, looking for entries to expire,
514 * decreasing the ttl by one for each entry. If it reaches 0,
515 * remove it from the chain and free it.
517 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
518 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
520 if (fra->ipfr_ttl == 0) {
521 *fp = fra->ipfr_next;	/* unlink the expired entry */
523 ATOMIC_INCL(ipfr_stats.ifs_expire);
524 ATOMIC_DEC32(ipfr_inuse);
526 fp = &fra->ipfr_next;	/* entry kept: advance past it */
528 RWLOCK_EXIT(&ipf_frag);
531 * Same again for the NAT table, except that if the structure also
532 * still points to a NAT structure, and the NAT structure points back
533 * at the one to be free'd, NULL the reference from the NAT struct.
534 * NOTE: We need to grab both mutex's early, and in this order so as
535 * to prevent a deadlock if both try to expire at the same time.
537 WRITE_ENTER(&ipf_nat);
538 WRITE_ENTER(&ipf_natfrag);
539 for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
540 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
542 if (fra->ipfr_ttl == 0) {
543 ATOMIC_INCL(ipfr_stats.ifs_expire);
544 ATOMIC_DEC32(ipfr_inuse);
545 nat = fra->ipfr_data;
/* break the NAT session's back-pointer before freeing the entry */
547 if (nat->nat_data == fra)
548 nat->nat_data = NULL;
550 *fp = fra->ipfr_next;
553 fp = &fra->ipfr_next;	/* entry kept: advance past it */
555 RWLOCK_EXIT(&ipf_natfrag);
556 RWLOCK_EXIT(&ipf_nat);
562 * Slowly expire held state for fragments. Timeouts are set in expectation
563 * of this being called twice per second.
/*
 * ipfr_slowtimer - periodic kernel timer callback.  On each platform it
 * re-arms itself for half a second later via that platform's timer API
 * (Solaris timeout(9F), NetBSD callout_reset, FreeBSD timeout, OpenBSD
 * timeout_add).  The calls into the actual expiry routines
 * (ipfr_fragexpire etc.) are on lines missing from this extraction.
 * NOTE(review): alternative signatures below are selected by #if/#else
 * blocks whose pairing is not fully visible here.
 */
566 # if (BSD >= 199306) || SOLARIS || defined(__sgi)
567 # if defined(SOLARIS2) && (SOLARIS2 < 7)
568 void ipfr_slowtimer()
570 void ipfr_slowtimer __P((void *ptr))
576 void ipfr_slowtimer()
579 #if defined(_KERNEL) && SOLARIS
580 extern int fr_running;	/* presumably gates work while unloading -- confirm */
584 READ_ENTER(&ipf_solaris);
587 #if defined(__sgi) && defined(_KERNEL)
588 ipfilter_sgi_intfsync();	/* IRIX: resync interface state */
/* Solaris: re-arm 500ms from now; keep the id so it can be untimeout'd */
597 ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
598 RWLOCK_EXIT(&ipf_solaris);
600 # if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
601 callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
603 # if (__FreeBSD_version >= 300000)
604 ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2);
606 # if defined(__OpenBSD__)
607 timeout_add(&ipfr_slowtimer_ch, hz/2);
/* legacy BSD fallback: plain timeout() with no handle kept */
609 timeout(ipfr_slowtimer, NULL, hz/2);
612 # if (BSD < 199306) && !defined(__sgi)
614 # endif /* FreeBSD */
616 # endif /* SOLARIS */
617 #endif /* defined(_KERNEL) */