Add the DragonFly cvs id and perform general cleanups on cvs/rcs/sccs ids. Most ids have been removed from !lint sections and moved to comment sections.
[dragonfly.git] / contrib / ipfilter / ip_frag.c
1 /*
2  * Copyright (C) 1993-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  */
6 #if defined(KERNEL) && !defined(_KERNEL)
7 # define      _KERNEL
8 #endif
9
10 #if defined(__sgi) && (IRIX > 602)
11 # include <sys/ptimers.h>
12 #endif
13 #include <sys/errno.h>
14 #include <sys/types.h>
15 #include <sys/param.h>
16 #include <sys/time.h>
17 #include <sys/file.h>
18 #if !defined(_KERNEL) && !defined(KERNEL)
19 # include <stdio.h>
20 # include <string.h>
21 # include <stdlib.h>
22 #endif
23 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
24 # include <sys/filio.h>
25 # include <sys/fcntl.h>
26 #else
27 # include <sys/ioctl.h>
28 #endif
29 #ifndef linux
30 # include <sys/protosw.h>
31 #endif
32 #include <sys/socket.h>
33 #if defined(_KERNEL) && !defined(linux)
34 # include <sys/systm.h>
35 #endif
36 #if !defined(__SVR4) && !defined(__svr4__)
37 # if defined(_KERNEL) && !defined(__sgi)
38 #  include <sys/kernel.h>
39 # endif
40 # ifndef linux
41 #  include <sys/mbuf.h>
42 # endif
43 #else
44 # include <sys/byteorder.h>
45 # ifdef _KERNEL
46 #  include <sys/dditypes.h>
47 # endif
48 # include <sys/stream.h>
49 # include <sys/kmem.h>
50 #endif
51 #include <net/if.h>
52 #ifdef sun
53 # include <net/af.h>
54 #endif
55 #include <net/route.h>
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #ifndef linux
60 # include <netinet/ip_var.h>
61 #endif
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64 #include <netinet/ip_icmp.h>
65 #include "netinet/ip_compat.h"
66 #include <netinet/tcpip.h>
67 #include "netinet/ip_fil.h"
68 #include "netinet/ip_nat.h"
69 #include "netinet/ip_frag.h"
70 #include "netinet/ip_state.h"
71 #include "netinet/ip_auth.h"
72 #if (__FreeBSD_version >= 300000)
73 # include <sys/malloc.h>
74 # if (defined(KERNEL) || defined(_KERNEL))
75 #  ifndef IPFILTER_LKM
76 #   include <sys/libkern.h>
77 #   include <sys/systm.h>
78 #  endif
79 extern struct callout_handle ipfr_slowtimer_ch;
80 # endif
81 #endif
82 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
83 # include <sys/callout.h>
84 extern struct callout ipfr_slowtimer_ch;
85 #endif
86 #if defined(__OpenBSD__)
87 # include <sys/timeout.h>
88 extern struct timeout ipfr_slowtimer_ch;
89 #endif
90
91 #if !defined(lint)
92 static const char sccsid[] = "@(#)ip_frag.c     1.11 3/24/96 (C) 1993-2000 Darren Reed";
93 static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.10.2.25 2002/12/06 11:40:21 darrenr Exp $";
94 #endif
95
96
97 static ipfr_t   *ipfr_heads[IPFT_SIZE];
98 static ipfr_t   *ipfr_nattab[IPFT_SIZE];
99 static ipfrstat_t ipfr_stats;
100 static int      ipfr_inuse = 0;
101
102 int     fr_ipfrttl = 120;       /* 60 seconds */
103 int     fr_frag_lock = 0;
104
105 #ifdef _KERNEL
106 # if SOLARIS2 >= 7
107 extern  timeout_id_t    ipfr_timer_id;
108 # else
109 extern  int     ipfr_timer_id;
110 # endif
111 #endif
112 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
113 extern  KRWLOCK_T       ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex;
114 # if    SOLARIS
115 extern  KRWLOCK_T       ipf_solaris;
116 # else
117 KRWLOCK_T       ipf_solaris;
118 # endif
119 extern  kmutex_t        ipf_rw;
120 #endif
121
122
123 static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, ipfr_t **));
124 static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
125 static void ipfr_delete __P((ipfr_t *));
126
127
128 ipfrstat_t *ipfr_fragstats()
129 {
130         ipfr_stats.ifs_table = ipfr_heads;
131         ipfr_stats.ifs_nattab = ipfr_nattab;
132         ipfr_stats.ifs_inuse = ipfr_inuse;
133         return &ipfr_stats;
134 }
135
136
/*
 * add a new entry to the fragment cache, registering it as having come
 * through this box, with the result of the filter operation.
 *
 * Returns the new cache entry, or NULL when the cache is full, the
 * packet is not flagged as a fragment, a matching entry already exists,
 * or memory allocation fails.  The caller must hold the write lock for
 * the table being modified.
 */
static ipfr_t *ipfr_new(ip, fin, table)
ip_t *ip;
fr_info_t *fin;
ipfr_t *table[];
{
	ipfr_t **fp, *fra, frag;
	u_int idx, off;

	if (ipfr_inuse >= IPFT_SIZE)
		return NULL;

	if (!(fin->fin_fl & FI_FRAG))
		return NULL;

	/*
	 * Fill in the on-stack search key and, in parallel, accumulate a
	 * hash bucket index from the protocol, IP id and both addresses.
	 * This must mirror the key/hash construction in ipfr_lookup().
	 */
	frag.ipfr_p = ip->ip_p;
	idx = ip->ip_p;
	frag.ipfr_id = ip->ip_id;
	idx += ip->ip_id;
	frag.ipfr_tos = ip->ip_tos;
	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
	idx += ip->ip_src.s_addr;
	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
	idx += ip->ip_dst.s_addr;
	frag.ipfr_ifp = fin->fin_ifp;
	idx *= 127;
	idx %= IPFT_SIZE;

	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
	frag.ipfr_auth = fin->fin_fi.fi_auth;

	/*
	 * first, make sure it isn't already there...
	 */
	for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next)
		if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src,
			  IPFR_CMPSZ)) {
			ATOMIC_INCL(ipfr_stats.ifs_exists);
			return NULL;
		}

	/*
	 * allocate some memory, if possible, if not, just record that we
	 * failed to do so.
	 */
	KMALLOC(fra, ipfr_t *);
	if (fra == NULL) {
		ATOMIC_INCL(ipfr_stats.ifs_nomem);
		return NULL;
	}

	/* Take a reference on the rule that matched this packet, if any. */
	if ((fra->ipfr_rule = fin->fin_fr) != NULL) {
		ATOMIC_INC32(fin->fin_fr->fr_ref);
	}


	/*
	 * Insert the fragment into the fragment table, copy the struct used
	 * in the search using bcopy rather than reassign each field.
	 * Set the ttl to the default.
	 */
	if ((fra->ipfr_next = table[idx]))
		table[idx]->ipfr_prev = fra;
	fra->ipfr_prev = NULL;
	fra->ipfr_data = NULL;
	table[idx] = fra;
	bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ);
	fra->ipfr_ttl = fr_ipfrttl;
	/*
	 * Compute the offset of the expected start of the next packet.
	 */
	off = ip->ip_off & IP_OFFMASK;
	if (!off)
		fra->ipfr_seen0 = 1;
	fra->ipfr_off = off + (fin->fin_dlen >> 3);
	ATOMIC_INCL(ipfr_stats.ifs_new);
	ATOMIC_INC32(ipfr_inuse);
	return fra;
}
220
221
222 int ipfr_newfrag(ip, fin)
223 ip_t *ip;
224 fr_info_t *fin;
225 {
226         ipfr_t  *ipf;
227
228         if ((ip->ip_v != 4) || (fr_frag_lock))
229                 return -1;
230         WRITE_ENTER(&ipf_frag);
231         ipf = ipfr_new(ip, fin, ipfr_heads);
232         RWLOCK_EXIT(&ipf_frag);
233         if (ipf == NULL) {
234                 ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr);
235                 return -1;
236         }
237         ATOMIC_INCL(frstats[fin->fin_out].fr_nfr);
238         return 0;
239 }
240
241
242 int ipfr_nat_newfrag(ip, fin, nat)
243 ip_t *ip;
244 fr_info_t *fin;
245 nat_t *nat;
246 {
247         ipfr_t  *ipf;
248         int off;
249
250         if ((ip->ip_v != 4) || (fr_frag_lock))
251                 return -1;
252
253         off = fin->fin_off;
254         off <<= 3;
255         if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
256                 return -1;
257
258         WRITE_ENTER(&ipf_natfrag);
259         ipf = ipfr_new(ip, fin, ipfr_nattab);
260         if (ipf != NULL) {
261                 ipf->ipfr_data = nat;
262                 nat->nat_data = ipf;
263         }
264         RWLOCK_EXIT(&ipf_natfrag);
265         return ipf ? 0 : -1;
266 }
267
268
/*
 * check the fragment cache to see if there is already a record of this packet
 * with its filter result known.
 *
 * On a hit the entry is moved to the front of its hash chain, the
 * expected next-fragment offset is advanced (or the ttl shrunk for the
 * final in-order fragment), and the entry is returned; otherwise NULL.
 * The caller must hold at least a read lock on the table.
 */
static ipfr_t *ipfr_lookup(ip, fin, table)
ip_t *ip;
fr_info_t *fin;
ipfr_t *table[];
{
	ipfr_t	*f, frag;
	u_int idx;

	/*
	 * For fragments, we record protocol, packet id, TOS and both IP#'s
	 * (these should all be the same for all fragments of a packet).
	 *
	 * build up a hash value to index the table with.
	 * This must mirror the key/hash construction in ipfr_new().
	 */
	frag.ipfr_p = ip->ip_p;
	idx = ip->ip_p;
	frag.ipfr_id = ip->ip_id;
	idx += ip->ip_id;
	frag.ipfr_tos = ip->ip_tos;
	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
	idx += ip->ip_src.s_addr;
	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
	idx += ip->ip_dst.s_addr;
	frag.ipfr_ifp = fin->fin_ifp;
	idx *= 127;
	idx %= IPFT_SIZE;

	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
	frag.ipfr_auth = fin->fin_fi.fi_auth;

	/*
	 * check the table, careful to only compare the right amount of data
	 */
	for (f = table[idx]; f; f = f->ipfr_next)
		if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
			  IPFR_CMPSZ)) {
			u_short atoff, off;

			off = fin->fin_off;

			/*
			 * XXX - We really need to be guarding against the
			 * retransmission of (src,dst,id,offset-range) here
			 * because a fragmented packet is never resent with
			 * the same IP ID#.
			 */
			if (f->ipfr_seen0) {
				/* ignore a duplicate or short 1st fragment */
				if (!off || (fin->fin_fl & FI_SHORT))
					continue;
			} else if (!off)
				f->ipfr_seen0 = 1;

			if (f != table[idx]) {
				/*
				 * move fragment info. to the top of the list
				 * to speed up searches.
				 */
				if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
					f->ipfr_next->ipfr_prev = f->ipfr_prev;
				f->ipfr_next = table[idx];
				table[idx]->ipfr_prev = f;
				f->ipfr_prev = NULL;
				table[idx] = f;
			}
			atoff = off + (fin->fin_dlen >> 3);
			/*
			 * If we've followed the fragments, and this is the
			 * last (in order), shrink expiration time.
			 */
			if (off == f->ipfr_off) {
				if (!(ip->ip_off & IP_MF))
					f->ipfr_ttl = 1;
				else
					f->ipfr_off = atoff;
			}
			ATOMIC_INCL(ipfr_stats.ifs_hits);
			return f;
		}
	return NULL;
}
354
355
356 /*
357  * functional interface for NAT lookups of the NAT fragment cache
358  */
359 nat_t *ipfr_nat_knownfrag(ip, fin)
360 ip_t *ip;
361 fr_info_t *fin;
362 {
363         ipfr_t *ipf;
364         nat_t *nat;
365         int off;
366
367         if ((fin->fin_v != 4) || (fr_frag_lock))
368                 return NULL;
369
370         off = fin->fin_off;
371         off <<= 3;
372         if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
373                 return NULL;
374
375         READ_ENTER(&ipf_natfrag);
376         ipf = ipfr_lookup(ip, fin, ipfr_nattab);
377         if (ipf != NULL) {
378                 nat = ipf->ipfr_data;
379                 /*
380                  * This is the last fragment for this packet.
381                  */
382                 if ((ipf->ipfr_ttl == 1) && (nat != NULL)) {
383                         nat->nat_data = NULL;
384                         ipf->ipfr_data = NULL;
385                 }
386         } else
387                 nat = NULL;
388         RWLOCK_EXIT(&ipf_natfrag);
389         return nat;
390 }
391
392
393 /*
394  * functional interface for normal lookups of the fragment cache
395  */
396 frentry_t *ipfr_knownfrag(ip, fin)
397 ip_t *ip;
398 fr_info_t *fin;
399 {
400         frentry_t *fr;
401         ipfr_t *fra;
402         int off;
403
404         if ((fin->fin_v != 4) || (fr_frag_lock))
405                 return NULL;
406
407         off = fin->fin_off;
408         off <<= 3;
409         if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0))
410                 return NULL;
411
412         READ_ENTER(&ipf_frag);
413         fra = ipfr_lookup(ip, fin, ipfr_heads);
414         if (fra != NULL)
415                 fr = fra->ipfr_rule;
416         else
417                 fr = NULL;
418         RWLOCK_EXIT(&ipf_frag);
419         return fr;
420 }
421
422
423 /*
424  * forget any references to this external object.
425  */
426 void ipfr_forget(nat)
427 void *nat;
428 {
429         ipfr_t  *fr;
430         int     idx;
431
432         WRITE_ENTER(&ipf_natfrag);
433         for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
434                 for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
435                         if (fr->ipfr_data == nat)
436                                 fr->ipfr_data = NULL;
437
438         RWLOCK_EXIT(&ipf_natfrag);
439 }
440
441
442 static void ipfr_delete(fra)
443 ipfr_t *fra;
444 {
445         frentry_t *fr;
446
447         fr = fra->ipfr_rule;
448         if (fr != NULL) {
449                 ATOMIC_DEC32(fr->fr_ref);
450                 if (fr->fr_ref == 0)
451                         KFREE(fr);
452         }
453         if (fra->ipfr_prev)
454                 fra->ipfr_prev->ipfr_next = fra->ipfr_next;
455         if (fra->ipfr_next)
456                 fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
457         KFREE(fra);
458 }
459
460
461 /*
462  * Free memory in use by fragment state info. kept.
463  */
464 void ipfr_unload()
465 {
466         ipfr_t  **fp, *fra;
467         nat_t   *nat;
468         int     idx;
469
470         WRITE_ENTER(&ipf_frag);
471         for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
472                 for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
473                         *fp = fra->ipfr_next;
474                         ipfr_delete(fra);
475                 }
476         RWLOCK_EXIT(&ipf_frag);
477
478         WRITE_ENTER(&ipf_nat);
479         WRITE_ENTER(&ipf_natfrag);
480         for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
481                 for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
482                         *fp = fra->ipfr_next;
483                         nat = fra->ipfr_data;
484                         if (nat != NULL) {
485                                 if (nat->nat_data == fra)
486                                         nat->nat_data = NULL;
487                         }
488                         ipfr_delete(fra);
489                 }
490         RWLOCK_EXIT(&ipf_natfrag);
491         RWLOCK_EXIT(&ipf_nat);
492 }
493
494
/*
 * Age both fragment cache tables.  Called periodically (from
 * ipfr_slowtimer); each entry's ttl is decremented and entries reaching
 * zero are unlinked and freed.
 */
void ipfr_fragexpire()
{
	ipfr_t	**fp, *fra;
	nat_t	*nat;
	int	idx;
#if defined(_KERNEL)
# if !SOLARIS
	int	s;	/* saved interrupt level for SPL_NET/SPL_X */
# endif
#endif

	if (fr_frag_lock)
		return;

	SPL_NET(s);
	WRITE_ENTER(&ipf_frag);

	/*
	 * Go through the entire table, looking for entries to expire,
	 * decreasing the ttl by one for each entry.  If it reaches 0,
	 * remove it from the chain and free it.
	 */
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
			--fra->ipfr_ttl;
			if (fra->ipfr_ttl == 0) {
				*fp = fra->ipfr_next;
				ipfr_delete(fra);
				ATOMIC_INCL(ipfr_stats.ifs_expire);
				ATOMIC_DEC32(ipfr_inuse);
			} else
				fp = &fra->ipfr_next;
		}
	RWLOCK_EXIT(&ipf_frag);

	/*
	 * Same again for the NAT table, except that if the structure also
	 * still points to a NAT structure, and the NAT structure points back
	 * at the one to be free'd, NULL the reference from the NAT struct.
	 * NOTE: We need to grab both mutex's early, and in this order so as
	 * to prevent a deadlock if both try to expire at the same time.
	 */
	WRITE_ENTER(&ipf_nat);
	WRITE_ENTER(&ipf_natfrag);
	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
		for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
			--fra->ipfr_ttl;
			if (fra->ipfr_ttl == 0) {
				ATOMIC_INCL(ipfr_stats.ifs_expire);
				ATOMIC_DEC32(ipfr_inuse);
				nat = fra->ipfr_data;
				if (nat != NULL) {
					if (nat->nat_data == fra)
						nat->nat_data = NULL;
				}
				*fp = fra->ipfr_next;
				ipfr_delete(fra);
			} else
				fp = &fra->ipfr_next;
		}
	RWLOCK_EXIT(&ipf_natfrag);
	RWLOCK_EXIT(&ipf_nat);
	SPL_X(s);
}
559
560
/*
 * Slowly expire held state for fragments.  Timeouts are set in expectation
 * of this being called twice per second.  The signature varies per
 * platform to match each kernel's timer callback convention.
 */
#ifdef _KERNEL
# if (BSD >= 199306) || SOLARIS || defined(__sgi)
#  if defined(SOLARIS2) && (SOLARIS2 < 7)
void ipfr_slowtimer()
#  else
void ipfr_slowtimer __P((void *ptr))
#  endif
# else
int ipfr_slowtimer()
# endif
#else
void ipfr_slowtimer()
#endif
{
#if defined(_KERNEL) && SOLARIS
	extern	int	fr_running;

	/* ipfilter may be mid load/unload on Solaris; bail if not running */
	if (fr_running <= 0) 
		return;
	READ_ENTER(&ipf_solaris);
#endif

#if defined(__sgi) && defined(_KERNEL)
	ipfilter_sgi_intfsync();
#endif

	/* age out all timed state: fragments, state, NAT and auth entries */
	ipfr_fragexpire();
	fr_timeoutstate();
	ip_natexpire();
	fr_authexpire();
#if defined(_KERNEL)
# if	SOLARIS
	/* re-arm ourselves half a second (500000us) from now */
	ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
	RWLOCK_EXIT(&ipf_solaris);
# else
#  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
	callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
#  else
#   if (__FreeBSD_version >= 300000)
	ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2);
#   else
#    if defined(__OpenBSD__)
	timeout_add(&ipfr_slowtimer_ch, hz/2);
#    else
	timeout(ipfr_slowtimer, NULL, hz/2);
#    endif
#   endif
#   if (BSD < 199306) && !defined(__sgi)
	return 0;
#   endif /* FreeBSD */
#  endif /* NetBSD */
# endif /* SOLARIS */
#endif /* defined(_KERNEL) */
}