Rename printf -> kprintf in sys/ and add some defines where necessary
[dragonfly.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  *
8  * @(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed
9  * @(#)$Id: ip_nat.c,v 2.37.2.70 2002/08/28 12:45:48 darrenr Exp $
10  * $FreeBSD: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.22.2.8 2004/07/04 09:24:39 darrenr Exp $
11  * $DragonFly: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.12 2006/12/23 00:27:02 swildner Exp $
12  */
13 #if (defined(__DragonFly__) || defined(__FreeBSD__)) && defined(KERNEL) && !defined(_KERNEL)
14 #define _KERNEL
15 #endif
16
17 #if defined(__sgi) && (IRIX > 602)
18 # include <sys/ptimers.h>
19 #endif
20 #include <sys/errno.h>
21 #include <sys/types.h>
22 #include <sys/param.h>
23 #include <sys/time.h>
24 #include <sys/file.h>
25 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26     defined(_KERNEL)
27 # include "opt_ipfilter_log.h"
28 #endif
29 #if !defined(_KERNEL) && !defined(KERNEL)
30 # include <stdio.h>
31 # include <string.h>
32 # include <stdlib.h>
33 #endif
34 #if (defined(KERNEL) || defined(_KERNEL)) && (defined(__DragonFly__) || __FreeBSD_version >= 220000)
35 # include <sys/filio.h>
36 # include <sys/fcntl.h>
37 #else
38 # include <sys/ioctl.h>
39 #endif
40 #include <sys/fcntl.h>
41 #ifndef linux
42 # include <sys/protosw.h>
43 #endif
44 #include <sys/socket.h>
45 #if defined(_KERNEL) && !defined(linux)
46 # include <sys/systm.h>
47 #endif
48 #if !defined(__SVR4) && !defined(__svr4__)
49 # ifndef linux
50 #  include <sys/mbuf.h>
51 # endif
52 #else
53 # include <sys/filio.h>
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 #  include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
62 # include <sys/queue.h>
63 #endif
64 #if defined(__DragonFly__) && defined(_KERNEL)
65 # include <sys/thread2.h>
66 #endif
67 #include <net/if.h>
68 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81
82 #ifdef __sgi
83 # ifdef IFF_DRVRLOCK /* IRIX6 */
84 #include <sys/hashing.h>
85 #include <netinet/in_var.h>
86 # endif
87 #endif
88
89 #ifdef RFC1825
90 # include <vpn/md5.h>
91 # include <vpn/ipsec.h>
92 extern struct ifnet vpnif;
93 #endif
94
95 #ifndef linux
96 # include <netinet/ip_var.h>
97 # include <netinet/tcp_fsm.h>
98 #endif
99 #include <netinet/tcp.h>
100 #include <netinet/udp.h>
101 #include <netinet/ip_icmp.h>
102 #include "ip_compat.h"
103 #include <netinet/tcpip.h>
104 #include "ip_fil.h"
105 #include "ip_nat.h"
106 #include "ip_frag.h"
107 #include "ip_state.h"
108 #include "ip_proxy.h"
109 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
110 # include <sys/malloc.h>
111 #endif
112 #ifndef MIN
113 # define        MIN(a,b)        (((a)<(b))?(a):(b))
114 #endif
115 #undef  SOCKADDR_IN
116 #define SOCKADDR_IN     struct sockaddr_in
117
118 #ifndef _KERNEL
119 # define kprintf        printf
120 #endif
121
122 static const char sccsid[] = "@(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed";
123
124 nat_t   **nat_table[2] = { NULL, NULL },
125         *nat_instances = NULL;
126 ipnat_t *nat_list = NULL;
127 u_int   ipf_nattable_max = NAT_TABLE_MAX;
128 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
129 u_int   ipf_natrules_sz = NAT_SIZE;
130 u_int   ipf_rdrrules_sz = RDR_SIZE;
131 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
132 u_32_t  nat_masks = 0;
133 u_32_t  rdr_masks = 0;
134 ipnat_t **nat_rules = NULL;
135 ipnat_t **rdr_rules = NULL;
136 hostmap_t       **maptable  = NULL;
137
138 u_long  fr_defnatage = DEF_NAT_AGE,
139         fr_defnaticmpage = 6;           /* 3 seconds */
140 natstat_t nat_stats;
141 int     fr_nat_lock = 0;
142 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
143 extern  kmutex_t        ipf_rw;
144 extern  KRWLOCK_T       ipf_nat;
145 #endif
146
147 static  int     nat_flushtable (void);
148 static  void    nat_addnat (struct ipnat *);
149 static  void    nat_addrdr (struct ipnat *);
150 static  void    nat_delete (struct nat *);
151 static  void    nat_delrdr (struct ipnat *);
152 static  void    nat_delnat (struct ipnat *);
153 static  int     fr_natgetent (caddr_t);
154 static  int     fr_natgetsz (caddr_t);
155 static  int     fr_natputent (caddr_t);
156 static  void    nat_tabmove (fr_info_t *, nat_t *);
157 static  int     nat_match (fr_info_t *, ipnat_t *, ip_t *);
158 static  hostmap_t *nat_hostmap (ipnat_t *, struct in_addr,
159                                     struct in_addr);
160 static  void    nat_hostmapdel (struct hostmap *);
161 static  void    nat_mssclamp (tcphdr_t *, u_32_t, fr_info_t *, u_short *);
162
163
164 int nat_init()
165 {
166         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
167         if (nat_table[0] != NULL)
168                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
169         else
170                 return -1;
171
172         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
173         if (nat_table[1] != NULL)
174                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
175         else
176                 return -1;
177
178         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
179         if (nat_rules != NULL)
180                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
181         else
182                 return -1;
183
184         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
185         if (rdr_rules != NULL)
186                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
187         else
188                 return -1;
189
190         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
191         if (maptable != NULL)
192                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
193         else
194                 return -1;
195         return 0;
196 }
197
198
199 static void nat_addrdr(n)
200 ipnat_t *n;
201 {
202         ipnat_t **np;
203         u_32_t j;
204         u_int hv;
205         int k;
206
207         k = countbits(n->in_outmsk);
208         if ((k >= 0) && (k != 32))
209                 rdr_masks |= 1 << k;
210         j = (n->in_outip & n->in_outmsk);
211         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
212         np = rdr_rules + hv;
213         while (*np != NULL)
214                 np = &(*np)->in_rnext;
215         n->in_rnext = NULL;
216         n->in_prnext = np;
217         *np = n;
218 }
219
220
221 static void nat_addnat(n)
222 ipnat_t *n;
223 {
224         ipnat_t **np;
225         u_32_t j;
226         u_int hv;
227         int k;
228
229         k = countbits(n->in_inmsk);
230         if ((k >= 0) && (k != 32))
231                 nat_masks |= 1 << k;
232         j = (n->in_inip & n->in_inmsk);
233         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
234         np = nat_rules + hv;
235         while (*np != NULL)
236                 np = &(*np)->in_mnext;
237         n->in_mnext = NULL;
238         n->in_pmnext = np;
239         *np = n;
240 }
241
242
243 static void nat_delrdr(n)
244 ipnat_t *n;
245 {
246         if (n->in_rnext)
247                 n->in_rnext->in_prnext = n->in_prnext;
248         *n->in_prnext = n->in_rnext;
249 }
250
251
252 static void nat_delnat(n)
253 ipnat_t *n;
254 {
255         if (n->in_mnext)
256                 n->in_mnext->in_pmnext = n->in_pmnext;
257         *n->in_pmnext = n->in_mnext;
258 }
259
260
261 /*
262  * check if an ip address has already been allocated for a given mapping that
263  * is not doing port based translation.
264  *
265  * Must be called with ipf_nat held as a write lock.
266  */
267 static struct hostmap *nat_hostmap(np, real, map)
268 ipnat_t *np;
269 struct in_addr real;
270 struct in_addr map;
271 {
272         hostmap_t *hm;
273         u_int hv;
274
275         hv = real.s_addr % HOSTMAP_SIZE;
276         for (hm = maptable[hv]; hm; hm = hm->hm_next)
277                 if ((hm->hm_realip.s_addr == real.s_addr) &&
278                     (np == hm->hm_ipnat)) {
279                         hm->hm_ref++;
280                         return hm;
281                 }
282
283         KMALLOC(hm, hostmap_t *);
284         if (hm) {
285                 hm->hm_next = maptable[hv];
286                 hm->hm_pnext = maptable + hv;
287                 if (maptable[hv])
288                         maptable[hv]->hm_pnext = &hm->hm_next;
289                 maptable[hv] = hm;
290                 hm->hm_ipnat = np;
291                 hm->hm_realip = real;
292                 hm->hm_mapip = map;
293                 hm->hm_ref = 1;
294         }
295         return hm;
296 }
297
298
299 /*
300  * Must be called with ipf_nat held as a write lock.
301  */
302 static void nat_hostmapdel(hm)
303 struct hostmap *hm;
304 {
305         ATOMIC_DEC32(hm->hm_ref);
306         if (hm->hm_ref == 0) {
307                 if (hm->hm_next)
308                         hm->hm_next->hm_pnext = hm->hm_pnext;
309                 *hm->hm_pnext = hm->hm_next;
310                 KFREE(hm);
311         }
312 }
313
314
315 void fix_outcksum(fin, sp, n)
316 fr_info_t *fin;
317 u_short *sp;
318 u_32_t n;
319 {
320         u_short sumshort;
321         u_32_t sum1;
322
323         if (!n)
324                 return;
325         else if (n & NAT_HW_CKSUM) {
326                 n &= 0xffff;
327                 n += fin->fin_dlen;
328                 n = (n & 0xffff) + (n >> 16);
329                 *sp = n & 0xffff;
330                 return;
331         }
332         sum1 = (~ntohs(*sp)) & 0xffff;
333         sum1 += (n);
334         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
335         /* Again */
336         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
337         sumshort = ~(u_short)sum1;
338         *(sp) = htons(sumshort);
339 }
340
341
342 void fix_incksum(fin, sp, n)
343 fr_info_t *fin;
344 u_short *sp;
345 u_32_t n;
346 {
347         u_short sumshort;
348         u_32_t sum1;
349
350         if (!n)
351                 return;
352         else if (n & NAT_HW_CKSUM) {
353                 n &= 0xffff;
354                 n += fin->fin_dlen;
355                 n = (n & 0xffff) + (n >> 16);
356                 *sp = n & 0xffff;
357                 return;
358         }
359 #ifdef sparc
360         sum1 = (~(*sp)) & 0xffff;
361 #else
362         sum1 = (~ntohs(*sp)) & 0xffff;
363 #endif
364         sum1 += ~(n) & 0xffff;
365         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
366         /* Again */
367         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
368         sumshort = ~(u_short)sum1;
369         *(sp) = htons(sumshort);
370 }
371
372
373 /*
374  * fix_datacksum is used *only* for the adjustments of checksums in the data
375  * section of an IP packet.
376  *
377  * The only situation in which you need to do this is when NAT'ing an 
378  * ICMP error message. Such a message, contains in its body the IP header
379  * of the original IP packet, that causes the error.
380  *
381  * You can't use fix_incksum or fix_outcksum in that case, because for the
382  * kernel the data section of the ICMP error is just data, and no special 
383  * processing like hardware cksum or ntohs processing have been done by the 
384  * kernel on the data section.
385  */
386 void fix_datacksum(sp, n)
387 u_short *sp;
388 u_32_t n;
389 {
390         u_short sumshort;
391          u_32_t sum1;
392
393         if (!n)
394                 return;
395
396         sum1 = (~ntohs(*sp)) & 0xffff;
397         sum1 += (n);
398         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
399         /* Again */
400         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
401         sumshort = ~(u_short)sum1;
402         *(sp) = htons(sumshort);
403 }
404
405 /*
406  * How the NAT is organised and works.
407  *
408  * Inside (interface y) NAT       Outside (interface x)
409  * -------------------- -+- -------------------------------------
410  * Packet going          |   out, processed by ip_natout() for x
411  * ------------>         |   ------------>
412  * src=10.1.1.1          |   src=192.1.1.1
413  *                       |
414  *                       |   in, processed by ip_natin() for x
415  * <------------         |   <------------
416  * dst=10.1.1.1          |   dst=192.1.1.1
417  * -------------------- -+- -------------------------------------
418  * ip_natout() - changes ip_src and if required, sport
419  *             - creates a new mapping, if required.
420  * ip_natin()  - changes ip_dst and if required, dport
421  *
422  * In the NAT table, internal source is recorded as "in" and externally
423  * seen as "out".
424  */
425
426 /*
427  * Handle ioctls which manipulate the NAT.
428  */
429 int nat_ioctl(data, cmd, mode)
430 #if defined(__DragonFly__) || defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
431 u_long cmd;
432 #else
433 int cmd;
434 #endif
435 caddr_t data;
436 int mode;
437 {
438         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
439         int error = 0, ret, arg, getlock;
440         ipnat_t natd;
441         u_32_t i, j;
442
443 #if (BSD >= 199306) && defined(_KERNEL)
444         if ((securelevel >= 3) && (mode & FWRITE))
445                 return EPERM;
446 #endif
447
448         nat = NULL;     /* XXX gcc -Wuninitialized */
449         KMALLOC(nt, ipnat_t *);
450         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
451         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
452                 if (mode & NAT_SYSSPACE) {
453                         bcopy(data, (char *)&natd, sizeof(natd));
454                         error = 0;
455                 } else {
456                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
457                 }
458         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
459                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
460                 if (error)
461                         error = EFAULT;
462         }
463
464         if (error)
465                 goto done;
466
467         /*
468          * For add/delete, look to see if the NAT entry is already present
469          */
470         if (getlock == 1) {
471                 WRITE_ENTER(&ipf_nat);
472         }
473         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
474                 nat = &natd;
475                 nat->in_flags &= IPN_USERFLAGS;
476                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
477                         if ((nat->in_flags & IPN_SPLIT) == 0)
478                                 nat->in_inip &= nat->in_inmsk;
479                         if ((nat->in_flags & IPN_IPRANGE) == 0)
480                                 nat->in_outip &= nat->in_outmsk;
481                 }
482                 for (np = &nat_list; (n = *np); np = &n->in_next)
483                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
484                                         IPN_CMPSIZ)) {
485                                 if (n->in_redir == NAT_REDIRECT &&
486                                     n->in_pnext != nat->in_pnext)
487                                         continue;
488                                 break;
489                         }
490         }
491
492         switch (cmd)
493         {
494 #ifdef  IPFILTER_LOG
495         case SIOCIPFFB :
496         {
497                 int tmp;
498
499                 if (!(mode & FWRITE))
500                         error = EPERM;
501                 else {
502                         tmp = ipflog_clear(IPL_LOGNAT);
503                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
504                 }
505                 break;
506         }
507 #endif
508         case SIOCADNAT :
509                 if (!(mode & FWRITE)) {
510                         error = EPERM;
511                         break;
512                 }
513                 if (n) {
514                         error = EEXIST;
515                         break;
516                 }
517                 if (nt == NULL) {
518                         error = ENOMEM;
519                         break;
520                 }
521                 n = nt;
522                 nt = NULL;
523                 bcopy((char *)nat, (char *)n, sizeof(*n));
524                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
525                 if (!n->in_ifp)
526                         n->in_ifp = (void *)-1;
527                 if (n->in_plabel[0] != '\0') {
528                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
529                         if (!n->in_apr) {
530                                 error = ENOENT;
531                                 break;
532                         }
533                 }
534                 n->in_next = NULL;
535                 *np = n;
536
537                 if (n->in_redir & NAT_REDIRECT) {
538                         n->in_flags &= ~IPN_NOTDST;
539                         nat_addrdr(n);
540                 }
541                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
542                         n->in_flags &= ~IPN_NOTSRC;
543                         nat_addnat(n);
544                 }
545
546                 n->in_use = 0;
547                 if (n->in_redir & NAT_MAPBLK)
548                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
549                 else if (n->in_flags & IPN_AUTOPORTMAP)
550                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
551                 else if (n->in_flags & IPN_IPRANGE)
552                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
553                 else if (n->in_flags & IPN_SPLIT)
554                         n->in_space = 2;
555                 else
556                         n->in_space = ~ntohl(n->in_outmsk);
557                 /*
558                  * Calculate the number of valid IP addresses in the output
559                  * mapping range.  In all cases, the range is inclusive of
560                  * the start and ending IP addresses.
561                  * If to a CIDR address, lose 2: broadcast + network address
562                  *                               (so subtract 1)
563                  * If to a range, add one.
564                  * If to a single IP address, set to 1.
565                  */
566                 if (n->in_space) {
567                         if ((n->in_flags & IPN_IPRANGE) != 0)
568                                 n->in_space += 1;
569                         else
570                                 n->in_space -= 1;
571                 } else
572                         n->in_space = 1;
573                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
574                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
575                         n->in_nip = ntohl(n->in_outip) + 1;
576                 else if ((n->in_flags & IPN_SPLIT) &&
577                          (n->in_redir & NAT_REDIRECT))
578                         n->in_nip = ntohl(n->in_inip);
579                 else
580                         n->in_nip = ntohl(n->in_outip);
581                 if (n->in_redir & NAT_MAP) {
582                         n->in_pnext = ntohs(n->in_pmin);
583                         /*
584                          * Multiply by the number of ports made available.
585                          */
586                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
587                                 n->in_space *= (ntohs(n->in_pmax) -
588                                                 ntohs(n->in_pmin) + 1);
589                                 /*
590                                  * Because two different sources can map to
591                                  * different destinations but use the same
592                                  * local IP#/port #.
593                                  * If the result is smaller than in_space, then
594                                  * we may have wrapped around 32bits.
595                                  */
596                                 i = n->in_inmsk;
597                                 if ((i != 0) && (i != 0xffffffff)) {
598                                         j = n->in_space * (~ntohl(i) + 1);
599                                         if (j >= n->in_space)
600                                                 n->in_space = j;
601                                         else
602                                                 n->in_space = 0xffffffff;
603                                 }
604                         }
605                         /*
606                          * If no protocol is specified, multiple by 256.
607                          */
608                         if ((n->in_flags & IPN_TCPUDP) == 0) {
609                                         j = n->in_space * 256;
610                                         if (j >= n->in_space)
611                                                 n->in_space = j;
612                                         else
613                                                 n->in_space = 0xffffffff;
614                         }
615                 }
616                 /* Otherwise, these fields are preset */
617                 n = NULL;
618                 nat_stats.ns_rules++;
619                 break;
620         case SIOCRMNAT :
621                 if (!(mode & FWRITE)) {
622                         error = EPERM;
623                         n = NULL;
624                         break;
625                 }
626                 if (!n) {
627                         error = ESRCH;
628                         break;
629                 }
630                 if (n->in_redir & NAT_REDIRECT)
631                         nat_delrdr(n);
632                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
633                         nat_delnat(n);
634                 if (nat_list == NULL) {
635                         nat_masks = 0;
636                         rdr_masks = 0;
637                 }
638                 *np = n->in_next;
639                 if (!n->in_use) {
640                         if (n->in_apr)
641                                 appr_free(n->in_apr);
642                         KFREE(n);
643                         nat_stats.ns_rules--;
644                 } else {
645                         n->in_flags |= IPN_DELETE;
646                         n->in_next = NULL;
647                 }
648                 n = NULL;
649                 break;
650         case SIOCGNATS :
651                 MUTEX_DOWNGRADE(&ipf_nat);
652                 nat_stats.ns_table[0] = nat_table[0];
653                 nat_stats.ns_table[1] = nat_table[1];
654                 nat_stats.ns_list = nat_list;
655                 nat_stats.ns_maptable = maptable;
656                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
657                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
658                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
659                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
660                 nat_stats.ns_instances = nat_instances;
661                 nat_stats.ns_apslist = ap_sess_list;
662                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
663                                   sizeof(nat_stats));
664                 break;
665         case SIOCGNATL :
666             {
667                 natlookup_t nl;
668
669                 MUTEX_DOWNGRADE(&ipf_nat);
670                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
671                 if (error)
672                         break;
673
674                 if (nat_lookupredir(&nl)) {
675                         error = IWCOPYPTR((char *)&nl, (char *)data,
676                                           sizeof(nl));
677                 } else
678                         error = ESRCH;
679                 break;
680             }
681         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
682                 if (!(mode & FWRITE)) {
683                         error = EPERM;
684                         break;
685                 }
686                 error = 0;
687                 if (arg == 0)
688                         ret = nat_flushtable();
689                 else if (arg == 1)
690                         ret = nat_clearlist();
691                 else
692                         error = EINVAL;
693                 MUTEX_DOWNGRADE(&ipf_nat);
694                 if (!error) {
695                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
696                         if (error)
697                                 error = EFAULT;
698                 }
699                 break;
700         case SIOCSTLCK :
701                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
702                 if (!error) {
703                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
704                                         sizeof(fr_nat_lock));
705                         if (!error)
706                                 fr_nat_lock = arg;
707                 } else
708                         error = EFAULT;
709                 break;
710         case SIOCSTPUT :
711                 if (fr_nat_lock)
712                         error = fr_natputent(data);
713                 else
714                         error = EACCES;
715                 break;
716         case SIOCSTGSZ :
717                 if (fr_nat_lock)
718                         error = fr_natgetsz(data);
719                 else
720                         error = EACCES;
721                 break;
722         case SIOCSTGET :
723                 if (fr_nat_lock)
724                         error = fr_natgetent(data);
725                 else
726                         error = EACCES;
727                 break;
728         case FIONREAD :
729 #ifdef  IPFILTER_LOG
730                 arg = (int)iplused[IPL_LOGNAT];
731                 MUTEX_DOWNGRADE(&ipf_nat);
732                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
733                 if (error)
734                         error = EFAULT;
735 #endif
736                 break;
737         default :
738                 error = EINVAL;
739                 break;
740         }
741         if (getlock == 1) {
742                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
743         }
744 done:
745         if (nt)
746                 KFREE(nt);
747         return error;
748 }
749
750
751 static int fr_natgetsz(data)
752 caddr_t data;
753 {
754         ap_session_t *aps;
755         nat_t *nat, *n;
756         int error = 0;
757         natget_t ng;
758
759         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
760         if (error)
761                 return EFAULT;
762
763         nat = ng.ng_ptr;
764         if (!nat) {
765                 nat = nat_instances;
766                 ng.ng_sz = 0;
767                 if (nat == NULL) {
768                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
769                         if (error)
770                                 error = EFAULT;
771                         return error;
772                 }
773         } else {
774                 /*
775                  * Make sure the pointer we're copying from exists in the
776                  * current list of entries.  Security precaution to prevent
777                  * copying of random kernel data.
778                  */
779                 for (n = nat_instances; n; n = n->nat_next)
780                         if (n == nat)
781                                 break;
782                 if (!n)
783                         return ESRCH;
784         }
785
786         ng.ng_sz = sizeof(nat_save_t);
787         aps = nat->nat_aps;
788         if ((aps != NULL) && (aps->aps_data != 0)) {
789                 ng.ng_sz += sizeof(ap_session_t);
790                 ng.ng_sz += aps->aps_psiz;
791                 if (aps->aps_psiz > 4)  /* XXX - sizeof(ipn_data) */
792                         ng.ng_sz -= 4;
793         }
794
795         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
796         if (error)
797                 error = EFAULT;
798         return error;
799 }
800
801
802 static int fr_natgetent(data)
803 caddr_t data;
804 {
805         nat_save_t ipn, *ipnp, *ipnn = NULL;
806         nat_t *n, *nat;
807         ap_session_t *aps;
808         size_t dsz;
809         int error;
810
811         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
812         if (error)
813                 return EFAULT;
814         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
815         if (error)
816                 return EFAULT;
817
818         nat = ipn.ipn_next;
819         if (!nat) {
820                 nat = nat_instances;
821                 if (nat == NULL) {
822                         if (nat_instances == NULL)
823                                 return ENOENT;
824                         return 0;
825                 }
826         } else {
827                 /*
828                  * Make sure the pointer we're copying from exists in the
829                  * current list of entries.  Security precaution to prevent
830                  * copying of random kernel data.
831                  */
832                 for (n = nat_instances; n; n = n->nat_next)
833                         if (n == nat)
834                                 break;
835                 if (!n)
836                         return ESRCH;
837         }
838
839         ipn.ipn_next = nat->nat_next;
840         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
841         ipn.ipn_nat.nat_data = NULL;
842
843         if (nat->nat_ptr) {
844                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
845                       sizeof(ipn.ipn_ipnat));
846         }
847
848         if (nat->nat_fr)
849                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
850                       sizeof(ipn.ipn_rule));
851
852         if ((aps = nat->nat_aps)) {
853                 dsz = sizeof(*aps);
854                 if (aps->aps_data)
855                         dsz += aps->aps_psiz;
856                 ipn.ipn_dsize = dsz;
857                 if (dsz > sizeof(ipn.ipn_data))
858                         dsz -= sizeof(ipn.ipn_data);
859                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + dsz);
860                 if (ipnn == NULL)
861                         return ENOMEM;
862                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
863
864                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
865                 if (aps->aps_data) {
866                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
867                               aps->aps_psiz);
868                 }
869                 error = IWCOPY((caddr_t)ipnn, ipnp,
870                                sizeof(ipn) + dsz);
871                 if (error)
872                         error = EFAULT;
873                 KFREES(ipnn, sizeof(*ipnn) + dsz);
874         } else {
875                 ipn.ipn_dsize = 0;
876                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
877                 if (error)
878                         error = EFAULT;
879         }
880         return error;
881 }
882
883
884 static int fr_natputent(data)
885 caddr_t data;
886 {
887         nat_save_t ipn, *ipnp, *ipnn = NULL;
888         nat_t *n, *nat;
889         ap_session_t *aps;
890         frentry_t *fr;
891         ipnat_t *in;
892
893         int error;
894
895         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
896         if (error)
897                 return EFAULT;
898         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
899         if (error)
900                 return EFAULT;
901         nat = NULL;
902         if (ipn.ipn_dsize) {
903                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
904                 if (ipnn == NULL)
905                         return ENOMEM;
906                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
907                 error = IRCOPY((caddr_t)ipnp + offsetof(nat_save_t, ipn_data),
908                                (caddr_t)ipnn->ipn_data, ipn.ipn_dsize);
909                 if (error) {
910                         error = EFAULT;
911                         goto junkput;
912                 }
913         } else
914                 ipnn = NULL;
915
916         KMALLOC(nat, nat_t *);
917         if (nat == NULL) {
918                 error = EFAULT;
919                 goto junkput;
920         }
921
922         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
923         /*
924          * Initialize all these so that nat_delete() doesn't cause a crash.
925          */
926         nat->nat_phnext[0] = NULL;
927         nat->nat_phnext[1] = NULL;
928         fr = nat->nat_fr;
929         nat->nat_fr = NULL;
930         aps = nat->nat_aps;
931         nat->nat_aps = NULL;
932         in = nat->nat_ptr;
933         nat->nat_ptr = NULL;
934         nat->nat_hm = NULL;
935         nat->nat_data = NULL;
936         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
937
938         /*
939          * Restore the rule associated with this nat session
940          */
941         if (in) {
942                 KMALLOC(in, ipnat_t *);
943                 if (in == NULL) {
944                         error = ENOMEM;
945                         goto junkput;
946                 }
947                 nat->nat_ptr = in;
948                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
949                 in->in_use = 1;
950                 in->in_flags |= IPN_DELETE;
951                 in->in_next = NULL;
952                 in->in_rnext = NULL;
953                 in->in_prnext = NULL;
954                 in->in_mnext = NULL;
955                 in->in_pmnext = NULL;
956                 in->in_ifp = GETUNIT(in->in_ifname, 4);
957                 if (in->in_plabel[0] != '\0') {
958                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
959                 }
960         }
961
962         /*
963          * Restore ap_session_t structure.  Include the private data allocated
964          * if it was there.
965          */
966         if (aps) {
967                 KMALLOC(aps, ap_session_t *);
968                 if (aps == NULL) {
969                         error = ENOMEM;
970                         goto junkput;
971                 }
972                 nat->nat_aps = aps;
973                 aps->aps_next = ap_sess_list;
974                 ap_sess_list = aps;
975                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
976                 if (in)
977                         aps->aps_apr = in->in_apr;
978                 if (aps->aps_psiz) {
979                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
980                         if (aps->aps_data == NULL) {
981                                 error = ENOMEM;
982                                 goto junkput;
983                         }
984                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
985                               aps->aps_psiz);
986                 } else {
987                         aps->aps_psiz = 0;
988                         aps->aps_data = NULL;
989                 }
990         }
991
992         /*
993          * If there was a filtering rule associated with this entry then
994          * build up a new one.
995          */
996         if (fr != NULL) {
997                 if (nat->nat_flags & FI_NEWFR) {
998                         KMALLOC(fr, frentry_t *);
999                         nat->nat_fr = fr;
1000                         if (fr == NULL) {
1001                                 error = ENOMEM;
1002                                 goto junkput;
1003                         }
1004                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
1005                         ipn.ipn_nat.nat_fr = fr;
1006                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
1007                         if (error) {
1008                                 error = EFAULT;
1009                                 goto junkput;
1010                         }
1011                 } else {
1012                         for (n = nat_instances; n; n = n->nat_next)
1013                                 if (n->nat_fr == fr)
1014                                         break;
1015                         if (!n) {
1016                                 error = ESRCH;
1017                                 goto junkput;
1018                         }
1019                 }
1020         }
1021
1022         if (ipnn)
1023                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1024         nat_insert(nat);
1025         return 0;
1026 junkput:
1027         if (ipnn)
1028                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1029         if (nat)
1030                 nat_delete(nat);
1031         return error;
1032 }
1033
1034
1035 /*
1036  * Delete a nat entry from the various lists and table.
1037  */
1038 static void nat_delete(natd)
1039 struct nat *natd;
1040 {
1041         struct ipnat *ipn;
1042
1043         if (natd->nat_flags & FI_WILDP)
1044                 nat_stats.ns_wilds--;
1045         if (natd->nat_hnext[0])
1046                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1047         *natd->nat_phnext[0] = natd->nat_hnext[0];
1048         if (natd->nat_hnext[1])
1049                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1050         *natd->nat_phnext[1] = natd->nat_hnext[1];
1051         if (natd->nat_me != NULL)
1052                 *natd->nat_me = NULL;
1053
1054         if (natd->nat_fr != NULL) {
1055                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1056         }
1057
1058         if (natd->nat_hm != NULL)
1059                 nat_hostmapdel(natd->nat_hm);
1060
1061         /*
1062          * If there is an active reference from the nat entry to its parent
1063          * rule, decrement the rule's reference count and free it too if no
1064          * longer being used.
1065          */
1066         ipn = natd->nat_ptr;
1067         if (ipn != NULL) {
1068                 ipn->in_space++;
1069                 ipn->in_use--;
1070                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1071                         if (ipn->in_apr)
1072                                 appr_free(ipn->in_apr);
1073                         KFREE(ipn);
1074                         nat_stats.ns_rules--;
1075                 }
1076         }
1077
1078         MUTEX_DESTROY(&natd->nat_lock);
1079         /*
1080          * If there's a fragment table entry too for this nat entry, then
1081          * dereference that as well.
1082          */
1083         ipfr_forgetnat((void *)natd);
1084         aps_free(natd->nat_aps);
1085         nat_stats.ns_inuse--;
1086         KFREE(natd);
1087 }
1088
1089
1090 /*
1091  * nat_flushtable - clear the NAT table of all mapping entries.
1092  * (this is for the dynamic mappings)
1093  */
1094 static int nat_flushtable()
1095 {
1096         nat_t *nat, **natp;
1097         int j = 0;
1098
1099         /*
1100          * ALL NAT mappings deleted, so lets just make the deletions
1101          * quicker.
1102          */
1103         if (nat_table[0] != NULL)
1104                 bzero((char *)nat_table[0],
1105                       sizeof(nat_table[0]) * ipf_nattable_sz);
1106         if (nat_table[1] != NULL)
1107                 bzero((char *)nat_table[1],
1108                       sizeof(nat_table[1]) * ipf_nattable_sz);
1109
1110         for (natp = &nat_instances; (nat = *natp); ) {
1111                 *natp = nat->nat_next;
1112 #ifdef  IPFILTER_LOG
1113                 nat_log(nat, NL_FLUSH);
1114 #endif
1115                 nat_delete(nat);
1116                 j++;
1117         }
1118         nat_stats.ns_inuse = 0;
1119         return j;
1120 }
1121
1122
1123 /*
1124  * nat_clearlist - delete all rules in the active NAT mapping list.
1125  * (this is for NAT/RDR rules)
1126  */
1127 int nat_clearlist()
1128 {
1129         ipnat_t *n, **np = &nat_list;
1130         int i = 0;
1131
1132         if (nat_rules != NULL)
1133                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1134         if (rdr_rules != NULL)
1135                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1136
1137         while ((n = *np)) {
1138                 *np = n->in_next;
1139                 if (!n->in_use) {
1140                         if (n->in_apr)
1141                                 appr_free(n->in_apr);
1142                         KFREE(n);
1143                         nat_stats.ns_rules--;
1144                 } else {
1145                         n->in_flags |= IPN_DELETE;
1146                         n->in_next = NULL;
1147                 }
1148                 i++;
1149         }
1150         nat_masks = 0;
1151         rdr_masks = 0;
1152         return i;
1153 }
1154
1155
1156 /*
1157  * Create a new NAT table entry.
1158  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1159  *       If you intend on changing this, beware: appr_new() may call nat_new()
1160  *       recursively!
1161  */
1162 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1163 fr_info_t *fin;
1164 ip_t *ip;
1165 ipnat_t *np;
1166 nat_t **natsave;
1167 u_int flags;
1168 int direction;
1169 {
1170         u_32_t sum1, sum2, sumd, l;
1171         u_short port = 0, sport = 0, dport = 0, nport = 0;
1172         struct in_addr in, inb;
1173         u_short nflags, sp, dp;
1174         tcphdr_t *tcp = NULL;
1175         hostmap_t *hm = NULL;
1176         nat_t *nat, *natl;
1177 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1178         qif_t *qf = fin->fin_qif;
1179 #endif
1180
1181         if (nat_stats.ns_inuse >= ipf_nattable_max) {
1182                 nat_stats.ns_memfail++;
1183                 return NULL;
1184         }
1185
1186         nflags = flags & np->in_flags;
1187         if (flags & IPN_TCPUDP) {
1188                 tcp = (tcphdr_t *)fin->fin_dp;
1189                 sport = htons(fin->fin_data[0]);
1190                 dport = htons(fin->fin_data[1]);
1191         }
1192
1193         /* Give me a new nat */
1194         KMALLOC(nat, nat_t *);
1195         if (nat == NULL) {
1196                 nat_stats.ns_memfail++;
1197                 /*
1198                  * Try to automatically tune the max # of entries in the
1199                  * table allowed to be less than what will cause kmem_alloc()
1200                  * to fail and try to eliminate panics due to out of memory
1201                  * conditions arising.
1202                  */
1203                 if (ipf_nattable_max > ipf_nattable_sz) {
1204                         ipf_nattable_max = nat_stats.ns_inuse - 100;
1205                         kprintf("ipf_nattable_max reduced to %d\n",
1206                                 ipf_nattable_max);
1207                 }
1208                 return NULL;
1209         }
1210
1211         bzero((char *)nat, sizeof(*nat));
1212         nat->nat_tcpstate[0] = TCPS_CLOSED;
1213         nat->nat_tcpstate[1] = TCPS_CLOSED;
1214         nat->nat_flags = flags;
1215         if (flags & FI_WILDP)
1216                 nat_stats.ns_wilds++;
1217         /*
1218          * Search the current table for a match.
1219          */
1220         if (direction == NAT_OUTBOUND) {
1221                 /*
1222                  * Values at which the search for a free resouce starts.
1223                  */
1224                 u_32_t st_ip;
1225                 u_short st_port;
1226
1227                 /*
1228                  * If it's an outbound packet which doesn't match any existing
1229                  * record, then create a new port
1230                  */
1231                 l = 0;
1232                 st_ip = np->in_nip;
1233                 st_port = np->in_pnext;
1234
1235                 do {
1236                         port = 0;
1237                         in.s_addr = htonl(np->in_nip);
1238                         if (l == 0) {
1239                                 /*
1240                                  * Check to see if there is an existing NAT
1241                                  * setup for this IP address pair.
1242                                  */
1243                                 hm = nat_hostmap(np, fin->fin_src, in);
1244                                 if (hm != NULL)
1245                                         in.s_addr = hm->hm_mapip.s_addr;
1246                         } else if ((l == 1) && (hm != NULL)) {
1247                                 nat_hostmapdel(hm);
1248                                 hm = NULL;
1249                         }
1250                         in.s_addr = ntohl(in.s_addr);
1251
1252                         nat->nat_hm = hm;
1253
1254                         if ((np->in_outmsk == 0xffffffff) &&
1255                             (np->in_pnext == 0)) {
1256                                 if (l > 0)
1257                                         goto badnat;
1258                         }
1259
1260                         if (np->in_redir & NAT_MAPBLK) {
1261                                 if ((l >= np->in_ppip) || ((l > 0) &&
1262                                      !(flags & IPN_TCPUDP)))
1263                                         goto badnat;
1264                                 /*
1265                                  * map-block - Calculate destination address.
1266                                  */
1267                                 in.s_addr = ntohl(fin->fin_saddr);
1268                                 in.s_addr &= ntohl(~np->in_inmsk);
1269                                 inb.s_addr = in.s_addr;
1270                                 in.s_addr /= np->in_ippip;
1271                                 in.s_addr &= ntohl(~np->in_outmsk);
1272                                 in.s_addr += ntohl(np->in_outip);
1273                                 /*
1274                                  * Calculate destination port.
1275                                  */
1276                                 if ((flags & IPN_TCPUDP) &&
1277                                     (np->in_ppip != 0)) {
1278                                         port = ntohs(sport) + l;
1279                                         port %= np->in_ppip;
1280                                         port += np->in_ppip *
1281                                                 (inb.s_addr % np->in_ippip);
1282                                         port += MAPBLK_MINPORT;
1283                                         port = htons(port);
1284                                 }
1285                         } else if (!np->in_outip &&
1286                                    (np->in_outmsk == 0xffffffff)) {
1287                                 /*
1288                                  * 0/32 - use the interface's IP address.
1289                                  */
1290                                 if ((l > 0) ||
1291                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1292                                         goto badnat;
1293                                 in.s_addr = ntohl(in.s_addr);
1294                         } else if (!np->in_outip && !np->in_outmsk) {
1295                                 /*
1296                                  * 0/0 - use the original source address/port.
1297                                  */
1298                                 if (l > 0)
1299                                         goto badnat;
1300                                 in.s_addr = ntohl(fin->fin_saddr);
1301                         } else if ((np->in_outmsk != 0xffffffff) &&
1302                                    (np->in_pnext == 0) &&
1303                                    ((l > 0) || (hm == NULL)))
1304                                 np->in_nip++;
1305                         natl = NULL;
1306
1307                         if ((nflags & IPN_TCPUDP) &&
1308                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1309                             (np->in_flags & IPN_AUTOPORTMAP)) {
1310                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1311                                         if (l > np->in_space) {
1312                                                 goto badnat;
1313                                         } else if ((l > np->in_ppip) &&
1314                                                    np->in_outmsk != 0xffffffff)
1315                                                 np->in_nip++;
1316                                 }
1317                                 if (np->in_ppip != 0) {
1318                                         port = ntohs(sport);
1319                                         port += (l % np->in_ppip);
1320                                         port %= np->in_ppip;
1321                                         port += np->in_ppip *
1322                                                 (ntohl(fin->fin_saddr) %
1323                                                  np->in_ippip);
1324                                         port += MAPBLK_MINPORT;
1325                                         port = htons(port);
1326                                 }
1327                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1328                                    (nflags & IPN_TCPUDP) &&
1329                                    (np->in_pnext != 0)) {
1330                                 port = htons(np->in_pnext++);
1331                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1332                                         np->in_pnext = ntohs(np->in_pmin);
1333                                         if (np->in_outmsk != 0xffffffff)
1334                                                 np->in_nip++;
1335                                 }
1336                         }
1337
1338                         if (np->in_flags & IPN_IPRANGE) {
1339                                 if (np->in_nip > ntohl(np->in_outmsk))
1340                                         np->in_nip = ntohl(np->in_outip);
1341                         } else {
1342                                 if ((np->in_outmsk != 0xffffffff) &&
1343                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1344                                     ntohl(np->in_outip))
1345                                         np->in_nip = ntohl(np->in_outip) + 1;
1346                         }
1347
1348                         if (!port && (flags & IPN_TCPUDP))
1349                                 port = sport;
1350
1351                         /*
1352                          * Here we do a lookup of the connection as seen from
1353                          * the outside.  If an IP# pair already exists, try
1354                          * again.  So if you have A->B becomes C->B, you can
1355                          * also have D->E become C->E but not D->B causing
1356                          * another C->B.  Also take protocol and ports into
1357                          * account when determining whether a pre-existing
1358                          * NAT setup will cause an external conflict where
1359                          * this is appropriate.
1360                          */
1361                         inb.s_addr = htonl(in.s_addr);
1362                         sp = fin->fin_data[0];
1363                         dp = fin->fin_data[1];
1364                         fin->fin_data[0] = fin->fin_data[1];
1365                         fin->fin_data[1] = htons(port);
1366                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1367                                             (u_int)fin->fin_p, fin->fin_dst,
1368                                             inb, 1);
1369                         fin->fin_data[0] = sp;
1370                         fin->fin_data[1] = dp;
1371
1372                         /*
1373                          * Has the search wrapped around and come back to the
1374                          * start ?
1375                          */
1376                         if ((natl != NULL) &&
1377                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1378                             (np->in_nip != 0) && (st_ip == np->in_nip))
1379                                 goto badnat;
1380                         l++;
1381                 } while (natl != NULL);
1382
1383                 if (np->in_space > 0)
1384                         np->in_space--;
1385
1386                 /* Setup the NAT table */
1387                 nat->nat_inip = fin->fin_src;
1388                 nat->nat_outip.s_addr = htonl(in.s_addr);
1389                 nat->nat_oip = fin->fin_dst;
1390                 if (nat->nat_hm == NULL)
1391                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1392                                                   nat->nat_outip);
1393
1394                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1395                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1396
1397                 if (flags & IPN_TCPUDP) {
1398                         nat->nat_inport = sport;
1399                         nat->nat_outport = port;        /* sport */
1400                         nat->nat_oport = dport;
1401                 }
1402         } else {
1403                 /*
1404                  * Otherwise, it's an inbound packet. Most likely, we don't
1405                  * want to rewrite source ports and source addresses. Instead,
1406                  * we want to rewrite to a fixed internal address and fixed
1407                  * internal port.
1408                  */
1409                 if (np->in_flags & IPN_SPLIT) {
1410                         in.s_addr = np->in_nip;
1411                         if (np->in_inip == htonl(in.s_addr))
1412                                 np->in_nip = ntohl(np->in_inmsk);
1413                         else {
1414                                 np->in_nip = ntohl(np->in_inip);
1415                                 if (np->in_flags & IPN_ROUNDR) {
1416                                         nat_delrdr(np);
1417                                         nat_addrdr(np);
1418                                 }
1419                         }
1420                 } else {
1421                         in.s_addr = ntohl(np->in_inip);
1422                         if (np->in_flags & IPN_ROUNDR) {
1423                                 nat_delrdr(np);
1424                                 nat_addrdr(np);
1425                         }
1426                 }
1427                 if (!np->in_pnext)
1428                         nport = dport;
1429                 else {
1430                         /*
1431                          * Whilst not optimized for the case where
1432                          * pmin == pmax, the gain is not significant.
1433                          */
1434                         if (np->in_pmin != np->in_pmax) {
1435                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1436                                         ntohs(np->in_pnext);
1437                                 nport = ntohs(nport);
1438                         } else
1439                                 nport = np->in_pnext;
1440                 }
1441
1442                 /*
1443                  * When the redirect-to address is set to 0.0.0.0, just
1444                  * assume a blank `forwarding' of the packet.
1445                  */
1446                 if (in.s_addr == 0)
1447                         in.s_addr = ntohl(fin->fin_daddr);
1448
1449                 nat->nat_inip.s_addr = htonl(in.s_addr);
1450                 nat->nat_outip = fin->fin_dst;
1451                 nat->nat_oip = fin->fin_src;
1452
1453                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1454                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1455
1456                 if (flags & IPN_TCPUDP) {
1457                         nat->nat_inport = nport;
1458                         nat->nat_outport = dport;
1459                         nat->nat_oport = sport;
1460                 }
1461         }
1462
1463         CALC_SUMD(sum1, sum2, sumd);
1464         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1465 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1466         if ((flags & IPN_TCP) && dohwcksum &&
1467             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1468                 if (direction == NAT_OUTBOUND)
1469                         sum1 = LONG_SUM(ntohl(in.s_addr));
1470                 else
1471                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1472                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1473                 sum1 += IPPROTO_TCP;
1474                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1475                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1476         } else
1477 #endif
1478                 nat->nat_sumd[1] = nat->nat_sumd[0];
1479
1480         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1481                 if (direction == NAT_OUTBOUND)
1482                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1483                 else
1484                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1485
1486                 sum2 = LONG_SUM(in.s_addr);
1487
1488                 CALC_SUMD(sum1, sum2, sumd);
1489                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1490         } else
1491                 nat->nat_ipsumd = nat->nat_sumd[0];
1492
1493         in.s_addr = htonl(in.s_addr);
1494
1495         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1496
1497         nat->nat_me = natsave;
1498         nat->nat_dir = direction;
1499         nat->nat_ifp = fin->fin_ifp;
1500         nat->nat_ptr = np;
1501         nat->nat_p = fin->fin_p;
1502         nat->nat_bytes = 0;
1503         nat->nat_pkts = 0;
1504         nat->nat_mssclamp = np->in_mssclamp;
1505         nat->nat_fr = fin->fin_fr;
1506         if (nat->nat_fr != NULL) {
1507                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1508         }
1509         if (direction == NAT_OUTBOUND) {
1510                 if (flags & IPN_TCPUDP)
1511                         tcp->th_sport = port;
1512         } else {
1513                 if (flags & IPN_TCPUDP)
1514                         tcp->th_dport = nport;
1515         }
1516
1517         nat_insert(nat);
1518
1519         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1520             (tcp != NULL && dport == np->in_dport)))
1521                 (void) appr_new(fin, ip, nat);
1522
1523         np->in_use++;
1524 #ifdef  IPFILTER_LOG
1525         nat_log(nat, (u_int)np->in_redir);
1526 #endif
1527         return nat;
1528 badnat:
1529         nat_stats.ns_badnat++;
1530         if ((hm = nat->nat_hm) != NULL)
1531                 nat_hostmapdel(hm);
1532         KFREE(nat);
1533         return NULL;
1534 }
1535
1536
1537 /*
1538  * Insert a NAT entry into the hash tables for searching and add it to the
1539  * list of active NAT entries.  Adjust global counters when complete.
1540  */
1541 void    nat_insert(nat)
1542 nat_t   *nat;
1543 {
1544         u_int hv1, hv2;
1545         nat_t **natp;
1546
1547         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1548
1549         nat->nat_age = fr_defnatage;
1550         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1551         if (nat->nat_ifname[0] !='\0') {
1552                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1553         }
1554
1555         nat->nat_next = nat_instances;
1556         nat_instances = nat;
1557
1558         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1559                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1560                                   0xffffffff);
1561                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1562                                   ipf_nattable_sz);
1563                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1564                                   0xffffffff);
1565                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1566                                  ipf_nattable_sz);
1567         } else {
1568                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1569                                   ipf_nattable_sz);
1570                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1571                                   ipf_nattable_sz);
1572         }
1573
1574         natp = &nat_table[0][hv1];
1575         if (*natp)
1576                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1577         nat->nat_phnext[0] = natp;
1578         nat->nat_hnext[0] = *natp;
1579         *natp = nat;
1580
1581         natp = &nat_table[1][hv2];
1582         if (*natp)
1583                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1584         nat->nat_phnext[1] = natp;
1585         nat->nat_hnext[1] = *natp;
1586         *natp = nat;
1587
1588         nat_stats.ns_added++;
1589         nat_stats.ns_inuse++;
1590 }
1591
1592
1593 nat_t *nat_icmplookup(ip, fin, dir)
1594 ip_t *ip;
1595 fr_info_t *fin;
1596 int dir;
1597 {
1598         icmphdr_t *icmp;
1599         tcphdr_t *tcp = NULL;
1600         ip_t *oip;
1601         int flags = 0, type, minlen;
1602
1603         icmp = (icmphdr_t *)fin->fin_dp;
1604         /*
1605          * Does it at least have the return (basic) IP header ?
1606          * Only a basic IP header (no options) should be with an ICMP error
1607          * header.
1608          */
1609         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1610                 return NULL;
1611         type = icmp->icmp_type;
1612         /*
1613          * If it's not an error type, then return.
1614          */
1615         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1616             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1617             (type != ICMP_PARAMPROB))
1618                 return NULL;
1619
1620         oip = (ip_t *)((char *)fin->fin_dp + 8);
1621         minlen = (oip->ip_hl << 2);
1622         if (minlen < sizeof(ip_t))
1623                 return NULL;
1624         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1625                 return NULL;
1626         /*
1627          * Is the buffer big enough for all of it ?  It's the size of the IP
1628          * header claimed in the encapsulated part which is of concern.  It
1629          * may be too big to be in this buffer but not so big that it's
1630          * outside the ICMP packet, leading to TCP deref's causing problems.
1631          * This is possible because we don't know how big oip_hl is when we
1632          * do the pullup early in fr_check() and thus can't gaurantee it is
1633          * all here now.
1634          */
1635 #ifdef  _KERNEL
1636         {
1637         mb_t *m;
1638
1639 # if SOLARIS
1640         m = fin->fin_qfm;
1641         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1642                 return NULL;
1643 # else
1644         m = *(mb_t **)fin->fin_mp;
1645         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1646             (char *)ip + m->m_len)
1647                 return NULL;
1648 # endif
1649         }
1650 #endif
1651
1652         if (oip->ip_p == IPPROTO_TCP)
1653                 flags = IPN_TCP;
1654         else if (oip->ip_p == IPPROTO_UDP)
1655                 flags = IPN_UDP;
1656         if (flags & IPN_TCPUDP) {
1657                 u_short data[2];
1658                 nat_t *nat;
1659
1660                 minlen += 8;            /* + 64bits of data to get ports */
1661                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1662                         return NULL;
1663
1664                 data[0] = fin->fin_data[0];
1665                 data[1] = fin->fin_data[1];
1666                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1667                 fin->fin_data[0] = ntohs(tcp->th_dport);
1668                 fin->fin_data[1] = ntohs(tcp->th_sport);
1669
1670                 if (dir == NAT_INBOUND) {
1671                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1672                                             oip->ip_dst, oip->ip_src, 0);
1673                 } else {
1674                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1675                                             oip->ip_dst, oip->ip_src, 0);
1676                 }
1677                 fin->fin_data[0] = data[0];
1678                 fin->fin_data[1] = data[1];
1679                 return nat;
1680         }
1681         if (dir == NAT_INBOUND)
1682                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1683                                     oip->ip_dst, oip->ip_src, 0);
1684         else
1685                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1686                                     oip->ip_dst, oip->ip_src, 0);
1687 }
1688
1689
1690 /*
1691  * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1692  * packet gets correctly recognised.
      *
      * NOTE(review): ip_natout() in this file also calls nat_icmp() with
      * NAT_OUTBOUND, so the "incoming only" remark above looks stale - confirm.
      *
      * Purpose: for an ICMP error message whose payload embeds (part of) a packet
      * that belongs to an existing NAT session, find that session with
      * nat_icmplookup() and rewrite the embedded IP address (and, for TCP/UDP,
      * the embedded port) to the value stored in the NAT entry, patching the
      * embedded IP/UDP/TCP checksums and the outer ICMP checksum to match.
      *
      * Parameters:
      *   ip     - IP header of the packet carrying the ICMP message.
      *   fin    - packet information; fin_dp is used as the ICMP header.
      *   nflags - output; set to IPN_ICMPERR once a NAT entry has been found.
      *   dir    - lookup direction, handed unchanged to nat_icmplookup().
      *
      * Returns the matching NAT entry, or NULL when the packet is flagged short,
      * has a non-zero fragment offset, is not IPv4, or nat_icmplookup() finds
      * nothing.  The packet is only modified when an entry is returned.
1693  */
1694 nat_t *nat_icmp(ip, fin, nflags, dir)
1695 ip_t *ip;
1696 fr_info_t *fin;
1697 u_int *nflags;
1698 int dir;
1699 {
1700         u_32_t sum1, sum2, sumd, sumd2 = 0;
1701         struct in_addr in;
1702         int flags, dlen;
1703         icmphdr_t *icmp;
1704         udphdr_t *udp;
1705         tcphdr_t *tcp;
1706         nat_t *nat;
1707         ip_t *oip;
1708
             /* Nothing can be done with short packets or non-first fragments. */
1709         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1710                 return NULL;
1711         /*
1712          * nat_icmplookup() will return NULL for `defective' packets.
1713          */
1714         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1715                 return NULL;
1716
             /*
              * sumd  - delta produced by the most recent single field change.
              * sumd2 - running total that is finally applied to icmp_cksum.
              */
1717         flags = 0;
1718         sumd2 = 0;
1719         *nflags = IPN_ICMPERR;
1720         icmp = (icmphdr_t *)fin->fin_dp;
1721         oip = (ip_t *)&icmp->icmp_ip;
1722         if (oip->ip_p == IPPROTO_TCP)
1723                 flags = IPN_TCP;
1724         else if (oip->ip_p == IPPROTO_UDP)
1725                 flags = IPN_UDP;
             /*
              * udp/tcp both point just past the embedded IP header; dlen is the
              * number of bytes, according to ip_len, from there to the end of
              * the packet.
              */
1726         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1727         dlen = ip->ip_len - ((char *)udp - (char *)ip);
1728         /*
1729          * XXX - what if this is bogus hl and we go off the end ?
1730          * In this case, nat_icmplookup() will have returned NULL.
1731          */
1732         tcp = (tcphdr_t *)udp;
1733
1734         /*
1735          * Need to adjust ICMP header to include the real IP#'s and
1736          * port #'s.  Only apply a checksum change relative to the
1737          * IP address change as it will be modified again in ip_natout
1738          * for both address and port.  Two checksum changes are
1739          * necessary for the two header address changes.  Be careful
1740          * to only modify the checksum once for the port # and twice
1741          * for the IP#.
1742          */
1743
1744         /*
1745          * Step 1
1746          * Fix the IP addresses in the offending IP packet. You also need
1747          * to adjust the IP header checksum of that offending IP packet
1748          * and the ICMP checksum of the ICMP error message itself.
1749          *
1750          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1751          * in the pseudo header that is used to compute the UDP resp. TCP
1752          * checksum. So, we must compensate that as well. Even worse, the
1753          * change in the UDP and TCP checksums require yet another
1754          * adjustment of the ICMP checksum of the ICMP error message.
1755          *
1756          */
1757
             /*
              * If the embedded destination is the NAT entry's nat_oip, the
              * embedded source is replaced with nat_inip; otherwise the embedded
              * destination is replaced with nat_outip.  sum1 holds the old
              * address, sum2 the new one.
              */
1758         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1759                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1760                 in = nat->nat_inip;
1761                 oip->ip_src = in;
1762         } else {
1763                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1764                 in = nat->nat_outip;
1765                 oip->ip_dst = in;
1766         }
1767
1768         sum2 = LONG_SUM(ntohl(in.s_addr));
1769
1770         CALC_SUMD(sum1, sum2, sumd);
1771
1772         /*
1773          * Fix IP checksum of the offending IP packet to adjust for
1774          * the change in the IP address.
1775          *
1776          * Normally, you would expect that the ICMP checksum of the
1777          * ICMP error message needs to be adjusted as well for the
1778          * IP address change in oip.
1779          * However, this is a NOP, because the ICMP checksum is
1780          * calculated over the complete ICMP packet, which includes the
1781          * changed oip IP addresses and oip->ip_sum. However, these
1782          * two changes cancel each other out (if the delta for
1783          * the IP address is x, then the delta for ip_sum is minus x),
1784          * so no change in the icmp_cksum is necessary.
1785          *
1786          * Be careful that nat_dir refers to the direction of the
1787          * offending IP packet (oip), not to its ICMP response (icmp)
1788          */
1789         fix_datacksum(&oip->ip_sum, sumd);
1790         /* ICMP cksum: the address and ip_sum deltas cancel (see above). */
1791
1792         /*
1793          * Fix UDP pseudo header checksum to compensate for the
1794          * IP address change.  dlen >= 8 means the whole UDP header,
1795          * including uh_sum, is present in the ICMP payload.
1796          */
1796         if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && udp->uh_sum) {
1797                 /*
1798                  * The UDP checksum is optional, only adjust it
1799                  * if it has been set.
1800                  */
1801                 sum1 = ntohs(udp->uh_sum);
1802                 fix_datacksum(&udp->uh_sum, sumd);
1803                 sum2 = ntohs(udp->uh_sum);
1804
1805                 /*
1806                  * Fix ICMP checksum to compensate the UDP
1807                  * checksum adjustment.
1808                  */
1809                 sumd2 = sumd << 1;
1810                 CALC_SUMD(sum1, sum2, sumd);
1811                 sumd2 += sumd;
1812         }
1813
1814         /*
1815          * Fix TCP pseudo header checksum to compensate for the
1816          * IP address change. Before we can do the change, we
1817          * must make sure that oip is sufficiently large to hold
1818          * the TCP checksum (normally it does not!); th_sum ends at
1819          * byte 18 of the TCP header, hence the dlen >= 18 test.
1820          */
1820         else if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
1821                 sum1 = ntohs(tcp->th_sum);
1822                 fix_datacksum(&tcp->th_sum, sumd);
1823                 sum2 = ntohs(tcp->th_sum);
1824
1825                 /*
1826                  * Fix ICMP checksum to compensate the TCP
1827                  * checksum adjustment.
1828                  */
1829                 sumd2 = sumd << 1;
1830                 CALC_SUMD(sum1, sum2, sumd);
1831                 sumd2 += sumd;
1832         } else {
                     /*
                      * No embedded transport checksum was patched.  The ICMP
                      * adjustment is derived from the upper 16 bits of the
                      * address delta, complemented differently depending on
                      * the direction recorded in the NAT entry.
                      */
1833                 sumd2 = (sumd >> 16); 
1834                 if (nat->nat_dir == NAT_OUTBOUND)
1835                         sumd2 = ~sumd2;
1836                 else
1837                         sumd2 = ~sumd2 + 1;
1838         }
1839
1840         if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
1841                 /*
1842                  * Step 2 :
1843                  * For offending TCP/UDP IP packets, translate the ports as
1844                  * well, based on the NAT specification. Of course such
1845                  * a change must be reflected in the ICMP checksum as well.
1846                  *
1847                  * Advance notice : Now it becomes complicated :-)
1848                  *
1849                  * Since the port fields are part of the TCP/UDP checksum
1850                  * of the offending IP packet, you need to adjust that checksum
1851                  * as well... but, if you change, you must change the icmp
1852                  * checksum *again*, to reflect that change.
1853                  *
1854                  * To further complicate: the TCP checksum is not in the first
1855                  * 8 bytes of the offending ip packet, so it most likely is not
1856                  * available. Some OSes like Solaris return enough bytes to
1857                  * include the TCP checksum. So we have to check if the
1858                  * ip->ip_len actually holds the TCP checksum of the oip!
1859                  *
                      * dlen >= 4 guarantees that both 16 bit port fields, which
                      * sit at the same offsets in the UDP and TCP headers, are
                      * present; that is why the tcp pointer is used for both.
                      */
1860                 if (nat->nat_oport == tcp->th_dport) {
1861                         if (tcp->th_sport != nat->nat_inport) {
1862                                 /*
1863                                  * Fix ICMP checksum to compensate port
1864                                  * adjustment.
1865                                  */
1866                                 sum1 = ntohs(nat->nat_inport);
1867                                 sum2 = ntohs(tcp->th_sport);
1868                                 tcp->th_sport = nat->nat_inport;
1869
1870                                 /*
1871                                  * Fix udp checksum to compensate port
1872                                  * adjustment.  NOTE : the offending IP packet
1873                                  * flows the other direction compared to the
1874                                  * ICMP message.
1875                                  *
1876                                  * The UDP checksum is optional, only adjust
1877                                  * it if it has been set.
1878                                  */
1879                                 if ((oip->ip_p == IPPROTO_UDP) &&
1880                                     (dlen >= 8) && udp->uh_sum) {
1881                                         sumd = sum1 - sum2;
1882                                         sumd2 += sumd;
1883
1884                                         sum1 = ntohs(udp->uh_sum);
1885                                         fix_datacksum(&udp->uh_sum, sumd);
1886                                         sum2 = ntohs(udp->uh_sum);
1887
1888                                         /*
1889                                          * Fix ICMP checksum to compensate
1890                                          * UDP checksum adjustment.
1891                                          */
1892                                         CALC_SUMD(sum1, sum2, sumd);
1893                                         sumd2 += sumd;
1894                                 }
1895
1896                                 /*
1897                                  * Fix tcp checksum (if present) to compensate
1898                                  * port adjustment. NOTE : the offending IP
1899                                  * packet flows the other direction compared to
1900                                  * the ICMP message.
1901                                  */
1902                                 if (oip->ip_p == IPPROTO_TCP) {
1903                                         if (dlen >= 18) {
1904                                                 sumd = sum1 - sum2;
1905                                                 sumd2 += sumd;
1906
1907                                                 sum1 = ntohs(tcp->th_sum);
1908                                                 fix_datacksum(&tcp->th_sum,
1909                                                               sumd);
1910                                                 sum2 = ntohs(tcp->th_sum);
1911
1912                                                 /*
1913                                                  * Fix ICMP checksum to
1914                                                  * compensate TCP checksum
1915                                                  * adjustment.
1916                                                  */
1917                                                 CALC_SUMD(sum1, sum2, sumd);
1918                                                 sumd2 += sumd;
1919                                         } else {
                                                     /*
                                                      * th_sum is not in the
                                                      * payload: only the port
                                                      * change itself affects
                                                      * the ICMP checksum.
                                                      */
1920                                                 sumd = sum2 - sum1 + 1;
1921                                                 sumd2 += sumd;
1922                                         }
1923                                 }
1924                         }
1925                 } else if (tcp->th_dport != nat->nat_outport) {
1926                         /*
1927                          * Fix ICMP checksum to compensate port
1928                          * adjustment.
1929                          */
1930                         sum1 = ntohs(nat->nat_outport);
1931                         sum2 = ntohs(tcp->th_dport);
1932                         tcp->th_dport = nat->nat_outport;
1933
1934                         /*
1935                          * Fix udp checksum to compensate port
1936                          * adjustment.   NOTE : the offending IP
1937                          * packet flows the other direction compared
1938                          * to the ICMP message.
1939                          *
1940                          * The UDP checksum is optional, only adjust
1941                          * it if it has been set.
1942                          */
1943                         if ((oip->ip_p == IPPROTO_UDP) &&
1944                             (dlen >= 8) && udp->uh_sum) {
1945                                 sumd = sum1 - sum2;
1946                                 sumd2 += sumd;
1947
1948                                 sum1 = ntohs(udp->uh_sum);
1949                                 fix_datacksum(&udp->uh_sum, sumd);
1950                                 sum2 = ntohs(udp->uh_sum);
1951
1952                                 /*
1953                                  * Fix ICMP checksum to compensate
1954                                  * UDP checksum adjustment.  The resulting
                                      * sumd is added to sumd2 after the TCP
                                      * block below.
1955                                  */
1956                                 CALC_SUMD(sum1, sum2, sumd);
1957                         }
1958
1959                         /*
1960                          * Fix tcp checksum (if present) to compensate
1961                          * port adjustment. NOTE : the offending IP
1962                          * packet flows the other direction compared to
1963                          * the ICMP message.
1964                          */
1965                         if (oip->ip_p == IPPROTO_TCP) {
1966                                 if (dlen >= 18) {
1967                                         sumd = sum1 - sum2;
1968                                         sumd2 += sumd;
1969
1970                                         sum1 = ntohs(tcp->th_sum);
1971                                         fix_datacksum(&tcp->th_sum, sumd);
1972                                         sum2 = ntohs(tcp->th_sum);
1973
1974                                         /*
1975                                          * Fix ICMP checksum to compensate
1976                                          * TCP checksum adjustment.
1977                                          */
1978                                         CALC_SUMD(sum1, sum2, sumd);
1979                                 } else {
1980                                         sumd = sum2 - sum1;
1981                                         if (nat->nat_dir == NAT_OUTBOUND)
1982                                                 sumd++;
1983                                 }
1984                         }
                             /*
                              * NOTE(review): for UDP with uh_sum == 0 (or dlen
                              * < 8) neither block above assigns sumd, so the
                              * value left over from step 1 is added here.  The
                              * source-port branch above adds nothing in that
                              * case - confirm which behaviour is intended.
                              */
1985                         sumd2 += sumd;
1986                 }
1987                 if (sumd2) {
                             /* Fold the carries back into 16 bits (twice). */
1988                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1989                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1990                         fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1991                 }
1992         }
             /* An embedded ICMP packet resets the entry's age to the ICMP default. */
1993         if (oip->ip_p == IPPROTO_ICMP)
1994                 nat->nat_age = fr_defnaticmpage;
1995         return nat;
1996 }
1997
1998
1999 /*
2000  * NB: these lookups don't lock access to the list; they assume that this
2001  * has already been done by the caller!
2002  */
2003 /*
2004  * Lookup a nat entry based on the mapped destination ip address/port and
2005  * real source address/port.  We use this lookup when receiving a packet,
2006  * we're looking for a table entry, based on the destination address.
2007  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2008  */
2009 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2010 fr_info_t *fin;
2011 u_int flags, p;
2012 struct in_addr src , mapdst;
2013 int rw;
2014 {
2015         u_short sport, dport;
2016         nat_t *nat;
2017         int nflags;
2018         u_32_t dst;
2019         ipnat_t *ipn;
2020         void *ifp;
2021         u_int hv;
2022
2023         if (fin != NULL)
2024                 ifp = fin->fin_ifp;
2025         else
2026                 ifp = NULL;
2027         dst = mapdst.s_addr;
2028         if (flags & IPN_TCPUDP) {
2029                 sport = htons(fin->fin_data[0]);
2030                 dport = htons(fin->fin_data[1]);
2031         } else {
2032                 sport = 0;
2033                 dport = 0;
2034         }
2035
2036         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2037         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2038         nat = nat_table[1][hv];
2039         for (; nat; nat = nat->nat_hnext[1]) {
2040                 nflags = nat->nat_flags;
2041                 if ((!ifp || ifp == nat->nat_ifp) &&
2042                     nat->nat_oip.s_addr == src.s_addr &&
2043                     nat->nat_outip.s_addr == dst &&
2044                     ((p == 0) || (p == nat->nat_p))) {
2045                         switch (p)
2046                         {
2047                         case IPPROTO_TCP :
2048                         case IPPROTO_UDP :
2049                                 if (nat->nat_oport != sport)
2050                                         continue;
2051                                 if (nat->nat_outport != dport)
2052                                         continue;
2053                                 break;
2054                         default :
2055                                 break;
2056                         }
2057
2058                         ipn = nat->nat_ptr;
2059                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2060                                 if (appr_match(fin, nat) != 0)
2061                                         continue;
2062                         return nat;
2063                 }
2064         }
2065         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2066                 return NULL;
2067         if (!rw) {
2068                 RWLOCK_EXIT(&ipf_nat);
2069         }
2070         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2071         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2072         if (!rw) {
2073                 WRITE_ENTER(&ipf_nat);
2074         }
2075         nat = nat_table[1][hv];
2076         for (; nat; nat = nat->nat_hnext[1]) {
2077                 nflags = nat->nat_flags;
2078                 if (ifp && ifp != nat->nat_ifp)
2079                         continue;
2080                 if (!(nflags & FI_WILDP))
2081                         continue;
2082                 if (nat->nat_oip.s_addr != src.s_addr ||
2083                     nat->nat_outip.s_addr != dst)
2084                         continue;
2085                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2086                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2087                         nat_tabmove(fin, nat);
2088                         break;
2089                 }
2090         }
2091         if (!rw) {
2092                 MUTEX_DOWNGRADE(&ipf_nat);
2093         }
2094         return nat;
2095 }
2096
2097
2098 /*
2099  * This function is only called for TCP/UDP NAT table entries where the
2100  * original was placed in the table without hashing on the ports and we now
2101  * want to include hashing on port numbers.
2102  */
2103 static void nat_tabmove(fin, nat)
2104 fr_info_t *fin;
2105 nat_t *nat;
2106 {
2107         u_short sport, dport;
2108         u_int hv, nflags;
2109         nat_t **natp;
2110
2111         nflags = nat->nat_flags;
2112
2113         sport = ntohs(fin->fin_data[0]);
2114         dport = ntohs(fin->fin_data[1]);
2115
2116         /*
2117          * Remove the NAT entry from the old location
2118          */
2119         if (nat->nat_hnext[0])
2120                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2121         *nat->nat_phnext[0] = nat->nat_hnext[0];
2122
2123         if (nat->nat_hnext[1])
2124                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2125         *nat->nat_phnext[1] = nat->nat_hnext[1];
2126
2127         /*
2128          * Add into the NAT table in the new position
2129          */
2130         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2131         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2132         natp = &nat_table[0][hv];
2133         if (*natp)
2134                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2135         nat->nat_phnext[0] = natp;
2136         nat->nat_hnext[0] = *natp;
2137         *natp = nat;
2138
2139         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2140         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2141         natp = &nat_table[1][hv];
2142         if (*natp)
2143                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2144         nat->nat_phnext[1] = natp;
2145         nat->nat_hnext[1] = *natp;
2146         *natp = nat;
2147 }
2148
2149
2150 /*
2151  * Lookup a nat entry based on the source 'real' ip address/port and
2152  * destination address/port.  We use this lookup when sending a packet out,
2153  * we're looking for a table entry, based on the source address.
2154  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2155  */
2156 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2157 fr_info_t *fin;
2158 u_int flags, p;
2159 struct in_addr src , dst;
2160 int rw;
2161 {
2162         u_short sport, dport;
2163         nat_t *nat;
2164         int nflags;
2165         ipnat_t *ipn;
2166         u_32_t srcip;
2167         void *ifp;
2168         u_int hv;
2169
2170         ifp = fin->fin_ifp;
2171         srcip = src.s_addr;
2172         if (flags & IPN_TCPUDP) {
2173                 sport = ntohs(fin->fin_data[0]);
2174                 dport = ntohs(fin->fin_data[1]);
2175         } else {
2176                 sport = 0;
2177                 dport = 0;
2178         }
2179
2180         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2181         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2182         nat = nat_table[0][hv];
2183         for (; nat; nat = nat->nat_hnext[0]) {
2184                 nflags = nat->nat_flags;
2185
2186                 if ((!ifp || ifp == nat->nat_ifp) &&
2187                     nat->nat_inip.s_addr == srcip &&
2188                     nat->nat_oip.s_addr == dst.s_addr &&
2189                     ((p == 0) || (p == nat->nat_p))) {
2190                         switch (p)
2191                         {
2192                         case IPPROTO_TCP :
2193                         case IPPROTO_UDP :
2194                                 if (nat->nat_oport != dport)
2195                                         continue;
2196                                 if (nat->nat_inport != sport)
2197                                         continue;
2198                                 break;
2199                         default :
2200                                 break;
2201                         }
2202
2203                         ipn = nat->nat_ptr;
2204                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2205                                 if (appr_match(fin, nat) != 0)
2206                                         continue;
2207                         return nat;
2208                 }
2209         }
2210         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2211                 return NULL;
2212         if (!rw) {
2213                 RWLOCK_EXIT(&ipf_nat);
2214         }
2215
2216         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2217         if (!rw) {
2218                 WRITE_ENTER(&ipf_nat);
2219         }
2220         nat = nat_table[0][hv];
2221         for (; nat; nat = nat->nat_hnext[0]) {
2222                 nflags = nat->nat_flags;
2223                 if (ifp && ifp != nat->nat_ifp)
2224                         continue;
2225                 if (!(nflags & FI_WILDP))
2226                         continue;
2227                 if ((nat->nat_inip.s_addr != srcip) ||
2228                     (nat->nat_oip.s_addr != dst.s_addr))
2229                         continue;
2230                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2231                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2232                         nat_tabmove(fin, nat);
2233                         break;
2234                 }
2235         }
2236         if (!rw) {
2237                 MUTEX_DOWNGRADE(&ipf_nat);
2238         }
2239         return nat;
2240 }
2241
2242
2243 /*
2244  * Lookup the NAT tables to search for a matching redirect
2245  */
2246 nat_t *nat_lookupredir(np)
2247 natlookup_t *np;
2248 {
2249         nat_t *nat;
2250         fr_info_t fi;
2251
2252         bzero((char *)&fi, sizeof(fi));
2253         fi.fin_data[0] = ntohs(np->nl_inport);
2254         fi.fin_data[1] = ntohs(np->nl_outport);
2255
2256         /*
2257          * If nl_inip is non null, this is a lookup based on the real
2258          * ip address. Else, we use the fake.
2259          */
2260         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2261                                  np->nl_outip, 0))) {
2262                 np->nl_realip = nat->nat_outip;
2263                 np->nl_realport = nat->nat_outport;
2264         }
2265         return nat;
2266 }
2267
2268
2269 static int nat_match(fin, np, ip)
2270 fr_info_t *fin;
2271 ipnat_t *np;
2272 ip_t *ip;
2273 {
2274         frtuc_t *ft;
2275
2276         if (ip->ip_v != 4)
2277                 return 0;
2278
2279         if (np->in_p && fin->fin_p != np->in_p)
2280                 return 0;
2281         if (fin->fin_out) {
2282                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2283                         return 0;
2284                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2285                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2286                         return 0;
2287                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2288                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2289                         return 0;
2290         } else {
2291                 if (!(np->in_redir & NAT_REDIRECT))
2292                         return 0;
2293                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2294                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2295                         return 0;
2296                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2297                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2298                         return 0;
2299         }
2300
2301         ft = &np->in_tuc;
2302         if (!(fin->fin_fl & FI_TCPUDP) ||
2303             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2304                 if (ft->ftu_scmp || ft->ftu_dcmp)
2305                         return 0;
2306                 return 1;
2307         }
2308
2309         return fr_tcpudpchk(ft, fin);
2310 }
2311
2312
2313 /*
2314  * Packets going out on the external interface go through this.
2315  * Here, the source address requires alteration, if anything.
2316  */
2317 int ip_natout(ip, fin)
2318 ip_t *ip;
2319 fr_info_t *fin;
2320 {
2321         ipnat_t *np = NULL;
2322         u_32_t ipa;
2323         tcphdr_t *tcp = NULL;
2324         u_short sport = 0, dport = 0, *csump = NULL;
2325         int natadd = 1, i, icmpset = 1;
2326         u_int nflags = 0, hv, msk;
2327         struct ifnet *ifp;
2328         frentry_t *fr;
2329         void *sifp;
2330         u_32_t iph;
2331         nat_t *nat;
2332
2333         if (nat_list == NULL || (fr_nat_lock))
2334                 return 0;
2335
2336         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2337             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2338                 sifp = fin->fin_ifp;
2339                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2340         } else
2341                 sifp = fin->fin_ifp;
2342         ifp = fin->fin_ifp;
2343
2344         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2345                 if (fin->fin_p == IPPROTO_TCP)
2346                         nflags = IPN_TCP;
2347                 else if (fin->fin_p == IPPROTO_UDP)
2348                         nflags = IPN_UDP;
2349                 if ((nflags & IPN_TCPUDP)) {
2350                         tcp = (tcphdr_t *)fin->fin_dp;
2351                         sport = tcp->th_sport;
2352                         dport = tcp->th_dport;
2353                 }
2354         }
2355
2356         ipa = fin->fin_saddr;
2357
2358         READ_ENTER(&ipf_nat);
2359
2360         if ((fin->fin_p == IPPROTO_ICMP) &&
2361             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2362                 icmpset = 1;
2363         else if ((fin->fin_fl & FI_FRAG) &&
2364             (nat = ipfr_nat_knownfrag(ip, fin)))
2365                 natadd = 0;
2366         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2367                                       (u_int)fin->fin_p, fin->fin_src,
2368                                       fin->fin_dst, 0))) {
2369                 nflags = nat->nat_flags;
2370                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2371                         if ((nflags & FI_W_SPORT) &&
2372                             (nat->nat_inport != sport))
2373                                 nat->nat_inport = sport;
2374                         if ((nflags & FI_W_DPORT) &&
2375                             (nat->nat_oport != dport))
2376                                 nat->nat_oport = dport;
2377
2378                         if (nat->nat_outport == 0)
2379                                 nat->nat_outport = sport;
2380                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2381                         nflags = nat->nat_flags;
2382                         nat_stats.ns_wilds--;
2383                 }
2384         } else {
2385                 RWLOCK_EXIT(&ipf_nat);
2386
2387                 msk = 0xffffffff;
2388                 i = 32;
2389
2390                 WRITE_ENTER(&ipf_nat);
2391                 /*
2392                  * If there is no current entry in the nat table for this IP#,
2393                  * create one for it (if there is a matching rule).
2394                  */
2395 maskloop:
2396                 iph = ipa & htonl(msk);
2397                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2398                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2399                 {
2400                         if (np->in_ifp && (np->in_ifp != ifp))
2401                                 continue;
2402                         if ((np->in_flags & IPN_RF) &&
2403                             !(np->in_flags & nflags))
2404                                 continue;
2405                         if (np->in_flags & IPN_FILTER) {
2406                                 if (!nat_match(fin, np, ip))
2407                                         continue;
2408                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2409                                 continue;
2410                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2411                                 continue;
2412                         nat = nat_new(fin, ip, np, NULL,
2413                                       (u_int)nflags, NAT_OUTBOUND);
2414                         if (nat != NULL) {
2415                                 np->in_hits++;
2416                                 break;
2417                         }
2418                 }
2419                 if ((np == NULL) && (i > 0)) {
2420                         do {
2421                                 i--;
2422                                 msk <<= 1;
2423                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2424                         if (i >= 0)
2425                                 goto maskloop;
2426                 }
2427                 MUTEX_DOWNGRADE(&ipf_nat);
2428         }
2429
2430         /*
2431          * NOTE: ipf_nat must now only be held as a read lock
2432          */
2433         if (nat) {
2434                 np = nat->nat_ptr;
2435                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2436                         ipfr_nat_newfrag(ip, fin, nat);
2437                 MUTEX_ENTER(&nat->nat_lock);
2438                 if (fin->fin_p != IPPROTO_TCP) {
2439                         if (np && np->in_age[1])
2440                                 nat->nat_age = np->in_age[1];
2441                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2442                                 nat->nat_age = fr_defnaticmpage;
2443                         else
2444                                 nat->nat_age = fr_defnatage;
2445                 }
2446                 nat->nat_bytes += ip->ip_len;
2447                 nat->nat_pkts++;
2448                 MUTEX_EXIT(&nat->nat_lock);
2449
2450                 /*
2451                  * Fix up checksums, not by recalculating them, but
2452                  * simply computing adjustments.
2453                  */
2454                 if (nflags == IPN_ICMPERR) {
2455                         u_32_t s1, s2, sumd;
2456
2457                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2458                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2459                         CALC_SUMD(s1, s2, sumd);
2460                         fix_outcksum(fin, &ip->ip_sum, sumd);
2461                 }
2462 #if (SOLARIS || defined(__sgi)) || !defined(_KERNEL)
2463                 else {
2464                         if (nat->nat_dir == NAT_OUTBOUND)
2465                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2466                         else
2467                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2468                 }
2469 #endif
2470                 /*
2471                  * Only change the packet contents, not what is filtered upon.
2472                  */
2473                 ip->ip_src = nat->nat_outip;
2474
2475                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2476
2477                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2478                                 tcp->th_sport = nat->nat_outport;
2479                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2480                         }
2481
2482                         if (fin->fin_p == IPPROTO_TCP) {
2483                                 csump = &tcp->th_sum;
2484                                 MUTEX_ENTER(&nat->nat_lock);
2485                                 fr_tcp_age(&nat->nat_age,
2486                                            nat->nat_tcpstate, fin, 1, 0);
2487                                 if (nat->nat_age < fr_defnaticmpage)
2488                                         nat->nat_age = fr_defnaticmpage;
2489 #ifdef LARGE_NAT
2490                                 else if ((!np || !np->in_age[1]) &&
2491                                          (nat->nat_age > fr_defnatage))
2492                                         nat->nat_age = fr_defnatage;
2493 #endif
2494                                 /*
2495                                  * Increase this because we may have
2496                                  * "keep state" following this too and
2497                                  * packet storms can occur if this is
2498                                  * removed too quickly.
2499                                  */
2500                                 if (nat->nat_age == fr_tcpclosed)
2501                                         nat->nat_age = fr_tcplastack;
2502
2503                                 /*
2504                                  * Do a MSS CLAMPING on a SYN packet,
2505                                  * only deal IPv4 for now.
2506                                  */
2507                                 if (nat->nat_mssclamp &&
2508                                     (tcp->th_flags & TH_SYN) != 0)
2509                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2510                                                      fin, csump);
2511
2512                                 MUTEX_EXIT(&nat->nat_lock);
2513                         } else if (fin->fin_p == IPPROTO_UDP) {
2514                                 udphdr_t *udp = (udphdr_t *)tcp;
2515
2516                                 if (udp->uh_sum)
2517                                         csump = &udp->uh_sum;
2518                         }
2519
2520                         if (csump) {
2521                                 if (nat->nat_dir == NAT_OUTBOUND)
2522                                         fix_outcksum(fin, csump,
2523                                                      nat->nat_sumd[1]);
2524                                 else
2525                                         fix_incksum(fin, csump,
2526                                                     nat->nat_sumd[1]);
2527                         }
2528                 }
2529
2530                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2531                      (tcp != NULL && dport == np->in_dport))) {
2532                         i = appr_check(ip, fin, nat);
2533                         if (i == 0)
2534                                 i = 1;
2535                         else if (i == -1)
2536                                 nat->nat_drop[1]++;
2537                 } else
2538                         i = 1;
2539                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2540                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2541                 fin->fin_ifp = sifp;
2542                 return i;
2543         }
2544         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2545         fin->fin_ifp = sifp;
2546         return 0;
2547 }
2548
2549
2550 /*
2551  * Packets coming in from the external interface go through this.
2552  * Here, the destination address requires alteration, if anything.
2553  */
2554 int ip_natin(ip, fin)
2555 ip_t *ip;
2556 fr_info_t *fin;
2557 {
2558         struct in_addr src;
2559         struct in_addr in;
2560         ipnat_t *np;
2561         u_short sport = 0, dport = 0, *csump = NULL;
2562         u_int nflags = 0, natadd = 1, hv, msk;
2563         struct ifnet *ifp = fin->fin_ifp;
2564         tcphdr_t *tcp = NULL;
2565         int i, icmpset = 0;
2566         nat_t *nat;
2567         u_32_t iph;
2568
2569         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2570                 return 0;
2571
2572         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2573                 if (fin->fin_p == IPPROTO_TCP)
2574                         nflags = IPN_TCP;
2575                 else if (fin->fin_p == IPPROTO_UDP)
2576                         nflags = IPN_UDP;
2577                 if ((nflags & IPN_TCPUDP)) {
2578                         tcp = (tcphdr_t *)fin->fin_dp;
2579                         sport = tcp->th_sport;
2580                         dport = tcp->th_dport;
2581                 }
2582         }
2583
2584         in = fin->fin_dst;
2585         /* make sure the source address is to be redirected */
2586         src = fin->fin_src;
2587
2588         READ_ENTER(&ipf_nat);
2589
2590         if ((fin->fin_p == IPPROTO_ICMP) &&
2591             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2592                 icmpset = 1;
2593         else if ((fin->fin_fl & FI_FRAG) &&
2594                  (nat = ipfr_nat_knownfrag(ip, fin)))
2595                 natadd = 0;
2596         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2597                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2598                 nflags = nat->nat_flags;
2599                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2600                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2601                                 nat->nat_oport = sport;
2602                         if ((nat->nat_outport != dport) &&
2603                                  (nflags & FI_W_SPORT))
2604                                 nat->nat_outport = dport;
2605                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2606                         nflags = nat->nat_flags;
2607                         nat_stats.ns_wilds--;
2608                 }
2609         } else {
2610                 RWLOCK_EXIT(&ipf_nat);
2611
2612                 msk = 0xffffffff;
2613                 i = 32;
2614
2615                 WRITE_ENTER(&ipf_nat);
2616                 /*
2617                  * If there is no current entry in the nat table for this IP#,
2618                  * create one for it (if there is a matching rule).
2619                  */
2620 maskloop:
2621                 iph = in.s_addr & htonl(msk);
2622                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2623                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2624                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2625                             (np->in_p && (np->in_p != fin->fin_p)) ||
2626                             (np->in_flags && !(nflags & np->in_flags)))
2627                                 continue;
2628                         if (np->in_flags & IPN_FILTER) {
2629                                 if (!nat_match(fin, np, ip))
2630                                         continue;
2631                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2632                                 continue;
2633                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2634                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2635                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2636                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2637                                                     NAT_INBOUND))) {
2638                                         np->in_hits++;
2639                                         break;
2640                                 }
2641                 }
2642
2643                 if ((np == NULL) && (i > 0)) {
2644                         do {
2645                                 i--;
2646                                 msk <<= 1;
2647                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2648                         if (i >= 0)
2649                                 goto maskloop;
2650                 }
2651                 MUTEX_DOWNGRADE(&ipf_nat);
2652         }
2653
2654         /*
2655          * NOTE: ipf_nat must now only be held as a read lock
2656          */
2657         if (nat) {
2658                 np = nat->nat_ptr;
2659                 fin->fin_fr = nat->nat_fr;
2660                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2661                         ipfr_nat_newfrag(ip, fin, nat);
2662                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2663                      (tcp != NULL && sport == np->in_dport))) {
2664                         i = appr_check(ip, fin, nat);
2665                         if (i == -1) {
2666                                 nat->nat_drop[0]++;
2667                                 RWLOCK_EXIT(&ipf_nat);
2668                                 return i;
2669                         }
2670                 }
2671
2672                 MUTEX_ENTER(&nat->nat_lock);
2673                 if (fin->fin_p != IPPROTO_TCP) {
2674                         if (np && np->in_age[0])
2675                                 nat->nat_age = np->in_age[0];
2676                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2677                                 nat->nat_age = fr_defnaticmpage;
2678                         else
2679                                 nat->nat_age = fr_defnatage;
2680                 }
2681                 nat->nat_bytes += ip->ip_len;
2682                 nat->nat_pkts++;
2683                 MUTEX_EXIT(&nat->nat_lock);
2684
2685                 /*
2686                  * Fix up checksums, not by recalculating them, but
2687                  * simply computing adjustments.
2688                  */
2689                 if (nat->nat_dir == NAT_OUTBOUND)
2690                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2691                 else
2692                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2693
2694                 ip->ip_dst = nat->nat_inip;
2695                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2696
2697                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2698
2699                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2700                                 tcp->th_dport = nat->nat_inport;
2701                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2702                         }
2703
2704                         if (fin->fin_p == IPPROTO_TCP) {
2705                                 csump = &tcp->th_sum;
2706                                 MUTEX_ENTER(&nat->nat_lock);
2707                                 fr_tcp_age(&nat->nat_age,
2708                                            nat->nat_tcpstate, fin, 0, 0);
2709                                 if (nat->nat_age < fr_defnaticmpage)
2710                                         nat->nat_age = fr_defnaticmpage;
2711 #ifdef LARGE_NAT
2712                                 else if ((!np || !np->in_age[0]) &&
2713                                          (nat->nat_age > fr_defnatage))
2714                                         nat->nat_age = fr_defnatage;
2715 #endif
2716                                 /*
2717                                  * Increase this because we may have
2718                                  * "keep state" following this too and
2719                                  * packet storms can occur if this is
2720                                  * removed too quickly.
2721                                  */
2722                                 if (nat->nat_age == fr_tcpclosed)
2723                                         nat->nat_age = fr_tcplastack;
2724                                 /*
2725                                  * Do a MSS CLAMPING on a SYN packet,
2726                                  * only deal IPv4 for now.
2727                                  */
2728                                 if (nat->nat_mssclamp &&
2729                                     (tcp->th_flags & TH_SYN) != 0)
2730                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2731                                                      fin, csump);
2732
2733                                 MUTEX_EXIT(&nat->nat_lock);
2734                         } else if (fin->fin_p == IPPROTO_UDP) {
2735                                 udphdr_t *udp = (udphdr_t *)tcp;
2736
2737                                 if (udp->uh_sum)
2738                                         csump = &udp->uh_sum;
2739                         }
2740
2741                         if (csump) {
2742                                 if (nat->nat_dir == NAT_OUTBOUND)
2743                                         fix_incksum(fin, csump,
2744                                                     nat->nat_sumd[0]);
2745                                 else
2746                                         fix_outcksum(fin, csump,
2747                                                     nat->nat_sumd[0]);
2748                         }
2749                 }
2750                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2751                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2752                 return 1;
2753         }
2754         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2755         return 0;
2756 }
2757
2758
2759 /*
2760  * Free all memory used by NAT structures allocated at runtime.
2761  */
2762 void ip_natunload()
2763 {
2764         WRITE_ENTER(&ipf_nat);
2765         (void) nat_clearlist();
2766         (void) nat_flushtable();
2767         RWLOCK_EXIT(&ipf_nat);
2768
2769         if (nat_table[0] != NULL) {
2770                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2771                 nat_table[0] = NULL;
2772         }
2773         if (nat_table[1] != NULL) {
2774                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2775                 nat_table[1] = NULL;
2776         }
2777         if (nat_rules != NULL) {
2778                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2779                 nat_rules = NULL;
2780         }
2781         if (rdr_rules != NULL) {
2782                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2783                 rdr_rules = NULL;
2784         }
2785         if (maptable != NULL) {
2786                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2787                 maptable = NULL;
2788         }
2789 }
2790
2791
2792 /*
2793  * Slowly expire held state for NAT entries.  Timeouts are set in
2794  * expectation of this being called twice per second.
2795  */
2796 void ip_natexpire()
2797 {
2798         struct nat *nat, **natp;
2799 #if defined(_KERNEL) && !SOLARIS && !defined(__DragonFly__)
2800         int s;
2801 #endif
2802
2803         SPL_NET(s);
2804         WRITE_ENTER(&ipf_nat);
2805         for (natp = &nat_instances; (nat = *natp); ) {
2806                 nat->nat_age--;
2807                 if (nat->nat_age) {
2808                         natp = &nat->nat_next;
2809                         continue;
2810                 }
2811                 *natp = nat->nat_next;
2812 #ifdef  IPFILTER_LOG
2813                 nat_log(nat, NL_EXPIRE);
2814 #endif
2815                 nat_delete(nat);
2816                 nat_stats.ns_expire++;
2817         }
2818         RWLOCK_EXIT(&ipf_nat);
2819         SPL_X(s);
2820 }
2821
2822
2823 /*
2824  */
2825 void ip_natsync(ifp)
2826 void *ifp;
2827 {
2828         ipnat_t *n;
2829         nat_t *nat;
2830         u_32_t sum1, sum2, sumd;
2831         struct in_addr in;
2832         ipnat_t *np;
2833         void *ifp2;
2834 #if defined(_KERNEL) && !SOLARIS && !defined(__DragonFly__)
2835         int s;
2836 #endif
2837
2838         /*
2839          * Change IP addresses for NAT sessions for any protocol except TCP
2840          * since it will break the TCP connection anyway.
2841          */
2842         SPL_NET(s);
2843         WRITE_ENTER(&ipf_nat);
2844         for (nat = nat_instances; nat; nat = nat->nat_next)
2845                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2846                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2847                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2848                         ifp2 = nat->nat_ifp;
2849                         /*
2850                          * Change the map-to address to be the same as the
2851                          * new one.
2852                          */
2853                         sum1 = nat->nat_outip.s_addr;
2854                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2855                                 nat->nat_outip = in;
2856                         sum2 = nat->nat_outip.s_addr;
2857
2858                         if (sum1 == sum2)
2859                                 continue;
2860                         /*
2861                          * Readjust the checksum adjustment to take into
2862                          * account the new IP#.
2863                          */
2864                         CALC_SUMD(sum1, sum2, sumd);
2865                         /* XXX - dont change for TCP when solaris does
2866                          * hardware checksumming.
2867                          */
2868                         sumd += nat->nat_sumd[0];
2869                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2870                         nat->nat_sumd[1] = nat->nat_sumd[0];
2871                 }
2872
2873         for (n = nat_list; (n != NULL); n = n->in_next)
2874                 if (n->in_ifp == ifp) {
2875                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2876                         if (!n->in_ifp)
2877                                 n->in_ifp = (void *)-1;
2878                 }
2879         RWLOCK_EXIT(&ipf_nat);
2880         SPL_X(s);
2881 }
2882
2883
#ifdef	IPFILTER_LOG
/*
 * Build a natlog record describing session "nat" and event "type" and
 * pass it to ipllog() on the IPL_LOGNAT device.  nl_rule is the position
 * of the session's rule in nat_list, or -1 when the session has no rule,
 * the rule is not found, or LARGE_NAT is defined.
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
# ifndef LARGE_NAT
	struct ipnat *np;
	int rulen;
# endif
	struct natlog rec;
	void *items[1];
	size_t sizes[1];
	int types[1];

	rec.nl_type = type;
	rec.nl_p = nat->nat_p;
	rec.nl_rule = -1;

	/* addresses */
	rec.nl_inip = nat->nat_inip;
	rec.nl_outip = nat->nat_outip;
	rec.nl_origip = nat->nat_oip;

	/* ports */
	rec.nl_inport = nat->nat_inport;
	rec.nl_outport = nat->nat_outport;
	rec.nl_origport = nat->nat_oport;

	/* counters */
	rec.nl_pkts = nat->nat_pkts;
	rec.nl_bytes = nat->nat_bytes;

#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		/* Walk the rule list to find the index of this rule. */
		rulen = 0;
		np = nat_list;
		while (np != NULL) {
			if (np == nat->nat_ptr) {
				rec.nl_rule = rulen;
				break;
			}
			np = np->in_next;
			rulen++;
		}
	}
#endif
	types[0] = 0;
	sizes[0] = sizeof(rec);
	items[0] = &rec;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2925
2926
#if defined(__OpenBSD__)
/*
 * OpenBSD only: interface detach hook.  The ifp argument is not used;
 * the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync();
}
#endif
2935
2936
2937 /*
2938  * Check for MSS option and clamp it if necessary.
2939  */
2940 static void nat_mssclamp(tcp, maxmss, fin, csump)
2941 tcphdr_t *tcp;
2942 u_32_t maxmss;
2943 fr_info_t *fin;
2944 u_short *csump;
2945 {
2946         u_char *cp, *ep, opt;
2947         int hlen, advance;
2948         u_32_t mss, sumd;
2949         u_short v;
2950
2951         hlen = tcp->th_off << 2;
2952         if (hlen > sizeof(*tcp)) {
2953                 cp = (u_char *)tcp + sizeof(*tcp);
2954                 ep = (u_char *)tcp + hlen;
2955
2956                 while (cp < ep) {
2957                         opt = cp[0];
2958                         if (opt == TCPOPT_EOL)
2959                                 break;
2960                         else if (opt == TCPOPT_NOP) {
2961                                 cp++;
2962                                 continue;
2963                         }
2964  
2965                         if (&cp[1] >= ep)
2966                                 break;
2967                         advance = cp[1];
2968                         if (&cp[advance] > ep)
2969                                 break;
2970                         switch (opt) {
2971                         case TCPOPT_MAXSEG:
2972                                 if (advance != 4)
2973                                         break;
2974                                 bcopy(&cp[2], &v, sizeof(v));
2975                                 mss = ntohs(v);
2976                                 if (mss > maxmss) {
2977                                         v = htons(maxmss);
2978                                         bcopy(&v, &cp[2], sizeof(v));
2979                                         CALC_SUMD(mss, maxmss, sumd);
2980                                         fix_outcksum(fin, csump, sumd);
2981                                 }
2982                                 break;
2983                         default:
2984                                 /* ignore unknown options */
2985                                 break;
2986                         }
2987                     
2988                         cp += advance;  
2989                 }       
2990         }       
2991 }