Merge from vendor branch LESS:
[dragonfly.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  *
8  * @(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed
9  * @(#)$Id: ip_nat.c,v 2.37.2.70 2002/08/28 12:45:48 darrenr Exp $
10  * $FreeBSD: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.22.2.7 2003/03/01 03:55:54 darrenr Exp $
11  * $DragonFly: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.6 2004/02/12 22:35:47 joerg Exp $
12  */
13 #if (defined(__DragonFly__) || defined(__FreeBSD__)) && defined(KERNEL) && !defined(_KERNEL)
14 #define _KERNEL
15 #endif
16
17 #if defined(__sgi) && (IRIX > 602)
18 # include <sys/ptimers.h>
19 #endif
20 #include <sys/errno.h>
21 #include <sys/types.h>
22 #include <sys/param.h>
23 #include <sys/time.h>
24 #include <sys/file.h>
25 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26     defined(_KERNEL)
27 # include "opt_ipfilter_log.h"
28 #endif
29 #if !defined(_KERNEL) && !defined(KERNEL)
30 # include <stdio.h>
31 # include <string.h>
32 # include <stdlib.h>
33 #endif
34 #if (defined(KERNEL) || defined(_KERNEL)) && (defined(__DragonFly__) || __FreeBSD_version >= 220000)
35 # include <sys/filio.h>
36 # include <sys/fcntl.h>
37 #else
38 # include <sys/ioctl.h>
39 #endif
40 #include <sys/fcntl.h>
41 #ifndef linux
42 # include <sys/protosw.h>
43 #endif
44 #include <sys/socket.h>
45 #if defined(_KERNEL) && !defined(linux)
46 # include <sys/systm.h>
47 #endif
48 #if !defined(__SVR4) && !defined(__svr4__)
49 # ifndef linux
50 #  include <sys/mbuf.h>
51 # endif
52 #else
53 # include <sys/filio.h>
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 #  include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
62 # include <sys/queue.h>
63 #endif
64 #include <net/if.h>
65 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
66 # include <net/if_var.h>
67 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
68 #  include "opt_ipfilter.h"
69 # endif
70 #endif
71 #ifdef sun
72 # include <net/af.h>
73 #endif
74 #include <net/route.h>
75 #include <netinet/in.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78
79 #ifdef __sgi
80 # ifdef IFF_DRVRLOCK /* IRIX6 */
81 #include <sys/hashing.h>
82 #include <netinet/in_var.h>
83 # endif
84 #endif
85
86 #ifdef RFC1825
87 # include <vpn/md5.h>
88 # include <vpn/ipsec.h>
89 extern struct ifnet vpnif;
90 #endif
91
92 #ifndef linux
93 # include <netinet/ip_var.h>
94 # include <netinet/tcp_fsm.h>
95 #endif
96 #include <netinet/tcp.h>
97 #include <netinet/udp.h>
98 #include <netinet/ip_icmp.h>
99 #include "ip_compat.h"
100 #include <netinet/tcpip.h>
101 #include "ip_fil.h"
102 #include "ip_nat.h"
103 #include "ip_frag.h"
104 #include "ip_state.h"
105 #include "ip_proxy.h"
106 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
107 # include <sys/malloc.h>
108 #endif
109 #ifndef MIN
110 # define        MIN(a,b)        (((a)<(b))?(a):(b))
111 #endif
112 #undef  SOCKADDR_IN
113 #define SOCKADDR_IN     struct sockaddr_in
114
115 static const char sccsid[] = "@(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed";
116
117 nat_t   **nat_table[2] = { NULL, NULL },
118         *nat_instances = NULL;
119 ipnat_t *nat_list = NULL;
120 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
121 u_int   ipf_natrules_sz = NAT_SIZE;
122 u_int   ipf_rdrrules_sz = RDR_SIZE;
123 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
124 u_32_t  nat_masks = 0;
125 u_32_t  rdr_masks = 0;
126 ipnat_t **nat_rules = NULL;
127 ipnat_t **rdr_rules = NULL;
128 hostmap_t       **maptable  = NULL;
129
130 u_long  fr_defnatage = DEF_NAT_AGE,
131         fr_defnaticmpage = 6;           /* 3 seconds */
132 natstat_t nat_stats;
133 int     fr_nat_lock = 0;
134 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
135 extern  kmutex_t        ipf_rw;
136 extern  KRWLOCK_T       ipf_nat;
137 #endif
138
139 static  int     nat_flushtable (void);
140 static  void    nat_addnat (struct ipnat *);
141 static  void    nat_addrdr (struct ipnat *);
142 static  void    nat_delete (struct nat *);
143 static  void    nat_delrdr (struct ipnat *);
144 static  void    nat_delnat (struct ipnat *);
145 static  int     fr_natgetent (caddr_t);
146 static  int     fr_natgetsz (caddr_t);
147 static  int     fr_natputent (caddr_t);
148 static  void    nat_tabmove (fr_info_t *, nat_t *);
149 static  int     nat_match (fr_info_t *, ipnat_t *, ip_t *);
150 static  hostmap_t *nat_hostmap (ipnat_t *, struct in_addr,
151                                     struct in_addr);
152 static  void    nat_hostmapdel (struct hostmap *);
153 static  void    nat_mssclamp (tcphdr_t *, u_32_t, fr_info_t *, u_short *);
154
155
156 int nat_init()
157 {
158         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
159         if (nat_table[0] != NULL)
160                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
161         else
162                 return -1;
163
164         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
165         if (nat_table[1] != NULL)
166                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
167         else
168                 return -1;
169
170         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
171         if (nat_rules != NULL)
172                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
173         else
174                 return -1;
175
176         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
177         if (rdr_rules != NULL)
178                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
179         else
180                 return -1;
181
182         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
183         if (maptable != NULL)
184                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
185         else
186                 return -1;
187         return 0;
188 }
189
190
191 static void nat_addrdr(n)
192 ipnat_t *n;
193 {
194         ipnat_t **np;
195         u_32_t j;
196         u_int hv;
197         int k;
198
199         k = countbits(n->in_outmsk);
200         if ((k >= 0) && (k != 32))
201                 rdr_masks |= 1 << k;
202         j = (n->in_outip & n->in_outmsk);
203         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
204         np = rdr_rules + hv;
205         while (*np != NULL)
206                 np = &(*np)->in_rnext;
207         n->in_rnext = NULL;
208         n->in_prnext = np;
209         *np = n;
210 }
211
212
213 static void nat_addnat(n)
214 ipnat_t *n;
215 {
216         ipnat_t **np;
217         u_32_t j;
218         u_int hv;
219         int k;
220
221         k = countbits(n->in_inmsk);
222         if ((k >= 0) && (k != 32))
223                 nat_masks |= 1 << k;
224         j = (n->in_inip & n->in_inmsk);
225         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
226         np = nat_rules + hv;
227         while (*np != NULL)
228                 np = &(*np)->in_mnext;
229         n->in_mnext = NULL;
230         n->in_pmnext = np;
231         *np = n;
232 }
233
234
235 static void nat_delrdr(n)
236 ipnat_t *n;
237 {
238         if (n->in_rnext)
239                 n->in_rnext->in_prnext = n->in_prnext;
240         *n->in_prnext = n->in_rnext;
241 }
242
243
244 static void nat_delnat(n)
245 ipnat_t *n;
246 {
247         if (n->in_mnext)
248                 n->in_mnext->in_pmnext = n->in_pmnext;
249         *n->in_pmnext = n->in_mnext;
250 }
251
252
253 /*
254  * check if an ip address has already been allocated for a given mapping that
255  * is not doing port based translation.
256  *
257  * Must be called with ipf_nat held as a write lock.
258  */
259 static struct hostmap *nat_hostmap(np, real, map)
260 ipnat_t *np;
261 struct in_addr real;
262 struct in_addr map;
263 {
264         hostmap_t *hm;
265         u_int hv;
266
267         hv = real.s_addr % HOSTMAP_SIZE;
268         for (hm = maptable[hv]; hm; hm = hm->hm_next)
269                 if ((hm->hm_realip.s_addr == real.s_addr) &&
270                     (np == hm->hm_ipnat)) {
271                         hm->hm_ref++;
272                         return hm;
273                 }
274
275         KMALLOC(hm, hostmap_t *);
276         if (hm) {
277                 hm->hm_next = maptable[hv];
278                 hm->hm_pnext = maptable + hv;
279                 if (maptable[hv])
280                         maptable[hv]->hm_pnext = &hm->hm_next;
281                 maptable[hv] = hm;
282                 hm->hm_ipnat = np;
283                 hm->hm_realip = real;
284                 hm->hm_mapip = map;
285                 hm->hm_ref = 1;
286         }
287         return hm;
288 }
289
290
291 /*
292  * Must be called with ipf_nat held as a write lock.
293  */
294 static void nat_hostmapdel(hm)
295 struct hostmap *hm;
296 {
297         ATOMIC_DEC32(hm->hm_ref);
298         if (hm->hm_ref == 0) {
299                 if (hm->hm_next)
300                         hm->hm_next->hm_pnext = hm->hm_pnext;
301                 *hm->hm_pnext = hm->hm_next;
302                 KFREE(hm);
303         }
304 }
305
306
307 void fix_outcksum(fin, sp, n)
308 fr_info_t *fin;
309 u_short *sp;
310 u_32_t n;
311 {
312         u_short sumshort;
313         u_32_t sum1;
314
315         if (!n)
316                 return;
317         else if (n & NAT_HW_CKSUM) {
318                 n &= 0xffff;
319                 n += fin->fin_dlen;
320                 n = (n & 0xffff) + (n >> 16);
321                 *sp = n & 0xffff;
322                 return;
323         }
324         sum1 = (~ntohs(*sp)) & 0xffff;
325         sum1 += (n);
326         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
327         /* Again */
328         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
329         sumshort = ~(u_short)sum1;
330         *(sp) = htons(sumshort);
331 }
332
333
334 void fix_incksum(fin, sp, n)
335 fr_info_t *fin;
336 u_short *sp;
337 u_32_t n;
338 {
339         u_short sumshort;
340         u_32_t sum1;
341
342         if (!n)
343                 return;
344         else if (n & NAT_HW_CKSUM) {
345                 n &= 0xffff;
346                 n += fin->fin_dlen;
347                 n = (n & 0xffff) + (n >> 16);
348                 *sp = n & 0xffff;
349                 return;
350         }
351 #ifdef sparc
352         sum1 = (~(*sp)) & 0xffff;
353 #else
354         sum1 = (~ntohs(*sp)) & 0xffff;
355 #endif
356         sum1 += ~(n) & 0xffff;
357         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
358         /* Again */
359         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
360         sumshort = ~(u_short)sum1;
361         *(sp) = htons(sumshort);
362 }
363
364
365 /*
366  * fix_datacksum is used *only* for the adjustments of checksums in the data
367  * section of an IP packet.
368  *
369  * The only situation in which you need to do this is when NAT'ing an 
370  * ICMP error message. Such a message, contains in its body the IP header
371  * of the original IP packet, that causes the error.
372  *
373  * You can't use fix_incksum or fix_outcksum in that case, because for the
374  * kernel the data section of the ICMP error is just data, and no special 
375  * processing like hardware cksum or ntohs processing have been done by the 
376  * kernel on the data section.
377  */
378 void fix_datacksum(sp, n)
379 u_short *sp;
380 u_32_t n;
381 {
382         u_short sumshort;
383          u_32_t sum1;
384
385         if (!n)
386                 return;
387
388         sum1 = (~ntohs(*sp)) & 0xffff;
389         sum1 += (n);
390         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
391         /* Again */
392         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
393         sumshort = ~(u_short)sum1;
394         *(sp) = htons(sumshort);
395 }
396
397 /*
398  * How the NAT is organised and works.
399  *
400  * Inside (interface y) NAT       Outside (interface x)
401  * -------------------- -+- -------------------------------------
402  * Packet going          |   out, processed by ip_natout() for x
403  * ------------>         |   ------------>
404  * src=10.1.1.1          |   src=192.1.1.1
405  *                       |
406  *                       |   in, processed by ip_natin() for x
407  * <------------         |   <------------
408  * dst=10.1.1.1          |   dst=192.1.1.1
409  * -------------------- -+- -------------------------------------
410  * ip_natout() - changes ip_src and if required, sport
411  *             - creates a new mapping, if required.
412  * ip_natin()  - changes ip_dst and if required, dport
413  *
414  * In the NAT table, internal source is recorded as "in" and externally
415  * seen as "out".
416  */
417
418 /*
419  * Handle ioctls which manipulate the NAT.
420  */
421 int nat_ioctl(data, cmd, mode)
422 #if defined(__DragonFly__) || defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
423 u_long cmd;
424 #else
425 int cmd;
426 #endif
427 caddr_t data;
428 int mode;
429 {
430         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
431         int error = 0, ret, arg, getlock;
432         ipnat_t natd;
433         u_32_t i, j;
434
435 #if (BSD >= 199306) && defined(_KERNEL)
436         if ((securelevel >= 3) && (mode & FWRITE))
437                 return EPERM;
438 #endif
439
440         nat = NULL;     /* XXX gcc -Wuninitialized */
441         KMALLOC(nt, ipnat_t *);
442         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
443         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
444                 if (mode & NAT_SYSSPACE) {
445                         bcopy(data, (char *)&natd, sizeof(natd));
446                         error = 0;
447                 } else {
448                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
449                 }
450         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
451                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
452                 if (error)
453                         error = EFAULT;
454         }
455
456         if (error)
457                 goto done;
458
459         /*
460          * For add/delete, look to see if the NAT entry is already present
461          */
462         if (getlock == 1) {
463                 WRITE_ENTER(&ipf_nat);
464         }
465         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
466                 nat = &natd;
467                 nat->in_flags &= IPN_USERFLAGS;
468                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
469                         if ((nat->in_flags & IPN_SPLIT) == 0)
470                                 nat->in_inip &= nat->in_inmsk;
471                         if ((nat->in_flags & IPN_IPRANGE) == 0)
472                                 nat->in_outip &= nat->in_outmsk;
473                 }
474                 for (np = &nat_list; (n = *np); np = &n->in_next)
475                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
476                                         IPN_CMPSIZ)) {
477                                 if (n->in_redir == NAT_REDIRECT &&
478                                     n->in_pnext != nat->in_pnext)
479                                         continue;
480                                 break;
481                         }
482         }
483
484         switch (cmd)
485         {
486 #ifdef  IPFILTER_LOG
487         case SIOCIPFFB :
488         {
489                 int tmp;
490
491                 if (!(mode & FWRITE))
492                         error = EPERM;
493                 else {
494                         tmp = ipflog_clear(IPL_LOGNAT);
495                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
496                 }
497                 break;
498         }
499 #endif
500         case SIOCADNAT :
501                 if (!(mode & FWRITE)) {
502                         error = EPERM;
503                         break;
504                 }
505                 if (n) {
506                         error = EEXIST;
507                         break;
508                 }
509                 if (nt == NULL) {
510                         error = ENOMEM;
511                         break;
512                 }
513                 n = nt;
514                 nt = NULL;
515                 bcopy((char *)nat, (char *)n, sizeof(*n));
516                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
517                 if (!n->in_ifp)
518                         n->in_ifp = (void *)-1;
519                 if (n->in_plabel[0] != '\0') {
520                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
521                         if (!n->in_apr) {
522                                 error = ENOENT;
523                                 break;
524                         }
525                 }
526                 n->in_next = NULL;
527                 *np = n;
528
529                 if (n->in_redir & NAT_REDIRECT) {
530                         n->in_flags &= ~IPN_NOTDST;
531                         nat_addrdr(n);
532                 }
533                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
534                         n->in_flags &= ~IPN_NOTSRC;
535                         nat_addnat(n);
536                 }
537
538                 n->in_use = 0;
539                 if (n->in_redir & NAT_MAPBLK)
540                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
541                 else if (n->in_flags & IPN_AUTOPORTMAP)
542                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
543                 else if (n->in_flags & IPN_IPRANGE)
544                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
545                 else if (n->in_flags & IPN_SPLIT)
546                         n->in_space = 2;
547                 else
548                         n->in_space = ~ntohl(n->in_outmsk);
549                 /*
550                  * Calculate the number of valid IP addresses in the output
551                  * mapping range.  In all cases, the range is inclusive of
552                  * the start and ending IP addresses.
553                  * If to a CIDR address, lose 2: broadcast + network address
554                  *                               (so subtract 1)
555                  * If to a range, add one.
556                  * If to a single IP address, set to 1.
557                  */
558                 if (n->in_space) {
559                         if ((n->in_flags & IPN_IPRANGE) != 0)
560                                 n->in_space += 1;
561                         else
562                                 n->in_space -= 1;
563                 } else
564                         n->in_space = 1;
565                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
566                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
567                         n->in_nip = ntohl(n->in_outip) + 1;
568                 else if ((n->in_flags & IPN_SPLIT) &&
569                          (n->in_redir & NAT_REDIRECT))
570                         n->in_nip = ntohl(n->in_inip);
571                 else
572                         n->in_nip = ntohl(n->in_outip);
573                 if (n->in_redir & NAT_MAP) {
574                         n->in_pnext = ntohs(n->in_pmin);
575                         /*
576                          * Multiply by the number of ports made available.
577                          */
578                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
579                                 n->in_space *= (ntohs(n->in_pmax) -
580                                                 ntohs(n->in_pmin) + 1);
581                                 /*
582                                  * Because two different sources can map to
583                                  * different destinations but use the same
584                                  * local IP#/port #.
585                                  * If the result is smaller than in_space, then
586                                  * we may have wrapped around 32bits.
587                                  */
588                                 i = n->in_inmsk;
589                                 if ((i != 0) && (i != 0xffffffff)) {
590                                         j = n->in_space * (~ntohl(i) + 1);
591                                         if (j >= n->in_space)
592                                                 n->in_space = j;
593                                         else
594                                                 n->in_space = 0xffffffff;
595                                 }
596                         }
597                         /*
598                          * If no protocol is specified, multiple by 256.
599                          */
600                         if ((n->in_flags & IPN_TCPUDP) == 0) {
601                                         j = n->in_space * 256;
602                                         if (j >= n->in_space)
603                                                 n->in_space = j;
604                                         else
605                                                 n->in_space = 0xffffffff;
606                         }
607                 }
608                 /* Otherwise, these fields are preset */
609                 n = NULL;
610                 nat_stats.ns_rules++;
611                 break;
612         case SIOCRMNAT :
613                 if (!(mode & FWRITE)) {
614                         error = EPERM;
615                         n = NULL;
616                         break;
617                 }
618                 if (!n) {
619                         error = ESRCH;
620                         break;
621                 }
622                 if (n->in_redir & NAT_REDIRECT)
623                         nat_delrdr(n);
624                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
625                         nat_delnat(n);
626                 if (nat_list == NULL) {
627                         nat_masks = 0;
628                         rdr_masks = 0;
629                 }
630                 *np = n->in_next;
631                 if (!n->in_use) {
632                         if (n->in_apr)
633                                 appr_free(n->in_apr);
634                         KFREE(n);
635                         nat_stats.ns_rules--;
636                 } else {
637                         n->in_flags |= IPN_DELETE;
638                         n->in_next = NULL;
639                 }
640                 n = NULL;
641                 break;
642         case SIOCGNATS :
643                 MUTEX_DOWNGRADE(&ipf_nat);
644                 nat_stats.ns_table[0] = nat_table[0];
645                 nat_stats.ns_table[1] = nat_table[1];
646                 nat_stats.ns_list = nat_list;
647                 nat_stats.ns_maptable = maptable;
648                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
649                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
650                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
651                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
652                 nat_stats.ns_instances = nat_instances;
653                 nat_stats.ns_apslist = ap_sess_list;
654                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
655                                   sizeof(nat_stats));
656                 break;
657         case SIOCGNATL :
658             {
659                 natlookup_t nl;
660
661                 MUTEX_DOWNGRADE(&ipf_nat);
662                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
663                 if (error)
664                         break;
665
666                 if (nat_lookupredir(&nl)) {
667                         error = IWCOPYPTR((char *)&nl, (char *)data,
668                                           sizeof(nl));
669                 } else
670                         error = ESRCH;
671                 break;
672             }
673         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
674                 if (!(mode & FWRITE)) {
675                         error = EPERM;
676                         break;
677                 }
678                 error = 0;
679                 if (arg == 0)
680                         ret = nat_flushtable();
681                 else if (arg == 1)
682                         ret = nat_clearlist();
683                 else
684                         error = EINVAL;
685                 MUTEX_DOWNGRADE(&ipf_nat);
686                 if (!error) {
687                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
688                         if (error)
689                                 error = EFAULT;
690                 }
691                 break;
692         case SIOCSTLCK :
693                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
694                 if (!error) {
695                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
696                                         sizeof(fr_nat_lock));
697                         if (!error)
698                                 fr_nat_lock = arg;
699                 } else
700                         error = EFAULT;
701                 break;
702         case SIOCSTPUT :
703                 if (fr_nat_lock)
704                         error = fr_natputent(data);
705                 else
706                         error = EACCES;
707                 break;
708         case SIOCSTGSZ :
709                 if (fr_nat_lock)
710                         error = fr_natgetsz(data);
711                 else
712                         error = EACCES;
713                 break;
714         case SIOCSTGET :
715                 if (fr_nat_lock)
716                         error = fr_natgetent(data);
717                 else
718                         error = EACCES;
719                 break;
720         case FIONREAD :
721 #ifdef  IPFILTER_LOG
722                 arg = (int)iplused[IPL_LOGNAT];
723                 MUTEX_DOWNGRADE(&ipf_nat);
724                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
725                 if (error)
726                         error = EFAULT;
727 #endif
728                 break;
729         default :
730                 error = EINVAL;
731                 break;
732         }
733         if (getlock == 1) {
734                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
735         }
736 done:
737         if (nt)
738                 KFREE(nt);
739         return error;
740 }
741
742
743 static int fr_natgetsz(data)
744 caddr_t data;
745 {
746         ap_session_t *aps;
747         nat_t *nat, *n;
748         int error = 0;
749         natget_t ng;
750
751         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
752         if (error)
753                 return EFAULT;
754
755         nat = ng.ng_ptr;
756         if (!nat) {
757                 nat = nat_instances;
758                 ng.ng_sz = 0;
759                 if (nat == NULL) {
760                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
761                         if (error)
762                                 error = EFAULT;
763                         return error;
764                 }
765         } else {
766                 /*
767                  * Make sure the pointer we're copying from exists in the
768                  * current list of entries.  Security precaution to prevent
769                  * copying of random kernel data.
770                  */
771                 for (n = nat_instances; n; n = n->nat_next)
772                         if (n == nat)
773                                 break;
774                 if (!n)
775                         return ESRCH;
776         }
777
778         ng.ng_sz = sizeof(nat_save_t);
779         aps = nat->nat_aps;
780         if ((aps != NULL) && (aps->aps_data != 0)) {
781                 ng.ng_sz += sizeof(ap_session_t);
782                 ng.ng_sz += aps->aps_psiz;
783         }
784
785         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
786         if (error)
787                 error = EFAULT;
788         return error;
789 }
790
791
792 static int fr_natgetent(data)
793 caddr_t data;
794 {
795         nat_save_t ipn, *ipnp, *ipnn = NULL;
796         nat_t *n, *nat;
797         ap_session_t *aps;
798         int error;
799
800         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
801         if (error)
802                 return EFAULT;
803         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
804         if (error)
805                 return EFAULT;
806
807         nat = ipn.ipn_next;
808         if (!nat) {
809                 nat = nat_instances;
810                 if (nat == NULL) {
811                         if (nat_instances == NULL)
812                                 return ENOENT;
813                         return 0;
814                 }
815         } else {
816                 /*
817                  * Make sure the pointer we're copying from exists in the
818                  * current list of entries.  Security precaution to prevent
819                  * copying of random kernel data.
820                  */
821                 for (n = nat_instances; n; n = n->nat_next)
822                         if (n == nat)
823                                 break;
824                 if (!n)
825                         return ESRCH;
826         }
827
828         ipn.ipn_next = nat->nat_next;
829         ipn.ipn_dsize = 0;
830         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
831         ipn.ipn_nat.nat_data = NULL;
832
833         if (nat->nat_ptr) {
834                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
835                       sizeof(ipn.ipn_ipnat));
836         }
837
838         if (nat->nat_fr)
839                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
840                       sizeof(ipn.ipn_rule));
841
842         if ((aps = nat->nat_aps)) {
843                 ipn.ipn_dsize = sizeof(*aps);
844                 if (aps->aps_data)
845                         ipn.ipn_dsize += aps->aps_psiz;
846                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
847                 if (ipnn == NULL)
848                         return ENOMEM;
849                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
850
851                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
852                 if (aps->aps_data) {
853                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
854                               aps->aps_psiz);
855                         ipnn->ipn_dsize += aps->aps_psiz;
856                 }
857                 error = IWCOPY((caddr_t)ipnn, ipnp,
858                                sizeof(ipn) + ipn.ipn_dsize);
859                 if (error)
860                         error = EFAULT;
861                 KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
862         } else {
863                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
864                 if (error)
865                         error = EFAULT;
866         }
867         return error;
868 }
869
870
871 static int fr_natputent(data)
872 caddr_t data;
873 {
874         nat_save_t ipn, *ipnp, *ipnn = NULL;
875         nat_t *n, *nat;
876         ap_session_t *aps;
877         frentry_t *fr;
878         ipnat_t *in;
879
880         int error;
881
882         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
883         if (error)
884                 return EFAULT;
885         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
886         if (error)
887                 return EFAULT;
888         nat = NULL;
889         if (ipn.ipn_dsize) {
890                 KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
891                 if (ipnn == NULL)
892                         return ENOMEM;
893                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
894                 error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
895                                ipn.ipn_dsize);
896                 if (error) {
897                         error = EFAULT;
898                         goto junkput;
899                 }
900         } else
901                 ipnn = NULL;
902
903         KMALLOC(nat, nat_t *);
904         if (nat == NULL) {
905                 error = EFAULT;
906                 goto junkput;
907         }
908
909         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
910         /*
911          * Initialize all these so that nat_delete() doesn't cause a crash.
912          */
913         nat->nat_phnext[0] = NULL;
914         nat->nat_phnext[1] = NULL;
915         fr = nat->nat_fr;
916         nat->nat_fr = NULL;
917         aps = nat->nat_aps;
918         nat->nat_aps = NULL;
919         in = nat->nat_ptr;
920         nat->nat_ptr = NULL;
921         nat->nat_hm = NULL;
922         nat->nat_data = NULL;
923         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
924
925         /*
926          * Restore the rule associated with this nat session
927          */
928         if (in) {
929                 KMALLOC(in, ipnat_t *);
930                 if (in == NULL) {
931                         error = ENOMEM;
932                         goto junkput;
933                 }
934                 nat->nat_ptr = in;
935                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
936                 in->in_use = 1;
937                 in->in_flags |= IPN_DELETE;
938                 in->in_next = NULL;
939                 in->in_rnext = NULL;
940                 in->in_prnext = NULL;
941                 in->in_mnext = NULL;
942                 in->in_pmnext = NULL;
943                 in->in_ifp = GETUNIT(in->in_ifname, 4);
944                 if (in->in_plabel[0] != '\0') {
945                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
946                 }
947         }
948
949         /*
950          * Restore ap_session_t structure.  Include the private data allocated
951          * if it was there.
952          */
953         if (aps) {
954                 KMALLOC(aps, ap_session_t *);
955                 if (aps == NULL) {
956                         error = ENOMEM;
957                         goto junkput;
958                 }
959                 nat->nat_aps = aps;
960                 aps->aps_next = ap_sess_list;
961                 ap_sess_list = aps;
962                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
963                 if (in)
964                         aps->aps_apr = in->in_apr;
965                 if (aps->aps_psiz) {
966                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
967                         if (aps->aps_data == NULL) {
968                                 error = ENOMEM;
969                                 goto junkput;
970                         }
971                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
972                               aps->aps_psiz);
973                 } else {
974                         aps->aps_psiz = 0;
975                         aps->aps_data = NULL;
976                 }
977         }
978
979         /*
980          * If there was a filtering rule associated with this entry then
981          * build up a new one.
982          */
983         if (fr != NULL) {
984                 if (nat->nat_flags & FI_NEWFR) {
985                         KMALLOC(fr, frentry_t *);
986                         nat->nat_fr = fr;
987                         if (fr == NULL) {
988                                 error = ENOMEM;
989                                 goto junkput;
990                         }
991                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
992                         ipn.ipn_nat.nat_fr = fr;
993                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
994                         if (error) {
995                                 error = EFAULT;
996                                 goto junkput;
997                         }
998                 } else {
999                         for (n = nat_instances; n; n = n->nat_next)
1000                                 if (n->nat_fr == fr)
1001                                         break;
1002                         if (!n) {
1003                                 error = ESRCH;
1004                                 goto junkput;
1005                         }
1006                 }
1007         }
1008
1009         if (ipnn)
1010                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1011         nat_insert(nat);
1012         return 0;
1013 junkput:
1014         if (ipnn)
1015                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1016         if (nat)
1017                 nat_delete(nat);
1018         return error;
1019 }
1020
1021
1022 /*
1023  * Delete a nat entry from the various lists and table.
1024  */
1025 static void nat_delete(natd)
1026 struct nat *natd;
1027 {
1028         struct ipnat *ipn;
1029
1030         if (natd->nat_flags & FI_WILDP)
1031                 nat_stats.ns_wilds--;
1032         if (natd->nat_hnext[0])
1033                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1034         *natd->nat_phnext[0] = natd->nat_hnext[0];
1035         if (natd->nat_hnext[1])
1036                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1037         *natd->nat_phnext[1] = natd->nat_hnext[1];
1038         if (natd->nat_me != NULL)
1039                 *natd->nat_me = NULL;
1040
1041         if (natd->nat_fr != NULL) {
1042                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1043         }
1044
1045         if (natd->nat_hm != NULL)
1046                 nat_hostmapdel(natd->nat_hm);
1047
1048         /*
1049          * If there is an active reference from the nat entry to its parent
1050          * rule, decrement the rule's reference count and free it too if no
1051          * longer being used.
1052          */
1053         ipn = natd->nat_ptr;
1054         if (ipn != NULL) {
1055                 ipn->in_space++;
1056                 ipn->in_use--;
1057                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1058                         if (ipn->in_apr)
1059                                 appr_free(ipn->in_apr);
1060                         KFREE(ipn);
1061                         nat_stats.ns_rules--;
1062                 }
1063         }
1064
1065         MUTEX_DESTROY(&natd->nat_lock);
1066         /*
1067          * If there's a fragment table entry too for this nat entry, then
1068          * dereference that as well.
1069          */
1070         ipfr_forget((void *)natd);
1071         aps_free(natd->nat_aps);
1072         nat_stats.ns_inuse--;
1073         KFREE(natd);
1074 }
1075
1076
1077 /*
1078  * nat_flushtable - clear the NAT table of all mapping entries.
1079  * (this is for the dynamic mappings)
1080  */
1081 static int nat_flushtable()
1082 {
1083         nat_t *nat, **natp;
1084         int j = 0;
1085
1086         /*
1087          * ALL NAT mappings deleted, so lets just make the deletions
1088          * quicker.
1089          */
1090         if (nat_table[0] != NULL)
1091                 bzero((char *)nat_table[0],
1092                       sizeof(nat_table[0]) * ipf_nattable_sz);
1093         if (nat_table[1] != NULL)
1094                 bzero((char *)nat_table[1],
1095                       sizeof(nat_table[1]) * ipf_nattable_sz);
1096
1097         for (natp = &nat_instances; (nat = *natp); ) {
1098                 *natp = nat->nat_next;
1099 #ifdef  IPFILTER_LOG
1100                 nat_log(nat, NL_FLUSH);
1101 #endif
1102                 nat_delete(nat);
1103                 j++;
1104         }
1105         nat_stats.ns_inuse = 0;
1106         return j;
1107 }
1108
1109
1110 /*
1111  * nat_clearlist - delete all rules in the active NAT mapping list.
1112  * (this is for NAT/RDR rules)
1113  */
1114 int nat_clearlist()
1115 {
1116         ipnat_t *n, **np = &nat_list;
1117         int i = 0;
1118
1119         if (nat_rules != NULL)
1120                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1121         if (rdr_rules != NULL)
1122                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1123
1124         while ((n = *np)) {
1125                 *np = n->in_next;
1126                 if (!n->in_use) {
1127                         if (n->in_apr)
1128                                 appr_free(n->in_apr);
1129                         KFREE(n);
1130                         nat_stats.ns_rules--;
1131                 } else {
1132                         n->in_flags |= IPN_DELETE;
1133                         n->in_next = NULL;
1134                 }
1135                 i++;
1136         }
1137         nat_masks = 0;
1138         rdr_masks = 0;
1139         return i;
1140 }
1141
1142
1143 /*
1144  * Create a new NAT table entry.
1145  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1146  *       If you intend on changing this, beware: appr_new() may call nat_new()
1147  *       recursively!
1148  */
1149 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1150 fr_info_t *fin;
1151 ip_t *ip;
1152 ipnat_t *np;
1153 nat_t **natsave;
1154 u_int flags;
1155 int direction;
1156 {
1157         u_32_t sum1, sum2, sumd, l;
1158         u_short port = 0, sport = 0, dport = 0, nport = 0;
1159         struct in_addr in, inb;
1160         u_short nflags, sp, dp;
1161         tcphdr_t *tcp = NULL;
1162         hostmap_t *hm = NULL;
1163         nat_t *nat, *natl;
1164 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1165         qif_t *qf = fin->fin_qif;
1166 #endif
1167
1168         nflags = flags & np->in_flags;
1169         if (flags & IPN_TCPUDP) {
1170                 tcp = (tcphdr_t *)fin->fin_dp;
1171                 sport = htons(fin->fin_data[0]);
1172                 dport = htons(fin->fin_data[1]);
1173         }
1174
1175         /* Give me a new nat */
1176         KMALLOC(nat, nat_t *);
1177         if (nat == NULL) {
1178                 nat_stats.ns_memfail++;
1179                 return NULL;
1180         }
1181
1182         bzero((char *)nat, sizeof(*nat));
1183         nat->nat_flags = flags;
1184         if (flags & FI_WILDP)
1185                 nat_stats.ns_wilds++;
1186         /*
1187          * Search the current table for a match.
1188          */
1189         if (direction == NAT_OUTBOUND) {
1190                 /*
1191                  * Values at which the search for a free resouce starts.
1192                  */
1193                 u_32_t st_ip;
1194                 u_short st_port;
1195
1196                 /*
1197                  * If it's an outbound packet which doesn't match any existing
1198                  * record, then create a new port
1199                  */
1200                 l = 0;
1201                 st_ip = np->in_nip;
1202                 st_port = np->in_pnext;
1203
1204                 do {
1205                         port = 0;
1206                         in.s_addr = htonl(np->in_nip);
1207                         if (l == 0) {
1208                                 /*
1209                                  * Check to see if there is an existing NAT
1210                                  * setup for this IP address pair.
1211                                  */
1212                                 hm = nat_hostmap(np, fin->fin_src, in);
1213                                 if (hm != NULL)
1214                                         in.s_addr = hm->hm_mapip.s_addr;
1215                         } else if ((l == 1) && (hm != NULL)) {
1216                                 nat_hostmapdel(hm);
1217                                 hm = NULL;
1218                         }
1219                         in.s_addr = ntohl(in.s_addr);
1220
1221                         nat->nat_hm = hm;
1222
1223                         if ((np->in_outmsk == 0xffffffff) &&
1224                             (np->in_pnext == 0)) {
1225                                 if (l > 0)
1226                                         goto badnat;
1227                         }
1228
1229                         if (np->in_redir & NAT_MAPBLK) {
1230                                 if ((l >= np->in_ppip) || ((l > 0) &&
1231                                      !(flags & IPN_TCPUDP)))
1232                                         goto badnat;
1233                                 /*
1234                                  * map-block - Calculate destination address.
1235                                  */
1236                                 in.s_addr = ntohl(fin->fin_saddr);
1237                                 in.s_addr &= ntohl(~np->in_inmsk);
1238                                 inb.s_addr = in.s_addr;
1239                                 in.s_addr /= np->in_ippip;
1240                                 in.s_addr &= ntohl(~np->in_outmsk);
1241                                 in.s_addr += ntohl(np->in_outip);
1242                                 /*
1243                                  * Calculate destination port.
1244                                  */
1245                                 if ((flags & IPN_TCPUDP) &&
1246                                     (np->in_ppip != 0)) {
1247                                         port = ntohs(sport) + l;
1248                                         port %= np->in_ppip;
1249                                         port += np->in_ppip *
1250                                                 (inb.s_addr % np->in_ippip);
1251                                         port += MAPBLK_MINPORT;
1252                                         port = htons(port);
1253                                 }
1254                         } else if (!np->in_outip &&
1255                                    (np->in_outmsk == 0xffffffff)) {
1256                                 /*
1257                                  * 0/32 - use the interface's IP address.
1258                                  */
1259                                 if ((l > 0) ||
1260                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1261                                         goto badnat;
1262                                 in.s_addr = ntohl(in.s_addr);
1263                         } else if (!np->in_outip && !np->in_outmsk) {
1264                                 /*
1265                                  * 0/0 - use the original source address/port.
1266                                  */
1267                                 if (l > 0)
1268                                         goto badnat;
1269                                 in.s_addr = ntohl(fin->fin_saddr);
1270                         } else if ((np->in_outmsk != 0xffffffff) &&
1271                                    (np->in_pnext == 0) &&
1272                                    ((l > 0) || (hm == NULL)))
1273                                 np->in_nip++;
1274                         natl = NULL;
1275
1276                         if ((nflags & IPN_TCPUDP) &&
1277                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1278                             (np->in_flags & IPN_AUTOPORTMAP)) {
1279                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1280                                         if (l > np->in_space) {
1281                                                 goto badnat;
1282                                         } else if ((l > np->in_ppip) &&
1283                                                    np->in_outmsk != 0xffffffff)
1284                                                 np->in_nip++;
1285                                 }
1286                                 if (np->in_ppip != 0) {
1287                                         port = ntohs(sport);
1288                                         port += (l % np->in_ppip);
1289                                         port %= np->in_ppip;
1290                                         port += np->in_ppip *
1291                                                 (ntohl(fin->fin_saddr) %
1292                                                  np->in_ippip);
1293                                         port += MAPBLK_MINPORT;
1294                                         port = htons(port);
1295                                 }
1296                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1297                                    (nflags & IPN_TCPUDP) &&
1298                                    (np->in_pnext != 0)) {
1299                                 port = htons(np->in_pnext++);
1300                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1301                                         np->in_pnext = ntohs(np->in_pmin);
1302                                         if (np->in_outmsk != 0xffffffff)
1303                                                 np->in_nip++;
1304                                 }
1305                         }
1306
1307                         if (np->in_flags & IPN_IPRANGE) {
1308                                 if (np->in_nip > ntohl(np->in_outmsk))
1309                                         np->in_nip = ntohl(np->in_outip);
1310                         } else {
1311                                 if ((np->in_outmsk != 0xffffffff) &&
1312                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1313                                     ntohl(np->in_outip))
1314                                         np->in_nip = ntohl(np->in_outip) + 1;
1315                         }
1316
1317                         if (!port && (flags & IPN_TCPUDP))
1318                                 port = sport;
1319
1320                         /*
1321                          * Here we do a lookup of the connection as seen from
1322                          * the outside.  If an IP# pair already exists, try
1323                          * again.  So if you have A->B becomes C->B, you can
1324                          * also have D->E become C->E but not D->B causing
1325                          * another C->B.  Also take protocol and ports into
1326                          * account when determining whether a pre-existing
1327                          * NAT setup will cause an external conflict where
1328                          * this is appropriate.
1329                          */
1330                         inb.s_addr = htonl(in.s_addr);
1331                         sp = fin->fin_data[0];
1332                         dp = fin->fin_data[1];
1333                         fin->fin_data[0] = fin->fin_data[1];
1334                         fin->fin_data[1] = htons(port);
1335                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1336                                             (u_int)fin->fin_p, fin->fin_dst,
1337                                             inb, 1);
1338                         fin->fin_data[0] = sp;
1339                         fin->fin_data[1] = dp;
1340
1341                         /*
1342                          * Has the search wrapped around and come back to the
1343                          * start ?
1344                          */
1345                         if ((natl != NULL) &&
1346                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1347                             (np->in_nip != 0) && (st_ip == np->in_nip))
1348                                 goto badnat;
1349                         l++;
1350                 } while (natl != NULL);
1351
1352                 if (np->in_space > 0)
1353                         np->in_space--;
1354
1355                 /* Setup the NAT table */
1356                 nat->nat_inip = fin->fin_src;
1357                 nat->nat_outip.s_addr = htonl(in.s_addr);
1358                 nat->nat_oip = fin->fin_dst;
1359                 if (nat->nat_hm == NULL)
1360                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1361                                                   nat->nat_outip);
1362
1363                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1364                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1365
1366                 if (flags & IPN_TCPUDP) {
1367                         nat->nat_inport = sport;
1368                         nat->nat_outport = port;        /* sport */
1369                         nat->nat_oport = dport;
1370                 }
1371         } else {
1372                 /*
1373                  * Otherwise, it's an inbound packet. Most likely, we don't
1374                  * want to rewrite source ports and source addresses. Instead,
1375                  * we want to rewrite to a fixed internal address and fixed
1376                  * internal port.
1377                  */
1378                 if (np->in_flags & IPN_SPLIT) {
1379                         in.s_addr = np->in_nip;
1380                         if (np->in_inip == htonl(in.s_addr))
1381                                 np->in_nip = ntohl(np->in_inmsk);
1382                         else {
1383                                 np->in_nip = ntohl(np->in_inip);
1384                                 if (np->in_flags & IPN_ROUNDR) {
1385                                         nat_delrdr(np);
1386                                         nat_addrdr(np);
1387                                 }
1388                         }
1389                 } else {
1390                         in.s_addr = ntohl(np->in_inip);
1391                         if (np->in_flags & IPN_ROUNDR) {
1392                                 nat_delrdr(np);
1393                                 nat_addrdr(np);
1394                         }
1395                 }
1396                 if (!np->in_pnext)
1397                         nport = dport;
1398                 else {
1399                         /*
1400                          * Whilst not optimized for the case where
1401                          * pmin == pmax, the gain is not significant.
1402                          */
1403                         if (np->in_pmin != np->in_pmax) {
1404                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1405                                         ntohs(np->in_pnext);
1406                                 nport = ntohs(nport);
1407                         } else
1408                                 nport = np->in_pnext;
1409                 }
1410
1411                 /*
1412                  * When the redirect-to address is set to 0.0.0.0, just
1413                  * assume a blank `forwarding' of the packet.
1414                  */
1415                 if (in.s_addr == 0)
1416                         in.s_addr = ntohl(fin->fin_daddr);
1417
1418                 nat->nat_inip.s_addr = htonl(in.s_addr);
1419                 nat->nat_outip = fin->fin_dst;
1420                 nat->nat_oip = fin->fin_src;
1421
1422                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1423                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1424
1425                 if (flags & IPN_TCPUDP) {
1426                         nat->nat_inport = nport;
1427                         nat->nat_outport = dport;
1428                         nat->nat_oport = sport;
1429                 }
1430         }
1431
1432         CALC_SUMD(sum1, sum2, sumd);
1433         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1434 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1435         if ((flags & IPN_TCPUDP) && dohwcksum &&
1436             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1437                 if (direction == NAT_OUTBOUND)
1438                         sum1 = LONG_SUM(ntohl(in.s_addr));
1439                 else
1440                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1441                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1442                 sum1 += IPPROTO_TCP;
1443                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1444                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1445         } else
1446 #endif
1447                 nat->nat_sumd[1] = nat->nat_sumd[0];
1448
1449         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1450                 if (direction == NAT_OUTBOUND)
1451                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1452                 else
1453                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1454
1455                 sum2 = LONG_SUM(in.s_addr);
1456
1457                 CALC_SUMD(sum1, sum2, sumd);
1458                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1459         } else
1460                 nat->nat_ipsumd = nat->nat_sumd[0];
1461
1462         in.s_addr = htonl(in.s_addr);
1463
1464         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1465
1466         nat->nat_me = natsave;
1467         nat->nat_dir = direction;
1468         nat->nat_ifp = fin->fin_ifp;
1469         nat->nat_ptr = np;
1470         nat->nat_p = fin->fin_p;
1471         nat->nat_bytes = 0;
1472         nat->nat_pkts = 0;
1473         nat->nat_mssclamp = np->in_mssclamp;
1474         nat->nat_fr = fin->fin_fr;
1475         if (nat->nat_fr != NULL) {
1476                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1477         }
1478         if (direction == NAT_OUTBOUND) {
1479                 if (flags & IPN_TCPUDP)
1480                         tcp->th_sport = port;
1481         } else {
1482                 if (flags & IPN_TCPUDP)
1483                         tcp->th_dport = nport;
1484         }
1485
1486         nat_insert(nat);
1487
1488         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1489             (tcp != NULL && dport == np->in_dport)))
1490                 (void) appr_new(fin, ip, nat);
1491
1492         np->in_use++;
1493 #ifdef  IPFILTER_LOG
1494         nat_log(nat, (u_int)np->in_redir);
1495 #endif
1496         return nat;
1497 badnat:
1498         nat_stats.ns_badnat++;
1499         if ((hm = nat->nat_hm) != NULL)
1500                 nat_hostmapdel(hm);
1501         KFREE(nat);
1502         return NULL;
1503 }
1504
1505
1506 /*
1507  * Insert a NAT entry into the hash tables for searching and add it to the
1508  * list of active NAT entries.  Adjust global counters when complete.
1509  */
1510 void    nat_insert(nat)
1511 nat_t   *nat;
1512 {
1513         u_int hv1, hv2;
1514         nat_t **natp;
1515
1516         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1517
1518         nat->nat_age = fr_defnatage;
1519         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1520         if (nat->nat_ifname[0] !='\0') {
1521                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1522         }
1523
1524         nat->nat_next = nat_instances;
1525         nat_instances = nat;
1526
1527         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1528                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1529                                   0xffffffff);
1530                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1531                                   ipf_nattable_sz);
1532                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1533                                   0xffffffff);
1534                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1535                                  ipf_nattable_sz);
1536         } else {
1537                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1538                                   ipf_nattable_sz);
1539                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1540                                   ipf_nattable_sz);
1541         }
1542
1543         natp = &nat_table[0][hv1];
1544         if (*natp)
1545                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1546         nat->nat_phnext[0] = natp;
1547         nat->nat_hnext[0] = *natp;
1548         *natp = nat;
1549
1550         natp = &nat_table[1][hv2];
1551         if (*natp)
1552                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1553         nat->nat_phnext[1] = natp;
1554         nat->nat_hnext[1] = *natp;
1555         *natp = nat;
1556
1557         nat_stats.ns_added++;
1558         nat_stats.ns_inuse++;
1559 }
1560
1561
1562 nat_t *nat_icmplookup(ip, fin, dir)
1563 ip_t *ip;
1564 fr_info_t *fin;
1565 int dir;
1566 {
1567         icmphdr_t *icmp;
1568         tcphdr_t *tcp = NULL;
1569         ip_t *oip;
1570         int flags = 0, type, minlen;
1571
1572         icmp = (icmphdr_t *)fin->fin_dp;
1573         /*
1574          * Does it at least have the return (basic) IP header ?
1575          * Only a basic IP header (no options) should be with an ICMP error
1576          * header.
1577          */
1578         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1579                 return NULL;
1580         type = icmp->icmp_type;
1581         /*
1582          * If it's not an error type, then return.
1583          */
1584         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1585             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1586             (type != ICMP_PARAMPROB))
1587                 return NULL;
1588
1589         oip = (ip_t *)((char *)fin->fin_dp + 8);
1590         minlen = (oip->ip_hl << 2);
1591         if (minlen < sizeof(ip_t))
1592                 return NULL;
1593         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1594                 return NULL;
1595         /*
1596          * Is the buffer big enough for all of it ?  It's the size of the IP
1597          * header claimed in the encapsulated part which is of concern.  It
1598          * may be too big to be in this buffer but not so big that it's
1599          * outside the ICMP packet, leading to TCP deref's causing problems.
1600          * This is possible because we don't know how big oip_hl is when we
1601          * do the pullup early in fr_check() and thus can't gaurantee it is
1602          * all here now.
1603          */
1604 #ifdef  _KERNEL
1605         {
1606         mb_t *m;
1607
1608 # if SOLARIS
1609         m = fin->fin_qfm;
1610         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1611                 return NULL;
1612 # else
1613         m = *(mb_t **)fin->fin_mp;
1614         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1615             (char *)ip + m->m_len)
1616                 return NULL;
1617 # endif
1618         }
1619 #endif
1620
1621         if (oip->ip_p == IPPROTO_TCP)
1622                 flags = IPN_TCP;
1623         else if (oip->ip_p == IPPROTO_UDP)
1624                 flags = IPN_UDP;
1625         if (flags & IPN_TCPUDP) {
1626                 u_short data[2];
1627                 nat_t *nat;
1628
1629                 minlen += 8;            /* + 64bits of data to get ports */
1630                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1631                         return NULL;
1632
1633                 data[0] = fin->fin_data[0];
1634                 data[1] = fin->fin_data[1];
1635                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1636                 fin->fin_data[0] = ntohs(tcp->th_dport);
1637                 fin->fin_data[1] = ntohs(tcp->th_sport);
1638
1639                 if (dir == NAT_INBOUND) {
1640                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1641                                             oip->ip_dst, oip->ip_src, 0);
1642                 } else {
1643                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1644                                             oip->ip_dst, oip->ip_src, 0);
1645                 }
1646                 fin->fin_data[0] = data[0];
1647                 fin->fin_data[1] = data[1];
1648                 return nat;
1649         }
1650         if (dir == NAT_INBOUND)
1651                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1652                                     oip->ip_dst, oip->ip_src, 0);
1653         else
1654                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1655                                     oip->ip_dst, oip->ip_src, 0);
1656 }
1657
1658
1659 /*
1660  * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1661  * packet gets correctly recognised.
1662  */
1663 nat_t *nat_icmp(ip, fin, nflags, dir)
1664 ip_t *ip;
1665 fr_info_t *fin;
1666 u_int *nflags;
1667 int dir;
1668 {
1669         u_32_t sum1, sum2, sumd, sumd2 = 0;
1670         struct in_addr in;
1671         int flags, dlen;
1672         icmphdr_t *icmp;
1673         udphdr_t *udp;
1674         tcphdr_t *tcp;
1675         nat_t *nat;
1676         ip_t *oip;
1677
1678         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1679                 return NULL;
1680         /*
1681          * nat_icmplookup() will return NULL for `defective' packets.
1682          */
1683         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1684                 return NULL;
1685
1686         flags = 0;
1687         *nflags = IPN_ICMPERR;
1688         icmp = (icmphdr_t *)fin->fin_dp;
1689         oip = (ip_t *)&icmp->icmp_ip;
1690         if (oip->ip_p == IPPROTO_TCP)
1691                 flags = IPN_TCP;
1692         else if (oip->ip_p == IPPROTO_UDP)
1693                 flags = IPN_UDP;
1694         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1695         dlen = ip->ip_len - ((char *)udp - (char *)ip);
1696         /*
1697          * XXX - what if this is bogus hl and we go off the end ?
1698          * In this case, nat_icmplookup() will have returned NULL.
1699          */
1700         tcp = (tcphdr_t *)udp;
1701
1702         /*
1703          * Need to adjust ICMP header to include the real IP#'s and
1704          * port #'s.  Only apply a checksum change relative to the
1705          * IP address change as it will be modified again in ip_natout
1706          * for both address and port.  Two checksum changes are
1707          * necessary for the two header address changes.  Be careful
1708          * to only modify the checksum once for the port # and twice
1709          * for the IP#.
1710          */
1711
1712         /*
1713          * Step 1
1714          * Fix the IP addresses in the offending IP packet. You also need
1715          * to adjust the IP header checksum of that offending IP packet
1716          * and the ICMP checksum of the ICMP error message itself.
1717          *
1718          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1719          * in the pseudo header that is used to compute the UDP resp. TCP
1720          * checksum. So, we must compensate that as well. Even worse, the
1721          * change in the UDP and TCP checksums require yet another
1722          * adjustment of the ICMP checksum of the ICMP error message.
1723          *
1724          */
1725
1726         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1727                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1728                 in = nat->nat_inip;
1729                 oip->ip_src = in;
1730         } else {
1731                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1732                 in = nat->nat_outip;
1733                 oip->ip_dst = in;
1734         }
1735
1736         sum2 = LONG_SUM(ntohl(in.s_addr));
1737
1738         CALC_SUMD(sum1, sum2, sumd);
1739
1740         if (nat->nat_dir == NAT_OUTBOUND) {
1741                 /*
1742                  * Fix IP checksum of the offending IP packet to adjust for
1743                  * the change in the IP address.
1744                  *
1745                  * Normally, you would expect that the ICMP checksum of the 
1746                  * ICMP error message needs to be adjusted as well for the
1747                  * IP address change in oip.
1748                  * However, this is a NOP, because the ICMP checksum is 
1749                  * calculated over the complete ICMP packet, which includes the
1750                  * changed oip IP addresses and oip->ip_sum. However, these 
1751                  * two changes cancel each other out (if the delta for
1752                  * the IP address is x, then the delta for ip_sum is minus x), 
1753                  * so no change in the icmp_cksum is necessary.
1754                  *
1755                  * Be careful that nat_dir refers to the direction of the
1756                  * offending IP packet (oip), not to its ICMP response (icmp)
1757                  */
1758                 fix_datacksum(&oip->ip_sum, sumd);
1759
1760                 /*
1761                  * Fix UDP pseudo header checksum to compensate for the
1762                  * IP address change.
1763                  */
1764                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1765                         /*
1766                          * The UDP checksum is optional, only adjust it 
1767                          * if it has been set.
1768                          */
1769                         sum1 = ntohs(udp->uh_sum);
1770                         fix_datacksum(&udp->uh_sum, sumd);
1771                         sum2 = ntohs(udp->uh_sum);
1772
1773                         /*
1774                          * Fix ICMP checksum to compensate the UDP 
1775                          * checksum adjustment.
1776                          */
1777                         CALC_SUMD(sum1, sum2, sumd);
1778                         sumd2 = sumd;
1779                 }
1780
1781                 /*
1782                  * Fix TCP pseudo header checksum to compensate for the 
1783                  * IP address change. Before we can do the change, we
1784                  * must make sure that oip is sufficient large to hold
1785                  * the TCP checksum (normally it does not!).
1786                  */
1787                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1788                 
1789                         sum1 = ntohs(tcp->th_sum);
1790                         fix_datacksum(&tcp->th_sum, sumd);
1791                         sum2 = ntohs(tcp->th_sum);
1792
1793                         /*
1794                          * Fix ICMP checksum to compensate the TCP 
1795                          * checksum adjustment.
1796                          */
1797                         CALC_SUMD(sum1, sum2, sumd);
1798                         sumd2 = sumd;
1799                 }
1800         } else {
1801
1802                 /*
1803                  * Fix IP checksum of the offending IP packet to adjust for
1804                  * the change in the IP address.
1805                  *
1806                  * Normally, you would expect that the ICMP checksum of the 
1807                  * ICMP error message needs to be adjusted as well for the
1808                  * IP address change in oip.
1809                  * However, this is a NOP, because the ICMP checksum is 
1810                  * calculated over the complete ICMP packet, which includes the
1811                  * changed oip IP addresses and oip->ip_sum. However, these 
1812                  * two changes cancel each other out (if the delta for
1813                  * the IP address is x, then the delta for ip_sum is minus x), 
1814                  * so no change in the icmp_cksum is necessary.
1815                  *
1816                  * Be careful that nat_dir refers to the direction of the
1817                  * offending IP packet (oip), not to its ICMP response (icmp)
1818                  */
1819                 fix_datacksum(&oip->ip_sum, sumd);
1820
1821 /* XXX FV : without having looked at Solaris source code, it seems unlikely
1822  * that SOLARIS would compensate this in the kernel (a body of an IP packet 
1823  * in the data section of an ICMP packet). I have the feeling that this should
1824  * be unconditional, but I'm not in a position to check.
1825  */
1826 #if !SOLARIS && !defined(__sgi)
1827                 /*
1828                  * Fix UDP pseudo header checksum to compensate for the
1829                  * IP address change.
1830                  */
1831                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1832                         /*
1833                          * The UDP checksum is optional, only adjust it 
1834                          * if it has been set 
1835                          */
1836                         sum1 = ntohs(udp->uh_sum);
1837                         fix_datacksum(&udp->uh_sum, sumd);
1838                         sum2 = ntohs(udp->uh_sum);
1839
1840                         /*
1841                          * Fix ICMP checksum to compensate the UDP 
1842                          * checksum adjustment.
1843                          */
1844                         CALC_SUMD(sum1, sum2, sumd);
1845                         sumd2 = sumd;
1846                 }
1847                 
1848                 /* 
1849                  * Fix TCP pseudo header checksum to compensate for the 
1850                  * IP address change. Before we can do the change, we
1851                  * must make sure that oip is sufficient large to hold
1852                  * the TCP checksum (normally it does not!).
1853                  */
1854                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1855                 
1856                         sum1 = ntohs(tcp->th_sum);
1857                         fix_datacksum(&tcp->th_sum, sumd);
1858                         sum2 = ntohs(tcp->th_sum);
1859
1860                         /*
1861                          * Fix ICMP checksum to compensate the TCP
1862                          * checksum adjustment.
1863                          */
1864                         CALC_SUMD(sum1, sum2, sumd);
1865                         sumd2 = sumd;
1866                 }
1867 #endif
1868         }
1869
1870         if ((flags & IPN_TCPUDP) != 0) {
1871                 /*
1872                  * Step 2 :
1873                  * For offending TCP/UDP IP packets, translate the ports as
1874                  * well, based on the NAT specification. Of course such
1875                  * a change must be reflected in the ICMP checksum as well.
1876                  *
1877                  * Advance notice : Now it becomes complicated :-)
1878                  *
1879                  * Since the port fields are part of the TCP/UDP checksum
1880                  * of the offending IP packet, you need to adjust that checksum
1881                  * as well... but, if you change, you must change the icmp
1882                  * checksum *again*, to reflect that change.
1883                  *
1884                  * To further complicate: the TCP checksum is not in the first
1885                  * 8 bytes of the offending ip packet, so it most likely is not
1886                  * available. Some OSses like Solaris return enough bytes to
1887                  * include the TCP checksum. So we have to check if the
1888                  * ip->ip_len actually holds the TCP checksum of the oip!
1889                  */
1890
1891                 if (nat->nat_oport == tcp->th_dport) {
1892                         if (tcp->th_sport != nat->nat_inport) {
1893                                 /*
1894                                  * Fix ICMP checksum to compensate port
1895                                  * adjustment.
1896                                  */
1897                                 sum1 = ntohs(tcp->th_sport);
1898                                 sum2 = ntohs(nat->nat_inport);
1899                                 CALC_SUMD(sum1, sum2, sumd);
1900                                 sumd2 += sumd;
1901                                 tcp->th_sport = nat->nat_inport;
1902
1903                                 /*
1904                                  * Fix udp checksum to compensate port
1905                                  * adjustment.  NOTE : the offending IP packet
1906                                  * flows the other direction compared to the
1907                                  * ICMP message.
1908                                  *
1909                                  * The UDP checksum is optional, only adjust
1910                                  * it if it has been set.
1911                                  */
1912                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1913
1914                                         sum1 = ntohs(udp->uh_sum);
1915                                         fix_datacksum(&udp->uh_sum, sumd);
1916                                         sum2 = ntohs(udp->uh_sum);
1917
1918                                         /*
1919                                          * Fix ICMP checksum to 
1920                                          * compensate UDP checksum 
1921                                          * adjustment.
1922                                          */
1923                                         CALC_SUMD(sum1, sum2, sumd);
1924                                         sumd2 += sumd;
1925                                 }
1926
1927                                 /*
1928                                  * Fix tcp checksum (if present) to compensate
1929                                  * port adjustment. NOTE : the offending IP
1930                                  * packet flows the other direction compared to
1931                                  * the ICMP message.
1932                                  */
1933                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1934
1935                                         sum1 = ntohs(tcp->th_sum);
1936                                         fix_datacksum(&tcp->th_sum, sumd);
1937                                         sum2 = ntohs(tcp->th_sum);
1938
1939                                         /*
1940                                          * Fix ICMP checksum to 
1941                                          * compensate TCP checksum 
1942                                          * adjustment.
1943                                          */
1944                                         CALC_SUMD(sum1, sum2, sumd);
1945                                         sumd2 += sumd;
1946                                 }
1947                         }
1948                 } else {
1949                         if (tcp->th_dport != nat->nat_outport) {
1950                                 /*
1951                                  * Fix ICMP checksum to compensate port
1952                                  * adjustment.
1953                                  */
1954                                 sum1 = ntohs(tcp->th_dport);
1955                                 sum2 = ntohs(nat->nat_outport);
1956                                 CALC_SUMD(sum1, sum2, sumd);
1957                                 sumd2 += sumd;
1958                                 tcp->th_dport = nat->nat_outport;
1959
1960                                 /*
1961                                  * Fix udp checksum to compensate port
1962                                  * adjustment.   NOTE : the offending IP
1963                                  * packet flows the other direction compared
1964                                  * to the ICMP message.
1965                                  *
1966                                  * The UDP checksum is optional, only adjust
1967                                  * it if it has been set.
1968                                  */
1969                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1970
1971                                         sum1 = ntohs(udp->uh_sum);
1972                                         fix_datacksum(&udp->uh_sum, sumd);
1973                                         sum2 = ntohs(udp->uh_sum);
1974
1975                                         /*
1976                                          * Fix ICMP checksum to compensate
1977                                          * UDP checksum adjustment.
1978                                          */
1979                                         CALC_SUMD(sum1, sum2, sumd);
1980                                         sumd2 += sumd;
1981                                 }
1982
1983                                 /*
1984                                  * Fix tcp checksum (if present) to compensate
1985                                  * port adjustment. NOTE : the offending IP
1986                                  * packet flows the other direction compared to
1987                                  * the ICMP message.
1988                                  */
1989                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1990
1991                                         sum1 = ntohs(tcp->th_sum);
1992                                         fix_datacksum(&tcp->th_sum, sumd);
1993                                         sum2 = ntohs(tcp->th_sum);
1994
1995                                         /*
1996                                          * Fix ICMP checksum to compensate
1997                                          * UDP checksum adjustment.
1998                                          */
1999                                         CALC_SUMD(sum1, sum2, sumd);
2000                                         sumd2 += sumd;
2001                                 }
2002                         }
2003                 }
2004                 if (sumd2) {
2005                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2006                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2007                         if (nat->nat_dir == NAT_OUTBOUND) {
2008                                 fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
2009                         } else {
2010                                 fix_incksum(fin, &icmp->icmp_cksum, sumd2);
2011                         }
2012                 }
2013         }
2014         if (oip->ip_p == IPPROTO_ICMP)
2015                 nat->nat_age = fr_defnaticmpage;
2016         return nat;
2017 }
2018
2019
2020 /*
2021  * NB: these lookups don't lock access to the list, it assume it has already
2022  * been done!
2023  */
2024 /*
2025  * Lookup a nat entry based on the mapped destination ip address/port and
2026  * real source address/port.  We use this lookup when receiving a packet,
2027  * we're looking for a table entry, based on the destination address.
2028  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2029  */
2030 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2031 fr_info_t *fin;
2032 u_int flags, p;
2033 struct in_addr src , mapdst;
2034 int rw;
2035 {
2036         u_short sport, dport;
2037         nat_t *nat;
2038         int nflags;
2039         u_32_t dst;
2040         ipnat_t *ipn;
2041         void *ifp;
2042         u_int hv;
2043
2044         if (fin != NULL)
2045                 ifp = fin->fin_ifp;
2046         else
2047                 ifp = NULL;
2048         dst = mapdst.s_addr;
2049         if (flags & IPN_TCPUDP) {
2050                 sport = htons(fin->fin_data[0]);
2051                 dport = htons(fin->fin_data[1]);
2052         } else {
2053                 sport = 0;
2054                 dport = 0;
2055         }
2056
2057         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2058         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2059         nat = nat_table[1][hv];
2060         for (; nat; nat = nat->nat_hnext[1]) {
2061                 nflags = nat->nat_flags;
2062                 if ((!ifp || ifp == nat->nat_ifp) &&
2063                     nat->nat_oip.s_addr == src.s_addr &&
2064                     nat->nat_outip.s_addr == dst &&
2065                     ((p == 0) || (p == nat->nat_p))) {
2066                         switch (p)
2067                         {
2068                         case IPPROTO_TCP :
2069                         case IPPROTO_UDP :
2070                                 if (nat->nat_oport != sport)
2071                                         continue;
2072                                 if (nat->nat_outport != dport)
2073                                         continue;
2074                                 break;
2075                         default :
2076                                 break;
2077                         }
2078
2079                         ipn = nat->nat_ptr;
2080                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2081                                 if (appr_match(fin, nat) != 0)
2082                                         continue;
2083                         return nat;
2084                 }
2085         }
2086         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2087                 return NULL;
2088         if (!rw) {
2089                 RWLOCK_EXIT(&ipf_nat);
2090         }
2091         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2092         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2093         if (!rw) {
2094                 WRITE_ENTER(&ipf_nat);
2095         }
2096         nat = nat_table[1][hv];
2097         for (; nat; nat = nat->nat_hnext[1]) {
2098                 nflags = nat->nat_flags;
2099                 if (ifp && ifp != nat->nat_ifp)
2100                         continue;
2101                 if (!(nflags & FI_WILDP))
2102                         continue;
2103                 if (nat->nat_oip.s_addr != src.s_addr ||
2104                     nat->nat_outip.s_addr != dst)
2105                         continue;
2106                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2107                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2108                         nat_tabmove(fin, nat);
2109                         break;
2110                 }
2111         }
2112         if (!rw) {
2113                 MUTEX_DOWNGRADE(&ipf_nat);
2114         }
2115         return nat;
2116 }
2117
2118
2119 /*
2120  * This function is only called for TCP/UDP NAT table entries where the
2121  * original was placed in the table without hashing on the ports and we now
2122  * want to include hashing on port numbers.
2123  */
2124 static void nat_tabmove(fin, nat)
2125 fr_info_t *fin;
2126 nat_t *nat;
2127 {
2128         u_short sport, dport;
2129         u_int hv, nflags;
2130         nat_t **natp;
2131
2132         nflags = nat->nat_flags;
2133
2134         sport = ntohs(fin->fin_data[0]);
2135         dport = ntohs(fin->fin_data[1]);
2136
2137         /*
2138          * Remove the NAT entry from the old location
2139          */
2140         if (nat->nat_hnext[0])
2141                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2142         *nat->nat_phnext[0] = nat->nat_hnext[0];
2143
2144         if (nat->nat_hnext[1])
2145                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2146         *nat->nat_phnext[1] = nat->nat_hnext[1];
2147
2148         /*
2149          * Add into the NAT table in the new position
2150          */
2151         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2152         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2153         natp = &nat_table[0][hv];
2154         if (*natp)
2155                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2156         nat->nat_phnext[0] = natp;
2157         nat->nat_hnext[0] = *natp;
2158         *natp = nat;
2159
2160         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2161         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2162         natp = &nat_table[1][hv];
2163         if (*natp)
2164                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2165         nat->nat_phnext[1] = natp;
2166         nat->nat_hnext[1] = *natp;
2167         *natp = nat;
2168 }
2169
2170
2171 /*
2172  * Lookup a nat entry based on the source 'real' ip address/port and
2173  * destination address/port.  We use this lookup when sending a packet out,
2174  * we're looking for a table entry, based on the source address.
2175  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2176  */
2177 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2178 fr_info_t *fin;
2179 u_int flags, p;
2180 struct in_addr src , dst;
2181 int rw;
2182 {
2183         u_short sport, dport;
2184         nat_t *nat;
2185         int nflags;
2186         ipnat_t *ipn;
2187         u_32_t srcip;
2188         void *ifp;
2189         u_int hv;
2190
2191         ifp = fin->fin_ifp;
2192         srcip = src.s_addr;
2193         if (flags & IPN_TCPUDP) {
2194                 sport = ntohs(fin->fin_data[0]);
2195                 dport = ntohs(fin->fin_data[1]);
2196         } else {
2197                 sport = 0;
2198                 dport = 0;
2199         }
2200
2201         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2202         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2203         nat = nat_table[0][hv];
2204         for (; nat; nat = nat->nat_hnext[0]) {
2205                 nflags = nat->nat_flags;
2206
2207                 if ((!ifp || ifp == nat->nat_ifp) &&
2208                     nat->nat_inip.s_addr == srcip &&
2209                     nat->nat_oip.s_addr == dst.s_addr &&
2210                     ((p == 0) || (p == nat->nat_p))) {
2211                         switch (p)
2212                         {
2213                         case IPPROTO_TCP :
2214                         case IPPROTO_UDP :
2215                                 if (nat->nat_oport != dport)
2216                                         continue;
2217                                 if (nat->nat_inport != sport)
2218                                         continue;
2219                                 break;
2220                         default :
2221                                 break;
2222                         }
2223
2224                         ipn = nat->nat_ptr;
2225                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2226                                 if (appr_match(fin, nat) != 0)
2227                                         continue;
2228                         return nat;
2229                 }
2230         }
2231         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2232                 return NULL;
2233         if (!rw) {
2234                 RWLOCK_EXIT(&ipf_nat);
2235         }
2236
2237         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2238         if (!rw) {
2239                 WRITE_ENTER(&ipf_nat);
2240         }
2241         nat = nat_table[0][hv];
2242         for (; nat; nat = nat->nat_hnext[0]) {
2243                 nflags = nat->nat_flags;
2244                 if (ifp && ifp != nat->nat_ifp)
2245                         continue;
2246                 if (!(nflags & FI_WILDP))
2247                         continue;
2248                 if ((nat->nat_inip.s_addr != srcip) ||
2249                     (nat->nat_oip.s_addr != dst.s_addr))
2250                         continue;
2251                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2252                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2253                         nat_tabmove(fin, nat);
2254                         break;
2255                 }
2256         }
2257         if (!rw) {
2258                 MUTEX_DOWNGRADE(&ipf_nat);
2259         }
2260         return nat;
2261 }
2262
2263
2264 /*
2265  * Lookup the NAT tables to search for a matching redirect
2266  */
2267 nat_t *nat_lookupredir(np)
2268 natlookup_t *np;
2269 {
2270         nat_t *nat;
2271         fr_info_t fi;
2272
2273         bzero((char *)&fi, sizeof(fi));
2274         fi.fin_data[0] = ntohs(np->nl_inport);
2275         fi.fin_data[1] = ntohs(np->nl_outport);
2276
2277         /*
2278          * If nl_inip is non null, this is a lookup based on the real
2279          * ip address. Else, we use the fake.
2280          */
2281         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2282                                  np->nl_outip, 0))) {
2283                 np->nl_realip = nat->nat_outip;
2284                 np->nl_realport = nat->nat_outport;
2285         }
2286         return nat;
2287 }
2288
2289
2290 static int nat_match(fin, np, ip)
2291 fr_info_t *fin;
2292 ipnat_t *np;
2293 ip_t *ip;
2294 {
2295         frtuc_t *ft;
2296
2297         if (ip->ip_v != 4)
2298                 return 0;
2299
2300         if (np->in_p && fin->fin_p != np->in_p)
2301                 return 0;
2302         if (fin->fin_out) {
2303                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2304                         return 0;
2305                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2306                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2307                         return 0;
2308                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2309                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2310                         return 0;
2311         } else {
2312                 if (!(np->in_redir & NAT_REDIRECT))
2313                         return 0;
2314                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2315                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2316                         return 0;
2317                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2318                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2319                         return 0;
2320         }
2321
2322         ft = &np->in_tuc;
2323         if (!(fin->fin_fl & FI_TCPUDP) ||
2324             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2325                 if (ft->ftu_scmp || ft->ftu_dcmp)
2326                         return 0;
2327                 return 1;
2328         }
2329
2330         return fr_tcpudpchk(ft, fin);
2331 }
2332
2333
2334 /*
2335  * Packets going out on the external interface go through this.
2336  * Here, the source address requires alteration, if anything.
      *
      * ip  - IP header of the packet; ip_src, ip_sum and the TCP/UDP source
      *       port/checksum may be rewritten in place.
      * fin - packet information; fin_ifp is temporarily replaced while rules
      *       are evaluated and is restored before returning.
      *
      * Returns 0 when no NAT session applies (or NAT is disabled/locked).
      * When a session applies, returns 1, unless a proxy is attached to the
      * rule, in which case the appr_check() result is returned (a result of
      * 0 is reported as 1; -1 also bumps nat_drop[1]).
2337  */
2338 int ip_natout(ip, fin)
2339 ip_t *ip;
2340 fr_info_t *fin;
2341 {
2342         ipnat_t *np = NULL;
2343         u_32_t ipa;
2344         tcphdr_t *tcp = NULL;
2345         u_short sport = 0, dport = 0, *csump = NULL;
             /*
              * NOTE(review): icmpset starts at 1 here but at 0 in ip_natin(),
              * and is only ever assigned 1 below, so the fr_defnaticmpage
              * branch further down is never taken in this function --
              * confirm this is intended.
              */
2346         int natadd = 1, i, icmpset = 1;
2347         u_int nflags = 0, hv, msk;
2348         struct ifnet *ifp;
2349         frentry_t *fr;
2350         void *sifp;
2351         u_32_t iph;
2352         nat_t *nat;
2353
             /* Nothing to do if nat_list is empty or fr_nat_lock is set. */
2354         if (nat_list == NULL || (fr_nat_lock))
2355                 return 0;
2356
             /*
              * If the matched filter rule (not FR_DUP) carries a usable
              * fr_tif interface, evaluate NAT against that interface.  The
              * caller's fin_ifp is saved in sifp either way.
              */
2357         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2358             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2359                 sifp = fin->fin_ifp;
2360                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2361         } else
2362                 sifp = fin->fin_ifp;
2363         ifp = fin->fin_ifp;
2364
             /*
              * Ports are only read when fin_off is 0 and the packet is not
              * flagged FI_SHORT; otherwise tcp stays NULL and ports stay 0.
              */
2365         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2366                 if (fin->fin_p == IPPROTO_TCP)
2367                         nflags = IPN_TCP;
2368                 else if (fin->fin_p == IPPROTO_UDP)
2369                         nflags = IPN_UDP;
2370                 if ((nflags & IPN_TCPUDP)) {
2371                         tcp = (tcphdr_t *)fin->fin_dp;
2372                         sport = tcp->th_sport;
2373                         dport = tcp->th_dport;
2374                 }
2375         }
2376
2377         ipa = fin->fin_saddr;
2378
2379         READ_ENTER(&ipf_nat);
2380
             /*
              * Find an existing session, trying in order: nat_icmp() for
              * ICMP, ipfr_nat_knownfrag() for fragments, then the outbound
              * NAT table.  Only if all three fail is a new session created.
              */
2381         if ((fin->fin_p == IPPROTO_ICMP) &&
2382             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2383                 icmpset = 1;
2384         else if ((fin->fin_fl & FI_FRAG) &&
2385             (nat = ipfr_nat_knownfrag(ip, fin)))
2386                 natadd = 0;
2387         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2388                                       (u_int)fin->fin_p, fin->fin_src,
2389                                       fin->fin_dst, 0))) {
2390                 nflags = nat->nat_flags;
                     /*
                      * A session with wildcard ports matched: fill in the
                      * ports from this packet and clear the wildcard flags.
                      */
2391                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2392                         if ((nflags & FI_W_SPORT) &&
2393                             (nat->nat_inport != sport))
2394                                 nat->nat_inport = sport;
2395                         if ((nflags & FI_W_DPORT) &&
2396                             (nat->nat_oport != dport))
2397                                 nat->nat_oport = dport;
2398
2399                         if (nat->nat_outport == 0)
2400                                 nat->nat_outport = sport;
2401                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2402                         nflags = nat->nat_flags;
2403                         nat_stats.ns_wilds--;
2404                 }
2405         } else {
                     /*
                      * No session found: swap the read lock for the write
                      * lock before searching the rules and calling nat_new().
                      */
2406                 RWLOCK_EXIT(&ipf_nat);
2407
2408                 msk = 0xffffffff;
2409                 i = 32;
2410
2411                 WRITE_ENTER(&ipf_nat);
2412                 /*
2413                  * If there is no current entry in the nat table for this IP#,
2414                  * create one for it (if there is a matching rule).
2415                  */
2416 maskloop:
2417                 iph = ipa & htonl(msk);
2418                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2419                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2420                 {
2421                         if (np->in_ifp && (np->in_ifp != ifp))
2422                                 continue;
2423                         if ((np->in_flags & IPN_RF) &&
2424                             !(np->in_flags & nflags))
2425                                 continue;
2426                         if (np->in_flags & IPN_FILTER) {
2427                                 if (!nat_match(fin, np, ip))
2428                                         continue;
2429                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2430                                 continue;
2431                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2432                                 continue;
2433                         nat = nat_new(fin, ip, np, NULL,
2434                                       (u_int)nflags, NAT_OUTBOUND);
2435                         if (nat != NULL) {
2436                                 np->in_hits++;
2437                                 break;
2438                         }
2439                 }
                     /*
                      * No rule produced a session in this hash bucket: shift
                      * msk left until the next bit set in nat_masks is reached
                      * and retry with the shorter mask.
                      */
2440                 if ((np == NULL) && (i > 0)) {
2441                         do {
2442                                 i--;
2443                                 msk <<= 1;
2444                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2445                         if (i >= 0)
2446                                 goto maskloop;
2447                 }
2448                 MUTEX_DOWNGRADE(&ipf_nat);
2449         }
2450
2451         /*
2452          * NOTE: ipf_nat must now only be held as a read lock
2453          */
2454         if (nat) {
2455                 np = nat->nat_ptr;
2456                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2457                         ipfr_nat_newfrag(ip, fin, nat);
                     /* Refresh the age (non-TCP only) and the counters. */
2458                 MUTEX_ENTER(&nat->nat_lock);
2459                 if (fin->fin_p != IPPROTO_TCP) {
2460                         if (np && np->in_age[1])
2461                                 nat->nat_age = np->in_age[1];
2462                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2463                                 nat->nat_age = fr_defnaticmpage;
2464                         else
2465                                 nat->nat_age = fr_defnatage;
2466                 }
2467                 nat->nat_bytes += ip->ip_len;
2468                 nat->nat_pkts++;
2469                 MUTEX_EXIT(&nat->nat_lock);
2470
2471                 /*
2472                  * Fix up checksums, not by recalculating them, but
2473                  * simply computing adjustments.
2474                  */
2475                 if (nflags == IPN_ICMPERR) {
2476                         u_32_t s1, s2, sumd;
2477
2478                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2479                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2480                         CALC_SUMD(s1, s2, sumd);
2481
2482                         if (nat->nat_dir == NAT_OUTBOUND)
2483                                 fix_outcksum(fin, &ip->ip_sum, sumd);
2484                         else
2485                                 fix_incksum(fin, &ip->ip_sum, sumd);
2486                 }
2487 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2488                 else {
2489                         if (nat->nat_dir == NAT_OUTBOUND)
2490                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2491                         else
2492                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2493                 }
2494 #endif
2495                 /*
2496                  * Only change the packet contents, not what is filtered upon.
2497                  */
2498                 ip->ip_src = nat->nat_outip;
2499
2500                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2501
2502                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2503                                 tcp->th_sport = nat->nat_outport;
2504                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2505                         }
2506
2507                         if (fin->fin_p == IPPROTO_TCP) {
2508                                 csump = &tcp->th_sum;
2509                                 MUTEX_ENTER(&nat->nat_lock);
2510                                 fr_tcp_age(&nat->nat_age,
2511                                            nat->nat_tcpstate, fin, 1, 0);
2512                                 if (nat->nat_age < fr_defnaticmpage)
2513                                         nat->nat_age = fr_defnaticmpage;
2514 #ifdef LARGE_NAT
2515                                 else if (nat->nat_age > fr_defnatage)
2516                                         nat->nat_age = fr_defnatage;
2517 #endif
2518                                 /*
2519                                  * Increase this because we may have
2520                                  * "keep state" following this too and
2521                                  * packet storms can occur if this is
2522                                  * removed too quickly.
2523                                  */
2524                                 if (nat->nat_age == fr_tcpclosed)
2525                                         nat->nat_age = fr_tcplastack;
2526
2527                                 /*
2528                                  * Do a MSS CLAMPING on a SYN packet,
2529                                  * only deal IPv4 for now.
2530                                  */
2531                                 if (nat->nat_mssclamp &&
2532                                     (tcp->th_flags & TH_SYN) != 0)
2533                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2534                                                      fin, csump);
2535
2536                                 MUTEX_EXIT(&nat->nat_lock);
2537                         } else if (fin->fin_p == IPPROTO_UDP) {
2538                                 udphdr_t *udp = (udphdr_t *)tcp;
2539
                                     /* A zero UDP checksum is left untouched. */
2540                                 if (udp->uh_sum)
2541                                         csump = &udp->uh_sum;
2542                         }
2543
2544                         if (csump) {
2545                                 if (nat->nat_dir == NAT_OUTBOUND)
2546                                         fix_outcksum(fin, csump,
2547                                                      nat->nat_sumd[1]);
2548                                 else
2549                                         fix_incksum(fin, csump,
2550                                                     nat->nat_sumd[1]);
2551                         }
2552                 }
2553
                     /*
                      * Hand the packet to the rule's proxy when one is attached
                      * and the rule's in_dport is 0 or equals the packet's
                      * destination port.
                      */
2554                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2555                      (tcp != NULL && dport == np->in_dport))) {
2556                         i = appr_check(ip, fin, nat);
2557                         if (i == 0)
2558                                 i = 1;
2559                         else if (i == -1)
2560                                 nat->nat_drop[1]++;
2561                 } else
2562                         i = 1;
2563                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2564                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2565                 fin->fin_ifp = sifp;
2566                 return i;
2567         }
2568         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2569         fin->fin_ifp = sifp;
2570         return 0;
2571 }
2572
2573
2574 /*
2575  * Packets coming in from the external interface go through this.
2576  * Here, the destination address requires alteration, if anything.
      *
      * ip  - IP header of the packet; ip_dst, the TCP/UDP destination port
      *       and the checksums may be rewritten in place.
      * fin - packet information; fin_fr, fi_daddr and fin_data[1] are
      *       updated to reflect the translated packet.
      *
      * Returns 0 when no NAT session applies (including non-IPv4 packets and
      * the case where NAT is disabled/locked), 1 when the packet was
      * translated, and -1 when an attached proxy's appr_check() returned -1.
2577  */
2578 int ip_natin(ip, fin)
2579 ip_t *ip;
2580 fr_info_t *fin;
2581 {
2582         struct in_addr src;
2583         struct in_addr in;
2584         ipnat_t *np;
2585         u_short sport = 0, dport = 0, *csump = NULL;
2586         u_int nflags = 0, natadd = 1, hv, msk;
2587         struct ifnet *ifp = fin->fin_ifp;
2588         tcphdr_t *tcp = NULL;
2589         int i, icmpset = 0;
2590         nat_t *nat;
2591         u_32_t iph;
2592
             /* Nothing to do without rules, for non-IPv4, or when locked. */
2593         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2594                 return 0;
2595
             /*
              * Ports are only read when fin_off is 0 and the packet is not
              * flagged FI_SHORT; otherwise tcp stays NULL and ports stay 0.
              */
2596         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2597                 if (fin->fin_p == IPPROTO_TCP)
2598                         nflags = IPN_TCP;
2599                 else if (fin->fin_p == IPPROTO_UDP)
2600                         nflags = IPN_UDP;
2601                 if ((nflags & IPN_TCPUDP)) {
2602                         tcp = (tcphdr_t *)fin->fin_dp;
2603                         sport = tcp->th_sport;
2604                         dport = tcp->th_dport;
2605                 }
2606         }
2607
2608         in = fin->fin_dst;
2609         /* make sure the source address is to be redirected */
2610         src = fin->fin_src;
2611
2612         READ_ENTER(&ipf_nat);
2613
             /*
              * Find an existing session, trying in order: nat_icmp() for
              * ICMP, ipfr_nat_knownfrag() for fragments, then the inbound
              * NAT table.  Only if all three fail is a new session created.
              */
2614         if ((fin->fin_p == IPPROTO_ICMP) &&
2615             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2616                 icmpset = 1;
2617         else if ((fin->fin_fl & FI_FRAG) &&
2618                  (nat = ipfr_nat_knownfrag(ip, fin)))
2619                 natadd = 0;
2620         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2621                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2622                 nflags = nat->nat_flags;
                     /*
                      * A session with wildcard ports matched: fill in the
                      * ports from this packet and clear the wildcard flags.
                      */
2623                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2624                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2625                                 nat->nat_oport = sport;
2626                         if ((nat->nat_outport != dport) &&
2627                                  (nflags & FI_W_SPORT))
2628                                 nat->nat_outport = dport;
2629                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2630                         nflags = nat->nat_flags;
2631                         nat_stats.ns_wilds--;
2632                 }
2633         } else {
                     /*
                      * No session found: swap the read lock for the write
                      * lock before searching the rules and calling nat_new().
                      */
2634                 RWLOCK_EXIT(&ipf_nat);
2635
2636                 msk = 0xffffffff;
2637                 i = 32;
2638
2639                 WRITE_ENTER(&ipf_nat);
2640                 /*
2641                  * If there is no current entry in the nat table for this IP#,
2642                  * create one for it (if there is a matching rule).
2643                  */
2644 maskloop:
2645                 iph = in.s_addr & htonl(msk);
2646                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2647                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2648                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2649                             (np->in_p && (np->in_p != fin->fin_p)) ||
2650                             (np->in_flags && !(nflags & np->in_flags)))
2651                                 continue;
2652                         if (np->in_flags & IPN_FILTER) {
2653                                 if (!nat_match(fin, np, ip))
2654                                         continue;
2655                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2656                                 continue;
                             /*
                              * The rule applies when it has no in_pmin, is an
                              * IPN_FILTER rule, or the destination port lies
                              * within [in_pmin, in_pmax].
                              */
2657                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2658                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2659                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2660                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2661                                                     NAT_INBOUND))) {
2662                                         np->in_hits++;
2663                                         break;
2664                                 }
2665                 }
2666
                     /*
                      * No rule produced a session in this hash bucket: shift
                      * msk left until the next bit set in rdr_masks is reached
                      * and retry with the shorter mask.
                      */
2667                 if ((np == NULL) && (i > 0)) {
2668                         do {
2669                                 i--;
2670                                 msk <<= 1;
2671                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2672                         if (i >= 0)
2673                                 goto maskloop;
2674                 }
2675                 MUTEX_DOWNGRADE(&ipf_nat);
2676         }
2677
2678         /*
2679          * NOTE: ipf_nat must now only be held as a read lock
2680          */
2681         if (nat) {
2682                 np = nat->nat_ptr;
2683                 fin->fin_fr = nat->nat_fr;
2684                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2685                         ipfr_nat_newfrag(ip, fin, nat);
                     /*
                      * The proxy is consulted before the packet is modified;
                      * a result of -1 ends processing here and is returned.
                      */
2686                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2687                      (tcp != NULL && sport == np->in_dport))) {
2688                         i = appr_check(ip, fin, nat);
2689                         if (i == -1) {
2690                                 nat->nat_drop[0]++;
2691                                 RWLOCK_EXIT(&ipf_nat);
2692                                 return i;
2693                         }
2694                 }
2695
                     /* Refresh the age (non-TCP only) and the counters. */
2696                 MUTEX_ENTER(&nat->nat_lock);
2697                 if (fin->fin_p != IPPROTO_TCP) {
2698                         if (np && np->in_age[0])
2699                                 nat->nat_age = np->in_age[0];
2700                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2701                                 nat->nat_age = fr_defnaticmpage;
2702                         else
2703                                 nat->nat_age = fr_defnatage;
2704                 }
2705                 nat->nat_bytes += ip->ip_len;
2706                 nat->nat_pkts++;
2707                 MUTEX_EXIT(&nat->nat_lock);
                     /* Rewrite the destination in the packet and in fin. */
2708                 ip->ip_dst = nat->nat_inip;
2709                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2710
2711                 /*
2712                  * Fix up checksums, not by recalculating them, but
2713                  * simply computing adjustments.
2714                  */
2715 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2716                 if (nat->nat_dir == NAT_OUTBOUND)
2717                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2718                 else
2719                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2720 #endif
2721                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2722
2723                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2724                                 tcp->th_dport = nat->nat_inport;
2725                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2726                         }
2727
2728                         if (fin->fin_p == IPPROTO_TCP) {
2729                                 csump = &tcp->th_sum;
2730                                 MUTEX_ENTER(&nat->nat_lock);
2731                                 fr_tcp_age(&nat->nat_age,
2732                                            nat->nat_tcpstate, fin, 0, 0);
2733                                 if (nat->nat_age < fr_defnaticmpage)
2734                                         nat->nat_age = fr_defnaticmpage;
2735 #ifdef LARGE_NAT
2736                                 else if (nat->nat_age > fr_defnatage)
2737                                         nat->nat_age = fr_defnatage;
2738 #endif
2739                                 /*
2740                                  * Increase this because we may have
2741                                  * "keep state" following this too and
2742                                  * packet storms can occur if this is
2743                                  * removed too quickly.
2744                                  */
2745                                 if (nat->nat_age == fr_tcpclosed)
2746                                         nat->nat_age = fr_tcplastack;
2747                                 /*
2748                                  * Do a MSS CLAMPING on a SYN packet,
2749                                  * only deal IPv4 for now.
2750                                  */
2751                                 if (nat->nat_mssclamp &&
2752                                     (tcp->th_flags & TH_SYN) != 0)
2753                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2754                                                      fin, csump);
2755
2756                                 MUTEX_EXIT(&nat->nat_lock);
2757                         } else if (fin->fin_p == IPPROTO_UDP) {
2758                                 udphdr_t *udp = (udphdr_t *)tcp;
2759
                                     /* A zero UDP checksum is left untouched. */
2760                                 if (udp->uh_sum)
2761                                         csump = &udp->uh_sum;
2762                         }
2763
2764                         if (csump) {
2765                                 if (nat->nat_dir == NAT_OUTBOUND)
2766                                         fix_incksum(fin, csump,
2767                                                     nat->nat_sumd[0]);
2768                                 else
2769                                         fix_outcksum(fin, csump,
2770                                                     nat->nat_sumd[0]);
2771                         }
2772                 }
2773                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2774                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2775                 return 1;
2776         }
2777         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2778         return 0;
2779 }
2780
2781
2782 /*
2783  * Free all memory used by NAT structures allocated at runtime.
2784  */
2785 void ip_natunload()
2786 {
2787         WRITE_ENTER(&ipf_nat);
2788         (void) nat_clearlist();
2789         (void) nat_flushtable();
2790         RWLOCK_EXIT(&ipf_nat);
2791
2792         if (nat_table[0] != NULL) {
2793                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2794                 nat_table[0] = NULL;
2795         }
2796         if (nat_table[1] != NULL) {
2797                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2798                 nat_table[1] = NULL;
2799         }
2800         if (nat_rules != NULL) {
2801                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2802                 nat_rules = NULL;
2803         }
2804         if (rdr_rules != NULL) {
2805                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2806                 rdr_rules = NULL;
2807         }
2808         if (maptable != NULL) {
2809                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2810                 maptable = NULL;
2811         }
2812 }
2813
2814
2815 /*
2816  * Slowly expire held state for NAT entries.  Timeouts are set in
2817  * expectation of this being called twice per second.
2818  */
2819 void ip_natexpire()
2820 {
2821         struct nat *nat, **natp;
2822 #if defined(_KERNEL) && !SOLARIS
2823         int s;
2824 #endif
2825
2826         SPL_NET(s);
2827         WRITE_ENTER(&ipf_nat);
2828         for (natp = &nat_instances; (nat = *natp); ) {
2829                 nat->nat_age--;
2830                 if (nat->nat_age) {
2831                         natp = &nat->nat_next;
2832                         continue;
2833                 }
2834                 *natp = nat->nat_next;
2835 #ifdef  IPFILTER_LOG
2836                 nat_log(nat, NL_EXPIRE);
2837 #endif
2838                 nat_delete(nat);
2839                 nat_stats.ns_expire++;
2840         }
2841         RWLOCK_EXIT(&ipf_nat);
2842         SPL_X(s);
2843 }
2844
2845
2846 /*
2847  */
2848 void ip_natsync(ifp)
2849 void *ifp;
2850 {
2851         ipnat_t *n;
2852         nat_t *nat;
2853         u_32_t sum1, sum2, sumd;
2854         struct in_addr in;
2855         ipnat_t *np;
2856         void *ifp2;
2857 #if defined(_KERNEL) && !SOLARIS
2858         int s;
2859 #endif
2860
2861         /*
2862          * Change IP addresses for NAT sessions for any protocol except TCP
2863          * since it will break the TCP connection anyway.
2864          */
2865         SPL_NET(s);
2866         WRITE_ENTER(&ipf_nat);
2867         for (nat = nat_instances; nat; nat = nat->nat_next)
2868                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2869                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2870                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2871                         ifp2 = nat->nat_ifp;
2872                         /*
2873                          * Change the map-to address to be the same as the
2874                          * new one.
2875                          */
2876                         sum1 = nat->nat_outip.s_addr;
2877                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2878                                 nat->nat_outip = in;
2879                         sum2 = nat->nat_outip.s_addr;
2880
2881                         if (sum1 == sum2)
2882                                 continue;
2883                         /*
2884                          * Readjust the checksum adjustment to take into
2885                          * account the new IP#.
2886                          */
2887                         CALC_SUMD(sum1, sum2, sumd);
2888                         /* XXX - dont change for TCP when solaris does
2889                          * hardware checksumming.
2890                          */
2891                         sumd += nat->nat_sumd[0];
2892                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2893                         nat->nat_sumd[1] = nat->nat_sumd[0];
2894                 }
2895
2896         for (n = nat_list; (n != NULL); n = n->in_next)
2897                 if (n->in_ifp == ifp) {
2898                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2899                         if (!n->in_ifp)
2900                                 n->in_ifp = (void *)-1;
2901                 }
2902         RWLOCK_EXIT(&ipf_nat);
2903         SPL_X(s);
2904 }
2905
2906
#ifdef  IPFILTER_LOG
/*
 * Emit a NAT log record of the given type for session "nat" via ipllog().
 * The record carries the session's addresses, ports, protocol and counters.
 * nl_rule is the position of the session's rule in nat_list, or -1 when the
 * session has no rule, the rule is not on the list, or LARGE_NAT is defined.
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
        struct ipnat *np;
        struct natlog rec;
        void *items[1];
        size_t sizes[1];
        int rulen, types[1];

        rec.nl_type = type;
        rec.nl_p = nat->nat_p;
        rec.nl_rule = -1;

        rec.nl_inip = nat->nat_inip;
        rec.nl_outip = nat->nat_outip;
        rec.nl_origip = nat->nat_oip;

        rec.nl_inport = nat->nat_inport;
        rec.nl_outport = nat->nat_outport;
        rec.nl_origport = nat->nat_oport;

        rec.nl_bytes = nat->nat_bytes;
        rec.nl_pkts = nat->nat_pkts;

#ifndef LARGE_NAT
        if (nat->nat_ptr != NULL) {
                /* Walk the rule list counting entries until ours is found. */
                rulen = 0;
                np = nat_list;
                while ((np != NULL) && (np != nat->nat_ptr)) {
                        np = np->in_next;
                        rulen++;
                }
                if (np != NULL)
                        rec.nl_rule = rulen;
        }
#endif

        types[0] = 0;
        sizes[0] = sizeof(rec);
        items[0] = &rec;

        (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2945
2946
#if defined(__OpenBSD__)
/*
 * Interface detach hook (OpenBSD only).  The ifp argument is not examined;
 * the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
        frsync();
}
#endif
2955
2956
2957 /*
2958  * Check for MSS option and clamp it if necessary.
2959  */
2960 static void nat_mssclamp(tcp, maxmss, fin, csump)
2961 tcphdr_t *tcp;
2962 u_32_t maxmss;
2963 fr_info_t *fin;
2964 u_short *csump;
2965 {
2966         u_char *cp, *ep, opt;
2967         int hlen, advance;
2968         u_32_t mss, sumd;
2969         u_short v;
2970
2971         hlen = tcp->th_off << 2;
2972         if (hlen > sizeof(*tcp)) {
2973                 cp = (u_char *)tcp + sizeof(*tcp);
2974                 ep = (u_char *)tcp + hlen;
2975
2976                 while (cp < ep) {
2977                         opt = cp[0];
2978                         if (opt == TCPOPT_EOL)
2979                                 break;
2980                         else if (opt == TCPOPT_NOP) {
2981                                 cp++;
2982                                 continue;
2983                         }
2984  
2985                         if (&cp[1] >= ep)
2986                                 break;
2987                         advance = cp[1];
2988                         if (&cp[advance] >= ep)
2989                                 break;
2990                         switch (opt) {
2991                         case TCPOPT_MAXSEG:
2992                                 if (advance != 4)
2993                                         break;
2994                                 bcopy(&cp[2], &v, sizeof(v));
2995                                 mss = ntohs(v);
2996                                 if (mss > maxmss) {
2997                                         v = htons(maxmss);
2998                                         bcopy(&v, &cp[2], sizeof(v));
2999                                         CALC_SUMD(mss, maxmss, sumd);
3000                                         fix_outcksum(fin, csump, sumd);
3001                                 }
3002                                 break;
3003                         default:
3004                                 /* ignore unknown options */
3005                                 break;
3006                         }
3007                     
3008                         cp += advance;  
3009                 }       
3010         }       
3011 }