Initial import from FreeBSD RELENG_4:
[dragonfly.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  */
8 #if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
9 #define _KERNEL
10 #endif
11
12 #if defined(__sgi) && (IRIX > 602)
13 # include <sys/ptimers.h>
14 #endif
15 #include <sys/errno.h>
16 #include <sys/types.h>
17 #include <sys/param.h>
18 #include <sys/time.h>
19 #include <sys/file.h>
20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
21     defined(_KERNEL)
22 # include "opt_ipfilter_log.h"
23 #endif
24 #if !defined(_KERNEL) && !defined(KERNEL)
25 # include <stdio.h>
26 # include <string.h>
27 # include <stdlib.h>
28 #endif
29 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
30 # include <sys/filio.h>
31 # include <sys/fcntl.h>
32 #else
33 # include <sys/ioctl.h>
34 #endif
35 #include <sys/fcntl.h>
36 #ifndef linux
37 # include <sys/protosw.h>
38 #endif
39 #include <sys/socket.h>
40 #if defined(_KERNEL) && !defined(linux)
41 # include <sys/systm.h>
42 #endif
43 #if !defined(__SVR4) && !defined(__svr4__)
44 # ifndef linux
45 #  include <sys/mbuf.h>
46 # endif
47 #else
48 # include <sys/filio.h>
49 # include <sys/byteorder.h>
50 # ifdef _KERNEL
51 #  include <sys/dditypes.h>
52 # endif
53 # include <sys/stream.h>
54 # include <sys/kmem.h>
55 #endif
56 #if __FreeBSD_version >= 300000
57 # include <sys/queue.h>
58 #endif
59 #include <net/if.h>
60 #if __FreeBSD_version >= 300000
61 # include <net/if_var.h>
62 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
63 #  include "opt_ipfilter.h"
64 # endif
65 #endif
66 #ifdef sun
67 # include <net/af.h>
68 #endif
69 #include <net/route.h>
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/ip.h>
73
74 #ifdef __sgi
75 # ifdef IFF_DRVRLOCK /* IRIX6 */
76 #include <sys/hashing.h>
77 #include <netinet/in_var.h>
78 # endif
79 #endif
80
81 #ifdef RFC1825
82 # include <vpn/md5.h>
83 # include <vpn/ipsec.h>
84 extern struct ifnet vpnif;
85 #endif
86
87 #ifndef linux
88 # include <netinet/ip_var.h>
89 # include <netinet/tcp_fsm.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #if (__FreeBSD_version >= 300000)
102 # include <sys/malloc.h>
103 #endif
104 #ifndef MIN
105 # define        MIN(a,b)        (((a)<(b))?(a):(b))
106 #endif
107 #undef  SOCKADDR_IN
108 #define SOCKADDR_IN     struct sockaddr_in
109
110 #if !defined(lint)
111 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
112 /*static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.70 2002/08/28 12:45:48 darrenr Exp $";*/
113 static const char rcsid[] = "@(#)$FreeBSD: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.22.2.7 2003/03/01 03:55:54 darrenr Exp $";
114 #endif
115
116 nat_t   **nat_table[2] = { NULL, NULL },
117         *nat_instances = NULL;
118 ipnat_t *nat_list = NULL;
119 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
120 u_int   ipf_natrules_sz = NAT_SIZE;
121 u_int   ipf_rdrrules_sz = RDR_SIZE;
122 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
123 u_32_t  nat_masks = 0;
124 u_32_t  rdr_masks = 0;
125 ipnat_t **nat_rules = NULL;
126 ipnat_t **rdr_rules = NULL;
127 hostmap_t       **maptable  = NULL;
128
129 u_long  fr_defnatage = DEF_NAT_AGE,
130         fr_defnaticmpage = 6;           /* 3 seconds */
131 natstat_t nat_stats;
132 int     fr_nat_lock = 0;
133 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
134 extern  kmutex_t        ipf_rw;
135 extern  KRWLOCK_T       ipf_nat;
136 #endif
137
138 static  int     nat_flushtable __P((void));
139 static  void    nat_addnat __P((struct ipnat *));
140 static  void    nat_addrdr __P((struct ipnat *));
141 static  void    nat_delete __P((struct nat *));
142 static  void    nat_delrdr __P((struct ipnat *));
143 static  void    nat_delnat __P((struct ipnat *));
144 static  int     fr_natgetent __P((caddr_t));
145 static  int     fr_natgetsz __P((caddr_t));
146 static  int     fr_natputent __P((caddr_t));
147 static  void    nat_tabmove __P((fr_info_t *, nat_t *));
148 static  int     nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
149 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
150                                     struct in_addr));
151 static  void    nat_hostmapdel __P((struct hostmap *));
152 static  void    nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
153
154
155 int nat_init()
156 {
157         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
158         if (nat_table[0] != NULL)
159                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
160         else
161                 return -1;
162
163         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
164         if (nat_table[1] != NULL)
165                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
166         else
167                 return -1;
168
169         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
170         if (nat_rules != NULL)
171                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
172         else
173                 return -1;
174
175         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
176         if (rdr_rules != NULL)
177                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
178         else
179                 return -1;
180
181         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
182         if (maptable != NULL)
183                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
184         else
185                 return -1;
186         return 0;
187 }
188
189
190 static void nat_addrdr(n)
191 ipnat_t *n;
192 {
193         ipnat_t **np;
194         u_32_t j;
195         u_int hv;
196         int k;
197
198         k = countbits(n->in_outmsk);
199         if ((k >= 0) && (k != 32))
200                 rdr_masks |= 1 << k;
201         j = (n->in_outip & n->in_outmsk);
202         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
203         np = rdr_rules + hv;
204         while (*np != NULL)
205                 np = &(*np)->in_rnext;
206         n->in_rnext = NULL;
207         n->in_prnext = np;
208         *np = n;
209 }
210
211
212 static void nat_addnat(n)
213 ipnat_t *n;
214 {
215         ipnat_t **np;
216         u_32_t j;
217         u_int hv;
218         int k;
219
220         k = countbits(n->in_inmsk);
221         if ((k >= 0) && (k != 32))
222                 nat_masks |= 1 << k;
223         j = (n->in_inip & n->in_inmsk);
224         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
225         np = nat_rules + hv;
226         while (*np != NULL)
227                 np = &(*np)->in_mnext;
228         n->in_mnext = NULL;
229         n->in_pmnext = np;
230         *np = n;
231 }
232
233
234 static void nat_delrdr(n)
235 ipnat_t *n;
236 {
237         if (n->in_rnext)
238                 n->in_rnext->in_prnext = n->in_prnext;
239         *n->in_prnext = n->in_rnext;
240 }
241
242
243 static void nat_delnat(n)
244 ipnat_t *n;
245 {
246         if (n->in_mnext)
247                 n->in_mnext->in_pmnext = n->in_pmnext;
248         *n->in_pmnext = n->in_mnext;
249 }
250
251
252 /*
253  * check if an ip address has already been allocated for a given mapping that
254  * is not doing port based translation.
255  *
256  * Must be called with ipf_nat held as a write lock.
257  */
258 static struct hostmap *nat_hostmap(np, real, map)
259 ipnat_t *np;
260 struct in_addr real;
261 struct in_addr map;
262 {
263         hostmap_t *hm;
264         u_int hv;
265
266         hv = real.s_addr % HOSTMAP_SIZE;
267         for (hm = maptable[hv]; hm; hm = hm->hm_next)
268                 if ((hm->hm_realip.s_addr == real.s_addr) &&
269                     (np == hm->hm_ipnat)) {
270                         hm->hm_ref++;
271                         return hm;
272                 }
273
274         KMALLOC(hm, hostmap_t *);
275         if (hm) {
276                 hm->hm_next = maptable[hv];
277                 hm->hm_pnext = maptable + hv;
278                 if (maptable[hv])
279                         maptable[hv]->hm_pnext = &hm->hm_next;
280                 maptable[hv] = hm;
281                 hm->hm_ipnat = np;
282                 hm->hm_realip = real;
283                 hm->hm_mapip = map;
284                 hm->hm_ref = 1;
285         }
286         return hm;
287 }
288
289
290 /*
291  * Must be called with ipf_nat held as a write lock.
292  */
293 static void nat_hostmapdel(hm)
294 struct hostmap *hm;
295 {
296         ATOMIC_DEC32(hm->hm_ref);
297         if (hm->hm_ref == 0) {
298                 if (hm->hm_next)
299                         hm->hm_next->hm_pnext = hm->hm_pnext;
300                 *hm->hm_pnext = hm->hm_next;
301                 KFREE(hm);
302         }
303 }
304
305
306 void fix_outcksum(fin, sp, n)
307 fr_info_t *fin;
308 u_short *sp;
309 u_32_t n;
310 {
311         register u_short sumshort;
312         register u_32_t sum1;
313
314         if (!n)
315                 return;
316         else if (n & NAT_HW_CKSUM) {
317                 n &= 0xffff;
318                 n += fin->fin_dlen;
319                 n = (n & 0xffff) + (n >> 16);
320                 *sp = n & 0xffff;
321                 return;
322         }
323         sum1 = (~ntohs(*sp)) & 0xffff;
324         sum1 += (n);
325         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
326         /* Again */
327         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328         sumshort = ~(u_short)sum1;
329         *(sp) = htons(sumshort);
330 }
331
332
333 void fix_incksum(fin, sp, n)
334 fr_info_t *fin;
335 u_short *sp;
336 u_32_t n;
337 {
338         register u_short sumshort;
339         register u_32_t sum1;
340
341         if (!n)
342                 return;
343         else if (n & NAT_HW_CKSUM) {
344                 n &= 0xffff;
345                 n += fin->fin_dlen;
346                 n = (n & 0xffff) + (n >> 16);
347                 *sp = n & 0xffff;
348                 return;
349         }
350 #ifdef sparc
351         sum1 = (~(*sp)) & 0xffff;
352 #else
353         sum1 = (~ntohs(*sp)) & 0xffff;
354 #endif
355         sum1 += ~(n) & 0xffff;
356         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
357         /* Again */
358         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359         sumshort = ~(u_short)sum1;
360         *(sp) = htons(sumshort);
361 }
362
363
364 /*
365  * fix_datacksum is used *only* for the adjustments of checksums in the data
366  * section of an IP packet.
367  *
368  * The only situation in which you need to do this is when NAT'ing an
369  * ICMP error message. Such a message contains in its body the IP header
370  * of the original IP packet that caused the error.
371  *
372  * You can't use fix_incksum or fix_outcksum in that case, because for the
373  * kernel the data section of the ICMP error is just data, and no special 
374  * processing like hardware cksum or ntohs processing have been done by the 
375  * kernel on the data section.
376  */
377 void fix_datacksum(sp, n)
378 u_short *sp;
379 u_32_t n;
380 {
381         register u_short sumshort;
382         register u_32_t sum1;
383
384         if (!n)
385                 return;
386
387         sum1 = (~ntohs(*sp)) & 0xffff;
388         sum1 += (n);
389         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
390         /* Again */
391         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392         sumshort = ~(u_short)sum1;
393         *(sp) = htons(sumshort);
394 }
395
396 /*
397  * How the NAT is organised and works.
398  *
399  * Inside (interface y) NAT       Outside (interface x)
400  * -------------------- -+- -------------------------------------
401  * Packet going          |   out, processed by ip_natout() for x
402  * ------------>         |   ------------>
403  * src=10.1.1.1          |   src=192.1.1.1
404  *                       |
405  *                       |   in, processed by ip_natin() for x
406  * <------------         |   <------------
407  * dst=10.1.1.1          |   dst=192.1.1.1
408  * -------------------- -+- -------------------------------------
409  * ip_natout() - changes ip_src and if required, sport
410  *             - creates a new mapping, if required.
411  * ip_natin()  - changes ip_dst and if required, dport
412  *
413  * In the NAT table, internal source is recorded as "in" and externally
414  * seen as "out".
415  */
416
417 /*
418  * Handle ioctls which manipulate the NAT.
419  */
420 int nat_ioctl(data, cmd, mode)
421 #if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
422 u_long cmd;
423 #else
424 int cmd;
425 #endif
426 caddr_t data;
427 int mode;
428 {
429         register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
430         int error = 0, ret, arg, getlock;
431         ipnat_t natd;
432         u_32_t i, j;
433
434 #if (BSD >= 199306) && defined(_KERNEL)
435         if ((securelevel >= 3) && (mode & FWRITE))
436                 return EPERM;
437 #endif
438
439         nat = NULL;     /* XXX gcc -Wuninitialized */
440         KMALLOC(nt, ipnat_t *);
441         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
442         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
443                 if (mode & NAT_SYSSPACE) {
444                         bcopy(data, (char *)&natd, sizeof(natd));
445                         error = 0;
446                 } else {
447                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
448                 }
449         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
450                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
451                 if (error)
452                         error = EFAULT;
453         }
454
455         if (error)
456                 goto done;
457
458         /*
459          * For add/delete, look to see if the NAT entry is already present
460          */
461         if (getlock == 1) {
462                 WRITE_ENTER(&ipf_nat);
463         }
464         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
465                 nat = &natd;
466                 nat->in_flags &= IPN_USERFLAGS;
467                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
468                         if ((nat->in_flags & IPN_SPLIT) == 0)
469                                 nat->in_inip &= nat->in_inmsk;
470                         if ((nat->in_flags & IPN_IPRANGE) == 0)
471                                 nat->in_outip &= nat->in_outmsk;
472                 }
473                 for (np = &nat_list; (n = *np); np = &n->in_next)
474                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
475                                         IPN_CMPSIZ)) {
476                                 if (n->in_redir == NAT_REDIRECT &&
477                                     n->in_pnext != nat->in_pnext)
478                                         continue;
479                                 break;
480                         }
481         }
482
483         switch (cmd)
484         {
485 #ifdef  IPFILTER_LOG
486         case SIOCIPFFB :
487         {
488                 int tmp;
489
490                 if (!(mode & FWRITE))
491                         error = EPERM;
492                 else {
493                         tmp = ipflog_clear(IPL_LOGNAT);
494                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
495                 }
496                 break;
497         }
498 #endif
499         case SIOCADNAT :
500                 if (!(mode & FWRITE)) {
501                         error = EPERM;
502                         break;
503                 }
504                 if (n) {
505                         error = EEXIST;
506                         break;
507                 }
508                 if (nt == NULL) {
509                         error = ENOMEM;
510                         break;
511                 }
512                 n = nt;
513                 nt = NULL;
514                 bcopy((char *)nat, (char *)n, sizeof(*n));
515                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
516                 if (!n->in_ifp)
517                         n->in_ifp = (void *)-1;
518                 if (n->in_plabel[0] != '\0') {
519                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
520                         if (!n->in_apr) {
521                                 error = ENOENT;
522                                 break;
523                         }
524                 }
525                 n->in_next = NULL;
526                 *np = n;
527
528                 if (n->in_redir & NAT_REDIRECT) {
529                         n->in_flags &= ~IPN_NOTDST;
530                         nat_addrdr(n);
531                 }
532                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
533                         n->in_flags &= ~IPN_NOTSRC;
534                         nat_addnat(n);
535                 }
536
537                 n->in_use = 0;
538                 if (n->in_redir & NAT_MAPBLK)
539                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
540                 else if (n->in_flags & IPN_AUTOPORTMAP)
541                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
542                 else if (n->in_flags & IPN_IPRANGE)
543                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
544                 else if (n->in_flags & IPN_SPLIT)
545                         n->in_space = 2;
546                 else
547                         n->in_space = ~ntohl(n->in_outmsk);
548                 /*
549                  * Calculate the number of valid IP addresses in the output
550                  * mapping range.  In all cases, the range is inclusive of
551                  * the start and ending IP addresses.
552                  * If to a CIDR address, lose 2: broadcast + network address
553                  *                               (so subtract 1)
554                  * If to a range, add one.
555                  * If to a single IP address, set to 1.
556                  */
557                 if (n->in_space) {
558                         if ((n->in_flags & IPN_IPRANGE) != 0)
559                                 n->in_space += 1;
560                         else
561                                 n->in_space -= 1;
562                 } else
563                         n->in_space = 1;
564                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
565                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
566                         n->in_nip = ntohl(n->in_outip) + 1;
567                 else if ((n->in_flags & IPN_SPLIT) &&
568                          (n->in_redir & NAT_REDIRECT))
569                         n->in_nip = ntohl(n->in_inip);
570                 else
571                         n->in_nip = ntohl(n->in_outip);
572                 if (n->in_redir & NAT_MAP) {
573                         n->in_pnext = ntohs(n->in_pmin);
574                         /*
575                          * Multiply by the number of ports made available.
576                          */
577                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
578                                 n->in_space *= (ntohs(n->in_pmax) -
579                                                 ntohs(n->in_pmin) + 1);
580                                 /*
581                                  * Because two different sources can map to
582                                  * different destinations but use the same
583                                  * local IP#/port #.
584                                  * If the result is smaller than in_space, then
585                                  * we may have wrapped around 32bits.
586                                  */
587                                 i = n->in_inmsk;
588                                 if ((i != 0) && (i != 0xffffffff)) {
589                                         j = n->in_space * (~ntohl(i) + 1);
590                                         if (j >= n->in_space)
591                                                 n->in_space = j;
592                                         else
593                                                 n->in_space = 0xffffffff;
594                                 }
595                         }
596                         /*
597                          * If no protocol is specified, multiple by 256.
598                          */
599                         if ((n->in_flags & IPN_TCPUDP) == 0) {
600                                         j = n->in_space * 256;
601                                         if (j >= n->in_space)
602                                                 n->in_space = j;
603                                         else
604                                                 n->in_space = 0xffffffff;
605                         }
606                 }
607                 /* Otherwise, these fields are preset */
608                 n = NULL;
609                 nat_stats.ns_rules++;
610                 break;
611         case SIOCRMNAT :
612                 if (!(mode & FWRITE)) {
613                         error = EPERM;
614                         n = NULL;
615                         break;
616                 }
617                 if (!n) {
618                         error = ESRCH;
619                         break;
620                 }
621                 if (n->in_redir & NAT_REDIRECT)
622                         nat_delrdr(n);
623                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
624                         nat_delnat(n);
625                 if (nat_list == NULL) {
626                         nat_masks = 0;
627                         rdr_masks = 0;
628                 }
629                 *np = n->in_next;
630                 if (!n->in_use) {
631                         if (n->in_apr)
632                                 appr_free(n->in_apr);
633                         KFREE(n);
634                         nat_stats.ns_rules--;
635                 } else {
636                         n->in_flags |= IPN_DELETE;
637                         n->in_next = NULL;
638                 }
639                 n = NULL;
640                 break;
641         case SIOCGNATS :
642                 MUTEX_DOWNGRADE(&ipf_nat);
643                 nat_stats.ns_table[0] = nat_table[0];
644                 nat_stats.ns_table[1] = nat_table[1];
645                 nat_stats.ns_list = nat_list;
646                 nat_stats.ns_maptable = maptable;
647                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
648                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
649                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
650                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
651                 nat_stats.ns_instances = nat_instances;
652                 nat_stats.ns_apslist = ap_sess_list;
653                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
654                                   sizeof(nat_stats));
655                 break;
656         case SIOCGNATL :
657             {
658                 natlookup_t nl;
659
660                 MUTEX_DOWNGRADE(&ipf_nat);
661                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
662                 if (error)
663                         break;
664
665                 if (nat_lookupredir(&nl)) {
666                         error = IWCOPYPTR((char *)&nl, (char *)data,
667                                           sizeof(nl));
668                 } else
669                         error = ESRCH;
670                 break;
671             }
672         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
673                 if (!(mode & FWRITE)) {
674                         error = EPERM;
675                         break;
676                 }
677                 error = 0;
678                 if (arg == 0)
679                         ret = nat_flushtable();
680                 else if (arg == 1)
681                         ret = nat_clearlist();
682                 else
683                         error = EINVAL;
684                 MUTEX_DOWNGRADE(&ipf_nat);
685                 if (!error) {
686                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
687                         if (error)
688                                 error = EFAULT;
689                 }
690                 break;
691         case SIOCSTLCK :
692                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
693                 if (!error) {
694                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
695                                         sizeof(fr_nat_lock));
696                         if (!error)
697                                 fr_nat_lock = arg;
698                 } else
699                         error = EFAULT;
700                 break;
701         case SIOCSTPUT :
702                 if (fr_nat_lock)
703                         error = fr_natputent(data);
704                 else
705                         error = EACCES;
706                 break;
707         case SIOCSTGSZ :
708                 if (fr_nat_lock)
709                         error = fr_natgetsz(data);
710                 else
711                         error = EACCES;
712                 break;
713         case SIOCSTGET :
714                 if (fr_nat_lock)
715                         error = fr_natgetent(data);
716                 else
717                         error = EACCES;
718                 break;
719         case FIONREAD :
720 #ifdef  IPFILTER_LOG
721                 arg = (int)iplused[IPL_LOGNAT];
722                 MUTEX_DOWNGRADE(&ipf_nat);
723                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
724                 if (error)
725                         error = EFAULT;
726 #endif
727                 break;
728         default :
729                 error = EINVAL;
730                 break;
731         }
732         if (getlock == 1) {
733                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
734         }
735 done:
736         if (nt)
737                 KFREE(nt);
738         return error;
739 }
740
741
742 static int fr_natgetsz(data)
743 caddr_t data;
744 {
745         ap_session_t *aps;
746         nat_t *nat, *n;
747         int error = 0;
748         natget_t ng;
749
750         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
751         if (error)
752                 return EFAULT;
753
754         nat = ng.ng_ptr;
755         if (!nat) {
756                 nat = nat_instances;
757                 ng.ng_sz = 0;
758                 if (nat == NULL) {
759                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
760                         if (error)
761                                 error = EFAULT;
762                         return error;
763                 }
764         } else {
765                 /*
766                  * Make sure the pointer we're copying from exists in the
767                  * current list of entries.  Security precaution to prevent
768                  * copying of random kernel data.
769                  */
770                 for (n = nat_instances; n; n = n->nat_next)
771                         if (n == nat)
772                                 break;
773                 if (!n)
774                         return ESRCH;
775         }
776
777         ng.ng_sz = sizeof(nat_save_t);
778         aps = nat->nat_aps;
779         if ((aps != NULL) && (aps->aps_data != 0)) {
780                 ng.ng_sz += sizeof(ap_session_t);
781                 ng.ng_sz += aps->aps_psiz;
782         }
783
784         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
785         if (error)
786                 error = EFAULT;
787         return error;
788 }
789
790
791 static int fr_natgetent(data)
792 caddr_t data;
793 {
794         nat_save_t ipn, *ipnp, *ipnn = NULL;
795         register nat_t *n, *nat;
796         ap_session_t *aps;
797         int error;
798
799         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
800         if (error)
801                 return EFAULT;
802         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
803         if (error)
804                 return EFAULT;
805
806         nat = ipn.ipn_next;
807         if (!nat) {
808                 nat = nat_instances;
809                 if (nat == NULL) {
810                         if (nat_instances == NULL)
811                                 return ENOENT;
812                         return 0;
813                 }
814         } else {
815                 /*
816                  * Make sure the pointer we're copying from exists in the
817                  * current list of entries.  Security precaution to prevent
818                  * copying of random kernel data.
819                  */
820                 for (n = nat_instances; n; n = n->nat_next)
821                         if (n == nat)
822                                 break;
823                 if (!n)
824                         return ESRCH;
825         }
826
827         ipn.ipn_next = nat->nat_next;
828         ipn.ipn_dsize = 0;
829         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
830         ipn.ipn_nat.nat_data = NULL;
831
832         if (nat->nat_ptr) {
833                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
834                       sizeof(ipn.ipn_ipnat));
835         }
836
837         if (nat->nat_fr)
838                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
839                       sizeof(ipn.ipn_rule));
840
841         if ((aps = nat->nat_aps)) {
842                 ipn.ipn_dsize = sizeof(*aps);
843                 if (aps->aps_data)
844                         ipn.ipn_dsize += aps->aps_psiz;
845                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
846                 if (ipnn == NULL)
847                         return ENOMEM;
848                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
849
850                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
851                 if (aps->aps_data) {
852                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
853                               aps->aps_psiz);
854                         ipnn->ipn_dsize += aps->aps_psiz;
855                 }
856                 error = IWCOPY((caddr_t)ipnn, ipnp,
857                                sizeof(ipn) + ipn.ipn_dsize);
858                 if (error)
859                         error = EFAULT;
860                 KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
861         } else {
862                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
863                 if (error)
864                         error = EFAULT;
865         }
866         return error;
867 }
868
869
870 static int fr_natputent(data)
871 caddr_t data;
872 {
873         nat_save_t ipn, *ipnp, *ipnn = NULL;
874         register nat_t *n, *nat;
875         ap_session_t *aps;
876         frentry_t *fr;
877         ipnat_t *in;
878
879         int error;
880
881         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
882         if (error)
883                 return EFAULT;
884         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
885         if (error)
886                 return EFAULT;
887         nat = NULL;
888         if (ipn.ipn_dsize) {
889                 KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
890                 if (ipnn == NULL)
891                         return ENOMEM;
892                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
893                 error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
894                                ipn.ipn_dsize);
895                 if (error) {
896                         error = EFAULT;
897                         goto junkput;
898                 }
899         } else
900                 ipnn = NULL;
901
902         KMALLOC(nat, nat_t *);
903         if (nat == NULL) {
904                 error = EFAULT;
905                 goto junkput;
906         }
907
908         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
909         /*
910          * Initialize all these so that nat_delete() doesn't cause a crash.
911          */
912         nat->nat_phnext[0] = NULL;
913         nat->nat_phnext[1] = NULL;
914         fr = nat->nat_fr;
915         nat->nat_fr = NULL;
916         aps = nat->nat_aps;
917         nat->nat_aps = NULL;
918         in = nat->nat_ptr;
919         nat->nat_ptr = NULL;
920         nat->nat_hm = NULL;
921         nat->nat_data = NULL;
922         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
923
924         /*
925          * Restore the rule associated with this nat session
926          */
927         if (in) {
928                 KMALLOC(in, ipnat_t *);
929                 if (in == NULL) {
930                         error = ENOMEM;
931                         goto junkput;
932                 }
933                 nat->nat_ptr = in;
934                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
935                 in->in_use = 1;
936                 in->in_flags |= IPN_DELETE;
937                 in->in_next = NULL;
938                 in->in_rnext = NULL;
939                 in->in_prnext = NULL;
940                 in->in_mnext = NULL;
941                 in->in_pmnext = NULL;
942                 in->in_ifp = GETUNIT(in->in_ifname, 4);
943                 if (in->in_plabel[0] != '\0') {
944                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
945                 }
946         }
947
948         /*
949          * Restore ap_session_t structure.  Include the private data allocated
950          * if it was there.
951          */
952         if (aps) {
953                 KMALLOC(aps, ap_session_t *);
954                 if (aps == NULL) {
955                         error = ENOMEM;
956                         goto junkput;
957                 }
958                 nat->nat_aps = aps;
959                 aps->aps_next = ap_sess_list;
960                 ap_sess_list = aps;
961                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
962                 if (in)
963                         aps->aps_apr = in->in_apr;
964                 if (aps->aps_psiz) {
965                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
966                         if (aps->aps_data == NULL) {
967                                 error = ENOMEM;
968                                 goto junkput;
969                         }
970                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
971                               aps->aps_psiz);
972                 } else {
973                         aps->aps_psiz = 0;
974                         aps->aps_data = NULL;
975                 }
976         }
977
978         /*
979          * If there was a filtering rule associated with this entry then
980          * build up a new one.
981          */
982         if (fr != NULL) {
983                 if (nat->nat_flags & FI_NEWFR) {
984                         KMALLOC(fr, frentry_t *);
985                         nat->nat_fr = fr;
986                         if (fr == NULL) {
987                                 error = ENOMEM;
988                                 goto junkput;
989                         }
990                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
991                         ipn.ipn_nat.nat_fr = fr;
992                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
993                         if (error) {
994                                 error = EFAULT;
995                                 goto junkput;
996                         }
997                 } else {
998                         for (n = nat_instances; n; n = n->nat_next)
999                                 if (n->nat_fr == fr)
1000                                         break;
1001                         if (!n) {
1002                                 error = ESRCH;
1003                                 goto junkput;
1004                         }
1005                 }
1006         }
1007
1008         if (ipnn)
1009                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1010         nat_insert(nat);
1011         return 0;
1012 junkput:
1013         if (ipnn)
1014                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1015         if (nat)
1016                 nat_delete(nat);
1017         return error;
1018 }
1019
1020
1021 /*
1022  * Delete a nat entry from the various lists and table.
1023  */
1024 static void nat_delete(natd)
1025 struct nat *natd;
1026 {
1027         struct ipnat *ipn;
1028
1029         if (natd->nat_flags & FI_WILDP)
1030                 nat_stats.ns_wilds--;
1031         if (natd->nat_hnext[0])
1032                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1033         *natd->nat_phnext[0] = natd->nat_hnext[0];
1034         if (natd->nat_hnext[1])
1035                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1036         *natd->nat_phnext[1] = natd->nat_hnext[1];
1037         if (natd->nat_me != NULL)
1038                 *natd->nat_me = NULL;
1039
1040         if (natd->nat_fr != NULL) {
1041                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1042         }
1043
1044         if (natd->nat_hm != NULL)
1045                 nat_hostmapdel(natd->nat_hm);
1046
1047         /*
1048          * If there is an active reference from the nat entry to its parent
1049          * rule, decrement the rule's reference count and free it too if no
1050          * longer being used.
1051          */
1052         ipn = natd->nat_ptr;
1053         if (ipn != NULL) {
1054                 ipn->in_space++;
1055                 ipn->in_use--;
1056                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1057                         if (ipn->in_apr)
1058                                 appr_free(ipn->in_apr);
1059                         KFREE(ipn);
1060                         nat_stats.ns_rules--;
1061                 }
1062         }
1063
1064         MUTEX_DESTROY(&natd->nat_lock);
1065         /*
1066          * If there's a fragment table entry too for this nat entry, then
1067          * dereference that as well.
1068          */
1069         ipfr_forget((void *)natd);
1070         aps_free(natd->nat_aps);
1071         nat_stats.ns_inuse--;
1072         KFREE(natd);
1073 }
1074
1075
1076 /*
1077  * nat_flushtable - clear the NAT table of all mapping entries.
1078  * (this is for the dynamic mappings)
1079  */
1080 static int nat_flushtable()
1081 {
1082         register nat_t *nat, **natp;
1083         register int j = 0;
1084
1085         /*
1086          * ALL NAT mappings deleted, so lets just make the deletions
1087          * quicker.
1088          */
1089         if (nat_table[0] != NULL)
1090                 bzero((char *)nat_table[0],
1091                       sizeof(nat_table[0]) * ipf_nattable_sz);
1092         if (nat_table[1] != NULL)
1093                 bzero((char *)nat_table[1],
1094                       sizeof(nat_table[1]) * ipf_nattable_sz);
1095
1096         for (natp = &nat_instances; (nat = *natp); ) {
1097                 *natp = nat->nat_next;
1098 #ifdef  IPFILTER_LOG
1099                 nat_log(nat, NL_FLUSH);
1100 #endif
1101                 nat_delete(nat);
1102                 j++;
1103         }
1104         nat_stats.ns_inuse = 0;
1105         return j;
1106 }
1107
1108
1109 /*
1110  * nat_clearlist - delete all rules in the active NAT mapping list.
1111  * (this is for NAT/RDR rules)
1112  */
1113 int nat_clearlist()
1114 {
1115         register ipnat_t *n, **np = &nat_list;
1116         int i = 0;
1117
1118         if (nat_rules != NULL)
1119                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1120         if (rdr_rules != NULL)
1121                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1122
1123         while ((n = *np)) {
1124                 *np = n->in_next;
1125                 if (!n->in_use) {
1126                         if (n->in_apr)
1127                                 appr_free(n->in_apr);
1128                         KFREE(n);
1129                         nat_stats.ns_rules--;
1130                 } else {
1131                         n->in_flags |= IPN_DELETE;
1132                         n->in_next = NULL;
1133                 }
1134                 i++;
1135         }
1136         nat_masks = 0;
1137         rdr_masks = 0;
1138         return i;
1139 }
1140
1141
1142 /*
1143  * Create a new NAT table entry.
1144  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1145  *       If you intend on changing this, beware: appr_new() may call nat_new()
1146  *       recursively!
1147  */
1148 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1149 fr_info_t *fin;
1150 ip_t *ip;
1151 ipnat_t *np;
1152 nat_t **natsave;
1153 u_int flags;
1154 int direction;
1155 {
1156         register u_32_t sum1, sum2, sumd, l;
1157         u_short port = 0, sport = 0, dport = 0, nport = 0;
1158         struct in_addr in, inb;
1159         u_short nflags, sp, dp;
1160         tcphdr_t *tcp = NULL;
1161         hostmap_t *hm = NULL;
1162         nat_t *nat, *natl;
1163 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1164         qif_t *qf = fin->fin_qif;
1165 #endif
1166
1167         nflags = flags & np->in_flags;
1168         if (flags & IPN_TCPUDP) {
1169                 tcp = (tcphdr_t *)fin->fin_dp;
1170                 sport = htons(fin->fin_data[0]);
1171                 dport = htons(fin->fin_data[1]);
1172         }
1173
1174         /* Give me a new nat */
1175         KMALLOC(nat, nat_t *);
1176         if (nat == NULL) {
1177                 nat_stats.ns_memfail++;
1178                 return NULL;
1179         }
1180
1181         bzero((char *)nat, sizeof(*nat));
1182         nat->nat_flags = flags;
1183         if (flags & FI_WILDP)
1184                 nat_stats.ns_wilds++;
1185         /*
1186          * Search the current table for a match.
1187          */
1188         if (direction == NAT_OUTBOUND) {
1189                 /*
1190                  * Values at which the search for a free resouce starts.
1191                  */
1192                 u_32_t st_ip;
1193                 u_short st_port;
1194
1195                 /*
1196                  * If it's an outbound packet which doesn't match any existing
1197                  * record, then create a new port
1198                  */
1199                 l = 0;
1200                 st_ip = np->in_nip;
1201                 st_port = np->in_pnext;
1202
1203                 do {
1204                         port = 0;
1205                         in.s_addr = htonl(np->in_nip);
1206                         if (l == 0) {
1207                                 /*
1208                                  * Check to see if there is an existing NAT
1209                                  * setup for this IP address pair.
1210                                  */
1211                                 hm = nat_hostmap(np, fin->fin_src, in);
1212                                 if (hm != NULL)
1213                                         in.s_addr = hm->hm_mapip.s_addr;
1214                         } else if ((l == 1) && (hm != NULL)) {
1215                                 nat_hostmapdel(hm);
1216                                 hm = NULL;
1217                         }
1218                         in.s_addr = ntohl(in.s_addr);
1219
1220                         nat->nat_hm = hm;
1221
1222                         if ((np->in_outmsk == 0xffffffff) &&
1223                             (np->in_pnext == 0)) {
1224                                 if (l > 0)
1225                                         goto badnat;
1226                         }
1227
1228                         if (np->in_redir & NAT_MAPBLK) {
1229                                 if ((l >= np->in_ppip) || ((l > 0) &&
1230                                      !(flags & IPN_TCPUDP)))
1231                                         goto badnat;
1232                                 /*
1233                                  * map-block - Calculate destination address.
1234                                  */
1235                                 in.s_addr = ntohl(fin->fin_saddr);
1236                                 in.s_addr &= ntohl(~np->in_inmsk);
1237                                 inb.s_addr = in.s_addr;
1238                                 in.s_addr /= np->in_ippip;
1239                                 in.s_addr &= ntohl(~np->in_outmsk);
1240                                 in.s_addr += ntohl(np->in_outip);
1241                                 /*
1242                                  * Calculate destination port.
1243                                  */
1244                                 if ((flags & IPN_TCPUDP) &&
1245                                     (np->in_ppip != 0)) {
1246                                         port = ntohs(sport) + l;
1247                                         port %= np->in_ppip;
1248                                         port += np->in_ppip *
1249                                                 (inb.s_addr % np->in_ippip);
1250                                         port += MAPBLK_MINPORT;
1251                                         port = htons(port);
1252                                 }
1253                         } else if (!np->in_outip &&
1254                                    (np->in_outmsk == 0xffffffff)) {
1255                                 /*
1256                                  * 0/32 - use the interface's IP address.
1257                                  */
1258                                 if ((l > 0) ||
1259                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1260                                         goto badnat;
1261                                 in.s_addr = ntohl(in.s_addr);
1262                         } else if (!np->in_outip && !np->in_outmsk) {
1263                                 /*
1264                                  * 0/0 - use the original source address/port.
1265                                  */
1266                                 if (l > 0)
1267                                         goto badnat;
1268                                 in.s_addr = ntohl(fin->fin_saddr);
1269                         } else if ((np->in_outmsk != 0xffffffff) &&
1270                                    (np->in_pnext == 0) &&
1271                                    ((l > 0) || (hm == NULL)))
1272                                 np->in_nip++;
1273                         natl = NULL;
1274
1275                         if ((nflags & IPN_TCPUDP) &&
1276                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1277                             (np->in_flags & IPN_AUTOPORTMAP)) {
1278                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1279                                         if (l > np->in_space) {
1280                                                 goto badnat;
1281                                         } else if ((l > np->in_ppip) &&
1282                                                    np->in_outmsk != 0xffffffff)
1283                                                 np->in_nip++;
1284                                 }
1285                                 if (np->in_ppip != 0) {
1286                                         port = ntohs(sport);
1287                                         port += (l % np->in_ppip);
1288                                         port %= np->in_ppip;
1289                                         port += np->in_ppip *
1290                                                 (ntohl(fin->fin_saddr) %
1291                                                  np->in_ippip);
1292                                         port += MAPBLK_MINPORT;
1293                                         port = htons(port);
1294                                 }
1295                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1296                                    (nflags & IPN_TCPUDP) &&
1297                                    (np->in_pnext != 0)) {
1298                                 port = htons(np->in_pnext++);
1299                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1300                                         np->in_pnext = ntohs(np->in_pmin);
1301                                         if (np->in_outmsk != 0xffffffff)
1302                                                 np->in_nip++;
1303                                 }
1304                         }
1305
1306                         if (np->in_flags & IPN_IPRANGE) {
1307                                 if (np->in_nip > ntohl(np->in_outmsk))
1308                                         np->in_nip = ntohl(np->in_outip);
1309                         } else {
1310                                 if ((np->in_outmsk != 0xffffffff) &&
1311                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1312                                     ntohl(np->in_outip))
1313                                         np->in_nip = ntohl(np->in_outip) + 1;
1314                         }
1315
1316                         if (!port && (flags & IPN_TCPUDP))
1317                                 port = sport;
1318
1319                         /*
1320                          * Here we do a lookup of the connection as seen from
1321                          * the outside.  If an IP# pair already exists, try
1322                          * again.  So if you have A->B becomes C->B, you can
1323                          * also have D->E become C->E but not D->B causing
1324                          * another C->B.  Also take protocol and ports into
1325                          * account when determining whether a pre-existing
1326                          * NAT setup will cause an external conflict where
1327                          * this is appropriate.
1328                          */
1329                         inb.s_addr = htonl(in.s_addr);
1330                         sp = fin->fin_data[0];
1331                         dp = fin->fin_data[1];
1332                         fin->fin_data[0] = fin->fin_data[1];
1333                         fin->fin_data[1] = htons(port);
1334                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1335                                             (u_int)fin->fin_p, fin->fin_dst,
1336                                             inb, 1);
1337                         fin->fin_data[0] = sp;
1338                         fin->fin_data[1] = dp;
1339
1340                         /*
1341                          * Has the search wrapped around and come back to the
1342                          * start ?
1343                          */
1344                         if ((natl != NULL) &&
1345                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1346                             (np->in_nip != 0) && (st_ip == np->in_nip))
1347                                 goto badnat;
1348                         l++;
1349                 } while (natl != NULL);
1350
1351                 if (np->in_space > 0)
1352                         np->in_space--;
1353
1354                 /* Setup the NAT table */
1355                 nat->nat_inip = fin->fin_src;
1356                 nat->nat_outip.s_addr = htonl(in.s_addr);
1357                 nat->nat_oip = fin->fin_dst;
1358                 if (nat->nat_hm == NULL)
1359                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1360                                                   nat->nat_outip);
1361
1362                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1363                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1364
1365                 if (flags & IPN_TCPUDP) {
1366                         nat->nat_inport = sport;
1367                         nat->nat_outport = port;        /* sport */
1368                         nat->nat_oport = dport;
1369                 }
1370         } else {
1371                 /*
1372                  * Otherwise, it's an inbound packet. Most likely, we don't
1373                  * want to rewrite source ports and source addresses. Instead,
1374                  * we want to rewrite to a fixed internal address and fixed
1375                  * internal port.
1376                  */
1377                 if (np->in_flags & IPN_SPLIT) {
1378                         in.s_addr = np->in_nip;
1379                         if (np->in_inip == htonl(in.s_addr))
1380                                 np->in_nip = ntohl(np->in_inmsk);
1381                         else {
1382                                 np->in_nip = ntohl(np->in_inip);
1383                                 if (np->in_flags & IPN_ROUNDR) {
1384                                         nat_delrdr(np);
1385                                         nat_addrdr(np);
1386                                 }
1387                         }
1388                 } else {
1389                         in.s_addr = ntohl(np->in_inip);
1390                         if (np->in_flags & IPN_ROUNDR) {
1391                                 nat_delrdr(np);
1392                                 nat_addrdr(np);
1393                         }
1394                 }
1395                 if (!np->in_pnext)
1396                         nport = dport;
1397                 else {
1398                         /*
1399                          * Whilst not optimized for the case where
1400                          * pmin == pmax, the gain is not significant.
1401                          */
1402                         if (np->in_pmin != np->in_pmax) {
1403                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1404                                         ntohs(np->in_pnext);
1405                                 nport = ntohs(nport);
1406                         } else
1407                                 nport = np->in_pnext;
1408                 }
1409
1410                 /*
1411                  * When the redirect-to address is set to 0.0.0.0, just
1412                  * assume a blank `forwarding' of the packet.
1413                  */
1414                 if (in.s_addr == 0)
1415                         in.s_addr = ntohl(fin->fin_daddr);
1416
1417                 nat->nat_inip.s_addr = htonl(in.s_addr);
1418                 nat->nat_outip = fin->fin_dst;
1419                 nat->nat_oip = fin->fin_src;
1420
1421                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1422                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1423
1424                 if (flags & IPN_TCPUDP) {
1425                         nat->nat_inport = nport;
1426                         nat->nat_outport = dport;
1427                         nat->nat_oport = sport;
1428                 }
1429         }
1430
1431         CALC_SUMD(sum1, sum2, sumd);
1432         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1433 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1434         if ((flags & IPN_TCPUDP) && dohwcksum &&
1435             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1436                 if (direction == NAT_OUTBOUND)
1437                         sum1 = LONG_SUM(ntohl(in.s_addr));
1438                 else
1439                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1440                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1441                 sum1 += IPPROTO_TCP;
1442                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1443                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1444         } else
1445 #endif
1446                 nat->nat_sumd[1] = nat->nat_sumd[0];
1447
1448         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1449                 if (direction == NAT_OUTBOUND)
1450                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1451                 else
1452                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1453
1454                 sum2 = LONG_SUM(in.s_addr);
1455
1456                 CALC_SUMD(sum1, sum2, sumd);
1457                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1458         } else
1459                 nat->nat_ipsumd = nat->nat_sumd[0];
1460
1461         in.s_addr = htonl(in.s_addr);
1462
1463         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1464
1465         nat->nat_me = natsave;
1466         nat->nat_dir = direction;
1467         nat->nat_ifp = fin->fin_ifp;
1468         nat->nat_ptr = np;
1469         nat->nat_p = fin->fin_p;
1470         nat->nat_bytes = 0;
1471         nat->nat_pkts = 0;
1472         nat->nat_mssclamp = np->in_mssclamp;
1473         nat->nat_fr = fin->fin_fr;
1474         if (nat->nat_fr != NULL) {
1475                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1476         }
1477         if (direction == NAT_OUTBOUND) {
1478                 if (flags & IPN_TCPUDP)
1479                         tcp->th_sport = port;
1480         } else {
1481                 if (flags & IPN_TCPUDP)
1482                         tcp->th_dport = nport;
1483         }
1484
1485         nat_insert(nat);
1486
1487         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1488             (tcp != NULL && dport == np->in_dport)))
1489                 (void) appr_new(fin, ip, nat);
1490
1491         np->in_use++;
1492 #ifdef  IPFILTER_LOG
1493         nat_log(nat, (u_int)np->in_redir);
1494 #endif
1495         return nat;
1496 badnat:
1497         nat_stats.ns_badnat++;
1498         if ((hm = nat->nat_hm) != NULL)
1499                 nat_hostmapdel(hm);
1500         KFREE(nat);
1501         return NULL;
1502 }
1503
1504
1505 /*
1506  * Insert a NAT entry into the hash tables for searching and add it to the
1507  * list of active NAT entries.  Adjust global counters when complete.
1508  */
1509 void    nat_insert(nat)
1510 nat_t   *nat;
1511 {
1512         u_int hv1, hv2;
1513         nat_t **natp;
1514
1515         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1516
1517         nat->nat_age = fr_defnatage;
1518         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1519         if (nat->nat_ifname[0] !='\0') {
1520                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1521         }
1522
1523         nat->nat_next = nat_instances;
1524         nat_instances = nat;
1525
1526         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1527                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1528                                   0xffffffff);
1529                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1530                                   ipf_nattable_sz);
1531                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1532                                   0xffffffff);
1533                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1534                                  ipf_nattable_sz);
1535         } else {
1536                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1537                                   ipf_nattable_sz);
1538                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1539                                   ipf_nattable_sz);
1540         }
1541
1542         natp = &nat_table[0][hv1];
1543         if (*natp)
1544                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1545         nat->nat_phnext[0] = natp;
1546         nat->nat_hnext[0] = *natp;
1547         *natp = nat;
1548
1549         natp = &nat_table[1][hv2];
1550         if (*natp)
1551                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1552         nat->nat_phnext[1] = natp;
1553         nat->nat_hnext[1] = *natp;
1554         *natp = nat;
1555
1556         nat_stats.ns_added++;
1557         nat_stats.ns_inuse++;
1558 }
1559
1560
1561 nat_t *nat_icmplookup(ip, fin, dir)
1562 ip_t *ip;
1563 fr_info_t *fin;
1564 int dir;
1565 {
1566         icmphdr_t *icmp;
1567         tcphdr_t *tcp = NULL;
1568         ip_t *oip;
1569         int flags = 0, type, minlen;
1570
1571         icmp = (icmphdr_t *)fin->fin_dp;
1572         /*
1573          * Does it at least have the return (basic) IP header ?
1574          * Only a basic IP header (no options) should be with an ICMP error
1575          * header.
1576          */
1577         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1578                 return NULL;
1579         type = icmp->icmp_type;
1580         /*
1581          * If it's not an error type, then return.
1582          */
1583         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1584             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1585             (type != ICMP_PARAMPROB))
1586                 return NULL;
1587
1588         oip = (ip_t *)((char *)fin->fin_dp + 8);
1589         minlen = (oip->ip_hl << 2);
1590         if (minlen < sizeof(ip_t))
1591                 return NULL;
1592         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1593                 return NULL;
1594         /*
1595          * Is the buffer big enough for all of it ?  It's the size of the IP
1596          * header claimed in the encapsulated part which is of concern.  It
1597          * may be too big to be in this buffer but not so big that it's
1598          * outside the ICMP packet, leading to TCP deref's causing problems.
1599          * This is possible because we don't know how big oip_hl is when we
1600          * do the pullup early in fr_check() and thus can't gaurantee it is
1601          * all here now.
1602          */
1603 #ifdef  _KERNEL
1604         {
1605         mb_t *m;
1606
1607 # if SOLARIS
1608         m = fin->fin_qfm;
1609         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1610                 return NULL;
1611 # else
1612         m = *(mb_t **)fin->fin_mp;
1613         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1614             (char *)ip + m->m_len)
1615                 return NULL;
1616 # endif
1617         }
1618 #endif
1619
1620         if (oip->ip_p == IPPROTO_TCP)
1621                 flags = IPN_TCP;
1622         else if (oip->ip_p == IPPROTO_UDP)
1623                 flags = IPN_UDP;
1624         if (flags & IPN_TCPUDP) {
1625                 u_short data[2];
1626                 nat_t *nat;
1627
1628                 minlen += 8;            /* + 64bits of data to get ports */
1629                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1630                         return NULL;
1631
1632                 data[0] = fin->fin_data[0];
1633                 data[1] = fin->fin_data[1];
1634                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1635                 fin->fin_data[0] = ntohs(tcp->th_dport);
1636                 fin->fin_data[1] = ntohs(tcp->th_sport);
1637
1638                 if (dir == NAT_INBOUND) {
1639                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1640                                             oip->ip_dst, oip->ip_src, 0);
1641                 } else {
1642                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1643                                             oip->ip_dst, oip->ip_src, 0);
1644                 }
1645                 fin->fin_data[0] = data[0];
1646                 fin->fin_data[1] = data[1];
1647                 return nat;
1648         }
1649         if (dir == NAT_INBOUND)
1650                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1651                                     oip->ip_dst, oip->ip_src, 0);
1652         else
1653                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1654                                     oip->ip_dst, oip->ip_src, 0);
1655 }
1656
1657
1658 /*
1659  * Handle an ICMP error whose payload is a NAT'd packet: find the session and
1660  * rewrite the embedded header.  (Once inbound-only; ip_natout() calls it too.)
1661  */
1662 nat_t *nat_icmp(ip, fin, nflags, dir)   /* returns the nat entry, or NULL if none */
1663 ip_t *ip;                       /* outer IP header of the ICMP packet */
1664 fr_info_t *fin;                 /* packet info; fin_dp is read as the ICMP header */
1665 u_int *nflags;                  /* out: set to IPN_ICMPERR once a session is found */
1666 int dir;                        /* NAT_INBOUND or NAT_OUTBOUND, handed to nat_icmplookup() */
1667 {
1668         u_32_t sum1, sum2, sumd, sumd2 = 0;     /* sumd2: delta finally applied to icmp_cksum */
1669         struct in_addr in;
1670         int flags, dlen;
1671         icmphdr_t *icmp;
1672         udphdr_t *udp;
1673         tcphdr_t *tcp;
1674         nat_t *nat;
1675         ip_t *oip;
1676
1677         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1678                 return NULL;
1679         /*
1680          * nat_icmplookup() will return NULL for `defective' packets.
1681          */
1682         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1683                 return NULL;
1684
1685         flags = 0;
1686         *nflags = IPN_ICMPERR;
1687         icmp = (icmphdr_t *)fin->fin_dp;
1688         oip = (ip_t *)&icmp->icmp_ip;   /* the embedded (offending) IP header */
1689         if (oip->ip_p == IPPROTO_TCP)
1690                 flags = IPN_TCP;
1691         else if (oip->ip_p == IPPROTO_UDP)
1692                 flags = IPN_UDP;
1693         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));       /* L4 header after oip */
1694         dlen = ip->ip_len - ((char *)udp - (char *)ip);        /* ip_len minus offset of that header */
1695         /*
1696          * XXX - what if this is bogus hl and we go off the end ?
1697          * In this case, nat_icmplookup() will have returned NULL.
1698          */
1699         tcp = (tcphdr_t *)udp;          /* same bytes, viewed as a TCP header */
1700
1701         /*
1702          * Need to adjust ICMP header to include the real IP#'s and
1703          * port #'s.  Only apply a checksum change relative to the
1704          * IP address change as it will be modified again in ip_natout
1705          * for both address and port.  Two checksum changes are
1706          * necessary for the two header address changes.  Be careful
1707          * to only modify the checksum once for the port # and twice
1708          * for the IP#.
1709          */
1710
1711         /*
1712          * Step 1
1713          * Fix the IP addresses in the offending IP packet. You also need
1714          * to adjust the IP header checksum of that offending IP packet
1715          * and the ICMP checksum of the ICMP error message itself.
1716          *
1717          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1718          * in the pseudo header that is used to compute the UDP resp. TCP
1719          * checksum. So, we must compensate that as well. Even worse, the
1720          * change in the UDP and TCP checksums require yet another
1721          * adjustment of the ICMP checksum of the ICMP error message.
1722          *
1723          */
1724
1725         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1726                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1727                 in = nat->nat_inip;
1728                 oip->ip_src = in;
1729         } else {
1730                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1731                 in = nat->nat_outip;
1732                 oip->ip_dst = in;
1733         }
1734
1735         sum2 = LONG_SUM(ntohl(in.s_addr));
1736
1737         CALC_SUMD(sum1, sum2, sumd);    /* sumd: delta between old and new address sums */
1738
1739         if (nat->nat_dir == NAT_OUTBOUND) {
1740                 /*
1741                  * Fix IP checksum of the offending IP packet to adjust for
1742                  * the change in the IP address.
1743                  *
1744                  * Normally, you would expect that the ICMP checksum of the
1745                  * ICMP error message needs to be adjusted as well for the
1746                  * IP address change in oip.
1747                  * However, this is a NOP, because the ICMP checksum is
1748                  * calculated over the complete ICMP packet, which includes the
1749                  * changed oip IP addresses and oip->ip_sum. However, these
1750                  * two changes cancel each other out (if the delta for
1751                  * the IP address is x, then the delta for ip_sum is minus x),
1752                  * so no change in the icmp_cksum is necessary.
1753                  *
1754                  * Be careful that nat_dir refers to the direction of the
1755                  * offending IP packet (oip), not to its ICMP response (icmp)
1756                  */
1757                 fix_datacksum(&oip->ip_sum, sumd);
1758
1759                 /*
1760                  * Fix UDP pseudo header checksum to compensate for the
1761                  * IP address change.
1762                  */
1763                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1764                         /*
1765                          * The UDP checksum is optional, only adjust it
1766                          * if it has been set.
1767                          */
1768                         sum1 = ntohs(udp->uh_sum);
1769                         fix_datacksum(&udp->uh_sum, sumd);
1770                         sum2 = ntohs(udp->uh_sum);
1771
1772                         /*
1773                          * Fix ICMP checksum to compensate the UDP
1774                          * checksum adjustment.
1775                          */
1776                         CALC_SUMD(sum1, sum2, sumd);
1777                         sumd2 = sumd;
1778                 }
1779
1780                 /*
1781                  * Fix TCP pseudo header checksum to compensate for the
1782                  * IP address change. Before we can do the change, we
1783                  * must make sure that oip is sufficiently large to hold
1784                  * the TCP checksum (normally it is not: th_sum ends at byte 18).
1785                  */
1786                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1787
1788                         sum1 = ntohs(tcp->th_sum);
1789                         fix_datacksum(&tcp->th_sum, sumd);
1790                         sum2 = ntohs(tcp->th_sum);
1791
1792                         /*
1793                          * Fix ICMP checksum to compensate the TCP
1794                          * checksum adjustment.
1795                          */
1796                         CALC_SUMD(sum1, sum2, sumd);
1797                         sumd2 = sumd;
1798                 }
1799         } else {
1800
1801                 /*
1802                  * Fix IP checksum of the offending IP packet to adjust for
1803                  * the change in the IP address.
1804                  *
1805                  * Normally, you would expect that the ICMP checksum of the
1806                  * ICMP error message needs to be adjusted as well for the
1807                  * IP address change in oip.
1808                  * However, this is a NOP, because the ICMP checksum is
1809                  * calculated over the complete ICMP packet, which includes the
1810                  * changed oip IP addresses and oip->ip_sum. However, these
1811                  * two changes cancel each other out (if the delta for
1812                  * the IP address is x, then the delta for ip_sum is minus x),
1813                  * so no change in the icmp_cksum is necessary.
1814                  *
1815                  * Be careful that nat_dir refers to the direction of the
1816                  * offending IP packet (oip), not to its ICMP response (icmp)
1817                  */
1818                 fix_datacksum(&oip->ip_sum, sumd);
1819
1820 /* XXX FV : without having looked at Solaris source code, it seems unlikely
1821  * that SOLARIS would compensate this in the kernel (a body of an IP packet
1822  * in the data section of an ICMP packet). I have the feeling that this should
1823  * be unconditional, but I'm not in a position to check.
1824  */
1825 #if !SOLARIS && !defined(__sgi)
1826                 /*
1827                  * Fix UDP pseudo header checksum to compensate for the
1828                  * IP address change.
1829                  */
1830                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1831                         /*
1832                          * The UDP checksum is optional, only adjust it
1833                          * if it has been set
1834                          */
1835                         sum1 = ntohs(udp->uh_sum);
1836                         fix_datacksum(&udp->uh_sum, sumd);
1837                         sum2 = ntohs(udp->uh_sum);
1838
1839                         /*
1840                          * Fix ICMP checksum to compensate the UDP
1841                          * checksum adjustment.
1842                          */
1843                         CALC_SUMD(sum1, sum2, sumd);
1844                         sumd2 = sumd;
1845                 }
1846
1847                 /*
1848                  * Fix TCP pseudo header checksum to compensate for the
1849                  * IP address change. Before we can do the change, we
1850                  * must make sure that oip is sufficiently large to hold
1851                  * the TCP checksum (normally it is not: th_sum ends at byte 18).
1852                  */
1853                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1854
1855                         sum1 = ntohs(tcp->th_sum);
1856                         fix_datacksum(&tcp->th_sum, sumd);
1857                         sum2 = ntohs(tcp->th_sum);
1858
1859                         /*
1860                          * Fix ICMP checksum to compensate the TCP
1861                          * checksum adjustment.
1862                          */
1863                         CALC_SUMD(sum1, sum2, sumd);
1864                         sumd2 = sumd;
1865                 }
1866 #endif
1867         }
1868
1869         if ((flags & IPN_TCPUDP) != 0) {
1870                 /*
1871                  * Step 2 :
1872                  * For offending TCP/UDP IP packets, translate the ports as
1873                  * well, based on the NAT specification. Of course such
1874                  * a change must be reflected in the ICMP checksum as well.
1875                  *
1876                  * Advance notice : Now it becomes complicated :-)
1877                  *
1878                  * Since the port fields are part of the TCP/UDP checksum
1879                  * of the offending IP packet, you need to adjust that checksum
1880                  * as well... but, if you change, you must change the icmp
1881                  * checksum *again*, to reflect that change.
1882                  *
1883                  * To further complicate: the TCP checksum is not in the first
1884                  * 8 bytes of the offending ip packet, so it most likely is not
1885                  * available. Some OSes like Solaris return enough bytes to
1886                  * include the TCP checksum. So we have to check if the
1887                  * ip->ip_len actually holds the TCP checksum of the oip!
1888                  */
1889
1890                 if (nat->nat_oport == tcp->th_dport) {
1891                         if (tcp->th_sport != nat->nat_inport) {
1892                                 /*
1893                                  * Fix ICMP checksum to compensate port
1894                                  * adjustment.
1895                                  */
1896                                 sum1 = ntohs(tcp->th_sport);
1897                                 sum2 = ntohs(nat->nat_inport);
1898                                 CALC_SUMD(sum1, sum2, sumd);
1899                                 sumd2 += sumd;
1900                                 tcp->th_sport = nat->nat_inport;
1901
1902                                 /*
1903                                  * Fix udp checksum to compensate port
1904                                  * adjustment.  NOTE : the offending IP packet
1905                                  * flows the other direction compared to the
1906                                  * ICMP message.
1907                                  *
1908                                  * The UDP checksum is optional, only adjust
1909                                  * it if it has been set.
1910                                  */
1911                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1912
1913                                         sum1 = ntohs(udp->uh_sum);
1914                                         fix_datacksum(&udp->uh_sum, sumd);
1915                                         sum2 = ntohs(udp->uh_sum);
1916
1917                                         /*
1918                                          * Fix ICMP checksum to
1919                                          * compensate UDP checksum
1920                                          * adjustment.
1921                                          */
1922                                         CALC_SUMD(sum1, sum2, sumd);
1923                                         sumd2 += sumd;
1924                                 }
1925
1926                                 /*
1927                                  * Fix tcp checksum (if present) to compensate
1928                                  * port adjustment. NOTE : the offending IP
1929                                  * packet flows the other direction compared to
1930                                  * the ICMP message.
1931                                  */
1932                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1933
1934                                         sum1 = ntohs(tcp->th_sum);
1935                                         fix_datacksum(&tcp->th_sum, sumd);
1936                                         sum2 = ntohs(tcp->th_sum);
1937
1938                                         /*
1939                                          * Fix ICMP checksum to
1940                                          * compensate TCP checksum
1941                                          * adjustment.
1942                                          */
1943                                         CALC_SUMD(sum1, sum2, sumd);
1944                                         sumd2 += sumd;
1945                                 }
1946                         }
1947                 } else {
1948                         if (tcp->th_dport != nat->nat_outport) {
1949                                 /*
1950                                  * Fix ICMP checksum to compensate port
1951                                  * adjustment.
1952                                  */
1953                                 sum1 = ntohs(tcp->th_dport);
1954                                 sum2 = ntohs(nat->nat_outport);
1955                                 CALC_SUMD(sum1, sum2, sumd);
1956                                 sumd2 += sumd;
1957                                 tcp->th_dport = nat->nat_outport;
1958
1959                                 /*
1960                                  * Fix udp checksum to compensate port
1961                                  * adjustment.   NOTE : the offending IP
1962                                  * packet flows the other direction compared
1963                                  * to the ICMP message.
1964                                  *
1965                                  * The UDP checksum is optional, only adjust
1966                                  * it if it has been set.
1967                                  */
1968                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1969
1970                                         sum1 = ntohs(udp->uh_sum);
1971                                         fix_datacksum(&udp->uh_sum, sumd);
1972                                         sum2 = ntohs(udp->uh_sum);
1973
1974                                         /*
1975                                          * Fix ICMP checksum to compensate
1976                                          * UDP checksum adjustment.
1977                                          */
1978                                         CALC_SUMD(sum1, sum2, sumd);
1979                                         sumd2 += sumd;
1980                                 }
1981
1982                                 /*
1983                                  * Fix tcp checksum (if present) to compensate
1984                                  * port adjustment. NOTE : the offending IP
1985                                  * packet flows the other direction compared to
1986                                  * the ICMP message.
1987                                  */
1988                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1989
1990                                         sum1 = ntohs(tcp->th_sum);
1991                                         fix_datacksum(&tcp->th_sum, sumd);
1992                                         sum2 = ntohs(tcp->th_sum);
1993
1994                                         /*
1995                                          * Fix ICMP checksum to compensate
1996                                          * TCP checksum adjustment.
1997                                          */
1998                                         CALC_SUMD(sum1, sum2, sumd);
1999                                         sumd2 += sumd;
2000                                 }
2001                         }
2002                 }
2003                 if (sumd2) {
2004                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);      /* fold the carries into 16 bits */
2005                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);      /* again, in case that fold carried */
2006                         if (nat->nat_dir == NAT_OUTBOUND) {
2007                                 fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
2008                         } else {
2009                                 fix_incksum(fin, &icmp->icmp_cksum, sumd2);
2010                         }
2011                 }
2012         }
2013         if (oip->ip_p == IPPROTO_ICMP)  /* embedded packet was itself ICMP */
2014                 nat->nat_age = fr_defnaticmpage;
2015         return nat;
2016 }
2017
2018
2019 /*
2020  * NB: these lookups don't lock access to the list, they assume it has already
2021  * been done!  (See `rw' below for the one place the lock is swapped.)
2022  */
2023 /*
2024  * Lookup a nat entry based on the mapped destination ip address/port and
2025  * real source address/port.  We use this lookup when receiving a packet,
2026  * we're looking for a table entry, based on the destination address.
2027  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2028  */
2029 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)     /* returns the entry, or NULL */
2030 fr_info_t *fin;                 /* packet info: supplies fin_ifp and the ports in fin_data[] */
2031 register u_int flags, p;        /* flags: IPN_TCPUDP bits and FI_WILDP; p: IP protocol, 0 = any */
2032 struct in_addr src , mapdst;    /* compared with nat_oip and nat_outip respectively */
2033 int rw;                         /* 0: swap ipf_nat to a write lock around the wildcard pass */
2034 {
2035         register u_short sport, dport;
2036         register nat_t *nat;
2037         register int nflags;
2038         register u_32_t dst;
2039         ipnat_t *ipn;
2040         void *ifp;
2041         u_int hv;
2042
2043         if (fin != NULL)        /* NOTE(review): fin is still dereferenced unguarded below */
2044                 ifp = fin->fin_ifp;
2045         else
2046                 ifp = NULL;     /* NULL ifp matches entries on any interface */
2047         dst = mapdst.s_addr;
2048         if (flags & IPN_TCPUDP) {
2049                 sport = htons(fin->fin_data[0]);
2050                 dport = htons(fin->fin_data[1]);
2051         } else {
2052                 sport = 0;
2053                 dport = 0;
2054         }
2055
2056         hv = NAT_HASH_FN(dst, dport, 0xffffffff);       /* stage 1: mapped address + port */
2057         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);     /* stage 2: bucket index */
2058         nat = nat_table[1][hv];
2059         for (; nat; nat = nat->nat_hnext[1]) {
2060                 nflags = nat->nat_flags;
2061                 if ((!ifp || ifp == nat->nat_ifp) &&
2062                     nat->nat_oip.s_addr == src.s_addr &&
2063                     nat->nat_outip.s_addr == dst &&
2064                     ((p == 0) || (p == nat->nat_p))) {
2065                         switch (p)      /* ports are only compared for TCP and UDP */
2066                         {
2067                         case IPPROTO_TCP :
2068                         case IPPROTO_UDP :
2069                                 if (nat->nat_oport != sport)
2070                                         continue;
2071                                 if (nat->nat_outport != dport)
2072                                         continue;
2073                                 break;
2074                         default :
2075                                 break;
2076                         }
2077
2078                         ipn = nat->nat_ptr;
2079                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2080                                 if (appr_match(fin, nat) != 0) /* non-zero: skip this entry */
2081                                         continue;
2082                         return nat;
2083                 }
2084         }
2085         if (!nat_stats.ns_wilds || !(flags & FI_WILDP)) /* no wildcard pass needed or wanted */
2086                 return NULL;
2087         if (!rw) {
2088                 RWLOCK_EXIT(&ipf_nat);
2089         }
2090         hv = NAT_HASH_FN(dst, 0, 0xffffffff);   /* same two-stage hash as above, ports = 0 */
2091         hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz);     /* was `dst': stage 1 was discarded */
2092         if (!rw) {
2093                 WRITE_ENTER(&ipf_nat);  /* nat_tabmove() below relinks the hash chains */
2094         }
2095         nat = nat_table[1][hv];
2096         for (; nat; nat = nat->nat_hnext[1]) {
2097                 nflags = nat->nat_flags;
2098                 if (ifp && ifp != nat->nat_ifp)
2099                         continue;
2100                 if (!(nflags & FI_WILDP))
2101                         continue;
2102                 if (nat->nat_oip.s_addr != src.s_addr ||
2103                     nat->nat_outip.s_addr != dst)
2104                         continue;
2105                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2106                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2107                         nat_tabmove(fin, nat);  /* re-hash the entry with the ports included */
2108                         break;
2109                 }
2110         }
2111         if (!rw) {
2112                 MUTEX_DOWNGRADE(&ipf_nat);      /* undo the WRITE_ENTER taken above */
2113         }
2114         return nat;     /* NULL when the wildcard chain had no match */
2115 }
2116
2117
2118 /*
2119  * This function is only called for TCP/UDP NAT table entries where the
2120  * original was placed in the table without hashing on the ports and we now
2121  * want to include hashing on port numbers.  Unlinks nat from both hash
2122  * tables and relinks it at the head of its new buckets; takes no lock itself. */
2123 static void nat_tabmove(fin, nat)
2124 fr_info_t *fin;                 /* packet whose fin_data[0..1] supply the ports to hash on */
2125 nat_t *nat;                     /* entry to move; must currently be linked in both tables */
2126 {
2127         register u_short sport, dport;
2128         u_int hv, nflags;
2129         nat_t **natp;
2130
2131         nflags = nat->nat_flags;        /* read but not used further in this function */
2132
2133         sport = ntohs(fin->fin_data[0]);
2134         dport = ntohs(fin->fin_data[1]);
2135
2136         /*
2137          * Remove the NAT entry from the old location
2138          * (nat_phnext[] points at whichever pointer currently refers to nat).
2139          */
2140         if (nat->nat_hnext[0])
2141                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2142         *nat->nat_phnext[0] = nat->nat_hnext[0];
2143
2144         if (nat->nat_hnext[1])
2145                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2146         *nat->nat_phnext[1] = nat->nat_hnext[1];
2147
2148         /*
2149          * Add into the NAT table in the new position
2150          * (table 0 is keyed on nat_inip, table 1 on nat_outip; both add nat_oip).
2151          */
2152         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2153         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2154         natp = &nat_table[0][hv];
2155         if (*natp)              /* old head's back-pointer must now refer to our next field */
2156                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2157         nat->nat_phnext[0] = natp;
2158         nat->nat_hnext[0] = *natp;
2159         *natp = nat;
2160
2161         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2162         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2163         natp = &nat_table[1][hv];
2164         if (*natp)
2165                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2166         nat->nat_phnext[1] = natp;
2167         nat->nat_hnext[1] = *natp;
2168         *natp = nat;
2169 }
2168
2169
2170 /*
2171  * Lookup a nat entry based on the source 'real' ip address/port and
2172  * destination address/port.  We use this lookup when sending a packet out,
2173  * we're looking for a table entry, based on the source address.
2174  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2175  */
2176 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)       /* returns the entry, or NULL */
2177 fr_info_t *fin;                 /* packet info: supplies fin_ifp and the ports in fin_data[] */
2178 register u_int flags, p;        /* flags: IPN_TCPUDP bits and FI_WILDP; p: IP protocol, 0 = any */
2179 struct in_addr src , dst;       /* compared with nat_inip and nat_oip respectively */
2180 int rw;                         /* 0: swap ipf_nat to a write lock around the wildcard pass */
2181 {
2182         register u_short sport, dport;
2183         register nat_t *nat;
2184         register int nflags;
2185         ipnat_t *ipn;
2186         u_32_t srcip;
2187         void *ifp;
2188         u_int hv;
2189
2190         ifp = fin->fin_ifp;     /* NULL ifp matches entries on any interface */
2191         srcip = src.s_addr;
2192         if (flags & IPN_TCPUDP) {
2193                 sport = ntohs(fin->fin_data[0]);
2194                 dport = ntohs(fin->fin_data[1]);
2195         } else {
2196                 sport = 0;
2197                 dport = 0;
2198         }
2199
2200         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);     /* stage 1: real address + port */
2201         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);     /* stage 2: bucket index */
2202         nat = nat_table[0][hv];
2203         for (; nat; nat = nat->nat_hnext[0]) {
2204                 nflags = nat->nat_flags;
2205
2206                 if ((!ifp || ifp == nat->nat_ifp) &&
2207                     nat->nat_inip.s_addr == srcip &&
2208                     nat->nat_oip.s_addr == dst.s_addr &&
2209                     ((p == 0) || (p == nat->nat_p))) {
2210                         switch (p)      /* ports are only compared for TCP and UDP */
2211                         {
2212                         case IPPROTO_TCP :
2213                         case IPPROTO_UDP :
2214                                 if (nat->nat_oport != dport)
2215                                         continue;
2216                                 if (nat->nat_inport != sport)
2217                                         continue;
2218                                 break;
2219                         default :
2220                                 break;
2221                         }
2222
2223                         ipn = nat->nat_ptr;
2224                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2225                                 if (appr_match(fin, nat) != 0) /* non-zero: skip this entry */
2226                                         continue;
2227                         return nat;
2228                 }
2229         }
2230         if (!nat_stats.ns_wilds || !(flags & FI_WILDP)) /* no wildcard pass needed or wanted */
2231                 return NULL;
2232         if (!rw) {
2233                 RWLOCK_EXIT(&ipf_nat);
2234         }
2235         hv = NAT_HASH_FN(srcip, 0, 0xffffffff); /* same two-stage hash as above, ports = 0 */
2236         hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz);     /* was one stage keyed on raw srcip */
2237         if (!rw) {
2238                 WRITE_ENTER(&ipf_nat);  /* nat_tabmove() below relinks the hash chains */
2239         }
2240         nat = nat_table[0][hv];
2241         for (; nat; nat = nat->nat_hnext[0]) {
2242                 nflags = nat->nat_flags;
2243                 if (ifp && ifp != nat->nat_ifp)
2244                         continue;
2245                 if (!(nflags & FI_WILDP))
2246                         continue;
2247                 if ((nat->nat_inip.s_addr != srcip) ||
2248                     (nat->nat_oip.s_addr != dst.s_addr))
2249                         continue;
2250                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2251                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2252                         nat_tabmove(fin, nat);  /* re-hash the entry with the ports included */
2253                         break;
2254                 }
2255         }
2256         if (!rw) {
2257                 MUTEX_DOWNGRADE(&ipf_nat);      /* undo the WRITE_ENTER taken above */
2258         }
2259         return nat;     /* NULL when the wildcard chain had no match */
2260 }
2261
2262
2263 /*
2264  * Lookup the NAT tables to search for a matching redirect.  On a hit, stores
2265  * nat_outip/nat_outport in np->nl_realip/nl_realport.  Returns entry or NULL. */
2266 nat_t *nat_lookupredir(np)
2267 register natlookup_t *np;       /* in: nl_inip, nl_outip, nl_inport, nl_outport, nl_flags */
2268 {
2269         nat_t *nat;
2270         fr_info_t fi;
2271
2272         bzero((char *)&fi, sizeof(fi)); /* zeroed: fin_ifp is NULL, so any interface matches */
2273         fi.fin_data[0] = ntohs(np->nl_inport);  /* nat_outlookup() applies ntohs() again */
2274         fi.fin_data[1] = ntohs(np->nl_outport);
2275
2276         /*
2277          * Only the nat_outlookup() search (nl_inip as source, nl_outip as
2278          * destination, any protocol) is made; there is no inbound fallback.
2279          */
2280         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2281                                  np->nl_outip, 0))) {
2282                 np->nl_realip = nat->nat_outip;
2283                 np->nl_realport = nat->nat_outport;
2284         }
2285         return nat;
2286 }
2287
2288
2289 static int nat_match(fin, np, ip)       /* 1 if rule np applies to the packet, else 0 */
2290 fr_info_t *fin;                 /* packet info: direction, protocol, addresses, flags */
2291 ipnat_t *np;                    /* candidate NAT rule */
2292 ip_t *ip;                       /* IP header; only ip_v is read here */
2293 {
2294         frtuc_t *ft;
2295
2296         if (ip->ip_v != 4)
2297                 return 0;
2298
2299         if (np->in_p && fin->fin_p != np->in_p) /* in_p == 0 places no protocol constraint */
2300                 return 0;
2301         if (fin->fin_out) {             /* outbound: only map / map-block rules qualify */
2302                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2303                         return 0;
2304                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2305                     ^ ((np->in_flags & IPN_NOTSRC) != 0))       /* IPN_NOTSRC inverts the test */
2306                         return 0;
2307                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2308                     ^ ((np->in_flags & IPN_NOTDST) != 0))       /* IPN_NOTDST inverts the test */
2309                         return 0;
2310         } else {                        /* inbound: only redirect rules qualify */
2311                 if (!(np->in_redir & NAT_REDIRECT))
2312                         return 0;
2313                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2314                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2315                         return 0;
2316                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2317                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2318                         return 0;
2319         }
2320
2321         ft = &np->in_tuc;
2322         if (!(fin->fin_fl & FI_TCPUDP) ||
2323             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2324                 if (ft->ftu_scmp || ft->ftu_dcmp)       /* rule compares ports, packet has none usable */
2325                         return 0;
2326                 return 1;
2327         }
2328
2329         return fr_tcpudpchk(ft, fin);   /* port comparison decides the result */
2330 }
2331
2332
2333 /*
2334  * Packets going out on the external interface go through this.
2335  * Here, the source address requires alteration, if anything.
2336  */
2337 int ip_natout(ip, fin)
2338 ip_t *ip;
2339 fr_info_t *fin;
2340 {
2341         register ipnat_t *np = NULL;
2342         register u_32_t ipa;
2343         tcphdr_t *tcp = NULL;
2344         u_short sport = 0, dport = 0, *csump = NULL;
2345         int natadd = 1, i, icmpset = 1;
2346         u_int nflags = 0, hv, msk;
2347         struct ifnet *ifp;
2348         frentry_t *fr;
2349         void *sifp;
2350         u_32_t iph;
2351         nat_t *nat;
2352
2353         if (nat_list == NULL || (fr_nat_lock))
2354                 return 0;
2355
2356         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2357             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2358                 sifp = fin->fin_ifp;
2359                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2360         } else
2361                 sifp = fin->fin_ifp;
2362         ifp = fin->fin_ifp;
2363
2364         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2365                 if (fin->fin_p == IPPROTO_TCP)
2366                         nflags = IPN_TCP;
2367                 else if (fin->fin_p == IPPROTO_UDP)
2368                         nflags = IPN_UDP;
2369                 if ((nflags & IPN_TCPUDP)) {
2370                         tcp = (tcphdr_t *)fin->fin_dp;
2371                         sport = tcp->th_sport;
2372                         dport = tcp->th_dport;
2373                 }
2374         }
2375
2376         ipa = fin->fin_saddr;
2377
2378         READ_ENTER(&ipf_nat);
2379
2380         if ((fin->fin_p == IPPROTO_ICMP) &&
2381             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2382                 icmpset = 1;
2383         else if ((fin->fin_fl & FI_FRAG) &&
2384             (nat = ipfr_nat_knownfrag(ip, fin)))
2385                 natadd = 0;
2386         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2387                                       (u_int)fin->fin_p, fin->fin_src,
2388                                       fin->fin_dst, 0))) {
2389                 nflags = nat->nat_flags;
2390                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2391                         if ((nflags & FI_W_SPORT) &&
2392                             (nat->nat_inport != sport))
2393                                 nat->nat_inport = sport;
2394                         if ((nflags & FI_W_DPORT) &&
2395                             (nat->nat_oport != dport))
2396                                 nat->nat_oport = dport;
2397
2398                         if (nat->nat_outport == 0)
2399                                 nat->nat_outport = sport;
2400                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2401                         nflags = nat->nat_flags;
2402                         nat_stats.ns_wilds--;
2403                 }
2404         } else {
2405                 RWLOCK_EXIT(&ipf_nat);
2406
2407                 msk = 0xffffffff;
2408                 i = 32;
2409
2410                 WRITE_ENTER(&ipf_nat);
2411                 /*
2412                  * If there is no current entry in the nat table for this IP#,
2413                  * create one for it (if there is a matching rule).
2414                  */
2415 maskloop:
2416                 iph = ipa & htonl(msk);
2417                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2418                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2419                 {
2420                         if (np->in_ifp && (np->in_ifp != ifp))
2421                                 continue;
2422                         if ((np->in_flags & IPN_RF) &&
2423                             !(np->in_flags & nflags))
2424                                 continue;
2425                         if (np->in_flags & IPN_FILTER) {
2426                                 if (!nat_match(fin, np, ip))
2427                                         continue;
2428                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2429                                 continue;
2430                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2431                                 continue;
2432                         nat = nat_new(fin, ip, np, NULL,
2433                                       (u_int)nflags, NAT_OUTBOUND);
2434                         if (nat != NULL) {
2435                                 np->in_hits++;
2436                                 break;
2437                         }
2438                 }
2439                 if ((np == NULL) && (i > 0)) {
2440                         do {
2441                                 i--;
2442                                 msk <<= 1;
2443                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2444                         if (i >= 0)
2445                                 goto maskloop;
2446                 }
2447                 MUTEX_DOWNGRADE(&ipf_nat);
2448         }
2449
2450         /*
2451          * NOTE: ipf_nat must now only be held as a read lock
2452          */
2453         if (nat) {
2454                 np = nat->nat_ptr;
2455                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2456                         ipfr_nat_newfrag(ip, fin, nat);
2457                 MUTEX_ENTER(&nat->nat_lock);
2458                 if (fin->fin_p != IPPROTO_TCP) {
2459                         if (np && np->in_age[1])
2460                                 nat->nat_age = np->in_age[1];
2461                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2462                                 nat->nat_age = fr_defnaticmpage;
2463                         else
2464                                 nat->nat_age = fr_defnatage;
2465                 }
2466                 nat->nat_bytes += ip->ip_len;
2467                 nat->nat_pkts++;
2468                 MUTEX_EXIT(&nat->nat_lock);
2469
2470                 /*
2471                  * Fix up checksums, not by recalculating them, but
2472                  * simply computing adjustments.
2473                  */
2474                 if (nflags == IPN_ICMPERR) {
2475                         u_32_t s1, s2, sumd;
2476
2477                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2478                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2479                         CALC_SUMD(s1, s2, sumd);
2480
2481                         if (nat->nat_dir == NAT_OUTBOUND)
2482                                 fix_outcksum(fin, &ip->ip_sum, sumd);
2483                         else
2484                                 fix_incksum(fin, &ip->ip_sum, sumd);
2485                 }
2486 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2487                 else {
2488                         if (nat->nat_dir == NAT_OUTBOUND)
2489                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2490                         else
2491                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2492                 }
2493 #endif
2494                 /*
2495                  * Only change the packet contents, not what is filtered upon.
2496                  */
2497                 ip->ip_src = nat->nat_outip;
2498
2499                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2500
2501                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2502                                 tcp->th_sport = nat->nat_outport;
2503                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2504                         }
2505
2506                         if (fin->fin_p == IPPROTO_TCP) {
2507                                 csump = &tcp->th_sum;
2508                                 MUTEX_ENTER(&nat->nat_lock);
2509                                 fr_tcp_age(&nat->nat_age,
2510                                            nat->nat_tcpstate, fin, 1, 0);
2511                                 if (nat->nat_age < fr_defnaticmpage)
2512                                         nat->nat_age = fr_defnaticmpage;
2513 #ifdef LARGE_NAT
2514                                 else if (nat->nat_age > fr_defnatage)
2515                                         nat->nat_age = fr_defnatage;
2516 #endif
2517                                 /*
2518                                  * Increase this because we may have
2519                                  * "keep state" following this too and
2520                                  * packet storms can occur if this is
2521                                  * removed too quickly.
2522                                  */
2523                                 if (nat->nat_age == fr_tcpclosed)
2524                                         nat->nat_age = fr_tcplastack;
2525
2526                                 /*
2527                                  * Do a MSS CLAMPING on a SYN packet,
2528                                  * only deal IPv4 for now.
2529                                  */
2530                                 if (nat->nat_mssclamp &&
2531                                     (tcp->th_flags & TH_SYN) != 0)
2532                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2533                                                      fin, csump);
2534
2535                                 MUTEX_EXIT(&nat->nat_lock);
2536                         } else if (fin->fin_p == IPPROTO_UDP) {
2537                                 udphdr_t *udp = (udphdr_t *)tcp;
2538
2539                                 if (udp->uh_sum)
2540                                         csump = &udp->uh_sum;
2541                         }
2542
2543                         if (csump) {
2544                                 if (nat->nat_dir == NAT_OUTBOUND)
2545                                         fix_outcksum(fin, csump,
2546                                                      nat->nat_sumd[1]);
2547                                 else
2548                                         fix_incksum(fin, csump,
2549                                                     nat->nat_sumd[1]);
2550                         }
2551                 }
2552
2553                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2554                      (tcp != NULL && dport == np->in_dport))) {
2555                         i = appr_check(ip, fin, nat);
2556                         if (i == 0)
2557                                 i = 1;
2558                         else if (i == -1)
2559                                 nat->nat_drop[1]++;
2560                 } else
2561                         i = 1;
2562                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2563                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2564                 fin->fin_ifp = sifp;
2565                 return i;
2566         }
2567         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2568         fin->fin_ifp = sifp;
2569         return 0;
2570 }
2571
2572
2573 /*
2574  * Packets coming in from the external interface go through this.
2575  * Here, the destination address requires alteration, if anything.
2576  */
2577 int ip_natin(ip, fin)
2578 ip_t *ip;
2579 fr_info_t *fin;
2580 {
2581         register struct in_addr src;
2582         register struct in_addr in;
2583         register ipnat_t *np;
2584         u_short sport = 0, dport = 0, *csump = NULL;
2585         u_int nflags = 0, natadd = 1, hv, msk;
2586         struct ifnet *ifp = fin->fin_ifp;
2587         tcphdr_t *tcp = NULL;
2588         int i, icmpset = 0;
2589         nat_t *nat;
2590         u_32_t iph;
2591
2592         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2593                 return 0;
2594
2595         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2596                 if (fin->fin_p == IPPROTO_TCP)
2597                         nflags = IPN_TCP;
2598                 else if (fin->fin_p == IPPROTO_UDP)
2599                         nflags = IPN_UDP;
2600                 if ((nflags & IPN_TCPUDP)) {
2601                         tcp = (tcphdr_t *)fin->fin_dp;
2602                         sport = tcp->th_sport;
2603                         dport = tcp->th_dport;
2604                 }
2605         }
2606
2607         in = fin->fin_dst;
2608         /* make sure the source address is to be redirected */
2609         src = fin->fin_src;
2610
2611         READ_ENTER(&ipf_nat);
2612
2613         if ((fin->fin_p == IPPROTO_ICMP) &&
2614             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2615                 icmpset = 1;
2616         else if ((fin->fin_fl & FI_FRAG) &&
2617                  (nat = ipfr_nat_knownfrag(ip, fin)))
2618                 natadd = 0;
2619         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2620                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2621                 nflags = nat->nat_flags;
2622                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2623                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2624                                 nat->nat_oport = sport;
2625                         if ((nat->nat_outport != dport) &&
2626                                  (nflags & FI_W_SPORT))
2627                                 nat->nat_outport = dport;
2628                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2629                         nflags = nat->nat_flags;
2630                         nat_stats.ns_wilds--;
2631                 }
2632         } else {
2633                 RWLOCK_EXIT(&ipf_nat);
2634
2635                 msk = 0xffffffff;
2636                 i = 32;
2637
2638                 WRITE_ENTER(&ipf_nat);
2639                 /*
2640                  * If there is no current entry in the nat table for this IP#,
2641                  * create one for it (if there is a matching rule).
2642                  */
2643 maskloop:
2644                 iph = in.s_addr & htonl(msk);
2645                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2646                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2647                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2648                             (np->in_p && (np->in_p != fin->fin_p)) ||
2649                             (np->in_flags && !(nflags & np->in_flags)))
2650                                 continue;
2651                         if (np->in_flags & IPN_FILTER) {
2652                                 if (!nat_match(fin, np, ip))
2653                                         continue;
2654                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2655                                 continue;
2656                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2657                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2658                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2659                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2660                                                     NAT_INBOUND))) {
2661                                         np->in_hits++;
2662                                         break;
2663                                 }
2664                 }
2665
2666                 if ((np == NULL) && (i > 0)) {
2667                         do {
2668                                 i--;
2669                                 msk <<= 1;
2670                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2671                         if (i >= 0)
2672                                 goto maskloop;
2673                 }
2674                 MUTEX_DOWNGRADE(&ipf_nat);
2675         }
2676
2677         /*
2678          * NOTE: ipf_nat must now only be held as a read lock
2679          */
2680         if (nat) {
2681                 np = nat->nat_ptr;
2682                 fin->fin_fr = nat->nat_fr;
2683                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2684                         ipfr_nat_newfrag(ip, fin, nat);
2685                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2686                      (tcp != NULL && sport == np->in_dport))) {
2687                         i = appr_check(ip, fin, nat);
2688                         if (i == -1) {
2689                                 nat->nat_drop[0]++;
2690                                 RWLOCK_EXIT(&ipf_nat);
2691                                 return i;
2692                         }
2693                 }
2694
2695                 MUTEX_ENTER(&nat->nat_lock);
2696                 if (fin->fin_p != IPPROTO_TCP) {
2697                         if (np && np->in_age[0])
2698                                 nat->nat_age = np->in_age[0];
2699                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2700                                 nat->nat_age = fr_defnaticmpage;
2701                         else
2702                                 nat->nat_age = fr_defnatage;
2703                 }
2704                 nat->nat_bytes += ip->ip_len;
2705                 nat->nat_pkts++;
2706                 MUTEX_EXIT(&nat->nat_lock);
2707                 ip->ip_dst = nat->nat_inip;
2708                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2709
2710                 /*
2711                  * Fix up checksums, not by recalculating them, but
2712                  * simply computing adjustments.
2713                  */
2714 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2715                 if (nat->nat_dir == NAT_OUTBOUND)
2716                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2717                 else
2718                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2719 #endif
2720                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2721
2722                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2723                                 tcp->th_dport = nat->nat_inport;
2724                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2725                         }
2726
2727                         if (fin->fin_p == IPPROTO_TCP) {
2728                                 csump = &tcp->th_sum;
2729                                 MUTEX_ENTER(&nat->nat_lock);
2730                                 fr_tcp_age(&nat->nat_age,
2731                                            nat->nat_tcpstate, fin, 0, 0);
2732                                 if (nat->nat_age < fr_defnaticmpage)
2733                                         nat->nat_age = fr_defnaticmpage;
2734 #ifdef LARGE_NAT
2735                                 else if (nat->nat_age > fr_defnatage)
2736                                         nat->nat_age = fr_defnatage;
2737 #endif
2738                                 /*
2739                                  * Increase this because we may have
2740                                  * "keep state" following this too and
2741                                  * packet storms can occur if this is
2742                                  * removed too quickly.
2743                                  */
2744                                 if (nat->nat_age == fr_tcpclosed)
2745                                         nat->nat_age = fr_tcplastack;
2746                                 /*
2747                                  * Do a MSS CLAMPING on a SYN packet,
2748                                  * only deal IPv4 for now.
2749                                  */
2750                                 if (nat->nat_mssclamp &&
2751                                     (tcp->th_flags & TH_SYN) != 0)
2752                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2753                                                      fin, csump);
2754
2755                                 MUTEX_EXIT(&nat->nat_lock);
2756                         } else if (fin->fin_p == IPPROTO_UDP) {
2757                                 udphdr_t *udp = (udphdr_t *)tcp;
2758
2759                                 if (udp->uh_sum)
2760                                         csump = &udp->uh_sum;
2761                         }
2762
2763                         if (csump) {
2764                                 if (nat->nat_dir == NAT_OUTBOUND)
2765                                         fix_incksum(fin, csump,
2766                                                     nat->nat_sumd[0]);
2767                                 else
2768                                         fix_outcksum(fin, csump,
2769                                                     nat->nat_sumd[0]);
2770                         }
2771                 }
2772                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2773                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2774                 return 1;
2775         }
2776         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2777         return 0;
2778 }
2779
2780
2781 /*
2782  * Free all memory used by NAT structures allocated at runtime.
2783  */
2784 void ip_natunload()
2785 {
2786         WRITE_ENTER(&ipf_nat);
2787         (void) nat_clearlist();
2788         (void) nat_flushtable();
2789         RWLOCK_EXIT(&ipf_nat);
2790
2791         if (nat_table[0] != NULL) {
2792                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2793                 nat_table[0] = NULL;
2794         }
2795         if (nat_table[1] != NULL) {
2796                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2797                 nat_table[1] = NULL;
2798         }
2799         if (nat_rules != NULL) {
2800                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2801                 nat_rules = NULL;
2802         }
2803         if (rdr_rules != NULL) {
2804                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2805                 rdr_rules = NULL;
2806         }
2807         if (maptable != NULL) {
2808                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2809                 maptable = NULL;
2810         }
2811 }
2812
2813
2814 /*
2815  * Slowly expire held state for NAT entries.  Timeouts are set in
2816  * expectation of this being called twice per second.
2817  */
2818 void ip_natexpire()
2819 {
2820         register struct nat *nat, **natp;
2821 #if defined(_KERNEL) && !SOLARIS
2822         int s;
2823 #endif
2824
2825         SPL_NET(s);
2826         WRITE_ENTER(&ipf_nat);
2827         for (natp = &nat_instances; (nat = *natp); ) {
2828                 nat->nat_age--;
2829                 if (nat->nat_age) {
2830                         natp = &nat->nat_next;
2831                         continue;
2832                 }
2833                 *natp = nat->nat_next;
2834 #ifdef  IPFILTER_LOG
2835                 nat_log(nat, NL_EXPIRE);
2836 #endif
2837                 nat_delete(nat);
2838                 nat_stats.ns_expire++;
2839         }
2840         RWLOCK_EXIT(&ipf_nat);
2841         SPL_X(s);
2842 }
2843
2844
2845 /*
2846  */
2847 void ip_natsync(ifp)
2848 void *ifp;
2849 {
2850         register ipnat_t *n;
2851         register nat_t *nat;
2852         register u_32_t sum1, sum2, sumd;
2853         struct in_addr in;
2854         ipnat_t *np;
2855         void *ifp2;
2856 #if defined(_KERNEL) && !SOLARIS
2857         int s;
2858 #endif
2859
2860         /*
2861          * Change IP addresses for NAT sessions for any protocol except TCP
2862          * since it will break the TCP connection anyway.
2863          */
2864         SPL_NET(s);
2865         WRITE_ENTER(&ipf_nat);
2866         for (nat = nat_instances; nat; nat = nat->nat_next)
2867                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2868                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2869                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2870                         ifp2 = nat->nat_ifp;
2871                         /*
2872                          * Change the map-to address to be the same as the
2873                          * new one.
2874                          */
2875                         sum1 = nat->nat_outip.s_addr;
2876                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2877                                 nat->nat_outip = in;
2878                         sum2 = nat->nat_outip.s_addr;
2879
2880                         if (sum1 == sum2)
2881                                 continue;
2882                         /*
2883                          * Readjust the checksum adjustment to take into
2884                          * account the new IP#.
2885                          */
2886                         CALC_SUMD(sum1, sum2, sumd);
2887                         /* XXX - dont change for TCP when solaris does
2888                          * hardware checksumming.
2889                          */
2890                         sumd += nat->nat_sumd[0];
2891                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2892                         nat->nat_sumd[1] = nat->nat_sumd[0];
2893                 }
2894
2895         for (n = nat_list; (n != NULL); n = n->in_next)
2896                 if (n->in_ifp == ifp) {
2897                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2898                         if (!n->in_ifp)
2899                                 n->in_ifp = (void *)-1;
2900                 }
2901         RWLOCK_EXIT(&ipf_nat);
2902         SPL_X(s);
2903 }
2904
2905
#ifdef  IPFILTER_LOG
/*
 * Build a natlog record describing session `nat' and hand it to ipllog()
 * on the IPL_LOGNAT device.  `type' is stored in the record as nl_type.
 * Unless LARGE_NAT is defined, nl_rule is the position of the session's
 * rule within nat_list; otherwise (or if the rule is not found) it is -1.
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
        struct ipnat *np;
        struct natlog natl;
        void *items[1];
        size_t sizes[1];
        int rulen, types[1];

        natl.nl_type = type;
        natl.nl_p = nat->nat_p;
        natl.nl_rule = -1;

        /* Addresses */
        natl.nl_inip = nat->nat_inip;
        natl.nl_outip = nat->nat_outip;
        natl.nl_origip = nat->nat_oip;

        /* Ports */
        natl.nl_inport = nat->nat_inport;
        natl.nl_outport = nat->nat_outport;
        natl.nl_origport = nat->nat_oport;

        /* Counters */
        natl.nl_bytes = nat->nat_bytes;
        natl.nl_pkts = nat->nat_pkts;

#ifndef LARGE_NAT
        if (nat->nat_ptr != NULL) {
                rulen = 0;
                np = nat_list;
                while (np != NULL) {
                        if (np == nat->nat_ptr) {
                                natl.nl_rule = rulen;
                                break;
                        }
                        np = np->in_next;
                        rulen++;
                }
        }
#endif

        /* A single item: the record itself. */
        types[0] = 0;
        sizes[0] = sizeof(natl);
        items[0] = &natl;

        (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2944
2945
#ifdef __OpenBSD__
/*
 * Interface detach hook, compiled on OpenBSD only.  The ifp argument is not
 * examined; the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
        frsync();
}
#endif
2954
2955
2956 /*
2957  * Check for MSS option and clamp it if necessary.
2958  */
2959 static void nat_mssclamp(tcp, maxmss, fin, csump)
2960 tcphdr_t *tcp;
2961 u_32_t maxmss;
2962 fr_info_t *fin;
2963 u_short *csump;
2964 {
2965         u_char *cp, *ep, opt;
2966         int hlen, advance;
2967         u_32_t mss, sumd;
2968         u_short v;
2969
2970         hlen = tcp->th_off << 2;
2971         if (hlen > sizeof(*tcp)) {
2972                 cp = (u_char *)tcp + sizeof(*tcp);
2973                 ep = (u_char *)tcp + hlen;
2974
2975                 while (cp < ep) {
2976                         opt = cp[0];
2977                         if (opt == TCPOPT_EOL)
2978                                 break;
2979                         else if (opt == TCPOPT_NOP) {
2980                                 cp++;
2981                                 continue;
2982                         }
2983  
2984                         if (&cp[1] >= ep)
2985                                 break;
2986                         advance = cp[1];
2987                         if (&cp[advance] >= ep)
2988                                 break;
2989                         switch (opt) {
2990                         case TCPOPT_MAXSEG:
2991                                 if (advance != 4)
2992                                         break;
2993                                 bcopy(&cp[2], &v, sizeof(v));
2994                                 mss = ntohs(v);
2995                                 if (mss > maxmss) {
2996                                         v = htons(maxmss);
2997                                         bcopy(&v, &cp[2], sizeof(v));
2998                                         CALC_SUMD(mss, maxmss, sumd);
2999                                         fix_outcksum(fin, csump, sumd);
3000                                 }
3001                                 break;
3002                         default:
3003                                 /* ignore unknown options */
3004                                 break;
3005                         }
3006                     
3007                         cp += advance;  
3008                 }       
3009         }       
3010 }