Merge from vendor branch LIBSTDC++:
[dragonfly.git] / contrib / ipfilter / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  */
8 #if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
9 #define _KERNEL
10 #endif
11
12 #if defined(__sgi) && (IRIX > 602)
13 # include <sys/ptimers.h>
14 #endif
15 #include <sys/errno.h>
16 #include <sys/types.h>
17 #include <sys/param.h>
18 #include <sys/time.h>
19 #include <sys/file.h>
20 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
21     defined(_KERNEL)
22 # include "opt_ipfilter_log.h"
23 #endif
24 #if !defined(_KERNEL) && !defined(KERNEL)
25 # include <stdio.h>
26 # include <string.h>
27 # include <stdlib.h>
28 #endif
29 #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
30 # include <sys/filio.h>
31 # include <sys/fcntl.h>
32 #else
33 # include <sys/ioctl.h>
34 #endif
35 #include <sys/fcntl.h>
36 #ifndef linux
37 # include <sys/protosw.h>
38 #endif
39 #include <sys/socket.h>
40 #if defined(_KERNEL) && !defined(linux)
41 # include <sys/systm.h>
42 #endif
43 #if !defined(__SVR4) && !defined(__svr4__)
44 # ifndef linux
45 #  include <sys/mbuf.h>
46 # endif
47 #else
48 # include <sys/filio.h>
49 # include <sys/byteorder.h>
50 # ifdef _KERNEL
51 #  include <sys/dditypes.h>
52 # endif
53 # include <sys/stream.h>
54 # include <sys/kmem.h>
55 #endif
56 #if __FreeBSD_version >= 300000
57 # include <sys/queue.h>
58 #endif
59 #include <net/if.h>
60 #if __FreeBSD_version >= 300000
61 # include <net/if_var.h>
62 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
63 #  include "opt_ipfilter.h"
64 # endif
65 #endif
66 #ifdef sun
67 # include <net/af.h>
68 #endif
69 #include <net/route.h>
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/ip.h>
73
74 #ifdef __sgi
75 # ifdef IFF_DRVRLOCK /* IRIX6 */
76 #include <sys/hashing.h>
77 #include <netinet/in_var.h>
78 # endif
79 #endif
80
81 #ifdef RFC1825
82 # include <vpn/md5.h>
83 # include <vpn/ipsec.h>
84 extern struct ifnet vpnif;
85 #endif
86
87 #ifndef linux
88 # include <netinet/ip_var.h>
89 # include <netinet/tcp_fsm.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #if (__FreeBSD_version >= 300000)
102 # include <sys/malloc.h>
103 #endif
104 #ifndef MIN
105 # define        MIN(a,b)        (((a)<(b))?(a):(b))
106 #endif
107 #undef  SOCKADDR_IN
108 #define SOCKADDR_IN     struct sockaddr_in
109
110 #if !defined(lint)
111 static const char sccsid[] = "@(#)ip_nat.c      1.11 6/5/96 (C) 1995 Darren Reed";
112 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.74 2002/12/06 11:40:21 darrenr Exp $";
113 #endif
114
115 nat_t   **nat_table[2] = { NULL, NULL },    /* two pointer tables, each allocated by nat_init() with ipf_nattable_sz slots */
116         *nat_instances = NULL;              /* head of the nat_t list that fr_natgetsz()/fr_natgetent() walk via nat_next */
117 ipnat_t *nat_list = NULL;                   /* head of the rule list; nat_ioctl() links rules through in_next */
118 u_int   ipf_nattable_sz = NAT_TABLE_SZ;     /* slot count nat_init() uses for each nat_table[] */
119 u_int   ipf_natrules_sz = NAT_SIZE;         /* slot count nat_init() uses for nat_rules; also the hash modulus in nat_addnat() */
120 u_int   ipf_rdrrules_sz = RDR_SIZE;         /* slot count nat_init() uses for rdr_rules; also the hash modulus in nat_addrdr() */
121 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;      /* slot count nat_init() uses for maptable */
122 u_32_t  nat_masks = 0;                      /* nat_addnat() sets bit countbits(in_inmsk) for each rule it adds */
123 u_32_t  rdr_masks = 0;                      /* nat_addrdr() sets bit countbits(in_outmsk) for each rule it adds */
124 ipnat_t **nat_rules = NULL;                 /* hash buckets filled by nat_addnat(); chains use in_mnext */
125 ipnat_t **rdr_rules = NULL;                 /* hash buckets filled by nat_addrdr(); chains use in_rnext */
126 hostmap_t       **maptable  = NULL;         /* hash buckets filled by nat_hostmap(); chains use hm_next */
127
128 u_long  fr_defnatage = DEF_NAT_AGE,
129         fr_defnaticmpage = 6;           /* 3 seconds */
130 natstat_t nat_stats;                        /* refreshed and copied out by the SIOCGNATS ioctl */
131 int     fr_nat_lock = 0;                    /* set via SIOCSTLCK; SIOCSTPUT/SIOCSTGSZ/SIOCSTGET fail with EACCES while it is 0 */
132 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
133 extern  kmutex_t        ipf_rw;
134 extern  KRWLOCK_T       ipf_nat;
135 #endif
136
137 static  int     nat_flushtable __P((void));
138 static  void    nat_addnat __P((struct ipnat *));
139 static  void    nat_addrdr __P((struct ipnat *));
140 static  void    nat_delete __P((struct nat *));
141 static  void    nat_delrdr __P((struct ipnat *));
142 static  void    nat_delnat __P((struct ipnat *));
143 static  int     fr_natgetent __P((caddr_t));
144 static  int     fr_natgetsz __P((caddr_t));
145 static  int     fr_natputent __P((caddr_t));
146 static  void    nat_tabmove __P((fr_info_t *, nat_t *));
147 static  int     nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
148 static  hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
149                                     struct in_addr));
150 static  void    nat_hostmapdel __P((struct hostmap *));
151 static  void    nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *));
152
153
154 int nat_init()
155 {
156         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
157         if (nat_table[0] != NULL)
158                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
159         else
160                 return -1;
161
162         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
163         if (nat_table[1] != NULL)
164                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
165         else
166                 return -1;
167
168         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
169         if (nat_rules != NULL)
170                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
171         else
172                 return -1;
173
174         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
175         if (rdr_rules != NULL)
176                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
177         else
178                 return -1;
179
180         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
181         if (maptable != NULL)
182                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
183         else
184                 return -1;
185         return 0;
186 }
187
188
189 static void nat_addrdr(n)
190 ipnat_t *n;
191 {
192         ipnat_t **np;
193         u_32_t j;
194         u_int hv;
195         int k;
196
197         k = countbits(n->in_outmsk);
198         if ((k >= 0) && (k != 32))
199                 rdr_masks |= 1 << k;
200         j = (n->in_outip & n->in_outmsk);
201         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
202         np = rdr_rules + hv;
203         while (*np != NULL)
204                 np = &(*np)->in_rnext;
205         n->in_rnext = NULL;
206         n->in_prnext = np;
207         *np = n;
208 }
209
210
211 static void nat_addnat(n)
212 ipnat_t *n;
213 {
214         ipnat_t **np;
215         u_32_t j;
216         u_int hv;
217         int k;
218
219         k = countbits(n->in_inmsk);
220         if ((k >= 0) && (k != 32))
221                 nat_masks |= 1 << k;
222         j = (n->in_inip & n->in_inmsk);
223         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
224         np = nat_rules + hv;
225         while (*np != NULL)
226                 np = &(*np)->in_mnext;
227         n->in_mnext = NULL;
228         n->in_pmnext = np;
229         *np = n;
230 }
231
232
233 static void nat_delrdr(n)
234 ipnat_t *n;
235 {
236         if (n->in_rnext)
237                 n->in_rnext->in_prnext = n->in_prnext;
238         *n->in_prnext = n->in_rnext;
239 }
240
241
242 static void nat_delnat(n)
243 ipnat_t *n;
244 {
245         if (n->in_mnext)
246                 n->in_mnext->in_pmnext = n->in_pmnext;
247         *n->in_pmnext = n->in_mnext;
248 }
249
250
251 /*
252  * check if an ip address has already been allocated for a given mapping that
253  * is not doing port based translation.
254  *
255  * Must be called with ipf_nat held as a write lock.
256  */
257 static struct hostmap *nat_hostmap(np, real, map)
258 ipnat_t *np;
259 struct in_addr real;
260 struct in_addr map;
261 {
262         hostmap_t *hm;
263         u_int hv;
264
265         hv = real.s_addr % HOSTMAP_SIZE;
266         for (hm = maptable[hv]; hm; hm = hm->hm_next)
267                 if ((hm->hm_realip.s_addr == real.s_addr) &&
268                     (np == hm->hm_ipnat)) {
269                         hm->hm_ref++;
270                         return hm;
271                 }
272
273         KMALLOC(hm, hostmap_t *);
274         if (hm) {
275                 hm->hm_next = maptable[hv];
276                 hm->hm_pnext = maptable + hv;
277                 if (maptable[hv])
278                         maptable[hv]->hm_pnext = &hm->hm_next;
279                 maptable[hv] = hm;
280                 hm->hm_ipnat = np;
281                 hm->hm_realip = real;
282                 hm->hm_mapip = map;
283                 hm->hm_ref = 1;
284         }
285         return hm;
286 }
287
288
289 /*
290  * Must be called with ipf_nat held as a write lock.
291  */
292 static void nat_hostmapdel(hm)
293 struct hostmap *hm;
294 {
295         ATOMIC_DEC32(hm->hm_ref);
296         if (hm->hm_ref == 0) {
297                 if (hm->hm_next)
298                         hm->hm_next->hm_pnext = hm->hm_pnext;
299                 *hm->hm_pnext = hm->hm_next;
300                 KFREE(hm);
301         }
302 }
303
304
305 void fix_outcksum(fin, sp, n)
306 fr_info_t *fin;
307 u_short *sp;
308 u_32_t n;
309 {
310         register u_short sumshort;
311         register u_32_t sum1;
312
313         if (!n)
314                 return;
315         else if (n & NAT_HW_CKSUM) {
316                 n &= 0xffff;
317                 n += fin->fin_dlen;
318                 n = (n & 0xffff) + (n >> 16);
319                 *sp = n & 0xffff;
320                 return;
321         }
322         sum1 = (~ntohs(*sp)) & 0xffff;
323         sum1 += (n);
324         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
325         /* Again */
326         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
327         sumshort = ~(u_short)sum1;
328         *(sp) = htons(sumshort);
329 }
330
331
332 void fix_incksum(fin, sp, n)
333 fr_info_t *fin;
334 u_short *sp;
335 u_32_t n;
336 {
337         register u_short sumshort;
338         register u_32_t sum1;
339
340         if (!n)
341                 return;
342         else if (n & NAT_HW_CKSUM) {
343                 n &= 0xffff;
344                 n += fin->fin_dlen;
345                 n = (n & 0xffff) + (n >> 16);
346                 *sp = n & 0xffff;
347                 return;
348         }
349 #ifdef sparc
350         sum1 = (~(*sp)) & 0xffff;
351 #else
352         sum1 = (~ntohs(*sp)) & 0xffff;
353 #endif
354         sum1 += ~(n) & 0xffff;
355         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
356         /* Again */
357         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
358         sumshort = ~(u_short)sum1;
359         *(sp) = htons(sumshort);
360 }
361
362
363 /*
364  * fix_datacksum is used *only* for the adjustments of checksums in the data
365  * section of an IP packet.
366  *
367  * The only situation in which you need to do this is when NAT'ing an 
368  * ICMP error message. Such a message, contains in its body the IP header
369  * of the original IP packet, that causes the error.
370  *
371  * You can't use fix_incksum or fix_outcksum in that case, because for the
372  * kernel the data section of the ICMP error is just data, and no special 
373  * processing like hardware cksum or ntohs processing have been done by the 
374  * kernel on the data section.
375  */
376 void fix_datacksum(sp, n)
377 u_short *sp;
378 u_32_t n;
379 {
380         register u_short sumshort;
381         register u_32_t sum1;
382
383         if (!n)
384                 return;
385
386         sum1 = (~ntohs(*sp)) & 0xffff;
387         sum1 += (n);
388         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
389         /* Again */
390         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
391         sumshort = ~(u_short)sum1;
392         *(sp) = htons(sumshort);
393 }
394
395 /*
396  * How the NAT is organised and works.
397  *
398  * Inside (interface y) NAT       Outside (interface x)
399  * -------------------- -+- -------------------------------------
400  * Packet going          |   out, processed by ip_natout() for x
401  * ------------>         |   ------------>
402  * src=10.1.1.1          |   src=192.1.1.1
403  *                       |
404  *                       |   in, processed by ip_natin() for x
405  * <------------         |   <------------
406  * dst=10.1.1.1          |   dst=192.1.1.1
407  * -------------------- -+- -------------------------------------
408  * ip_natout() - changes ip_src and if required, sport
409  *             - creates a new mapping, if required.
410  * ip_natin()  - changes ip_dst and if required, dport
411  *
412  * In the NAT table, internal source is recorded as "in" and externally
413  * seen as "out".
414  */
415
416 /*
417  * Handle ioctls which manipulate the NAT.
418  */
419 int nat_ioctl(data, cmd, mode)
420 #if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
421 u_long cmd;
422 #else
423 int cmd;
424 #endif
425 caddr_t data;
426 int mode;
427 {
428         register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
429         int error = 0, ret, arg, getlock;
430         ipnat_t natd;
431         u_32_t i, j;
432
433 #if (BSD >= 199306) && defined(_KERNEL)
434         if ((securelevel >= 2) && (mode & FWRITE))
435                 return EPERM;
436 #endif
437
438         nat = NULL;     /* XXX gcc -Wuninitialized */
439         KMALLOC(nt, ipnat_t *);
440         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
441         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
442                 if (mode & NAT_SYSSPACE) {
443                         bcopy(data, (char *)&natd, sizeof(natd));
444                         error = 0;
445                 } else {
446                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
447                 }
448         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
449                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
450                 if (error)
451                         error = EFAULT;
452         }
453
454         if (error)
455                 goto done;
456
457         /*
458          * For add/delete, look to see if the NAT entry is already present
459          */
460         if (getlock == 1) {
461                 WRITE_ENTER(&ipf_nat);
462         }
463         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
464                 nat = &natd;
465                 nat->in_flags &= IPN_USERFLAGS;
466                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
467                         if ((nat->in_flags & IPN_SPLIT) == 0)
468                                 nat->in_inip &= nat->in_inmsk;
469                         if ((nat->in_flags & IPN_IPRANGE) == 0)
470                                 nat->in_outip &= nat->in_outmsk;
471                 }
472                 for (np = &nat_list; (n = *np); np = &n->in_next)
473                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
474                                         IPN_CMPSIZ)) {
475                                 if (n->in_redir == NAT_REDIRECT &&
476                                     n->in_pnext != nat->in_pnext)
477                                         continue;
478                                 break;
479                         }
480         }
481
482         switch (cmd)
483         {
484 #ifdef  IPFILTER_LOG
485         case SIOCIPFFB :
486         {
487                 int tmp;
488
489                 if (!(mode & FWRITE))
490                         error = EPERM;
491                 else {
492                         tmp = ipflog_clear(IPL_LOGNAT);
493                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
494                 }
495                 break;
496         }
497 #endif
498         case SIOCADNAT :
499                 if (!(mode & FWRITE)) {
500                         error = EPERM;
501                         break;
502                 }
503                 if (n) {
504                         error = EEXIST;
505                         break;
506                 }
507                 if (nt == NULL) {
508                         error = ENOMEM;
509                         break;
510                 }
511                 n = nt;
512                 nt = NULL;
513                 bcopy((char *)nat, (char *)n, sizeof(*n));
514                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
515                 if (!n->in_ifp)
516                         n->in_ifp = (void *)-1;
517                 if (n->in_plabel[0] != '\0') {
518                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
519                         if (!n->in_apr) {
520                                 error = ENOENT;
521                                 break;
522                         }
523                 }
524                 n->in_next = NULL;
525                 *np = n;
526
527                 if (n->in_redir & NAT_REDIRECT) {
528                         n->in_flags &= ~IPN_NOTDST;
529                         nat_addrdr(n);
530                 }
531                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
532                         n->in_flags &= ~IPN_NOTSRC;
533                         nat_addnat(n);
534                 }
535
536                 n->in_use = 0;
537                 if (n->in_redir & NAT_MAPBLK)
538                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
539                 else if (n->in_flags & IPN_AUTOPORTMAP)
540                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
541                 else if (n->in_flags & IPN_IPRANGE)
542                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
543                 else if (n->in_flags & IPN_SPLIT)
544                         n->in_space = 2;
545                 else
546                         n->in_space = ~ntohl(n->in_outmsk);
547                 /*
548                  * Calculate the number of valid IP addresses in the output
549                  * mapping range.  In all cases, the range is inclusive of
550                  * the start and ending IP addresses.
551                  * If to a CIDR address, lose 2: broadcast + network address
552                  *                               (so subtract 1)
553                  * If to a range, add one.
554                  * If to a single IP address, set to 1.
555                  */
556                 if (n->in_space) {
557                         if ((n->in_flags & IPN_IPRANGE) != 0)
558                                 n->in_space += 1;
559                         else
560                                 n->in_space -= 1;
561                 } else
562                         n->in_space = 1;
563                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
564                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
565                         n->in_nip = ntohl(n->in_outip) + 1;
566                 else if ((n->in_flags & IPN_SPLIT) &&
567                          (n->in_redir & NAT_REDIRECT))
568                         n->in_nip = ntohl(n->in_inip);
569                 else
570                         n->in_nip = ntohl(n->in_outip);
571                 if (n->in_redir & NAT_MAP) {
572                         n->in_pnext = ntohs(n->in_pmin);
573                         /*
574                          * Multiply by the number of ports made available.
575                          */
576                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
577                                 n->in_space *= (ntohs(n->in_pmax) -
578                                                 ntohs(n->in_pmin) + 1);
579                                 /*
580                                  * Because two different sources can map to
581                                  * different destinations but use the same
582                                  * local IP#/port #.
583                                  * If the result is smaller than in_space, then
584                                  * we may have wrapped around 32bits.
585                                  */
586                                 i = n->in_inmsk;
587                                 if ((i != 0) && (i != 0xffffffff)) {
588                                         j = n->in_space * (~ntohl(i) + 1);
589                                         if (j >= n->in_space)
590                                                 n->in_space = j;
591                                         else
592                                                 n->in_space = 0xffffffff;
593                                 }
594                         }
595                         /*
596                          * If no protocol is specified, multiple by 256.
597                          */
598                         if ((n->in_flags & IPN_TCPUDP) == 0) {
599                                         j = n->in_space * 256;
600                                         if (j >= n->in_space)
601                                                 n->in_space = j;
602                                         else
603                                                 n->in_space = 0xffffffff;
604                         }
605                 }
606                 /* Otherwise, these fields are preset */
607                 n = NULL;
608                 nat_stats.ns_rules++;
609                 break;
610         case SIOCRMNAT :
611                 if (!(mode & FWRITE)) {
612                         error = EPERM;
613                         n = NULL;
614                         break;
615                 }
616                 if (!n) {
617                         error = ESRCH;
618                         break;
619                 }
620                 if (n->in_redir & NAT_REDIRECT)
621                         nat_delrdr(n);
622                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
623                         nat_delnat(n);
624                 if (nat_list == NULL) {
625                         nat_masks = 0;
626                         rdr_masks = 0;
627                 }
628                 *np = n->in_next;
629                 if (!n->in_use) {
630                         if (n->in_apr)
631                                 appr_free(n->in_apr);
632                         KFREE(n);
633                         nat_stats.ns_rules--;
634                 } else {
635                         n->in_flags |= IPN_DELETE;
636                         n->in_next = NULL;
637                 }
638                 n = NULL;
639                 break;
640         case SIOCGNATS :
641                 MUTEX_DOWNGRADE(&ipf_nat);
642                 nat_stats.ns_table[0] = nat_table[0];
643                 nat_stats.ns_table[1] = nat_table[1];
644                 nat_stats.ns_list = nat_list;
645                 nat_stats.ns_maptable = maptable;
646                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
647                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
648                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
649                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
650                 nat_stats.ns_instances = nat_instances;
651                 nat_stats.ns_apslist = ap_sess_list;
652                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
653                                   sizeof(nat_stats));
654                 break;
655         case SIOCGNATL :
656             {
657                 natlookup_t nl;
658
659                 MUTEX_DOWNGRADE(&ipf_nat);
660                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
661                 if (error)
662                         break;
663
664                 if (nat_lookupredir(&nl)) {
665                         error = IWCOPYPTR((char *)&nl, (char *)data,
666                                           sizeof(nl));
667                 } else
668                         error = ESRCH;
669                 break;
670             }
671         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
672                 if (!(mode & FWRITE)) {
673                         error = EPERM;
674                         break;
675                 }
676                 error = 0;
677                 if (arg == 0)
678                         ret = nat_flushtable();
679                 else if (arg == 1)
680                         ret = nat_clearlist();
681                 else
682                         error = EINVAL;
683                 MUTEX_DOWNGRADE(&ipf_nat);
684                 if (!error) {
685                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
686                         if (error)
687                                 error = EFAULT;
688                 }
689                 break;
690         case SIOCSTLCK :
691                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
692                 if (!error) {
693                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
694                                         sizeof(fr_nat_lock));
695                         if (!error)
696                                 fr_nat_lock = arg;
697                 } else
698                         error = EFAULT;
699                 break;
700         case SIOCSTPUT :
701                 if (fr_nat_lock)
702                         error = fr_natputent(data);
703                 else
704                         error = EACCES;
705                 break;
706         case SIOCSTGSZ :
707                 if (fr_nat_lock)
708                         error = fr_natgetsz(data);
709                 else
710                         error = EACCES;
711                 break;
712         case SIOCSTGET :
713                 if (fr_nat_lock)
714                         error = fr_natgetent(data);
715                 else
716                         error = EACCES;
717                 break;
718         case FIONREAD :
719 #ifdef  IPFILTER_LOG
720                 arg = (int)iplused[IPL_LOGNAT];
721                 MUTEX_DOWNGRADE(&ipf_nat);
722                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
723                 if (error)
724                         error = EFAULT;
725 #endif
726                 break;
727         default :
728                 error = EINVAL;
729                 break;
730         }
731         if (getlock == 1) {
732                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
733         }
734 done:
735         if (nt)
736                 KFREE(nt);
737         return error;
738 }
739
740
741 static int fr_natgetsz(data)
742 caddr_t data;
743 {
744         ap_session_t *aps;
745         nat_t *nat, *n;
746         int error = 0;
747         natget_t ng;
748
749         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
750         if (error)
751                 return EFAULT;
752
753         nat = ng.ng_ptr;
754         if (!nat) {
755                 nat = nat_instances;
756                 ng.ng_sz = 0;
757                 if (nat == NULL) {
758                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
759                         if (error)
760                                 error = EFAULT;
761                         return error;
762                 }
763         } else {
764                 /*
765                  * Make sure the pointer we're copying from exists in the
766                  * current list of entries.  Security precaution to prevent
767                  * copying of random kernel data.
768                  */
769                 for (n = nat_instances; n; n = n->nat_next)
770                         if (n == nat)
771                                 break;
772                 if (!n)
773                         return ESRCH;
774         }
775
776         ng.ng_sz = sizeof(nat_save_t);
777         aps = nat->nat_aps;
778         if ((aps != NULL) && (aps->aps_data != 0)) {
779                 ng.ng_sz += sizeof(ap_session_t);
780                 ng.ng_sz += aps->aps_psiz;
781         }
782
783         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
784         if (error)
785                 error = EFAULT;
786         return error;
787 }
788
789
790 static int fr_natgetent(data)
791 caddr_t data;
792 {
793         nat_save_t ipn, *ipnp, *ipnn = NULL;
794         register nat_t *n, *nat;
795         ap_session_t *aps;
796         int error;
797
798         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
799         if (error)
800                 return EFAULT;
801         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
802         if (error)
803                 return EFAULT;
804
805         nat = ipn.ipn_next;
806         if (!nat) {
807                 nat = nat_instances;
808                 if (nat == NULL) {
809                         if (nat_instances == NULL)
810                                 return ENOENT;
811                         return 0;
812                 }
813         } else {
814                 /*
815                  * Make sure the pointer we're copying from exists in the
816                  * current list of entries.  Security precaution to prevent
817                  * copying of random kernel data.
818                  */
819                 for (n = nat_instances; n; n = n->nat_next)
820                         if (n == nat)
821                                 break;
822                 if (!n)
823                         return ESRCH;
824         }
825
826         ipn.ipn_next = nat->nat_next;
827         ipn.ipn_dsize = 0;
828         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
829         ipn.ipn_nat.nat_data = NULL;
830
831         if (nat->nat_ptr) {
832                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
833                       sizeof(ipn.ipn_ipnat));
834         }
835
836         if (nat->nat_fr)
837                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
838                       sizeof(ipn.ipn_rule));
839
840         if ((aps = nat->nat_aps)) {
841                 ipn.ipn_dsize = sizeof(*aps);
842                 if (aps->aps_data)
843                         ipn.ipn_dsize += aps->aps_psiz;
844                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
845                 if (ipnn == NULL)
846                         return ENOMEM;
847                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
848
849                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
850                 if (aps->aps_data) {
851                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
852                               aps->aps_psiz);
853                         ipnn->ipn_dsize += aps->aps_psiz;
854                 }
855                 error = IWCOPY((caddr_t)ipnn, ipnp,
856                                sizeof(ipn) + ipn.ipn_dsize);
857                 if (error)
858                         error = EFAULT;
859                 KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
860         } else {
861                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
862                 if (error)
863                         error = EFAULT;
864         }
865         return error;
866 }
867
868
869 static int fr_natputent(data)
870 caddr_t data;
871 {
872         nat_save_t ipn, *ipnp, *ipnn = NULL;
873         register nat_t *n, *nat;
874         ap_session_t *aps;
875         frentry_t *fr;
876         ipnat_t *in;
877
878         int error;
879
880         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
881         if (error)
882                 return EFAULT;
883         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
884         if (error)
885                 return EFAULT;
886         nat = NULL;
887         if (ipn.ipn_dsize) {
888                 KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
889                 if (ipnn == NULL)
890                         return ENOMEM;
891                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
892                 error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data,
893                                ipn.ipn_dsize);
894                 if (error) {
895                         error = EFAULT;
896                         goto junkput;
897                 }
898         } else
899                 ipnn = NULL;
900
901         KMALLOC(nat, nat_t *);
902         if (nat == NULL) {
903                 error = EFAULT;
904                 goto junkput;
905         }
906
907         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
908         /*
909          * Initialize all these so that nat_delete() doesn't cause a crash.
910          */
911         nat->nat_phnext[0] = NULL;
912         nat->nat_phnext[1] = NULL;
913         fr = nat->nat_fr;
914         nat->nat_fr = NULL;
915         aps = nat->nat_aps;
916         nat->nat_aps = NULL;
917         in = nat->nat_ptr;
918         nat->nat_ptr = NULL;
919         nat->nat_hm = NULL;
920         nat->nat_data = NULL;
921         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
922
923         /*
924          * Restore the rule associated with this nat session
925          */
926         if (in) {
927                 KMALLOC(in, ipnat_t *);
928                 if (in == NULL) {
929                         error = ENOMEM;
930                         goto junkput;
931                 }
932                 nat->nat_ptr = in;
933                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
934                 in->in_use = 1;
935                 in->in_flags |= IPN_DELETE;
936                 in->in_next = NULL;
937                 in->in_rnext = NULL;
938                 in->in_prnext = NULL;
939                 in->in_mnext = NULL;
940                 in->in_pmnext = NULL;
941                 in->in_ifp = GETUNIT(in->in_ifname, 4);
942                 if (in->in_plabel[0] != '\0') {
943                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
944                 }
945         }
946
947         /*
948          * Restore ap_session_t structure.  Include the private data allocated
949          * if it was there.
950          */
951         if (aps) {
952                 KMALLOC(aps, ap_session_t *);
953                 if (aps == NULL) {
954                         error = ENOMEM;
955                         goto junkput;
956                 }
957                 nat->nat_aps = aps;
958                 aps->aps_next = ap_sess_list;
959                 ap_sess_list = aps;
960                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
961                 if (in)
962                         aps->aps_apr = in->in_apr;
963                 if (aps->aps_psiz) {
964                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
965                         if (aps->aps_data == NULL) {
966                                 error = ENOMEM;
967                                 goto junkput;
968                         }
969                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
970                               aps->aps_psiz);
971                 } else {
972                         aps->aps_psiz = 0;
973                         aps->aps_data = NULL;
974                 }
975         }
976
977         /*
978          * If there was a filtering rule associated with this entry then
979          * build up a new one.
980          */
981         if (fr != NULL) {
982                 if (nat->nat_flags & FI_NEWFR) {
983                         KMALLOC(fr, frentry_t *);
984                         nat->nat_fr = fr;
985                         if (fr == NULL) {
986                                 error = ENOMEM;
987                                 goto junkput;
988                         }
989                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
990                         ipn.ipn_nat.nat_fr = fr;
991                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
992                         if (error) {
993                                 error = EFAULT;
994                                 goto junkput;
995                         }
996                 } else {
997                         for (n = nat_instances; n; n = n->nat_next)
998                                 if (n->nat_fr == fr)
999                                         break;
1000                         if (!n) {
1001                                 error = ESRCH;
1002                                 goto junkput;
1003                         }
1004                 }
1005         }
1006
1007         if (ipnn)
1008                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1009         nat_insert(nat);
1010         return 0;
1011 junkput:
1012         if (ipnn)
1013                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1014         if (nat)
1015                 nat_delete(nat);
1016         return error;
1017 }
1018
1019
1020 /*
1021  * Delete a nat entry from the various lists and table.
1022  */
1023 static void nat_delete(natd)
1024 struct nat *natd;
1025 {
1026         struct ipnat *ipn;
1027
1028         if (natd->nat_flags & FI_WILDP)
1029                 nat_stats.ns_wilds--;
1030         if (natd->nat_hnext[0])
1031                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1032         *natd->nat_phnext[0] = natd->nat_hnext[0];
1033         if (natd->nat_hnext[1])
1034                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1035         *natd->nat_phnext[1] = natd->nat_hnext[1];
1036         if (natd->nat_me != NULL)
1037                 *natd->nat_me = NULL;
1038
1039         if (natd->nat_fr != NULL) {
1040                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1041         }
1042
1043         if (natd->nat_hm != NULL)
1044                 nat_hostmapdel(natd->nat_hm);
1045
1046         /*
1047          * If there is an active reference from the nat entry to its parent
1048          * rule, decrement the rule's reference count and free it too if no
1049          * longer being used.
1050          */
1051         ipn = natd->nat_ptr;
1052         if (ipn != NULL) {
1053                 ipn->in_space++;
1054                 ipn->in_use--;
1055                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1056                         if (ipn->in_apr)
1057                                 appr_free(ipn->in_apr);
1058                         KFREE(ipn);
1059                         nat_stats.ns_rules--;
1060                 }
1061         }
1062
1063         MUTEX_DESTROY(&natd->nat_lock);
1064         /*
1065          * If there's a fragment table entry too for this nat entry, then
1066          * dereference that as well.
1067          */
1068         ipfr_forget((void *)natd);
1069         aps_free(natd->nat_aps);
1070         nat_stats.ns_inuse--;
1071         KFREE(natd);
1072 }
1073
1074
1075 /*
1076  * nat_flushtable - clear the NAT table of all mapping entries.
1077  * (this is for the dynamic mappings)
1078  */
1079 static int nat_flushtable()
1080 {
1081         register nat_t *nat, **natp;
1082         register int j = 0;
1083
1084         /*
1085          * ALL NAT mappings deleted, so lets just make the deletions
1086          * quicker.
1087          */
1088         if (nat_table[0] != NULL)
1089                 bzero((char *)nat_table[0],
1090                       sizeof(nat_table[0]) * ipf_nattable_sz);
1091         if (nat_table[1] != NULL)
1092                 bzero((char *)nat_table[1],
1093                       sizeof(nat_table[1]) * ipf_nattable_sz);
1094
1095         for (natp = &nat_instances; (nat = *natp); ) {
1096                 *natp = nat->nat_next;
1097 #ifdef  IPFILTER_LOG
1098                 nat_log(nat, NL_FLUSH);
1099 #endif
1100                 nat_delete(nat);
1101                 j++;
1102         }
1103         nat_stats.ns_inuse = 0;
1104         return j;
1105 }
1106
1107
1108 /*
1109  * nat_clearlist - delete all rules in the active NAT mapping list.
1110  * (this is for NAT/RDR rules)
1111  */
1112 int nat_clearlist()
1113 {
1114         register ipnat_t *n, **np = &nat_list;
1115         int i = 0;
1116
1117         if (nat_rules != NULL)
1118                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1119         if (rdr_rules != NULL)
1120                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1121
1122         while ((n = *np)) {
1123                 *np = n->in_next;
1124                 if (!n->in_use) {
1125                         if (n->in_apr)
1126                                 appr_free(n->in_apr);
1127                         KFREE(n);
1128                         nat_stats.ns_rules--;
1129                 } else {
1130                         n->in_flags |= IPN_DELETE;
1131                         n->in_next = NULL;
1132                 }
1133                 i++;
1134         }
1135         nat_masks = 0;
1136         rdr_masks = 0;
1137         return i;
1138 }
1139
1140
1141 /*
1142  * Create a new NAT table entry.
1143  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1144  *       If you intend on changing this, beware: appr_new() may call nat_new()
1145  *       recursively!
1146  */
1147 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1148 fr_info_t *fin;
1149 ip_t *ip;
1150 ipnat_t *np;
1151 nat_t **natsave;
1152 u_int flags;
1153 int direction;
1154 {
1155         register u_32_t sum1, sum2, sumd, l;
1156         u_short port = 0, sport = 0, dport = 0, nport = 0;
1157         struct in_addr in, inb;
1158         u_short nflags, sp, dp;
1159         tcphdr_t *tcp = NULL;
1160         hostmap_t *hm = NULL;
1161         nat_t *nat, *natl;
1162 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1163         qif_t *qf = fin->fin_qif;
1164 #endif
1165
1166         nflags = flags & np->in_flags;
1167         if (flags & IPN_TCPUDP) {
1168                 tcp = (tcphdr_t *)fin->fin_dp;
1169                 sport = htons(fin->fin_data[0]);
1170                 dport = htons(fin->fin_data[1]);
1171         }
1172
1173         /* Give me a new nat */
1174         KMALLOC(nat, nat_t *);
1175         if (nat == NULL) {
1176                 nat_stats.ns_memfail++;
1177                 return NULL;
1178         }
1179
1180         bzero((char *)nat, sizeof(*nat));
1181         nat->nat_flags = flags;
1182         if (flags & FI_WILDP)
1183                 nat_stats.ns_wilds++;
1184         /*
1185          * Search the current table for a match.
1186          */
1187         if (direction == NAT_OUTBOUND) {
1188                 /*
1189                  * Values at which the search for a free resouce starts.
1190                  */
1191                 u_32_t st_ip;
1192                 u_short st_port;
1193
1194                 /*
1195                  * If it's an outbound packet which doesn't match any existing
1196                  * record, then create a new port
1197                  */
1198                 l = 0;
1199                 st_ip = np->in_nip;
1200                 st_port = np->in_pnext;
1201
1202                 do {
1203                         port = 0;
1204                         in.s_addr = htonl(np->in_nip);
1205                         if (l == 0) {
1206                                 /*
1207                                  * Check to see if there is an existing NAT
1208                                  * setup for this IP address pair.
1209                                  */
1210                                 hm = nat_hostmap(np, fin->fin_src, in);
1211                                 if (hm != NULL)
1212                                         in.s_addr = hm->hm_mapip.s_addr;
1213                         } else if ((l == 1) && (hm != NULL)) {
1214                                 nat_hostmapdel(hm);
1215                                 hm = NULL;
1216                         }
1217                         in.s_addr = ntohl(in.s_addr);
1218
1219                         nat->nat_hm = hm;
1220
1221                         if ((np->in_outmsk == 0xffffffff) &&
1222                             (np->in_pnext == 0)) {
1223                                 if (l > 0)
1224                                         goto badnat;
1225                         }
1226
1227                         if (np->in_redir & NAT_MAPBLK) {
1228                                 if ((l >= np->in_ppip) || ((l > 0) &&
1229                                      !(flags & IPN_TCPUDP)))
1230                                         goto badnat;
1231                                 /*
1232                                  * map-block - Calculate destination address.
1233                                  */
1234                                 in.s_addr = ntohl(fin->fin_saddr);
1235                                 in.s_addr &= ntohl(~np->in_inmsk);
1236                                 inb.s_addr = in.s_addr;
1237                                 in.s_addr /= np->in_ippip;
1238                                 in.s_addr &= ntohl(~np->in_outmsk);
1239                                 in.s_addr += ntohl(np->in_outip);
1240                                 /*
1241                                  * Calculate destination port.
1242                                  */
1243                                 if ((flags & IPN_TCPUDP) &&
1244                                     (np->in_ppip != 0)) {
1245                                         port = ntohs(sport) + l;
1246                                         port %= np->in_ppip;
1247                                         port += np->in_ppip *
1248                                                 (inb.s_addr % np->in_ippip);
1249                                         port += MAPBLK_MINPORT;
1250                                         port = htons(port);
1251                                 }
1252                         } else if (!np->in_outip &&
1253                                    (np->in_outmsk == 0xffffffff)) {
1254                                 /*
1255                                  * 0/32 - use the interface's IP address.
1256                                  */
1257                                 if ((l > 0) ||
1258                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1259                                         goto badnat;
1260                                 in.s_addr = ntohl(in.s_addr);
1261                         } else if (!np->in_outip && !np->in_outmsk) {
1262                                 /*
1263                                  * 0/0 - use the original source address/port.
1264                                  */
1265                                 if (l > 0)
1266                                         goto badnat;
1267                                 in.s_addr = ntohl(fin->fin_saddr);
1268                         } else if ((np->in_outmsk != 0xffffffff) &&
1269                                    (np->in_pnext == 0) &&
1270                                    ((l > 0) || (hm == NULL)))
1271                                 np->in_nip++;
1272                         natl = NULL;
1273
1274                         if ((nflags & IPN_TCPUDP) &&
1275                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1276                             (np->in_flags & IPN_AUTOPORTMAP)) {
1277                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1278                                         if (l > np->in_space) {
1279                                                 goto badnat;
1280                                         } else if ((l > np->in_ppip) &&
1281                                                    np->in_outmsk != 0xffffffff)
1282                                                 np->in_nip++;
1283                                 }
1284                                 if (np->in_ppip != 0) {
1285                                         port = ntohs(sport);
1286                                         port += (l % np->in_ppip);
1287                                         port %= np->in_ppip;
1288                                         port += np->in_ppip *
1289                                                 (ntohl(fin->fin_saddr) %
1290                                                  np->in_ippip);
1291                                         port += MAPBLK_MINPORT;
1292                                         port = htons(port);
1293                                 }
1294                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1295                                    (nflags & IPN_TCPUDP) &&
1296                                    (np->in_pnext != 0)) {
1297                                 port = htons(np->in_pnext++);
1298                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1299                                         np->in_pnext = ntohs(np->in_pmin);
1300                                         if (np->in_outmsk != 0xffffffff)
1301                                                 np->in_nip++;
1302                                 }
1303                         }
1304
1305                         if (np->in_flags & IPN_IPRANGE) {
1306                                 if (np->in_nip > ntohl(np->in_outmsk))
1307                                         np->in_nip = ntohl(np->in_outip);
1308                         } else {
1309                                 if ((np->in_outmsk != 0xffffffff) &&
1310                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1311                                     ntohl(np->in_outip))
1312                                         np->in_nip = ntohl(np->in_outip) + 1;
1313                         }
1314
1315                         if (!port && (flags & IPN_TCPUDP))
1316                                 port = sport;
1317
1318                         /*
1319                          * Here we do a lookup of the connection as seen from
1320                          * the outside.  If an IP# pair already exists, try
1321                          * again.  So if you have A->B becomes C->B, you can
1322                          * also have D->E become C->E but not D->B causing
1323                          * another C->B.  Also take protocol and ports into
1324                          * account when determining whether a pre-existing
1325                          * NAT setup will cause an external conflict where
1326                          * this is appropriate.
1327                          */
1328                         inb.s_addr = htonl(in.s_addr);
1329                         sp = fin->fin_data[0];
1330                         dp = fin->fin_data[1];
1331                         fin->fin_data[0] = fin->fin_data[1];
1332                         fin->fin_data[1] = htons(port);
1333                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1334                                             (u_int)fin->fin_p, fin->fin_dst,
1335                                             inb, 1);
1336                         fin->fin_data[0] = sp;
1337                         fin->fin_data[1] = dp;
1338
1339                         /*
1340                          * Has the search wrapped around and come back to the
1341                          * start ?
1342                          */
1343                         if ((natl != NULL) &&
1344                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1345                             (np->in_nip != 0) && (st_ip == np->in_nip))
1346                                 goto badnat;
1347                         l++;
1348                 } while (natl != NULL);
1349
1350                 if (np->in_space > 0)
1351                         np->in_space--;
1352
1353                 /* Setup the NAT table */
1354                 nat->nat_inip = fin->fin_src;
1355                 nat->nat_outip.s_addr = htonl(in.s_addr);
1356                 nat->nat_oip = fin->fin_dst;
1357                 if (nat->nat_hm == NULL)
1358                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1359                                                   nat->nat_outip);
1360
1361                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1362                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1363
1364                 if (flags & IPN_TCPUDP) {
1365                         nat->nat_inport = sport;
1366                         nat->nat_outport = port;        /* sport */
1367                         nat->nat_oport = dport;
1368                 }
1369         } else {
1370                 /*
1371                  * Otherwise, it's an inbound packet. Most likely, we don't
1372                  * want to rewrite source ports and source addresses. Instead,
1373                  * we want to rewrite to a fixed internal address and fixed
1374                  * internal port.
1375                  */
1376                 if (np->in_flags & IPN_SPLIT) {
1377                         in.s_addr = np->in_nip;
1378                         if (np->in_inip == htonl(in.s_addr))
1379                                 np->in_nip = ntohl(np->in_inmsk);
1380                         else {
1381                                 np->in_nip = ntohl(np->in_inip);
1382                                 if (np->in_flags & IPN_ROUNDR) {
1383                                         nat_delrdr(np);
1384                                         nat_addrdr(np);
1385                                 }
1386                         }
1387                 } else {
1388                         in.s_addr = ntohl(np->in_inip);
1389                         if (np->in_flags & IPN_ROUNDR) {
1390                                 nat_delrdr(np);
1391                                 nat_addrdr(np);
1392                         }
1393                 }
1394                 if (!np->in_pnext)
1395                         nport = dport;
1396                 else {
1397                         /*
1398                          * Whilst not optimized for the case where
1399                          * pmin == pmax, the gain is not significant.
1400                          */
1401                         if (np->in_pmin != np->in_pmax) {
1402                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1403                                         ntohs(np->in_pnext);
1404                                 nport = ntohs(nport);
1405                         } else
1406                                 nport = np->in_pnext;
1407                 }
1408
1409                 /*
1410                  * When the redirect-to address is set to 0.0.0.0, just
1411                  * assume a blank `forwarding' of the packet.
1412                  */
1413                 if (in.s_addr == 0)
1414                         in.s_addr = ntohl(fin->fin_daddr);
1415
1416                 nat->nat_inip.s_addr = htonl(in.s_addr);
1417                 nat->nat_outip = fin->fin_dst;
1418                 nat->nat_oip = fin->fin_src;
1419
1420                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1421                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1422
1423                 if (flags & IPN_TCPUDP) {
1424                         nat->nat_inport = nport;
1425                         nat->nat_outport = dport;
1426                         nat->nat_oport = sport;
1427                 }
1428         }
1429
1430         CALC_SUMD(sum1, sum2, sumd);
1431         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1432 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1433         if ((flags & IPN_TCPUDP) && dohwcksum &&
1434             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1435                 if (direction == NAT_OUTBOUND)
1436                         sum1 = LONG_SUM(ntohl(in.s_addr));
1437                 else
1438                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1439                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1440                 sum1 += IPPROTO_TCP;
1441                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1442                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1443         } else
1444 #endif
1445                 nat->nat_sumd[1] = nat->nat_sumd[0];
1446
1447         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1448                 if (direction == NAT_OUTBOUND)
1449                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1450                 else
1451                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1452
1453                 sum2 = LONG_SUM(in.s_addr);
1454
1455                 CALC_SUMD(sum1, sum2, sumd);
1456                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1457         } else
1458                 nat->nat_ipsumd = nat->nat_sumd[0];
1459
1460         in.s_addr = htonl(in.s_addr);
1461
1462         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1463
1464         nat->nat_me = natsave;
1465         nat->nat_dir = direction;
1466         nat->nat_ifp = fin->fin_ifp;
1467         nat->nat_ptr = np;
1468         nat->nat_p = fin->fin_p;
1469         nat->nat_bytes = 0;
1470         nat->nat_pkts = 0;
1471         nat->nat_mssclamp = np->in_mssclamp;
1472         nat->nat_fr = fin->fin_fr;
1473         if (nat->nat_fr != NULL) {
1474                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1475         }
1476         if (direction == NAT_OUTBOUND) {
1477                 if (flags & IPN_TCPUDP)
1478                         tcp->th_sport = port;
1479         } else {
1480                 if (flags & IPN_TCPUDP)
1481                         tcp->th_dport = nport;
1482         }
1483
1484         nat_insert(nat);
1485
1486         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1487             (tcp != NULL && dport == np->in_dport)))
1488                 (void) appr_new(fin, ip, nat);
1489
1490         np->in_use++;
1491 #ifdef  IPFILTER_LOG
1492         nat_log(nat, (u_int)np->in_redir);
1493 #endif
1494         return nat;
1495 badnat:
1496         nat_stats.ns_badnat++;
1497         if ((hm = nat->nat_hm) != NULL)
1498                 nat_hostmapdel(hm);
1499         KFREE(nat);
1500         return NULL;
1501 }
1502
1503
1504 /*
1505  * Insert a NAT entry into the hash tables for searching and add it to the
1506  * list of active NAT entries.  Adjust global counters when complete.
1507  */
1508 void    nat_insert(nat)
1509 nat_t   *nat;
1510 {
1511         u_int hv1, hv2;
1512         nat_t **natp;
1513
1514         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1515
1516         nat->nat_age = fr_defnatage;
1517         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1518         if (nat->nat_ifname[0] !='\0') {
1519                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1520         }
1521
1522         nat->nat_next = nat_instances;
1523         nat_instances = nat;
1524
1525         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1526                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1527                                   0xffffffff);
1528                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1529                                   ipf_nattable_sz);
1530                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1531                                   0xffffffff);
1532                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1533                                  ipf_nattable_sz);
1534         } else {
1535                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1536                                   ipf_nattable_sz);
1537                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1538                                   ipf_nattable_sz);
1539         }
1540
1541         natp = &nat_table[0][hv1];
1542         if (*natp)
1543                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1544         nat->nat_phnext[0] = natp;
1545         nat->nat_hnext[0] = *natp;
1546         *natp = nat;
1547
1548         natp = &nat_table[1][hv2];
1549         if (*natp)
1550                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1551         nat->nat_phnext[1] = natp;
1552         nat->nat_hnext[1] = *natp;
1553         *natp = nat;
1554
1555         nat_stats.ns_added++;
1556         nat_stats.ns_inuse++;
1557 }
1558
1559
1560 nat_t *nat_icmplookup(ip, fin, dir)
1561 ip_t *ip;
1562 fr_info_t *fin;
1563 int dir;
1564 {
1565         icmphdr_t *icmp;
1566         tcphdr_t *tcp = NULL;
1567         ip_t *oip;
1568         int flags = 0, type, minlen;
1569
1570         icmp = (icmphdr_t *)fin->fin_dp;
1571         /*
1572          * Does it at least have the return (basic) IP header ?
1573          * Only a basic IP header (no options) should be with an ICMP error
1574          * header.
1575          */
1576         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1577                 return NULL;
1578         type = icmp->icmp_type;
1579         /*
1580          * If it's not an error type, then return.
1581          */
1582         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1583             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1584             (type != ICMP_PARAMPROB))
1585                 return NULL;
1586
1587         oip = (ip_t *)((char *)fin->fin_dp + 8);
1588         minlen = (oip->ip_hl << 2);
1589         if (minlen < sizeof(ip_t))
1590                 return NULL;
1591         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1592                 return NULL;
1593         /*
1594          * Is the buffer big enough for all of it ?  It's the size of the IP
1595          * header claimed in the encapsulated part which is of concern.  It
1596          * may be too big to be in this buffer but not so big that it's
1597          * outside the ICMP packet, leading to TCP deref's causing problems.
1598          * This is possible because we don't know how big oip_hl is when we
1599          * do the pullup early in fr_check() and thus can't gaurantee it is
1600          * all here now.
1601          */
1602 #ifdef  _KERNEL
1603         {
1604         mb_t *m;
1605
1606 # if SOLARIS
1607         m = fin->fin_qfm;
1608         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1609                 return NULL;
1610 # else
1611         m = *(mb_t **)fin->fin_mp;
1612         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1613             (char *)ip + m->m_len)
1614                 return NULL;
1615 # endif
1616         }
1617 #endif
1618
1619         if (oip->ip_p == IPPROTO_TCP)
1620                 flags = IPN_TCP;
1621         else if (oip->ip_p == IPPROTO_UDP)
1622                 flags = IPN_UDP;
1623         if (flags & IPN_TCPUDP) {
1624                 u_short data[2];
1625                 nat_t *nat;
1626
1627                 minlen += 8;            /* + 64bits of data to get ports */
1628                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1629                         return NULL;
1630
1631                 data[0] = fin->fin_data[0];
1632                 data[1] = fin->fin_data[1];
1633                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1634                 fin->fin_data[0] = ntohs(tcp->th_dport);
1635                 fin->fin_data[1] = ntohs(tcp->th_sport);
1636
1637                 if (dir == NAT_INBOUND) {
1638                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1639                                             oip->ip_dst, oip->ip_src, 0);
1640                 } else {
1641                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1642                                             oip->ip_dst, oip->ip_src, 0);
1643                 }
1644                 fin->fin_data[0] = data[0];
1645                 fin->fin_data[1] = data[1];
1646                 return nat;
1647         }
1648         if (dir == NAT_INBOUND)
1649                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1650                                     oip->ip_dst, oip->ip_src, 0);
1651         else
1652                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1653                                     oip->ip_dst, oip->ip_src, 0);
1654 }
1655
1656
1657 /*
1658  * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1659  * packet gets correctly recognised.
1660  */
1661 nat_t *nat_icmp(ip, fin, nflags, dir)
1662 ip_t *ip;
1663 fr_info_t *fin;
1664 u_int *nflags;
1665 int dir;
1666 {
1667         u_32_t sum1, sum2, sumd, sumd2 = 0;
1668         struct in_addr in;
1669         int flags, dlen;
1670         icmphdr_t *icmp;
1671         udphdr_t *udp;
1672         tcphdr_t *tcp;
1673         nat_t *nat;
1674         ip_t *oip;
1675
1676         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1677                 return NULL;
1678         /*
1679          * nat_icmplookup() will return NULL for `defective' packets.
1680          */
1681         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1682                 return NULL;
1683
1684         flags = 0;
1685         *nflags = IPN_ICMPERR;
1686         icmp = (icmphdr_t *)fin->fin_dp;
1687         oip = (ip_t *)&icmp->icmp_ip;
1688         if (oip->ip_p == IPPROTO_TCP)
1689                 flags = IPN_TCP;
1690         else if (oip->ip_p == IPPROTO_UDP)
1691                 flags = IPN_UDP;
1692         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1693         dlen = ip->ip_len - ((char *)udp - (char *)ip);
1694         /*
1695          * XXX - what if this is bogus hl and we go off the end ?
1696          * In this case, nat_icmplookup() will have returned NULL.
1697          */
1698         tcp = (tcphdr_t *)udp;
1699
1700         /*
1701          * Need to adjust ICMP header to include the real IP#'s and
1702          * port #'s.  Only apply a checksum change relative to the
1703          * IP address change as it will be modified again in ip_natout
1704          * for both address and port.  Two checksum changes are
1705          * necessary for the two header address changes.  Be careful
1706          * to only modify the checksum once for the port # and twice
1707          * for the IP#.
1708          */
1709
1710         /*
1711          * Step 1
1712          * Fix the IP addresses in the offending IP packet. You also need
1713          * to adjust the IP header checksum of that offending IP packet
1714          * and the ICMP checksum of the ICMP error message itself.
1715          *
1716          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1717          * in the pseudo header that is used to compute the UDP resp. TCP
1718          * checksum. So, we must compensate that as well. Even worse, the
1719          * change in the UDP and TCP checksums require yet another
1720          * adjustment of the ICMP checksum of the ICMP error message.
1721          *
1722          */
1723
1724         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1725                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1726                 in = nat->nat_inip;
1727                 oip->ip_src = in;
1728         } else {
1729                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1730                 in = nat->nat_outip;
1731                 oip->ip_dst = in;
1732         }
1733
1734         sum2 = LONG_SUM(ntohl(in.s_addr));
1735
1736         CALC_SUMD(sum1, sum2, sumd);
1737
1738         if (nat->nat_dir == NAT_OUTBOUND) {
1739                 /*
1740                  * Fix IP checksum of the offending IP packet to adjust for
1741                  * the change in the IP address.
1742                  *
1743                  * Normally, you would expect that the ICMP checksum of the 
1744                  * ICMP error message needs to be adjusted as well for the
1745                  * IP address change in oip.
1746                  * However, this is a NOP, because the ICMP checksum is 
1747                  * calculated over the complete ICMP packet, which includes the
1748                  * changed oip IP addresses and oip->ip_sum. However, these 
1749                  * two changes cancel each other out (if the delta for
1750                  * the IP address is x, then the delta for ip_sum is minus x), 
1751                  * so no change in the icmp_cksum is necessary.
1752                  *
1753                  * Be careful that nat_dir refers to the direction of the
1754                  * offending IP packet (oip), not to its ICMP response (icmp)
1755                  */
1756                 fix_datacksum(&oip->ip_sum, sumd);
1757
1758                 /*
1759                  * Fix UDP pseudo header checksum to compensate for the
1760                  * IP address change.
1761                  */
1762                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1763                         /*
1764                          * The UDP checksum is optional, only adjust it 
1765                          * if it has been set.
1766                          */
1767                         sum1 = ntohs(udp->uh_sum);
1768                         fix_datacksum(&udp->uh_sum, sumd);
1769                         sum2 = ntohs(udp->uh_sum);
1770
1771                         /*
1772                          * Fix ICMP checksum to compensate the UDP 
1773                          * checksum adjustment.
1774                          */
1775                         CALC_SUMD(sum1, sum2, sumd);
1776                         sumd2 = sumd;
1777                 }
1778
1779                 /*
1780                  * Fix TCP pseudo header checksum to compensate for the 
1781                  * IP address change. Before we can do the change, we
1782                  * must make sure that oip is sufficient large to hold
1783                  * the TCP checksum (normally it does not!).
1784                  */
1785                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1786                 
1787                         sum1 = ntohs(tcp->th_sum);
1788                         fix_datacksum(&tcp->th_sum, sumd);
1789                         sum2 = ntohs(tcp->th_sum);
1790
1791                         /*
1792                          * Fix ICMP checksum to compensate the TCP 
1793                          * checksum adjustment.
1794                          */
1795                         CALC_SUMD(sum1, sum2, sumd);
1796                         sumd2 = sumd;
1797                 }
1798         } else {
1799
1800                 /*
1801                  * Fix IP checksum of the offending IP packet to adjust for
1802                  * the change in the IP address.
1803                  *
1804                  * Normally, you would expect that the ICMP checksum of the 
1805                  * ICMP error message needs to be adjusted as well for the
1806                  * IP address change in oip.
1807                  * However, this is a NOP, because the ICMP checksum is 
1808                  * calculated over the complete ICMP packet, which includes the
1809                  * changed oip IP addresses and oip->ip_sum. However, these 
1810                  * two changes cancel each other out (if the delta for
1811                  * the IP address is x, then the delta for ip_sum is minus x), 
1812                  * so no change in the icmp_cksum is necessary.
1813                  *
1814                  * Be careful that nat_dir refers to the direction of the
1815                  * offending IP packet (oip), not to its ICMP response (icmp)
1816                  */
1817                 fix_datacksum(&oip->ip_sum, sumd);
1818
1819 /* XXX FV : without having looked at Solaris source code, it seems unlikely
1820  * that SOLARIS would compensate this in the kernel (a body of an IP packet 
1821  * in the data section of an ICMP packet). I have the feeling that this should
1822  * be unconditional, but I'm not in a position to check.
1823  */
1824 #if !SOLARIS && !defined(__sgi)
1825                 /*
1826                  * Fix UDP pseudo header checksum to compensate for the
1827                  * IP address change.
1828                  */
1829                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1830                         /*
1831                          * The UDP checksum is optional, only adjust it 
1832                          * if it has been set 
1833                          */
1834                         sum1 = ntohs(udp->uh_sum);
1835                         fix_datacksum(&udp->uh_sum, sumd);
1836                         sum2 = ntohs(udp->uh_sum);
1837
1838                         /*
1839                          * Fix ICMP checksum to compensate the UDP 
1840                          * checksum adjustment.
1841                          */
1842                         CALC_SUMD(sum1, sum2, sumd);
1843                         sumd2 = sumd;
1844                 }
1845                 
1846                 /* 
1847                  * Fix TCP pseudo header checksum to compensate for the 
1848                  * IP address change. Before we can do the change, we
1849                  * must make sure that oip is sufficient large to hold
1850                  * the TCP checksum (normally it does not!).
1851                  */
1852                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1853                 
1854                         sum1 = ntohs(tcp->th_sum);
1855                         fix_datacksum(&tcp->th_sum, sumd);
1856                         sum2 = ntohs(tcp->th_sum);
1857
1858                         /*
1859                          * Fix ICMP checksum to compensate the TCP
1860                          * checksum adjustment.
1861                          */
1862                         CALC_SUMD(sum1, sum2, sumd);
1863                         sumd2 = sumd;
1864                 }
1865 #endif
1866         }
1867
1868         if ((flags & IPN_TCPUDP) != 0) {
1869                 /*
1870                  * Step 2 :
1871                  * For offending TCP/UDP IP packets, translate the ports as
1872                  * well, based on the NAT specification. Of course such
1873                  * a change must be reflected in the ICMP checksum as well.
1874                  *
1875                  * Advance notice : Now it becomes complicated :-)
1876                  *
1877                  * Since the port fields are part of the TCP/UDP checksum
1878                  * of the offending IP packet, you need to adjust that checksum
1879                  * as well... but, if you change, you must change the icmp
1880                  * checksum *again*, to reflect that change.
1881                  *
1882                  * To further complicate: the TCP checksum is not in the first
1883                  * 8 bytes of the offending ip packet, so it most likely is not
1884                  * available. Some OSses like Solaris return enough bytes to
1885                  * include the TCP checksum. So we have to check if the
1886                  * ip->ip_len actually holds the TCP checksum of the oip!
1887                  */
1888
1889                 if (nat->nat_oport == tcp->th_dport) {
1890                         if (tcp->th_sport != nat->nat_inport) {
1891                                 /*
1892                                  * Fix ICMP checksum to compensate port
1893                                  * adjustment.
1894                                  */
1895                                 sum1 = ntohs(tcp->th_sport);
1896                                 sum2 = ntohs(nat->nat_inport);
1897                                 CALC_SUMD(sum1, sum2, sumd);
1898                                 sumd2 += sumd;
1899                                 tcp->th_sport = nat->nat_inport;
1900
1901                                 /*
1902                                  * Fix udp checksum to compensate port
1903                                  * adjustment.  NOTE : the offending IP packet
1904                                  * flows the other direction compared to the
1905                                  * ICMP message.
1906                                  *
1907                                  * The UDP checksum is optional, only adjust
1908                                  * it if it has been set.
1909                                  */
1910                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1911
1912                                         sum1 = ntohs(udp->uh_sum);
1913                                         fix_datacksum(&udp->uh_sum, sumd);
1914                                         sum2 = ntohs(udp->uh_sum);
1915
1916                                         /*
1917                                          * Fix ICMP checksum to 
1918                                          * compensate UDP checksum 
1919                                          * adjustment.
1920                                          */
1921                                         CALC_SUMD(sum1, sum2, sumd);
1922                                         sumd2 += sumd;
1923                                 }
1924
1925                                 /*
1926                                  * Fix tcp checksum (if present) to compensate
1927                                  * port adjustment. NOTE : the offending IP
1928                                  * packet flows the other direction compared to
1929                                  * the ICMP message.
1930                                  */
1931                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1932
1933                                         sum1 = ntohs(tcp->th_sum);
1934                                         fix_datacksum(&tcp->th_sum, sumd);
1935                                         sum2 = ntohs(tcp->th_sum);
1936
1937                                         /*
1938                                          * Fix ICMP checksum to 
1939                                          * compensate TCP checksum 
1940                                          * adjustment.
1941                                          */
1942                                         CALC_SUMD(sum1, sum2, sumd);
1943                                         sumd2 += sumd;
1944                                 }
1945                         }
1946                 } else {
1947                         if (tcp->th_dport != nat->nat_outport) {
1948                                 /*
1949                                  * Fix ICMP checksum to compensate port
1950                                  * adjustment.
1951                                  */
1952                                 sum1 = ntohs(tcp->th_dport);
1953                                 sum2 = ntohs(nat->nat_outport);
1954                                 CALC_SUMD(sum1, sum2, sumd);
1955                                 sumd2 += sumd;
1956                                 tcp->th_dport = nat->nat_outport;
1957
1958                                 /*
1959                                  * Fix udp checksum to compensate port
1960                                  * adjustment.   NOTE : the offending IP
1961                                  * packet flows the other direction compared
1962                                  * to the ICMP message.
1963                                  *
1964                                  * The UDP checksum is optional, only adjust
1965                                  * it if it has been set.
1966                                  */
1967                                 if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
1968
1969                                         sum1 = ntohs(udp->uh_sum);
1970                                         fix_datacksum(&udp->uh_sum, sumd);
1971                                         sum2 = ntohs(udp->uh_sum);
1972
1973                                         /*
1974                                          * Fix ICMP checksum to compensate
1975                                          * UDP checksum adjustment.
1976                                          */
1977                                         CALC_SUMD(sum1, sum2, sumd);
1978                                         sumd2 += sumd;
1979                                 }
1980
1981                                 /*
1982                                  * Fix tcp checksum (if present) to compensate
1983                                  * port adjustment. NOTE : the offending IP
1984                                  * packet flows the other direction compared to
1985                                  * the ICMP message.
1986                                  */
1987                                 if (oip->ip_p == IPPROTO_TCP && dlen >= 18) {
1988
1989                                         sum1 = ntohs(tcp->th_sum);
1990                                         fix_datacksum(&tcp->th_sum, sumd);
1991                                         sum2 = ntohs(tcp->th_sum);
1992
1993                                         /*
1994                                          * Fix ICMP checksum to compensate
1995                                          * UDP checksum adjustment.
1996                                          */
1997                                         CALC_SUMD(sum1, sum2, sumd);
1998                                         sumd2 += sumd;
1999                                 }
2000                         }
2001                 }
2002                 if (sumd2) {
2003                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2004                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
2005                         if (nat->nat_dir == NAT_OUTBOUND) {
2006                                 fix_outcksum(fin, &icmp->icmp_cksum, sumd2);
2007                         } else {
2008                                 fix_incksum(fin, &icmp->icmp_cksum, sumd2);
2009                         }
2010                 }
2011         }
2012         if (oip->ip_p == IPPROTO_ICMP)
2013                 nat->nat_age = fr_defnaticmpage;
2014         return nat;
2015 }
2016
2017
2018 /*
2019  * NB: these lookups don't lock access to the list, it assume it has already
2020  * been done!
2021  */
2022 /*
2023  * Lookup a nat entry based on the mapped destination ip address/port and
2024  * real source address/port.  We use this lookup when receiving a packet,
2025  * we're looking for a table entry, based on the destination address.
2026  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2027  */
2028 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2029 fr_info_t *fin;
2030 register u_int flags, p;
2031 struct in_addr src , mapdst;
2032 int rw;
2033 {
2034         register u_short sport, dport;
2035         register nat_t *nat;
2036         register int nflags;
2037         register u_32_t dst;
2038         ipnat_t *ipn;
2039         void *ifp;
2040         u_int hv;
2041
2042         if (fin != NULL)
2043                 ifp = fin->fin_ifp;
2044         else
2045                 ifp = NULL;
2046         dst = mapdst.s_addr;
2047         if (flags & IPN_TCPUDP) {
2048                 sport = htons(fin->fin_data[0]);
2049                 dport = htons(fin->fin_data[1]);
2050         } else {
2051                 sport = 0;
2052                 dport = 0;
2053         }
2054
2055         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2056         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2057         nat = nat_table[1][hv];
2058         for (; nat; nat = nat->nat_hnext[1]) {
2059                 nflags = nat->nat_flags;
2060                 if ((!ifp || ifp == nat->nat_ifp) &&
2061                     nat->nat_oip.s_addr == src.s_addr &&
2062                     nat->nat_outip.s_addr == dst &&
2063                     ((p == 0) || (p == nat->nat_p))) {
2064                         switch (p)
2065                         {
2066                         case IPPROTO_TCP :
2067                         case IPPROTO_UDP :
2068                                 if (nat->nat_oport != sport)
2069                                         continue;
2070                                 if (nat->nat_outport != dport)
2071                                         continue;
2072                                 break;
2073                         default :
2074                                 break;
2075                         }
2076
2077                         ipn = nat->nat_ptr;
2078                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2079                                 if (appr_match(fin, nat) != 0)
2080                                         continue;
2081                         return nat;
2082                 }
2083         }
2084         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2085                 return NULL;
2086         if (!rw) {
2087                 RWLOCK_EXIT(&ipf_nat);
2088         }
2089         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2090         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2091         if (!rw) {
2092                 WRITE_ENTER(&ipf_nat);
2093         }
2094         nat = nat_table[1][hv];
2095         for (; nat; nat = nat->nat_hnext[1]) {
2096                 nflags = nat->nat_flags;
2097                 if (ifp && ifp != nat->nat_ifp)
2098                         continue;
2099                 if (!(nflags & FI_WILDP))
2100                         continue;
2101                 if (nat->nat_oip.s_addr != src.s_addr ||
2102                     nat->nat_outip.s_addr != dst)
2103                         continue;
2104                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2105                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2106                         nat_tabmove(fin, nat);
2107                         break;
2108                 }
2109         }
2110         if (!rw) {
2111                 MUTEX_DOWNGRADE(&ipf_nat);
2112         }
2113         return nat;
2114 }
2115
2116
2117 /*
2118  * This function is only called for TCP/UDP NAT table entries where the
2119  * original was placed in the table without hashing on the ports and we now
2120  * want to include hashing on port numbers.
2121  */
2122 static void nat_tabmove(fin, nat)
2123 fr_info_t *fin;
2124 nat_t *nat;
2125 {
2126         register u_short sport, dport;
2127         u_int hv, nflags;
2128         nat_t **natp;
2129
2130         nflags = nat->nat_flags;
2131
2132         sport = ntohs(fin->fin_data[0]);
2133         dport = ntohs(fin->fin_data[1]);
2134
2135         /*
2136          * Remove the NAT entry from the old location
2137          */
2138         if (nat->nat_hnext[0])
2139                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2140         *nat->nat_phnext[0] = nat->nat_hnext[0];
2141
2142         if (nat->nat_hnext[1])
2143                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2144         *nat->nat_phnext[1] = nat->nat_hnext[1];
2145
2146         /*
2147          * Add into the NAT table in the new position
2148          */
2149         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2150         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2151         natp = &nat_table[0][hv];
2152         if (*natp)
2153                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2154         nat->nat_phnext[0] = natp;
2155         nat->nat_hnext[0] = *natp;
2156         *natp = nat;
2157
2158         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2159         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2160         natp = &nat_table[1][hv];
2161         if (*natp)
2162                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2163         nat->nat_phnext[1] = natp;
2164         nat->nat_hnext[1] = *natp;
2165         *natp = nat;
2166 }
2167
2168
2169 /*
2170  * Lookup a nat entry based on the source 'real' ip address/port and
2171  * destination address/port.  We use this lookup when sending a packet out,
2172  * we're looking for a table entry, based on the source address.
2173  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2174  */
2175 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2176 fr_info_t *fin;
2177 register u_int flags, p;
2178 struct in_addr src , dst;
2179 int rw;
2180 {
2181         register u_short sport, dport;
2182         register nat_t *nat;
2183         register int nflags;
2184         ipnat_t *ipn;
2185         u_32_t srcip;
2186         void *ifp;
2187         u_int hv;
2188
2189         ifp = fin->fin_ifp;
2190         srcip = src.s_addr;
2191         if (flags & IPN_TCPUDP) {
2192                 sport = ntohs(fin->fin_data[0]);
2193                 dport = ntohs(fin->fin_data[1]);
2194         } else {
2195                 sport = 0;
2196                 dport = 0;
2197         }
2198
2199         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2200         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2201         nat = nat_table[0][hv];
2202         for (; nat; nat = nat->nat_hnext[0]) {
2203                 nflags = nat->nat_flags;
2204
2205                 if ((!ifp || ifp == nat->nat_ifp) &&
2206                     nat->nat_inip.s_addr == srcip &&
2207                     nat->nat_oip.s_addr == dst.s_addr &&
2208                     ((p == 0) || (p == nat->nat_p))) {
2209                         switch (p)
2210                         {
2211                         case IPPROTO_TCP :
2212                         case IPPROTO_UDP :
2213                                 if (nat->nat_oport != dport)
2214                                         continue;
2215                                 if (nat->nat_inport != sport)
2216                                         continue;
2217                                 break;
2218                         default :
2219                                 break;
2220                         }
2221
2222                         ipn = nat->nat_ptr;
2223                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2224                                 if (appr_match(fin, nat) != 0)
2225                                         continue;
2226                         return nat;
2227                 }
2228         }
2229         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2230                 return NULL;
2231         if (!rw) {
2232                 RWLOCK_EXIT(&ipf_nat);
2233         }
2234
2235         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2236         if (!rw) {
2237                 WRITE_ENTER(&ipf_nat);
2238         }
2239         nat = nat_table[0][hv];
2240         for (; nat; nat = nat->nat_hnext[0]) {
2241                 nflags = nat->nat_flags;
2242                 if (ifp && ifp != nat->nat_ifp)
2243                         continue;
2244                 if (!(nflags & FI_WILDP))
2245                         continue;
2246                 if ((nat->nat_inip.s_addr != srcip) ||
2247                     (nat->nat_oip.s_addr != dst.s_addr))
2248                         continue;
2249                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2250                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2251                         nat_tabmove(fin, nat);
2252                         break;
2253                 }
2254         }
2255         if (!rw) {
2256                 MUTEX_DOWNGRADE(&ipf_nat);
2257         }
2258         return nat;
2259 }
2260
2261
2262 /*
2263  * Lookup the NAT tables to search for a matching redirect
2264  */
2265 nat_t *nat_lookupredir(np)
2266 register natlookup_t *np;
2267 {
2268         nat_t *nat;
2269         fr_info_t fi;
2270
2271         bzero((char *)&fi, sizeof(fi));
2272         fi.fin_data[0] = ntohs(np->nl_inport);
2273         fi.fin_data[1] = ntohs(np->nl_outport);
2274
2275         /*
2276          * If nl_inip is non null, this is a lookup based on the real
2277          * ip address. Else, we use the fake.
2278          */
2279         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2280                                  np->nl_outip, 0))) {
2281                 np->nl_realip = nat->nat_outip;
2282                 np->nl_realport = nat->nat_outport;
2283         }
2284         return nat;
2285 }
2286
2287
2288 static int nat_match(fin, np, ip)
2289 fr_info_t *fin;
2290 ipnat_t *np;
2291 ip_t *ip;
2292 {
2293         frtuc_t *ft;
2294
2295         if (ip->ip_v != 4)
2296                 return 0;
2297
2298         if (np->in_p && fin->fin_p != np->in_p)
2299                 return 0;
2300         if (fin->fin_out) {
2301                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2302                         return 0;
2303                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2304                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2305                         return 0;
2306                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2307                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2308                         return 0;
2309         } else {
2310                 if (!(np->in_redir & NAT_REDIRECT))
2311                         return 0;
2312                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2313                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2314                         return 0;
2315                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2316                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2317                         return 0;
2318         }
2319
2320         ft = &np->in_tuc;
2321         if (!(fin->fin_fl & FI_TCPUDP) ||
2322             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2323                 if (ft->ftu_scmp || ft->ftu_dcmp)
2324                         return 0;
2325                 return 1;
2326         }
2327
2328         return fr_tcpudpchk(ft, fin);
2329 }
2330
2331
2332 /*
2333  * Packets going out on the external interface go through this.
2334  * Here, the source address requires alteration, if anything.
2335  */
2336 int ip_natout(ip, fin)
2337 ip_t *ip;
2338 fr_info_t *fin;
2339 {
2340         register ipnat_t *np = NULL;
2341         register u_32_t ipa;
2342         tcphdr_t *tcp = NULL;
2343         u_short sport = 0, dport = 0, *csump = NULL;
2344         int natadd = 1, i, icmpset = 1;
2345         u_int nflags = 0, hv, msk;
2346         struct ifnet *ifp;
2347         frentry_t *fr;
2348         void *sifp;
2349         u_32_t iph;
2350         nat_t *nat;
2351
2352         if (nat_list == NULL || (fr_nat_lock))
2353                 return 0;
2354
2355         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2356             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2357                 sifp = fin->fin_ifp;
2358                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2359         } else
2360                 sifp = fin->fin_ifp;
2361         ifp = fin->fin_ifp;
2362
2363         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2364                 if (fin->fin_p == IPPROTO_TCP)
2365                         nflags = IPN_TCP;
2366                 else if (fin->fin_p == IPPROTO_UDP)
2367                         nflags = IPN_UDP;
2368                 if ((nflags & IPN_TCPUDP)) {
2369                         tcp = (tcphdr_t *)fin->fin_dp;
2370                         sport = tcp->th_sport;
2371                         dport = tcp->th_dport;
2372                 }
2373         }
2374
2375         ipa = fin->fin_saddr;
2376
2377         READ_ENTER(&ipf_nat);
2378
2379         if ((fin->fin_p == IPPROTO_ICMP) &&
2380             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2381                 icmpset = 1;
2382         else if ((fin->fin_fl & FI_FRAG) &&
2383             (nat = ipfr_nat_knownfrag(ip, fin)))
2384                 natadd = 0;
2385         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2386                                       (u_int)fin->fin_p, fin->fin_src,
2387                                       fin->fin_dst, 0))) {
2388                 nflags = nat->nat_flags;
2389                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2390                         if ((nflags & FI_W_SPORT) &&
2391                             (nat->nat_inport != sport))
2392                                 nat->nat_inport = sport;
2393                         if ((nflags & FI_W_DPORT) &&
2394                             (nat->nat_oport != dport))
2395                                 nat->nat_oport = dport;
2396
2397                         if (nat->nat_outport == 0)
2398                                 nat->nat_outport = sport;
2399                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2400                         nflags = nat->nat_flags;
2401                         nat_stats.ns_wilds--;
2402                 }
2403         } else {
2404                 RWLOCK_EXIT(&ipf_nat);
2405
2406                 msk = 0xffffffff;
2407                 i = 32;
2408
2409                 WRITE_ENTER(&ipf_nat);
2410                 /*
2411                  * If there is no current entry in the nat table for this IP#,
2412                  * create one for it (if there is a matching rule).
2413                  */
2414 maskloop:
2415                 iph = ipa & htonl(msk);
2416                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2417                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2418                 {
2419                         if (np->in_ifp && (np->in_ifp != ifp))
2420                                 continue;
2421                         if ((np->in_flags & IPN_RF) &&
2422                             !(np->in_flags & nflags))
2423                                 continue;
2424                         if (np->in_flags & IPN_FILTER) {
2425                                 if (!nat_match(fin, np, ip))
2426                                         continue;
2427                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2428                                 continue;
2429                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2430                                 continue;
2431                         nat = nat_new(fin, ip, np, NULL,
2432                                       (u_int)nflags, NAT_OUTBOUND);
2433                         if (nat != NULL) {
2434                                 np->in_hits++;
2435                                 break;
2436                         }
2437                 }
2438                 if ((np == NULL) && (i > 0)) {
2439                         do {
2440                                 i--;
2441                                 msk <<= 1;
2442                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2443                         if (i >= 0)
2444                                 goto maskloop;
2445                 }
2446                 MUTEX_DOWNGRADE(&ipf_nat);
2447         }
2448
2449         /*
2450          * NOTE: ipf_nat must now only be held as a read lock
2451          */
2452         if (nat) {
2453                 np = nat->nat_ptr;
2454                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2455                         ipfr_nat_newfrag(ip, fin, nat);
2456                 MUTEX_ENTER(&nat->nat_lock);
2457                 if (fin->fin_p != IPPROTO_TCP) {
2458                         if (np && np->in_age[1])
2459                                 nat->nat_age = np->in_age[1];
2460                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2461                                 nat->nat_age = fr_defnaticmpage;
2462                         else
2463                                 nat->nat_age = fr_defnatage;
2464                 }
2465                 nat->nat_bytes += ip->ip_len;
2466                 nat->nat_pkts++;
2467                 MUTEX_EXIT(&nat->nat_lock);
2468
2469                 /*
2470                  * Fix up checksums, not by recalculating them, but
2471                  * simply computing adjustments.
2472                  */
2473                 if (nflags == IPN_ICMPERR) {
2474                         u_32_t s1, s2, sumd;
2475
2476                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2477                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2478                         CALC_SUMD(s1, s2, sumd);
2479
2480                         if (nat->nat_dir == NAT_OUTBOUND)
2481                                 fix_outcksum(fin, &ip->ip_sum, sumd);
2482                         else
2483                                 fix_incksum(fin, &ip->ip_sum, sumd);
2484                 }
2485 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2486                 else {
2487                         if (nat->nat_dir == NAT_OUTBOUND)
2488                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2489                         else
2490                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2491                 }
2492 #endif
2493                 /*
2494                  * Only change the packet contents, not what is filtered upon.
2495                  */
2496                 ip->ip_src = nat->nat_outip;
2497
2498                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2499
2500                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2501                                 tcp->th_sport = nat->nat_outport;
2502                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2503                         }
2504
2505                         if (fin->fin_p == IPPROTO_TCP) {
2506                                 csump = &tcp->th_sum;
2507                                 MUTEX_ENTER(&nat->nat_lock);
2508                                 fr_tcp_age(&nat->nat_age,
2509                                            nat->nat_tcpstate, fin, 1, 0);
2510                                 if (nat->nat_age < fr_defnaticmpage)
2511                                         nat->nat_age = fr_defnaticmpage;
2512 #ifdef LARGE_NAT
2513                                 else if (nat->nat_age > fr_defnatage)
2514                                         nat->nat_age = fr_defnatage;
2515 #endif
2516                                 /*
2517                                  * Increase this because we may have
2518                                  * "keep state" following this too and
2519                                  * packet storms can occur if this is
2520                                  * removed too quickly.
2521                                  */
2522                                 if (nat->nat_age == fr_tcpclosed)
2523                                         nat->nat_age = fr_tcplastack;
2524
2525                                 /*
2526                                  * Do a MSS CLAMPING on a SYN packet,
2527                                  * only deal IPv4 for now.
2528                                  */
2529                                 if (nat->nat_mssclamp &&
2530                                     (tcp->th_flags & TH_SYN) != 0)
2531                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2532                                                      fin, csump);
2533
2534                                 MUTEX_EXIT(&nat->nat_lock);
2535                         } else if (fin->fin_p == IPPROTO_UDP) {
2536                                 udphdr_t *udp = (udphdr_t *)tcp;
2537
2538                                 if (udp->uh_sum)
2539                                         csump = &udp->uh_sum;
2540                         }
2541
2542                         if (csump) {
2543                                 if (nat->nat_dir == NAT_OUTBOUND)
2544                                         fix_outcksum(fin, csump,
2545                                                      nat->nat_sumd[1]);
2546                                 else
2547                                         fix_incksum(fin, csump,
2548                                                     nat->nat_sumd[1]);
2549                         }
2550                 }
2551
2552                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2553                      (tcp != NULL && dport == np->in_dport))) {
2554                         i = appr_check(ip, fin, nat);
2555                         if (i == 0)
2556                                 i = 1;
2557                         else if (i == -1)
2558                                 nat->nat_drop[1]++;
2559                 } else
2560                         i = 1;
2561                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2562                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2563                 fin->fin_ifp = sifp;
2564                 return i;
2565         }
2566         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2567         fin->fin_ifp = sifp;
2568         return 0;
2569 }
2570
2571
2572 /*
2573  * Packets coming in from the external interface go through this.
2574  * Here, the destination address requires alteration, if anything.
2575  */
2576 int ip_natin(ip, fin)
2577 ip_t *ip;
2578 fr_info_t *fin;
2579 {
2580         register struct in_addr src;
2581         register struct in_addr in;
2582         register ipnat_t *np;
2583         u_short sport = 0, dport = 0, *csump = NULL;
2584         u_int nflags = 0, natadd = 1, hv, msk;
2585         struct ifnet *ifp = fin->fin_ifp;
2586         tcphdr_t *tcp = NULL;
2587         int i, icmpset = 0;
2588         nat_t *nat;
2589         u_32_t iph;
2590
2591         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2592                 return 0;
2593
2594         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2595                 if (fin->fin_p == IPPROTO_TCP)
2596                         nflags = IPN_TCP;
2597                 else if (fin->fin_p == IPPROTO_UDP)
2598                         nflags = IPN_UDP;
2599                 if ((nflags & IPN_TCPUDP)) {
2600                         tcp = (tcphdr_t *)fin->fin_dp;
2601                         sport = tcp->th_sport;
2602                         dport = tcp->th_dport;
2603                 }
2604         }
2605
2606         in = fin->fin_dst;
2607         /* make sure the source address is to be redirected */
2608         src = fin->fin_src;
2609
2610         READ_ENTER(&ipf_nat);
2611
2612         if ((fin->fin_p == IPPROTO_ICMP) &&
2613             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2614                 icmpset = 1;
2615         else if ((fin->fin_fl & FI_FRAG) &&
2616                  (nat = ipfr_nat_knownfrag(ip, fin)))
2617                 natadd = 0;
2618         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2619                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2620                 nflags = nat->nat_flags;
2621                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2622                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2623                                 nat->nat_oport = sport;
2624                         if ((nat->nat_outport != dport) &&
2625                                  (nflags & FI_W_SPORT))
2626                                 nat->nat_outport = dport;
2627                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2628                         nflags = nat->nat_flags;
2629                         nat_stats.ns_wilds--;
2630                 }
2631         } else {
2632                 RWLOCK_EXIT(&ipf_nat);
2633
2634                 msk = 0xffffffff;
2635                 i = 32;
2636
2637                 WRITE_ENTER(&ipf_nat);
2638                 /*
2639                  * If there is no current entry in the nat table for this IP#,
2640                  * create one for it (if there is a matching rule).
2641                  */
2642 maskloop:
2643                 iph = in.s_addr & htonl(msk);
2644                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2645                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2646                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2647                             (np->in_p && (np->in_p != fin->fin_p)) ||
2648                             (np->in_flags && !(nflags & np->in_flags)))
2649                                 continue;
2650                         if (np->in_flags & IPN_FILTER) {
2651                                 if (!nat_match(fin, np, ip))
2652                                         continue;
2653                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2654                                 continue;
2655                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2656                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2657                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2658                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2659                                                     NAT_INBOUND))) {
2660                                         np->in_hits++;
2661                                         break;
2662                                 }
2663                 }
2664
2665                 if ((np == NULL) && (i > 0)) {
2666                         do {
2667                                 i--;
2668                                 msk <<= 1;
2669                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2670                         if (i >= 0)
2671                                 goto maskloop;
2672                 }
2673                 MUTEX_DOWNGRADE(&ipf_nat);
2674         }
2675
2676         /*
2677          * NOTE: ipf_nat must now only be held as a read lock
2678          */
2679         if (nat) {
2680                 np = nat->nat_ptr;
2681                 fin->fin_fr = nat->nat_fr;
2682                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2683                         ipfr_nat_newfrag(ip, fin, nat);
2684                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2685                      (tcp != NULL && sport == np->in_dport))) {
2686                         i = appr_check(ip, fin, nat);
2687                         if (i == -1) {
2688                                 nat->nat_drop[0]++;
2689                                 RWLOCK_EXIT(&ipf_nat);
2690                                 return i;
2691                         }
2692                 }
2693
2694                 MUTEX_ENTER(&nat->nat_lock);
2695                 if (fin->fin_p != IPPROTO_TCP) {
2696                         if (np && np->in_age[0])
2697                                 nat->nat_age = np->in_age[0];
2698                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2699                                 nat->nat_age = fr_defnaticmpage;
2700                         else
2701                                 nat->nat_age = fr_defnatage;
2702                 }
2703                 nat->nat_bytes += ip->ip_len;
2704                 nat->nat_pkts++;
2705                 MUTEX_EXIT(&nat->nat_lock);
2706                 ip->ip_dst = nat->nat_inip;
2707                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2708
2709                 /*
2710                  * Fix up checksums, not by recalculating them, but
2711                  * simply computing adjustments.
2712                  */
2713 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
2714                 if (nat->nat_dir == NAT_OUTBOUND)
2715                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2716                 else
2717                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2718 #endif
2719                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2720
2721                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2722                                 tcp->th_dport = nat->nat_inport;
2723                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2724                         }
2725
2726                         if (fin->fin_p == IPPROTO_TCP) {
2727                                 csump = &tcp->th_sum;
2728                                 MUTEX_ENTER(&nat->nat_lock);
2729                                 fr_tcp_age(&nat->nat_age,
2730                                            nat->nat_tcpstate, fin, 0, 0);
2731                                 if (nat->nat_age < fr_defnaticmpage)
2732                                         nat->nat_age = fr_defnaticmpage;
2733 #ifdef LARGE_NAT
2734                                 else if (nat->nat_age > fr_defnatage)
2735                                         nat->nat_age = fr_defnatage;
2736 #endif
2737                                 /*
2738                                  * Increase this because we may have
2739                                  * "keep state" following this too and
2740                                  * packet storms can occur if this is
2741                                  * removed too quickly.
2742                                  */
2743                                 if (nat->nat_age == fr_tcpclosed)
2744                                         nat->nat_age = fr_tcplastack;
2745                                 /*
2746                                  * Do a MSS CLAMPING on a SYN packet,
2747                                  * only deal IPv4 for now.
2748                                  */
2749                                 if (nat->nat_mssclamp &&
2750                                     (tcp->th_flags & TH_SYN) != 0)
2751                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2752                                                      fin, csump);
2753
2754                                 MUTEX_EXIT(&nat->nat_lock);
2755                         } else if (fin->fin_p == IPPROTO_UDP) {
2756                                 udphdr_t *udp = (udphdr_t *)tcp;
2757
2758                                 if (udp->uh_sum)
2759                                         csump = &udp->uh_sum;
2760                         }
2761
2762                         if (csump) {
2763                                 if (nat->nat_dir == NAT_OUTBOUND)
2764                                         fix_incksum(fin, csump,
2765                                                     nat->nat_sumd[0]);
2766                                 else
2767                                         fix_outcksum(fin, csump,
2768                                                     nat->nat_sumd[0]);
2769                         }
2770                 }
2771                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2772                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2773                 return 1;
2774         }
2775         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2776         return 0;
2777 }
2778
2779
2780 /*
2781  * Free all memory used by NAT structures allocated at runtime.
2782  */
2783 void ip_natunload()
2784 {
2785         WRITE_ENTER(&ipf_nat);
2786         (void) nat_clearlist();
2787         (void) nat_flushtable();
2788         RWLOCK_EXIT(&ipf_nat);
2789
2790         if (nat_table[0] != NULL) {
2791                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2792                 nat_table[0] = NULL;
2793         }
2794         if (nat_table[1] != NULL) {
2795                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2796                 nat_table[1] = NULL;
2797         }
2798         if (nat_rules != NULL) {
2799                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2800                 nat_rules = NULL;
2801         }
2802         if (rdr_rules != NULL) {
2803                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2804                 rdr_rules = NULL;
2805         }
2806         if (maptable != NULL) {
2807                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2808                 maptable = NULL;
2809         }
2810 }
2811
2812
2813 /*
2814  * Slowly expire held state for NAT entries.  Timeouts are set in
2815  * expectation of this being called twice per second.
2816  */
2817 void ip_natexpire()
2818 {
2819         register struct nat *nat, **natp;
2820 #if defined(_KERNEL) && !SOLARIS
2821         int s;
2822 #endif
2823
2824         SPL_NET(s);
2825         WRITE_ENTER(&ipf_nat);
2826         for (natp = &nat_instances; (nat = *natp); ) {
2827                 nat->nat_age--;
2828                 if (nat->nat_age) {
2829                         natp = &nat->nat_next;
2830                         continue;
2831                 }
2832                 *natp = nat->nat_next;
2833 #ifdef  IPFILTER_LOG
2834                 nat_log(nat, NL_EXPIRE);
2835 #endif
2836                 nat_delete(nat);
2837                 nat_stats.ns_expire++;
2838         }
2839         RWLOCK_EXIT(&ipf_nat);
2840         SPL_X(s);
2841 }
2842
2843
2844 /*
2845  */
2846 void ip_natsync(ifp)
2847 void *ifp;
2848 {
2849         register ipnat_t *n;
2850         register nat_t *nat;
2851         register u_32_t sum1, sum2, sumd;
2852         struct in_addr in;
2853         ipnat_t *np;
2854         void *ifp2;
2855 #if defined(_KERNEL) && !SOLARIS
2856         int s;
2857 #endif
2858
2859         /*
2860          * Change IP addresses for NAT sessions for any protocol except TCP
2861          * since it will break the TCP connection anyway.
2862          */
2863         SPL_NET(s);
2864         WRITE_ENTER(&ipf_nat);
2865         for (nat = nat_instances; nat; nat = nat->nat_next)
2866                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2867                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2868                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2869                         ifp2 = nat->nat_ifp;
2870                         /*
2871                          * Change the map-to address to be the same as the
2872                          * new one.
2873                          */
2874                         sum1 = nat->nat_outip.s_addr;
2875                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2876                                 nat->nat_outip = in;
2877                         sum2 = nat->nat_outip.s_addr;
2878
2879                         if (sum1 == sum2)
2880                                 continue;
2881                         /*
2882                          * Readjust the checksum adjustment to take into
2883                          * account the new IP#.
2884                          */
2885                         CALC_SUMD(sum1, sum2, sumd);
2886                         /* XXX - dont change for TCP when solaris does
2887                          * hardware checksumming.
2888                          */
2889                         sumd += nat->nat_sumd[0];
2890                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2891                         nat->nat_sumd[1] = nat->nat_sumd[0];
2892                 }
2893
2894         for (n = nat_list; (n != NULL); n = n->in_next)
2895                 if (n->in_ifp == ifp) {
2896                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2897                         if (!n->in_ifp)
2898                                 n->in_ifp = (void *)-1;
2899                 }
2900         RWLOCK_EXIT(&ipf_nat);
2901         SPL_X(s);
2902 }
2903
2904
#ifdef	IPFILTER_LOG
/*
 * Build a natlog record describing NAT session `nat' and hand it to
 * ipllog() on the IPL_LOGNAT device.
 *
 * nat  - the session being logged.
 * type - stored in the record's nl_type field (e.g. NL_EXPIRE).
 *
 * nl_rule is the position of the session's rule in nat_list, or -1 when the
 * session has no rule, the rule is not on the list, or LARGE_NAT is defined
 * (the list walk is skipped in that configuration).
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
	struct natlog natl;
	void *items[1];
	size_t sizes[1];
	int types[1];

	/*
	 * Clear the whole record first so that structure padding and any
	 * member not assigned below do not carry stale stack contents into
	 * the log (presumably readable from userland - confirm).
	 */
	bzero((char *)&natl, sizeof(natl));

	natl.nl_inip = nat->nat_inip;
	natl.nl_outip = nat->nat_outip;
	natl.nl_origip = nat->nat_oip;
	natl.nl_bytes = nat->nat_bytes;
	natl.nl_pkts = nat->nat_pkts;
	natl.nl_origport = nat->nat_oport;
	natl.nl_inport = nat->nat_inport;
	natl.nl_outport = nat->nat_outport;
	natl.nl_p = nat->nat_p;
	natl.nl_type = type;
	natl.nl_rule = -1;
#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		struct ipnat *np;
		int rulen;

		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
			if (np == nat->nat_ptr) {
				natl.nl_rule = rulen;
				break;
			}
	}
#endif
	items[0] = &natl;
	sizes[0] = sizeof(natl);
	types[0] = 0;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2943
2944
#if defined(__OpenBSD__)
/*
 * Interface-detach hook (OpenBSD only).  The ifp argument is not examined;
 * the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync();
}
#endif
2953
2954
2955 /*
2956  * Check for MSS option and clamp it if necessary.
2957  */
2958 static void nat_mssclamp(tcp, maxmss, fin, csump)
2959 tcphdr_t *tcp;
2960 u_32_t maxmss;
2961 fr_info_t *fin;
2962 u_short *csump;
2963 {
2964         u_char *cp, *ep, opt;
2965         int hlen, advance;
2966         u_32_t mss, sumd;
2967         u_short v;
2968
2969         hlen = tcp->th_off << 2;
2970         if (hlen > sizeof(*tcp)) {
2971                 cp = (u_char *)tcp + sizeof(*tcp);
2972                 ep = (u_char *)tcp + hlen;
2973
2974                 while (cp < ep) {
2975                         opt = cp[0];
2976                         if (opt == TCPOPT_EOL)
2977                                 break;
2978                         else if (opt == TCPOPT_NOP) {
2979                                 cp++;
2980                                 continue;
2981                         }
2982  
2983                         if (&cp[1] >= ep)
2984                                 break;
2985                         advance = cp[1];
2986                         if (&cp[advance] >= ep)
2987                                 break;
2988                         switch (opt) {
2989                         case TCPOPT_MAXSEG:
2990                                 if (advance != 4)
2991                                         break;
2992                                 bcopy(&cp[2], &v, sizeof(v));
2993                                 mss = ntohs(v);
2994                                 if (mss > maxmss) {
2995                                         v = htons(maxmss);
2996                                         bcopy(&v, &cp[2], sizeof(v));
2997                                         CALC_SUMD(mss, maxmss, sumd);
2998                                         fix_outcksum(fin, csump, sumd);
2999                                 }
3000                                 break;
3001                         default:
3002                                 /* ignore unknown options */
3003                                 break;
3004                         }
3005                     
3006                         cp += advance;  
3007                 }       
3008         }       
3009 }