Merge from vendor branch NTPD:
[dragonfly.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  *
8  * @(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed
9  * @(#)$Id: ip_nat.c,v 2.37.2.70 2002/08/28 12:45:48 darrenr Exp $
10  * $FreeBSD: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.22.2.8 2004/07/04 09:24:39 darrenr Exp $
11  * $DragonFly: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.8 2004/09/25 03:42:58 dillon Exp $
12  */
13 #if (defined(__DragonFly__) || defined(__FreeBSD__)) && defined(KERNEL) && !defined(_KERNEL)
14 #define _KERNEL
15 #endif
16
17 #if defined(__sgi) && (IRIX > 602)
18 # include <sys/ptimers.h>
19 #endif
20 #include <sys/errno.h>
21 #include <sys/types.h>
22 #include <sys/param.h>
23 #include <sys/time.h>
24 #include <sys/file.h>
25 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26     defined(_KERNEL)
27 # include "opt_ipfilter_log.h"
28 #endif
29 #if !defined(_KERNEL) && !defined(KERNEL)
30 # include <stdio.h>
31 # include <string.h>
32 # include <stdlib.h>
33 #endif
34 #if (defined(KERNEL) || defined(_KERNEL)) && (defined(__DragonFly__) || __FreeBSD_version >= 220000)
35 # include <sys/filio.h>
36 # include <sys/fcntl.h>
37 #else
38 # include <sys/ioctl.h>
39 #endif
40 #include <sys/fcntl.h>
41 #ifndef linux
42 # include <sys/protosw.h>
43 #endif
44 #include <sys/socket.h>
45 #if defined(_KERNEL) && !defined(linux)
46 # include <sys/systm.h>
47 #endif
48 #if !defined(__SVR4) && !defined(__svr4__)
49 # ifndef linux
50 #  include <sys/mbuf.h>
51 # endif
52 #else
53 # include <sys/filio.h>
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 #  include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
62 # include <sys/queue.h>
63 #endif
64 #include <net/if.h>
65 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
66 # include <net/if_var.h>
67 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
68 #  include "opt_ipfilter.h"
69 # endif
70 #endif
71 #ifdef sun
72 # include <net/af.h>
73 #endif
74 #include <net/route.h>
75 #include <netinet/in.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78
79 #ifdef __sgi
80 # ifdef IFF_DRVRLOCK /* IRIX6 */
81 #include <sys/hashing.h>
82 #include <netinet/in_var.h>
83 # endif
84 #endif
85
86 #ifdef RFC1825
87 # include <vpn/md5.h>
88 # include <vpn/ipsec.h>
89 extern struct ifnet vpnif;
90 #endif
91
92 #ifndef linux
93 # include <netinet/ip_var.h>
94 # include <netinet/tcp_fsm.h>
95 #endif
96 #include <netinet/tcp.h>
97 #include <netinet/udp.h>
98 #include <netinet/ip_icmp.h>
99 #include "ip_compat.h"
100 #include <netinet/tcpip.h>
101 #include "ip_fil.h"
102 #include "ip_nat.h"
103 #include "ip_frag.h"
104 #include "ip_state.h"
105 #include "ip_proxy.h"
106 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
107 # include <sys/malloc.h>
108 #endif
109 #ifndef MIN
110 # define        MIN(a,b)        (((a)<(b))?(a):(b))
111 #endif
112 #undef  SOCKADDR_IN
113 #define SOCKADDR_IN     struct sockaddr_in
114
115 static const char sccsid[] = "@(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed";
116
117 nat_t   **nat_table[2] = { NULL, NULL },
118         *nat_instances = NULL;
119 ipnat_t *nat_list = NULL;
120 u_int   ipf_nattable_max = NAT_TABLE_MAX;
121 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
122 u_int   ipf_natrules_sz = NAT_SIZE;
123 u_int   ipf_rdrrules_sz = RDR_SIZE;
124 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
125 u_32_t  nat_masks = 0;
126 u_32_t  rdr_masks = 0;
127 ipnat_t **nat_rules = NULL;
128 ipnat_t **rdr_rules = NULL;
129 hostmap_t       **maptable  = NULL;
130
131 u_long  fr_defnatage = DEF_NAT_AGE,
132         fr_defnaticmpage = 6;           /* 3 seconds */
133 natstat_t nat_stats;
134 int     fr_nat_lock = 0;
135 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
136 extern  kmutex_t        ipf_rw;
137 extern  KRWLOCK_T       ipf_nat;
138 #endif
139
140 static  int     nat_flushtable (void);
141 static  void    nat_addnat (struct ipnat *);
142 static  void    nat_addrdr (struct ipnat *);
143 static  void    nat_delete (struct nat *);
144 static  void    nat_delrdr (struct ipnat *);
145 static  void    nat_delnat (struct ipnat *);
146 static  int     fr_natgetent (caddr_t);
147 static  int     fr_natgetsz (caddr_t);
148 static  int     fr_natputent (caddr_t);
149 static  void    nat_tabmove (fr_info_t *, nat_t *);
150 static  int     nat_match (fr_info_t *, ipnat_t *, ip_t *);
151 static  hostmap_t *nat_hostmap (ipnat_t *, struct in_addr,
152                                     struct in_addr);
153 static  void    nat_hostmapdel (struct hostmap *);
154 static  void    nat_mssclamp (tcphdr_t *, u_32_t, fr_info_t *, u_short *);
155
156
157 int nat_init()
158 {
159         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
160         if (nat_table[0] != NULL)
161                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
162         else
163                 return -1;
164
165         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
166         if (nat_table[1] != NULL)
167                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
168         else
169                 return -1;
170
171         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
172         if (nat_rules != NULL)
173                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
174         else
175                 return -1;
176
177         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
178         if (rdr_rules != NULL)
179                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
180         else
181                 return -1;
182
183         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
184         if (maptable != NULL)
185                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
186         else
187                 return -1;
188         return 0;
189 }
190
191
192 static void nat_addrdr(n)
193 ipnat_t *n;
194 {
195         ipnat_t **np;
196         u_32_t j;
197         u_int hv;
198         int k;
199
200         k = countbits(n->in_outmsk);
201         if ((k >= 0) && (k != 32))
202                 rdr_masks |= 1 << k;
203         j = (n->in_outip & n->in_outmsk);
204         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
205         np = rdr_rules + hv;
206         while (*np != NULL)
207                 np = &(*np)->in_rnext;
208         n->in_rnext = NULL;
209         n->in_prnext = np;
210         *np = n;
211 }
212
213
214 static void nat_addnat(n)
215 ipnat_t *n;
216 {
217         ipnat_t **np;
218         u_32_t j;
219         u_int hv;
220         int k;
221
222         k = countbits(n->in_inmsk);
223         if ((k >= 0) && (k != 32))
224                 nat_masks |= 1 << k;
225         j = (n->in_inip & n->in_inmsk);
226         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
227         np = nat_rules + hv;
228         while (*np != NULL)
229                 np = &(*np)->in_mnext;
230         n->in_mnext = NULL;
231         n->in_pmnext = np;
232         *np = n;
233 }
234
235
236 static void nat_delrdr(n)
237 ipnat_t *n;
238 {
239         if (n->in_rnext)
240                 n->in_rnext->in_prnext = n->in_prnext;
241         *n->in_prnext = n->in_rnext;
242 }
243
244
245 static void nat_delnat(n)
246 ipnat_t *n;
247 {
248         if (n->in_mnext)
249                 n->in_mnext->in_pmnext = n->in_pmnext;
250         *n->in_pmnext = n->in_mnext;
251 }
252
253
254 /*
255  * check if an ip address has already been allocated for a given mapping that
256  * is not doing port based translation.
257  *
258  * Must be called with ipf_nat held as a write lock.
259  */
260 static struct hostmap *nat_hostmap(np, real, map)
261 ipnat_t *np;
262 struct in_addr real;
263 struct in_addr map;
264 {
265         hostmap_t *hm;
266         u_int hv;
267
268         hv = real.s_addr % HOSTMAP_SIZE;
269         for (hm = maptable[hv]; hm; hm = hm->hm_next)
270                 if ((hm->hm_realip.s_addr == real.s_addr) &&
271                     (np == hm->hm_ipnat)) {
272                         hm->hm_ref++;
273                         return hm;
274                 }
275
276         KMALLOC(hm, hostmap_t *);
277         if (hm) {
278                 hm->hm_next = maptable[hv];
279                 hm->hm_pnext = maptable + hv;
280                 if (maptable[hv])
281                         maptable[hv]->hm_pnext = &hm->hm_next;
282                 maptable[hv] = hm;
283                 hm->hm_ipnat = np;
284                 hm->hm_realip = real;
285                 hm->hm_mapip = map;
286                 hm->hm_ref = 1;
287         }
288         return hm;
289 }
290
291
292 /*
293  * Must be called with ipf_nat held as a write lock.
294  */
295 static void nat_hostmapdel(hm)
296 struct hostmap *hm;
297 {
298         ATOMIC_DEC32(hm->hm_ref);
299         if (hm->hm_ref == 0) {
300                 if (hm->hm_next)
301                         hm->hm_next->hm_pnext = hm->hm_pnext;
302                 *hm->hm_pnext = hm->hm_next;
303                 KFREE(hm);
304         }
305 }
306
307
308 void fix_outcksum(fin, sp, n)
309 fr_info_t *fin;
310 u_short *sp;
311 u_32_t n;
312 {
313         u_short sumshort;
314         u_32_t sum1;
315
316         if (!n)
317                 return;
318         else if (n & NAT_HW_CKSUM) {
319                 n &= 0xffff;
320                 n += fin->fin_dlen;
321                 n = (n & 0xffff) + (n >> 16);
322                 *sp = n & 0xffff;
323                 return;
324         }
325         sum1 = (~ntohs(*sp)) & 0xffff;
326         sum1 += (n);
327         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
328         /* Again */
329         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
330         sumshort = ~(u_short)sum1;
331         *(sp) = htons(sumshort);
332 }
333
334
335 void fix_incksum(fin, sp, n)
336 fr_info_t *fin;
337 u_short *sp;
338 u_32_t n;
339 {
340         u_short sumshort;
341         u_32_t sum1;
342
343         if (!n)
344                 return;
345         else if (n & NAT_HW_CKSUM) {
346                 n &= 0xffff;
347                 n += fin->fin_dlen;
348                 n = (n & 0xffff) + (n >> 16);
349                 *sp = n & 0xffff;
350                 return;
351         }
352 #ifdef sparc
353         sum1 = (~(*sp)) & 0xffff;
354 #else
355         sum1 = (~ntohs(*sp)) & 0xffff;
356 #endif
357         sum1 += ~(n) & 0xffff;
358         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
359         /* Again */
360         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
361         sumshort = ~(u_short)sum1;
362         *(sp) = htons(sumshort);
363 }
364
365
366 /*
367  * fix_datacksum is used *only* for the adjustments of checksums in the data
368  * section of an IP packet.
369  *
370  * The only situation in which you need to do this is when NAT'ing an 
371  * ICMP error message. Such a message, contains in its body the IP header
372  * of the original IP packet, that causes the error.
373  *
374  * You can't use fix_incksum or fix_outcksum in that case, because for the
375  * kernel the data section of the ICMP error is just data, and no special 
376  * processing like hardware cksum or ntohs processing have been done by the 
377  * kernel on the data section.
378  */
379 void fix_datacksum(sp, n)
380 u_short *sp;
381 u_32_t n;
382 {
383         u_short sumshort;
384          u_32_t sum1;
385
386         if (!n)
387                 return;
388
389         sum1 = (~ntohs(*sp)) & 0xffff;
390         sum1 += (n);
391         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
392         /* Again */
393         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
394         sumshort = ~(u_short)sum1;
395         *(sp) = htons(sumshort);
396 }
397
398 /*
399  * How the NAT is organised and works.
400  *
401  * Inside (interface y) NAT       Outside (interface x)
402  * -------------------- -+- -------------------------------------
403  * Packet going          |   out, processed by ip_natout() for x
404  * ------------>         |   ------------>
405  * src=10.1.1.1          |   src=192.1.1.1
406  *                       |
407  *                       |   in, processed by ip_natin() for x
408  * <------------         |   <------------
409  * dst=10.1.1.1          |   dst=192.1.1.1
410  * -------------------- -+- -------------------------------------
411  * ip_natout() - changes ip_src and if required, sport
412  *             - creates a new mapping, if required.
413  * ip_natin()  - changes ip_dst and if required, dport
414  *
415  * In the NAT table, internal source is recorded as "in" and externally
416  * seen as "out".
417  */
418
419 /*
420  * Handle ioctls which manipulate the NAT.
421  */
422 int nat_ioctl(data, cmd, mode)
423 #if defined(__DragonFly__) || defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
424 u_long cmd;
425 #else
426 int cmd;
427 #endif
428 caddr_t data;
429 int mode;
430 {
431         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
432         int error = 0, ret, arg, getlock;
433         ipnat_t natd;
434         u_32_t i, j;
435
436 #if (BSD >= 199306) && defined(_KERNEL)
437         if ((securelevel >= 3) && (mode & FWRITE))
438                 return EPERM;
439 #endif
440
441         nat = NULL;     /* XXX gcc -Wuninitialized */
442         KMALLOC(nt, ipnat_t *);
443         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
444         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
445                 if (mode & NAT_SYSSPACE) {
446                         bcopy(data, (char *)&natd, sizeof(natd));
447                         error = 0;
448                 } else {
449                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
450                 }
451         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
452                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
453                 if (error)
454                         error = EFAULT;
455         }
456
457         if (error)
458                 goto done;
459
460         /*
461          * For add/delete, look to see if the NAT entry is already present
462          */
463         if (getlock == 1) {
464                 WRITE_ENTER(&ipf_nat);
465         }
466         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
467                 nat = &natd;
468                 nat->in_flags &= IPN_USERFLAGS;
469                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
470                         if ((nat->in_flags & IPN_SPLIT) == 0)
471                                 nat->in_inip &= nat->in_inmsk;
472                         if ((nat->in_flags & IPN_IPRANGE) == 0)
473                                 nat->in_outip &= nat->in_outmsk;
474                 }
475                 for (np = &nat_list; (n = *np); np = &n->in_next)
476                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
477                                         IPN_CMPSIZ)) {
478                                 if (n->in_redir == NAT_REDIRECT &&
479                                     n->in_pnext != nat->in_pnext)
480                                         continue;
481                                 break;
482                         }
483         }
484
485         switch (cmd)
486         {
487 #ifdef  IPFILTER_LOG
488         case SIOCIPFFB :
489         {
490                 int tmp;
491
492                 if (!(mode & FWRITE))
493                         error = EPERM;
494                 else {
495                         tmp = ipflog_clear(IPL_LOGNAT);
496                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
497                 }
498                 break;
499         }
500 #endif
501         case SIOCADNAT :
502                 if (!(mode & FWRITE)) {
503                         error = EPERM;
504                         break;
505                 }
506                 if (n) {
507                         error = EEXIST;
508                         break;
509                 }
510                 if (nt == NULL) {
511                         error = ENOMEM;
512                         break;
513                 }
514                 n = nt;
515                 nt = NULL;
516                 bcopy((char *)nat, (char *)n, sizeof(*n));
517                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
518                 if (!n->in_ifp)
519                         n->in_ifp = (void *)-1;
520                 if (n->in_plabel[0] != '\0') {
521                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
522                         if (!n->in_apr) {
523                                 error = ENOENT;
524                                 break;
525                         }
526                 }
527                 n->in_next = NULL;
528                 *np = n;
529
530                 if (n->in_redir & NAT_REDIRECT) {
531                         n->in_flags &= ~IPN_NOTDST;
532                         nat_addrdr(n);
533                 }
534                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
535                         n->in_flags &= ~IPN_NOTSRC;
536                         nat_addnat(n);
537                 }
538
539                 n->in_use = 0;
540                 if (n->in_redir & NAT_MAPBLK)
541                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
542                 else if (n->in_flags & IPN_AUTOPORTMAP)
543                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
544                 else if (n->in_flags & IPN_IPRANGE)
545                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
546                 else if (n->in_flags & IPN_SPLIT)
547                         n->in_space = 2;
548                 else
549                         n->in_space = ~ntohl(n->in_outmsk);
550                 /*
551                  * Calculate the number of valid IP addresses in the output
552                  * mapping range.  In all cases, the range is inclusive of
553                  * the start and ending IP addresses.
554                  * If to a CIDR address, lose 2: broadcast + network address
555                  *                               (so subtract 1)
556                  * If to a range, add one.
557                  * If to a single IP address, set to 1.
558                  */
559                 if (n->in_space) {
560                         if ((n->in_flags & IPN_IPRANGE) != 0)
561                                 n->in_space += 1;
562                         else
563                                 n->in_space -= 1;
564                 } else
565                         n->in_space = 1;
566                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
567                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
568                         n->in_nip = ntohl(n->in_outip) + 1;
569                 else if ((n->in_flags & IPN_SPLIT) &&
570                          (n->in_redir & NAT_REDIRECT))
571                         n->in_nip = ntohl(n->in_inip);
572                 else
573                         n->in_nip = ntohl(n->in_outip);
574                 if (n->in_redir & NAT_MAP) {
575                         n->in_pnext = ntohs(n->in_pmin);
576                         /*
577                          * Multiply by the number of ports made available.
578                          */
579                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
580                                 n->in_space *= (ntohs(n->in_pmax) -
581                                                 ntohs(n->in_pmin) + 1);
582                                 /*
583                                  * Because two different sources can map to
584                                  * different destinations but use the same
585                                  * local IP#/port #.
586                                  * If the result is smaller than in_space, then
587                                  * we may have wrapped around 32bits.
588                                  */
589                                 i = n->in_inmsk;
590                                 if ((i != 0) && (i != 0xffffffff)) {
591                                         j = n->in_space * (~ntohl(i) + 1);
592                                         if (j >= n->in_space)
593                                                 n->in_space = j;
594                                         else
595                                                 n->in_space = 0xffffffff;
596                                 }
597                         }
598                         /*
599                          * If no protocol is specified, multiple by 256.
600                          */
601                         if ((n->in_flags & IPN_TCPUDP) == 0) {
602                                         j = n->in_space * 256;
603                                         if (j >= n->in_space)
604                                                 n->in_space = j;
605                                         else
606                                                 n->in_space = 0xffffffff;
607                         }
608                 }
609                 /* Otherwise, these fields are preset */
610                 n = NULL;
611                 nat_stats.ns_rules++;
612                 break;
613         case SIOCRMNAT :
614                 if (!(mode & FWRITE)) {
615                         error = EPERM;
616                         n = NULL;
617                         break;
618                 }
619                 if (!n) {
620                         error = ESRCH;
621                         break;
622                 }
623                 if (n->in_redir & NAT_REDIRECT)
624                         nat_delrdr(n);
625                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
626                         nat_delnat(n);
627                 if (nat_list == NULL) {
628                         nat_masks = 0;
629                         rdr_masks = 0;
630                 }
631                 *np = n->in_next;
632                 if (!n->in_use) {
633                         if (n->in_apr)
634                                 appr_free(n->in_apr);
635                         KFREE(n);
636                         nat_stats.ns_rules--;
637                 } else {
638                         n->in_flags |= IPN_DELETE;
639                         n->in_next = NULL;
640                 }
641                 n = NULL;
642                 break;
643         case SIOCGNATS :
644                 MUTEX_DOWNGRADE(&ipf_nat);
645                 nat_stats.ns_table[0] = nat_table[0];
646                 nat_stats.ns_table[1] = nat_table[1];
647                 nat_stats.ns_list = nat_list;
648                 nat_stats.ns_maptable = maptable;
649                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
650                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
651                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
652                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
653                 nat_stats.ns_instances = nat_instances;
654                 nat_stats.ns_apslist = ap_sess_list;
655                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
656                                   sizeof(nat_stats));
657                 break;
658         case SIOCGNATL :
659             {
660                 natlookup_t nl;
661
662                 MUTEX_DOWNGRADE(&ipf_nat);
663                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
664                 if (error)
665                         break;
666
667                 if (nat_lookupredir(&nl)) {
668                         error = IWCOPYPTR((char *)&nl, (char *)data,
669                                           sizeof(nl));
670                 } else
671                         error = ESRCH;
672                 break;
673             }
674         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
675                 if (!(mode & FWRITE)) {
676                         error = EPERM;
677                         break;
678                 }
679                 error = 0;
680                 if (arg == 0)
681                         ret = nat_flushtable();
682                 else if (arg == 1)
683                         ret = nat_clearlist();
684                 else
685                         error = EINVAL;
686                 MUTEX_DOWNGRADE(&ipf_nat);
687                 if (!error) {
688                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
689                         if (error)
690                                 error = EFAULT;
691                 }
692                 break;
693         case SIOCSTLCK :
694                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
695                 if (!error) {
696                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
697                                         sizeof(fr_nat_lock));
698                         if (!error)
699                                 fr_nat_lock = arg;
700                 } else
701                         error = EFAULT;
702                 break;
703         case SIOCSTPUT :
704                 if (fr_nat_lock)
705                         error = fr_natputent(data);
706                 else
707                         error = EACCES;
708                 break;
709         case SIOCSTGSZ :
710                 if (fr_nat_lock)
711                         error = fr_natgetsz(data);
712                 else
713                         error = EACCES;
714                 break;
715         case SIOCSTGET :
716                 if (fr_nat_lock)
717                         error = fr_natgetent(data);
718                 else
719                         error = EACCES;
720                 break;
721         case FIONREAD :
722 #ifdef  IPFILTER_LOG
723                 arg = (int)iplused[IPL_LOGNAT];
724                 MUTEX_DOWNGRADE(&ipf_nat);
725                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
726                 if (error)
727                         error = EFAULT;
728 #endif
729                 break;
730         default :
731                 error = EINVAL;
732                 break;
733         }
734         if (getlock == 1) {
735                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
736         }
737 done:
738         if (nt)
739                 KFREE(nt);
740         return error;
741 }
742
743
744 static int fr_natgetsz(data)
745 caddr_t data;
746 {
747         ap_session_t *aps;
748         nat_t *nat, *n;
749         int error = 0;
750         natget_t ng;
751
752         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
753         if (error)
754                 return EFAULT;
755
756         nat = ng.ng_ptr;
757         if (!nat) {
758                 nat = nat_instances;
759                 ng.ng_sz = 0;
760                 if (nat == NULL) {
761                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
762                         if (error)
763                                 error = EFAULT;
764                         return error;
765                 }
766         } else {
767                 /*
768                  * Make sure the pointer we're copying from exists in the
769                  * current list of entries.  Security precaution to prevent
770                  * copying of random kernel data.
771                  */
772                 for (n = nat_instances; n; n = n->nat_next)
773                         if (n == nat)
774                                 break;
775                 if (!n)
776                         return ESRCH;
777         }
778
779         ng.ng_sz = sizeof(nat_save_t);
780         aps = nat->nat_aps;
781         if ((aps != NULL) && (aps->aps_data != 0)) {
782                 ng.ng_sz += sizeof(ap_session_t);
783                 ng.ng_sz += aps->aps_psiz;
784                 if (aps->aps_psiz > 4)  /* XXX - sizeof(ipn_data) */
785                         ng.ng_sz -= 4;
786         }
787
788         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
789         if (error)
790                 error = EFAULT;
791         return error;
792 }
793
794
795 static int fr_natgetent(data)
796 caddr_t data;
797 {
798         nat_save_t ipn, *ipnp, *ipnn = NULL;
799         nat_t *n, *nat;
800         ap_session_t *aps;
801         size_t dsz;
802         int error;
803
804         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
805         if (error)
806                 return EFAULT;
807         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
808         if (error)
809                 return EFAULT;
810
811         nat = ipn.ipn_next;
812         if (!nat) {
813                 nat = nat_instances;
814                 if (nat == NULL) {
815                         if (nat_instances == NULL)
816                                 return ENOENT;
817                         return 0;
818                 }
819         } else {
820                 /*
821                  * Make sure the pointer we're copying from exists in the
822                  * current list of entries.  Security precaution to prevent
823                  * copying of random kernel data.
824                  */
825                 for (n = nat_instances; n; n = n->nat_next)
826                         if (n == nat)
827                                 break;
828                 if (!n)
829                         return ESRCH;
830         }
831
832         ipn.ipn_next = nat->nat_next;
833         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
834         ipn.ipn_nat.nat_data = NULL;
835
836         if (nat->nat_ptr) {
837                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
838                       sizeof(ipn.ipn_ipnat));
839         }
840
841         if (nat->nat_fr)
842                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
843                       sizeof(ipn.ipn_rule));
844
845         if ((aps = nat->nat_aps)) {
846                 dsz = sizeof(*aps);
847                 if (aps->aps_data)
848                         dsz += aps->aps_psiz;
849                 ipn.ipn_dsize = dsz;
850                 if (dsz > sizeof(ipn.ipn_data))
851                         dsz -= sizeof(ipn.ipn_data);
852                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + dsz);
853                 if (ipnn == NULL)
854                         return ENOMEM;
855                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
856
857                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
858                 if (aps->aps_data) {
859                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
860                               aps->aps_psiz);
861                 }
862                 error = IWCOPY((caddr_t)ipnn, ipnp,
863                                sizeof(ipn) + dsz);
864                 if (error)
865                         error = EFAULT;
866                 KFREES(ipnn, sizeof(*ipnn) + dsz);
867         } else {
868                 ipn.ipn_dsize = 0;
869                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
870                 if (error)
871                         error = EFAULT;
872         }
873         return error;
874 }
875
876
877 static int fr_natputent(data)
878 caddr_t data;
879 {
880         nat_save_t ipn, *ipnp, *ipnn = NULL;
881         nat_t *n, *nat;
882         ap_session_t *aps;
883         frentry_t *fr;
884         ipnat_t *in;
885
886         int error;
887
888         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
889         if (error)
890                 return EFAULT;
891         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
892         if (error)
893                 return EFAULT;
894         nat = NULL;
895         if (ipn.ipn_dsize) {
896                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
897                 if (ipnn == NULL)
898                         return ENOMEM;
899                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
900                 error = IRCOPY((caddr_t)ipnp + offsetof(nat_save_t, ipn_data),
901                                (caddr_t)ipnn->ipn_data, ipn.ipn_dsize);
902                 if (error) {
903                         error = EFAULT;
904                         goto junkput;
905                 }
906         } else
907                 ipnn = NULL;
908
909         KMALLOC(nat, nat_t *);
910         if (nat == NULL) {
911                 error = EFAULT;
912                 goto junkput;
913         }
914
915         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
916         /*
917          * Initialize all these so that nat_delete() doesn't cause a crash.
918          */
919         nat->nat_phnext[0] = NULL;
920         nat->nat_phnext[1] = NULL;
921         fr = nat->nat_fr;
922         nat->nat_fr = NULL;
923         aps = nat->nat_aps;
924         nat->nat_aps = NULL;
925         in = nat->nat_ptr;
926         nat->nat_ptr = NULL;
927         nat->nat_hm = NULL;
928         nat->nat_data = NULL;
929         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
930
931         /*
932          * Restore the rule associated with this nat session
933          */
934         if (in) {
935                 KMALLOC(in, ipnat_t *);
936                 if (in == NULL) {
937                         error = ENOMEM;
938                         goto junkput;
939                 }
940                 nat->nat_ptr = in;
941                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
942                 in->in_use = 1;
943                 in->in_flags |= IPN_DELETE;
944                 in->in_next = NULL;
945                 in->in_rnext = NULL;
946                 in->in_prnext = NULL;
947                 in->in_mnext = NULL;
948                 in->in_pmnext = NULL;
949                 in->in_ifp = GETUNIT(in->in_ifname, 4);
950                 if (in->in_plabel[0] != '\0') {
951                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
952                 }
953         }
954
955         /*
956          * Restore ap_session_t structure.  Include the private data allocated
957          * if it was there.
958          */
959         if (aps) {
960                 KMALLOC(aps, ap_session_t *);
961                 if (aps == NULL) {
962                         error = ENOMEM;
963                         goto junkput;
964                 }
965                 nat->nat_aps = aps;
966                 aps->aps_next = ap_sess_list;
967                 ap_sess_list = aps;
968                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
969                 if (in)
970                         aps->aps_apr = in->in_apr;
971                 if (aps->aps_psiz) {
972                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
973                         if (aps->aps_data == NULL) {
974                                 error = ENOMEM;
975                                 goto junkput;
976                         }
977                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
978                               aps->aps_psiz);
979                 } else {
980                         aps->aps_psiz = 0;
981                         aps->aps_data = NULL;
982                 }
983         }
984
985         /*
986          * If there was a filtering rule associated with this entry then
987          * build up a new one.
988          */
989         if (fr != NULL) {
990                 if (nat->nat_flags & FI_NEWFR) {
991                         KMALLOC(fr, frentry_t *);
992                         nat->nat_fr = fr;
993                         if (fr == NULL) {
994                                 error = ENOMEM;
995                                 goto junkput;
996                         }
997                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
998                         ipn.ipn_nat.nat_fr = fr;
999                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
1000                         if (error) {
1001                                 error = EFAULT;
1002                                 goto junkput;
1003                         }
1004                 } else {
1005                         for (n = nat_instances; n; n = n->nat_next)
1006                                 if (n->nat_fr == fr)
1007                                         break;
1008                         if (!n) {
1009                                 error = ESRCH;
1010                                 goto junkput;
1011                         }
1012                 }
1013         }
1014
1015         if (ipnn)
1016                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1017         nat_insert(nat);
1018         return 0;
1019 junkput:
1020         if (ipnn)
1021                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1022         if (nat)
1023                 nat_delete(nat);
1024         return error;
1025 }
1026
1027
1028 /*
1029  * Delete a nat entry from the various lists and table.
1030  */
1031 static void nat_delete(natd)
1032 struct nat *natd;
1033 {
1034         struct ipnat *ipn;
1035
1036         if (natd->nat_flags & FI_WILDP)
1037                 nat_stats.ns_wilds--;
1038         if (natd->nat_hnext[0])
1039                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1040         *natd->nat_phnext[0] = natd->nat_hnext[0];
1041         if (natd->nat_hnext[1])
1042                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1043         *natd->nat_phnext[1] = natd->nat_hnext[1];
1044         if (natd->nat_me != NULL)
1045                 *natd->nat_me = NULL;
1046
1047         if (natd->nat_fr != NULL) {
1048                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1049         }
1050
1051         if (natd->nat_hm != NULL)
1052                 nat_hostmapdel(natd->nat_hm);
1053
1054         /*
1055          * If there is an active reference from the nat entry to its parent
1056          * rule, decrement the rule's reference count and free it too if no
1057          * longer being used.
1058          */
1059         ipn = natd->nat_ptr;
1060         if (ipn != NULL) {
1061                 ipn->in_space++;
1062                 ipn->in_use--;
1063                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1064                         if (ipn->in_apr)
1065                                 appr_free(ipn->in_apr);
1066                         KFREE(ipn);
1067                         nat_stats.ns_rules--;
1068                 }
1069         }
1070
1071         MUTEX_DESTROY(&natd->nat_lock);
1072         /*
1073          * If there's a fragment table entry too for this nat entry, then
1074          * dereference that as well.
1075          */
1076         ipfr_forgetnat((void *)natd);
1077         aps_free(natd->nat_aps);
1078         nat_stats.ns_inuse--;
1079         KFREE(natd);
1080 }
1081
1082
1083 /*
1084  * nat_flushtable - clear the NAT table of all mapping entries.
1085  * (this is for the dynamic mappings)
1086  */
1087 static int nat_flushtable()
1088 {
1089         nat_t *nat, **natp;
1090         int j = 0;
1091
1092         /*
1093          * ALL NAT mappings deleted, so lets just make the deletions
1094          * quicker.
1095          */
1096         if (nat_table[0] != NULL)
1097                 bzero((char *)nat_table[0],
1098                       sizeof(nat_table[0]) * ipf_nattable_sz);
1099         if (nat_table[1] != NULL)
1100                 bzero((char *)nat_table[1],
1101                       sizeof(nat_table[1]) * ipf_nattable_sz);
1102
1103         for (natp = &nat_instances; (nat = *natp); ) {
1104                 *natp = nat->nat_next;
1105 #ifdef  IPFILTER_LOG
1106                 nat_log(nat, NL_FLUSH);
1107 #endif
1108                 nat_delete(nat);
1109                 j++;
1110         }
1111         nat_stats.ns_inuse = 0;
1112         return j;
1113 }
1114
1115
1116 /*
1117  * nat_clearlist - delete all rules in the active NAT mapping list.
1118  * (this is for NAT/RDR rules)
1119  */
1120 int nat_clearlist()
1121 {
1122         ipnat_t *n, **np = &nat_list;
1123         int i = 0;
1124
1125         if (nat_rules != NULL)
1126                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1127         if (rdr_rules != NULL)
1128                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1129
1130         while ((n = *np)) {
1131                 *np = n->in_next;
1132                 if (!n->in_use) {
1133                         if (n->in_apr)
1134                                 appr_free(n->in_apr);
1135                         KFREE(n);
1136                         nat_stats.ns_rules--;
1137                 } else {
1138                         n->in_flags |= IPN_DELETE;
1139                         n->in_next = NULL;
1140                 }
1141                 i++;
1142         }
1143         nat_masks = 0;
1144         rdr_masks = 0;
1145         return i;
1146 }
1147
1148
1149 /*
1150  * Create a new NAT table entry.
1151  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1152  *       If you intend on changing this, beware: appr_new() may call nat_new()
1153  *       recursively!
1154  */
1155 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1156 fr_info_t *fin;
1157 ip_t *ip;
1158 ipnat_t *np;
1159 nat_t **natsave;
1160 u_int flags;
1161 int direction;
1162 {
1163         u_32_t sum1, sum2, sumd, l;
1164         u_short port = 0, sport = 0, dport = 0, nport = 0;
1165         struct in_addr in, inb;
1166         u_short nflags, sp, dp;
1167         tcphdr_t *tcp = NULL;
1168         hostmap_t *hm = NULL;
1169         nat_t *nat, *natl;
1170 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1171         qif_t *qf = fin->fin_qif;
1172 #endif
1173
1174         if (nat_stats.ns_inuse >= ipf_nattable_max) {
1175                 nat_stats.ns_memfail++;
1176                 return NULL;
1177         }
1178
1179         nflags = flags & np->in_flags;
1180         if (flags & IPN_TCPUDP) {
1181                 tcp = (tcphdr_t *)fin->fin_dp;
1182                 sport = htons(fin->fin_data[0]);
1183                 dport = htons(fin->fin_data[1]);
1184         }
1185
1186         /* Give me a new nat */
1187         KMALLOC(nat, nat_t *);
1188         if (nat == NULL) {
1189                 nat_stats.ns_memfail++;
1190                 /*
1191                  * Try to automatically tune the max # of entries in the
1192                  * table allowed to be less than what will cause kmem_alloc()
1193                  * to fail and try to eliminate panics due to out of memory
1194                  * conditions arising.
1195                  */
1196                 if (ipf_nattable_max > ipf_nattable_sz) {
1197                         ipf_nattable_max = nat_stats.ns_inuse - 100;
1198                         printf("ipf_nattable_max reduced to %d\n",
1199                                 ipf_nattable_max);
1200                 }
1201                 return NULL;
1202         }
1203
1204         bzero((char *)nat, sizeof(*nat));
1205         nat->nat_tcpstate[0] = TCPS_CLOSED;
1206         nat->nat_tcpstate[1] = TCPS_CLOSED;
1207         nat->nat_flags = flags;
1208         if (flags & FI_WILDP)
1209                 nat_stats.ns_wilds++;
1210         /*
1211          * Search the current table for a match.
1212          */
1213         if (direction == NAT_OUTBOUND) {
1214                 /*
1215                  * Values at which the search for a free resouce starts.
1216                  */
1217                 u_32_t st_ip;
1218                 u_short st_port;
1219
1220                 /*
1221                  * If it's an outbound packet which doesn't match any existing
1222                  * record, then create a new port
1223                  */
1224                 l = 0;
1225                 st_ip = np->in_nip;
1226                 st_port = np->in_pnext;
1227
1228                 do {
1229                         port = 0;
1230                         in.s_addr = htonl(np->in_nip);
1231                         if (l == 0) {
1232                                 /*
1233                                  * Check to see if there is an existing NAT
1234                                  * setup for this IP address pair.
1235                                  */
1236                                 hm = nat_hostmap(np, fin->fin_src, in);
1237                                 if (hm != NULL)
1238                                         in.s_addr = hm->hm_mapip.s_addr;
1239                         } else if ((l == 1) && (hm != NULL)) {
1240                                 nat_hostmapdel(hm);
1241                                 hm = NULL;
1242                         }
1243                         in.s_addr = ntohl(in.s_addr);
1244
1245                         nat->nat_hm = hm;
1246
1247                         if ((np->in_outmsk == 0xffffffff) &&
1248                             (np->in_pnext == 0)) {
1249                                 if (l > 0)
1250                                         goto badnat;
1251                         }
1252
1253                         if (np->in_redir & NAT_MAPBLK) {
1254                                 if ((l >= np->in_ppip) || ((l > 0) &&
1255                                      !(flags & IPN_TCPUDP)))
1256                                         goto badnat;
1257                                 /*
1258                                  * map-block - Calculate destination address.
1259                                  */
1260                                 in.s_addr = ntohl(fin->fin_saddr);
1261                                 in.s_addr &= ntohl(~np->in_inmsk);
1262                                 inb.s_addr = in.s_addr;
1263                                 in.s_addr /= np->in_ippip;
1264                                 in.s_addr &= ntohl(~np->in_outmsk);
1265                                 in.s_addr += ntohl(np->in_outip);
1266                                 /*
1267                                  * Calculate destination port.
1268                                  */
1269                                 if ((flags & IPN_TCPUDP) &&
1270                                     (np->in_ppip != 0)) {
1271                                         port = ntohs(sport) + l;
1272                                         port %= np->in_ppip;
1273                                         port += np->in_ppip *
1274                                                 (inb.s_addr % np->in_ippip);
1275                                         port += MAPBLK_MINPORT;
1276                                         port = htons(port);
1277                                 }
1278                         } else if (!np->in_outip &&
1279                                    (np->in_outmsk == 0xffffffff)) {
1280                                 /*
1281                                  * 0/32 - use the interface's IP address.
1282                                  */
1283                                 if ((l > 0) ||
1284                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1285                                         goto badnat;
1286                                 in.s_addr = ntohl(in.s_addr);
1287                         } else if (!np->in_outip && !np->in_outmsk) {
1288                                 /*
1289                                  * 0/0 - use the original source address/port.
1290                                  */
1291                                 if (l > 0)
1292                                         goto badnat;
1293                                 in.s_addr = ntohl(fin->fin_saddr);
1294                         } else if ((np->in_outmsk != 0xffffffff) &&
1295                                    (np->in_pnext == 0) &&
1296                                    ((l > 0) || (hm == NULL)))
1297                                 np->in_nip++;
1298                         natl = NULL;
1299
1300                         if ((nflags & IPN_TCPUDP) &&
1301                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1302                             (np->in_flags & IPN_AUTOPORTMAP)) {
1303                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1304                                         if (l > np->in_space) {
1305                                                 goto badnat;
1306                                         } else if ((l > np->in_ppip) &&
1307                                                    np->in_outmsk != 0xffffffff)
1308                                                 np->in_nip++;
1309                                 }
1310                                 if (np->in_ppip != 0) {
1311                                         port = ntohs(sport);
1312                                         port += (l % np->in_ppip);
1313                                         port %= np->in_ppip;
1314                                         port += np->in_ppip *
1315                                                 (ntohl(fin->fin_saddr) %
1316                                                  np->in_ippip);
1317                                         port += MAPBLK_MINPORT;
1318                                         port = htons(port);
1319                                 }
1320                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1321                                    (nflags & IPN_TCPUDP) &&
1322                                    (np->in_pnext != 0)) {
1323                                 port = htons(np->in_pnext++);
1324                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1325                                         np->in_pnext = ntohs(np->in_pmin);
1326                                         if (np->in_outmsk != 0xffffffff)
1327                                                 np->in_nip++;
1328                                 }
1329                         }
1330
1331                         if (np->in_flags & IPN_IPRANGE) {
1332                                 if (np->in_nip > ntohl(np->in_outmsk))
1333                                         np->in_nip = ntohl(np->in_outip);
1334                         } else {
1335                                 if ((np->in_outmsk != 0xffffffff) &&
1336                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1337                                     ntohl(np->in_outip))
1338                                         np->in_nip = ntohl(np->in_outip) + 1;
1339                         }
1340
1341                         if (!port && (flags & IPN_TCPUDP))
1342                                 port = sport;
1343
1344                         /*
1345                          * Here we do a lookup of the connection as seen from
1346                          * the outside.  If an IP# pair already exists, try
1347                          * again.  So if you have A->B becomes C->B, you can
1348                          * also have D->E become C->E but not D->B causing
1349                          * another C->B.  Also take protocol and ports into
1350                          * account when determining whether a pre-existing
1351                          * NAT setup will cause an external conflict where
1352                          * this is appropriate.
1353                          */
1354                         inb.s_addr = htonl(in.s_addr);
1355                         sp = fin->fin_data[0];
1356                         dp = fin->fin_data[1];
1357                         fin->fin_data[0] = fin->fin_data[1];
1358                         fin->fin_data[1] = htons(port);
1359                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1360                                             (u_int)fin->fin_p, fin->fin_dst,
1361                                             inb, 1);
1362                         fin->fin_data[0] = sp;
1363                         fin->fin_data[1] = dp;
1364
1365                         /*
1366                          * Has the search wrapped around and come back to the
1367                          * start ?
1368                          */
1369                         if ((natl != NULL) &&
1370                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1371                             (np->in_nip != 0) && (st_ip == np->in_nip))
1372                                 goto badnat;
1373                         l++;
1374                 } while (natl != NULL);
1375
1376                 if (np->in_space > 0)
1377                         np->in_space--;
1378
1379                 /* Setup the NAT table */
1380                 nat->nat_inip = fin->fin_src;
1381                 nat->nat_outip.s_addr = htonl(in.s_addr);
1382                 nat->nat_oip = fin->fin_dst;
1383                 if (nat->nat_hm == NULL)
1384                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1385                                                   nat->nat_outip);
1386
1387                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1388                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1389
1390                 if (flags & IPN_TCPUDP) {
1391                         nat->nat_inport = sport;
1392                         nat->nat_outport = port;        /* sport */
1393                         nat->nat_oport = dport;
1394                 }
1395         } else {
1396                 /*
1397                  * Otherwise, it's an inbound packet. Most likely, we don't
1398                  * want to rewrite source ports and source addresses. Instead,
1399                  * we want to rewrite to a fixed internal address and fixed
1400                  * internal port.
1401                  */
1402                 if (np->in_flags & IPN_SPLIT) {
1403                         in.s_addr = np->in_nip;
1404                         if (np->in_inip == htonl(in.s_addr))
1405                                 np->in_nip = ntohl(np->in_inmsk);
1406                         else {
1407                                 np->in_nip = ntohl(np->in_inip);
1408                                 if (np->in_flags & IPN_ROUNDR) {
1409                                         nat_delrdr(np);
1410                                         nat_addrdr(np);
1411                                 }
1412                         }
1413                 } else {
1414                         in.s_addr = ntohl(np->in_inip);
1415                         if (np->in_flags & IPN_ROUNDR) {
1416                                 nat_delrdr(np);
1417                                 nat_addrdr(np);
1418                         }
1419                 }
1420                 if (!np->in_pnext)
1421                         nport = dport;
1422                 else {
1423                         /*
1424                          * Whilst not optimized for the case where
1425                          * pmin == pmax, the gain is not significant.
1426                          */
1427                         if (np->in_pmin != np->in_pmax) {
1428                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1429                                         ntohs(np->in_pnext);
1430                                 nport = ntohs(nport);
1431                         } else
1432                                 nport = np->in_pnext;
1433                 }
1434
1435                 /*
1436                  * When the redirect-to address is set to 0.0.0.0, just
1437                  * assume a blank `forwarding' of the packet.
1438                  */
1439                 if (in.s_addr == 0)
1440                         in.s_addr = ntohl(fin->fin_daddr);
1441
1442                 nat->nat_inip.s_addr = htonl(in.s_addr);
1443                 nat->nat_outip = fin->fin_dst;
1444                 nat->nat_oip = fin->fin_src;
1445
1446                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1447                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1448
1449                 if (flags & IPN_TCPUDP) {
1450                         nat->nat_inport = nport;
1451                         nat->nat_outport = dport;
1452                         nat->nat_oport = sport;
1453                 }
1454         }
1455
1456         CALC_SUMD(sum1, sum2, sumd);
1457         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1458 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1459         if ((flags & IPN_TCP) && dohwcksum &&
1460             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1461                 if (direction == NAT_OUTBOUND)
1462                         sum1 = LONG_SUM(ntohl(in.s_addr));
1463                 else
1464                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1465                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1466                 sum1 += IPPROTO_TCP;
1467                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1468                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1469         } else
1470 #endif
1471                 nat->nat_sumd[1] = nat->nat_sumd[0];
1472
1473         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1474                 if (direction == NAT_OUTBOUND)
1475                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1476                 else
1477                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1478
1479                 sum2 = LONG_SUM(in.s_addr);
1480
1481                 CALC_SUMD(sum1, sum2, sumd);
1482                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1483         } else
1484                 nat->nat_ipsumd = nat->nat_sumd[0];
1485
1486         in.s_addr = htonl(in.s_addr);
1487
1488         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1489
1490         nat->nat_me = natsave;
1491         nat->nat_dir = direction;
1492         nat->nat_ifp = fin->fin_ifp;
1493         nat->nat_ptr = np;
1494         nat->nat_p = fin->fin_p;
1495         nat->nat_bytes = 0;
1496         nat->nat_pkts = 0;
1497         nat->nat_mssclamp = np->in_mssclamp;
1498         nat->nat_fr = fin->fin_fr;
1499         if (nat->nat_fr != NULL) {
1500                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1501         }
1502         if (direction == NAT_OUTBOUND) {
1503                 if (flags & IPN_TCPUDP)
1504                         tcp->th_sport = port;
1505         } else {
1506                 if (flags & IPN_TCPUDP)
1507                         tcp->th_dport = nport;
1508         }
1509
1510         nat_insert(nat);
1511
1512         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1513             (tcp != NULL && dport == np->in_dport)))
1514                 (void) appr_new(fin, ip, nat);
1515
1516         np->in_use++;
1517 #ifdef  IPFILTER_LOG
1518         nat_log(nat, (u_int)np->in_redir);
1519 #endif
1520         return nat;
1521 badnat:
1522         nat_stats.ns_badnat++;
1523         if ((hm = nat->nat_hm) != NULL)
1524                 nat_hostmapdel(hm);
1525         KFREE(nat);
1526         return NULL;
1527 }
1528
1529
1530 /*
1531  * Insert a NAT entry into the hash tables for searching and add it to the
1532  * list of active NAT entries.  Adjust global counters when complete.
1533  */
1534 void    nat_insert(nat)
1535 nat_t   *nat;
1536 {
1537         u_int hv1, hv2;
1538         nat_t **natp;
1539
1540         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1541
1542         nat->nat_age = fr_defnatage;
1543         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1544         if (nat->nat_ifname[0] !='\0') {
1545                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1546         }
1547
1548         nat->nat_next = nat_instances;
1549         nat_instances = nat;
1550
1551         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1552                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1553                                   0xffffffff);
1554                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1555                                   ipf_nattable_sz);
1556                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1557                                   0xffffffff);
1558                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1559                                  ipf_nattable_sz);
1560         } else {
1561                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1562                                   ipf_nattable_sz);
1563                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1564                                   ipf_nattable_sz);
1565         }
1566
1567         natp = &nat_table[0][hv1];
1568         if (*natp)
1569                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1570         nat->nat_phnext[0] = natp;
1571         nat->nat_hnext[0] = *natp;
1572         *natp = nat;
1573
1574         natp = &nat_table[1][hv2];
1575         if (*natp)
1576                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1577         nat->nat_phnext[1] = natp;
1578         nat->nat_hnext[1] = *natp;
1579         *natp = nat;
1580
1581         nat_stats.ns_added++;
1582         nat_stats.ns_inuse++;
1583 }
1584
1585
1586 nat_t *nat_icmplookup(ip, fin, dir)
1587 ip_t *ip;
1588 fr_info_t *fin;
1589 int dir;
1590 {
1591         icmphdr_t *icmp;
1592         tcphdr_t *tcp = NULL;
1593         ip_t *oip;
1594         int flags = 0, type, minlen;
1595
1596         icmp = (icmphdr_t *)fin->fin_dp;
1597         /*
1598          * Does it at least have the return (basic) IP header ?
1599          * Only a basic IP header (no options) should be with an ICMP error
1600          * header.
1601          */
1602         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1603                 return NULL;
1604         type = icmp->icmp_type;
1605         /*
1606          * If it's not an error type, then return.
1607          */
1608         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1609             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1610             (type != ICMP_PARAMPROB))
1611                 return NULL;
1612
1613         oip = (ip_t *)((char *)fin->fin_dp + 8);
1614         minlen = (oip->ip_hl << 2);
1615         if (minlen < sizeof(ip_t))
1616                 return NULL;
1617         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1618                 return NULL;
1619         /*
1620          * Is the buffer big enough for all of it ?  It's the size of the IP
1621          * header claimed in the encapsulated part which is of concern.  It
1622          * may be too big to be in this buffer but not so big that it's
1623          * outside the ICMP packet, leading to TCP deref's causing problems.
1624          * This is possible because we don't know how big oip_hl is when we
1625          * do the pullup early in fr_check() and thus can't gaurantee it is
1626          * all here now.
1627          */
1628 #ifdef  _KERNEL
1629         {
1630         mb_t *m;
1631
1632 # if SOLARIS
1633         m = fin->fin_qfm;
1634         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1635                 return NULL;
1636 # else
1637         m = *(mb_t **)fin->fin_mp;
1638         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1639             (char *)ip + m->m_len)
1640                 return NULL;
1641 # endif
1642         }
1643 #endif
1644
1645         if (oip->ip_p == IPPROTO_TCP)
1646                 flags = IPN_TCP;
1647         else if (oip->ip_p == IPPROTO_UDP)
1648                 flags = IPN_UDP;
1649         if (flags & IPN_TCPUDP) {
1650                 u_short data[2];
1651                 nat_t *nat;
1652
1653                 minlen += 8;            /* + 64bits of data to get ports */
1654                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1655                         return NULL;
1656
1657                 data[0] = fin->fin_data[0];
1658                 data[1] = fin->fin_data[1];
1659                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1660                 fin->fin_data[0] = ntohs(tcp->th_dport);
1661                 fin->fin_data[1] = ntohs(tcp->th_sport);
1662
1663                 if (dir == NAT_INBOUND) {
1664                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1665                                             oip->ip_dst, oip->ip_src, 0);
1666                 } else {
1667                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1668                                             oip->ip_dst, oip->ip_src, 0);
1669                 }
1670                 fin->fin_data[0] = data[0];
1671                 fin->fin_data[1] = data[1];
1672                 return nat;
1673         }
1674         if (dir == NAT_INBOUND)
1675                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1676                                     oip->ip_dst, oip->ip_src, 0);
1677         else
1678                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1679                                     oip->ip_dst, oip->ip_src, 0);
1680 }
1681
1682
1683 /*
1684  * Recognise an ICMP error message that quotes a NAT'd packet and rewrite
1685  * the quoted ("offending") IP header, and its TCP/UDP ports and checksums
      * where present, to match the NAT entry found by nat_icmplookup().
      *
      * ip     - IP header of the packet carrying the ICMP message
      * fin    - packet information; fin_dp is used as the ICMP header
      * nflags - set to IPN_ICMPERR once a NAT entry has been found
      * dir    - lookup direction, handed straight to nat_icmplookup()
      *
      * Returns the matching NAT entry, or NULL when the packet is flagged
      * FI_SHORT, has fin_off != 0, is not IPv4, or nat_icmplookup() finds
      * nothing.
      *
      * The original note here said this should *ONLY* be used for incoming
      * packets.  NOTE(review): ip_natout() in this file also calls it, with
      * NAT_OUTBOUND - confirm which statement is current.
1686  */
1687 nat_t *nat_icmp(ip, fin, nflags, dir)
1688 ip_t *ip;
1689 fr_info_t *fin;
1690 u_int *nflags;
1691 int dir;
1692 {
1693         u_32_t sum1, sum2, sumd, sumd2 = 0;
1694         struct in_addr in;
1695         int flags, dlen;
1696         icmphdr_t *icmp;
1697         udphdr_t *udp;
1698         tcphdr_t *tcp;
1699         nat_t *nat;
1700         ip_t *oip;
1701
1702         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1703                 return NULL;
1704         /*
1705          * nat_icmplookup() will return NULL for `defective' packets.
1706          */
1707         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1708                 return NULL;
1709
1710         flags = 0;
             /* sumd2 was already zeroed by its initialiser above. */
1711         sumd2 = 0;
1712         *nflags = IPN_ICMPERR;
1713         icmp = (icmphdr_t *)fin->fin_dp;
1714         oip = (ip_t *)&icmp->icmp_ip;
1715         if (oip->ip_p == IPPROTO_TCP)
1716                 flags = IPN_TCP;
1717         else if (oip->ip_p == IPPROTO_UDP)
1718                 flags = IPN_UDP;
             /*
              * udp (and tcp, set below) point just past the quoted IP header;
              * dlen is what remains of ip_len counted from that point.
              */
1719         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1720         dlen = ip->ip_len - ((char *)udp - (char *)ip);
1721         /*
1722          * XXX - what if this is bogus hl and we go off the end ?
1723          * In this case, nat_icmplookup() will have returned NULL.
1724          */
1725         tcp = (tcphdr_t *)udp;
1726
1727         /*
1728          * Need to adjust ICMP header to include the real IP#'s and
1729          * port #'s.  Only apply a checksum change relative to the
1730          * IP address change as it will be modified again in ip_natout
1731          * for both address and port.  Two checksum changes are
1732          * necessary for the two header address changes.  Be careful
1733          * to only modify the checksum once for the port # and twice
1734          * for the IP#.
1735          */
1736
1737         /*
1738          * Step 1
1739          * Fix the IP addresses in the offending IP packet. You also need
1740          * to adjust the IP header checksum of that offending IP packet
1741          * and the ICMP checksum of the ICMP error message itself.
1742          *
1743          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1744          * in the pseudo header that is used to compute the UDP resp. TCP
1745          * checksum. So, we must compensate that as well. Even worse, the
1746          * change in the UDP and TCP checksums require yet another
1747          * adjustment of the ICMP checksum of the ICMP error message.
1748          *
1749          */
1750
             /*
              * If the quoted destination is the entry's nat_oip, the quoted
              * source is replaced with nat_inip; otherwise the quoted
              * destination is replaced with nat_outip.  sum1 is derived from
              * the old address, sum2 (below) from the new one.
              */
1751         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1752                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1753                 in = nat->nat_inip;
1754                 oip->ip_src = in;
1755         } else {
1756                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1757                 in = nat->nat_outip;
1758                 oip->ip_dst = in;
1759         }
1760
1761         sum2 = LONG_SUM(ntohl(in.s_addr));
1762
1763         CALC_SUMD(sum1, sum2, sumd);
1764
1765         /*
1766          * Fix IP checksum of the offending IP packet to adjust for
1767          * the change in the IP address.
1768          *
1769          * Normally, you would expect that the ICMP checksum of the
1770          * ICMP error message needs to be adjusted as well for the
1771          * IP address change in oip.
1772          * However, this is a NOP, because the ICMP checksum is
1773          * calculated over the complete ICMP packet, which includes the
1774          * changed oip IP addresses and oip->ip_sum. However, these
1775          * two changes cancel each other out (if the delta for
1776          * the IP address is x, then the delta for ip_sum is minus x),
1777          * so no change in the icmp_cksum is necessary.
1778          *
1779          * Be careful that nat_dir refers to the direction of the
1780          * offending IP packet (oip), not to its ICMP response (icmp)
1781          */
1782         fix_datacksum(&oip->ip_sum, sumd);
1783         /* Fix icmp cksum : IP Addr + Cksum */
1784
1785         /*
1786          * Fix UDP pseudo header checksum to compensate for the
1787          * IP address change.
              * dlen >= 8 means a complete UDP header (and so uh_sum) was quoted.
1788          */
1789         if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && udp->uh_sum) {
1790                 /*
1791                  * The UDP checksum is optional, only adjust it
1792                  * if it has been set.
1793                  */
1794                 sum1 = ntohs(udp->uh_sum);
1795                 fix_datacksum(&udp->uh_sum, sumd);
1796                 sum2 = ntohs(udp->uh_sum);
1797
1798                 /*
1799                  * Fix ICMP checksum to compensate the UDP
1800                  * checksum adjustment.
1801                  */
1802                 sumd2 = sumd << 1;
1803                 CALC_SUMD(sum1, sum2, sumd);
1804                 sumd2 += sumd;
1805         }
1806
1807         /*
1808          * Fix TCP pseudo header checksum to compensate for the
1809          * IP address change. Before we can do the change, we
1810          * must make sure that oip is sufficient large to hold
1811          * the TCP checksum (normally it does not!).
1812          */
1813         else if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
1814                 sum1 = ntohs(tcp->th_sum);
1815                 fix_datacksum(&tcp->th_sum, sumd);
1816                 sum2 = ntohs(tcp->th_sum);
1817
1818                 /*
1819                  * Fix ICMP checksum to compensate the TCP
1820                  * checksum adjustment.
1821                  */
1822                 sumd2 = sumd << 1;
1823                 CALC_SUMD(sum1, sum2, sumd);
1824                 sumd2 += sumd;
1825         } else {
                     /*
                      * No quoted transport checksum was adjusted above.
                      * NOTE(review): sumd2 set here is only consumed inside
                      * the TCP/UDP block below, so for any other quoted
                      * protocol it never reaches icmp_cksum in this function
                      * - confirm that is intended.
                      */
1826                 sumd2 = (sumd >> 16); 
1827                 if (nat->nat_dir == NAT_OUTBOUND)
1828                         sumd2 = ~sumd2;
1829                 else
1830                         sumd2 = ~sumd2 + 1;
1831         }
1832
1833         if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
1834                 /*
1835                  * Step 2 :
1836                  * For offending TCP/UDP IP packets, translate the ports as
1837                  * well, based on the NAT specification. Of course such
1838                  * a change must be reflected in the ICMP checksum as well.
1839                  *
1840                  * Advance notice : Now it becomes complicated :-)
1841                  *
1842                  * Since the port fields are part of the TCP/UDP checksum
1843                  * of the offending IP packet, you need to adjust that checksum
1844                  * as well... but, if you change, you must change the icmp
1845                  * checksum *again*, to reflect that change.
1846                  *
1847                  * To further complicate: the TCP checksum is not in the first
1848                  * 8 bytes of the offending ip packet, so it most likely is not
1849                  * available. Some OSses like Solaris return enough bytes to
1850                  * include the TCP checksum. So we have to check if the
1851                  * ip->ip_len actually holds the TCP checksum of the oip!
1852                  */
                     /*
                      * Quoted destination port equals nat_oport: the quoted
                      * source port is the one restored (to nat_inport).
                      */
1853                 if (nat->nat_oport == tcp->th_dport) {
1854                         if (tcp->th_sport != nat->nat_inport) {
1855                                 /*
1856                                  * Fix ICMP checksum to compensate port
1857                                  * adjustment.
1858                                  */
1859                                 sum1 = ntohs(nat->nat_inport);
1860                                 sum2 = ntohs(tcp->th_sport);
1861                                 tcp->th_sport = nat->nat_inport;
1862
1863                                 /*
1864                                  * Fix udp checksum to compensate port
1865                                  * adjustment.  NOTE : the offending IP packet
1866                                  * flows the other direction compared to the
1867                                  * ICMP message.
1868                                  *
1869                                  * The UDP checksum is optional, only adjust
1870                                  * it if it has been set.
1871                                  */
1872                                 if ((oip->ip_p == IPPROTO_UDP) &&
1873                                     (dlen >= 8) && udp->uh_sum) {
1874                                         sumd = sum1 - sum2;
1875                                         sumd2 += sumd;
1876
1877                                         sum1 = ntohs(udp->uh_sum);
1878                                         fix_datacksum(&udp->uh_sum, sumd);
1879                                         sum2 = ntohs(udp->uh_sum);
1880
1881                                         /*
1882                                          * Fix ICMP checksum to compensate
1883                                          * UDP checksum adjustment.
1884                                          */
1885                                         CALC_SUMD(sum1, sum2, sumd);
1886                                         sumd2 += sumd;
1887                                 }
1888
1889                                 /*
1890                                  * Fix tcp checksum (if present) to compensate
1891                                  * port adjustment. NOTE : the offending IP
1892                                  * packet flows the other direction compared to
1893                                  * the ICMP message.
1894                                  */
1895                                 if (oip->ip_p == IPPROTO_TCP) {
1896                                         if (dlen >= 18) {
1897                                                 sumd = sum1 - sum2;
1898                                                 sumd2 += sumd;
1899
1900                                                 sum1 = ntohs(tcp->th_sum);
1901                                                 fix_datacksum(&tcp->th_sum,
1902                                                               sumd);
1903                                                 sum2 = ntohs(tcp->th_sum);
1904
1905                                                 /*
1906                                                  * Fix ICMP checksum to 
1907                                                  * compensate TCP checksum 
1908                                                  * adjustment.
1909                                                  */
1910                                                 CALC_SUMD(sum1, sum2, sumd);
1911                                                 sumd2 += sumd;
1912                                         } else {
1913                                                 sumd = sum2 - sum1 + 1;
1914                                                 sumd2 += sumd;
1915                                         }
1916                                 }
1917                         }
1918                 } else if (tcp->th_dport != nat->nat_outport) {
1919                         /*
1920                          * Fix ICMP checksum to compensate port
1921                          * adjustment.
                              * Here the quoted destination port is the one
                              * rewritten (to nat_outport).
1922                          */
1923                         sum1 = ntohs(nat->nat_outport);
1924                         sum2 = ntohs(tcp->th_dport);
1925                         tcp->th_dport = nat->nat_outport;
1926
1927                         /*
1928                          * Fix udp checksum to compensate port
1929                          * adjustment.   NOTE : the offending IP
1930                          * packet flows the other direction compared
1931                          * to the ICMP message.
1932                          *
1933                          * The UDP checksum is optional, only adjust
1934                          * it if it has been set.
1935                          */
1936                         if ((oip->ip_p == IPPROTO_UDP) &&
1937                             (dlen >= 8) && udp->uh_sum) {
1938                                 sumd = sum1 - sum2;
1939                                 sumd2 += sumd;
1940
1941                                 sum1 = ntohs(udp->uh_sum);
1942                                 fix_datacksum(&udp->uh_sum, sumd);
1943                                 sum2 = ntohs(udp->uh_sum);
1944
1945                                 /*
1946                                  * Fix ICMP checksum to compensate
1947                                  * UDP checksum adjustment.
1948                                  */
1949                                 CALC_SUMD(sum1, sum2, sumd);
1950                         }
1951
1952                         /*
1953                          * Fix tcp checksum (if present) to compensate
1954                          * port adjustment. NOTE : the offending IP
1955                          * packet flows the other direction compared to
1956                          * the ICMP message.
1957                          */
1958                         if (oip->ip_p == IPPROTO_TCP) {
1959                                 if (dlen >= 18) {
1960                                         sumd = sum1 - sum2;
1961                                         sumd2 += sumd;
1962
1963                                         sum1 = ntohs(tcp->th_sum);
1964                                         fix_datacksum(&tcp->th_sum, sumd);
1965                                         sum2 = ntohs(tcp->th_sum);
1966
1967                                         /*
1968                                          * Fix ICMP checksum to compensate
1969                                          * TCP checksum adjustment.
1970                                          */
1971                                         CALC_SUMD(sum1, sum2, sumd);
1972                                 } else {
1973                                         sumd = sum2 - sum1;
1974                                         if (nat->nat_dir == NAT_OUTBOUND)
1975                                                 sumd++;
1976                                 }
1977                         }
                             /*
                              * Adds whichever sumd was computed last.
                              * NOTE(review): when neither the UDP nor the TCP
                              * branch above ran, sumd still holds a value left
                              * over from step 1 - confirm that is intended.
                              */
1978                         sumd2 += sumd;
1979                 }
                     /*
                      * Fold sumd2 down to 16 bits (twice, to absorb the carry)
                      * and apply it to the ICMP checksum.
                      */
1980                 if (sumd2) {
1981                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1982                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1983                         fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1984                 }
1985         }
             /* The quoted packet was itself ICMP: reset the entry's age. */
1986         if (oip->ip_p == IPPROTO_ICMP)
1987                 nat->nat_age = fr_defnaticmpage;
1988         return nat;
1989 }
1990
1991
1992 /*
1993  * NB: these lookups don't lock access to the list; they assume that has
1994  * already been done!
1995  */
1996 /*
1997  * Lookup a nat entry based on the mapped destination ip address/port and
1998  * real source address/port.  We use this lookup when receiving a packet,
1999  * we're looking for a table entry, based on the destination address.
2000  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2001  */
2002 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2003 fr_info_t *fin;
2004 u_int flags, p;
2005 struct in_addr src , mapdst;
2006 int rw;
2007 {
2008         u_short sport, dport;
2009         nat_t *nat;
2010         int nflags;
2011         u_32_t dst;
2012         ipnat_t *ipn;
2013         void *ifp;
2014         u_int hv;
2015
2016         if (fin != NULL)
2017                 ifp = fin->fin_ifp;
2018         else
2019                 ifp = NULL;
2020         dst = mapdst.s_addr;
2021         if (flags & IPN_TCPUDP) {
2022                 sport = htons(fin->fin_data[0]);
2023                 dport = htons(fin->fin_data[1]);
2024         } else {
2025                 sport = 0;
2026                 dport = 0;
2027         }
2028
2029         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2030         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2031         nat = nat_table[1][hv];
2032         for (; nat; nat = nat->nat_hnext[1]) {
2033                 nflags = nat->nat_flags;
2034                 if ((!ifp || ifp == nat->nat_ifp) &&
2035                     nat->nat_oip.s_addr == src.s_addr &&
2036                     nat->nat_outip.s_addr == dst &&
2037                     ((p == 0) || (p == nat->nat_p))) {
2038                         switch (p)
2039                         {
2040                         case IPPROTO_TCP :
2041                         case IPPROTO_UDP :
2042                                 if (nat->nat_oport != sport)
2043                                         continue;
2044                                 if (nat->nat_outport != dport)
2045                                         continue;
2046                                 break;
2047                         default :
2048                                 break;
2049                         }
2050
2051                         ipn = nat->nat_ptr;
2052                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2053                                 if (appr_match(fin, nat) != 0)
2054                                         continue;
2055                         return nat;
2056                 }
2057         }
2058         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2059                 return NULL;
2060         if (!rw) {
2061                 RWLOCK_EXIT(&ipf_nat);
2062         }
2063         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2064         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2065         if (!rw) {
2066                 WRITE_ENTER(&ipf_nat);
2067         }
2068         nat = nat_table[1][hv];
2069         for (; nat; nat = nat->nat_hnext[1]) {
2070                 nflags = nat->nat_flags;
2071                 if (ifp && ifp != nat->nat_ifp)
2072                         continue;
2073                 if (!(nflags & FI_WILDP))
2074                         continue;
2075                 if (nat->nat_oip.s_addr != src.s_addr ||
2076                     nat->nat_outip.s_addr != dst)
2077                         continue;
2078                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2079                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2080                         nat_tabmove(fin, nat);
2081                         break;
2082                 }
2083         }
2084         if (!rw) {
2085                 MUTEX_DOWNGRADE(&ipf_nat);
2086         }
2087         return nat;
2088 }
2089
2090
2091 /*
2092  * This function is only called for TCP/UDP NAT table entries where the
2093  * original was placed in the table without hashing on the ports and we now
2094  * want to include hashing on port numbers.
2095  */
2096 static void nat_tabmove(fin, nat)
2097 fr_info_t *fin;
2098 nat_t *nat;
2099 {
2100         u_short sport, dport;
2101         u_int hv, nflags;
2102         nat_t **natp;
2103
2104         nflags = nat->nat_flags;
2105
2106         sport = ntohs(fin->fin_data[0]);
2107         dport = ntohs(fin->fin_data[1]);
2108
2109         /*
2110          * Remove the NAT entry from the old location
2111          */
2112         if (nat->nat_hnext[0])
2113                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2114         *nat->nat_phnext[0] = nat->nat_hnext[0];
2115
2116         if (nat->nat_hnext[1])
2117                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2118         *nat->nat_phnext[1] = nat->nat_hnext[1];
2119
2120         /*
2121          * Add into the NAT table in the new position
2122          */
2123         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2124         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2125         natp = &nat_table[0][hv];
2126         if (*natp)
2127                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2128         nat->nat_phnext[0] = natp;
2129         nat->nat_hnext[0] = *natp;
2130         *natp = nat;
2131
2132         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2133         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2134         natp = &nat_table[1][hv];
2135         if (*natp)
2136                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2137         nat->nat_phnext[1] = natp;
2138         nat->nat_hnext[1] = *natp;
2139         *natp = nat;
2140 }
2141
2142
2143 /*
2144  * Lookup a nat entry based on the source 'real' ip address/port and
2145  * destination address/port.  We use this lookup when sending a packet out,
2146  * we're looking for a table entry, based on the source address.
2147  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2148  */
2149 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2150 fr_info_t *fin;
2151 u_int flags, p;
2152 struct in_addr src , dst;
2153 int rw;
2154 {
2155         u_short sport, dport;
2156         nat_t *nat;
2157         int nflags;
2158         ipnat_t *ipn;
2159         u_32_t srcip;
2160         void *ifp;
2161         u_int hv;
2162
2163         ifp = fin->fin_ifp;
2164         srcip = src.s_addr;
2165         if (flags & IPN_TCPUDP) {
2166                 sport = ntohs(fin->fin_data[0]);
2167                 dport = ntohs(fin->fin_data[1]);
2168         } else {
2169                 sport = 0;
2170                 dport = 0;
2171         }
2172
2173         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2174         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2175         nat = nat_table[0][hv];
2176         for (; nat; nat = nat->nat_hnext[0]) {
2177                 nflags = nat->nat_flags;
2178
2179                 if ((!ifp || ifp == nat->nat_ifp) &&
2180                     nat->nat_inip.s_addr == srcip &&
2181                     nat->nat_oip.s_addr == dst.s_addr &&
2182                     ((p == 0) || (p == nat->nat_p))) {
2183                         switch (p)
2184                         {
2185                         case IPPROTO_TCP :
2186                         case IPPROTO_UDP :
2187                                 if (nat->nat_oport != dport)
2188                                         continue;
2189                                 if (nat->nat_inport != sport)
2190                                         continue;
2191                                 break;
2192                         default :
2193                                 break;
2194                         }
2195
2196                         ipn = nat->nat_ptr;
2197                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2198                                 if (appr_match(fin, nat) != 0)
2199                                         continue;
2200                         return nat;
2201                 }
2202         }
2203         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2204                 return NULL;
2205         if (!rw) {
2206                 RWLOCK_EXIT(&ipf_nat);
2207         }
2208
2209         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2210         if (!rw) {
2211                 WRITE_ENTER(&ipf_nat);
2212         }
2213         nat = nat_table[0][hv];
2214         for (; nat; nat = nat->nat_hnext[0]) {
2215                 nflags = nat->nat_flags;
2216                 if (ifp && ifp != nat->nat_ifp)
2217                         continue;
2218                 if (!(nflags & FI_WILDP))
2219                         continue;
2220                 if ((nat->nat_inip.s_addr != srcip) ||
2221                     (nat->nat_oip.s_addr != dst.s_addr))
2222                         continue;
2223                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2224                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2225                         nat_tabmove(fin, nat);
2226                         break;
2227                 }
2228         }
2229         if (!rw) {
2230                 MUTEX_DOWNGRADE(&ipf_nat);
2231         }
2232         return nat;
2233 }
2234
2235
2236 /*
2237  * Lookup the NAT tables to search for a matching redirect
2238  */
2239 nat_t *nat_lookupredir(np)
2240 natlookup_t *np;
2241 {
2242         nat_t *nat;
2243         fr_info_t fi;
2244
2245         bzero((char *)&fi, sizeof(fi));
2246         fi.fin_data[0] = ntohs(np->nl_inport);
2247         fi.fin_data[1] = ntohs(np->nl_outport);
2248
2249         /*
2250          * If nl_inip is non null, this is a lookup based on the real
2251          * ip address. Else, we use the fake.
2252          */
2253         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2254                                  np->nl_outip, 0))) {
2255                 np->nl_realip = nat->nat_outip;
2256                 np->nl_realport = nat->nat_outport;
2257         }
2258         return nat;
2259 }
2260
2261
2262 static int nat_match(fin, np, ip)
2263 fr_info_t *fin;
2264 ipnat_t *np;
2265 ip_t *ip;
2266 {
2267         frtuc_t *ft;
2268
2269         if (ip->ip_v != 4)
2270                 return 0;
2271
2272         if (np->in_p && fin->fin_p != np->in_p)
2273                 return 0;
2274         if (fin->fin_out) {
2275                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2276                         return 0;
2277                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2278                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2279                         return 0;
2280                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2281                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2282                         return 0;
2283         } else {
2284                 if (!(np->in_redir & NAT_REDIRECT))
2285                         return 0;
2286                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2287                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2288                         return 0;
2289                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2290                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2291                         return 0;
2292         }
2293
2294         ft = &np->in_tuc;
2295         if (!(fin->fin_fl & FI_TCPUDP) ||
2296             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2297                 if (ft->ftu_scmp || ft->ftu_dcmp)
2298                         return 0;
2299                 return 1;
2300         }
2301
2302         return fr_tcpudpchk(ft, fin);
2303 }
2304
2305
2306 /*
2307  * Packets going out on the external interface go through this.
2308  * Here, the source address requires alteration, if anything.
2309  */
2310 int ip_natout(ip, fin)
2311 ip_t *ip;
2312 fr_info_t *fin;
2313 {
2314         ipnat_t *np = NULL;
2315         u_32_t ipa;
2316         tcphdr_t *tcp = NULL;
2317         u_short sport = 0, dport = 0, *csump = NULL;
2318         int natadd = 1, i, icmpset = 1;
2319         u_int nflags = 0, hv, msk;
2320         struct ifnet *ifp;
2321         frentry_t *fr;
2322         void *sifp;
2323         u_32_t iph;
2324         nat_t *nat;
2325
2326         if (nat_list == NULL || (fr_nat_lock))
2327                 return 0;
2328
2329         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2330             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2331                 sifp = fin->fin_ifp;
2332                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2333         } else
2334                 sifp = fin->fin_ifp;
2335         ifp = fin->fin_ifp;
2336
2337         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2338                 if (fin->fin_p == IPPROTO_TCP)
2339                         nflags = IPN_TCP;
2340                 else if (fin->fin_p == IPPROTO_UDP)
2341                         nflags = IPN_UDP;
2342                 if ((nflags & IPN_TCPUDP)) {
2343                         tcp = (tcphdr_t *)fin->fin_dp;
2344                         sport = tcp->th_sport;
2345                         dport = tcp->th_dport;
2346                 }
2347         }
2348
2349         ipa = fin->fin_saddr;
2350
2351         READ_ENTER(&ipf_nat);
2352
2353         if ((fin->fin_p == IPPROTO_ICMP) &&
2354             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2355                 icmpset = 1;
2356         else if ((fin->fin_fl & FI_FRAG) &&
2357             (nat = ipfr_nat_knownfrag(ip, fin)))
2358                 natadd = 0;
2359         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2360                                       (u_int)fin->fin_p, fin->fin_src,
2361                                       fin->fin_dst, 0))) {
2362                 nflags = nat->nat_flags;
2363                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2364                         if ((nflags & FI_W_SPORT) &&
2365                             (nat->nat_inport != sport))
2366                                 nat->nat_inport = sport;
2367                         if ((nflags & FI_W_DPORT) &&
2368                             (nat->nat_oport != dport))
2369                                 nat->nat_oport = dport;
2370
2371                         if (nat->nat_outport == 0)
2372                                 nat->nat_outport = sport;
2373                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2374                         nflags = nat->nat_flags;
2375                         nat_stats.ns_wilds--;
2376                 }
2377         } else {
2378                 RWLOCK_EXIT(&ipf_nat);
2379
2380                 msk = 0xffffffff;
2381                 i = 32;
2382
2383                 WRITE_ENTER(&ipf_nat);
2384                 /*
2385                  * If there is no current entry in the nat table for this IP#,
2386                  * create one for it (if there is a matching rule).
2387                  */
2388 maskloop:
2389                 iph = ipa & htonl(msk);
2390                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2391                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2392                 {
2393                         if (np->in_ifp && (np->in_ifp != ifp))
2394                                 continue;
2395                         if ((np->in_flags & IPN_RF) &&
2396                             !(np->in_flags & nflags))
2397                                 continue;
2398                         if (np->in_flags & IPN_FILTER) {
2399                                 if (!nat_match(fin, np, ip))
2400                                         continue;
2401                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2402                                 continue;
2403                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2404                                 continue;
2405                         nat = nat_new(fin, ip, np, NULL,
2406                                       (u_int)nflags, NAT_OUTBOUND);
2407                         if (nat != NULL) {
2408                                 np->in_hits++;
2409                                 break;
2410                         }
2411                 }
2412                 if ((np == NULL) && (i > 0)) {
2413                         do {
2414                                 i--;
2415                                 msk <<= 1;
2416                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2417                         if (i >= 0)
2418                                 goto maskloop;
2419                 }
2420                 MUTEX_DOWNGRADE(&ipf_nat);
2421         }
2422
2423         /*
2424          * NOTE: ipf_nat must now only be held as a read lock
2425          */
2426         if (nat) {
2427                 np = nat->nat_ptr;
2428                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2429                         ipfr_nat_newfrag(ip, fin, nat);
2430                 MUTEX_ENTER(&nat->nat_lock);
2431                 if (fin->fin_p != IPPROTO_TCP) {
2432                         if (np && np->in_age[1])
2433                                 nat->nat_age = np->in_age[1];
2434                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2435                                 nat->nat_age = fr_defnaticmpage;
2436                         else
2437                                 nat->nat_age = fr_defnatage;
2438                 }
2439                 nat->nat_bytes += ip->ip_len;
2440                 nat->nat_pkts++;
2441                 MUTEX_EXIT(&nat->nat_lock);
2442
2443                 /*
2444                  * Fix up checksums, not by recalculating them, but
2445                  * simply computing adjustments.
2446                  */
2447                 if (nflags == IPN_ICMPERR) {
2448                         u_32_t s1, s2, sumd;
2449
2450                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2451                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2452                         CALC_SUMD(s1, s2, sumd);
2453                         fix_outcksum(fin, &ip->ip_sum, sumd);
2454                 }
2455 #if (SOLARIS || defined(__sgi)) || !defined(_KERNEL)
2456                 else {
2457                         if (nat->nat_dir == NAT_OUTBOUND)
2458                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2459                         else
2460                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2461                 }
2462 #endif
2463                 /*
2464                  * Only change the packet contents, not what is filtered upon.
2465                  */
2466                 ip->ip_src = nat->nat_outip;
2467
2468                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2469
2470                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2471                                 tcp->th_sport = nat->nat_outport;
2472                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2473                         }
2474
2475                         if (fin->fin_p == IPPROTO_TCP) {
2476                                 csump = &tcp->th_sum;
2477                                 MUTEX_ENTER(&nat->nat_lock);
2478                                 fr_tcp_age(&nat->nat_age,
2479                                            nat->nat_tcpstate, fin, 1, 0);
2480                                 if (nat->nat_age < fr_defnaticmpage)
2481                                         nat->nat_age = fr_defnaticmpage;
2482 #ifdef LARGE_NAT
2483                                 else if ((!np || !np->in_age[1]) &&
2484                                          (nat->nat_age > fr_defnatage))
2485                                         nat->nat_age = fr_defnatage;
2486 #endif
2487                                 /*
2488                                  * Increase this because we may have
2489                                  * "keep state" following this too and
2490                                  * packet storms can occur if this is
2491                                  * removed too quickly.
2492                                  */
2493                                 if (nat->nat_age == fr_tcpclosed)
2494                                         nat->nat_age = fr_tcplastack;
2495
2496                                 /*
2497                                  * Do a MSS CLAMPING on a SYN packet,
2498                                  * only deal IPv4 for now.
2499                                  */
2500                                 if (nat->nat_mssclamp &&
2501                                     (tcp->th_flags & TH_SYN) != 0)
2502                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2503                                                      fin, csump);
2504
2505                                 MUTEX_EXIT(&nat->nat_lock);
2506                         } else if (fin->fin_p == IPPROTO_UDP) {
2507                                 udphdr_t *udp = (udphdr_t *)tcp;
2508
2509                                 if (udp->uh_sum)
2510                                         csump = &udp->uh_sum;
2511                         }
2512
2513                         if (csump) {
2514                                 if (nat->nat_dir == NAT_OUTBOUND)
2515                                         fix_outcksum(fin, csump,
2516                                                      nat->nat_sumd[1]);
2517                                 else
2518                                         fix_incksum(fin, csump,
2519                                                     nat->nat_sumd[1]);
2520                         }
2521                 }
2522
2523                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2524                      (tcp != NULL && dport == np->in_dport))) {
2525                         i = appr_check(ip, fin, nat);
2526                         if (i == 0)
2527                                 i = 1;
2528                         else if (i == -1)
2529                                 nat->nat_drop[1]++;
2530                 } else
2531                         i = 1;
2532                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2533                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2534                 fin->fin_ifp = sifp;
2535                 return i;
2536         }
2537         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2538         fin->fin_ifp = sifp;
2539         return 0;
2540 }
2541
2542
2543 /*
2544  * Packets coming in from the external interface go through this.
2545  * Here, the destination address requires alteration, if anything.
2546  */
2547 int ip_natin(ip, fin)
2548 ip_t *ip;
2549 fr_info_t *fin;
2550 {
2551         struct in_addr src;
2552         struct in_addr in;
2553         ipnat_t *np;
2554         u_short sport = 0, dport = 0, *csump = NULL;
2555         u_int nflags = 0, natadd = 1, hv, msk;
2556         struct ifnet *ifp = fin->fin_ifp;
2557         tcphdr_t *tcp = NULL;
2558         int i, icmpset = 0;
2559         nat_t *nat;
2560         u_32_t iph;
2561
2562         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2563                 return 0;
2564
2565         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2566                 if (fin->fin_p == IPPROTO_TCP)
2567                         nflags = IPN_TCP;
2568                 else if (fin->fin_p == IPPROTO_UDP)
2569                         nflags = IPN_UDP;
2570                 if ((nflags & IPN_TCPUDP)) {
2571                         tcp = (tcphdr_t *)fin->fin_dp;
2572                         sport = tcp->th_sport;
2573                         dport = tcp->th_dport;
2574                 }
2575         }
2576
2577         in = fin->fin_dst;
2578         /* make sure the source address is to be redirected */
2579         src = fin->fin_src;
2580
2581         READ_ENTER(&ipf_nat);
2582
2583         if ((fin->fin_p == IPPROTO_ICMP) &&
2584             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2585                 icmpset = 1;
2586         else if ((fin->fin_fl & FI_FRAG) &&
2587                  (nat = ipfr_nat_knownfrag(ip, fin)))
2588                 natadd = 0;
2589         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2590                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2591                 nflags = nat->nat_flags;
2592                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2593                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2594                                 nat->nat_oport = sport;
2595                         if ((nat->nat_outport != dport) &&
2596                                  (nflags & FI_W_SPORT))
2597                                 nat->nat_outport = dport;
2598                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2599                         nflags = nat->nat_flags;
2600                         nat_stats.ns_wilds--;
2601                 }
2602         } else {
2603                 RWLOCK_EXIT(&ipf_nat);
2604
2605                 msk = 0xffffffff;
2606                 i = 32;
2607
2608                 WRITE_ENTER(&ipf_nat);
2609                 /*
2610                  * If there is no current entry in the nat table for this IP#,
2611                  * create one for it (if there is a matching rule).
2612                  */
2613 maskloop:
2614                 iph = in.s_addr & htonl(msk);
2615                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2616                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2617                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2618                             (np->in_p && (np->in_p != fin->fin_p)) ||
2619                             (np->in_flags && !(nflags & np->in_flags)))
2620                                 continue;
2621                         if (np->in_flags & IPN_FILTER) {
2622                                 if (!nat_match(fin, np, ip))
2623                                         continue;
2624                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2625                                 continue;
2626                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2627                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2628                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2629                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2630                                                     NAT_INBOUND))) {
2631                                         np->in_hits++;
2632                                         break;
2633                                 }
2634                 }
2635
2636                 if ((np == NULL) && (i > 0)) {
2637                         do {
2638                                 i--;
2639                                 msk <<= 1;
2640                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2641                         if (i >= 0)
2642                                 goto maskloop;
2643                 }
2644                 MUTEX_DOWNGRADE(&ipf_nat);
2645         }
2646
2647         /*
2648          * NOTE: ipf_nat must now only be held as a read lock
2649          */
2650         if (nat) {
2651                 np = nat->nat_ptr;
2652                 fin->fin_fr = nat->nat_fr;
2653                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2654                         ipfr_nat_newfrag(ip, fin, nat);
2655                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2656                      (tcp != NULL && sport == np->in_dport))) {
2657                         i = appr_check(ip, fin, nat);
2658                         if (i == -1) {
2659                                 nat->nat_drop[0]++;
2660                                 RWLOCK_EXIT(&ipf_nat);
2661                                 return i;
2662                         }
2663                 }
2664
2665                 MUTEX_ENTER(&nat->nat_lock);
2666                 if (fin->fin_p != IPPROTO_TCP) {
2667                         if (np && np->in_age[0])
2668                                 nat->nat_age = np->in_age[0];
2669                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2670                                 nat->nat_age = fr_defnaticmpage;
2671                         else
2672                                 nat->nat_age = fr_defnatage;
2673                 }
2674                 nat->nat_bytes += ip->ip_len;
2675                 nat->nat_pkts++;
2676                 MUTEX_EXIT(&nat->nat_lock);
2677
2678                 /*
2679                  * Fix up checksums, not by recalculating them, but
2680                  * simply computing adjustments.
2681                  */
2682                 if (nat->nat_dir == NAT_OUTBOUND)
2683                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2684                 else
2685                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2686
2687                 ip->ip_dst = nat->nat_inip;
2688                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2689
2690                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2691
2692                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2693                                 tcp->th_dport = nat->nat_inport;
2694                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2695                         }
2696
2697                         if (fin->fin_p == IPPROTO_TCP) {
2698                                 csump = &tcp->th_sum;
2699                                 MUTEX_ENTER(&nat->nat_lock);
2700                                 fr_tcp_age(&nat->nat_age,
2701                                            nat->nat_tcpstate, fin, 0, 0);
2702                                 if (nat->nat_age < fr_defnaticmpage)
2703                                         nat->nat_age = fr_defnaticmpage;
2704 #ifdef LARGE_NAT
2705                                 else if ((!np || !np->in_age[0]) &&
2706                                          (nat->nat_age > fr_defnatage))
2707                                         nat->nat_age = fr_defnatage;
2708 #endif
2709                                 /*
2710                                  * Increase this because we may have
2711                                  * "keep state" following this too and
2712                                  * packet storms can occur if this is
2713                                  * removed too quickly.
2714                                  */
2715                                 if (nat->nat_age == fr_tcpclosed)
2716                                         nat->nat_age = fr_tcplastack;
2717                                 /*
2718                                  * Do a MSS CLAMPING on a SYN packet,
2719                                  * only deal IPv4 for now.
2720                                  */
2721                                 if (nat->nat_mssclamp &&
2722                                     (tcp->th_flags & TH_SYN) != 0)
2723                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2724                                                      fin, csump);
2725
2726                                 MUTEX_EXIT(&nat->nat_lock);
2727                         } else if (fin->fin_p == IPPROTO_UDP) {
2728                                 udphdr_t *udp = (udphdr_t *)tcp;
2729
2730                                 if (udp->uh_sum)
2731                                         csump = &udp->uh_sum;
2732                         }
2733
2734                         if (csump) {
2735                                 if (nat->nat_dir == NAT_OUTBOUND)
2736                                         fix_incksum(fin, csump,
2737                                                     nat->nat_sumd[0]);
2738                                 else
2739                                         fix_outcksum(fin, csump,
2740                                                     nat->nat_sumd[0]);
2741                         }
2742                 }
2743                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2744                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2745                 return 1;
2746         }
2747         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2748         return 0;
2749 }
2750
2751
2752 /*
2753  * Free all memory used by NAT structures allocated at runtime.
2754  */
2755 void ip_natunload()
2756 {
2757         WRITE_ENTER(&ipf_nat);
2758         (void) nat_clearlist();
2759         (void) nat_flushtable();
2760         RWLOCK_EXIT(&ipf_nat);
2761
2762         if (nat_table[0] != NULL) {
2763                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2764                 nat_table[0] = NULL;
2765         }
2766         if (nat_table[1] != NULL) {
2767                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2768                 nat_table[1] = NULL;
2769         }
2770         if (nat_rules != NULL) {
2771                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2772                 nat_rules = NULL;
2773         }
2774         if (rdr_rules != NULL) {
2775                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2776                 rdr_rules = NULL;
2777         }
2778         if (maptable != NULL) {
2779                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2780                 maptable = NULL;
2781         }
2782 }
2783
2784
2785 /*
2786  * Slowly expire held state for NAT entries.  Timeouts are set in
2787  * expectation of this being called twice per second.
2788  */
2789 void ip_natexpire()
2790 {
2791         struct nat *nat, **natp;
2792 #if defined(_KERNEL) && !SOLARIS
2793         int s;
2794 #endif
2795
2796         SPL_NET(s);
2797         WRITE_ENTER(&ipf_nat);
2798         for (natp = &nat_instances; (nat = *natp); ) {
2799                 nat->nat_age--;
2800                 if (nat->nat_age) {
2801                         natp = &nat->nat_next;
2802                         continue;
2803                 }
2804                 *natp = nat->nat_next;
2805 #ifdef  IPFILTER_LOG
2806                 nat_log(nat, NL_EXPIRE);
2807 #endif
2808                 nat_delete(nat);
2809                 nat_stats.ns_expire++;
2810         }
2811         RWLOCK_EXIT(&ipf_nat);
2812         SPL_X(s);
2813 }
2814
2815
2816 /*
2817  */
2818 void ip_natsync(ifp)
2819 void *ifp;
2820 {
2821         ipnat_t *n;
2822         nat_t *nat;
2823         u_32_t sum1, sum2, sumd;
2824         struct in_addr in;
2825         ipnat_t *np;
2826         void *ifp2;
2827 #if defined(_KERNEL) && !SOLARIS
2828         int s;
2829 #endif
2830
2831         /*
2832          * Change IP addresses for NAT sessions for any protocol except TCP
2833          * since it will break the TCP connection anyway.
2834          */
2835         SPL_NET(s);
2836         WRITE_ENTER(&ipf_nat);
2837         for (nat = nat_instances; nat; nat = nat->nat_next)
2838                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2839                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2840                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2841                         ifp2 = nat->nat_ifp;
2842                         /*
2843                          * Change the map-to address to be the same as the
2844                          * new one.
2845                          */
2846                         sum1 = nat->nat_outip.s_addr;
2847                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2848                                 nat->nat_outip = in;
2849                         sum2 = nat->nat_outip.s_addr;
2850
2851                         if (sum1 == sum2)
2852                                 continue;
2853                         /*
2854                          * Readjust the checksum adjustment to take into
2855                          * account the new IP#.
2856                          */
2857                         CALC_SUMD(sum1, sum2, sumd);
2858                         /* XXX - dont change for TCP when solaris does
2859                          * hardware checksumming.
2860                          */
2861                         sumd += nat->nat_sumd[0];
2862                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2863                         nat->nat_sumd[1] = nat->nat_sumd[0];
2864                 }
2865
2866         for (n = nat_list; (n != NULL); n = n->in_next)
2867                 if (n->in_ifp == ifp) {
2868                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2869                         if (!n->in_ifp)
2870                                 n->in_ifp = (void *)-1;
2871                 }
2872         RWLOCK_EXIT(&ipf_nat);
2873         SPL_X(s);
2874 }
2875
2876
#ifdef	IPFILTER_LOG
/*
 * Build a natlog record from a NAT entry and hand it to ipllog() on the
 * IPL_LOGNAT device.
 *
 * nat  - entry whose addresses, ports, protocol and counters are copied
 * type - event type stored in nl_type
 *
 * nl_rule is the position of the entry's rule in nat_list, or -1 when the
 * entry has no rule, the rule is not on the list, or LARGE_NAT is defined
 * (in which case the list is not searched).
 */
void nat_log(nat, type)
struct nat *nat;
u_int type;
{
	struct natlog natl;
	struct ipnat *np;
	size_t sizes[1];
	void *items[1];
	int rulen, types[1];

	/* event description */
	natl.nl_type = type;
	natl.nl_p = nat->nat_p;

	/* addresses */
	natl.nl_inip = nat->nat_inip;
	natl.nl_outip = nat->nat_outip;
	natl.nl_origip = nat->nat_oip;

	/* ports */
	natl.nl_inport = nat->nat_inport;
	natl.nl_outport = nat->nat_outport;
	natl.nl_origport = nat->nat_oport;

	/* counters */
	natl.nl_bytes = nat->nat_bytes;
	natl.nl_pkts = nat->nat_pkts;

	natl.nl_rule = -1;
#ifndef LARGE_NAT
	if (nat->nat_ptr != NULL) {
		/* count rules until the one this entry points at */
		rulen = 0;
		for (np = nat_list; (np != NULL) && (np != nat->nat_ptr);
		     np = np->in_next)
			rulen++;
		if (np != NULL)
			natl.nl_rule = rulen;
	}
#endif

	/* a single item: the record itself */
	items[0] = &natl;
	sizes[0] = sizeof(natl);
	types[0] = 0;

	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
}
#endif
2915
2916
#ifdef __OpenBSD__
/*
 * Interface detach hook, only built on OpenBSD.  The ifp argument is not
 * examined; the function simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
	frsync();
}
#endif
2925
2926
2927 /*
2928  * Check for MSS option and clamp it if necessary.
2929  */
2930 static void nat_mssclamp(tcp, maxmss, fin, csump)
2931 tcphdr_t *tcp;
2932 u_32_t maxmss;
2933 fr_info_t *fin;
2934 u_short *csump;
2935 {
2936         u_char *cp, *ep, opt;
2937         int hlen, advance;
2938         u_32_t mss, sumd;
2939         u_short v;
2940
2941         hlen = tcp->th_off << 2;
2942         if (hlen > sizeof(*tcp)) {
2943                 cp = (u_char *)tcp + sizeof(*tcp);
2944                 ep = (u_char *)tcp + hlen;
2945
2946                 while (cp < ep) {
2947                         opt = cp[0];
2948                         if (opt == TCPOPT_EOL)
2949                                 break;
2950                         else if (opt == TCPOPT_NOP) {
2951                                 cp++;
2952                                 continue;
2953                         }
2954  
2955                         if (&cp[1] >= ep)
2956                                 break;
2957                         advance = cp[1];
2958                         if (&cp[advance] > ep)
2959                                 break;
2960                         switch (opt) {
2961                         case TCPOPT_MAXSEG:
2962                                 if (advance != 4)
2963                                         break;
2964                                 bcopy(&cp[2], &v, sizeof(v));
2965                                 mss = ntohs(v);
2966                                 if (mss > maxmss) {
2967                                         v = htons(maxmss);
2968                                         bcopy(&v, &cp[2], sizeof(v));
2969                                         CALC_SUMD(mss, maxmss, sumd);
2970                                         fix_outcksum(fin, csump, sumd);
2971                                 }
2972                                 break;
2973                         default:
2974                                 /* ignore unknown options */
2975                                 break;
2976                         }
2977                     
2978                         cp += advance;  
2979                 }       
2980         }       
2981 }