unbreak world: only include sys/thread2.h in kernel, not in userland
[dragonfly.git] / sys / contrib / ipfilter / netinet / ip_nat.c
1 /*
2  * Copyright (C) 1995-2001 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
7  *
8  * @(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed
9  * @(#)$Id: ip_nat.c,v 2.37.2.70 2002/08/28 12:45:48 darrenr Exp $
10  * $FreeBSD: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.22.2.8 2004/07/04 09:24:39 darrenr Exp $
11  * $DragonFly: src/sys/contrib/ipfilter/netinet/ip_nat.c,v 1.11 2005/06/05 12:17:46 corecode Exp $
12  */
13 #if (defined(__DragonFly__) || defined(__FreeBSD__)) && defined(KERNEL) && !defined(_KERNEL)
14 #define _KERNEL
15 #endif
16
17 #if defined(__sgi) && (IRIX > 602)
18 # include <sys/ptimers.h>
19 #endif
20 #include <sys/errno.h>
21 #include <sys/types.h>
22 #include <sys/param.h>
23 #include <sys/time.h>
24 #include <sys/file.h>
25 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
26     defined(_KERNEL)
27 # include "opt_ipfilter_log.h"
28 #endif
29 #if !defined(_KERNEL) && !defined(KERNEL)
30 # include <stdio.h>
31 # include <string.h>
32 # include <stdlib.h>
33 #endif
34 #if (defined(KERNEL) || defined(_KERNEL)) && (defined(__DragonFly__) || __FreeBSD_version >= 220000)
35 # include <sys/filio.h>
36 # include <sys/fcntl.h>
37 #else
38 # include <sys/ioctl.h>
39 #endif
40 #include <sys/fcntl.h>
41 #ifndef linux
42 # include <sys/protosw.h>
43 #endif
44 #include <sys/socket.h>
45 #if defined(_KERNEL) && !defined(linux)
46 # include <sys/systm.h>
47 #endif
48 #if !defined(__SVR4) && !defined(__svr4__)
49 # ifndef linux
50 #  include <sys/mbuf.h>
51 # endif
52 #else
53 # include <sys/filio.h>
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 #  include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
62 # include <sys/queue.h>
63 #endif
64 #if defined(__DragonFly__) && defined(_KERNEL)
65 # include <sys/thread2.h>
66 #endif
67 #include <net/if.h>
68 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 #  include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
81
82 #ifdef __sgi
83 # ifdef IFF_DRVRLOCK /* IRIX6 */
84 #include <sys/hashing.h>
85 #include <netinet/in_var.h>
86 # endif
87 #endif
88
89 #ifdef RFC1825
90 # include <vpn/md5.h>
91 # include <vpn/ipsec.h>
92 extern struct ifnet vpnif;
93 #endif
94
95 #ifndef linux
96 # include <netinet/ip_var.h>
97 # include <netinet/tcp_fsm.h>
98 #endif
99 #include <netinet/tcp.h>
100 #include <netinet/udp.h>
101 #include <netinet/ip_icmp.h>
102 #include "ip_compat.h"
103 #include <netinet/tcpip.h>
104 #include "ip_fil.h"
105 #include "ip_nat.h"
106 #include "ip_frag.h"
107 #include "ip_state.h"
108 #include "ip_proxy.h"
109 #if defined(__DragonFly__) || __FreeBSD_version >= 300000
110 # include <sys/malloc.h>
111 #endif
112 #ifndef MIN
113 # define        MIN(a,b)        (((a)<(b))?(a):(b))
114 #endif
115 #undef  SOCKADDR_IN
116 #define SOCKADDR_IN     struct sockaddr_in
117
/* SCCS identification string for this source file. */
118 static const char sccsid[] = "@(#)ip_nat.c     1.11 6/5/96 (C) 1995 Darren Reed";
119
/*
 * nat_table[0] and nat_table[1] are the two NAT session bucket arrays; each
 * is allocated by nat_init() with ipf_nattable_sz pointer slots.
 * nat_instances heads the list of sessions linked through nat_next (it is
 * walked by fr_natgetsz() and fr_natgetent()).
 */
120 nat_t   **nat_table[2] = { NULL, NULL },
121         *nat_instances = NULL;
/* List of configured NAT rules, linked through in_next (see nat_ioctl()). */
122 ipnat_t *nat_list = NULL;
/*
 * Table sizes.  nat_init() uses ipf_nattable_sz, ipf_natrules_sz,
 * ipf_rdrrules_sz and ipf_hostmap_sz to size its allocations.
 * NOTE(review): ipf_nattable_max is not referenced in this part of the file;
 * presumably an upper bound on the number of sessions -- confirm.
 */
123 u_int   ipf_nattable_max = NAT_TABLE_MAX;
124 u_int   ipf_nattable_sz = NAT_TABLE_SZ;
125 u_int   ipf_natrules_sz = NAT_SIZE;
126 u_int   ipf_rdrrules_sz = RDR_SIZE;
127 u_int   ipf_hostmap_sz = HOSTMAP_SIZE;
/*
 * Bit k is set in nat_masks (by nat_addnat()) or rdr_masks (by nat_addrdr())
 * when a rule whose netmask has k bits set has been added.
 */
128 u_32_t  nat_masks = 0;
129 u_32_t  rdr_masks = 0;
/*
 * Rule bucket arrays indexed by NAT_HASH_FN(): nat_rules chains rules through
 * in_mnext, rdr_rules chains rules through in_rnext.
 */
130 ipnat_t **nat_rules = NULL;
131 ipnat_t **rdr_rules = NULL;
/* Host map bucket array; entries are chained through hm_next. */
132 hostmap_t       **maptable  = NULL;
133
/*
 * NOTE(review): presumably default ageing values for NAT sessions; they are
 * not used in this part of the file -- confirm against the ageing code.
 */
134 u_long  fr_defnatage = DEF_NAT_AGE,
135         fr_defnaticmpage = 6;           /* 3 seconds */
/* Statistics block; its table pointers are refreshed and copied out by SIOCGNATS. */
136 natstat_t nat_stats;
/*
 * Set through SIOCSTLCK.  SIOCSTPUT, SIOCSTGSZ and SIOCSTGET are refused with
 * EACCES while this is zero.
 */
137 int     fr_nat_lock = 0;
/* Locks defined elsewhere, only declared here for Solaris/IRIX kernels. */
138 #if     (SOLARIS || defined(__sgi)) && defined(_KERNEL)
139 extern  kmutex_t        ipf_rw;
140 extern  KRWLOCK_T       ipf_nat;
141 #endif
142
/*
 * Forward declarations of the file-local (static) helpers, so they can be
 * used before their definitions appear.
 */
143 static  int     nat_flushtable (void);
144 static  void    nat_addnat (struct ipnat *);
145 static  void    nat_addrdr (struct ipnat *);
146 static  void    nat_delete (struct nat *);
147 static  void    nat_delrdr (struct ipnat *);
148 static  void    nat_delnat (struct ipnat *);
/* Handlers for the SIOCSTGET, SIOCSTGSZ and SIOCSTPUT ioctls respectively. */
149 static  int     fr_natgetent (caddr_t);
150 static  int     fr_natgetsz (caddr_t);
151 static  int     fr_natputent (caddr_t);
152 static  void    nat_tabmove (fr_info_t *, nat_t *);
153 static  int     nat_match (fr_info_t *, ipnat_t *, ip_t *);
154 static  hostmap_t *nat_hostmap (ipnat_t *, struct in_addr,
155                                     struct in_addr);
156 static  void    nat_hostmapdel (struct hostmap *);
157 static  void    nat_mssclamp (tcphdr_t *, u_32_t, fr_info_t *, u_short *);
158
159
160 int nat_init()
161 {
162         KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
163         if (nat_table[0] != NULL)
164                 bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
165         else
166                 return -1;
167
168         KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
169         if (nat_table[1] != NULL)
170                 bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
171         else
172                 return -1;
173
174         KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
175         if (nat_rules != NULL)
176                 bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
177         else
178                 return -1;
179
180         KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
181         if (rdr_rules != NULL)
182                 bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
183         else
184                 return -1;
185
186         KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
187         if (maptable != NULL)
188                 bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
189         else
190                 return -1;
191         return 0;
192 }
193
194
195 static void nat_addrdr(n)
196 ipnat_t *n;
197 {
198         ipnat_t **np;
199         u_32_t j;
200         u_int hv;
201         int k;
202
203         k = countbits(n->in_outmsk);
204         if ((k >= 0) && (k != 32))
205                 rdr_masks |= 1 << k;
206         j = (n->in_outip & n->in_outmsk);
207         hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
208         np = rdr_rules + hv;
209         while (*np != NULL)
210                 np = &(*np)->in_rnext;
211         n->in_rnext = NULL;
212         n->in_prnext = np;
213         *np = n;
214 }
215
216
217 static void nat_addnat(n)
218 ipnat_t *n;
219 {
220         ipnat_t **np;
221         u_32_t j;
222         u_int hv;
223         int k;
224
225         k = countbits(n->in_inmsk);
226         if ((k >= 0) && (k != 32))
227                 nat_masks |= 1 << k;
228         j = (n->in_inip & n->in_inmsk);
229         hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
230         np = nat_rules + hv;
231         while (*np != NULL)
232                 np = &(*np)->in_mnext;
233         n->in_mnext = NULL;
234         n->in_pmnext = np;
235         *np = n;
236 }
237
238
239 static void nat_delrdr(n)
240 ipnat_t *n;
241 {
242         if (n->in_rnext)
243                 n->in_rnext->in_prnext = n->in_prnext;
244         *n->in_prnext = n->in_rnext;
245 }
246
247
248 static void nat_delnat(n)
249 ipnat_t *n;
250 {
251         if (n->in_mnext)
252                 n->in_mnext->in_pmnext = n->in_pmnext;
253         *n->in_pmnext = n->in_mnext;
254 }
255
256
257 /*
258  * check if an ip address has already been allocated for a given mapping that
259  * is not doing port based translation.
260  *
261  * Must be called with ipf_nat held as a write lock.
262  */
263 static struct hostmap *nat_hostmap(np, real, map)
264 ipnat_t *np;
265 struct in_addr real;
266 struct in_addr map;
267 {
268         hostmap_t *hm;
269         u_int hv;
270
271         hv = real.s_addr % HOSTMAP_SIZE;
272         for (hm = maptable[hv]; hm; hm = hm->hm_next)
273                 if ((hm->hm_realip.s_addr == real.s_addr) &&
274                     (np == hm->hm_ipnat)) {
275                         hm->hm_ref++;
276                         return hm;
277                 }
278
279         KMALLOC(hm, hostmap_t *);
280         if (hm) {
281                 hm->hm_next = maptable[hv];
282                 hm->hm_pnext = maptable + hv;
283                 if (maptable[hv])
284                         maptable[hv]->hm_pnext = &hm->hm_next;
285                 maptable[hv] = hm;
286                 hm->hm_ipnat = np;
287                 hm->hm_realip = real;
288                 hm->hm_mapip = map;
289                 hm->hm_ref = 1;
290         }
291         return hm;
292 }
293
294
295 /*
296  * Must be called with ipf_nat held as a write lock.
297  */
298 static void nat_hostmapdel(hm)
299 struct hostmap *hm;
300 {
301         ATOMIC_DEC32(hm->hm_ref);
302         if (hm->hm_ref == 0) {
303                 if (hm->hm_next)
304                         hm->hm_next->hm_pnext = hm->hm_pnext;
305                 *hm->hm_pnext = hm->hm_next;
306                 KFREE(hm);
307         }
308 }
309
310
311 void fix_outcksum(fin, sp, n)
312 fr_info_t *fin;
313 u_short *sp;
314 u_32_t n;
315 {
316         u_short sumshort;
317         u_32_t sum1;
318
319         if (!n)
320                 return;
321         else if (n & NAT_HW_CKSUM) {
322                 n &= 0xffff;
323                 n += fin->fin_dlen;
324                 n = (n & 0xffff) + (n >> 16);
325                 *sp = n & 0xffff;
326                 return;
327         }
328         sum1 = (~ntohs(*sp)) & 0xffff;
329         sum1 += (n);
330         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
331         /* Again */
332         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
333         sumshort = ~(u_short)sum1;
334         *(sp) = htons(sumshort);
335 }
336
337
338 void fix_incksum(fin, sp, n)
339 fr_info_t *fin;
340 u_short *sp;
341 u_32_t n;
342 {
343         u_short sumshort;
344         u_32_t sum1;
345
346         if (!n)
347                 return;
348         else if (n & NAT_HW_CKSUM) {
349                 n &= 0xffff;
350                 n += fin->fin_dlen;
351                 n = (n & 0xffff) + (n >> 16);
352                 *sp = n & 0xffff;
353                 return;
354         }
355 #ifdef sparc
356         sum1 = (~(*sp)) & 0xffff;
357 #else
358         sum1 = (~ntohs(*sp)) & 0xffff;
359 #endif
360         sum1 += ~(n) & 0xffff;
361         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
362         /* Again */
363         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
364         sumshort = ~(u_short)sum1;
365         *(sp) = htons(sumshort);
366 }
367
368
369 /*
370  * fix_datacksum is used *only* for the adjustments of checksums in the data
371  * section of an IP packet.
372  *
373  * The only situation in which you need to do this is when NAT'ing an 
374  * ICMP error message. Such a message, contains in its body the IP header
375  * of the original IP packet, that causes the error.
376  *
377  * You can't use fix_incksum or fix_outcksum in that case, because for the
378  * kernel the data section of the ICMP error is just data, and no special 
379  * processing like hardware cksum or ntohs processing have been done by the 
380  * kernel on the data section.
381  */
382 void fix_datacksum(sp, n)
383 u_short *sp;
384 u_32_t n;
385 {
386         u_short sumshort;
387          u_32_t sum1;
388
389         if (!n)
390                 return;
391
392         sum1 = (~ntohs(*sp)) & 0xffff;
393         sum1 += (n);
394         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
395         /* Again */
396         sum1 = (sum1 >> 16) + (sum1 & 0xffff);
397         sumshort = ~(u_short)sum1;
398         *(sp) = htons(sumshort);
399 }
400
401 /*
402  * How the NAT is organised and works.
403  *
404  * Inside (interface y) NAT       Outside (interface x)
405  * -------------------- -+- -------------------------------------
406  * Packet going          |   out, processed by ip_natout() for x
407  * ------------>         |   ------------>
408  * src=10.1.1.1          |   src=192.1.1.1
409  *                       |
410  *                       |   in, processed by ip_natin() for x
411  * <------------         |   <------------
412  * dst=10.1.1.1          |   dst=192.1.1.1
413  * -------------------- -+- -------------------------------------
414  * ip_natout() - changes ip_src and if required, sport
415  *             - creates a new mapping, if required.
416  * ip_natin()  - changes ip_dst and if required, dport
417  *
418  * In the NAT table, internal source is recorded as "in" and externally
419  * seen as "out".
420  */
421
422 /*
423  * Handle ioctls which manipulate the NAT.
424  */
425 int nat_ioctl(data, cmd, mode)
426 #if defined(__DragonFly__) || defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
427 u_long cmd;
428 #else
429 int cmd;
430 #endif
431 caddr_t data;
432 int mode;
433 {
434         ipnat_t *nat, *nt, *n = NULL, **np = NULL;
435         int error = 0, ret, arg, getlock;
436         ipnat_t natd;
437         u_32_t i, j;
438
439 #if (BSD >= 199306) && defined(_KERNEL)
440         if ((securelevel >= 3) && (mode & FWRITE))
441                 return EPERM;
442 #endif
443
444         nat = NULL;     /* XXX gcc -Wuninitialized */
445         KMALLOC(nt, ipnat_t *);
446         getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
447         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
448                 if (mode & NAT_SYSSPACE) {
449                         bcopy(data, (char *)&natd, sizeof(natd));
450                         error = 0;
451                 } else {
452                         error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
453                 }
454         } else if (cmd == SIOCIPFFL) {  /* SIOCFLNAT & SIOCCNATL */
455                 error = IRCOPY(data, (char *)&arg, sizeof(arg));
456                 if (error)
457                         error = EFAULT;
458         }
459
460         if (error)
461                 goto done;
462
463         /*
464          * For add/delete, look to see if the NAT entry is already present
465          */
466         if (getlock == 1) {
467                 WRITE_ENTER(&ipf_nat);
468         }
469         if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
470                 nat = &natd;
471                 nat->in_flags &= IPN_USERFLAGS;
472                 if ((nat->in_redir & NAT_MAPBLK) == 0) {
473                         if ((nat->in_flags & IPN_SPLIT) == 0)
474                                 nat->in_inip &= nat->in_inmsk;
475                         if ((nat->in_flags & IPN_IPRANGE) == 0)
476                                 nat->in_outip &= nat->in_outmsk;
477                 }
478                 for (np = &nat_list; (n = *np); np = &n->in_next)
479                         if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
480                                         IPN_CMPSIZ)) {
481                                 if (n->in_redir == NAT_REDIRECT &&
482                                     n->in_pnext != nat->in_pnext)
483                                         continue;
484                                 break;
485                         }
486         }
487
488         switch (cmd)
489         {
490 #ifdef  IPFILTER_LOG
491         case SIOCIPFFB :
492         {
493                 int tmp;
494
495                 if (!(mode & FWRITE))
496                         error = EPERM;
497                 else {
498                         tmp = ipflog_clear(IPL_LOGNAT);
499                         IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
500                 }
501                 break;
502         }
503 #endif
504         case SIOCADNAT :
505                 if (!(mode & FWRITE)) {
506                         error = EPERM;
507                         break;
508                 }
509                 if (n) {
510                         error = EEXIST;
511                         break;
512                 }
513                 if (nt == NULL) {
514                         error = ENOMEM;
515                         break;
516                 }
517                 n = nt;
518                 nt = NULL;
519                 bcopy((char *)nat, (char *)n, sizeof(*n));
520                 n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
521                 if (!n->in_ifp)
522                         n->in_ifp = (void *)-1;
523                 if (n->in_plabel[0] != '\0') {
524                         n->in_apr = appr_lookup(n->in_p, n->in_plabel);
525                         if (!n->in_apr) {
526                                 error = ENOENT;
527                                 break;
528                         }
529                 }
530                 n->in_next = NULL;
531                 *np = n;
532
533                 if (n->in_redir & NAT_REDIRECT) {
534                         n->in_flags &= ~IPN_NOTDST;
535                         nat_addrdr(n);
536                 }
537                 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
538                         n->in_flags &= ~IPN_NOTSRC;
539                         nat_addnat(n);
540                 }
541
542                 n->in_use = 0;
543                 if (n->in_redir & NAT_MAPBLK)
544                         n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
545                 else if (n->in_flags & IPN_AUTOPORTMAP)
546                         n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
547                 else if (n->in_flags & IPN_IPRANGE)
548                         n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
549                 else if (n->in_flags & IPN_SPLIT)
550                         n->in_space = 2;
551                 else
552                         n->in_space = ~ntohl(n->in_outmsk);
553                 /*
554                  * Calculate the number of valid IP addresses in the output
555                  * mapping range.  In all cases, the range is inclusive of
556                  * the start and ending IP addresses.
557                  * If to a CIDR address, lose 2: broadcast + network address
558                  *                               (so subtract 1)
559                  * If to a range, add one.
560                  * If to a single IP address, set to 1.
561                  */
562                 if (n->in_space) {
563                         if ((n->in_flags & IPN_IPRANGE) != 0)
564                                 n->in_space += 1;
565                         else
566                                 n->in_space -= 1;
567                 } else
568                         n->in_space = 1;
569                 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
570                     ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
571                         n->in_nip = ntohl(n->in_outip) + 1;
572                 else if ((n->in_flags & IPN_SPLIT) &&
573                          (n->in_redir & NAT_REDIRECT))
574                         n->in_nip = ntohl(n->in_inip);
575                 else
576                         n->in_nip = ntohl(n->in_outip);
577                 if (n->in_redir & NAT_MAP) {
578                         n->in_pnext = ntohs(n->in_pmin);
579                         /*
580                          * Multiply by the number of ports made available.
581                          */
582                         if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
583                                 n->in_space *= (ntohs(n->in_pmax) -
584                                                 ntohs(n->in_pmin) + 1);
585                                 /*
586                                  * Because two different sources can map to
587                                  * different destinations but use the same
588                                  * local IP#/port #.
589                                  * If the result is smaller than in_space, then
590                                  * we may have wrapped around 32bits.
591                                  */
592                                 i = n->in_inmsk;
593                                 if ((i != 0) && (i != 0xffffffff)) {
594                                         j = n->in_space * (~ntohl(i) + 1);
595                                         if (j >= n->in_space)
596                                                 n->in_space = j;
597                                         else
598                                                 n->in_space = 0xffffffff;
599                                 }
600                         }
601                         /*
602                          * If no protocol is specified, multiple by 256.
603                          */
604                         if ((n->in_flags & IPN_TCPUDP) == 0) {
605                                         j = n->in_space * 256;
606                                         if (j >= n->in_space)
607                                                 n->in_space = j;
608                                         else
609                                                 n->in_space = 0xffffffff;
610                         }
611                 }
612                 /* Otherwise, these fields are preset */
613                 n = NULL;
614                 nat_stats.ns_rules++;
615                 break;
616         case SIOCRMNAT :
617                 if (!(mode & FWRITE)) {
618                         error = EPERM;
619                         n = NULL;
620                         break;
621                 }
622                 if (!n) {
623                         error = ESRCH;
624                         break;
625                 }
626                 if (n->in_redir & NAT_REDIRECT)
627                         nat_delrdr(n);
628                 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
629                         nat_delnat(n);
630                 if (nat_list == NULL) {
631                         nat_masks = 0;
632                         rdr_masks = 0;
633                 }
634                 *np = n->in_next;
635                 if (!n->in_use) {
636                         if (n->in_apr)
637                                 appr_free(n->in_apr);
638                         KFREE(n);
639                         nat_stats.ns_rules--;
640                 } else {
641                         n->in_flags |= IPN_DELETE;
642                         n->in_next = NULL;
643                 }
644                 n = NULL;
645                 break;
646         case SIOCGNATS :
647                 MUTEX_DOWNGRADE(&ipf_nat);
648                 nat_stats.ns_table[0] = nat_table[0];
649                 nat_stats.ns_table[1] = nat_table[1];
650                 nat_stats.ns_list = nat_list;
651                 nat_stats.ns_maptable = maptable;
652                 nat_stats.ns_nattab_sz = ipf_nattable_sz;
653                 nat_stats.ns_rultab_sz = ipf_natrules_sz;
654                 nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
655                 nat_stats.ns_hostmap_sz = ipf_hostmap_sz;
656                 nat_stats.ns_instances = nat_instances;
657                 nat_stats.ns_apslist = ap_sess_list;
658                 error = IWCOPYPTR((char *)&nat_stats, (char *)data,
659                                   sizeof(nat_stats));
660                 break;
661         case SIOCGNATL :
662             {
663                 natlookup_t nl;
664
665                 MUTEX_DOWNGRADE(&ipf_nat);
666                 error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
667                 if (error)
668                         break;
669
670                 if (nat_lookupredir(&nl)) {
671                         error = IWCOPYPTR((char *)&nl, (char *)data,
672                                           sizeof(nl));
673                 } else
674                         error = ESRCH;
675                 break;
676             }
677         case SIOCIPFFL :        /* old SIOCFLNAT & SIOCCNATL */
678                 if (!(mode & FWRITE)) {
679                         error = EPERM;
680                         break;
681                 }
682                 error = 0;
683                 if (arg == 0)
684                         ret = nat_flushtable();
685                 else if (arg == 1)
686                         ret = nat_clearlist();
687                 else
688                         error = EINVAL;
689                 MUTEX_DOWNGRADE(&ipf_nat);
690                 if (!error) {
691                         error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
692                         if (error)
693                                 error = EFAULT;
694                 }
695                 break;
696         case SIOCSTLCK :
697                 error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
698                 if (!error) {
699                         error = IWCOPY((caddr_t)&fr_nat_lock, data,
700                                         sizeof(fr_nat_lock));
701                         if (!error)
702                                 fr_nat_lock = arg;
703                 } else
704                         error = EFAULT;
705                 break;
706         case SIOCSTPUT :
707                 if (fr_nat_lock)
708                         error = fr_natputent(data);
709                 else
710                         error = EACCES;
711                 break;
712         case SIOCSTGSZ :
713                 if (fr_nat_lock)
714                         error = fr_natgetsz(data);
715                 else
716                         error = EACCES;
717                 break;
718         case SIOCSTGET :
719                 if (fr_nat_lock)
720                         error = fr_natgetent(data);
721                 else
722                         error = EACCES;
723                 break;
724         case FIONREAD :
725 #ifdef  IPFILTER_LOG
726                 arg = (int)iplused[IPL_LOGNAT];
727                 MUTEX_DOWNGRADE(&ipf_nat);
728                 error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
729                 if (error)
730                         error = EFAULT;
731 #endif
732                 break;
733         default :
734                 error = EINVAL;
735                 break;
736         }
737         if (getlock == 1) {
738                 RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
739         }
740 done:
741         if (nt)
742                 KFREE(nt);
743         return error;
744 }
745
746
747 static int fr_natgetsz(data)
748 caddr_t data;
749 {
750         ap_session_t *aps;
751         nat_t *nat, *n;
752         int error = 0;
753         natget_t ng;
754
755         error = IRCOPY(data, (caddr_t)&ng, sizeof(ng));
756         if (error)
757                 return EFAULT;
758
759         nat = ng.ng_ptr;
760         if (!nat) {
761                 nat = nat_instances;
762                 ng.ng_sz = 0;
763                 if (nat == NULL) {
764                         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
765                         if (error)
766                                 error = EFAULT;
767                         return error;
768                 }
769         } else {
770                 /*
771                  * Make sure the pointer we're copying from exists in the
772                  * current list of entries.  Security precaution to prevent
773                  * copying of random kernel data.
774                  */
775                 for (n = nat_instances; n; n = n->nat_next)
776                         if (n == nat)
777                                 break;
778                 if (!n)
779                         return ESRCH;
780         }
781
782         ng.ng_sz = sizeof(nat_save_t);
783         aps = nat->nat_aps;
784         if ((aps != NULL) && (aps->aps_data != 0)) {
785                 ng.ng_sz += sizeof(ap_session_t);
786                 ng.ng_sz += aps->aps_psiz;
787                 if (aps->aps_psiz > 4)  /* XXX - sizeof(ipn_data) */
788                         ng.ng_sz -= 4;
789         }
790
791         error = IWCOPY((caddr_t)&ng, data, sizeof(ng));
792         if (error)
793                 error = EFAULT;
794         return error;
795 }
796
797
798 static int fr_natgetent(data)
799 caddr_t data;
800 {
801         nat_save_t ipn, *ipnp, *ipnn = NULL;
802         nat_t *n, *nat;
803         ap_session_t *aps;
804         size_t dsz;
805         int error;
806
807         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
808         if (error)
809                 return EFAULT;
810         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
811         if (error)
812                 return EFAULT;
813
814         nat = ipn.ipn_next;
815         if (!nat) {
816                 nat = nat_instances;
817                 if (nat == NULL) {
818                         if (nat_instances == NULL)
819                                 return ENOENT;
820                         return 0;
821                 }
822         } else {
823                 /*
824                  * Make sure the pointer we're copying from exists in the
825                  * current list of entries.  Security precaution to prevent
826                  * copying of random kernel data.
827                  */
828                 for (n = nat_instances; n; n = n->nat_next)
829                         if (n == nat)
830                                 break;
831                 if (!n)
832                         return ESRCH;
833         }
834
835         ipn.ipn_next = nat->nat_next;
836         bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
837         ipn.ipn_nat.nat_data = NULL;
838
839         if (nat->nat_ptr) {
840                 bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
841                       sizeof(ipn.ipn_ipnat));
842         }
843
844         if (nat->nat_fr)
845                 bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
846                       sizeof(ipn.ipn_rule));
847
848         if ((aps = nat->nat_aps)) {
849                 dsz = sizeof(*aps);
850                 if (aps->aps_data)
851                         dsz += aps->aps_psiz;
852                 ipn.ipn_dsize = dsz;
853                 if (dsz > sizeof(ipn.ipn_data))
854                         dsz -= sizeof(ipn.ipn_data);
855                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + dsz);
856                 if (ipnn == NULL)
857                         return ENOMEM;
858                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
859
860                 bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps));
861                 if (aps->aps_data) {
862                         bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
863                               aps->aps_psiz);
864                 }
865                 error = IWCOPY((caddr_t)ipnn, ipnp,
866                                sizeof(ipn) + dsz);
867                 if (error)
868                         error = EFAULT;
869                 KFREES(ipnn, sizeof(*ipnn) + dsz);
870         } else {
871                 ipn.ipn_dsize = 0;
872                 error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
873                 if (error)
874                         error = EFAULT;
875         }
876         return error;
877 }
878
879
880 static int fr_natputent(data)
881 caddr_t data;
882 {
883         nat_save_t ipn, *ipnp, *ipnn = NULL;
884         nat_t *n, *nat;
885         ap_session_t *aps;
886         frentry_t *fr;
887         ipnat_t *in;
888
889         int error;
890
891         error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
892         if (error)
893                 return EFAULT;
894         error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
895         if (error)
896                 return EFAULT;
897         nat = NULL;
898         if (ipn.ipn_dsize) {
899                 KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
900                 if (ipnn == NULL)
901                         return ENOMEM;
902                 bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
903                 error = IRCOPY((caddr_t)ipnp + offsetof(nat_save_t, ipn_data),
904                                (caddr_t)ipnn->ipn_data, ipn.ipn_dsize);
905                 if (error) {
906                         error = EFAULT;
907                         goto junkput;
908                 }
909         } else
910                 ipnn = NULL;
911
912         KMALLOC(nat, nat_t *);
913         if (nat == NULL) {
914                 error = EFAULT;
915                 goto junkput;
916         }
917
918         bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
919         /*
920          * Initialize all these so that nat_delete() doesn't cause a crash.
921          */
922         nat->nat_phnext[0] = NULL;
923         nat->nat_phnext[1] = NULL;
924         fr = nat->nat_fr;
925         nat->nat_fr = NULL;
926         aps = nat->nat_aps;
927         nat->nat_aps = NULL;
928         in = nat->nat_ptr;
929         nat->nat_ptr = NULL;
930         nat->nat_hm = NULL;
931         nat->nat_data = NULL;
932         nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
933
934         /*
935          * Restore the rule associated with this nat session
936          */
937         if (in) {
938                 KMALLOC(in, ipnat_t *);
939                 if (in == NULL) {
940                         error = ENOMEM;
941                         goto junkput;
942                 }
943                 nat->nat_ptr = in;
944                 bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
945                 in->in_use = 1;
946                 in->in_flags |= IPN_DELETE;
947                 in->in_next = NULL;
948                 in->in_rnext = NULL;
949                 in->in_prnext = NULL;
950                 in->in_mnext = NULL;
951                 in->in_pmnext = NULL;
952                 in->in_ifp = GETUNIT(in->in_ifname, 4);
953                 if (in->in_plabel[0] != '\0') {
954                         in->in_apr = appr_lookup(in->in_p, in->in_plabel);
955                 }
956         }
957
958         /*
959          * Restore ap_session_t structure.  Include the private data allocated
960          * if it was there.
961          */
962         if (aps) {
963                 KMALLOC(aps, ap_session_t *);
964                 if (aps == NULL) {
965                         error = ENOMEM;
966                         goto junkput;
967                 }
968                 nat->nat_aps = aps;
969                 aps->aps_next = ap_sess_list;
970                 ap_sess_list = aps;
971                 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
972                 if (in)
973                         aps->aps_apr = in->in_apr;
974                 if (aps->aps_psiz) {
975                         KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
976                         if (aps->aps_data == NULL) {
977                                 error = ENOMEM;
978                                 goto junkput;
979                         }
980                         bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
981                               aps->aps_psiz);
982                 } else {
983                         aps->aps_psiz = 0;
984                         aps->aps_data = NULL;
985                 }
986         }
987
988         /*
989          * If there was a filtering rule associated with this entry then
990          * build up a new one.
991          */
992         if (fr != NULL) {
993                 if (nat->nat_flags & FI_NEWFR) {
994                         KMALLOC(fr, frentry_t *);
995                         nat->nat_fr = fr;
996                         if (fr == NULL) {
997                                 error = ENOMEM;
998                                 goto junkput;
999                         }
1000                         bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
1001                         ipn.ipn_nat.nat_fr = fr;
1002                         error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
1003                         if (error) {
1004                                 error = EFAULT;
1005                                 goto junkput;
1006                         }
1007                 } else {
1008                         for (n = nat_instances; n; n = n->nat_next)
1009                                 if (n->nat_fr == fr)
1010                                         break;
1011                         if (!n) {
1012                                 error = ESRCH;
1013                                 goto junkput;
1014                         }
1015                 }
1016         }
1017
1018         if (ipnn)
1019                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1020         nat_insert(nat);
1021         return 0;
1022 junkput:
1023         if (ipnn)
1024                 KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
1025         if (nat)
1026                 nat_delete(nat);
1027         return error;
1028 }
1029
1030
1031 /*
1032  * Delete a nat entry from the various lists and table.
1033  */
1034 static void nat_delete(natd)
1035 struct nat *natd;
1036 {
1037         struct ipnat *ipn;
1038
1039         if (natd->nat_flags & FI_WILDP)
1040                 nat_stats.ns_wilds--;
1041         if (natd->nat_hnext[0])
1042                 natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0];
1043         *natd->nat_phnext[0] = natd->nat_hnext[0];
1044         if (natd->nat_hnext[1])
1045                 natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1];
1046         *natd->nat_phnext[1] = natd->nat_hnext[1];
1047         if (natd->nat_me != NULL)
1048                 *natd->nat_me = NULL;
1049
1050         if (natd->nat_fr != NULL) {
1051                 ATOMIC_DEC32(natd->nat_fr->fr_ref);
1052         }
1053
1054         if (natd->nat_hm != NULL)
1055                 nat_hostmapdel(natd->nat_hm);
1056
1057         /*
1058          * If there is an active reference from the nat entry to its parent
1059          * rule, decrement the rule's reference count and free it too if no
1060          * longer being used.
1061          */
1062         ipn = natd->nat_ptr;
1063         if (ipn != NULL) {
1064                 ipn->in_space++;
1065                 ipn->in_use--;
1066                 if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) {
1067                         if (ipn->in_apr)
1068                                 appr_free(ipn->in_apr);
1069                         KFREE(ipn);
1070                         nat_stats.ns_rules--;
1071                 }
1072         }
1073
1074         MUTEX_DESTROY(&natd->nat_lock);
1075         /*
1076          * If there's a fragment table entry too for this nat entry, then
1077          * dereference that as well.
1078          */
1079         ipfr_forgetnat((void *)natd);
1080         aps_free(natd->nat_aps);
1081         nat_stats.ns_inuse--;
1082         KFREE(natd);
1083 }
1084
1085
1086 /*
1087  * nat_flushtable - clear the NAT table of all mapping entries.
1088  * (this is for the dynamic mappings)
1089  */
1090 static int nat_flushtable()
1091 {
1092         nat_t *nat, **natp;
1093         int j = 0;
1094
1095         /*
1096          * ALL NAT mappings deleted, so lets just make the deletions
1097          * quicker.
1098          */
1099         if (nat_table[0] != NULL)
1100                 bzero((char *)nat_table[0],
1101                       sizeof(nat_table[0]) * ipf_nattable_sz);
1102         if (nat_table[1] != NULL)
1103                 bzero((char *)nat_table[1],
1104                       sizeof(nat_table[1]) * ipf_nattable_sz);
1105
1106         for (natp = &nat_instances; (nat = *natp); ) {
1107                 *natp = nat->nat_next;
1108 #ifdef  IPFILTER_LOG
1109                 nat_log(nat, NL_FLUSH);
1110 #endif
1111                 nat_delete(nat);
1112                 j++;
1113         }
1114         nat_stats.ns_inuse = 0;
1115         return j;
1116 }
1117
1118
1119 /*
1120  * nat_clearlist - delete all rules in the active NAT mapping list.
1121  * (this is for NAT/RDR rules)
1122  */
1123 int nat_clearlist()
1124 {
1125         ipnat_t *n, **np = &nat_list;
1126         int i = 0;
1127
1128         if (nat_rules != NULL)
1129                 bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
1130         if (rdr_rules != NULL)
1131                 bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
1132
1133         while ((n = *np)) {
1134                 *np = n->in_next;
1135                 if (!n->in_use) {
1136                         if (n->in_apr)
1137                                 appr_free(n->in_apr);
1138                         KFREE(n);
1139                         nat_stats.ns_rules--;
1140                 } else {
1141                         n->in_flags |= IPN_DELETE;
1142                         n->in_next = NULL;
1143                 }
1144                 i++;
1145         }
1146         nat_masks = 0;
1147         rdr_masks = 0;
1148         return i;
1149 }
1150
1151
1152 /*
1153  * Create a new NAT table entry.
1154  * NOTE: Assumes write lock on ipf_nat has been obtained already.
1155  *       If you intend on changing this, beware: appr_new() may call nat_new()
1156  *       recursively!
1157  */
1158 nat_t *nat_new(fin, ip, np, natsave, flags, direction)
1159 fr_info_t *fin;
1160 ip_t *ip;
1161 ipnat_t *np;
1162 nat_t **natsave;
1163 u_int flags;
1164 int direction;
1165 {
1166         u_32_t sum1, sum2, sumd, l;
1167         u_short port = 0, sport = 0, dport = 0, nport = 0;
1168         struct in_addr in, inb;
1169         u_short nflags, sp, dp;
1170         tcphdr_t *tcp = NULL;
1171         hostmap_t *hm = NULL;
1172         nat_t *nat, *natl;
1173 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1174         qif_t *qf = fin->fin_qif;
1175 #endif
1176
1177         if (nat_stats.ns_inuse >= ipf_nattable_max) {
1178                 nat_stats.ns_memfail++;
1179                 return NULL;
1180         }
1181
1182         nflags = flags & np->in_flags;
1183         if (flags & IPN_TCPUDP) {
1184                 tcp = (tcphdr_t *)fin->fin_dp;
1185                 sport = htons(fin->fin_data[0]);
1186                 dport = htons(fin->fin_data[1]);
1187         }
1188
1189         /* Give me a new nat */
1190         KMALLOC(nat, nat_t *);
1191         if (nat == NULL) {
1192                 nat_stats.ns_memfail++;
1193                 /*
1194                  * Try to automatically tune the max # of entries in the
1195                  * table allowed to be less than what will cause kmem_alloc()
1196                  * to fail and try to eliminate panics due to out of memory
1197                  * conditions arising.
1198                  */
1199                 if (ipf_nattable_max > ipf_nattable_sz) {
1200                         ipf_nattable_max = nat_stats.ns_inuse - 100;
1201                         printf("ipf_nattable_max reduced to %d\n",
1202                                 ipf_nattable_max);
1203                 }
1204                 return NULL;
1205         }
1206
1207         bzero((char *)nat, sizeof(*nat));
1208         nat->nat_tcpstate[0] = TCPS_CLOSED;
1209         nat->nat_tcpstate[1] = TCPS_CLOSED;
1210         nat->nat_flags = flags;
1211         if (flags & FI_WILDP)
1212                 nat_stats.ns_wilds++;
1213         /*
1214          * Search the current table for a match.
1215          */
1216         if (direction == NAT_OUTBOUND) {
1217                 /*
1218                  * Values at which the search for a free resouce starts.
1219                  */
1220                 u_32_t st_ip;
1221                 u_short st_port;
1222
1223                 /*
1224                  * If it's an outbound packet which doesn't match any existing
1225                  * record, then create a new port
1226                  */
1227                 l = 0;
1228                 st_ip = np->in_nip;
1229                 st_port = np->in_pnext;
1230
1231                 do {
1232                         port = 0;
1233                         in.s_addr = htonl(np->in_nip);
1234                         if (l == 0) {
1235                                 /*
1236                                  * Check to see if there is an existing NAT
1237                                  * setup for this IP address pair.
1238                                  */
1239                                 hm = nat_hostmap(np, fin->fin_src, in);
1240                                 if (hm != NULL)
1241                                         in.s_addr = hm->hm_mapip.s_addr;
1242                         } else if ((l == 1) && (hm != NULL)) {
1243                                 nat_hostmapdel(hm);
1244                                 hm = NULL;
1245                         }
1246                         in.s_addr = ntohl(in.s_addr);
1247
1248                         nat->nat_hm = hm;
1249
1250                         if ((np->in_outmsk == 0xffffffff) &&
1251                             (np->in_pnext == 0)) {
1252                                 if (l > 0)
1253                                         goto badnat;
1254                         }
1255
1256                         if (np->in_redir & NAT_MAPBLK) {
1257                                 if ((l >= np->in_ppip) || ((l > 0) &&
1258                                      !(flags & IPN_TCPUDP)))
1259                                         goto badnat;
1260                                 /*
1261                                  * map-block - Calculate destination address.
1262                                  */
1263                                 in.s_addr = ntohl(fin->fin_saddr);
1264                                 in.s_addr &= ntohl(~np->in_inmsk);
1265                                 inb.s_addr = in.s_addr;
1266                                 in.s_addr /= np->in_ippip;
1267                                 in.s_addr &= ntohl(~np->in_outmsk);
1268                                 in.s_addr += ntohl(np->in_outip);
1269                                 /*
1270                                  * Calculate destination port.
1271                                  */
1272                                 if ((flags & IPN_TCPUDP) &&
1273                                     (np->in_ppip != 0)) {
1274                                         port = ntohs(sport) + l;
1275                                         port %= np->in_ppip;
1276                                         port += np->in_ppip *
1277                                                 (inb.s_addr % np->in_ippip);
1278                                         port += MAPBLK_MINPORT;
1279                                         port = htons(port);
1280                                 }
1281                         } else if (!np->in_outip &&
1282                                    (np->in_outmsk == 0xffffffff)) {
1283                                 /*
1284                                  * 0/32 - use the interface's IP address.
1285                                  */
1286                                 if ((l > 0) ||
1287                                     fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
1288                                         goto badnat;
1289                                 in.s_addr = ntohl(in.s_addr);
1290                         } else if (!np->in_outip && !np->in_outmsk) {
1291                                 /*
1292                                  * 0/0 - use the original source address/port.
1293                                  */
1294                                 if (l > 0)
1295                                         goto badnat;
1296                                 in.s_addr = ntohl(fin->fin_saddr);
1297                         } else if ((np->in_outmsk != 0xffffffff) &&
1298                                    (np->in_pnext == 0) &&
1299                                    ((l > 0) || (hm == NULL)))
1300                                 np->in_nip++;
1301                         natl = NULL;
1302
1303                         if ((nflags & IPN_TCPUDP) &&
1304                             ((np->in_redir & NAT_MAPBLK) == 0) &&
1305                             (np->in_flags & IPN_AUTOPORTMAP)) {
1306                                 if ((l > 0) && (l % np->in_ppip == 0)) {
1307                                         if (l > np->in_space) {
1308                                                 goto badnat;
1309                                         } else if ((l > np->in_ppip) &&
1310                                                    np->in_outmsk != 0xffffffff)
1311                                                 np->in_nip++;
1312                                 }
1313                                 if (np->in_ppip != 0) {
1314                                         port = ntohs(sport);
1315                                         port += (l % np->in_ppip);
1316                                         port %= np->in_ppip;
1317                                         port += np->in_ppip *
1318                                                 (ntohl(fin->fin_saddr) %
1319                                                  np->in_ippip);
1320                                         port += MAPBLK_MINPORT;
1321                                         port = htons(port);
1322                                 }
1323                         } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
1324                                    (nflags & IPN_TCPUDP) &&
1325                                    (np->in_pnext != 0)) {
1326                                 port = htons(np->in_pnext++);
1327                                 if (np->in_pnext > ntohs(np->in_pmax)) {
1328                                         np->in_pnext = ntohs(np->in_pmin);
1329                                         if (np->in_outmsk != 0xffffffff)
1330                                                 np->in_nip++;
1331                                 }
1332                         }
1333
1334                         if (np->in_flags & IPN_IPRANGE) {
1335                                 if (np->in_nip > ntohl(np->in_outmsk))
1336                                         np->in_nip = ntohl(np->in_outip);
1337                         } else {
1338                                 if ((np->in_outmsk != 0xffffffff) &&
1339                                     ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
1340                                     ntohl(np->in_outip))
1341                                         np->in_nip = ntohl(np->in_outip) + 1;
1342                         }
1343
1344                         if (!port && (flags & IPN_TCPUDP))
1345                                 port = sport;
1346
1347                         /*
1348                          * Here we do a lookup of the connection as seen from
1349                          * the outside.  If an IP# pair already exists, try
1350                          * again.  So if you have A->B becomes C->B, you can
1351                          * also have D->E become C->E but not D->B causing
1352                          * another C->B.  Also take protocol and ports into
1353                          * account when determining whether a pre-existing
1354                          * NAT setup will cause an external conflict where
1355                          * this is appropriate.
1356                          */
1357                         inb.s_addr = htonl(in.s_addr);
1358                         sp = fin->fin_data[0];
1359                         dp = fin->fin_data[1];
1360                         fin->fin_data[0] = fin->fin_data[1];
1361                         fin->fin_data[1] = htons(port);
1362                         natl = nat_inlookup(fin, flags & ~FI_WILDP,
1363                                             (u_int)fin->fin_p, fin->fin_dst,
1364                                             inb, 1);
1365                         fin->fin_data[0] = sp;
1366                         fin->fin_data[1] = dp;
1367
1368                         /*
1369                          * Has the search wrapped around and come back to the
1370                          * start ?
1371                          */
1372                         if ((natl != NULL) &&
1373                             (np->in_pnext != 0) && (st_port == np->in_pnext) &&
1374                             (np->in_nip != 0) && (st_ip == np->in_nip))
1375                                 goto badnat;
1376                         l++;
1377                 } while (natl != NULL);
1378
1379                 if (np->in_space > 0)
1380                         np->in_space--;
1381
1382                 /* Setup the NAT table */
1383                 nat->nat_inip = fin->fin_src;
1384                 nat->nat_outip.s_addr = htonl(in.s_addr);
1385                 nat->nat_oip = fin->fin_dst;
1386                 if (nat->nat_hm == NULL)
1387                         nat->nat_hm = nat_hostmap(np, fin->fin_src,
1388                                                   nat->nat_outip);
1389
1390                 sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport);
1391                 sum2 = LONG_SUM(in.s_addr) + ntohs(port);
1392
1393                 if (flags & IPN_TCPUDP) {
1394                         nat->nat_inport = sport;
1395                         nat->nat_outport = port;        /* sport */
1396                         nat->nat_oport = dport;
1397                 }
1398         } else {
1399                 /*
1400                  * Otherwise, it's an inbound packet. Most likely, we don't
1401                  * want to rewrite source ports and source addresses. Instead,
1402                  * we want to rewrite to a fixed internal address and fixed
1403                  * internal port.
1404                  */
1405                 if (np->in_flags & IPN_SPLIT) {
1406                         in.s_addr = np->in_nip;
1407                         if (np->in_inip == htonl(in.s_addr))
1408                                 np->in_nip = ntohl(np->in_inmsk);
1409                         else {
1410                                 np->in_nip = ntohl(np->in_inip);
1411                                 if (np->in_flags & IPN_ROUNDR) {
1412                                         nat_delrdr(np);
1413                                         nat_addrdr(np);
1414                                 }
1415                         }
1416                 } else {
1417                         in.s_addr = ntohl(np->in_inip);
1418                         if (np->in_flags & IPN_ROUNDR) {
1419                                 nat_delrdr(np);
1420                                 nat_addrdr(np);
1421                         }
1422                 }
1423                 if (!np->in_pnext)
1424                         nport = dport;
1425                 else {
1426                         /*
1427                          * Whilst not optimized for the case where
1428                          * pmin == pmax, the gain is not significant.
1429                          */
1430                         if (np->in_pmin != np->in_pmax) {
1431                                 nport = ntohs(dport) - ntohs(np->in_pmin) +
1432                                         ntohs(np->in_pnext);
1433                                 nport = ntohs(nport);
1434                         } else
1435                                 nport = np->in_pnext;
1436                 }
1437
1438                 /*
1439                  * When the redirect-to address is set to 0.0.0.0, just
1440                  * assume a blank `forwarding' of the packet.
1441                  */
1442                 if (in.s_addr == 0)
1443                         in.s_addr = ntohl(fin->fin_daddr);
1444
1445                 nat->nat_inip.s_addr = htonl(in.s_addr);
1446                 nat->nat_outip = fin->fin_dst;
1447                 nat->nat_oip = fin->fin_src;
1448
1449                 sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport);
1450                 sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
1451
1452                 if (flags & IPN_TCPUDP) {
1453                         nat->nat_inport = nport;
1454                         nat->nat_outport = dport;
1455                         nat->nat_oport = sport;
1456                 }
1457         }
1458
1459         CALC_SUMD(sum1, sum2, sumd);
1460         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1461 #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
1462         if ((flags & IPN_TCP) && dohwcksum &&
1463             (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
1464                 if (direction == NAT_OUTBOUND)
1465                         sum1 = LONG_SUM(ntohl(in.s_addr));
1466                 else
1467                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1468                 sum1 += LONG_SUM(ntohl(fin->fin_daddr));
1469                 sum1 += IPPROTO_TCP;
1470                 sum1 = (sum1 & 0xffff) + (sum1 >> 16);
1471                 nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
1472         } else
1473 #endif
1474                 nat->nat_sumd[1] = nat->nat_sumd[0];
1475
1476         if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
1477                 if (direction == NAT_OUTBOUND)
1478                         sum1 = LONG_SUM(ntohl(fin->fin_saddr));
1479                 else
1480                         sum1 = LONG_SUM(ntohl(fin->fin_daddr));
1481
1482                 sum2 = LONG_SUM(in.s_addr);
1483
1484                 CALC_SUMD(sum1, sum2, sumd);
1485                 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1486         } else
1487                 nat->nat_ipsumd = nat->nat_sumd[0];
1488
1489         in.s_addr = htonl(in.s_addr);
1490
1491         strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
1492
1493         nat->nat_me = natsave;
1494         nat->nat_dir = direction;
1495         nat->nat_ifp = fin->fin_ifp;
1496         nat->nat_ptr = np;
1497         nat->nat_p = fin->fin_p;
1498         nat->nat_bytes = 0;
1499         nat->nat_pkts = 0;
1500         nat->nat_mssclamp = np->in_mssclamp;
1501         nat->nat_fr = fin->fin_fr;
1502         if (nat->nat_fr != NULL) {
1503                 ATOMIC_INC32(nat->nat_fr->fr_ref);
1504         }
1505         if (direction == NAT_OUTBOUND) {
1506                 if (flags & IPN_TCPUDP)
1507                         tcp->th_sport = port;
1508         } else {
1509                 if (flags & IPN_TCPUDP)
1510                         tcp->th_dport = nport;
1511         }
1512
1513         nat_insert(nat);
1514
1515         if ((np->in_apr != NULL) && (np->in_dport == 0 ||
1516             (tcp != NULL && dport == np->in_dport)))
1517                 (void) appr_new(fin, ip, nat);
1518
1519         np->in_use++;
1520 #ifdef  IPFILTER_LOG
1521         nat_log(nat, (u_int)np->in_redir);
1522 #endif
1523         return nat;
1524 badnat:
1525         nat_stats.ns_badnat++;
1526         if ((hm = nat->nat_hm) != NULL)
1527                 nat_hostmapdel(hm);
1528         KFREE(nat);
1529         return NULL;
1530 }
1531
1532
1533 /*
1534  * Insert a NAT entry into the hash tables for searching and add it to the
1535  * list of active NAT entries.  Adjust global counters when complete.
1536  */
1537 void    nat_insert(nat)
1538 nat_t   *nat;
1539 {
1540         u_int hv1, hv2;
1541         nat_t **natp;
1542
1543         MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
1544
1545         nat->nat_age = fr_defnatage;
1546         nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
1547         if (nat->nat_ifname[0] !='\0') {
1548                 nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
1549         }
1550
1551         nat->nat_next = nat_instances;
1552         nat_instances = nat;
1553
1554         if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) {
1555                 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
1556                                   0xffffffff);
1557                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
1558                                   ipf_nattable_sz);
1559                 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
1560                                   0xffffffff);
1561                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
1562                                  ipf_nattable_sz);
1563         } else {
1564                 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr,
1565                                   ipf_nattable_sz);
1566                 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr,
1567                                   ipf_nattable_sz);
1568         }
1569
1570         natp = &nat_table[0][hv1];
1571         if (*natp)
1572                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
1573         nat->nat_phnext[0] = natp;
1574         nat->nat_hnext[0] = *natp;
1575         *natp = nat;
1576
1577         natp = &nat_table[1][hv2];
1578         if (*natp)
1579                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
1580         nat->nat_phnext[1] = natp;
1581         nat->nat_hnext[1] = *natp;
1582         *natp = nat;
1583
1584         nat_stats.ns_added++;
1585         nat_stats.ns_inuse++;
1586 }
1587
1588
1589 nat_t *nat_icmplookup(ip, fin, dir)
1590 ip_t *ip;
1591 fr_info_t *fin;
1592 int dir;
1593 {
1594         icmphdr_t *icmp;
1595         tcphdr_t *tcp = NULL;
1596         ip_t *oip;
1597         int flags = 0, type, minlen;
1598
1599         icmp = (icmphdr_t *)fin->fin_dp;
1600         /*
1601          * Does it at least have the return (basic) IP header ?
1602          * Only a basic IP header (no options) should be with an ICMP error
1603          * header.
1604          */
1605         if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
1606                 return NULL;
1607         type = icmp->icmp_type;
1608         /*
1609          * If it's not an error type, then return.
1610          */
1611         if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
1612             (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
1613             (type != ICMP_PARAMPROB))
1614                 return NULL;
1615
1616         oip = (ip_t *)((char *)fin->fin_dp + 8);
1617         minlen = (oip->ip_hl << 2);
1618         if (minlen < sizeof(ip_t))
1619                 return NULL;
1620         if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1621                 return NULL;
1622         /*
1623          * Is the buffer big enough for all of it ?  It's the size of the IP
1624          * header claimed in the encapsulated part which is of concern.  It
1625          * may be too big to be in this buffer but not so big that it's
1626          * outside the ICMP packet, leading to TCP deref's causing problems.
1627          * This is possible because we don't know how big oip_hl is when we
1628          * do the pullup early in fr_check() and thus can't gaurantee it is
1629          * all here now.
1630          */
1631 #ifdef  _KERNEL
1632         {
1633         mb_t *m;
1634
1635 # if SOLARIS
1636         m = fin->fin_qfm;
1637         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
1638                 return NULL;
1639 # else
1640         m = *(mb_t **)fin->fin_mp;
1641         if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
1642             (char *)ip + m->m_len)
1643                 return NULL;
1644 # endif
1645         }
1646 #endif
1647
1648         if (oip->ip_p == IPPROTO_TCP)
1649                 flags = IPN_TCP;
1650         else if (oip->ip_p == IPPROTO_UDP)
1651                 flags = IPN_UDP;
1652         if (flags & IPN_TCPUDP) {
1653                 u_short data[2];
1654                 nat_t *nat;
1655
1656                 minlen += 8;            /* + 64bits of data to get ports */
1657                 if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen)
1658                         return NULL;
1659
1660                 data[0] = fin->fin_data[0];
1661                 data[1] = fin->fin_data[1];
1662                 tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
1663                 fin->fin_data[0] = ntohs(tcp->th_dport);
1664                 fin->fin_data[1] = ntohs(tcp->th_sport);
1665
1666                 if (dir == NAT_INBOUND) {
1667                         nat = nat_inlookup(fin, flags, (u_int)oip->ip_p,
1668                                             oip->ip_dst, oip->ip_src, 0);
1669                 } else {
1670                         nat = nat_outlookup(fin, flags, (u_int)oip->ip_p,
1671                                             oip->ip_dst, oip->ip_src, 0);
1672                 }
1673                 fin->fin_data[0] = data[0];
1674                 fin->fin_data[1] = data[1];
1675                 return nat;
1676         }
1677         if (dir == NAT_INBOUND)
1678                 return nat_inlookup(fin, 0, (u_int)oip->ip_p,
1679                                     oip->ip_dst, oip->ip_src, 0);
1680         else
1681                 return nat_outlookup(fin, 0, (u_int)oip->ip_p,
1682                                     oip->ip_dst, oip->ip_src, 0);
1683 }
1684
1685
1686 /*
1687  * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
1688  * packet gets correctly recognised.
 *
 * Finds the NAT session that the packet embedded in an ICMP error belongs to
 * (via nat_icmplookup()) and rewrites that embedded packet in place: one of
 * its IP addresses, its IP header checksum, its UDP/TCP checksum when enough
 * of the header is present, and one of its ports.  The ICMP checksum of the
 * enclosing message is then corrected with the accumulated delta.
 *
 * ip     - IP header of the ICMP packet.
 * fin    - packet information; fin_dp is used as the ICMP header.
 * nflags - set to IPN_ICMPERR once a NAT session has been found.
 * dir    - lookup direction, passed through to nat_icmplookup().
 *
 * Returns the matching NAT entry, or NULL when the packet is flagged short,
 * is a non-first fragment, is not IPv4, or nat_icmplookup() finds nothing.
 *
 * NOTE(review): ip_natout() also calls this with NAT_OUTBOUND, which does not
 * agree with the "incoming packets only" remark above - confirm.
1689  */
1690 nat_t *nat_icmp(ip, fin, nflags, dir)
1691 ip_t *ip;
1692 fr_info_t *fin;
1693 u_int *nflags;
1694 int dir;
1695 {
1696         u_32_t sum1, sum2, sumd, sumd2 = 0;
1697         struct in_addr in;
1698         int flags, dlen;
1699         icmphdr_t *icmp;
1700         udphdr_t *udp;
1701         tcphdr_t *tcp;
1702         nat_t *nat;
1703         ip_t *oip;
1704
        /* Reject packets flagged short and anything with a fragment offset. */
1705         if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0))
1706                 return NULL;
1707         /*
1708          * nat_icmplookup() will return NULL for `defective' packets.
1709          */
1710         if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
1711                 return NULL;
1712
1713         flags = 0;
1714         sumd2 = 0;
1715         *nflags = IPN_ICMPERR;
1716         icmp = (icmphdr_t *)fin->fin_dp;
1717         oip = (ip_t *)&icmp->icmp_ip;
1718         if (oip->ip_p == IPPROTO_TCP)
1719                 flags = IPN_TCP;
1720         else if (oip->ip_p == IPPROTO_UDP)
1721                 flags = IPN_UDP;
        /*
         * udp/tcp both point just past the embedded IP header; dlen is the
         * number of bytes of the outer packet (by ip_len) from that point on.
         */
1722         udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
1723         dlen = ip->ip_len - ((char *)udp - (char *)ip);
1724         /*
1725          * XXX - what if this is bogus hl and we go off the end ?
1726          * In this case, nat_icmplookup() will have returned NULL.
1727          */
1728         tcp = (tcphdr_t *)udp;
1729
1730         /*
1731          * Need to adjust ICMP header to include the real IP#'s and
1732          * port #'s.  Only apply a checksum change relative to the
1733          * IP address change as it will be modified again in ip_natout
1734          * for both address and port.  Two checksum changes are
1735          * necessary for the two header address changes.  Be careful
1736          * to only modify the checksum once for the port # and twice
1737          * for the IP#.
1738          */
1739
1740         /*
1741          * Step 1
1742          * Fix the IP addresses in the offending IP packet. You also need
1743          * to adjust the IP header checksum of that offending IP packet
1744          * and the ICMP checksum of the ICMP error message itself.
1745          *
1746          * Unfortunately, for UDP and TCP, the IP addresses are also contained
1747          * in the pseudo header that is used to compute the UDP resp. TCP
1748          * checksum. So, we must compensate that as well. Even worse, the
1749          * change in the UDP and TCP checksums require yet another
1750          * adjustment of the ICMP checksum of the ICMP error message.
1751          *
1752          */
1753
        /*
         * If the embedded packet's destination is the session's nat_oip, its
         * source is replaced with nat_inip; otherwise its destination is
         * replaced with nat_outip.  sum1 is taken from the old address.
         */
1754         if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
1755                 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
1756                 in = nat->nat_inip;
1757                 oip->ip_src = in;
1758         } else {
1759                 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
1760                 in = nat->nat_outip;
1761                 oip->ip_dst = in;
1762         }
1763
1764         sum2 = LONG_SUM(ntohl(in.s_addr));
1765
1766         CALC_SUMD(sum1, sum2, sumd);
1767
1768         /*
1769          * Fix IP checksum of the offending IP packet to adjust for
1770          * the change in the IP address.
1771          *
1772          * Normally, you would expect that the ICMP checksum of the 
1773          * ICMP error message needs to be adjusted as well for the
1774          * IP address change in oip.
1775          * However, this is a NOP, because the ICMP checksum is 
1776          * calculated over the complete ICMP packet, which includes the
1777          * changed oip IP addresses and oip->ip_sum. However, these 
1778          * two changes cancel each other out (if the delta for
1779          * the IP address is x, then the delta for ip_sum is minus x), 
1780          * so no change in the icmp_cksum is necessary.
1781          *
1782          * Be careful that nat_dir refers to the direction of the
1783          * offending IP packet (oip), not to its ICMP response (icmp)
1784          */
1785         fix_datacksum(&oip->ip_sum, sumd);
1786         /* Fix icmp cksum : IP Addr + Cksum */
1787
1788         /*
1789          * Fix UDP pseudo header checksum to compensate for the
1790          * IP address change.
1791          */
1792         if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && udp->uh_sum) {
1793                 /*
1794                  * The UDP checksum is optional, only adjust it 
1795                  * if it has been set.
1796                  */
1797                 sum1 = ntohs(udp->uh_sum);
1798                 fix_datacksum(&udp->uh_sum, sumd);
1799                 sum2 = ntohs(udp->uh_sum);
1800
1801                 /*
1802                  * Fix ICMP checksum to compensate the UDP 
1803                  * checksum adjustment.
1804                  */
1805                 sumd2 = sumd << 1;
1806                 CALC_SUMD(sum1, sum2, sumd);
1807                 sumd2 += sumd;
1808         }
1809
1810         /*
1811          * Fix TCP pseudo header checksum to compensate for the 
1812          * IP address change. Before we can do the change, we
1813          * must make sure that oip is sufficient large to hold
1814          * the TCP checksum (normally it does not!).
1815          */
1816         else if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
1817                 sum1 = ntohs(tcp->th_sum);
1818                 fix_datacksum(&tcp->th_sum, sumd);
1819                 sum2 = ntohs(tcp->th_sum);
1820
1821                 /*
1822                  * Fix ICMP checksum to compensate the TCP 
1823                  * checksum adjustment.
1824                  */
1825                 sumd2 = sumd << 1;
1826                 CALC_SUMD(sum1, sum2, sumd);
1827                 sumd2 += sumd;
1828         } else {
                /*
                 * Reached for every other case: protocols other than
                 * TCP/UDP, UDP with no checksum or dlen < 8, and TCP with
                 * dlen < 18.  No embedded transport checksum was touched.
                 */
1829                 sumd2 = (sumd >> 16); 
1830                 if (nat->nat_dir == NAT_OUTBOUND)
1831                         sumd2 = ~sumd2;
1832                 else
1833                         sumd2 = ~sumd2 + 1;
1834         }
1835
        /*
         * The ports are only examined when the embedded protocol is TCP or
         * UDP and at least the two 16-bit port fields (4 bytes) are present.
         * Note that the ICMP checksum fix-up at the end of this block is
         * therefore skipped entirely for other protocols.
         */
1836         if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) {
1837                 /*
1838                  * Step 2 :
1839                  * For offending TCP/UDP IP packets, translate the ports as
1840                  * well, based on the NAT specification. Of course such
1841                  * a change must be reflected in the ICMP checksum as well.
1842                  *
1843                  * Advance notice : Now it becomes complicated :-)
1844                  *
1845                  * Since the port fields are part of the TCP/UDP checksum
1846                  * of the offending IP packet, you need to adjust that checksum
1847                  * as well... but, if you change, you must change the icmp
1848                  * checksum *again*, to reflect that change.
1849                  *
1850                  * To further complicate: the TCP checksum is not in the first
1851                  * 8 bytes of the offending ip packet, so it most likely is not
1852                  * available. Some OSses like Solaris return enough bytes to
1853                  * include the TCP checksum. So we have to check if the
1854                  * ip->ip_len actually holds the TCP checksum of the oip!
1855                  */
                /*
                 * Embedded destination port matches nat_oport: the source
                 * port is the one to restore (to nat_inport).
                 */
1856                 if (nat->nat_oport == tcp->th_dport) {
1857                         if (tcp->th_sport != nat->nat_inport) {
1858                                 /*
1859                                  * Fix ICMP checksum to compensate port
1860                                  * adjustment.
1861                                  */
1862                                 sum1 = ntohs(nat->nat_inport);
1863                                 sum2 = ntohs(tcp->th_sport);
1864                                 tcp->th_sport = nat->nat_inport;
1865
1866                                 /*
1867                                  * Fix udp checksum to compensate port
1868                                  * adjustment.  NOTE : the offending IP packet
1869                                  * flows the other direction compared to the
1870                                  * ICMP message.
1871                                  *
1872                                  * The UDP checksum is optional, only adjust
1873                                  * it if it has been set.
1874                                  */
1875                                 if ((oip->ip_p == IPPROTO_UDP) &&
1876                                     (dlen >= 8) && udp->uh_sum) {
1877                                         sumd = sum1 - sum2;
1878                                         sumd2 += sumd;
1879
1880                                         sum1 = ntohs(udp->uh_sum);
1881                                         fix_datacksum(&udp->uh_sum, sumd);
1882                                         sum2 = ntohs(udp->uh_sum);
1883
1884                                         /*
1885                                          * Fix ICMP checksum to compensate
1886                                          * UDP checksum adjustment.
1887                                          */
1888                                         CALC_SUMD(sum1, sum2, sumd);
1889                                         sumd2 += sumd;
1890                                 }
1891
1892                                 /*
1893                                  * Fix tcp checksum (if present) to compensate
1894                                  * port adjustment. NOTE : the offending IP
1895                                  * packet flows the other direction compared to
1896                                  * the ICMP message.
1897                                  */
1898                                 if (oip->ip_p == IPPROTO_TCP) {
1899                                         if (dlen >= 18) {
1900                                                 sumd = sum1 - sum2;
1901                                                 sumd2 += sumd;
1902
1903                                                 sum1 = ntohs(tcp->th_sum);
1904                                                 fix_datacksum(&tcp->th_sum,
1905                                                               sumd);
1906                                                 sum2 = ntohs(tcp->th_sum);
1907
1908                                                 /*
1909                                                  * Fix ICMP checksum to 
1910                                                  * compensate TCP checksum 
1911                                                  * adjustment.
1912                                                  */
1913                                                 CALC_SUMD(sum1, sum2, sumd);
1914                                                 sumd2 += sumd;
1915                                         } else {
1916                                                 sumd = sum2 - sum1 + 1;
1917                                                 sumd2 += sumd;
1918                                         }
1919                                 }
1920                         }
                /*
                 * Otherwise the destination port is the one to restore
                 * (to nat_outport), if it differs.
                 */
1921                 } else if (tcp->th_dport != nat->nat_outport) {
1922                         /*
1923                          * Fix ICMP checksum to compensate port
1924                          * adjustment.
1925                          */
1926                         sum1 = ntohs(nat->nat_outport);
1927                         sum2 = ntohs(tcp->th_dport);
1928                         tcp->th_dport = nat->nat_outport;
1929
1930                         /*
1931                          * Fix udp checksum to compensate port
1932                          * adjustment.   NOTE : the offending IP
1933                          * packet flows the other direction compared
1934                          * to the ICMP message.
1935                          *
1936                          * The UDP checksum is optional, only adjust
1937                          * it if it has been set.
1938                          */
1939                         if ((oip->ip_p == IPPROTO_UDP) &&
1940                             (dlen >= 8) && udp->uh_sum) {
1941                                 sumd = sum1 - sum2;
1942                                 sumd2 += sumd;
1943
1944                                 sum1 = ntohs(udp->uh_sum);
1945                                 fix_datacksum(&udp->uh_sum, sumd);
1946                                 sum2 = ntohs(udp->uh_sum);
1947
1948                                 /*
1949                                  * Fix ICMP checksum to compensate
1950                                  * UDP checksum adjustment.
1951                                  */
1952                                 CALC_SUMD(sum1, sum2, sumd);
1953                         }
1954
1955                         /*
1956                          * Fix tcp checksum (if present) to compensate
1957                          * port adjustment. NOTE : the offending IP
1958                          * packet flows the other direction compared to
1959                          * the ICMP message.
1960                          */
1961                         if (oip->ip_p == IPPROTO_TCP) {
1962                                 if (dlen >= 18) {
1963                                         sumd = sum1 - sum2;
1964                                         sumd2 += sumd;
1965
1966                                         sum1 = ntohs(tcp->th_sum);
1967                                         fix_datacksum(&tcp->th_sum, sumd);
1968                                         sum2 = ntohs(tcp->th_sum);
1969
1970                                         /*
1971                                          * Fix ICMP checksum to compensate
1972                                          * TCP checksum adjustment.
1973                                          */
1974                                         CALC_SUMD(sum1, sum2, sumd);
1975                                 } else {
1976                                         sumd = sum2 - sum1;
1977                                         if (nat->nat_dir == NAT_OUTBOUND)
1978                                                 sumd++;
1979                                 }
1980                         }
                        /*
                         * NOTE(review): for UDP with no checksum (or
                         * dlen < 8) neither branch above assigns sumd, so
                         * the value added here is the one left over from
                         * Step 1 - confirm that this is intended.
                         */
1981                         sumd2 += sumd;
1982                 }
                /*
                 * Fold the accumulated delta down towards 16 bits (two
                 * rounds of adding the high half into the low half) and
                 * apply it to the ICMP checksum.
                 */
1983                 if (sumd2) {
1984                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1985                         sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
1986                         fix_incksum(fin, &icmp->icmp_cksum, sumd2);
1987                 }
1988         }
        /* An embedded ICMP packet resets the session age to the ICMP default. */
1989         if (oip->ip_p == IPPROTO_ICMP)
1990                 nat->nat_age = fr_defnaticmpage;
1991         return nat;
1992 }
1993
1994
1995 /*
1996  * NB: these lookups don't lock access to the list; they assume that has
1997  * already been done by the caller!
1998  */
1999 /*
2000  * Lookup a nat entry based on the mapped destination ip address/port and
2001  * real source address/port.  We use this lookup when receiving a packet,
2002  * we're looking for a table entry, based on the destination address.
2003  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2004  */
2005 nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw)
2006 fr_info_t *fin;
2007 u_int flags, p;
2008 struct in_addr src , mapdst;
2009 int rw;
2010 {
2011         u_short sport, dport;
2012         nat_t *nat;
2013         int nflags;
2014         u_32_t dst;
2015         ipnat_t *ipn;
2016         void *ifp;
2017         u_int hv;
2018
2019         if (fin != NULL)
2020                 ifp = fin->fin_ifp;
2021         else
2022                 ifp = NULL;
2023         dst = mapdst.s_addr;
2024         if (flags & IPN_TCPUDP) {
2025                 sport = htons(fin->fin_data[0]);
2026                 dport = htons(fin->fin_data[1]);
2027         } else {
2028                 sport = 0;
2029                 dport = 0;
2030         }
2031
2032         hv = NAT_HASH_FN(dst, dport, 0xffffffff);
2033         hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz);
2034         nat = nat_table[1][hv];
2035         for (; nat; nat = nat->nat_hnext[1]) {
2036                 nflags = nat->nat_flags;
2037                 if ((!ifp || ifp == nat->nat_ifp) &&
2038                     nat->nat_oip.s_addr == src.s_addr &&
2039                     nat->nat_outip.s_addr == dst &&
2040                     ((p == 0) || (p == nat->nat_p))) {
2041                         switch (p)
2042                         {
2043                         case IPPROTO_TCP :
2044                         case IPPROTO_UDP :
2045                                 if (nat->nat_oport != sport)
2046                                         continue;
2047                                 if (nat->nat_outport != dport)
2048                                         continue;
2049                                 break;
2050                         default :
2051                                 break;
2052                         }
2053
2054                         ipn = nat->nat_ptr;
2055                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2056                                 if (appr_match(fin, nat) != 0)
2057                                         continue;
2058                         return nat;
2059                 }
2060         }
2061         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2062                 return NULL;
2063         if (!rw) {
2064                 RWLOCK_EXIT(&ipf_nat);
2065         }
2066         hv = NAT_HASH_FN(dst, 0, 0xffffffff);
2067         hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz);
2068         if (!rw) {
2069                 WRITE_ENTER(&ipf_nat);
2070         }
2071         nat = nat_table[1][hv];
2072         for (; nat; nat = nat->nat_hnext[1]) {
2073                 nflags = nat->nat_flags;
2074                 if (ifp && ifp != nat->nat_ifp)
2075                         continue;
2076                 if (!(nflags & FI_WILDP))
2077                         continue;
2078                 if (nat->nat_oip.s_addr != src.s_addr ||
2079                     nat->nat_outip.s_addr != dst)
2080                         continue;
2081                 if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
2082                     ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
2083                         nat_tabmove(fin, nat);
2084                         break;
2085                 }
2086         }
2087         if (!rw) {
2088                 MUTEX_DOWNGRADE(&ipf_nat);
2089         }
2090         return nat;
2091 }
2092
2093
2094 /*
2095  * This function is only called for TCP/UDP NAT table entries where the
2096  * original was placed in the table without hashing on the ports and we now
2097  * want to include hashing on port numbers.
2098  */
2099 static void nat_tabmove(fin, nat)
2100 fr_info_t *fin;
2101 nat_t *nat;
2102 {
2103         u_short sport, dport;
2104         u_int hv, nflags;
2105         nat_t **natp;
2106
2107         nflags = nat->nat_flags;
2108
2109         sport = ntohs(fin->fin_data[0]);
2110         dport = ntohs(fin->fin_data[1]);
2111
2112         /*
2113          * Remove the NAT entry from the old location
2114          */
2115         if (nat->nat_hnext[0])
2116                 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
2117         *nat->nat_phnext[0] = nat->nat_hnext[0];
2118
2119         if (nat->nat_hnext[1])
2120                 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
2121         *nat->nat_phnext[1] = nat->nat_hnext[1];
2122
2123         /*
2124          * Add into the NAT table in the new position
2125          */
2126         hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff);
2127         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2128         natp = &nat_table[0][hv];
2129         if (*natp)
2130                 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2131         nat->nat_phnext[0] = natp;
2132         nat->nat_hnext[0] = *natp;
2133         *natp = nat;
2134
2135         hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff);
2136         hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz);
2137         natp = &nat_table[1][hv];
2138         if (*natp)
2139                 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2140         nat->nat_phnext[1] = natp;
2141         nat->nat_hnext[1] = *natp;
2142         *natp = nat;
2143 }
2144
2145
2146 /*
2147  * Lookup a nat entry based on the source 'real' ip address/port and
2148  * destination address/port.  We use this lookup when sending a packet out,
2149  * we're looking for a table entry, based on the source address.
2150  * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY.
2151  */
2152 nat_t *nat_outlookup(fin, flags, p, src, dst, rw)
2153 fr_info_t *fin;
2154 u_int flags, p;
2155 struct in_addr src , dst;
2156 int rw;
2157 {
2158         u_short sport, dport;
2159         nat_t *nat;
2160         int nflags;
2161         ipnat_t *ipn;
2162         u_32_t srcip;
2163         void *ifp;
2164         u_int hv;
2165
2166         ifp = fin->fin_ifp;
2167         srcip = src.s_addr;
2168         if (flags & IPN_TCPUDP) {
2169                 sport = ntohs(fin->fin_data[0]);
2170                 dport = ntohs(fin->fin_data[1]);
2171         } else {
2172                 sport = 0;
2173                 dport = 0;
2174         }
2175
2176         hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
2177         hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz);
2178         nat = nat_table[0][hv];
2179         for (; nat; nat = nat->nat_hnext[0]) {
2180                 nflags = nat->nat_flags;
2181
2182                 if ((!ifp || ifp == nat->nat_ifp) &&
2183                     nat->nat_inip.s_addr == srcip &&
2184                     nat->nat_oip.s_addr == dst.s_addr &&
2185                     ((p == 0) || (p == nat->nat_p))) {
2186                         switch (p)
2187                         {
2188                         case IPPROTO_TCP :
2189                         case IPPROTO_UDP :
2190                                 if (nat->nat_oport != dport)
2191                                         continue;
2192                                 if (nat->nat_inport != sport)
2193                                         continue;
2194                                 break;
2195                         default :
2196                                 break;
2197                         }
2198
2199                         ipn = nat->nat_ptr;
2200                         if ((ipn != NULL) && (nat->nat_aps != NULL))
2201                                 if (appr_match(fin, nat) != 0)
2202                                         continue;
2203                         return nat;
2204                 }
2205         }
2206         if (!nat_stats.ns_wilds || !(flags & FI_WILDP))
2207                 return NULL;
2208         if (!rw) {
2209                 RWLOCK_EXIT(&ipf_nat);
2210         }
2211
2212         hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz);
2213         if (!rw) {
2214                 WRITE_ENTER(&ipf_nat);
2215         }
2216         nat = nat_table[0][hv];
2217         for (; nat; nat = nat->nat_hnext[0]) {
2218                 nflags = nat->nat_flags;
2219                 if (ifp && ifp != nat->nat_ifp)
2220                         continue;
2221                 if (!(nflags & FI_WILDP))
2222                         continue;
2223                 if ((nat->nat_inip.s_addr != srcip) ||
2224                     (nat->nat_oip.s_addr != dst.s_addr))
2225                         continue;
2226                 if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
2227                     ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
2228                         nat_tabmove(fin, nat);
2229                         break;
2230                 }
2231         }
2232         if (!rw) {
2233                 MUTEX_DOWNGRADE(&ipf_nat);
2234         }
2235         return nat;
2236 }
2237
2238
2239 /*
2240  * Lookup the NAT tables to search for a matching redirect
2241  */
2242 nat_t *nat_lookupredir(np)
2243 natlookup_t *np;
2244 {
2245         nat_t *nat;
2246         fr_info_t fi;
2247
2248         bzero((char *)&fi, sizeof(fi));
2249         fi.fin_data[0] = ntohs(np->nl_inport);
2250         fi.fin_data[1] = ntohs(np->nl_outport);
2251
2252         /*
2253          * If nl_inip is non null, this is a lookup based on the real
2254          * ip address. Else, we use the fake.
2255          */
2256         if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip,
2257                                  np->nl_outip, 0))) {
2258                 np->nl_realip = nat->nat_outip;
2259                 np->nl_realport = nat->nat_outport;
2260         }
2261         return nat;
2262 }
2263
2264
2265 static int nat_match(fin, np, ip)
2266 fr_info_t *fin;
2267 ipnat_t *np;
2268 ip_t *ip;
2269 {
2270         frtuc_t *ft;
2271
2272         if (ip->ip_v != 4)
2273                 return 0;
2274
2275         if (np->in_p && fin->fin_p != np->in_p)
2276                 return 0;
2277         if (fin->fin_out) {
2278                 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
2279                         return 0;
2280                 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
2281                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2282                         return 0;
2283                 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
2284                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2285                         return 0;
2286         } else {
2287                 if (!(np->in_redir & NAT_REDIRECT))
2288                         return 0;
2289                 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
2290                     ^ ((np->in_flags & IPN_NOTSRC) != 0))
2291                         return 0;
2292                 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
2293                     ^ ((np->in_flags & IPN_NOTDST) != 0))
2294                         return 0;
2295         }
2296
2297         ft = &np->in_tuc;
2298         if (!(fin->fin_fl & FI_TCPUDP) ||
2299             (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) {
2300                 if (ft->ftu_scmp || ft->ftu_dcmp)
2301                         return 0;
2302                 return 1;
2303         }
2304
2305         return fr_tcpudpchk(ft, fin);
2306 }
2307
2308
2309 /*
2310  * Packets going out on the external interface go through this.
2311  * Here, the source address requires alteration, if anything.
2312  */
2313 int ip_natout(ip, fin)
2314 ip_t *ip;
2315 fr_info_t *fin;
2316 {
2317         ipnat_t *np = NULL;
2318         u_32_t ipa;
2319         tcphdr_t *tcp = NULL;
2320         u_short sport = 0, dport = 0, *csump = NULL;
2321         int natadd = 1, i, icmpset = 1;
2322         u_int nflags = 0, hv, msk;
2323         struct ifnet *ifp;
2324         frentry_t *fr;
2325         void *sifp;
2326         u_32_t iph;
2327         nat_t *nat;
2328
2329         if (nat_list == NULL || (fr_nat_lock))
2330                 return 0;
2331
2332         if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
2333             fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) {
2334                 sifp = fin->fin_ifp;
2335                 fin->fin_ifp = fr->fr_tif.fd_ifp;
2336         } else
2337                 sifp = fin->fin_ifp;
2338         ifp = fin->fin_ifp;
2339
2340         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2341                 if (fin->fin_p == IPPROTO_TCP)
2342                         nflags = IPN_TCP;
2343                 else if (fin->fin_p == IPPROTO_UDP)
2344                         nflags = IPN_UDP;
2345                 if ((nflags & IPN_TCPUDP)) {
2346                         tcp = (tcphdr_t *)fin->fin_dp;
2347                         sport = tcp->th_sport;
2348                         dport = tcp->th_dport;
2349                 }
2350         }
2351
2352         ipa = fin->fin_saddr;
2353
2354         READ_ENTER(&ipf_nat);
2355
2356         if ((fin->fin_p == IPPROTO_ICMP) &&
2357             (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
2358                 icmpset = 1;
2359         else if ((fin->fin_fl & FI_FRAG) &&
2360             (nat = ipfr_nat_knownfrag(ip, fin)))
2361                 natadd = 0;
2362         else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA,
2363                                       (u_int)fin->fin_p, fin->fin_src,
2364                                       fin->fin_dst, 0))) {
2365                 nflags = nat->nat_flags;
2366                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2367                         if ((nflags & FI_W_SPORT) &&
2368                             (nat->nat_inport != sport))
2369                                 nat->nat_inport = sport;
2370                         if ((nflags & FI_W_DPORT) &&
2371                             (nat->nat_oport != dport))
2372                                 nat->nat_oport = dport;
2373
2374                         if (nat->nat_outport == 0)
2375                                 nat->nat_outport = sport;
2376                         nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
2377                         nflags = nat->nat_flags;
2378                         nat_stats.ns_wilds--;
2379                 }
2380         } else {
2381                 RWLOCK_EXIT(&ipf_nat);
2382
2383                 msk = 0xffffffff;
2384                 i = 32;
2385
2386                 WRITE_ENTER(&ipf_nat);
2387                 /*
2388                  * If there is no current entry in the nat table for this IP#,
2389                  * create one for it (if there is a matching rule).
2390                  */
2391 maskloop:
2392                 iph = ipa & htonl(msk);
2393                 hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
2394                 for (np = nat_rules[hv]; np; np = np->in_mnext)
2395                 {
2396                         if (np->in_ifp && (np->in_ifp != ifp))
2397                                 continue;
2398                         if ((np->in_flags & IPN_RF) &&
2399                             !(np->in_flags & nflags))
2400                                 continue;
2401                         if (np->in_flags & IPN_FILTER) {
2402                                 if (!nat_match(fin, np, ip))
2403                                         continue;
2404                         } else if ((ipa & np->in_inmsk) != np->in_inip)
2405                                 continue;
2406                         if (*np->in_plabel && !appr_ok(ip, tcp, np))
2407                                 continue;
2408                         nat = nat_new(fin, ip, np, NULL,
2409                                       (u_int)nflags, NAT_OUTBOUND);
2410                         if (nat != NULL) {
2411                                 np->in_hits++;
2412                                 break;
2413                         }
2414                 }
2415                 if ((np == NULL) && (i > 0)) {
2416                         do {
2417                                 i--;
2418                                 msk <<= 1;
2419                         } while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
2420                         if (i >= 0)
2421                                 goto maskloop;
2422                 }
2423                 MUTEX_DOWNGRADE(&ipf_nat);
2424         }
2425
2426         /*
2427          * NOTE: ipf_nat must now only be held as a read lock
2428          */
2429         if (nat) {
2430                 np = nat->nat_ptr;
2431                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2432                         ipfr_nat_newfrag(ip, fin, nat);
2433                 MUTEX_ENTER(&nat->nat_lock);
2434                 if (fin->fin_p != IPPROTO_TCP) {
2435                         if (np && np->in_age[1])
2436                                 nat->nat_age = np->in_age[1];
2437                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2438                                 nat->nat_age = fr_defnaticmpage;
2439                         else
2440                                 nat->nat_age = fr_defnatage;
2441                 }
2442                 nat->nat_bytes += ip->ip_len;
2443                 nat->nat_pkts++;
2444                 MUTEX_EXIT(&nat->nat_lock);
2445
2446                 /*
2447                  * Fix up checksums, not by recalculating them, but
2448                  * simply computing adjustments.
2449                  */
2450                 if (nflags == IPN_ICMPERR) {
2451                         u_32_t s1, s2, sumd;
2452
2453                         s1 = LONG_SUM(ntohl(fin->fin_saddr));
2454                         s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
2455                         CALC_SUMD(s1, s2, sumd);
2456                         fix_outcksum(fin, &ip->ip_sum, sumd);
2457                 }
2458 #if (SOLARIS || defined(__sgi)) || !defined(_KERNEL)
2459                 else {
2460                         if (nat->nat_dir == NAT_OUTBOUND)
2461                                 fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2462                         else
2463                                 fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2464                 }
2465 #endif
2466                 /*
2467                  * Only change the packet contents, not what is filtered upon.
2468                  */
2469                 ip->ip_src = nat->nat_outip;
2470
2471                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2472
2473                         if ((nat->nat_outport != 0) && (tcp != NULL)) {
2474                                 tcp->th_sport = nat->nat_outport;
2475                                 fin->fin_data[0] = ntohs(tcp->th_sport);
2476                         }
2477
2478                         if (fin->fin_p == IPPROTO_TCP) {
2479                                 csump = &tcp->th_sum;
2480                                 MUTEX_ENTER(&nat->nat_lock);
2481                                 fr_tcp_age(&nat->nat_age,
2482                                            nat->nat_tcpstate, fin, 1, 0);
2483                                 if (nat->nat_age < fr_defnaticmpage)
2484                                         nat->nat_age = fr_defnaticmpage;
2485 #ifdef LARGE_NAT
2486                                 else if ((!np || !np->in_age[1]) &&
2487                                          (nat->nat_age > fr_defnatage))
2488                                         nat->nat_age = fr_defnatage;
2489 #endif
2490                                 /*
2491                                  * Increase this because we may have
2492                                  * "keep state" following this too and
2493                                  * packet storms can occur if this is
2494                                  * removed too quickly.
2495                                  */
2496                                 if (nat->nat_age == fr_tcpclosed)
2497                                         nat->nat_age = fr_tcplastack;
2498
2499                                 /*
2500                                  * Do a MSS CLAMPING on a SYN packet,
2501                                  * only deal IPv4 for now.
2502                                  */
2503                                 if (nat->nat_mssclamp &&
2504                                     (tcp->th_flags & TH_SYN) != 0)
2505                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2506                                                      fin, csump);
2507
2508                                 MUTEX_EXIT(&nat->nat_lock);
2509                         } else if (fin->fin_p == IPPROTO_UDP) {
2510                                 udphdr_t *udp = (udphdr_t *)tcp;
2511
2512                                 if (udp->uh_sum)
2513                                         csump = &udp->uh_sum;
2514                         }
2515
2516                         if (csump) {
2517                                 if (nat->nat_dir == NAT_OUTBOUND)
2518                                         fix_outcksum(fin, csump,
2519                                                      nat->nat_sumd[1]);
2520                                 else
2521                                         fix_incksum(fin, csump,
2522                                                     nat->nat_sumd[1]);
2523                         }
2524                 }
2525
2526                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2527                      (tcp != NULL && dport == np->in_dport))) {
2528                         i = appr_check(ip, fin, nat);
2529                         if (i == 0)
2530                                 i = 1;
2531                         else if (i == -1)
2532                                 nat->nat_drop[1]++;
2533                 } else
2534                         i = 1;
2535                 ATOMIC_INCL(nat_stats.ns_mapped[1]);
2536                 RWLOCK_EXIT(&ipf_nat);  /* READ */
2537                 fin->fin_ifp = sifp;
2538                 return i;
2539         }
2540         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2541         fin->fin_ifp = sifp;
2542         return 0;
2543 }
2544
2545
2546 /*
2547  * Packets coming in from the external interface go through this.
2548  * Here, the destination address requires alteration, if anything.
2549  */
2550 int ip_natin(ip, fin)
2551 ip_t *ip;
2552 fr_info_t *fin;
2553 {
2554         struct in_addr src;
2555         struct in_addr in;
2556         ipnat_t *np;
2557         u_short sport = 0, dport = 0, *csump = NULL;
2558         u_int nflags = 0, natadd = 1, hv, msk;
2559         struct ifnet *ifp = fin->fin_ifp;
2560         tcphdr_t *tcp = NULL;
2561         int i, icmpset = 0;
2562         nat_t *nat;
2563         u_32_t iph;
2564
2565         if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
2566                 return 0;
2567
2568         if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2569                 if (fin->fin_p == IPPROTO_TCP)
2570                         nflags = IPN_TCP;
2571                 else if (fin->fin_p == IPPROTO_UDP)
2572                         nflags = IPN_UDP;
2573                 if ((nflags & IPN_TCPUDP)) {
2574                         tcp = (tcphdr_t *)fin->fin_dp;
2575                         sport = tcp->th_sport;
2576                         dport = tcp->th_dport;
2577                 }
2578         }
2579
2580         in = fin->fin_dst;
2581         /* make sure the source address is to be redirected */
2582         src = fin->fin_src;
2583
2584         READ_ENTER(&ipf_nat);
2585
2586         if ((fin->fin_p == IPPROTO_ICMP) &&
2587             (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
2588                 icmpset = 1;
2589         else if ((fin->fin_fl & FI_FRAG) &&
2590                  (nat = ipfr_nat_knownfrag(ip, fin)))
2591                 natadd = 0;
2592         else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA,
2593                                      (u_int)fin->fin_p, fin->fin_src, in, 0))) {
2594                 nflags = nat->nat_flags;
2595                 if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
2596                         if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
2597                                 nat->nat_oport = sport;
2598                         if ((nat->nat_outport != dport) &&
2599                                  (nflags & FI_W_SPORT))
2600                                 nat->nat_outport = dport;
2601                         nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
2602                         nflags = nat->nat_flags;
2603                         nat_stats.ns_wilds--;
2604                 }
2605         } else {
2606                 RWLOCK_EXIT(&ipf_nat);
2607
2608                 msk = 0xffffffff;
2609                 i = 32;
2610
2611                 WRITE_ENTER(&ipf_nat);
2612                 /*
2613                  * If there is no current entry in the nat table for this IP#,
2614                  * create one for it (if there is a matching rule).
2615                  */
2616 maskloop:
2617                 iph = in.s_addr & htonl(msk);
2618                 hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
2619                 for (np = rdr_rules[hv]; np; np = np->in_rnext) {
2620                         if ((np->in_ifp && (np->in_ifp != ifp)) ||
2621                             (np->in_p && (np->in_p != fin->fin_p)) ||
2622                             (np->in_flags && !(nflags & np->in_flags)))
2623                                 continue;
2624                         if (np->in_flags & IPN_FILTER) {
2625                                 if (!nat_match(fin, np, ip))
2626                                         continue;
2627                         } else if ((in.s_addr & np->in_outmsk) != np->in_outip)
2628                                 continue;
2629                         if ((!np->in_pmin || (np->in_flags & IPN_FILTER) ||
2630                              ((ntohs(np->in_pmax) >= ntohs(dport)) &&
2631                               (ntohs(dport) >= ntohs(np->in_pmin)))))
2632                                 if ((nat = nat_new(fin, ip, np, NULL, nflags,
2633                                                     NAT_INBOUND))) {
2634                                         np->in_hits++;
2635                                         break;
2636                                 }
2637                 }
2638
2639                 if ((np == NULL) && (i > 0)) {
2640                         do {
2641                                 i--;
2642                                 msk <<= 1;
2643                         } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
2644                         if (i >= 0)
2645                                 goto maskloop;
2646                 }
2647                 MUTEX_DOWNGRADE(&ipf_nat);
2648         }
2649
2650         /*
2651          * NOTE: ipf_nat must now only be held as a read lock
2652          */
2653         if (nat) {
2654                 np = nat->nat_ptr;
2655                 fin->fin_fr = nat->nat_fr;
2656                 if (natadd && (fin->fin_fl & FI_FRAG) && np)
2657                         ipfr_nat_newfrag(ip, fin, nat);
2658                 if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
2659                      (tcp != NULL && sport == np->in_dport))) {
2660                         i = appr_check(ip, fin, nat);
2661                         if (i == -1) {
2662                                 nat->nat_drop[0]++;
2663                                 RWLOCK_EXIT(&ipf_nat);
2664                                 return i;
2665                         }
2666                 }
2667
2668                 MUTEX_ENTER(&nat->nat_lock);
2669                 if (fin->fin_p != IPPROTO_TCP) {
2670                         if (np && np->in_age[0])
2671                                 nat->nat_age = np->in_age[0];
2672                         else if (!icmpset && (fin->fin_p == IPPROTO_ICMP))
2673                                 nat->nat_age = fr_defnaticmpage;
2674                         else
2675                                 nat->nat_age = fr_defnatage;
2676                 }
2677                 nat->nat_bytes += ip->ip_len;
2678                 nat->nat_pkts++;
2679                 MUTEX_EXIT(&nat->nat_lock);
2680
2681                 /*
2682                  * Fix up checksums, not by recalculating them, but
2683                  * simply computing adjustments.
2684                  */
2685                 if (nat->nat_dir == NAT_OUTBOUND)
2686                         fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2687                 else
2688                         fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd);
2689
2690                 ip->ip_dst = nat->nat_inip;
2691                 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
2692
2693                 if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) {
2694
2695                         if ((nat->nat_inport != 0) && (tcp != NULL)) {
2696                                 tcp->th_dport = nat->nat_inport;
2697                                 fin->fin_data[1] = ntohs(tcp->th_dport);
2698                         }
2699
2700                         if (fin->fin_p == IPPROTO_TCP) {
2701                                 csump = &tcp->th_sum;
2702                                 MUTEX_ENTER(&nat->nat_lock);
2703                                 fr_tcp_age(&nat->nat_age,
2704                                            nat->nat_tcpstate, fin, 0, 0);
2705                                 if (nat->nat_age < fr_defnaticmpage)
2706                                         nat->nat_age = fr_defnaticmpage;
2707 #ifdef LARGE_NAT
2708                                 else if ((!np || !np->in_age[0]) &&
2709                                          (nat->nat_age > fr_defnatage))
2710                                         nat->nat_age = fr_defnatage;
2711 #endif
2712                                 /*
2713                                  * Increase this because we may have
2714                                  * "keep state" following this too and
2715                                  * packet storms can occur if this is
2716                                  * removed too quickly.
2717                                  */
2718                                 if (nat->nat_age == fr_tcpclosed)
2719                                         nat->nat_age = fr_tcplastack;
2720                                 /*
2721                                  * Do a MSS CLAMPING on a SYN packet,
2722                                  * only deal IPv4 for now.
2723                                  */
2724                                 if (nat->nat_mssclamp &&
2725                                     (tcp->th_flags & TH_SYN) != 0)
2726                                         nat_mssclamp(tcp, nat->nat_mssclamp,
2727                                                      fin, csump);
2728
2729                                 MUTEX_EXIT(&nat->nat_lock);
2730                         } else if (fin->fin_p == IPPROTO_UDP) {
2731                                 udphdr_t *udp = (udphdr_t *)tcp;
2732
2733                                 if (udp->uh_sum)
2734                                         csump = &udp->uh_sum;
2735                         }
2736
2737                         if (csump) {
2738                                 if (nat->nat_dir == NAT_OUTBOUND)
2739                                         fix_incksum(fin, csump,
2740                                                     nat->nat_sumd[0]);
2741                                 else
2742                                         fix_outcksum(fin, csump,
2743                                                     nat->nat_sumd[0]);
2744                         }
2745                 }
2746                 ATOMIC_INCL(nat_stats.ns_mapped[0]);
2747                 RWLOCK_EXIT(&ipf_nat);                  /* READ */
2748                 return 1;
2749         }
2750         RWLOCK_EXIT(&ipf_nat);                  /* READ/WRITE */
2751         return 0;
2752 }
2753
2754
2755 /*
2756  * Free all memory used by NAT structures allocated at runtime.
2757  */
2758 void ip_natunload()
2759 {
2760         WRITE_ENTER(&ipf_nat);
2761         (void) nat_clearlist();
2762         (void) nat_flushtable();
2763         RWLOCK_EXIT(&ipf_nat);
2764
2765         if (nat_table[0] != NULL) {
2766                 KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
2767                 nat_table[0] = NULL;
2768         }
2769         if (nat_table[1] != NULL) {
2770                 KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
2771                 nat_table[1] = NULL;
2772         }
2773         if (nat_rules != NULL) {
2774                 KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
2775                 nat_rules = NULL;
2776         }
2777         if (rdr_rules != NULL) {
2778                 KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
2779                 rdr_rules = NULL;
2780         }
2781         if (maptable != NULL) {
2782                 KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
2783                 maptable = NULL;
2784         }
2785 }
2786
2787
2788 /*
2789  * Slowly expire held state for NAT entries.  Timeouts are set in
2790  * expectation of this being called twice per second.
2791  */
2792 void ip_natexpire()
2793 {
2794         struct nat *nat, **natp;
2795 #if defined(_KERNEL) && !SOLARIS && !defined(__DragonFly__)
2796         int s;
2797 #endif
2798
2799         SPL_NET(s);
2800         WRITE_ENTER(&ipf_nat);
2801         for (natp = &nat_instances; (nat = *natp); ) {
2802                 nat->nat_age--;
2803                 if (nat->nat_age) {
2804                         natp = &nat->nat_next;
2805                         continue;
2806                 }
2807                 *natp = nat->nat_next;
2808 #ifdef  IPFILTER_LOG
2809                 nat_log(nat, NL_EXPIRE);
2810 #endif
2811                 nat_delete(nat);
2812                 nat_stats.ns_expire++;
2813         }
2814         RWLOCK_EXIT(&ipf_nat);
2815         SPL_X(s);
2816 }
2817
2818
2819 /*
2820  */
2821 void ip_natsync(ifp)
2822 void *ifp;
2823 {
2824         ipnat_t *n;
2825         nat_t *nat;
2826         u_32_t sum1, sum2, sumd;
2827         struct in_addr in;
2828         ipnat_t *np;
2829         void *ifp2;
2830 #if defined(_KERNEL) && !SOLARIS && !defined(__DragonFly__)
2831         int s;
2832 #endif
2833
2834         /*
2835          * Change IP addresses for NAT sessions for any protocol except TCP
2836          * since it will break the TCP connection anyway.
2837          */
2838         SPL_NET(s);
2839         WRITE_ENTER(&ipf_nat);
2840         for (nat = nat_instances; nat; nat = nat->nat_next)
2841                 if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
2842                     !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
2843                     (np->in_outmsk == 0xffffffff) && !np->in_nip) {
2844                         ifp2 = nat->nat_ifp;
2845                         /*
2846                          * Change the map-to address to be the same as the
2847                          * new one.
2848                          */
2849                         sum1 = nat->nat_outip.s_addr;
2850                         if (fr_ifpaddr(4, ifp2, &in) != -1)
2851                                 nat->nat_outip = in;
2852                         sum2 = nat->nat_outip.s_addr;
2853
2854                         if (sum1 == sum2)
2855                                 continue;
2856                         /*
2857                          * Readjust the checksum adjustment to take into
2858                          * account the new IP#.
2859                          */
2860                         CALC_SUMD(sum1, sum2, sumd);
2861                         /* XXX - dont change for TCP when solaris does
2862                          * hardware checksumming.
2863                          */
2864                         sumd += nat->nat_sumd[0];
2865                         nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
2866                         nat->nat_sumd[1] = nat->nat_sumd[0];
2867                 }
2868
2869         for (n = nat_list; (n != NULL); n = n->in_next)
2870                 if (n->in_ifp == ifp) {
2871                         n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
2872                         if (!n->in_ifp)
2873                                 n->in_ifp = (void *)-1;
2874                 }
2875         RWLOCK_EXIT(&ipf_nat);
2876         SPL_X(s);
2877 }
2878
2879
2880 #ifdef  IPFILTER_LOG
2881 void nat_log(nat, type)
2882 struct nat *nat;
2883 u_int type;
2884 {
2885 # ifndef LARGE_NAT
2886         struct ipnat *np;
2887         int rulen;
2888 # endif
2889         struct natlog natl;
2890         void *items[1];
2891         size_t sizes[1];
2892         int types[1];
2893
2894         natl.nl_inip = nat->nat_inip;
2895         natl.nl_outip = nat->nat_outip;
2896         natl.nl_origip = nat->nat_oip;
2897         natl.nl_bytes = nat->nat_bytes;
2898         natl.nl_pkts = nat->nat_pkts;
2899         natl.nl_origport = nat->nat_oport;
2900         natl.nl_inport = nat->nat_inport;
2901         natl.nl_outport = nat->nat_outport;
2902         natl.nl_p = nat->nat_p;
2903         natl.nl_type = type;
2904         natl.nl_rule = -1;
2905 #ifndef LARGE_NAT
2906         if (nat->nat_ptr != NULL) {
2907                 for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
2908                         if (np == nat->nat_ptr) {
2909                                 natl.nl_rule = rulen;
2910                                 break;
2911                         }
2912         }
2913 #endif
2914         items[0] = &natl;
2915         sizes[0] = sizeof(natl);
2916         types[0] = 0;
2917
2918         (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
2919 }
2920 #endif
2921
2922
2923 #if defined(__OpenBSD__)
/*
 * Built only on OpenBSD.  The ifp argument is not used; the function
 * simply calls frsync().
 */
void nat_ifdetach(ifp)
void *ifp;
{
        frsync();
}
2930 #endif
2931
2932
2933 /*
2934  * Check for MSS option and clamp it if necessary.
2935  */
2936 static void nat_mssclamp(tcp, maxmss, fin, csump)
2937 tcphdr_t *tcp;
2938 u_32_t maxmss;
2939 fr_info_t *fin;
2940 u_short *csump;
2941 {
2942         u_char *cp, *ep, opt;
2943         int hlen, advance;
2944         u_32_t mss, sumd;
2945         u_short v;
2946
2947         hlen = tcp->th_off << 2;
2948         if (hlen > sizeof(*tcp)) {
2949                 cp = (u_char *)tcp + sizeof(*tcp);
2950                 ep = (u_char *)tcp + hlen;
2951
2952                 while (cp < ep) {
2953                         opt = cp[0];
2954                         if (opt == TCPOPT_EOL)
2955                                 break;
2956                         else if (opt == TCPOPT_NOP) {
2957                                 cp++;
2958                                 continue;
2959                         }
2960  
2961                         if (&cp[1] >= ep)
2962                                 break;
2963                         advance = cp[1];
2964                         if (&cp[advance] > ep)
2965                                 break;
2966                         switch (opt) {
2967                         case TCPOPT_MAXSEG:
2968                                 if (advance != 4)
2969                                         break;
2970                                 bcopy(&cp[2], &v, sizeof(v));
2971                                 mss = ntohs(v);
2972                                 if (mss > maxmss) {
2973                                         v = htons(maxmss);
2974                                         bcopy(&v, &cp[2], sizeof(v));
2975                                         CALC_SUMD(mss, maxmss, sumd);
2976                                         fix_outcksum(fin, csump, sumd);
2977                                 }
2978                                 break;
2979                         default:
2980                                 /* ignore unknown options */
2981                                 break;
2982                         }
2983                     
2984                         cp += advance;  
2985                 }       
2986         }       
2987 }