2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Bill Yuan <bycn82@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #error IPFIREWALL3 requires INET.
41 #include <sys/param.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/systimer.h>
48 #include <sys/in_cksum.h>
49 #include <sys/systm.h>
51 #include <sys/socket.h>
52 #include <sys/syslog.h>
53 #include <sys/ucred.h>
55 #include <sys/mplock2.h>
57 #include <net/ethernet.h>
58 #include <net/netmsg2.h>
59 #include <net/netisr2.h>
60 #include <net/route.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip_icmp.h>
66 #include <netinet/tcp.h>
67 #include <netinet/tcp_timer.h>
68 #include <netinet/tcp_var.h>
69 #include <netinet/tcpip.h>
70 #include <netinet/udp.h>
71 #include <netinet/udp_var.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/in_var.h>
74 #include <netinet/in_pcb.h>
75 #include <netinet/ip_var.h>
76 #include <netinet/ip_divert.h>
77 #include <net/ipfw3/ip_fw.h>
79 #include "ip_fw3_nat.h"
/* Malloc type passed to every kmalloc()/kfree() of NAT objects in this file. */
81 MALLOC_DEFINE(M_IPFW3_NAT, "IP_FW3_NAT", "ipfw3_nat module");
84 * Highspeed Lockless Kernel NAT
87 * The network address translation (NAT) will replace the `src` of the packet
88 * with an `alias` (alias_addr & alias_port). According to the configuration,
89 * the alias will be randomly picked from the configured range.
92 * The first outgoing packet should trigger the creation of the `nat_state`,
93 * and the `nat_state` will be kept in an RB-tree for the subsequent outgoing
95 * The first returning packet will trigger the creation of the `nat_state2`,
96 * which will be stored in a multidimensional array of pointers (to nat_state2).
99 * The `nat_state` for outgoing packets will be stored in the nat_context of
100 * the current CPU. But due to the nature of NAT, the returning packet may be
101 * handled by another CPU. Hence, the `nat_state2` for the returning packet
102 * will be prepared and stored into the nat_context of the right CPU.
/* NAT contexts, one slot per CPU; code in this file indexes it by cpu id. */
105 struct ip_fw3_nat_context *ip_fw3_nat_ctx[MAXCPU];
/* Timer that periodically runs ip_fw3_nat_cleanup_func(). */
106 static struct callout ip_fw3_nat_cleanup_callout;
107 extern struct ipfw3_context *fw3_ctx[MAXCPU];
/* Control hook defined elsewhere; ip_fw3_nat_init() points it at this module. */
108 extern ip_fw_ctl_t *ip_fw3_ctl_nat_ptr;
/*
 * Tunables exported below under net.inet.ip.fw3_nat.  The cleanup interval
 * is multiplied by hz when the callout is armed; the three timeouts are
 * compared against time_uptime differences when states are expired.
 */
110 static int sysctl_var_cleanup_interval = 1;
111 static int sysctl_var_icmp_timeout = 10;
112 static int sysctl_var_tcp_timeout = 60;
113 static int sysctl_var_udp_timeout = 30;
115 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw3_nat, CTLFLAG_RW, 0, "ipfw3 NAT");
/* The description used to read "default life time", which is not what this knob is. */
116 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, cleanup_interval, CTLFLAG_RW,
117 &sysctl_var_cleanup_interval, 0, "state cleanup interval in seconds");
118 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, icmp_timeout, CTLFLAG_RW,
119 &sysctl_var_icmp_timeout, 0, "default icmp state life time");
120 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, tcp_timeout, CTLFLAG_RW,
121 &sysctl_var_tcp_timeout, 0, "default tcp state life time");
122 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, udp_timeout, CTLFLAG_RW,
123 &sysctl_var_udp_timeout, 0, "default udp state life time");
/*
 * Instantiate the red-black tree type `state_tree` over struct nat_state,
 * linked through the `entries` member and ordered by ip_fw3_nat_state_cmp().
 */
125 RB_PROTOTYPE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp);
126 RB_GENERATE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp);
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word of the
 * covered data changes from old_info to new_info.
 *
 * cksum    - current value of the checksum field
 * old_info - the 16-bit word being replaced
 * new_info - the 16-bit word replacing it
 * is_udp   - non-zero when cksum is a UDP checksum; a zero UDP checksum is
 *            special-cased first (presumably left untouched because zero
 *            means "no checksum" - TODO confirm, the branch body is not
 *            visible here)
 */
128 static __inline uint16_t
129 fix_cksum(uint16_t cksum, uint16_t old_info, uint16_t new_info, uint8_t is_udp)
133 if (is_udp && !cksum)
135 tmp = cksum + old_info - new_info;
/* fold the upper 16 bits back into the lower 16 bits */
136 tmp = (tmp >> 16) + (tmp & 65535);
/*
 * Filter callback for the O_NAT_NAT instruction (registered in
 * ip_fw3_nat_init()).  The verdict is reported through *cmd_ctl / *cmd_val.
 *
 * - If (*args)->eh is set, the instruction reports IP_FW_CTL_NO /
 *   IP_FW_NOT_MATCH.
 * - Otherwise the cfg_nat is taken from the pointer cached in the
 *   instruction or looked up by id in this CPU's context, cached back into
 *   the instruction, and the packet is handed to ip_fw3_nat();
 *   *cmd_ctl becomes IP_FW_CTL_NAT and *cmd_val the NAT result.
 * - IP_FW_DENY / IP_FW_CTL_DONE is set between the lookup and the cache
 *   store (presumably when the lookup yields no nat - TODO confirm).
 */
144 check_nat(int *cmd_ctl, int *cmd_val, struct ip_fw_args **args,
145 struct ip_fw **f, ipfw_insn *cmd, uint16_t ip_len)
147 if ((*args)->eh != NULL) {
148 *cmd_ctl = IP_FW_CTL_NO;
149 *cmd_val = IP_FW_NOT_MATCH;
153 struct ip_fw3_nat_context *nat_ctx;
157 nat_ctx = ip_fw3_nat_ctx[mycpuid];
159 nat = ((ipfw_insn_nat *)cmd)->nat;
/*
 * NOTE(review): no range check on nat_id is visible before it indexes
 * nats[]; confirm it is validated to 1..NAT_ID_MAX elsewhere.
 */
162 nat = nat_ctx->nats[nat_id - 1];
164 *cmd_val = IP_FW_DENY;
165 *cmd_ctl = IP_FW_CTL_DONE;
/* remember the resolved nat in the instruction for later packets */
168 ((ipfw_insn_nat *)cmd)->nat = nat;
170 *cmd_val = ip_fw3_nat(*args, nat, (*args)->m);
171 *cmd_ctl = IP_FW_CTL_NAT;
/*
 * Translate one IPv4 packet according to NAT configuration `nat`.
 *
 * args - firewall arguments; args->oif and args->f_id are read here
 * nat  - the NAT configuration (alias list and per-protocol state trees)
 * m    - the packet; the IP header is accessed through mtod()
 *
 * When args->oif is NULL the destination is rewritten: the alias whose
 * address matches the packet destination is searched and the nat_state2 is
 * read from its tcp_in / udp_in / icmp_in table.  Otherwise the source is
 * rewritten: a key is built from args->f_id and looked up in the
 * rb_tcp_out / rb_udp_out / rb_icmp_out tree; further down a new nat_state
 * is allocated, given an alias address and port, and inserted (presumably
 * only when the lookup missed - TODO confirm).
 *
 * After the address/port are replaced the IP and L4 checksums are patched
 * with fix_cksum(), or the L4 checksum field is set with in_pseudo().
 * When a new TCP/UDP state was created, the matching nat_state2 for return
 * traffic is stored locally or sent to the CPU chosen by netisr_hashcpu().
 *
 * The value returned is what check_nat() stores in *cmd_val.
 */
175 ip_fw3_nat(struct ip_fw_args *args, struct cfg_nat *nat, struct mbuf *m)
177 struct state_tree *tree_out = NULL;
178 struct nat_state *s = NULL, *dup, *k, key;
179 struct nat_state2 *s2 = NULL;
180 struct ip *ip = mtod(m, struct ip *);
181 struct in_addr *old_addr = NULL, new_addr;
182 uint16_t *old_port = NULL, new_port;
183 uint16_t *csum = NULL, dlen = 0;
185 boolean_t pseudo = FALSE, need_return_state = FALSE;
186 struct cfg_alias *alias;
187 int i = 0, rand_n = 0;
/* k is the lookup key (presumably pointing at the local `key` - TODO confirm) */
190 memset(k, 0, LEN_NAT_STATE);
191 if (args->oif == NULL) {
192 old_addr = &ip->ip_dst;
193 k->dst_addr = ntohl(args->f_id.dst_ip);
194 LIST_FOREACH(alias, &nat->alias, next) {
195 if (alias->ip.s_addr == ntohl(args->f_id.dst_ip)) {
/*
 * NOTE(review): the table indexes below come straight from the packet
 * header and no range check is visible here; confirm they are bounded to
 * the size of tcp_in / udp_in / icmp_in.
 */
204 old_port = &L3HDR(struct tcphdr, ip)->th_dport;
205 s2 = alias->tcp_in[*old_port - ALIAS_BEGIN];
206 csum = &L3HDR(struct tcphdr, ip)->th_sum;
209 old_port = &L3HDR(struct udphdr, ip)->uh_dport;
210 s2 = alias->udp_in[*old_port - ALIAS_BEGIN];
211 csum = &L3HDR(struct udphdr, ip)->uh_sum;
/* for ICMP the icmp_id field is used in place of a port */
215 old_port = &L3HDR(struct icmp, ip)->icmp_id;
216 s2 = alias->icmp_in[*old_port];
217 csum = &L3HDR(struct icmp, ip)->icmp_cksum;
/* NOTE(review): panics on a packet-derived protocol value; confirm callers only pass TCP/UDP/ICMP */
220 panic("ipfw3: unsupported proto %u", ip->ip_p);
226 old_addr = &ip->ip_src;
227 k->src_addr = args->f_id.src_ip;
228 k->dst_addr = args->f_id.dst_ip;
231 k->src_port = args->f_id.src_port;
232 k->dst_port = args->f_id.dst_port;
233 m->m_pkthdr.csum_flags = CSUM_TCP;
234 tree_out = &nat->rb_tcp_out;
235 old_port = &L3HDR(struct tcphdr, ip)->th_sport;
236 csum = &L3HDR(struct tcphdr, ip)->th_sum;
239 k->src_port = args->f_id.src_port;
240 k->dst_port = args->f_id.dst_port;
241 m->m_pkthdr.csum_flags = CSUM_UDP;
242 tree_out = &nat->rb_udp_out;
243 old_port = &L3HDR(struct udphdr, ip)->uh_sport;
244 csum = &L3HDR(struct udphdr, ip)->uh_sum;
/* ICMP: the icmp_id serves as both source and destination "port" of the key */
248 k->src_port = L3HDR(struct icmp, ip)->icmp_id;
249 k->dst_port = k->src_port;
250 tree_out = &nat->rb_icmp_out;
251 old_port = &L3HDR(struct icmp, ip)->icmp_id;
252 csum = &L3HDR(struct icmp, ip)->icmp_cksum;
255 panic("ipfw3: unsupported proto %u", ip->ip_p);
257 s = RB_FIND(state_tree, tree_out, k);
259 /* pick an alias ip randomly when there are multiple */
260 if (nat->count > 1) {
261 rand_n = krandom() % nat->count;
263 LIST_FOREACH(alias, &nat->alias, next) {
270 m->m_pkthdr.csum_flags = CSUM_TCP;
/*
 * NOTE(review): M_NULLOK lets kmalloc() return NULL, yet no NULL check is
 * visible before the new state is dereferenced (same for the other
 * M_NULLOK allocations in this function).
 */
271 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT,
272 M_INTWAIT | M_NULLOK | M_ZERO);
274 s->src_addr = args->f_id.src_ip;
275 s->src_port = args->f_id.src_port;
277 s->dst_addr = args->f_id.dst_ip;
278 s->dst_port = args->f_id.dst_port;
280 s->alias_addr = alias->ip.s_addr;
281 pick_alias_port(s, tree_out);
282 dup = RB_INSERT(state_tree, tree_out, s);
283 need_return_state = TRUE;
286 m->m_pkthdr.csum_flags = CSUM_UDP;
287 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT,
288 M_INTWAIT | M_NULLOK | M_ZERO);
290 s->src_addr = args->f_id.src_ip;
291 s->src_port = args->f_id.src_port;
293 s->dst_addr = args->f_id.dst_ip;
294 s->dst_port = args->f_id.dst_port;
296 s->alias_addr = alias->ip.s_addr;
297 pick_alias_port(s, tree_out);
298 dup = RB_INSERT(state_tree, tree_out, s);
299 need_return_state = TRUE;
/* ICMP: both the outgoing state and its return state are created right here */
302 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT,
303 M_INTWAIT | M_NULLOK | M_ZERO);
304 s->src_addr = args->f_id.src_ip;
305 s->dst_addr = args->f_id.dst_ip;
307 s->src_port = *old_port;
308 s->dst_port = *old_port;
310 s->alias_addr = alias->ip.s_addr;
/* the ICMP alias id is derived from the source address, not picked randomly */
311 s->alias_port = htons(s->src_addr % ALIAS_RANGE);
312 dup = RB_INSERT(state_tree, tree_out, s);
314 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT,
315 M_INTWAIT | M_NULLOK | M_ZERO);
317 s2->src_addr = args->f_id.dst_ip;
318 s2->dst_addr = alias->ip.s_addr;
320 s2->src_port = s->alias_port;
321 s2->dst_port = s->alias_port;
323 s2->alias_addr = htonl(args->f_id.src_ip);
324 s2->alias_port = *old_port;
326 alias->icmp_in[s->alias_port] = s2;
/* choose the replacement address/port and refresh the state's timestamp */
333 if (args->oif == NULL) {
334 new_addr.s_addr = s2->src_addr;
335 new_port = s2->src_port;
336 s2->timestamp = time_uptime;
338 new_addr.s_addr = s->alias_addr;
339 new_port = s->alias_port;
340 s->timestamp = time_uptime;
343 /* replace src/dst and fix the checksum */
344 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) {
345 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
346 dlen = ip->ip_len - (ip->ip_hl << 2);
/* the 32-bit address is patched into the checksums as two 16-bit halves */
351 const uint16_t *oaddr, *naddr;
352 oaddr = (const uint16_t *)&old_addr->s_addr;
353 naddr = (const uint16_t *)&new_addr.s_addr;
354 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[0], naddr[0], 0);
355 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[1], naddr[1], 0);
356 if (ip->ip_p != IPPROTO_ICMP) {
357 *csum = fix_cksum(*csum, oaddr[0], naddr[0], udp);
358 *csum = fix_cksum(*csum, oaddr[1], naddr[1], udp);
361 old_addr->s_addr = new_addr.s_addr;
363 *csum = fix_cksum(*csum, *old_port, new_port, udp);
365 *old_port = new_port;
368 *csum = in_pseudo(ip->ip_src.s_addr,
369 ip->ip_dst.s_addr, htons(dlen + ip->ip_p));
372 /* prepare the state for return traffic */
373 if (need_return_state) {
/*
 * ip_len / ip_off are converted with htons() here and back with ntohs()
 * below, and M_HASH is cleared in between; presumably the packet hash is
 * recomputed in that window - TODO confirm, the call is not visible here.
 */
374 ip->ip_len = htons(ip->ip_len);
375 ip->ip_off = htons(ip->ip_off);
377 m->m_flags &= ~M_HASH;
380 ip->ip_len = ntohs(ip->ip_len);
381 ip->ip_off = ntohs(ip->ip_off);
383 int nextcpu = netisr_hashcpu(m->m_pkthdr.hash);
384 if (nextcpu != mycpuid) {
385 struct netmsg_nat_state_add *msg;
/* NOTE(review): M_NOWAIT allocation is used by netmsg_init() with no NULL check visible */
386 msg = kmalloc(LEN_NMSG_NAT_STATE_ADD,
387 M_LWKTMSG, M_NOWAIT | M_ZERO);
388 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
389 0, nat_state_add_dispatch);
390 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT,
391 M_INTWAIT | M_NULLOK | M_ZERO);
/*
 * NOTE(review): src_addr / src_port are assigned twice below, so the
 * second pair overwrites the first.  The ICMP path above fills
 * alias_addr / alias_port at the corresponding step - confirm which
 * fields were intended.
 */
393 s2->src_addr = args->f_id.dst_ip;
394 s2->src_port = args->f_id.dst_port;
396 s2->dst_addr = alias->ip.s_addr;
397 s2->dst_port = s->alias_port;
399 s2->src_addr = htonl(args->f_id.src_ip);
400 s2->src_port = htons(args->f_id.src_port);
402 s2->timestamp = s->timestamp;
403 msg->alias_addr.s_addr = alias->ip.s_addr;
404 msg->alias_port = s->alias_port;
406 msg->nat_id = nat->id;
407 msg->proto = ip->ip_p;
408 netisr_sendmsg(&msg->base, nextcpu);
/* same CPU: build the return state and store it in the local alias table */
410 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT,
411 M_INTWAIT | M_NULLOK | M_ZERO);
/* NOTE(review): src_addr / src_port are again written twice in this path */
413 s2->src_addr = args->f_id.dst_ip;
414 s2->dst_addr = alias->ip.s_addr;
416 s2->src_port = s->alias_port;
417 s2->dst_port = s->alias_port;
419 s2->src_addr = htonl(args->f_id.src_ip);
420 s2->src_port = htons(args->f_id.src_port);
422 s2->timestamp = s->timestamp;
423 if (ip->ip_p == IPPROTO_TCP) {
424 alias->tcp_in[s->alias_port - ALIAS_BEGIN] = s2;
426 alias->udp_in[s->alias_port - ALIAS_BEGIN] = s2;
432 IPFW3_DEBUG1("oops\n");
/*
 * Give state `s` a random alias port: s->alias_port is set to
 * htons(krandom() % ALIAS_RANGE + ALIAS_BEGIN), and the draw is repeated
 * for as long as RB_FIND() locates `s` in `tree`.
 *
 * NOTE(review): state_tree is ordered by ip_fw3_nat_state_cmp(), whose
 * visible tests cover src/dst address and port but not alias_port, so this
 * probe does not appear to detect an alias port already taken by another
 * state - confirm.
 */
437 pick_alias_port(struct nat_state *s, struct state_tree *tree)
440 s->alias_port = htons(krandom() % ALIAS_RANGE + ALIAS_BEGIN);
441 } while (RB_FIND(state_tree, tree, s) != NULL);
/*
 * Ordering function used by state_tree.  Two states are compared field by
 * field in the order src_addr, dst_addr, src_port, dst_port; the first
 * field that differs decides the result (presumably a positive value for
 * "greater", negative for "less" and 0 when all four match - TODO confirm).
 */
445 ip_fw3_nat_state_cmp(struct nat_state *s1, struct nat_state *s2)
447 if (s1->src_addr > s2->src_addr)
449 if (s1->src_addr < s2->src_addr)
452 if (s1->dst_addr > s2->dst_addr)
454 if (s1->dst_addr < s2->dst_addr)
457 if (s1->src_port > s2->src_port)
459 if (s1->src_port < s2->src_port)
462 if (s1->dst_port > s2->dst_port)
464 if (s1->dst_port < s2->dst_port)
/*
 * Socket-option handler that reports the configured NATs.
 *
 * Walks the nats[] slots of the current CPU's context and writes, into the
 * buffer at sopt->sopt_val, an ioc_nat record (including its alias count)
 * followed by the alias addresses of that nat.  On completion
 * sopt->sopt_valsize is set to the number of bytes produced.
 *
 * The `len >= valsize` test guards the caller-supplied buffer size; the
 * bzero() / sopt_valsize = 0 pair at the end is presumably the
 * buffer-too-small path - TODO confirm.
 */
471 ip_fw3_ctl_nat_get_cfg(struct sockopt *sopt)
473 struct ip_fw3_nat_context *nat_ctx;
476 struct cfg_alias *alias;
482 nat_ctx = ip_fw3_nat_ctx[mycpuid];
483 valsize = sopt->sopt_valsize;
/* records are written in place into the option buffer */
484 ioc = (struct ioc_nat *)sopt->sopt_val;
486 for (i = 0; i < NAT_ID_MAX; i++) {
487 nat = nat_ctx->nats[i];
490 if (len >= valsize) {
494 ioc->count = nat->count;
496 LIST_FOREACH(alias, &nat->alias, next) {
501 bcopy(&alias->ip, ip, LEN_IN_ADDR);
506 sopt->sopt_valsize = len;
509 bzero(sopt->sopt_val, sopt->sopt_valsize);
510 sopt->sopt_valsize = 0;
/*
 * Socket-option handler that dumps the live NAT states.
 *
 * The first int in sopt->sopt_val selects the nat: 0 means every nat,
 * otherwise only the nat with that id.  For each selected nat the handler
 * emits one ioc_nat_state record per state, in three passes:
 *   1. ICMP: the rb_icmp_out tree and the icmp_in tables of one context;
 *   2. TCP:  rb_tcp_out and tcp_in of every CPU's context;
 *   3. UDP:  rb_udp_out and udp_in of every CPU's context.
 * Each record carries addresses, ports, protocol and the remaining life
 * (timestamp + timeout - time_uptime).  total_len is advanced by
 * LEN_IOC_NAT_STATE per record and checked against the buffer size before
 * the record is written; at the end sopt->sopt_valsize is set to total_len.
 *
 * NOTE(review): this reads the trees and tables of other CPUs' contexts
 * directly from the calling thread; confirm that this is safe against the
 * per-CPU packet path.
 * NOTE(review): byte-order conversion of the reported fields differs
 * between the passes (e.g. only the TCP tree pass applies ntohl() to
 * dst_addr and ntohs() to the ports) - confirm which form userland expects.
 */
515 ip_fw3_ctl_nat_get_record(struct sockopt *sopt)
517 struct ip_fw3_nat_context *nat_ctx;
519 size_t sopt_size, total_len = 0;
520 struct ioc_nat_state *ioc;
521 int ioc_nat_id, i, n, cpu;
523 struct nat_state2 *s2;
524 struct cfg_alias *a1;
/* the request id is read from the same buffer the records are written to */
526 ioc_nat_id = *((int *)(sopt->sopt_val));
527 sopt_size = sopt->sopt_valsize;
528 ioc = (struct ioc_nat_state *)sopt->sopt_val;
529 /* icmp states only in CPU 0 */
531 nat_ctx = ip_fw3_nat_ctx[cpu];
532 for (n = 0; n < NAT_ID_MAX; n++) {
533 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) {
534 if (nat_ctx->nats[n] == NULL)
536 the = nat_ctx->nats[n];
/* outgoing ICMP states */
537 RB_FOREACH(s, state_tree, &the->rb_icmp_out) {
538 total_len += LEN_IOC_NAT_STATE;
539 if (total_len > sopt_size)
541 ioc->src_addr.s_addr = ntohl(s->src_addr);
542 ioc->dst_addr.s_addr = s->dst_addr;
543 ioc->alias_addr.s_addr = s->alias_addr;
544 ioc->src_port = s->src_port;
545 ioc->dst_port = s->dst_port;
546 ioc->alias_port = s->alias_port;
549 ioc->proto = IPPROTO_ICMP;
551 ioc->life = s->timestamp +
552 sysctl_var_icmp_timeout - time_uptime;
/* returning ICMP states, one table per alias */
556 LIST_FOREACH(a1, &the->alias, next) {
557 for (i = 0; i < ALIAS_RANGE; i++) {
563 total_len += LEN_IOC_NAT_STATE;
564 if (total_len > sopt_size)
567 ioc->src_addr.s_addr = ntohl(s2->src_addr);
568 ioc->dst_addr.s_addr = s2->dst_addr;
569 ioc->alias_addr.s_addr = s2->alias_addr;
570 ioc->src_port = s2->src_port;
571 ioc->dst_port = s2->dst_port;
572 ioc->alias_port = s2->alias_port;
575 ioc->proto = IPPROTO_ICMP;
577 ioc->life = s2->timestamp +
578 sysctl_var_icmp_timeout - time_uptime;
/* TCP pass over every CPU */
586 for (cpu = 0; cpu < ncpus; cpu++) {
587 nat_ctx = ip_fw3_nat_ctx[cpu];
588 for (n = 0; n < NAT_ID_MAX; n++) {
589 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) {
590 if (nat_ctx->nats[n] == NULL)
592 the = nat_ctx->nats[n];
593 RB_FOREACH(s, state_tree, &the->rb_tcp_out) {
594 total_len += LEN_IOC_NAT_STATE;
595 if (total_len > sopt_size)
597 ioc->src_addr.s_addr = ntohl(s->src_addr);
598 ioc->dst_addr.s_addr = ntohl(s->dst_addr);
599 ioc->alias_addr.s_addr = s->alias_addr;
600 ioc->src_port = ntohs(s->src_port);
601 ioc->dst_port = ntohs(s->dst_port);
602 ioc->alias_port = s->alias_port;
605 ioc->proto = IPPROTO_TCP;
607 ioc->life = s->timestamp +
608 sysctl_var_tcp_timeout - time_uptime;
611 LIST_FOREACH(a1, &the->alias, next) {
612 for (i = 0; i < ALIAS_RANGE; i++) {
618 total_len += LEN_IOC_NAT_STATE;
619 if (total_len > sopt_size)
622 ioc->src_addr.s_addr = ntohl(s2->src_addr);
623 ioc->dst_addr.s_addr = s2->dst_addr;
624 ioc->alias_addr.s_addr = s2->alias_addr;
625 ioc->src_port = s2->src_port;
626 ioc->dst_port = s2->dst_port;
627 ioc->alias_port = s2->alias_port;
630 ioc->proto = IPPROTO_TCP;
/*
 * NOTE(review): the life of a returning TCP state is computed from
 * sysctl_var_icmp_timeout, not sysctl_var_tcp_timeout; confirm it matches
 * the timeout the cleanup pass applies to tcp_in entries.
 */
632 ioc->life = s2->timestamp +
633 sysctl_var_icmp_timeout - time_uptime;
/* UDP pass over every CPU */
642 for (cpu = 0; cpu < ncpus; cpu++) {
643 nat_ctx = ip_fw3_nat_ctx[cpu];
644 for (n = 0; n < NAT_ID_MAX; n++) {
645 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) {
646 if (nat_ctx->nats[n] == NULL)
648 the = nat_ctx->nats[n];
649 RB_FOREACH(s, state_tree, &the->rb_udp_out) {
650 total_len += LEN_IOC_NAT_STATE;
651 if (total_len > sopt_size)
653 ioc->src_addr.s_addr = ntohl(s->src_addr);
654 ioc->dst_addr.s_addr = s->dst_addr;
655 ioc->alias_addr.s_addr = s->alias_addr;
656 ioc->src_port = s->src_port;
657 ioc->dst_port = s->dst_port;
658 ioc->alias_port = s->alias_port;
661 ioc->proto = IPPROTO_UDP;
663 ioc->life = s->timestamp +
664 sysctl_var_udp_timeout - time_uptime;
667 LIST_FOREACH(a1, &the->alias, next) {
668 for (i = 0; i < ALIAS_RANGE; i++) {
674 total_len += LEN_IOC_NAT_STATE;
675 if (total_len > sopt_size)
678 ioc->src_addr.s_addr = ntohl(s2->src_addr);
679 ioc->dst_addr.s_addr = s2->dst_addr;
680 ioc->alias_addr.s_addr = s2->alias_addr;
681 ioc->src_port = s2->src_port;
682 ioc->dst_port = s2->dst_port;
683 ioc->alias_port = s2->alias_port;
686 ioc->proto = IPPROTO_UDP;
/*
 * NOTE(review): as in the TCP pass, the life of a returning UDP state uses
 * sysctl_var_icmp_timeout rather than sysctl_var_udp_timeout - confirm.
 */
688 ioc->life = s2->timestamp +
689 sysctl_var_icmp_timeout - time_uptime;
696 sopt->sopt_valsize = total_len;
/*
 * netisr handler for the netmsg_nat_state_add message that ip_fw3_nat()
 * sends to another CPU.  In the receiving CPU's context it finds the nat
 * with id msg->nat_id, searches its alias list for the alias whose address
 * equals msg->alias_addr, and stores the return state in that alias's
 * tcp_in table (for IPPROTO_TCP) or udp_in table (otherwise) at index
 * msg->alias_port - ALIAS_BEGIN.
 *
 * NOTE(review): no NULL check on `nat` is visible before its alias list is
 * walked; confirm the nat cannot be deleted while the message is in flight.
 */
703 nat_state_add_dispatch(netmsg_t add_msg)
705 struct ip_fw3_nat_context *nat_ctx;
706 struct netmsg_nat_state_add *msg;
708 struct nat_state2 *s2;
709 struct cfg_alias *alias;
711 nat_ctx = ip_fw3_nat_ctx[mycpuid];
712 msg = (struct netmsg_nat_state_add *)add_msg;
713 nat = nat_ctx->nats[msg->nat_id - 1];
715 LIST_FOREACH(alias, &nat->alias, next) {
716 if (alias->ip.s_addr == msg->alias_addr.s_addr) {
721 if (msg->proto == IPPROTO_TCP) {
722 alias->tcp_in[msg->alias_port - ALIAS_BEGIN] = s2;
724 alias->udp_in[msg->alias_port - ALIAS_BEGIN] = s2;
729 * Init the RB trees only when the NAT is configured.
/*
 * netisr handler that installs a NAT configuration in the current CPU's
 * context and then forwards the message on with
 * netisr_forwardmsg_all(..., mycpuid + 1).
 *
 * If slot nats[ioc->id - 1] is empty, a cfg_nat is allocated, its alias
 * list and the three state trees are initialised, one cfg_alias is
 * allocated per configured address (ioc->count of them) and pushed onto the
 * alias list, and the nat is stored in the slot.
 *
 * NOTE(review): no range check on ioc->id is visible before it indexes
 * nats[]; confirm it is validated to 1..NAT_ID_MAX.
 */
732 nat_add_dispatch(netmsg_t nat_add_msg)
734 struct ip_fw3_nat_context *nat_ctx;
735 struct netmsg_nat_add *msg;
738 struct cfg_alias *alias;
742 msg = (struct netmsg_nat_add *)nat_add_msg;
744 nat_ctx = ip_fw3_nat_ctx[mycpuid];
746 if (nat_ctx->nats[ioc->id - 1] == NULL) {
747 /* op = set, and nat not exists */
748 nat = kmalloc(LEN_CFG_NAT, M_IPFW3_NAT, M_WAITOK | M_ZERO);
749 LIST_INIT(&nat->alias);
750 RB_INIT(&nat->rb_tcp_out);
751 RB_INIT(&nat->rb_udp_out);
753 RB_INIT(&nat->rb_icmp_out);
756 nat->count = ioc->count;
/* one cfg_alias per configured address */
758 for (n = 0; n < ioc->count; n++) {
759 alias = kmalloc(LEN_CFG_ALIAS,
760 M_IPFW3_NAT, M_WAITOK | M_ZERO);
761 memcpy(&alias->ip, ip, LEN_IN_ADDR);
762 LIST_INSERT_HEAD((&nat->alias), alias, next);
765 nat_ctx->nats[ioc->id - 1] = nat;
767 netisr_forwardmsg_all(&msg->base, mycpuid + 1);
/*
 * Socket-option handler for adding a NAT.  The caller's ioc_nat is copied
 * into an on-stack netmsg_nat_add with sooptcopyin(), and the message is
 * run synchronously starting on CPU 0 via netisr_domsg().
 *
 * NOTE(review): sooptcopyin() is called as a plain statement, so its return
 * value is not examined in the visible code - confirm that a failed copy
 * cannot reach the dispatch.
 */
771 ip_fw3_ctl_nat_add(struct sockopt *sopt)
773 struct netmsg_nat_add nat_add_msg, *msg;
777 ioc = (struct ioc_nat *)(sopt->sopt_val);
778 sooptcopyin(sopt, &msg->ioc_nat, sopt->sopt_valsize,
779 sizeof(struct ioc_nat));
780 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 0,
782 netisr_domsg(&msg->base, 0);
/*
 * netisr handler that removes the NAT with id msg->id from the current
 * CPU's context, then forwards the message on with
 * netisr_forwardmsg_all(..., mycpuid + 1).
 *
 * Every state in rb_icmp_out, rb_tcp_out and rb_udp_out is removed from its
 * tree and freed, then every cfg_alias on the alias list is freed, the
 * cfg_nat itself is freed and the nats[] slot is cleared.
 *
 * NOTE(review): the three LIST_FOREACH_MUTABLE loops over icmp_in / tcp_in
 * / udp_in use `s2` and `tmp2`, which are not among the visible local
 * declarations, and they call kfree() on `s` rather than `s2`.  They may be
 * disabled code; if they are live they need fixing.  Either way, no visible
 * code releases the nat_state2 objects referenced from those tables -
 * confirm they are not leaked.
 */
787 nat_del_dispatch(netmsg_t nat_del_msg)
789 struct ip_fw3_nat_context *nat_ctx;
790 struct netmsg_nat_del *msg;
792 struct nat_state *s, *tmp;
793 struct cfg_alias *alias, *tmp3;
795 msg = (struct netmsg_nat_del *)nat_del_msg;
797 nat_ctx = ip_fw3_nat_ctx[mycpuid];
798 nat = nat_ctx->nats[msg->id - 1];
800 /* the icmp states will only stored in cpu 0 */
801 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) {
802 RB_REMOVE(state_tree, &nat->rb_icmp_out, s);
804 kfree(s, M_IPFW3_NAT);
808 LIST_FOREACH_MUTABLE(s2, &nat->alias->icmp_in, next, tmp2) {
809 LIST_REMOVE(s2, next);
811 kfree(s, M_IPFW3_NAT);
816 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) {
817 RB_REMOVE(state_tree, &nat->rb_tcp_out, s);
819 kfree(s, M_IPFW3_NAT);
823 LIST_FOREACH_MUTABLE(s2, &nat->alias->tcp_in, next, tmp2) {
824 LIST_REMOVE(s2, next);
826 kfree(s, M_IPFW3_NAT);
830 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) {
831 RB_REMOVE(state_tree, &nat->rb_udp_out, s);
833 kfree(s, M_IPFW3_NAT);
837 LIST_FOREACH_MUTABLE(s2, &nat->alias->udp_in, next, tmp2) {
838 LIST_REMOVE(s2, next);
840 kfree(s, M_IPFW3_NAT);
844 LIST_FOREACH_MUTABLE(alias, &nat->alias, next, tmp3) {
845 kfree(alias, M_IPFW3_NAT);
847 kfree(nat, M_IPFW3_NAT);
848 nat_ctx->nats[msg->id - 1] = NULL;
850 netisr_forwardmsg_all(&nat_del_msg->base, mycpuid + 1);
/*
 * Socket-option handler for deleting one NAT.  The id is read as an int
 * from sopt->sopt_val, placed in an on-stack netmsg_nat_del and processed
 * synchronously by nat_del_dispatch(), starting on CPU 0.
 *
 * NOTE(review): neither the option size nor the id range is checked in the
 * visible code, and nat_del_dispatch() uses id - 1 as an array index -
 * confirm validation happens elsewhere.
 */
853 ip_fw3_ctl_nat_del(struct sockopt *sopt)
855 struct netmsg_nat_del nat_del_msg, *msg;
858 msg->id = *((int *)sopt->sopt_val);
859 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
860 0, nat_del_dispatch);
862 netisr_domsg(&msg->base, 0);
/*
 * Socket-option handler for IP_FW_NAT_FLUSH.  Loops NAT_ID_MAX times and,
 * on each pass, runs a netmsg_nat_del through nat_del_dispatch()
 * synchronously starting on CPU 0 (presumably one pass per nat id - the
 * assignment of msg->id is not visible here).
 */
866 ip_fw3_ctl_nat_flush(struct sockopt *sopt)
868 struct netmsg_nat_del nat_del_msg, *msg;
871 for (i = 0; i < NAT_ID_MAX; i++) {
873 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
874 0, nat_del_dispatch);
876 netisr_domsg(&msg->base, 0);
/*
 * Entry point for NAT socket options; ip_fw3_nat_init() installs it in
 * ip_fw3_ctl_nat_ptr.  Dispatches on sopt->sopt_name to the add, delete,
 * flush, get-config and get-record handlers and keeps the handler's result
 * in `error`.  An unrecognised option name is reported with kprintf().
 */
882 ip_fw3_ctl_nat_sockopt(struct sockopt *sopt)
885 switch (sopt->sopt_name) {
887 error = ip_fw3_ctl_nat_add(sopt);
890 error = ip_fw3_ctl_nat_del(sopt);
892 case IP_FW_NAT_FLUSH:
893 error = ip_fw3_ctl_nat_flush(sopt);
896 error = ip_fw3_ctl_nat_get_cfg(sopt);
898 case IP_FW_NAT_GET_RECORD:
899 error = ip_fw3_ctl_nat_get_record(sopt);
902 kprintf("ipfw3 nat invalid socket option %d\n",
/*
 * netisr handler that allocates a zeroed ip_fw3_nat_context, stores it in
 * this CPU's slot of ip_fw3_nat_ctx[], and forwards the message on with
 * netisr_forwardmsg_all(..., mycpuid + 1).
 */
909 nat_init_ctx_dispatch(netmsg_t msg)
911 struct ip_fw3_nat_context *tmp;
912 tmp = kmalloc(sizeof(struct ip_fw3_nat_context),
913 M_IPFW3_NAT, M_WAITOK | M_ZERO);
915 ip_fw3_nat_ctx[mycpuid] = tmp;
916 netisr_forwardmsg_all(&msg->base, mycpuid + 1);
/*
 * netisr handler that frees this CPU's ip_fw3_nat_context and forwards the
 * message on with netisr_forwardmsg_all(..., mycpuid + 1).
 *
 * NOTE(review): ip_fw3_nat_ctx[mycpuid] is not reset after kfree() in the
 * visible lines, so the slot keeps a dangling pointer - confirm nothing
 * reads it after module teardown.
 */
920 nat_fnit_ctx_dispatch(netmsg_t msg)
922 kfree(ip_fw3_nat_ctx[mycpuid], M_IPFW3_NAT);
923 netisr_forwardmsg_all(&msg->base, mycpuid + 1);
/*
 * netisr handler that expires idle NAT states in the current CPU's context
 * and then forwards the message on with
 * netisr_forwardmsg_all(..., mycpuid + 1).
 *
 * For every configured nat, each protocol is handled in two steps:
 *   - outgoing states in rb_icmp_out / rb_tcp_out / rb_udp_out whose
 *     timestamp is older than the protocol's timeout are removed from the
 *     tree and freed;
 *   - returning states in every alias's icmp_in / tcp_in / udp_in table
 *     that are older than the same timeout are freed and their table slot
 *     is cleared.
 *
 * Returning TCP and UDP states used to be expired with
 * sysctl_var_icmp_timeout.  With the default values that dropped them long
 * before the matching outgoing state expired; because the outgoing state
 * still existed, the return state was never re-created and replies stopped
 * being translated.  Each table now uses its own protocol's timeout.
 */
927 nat_cleanup_func_dispatch(netmsg_t nmsg)
929 struct nat_state *s, *tmp;
930 struct ip_fw3_nat_context *nat_ctx;
932 struct cfg_alias *a1, *tmp2;
933 struct nat_state2 *s2;
936 nat_ctx = ip_fw3_nat_ctx[mycpuid];
937 for (j = 0; j < NAT_ID_MAX; j++) {
938 nat = nat_ctx->nats[j];
941 /* check the nat_states, remove the expired state */
942 /* the icmp states are only stored in cpu 0 */
943 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) {
944 if (time_uptime - s->timestamp > sysctl_var_icmp_timeout) {
945 RB_REMOVE(state_tree, &nat->rb_icmp_out, s);
946 kfree(s, M_IPFW3_NAT);
949 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) {
950 for (i = 0; i < ALIAS_RANGE; i++) {
953 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) {
954 a1->icmp_in[i] = NULL;
955 kfree(s2, M_IPFW3_NAT);
962 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) {
963 if (time_uptime - s->timestamp > sysctl_var_tcp_timeout) {
964 RB_REMOVE(state_tree, &nat->rb_tcp_out, s);
965 kfree(s, M_IPFW3_NAT);
968 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) {
969 for (i = 0; i < ALIAS_RANGE; i++) {
/* returning TCP states follow the TCP timeout, like rb_tcp_out above */
972 if (time_uptime - s2->timestamp > sysctl_var_tcp_timeout) {
973 a1->tcp_in[i] = NULL;
974 kfree(s2, M_IPFW3_NAT);
980 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) {
981 if (time_uptime - s->timestamp > sysctl_var_udp_timeout) {
982 RB_REMOVE(state_tree, &nat->rb_udp_out, s);
983 kfree(s, M_IPFW3_NAT);
986 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) {
987 for (i = 0; i < ALIAS_RANGE; i++) {
/* returning UDP states follow the UDP timeout, like rb_udp_out above */
990 if (time_uptime - s2->timestamp > sysctl_var_udp_timeout) {
991 a1->udp_in[i] = NULL;
992 kfree(s2, M_IPFW3_NAT);
999 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1);
/*
 * Callout handler for ip_fw3_nat_cleanup_callout.  Runs
 * nat_cleanup_func_dispatch() through an on-stack message, started
 * synchronously on CPU 0 with netisr_domsg(), then re-arms the callout to
 * fire again after sysctl_var_cleanup_interval * hz ticks.
 */
1003 ip_fw3_nat_cleanup_func(void *dummy __unused)
1005 struct netmsg_base msg;
1006 netmsg_init(&msg, NULL, &curthread->td_msgport, 0,
1007 nat_cleanup_func_dispatch);
1008 netisr_domsg(&msg, 0);
1010 callout_reset(&ip_fw3_nat_cleanup_callout,
1011 sysctl_var_cleanup_interval * hz,
1012 ip_fw3_nat_cleanup_func, NULL);
/*
 * Module initialisation, called from ip_fw3_nat_modevent():
 *   - registers the NAT module and its O_NAT_NAT filter function
 *     (check_nat) with the ipfw3 core;
 *   - installs ip_fw3_ctl_nat_sockopt as the NAT control hook;
 *   - creates the NAT contexts by running nat_init_ctx_dispatch()
 *     synchronously from CPU 0;
 *   - initialises and arms the periodic cleanup callout.
 */
1016 int ip_fw3_nat_init(void)
1018 struct netmsg_base msg;
1019 ip_fw3_register_module(MODULE_NAT_ID, MODULE_NAT_NAME);
1020 ip_fw3_register_filter_funcs(MODULE_NAT_ID, O_NAT_NAT,
1021 (filter_func)check_nat);
1022 ip_fw3_ctl_nat_ptr = ip_fw3_ctl_nat_sockopt;
1023 netmsg_init(&msg, NULL, &curthread->td_msgport,
1024 0, nat_init_ctx_dispatch);
1025 netisr_domsg(&msg, 0);
1027 callout_init_mp(&ip_fw3_nat_cleanup_callout);
1028 callout_reset(&ip_fw3_nat_cleanup_callout,
1029 sysctl_var_cleanup_interval * hz,
1030 ip_fw3_nat_cleanup_func,
/*
 * Module teardown, called from ip_fw3_nat_modevent():
 *   - stops the cleanup callout;
 *   - loops NAT_ID_MAX times, each time running nat_del_dispatch()
 *     synchronously from CPU 0 (presumably once per nat id - the assignment
 *     of msg1->id is not visible here);
 *   - frees the NAT contexts via nat_fnit_ctx_dispatch();
 *   - returns the result of ip_fw3_unregister_module().
 *
 * NOTE(review): callout_stop() is used before the contexts are freed;
 * confirm it is sufficient to guarantee the cleanup handler is not still
 * running at that point.
 */
1036 ip_fw3_nat_fini(void)
1038 struct netmsg_base msg;
1039 struct netmsg_nat_del nat_del_msg, *msg1;
1042 callout_stop(&ip_fw3_nat_cleanup_callout);
1044 msg1 = &nat_del_msg;
1045 for (i = 0; i < NAT_ID_MAX; i++) {
1047 netmsg_init(&msg1->base, NULL, &curthread->td_msgport,
1048 0, nat_del_dispatch);
1050 netisr_domsg(&msg1->base, 0);
1053 netmsg_init(&msg, NULL, &curthread->td_msgport,
1054 0, nat_fnit_ctx_dispatch);
1055 netisr_domsg(&msg, 0);
1057 return ip_fw3_unregister_module(MODULE_NAT_ID);
/*
 * Module event handler referenced from ip_fw3_nat_mod.  Depending on
 * `type` it returns the result of ip_fw3_nat_init() or ip_fw3_nat_fini()
 * (presumably for the load and unload events respectively - the case
 * labels are not visible here).
 */
1061 ip_fw3_nat_modevent(module_t mod, int type, void *data)
1065 return ip_fw3_nat_init();
1067 return ip_fw3_nat_fini();
/*
 * Kernel module glue: the module data names ip_fw3_nat_modevent() as the
 * event handler, the module is declared as `ipfw3_nat` at
 * SI_SUB_PROTO_IFATTACHDOMAIN / SI_ORDER_ANY, depends on version 1 of
 * `ipfw3_basic`, and advertises version 1 itself.
 */
1074 moduledata_t ip_fw3_nat_mod = {
1076 ip_fw3_nat_modevent,
1080 DECLARE_MODULE(ipfw3_nat, ip_fw3_nat_mod,
1081 SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
1082 MODULE_DEPEND(ipfw3_nat, ipfw3_basic, 1, 1, 1);
1083 MODULE_VERSION(ipfw3_nat, 1);