From d938108c68b0f702cc8fdd0f2bbfadd09337db06 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 7 Sep 2017 08:56:57 +0800 Subject: [PATCH] ipfw: Add per-cpu table support. This is intended to improve performance and reduce latency for matching discrete addresses. The table itself is a radix tree. For example, nginx, 1KB web object, 30K concurrent connections, 1 request/connection. ipfw is running on the server side. Comparison between no-match rules and no-match table entries: | perf-avg | lat-avg | lat-stdev | lat-99% | (tps) | (ms) | (ms) | (ms) -------------------+-----------+---------+-----------+--------- 100 nomatch rules | 184752.65 | 67.50 | 5.69 | 79.11 -------------------+-----------+---------+-----------+--------- 100 nomatch tblent | 200754.53 | 61.18 | 5.72 | 73.10 1K nomatch rules | 90836.43 | 144.72 | 12.28 | 168.97 -------------------+-----------+---------+-----------+--------- 1K nomatch tblent | 199750.35 | 61.54 | 5.73 | 72.90 10K nomatch rules | 14836.69 | 864.46 | 157.49 | 1110.00 -------------------+-----------+---------+-----------+--------- 10K nomatch tblent | 198412.93 | 62.17 | 5.66 | 73.08 Comparison between number of no-match table entries: | perf-avg | lat-avg | lat-stdev | lat-99% | (tps) | (ms) | (ms) | (ms) -------------------+-----------+---------+-----------+--------- no-ipfw | 210658.80 | 58.01 | 5.20 | 68.73 -------------------+-----------+---------+-----------+--------- 100 nomatch tblent | 200754.53 | 61.18 | 5.72 | 73.10 -------------------+-----------+---------+-----------+--------- 1K nomatch tblent | 199750.35 | 61.54 | 5.73 | 72.90 -------------------+-----------+---------+-----------+--------- 10K nomatch tblent | 198412.93 | 62.17 | 5.66 | 73.08 It scales pretty well with the number of no-match table entries. Even if it is compared w/ no-ipfw case, the performance and latency impacts of the ipfw after this commit are pretty small. 
--- sbin/ipfw/ipfw.8 | 240 +++++++++++-- sbin/ipfw/ipfw2.c | 482 +++++++++++++++++++++++++- sys/net/ipfw/ip_fw2.c | 765 +++++++++++++++++++++++++++++++++++++++++- sys/net/ipfw/ip_fw2.h | 53 +++ sys/netinet/in.h | 9 + sys/netinet/raw_ip.c | 9 + 6 files changed, 1514 insertions(+), 44 deletions(-) diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index 32024cebeb..a1a984a583 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -2,7 +2,7 @@ .\" $FreeBSD: src/sbin/ipfw/ipfw.8,v 1.63.2.33 2003/02/04 01:36:02 brueffer Exp $ .\" $DragonFly: src/sbin/ipfw/ipfw.8,v 1.20 2008/11/23 21:55:52 swildner Exp $ .\" -.Dd September 5, 2017 +.Dd September 10, 2017 .Dt IPFW 8 .Os .Sh NAME @@ -18,7 +18,7 @@ .Brq Cm list | show .Op Ar number ... .Nm -.Op Fl f | q +.Op Fl fq .Cm flush .Nm .Op Fl q @@ -56,6 +56,41 @@ .Pp .Nm .Op Fl q +.Cm table Ar number Cm create +.Nm +.Op Fl fq +.Cm table Ar number +.Cm destroy +.Nm +.Op Fl fq +.Cm table +.Op Ar number +.Cm flush +.Nm +.Cm table list +.Nm +.Op Fl at +.Cm table Ar number +.Brq Cm show | print +.Nm +.Op Fl q +.Cm table Ar number +.Brq Cm add | delete +.Ar address +.Op Ar address ... +.Nm +.Op Fl q +.Cm table +.Op Ar number +.Cm zero +.Nm +.Op Fl fq +.Cm table +.Op Ar number +.Cm expire Ar seconds +.Pp +.Nm +.Op Fl q .Oo .Fl p Ar preproc .Oo Fl D @@ -136,10 +171,10 @@ option, then .Nm assumes a .Em stateful -behaviour, i.e. upon a match it will create dynamic rules matching +behaviour, i.e. upon a match it will create states matching the exact parameters (addresses and ports) of the matching packet. .Pp -These dynamic rules, which have a limited lifetime, are checked +These states, which have a limited lifetime, are checked at the first occurrence of a .Cm check-state , .Cm keep-state @@ -154,7 +189,7 @@ and Sections below for more information on the stateful behaviour of .Nm . 
.Pp -All rules (including dynamic ones) have a few associated counters: +All rules (including states) have a few associated counters: a packet count, a byte count, a log count and a timestamp indicating the time of the last match. Counters can be displayed or reset with @@ -204,11 +239,11 @@ When entering or showing rules, print them in compact form, i.e. without the optional "ip from any to any" string when this does not carry any additional information. .It Fl d -While listing, show dynamic rules in addition to static ones. +While listing, show states and tracks in addition to static ones. .It Fl e While listing, if the .Fl d -option was specified, also show expired dynamic rules. +option was specified, also show expired states and tracks. .It Fl f Don't ask for confirmation for commands that can cause problems if misused, @@ -504,16 +539,16 @@ will be executed when the packet matches the body of the rule. Allow packets that match rule. The search terminates. .It Cm check-state -Checks the packet against the dynamic ruleset. +Checks the packet against the state table. If a match is found, execute the action associated with -the rule which generated this dynamic rule, otherwise +the rule which generated this state, otherwise move to the next rule. .br .Cm Check-state rules do not have a body. If no .Cm check-state -rule is found, the dynamic ruleset is checked at the first +rule is found, the state table is checked at the first .Cm keep-state or .Cm limit @@ -716,6 +751,11 @@ matches any IP address. matches any IP address configured on an interface in the system. The address list is evaluated at the time the packet is analysed. +.It Cm < Ns Ar number Ns Cm > +Matches any network or host addresses in the +.Cm table +specified by the +.Ar number . .It Ar numeric-ip | hostname Matches a single IPv4 address, specified as dotted-quad or a hostname. Hostnames are resolved at the time the rule is added to the firewall list. 
@@ -813,10 +853,10 @@ operand, and possibly grouped into .Pp The following match patterns can be used (listed in alphabetical order): .Bl -tag -width indent -.It Cm dst-ip Ar ip address +.It Cm dst-ip Ar ip-address Matches IP packets whose destination IP is one of the address(es) specified as argument. -.It Cm dst-port Ar source ports +.It Cm dst-port Ar ports Matches IP packets whose destination port is one of the port(s) specified as argument. .It Cm established @@ -935,7 +975,7 @@ Matches IP packets whose time to live is Matches IP packets whose IP version field is .Ar ver . .It Cm keep-state -Upon a match, the firewall will create a dynamic rule, whose +Upon a match, the firewall will create a state, whose default behaviour is to match bidirectional traffic between source and destination IP/port using the same protocol. The rule has a limited lifetime (controlled by a set of @@ -1134,9 +1174,9 @@ By default, all sets are enabled. When you disable a set, its rules behave as if they do not exist in the firewall configuration, with only one exception: .Bd -ragged -offset indent -dynamic rules created from a rule before it had been disabled +states and tracks created from a rule before it had been disabled will still be active until they expire. In order to delete -dynamic rules you have to explicitly delete the parent rule +states and tracks you have to explicitly delete the parent rule which generated them. .Ed .Pp @@ -1159,7 +1199,7 @@ See the Section on some possible uses of sets of rules. .Sh STATEFUL FIREWALL Stateful operation is a way for the firewall to dynamically -create rules for specific flows when packets that +create states and tracks for specific flows when packets that match a given pattern are detected. Support for stateful operation comes through the .Cm check-state , keep-state @@ -1169,13 +1209,13 @@ options of .Nm rules. 
.Pp -Dynamic rules are created when a packet matches a +States are created when a packet matches a .Cm keep-state or .Cm limit rule, causing the creation of a -.Em dynamic -rule which will match all and only packets with +.Em state +which will match all and only packets with a given .Em protocol between a @@ -1186,7 +1226,11 @@ and .Em dst are used here only to denote the initial match addresses, but they are completely equivalent afterwards). -Dynamic rules will be checked at the first +Additionally, +tracks are created when a packet matches a +.Cm limit +rule. +States will be checked at the first .Cm check-state, keep-state or .Cm limit @@ -1194,11 +1238,11 @@ occurrence, and the action performed upon a match will be the same as in the parent rule. .Pp Note that no additional attributes other than protocol and IP addresses -and ports are checked on dynamic rules. +and ports are checked on states. .Pp -The typical use of dynamic rules is to keep a closed firewall configuration, +The typical use of states is to keep a closed firewall configuration, but let the first TCP SYN packet from the inside network install a -dynamic rule for the flow so that packets belonging to that session +state for the flow so that packets belonging to that session will be allowed through the firewall: .Pp .Dl "ipfw add check-state" @@ -1206,27 +1250,27 @@ will be allowed through the firewall: .Dl "ipfw add deny tcp from any to any" .Pp A similar approach can be used for UDP, where an UDP packet coming -from the inside will install a dynamic rule to let the response through +from the inside will install a state to let the response through the firewall: .Pp .Dl "ipfw add check-state" .Dl "ipfw add allow udp from my-subnet to any keep-state" .Dl "ipfw add deny udp from any to any" .Pp -Dynamic rules expire after some time, which depends on the status +States and tracks expire after some time, which depends on the status of the flow and the setting of some .Cm sysctl variables. 
See Section .Sx SYSCTL VARIABLES for more details. -For TCP sessions, dynamic rules can be instructed to periodically +For TCP sessions, states can be instructed to periodically send keepalive packets to refresh the state of the rule when it is about to expire. .Pp See Section .Sx EXAMPLES -for more examples on how to use dynamic rules. +for more examples on how to use states. .Sh TRAFFIC SHAPER (DUMMYNET) CONFIGURATION .Nm is also the user interface for the @@ -1439,6 +1483,104 @@ specifies the expected maximum packet size, only used when queue thresholds are in bytes (defaults to 1500, must be greater than zero). .El .El +.Sh TABLE +Table provides a convenient way to support a large amount of +discrete host or network addresses for the +.Cm from , +.Cm to , +.Cm src-ip , +and +.Cm dst-ip . +Non-existing tables never match. +For network addresses, +only CIDR form is supported. +.Pp +Tables are identified by +.Ar number , +which ranges from 0 to +.Cm net.inet.ip.fw.table_max +- 1. +Default number of available tables is 64, +i.e. valid table ids are from 0 to 63. +Number of available tables can be changed by setting tunable +.Cm net.inet.ip.fw.table_max . +Max configurable number of available tables is 65535. +.Pp +Tables must be created explicitly +before host or network addresses could be added to them: +.Bd -ragged -offset indent +.Cm table Ar number Cm create +.Ed +.Pp +Host or network addresses can be added to an existing +table by using: +.Bd -ragged -offset indent +.Cm table Ar number Cm add Ar address +.Op Ar address ... +.Ed +.Pp +Host or network addresses can be removed from an existing +table by using: +.Bd -ragged -offset indent +.Cm table Ar number Cm delete Ar address +.Op Ar address ... +.Ed +.Pp +Addresses in a table can be flushed by: +.Bd -ragged -offset indent +.Cm table Ar number Cm flush +.Ed +.Pp +Or you can optionally flush all existing tables: +.Bd -ragged -offset indent +.Cm table flush +.Ed +.Pp +Each address in a table has two counters. 
+One records the number of uses, +the other saves the time of the last match. +These counters can be reset for a specific table: +.Bd -ragged -offset indent +.Cm table Ar number Cm zero +.Ed +.Pp +Or you can reset counters of addresses in all existing tables by: +.Bd -ragged -offset indent +.Cm table zero +.Ed +.Pp +Host and network addresses in the tables are not expired by the +.Nm , +manual intervention is required to expire addresses unused in a table +within the last +.Ar seconds : +.Bd -ragged -offset indent +.Cm table Ar number Cm expire Ar seconds +.Ed +.Pp +Optionally, +you can expire all addresses that were unused within the last +.Ar seconds +by: +.Bd -ragged -offset indent +.Cm table expire Ar seconds +.Ed +.Pp +An existing table can be destroyed by: +.Bd -ragged -offset indent +.Cm table Ar number Cm destroy +.Ed +.Pp +All existing tables can be listed by: +.Bd -ragged -offset indent +.Cm table list +.Ed +.Pp +All addresses in an existing table can be dumped by: +.Bd -ragged -offset indent +.Cm table Ar number +.Brq Cm print | show +.Ed .Sh CHECKLIST Here are some important points to consider when designing your rules: @@ -1557,6 +1699,10 @@ The value must be in the range 1..1000. .It Em net.inet.ip.fw.debug : No 1 Controls debugging messages produced by .Nm . +.It Em net.inet.ip.fw.table_max : No 64 +Number of available tables. +This value can only be changed by setting tunable +.Cm net.inet.ip.fw.table_max . .It Em net.inet.ip.fw.state_cnt : No 3 Current number of states (read-only). @@ -1587,8 +1733,7 @@ seconds of the lifetime of the rule. .It Em net.inet.ip.fw.dyn_rst_lifetime : No 2 .It Em net.inet.ip.fw.dyn_udp_lifetime : No 10 .It Em net.inet.ip.fw.dyn_short_lifetime : No 5 -These variables control the lifetime, in seconds, of dynamic -rules. +These variables control the lifetime, in seconds, of states and tracks. 
Upon the initial SYN exchange the lifetime is kept short, then increased after both SYN have been seen, then decreased again during the final FIN exchange or when a RST is received. @@ -1647,8 +1792,12 @@ pattern will always fail on them, and the operator will make this rule into a pass-all. .It Address sets .Nm ipfw1 -does not supports address sets (those in the form +does not support address sets (those in the form .Ar addr/masklen{num,num,...} ) . +.It Table +.Nm ipfw1 +does not support +.Cm table . .It Port specifications .Nm ipfw1 only allows one port range when specifying TCP and UDP ports, and @@ -1683,7 +1832,7 @@ does not support Or-blocks. .Nm ipfw1 does not generate keepalives for stateful sessions. As a consequence, it might cause idle sessions to drop because -the lifetime of the dynamic rules expires. +the lifetime of the states expires. .It Sets of rules .Nm ipfw1 does not implement sets of rules. @@ -1734,7 +1883,7 @@ network to my host: .Pp .Dl "ipfw add deny ip from 123.45.67.0/24 to my.host.org" .Pp -A first and efficient way to limit access (not using dynamic rules) +A first and efficient way to limit access (not using states) is the use of the following rules: .Pp .Dl "ipfw add allow tcp from any to any established" @@ -1769,18 +1918,33 @@ The .Nm ipfw1 syntax would require a separate rule for each IP in the above example. -.Ss DYNAMIC RULES +.Pp +If you have large number of discrete addresses to block, +and the number of addresses to block keep increasing, +.Cm table +can be used as below: +.Pp +.Dl "... Initialize the blocked address list using table 0 ..." +.Dl "ipfw table 0 create" +.Dl "ipfw table 0 add 10.0.0.1 10.1.0.1 172.0.0.1" +.Dl "... Block the addresses in table 0 ..." +.Dl "ipfw add deny ip from <0> to any" +.Dl "... Add more addresses to table 0 any time later..." +.Dl "ipfw table 0 add 172.1.0.1" +.Dl "... Expire the addresses unused within the last 24 hours ..." 
+.Dl "ipfw table 0 expire 86400" +.Ss STATES In order to protect a site from flood attacks involving fake -TCP packets, it is safer to use dynamic rules: +TCP packets, it is safer to use states: .Pp .Dl "ipfw add check-state" .Dl "ipfw add deny tcp from any to any established" .Dl "ipfw add allow tcp from my-net to any setup keep-state" .Pp -This will let the firewall install dynamic rules only for +This will let the firewall install states only for those connection which start with a regular SYN packet coming from the inside of our network. -Dynamic rules are checked when encountering the first +States are checked when encountering the first .Cm check-state or .Cm keep-state @@ -1804,7 +1968,7 @@ client does not use more than 4 simultaneous connections. .Pp .Em BEWARE : stateful rules can be subject to denial-of-service attacks -by a SYN-flood which opens a huge number of dynamic rules. +by a SYN-flood which opens a huge number of states. The effects of such attacks can be partially limited by acting on a set of .Xr sysctl 8 @@ -1967,6 +2131,8 @@ Stateful extensions were introduced in .Fx 4.0 , and were rewritten in .Dx 4.9 . +Table was introduced in +.Dx 4.9 . .Nm ipfw2 was introduced in Summer 2002. 
.Sh AUTHORS diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 7eb9824ba0..df277d789e 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -63,6 +63,7 @@ int s, /* main RAW socket */ do_quiet, /* Be quiet in add and flush */ do_force, /* Don't ask for confirmation */ do_pipe, /* this cmd refers to a pipe */ + do_table, /* this cmd referes to a table */ do_sort, /* field to sort results (0 = no) */ do_dynamic, /* display dynamic rules */ do_expired, /* display expired dynamic rules */ @@ -648,6 +649,11 @@ print_ip(ipfw_insn_ip *cmd, char *s) printf("me"); return; } + if (cmd->o.opcode == O_IP_SRC_TABLE || + cmd->o.opcode == O_IP_DST_TABLE) { + printf("<%u>", cmd->o.arg1); + return; + } if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) { u_int32_t x, *d; int i; @@ -827,7 +833,9 @@ show_ipfw(struct ipfw_ioc_rule *rule, int pcwidth, int bcwidth) char timestr[30]; if (twidth == 0) { - strcpy(timestr, ctime((time_t *)&twidth)); + time_t t0 = 0; + + strcpy(timestr, ctime((time_t *)&t0)); *strchr(timestr, '\n') = '\0'; twidth = strlen(timestr); } @@ -987,6 +995,7 @@ show_ipfw(struct ipfw_ioc_rule *rule, int pcwidth, int bcwidth) case O_IP_SRC_MASK: case O_IP_SRC_ME: case O_IP_SRC_SET: + case O_IP_SRC_TABLE: show_prerequisites(&flags, HAVE_PROTO, 0); if (!(flags & HAVE_SRCIP)) printf(" from"); @@ -1001,6 +1010,7 @@ show_ipfw(struct ipfw_ioc_rule *rule, int pcwidth, int bcwidth) case O_IP_DST_MASK: case O_IP_DST_ME: case O_IP_DST_SET: + case O_IP_DST_TABLE: show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); if (!(flags & HAVE_DSTIP)) printf(" to"); @@ -1777,6 +1787,27 @@ fill_ip(ipfw_insn_ip *cmd, char *av) return; } + if (strlen(av) >= 3 && av[0] == '<' && av[strlen(av) - 1] == '>') { + int pos = strlen(av) - 1; + uint16_t tableid; + char *eptr; + + /* + * Table: "" + */ + av[pos] = '\0'; + tableid = strtoul(&av[1], &eptr, 0); + if (*eptr != '\0') { + av[pos] = '>'; + errx(EX_DATAERR, "invalid tableid ``%s''", av); + } + av[pos] = '>'; + 
cmd->o.len |= F_INSN_SIZE(ipfw_insn); + cmd->o.opcode = O_IP_DST_TABLE; + cmd->o.arg1 = tableid; + return; + } + p = strchr(av, '/'); if (!p) p = strchr(av, ':'); @@ -2450,6 +2481,8 @@ add_srcip(ipfw_insn *cmd, char *av) fill_ip((ipfw_insn_ip *)cmd, av); if (cmd->opcode == O_IP_DST_SET) /* set */ cmd->opcode = O_IP_SRC_SET; + else if (cmd->opcode == O_IP_DST_TABLE) /* table */ + cmd->opcode = O_IP_SRC_TABLE; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ cmd->opcode = O_IP_SRC_ME; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ @@ -2465,6 +2498,8 @@ add_dstip(ipfw_insn *cmd, char *av) fill_ip((ipfw_insn_ip *)cmd, av); if (cmd->opcode == O_IP_DST_SET) /* set */ ; + else if (cmd->opcode == O_IP_DST_TABLE) /* table */ + ; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ cmd->opcode = O_IP_DST_ME; else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ @@ -3368,6 +3403,410 @@ flush(void) printf("Flushed all %s.\n", do_pipe ? "pipes" : "rules"); } +static void +table_create(int ac, char **av) +{ + struct ipfw_ioc_table tbl; + char *eptr; + + ac--; + av++; + + if (ac == 0) + errx(EX_DATAERR, "missing table id"); + + memset(&tbl, 0, sizeof(tbl)); + tbl.tableid = strtoul(*av, &eptr, 0); + if (*eptr != '\0') + errx(EX_DATAERR, "invalid table id %s", *av); + if (setsockopt(s, IPPROTO_IP, IP_FW_TBL_CREATE, &tbl, sizeof(tbl)) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_TBL_CREATE)"); + if (!do_quiet) + printf("Created table %d\n", tbl.tableid); +} + +static void +table_flush(int ac, char **av, int opt) +{ + struct ipfw_ioc_table tbl; + char *eptr; + + ac--; + av++; + + memset(&tbl, 0, sizeof(tbl)); + + if (ac == 0) { + if (opt == IP_FW_TBL_FLUSH) { + /* Flush all tables */ + tbl.tableid = -1; + goto flush; + } + errx(EX_DATAERR, "missing table id"); + } + + tbl.tableid = strtoul(*av, &eptr, 0); + if (*eptr != '\0') + errx(EX_DATAERR, "invalid table id %s", *av); + +flush: + if (!do_force && !do_quiet) { /* need to ask user */ + int c; + 
+ printf("Are you sure? [yn] "); + fflush(stdout); + do { + c = toupper(getc(stdin)); + while (c != '\n' && getc(stdin) != '\n') + if (feof(stdin)) + return; /* and do not flush */ + } while (c != 'Y' && c != 'N'); + printf("\n"); + if (c == 'N') /* user said no */ + return; + } + + if (setsockopt(s, IPPROTO_IP, opt, &tbl, sizeof(tbl)) < 0) { + err(EX_UNAVAILABLE, "setsockopt(IP_FW_TBL_%s)", + opt == IP_FW_TBL_FLUSH ? "FLUSH" : "DESTROY"); + } + if (!do_quiet) { + if (tbl.tableid >= 0) { + printf("%sed table %d\n", + opt == IP_FW_TBL_FLUSH ? "Flush" : "Destroy", + tbl.tableid); + } else { + printf("Flushed all tables\n"); + } + } +} + +static void +table_list(void) +{ + struct ipfw_ioc_tbllist *list; + int table_max, i; + size_t len; + socklen_t len1; + + len = sizeof(table_max); + if (sysctlbyname("net.inet.ip.fw.table_max", &table_max, &len, + NULL, 0) < 0) + err(EX_UNAVAILABLE, "sysctl net.inet.ip.fw.table_max failed"); + + len1 = __offsetof(struct ipfw_ioc_tbllist, tables[table_max]); + list = malloc(len1); + list->tableid = -1; + + if (getsockopt(s, IPPROTO_IP, IP_FW_TBL_GET, list, &len1) < 0) + err(EX_UNAVAILABLE, "getsockopt(IP_FW_TBL_GET)"); + + for (i = 0; i < list->tablecnt; ++i) + printf("%u\n", list->tables[i]); +} + +/* XXX copied from route(8) */ +static void +inet_makenetandmask(in_addr_t net, struct sockaddr_in *in, + struct sockaddr_in *in_mask, int bits) +{ + in_addr_t addr, mask = 0; + char *cp; + + /* + * XXX: This approach unable to handle 0.0.0.1/32 correctly + * as inet_network() converts 0.0.0.1 and 1 equally. 
+ */ + if (net <= 0xff) + addr = net << IN_CLASSA_NSHIFT; + else if (net <= 0xffff) + addr = net << IN_CLASSB_NSHIFT; + else if (net <= 0xffffff) + addr = net << IN_CLASSC_NSHIFT; + else + addr = net; + + if (bits != 0) + mask = 0xffffffff << (32 - bits); + else if (net == 0) + mask = 0; + else if (IN_CLASSA(addr)) + mask = IN_CLASSA_NET; + else if (IN_CLASSB(addr)) + mask = IN_CLASSB_NET; + else if (IN_CLASSC(addr)) + mask = IN_CLASSC_NET; + else if (IN_MULTICAST(addr)) + mask = IN_CLASSD_NET; + else + mask = 0xffffffff; + + in->sin_family = AF_INET; + in->sin_len = sizeof(struct sockaddr_in); + in->sin_addr.s_addr = htonl(addr); + + if (mask != 0xffffffff) { + in_mask->sin_addr.s_addr = htonl(mask); + cp = (char *)(&in_mask->sin_addr + 1); + while (*--cp == 0 && cp > (char *)in_mask) + ; + in_mask->sin_len = 1 + cp - (char *)in_mask; + } +} + +static void +table_alt(int ac, char **av, int opt) +{ + struct ipfw_ioc_tblcont ent; + struct ipfw_ioc_tblent *te; + char *eptr; + + --ac; + ++av; + if (ac == 0) + errx(EX_DATAERR, "missing table id"); + + memset(&ent, 0, sizeof(ent)); + ent.tableid = strtoul(*av, &eptr, 0); + ent.entcnt = 1; + te = &ent.ent[0]; + if (*eptr != '\0') + errx(EX_DATAERR, "invalid table id %s", *av); + + --ac; + ++av; + if (ac == 0) + errx(EX_DATAERR, "missing addresses"); + + while (ac > 0) { + char *q; + + q = strchr(*av, '/'); + if (q != NULL) { + in_addr_t val; + int bits; + + *q = '\0'; + val = inet_network(*av); + *q = '/'; + if (val == INADDR_NONE) { + fflush(stdout); + errx(EX_DATAERR, "invalid address %s", *av); + } + + bits = strtoul(q + 1, &eptr, 0); + if (*eptr != '\0') { + fflush(stdout); + errx(EX_DATAERR, "invalid address %s", *av); + } + inet_makenetandmask(val, &te->key, &te->netmask, bits); + } else { + int n; + + n = inet_pton(AF_INET, *av, &te->key.sin_addr); + if (n == 0) { + fflush(stdout); + errx(EX_DATAERR, "invalid address %s", *av); + } else if (n < 0) { + fflush(stdout); + err(EX_UNAVAILABLE, "inet_pton failed"); + } 
+ te->key.sin_family = AF_INET; + te->key.sin_len = sizeof(struct sockaddr_in); + } + + if (setsockopt(s, IPPROTO_IP, opt, &ent, sizeof(ent)) < 0) { + if (opt == IP_FW_TBL_ADD && errno == EEXIST) { + printf("Failed to add %s to table %d\n", + *av, ent.tableid); + } else if (opt == IP_FW_TBL_DEL && errno == ESRCH) { + printf("Failed to delete %s from table %d\n", + *av, ent.tableid); + } else { + fflush(stdout); + err(EX_UNAVAILABLE, "setsockopt(IP_FW_TBL_%s)", + opt == IP_FW_TBL_ADD ? "ADD" : "DEL"); + } + } else if (!do_quiet) { + printf("%sed %s %s table %d\n", + opt == IP_FW_TBL_ADD ? "Add" : "Delet", *av, + opt == IP_FW_TBL_ADD ? "to" : "from", ent.tableid); + } + + --ac; + ++av; + } +} + +static void +table_show(int ac, char **av) +{ + struct ipfw_ioc_tblcont *cont = NULL; + int tableid, count = 128, i, uwidth = 0, lwidth = 0; + char *eptr; + + --ac; + ++av; + if (ac == 0) + errx(EX_DATAERR, "missing table id"); + + tableid = strtoul(*av, &eptr, 0); + if (*eptr != '\0') + errx(EX_DATAERR, "invalid table id %s", *av); + + for (;;) { + socklen_t len; + + len = __offsetof(struct ipfw_ioc_tblcont, ent[count]); + cont = reallocf(cont, len); + cont->tableid = tableid; + + if (getsockopt(s, IPPROTO_IP, IP_FW_TBL_GET, cont, &len) < 0) { + if (errno == E2BIG) { + count *= 2; + continue; + } + err(EX_UNAVAILABLE, "getsockopt(IP_FW_TBL_GET)"); + } + break; + } + if (cont->entcnt == 0) + return; + + if (do_acct) { + for (i = 0; i < cont->entcnt; ++i) { + int width; + + width = snprintf(NULL, 0, "%ju", + (uintmax_t)cont->ent[i].use); + if (width > uwidth) + uwidth = width; + } + } + + for (i = 0; i < cont->entcnt; ++i) { + const struct ipfw_ioc_tblent *te = &cont->ent[i]; + char addr[INET_ADDRSTRLEN]; + + if (do_acct) + printf("%*ju ", uwidth, (uintmax_t)te->use); + if (do_time) { + char timestr[30]; + + if (lwidth == 0) { + time_t t0 = 0; + + strcpy(timestr, ctime(&t0)); + *strchr(timestr, '\n') = '\0'; + lwidth = strlen(timestr); + } + if (te->last_used) { + time_t t = 
_long_to_time(te->last_used); + + strcpy(timestr, ctime(&t)); + *strchr(timestr, '\n') = '\0'; + printf("%s ", timestr); + } else { + printf("%*s ", lwidth, " "); + } + } + + if (te->netmask.sin_len == 0) { + printf("%s\n", inet_ntop(AF_INET, + &te->key.sin_addr, addr, sizeof(addr))); + } else { + struct sockaddr_in mask; + int b; + + memset(&mask, 0, sizeof(mask)); + memcpy(&mask, &te->netmask, + te->netmask.sin_len); + b = ffs(ntohl(te->netmask.sin_addr.s_addr)); + b = 32 - (b - 1); + + printf("%s/%d\n", inet_ntop(AF_INET, + &te->key.sin_addr, addr, sizeof(addr)), b); + } + } +} + +static void +table_zero(int ac, char **av) +{ + struct ipfw_ioc_table tbl; + + --ac; + ++av; + + memset(&tbl, 0, sizeof(tbl)); + if (ac == 0) { + tbl.tableid = -1; + } else { + char *eptr; + + tbl.tableid = strtoul(*av, &eptr, 0); + if (*eptr != '\0') + errx(EX_DATAERR, "invalid table id %s", *av); + } + + if (setsockopt(s, IPPROTO_IP, IP_FW_TBL_ZERO, &tbl, sizeof(tbl)) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_TBL_ZERO)"); + if (!do_quiet) + printf("Accounting cleared\n"); +} + +static void +table_expire(int ac, char **av) +{ + struct ipfw_ioc_tblexp tbl; + char *eptr; + socklen_t len; + + --ac; + ++av; + + memset(&tbl, 0, sizeof(tbl)); + if (ac == 0) { + errx(EX_DATAERR, "missing expire time"); + } else if (ac == 1) { + tbl.tableid = -1; + } else { + tbl.tableid = strtoul(*av, &eptr, 0); + if (*eptr != '\0') + errx(EX_DATAERR, "invalid table id %s", *av); + --ac; + ++av; + } + + tbl.expire = strtoul(*av, &eptr, 0); + if (*eptr != '\0') + errx(EX_DATAERR, "invalid expire timeout %s", *av); + + if (!do_force && !do_quiet) { /* need to ask user */ + int c; + + printf("Are you sure? 
[yn] "); + fflush(stdout); + do { + c = toupper(getc(stdin)); + while (c != '\n' && getc(stdin) != '\n') + if (feof(stdin)) + return; /* and do not flush */ + } while (c != 'Y' && c != 'N'); + printf("\n"); + if (c == 'N') /* user said no */ + return; + } + + len = sizeof(tbl); + if (getsockopt(s, IPPROTO_IP, IP_FW_TBL_EXPIRE, &tbl, &len) < 0) + err(EX_UNAVAILABLE, "getsockopt(IP_FW_TBL_EXPIRE)"); + if (!do_quiet) { + printf("Expired %d address%s\n", tbl.expcnt, + (tbl.expcnt == 0 || tbl.expcnt > 1) ? "es" : ""); + } +} + static int ipfw_main(int ac, char **av) { @@ -3428,7 +3867,7 @@ ipfw_main(int ac, char **av) NEED1("bad arguments, for usage summary ``ipfw''"); /* - * optional: pipe or queue + * optional: pipe, queue or table */ if (!strncmp(*av, "pipe", strlen(*av))) { do_pipe = 1; @@ -3438,19 +3877,50 @@ ipfw_main(int ac, char **av) do_pipe = 2; ac--; av++; + } else if (!strncmp(*av, "table", strlen(*av))) { + do_table = 1; + ac--; + av++; } NEED1("missing command"); /* - * for pipes and queues we normally say 'pipe NN config' - * but the code is easier to parse as 'pipe config NN' - * so we swap the two arguments. + * for pipes, queues and table we normally say 'pipe NN config' + * but the code is easier to parse as 'pipe config NN' so we + * swap the two arguments. 
*/ - if (do_pipe > 0 && ac > 1 && *av[0] >= '0' && *av[0] <= '9') { + if ((do_pipe > 0 || do_table > 0) && ac > 1 && + *av[0] >= '0' && *av[0] <= '9') { char *p = av[0]; av[0] = av[1]; av[1] = p; } + if (do_table) { + if (!strncmp(*av, "create", strlen(*av))) + table_create(ac, av); + else if (!strncmp(*av, "destroy", strlen(*av))) + table_flush(ac, av, IP_FW_TBL_DESTROY); + else if (!strncmp(*av, "list", strlen(*av))) + table_list(); + else if (!strncmp(*av, "add", strlen(*av))) + table_alt(ac, av, IP_FW_TBL_ADD); + else if (!strncmp(*av, "delete", strlen(*av))) + table_alt(ac, av, IP_FW_TBL_DEL); + else if (!strncmp(*av, "flush", strlen(*av))) + table_flush(ac, av, IP_FW_TBL_FLUSH); + else if (!strncmp(*av, "print", strlen(*av))) + table_show(ac, av); + else if (!strncmp(*av, "show", strlen(*av))) { + do_acct++; + table_show(ac, av); + } else if (!strncmp(*av, "zero", strlen(*av))) + table_zero(ac, av); + else if (!strncmp(*av, "expire", strlen(*av))) + table_expire(ac, av); + else + errx(EX_USAGE, "bad command `%s'", *av); + return 0; + } if (!strncmp(*av, "add", strlen(*av))) add(ac, av); else if (do_pipe && !strncmp(*av, "config", strlen(*av))) diff --git a/sys/net/ipfw/ip_fw2.c b/sys/net/ipfw/ip_fw2.c index 93c2de5dcf..4188b08b24 100644 --- a/sys/net/ipfw/ip_fw2.c +++ b/sys/net/ipfw/ip_fw2.c @@ -245,6 +245,8 @@ do { \ #define IPFW_AUTOINC_STEP_MAX 1000 #define IPFW_AUTOINC_STEP_DEF 100 +#define IPFW_TABLE_MAX_DEF 64 + #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */ #define IPFW_DEFAULT_SET 31 /* set number for the default rule */ @@ -301,6 +303,35 @@ struct netmsg_cpstate { int state_cnt; }; +struct netmsg_tblent { + struct netmsg_base base; + struct sockaddr *key; + struct sockaddr *netmask; + struct ipfw_tblent *sibling; + int tableid; +}; + +struct netmsg_tblflush { + struct netmsg_base base; + int tableid; + int destroy; +}; + +struct netmsg_tblexp { + struct netmsg_base base; + time_t expire; + int tableid; + int cnt; + int expcnt; + 
struct radix_node_head *rnh; +}; + +struct ipfw_table_cp { + struct ipfw_ioc_tblent *te; + int te_idx; + int te_cnt; +}; + struct ipfw_addrs { uint32_t addr1; uint32_t addr2; @@ -419,6 +450,15 @@ struct ipfw_state { TAILQ_HEAD(ipfw_state_list, ipfw_state); RB_HEAD(ipfw_state_tree, ipfw_state); +struct ipfw_tblent { + struct radix_node te_nodes[2]; + struct sockaddr_in te_key; + u_long te_use; + time_t te_lastuse; + struct ipfw_tblent *te_sibling; + volatile int te_expired; +}; + struct ipfw_context { struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */ struct ip_fw *ipfw_default_rule; /* default rule */ @@ -480,6 +520,9 @@ struct ipfw_context { u_long ipfw_tks_reapfailed; u_long ipfw_tks_overflow; u_long ipfw_tks_cntnomem; + + /* Last field */ + struct radix_node_head *ipfw_tables[]; }; #define IPFW_FLAG_KEEPALIVE 0x01 @@ -533,9 +576,13 @@ static int verbose_limit; static int fw_debug; static int autoinc_step = IPFW_AUTOINC_STEP_DEF; +static int ipfw_table_max = IPFW_TABLE_MAX_DEF; + static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS); static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS); +TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max); + SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0, "Firewall statistics"); @@ -554,6 +601,8 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, &fw_verbose, 0, "Log matches to ipfw rules"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD, + &ipfw_table_max, 0, "Max # of tables"); static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS); static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS); @@ -733,6 +782,26 @@ static int ipfw_state_expire_start(struct ipfw_context *, #define IPFW_TRKCNT_TOKINIT \ lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt"); +static void +sa_maskedcopy(const 
struct sockaddr *src, struct sockaddr *dst, + const struct sockaddr *netmask) +{ + const u_char *cp1 = (const u_char *)src; + u_char *cp2 = (u_char *)dst; + const u_char *cp3 = (const u_char *)netmask; + u_char *cplim = cp2 + *cp3; + u_char *cplim2 = cp2 + *cp1; + + *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ + cp3 += 2; + if (cplim > cplim2) + cplim = cplim2; + while (cp2 < cplim) + *cp2++ = *cp1++ & *cp3++; + if (cp2 < cplim2) + bzero(cp2, cplim2 - cp2); +} + static __inline void ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport, in_addr_t daddr, uint16_t dport, uint8_t proto) @@ -2391,6 +2460,33 @@ ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule, return (0); } +static int +ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid, + const struct in_addr *in) +{ + struct radix_node_head *rnh; + struct sockaddr_in sin; + struct ipfw_tblent *te; + + KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid)); + rnh = ctx->ipfw_tables[tableid]; + if (rnh == NULL) + return (0); /* no match */ + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_addr = *in; + + te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh); + if (te == NULL) + return (0); /* no match */ + + te->te_use++; + te->te_lastuse = time_second; + return (1); /* match */ +} + /* * Transmit a TCP packet, containing either a RST or a keepalive. 
* When flags & TH_RST, we are sending a RST packet, because of a @@ -3014,6 +3110,11 @@ check_body: } break; + case O_IP_SRC_TABLE: + match = ipfw_table_lookup(ctx, cmd->arg1, + &src_ip); + break; + case O_IP_DST_SET: case O_IP_SRC_SET: if (hlen > 0) { @@ -3055,6 +3156,11 @@ check_body: } break; + case O_IP_DST_TABLE: + match = ipfw_table_lookup(ctx, cmd->arg1, + &dst_ip); + break; + case O_IP_SRCPORT: case O_IP_DSTPORT: /* @@ -4333,6 +4439,17 @@ ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) goto bad_size; break; + case O_IP_SRC_TABLE: + case O_IP_DST_TABLE: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + if (cmd->arg1 >= ipfw_table_max) { + kprintf("ipfw: invalid table id %u, max %d\n", + cmd->arg1, ipfw_table_max); + return EINVAL; + } + break; + case O_UID: case O_GID: case O_IP_SRC: @@ -4780,6 +4897,620 @@ ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) netisr_domsg_global(&nmsg); } +static void +ipfw_table_create_dispatch(netmsg_t nm) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + int tblid = nm->lmsg.u.ms_result; + + ASSERT_NETISR_NCPUS(mycpuid); + + if (!rn_inithead((void **)&ctx->ipfw_tables[tblid], + rn_cpumaskhead(mycpuid), 32)) + panic("ipfw: create table%d failed", tblid); + + netisr_forwardmsg(&nm->base, mycpuid + 1); +} + +static int +ipfw_table_create(struct sockopt *sopt) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct ipfw_ioc_table *tbl; + struct netmsg_base nm; + + ASSERT_NETISR0; + + if (sopt->sopt_valsize != sizeof(*tbl)) + return (EINVAL); + + tbl = sopt->sopt_val; + if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) + return (EINVAL); + + if (ctx->ipfw_tables[tbl->tableid] != NULL) + return (EEXIST); + + netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, + ipfw_table_create_dispatch); + nm.lmsg.u.ms_result = tbl->tableid; + netisr_domsg_global(&nm); + + return (0); +} + +static void +ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn) +{ + 
struct radix_node *ret; + + ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); + if (ret != rn) + panic("deleted other table entry"); + kfree(ret, M_IPFW); +} + +static int +ipfw_table_killent(struct radix_node *rn, void *xrnh) +{ + + ipfw_table_killrn(xrnh, rn); + return (0); +} + +static void +ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid, + int destroy) +{ + struct radix_node_head *rnh; + + ASSERT_NETISR_NCPUS(mycpuid); + + rnh = ctx->ipfw_tables[tableid]; + rnh->rnh_walktree(rnh, ipfw_table_killent, rnh); + if (destroy) { + Free(rnh); + ctx->ipfw_tables[tableid] = NULL; + } +} + +static void +ipfw_table_flush_dispatch(netmsg_t nmsg) +{ + struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg; + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + + ASSERT_NETISR_NCPUS(mycpuid); + + ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy); + netisr_forwardmsg(&nm->base, mycpuid + 1); +} + +static void +ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy) +{ + int i; + + ASSERT_NETISR_NCPUS(mycpuid); + + for (i = 0; i < ipfw_table_max; ++i) { + if (ctx->ipfw_tables[i] != NULL) + ipfw_table_flush_oncpu(ctx, i, destroy); + } +} + +static void +ipfw_table_flushall_dispatch(netmsg_t nmsg) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + + ASSERT_NETISR_NCPUS(mycpuid); + + ipfw_table_flushall_oncpu(ctx, 0); + netisr_forwardmsg(&nmsg->base, mycpuid + 1); +} + +static int +ipfw_table_flush(struct sockopt *sopt) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct ipfw_ioc_table *tbl; + struct netmsg_tblflush nm; + + ASSERT_NETISR0; + + if (sopt->sopt_valsize != sizeof(*tbl)) + return (EINVAL); + + tbl = sopt->sopt_val; + if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) { + netmsg_init(&nm.base, NULL, &curthread->td_msgport, + MSGF_PRIORITY, ipfw_table_flushall_dispatch); + netisr_domsg_global(&nm.base); + return (0); + } + + if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) + return (EINVAL); + + if 
(ctx->ipfw_tables[tbl->tableid] == NULL) + return (ENOENT); + + netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, + ipfw_table_flush_dispatch); + nm.tableid = tbl->tableid; + nm.destroy = 0; + if (sopt->sopt_name == IP_FW_TBL_DESTROY) + nm.destroy = 1; + netisr_domsg_global(&nm.base); + + return (0); +} + +static int +ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt) +{ + int *cnt = xcnt; + + (*cnt)++; + return (0); +} + +static int +ipfw_table_cpent(struct radix_node *rn, void *xcp) +{ + struct ipfw_table_cp *cp = xcp; + struct ipfw_tblent *te = (struct ipfw_tblent *)rn; + struct ipfw_ioc_tblent *ioc_te; +#ifdef INVARIANTS + int cnt; +#endif + + KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d", + cp->te_idx, cp->te_cnt)); + ioc_te = &cp->te[cp->te_idx]; + + if (te->te_nodes->rn_mask != NULL) { + memcpy(&ioc_te->netmask, te->te_nodes->rn_mask, + *te->te_nodes->rn_mask); + } else { + ioc_te->netmask.sin_len = 0; + } + memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key)); + + ioc_te->use = te->te_use; + ioc_te->last_used = te->te_lastuse; +#ifdef INVARIANTS + cnt = 1; +#endif + + while ((te = te->te_sibling) != NULL) { +#ifdef INVARIANTS + ++cnt; +#endif + ioc_te->use += te->te_use; + if (te->te_lastuse > ioc_te->last_used) + ioc_te->last_used = te->te_lastuse; + } + KASSERT(cnt == netisr_ncpus, + ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus)); + + cp->te_idx++; + + return (0); +} + +static int +ipfw_table_get(struct sockopt *sopt) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct radix_node_head *rnh; + struct ipfw_ioc_table *tbl; + struct ipfw_ioc_tblcont *cont; + struct ipfw_table_cp cp; + int cnt = 0, sz; + + ASSERT_NETISR0; + + if (sopt->sopt_valsize < sizeof(*tbl)) + return (EINVAL); + + tbl = sopt->sopt_val; + if (tbl->tableid < 0) { + struct ipfw_ioc_tbllist *list; + int i; + + /* + * List available table ids. 
+ */ + for (i = 0; i < ipfw_table_max; ++i) { + if (ctx->ipfw_tables[i] != NULL) + ++cnt; + } + + sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]); + if (sopt->sopt_valsize < sz) { + bzero(sopt->sopt_val, sopt->sopt_valsize); + return (E2BIG); + } + list = sopt->sopt_val; + list->tablecnt = cnt; + + cnt = 0; + for (i = 0; i < ipfw_table_max; ++i) { + if (ctx->ipfw_tables[i] != NULL) { + KASSERT(cnt < list->tablecnt, + ("invalid idx %d, cnt %d", + cnt, list->tablecnt)); + list->tables[cnt++] = i; + } + } + sopt->sopt_valsize = sz; + return (0); + } else if (tbl->tableid >= ipfw_table_max) { + return (EINVAL); + } + + rnh = ctx->ipfw_tables[tbl->tableid]; + if (rnh == NULL) + return (ENOENT); + rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt); + + sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]); + if (sopt->sopt_valsize < sz) { + bzero(sopt->sopt_val, sopt->sopt_valsize); + return (E2BIG); + } + cont = sopt->sopt_val; + cont->entcnt = cnt; + + cp.te = cont->ent; + cp.te_idx = 0; + cp.te_cnt = cnt; + rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp); + + sopt->sopt_valsize = sz; + return (0); +} + +static void +ipfw_table_add_dispatch(netmsg_t nmsg) +{ + struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct radix_node_head *rnh; + struct ipfw_tblent *te; + + ASSERT_NETISR_NCPUS(mycpuid); + + rnh = ctx->ipfw_tables[nm->tableid]; + + te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO); + te->te_nodes->rn_key = (char *)&te->te_key; + memcpy(&te->te_key, nm->key, sizeof(te->te_key)); + + if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh, + te->te_nodes) == NULL) { + if (mycpuid == 0) { + kfree(te, M_IPFW); + netisr_replymsg(&nm->base, EEXIST); + return; + } + panic("rnh_addaddr failed"); + } + + /* Link siblings. 
*/ + if (nm->sibling != NULL) + nm->sibling->te_sibling = te; + nm->sibling = te; + + netisr_forwardmsg(&nm->base, mycpuid + 1); +} + +static void +ipfw_table_del_dispatch(netmsg_t nmsg) +{ + struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct radix_node_head *rnh; + struct radix_node *rn; + + ASSERT_NETISR_NCPUS(mycpuid); + + rnh = ctx->ipfw_tables[nm->tableid]; + rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh); + if (rn == NULL) { + if (mycpuid == 0) { + netisr_replymsg(&nm->base, ESRCH); + return; + } + panic("rnh_deladdr failed"); + } + kfree(rn, M_IPFW); + + netisr_forwardmsg(&nm->base, mycpuid + 1); +} + +static int +ipfw_table_alt(struct sockopt *sopt) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct ipfw_ioc_tblcont *tbl; + struct ipfw_ioc_tblent *te; + struct sockaddr_in key0; + struct sockaddr *netmask = NULL, *key; + struct netmsg_tblent nm; + + ASSERT_NETISR0; + + if (sopt->sopt_valsize != sizeof(*tbl)) + return (EINVAL); + tbl = sopt->sopt_val; + + if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) + return (EINVAL); + if (tbl->entcnt != 1) + return (EINVAL); + + if (ctx->ipfw_tables[tbl->tableid] == NULL) + return (ENOENT); + te = &tbl->ent[0]; + + if (te->key.sin_family != AF_INET || + te->key.sin_port != 0 || + te->key.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + key = (struct sockaddr *)&te->key; + + if (te->netmask.sin_len != 0) { + if (te->netmask.sin_port != 0 || + te->netmask.sin_len > sizeof(struct sockaddr_in)) + return (EINVAL); + netmask = (struct sockaddr *)&te->netmask; + sa_maskedcopy(key, (struct sockaddr *)&key0, netmask); + key = (struct sockaddr *)&key0; + } + + if (sopt->sopt_name == IP_FW_TBL_ADD) { + netmsg_init(&nm.base, NULL, &curthread->td_msgport, + MSGF_PRIORITY, ipfw_table_add_dispatch); + } else { + netmsg_init(&nm.base, NULL, &curthread->td_msgport, + MSGF_PRIORITY, ipfw_table_del_dispatch); + } + nm.key = key; + 
nm.netmask = netmask; + nm.tableid = tbl->tableid; + nm.sibling = NULL; + return (netisr_domsg_global(&nm.base)); +} + +static int +ipfw_table_zeroent(struct radix_node *rn, void *arg __unused) +{ + struct ipfw_tblent *te = (struct ipfw_tblent *)rn; + + te->te_use = 0; + te->te_lastuse = 0; + return (0); +} + +static void +ipfw_table_zero_dispatch(netmsg_t nmsg) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct radix_node_head *rnh; + + ASSERT_NETISR_NCPUS(mycpuid); + + rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result]; + rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); + + netisr_forwardmsg(&nmsg->base, mycpuid + 1); +} + +static void +ipfw_table_zeroall_dispatch(netmsg_t nmsg) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + int i; + + ASSERT_NETISR_NCPUS(mycpuid); + + for (i = 0; i < ipfw_table_max; ++i) { + struct radix_node_head *rnh = ctx->ipfw_tables[i]; + + if (rnh != NULL) + rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); + } + netisr_forwardmsg(&nmsg->base, mycpuid + 1); +} + +static int +ipfw_table_zero(struct sockopt *sopt) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct netmsg_base nm; + struct ipfw_ioc_table *tbl; + + ASSERT_NETISR0; + + if (sopt->sopt_valsize != sizeof(*tbl)) + return (EINVAL); + tbl = sopt->sopt_val; + + if (tbl->tableid < 0) { + netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, + ipfw_table_zeroall_dispatch); + netisr_domsg_global(&nm); + return (0); + } else if (tbl->tableid >= ipfw_table_max) { + return (EINVAL); + } else if (ctx->ipfw_tables[tbl->tableid] == NULL) { + return (ENOENT); + } + + netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, + ipfw_table_zero_dispatch); + nm.lmsg.u.ms_result = tbl->tableid; + netisr_domsg_global(&nm); + + return (0); +} + +static int +ipfw_table_killexp(struct radix_node *rn, void *xnm) +{ + struct netmsg_tblexp *nm = xnm; + struct ipfw_tblent *te = (struct ipfw_tblent *)rn; + + if (te->te_expired) { + ipfw_table_killrn(nm->rnh, rn); + 
nm->expcnt++; + } + return (0); +} + +static void +ipfw_table_expire_dispatch(netmsg_t nmsg) +{ + struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg; + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct radix_node_head *rnh; + + ASSERT_NETISR_NCPUS(mycpuid); + + rnh = ctx->ipfw_tables[nm->tableid]; + nm->rnh = rnh; + rnh->rnh_walktree(rnh, ipfw_table_killexp, nm); + + KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1), + ("not all expired addresses (%d) were deleted (%d)", + nm->cnt * (mycpuid + 1), nm->expcnt)); + + netisr_forwardmsg(&nm->base, mycpuid + 1); +} + +static void +ipfw_table_expireall_dispatch(netmsg_t nmsg) +{ + struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg; + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + int i; + + ASSERT_NETISR_NCPUS(mycpuid); + + for (i = 0; i < ipfw_table_max; ++i) { + struct radix_node_head *rnh = ctx->ipfw_tables[i]; + + if (rnh == NULL) + continue; + nm->rnh = rnh; + rnh->rnh_walktree(rnh, ipfw_table_killexp, nm); + } + + KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1), + ("not all expired addresses (%d) were deleted (%d)", + nm->cnt * (mycpuid + 1), nm->expcnt)); + + netisr_forwardmsg(&nm->base, mycpuid + 1); +} + +static int +ipfw_table_markexp(struct radix_node *rn, void *xnm) +{ + struct netmsg_tblexp *nm = xnm; + struct ipfw_tblent *te; + time_t lastuse; + + te = (struct ipfw_tblent *)rn; + lastuse = te->te_lastuse; + + while ((te = te->te_sibling) != NULL) { + if (te->te_lastuse > lastuse) + lastuse = te->te_lastuse; + } + if (!TIME_LEQ(lastuse + nm->expire, time_second)) { + /* Not expired */ + return (0); + } + + te = (struct ipfw_tblent *)rn; + te->te_expired = 1; + while ((te = te->te_sibling) != NULL) + te->te_expired = 1; + nm->cnt++; + + return (0); +} + +static int +ipfw_table_expire(struct sockopt *sopt) +{ + struct ipfw_context *ctx = ipfw_ctx[mycpuid]; + struct netmsg_tblexp nm; + struct ipfw_ioc_tblexp *tbl; + struct radix_node_head *rnh; + + ASSERT_NETISR0; + + if (sopt->sopt_valsize != 
sizeof(*tbl)) + return (EINVAL); + tbl = sopt->sopt_val; + tbl->expcnt = 0; + + nm.expcnt = 0; + nm.cnt = 0; + nm.expire = tbl->expire; + + if (tbl->tableid < 0) { + int i; + + for (i = 0; i < ipfw_table_max; ++i) { + rnh = ctx->ipfw_tables[i]; + if (rnh == NULL) + continue; + rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm); + } + if (nm.cnt == 0) { + /* No addresses can be expired. */ + return (0); + } + tbl->expcnt = nm.cnt; + + netmsg_init(&nm.base, NULL, &curthread->td_msgport, + MSGF_PRIORITY, ipfw_table_expireall_dispatch); + nm.tableid = -1; + netisr_domsg_global(&nm.base); + KASSERT(nm.expcnt == nm.cnt * netisr_ncpus, + ("not all expired addresses (%d) were deleted (%d)", + nm.cnt * netisr_ncpus, nm.expcnt)); + + return (0); + } else if (tbl->tableid >= ipfw_table_max) { + return (EINVAL); + } + + rnh = ctx->ipfw_tables[tbl->tableid]; + if (rnh == NULL) + return (ENOENT); + rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm); + if (nm.cnt == 0) { + /* No addresses can be expired. */ + return (0); + } + tbl->expcnt = nm.cnt; + + netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, + ipfw_table_expire_dispatch); + nm.tableid = tbl->tableid; + netisr_domsg_global(&nm.base); + KASSERT(nm.expcnt == nm.cnt * netisr_ncpus, + ("not all expired addresses (%d) were deleted (%d)", + nm.cnt * netisr_ncpus, nm.expcnt)); + return (0); +} + /* * {set|get}sockopt parser. 
*/ @@ -4851,6 +5582,32 @@ ipfw_ctl(struct sockopt *sopt) sopt->sopt_name == IP_FW_RESETLOG); break; + case IP_FW_TBL_CREATE: + error = ipfw_table_create(sopt); + break; + + case IP_FW_TBL_ADD: + case IP_FW_TBL_DEL: + error = ipfw_table_alt(sopt); + break; + + case IP_FW_TBL_FLUSH: + case IP_FW_TBL_DESTROY: + error = ipfw_table_flush(sopt); + break; + + case IP_FW_TBL_GET: + error = ipfw_table_get(sopt); + break; + + case IP_FW_TBL_ZERO: + error = ipfw_table_zero(sopt); + break; + + case IP_FW_TBL_EXPIRE: + error = ipfw_table_expire(sopt); + break; + default: kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name); error = EINVAL; @@ -5322,7 +6079,8 @@ ipfw_ctx_init_dispatch(netmsg_t nmsg) ASSERT_NETISR_NCPUS(mycpuid); - ctx = kmalloc(sizeof(*ctx), M_IPFW, M_WAITOK | M_ZERO); + ctx = kmalloc(__offsetof(struct ipfw_context, + ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO); RB_INIT(&ctx->ipfw_state_tree); TAILQ_INIT(&ctx->ipfw_state_list); @@ -5400,6 +6158,9 @@ ipfw_init_dispatch(netmsg_t nmsg) goto reply; } + if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0) + ipfw_table_max = UINT16_MAX; + /* Initialize global track tree. */ RB_INIT(&ipfw_gd.ipfw_trkcnt_tree); IPFW_TRKCNT_TOKINIT; @@ -5483,6 +6244,8 @@ ipfw_ctx_fini_dispatch(netmsg_t nmsg) netisr_dropmsg(&ctx->ipfw_keepalive_nm); crit_exit(); + ipfw_table_flushall_oncpu(ctx, 1); + netisr_forwardmsg(&nmsg->base, mycpuid + 1); } diff --git a/sys/net/ipfw/ip_fw2.h b/sys/net/ipfw/ip_fw2.h index 275bb61377..9245315aeb 100644 --- a/sys/net/ipfw/ip_fw2.h +++ b/sys/net/ipfw/ip_fw2.h @@ -112,6 +112,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_TEE, /* arg1=port number */ O_FORWARD_IP, /* fwd sockaddr */ O_FORWARD_MAC, /* fwd mac */ + O_IP_SRC_TABLE, /* arg1 = tableid */ + O_IP_DST_TABLE, /* arg1 = tableid */ O_LAST_OPCODE /* not an opcode! */ }; @@ -464,4 +466,55 @@ struct ipfw_ioc_state { #define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */ +/* + * IP_FW_TBL_CREATE, tableid >= 0. 
+ * IP_FW_TBL_FLUSH, tableid >= 0. + * IP_FW_TBL_FLUSH, tableid < 0, flush all tables. + * IP_FW_TBL_DESTROY, tableid >= 0. + * IP_FW_TBL_ZERO, tableid >= 0. + * IP_FW_TBL_ZERO, tableid < 0, zero all tables' counters. + */ +struct ipfw_ioc_table { + int tableid; +}; + +struct ipfw_ioc_tblent { + struct sockaddr_in key; + struct sockaddr_in netmask; + u_long use; + time_t last_used; + long unused[2]; +}; + +/* + * IP_FW_TBL_GET, tableid < 0, list of all tables. + */ +struct ipfw_ioc_tbllist { + int tableid; /* MUST be the first field */ + int tablecnt; + uint16_t tables[]; +}; + +/* + * IP_FW_TBL_GET, tableid >= 0, entries in the table. + * IP_FW_TBL_ADD, tableid >= 0, entcnt == 1. + * IP_FW_TBL_DEL, tableid >= 0, entcnt == 1. + */ +struct ipfw_ioc_tblcont { + int tableid; /* MUST be the first field */ + int entcnt; + struct ipfw_ioc_tblent ent[1]; +}; + +/* + * IP_FW_TBL_EXPIRE, tableid < 0, expire all tables. + * IP_FW_TBL_EXPIRE, tableid >= 0. + */ +struct ipfw_ioc_tblexp { + int tableid; + int expcnt; + time_t expire; + u_long unused1[2]; +}; + #endif /* _IPFW2_H */ diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 46de27ad70..68108429c2 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -350,6 +350,15 @@ struct sockaddr_in { #define IP_IPSEC_POLICY 21 /* int; set/get security policy */ #define IP_FAITH 22 /* bool; accept FAITH'ed connections */ +#define IP_FW_TBL_CREATE 40 /* create ipfw table */ +#define IP_FW_TBL_DESTROY 41 /* destroy ipfw table */ +#define IP_FW_TBL_ADD 42 /* add network/host to ipfw table */ +#define IP_FW_TBL_DEL 43 /* delete network/host from ipfw table */ +#define IP_FW_TBL_FLUSH 44 /* flush ipfw table */ +#define IP_FW_TBL_GET 45 /* list/show ipfw table */ +#define IP_FW_TBL_ZERO 46 /* clear ipfw table counters */ +#define IP_FW_TBL_EXPIRE 47 /* expire addresses in ipfw table */ + #define IP_FW_X 49 /* ipfw2 firewall */ #define IP_FW_ADD 50 /* add a firewall rule to chain */ #define IP_FW_DEL 51 /* delete a firewall rule from 
chain */ diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index a74c445545..ae0293fd26 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -377,6 +377,8 @@ rip_ctloutput(netmsg_t msg) case IP_FW_ADD: /* ADD actually returns the body... */ case IP_FW_GET: + case IP_FW_TBL_GET: + case IP_FW_TBL_EXPIRE: /* returns # of expired addresses */ error = ip_fw_sockopt(sopt); break; @@ -429,6 +431,13 @@ rip_ctloutput(netmsg_t msg) case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: + case IP_FW_TBL_CREATE: + case IP_FW_TBL_DESTROY: + case IP_FW_TBL_ADD: + case IP_FW_TBL_DEL: + case IP_FW_TBL_FLUSH: + case IP_FW_TBL_ZERO: + case IP_FW_TBL_EXPIRE: error = ip_fw_sockopt(sopt); break; -- 2.41.0