2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Bill Yuan <bycn82@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <sys/sysctl.h>
43 #include <arpa/inet.h>
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_icmp.h>
65 #include <netinet/tcp.h>
67 #include <net/if_dl.h>
68 #include <net/route.h>
69 #include <net/ethernet.h>
71 #include <net/ipfw3/ip_fw3.h>
72 #include <net/ipfw3_basic/ip_fw3_table.h>
73 #include <net/ipfw3_basic/ip_fw3_sync.h>
74 #include <net/ipfw3_basic/ip_fw3_basic.h>
75 #include <net/ipfw3_nat/ip_fw3_nat.h>
76 #include <net/dummynet3/ip_dummynet3.h>
79 #include "ipfw3dummynet.h"
/*
 * Keyword table for the dummynet pipe/queue options: every entry pairs
 * an option word with its TOK_* code.  config_dummynet() resolves the
 * words it reads from the command line against this table by calling
 * match_token().
 */
90 struct char_int_map dummynet_params[] = {
92 { "noerror", TOK_NOERROR },
93 { "buckets", TOK_BUCKETS },
/* Field selectors accepted after the "mask" keyword. */
94 { "dst-ip", TOK_DSTIP },
95 { "src-ip", TOK_SRCIP },
96 { "dst-port", TOK_DSTPORT },
97 { "src-port", TOK_SRCPORT },
98 { "proto", TOK_PROTO },
99 { "weight", TOK_WEIGHT },
101 { "mask", TOK_MASK },
102 { "droptail", TOK_DROPTAIL },
104 { "gred", TOK_GRED },
106 { "bandwidth", TOK_BW },
107 { "delay", TOK_DELAY },
108 { "pipe", TOK_PIPE },
109 { "queue", TOK_QUEUE },
/* NOTE(review): presumably a marker entry naming the table -- confirm how match_token() treats TOK_NULL. */
110 { "dummynet-params", TOK_NULL },
116 sort_q(const void *pa, const void *pb)
118 int rev = (do_sort < 0);
119 int field = rev ? -do_sort : do_sort;
121 const struct dn_ioc_flowqueue *a = pa;
122 const struct dn_ioc_flowqueue *b = pb;
126 res = a->len - b->len;
129 res = a->len_bytes - b->len_bytes;
132 case 3: /* tot pkts */
133 res = a->tot_pkts - b->tot_pkts;
136 case 4: /* tot bytes */
137 res = a->tot_bytes - b->tot_bytes;
144 return (int)(rev ? res : -res);
150 * config dummynet pipe/queue
/*
 * Build a struct dn_ioc_pipe from the command-line words in av and
 * hand it to the kernel with IP_DUMMYNET_CONFIGURE.
 *
 * ac, av: count and vector of the remaining command-line words.
 *	Option keywords are resolved through dummynet_params.
 *
 * Malformed or out-of-range input ends the program through errx();
 * the do_set_x() failure path reports through err().
 */
153 config_dummynet(int ac, char **av)
155 struct dn_ioc_pipe pipe;
162 memset(&pipe, 0, sizeof pipe);
/*
 * NOTE(review): isdigit() requires a value representable as unsigned
 * char (or EOF); **av is a plain char and may be negative.
 */
164 if (ac && isdigit(**av)) {
/* Resolve the current word to its TOK_* code. */
176 int tok = match_token(dummynet_params, *av);
181 pipe.fs.flags_fs |= DN_NOERROR;
185 NEED1("plr needs argument 0..1\n");
186 d = strtod(av[0], NULL);
/* The loss rate is stored scaled so that 1.0 maps to 0x7fffffff. */
191 pipe.fs.plr = (int)(d*0x7fffffff);
196 NEED1("queue needs queue size\n");
/* getbw() may set DN_QSIZE_IS_BYTES in flags_fs when the size is given in bytes. */
198 pipe.fs.qsize = getbw(av[0], &pipe.fs.flags_fs, 1024);
203 NEED1("buckets needs argument\n");
204 pipe.fs.rq_size = strtoul(av[0], NULL, 0);
209 NEED1("mask needs mask specifier\n");
211 * per-flow queue, mask is dst_ip, dst_port,
212 * src_ip, src_port, proto measured in bits
/* Start from an IP flow mask with every field cleared. */
216 pipe.fs.flow_mask.type = ETHERTYPE_IP;
217 pipe.fs.flow_mask.u.ip.dst_ip = 0;
218 pipe.fs.flow_mask.u.ip.src_ip = 0;
219 pipe.fs.flow_mask.u.ip.dst_port = 0;
220 pipe.fs.flow_mask.u.ip.src_port = 0;
221 pipe.fs.flow_mask.u.ip.proto = 0;
/* p32/p16 point at the mask field that the next value is stored into. */
225 u_int32_t *p32 = NULL;
226 u_int16_t *p16 = NULL;
228 tok = match_token(dummynet_params, *av);
233 * special case, all bits significant
235 pipe.fs.flow_mask.u.ip.dst_ip = ~0;
236 pipe.fs.flow_mask.u.ip.src_ip = ~0;
237 pipe.fs.flow_mask.u.ip.dst_port = ~0;
238 pipe.fs.flow_mask.u.ip.src_port = ~0;
239 pipe.fs.flow_mask.u.ip.proto = ~0;
240 pipe.fs.flags_fs |= DN_HAVE_FLOW_MASK;
244 p32 = &pipe.fs.flow_mask.u.ip.dst_ip;
248 p32 = &pipe.fs.flow_mask.u.ip.src_ip;
252 p16 = &pipe.fs.flow_mask.u.ip.dst_port;
256 p16 = &pipe.fs.flow_mask.u.ip.src_port;
267 errx(EX_USAGE, "mask: value missing");
/* Skip the first character of the word and read a bit count. */
269 a = strtoul(av[0]+1, &end, 0);
/*
 * Turn the bit count into a mask of that many low-order bits.
 * NOTE(review): "1 << a" is undefined for a >= 31 with a 32-bit int
 * (only a == 32 is special-cased) -- validate the count first.
 */
270 a = (a == 32) ? ~0 : (1 << a) - 1;
272 a = strtoul(av[0], &end, 0);
275 else if (p16 != NULL) {
278 "mask: must be 16 bit");
283 "mask: must be 8 bit");
284 pipe.fs.flow_mask.u.ip.proto =
288 pipe.fs.flags_fs |= DN_HAVE_FLOW_MASK;
290 } /* end while, config masks */
297 NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
298 pipe.fs.flags_fs |= DN_IS_RED;
300 pipe.fs.flags_fs |= DN_IS_GENTLE_RED;
302 * the format for parameters is w_q/min_th/max_th/max_p
/* strsep() consumes av[0] in place, one '/'-separated piece per call. */
304 if ((end = strsep(&av[0], "/"))) {
305 double w_q = strtod(end, NULL);
306 if (w_q > 1 || w_q <= 0)
307 errx(EX_DATAERR, "0 < w_q <= 1");
/* Stored as fixed point with SCALE_RED fractional bits. */
308 pipe.fs.w_q = (int) (w_q * (1 << SCALE_RED));
310 if ((end = strsep(&av[0], "/"))) {
311 pipe.fs.min_th = strtoul(end, &end, 0);
/* A trailing 'K' or 'k' multiplies the threshold by 1024. */
312 if (*end == 'K' || *end == 'k')
313 pipe.fs.min_th *= 1024;
315 if ((end = strsep(&av[0], "/"))) {
316 pipe.fs.max_th = strtoul(end, &end, 0);
317 if (*end == 'K' || *end == 'k')
318 pipe.fs.max_th *= 1024;
320 if ((end = strsep(&av[0], "/"))) {
321 double max_p = strtod(end, NULL);
322 if (max_p > 1 || max_p <= 0)
323 errx(EX_DATAERR, "0 < max_p <= 1");
324 pipe.fs.max_p = (int)(max_p * (1 << SCALE_RED));
/* Clear both RED flags (NOTE(review): presumably the droptail case -- confirm). */
330 pipe.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
334 NEED1("bw needs bandwidth\n");
337 "bandwidth only valid for pipes");
339 * set bandwidth value
/* flags is NULL here, so getbw() is asked for a value in bits; 'k' scales by 1000. */
341 pipe.bandwidth = getbw(av[0], NULL, 1000);
342 if (pipe.bandwidth < 0)
343 errx(EX_DATAERR, "bandwidth too large");
349 errx(EX_DATAERR, "delay only valid for pipes");
350 NEED1("delay needs argument 0..10000ms\n");
351 pipe.delay = strtoul(av[0], NULL, 0);
358 "weight only valid for queues");
359 NEED1("weight needs argument 0..100\n");
360 pipe.fs.weight = strtoul(av[0], &end, 0);
366 errx(EX_DATAERR, "pipe only valid for queues");
367 NEED1("pipe needs pipe_number\n");
368 pipe.fs.parent_nr = strtoul(av[0], &end, 0);
373 errx(EX_DATAERR, "unrecognised option ``%s''", *av);
/*
 * Final range checks: the pipe limits come first, the queue limits
 * follow the else.
 * NOTE(review): a delay of exactly 10000 passes the check although the
 * message says "< 10000".
 */
377 if (pipe.pipe_nr == 0)
378 errx(EX_DATAERR, "pipe_nr must be > 0");
379 if (pipe.delay > 10000)
380 errx(EX_DATAERR, "delay must be < 10000");
381 } else { /* do_pipe == 2, queue */
382 if (pipe.fs.parent_nr == 0)
383 errx(EX_DATAERR, "pipe must be > 0");
384 if (pipe.fs.weight >100)
385 errx(EX_DATAERR, "weight must be <= 100");
/* Queue size limit depends on whether the size is counted in bytes. */
387 if (pipe.fs.flags_fs & DN_QSIZE_IS_BYTES) {
388 if (pipe.fs.qsize > 1024*1024)
389 errx(EX_DATAERR, "queue size must be < 1MB");
/* NOTE(review): only the upper bound is tested on this line; the message also promises a lower bound of 2 -- confirm. */
391 if (pipe.fs.qsize > 100)
392 errx(EX_DATAERR, "2 <= queue size <= 100");
/* RED needs extra derived values; the tunables are read from the kernel with sysctlbyname(). */
394 if (pipe.fs.flags_fs & DN_IS_RED) {
396 int lookup_depth, avg_pkt_size;
397 double s, idle, weight, w_q;
401 if (pipe.fs.min_th >= pipe.fs.max_th)
402 errx(EX_DATAERR, "min_th %d must be < than max_th %d",
403 pipe.fs.min_th, pipe.fs.max_th);
404 if (pipe.fs.max_th == 0)
405 errx(EX_DATAERR, "max_th must be > 0");
/*
 * NOTE(review): the sysctlbyname() failures below are reported with
 * errx(), which does not print errno; err() would say why the call
 * failed.
 */
408 if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth",
409 &lookup_depth, &len, NULL, 0) == -1)
411 errx(1, "sysctlbyname(\"%s\")",
412 "net.inet.ip.dummynet.red_lookup_depth");
413 if (lookup_depth == 0)
414 errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth"
415 " must be greater than zero");
418 if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size",
419 &avg_pkt_size, &len, NULL, 0) == -1)
421 errx(1, "sysctlbyname(\"%s\")",
422 "net.inet.ip.dummynet.red_avg_pkt_size");
423 if (avg_pkt_size == 0)
425 "net.inet.ip.dummynet.red_avg_pkt_size must"
426 " be greater than zero");
428 len = sizeof(clock_hz);
429 if (sysctlbyname("net.inet.ip.dummynet.hz", &clock_hz, &len,
431 errx(1, "sysctlbyname(\"%s\")",
432 "net.inet.ip.dummynet.hz");
436 * Ticks needed for sending a medium-sized packet.
437 * Unfortunately, when we are configuring a WF2Q+ queue, we
438 * do not have bandwidth information, because that is stored
439 * in the parent pipe, and also we have multiple queues
440 * competing for it. So we set s=0, which is not very
441 * correct. But on the other hand, why do we want RED with
444 if (pipe.bandwidth == 0) /* this is a WF2Q+ queue */
447 s = clock_hz * avg_pkt_size * 8 / pipe.bandwidth;
450 * max idle time (in ticks) before avg queue size becomes 0.
451 * NOTA: (3/w_q) is approx the value x so that
/* Convert the stored fixed-point w_q back to a fraction. */
454 w_q = ((double)pipe.fs.w_q) / (1 << SCALE_RED);
/* The cast applies to idle alone, so this is an integer division. */
456 pipe.fs.lookup_step = (int)idle / lookup_depth;
/* A step of zero is raised to one. */
457 if (!pipe.fs.lookup_step)
458 pipe.fs.lookup_step = 1;
460 for (t = pipe.fs.lookup_step; t > 0; --t)
462 pipe.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
/* Submit the finished configuration. */
464 i = do_set_x(IP_DUMMYNET_CONFIGURE, &pipe, sizeof pipe);
466 err(1, "do_set_x(%s)", "IP_DUMMYNET_CONFIGURE");
/*
 * Fetch the dummynet configuration with IP_DUMMYNET_GET into a heap
 * buffer that is enlarged on every pass of the loop, then print it
 * through show_pipes().
 *
 * ac, av: remaining command-line words, forwarded to show_pipes().
 */
471 show_dummynet(int ac, char *av[])
475 int nalloc = 1024; /* start somewhere... */
/*
 * Repeat with a bigger buffer while nbytes is not below the allocated
 * size (NOTE(review): presumably do_get_x() stores the reply length in
 * nbytes -- confirm).
 */
480 while (nbytes >= nalloc) {
481 nalloc = nalloc * 2 + 200;
/* A failed realloc() is fatal, so overwriting data with NULL cannot leak. */
483 if ((data = realloc(data, nbytes)) == NULL)
484 err(EX_OSERR, "realloc");
/*
 * NOTE(review): the request is always IP_DUMMYNET_GET, so the "FW"
 * alternative in the message below would name the wrong option.
 */
485 if (do_get_x(IP_DUMMYNET_GET, data, &nbytes) < 0) {
486 err(EX_OSERR, "do_get_x(IP_%s_GET)",
487 do_pipe ? "DUMMYNET" : "FW");
491 show_pipes(data, nbytes, ac, av);
/*
 * Print the pipes and then the queues (flow sets) held in the buffer
 * at data.
 *
 * data:   buffer walked first as struct dn_ioc_pipe records, each
 *	   followed by rq_elements struct dn_ioc_flowqueue entries,
 *	   and then as struct dn_ioc_flowset records laid out the same
 *	   way.
 * nbytes: bound tested by both loops against the record size.
 * ac, av: a number read from av restricts the output to the pipe
 *	   with that number.
 */
496 show_pipes(void *data, int nbytes, int ac, char *av[])
500 struct dn_ioc_pipe *p = (struct dn_ioc_pipe *)data;
501 struct dn_ioc_flowset *fs;
502 struct dn_ioc_flowqueue *q;
/* NOTE(review): confirm ac is checked before *av is read here. */
506 rulenum = strtoul(*av++, NULL, 10);
/*
 * NOTE(review): nbytes is an int compared against a size_t, so a
 * negative nbytes would be converted to a very large unsigned value.
 */
509 for (; nbytes >= sizeof(*p); p = (struct dn_ioc_pipe *)next) {
510 double b = p->bandwidth;
514 if (p->fs.fs_type != DN_IS_PIPE)
515 break; /* done with pipes, now queues */
518 * compute length, as pipe have variable size
520 l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
/* Arithmetic on void * is a compiler extension, not ISO C. */
521 next = (void *)p + l;
/* A rulenum of 0 means no filter. */
524 if (rulenum != 0 && rulenum != p->pipe_nr)
/* Format the bandwidth with a unit chosen by its magnitude. */
531 sprintf(buf, "unlimited");
532 else if (b >= 1000000)
533 sprintf(buf, "%7.3f Mbit/s", b/1000000);
535 sprintf(buf, "%7.3f Kbit/s", b/1000);
537 sprintf(buf, "%7.3f bit/s ", b);
/* NOTE(review): sprintf() is unbounded; snprintf() with the buffer size would be safer. */
539 sprintf(prefix, "%05d: %s %4d ms ",
540 p->pipe_nr, buf, p->delay);
541 show_flowset_parms(&p->fs, prefix);
/* The cast binds tighter than the shift: V is widened first, then shifted by MY_M. */
543 printf(" V %20ju\n", (uintmax_t)p->V >> MY_M);
/* The flow queue entries start right after the pipe record. */
545 q = (struct dn_ioc_flowqueue *)(p+1);
546 show_queues(&p->fs, q);
/* Second pass: flow set records, starting where the pipe walk stopped. */
549 for (fs = next; nbytes >= sizeof(*fs); fs = next) {
552 if (fs->fs_type != DN_IS_QUEUE)
554 l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
555 next = (void *)fs + l;
557 q = (struct dn_ioc_flowqueue *)(fs+1);
558 sprintf(prefix, "q%05d: weight %d pipe %d ",
559 fs->fs_nr, fs->weight, fs->parent_nr);
560 show_flowset_parms(fs, prefix);
/*
 * Print the flow mask of a flow set followed by one line per flow
 * queue.
 *
 * fs: flow set whose flow_mask and rq_elements are read.
 * q:  array of fs->rq_elements flow queue entries; heapsort() reorders
 *     it in place using sort_q().
 */
566 show_queues(struct dn_ioc_flowset *fs, struct dn_ioc_flowqueue *q)
570 printf("mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
571 fs->flow_mask.u.ip.proto,
572 fs->flow_mask.u.ip.src_ip, fs->flow_mask.u.ip.src_port,
573 fs->flow_mask.u.ip.dst_ip, fs->flow_mask.u.ip.dst_port);
574 if (fs->rq_elements == 0)
577 printf("BKT Prot ___Source IP/port____ "
578 "____Dest. IP/port____ Tot_pkt/bytes Pkt/Byte Drp\n");
/* NOTE(review): heapsort(3) can fail and return -1; its result is not used here. */
580 heapsort(q, fs->rq_elements, sizeof(*q), sort_q);
581 for (l = 0; l < fs->rq_elements; l++) {
/* The stored address goes through htonl() before inet_ntoa() formats it. */
585 ina.s_addr = htonl(q[l].id.u.ip.src_ip);
586 printf("%3d ", q[l].hash_slot);
/* Protocol name when one is known, the number otherwise (NOTE(review): confirm pe is NULL-checked before p_name is read). */
587 pe = getprotobynumber(q[l].id.u.ip.proto);
589 printf("%-4s ", pe->p_name);
591 printf("%4u ", q[l].id.u.ip.proto);
593 inet_ntoa(ina), q[l].id.u.ip.src_port);
594 ina.s_addr = htonl(q[l].id.u.ip.dst_ip);
596 inet_ntoa(ina), q[l].id.u.ip.dst_port);
/* Totals are widened to uintmax_t for %ju; len, len_bytes and drops are printed as unsigned. */
597 printf("%4ju %8ju %2u %4u %3u\n",
598 (uintmax_t)q[l].tot_pkts, (uintmax_t)q[l].tot_bytes,
599 q[l].len, q[l].len_bytes, q[l].drops);
601 printf(" S %20ju F %20ju\n",
602 (uintmax_t)q[l].S, (uintmax_t)q[l].F);
/*
 * Print a one-line summary of a flow set: the caller's prefix, the
 * queue size, the packet loss rate, the number of queues and buckets,
 * and either the RED parameters or "droptail".
 *
 * fs:     flow set to describe.
 * prefix: text printed at the start of the line.
 *
 * NOTE(review): every sprintf() below writes into a fixed-size local
 * buffer without a bound; snprintf() with sizeof would be safer.
 */
607 show_flowset_parms(struct dn_ioc_flowset *fs, char *prefix)
611 char red[90]; /* Display RED parameters */
/* Byte-sized queues are shown in KB or B, all others in slots ("sl."). */
615 if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
617 sprintf(qs, "%d KB", l / 1024);
619 sprintf(qs, "%d B", l);
621 sprintf(qs, "%3d sl.", l);
/* Undo the 0x7fffffff scaling applied when the loss rate was configured. */
623 sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff));
626 if (fs->flags_fs & DN_IS_RED) /* RED parameters */
628 "\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
629 (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
630 1.0 * fs->w_q / (double)(1 << SCALE_RED),
631 SCALE_VAL(fs->min_th),
632 SCALE_VAL(fs->max_th),
633 1.0 * fs->max_p / (double)(1 << SCALE_RED));
635 sprintf(red, "droptail");
637 printf("%s %s%s %d queues (%d buckets) %s\n",
638 prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
/*
 * Parse a number that may carry a scale letter (k/K, m/M) and a unit
 * suffix (bit, byte, b, B).
 *
 * str:   text to parse; strtoul() with base 0 reads the leading number.
 * flags: when non-NULL, DN_QSIZE_IS_BYTES is OR-ed in if the value was
 *	  given in bytes; callers pass NULL to get the result in bits.
 * kb:    callers in this file pass 1000 for bandwidth and 1024 for
 *	  queue size (NOTE(review): presumably the factor applied for
 *	  the 'k' scale letter -- confirm).
 *
 * NOTE(review): the strtoul() result is used without checking for
 * overflow or for input that contains no digits.
 */
642 getbw(const char *str, u_short *flags, int kb)
648 val = strtoul(str, &end, 0);
/* Optional scale letter directly after the number. */
649 if (*end == 'k' || *end == 'K') {
652 } else if (*end == 'm' || *end == 'M') {
658 * Deal with bits or bytes or b(bits) or B(bytes). If there is no
659 * trailer assume bits.
/* The word forms are matched case-insensitively; the one-letter forms are case-sensitive. */
661 if (strncasecmp(end, "bit", 3) == 0) {
663 } else if (strncasecmp(end, "byte", 4) == 0) {
665 } else if (*end == 'b') {
667 } else if (*end == 'B') {
672 * Return in bits if flags is NULL, else flag bits
673 * or bytes in flags and return the unconverted value.
675 if (inbytes && flags)
676 *flags |= DN_QSIZE_IS_BYTES;
677 else if (inbytes && flags == NULL)
/*
 * Flush: the command starts as IP_FW_FLUSH and can be replaced by
 * IP_DUMMYNET_FLUSH; after an interactive yes/no confirmation it is
 * issued through do_set_x().
 * NOTE(review): confirm the conditions that guard the
 * IP_DUMMYNET_FLUSH assignment, the prompt and the final message.
 */
686 int cmd = IP_FW_FLUSH;
688 cmd = IP_DUMMYNET_FLUSH;
693 printf("Are you sure? [yn] ");
/* Take the first character of the reply, upper-cased. */
696 c = toupper(getc(stdin));
/* Discard the remainder of the input line. */
697 while (c != '\n' && getc(stdin) != '\n')
699 return; /* and do not flush */
/* Ask again until the reply starts with Y or N. */
700 } while (c != 'Y' && c != 'N');
701 if (c == 'N') /* user said no */
/* A negative result from do_set_x() is fatal. */
704 if (do_set_x(cmd, NULL, 0) < 0 ) {
706 errx(EX_USAGE, "pipe/queue in use");
708 errx(EX_USAGE, "do_set_x(IP_FW_FLUSH) failed");
711 printf("Flushed all %s.\n", do_pipe ? "pipes" : "rules");
/*
 * Entry point for the pipe/queue sub-commands: av[0] selects "config",
 * "flush" or "show"; any other word is a usage error reported through
 * errx().
 *
 * ac, av: remaining command-line words; av[0] is the sub-command and
 *	   the whole vector is passed on to the config and show
 *	   handlers.
 *
 * Only strlen(av[0]) characters are compared, so any leading part of a
 * keyword is accepted as an abbreviation.
 * NOTE(review): an empty av[0] compares equal to "config" (length 0),
 * and *av is read without consulting ac -- confirm that callers always
 * supply a non-empty word.
 */
716 dummynet_main(int ac, char **av)
718 if (!strncmp(*av, "config", strlen(*av))) {
719 config_dummynet(ac, av);
720 } else if (!strncmp(*av, "flush", strlen(*av))) {
722 } else if (!strncmp(*av, "show", strlen(*av))) {
723 show_dummynet(ac, av);
725 errx(EX_USAGE, "bad ipfw pipe command `%s'", *av);