systat - Fix initial pftop state
[dragonfly.git] / usr.bin / systat / pftop.c
1 /*
2  * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/queue.h>
36 #include <sys/tree.h>
37 #include <sys/socket.h>
38 #include <sys/socketvar.h>
39 #include <sys/protosw.h>
40 #include <sys/sysctl.h>
41 #include <sys/endian.h>
42
43 #include <netinet/in.h>
44 #include <arpa/inet.h>
45 #include <net/route.h>
46 #include <net/if.h>
47 #include <net/pf/pfvar.h>
48 #include <netinet/in_systm.h>
49 #include <netinet/ip.h>
50 #ifdef INET6
51 #include <netinet/ip6.h>
52 #endif
53 #include <netinet/in_pcb.h>
54 #include <netinet/ip_icmp.h>
55 #include <netinet/icmp_var.h>
56 #include <netinet/ip_var.h>
57 #include <netinet/tcp.h>
58 #include <netinet/tcpip.h>
59 #include <netinet/tcp_seq.h>
60 #include <netinet/tcp_fsm.h>
61 #include <netinet/tcp_timer.h>
62 #include <netinet/tcp_var.h>
63 #include <netinet/tcp_debug.h>
64 #include <netinet/udp.h>
65 #include <netinet/udp_var.h>
66
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <unistd.h>
71 #include <fcntl.h>
72 #include <nlist.h>
73 #include <paths.h>
74 #include <err.h>
75 #include <errno.h>
76 #include <netdb.h>
77
78 #include "systat.h"
79 #include "extern.h"
80
/*
 * One tracked pf state.  Entries live in mypf_tree (ordered by
 * mypfstate_cmp) and are created/refreshed by updatestate().
 */
81 struct mypfstate {
82         RB_ENTRY(mypfstate)     rb_node;        /* mypfstate_tree linkage */
83         int                     seq;            /* tcp_pcb_seq of the last fetch that saw this state */
84         double                  save_bw;        /* decayed peak of best_bw; sort key and display filter */
85         double                  best_bw;        /* most recent DELTARATE(bytes[0]) + DELTARATE(bytes[1]) */
86         struct pfsync_state     state;          /* snapshot from the latest fetch */
87         struct pfsync_state     last_state;     /* snapshot from the fetch before that */
88 };
89
/*
 * delta_time: divisor used by DELTARATE().  showpftop() recomputes it
 * from tv_curr - tv_last and clamps it to at least 0.1.
 *
 * highestbw: largest save_bw seen during the current fetch; reset to 0
 * by fetchpftop(), raised by updatestate(), and passed to numtok() as
 * the scaling template for the rate columns.
 */
90 double delta_time = 1.0;        /* for DELTARATE() initial state */
91 double highestbw;
92
93 static int
94 mypfstate_cmp(struct mypfstate *pf1, struct mypfstate *pf2)
95 {
96         struct pfsync_state_key *nk1, *nk2;
97         int r;
98
99         if (pf1->state.proto < pf2->state.proto)
100                 return(-1);
101         if (pf1->state.proto > pf2->state.proto)
102                 return(1);
103
104         if (pf1->state.direction == PF_OUT) {
105                 nk1 = &pf1->state.key[PF_SK_WIRE];
106         } else {
107                 nk1 = &pf1->state.key[PF_SK_STACK];
108         }
109         if (pf2->state.direction == PF_OUT) {
110                 nk2 = &pf2->state.key[PF_SK_WIRE];
111         } else {
112                 nk2 = &pf2->state.key[PF_SK_STACK];
113         }
114         if (pf1->state.proto == IPPROTO_TCP ||
115             pf1->state.proto == IPPROTO_UDP ||
116             pf1->state.proto == IPPROTO_ICMP ||
117             pf1->state.proto == IPPROTO_ICMPV6) {
118                 if (ntohs(nk1->port[0]) >= 1024 &&
119                     ntohs(nk2->port[0]) >= 1024) {
120                         if (ntohs(nk1->port[1]) < ntohs(nk2->port[1]))
121                                 return(-1);
122                         if (ntohs(nk1->port[1]) > ntohs(nk2->port[1]))
123                                 return(1);
124                 }
125                 if (ntohs(nk1->port[0]) < ntohs(nk2->port[0]))
126                         return(-1);
127                 if (ntohs(nk1->port[0]) > ntohs(nk2->port[0]))
128                         return(1);
129                 if (ntohs(nk1->port[1]) < ntohs(nk2->port[1]))
130                         return(-1);
131                 if (ntohs(nk1->port[1]) > ntohs(nk2->port[1]))
132                         return(1);
133         }
134
135         /*
136          * Sort IPV4 vs IPV6 addresses
137          */
138         if (pf1->state.af < pf2->state.af)
139                 return(-1);
140         if (pf1->state.af > pf2->state.af)
141                 return(1);
142
143         /*
144          * Local and foreign addresses
145          */
146         if (pf1->state.af == AF_INET) {
147                 if (ntohl(nk1->addr[0].v4.s_addr) <
148                     ntohl(nk2->addr[0].v4.s_addr))
149                         return(-1);
150                 if (ntohl(nk1->addr[0].v4.s_addr) >
151                     ntohl(nk2->addr[0].v4.s_addr))
152                         return(1);
153                 if (ntohl(nk1->addr[1].v4.s_addr) <
154                     ntohl(nk2->addr[1].v4.s_addr))
155                         return(-1);
156                 if (ntohl(nk1->addr[1].v4.s_addr) >
157                     ntohl(nk2->addr[1].v4.s_addr))
158                         return(1);
159         } else if (pf1->state.af == AF_INET6) {
160                 r = bcmp(&nk1->addr[0].v6,
161                          &nk2->addr[0].v6,
162                          sizeof(nk1->addr[0].v6));
163                 if (r)
164                         return(r);
165         } else {
166                 r = bcmp(&nk1->addr[0].v6,
167                          &nk2->addr[0].v6,
168                          sizeof(nk1->addr[0].v6));
169                 if (r)
170                         return(r);
171         }
172
173         /*
174          * Unique Identifier to prevent overloading which messes up
175          * the bandwidth calculations.
176          */
177         return (memcmp(pf1->state.id, pf2->state.id, sizeof(pf1->state.id)));
178 }
179
/*
 * Red-black tree of struct mypfstate, linked through rb_node and
 * ordered by mypfstate_cmp().
 */
180 struct mypfstate_tree;
181 RB_HEAD(mypfstate_tree, mypfstate);
182 RB_PROTOTYPE(mypfstate_tree, mypfstate, rb_node, mypfstate_cmp);
183 RB_GENERATE(mypfstate_tree, mypfstate, rb_node, mypfstate_cmp);
184
185 static struct mypfstate_tree mypf_tree;        /* all tracked states */
186 static struct timeval tv_curr;                 /* time of the latest fetchpftop() */
187 static struct timeval tv_last;                 /* time of the fetch before that */
188 static int tcp_pcb_seq;                        /* bumped once per fetch; stamped into mypfstate.seq */
189
190 static const char *numtok(double value, double template);
191 static const char *netaddrstr(sa_family_t af, struct pf_addr *addr,
192                         u_int16_t port);
193 static const char *statestr(int proto);
194 static void updatestate(struct pfsync_state *state);
195 static int statebwcmp(const void *data1, const void *data2);
196
/*
 * Counter accessors.  Both macros expand to expressions that reference
 * a variable named `elm' (a struct mypfstate *) in the calling scope.
 *
 * GETBYTES64(field): reads elm->state.field through a uint64_t pointer
 *      and converts it from big-endian to host order.
 * DELTARATE(field):  (current - previous) counter value divided by
 *      delta_time.  The subtraction is done in uint64_t.
 */
197 #define GETBYTES64(field)       \
198         (be64toh(*(uint64_t *)elm->state.field))
199 #define DELTARATE(field)        \
200         ((double)(be64toh(*(uint64_t *)elm->state.field) - \
201                   be64toh(*(uint64_t *)elm->last_state.field)) / delta_time)
202
203 WINDOW *
204 openpftop(void)
205 {
206         RB_INIT(&mypf_tree);
207         return (subwin(stdscr, LINES-0-1, 0, 0, 0));
208 }
209
210 void
211 closepftop(WINDOW *w)
212 {
213         struct mypfstate *mypf;
214
215         while ((mypf = RB_ROOT(&mypf_tree)) != NULL) {
216                 RB_REMOVE(mypfstate_tree, &mypf_tree, mypf);
217                 free(mypf);
218         }
219
220         if (w != NULL) {
221                 wclear(w);
222                 wrefresh(w);
223                 delwin(w);
224         }
225 }
226
/*
 * Display initialisation hook.  Nothing to set up here; always
 * returns 1.
 */
int
initpftop(void)
{
        return 1;
}
232
233 void
234 fetchpftop(void)
235 {
236         struct pfioc_states ps;
237         struct pfsync_state *states;
238         size_t nstates;
239         size_t i;
240         int fd;
241
242         fd = open("/dev/pf", O_RDONLY);
243         if (fd < 0)
244                 return;
245
246         /*
247          * Extract PCB list
248          */
249         bzero(&ps, sizeof(ps));
250         if (ioctl(fd, DIOCGETSTATES, &ps) < 0) {
251                 close(fd);
252                 return;
253         }
254         ps.ps_len += 1024 * 1024;
255         ps.ps_buf = malloc(ps.ps_len);
256         if (ioctl(fd, DIOCGETSTATES, &ps) < 0) {
257                 free(ps.ps_buf);
258                 close(fd);
259                 return;
260         }
261
262         states = (void *)ps.ps_buf;
263         nstates = ps.ps_len / sizeof(*states);
264
265         ++tcp_pcb_seq;
266
267         highestbw = 0.0;
268         for (i = 0; i < nstates; ++i)
269                 updatestate(&states[i]);
270         free(ps.ps_buf);
271         close(fd);
272         states = NULL;
273         fd = -1;
274
275         tv_last = tv_curr;
276         gettimeofday(&tv_curr, NULL);
277 }
278
279 void
280 labelpftop(void)
281 {
282         wmove(wnd, 0, 0);
283         wclrtobot(wnd);
284 #if 0
285         mvwaddstr(wnd, 0, LADDR, "Local Address");
286         mvwaddstr(wnd, 0, FADDR, "Foreign Address");
287         mvwaddstr(wnd, 0, PROTO, "Proto");
288         mvwaddstr(wnd, 0, RCVCC, "Recv-Q");
289         mvwaddstr(wnd, 0, SNDCC, "Send-Q");
290         mvwaddstr(wnd, 0, STATE, "(state)");
291 #endif
292 }
293
294 void
295 showpftop(void)
296 {
297         struct mypfstate *elm;
298         struct mypfstate *delm;
299         struct mypfstate **array;
300         size_t i;
301         size_t n;
302         struct pfsync_state_key *nk;
303         int row;
304         int rxdir;
305         int txdir;
306
307         delta_time = (double)(tv_curr.tv_sec - tv_last.tv_sec) - 1.0 +
308                      (tv_curr.tv_usec + 1000000 - tv_last.tv_usec) / 1e6;
309         if (delta_time < 0.1) {
310                 delta_time = 0.1;       /* don't implode DELTARATE */
311                 return;
312         }
313
314         /*
315          * Delete and collect pass
316          */
317         delm = NULL;
318         i = 0;
319         n = 1024;
320         array = malloc(n * sizeof(*array));
321
322         RB_FOREACH(elm, mypfstate_tree, &mypf_tree) {
323                 if (delm) {
324                         RB_REMOVE(mypfstate_tree, &mypf_tree, delm);
325                         free(delm);
326                         delm = NULL;
327                 }
328
329                 if (elm->seq == tcp_pcb_seq && elm->save_bw > 0) {
330                         array[i++] = elm;
331                         if (i == n) {
332                                 n *= 2;
333                                 array = realloc(array, n * sizeof(*array));
334                         }
335                 } else if (elm->seq != tcp_pcb_seq) {
336                         delm = elm;
337                 }
338         }
339         if (delm) {
340                 RB_REMOVE(mypfstate_tree, &mypf_tree, delm);
341                 free(delm);
342                 delm = NULL;
343         }
344         qsort(array, i, sizeof(array[0]), statebwcmp);
345
346         row = 2;
347         n = i;
348         for (i = 0; i < n; ++i) {
349                 int64_t ttl;
350
351                 elm = array[i];
352                 if (elm->state.direction == PF_OUT) {
353                         nk = &elm->state.key[PF_SK_WIRE];
354                         rxdir = 0;
355                         txdir = 1;
356                 } else {
357                         nk = &elm->state.key[PF_SK_STACK];
358                         rxdir = 1;
359                         txdir = 0;
360                 }
361                 ttl = GETBYTES64(bytes[0]) + GETBYTES64(bytes[1]);
362                 mvwprintw(wnd, row, 0,
363                           "%s %s | %s "
364                           /*"rxb %s txb %s "*/
365                           "rcv %s snd %s ttl %s",
366                           statestr(elm->state.proto),
367                           netaddrstr(elm->state.af, &nk->addr[0], nk->port[0]),
368                           netaddrstr(elm->state.af, &nk->addr[1], nk->port[1]),
369                           numtok(DELTARATE(bytes[rxdir]), highestbw),
370                           numtok(DELTARATE(bytes[txdir]), highestbw),
371                           numtok(ttl, ttl)
372                 );
373 #if 0
374                 mvwprintw(wnd, row, 0,
375                           "%s %s %s "
376                           /*"rxb %s txb %s "*/
377                           "rcv %jd-%jd snd %jd-%jd ",
378                           statestr(elm->state.proto),
379                           netaddrstr(elm->state.af, &nk->addr[0], nk->port[0]),
380                           netaddrstr(elm->state.af, &nk->addr[1], nk->port[1]),
381                           be64toh(*(uint64_t *)elm->state.bytes[0]),
382                           be64toh(*(uint64_t *)elm->last_state.bytes[0]),
383                           be64toh(*(uint64_t *)elm->state.bytes[1]),
384                           be64toh(*(uint64_t *)elm->last_state.bytes[1])
385                 );
386 #endif
387                 wclrtoeol(wnd);
388                 if (++row >= LINES-3)
389                         break;
390         }
391         free(array);
392         wmove(wnd, row, 0);
393         wclrtobot(wnd);
394         mvwprintw(wnd, LINES-2, 0, "Rate bytes/sec, active pf states");
395 }
396
397 /*
398  * Sort by total bytes transfered, highest first
399  */
400 static
401 int
402 statebwcmp(const void *data1, const void *data2)
403 {
404         const struct mypfstate *elm1 = *__DECONST(struct mypfstate **, data1);
405         const struct mypfstate *elm2 = *__DECONST(struct mypfstate **, data2);
406         double dv;
407
408         dv = elm1->save_bw - elm2->save_bw;
409         if (dv < 0)
410                 return 1;
411         if (dv > 0)
412                 return -1;
413         return 0;
414 }
415
/*
 * Compiled out: command hook that would fetch, redraw and refresh
 * regardless of its arguments.
 */
416 #if 0
417 int
418 cmdpftop(const char *cmd __unused, char *args __unused)
419 {
420         fetchpftop();
421         showpftop();
422         refresh();
423
424         return (0);
425 }
426 #endif
427
/* Number of rotating result buffers kept by the formatting helpers. */
#define MAXINDEXES 8

/*
 * Format `value' as a fixed 6-character token with a one-character
 * magnitude suffix (" ", K, M, G, T).
 *
 * `template' selects the scale: both numbers are divided by 1000 until
 * the template drops below 1000 or the suffixes run out, and the
 * remaining template magnitude picks the number of decimals.  A scaled
 * value below 0.001 yields six blanks; a template that is still not
 * below 1000 yields "<huge>".
 *
 * The result points into one of MAXINDEXES static buffers used in
 * rotation, so up to MAXINDEXES results may be live at once.
 */
static
const char *
numtok(double value, double template)
{
        static char buf[MAXINDEXES][32];
        static int nexti;
        static const char *suffixes[] = { " ", "K", "M", "G", "T", NULL };
        int suffix;
        int decimals;
        char *out;

        for (suffix = 0;
             template >= 1000.0 && suffixes[suffix + 1] != NULL;
             ++suffix) {
                value /= 1000.0;
                template /= 1000.0;
        }

        nexti = (nexti + 1) % MAXINDEXES;
        out = buf[nexti];

        if (value < 0.001) {
                snprintf(out, sizeof(buf[0]), "%s", "      ");
                return (out);
        }
        if (!(template < 1000.0)) {
                snprintf(out, sizeof(buf[0]), "%s", "<huge>");
                return (out);
        }

        if (template < 10.0)
                decimals = 3;
        else if (template < 100.0)
                decimals = 2;
        else
                decimals = 1;

        snprintf(out, sizeof(buf[0]), "%5.*f%s",
                 decimals, value, suffixes[suffix]);
        return (out);
}
463
464 static const char *
465 netaddrstr(sa_family_t af, struct pf_addr *addr, u_int16_t port)
466 {
467         static char buf[MAXINDEXES][64];
468         static int nexta;
469         char bufip[64];
470
471         nexta = (nexta + 1) % MAXINDEXES;
472
473         port = ntohs(port);
474
475         if (af == AF_INET) {
476                 snprintf(bufip, sizeof(bufip),
477                          "%d.%d.%d.%d",
478                          (ntohl(addr->v4.s_addr) >> 24) & 255,
479                          (ntohl(addr->v4.s_addr) >> 16) & 255,
480                          (ntohl(addr->v4.s_addr) >> 8) & 255,
481                          (ntohl(addr->v4.s_addr) >> 0) & 255);
482                 snprintf(buf[nexta], sizeof(buf[nexta]),
483                          "%-20s %-5d", bufip, port);
484         } else if (af == AF_INET6) {
485 #if defined(PFTOP_WIDE)
486                 snprintf(bufip, sizeof(bufip),
487                          "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
488                          ntohs(addr->v6.s6_addr16[0]),
489                          ntohs(addr->v6.s6_addr16[1]),
490                          ntohs(addr->v6.s6_addr16[2]),
491                          ntohs(addr->v6.s6_addr16[3]),
492                          ntohs(addr->v6.s6_addr16[4]),
493                          ntohs(addr->v6.s6_addr16[5]),
494                          ntohs(addr->v6.s6_addr16[6]),
495                          ntohs(addr->v6.s6_addr16[7]));
496                 snprintf(buf[nexta], sizeof(buf[nexta]),
497                          "%39s %-5d", bufip, port);
498 #else
499                 snprintf(bufip, sizeof(bufip),
500                          "%04x:%04x--%04x:%04x",
501                          ntohs(addr->v6.s6_addr16[0]),
502                          ntohs(addr->v6.s6_addr16[1]),
503                          ntohs(addr->v6.s6_addr16[6]),
504                          ntohs(addr->v6.s6_addr16[7]));
505                 snprintf(buf[nexta], sizeof(buf[nexta]),
506                          "%20s %-5d", bufip, port);
507 #endif
508         } else {
509                 snprintf(bufip, sizeof(bufip), "<unknown>:%-5d", port);
510                 snprintf(buf[nexta], sizeof(buf[nexta]),
511                          "%15s:%-5d", bufip, port);
512         }
513         return (buf[nexta]);
514 }
515
516 static
517 void
518 updatestate(struct pfsync_state *state)
519 {
520         struct mypfstate dummy;
521         struct mypfstate *elm;
522
523         dummy.state = *state;
524         if ((elm = RB_FIND(mypfstate_tree, &mypf_tree, &dummy)) == NULL) {
525                 elm = malloc(sizeof(*elm));
526                 bzero(elm, sizeof(*elm));
527                 elm->state = *state;
528                 elm->last_state = *state;
529                 elm->best_bw = DELTARATE(bytes[0]) + DELTARATE(bytes[1]);
530                 elm->save_bw = elm->best_bw;
531                 bzero(elm->last_state.bytes,
532                         sizeof(elm->last_state.bytes));
533                 bzero(elm->last_state.packets,
534                         sizeof(elm->last_state.packets));
535                 RB_INSERT(mypfstate_tree, &mypf_tree, elm);
536                 if (highestbw < elm->save_bw)
537                         highestbw = elm->save_bw;
538         } else {
539                 elm->last_state = elm->state;
540                 elm->state = *state;
541                 elm->best_bw = DELTARATE(bytes[0]) + DELTARATE(bytes[1]);
542                 if (elm->save_bw < elm->best_bw)
543                         elm->save_bw = elm->best_bw;
544                 else
545                         elm->save_bw = (elm->save_bw * 7 + elm->best_bw) / 8;
546                 if (highestbw < elm->save_bw)
547                         highestbw = elm->save_bw;
548         }
549         elm->seq = tcp_pcb_seq;
550 }
551
/*
 * Return a 5-column label for an IP protocol number.  TCP, UDP, ICMP
 * and ICMPv6 map to fixed names; anything else is printed as a
 * left-justified decimal number into a single static buffer (so that
 * result is overwritten by the next such call).
 */
const char *
statestr(int proto)
{
        static char buf[32];

        if (proto == IPPROTO_TCP)
                return ("tcp  ");
        if (proto == IPPROTO_UDP)
                return ("udp  ");
        if (proto == IPPROTO_ICMP)
                return ("icmp ");
        if (proto == IPPROTO_ICMPV6)
                return ("icmp6");

        snprintf(buf, sizeof(buf), "%-5d", proto);
        return buf;
}