| Commit | Line | Data |
|---|---|---|
| b06ebda0 MD |
1 | /*- |
| 2 | * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org> | |
| 3 | * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net> | |
| 4 | * All rights reserved. | |
| 5 | * | |
| 6 | * Redistribution and use in source and binary forms, with or without | |
| 7 | * modification, are permitted provided that the following conditions | |
| 8 | * are met: | |
| 9 | * 1. Redistributions of source code must retain the above copyright | |
| 10 | * notice, this list of conditions and the following disclaimer. | |
| 11 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer in the | |
| 13 | * documentation and/or other materials provided with the distribution. | |
| 14 | * | |
| 15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
| 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
| 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 25 | * SUCH DAMAGE. | |
| 26 | * | |
| 27 | * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $ | |
| 5a975a3d MD |
28 | * $FreeBSD: src/sys/netgraph/netflow/netflow.c,v 1.29 2008/05/09 23:02:57 julian Exp $ |
| 29 | * $DragonFly: src/sys/netgraph7/netflow/netflow.c,v 1.2 2008/06/26 23:05:40 dillon Exp $ | |
| b06ebda0 MD |
30 | */ |
| 31 | ||
| b06ebda0 MD |
32 | #include <sys/param.h> |
| 33 | #include <sys/kernel.h> | |
| 34 | #include <sys/limits.h> | |
| 35 | #include <sys/mbuf.h> | |
| 36 | #include <sys/syslog.h> | |
| 37 | #include <sys/systm.h> | |
| 38 | #include <sys/socket.h> | |
| 39 | ||
| 40 | #include <machine/atomic.h> | |
| 41 | ||
| 42 | #include <net/if.h> | |
| 43 | #include <net/route.h> | |
| 44 | #include <netinet/in.h> | |
| 45 | #include <netinet/in_systm.h> | |
| 46 | #include <netinet/ip.h> | |
| 47 | #include <netinet/tcp.h> | |
| 48 | #include <netinet/udp.h> | |
| 49 | ||
| 5a975a3d MD |
50 | #include "ng_message.h" |
| 51 | #include "netgraph.h" | |
| b06ebda0 | 52 | |
| 5a975a3d MD |
53 | #include "netflow/netflow.h" |
| 54 | #include "netflow/ng_netflow.h" | |
| b06ebda0 MD |
55 | |
#define	NBUCKETS	(65536)		/* must be power of 2 */

/*
 * Bucket index for TCP or UDP packets: folds both addresses and both
 * ports into the range [0, NBUCKETS).
 *
 * Every argument is parenthesized so that compound expressions can be
 * passed safely.  Arguments are still evaluated more than once, so do
 * not pass expressions with side effects.
 */
#define	FULL_HASH(addr1, addr2, port1, port2)			\
	((((addr1) ^ ((addr1) >> 16) ^				\
	htons((addr2) ^ ((addr2) >> 16))) ^			\
	(port1) ^ htons(port2)) &				\
	(NBUCKETS - 1))

/*
 * Bucket index for all other IP packets: same folding as FULL_HASH(),
 * but over the two addresses only.
 */
#define	ADDR_HASH(addr1, addr2)					\
	(((addr1) ^ ((addr1) >> 16) ^				\
	htons((addr2) ^ ((addr2) >> 16))) &			\
	(NBUCKETS - 1))
| 70 | ||
/*
 * Shorthand predicates over a pointer to a flow entry.  The argument is
 * parenthesized so that any pointer expression may be passed.
 *
 * XXX: INACTIVE() and AGED() require a variable named priv to exist in
 * the namespace of the caller.
 */
#define	INACTIVE(fle)	(time_uptime - (fle)->f.last > priv->info.nfinfo_inact_t)
#define	AGED(fle)	(time_uptime - (fle)->f.first > priv->info.nfinfo_act_t)
#define	ISFREE(fle)	((fle)->f.packets == 0)

/*
 * 4 is a magical number: statistically the number of 4-packet flows is
 * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
 * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
 * of a reachable host and 4-packet otherwise.
 */
#define	SMALL(fle)	((fle)->f.packets <= 4)
| 84 | ||
/*
 * Cisco reports uptime in milliseconds.  That is a bad idea, since the
 * value overflows every 48+ days, but we do the same to stay compatible.
 * This macro is an overflowable multiplication by 1000, spelled out as
 * shifts: 1000 == 8 + 32 + 64 + 128 + 256 + 512.
 */
#define	MILLIUPTIME(t)	(((t) << 3) +	/*   8 */ \
			((t) << 5) +	/*  32 */ \
			((t) << 6) +	/*  64 */ \
			((t) << 7) +	/* 128 */ \
			((t) << 8) +	/* 256 */ \
			((t) << 9))	/* 512 */
| 96 | ||
| 97 | MALLOC_DECLARE(M_NETFLOW_HASH); | |
| 98 | MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash"); | |
| 99 | ||
| 100 | static int export_add(item_p, struct flow_entry *); | |
| 101 | static int export_send(priv_p, item_p, int flags); | |
| 102 | ||
| 103 | /* Generate hash for a given flow record. */ | |
| 104 | static __inline uint32_t | |
| 105 | ip_hash(struct flow_rec *r) | |
| 106 | { | |
| 107 | switch (r->r_ip_p) { | |
| 108 | case IPPROTO_TCP: | |
| 109 | case IPPROTO_UDP: | |
| 110 | return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr, | |
| 111 | r->r_sport, r->r_dport); | |
| 112 | default: | |
| 113 | return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr); | |
| 114 | } | |
| 115 | } | |
| 116 | ||
| 117 | /* This is callback from uma(9), called on alloc. */ | |
| 118 | static int | |
| 119 | uma_ctor_flow(void *mem, int size, void *arg, int how) | |
| 120 | { | |
| 121 | priv_p priv = (priv_p )arg; | |
| 122 | ||
| 123 | if (atomic_load_acq_32(&priv->info.nfinfo_used) >= CACHESIZE) | |
| 124 | return (ENOMEM); | |
| 125 | ||
| 126 | atomic_add_32(&priv->info.nfinfo_used, 1); | |
| 127 | ||
| 128 | return (0); | |
| 129 | } | |
| 130 | ||
| 131 | /* This is callback from uma(9), called on free. */ | |
| 132 | static void | |
| 133 | uma_dtor_flow(void *mem, int size, void *arg) | |
| 134 | { | |
| 135 | priv_p priv = (priv_p )arg; | |
| 136 | ||
| 137 | atomic_subtract_32(&priv->info.nfinfo_used, 1); | |
| 138 | } | |
| 139 | ||
| 140 | /* | |
| 141 | * Detach export datagram from priv, if there is any. | |
| 142 | * If there is no, allocate a new one. | |
| 143 | */ | |
| 144 | static item_p | |
| 145 | get_export_dgram(priv_p priv) | |
| 146 | { | |
| 147 | item_p item = NULL; | |
| 148 | ||
| 149 | mtx_lock(&priv->export_mtx); | |
| 150 | if (priv->export_item != NULL) { | |
| 151 | item = priv->export_item; | |
| 152 | priv->export_item = NULL; | |
| 153 | } | |
| 154 | mtx_unlock(&priv->export_mtx); | |
| 155 | ||
| 156 | if (item == NULL) { | |
| 157 | struct netflow_v5_export_dgram *dgram; | |
| 158 | struct mbuf *m; | |
| 159 | ||
| 5a975a3d | 160 | m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); |
| b06ebda0 MD |
161 | if (m == NULL) |
| 162 | return (NULL); | |
| 163 | item = ng_package_data(m, NG_NOFLAGS); | |
| 164 | if (item == NULL) | |
| 165 | return (NULL); | |
| 166 | dgram = mtod(m, struct netflow_v5_export_dgram *); | |
| 167 | dgram->header.count = 0; | |
| 168 | dgram->header.version = htons(NETFLOW_V5); | |
| 169 | ||
| 170 | } | |
| 171 | ||
| 172 | return (item); | |
| 173 | } | |
| 174 | ||
| 175 | /* | |
| 176 | * Re-attach incomplete datagram back to priv. | |
| 177 | * If there is already another one, then send incomplete. */ | |
| 178 | static void | |
| 179 | return_export_dgram(priv_p priv, item_p item, int flags) | |
| 180 | { | |
| 181 | /* | |
| 182 | * It may happen on SMP, that some thread has already | |
| 183 | * put its item there, in this case we bail out and | |
| 184 | * send what we have to collector. | |
| 185 | */ | |
| 186 | mtx_lock(&priv->export_mtx); | |
| 187 | if (priv->export_item == NULL) { | |
| 188 | priv->export_item = item; | |
| 189 | mtx_unlock(&priv->export_mtx); | |
| 190 | } else { | |
| 191 | mtx_unlock(&priv->export_mtx); | |
| 192 | export_send(priv, item, flags); | |
| 193 | } | |
| 194 | } | |
| 195 | ||
| 196 | /* | |
| 197 | * The flow is over. Call export_add() and free it. If datagram is | |
| 198 | * full, then call export_send(). | |
| 199 | */ | |
| 200 | static __inline void | |
| 201 | expire_flow(priv_p priv, item_p *item, struct flow_entry *fle, int flags) | |
| 202 | { | |
| 203 | if (*item == NULL) | |
| 204 | *item = get_export_dgram(priv); | |
| 205 | if (*item == NULL) { | |
| 206 | atomic_add_32(&priv->info.nfinfo_export_failed, 1); | |
| 207 | uma_zfree_arg(priv->zone, fle, priv); | |
| 208 | return; | |
| 209 | } | |
| 210 | if (export_add(*item, fle) > 0) { | |
| 211 | export_send(priv, *item, flags); | |
| 212 | *item = NULL; | |
| 213 | } | |
| 214 | uma_zfree_arg(priv->zone, fle, priv); | |
| 215 | } | |
| 216 | ||
| 217 | /* Get a snapshot of node statistics */ | |
| 218 | void | |
| 219 | ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i) | |
| 220 | { | |
| 221 | /* XXX: atomic */ | |
| 222 | memcpy((void *)i, (void *)&priv->info, sizeof(priv->info)); | |
| 223 | } | |
| 224 | ||
| 225 | /* | |
| 226 | * Insert a record into defined slot. | |
| 227 | * | |
| 228 | * First we get for us a free flow entry, then fill in all | |
| 229 | * possible fields in it. | |
| 230 | * | |
| 231 | * TODO: consider dropping hash mutex while filling in datagram, | |
| 232 | * as this was done in previous version. Need to test & profile | |
| 233 | * to be sure. | |
| 234 | */ | |
| 235 | static __inline int | |
| 236 | hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, | |
| 237 | int plen, uint8_t tcp_flags) | |
| 238 | { | |
| 239 | struct flow_entry *fle; | |
| 240 | struct sockaddr_in sin; | |
| 241 | struct rtentry *rt; | |
| 242 | ||
| 243 | mtx_assert(&hsh->mtx, MA_OWNED); | |
| 244 | ||
| 5a975a3d | 245 | fle = uma_zalloc_arg(priv->zone, priv, M_WAITOK | M_NULLOK); |
| b06ebda0 MD |
246 | if (fle == NULL) { |
| 247 | atomic_add_32(&priv->info.nfinfo_alloc_failed, 1); | |
| 248 | return (ENOMEM); | |
| 249 | } | |
| 250 | ||
| 251 | /* | |
| 252 | * Now fle is totally ours. It is detached from all lists, | |
| 253 | * we can safely edit it. | |
| 254 | */ | |
| 255 | ||
| 256 | bcopy(r, &fle->f.r, sizeof(struct flow_rec)); | |
| 257 | fle->f.bytes = plen; | |
| 258 | fle->f.packets = 1; | |
| 259 | fle->f.tcp_flags = tcp_flags; | |
| 260 | ||
| 261 | fle->f.first = fle->f.last = time_uptime; | |
| 262 | ||
| 263 | /* | |
| 264 | * First we do route table lookup on destination address. So we can | |
| 265 | * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. | |
| 266 | */ | |
| 267 | bzero(&sin, sizeof(sin)); | |
| 268 | sin.sin_len = sizeof(struct sockaddr_in); | |
| 269 | sin.sin_family = AF_INET; | |
| 270 | sin.sin_addr = fle->f.r.r_dst; | |
| 271 | /* XXX MRT 0 as a default.. need the m here to get fib */ | |
| 272 | rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0); | |
| 273 | if (rt != NULL) { | |
| 274 | fle->f.fle_o_ifx = rt->rt_ifp->if_index; | |
| 275 | ||
| 276 | if (rt->rt_flags & RTF_GATEWAY && | |
| 277 | rt->rt_gateway->sa_family == AF_INET) | |
| 278 | fle->f.next_hop = | |
| 279 | ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr; | |
| 280 | ||
| 281 | if (rt_mask(rt)) | |
| 282 | fle->f.dst_mask = bitcount32(((struct sockaddr_in *) | |
| 283 | rt_mask(rt))->sin_addr.s_addr); | |
| 284 | else if (rt->rt_flags & RTF_HOST) | |
| 285 | /* Give up. We can't determine mask :( */ | |
| 286 | fle->f.dst_mask = 32; | |
| 287 | ||
| 288 | RTFREE_LOCKED(rt); | |
| 289 | } | |
| 290 | ||
| 291 | /* Do route lookup on source address, to fill in src_mask. */ | |
| 292 | bzero(&sin, sizeof(sin)); | |
| 293 | sin.sin_len = sizeof(struct sockaddr_in); | |
| 294 | sin.sin_family = AF_INET; | |
| 295 | sin.sin_addr = fle->f.r.r_src; | |
| 296 | /* XXX MRT 0 as a default revisit. need the mbuf for fib*/ | |
| 297 | rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0); | |
| 298 | if (rt != NULL) { | |
| 299 | if (rt_mask(rt)) | |
| 300 | fle->f.src_mask = bitcount32(((struct sockaddr_in *) | |
| 301 | rt_mask(rt))->sin_addr.s_addr); | |
| 302 | else if (rt->rt_flags & RTF_HOST) | |
| 303 | /* Give up. We can't determine mask :( */ | |
| 304 | fle->f.src_mask = 32; | |
| 305 | ||
| 306 | RTFREE_LOCKED(rt); | |
| 307 | } | |
| 308 | ||
| 309 | /* Push new flow at the and of hash. */ | |
| 310 | TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); | |
| 311 | ||
| 312 | return (0); | |
| 313 | } | |
| 314 | ||
| 315 | ||
| 316 | /* | |
| 317 | * Non-static functions called from ng_netflow.c | |
| 318 | */ | |
| 319 | ||
| 320 | /* Allocate memory and set up flow cache */ | |
| 321 | int | |
| 322 | ng_netflow_cache_init(priv_p priv) | |
| 323 | { | |
| 324 | struct flow_hash_entry *hsh; | |
| 325 | int i; | |
| 326 | ||
| 327 | /* Initialize cache UMA zone. */ | |
| 328 | priv->zone = uma_zcreate("NetFlow cache", sizeof(struct flow_entry), | |
| 329 | uma_ctor_flow, uma_dtor_flow, NULL, NULL, UMA_ALIGN_CACHE, 0); | |
| 330 | uma_zone_set_max(priv->zone, CACHESIZE); | |
| 331 | ||
| 332 | /* Allocate hash. */ | |
| fc025606 SW |
333 | priv->hash = kmalloc(NBUCKETS * sizeof(struct flow_hash_entry), |
| 334 | M_NETFLOW_HASH, M_WAITOK | M_ZERO); | |
| b06ebda0 MD |
335 | |
| 336 | if (priv->hash == NULL) { | |
| 337 | uma_zdestroy(priv->zone); | |
| 338 | return (ENOMEM); | |
| 339 | } | |
| 340 | ||
| 341 | /* Initialize hash. */ | |
| 342 | for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { | |
| 343 | mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); | |
| 344 | TAILQ_INIT(&hsh->head); | |
| 345 | } | |
| 346 | ||
| 347 | mtx_init(&priv->export_mtx, "export dgram lock", NULL, MTX_DEF); | |
| 348 | ||
| 349 | return (0); | |
| 350 | } | |
| 351 | ||
| 352 | /* Free all flow cache memory. Called from node close method. */ | |
| 353 | void | |
| 354 | ng_netflow_cache_flush(priv_p priv) | |
| 355 | { | |
| 356 | struct flow_entry *fle, *fle1; | |
| 357 | struct flow_hash_entry *hsh; | |
| 358 | item_p item = NULL; | |
| 359 | int i; | |
| 360 | ||
| 361 | /* | |
| 362 | * We are going to free probably billable data. | |
| 363 | * Expire everything before freeing it. | |
| 364 | * No locking is required since callout is already drained. | |
| 365 | */ | |
| 366 | for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) | |
| 367 | TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { | |
| 368 | TAILQ_REMOVE(&hsh->head, fle, fle_hash); | |
| 369 | expire_flow(priv, &item, fle, NG_QUEUE); | |
| 370 | } | |
| 371 | ||
| 372 | if (item != NULL) | |
| 373 | export_send(priv, item, NG_QUEUE); | |
| 374 | ||
| 375 | uma_zdestroy(priv->zone); | |
| 376 | ||
| 377 | /* Destroy hash mutexes. */ | |
| 378 | for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) | |
| 379 | mtx_destroy(&hsh->mtx); | |
| 380 | ||
| 381 | /* Free hash memory. */ | |
| 382 | if (priv->hash) | |
| fc025606 | 383 | kfree(priv->hash, M_NETFLOW_HASH); |
| b06ebda0 MD |
384 | |
| 385 | mtx_destroy(&priv->export_mtx); | |
| 386 | } | |
| 387 | ||
| 388 | /* Insert packet from into flow cache. */ | |
| 389 | int | |
| 390 | ng_netflow_flow_add(priv_p priv, struct ip *ip, iface_p iface, | |
| 391 | struct ifnet *ifp) | |
| 392 | { | |
| 393 | register struct flow_entry *fle, *fle1; | |
| 394 | struct flow_hash_entry *hsh; | |
| 395 | struct flow_rec r; | |
| 396 | item_p item = NULL; | |
| 397 | int hlen, plen; | |
| 398 | int error = 0; | |
| 399 | uint8_t tcp_flags = 0; | |
| 400 | ||
| 401 | /* Try to fill flow_rec r */ | |
| 402 | bzero(&r, sizeof(r)); | |
| 403 | /* check version */ | |
| 404 | if (ip->ip_v != IPVERSION) | |
| 405 | return (EINVAL); | |
| 406 | ||
| 407 | /* verify min header length */ | |
| 408 | hlen = ip->ip_hl << 2; | |
| 409 | ||
| 410 | if (hlen < sizeof(struct ip)) | |
| 411 | return (EINVAL); | |
| 412 | ||
| 413 | r.r_src = ip->ip_src; | |
| 414 | r.r_dst = ip->ip_dst; | |
| 415 | ||
| 416 | /* save packet length */ | |
| 417 | plen = ntohs(ip->ip_len); | |
| 418 | ||
| 419 | r.r_ip_p = ip->ip_p; | |
| 420 | r.r_tos = ip->ip_tos; | |
| 421 | ||
| 422 | /* Configured in_ifx overrides mbuf's */ | |
| 423 | if (iface->info.ifinfo_index == 0) { | |
| 424 | if (ifp != NULL) | |
| 425 | r.r_i_ifx = ifp->if_index; | |
| 426 | } else | |
| 427 | r.r_i_ifx = iface->info.ifinfo_index; | |
| 428 | ||
| 429 | /* | |
| 430 | * XXX NOTE: only first fragment of fragmented TCP, UDP and | |
| 431 | * ICMP packet will be recorded with proper s_port and d_port. | |
| 432 | * Following fragments will be recorded simply as IP packet with | |
| 433 | * ip_proto = ip->ip_p and s_port, d_port set to zero. | |
| 434 | * I know, it looks like bug. But I don't want to re-implement | |
| 435 | * ip packet assebmling here. Anyway, (in)famous trafd works this way - | |
| 436 | * and nobody complains yet :) | |
| 437 | */ | |
| 438 | if ((ip->ip_off & htons(IP_OFFMASK)) == 0) | |
| 439 | switch(r.r_ip_p) { | |
| 440 | case IPPROTO_TCP: | |
| 441 | { | |
| 442 | register struct tcphdr *tcp; | |
| 443 | ||
| 444 | tcp = (struct tcphdr *)((caddr_t )ip + hlen); | |
| 445 | r.r_sport = tcp->th_sport; | |
| 446 | r.r_dport = tcp->th_dport; | |
| 447 | tcp_flags = tcp->th_flags; | |
| 448 | break; | |
| 449 | } | |
| 450 | case IPPROTO_UDP: | |
| 451 | r.r_ports = *(uint32_t *)((caddr_t )ip + hlen); | |
| 452 | break; | |
| 453 | } | |
| 454 | ||
| 455 | /* Update node statistics. XXX: race... */ | |
| 456 | priv->info.nfinfo_packets ++; | |
| 457 | priv->info.nfinfo_bytes += plen; | |
| 458 | ||
| 459 | /* Find hash slot. */ | |
| 460 | hsh = &priv->hash[ip_hash(&r)]; | |
| 461 | ||
| 462 | mtx_lock(&hsh->mtx); | |
| 463 | ||
| 464 | /* | |
| 465 | * Go through hash and find our entry. If we encounter an | |
| 466 | * entry, that should be expired, purge it. We do a reverse | |
| 467 | * search since most active entries are first, and most | |
| 468 | * searches are done on most active entries. | |
| 469 | */ | |
| 470 | TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { | |
| 471 | if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0) | |
| 472 | break; | |
| 473 | if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) { | |
| 474 | TAILQ_REMOVE(&hsh->head, fle, fle_hash); | |
| 475 | expire_flow(priv, &item, fle, NG_QUEUE); | |
| 476 | atomic_add_32(&priv->info.nfinfo_act_exp, 1); | |
| 477 | } | |
| 478 | } | |
| 479 | ||
| 480 | if (fle) { /* An existent entry. */ | |
| 481 | ||
| 482 | fle->f.bytes += plen; | |
| 483 | fle->f.packets ++; | |
| 484 | fle->f.tcp_flags |= tcp_flags; | |
| 485 | fle->f.last = time_uptime; | |
| 486 | ||
| 487 | /* | |
| 488 | * We have the following reasons to expire flow in active way: | |
| 489 | * - it hit active timeout | |
| 490 | * - a TCP connection closed | |
| 491 | * - it is going to overflow counter | |
| 492 | */ | |
| 493 | if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) || | |
| 494 | (fle->f.bytes >= (UINT_MAX - IF_MAXMTU)) ) { | |
| 495 | TAILQ_REMOVE(&hsh->head, fle, fle_hash); | |
| 496 | expire_flow(priv, &item, fle, NG_QUEUE); | |
| 497 | atomic_add_32(&priv->info.nfinfo_act_exp, 1); | |
| 498 | } else { | |
| 499 | /* | |
| 500 | * It is the newest, move it to the tail, | |
| 501 | * if it isn't there already. Next search will | |
| 502 | * locate it quicker. | |
| 503 | */ | |
| 504 | if (fle != TAILQ_LAST(&hsh->head, fhead)) { | |
| 505 | TAILQ_REMOVE(&hsh->head, fle, fle_hash); | |
| 506 | TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); | |
| 507 | } | |
| 508 | } | |
| 509 | } else /* A new flow entry. */ | |
| 510 | error = hash_insert(priv, hsh, &r, plen, tcp_flags); | |
| 511 | ||
| 512 | mtx_unlock(&hsh->mtx); | |
| 513 | ||
| 514 | if (item != NULL) | |
| 515 | return_export_dgram(priv, item, NG_QUEUE); | |
| 516 | ||
| 517 | return (error); | |
| 518 | } | |
| 519 | ||
| 520 | /* | |
| 521 | * Return records from cache to userland. | |
| 522 | * | |
| 523 | * TODO: matching particular IP should be done in kernel, here. | |
| 524 | */ | |
| 525 | int | |
| 526 | ng_netflow_flow_show(priv_p priv, uint32_t last, struct ng_mesg *resp) | |
| 527 | { | |
| 528 | struct flow_hash_entry *hsh; | |
| 529 | struct flow_entry *fle; | |
| 530 | struct ngnf_flows *data; | |
| 531 | int i; | |
| 532 | ||
| 533 | data = (struct ngnf_flows *)resp->data; | |
| 534 | data->last = 0; | |
| 535 | data->nentries = 0; | |
| 536 | ||
| 537 | /* Check if this is a first run */ | |
| 538 | if (last == 0) { | |
| 539 | hsh = priv->hash; | |
| 540 | i = 0; | |
| 541 | } else { | |
| 542 | if (last > NBUCKETS-1) | |
| 543 | return (EINVAL); | |
| 544 | hsh = priv->hash + last; | |
| 545 | i = last; | |
| 546 | } | |
| 547 | ||
| 548 | /* | |
| 549 | * We will transfer not more than NREC_AT_ONCE. More data | |
| 550 | * will come in next message. | |
| 551 | * We send current hash index to userland, and userland should | |
| 552 | * return it back to us. Then, we will restart with new entry. | |
| 553 | * | |
| 554 | * The resulting cache snapshot is inaccurate for the | |
| 555 | * following reasons: | |
| 556 | * - we skip locked hash entries | |
| 557 | * - we bail out, if someone wants our entry | |
| 558 | * - we skip rest of entry, when hit NREC_AT_ONCE | |
| 559 | */ | |
| 560 | for (; i < NBUCKETS; hsh++, i++) { | |
| 561 | if (mtx_trylock(&hsh->mtx) == 0) | |
| 562 | continue; | |
| 563 | ||
| 564 | TAILQ_FOREACH(fle, &hsh->head, fle_hash) { | |
| 5a975a3d | 565 | if (mtx_contested(&hsh->mtx) |
| b06ebda0 MD |
566 | break; |
| 567 | ||
| 568 | bcopy(&fle->f, &(data->entries[data->nentries]), | |
| 569 | sizeof(fle->f)); | |
| 570 | data->nentries++; | |
| 571 | if (data->nentries == NREC_AT_ONCE) { | |
| 572 | mtx_unlock(&hsh->mtx); | |
| 573 | if (++i < NBUCKETS) | |
| 574 | data->last = i; | |
| 575 | return (0); | |
| 576 | } | |
| 577 | } | |
| 578 | mtx_unlock(&hsh->mtx); | |
| 579 | } | |
| 580 | ||
| 581 | return (0); | |
| 582 | } | |
| 583 | ||
| 584 | /* We have full datagram in privdata. Send it to export hook. */ | |
| 585 | static int | |
| 586 | export_send(priv_p priv, item_p item, int flags) | |
| 587 | { | |
| 588 | struct mbuf *m = NGI_M(item); | |
| 589 | struct netflow_v5_export_dgram *dgram = mtod(m, | |
| 590 | struct netflow_v5_export_dgram *); | |
| 591 | struct netflow_v5_header *header = &dgram->header; | |
| 592 | struct timespec ts; | |
| 593 | int error = 0; | |
| 594 | ||
| 595 | /* Fill mbuf header. */ | |
| 596 | m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) * | |
| 597 | header->count + sizeof(struct netflow_v5_header); | |
| 598 | ||
| 599 | /* Fill export header. */ | |
| 600 | header->sys_uptime = htonl(MILLIUPTIME(time_uptime)); | |
| 601 | getnanotime(&ts); | |
| 602 | header->unix_secs = htonl(ts.tv_sec); | |
| 603 | header->unix_nsecs = htonl(ts.tv_nsec); | |
| 604 | header->engine_type = 0; | |
| 605 | header->engine_id = 0; | |
| 606 | header->pad = 0; | |
| 607 | header->flow_seq = htonl(atomic_fetchadd_32(&priv->flow_seq, | |
| 608 | header->count)); | |
| 609 | header->count = htons(header->count); | |
| 610 | ||
| 611 | if (priv->export != NULL) | |
| 612 | NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags); | |
| 613 | else | |
| 614 | NG_FREE_ITEM(item); | |
| 615 | ||
| 616 | return (error); | |
| 617 | } | |
| 618 | ||
| 619 | ||
| 620 | /* Add export record to dgram. */ | |
| 621 | static int | |
| 622 | export_add(item_p item, struct flow_entry *fle) | |
| 623 | { | |
| 624 | struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item), | |
| 625 | struct netflow_v5_export_dgram *); | |
| 626 | struct netflow_v5_header *header = &dgram->header; | |
| 627 | struct netflow_v5_record *rec; | |
| 628 | ||
| 629 | rec = &dgram->r[header->count]; | |
| 630 | header->count ++; | |
| 631 | ||
| 632 | KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS, | |
| 633 | ("ng_netflow: export too big")); | |
| 634 | ||
| 635 | /* Fill in export record. */ | |
| 636 | rec->src_addr = fle->f.r.r_src.s_addr; | |
| 637 | rec->dst_addr = fle->f.r.r_dst.s_addr; | |
| 638 | rec->next_hop = fle->f.next_hop.s_addr; | |
| 639 | rec->i_ifx = htons(fle->f.fle_i_ifx); | |
| 640 | rec->o_ifx = htons(fle->f.fle_o_ifx); | |
| 641 | rec->packets = htonl(fle->f.packets); | |
| 642 | rec->octets = htonl(fle->f.bytes); | |
| 643 | rec->first = htonl(MILLIUPTIME(fle->f.first)); | |
| 644 | rec->last = htonl(MILLIUPTIME(fle->f.last)); | |
| 645 | rec->s_port = fle->f.r.r_sport; | |
| 646 | rec->d_port = fle->f.r.r_dport; | |
| 647 | rec->flags = fle->f.tcp_flags; | |
| 648 | rec->prot = fle->f.r.r_ip_p; | |
| 649 | rec->tos = fle->f.r.r_tos; | |
| 650 | rec->dst_mask = fle->f.dst_mask; | |
| 651 | rec->src_mask = fle->f.src_mask; | |
| 652 | ||
| 653 | /* Not supported fields. */ | |
| 654 | rec->src_as = rec->dst_as = 0; | |
| 655 | ||
| 656 | if (header->count == NETFLOW_V5_MAX_RECORDS) | |
| 657 | return (1); /* end of datagram */ | |
| 658 | else | |
| 659 | return (0); | |
| 660 | } | |
| 661 | ||
| 662 | /* Periodic flow expiry run. */ | |
| 663 | void | |
| 664 | ng_netflow_expire(void *arg) | |
| 665 | { | |
| 666 | struct flow_entry *fle, *fle1; | |
| 667 | struct flow_hash_entry *hsh; | |
| 668 | priv_p priv = (priv_p )arg; | |
| 669 | item_p item = NULL; | |
| 670 | uint32_t used; | |
| 671 | int i; | |
| 672 | ||
| 673 | /* | |
| 674 | * Going through all the cache. | |
| 675 | */ | |
| 676 | for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) { | |
| 677 | /* | |
| 678 | * Skip entries, that are already being worked on. | |
| 679 | */ | |
| 680 | if (mtx_trylock(&hsh->mtx) == 0) | |
| 681 | continue; | |
| 682 | ||
| 683 | used = atomic_load_acq_32(&priv->info.nfinfo_used); | |
| 684 | TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { | |
| 685 | /* | |
| 686 | * Interrupt thread wants this entry! | |
| 687 | * Quick! Quick! Bail out! | |
| 688 | */ | |
| 5a975a3d | 689 | if (mtx_contested(&hsh->mtx)) |
| b06ebda0 MD |
690 | break; |
| 691 | ||
| 692 | /* | |
| 693 | * Don't expire aggressively while hash collision | |
| 694 | * ratio is predicted small. | |
| 695 | */ | |
| 696 | if (used <= (NBUCKETS*2) && !INACTIVE(fle)) | |
| 697 | break; | |
| 698 | ||
| 699 | if ((INACTIVE(fle) && (SMALL(fle) || | |
| 700 | (used > (NBUCKETS*2)))) || AGED(fle)) { | |
| 701 | TAILQ_REMOVE(&hsh->head, fle, fle_hash); | |
| 702 | expire_flow(priv, &item, fle, NG_NOFLAGS); | |
| 703 | used--; | |
| 704 | atomic_add_32(&priv->info.nfinfo_inact_exp, 1); | |
| 705 | } | |
| 706 | } | |
| 707 | mtx_unlock(&hsh->mtx); | |
| 708 | } | |
| 709 | ||
| 710 | if (item != NULL) | |
| 711 | return_export_dgram(priv, item, NG_NOFLAGS); | |
| 712 | ||
| 713 | /* Schedule next expire. */ | |
| 714 | callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, | |
| 715 | (void *)priv); | |
| 716 | } |