From 84a3e25a9678d1a49d321533d3aa0620bbe87a7c Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 16 Nov 2007 02:45:45 +0000 Subject: [PATCH] CPU localize dummynet(4) step 1/2 CPU ip_dn_cpu CPU n1 +--------------------------+ +---------------------+ | netisr | | | | | | | | | +<---------------dn_descX----[ip_fw_dn_io_ptr()] | | | | | | | [ip_dn_io_ptr(dn_descX)] | | | | | | | | | | | | | | | | | | | [transmit_event() begin | | | | +----------------dn_descY------>[ip_output()] | | : | | | | : | | | | : | +---------------------+ | : | | : | | : | CPU n2 | : | +---------------------+ | : | | | | +----------------dn_descZ------>[ip_input()] | | : | | | | transmit_event() end] | +---------------------+ | | | +--------------------------+ NOTE: transmit_event() is triggered by dummynet systimer on CPU ip_dn_cpu - Add flow id field, which is packet filter independent, in dummynet descriptor, so that we can record the flow id related information on the originator's stack. In this way, dummynet descriptor and its associated mbuf could be dispatched to a different thread for further processing. - Add packet filter private data and private data unreference function pointer in dummynet descriptor. - Each dummynet descriptor is allocated and filled by the packet filter (only ipfw(4) currently), so things like route entry references are updated on the CPU to which they belong. - All packets are dispatched to netisr on CPU ip_dn_cpu to be queued on the target flow queue. Netisr on CPU ip_dn_cpu is also where various dummynet events are processed. - DUMMYNET_LOADED is not checked before dispatching a packet; it is checked in netisr before the packet is handed to dummynet. This paves the way for step 2/2. 
- ip_{output,input}/ether_{demux,output_frame} are no longer called directly in dummynet; they are called after the packet is dispatched back to the originator CPU, so that ip_input() will be called on the same CPU (as determined by ip_mport()) and things like route entry references will be updated on the CPU to which they belong. - If the packet is to be dispatched back to ip_output(), the recorded route entry is checked to make sure that it is still up. - Dummynet descriptor and its associated mbuf are freed on their originator CPU. - Reference count the ipfw(4) rule if it is going to be associated with a dummynet descriptor, so we would not have a dangling rule pointer if the rule was deleted when the dummynet descriptor was in transit state. Suggested-by: dillon@ - If ipfw(4) is compiled and loaded as a module, reference count the ipfw(4) module usage, if a rule is associated with a dummynet descriptor. - Add net/dummynet/ip_dummynet_glue.c, which contains various netisr dispatch functions. This file will be compiled into the kernel if 'options INET' is set, so that we will not have a dangling function pointer in an in-transit dummynet descriptor. - Add DUMMYNET_MBUF_TAGGED mbuf fw_flag, which may be used later. - Nuke dummynet's dependency on ipfw(4). 
--- sys/conf/files | 3 +- sys/net/dummynet/Makefile | 3 +- sys/net/dummynet/ip_dummynet.c | 289 ++++----------------- sys/net/dummynet/ip_dummynet.h | 60 ++++- sys/net/dummynet/ip_dummynet_glue.c | 390 ++++++++++++++++++++++++++++ sys/net/if_ethersubr.c | 12 +- sys/net/ipfw/ip_fw2.c | 136 +++++++++- sys/net/ipfw/ip_fw2.h | 19 +- sys/netinet/ip_input.c | 12 +- sys/netinet/ip_output.c | 10 +- sys/sys/mbuf.h | 3 +- 11 files changed, 654 insertions(+), 283 deletions(-) create mode 100644 sys/net/dummynet/ip_dummynet_glue.c diff --git a/sys/conf/files b/sys/conf/files index e3153f5229..d3604c1d07 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.192 2007/11/10 19:02:04 swildner Exp $ +# $DragonFly: src/sys/conf/files,v 1.193 2007/11/16 02:45:44 sephe Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -903,6 +903,7 @@ netinet/ip_encap.c optional inet netinet/ip_encap.c optional inet6 netinet/ip_divert.c optional ipdivert net/dummynet/ip_dummynet.c optional dummynet +net/dummynet/ip_dummynet_glue.c optional inet netinet/ip_flow.c optional inet net/ipfw/ip_fw2.c optional ipfirewall netinet/ip_icmp.c optional inet diff --git a/sys/net/dummynet/Makefile b/sys/net/dummynet/Makefile index 52ff8d82cf..11c5b34c4f 100644 --- a/sys/net/dummynet/Makefile +++ b/sys/net/dummynet/Makefile @@ -1,8 +1,7 @@ # $FreeBSD: src/sys/modules/dummynet/Makefile,v 1.1.2.2 2003/04/08 10:18:00 maxim Exp $ -# $DragonFly: src/sys/net/dummynet/Makefile,v 1.5 2007/10/29 02:54:15 sephe Exp $ +# $DragonFly: src/sys/net/dummynet/Makefile,v 1.6 2007/11/16 02:45:45 sephe Exp $ KMOD= dummynet SRCS= ip_dummynet.c -KMODDEPS= ipfw .include diff --git a/sys/net/dummynet/ip_dummynet.c b/sys/net/dummynet/ip_dummynet.c index 723969e190..688c00097e 100644 --- a/sys/net/dummynet/ip_dummynet.c 
+++ b/sys/net/dummynet/ip_dummynet.c @@ -25,13 +25,9 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.22 2003/05/13 09:31:06 maxim Exp $ - * $DragonFly: src/sys/net/dummynet/ip_dummynet.c,v 1.48 2007/11/07 06:23:37 sephe Exp $ + * $DragonFly: src/sys/net/dummynet/ip_dummynet.c,v 1.49 2007/11/16 02:45:45 sephe Exp $ */ -#ifndef KLD_MODULE -#include "opt_ipfw.h" /* for IPFW2 definition */ -#endif - #ifdef DUMMYNET_DEBUG #define DPRINTF(fmt, ...) kprintf(fmt, __VA_ARGS__) #else @@ -39,8 +35,7 @@ #endif /* - * This module implements IP dummynet, a bandwidth limiter/delay emulator - * used in conjunction with the ipfw package. + * This module implements IP dummynet, a bandwidth limiter/delay emulator. * Description of the data structures used is in ip_dummynet.h * Here you mainly find the following blocks of code: * + variable declarations; @@ -68,15 +63,12 @@ #include #include -#include #include +#include -#include #include -#include #include -#include #include #ifndef DN_CALLOUT_FREQ_MAX @@ -107,6 +99,8 @@ MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); +extern int ip_dn_cpu; + static dn_key curr_time = 0; /* current simulation time */ static int dn_hash_size = 64; /* default hash size */ static int pipe_expire = 1; /* expire queue if empty */ @@ -145,7 +139,6 @@ static struct dn_flowset_head flowset_table[DN_NR_HASH_MAX]; static struct netmsg dn_netmsg; static struct systimer dn_clock; static int dn_hz = 1000; -static int dn_cpu = 0; /* TODO tunable */ static int sysctl_dn_hz(SYSCTL_HANDLER_ARGS); @@ -190,7 +183,6 @@ static void ready_event_wfq(struct dn_pipe *); static int config_pipe(struct dn_ioc_pipe *); static void dummynet_flush(void); -static void rt_unref(struct rtentry *); static void dummynet_clock(systimer_t, struct intrframe *); static void dummynet(struct netmsg *); @@ -205,19 +197,8 @@ typedef void (*dn_flowset_iter_t)(struct dn_flow_set *, void *); static void dn_iterate_flowset(dn_flowset_iter_t, void *); static 
ip_dn_io_t dummynet_io; -static ip_dn_ruledel_t dummynet_ruledel; static ip_dn_ctl_t dummynet_ctl; -static void -rt_unref(struct rtentry *rt) -{ - if (rt == NULL) - return; - if (rt->rt_refcnt <= 0) - kprintf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt); - RTFREE(rt); -} - /* * Heap management functions. * @@ -412,62 +393,8 @@ transmit_event(struct dn_pipe *pipe) while ((pkt = TAILQ_FIRST(&pipe->p_queue)) && DN_KEY_LEQ(pkt->output_time, curr_time)) { - struct rtentry *rt; - - /* - * First unlink, then call procedures, since ip_input() can invoke - * ip_output() and viceversa, thus causing nested calls - */ TAILQ_REMOVE(&pipe->p_queue, pkt, dn_next); - - /* - * NOTE: - * 'pkt' should _not_ be touched after calling - * ip_output(), ip_input(), ether_demux() and ether_output_frame() - */ - switch (pkt->dn_dir) { - case DN_TO_IP_OUT: - /* - * 'pkt' will be freed in ip_output, so we keep - * a reference of the 'rtentry' beforehand. - */ - rt = pkt->ro.ro_rt; - ip_output(pkt->dn_m, NULL, NULL, 0, NULL, NULL); - rt_unref(rt); - break; - - case DN_TO_IP_IN : - ip_input(pkt->dn_m); - break; - - case DN_TO_ETH_DEMUX: - { - struct mbuf *m = pkt->dn_m; - struct ether_header *eh; - - if (m->m_len < ETHER_HDR_LEN && - (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { - kprintf("dummynet: pullup fail, dropping pkt\n"); - break; - } - /* - * Same as ether_input, make eh be a pointer into the mbuf - */ - eh = mtod(m, struct ether_header *); - m_adj(m, ETHER_HDR_LEN); - ether_demux(NULL, eh, m); - } - break; - - case DN_TO_ETH_OUT: - ether_output_frame(pkt->ifp, pkt->dn_m); - break; - - default: - kprintf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(pkt->dn_m); - break; - } + ip_dn_packet_redispatch(pkt); } /* @@ -833,7 +760,7 @@ create_queue(struct dn_flow_set *fs, int i) * so that further searches take less time. 
*/ static struct dn_flow_queue * -find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) +find_queue(struct dn_flow_set *fs, struct dn_flow_id *id) { struct dn_flow_queue *q; int i = 0; @@ -844,20 +771,20 @@ find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) struct dn_flow_queue *qn; /* First, do the masking */ - id->dst_ip &= fs->flow_mask.dst_ip; - id->src_ip &= fs->flow_mask.src_ip; - id->dst_port &= fs->flow_mask.dst_port; - id->src_port &= fs->flow_mask.src_port; - id->proto &= fs->flow_mask.proto; - id->flags = 0; /* we don't care about this one */ + id->fid_dst_ip &= fs->flow_mask.fid_dst_ip; + id->fid_src_ip &= fs->flow_mask.fid_src_ip; + id->fid_dst_port &= fs->flow_mask.fid_dst_port; + id->fid_src_port &= fs->flow_mask.fid_src_port; + id->fid_proto &= fs->flow_mask.fid_proto; + id->fid_flags = 0; /* we don't care about this one */ /* Then, hash function */ - i = ((id->dst_ip) & 0xffff) ^ - ((id->dst_ip >> 15) & 0xffff) ^ - ((id->src_ip << 1) & 0xffff) ^ - ((id->src_ip >> 16 ) & 0xffff) ^ - (id->dst_port << 1) ^ (id->src_port) ^ - (id->proto); + i = ((id->fid_dst_ip) & 0xffff) ^ + ((id->fid_dst_ip >> 15) & 0xffff) ^ + ((id->fid_src_ip << 1) & 0xffff) ^ + ((id->fid_src_ip >> 16 ) & 0xffff) ^ + (id->fid_dst_port << 1) ^ (id->fid_src_port) ^ + (id->fid_proto); i = i % fs->rq_size; /* @@ -867,12 +794,12 @@ find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) searches++; LIST_FOREACH_MUTABLE(q, &fs->rq[i], q_link, qn) { search_steps++; - if (id->dst_ip == q->id.dst_ip && - id->src_ip == q->id.src_ip && - id->dst_port == q->id.dst_port && - id->src_port == q->id.src_port && - id->proto == q->id.proto && - id->flags == q->id.flags) { + if (id->fid_dst_ip == q->id.fid_dst_ip && + id->fid_src_ip == q->id.fid_src_ip && + id->fid_dst_port == q->id.fid_dst_port && + id->fid_src_port == q->id.fid_src_port && + id->fid_proto == q->id.fid_proto && + id->fid_flags == q->id.fid_flags) { break; /* Found */ } else if (pipe_expire && 
TAILQ_EMPTY(&q->queue) && q->S == q->F + 1) { @@ -1091,7 +1018,7 @@ dn_locate_flowset(int pipe_nr, int is_pipe) * fwa->flags flags from the caller, only used in ip_output */ static int -dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) +dummynet_io(struct mbuf *m) { struct dn_pkt *pkt; struct m_tag *tag; @@ -1099,20 +1026,15 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) struct dn_pipe *pipe; uint64_t len = m->m_pkthdr.len; struct dn_flow_queue *q = NULL; - int is_pipe; - ipfw_insn *cmd; + int is_pipe, pipe_nr; crit_enter(); - cmd = fwa->rule->cmd + fwa->rule->act_ofs; - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); - - KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, - ("Rule is not PIPE or QUEUE, opcode %d\n", cmd->opcode)); + tag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + pkt = m_tag_data(tag); - is_pipe = (cmd->opcode == O_PIPE); - pipe_nr &= 0xffff; + is_pipe = pkt->dn_flags & DN_FLAGS_IS_PIPE; + pipe_nr = pkt->pipe_nr; /* * This is a dummynet rule, so we expect a O_PIPE or O_QUEUE rule @@ -1133,7 +1055,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) } } - q = find_queue(fs, &fwa->f_id); + q = find_queue(fs, &pkt->id); if (q == NULL) goto dropit; /* Cannot allocate queue */ @@ -1157,39 +1079,6 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) if ((fs->flags_fs & DN_IS_RED) && red_drops(fs, q, len)) goto dropit; - /* - * Build and enqueue packet + parameters - */ - tag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), MB_DONTWAIT /* XXX */); - if (tag == NULL) - goto dropit; - m_tag_prepend(m, tag); - - pkt = m_tag_data(tag); - bzero(pkt, sizeof(*pkt)); /* XXX expensive to zero */ - - pkt->rule = fwa->rule; - pkt->dn_m = m; - pkt->dn_dir = dir; - - pkt->ifp = fwa->oif; - if (dir == DN_TO_IP_OUT) { - /* - * We need to copy *ro because for ICMP pkts (and maybe others) - * the caller passed a pointer into the stack; dst might also be - * a 
pointer into *ro so it needs to be updated. - */ - pkt->ro = *(fwa->ro); - if (fwa->ro->ro_rt) - fwa->ro->ro_rt->rt_refcnt++; - if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) { - /* 'dst' points into 'ro' */ - fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst); - } - - pkt->dn_dst = fwa->dst; - pkt->flags = fwa->flags; - } TAILQ_INSERT_TAIL(&q->queue, pkt, dn_next); q->len++; q->len_bytes += len; @@ -1279,20 +1168,9 @@ dropit: crit_exit(); if (q) q->drops++; - m_freem(m); - return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); + return ENOBUFS; } -/* - * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) - * Doing this would probably save us the initial bzero of dn_pkt - */ -#define DN_FREE_PKT(pkt) \ -do { \ - rt_unref((pkt)->ro.ro_rt); \ - m_freem((pkt)->dn_m); \ -} while (0) - /* * Dispose all packets and flow_queues on a flow_set. * If all=1, also remove red lookup table and other storage, @@ -1315,7 +1193,7 @@ purge_flow_set(struct dn_flow_set *fs, int all) while ((pkt = TAILQ_FIRST(&q->queue)) != NULL) { TAILQ_REMOVE(&q->queue, pkt, dn_next); - DN_FREE_PKT(pkt); + ip_dn_packet_free(pkt); } LIST_REMOVE(q, q_link); @@ -1366,7 +1244,7 @@ purge_pipe(struct dn_pipe *pipe) while ((pkt = TAILQ_FIRST(&pipe->p_queue)) != NULL) { TAILQ_REMOVE(&pipe->p_queue, pkt, dn_next); - DN_FREE_PKT(pkt); + ip_dn_packet_free(pkt); } heap_free(&pipe->scheduler_heap); @@ -1375,8 +1253,7 @@ purge_pipe(struct dn_pipe *pipe) } /* - * Delete all pipes and heaps returning memory. Must also - * remove references from all ipfw rules to all pipes. + * Delete all pipes and heaps returning memory. 
*/ static void dummynet_flush(void) @@ -1435,63 +1312,6 @@ dummynet_flush(void) } } - -extern struct ip_fw *ip_fw_default_rule; - -static void -dn_rule_delete_fs(struct dn_flow_set *fs, void *r) -{ - int i; - - for (i = 0; i <= fs->rq_size; i++) { /* Last one is ovflow */ - struct dn_flow_queue *q; - - LIST_FOREACH(q, &fs->rq[i], q_link) { - struct dn_pkt *pkt; - - TAILQ_FOREACH(pkt, &q->queue, dn_next) { - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule; - } - } - } -} - -static void -dn_ruledel_pipe_cb(struct dn_pipe *pipe, void *rule) -{ - struct dn_pkt *pkt; - - dn_rule_delete_fs(&pipe->fs, rule); - - TAILQ_FOREACH(pkt, &pipe->p_queue, dn_next) { - if (pkt->rule == rule) - pkt->rule = ip_fw_default_rule; - } -} - -static void -dn_ruledel_fs_cb(struct dn_flow_set *fs, void *rule) -{ - dn_rule_delete_fs(fs, rule); -} - -/* - * When a firewall rule is deleted, scan all queues and remove the flow-id - * from packets matching this rule. - */ -void -dummynet_ruledel(void *r) -{ - /* - * If the rule references a queue (dn_flow_set), then scan - * the flow set, otherwise scan pipes. Should do either, but doing - * both does not harm. 
- */ - dn_iterate_flowset(dn_ruledel_fs_cb, r); - dn_iterate_pipe(dn_ruledel_pipe_cb, r); -} - /* * setup RED parameters */ @@ -1579,14 +1399,14 @@ alloc_hash(struct dn_flow_set *x, const struct dn_ioc_flowset *ioc_fs) } static void -set_flowid_parms(struct ipfw_flow_id *id, const struct dn_ioc_flowid *ioc_id) +set_flowid_parms(struct dn_flow_id *id, const struct dn_ioc_flowid *ioc_id) { - id->dst_ip = ioc_id->u.ip.dst_ip; - id->src_ip = ioc_id->u.ip.src_ip; - id->dst_port = ioc_id->u.ip.dst_port; - id->src_port = ioc_id->u.ip.src_port; - id->proto = ioc_id->u.ip.proto; - id->flags = ioc_id->u.ip.flags; + id->fid_dst_ip = ioc_id->u.ip.dst_ip; + id->fid_src_ip = ioc_id->u.ip.src_ip; + id->fid_dst_port = ioc_id->u.ip.dst_port; + id->fid_src_port = ioc_id->u.ip.src_port; + id->fid_proto = ioc_id->u.ip.proto; + id->fid_flags = ioc_id->u.ip.flags; } static void @@ -1860,15 +1680,15 @@ back: * helper function used to copy data from kernel in DUMMYNET_GET */ static void -dn_copy_flowid(const struct ipfw_flow_id *id, struct dn_ioc_flowid *ioc_id) +dn_copy_flowid(const struct dn_flow_id *id, struct dn_ioc_flowid *ioc_id) { ioc_id->type = ETHERTYPE_IP; - ioc_id->u.ip.dst_ip = id->dst_ip; - ioc_id->u.ip.src_ip = id->src_ip; - ioc_id->u.ip.dst_port = id->dst_port; - ioc_id->u.ip.src_port = id->src_port; - ioc_id->u.ip.proto = id->proto; - ioc_id->u.ip.flags = id->flags; + ioc_id->u.ip.dst_ip = id->fid_dst_ip; + ioc_id->u.ip.src_ip = id->fid_src_ip; + ioc_id->u.ip.dst_port = id->fid_dst_port; + ioc_id->u.ip.src_port = id->fid_src_port; + ioc_id->u.ip.proto = id->fid_proto; + ioc_id->u.ip.flags = id->fid_flags; } static void * @@ -2081,7 +1901,7 @@ dummynet_ctl(struct sockopt *sopt) static void dummynet_clock(systimer_t info __unused, struct intrframe *frame __unused) { - KASSERT(mycpu->gd_cpuid == dn_cpu, + KASSERT(mycpu->gd_cpuid == ip_dn_cpu, ("systimer comes on a different cpu!\n")); crit_enter(); @@ -2152,12 +1972,11 @@ ip_dn_init(void) ip_dn_ctl_ptr = dummynet_ctl; 
ip_dn_io_ptr = dummynet_io; - ip_dn_ruledel_ptr = dummynet_ruledel; netmsg_init(&dn_netmsg, &netisr_adone_rport, 0, dummynet); netmsg_init(&smsg, &curthread->td_msgport, 0, ip_dn_register_systimer); - port = cpu_portfn(dn_cpu); + port = cpu_portfn(ip_dn_cpu); lwkt_domsg(port, &smsg.nm_lmsg, 0); } @@ -2168,14 +1987,13 @@ ip_dn_stop(void) lwkt_port_t port; netmsg_init(&smsg, &curthread->td_msgport, 0, ip_dn_deregister_systimer); - port = cpu_portfn(dn_cpu); + port = cpu_portfn(ip_dn_cpu); lwkt_domsg(port, &smsg.nm_lmsg, 0); dummynet_flush(); ip_dn_ctl_ptr = NULL; ip_dn_io_ptr = NULL; - ip_dn_ruledel_ptr = NULL; netmsg_service_sync(); } @@ -2218,5 +2036,4 @@ static moduledata_t dummynet_mod = { NULL }; DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_END, SI_ORDER_ANY); -MODULE_DEPEND(dummynet, ipfw, 1, 1, 1); MODULE_VERSION(dummynet, 1); diff --git a/sys/net/dummynet/ip_dummynet.h b/sys/net/dummynet/ip_dummynet.h index 5a084bfedc..c0655950f6 100644 --- a/sys/net/dummynet/ip_dummynet.h +++ b/sys/net/dummynet/ip_dummynet.h @@ -25,7 +25,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.10.2.9 2003/05/13 09:31:06 maxim Exp $ - * $DragonFly: src/sys/net/dummynet/ip_dummynet.h,v 1.16 2007/11/07 06:23:37 sephe Exp $ + * $DragonFly: src/sys/net/dummynet/ip_dummynet.h,v 1.17 2007/11/16 02:45:45 sephe Exp $ */ #ifndef _IP_DUMMYNET_H @@ -92,6 +92,29 @@ struct dn_heap { struct dn_heap_entry *p; /* really an array of "size" entries */ }; +struct dn_flow_id { + uint16_t fid_type; /* ETHERTYPE_ */ + uint16_t pad; + union { + struct { + uint32_t dst_ip; + uint32_t src_ip; + uint16_t dst_port; + uint16_t src_port; + uint8_t proto; + uint8_t flags; + } inet; + } fid_u; +#define fid_dst_ip fid_u.inet.dst_ip +#define fid_src_ip fid_u.inet.src_ip +#define fid_dst_port fid_u.inet.dst_port +#define fid_src_port fid_u.inet.src_port +#define fid_proto fid_u.inet.proto +#define fid_flags fid_u.inet.flags +}; + +typedef void (*ip_dn_unref_priv_t)(void *); + /* * struct dn_pkt identifies a packet in the dummynet queue, but is also used * to tag packets passed back to the various destinations (ip_input(), @@ -103,18 +126,29 @@ struct dn_pkt { struct mbuf *dn_m; TAILQ_ENTRY(dn_pkt) dn_next; - struct ip_fw *rule; /* matching rule */ - int dn_dir; /* action when packet comes out. */ -#define DN_TO_IP_OUT 1 -#define DN_TO_IP_IN 2 -#define DN_TO_ETH_DEMUX 4 -#define DN_TO_ETH_OUT 5 + void *dn_priv; + ip_dn_unref_priv_t dn_unref_priv; + + uint32_t dn_flags; /* action when packet comes out. */ +#define DN_FLAGS_IS_PIPE 0x10 +#define DN_FLAGS_DIR_MASK 0x0f +#define DN_TO_IP_OUT 1 +#define DN_TO_IP_IN 2 +#define DN_TO_ETH_DEMUX 4 +#define DN_TO_ETH_OUT 5 +#define DN_TO_MAX 6 dn_key output_time; /* when the pkt is due for delivery */ struct ifnet *ifp; /* interface, for ip_output */ struct sockaddr_in *dn_dst; struct route ro; /* route, for ip_output. MUST COPY */ int flags; /* flags, for ip_output (IPv6 ?) 
*/ + + u_short pipe_nr; /* pipe/flow_set number */ + u_short pad; + + struct dn_flow_id id; /* flow id */ + int cpuid; /* target cpu, for IP_OUT/ETH_DEMUX/ETH_OUT */ }; TAILQ_HEAD(dn_pkt_queue, dn_pkt); @@ -180,7 +214,7 @@ TAILQ_HEAD(dn_pkt_queue, dn_pkt); * flow arrives. */ struct dn_flow_queue { - struct ipfw_flow_id id; + struct dn_flow_id id; LIST_ENTRY(dn_flow_queue) q_link; struct dn_pkt_queue queue; /* queue of packets */ @@ -237,7 +271,7 @@ struct dn_flow_set { int qsize; /* queue size in slots or bytes */ int plr; /* pkt loss rate (2^31-1 means 100%) */ - struct ipfw_flow_id flow_mask; + struct dn_flow_id flow_mask; /* hash table of queues onto this flow_set */ int rq_size; /* number of slots */ @@ -301,13 +335,15 @@ struct dn_pipe { /* a pipe */ LIST_HEAD(dn_pipe_head, dn_pipe); typedef int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */ -typedef void ip_dn_ruledel_t(void *); /* ip_fw2.c */ -typedef int ip_dn_io_t(struct mbuf *, int, int, struct ip_fw_args *); +typedef int ip_dn_io_t(struct mbuf *); extern ip_dn_ctl_t *ip_dn_ctl_ptr; -extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; extern ip_dn_io_t *ip_dn_io_ptr; +void ip_dn_queue(struct mbuf *); +void ip_dn_packet_free(struct dn_pkt *); +void ip_dn_packet_redispatch(struct dn_pkt *); + #define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) #endif /* _KERNEL */ diff --git a/sys/net/dummynet/ip_dummynet_glue.c b/sys/net/dummynet/ip_dummynet_glue.c new file mode 100644 index 0000000000..441fcb0584 --- /dev/null +++ b/sys/net/dummynet/ip_dummynet_glue.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Sepherosa Ziehau + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/net/dummynet/ip_dummynet_glue.c,v 1.1 2007/11/16 02:45:45 sephe Exp $ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +static void ip_dn_ether_output(struct netmsg *); +static void ip_dn_ether_demux(struct netmsg *); +static void ip_dn_ip_input(struct netmsg *); +static void ip_dn_ip_output(struct netmsg *); + +static void ip_dn_freepkt_dispatch(struct netmsg *); +static void ip_dn_dispatch(struct netmsg *); + +static void ip_dn_freepkt(struct dn_pkt *); + +ip_dn_io_t *ip_dn_io_ptr; +int ip_dn_cpu = 0; + +void +ip_dn_queue(struct mbuf *m) +{ + struct netmsg_packet *nmp; + lwkt_port_t port; + + KASSERT(m->m_type != MT_TAG, ("mbuf contains old style tag!\n")); + + nmp = &m->m_hdr.mh_netmsg; + netmsg_init(&nmp->nm_netmsg, &netisr_apanic_rport, 0, + ip_dn_dispatch); + nmp->nm_packet = m; + + port = cpu_portfn(ip_dn_cpu); + lwkt_sendmsg(port, &nmp->nm_netmsg.nm_lmsg); +} + +void +ip_dn_packet_free(struct dn_pkt *pkt) +{ + struct netmsg_packet *nmp; + lwkt_port_t port; + struct mbuf *m = pkt->dn_m; + + KASSERT(m->m_type != MT_TAG, ("mbuf contains old style tag!\n")); + + if (pkt->cpuid == mycpuid) { + ip_dn_freepkt(pkt); + return; + } + + nmp = &m->m_hdr.mh_netmsg; + netmsg_init(&nmp->nm_netmsg, &netisr_apanic_rport, 0, + ip_dn_freepkt_dispatch); + nmp->nm_packet = m; + + port = cpu_portfn(pkt->cpuid); + lwkt_sendmsg(port, &nmp->nm_netmsg.nm_lmsg); +} + +void +ip_dn_packet_redispatch(struct dn_pkt *pkt) +{ + static const netisr_fn_t dispatches[DN_TO_MAX] = { + [DN_TO_IP_OUT] = ip_dn_ip_output, + [DN_TO_IP_IN] = ip_dn_ip_input, + [DN_TO_ETH_DEMUX] = ip_dn_ether_demux, + [DN_TO_ETH_OUT] = ip_dn_ether_output + }; + + struct netmsg_packet *nmp; + struct mbuf *m; + netisr_fn_t dispatch; + lwkt_port_t port; + int dir; + + dir = (pkt->dn_flags & DN_FLAGS_DIR_MASK); + KASSERT(dir < DN_TO_MAX, + ("unknown dummynet redispatch dir %d\n", dir)); + + 
dispatch = dispatches[dir]; + KASSERT(dispatch != NULL, + ("unsupported dummynet redispatch dir %d\n", dir)); + + m = pkt->dn_m; + KASSERT(m->m_type != MT_TAG, ("mbuf contains old style tag!\n")); + + nmp = &m->m_hdr.mh_netmsg; + netmsg_init(&nmp->nm_netmsg, &netisr_apanic_rport, 0, dispatch); + nmp->nm_packet = m; + + port = cpu_portfn(pkt->cpuid); + lwkt_sendmsg(port, &nmp->nm_netmsg.nm_lmsg); +} + +static void +ip_dn_freepkt(struct dn_pkt *pkt) +{ + struct rtentry *rt = pkt->ro.ro_rt; + + /* Unreference route entry */ + if (rt != NULL) { + if (rt->rt_refcnt <= 0) { /* XXX assert? */ + kprintf("-- warning, refcnt now %ld, decreasing\n", + rt->rt_refcnt); + } + RTFREE(rt); + } + + /* Unreference packet private data */ + if (pkt->dn_unref_priv) + pkt->dn_unref_priv(pkt->dn_priv); + + /* Free the parent mbuf, this will free 'pkt' as well */ + m_freem(pkt->dn_m); +} + +static void +ip_dn_freepkt_dispatch(struct netmsg *nmsg) +{ + struct netmsg_packet *nmp; + struct mbuf *m; + struct m_tag *mtag; + struct dn_pkt *pkt; + + nmp = (struct netmsg_packet *)nmsg; + m = nmp->nm_packet; + + mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + pkt = m_tag_data(mtag); + + KASSERT(pkt->cpuid == mycpuid, + ("%s: dummynet packet was delivered to wrong cpu! " + "target cpuid %d, mycpuid %d\n", __func__, + pkt->cpuid, mycpuid)); + + ip_dn_freepkt(pkt); +} + +static void +ip_dn_dispatch(struct netmsg *nmsg) +{ + struct netmsg_packet *nmp; + struct mbuf *m; + struct m_tag *mtag; + struct dn_pkt *pkt; + + KASSERT(ip_dn_cpu == mycpuid, + ("%s: dummynet packet was delivered to wrong cpu! 
" + "dummynet cpuid %d, mycpuid %d\n", __func__, + ip_dn_cpu, mycpuid)); + + nmp = (struct netmsg_packet *)nmsg; + m = nmp->nm_packet; + + if (DUMMYNET_LOADED) { + if (ip_dn_io_ptr(m) == 0) + return; + } + + /* + * ip_dn_io_ptr() failed or dummynet(4) is not loaded + */ + mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + pkt = m_tag_data(mtag); + ip_dn_packet_free(pkt); +} + +static void +ip_dn_ip_output(struct netmsg *nmsg) +{ + struct netmsg_packet *nmp; + struct mbuf *m; + struct m_tag *mtag; + struct dn_pkt *pkt; + struct rtentry *rt; + ip_dn_unref_priv_t unref_priv; + void *priv; + + nmp = (struct netmsg_packet *)nmsg; + m = nmp->nm_packet; + + mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + KKASSERT(mtag != NULL); + KKASSERT(m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED); + + pkt = m_tag_data(mtag); + KASSERT(pkt->cpuid == mycpuid, + ("%s: dummynet packet was delivered to wrong cpu! " + "target cpuid %d, mycpuid %d\n", __func__, + pkt->cpuid, mycpuid)); + KASSERT((pkt->dn_flags & DN_FLAGS_DIR_MASK) == DN_TO_IP_OUT, + ("wrong direction %d, should be %d\n", + (pkt->dn_flags & DN_FLAGS_DIR_MASK), DN_TO_IP_OUT)); + + priv = pkt->dn_priv; + unref_priv = pkt->dn_unref_priv; + rt = pkt->ro.ro_rt; + + if (rt != NULL && !(rt->rt_flags & RTF_UP)) { + /* + * Recorded rtentry is gone, when the packet + * was on delay line. + */ + ip_dn_freepkt(pkt); + return; + } + + ip_output(pkt->dn_m, NULL, NULL, 0, NULL, NULL); + + if (rt != NULL) { + if (rt->rt_refcnt <= 0) { /* XXX assert? 
*/ + kprintf("-- warning, refcnt now %ld, decreasing\n", + rt->rt_refcnt); + } + RTFREE(rt); + } + if (unref_priv) + unref_priv(priv); +} + +static void +ip_dn_ip_input(struct netmsg *nmsg) +{ + struct netmsg_packet *nmp; + struct mbuf *m; + struct m_tag *mtag; + struct dn_pkt *pkt; + ip_dn_unref_priv_t unref_priv; + void *priv; + + nmp = (struct netmsg_packet *)nmsg; + m = nmp->nm_packet; + + mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + KKASSERT(mtag != NULL); + KKASSERT(m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED); + + pkt = m_tag_data(mtag); + KASSERT(pkt->cpuid == mycpuid, + ("%s: dummynet packet was delivered to wrong cpu! " + "target cpuid %d, mycpuid %d\n", __func__, + pkt->cpuid, mycpuid)); + KASSERT(pkt->ro.ro_rt == NULL, + ("route entry is not NULL for ip_input\n")); + KASSERT((pkt->dn_flags & DN_FLAGS_DIR_MASK) == DN_TO_IP_IN, + ("wrong direction %d, should be %d\n", + (pkt->dn_flags & DN_FLAGS_DIR_MASK), DN_TO_IP_IN)); + + priv = pkt->dn_priv; + unref_priv = pkt->dn_unref_priv; + + ip_input(m); + + if (unref_priv) + unref_priv(priv); +} + +static void +ip_dn_ether_demux(struct netmsg *nmsg) +{ + struct netmsg_packet *nmp; + struct mbuf *m; + struct m_tag *mtag; + struct dn_pkt *pkt; + struct ether_header *eh; + ip_dn_unref_priv_t unref_priv; + void *priv; + + nmp = (struct netmsg_packet *)nmsg; + m = nmp->nm_packet; + + mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + KKASSERT(mtag != NULL); + KKASSERT(m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED); + + pkt = m_tag_data(mtag); + KASSERT(pkt->cpuid == mycpuid, + ("%s: dummynet packet was delivered to wrong cpu! 
" + "target cpuid %d, mycpuid %d\n", __func__, + pkt->cpuid, mycpuid)); + KASSERT(pkt->ro.ro_rt == NULL, + ("route entry is not NULL for ether_demux\n")); + KASSERT((pkt->dn_flags & DN_FLAGS_DIR_MASK) == DN_TO_ETH_DEMUX, + ("wrong direction %d, should be %d\n", + (pkt->dn_flags & DN_FLAGS_DIR_MASK), DN_TO_ETH_DEMUX)); + + priv = pkt->dn_priv; + unref_priv = pkt->dn_unref_priv; + + if (m->m_len < ETHER_HDR_LEN && + (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { + kprintf("%s: pullup fail, dropping pkt\n", __func__); + goto back; + } + + /* + * Same as ether_input, make eh be a pointer into the mbuf + */ + eh = mtod(m, struct ether_header *); + m_adj(m, ETHER_HDR_LEN); + ether_demux(NULL, eh, m); +back: + if (unref_priv) + unref_priv(priv); +} + +static void +ip_dn_ether_output(struct netmsg *nmsg) +{ + struct netmsg_packet *nmp; + struct mbuf *m; + struct m_tag *mtag; + struct dn_pkt *pkt; + ip_dn_unref_priv_t unref_priv; + void *priv; + + nmp = (struct netmsg_packet *)nmsg; + m = nmp->nm_packet; + + mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + KKASSERT(mtag != NULL); + KKASSERT(m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED); + + pkt = m_tag_data(mtag); + KASSERT(pkt->cpuid == mycpuid, + ("%s: dummynet packet was delivered to wrong cpu! 
" + "target cpuid %d, mycpuid %d\n", __func__, + pkt->cpuid, mycpuid)); + KASSERT(pkt->ro.ro_rt == NULL, + ("route entry is not NULL for ether_output_frame\n")); + KASSERT((pkt->dn_flags & DN_FLAGS_DIR_MASK) == DN_TO_ETH_OUT, + ("wrong direction %d, should be %d\n", + (pkt->dn_flags & DN_FLAGS_DIR_MASK), DN_TO_ETH_OUT)); + + priv = pkt->dn_priv; + unref_priv = pkt->dn_unref_priv; + + ether_output_frame(pkt->ifp, m); + + if (unref_priv) + unref_priv(priv); +} diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index fa64a7d6a3..99c5012f1f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -32,7 +32,7 @@ * * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.70.2.33 2003/04/28 15:45:53 archie Exp $ - * $DragonFly: src/sys/net/if_ethersubr.c,v 1.50 2007/10/25 13:13:18 sephe Exp $ + * $DragonFly: src/sys/net/if_ethersubr.c,v 1.51 2007/11/16 02:45:44 sephe Exp $ */ #include "opt_atalk.h" @@ -393,7 +393,7 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) /* Extract info from dummynet tag */ mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (mtag != NULL) { - rule = ((struct dn_pkt *)m_tag_data(mtag))->rule; + rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; m_tag_delete(m, mtag); mtag = NULL; @@ -491,7 +491,7 @@ ether_ipfw_chk( if (i == 0) /* a PASS rule. */ return TRUE; - if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG)) { + if (i & IP_FW_PORT_DYNT_FLAG) { /* * Pass the pkt to dummynet, which consumes it. * If shared, make a copy and keep the original. @@ -521,8 +521,8 @@ ether_ipfw_chk( bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); } - ip_dn_io_ptr(m, (i & 0xffff), - dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); + ip_fw_dn_io_ptr(m, (i & 0xffff), + dst ? 
DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); return FALSE; } /* @@ -644,7 +644,7 @@ ether_demux(struct ifnet *ifp, struct ether_header *eh0, struct mbuf *m) /* Extract info from dummynet tag */ mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (mtag != NULL) { - rule = ((struct dn_pkt *)m_tag_data(mtag))->rule; + rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; KKASSERT(ifp == NULL); ifp = m->m_pkthdr.rcvif; diff --git a/sys/net/ipfw/ip_fw2.c b/sys/net/ipfw/ip_fw2.c index f83202f9f8..ad2ae6e0a2 100644 --- a/sys/net/ipfw/ip_fw2.c +++ b/sys/net/ipfw/ip_fw2.c @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ - * $DragonFly: src/sys/net/ipfw/ip_fw2.c,v 1.37 2007/11/06 14:42:51 sephe Exp $ + * $DragonFly: src/sys/net/ipfw/ip_fw2.c,v 1.38 2007/11/16 02:45:45 sephe Exp $ */ #define DEB(x) @@ -33,7 +33,7 @@ * Implement IP packet firewall (new version) */ -#if !defined(KLD_MODULE) +#ifndef KLD_MODULE #include "opt_ipfw.h" #include "opt_ipdn.h" #include "opt_ipdivert.h" @@ -87,6 +87,10 @@ static uint32_t set_disable; static int fw_verbose; static int verbose_limit; +#ifdef KLD_MODULE +static int ipfw_refcnt; +#endif + static struct callout ipfw_timeout_h; #define IPFW_DEFAULT_RULE 65535 @@ -221,7 +225,35 @@ struct ip_fw *ip_fw_default_rule; static ip_fw_chk_t ipfw_chk; -ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL; /* hook into dummynet */ +static __inline int +ipfw_free_rule(struct ip_fw *rule) +{ + KASSERT(rule->refcnt > 0, ("invalid refcnt %u\n", rule->refcnt)); + atomic_subtract_int(&rule->refcnt, 1); + if (atomic_cmpset_int(&rule->refcnt, 0, 1)) { + kfree(rule, M_IPFW); + return 1; + } + return 0; +} + +static void +ipfw_unref_rule(void *priv) +{ + ipfw_free_rule(priv); +#ifdef KLD_MODULE + atomic_subtract_int(&ipfw_refcnt, 1); +#endif +} + +static __inline void +ipfw_ref_rule(struct ip_fw *rule) +{ +#ifdef KLD_MODULE + atomic_add_int(&ipfw_refcnt, 1); +#endif + atomic_add_int(&rule->refcnt, 1); +} /* * This 
macro maps an ip pointer into a layer3 header pointer of type T @@ -1392,6 +1424,10 @@ after_ip_checks: if (fw_one_pass) return 0; + /* This rule was deleted */ + if (args->rule->rule_flags & IPFW_RULE_F_INVALID) + return IP_FW_PORT_DENY_FLAG; + f = args->rule->next_rule; if (f == NULL) f = lookup_next_rule(args->rule); @@ -1950,6 +1986,75 @@ pullup_failed: return(IP_FW_PORT_DENY_FLAG); } +static void +ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) +{ + struct m_tag *mtag; + struct dn_pkt *pkt; + ipfw_insn *cmd; + const struct ipfw_flow_id *id; + struct dn_flow_id *fid; + + mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), MB_DONTWAIT); + if (mtag == NULL) { + m_freem(m); + return; + } + m_tag_prepend(m, mtag); + + pkt = m_tag_data(mtag); + bzero(pkt, sizeof(*pkt)); + + cmd = fwa->rule->cmd + fwa->rule->act_ofs; + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, + ("Rule is not PIPE or QUEUE, opcode %d\n", cmd->opcode)); + + pkt->dn_m = m; + pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); + pkt->ifp = fwa->oif; + pkt->cpuid = mycpu->gd_cpuid; + pkt->pipe_nr = pipe_nr; + + id = &fwa->f_id; + fid = &pkt->id; + fid->fid_dst_ip = id->dst_ip; + fid->fid_src_ip = id->src_ip; + fid->fid_dst_port = id->dst_port; + fid->fid_src_port = id->src_port; + fid->fid_proto = id->proto; + fid->fid_flags = id->flags; + + ipfw_ref_rule(fwa->rule); + pkt->dn_priv = fwa->rule; + pkt->dn_unref_priv = ipfw_unref_rule; + + if (cmd->opcode == O_PIPE) + pkt->dn_flags |= DN_FLAGS_IS_PIPE; + + if (dir == DN_TO_IP_OUT) { + /* + * We need to copy *ro because for ICMP pkts (and maybe + * others) the caller passed a pointer into the stack; + * dst might also be a pointer into *ro so it needs to + * be updated. 
+ */ + pkt->ro = *(fwa->ro); + if (fwa->ro->ro_rt) + fwa->ro->ro_rt->rt_refcnt++; + if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) { + /* 'dst' points into 'ro' */ + fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst); + } + pkt->dn_dst = fwa->dst; + pkt->flags = fwa->flags; + } + + m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; + ip_dn_queue(m); +} + /* * When a rule is added/deleted, clear the next_rule pointers in all rules. * These will be reconstructed on the fly as packets are matched. @@ -1999,6 +2104,8 @@ ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule) bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); + rule->refcnt = 1; + return rule; } @@ -2088,9 +2195,13 @@ delete_rule(struct ip_fw **head, struct ip_fw *prev, struct ip_fw *rule) prev->next = n; ipfw_dec_static_count(rule); - if (DUMMYNET_LOADED) - ip_dn_ruledel_ptr(rule); - kfree(rule, M_IPFW); + /* Mark the rule as invalid */ + rule->rule_flags |= IPFW_RULE_F_INVALID; + rule->next_rule = NULL; + + /* Try to free this rule */ + ipfw_free_rule(rule); + return n; } @@ -2780,6 +2891,8 @@ ipfw_init_default_rule(struct ip_fw **head) def_rule->cmd[0].opcode = O_DENY; #endif + def_rule->refcnt = 1; + *head = def_rule; ipfw_inc_static_count(def_rule); @@ -2792,6 +2905,7 @@ ipfw_init(void) { ip_fw_chk_ptr = ipfw_chk; ip_fw_ctl_ptr = ipfw_ctl; + ip_fw_dn_io_ptr = ipfw_dummynet_io; layer3_chain = NULL; ipfw_init_default_rule(&layer3_chain); @@ -2842,14 +2956,20 @@ ipfw_modevent(module_t mod, int type, void *unused) break; case MOD_UNLOAD: -#if !defined(KLD_MODULE) +#ifndef KLD_MODULE kprintf("ipfw statically compiled, cannot unload\n"); err = EBUSY; #else - crit_enter(); + if (ipfw_refcnt != 0) { + err = EBUSY; + break; + } + + crit_enter(); callout_stop(&ipfw_timeout_h); ip_fw_chk_ptr = NULL; ip_fw_ctl_ptr = NULL; + ip_fw_dn_io_ptr = NULL; free_chain(&layer3_chain, 1 /* kill default rule */); crit_exit(); kprintf("IP firewall unloaded\n"); diff --git a/sys/net/ipfw/ip_fw2.h 
b/sys/net/ipfw/ip_fw2.h index cd48936f56..5a9337040d 100644 --- a/sys/net/ipfw/ip_fw2.h +++ b/sys/net/ipfw/ip_fw2.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/netinet/ip_fw2.h,v 1.1.2.2 2002/08/16 11:03:11 luigi Exp $ - * $DragonFly: src/sys/net/ipfw/ip_fw2.h,v 1.7 2007/11/06 14:42:51 sephe Exp $ + * $DragonFly: src/sys/net/ipfw/ip_fw2.h,v 1.8 2007/11/16 02:45:45 sephe Exp $ */ #ifndef _IPFW2_H @@ -290,9 +290,14 @@ struct ip_fw { uint64_t bcnt; /* Byte counter */ uint32_t timestamp; /* tv_sec of last match */ + uint32_t refcnt; /* Ref count for transit pkts */ + uint32_t rule_flags; /* IPFW_RULE_F_ */ + ipfw_insn cmd[1]; /* storage for commands */ }; +#define IPFW_RULE_F_INVALID 0x1 + #define RULESIZE(rule) (sizeof(struct ip_fw) + (rule)->cmd_len * 4 - 4) /* @@ -368,10 +373,14 @@ struct ip_fw_args { struct sockopt; struct dn_flow_set; -typedef int ip_fw_chk_t (struct ip_fw_args *args); -typedef int ip_fw_ctl_t (struct sockopt *); -extern ip_fw_chk_t *ip_fw_chk_ptr; -extern ip_fw_ctl_t *ip_fw_ctl_ptr; +typedef int ip_fw_chk_t(struct ip_fw_args *); +typedef int ip_fw_ctl_t(struct sockopt *); +typedef void ip_fw_dn_io_t(struct mbuf *, int, int, struct ip_fw_args *); + +extern ip_fw_chk_t *ip_fw_chk_ptr; +extern ip_fw_ctl_t *ip_fw_ctl_ptr; +extern ip_fw_dn_io_t *ip_fw_dn_io_ptr; + extern int fw_one_pass; extern int fw_enable; #define IPFW_LOADED (ip_fw_chk_ptr != NULL) diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 524251466e..6f28757215 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -65,7 +65,7 @@ * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 * $FreeBSD: src/sys/netinet/ip_input.c,v 1.130.2.52 2003/03/07 07:01:28 silby Exp $ - * $DragonFly: src/sys/netinet/ip_input.c,v 1.71 2007/10/25 13:13:18 sephe Exp $ + * $DragonFly: src/sys/netinet/ip_input.c,v 1.72 2007/11/16 02:45:45 sephe Exp $ */ #define _IP_VHL @@ -268,12 +268,10 @@ static const int ipstealth = 0; /* Firewall hooks */ ip_fw_chk_t *ip_fw_chk_ptr; +ip_fw_dn_io_t 
*ip_fw_dn_io_ptr; int fw_enable = 1; int fw_one_pass = 1; -/* Dummynet hooks */ -ip_dn_io_t *ip_dn_io_ptr; - struct pfil_head inet_pfil_hook; /* @@ -484,7 +482,7 @@ ip_input(struct mbuf *m) /* Extract info from dummynet tag */ mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); if (mtag != NULL) { - args.rule = ((struct dn_pkt *)m_tag_data(mtag))->rule; + args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; m_tag_delete(m, mtag); mtag = NULL; @@ -630,9 +628,9 @@ iphack: ip = mtod(m, struct ip *); /* just in case m changed */ if (i == 0 && args.next_hop == NULL) /* common case */ goto pass; - if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG)) { + if (i & IP_FW_PORT_DYNT_FLAG) { /* Send packet to the appropriate pipe */ - ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args); + ip_fw_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args); return; } #ifdef IPDIVERT diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 2a4b13de7f..2a188ad2fa 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -28,7 +28,7 @@ * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.37 2003/04/15 06:44:45 silby Exp $ - * $DragonFly: src/sys/netinet/ip_output.c,v 1.38 2007/10/25 13:13:18 sephe Exp $ + * $DragonFly: src/sys/netinet/ip_output.c,v 1.39 2007/11/16 02:45:45 sephe Exp $ */ #define _IP_VHL @@ -178,7 +178,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, * processing was already done, and we need to go down. * Get parameters from the tag. */ - args.rule = dn_pkt->rule; + args.rule = dn_pkt->dn_priv; opt = NULL; ro = &dn_pkt->ro; imo = NULL; @@ -760,7 +760,7 @@ spd_done: ip = mtod(m, struct ip *); if (off == 0 && dst == old) /* common case */ goto pass; - if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG)) { + if (off & IP_FW_PORT_DYNT_FLAG) { /* * pass the pkt to dummynet. 
Need to include * pipe number, m, ifp, ro, dst because these are @@ -774,8 +774,8 @@ spd_done: args.dst = dst; args.flags = flags; - error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, - &args); + error = 0; + ip_fw_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, &args); goto done; } #ifdef IPDIVERT diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 5e03c3a044..7c491d2366 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -34,7 +34,7 @@ * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD: src/sys/sys/mbuf.h,v 1.44.2.17 2003/04/15 06:15:02 silby Exp $ - * $DragonFly: src/sys/sys/mbuf.h,v 1.42 2007/10/25 13:13:18 sephe Exp $ + * $DragonFly: src/sys/sys/mbuf.h,v 1.43 2007/11/16 02:45:45 sephe Exp $ */ #ifndef _SYS_MBUF_H_ @@ -239,6 +239,7 @@ struct mbuf { #define PF_MBUF_FRAGCACHE 0x00000010 #define ALTQ_MBUF_TAGGED 0x00000020 /* altq_qid is valid */ #define IPFW_MBUF_SKIP_FIREWALL 0x00000040 +#define DUMMYNET_MBUF_TAGGED 0x00000080 /* * mbuf types. -- 2.41.0