2 * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * This module implements the VALE switch for netmap
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
35 For each switch, an SX lock (RWlock on linux) protects
36 deletion of ports. When configuring or deleting a new port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 * during which the thread may incur a page fault.
41 Hence it is important that sleepable shared locks are used.
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 * a number of slots in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
53 * OS-specific code that is used only within this file.
54 * Other OS-specific code that must be accessed by drivers
55 * is present in netmap_kern.h
58 /* __FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $"); */
60 #include <sys/types.h>
61 #include <sys/errno.h>
62 #include <sys/param.h> /* defines used in kernel.h */
63 #include <sys/kernel.h> /* types used in module initialization */
64 #include <sys/conf.h> /* cdevsw struct, UID, GID */
65 #include <sys/sockio.h>
66 #include <sys/socketvar.h> /* struct socket */
67 #include <sys/malloc.h>
70 #include <sys/socket.h> /* sockaddrs */
71 #include <sys/sysctl.h>
73 #include <net/if_var.h>
74 #include <net/bpf.h> /* BIOCIMMEDIATE */
75 #include <sys/bus.h> /* bus_dmamap_* */
76 #include <sys/endian.h>
77 #include <sys/refcount.h>
80 #define BDG_RWLOCK_T struct lock
82 #define BDG_RWINIT(b) \
83 lockinit(&(b)->bdg_lock, "bdg lock", 0, LK_CANRECURSE)
84 #define BDG_WLOCK(b) lockmgr(&(b)->bdg_lock, LK_EXCLUSIVE)
85 #define BDG_WUNLOCK(b) lockmgr(&(b)->bdg_lock, LK_RELEASE)
86 #define BDG_RLOCK(b) lockmgr(&(b)->bdg_lock, LK_SHARED)
87 #define BDG_RTRYLOCK(b) lockmgr(&(b)->bdg_lock, LK_SHARED|LK_NOWAIT)
88 #define BDG_RUNLOCK(b) lockmgr(&(b)->bdg_lock, LK_RELEASE)
89 #define BDG_RWDESTROY(b) lockuninit(&(b)->bdg_lock)
95 #include <net/netmap.h>
97 #include "netmap_kern.h"
98 #include "netmap_mem2.h"
103 * system parameters (most of them in netmap_kern.h)
104 * NM_NAME prefix for switch port names, default "vale"
105 * NM_BDG_MAXPORTS number of ports
106 * NM_BRIDGES max number of switches in the system.
107 * XXX should become a sysctl or tunable
109 * Switch ports are named valeX:Y where X is the switch name and Y
110 * is the port. If Y matches a physical interface name, the port is
111 * connected to a physical device.
113 * Unlike physical interfaces, switch ports use their own memory region
114 * for rings and buffers.
115 * The virtual interfaces use per-queue lock instead of core lock.
116 * In the tx loop, we aggregate traffic in batches to make all operations
117 * faster. The batch size is bridge_batch.
119 #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
120 #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
121 #define NM_BRIDGE_RINGSIZE 1024 /* in the device */
122 #define NM_BDG_HASH 1024 /* forwarding table entries */
123 #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
124 #define NM_MULTISEG 64 /* max size of a chain of bufs */
125 /* actual size of the tables */
126 #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
127 /* NM_FT_NULL terminates a list of slots in the ft */
128 #define NM_FT_NULL NM_BDG_BATCH_MAX
129 #define NM_BRIDGES 8 /* number of bridges */
133 * bridge_batch is set via sysctl to the max batch size to be
134 * used in the bridge. The actual value may be larger as the
135 * last packet in the block may overflow the size.
137 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
138 SYSCTL_DECL(_dev_netmap);
139 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
142 static int bdg_netmap_attach(struct netmap_adapter *);
143 static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
144 static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
145 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
146 int kern_netmap_regif(struct nmreq *nmr);
149 * Each transmit queue accumulates a batch of packets into
150 * a structure before forwarding. Packets to the same
151 * destination are put in a list using ft_next as a link field.
152 * ft_frags and ft_next are valid only on the first fragment.
154 struct nm_bdg_fwd { /* forwarding entry for a bridge */
155 void *ft_buf; /* netmap or indirect buffer */
156 uint8_t ft_frags; /* how many fragments (only on 1st frag) */
157 uint8_t _ft_port; /* dst port (unused) */
158 uint16_t ft_flags; /* flags, e.g. indirect */
159 uint16_t ft_len; /* src fragment len */
160 uint16_t ft_next; /* next packet to same destination */
164 * For each output interface, nm_bdg_q is used to construct a list.
165 * bq_len is the number of output buffers (we can have coalescing
171 uint32_t bq_len; /* number of buffers */
174 /* XXX revise this */
176 uint64_t mac; /* the top 2 bytes are the epoch */
181 * nm_bridge is a descriptor for a VALE switch.
182 * Interfaces for a bridge are all in bdg_ports[].
183 * The array has fixed size, an empty entry does not terminate
184 * the search, but lookups only occur on attach/detach so we
185 * don't mind if they are slow.
187 * The bridge is non blocking on the transmit ports: excess
188 * packets are dropped if there is no room on the output port.
190 * bdg_lock protects accesses to the bdg_ports array.
191 * This is a rw lock (or equivalent).
194 /* XXX what is the proper alignment/layout ? */
195 BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
197 uint32_t bdg_active_ports; /* 0 means free */
198 char bdg_basename[IFNAMSIZ];
200 /* Indexes of active ports (up to active_ports)
201 * and all other remaining ports.
203 uint8_t bdg_port_index[NM_BDG_MAXPORTS];
205 struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
209 * The function to decide the destination port.
210 * It returns either of an index of the destination port,
211 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
212 * forward this packet. ring_nr is the source ring index, and the
213 * function may overwrite this value to forward this packet to a
214 * different ring index.
215 * This function must be set by netmap_bdgctl().
217 bdg_lookup_fn_t nm_bdg_lookup;
219 /* the forwarding table, MAC+ports.
220 * XXX should be changed to an argument to be passed to
221 * the lookup function, and allocated on attach
223 struct nm_hash_ent ht[NM_BDG_HASH];
228 * XXX in principle nm_bridges could be created dynamically
229 * Right now we have a static array and deletions are protected
230 * by an exclusive lock.
232 struct nm_bridge nm_bridges[NM_BRIDGES];
236 * A few function to tell which kind of port are we using.
237 * XXX should we hold a lock ?
239 * nma_is_vp() virtual port
240 * nma_is_host() port connected to the host stack
241 * nma_is_hw() port connected to a NIC
242 * nma_is_generic() generic netmap adapter XXX stop this madness
245 nma_is_vp(struct netmap_adapter *na)
247 return na->nm_register == bdg_netmap_reg;
252 nma_is_host(struct netmap_adapter *na)
254 return na->nm_register == NULL;
259 nma_is_hw(struct netmap_adapter *na)
261 /* In case of sw adapter, nm_register is NULL */
262 return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
266 nma_is_bwrap(struct netmap_adapter *na)
268 return na->nm_register == netmap_bwrap_register;
274 * this is a slightly optimized copy routine which rounds
275 * to multiple of 64 bytes and is often faster than dealing
276 * with other odd sizes. We assume there is enough room
277 * in the source and destination buffers.
279 * XXX only for multiples of 64 bytes, non overlapped.
282 pkt_copy(void *_src, void *_dst, int l)
284 uint64_t *src = _src;
285 uint64_t *dst = _dst;
286 if (unlikely(l >= 1024)) {
290 for (; likely(l > 0); l-=64) {
305 * locate a bridge among the existing ones.
306 * MUST BE CALLED WITH NMG_LOCK()
308 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
309 * We assume that this is called with a name of at least NM_NAME chars.
311 static struct nm_bridge *
312 nm_find_bridge(const char *name, int create)
315 struct nm_bridge *b = NULL;
319 namelen = strlen(NM_NAME); /* base length */
320 l = name ? strlen(name) : 0; /* actual length */
322 D("invalid bridge name %s", name ? name : NULL);
325 for (i = namelen + 1; i < l; i++) {
326 if (name[i] == ':') {
331 if (namelen >= IFNAMSIZ)
333 ND("--- prefix is '%.*s' ---", namelen, name);
335 /* lookup the name, remember empty slot if there is one */
336 for (i = 0; i < NM_BRIDGES; i++) {
337 struct nm_bridge *x = nm_bridges + i;
339 if (x->bdg_active_ports == 0) {
340 if (create && b == NULL)
341 b = x; /* record empty slot */
342 } else if (x->bdg_namelen != namelen) {
344 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
345 ND("found '%.*s' at %d", namelen, name, i);
350 if (i == NM_BRIDGES && b) { /* name not found, can create entry */
351 /* initialize the bridge */
352 strncpy(b->bdg_basename, name, namelen);
353 ND("create new bridge %s with ports %d", b->bdg_basename,
354 b->bdg_active_ports);
355 b->bdg_namelen = namelen;
356 b->bdg_active_ports = 0;
357 for (i = 0; i < NM_BDG_MAXPORTS; i++)
358 b->bdg_port_index[i] = i;
359 /* set the default function */
360 b->nm_bdg_lookup = netmap_bdg_learning;
361 /* reset the MAC address table */
362 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
369 * Free the forwarding tables for rings attached to switch ports.
372 nm_free_bdgfwd(struct netmap_adapter *na)
375 struct netmap_kring *kring;
378 nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
379 kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
380 for (i = 0; i < nrings; i++) {
381 if (kring[i].nkr_ft) {
382 kfree(kring[i].nkr_ft, M_DEVBUF);
383 kring[i].nkr_ft = NULL; /* protect from freeing twice */
390 * Allocate the forwarding tables for the rings attached to the bridge ports.
393 nm_alloc_bdgfwd(struct netmap_adapter *na)
395 int nrings, l, i, num_dstq;
396 struct netmap_kring *kring;
399 /* all port:rings + broadcast */
400 num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
401 l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
402 l += sizeof(struct nm_bdg_q) * num_dstq;
403 l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
405 nrings = na->num_tx_rings + 1;
406 kring = na->tx_rings;
407 for (i = 0; i < nrings; i++) {
408 struct nm_bdg_fwd *ft;
409 struct nm_bdg_q *dstq;
412 ft = kmalloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
417 dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
418 for (j = 0; j < num_dstq; j++) {
419 dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
422 kring[i].nkr_ft = ft;
429 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
431 int s_hw = hw, s_sw = sw;
432 int i, lim =b->bdg_active_ports;
433 uint8_t tmp[NM_BDG_MAXPORTS];
437 make a copy of bdg_port_index;
438 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
439 in the array of bdg_port_index, replacing them with
440 entries from the bottom of the array;
441 decrement bdg_active_ports;
442 acquire BDG_WLOCK() and copy back the array.
445 D("detach %d and %d (lim %d)", hw, sw, lim);
446 /* make a copy of the list of active ports, update it,
447 * and then copy back within BDG_WLOCK().
449 memcpy(tmp, b->bdg_port_index, sizeof(tmp));
450 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
451 if (hw >= 0 && tmp[i] == hw) {
452 ND("detach hw %d at %d", hw, i);
453 lim--; /* point to last active port */
454 tmp[i] = tmp[lim]; /* swap with i */
455 tmp[lim] = hw; /* now this is inactive */
457 } else if (sw >= 0 && tmp[i] == sw) {
458 ND("detach sw %d at %d", sw, i);
467 if (hw >= 0 || sw >= 0) {
468 D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
472 b->bdg_ports[s_hw] = NULL;
474 b->bdg_ports[s_sw] = NULL;
476 memcpy(b->bdg_port_index, tmp, sizeof(tmp));
477 b->bdg_active_ports = lim;
480 ND("now %d active ports", lim);
482 ND("marking bridge %s as free", b->bdg_basename);
483 b->nm_bdg_lookup = NULL;
488 netmap_adapter_vp_dtor(struct netmap_adapter *na)
490 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
491 struct nm_bridge *b = vpna->na_bdg;
492 struct ifnet *ifp = na->ifp;
494 ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
497 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
500 bzero(ifp, sizeof(*ifp));
501 kfree(ifp, M_DEVBUF);
506 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
508 const char *name = nmr->nr_name;
511 struct netmap_adapter *ret;
512 struct netmap_vp_adapter *vpna;
514 int i, j, cand = -1, cand2 = -1;
517 *na = NULL; /* default return value */
519 /* first try to see if this is a bridge port. */
521 if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
522 return 0; /* no error, but no VALE prefix */
525 b = nm_find_bridge(name, create);
527 D("no bridges available for '%s'", name);
531 /* Now we are sure that name starts with the bridge's name,
532 * lookup the port in the bridge. We need to scan the entire
533 * list. It is not important to hold a WLOCK on the bridge
534 * during the search because NMG_LOCK already guarantees
535 * that there are no other possible writers.
538 /* lookup in the local list of ports */
539 for (j = 0; j < b->bdg_active_ports; j++) {
540 i = b->bdg_port_index[j];
541 vpna = b->bdg_ports[i];
542 // KASSERT(na != NULL);
544 /* XXX make sure the name only contains one : */
545 if (!strcmp(NM_IFPNAME(ifp), name)) {
546 netmap_adapter_get(&vpna->up);
547 ND("found existing if %s refs %d", name,
548 vpna->na_bdg_refcount);
549 *na = (struct netmap_adapter *)vpna;
553 /* not found, should we create it? */
556 /* yes we should, see if we have space to attach entries */
557 needed = 2; /* in some cases we only need 1 */
558 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
559 D("bridge full %d, cannot create new port", b->bdg_active_ports);
562 /* record the next two ports available, but do not allocate yet */
563 cand = b->bdg_port_index[b->bdg_active_ports];
564 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
565 ND("+++ bridge %s port %s used %d avail %d %d",
566 b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
569 * try see if there is a matching NIC with this name
570 * (after the bridge's name)
572 ifp = ifunit(name + b->bdg_namelen + 1);
573 if (!ifp) { /* this is a virtual port */
574 /* Create a temporary NA with arguments, then
575 * bdg_netmap_attach() will allocate the real one
576 * and attach it to the ifp
578 struct netmap_adapter tmp_na;
581 /* nr_cmd must be 0 for a virtual port */
584 bzero(&tmp_na, sizeof(tmp_na));
586 tmp_na.num_tx_rings = nmr->nr_tx_rings;
587 nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
588 nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
589 tmp_na.num_rx_rings = nmr->nr_rx_rings;
590 nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
591 nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
592 nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
593 1, NM_BDG_MAXSLOTS, NULL);
594 tmp_na.num_tx_desc = nmr->nr_tx_slots;
595 nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
596 1, NM_BDG_MAXSLOTS, NULL);
597 tmp_na.num_rx_desc = nmr->nr_rx_slots;
599 /* create a struct ifnet for the new port.
600 * need M_NOWAIT as we are under nma_lock
602 ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
606 strcpy(ifp->if_xname, name);
608 /* bdg_netmap_attach creates a struct netmap_adapter */
609 error = bdg_netmap_attach(&tmp_na);
611 D("error %d", error);
612 kfree(ifp, M_DEVBUF);
616 cand2 = -1; /* only need one port */
617 } else { /* this is a NIC */
618 struct ifnet *fake_ifp;
620 error = netmap_get_hw_na(ifp, &ret);
621 if (error || ret == NULL)
624 /* make sure the NIC is not already in use */
625 if (NETMAP_OWNED_BY_ANY(ret)) {
626 D("NIC %s busy, cannot attach to bridge",
631 /* create a fake interface */
632 fake_ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
637 strcpy(fake_ifp->if_xname, name);
638 error = netmap_bwrap_attach(fake_ifp, ifp);
640 kfree(fake_ifp, M_DEVBUF);
644 if (nmr->nr_arg1 != NETMAP_BDG_HOST)
645 cand2 = -1; /* only need one port */
650 vpna = (struct netmap_vp_adapter *)ret;
653 vpna->bdg_port = cand;
654 ND("NIC %p to bridge port %d", vpna, cand);
655 /* bind the port to the bridge (virtual ports are not active) */
656 b->bdg_ports[cand] = vpna;
658 b->bdg_active_ports++;
660 struct netmap_vp_adapter *hostna = vpna + 1;
661 /* also bind the host stack to the bridge */
662 b->bdg_ports[cand2] = hostna;
663 hostna->bdg_port = cand2;
665 b->bdg_active_ports++;
666 ND("host %p to bridge port %d", hostna, cand2);
668 ND("if %s refs %d", name, vpna->up.na_refcount);
671 netmap_adapter_get(ret);
683 /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
685 nm_bdg_attach(struct nmreq *nmr)
687 struct netmap_adapter *na;
688 struct netmap_if *nifp;
689 struct netmap_priv_d *npriv;
690 struct netmap_bwrap_adapter *bna;
693 npriv = kmalloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
697 /* XXX probably netmap_get_bdg_na() */
698 error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
699 if (error) /* no device, or another bridge or user owns the device */
701 /* netmap_get_na() sets na_bdg if this is a physical interface
702 * that we can attach to a switch.
704 if (!nma_is_bwrap(na)) {
705 /* got reference to a virtual port or direct access to a NIC.
706 * perhaps specified no bridge prefix or wrong NIC name
712 if (na->active_fds > 0) { /* already registered */
717 nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
722 bna = (struct netmap_bwrap_adapter*)na;
723 bna->na_kpriv = npriv;
725 ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
729 netmap_adapter_put(na);
732 bzero(npriv, sizeof(*npriv));
733 kfree(npriv, M_DEVBUF);
738 nm_bdg_detach(struct nmreq *nmr)
740 struct netmap_adapter *na;
742 struct netmap_bwrap_adapter *bna;
746 error = netmap_get_na(nmr, &na, 0 /* don't create */);
747 if (error) { /* no device, or another bridge or user owns the device */
750 if (!nma_is_bwrap(na)) {
751 /* got reference to a virtual port or direct access to a NIC.
752 * perhaps specified no bridge's prefix or wrong NIC's name
757 bna = (struct netmap_bwrap_adapter *)na;
759 if (na->active_fds == 0) { /* not registered */
764 last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
765 if (!last_instance) {
766 D("--- error, trying to detach an entry with active mmaps");
769 struct netmap_priv_d *npriv = bna->na_kpriv;
771 bna->na_kpriv = NULL;
774 bzero(npriv, sizeof(*npriv));
775 kfree(npriv, M_DEVBUF);
779 netmap_adapter_put(na);
787 /* exported to kernel callers, e.g. OVS ?
789 * Called without NMG_LOCK.
792 netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
795 struct netmap_vp_adapter *na;
797 char *name = nmr->nr_name;
798 int cmd = nmr->nr_cmd, namelen = strlen(name);
802 case NETMAP_BDG_ATTACH:
803 error = nm_bdg_attach(nmr);
806 case NETMAP_BDG_DETACH:
807 error = nm_bdg_detach(nmr);
810 case NETMAP_BDG_LIST:
811 /* this is used to enumerate bridges and ports */
812 if (namelen) { /* look up indexes of bridge and port */
813 if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
818 b = nm_find_bridge(name, 0 /* don't create */);
826 for (j = 0; j < b->bdg_active_ports; j++) {
827 i = b->bdg_port_index[j];
828 na = b->bdg_ports[i];
830 D("---AAAAAAAAARGH-------");
834 /* the former and the latter identify a
835 * virtual port and a NIC, respectively
837 if (!strcmp(iter->if_xname, name)) {
839 nmr->nr_arg1 = b - nm_bridges;
840 nmr->nr_arg2 = i; /* port index */
847 /* return the first non-empty entry starting from
848 * bridge nr_arg1 and port nr_arg2.
850 * Users can detect the end of the same bridge by
851 * seeing the new and old value of nr_arg1, and can
852 * detect the end of all the bridge by error != 0
858 for (error = ENOENT; i < NM_BRIDGES; i++) {
860 if (j >= b->bdg_active_ports) {
861 j = 0; /* following bridges scan from 0 */
866 j = b->bdg_port_index[j];
867 na = b->bdg_ports[j];
869 strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
877 case NETMAP_BDG_LOOKUP_REG:
878 /* register a lookup function to the given bridge.
879 * nmr->nr_name may be just bridge's name (including ':'
880 * if it is not just NM_NAME).
887 b = nm_find_bridge(name, 0 /* don't create */);
891 b->nm_bdg_lookup = func;
897 D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
906 netmap_vp_krings_create(struct netmap_adapter *na)
908 u_int ntx, nrx, tailroom;
912 /* XXX vps do not need host rings,
913 * but we crash if we don't have one
915 ntx = na->num_tx_rings + 1;
916 nrx = na->num_rx_rings + 1;
919 * Leases are attached to RX rings on vale ports
921 tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
923 error = netmap_krings_create(na, ntx, nrx, tailroom);
927 leases = na->tailroom;
929 for (i = 0; i < nrx; i++) { /* Receive rings */
930 na->rx_rings[i].nkr_leases = leases;
931 leases += na->num_rx_desc;
934 error = nm_alloc_bdgfwd(na);
936 netmap_krings_delete(na);
944 netmap_vp_krings_delete(struct netmap_adapter *na)
947 netmap_krings_delete(na);
952 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
953 struct netmap_vp_adapter *na, u_int ring_nr);
957 * Grab packets from a kring, move them into the ft structure
958 * associated to the tx (input) port. Max one instance per port,
959 * filtered on input (ioctl, poll or XXX).
960 * Returns the next position in the ring.
963 nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
964 struct netmap_kring *kring, u_int end)
966 struct netmap_ring *ring = kring->ring;
967 struct nm_bdg_fwd *ft;
968 u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
969 u_int ft_i = 0; /* start from 0 */
970 u_int frags = 1; /* how many frags ? */
971 struct nm_bridge *b = na->na_bdg;
973 /* To protect against modifications to the bridge we acquire a
974 * shared lock, waiting if we can sleep (if the source port is
975 * attached to a user process) or with a trylock otherwise (NICs).
977 ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
978 if (na->up.na_flags & NAF_BDG_MAYSLEEP)
980 else if (!BDG_RTRYLOCK(b))
982 ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
985 for (; likely(j != end); j = nm_next(j, lim)) {
986 struct netmap_slot *slot = &ring->slot[j];
989 ft[ft_i].ft_len = slot->len;
990 ft[ft_i].ft_flags = slot->flags;
992 ND("flags is 0x%x", slot->flags);
993 /* this slot goes into a list so initialize the link field */
994 ft[ft_i].ft_next = NM_FT_NULL;
995 buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
996 (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
999 if (slot->flags & NS_MOREFRAG) {
1003 if (unlikely(netmap_verbose && frags > 1))
1004 RD(5, "%d frags at %d", frags, ft_i - frags);
1005 ft[ft_i - frags].ft_frags = frags;
1007 if (unlikely((int)ft_i >= bridge_batch))
1008 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1011 D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1012 // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1013 ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1014 ft[ft_i - frags].ft_frags = frags - 1;
1017 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1024 *---- support for virtual bridge -----
1027 /* ----- FreeBSD if_bridge hash function ------- */
1030 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1031 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1033 * http://www.burtleburtle.net/bob/hash/spooky.html
1035 #define mix(a, b, c) \
1037 a -= b; a -= c; a ^= (c >> 13); \
1038 b -= c; b -= a; b ^= (a << 8); \
1039 c -= a; c -= b; c ^= (b >> 13); \
1040 a -= b; a -= c; a ^= (c >> 12); \
1041 b -= c; b -= a; b ^= (a << 16); \
1042 c -= a; c -= b; c ^= (b >> 5); \
1043 a -= b; a -= c; a ^= (c >> 3); \
1044 b -= c; b -= a; b ^= (a << 10); \
1045 c -= a; c -= b; c ^= (b >> 15); \
1046 } while (/*CONSTCOND*/0)
1048 static __inline uint32_t
1049 nm_bridge_rthash(const uint8_t *addr)
1051 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1061 #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
1062 return (c & BRIDGE_RTHASH_MASK);
1069 bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1071 struct netmap_vp_adapter *vpna =
1072 (struct netmap_vp_adapter*)na;
1073 struct ifnet *ifp = na->ifp;
1075 /* the interface is already attached to the bridge,
1076 * so we only need to toggle IFCAP_NETMAP.
1078 BDG_WLOCK(vpna->na_bdg);
1080 ifp->if_capenable |= IFCAP_NETMAP;
1082 ifp->if_capenable &= ~IFCAP_NETMAP;
1084 BDG_WUNLOCK(vpna->na_bdg);
1090 * Lookup function for a learning bridge.
1091 * Update the hash table with the source address,
1092 * and then returns the destination port index, and the
1093 * ring in *dst_ring (at the moment, always use ring 0)
1096 netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1097 struct netmap_vp_adapter *na)
1099 struct nm_hash_ent *ht = na->na_bdg->ht;
1101 u_int dst, mysrc = na->bdg_port;
1102 uint64_t smac, dmac;
1105 D("invalid buf length %d", buf_len);
1106 return NM_BDG_NOPORT;
1108 dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1109 smac = le64toh(*(uint64_t *)(buf + 4));
1113 * The hash is somewhat expensive, there might be some
1114 * worthwhile optimizations here.
1116 if ((buf[6] & 1) == 0) { /* valid src */
1118 sh = nm_bridge_rthash(s); // XXX hash of source
1119 /* update source port forwarding entry */
1120 ht[sh].mac = smac; /* XXX expire ? */
1121 ht[sh].ports = mysrc;
1123 D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1124 s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1126 dst = NM_BDG_BROADCAST;
1127 if ((buf[0] & 1) == 0) { /* unicast */
1128 dh = nm_bridge_rthash(buf); // XXX hash of dst
1129 if (ht[dh].mac == dmac) { /* found dst */
1132 /* XXX otherwise return NM_BDG_UNKNOWN ? */
1140 * This flush routine supports only unicast and broadcast but a large
1141 * number of ports, and lets us replace the learn and dispatch functions.
1144 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1147 struct nm_bdg_q *dst_ents, *brddst;
1148 uint16_t num_dsts = 0, *dsts;
1149 struct nm_bridge *b = na->na_bdg;
1150 u_int i, j, me = na->bdg_port;
1153 * The work area (pointed by ft) is followed by an array of
1154 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1155 * queues per port plus one for the broadcast traffic.
1156 * Then we have an array of destination indexes.
1158 dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1159 dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1161 /* first pass: find a destination for each packet in the batch */
1162 for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1163 uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1164 uint16_t dst_port, d_i;
1167 ND("slot %d frags %d", i, ft[i].ft_frags);
1168 dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
1170 if (netmap_verbose > 255)
1171 RD(5, "slot %d port %d -> %d", i, me, dst_port);
1172 if (dst_port == NM_BDG_NOPORT)
1173 continue; /* this packet is identified to be dropped */
1174 else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1176 else if (dst_port == NM_BDG_BROADCAST)
1177 dst_ring = 0; /* broadcasts always go to ring 0 */
1178 else if (unlikely(dst_port == me ||
1179 !b->bdg_ports[dst_port]))
1182 /* get a position in the scratch pad */
1183 d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1186 /* append the first fragment to the list */
1187 if (d->bq_head == NM_FT_NULL) { /* new destination */
1188 d->bq_head = d->bq_tail = i;
1189 /* remember this position to be scanned later */
1190 if (dst_port != NM_BDG_BROADCAST)
1191 dsts[num_dsts++] = d_i;
1193 ft[d->bq_tail].ft_next = i;
1196 d->bq_len += ft[i].ft_frags;
1200 * Broadcast traffic goes to ring 0 on all destinations.
1201 * So we need to add these rings to the list of ports to scan.
1202 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1203 * expensive. We should keep a compact list of active destinations
1204 * so we could shorten this loop.
1206 brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1207 if (brddst->bq_head != NM_FT_NULL) {
1208 for (j = 0; likely(j < b->bdg_active_ports); j++) {
1210 i = b->bdg_port_index[j];
1211 if (unlikely(i == me))
1213 d_i = i * NM_BDG_MAXRINGS;
1214 if (dst_ents[d_i].bq_head == NM_FT_NULL)
1215 dsts[num_dsts++] = d_i;
1219 ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1220 /* second pass: scan destinations (XXX will be modular somehow) */
1221 for (i = 0; i < num_dsts; i++) {
1222 struct ifnet *dst_ifp;
1223 struct netmap_vp_adapter *dst_na;
1224 struct netmap_kring *kring;
1225 struct netmap_ring *ring;
1226 u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1227 u_int needed, howmany;
1228 int retry = netmap_txsync_retry;
1230 uint32_t my_start = 0, lease_idx = 0;
1234 ND("second pass %d port %d", i, d_i);
1236 // XXX fix the division
1237 dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1238 /* protect from the lookup function returning an inactive
1241 if (unlikely(dst_na == NULL))
1243 if (dst_na->up.na_flags & NAF_SW_ONLY)
1245 dst_ifp = dst_na->up.ifp;
1247 * The interface may be in !netmap mode in two cases:
1248 * - when na is attached but not activated yet;
1249 * - when na is being deactivated but is still attached.
1251 if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1252 ND("not in netmap mode!");
1256 /* there is at least one either unicast or broadcast packet */
1257 brd_next = brddst->bq_head;
1259 /* we need to reserve this many slots. If fewer are
1260 * available, some packets will be dropped.
1261 * Packets may have multiple fragments, so there is a
1262 * chance that we may not use all of the slots
1263 * we have claimed, so we will need to handle the leftover
1264 * ones when we regain the lock.
1266 needed = d->bq_len + brddst->bq_len;
1268 ND(5, "pass 2 dst %d is %x %s",
1269 i, d_i, is_vp ? "virtual" : "nic/host");
1270 dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1271 nrings = dst_na->up.num_rx_rings;
1272 if (dst_nr >= nrings)
1273 dst_nr = dst_nr % nrings;
1274 kring = &dst_na->up.rx_rings[dst_nr];
1276 lim = kring->nkr_num_slots - 1;
1280 /* reserve the buffers in the queue and an entry
1281 * to report completion, and drop lock.
1282 * XXX this might become a helper function.
1284 lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1285 if (kring->nkr_stopped) {
1286 lockmgr(&kring->q_lock, LK_RELEASE);
1289 if (dst_na->retry) {
1290 dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1292 my_start = j = kring->nkr_hwlease;
1293 howmany = nm_kr_space(kring, 1);
1294 if (needed < howmany)
1296 lease_idx = nm_kr_lease(kring, howmany, 1);
1297 lockmgr(&kring->q_lock, LK_RELEASE);
1299 /* only retry if we need more than available slots */
1300 if (retry && needed <= howmany)
1303 /* copy to the destination queue */
1304 while (howmany > 0) {
1305 struct netmap_slot *slot;
1306 struct nm_bdg_fwd *ft_p, *ft_end;
1309 /* find the queue from which we pick next packet.
1310 * NM_FT_NULL is always higher than valid indexes
1311 * so we never dereference it if the other list
1312 * has packets (and if both are empty we never
1315 if (next < brd_next) {
1317 next = ft_p->ft_next;
1318 } else { /* insert broadcast */
1319 ft_p = ft + brd_next;
1320 brd_next = ft_p->ft_next;
1322 cnt = ft_p->ft_frags; // cnt > 0
1323 if (unlikely(cnt > howmany))
1324 break; /* no more space */
1326 if (netmap_verbose && cnt > 1)
1327 RD(5, "rx %d frags to %d", cnt, j);
1328 ft_end = ft_p + cnt;
1330 void *dst, *src = ft_p->ft_buf;
1331 size_t len = (ft_p->ft_len + 63) & ~63;
1333 slot = &ring->slot[j];
1334 dst = BDG_NMB(&dst_na->up, slot);
1335 /* round to a multiple of 64 */
1337 ND("send %d %d bytes at %s:%d",
1338 i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1339 if (ft_p->ft_flags & NS_INDIRECT) {
1340 if (copyin(src, dst, len)) {
1341 // invalid user pointer, pretend len is 0
1345 //memcpy(dst, src, len);
1346 pkt_copy(src, dst, (int)len);
1348 slot->len = ft_p->ft_len;
1349 slot->flags = (cnt << 8)| NS_MOREFRAG;
1350 j = nm_next(j, lim);
1353 } while (ft_p != ft_end);
1354 slot->flags = (cnt << 8); /* clear flag on last entry */
1356 if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1360 /* current position */
1361 uint32_t *p = kring->nkr_leases; /* shorthand */
1362 uint32_t update_pos;
1363 int still_locked = 1;
1365 lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1366 if (unlikely(howmany > 0)) {
1367 /* not used all bufs. If i am the last one
1368 * i can recover the slots, otherwise must
1369 * fill them with 0 to mark empty packets.
1371 ND("leftover %d bufs", howmany);
1372 if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1373 /* yes i am the last one */
1374 ND("roll back nkr_hwlease to %d", j);
1375 kring->nkr_hwlease = j;
1377 while (howmany-- > 0) {
1378 ring->slot[j].len = 0;
1379 ring->slot[j].flags = 0;
1380 j = nm_next(j, lim);
1384 p[lease_idx] = j; /* report I am done */
1386 update_pos = nm_kr_rxpos(kring);
1388 if (my_start == update_pos) {
1389 /* all slots before my_start have been reported,
1390 * so scan subsequent leases to see if other ranges
1391 * have been completed, and to a selwakeup or txsync.
1393 while (lease_idx != kring->nkr_lease_idx &&
1394 p[lease_idx] != NR_NOSLOT) {
1396 p[lease_idx] = NR_NOSLOT;
1397 lease_idx = nm_next(lease_idx, lim);
1399 /* j is the new 'write' position. j != my_start
1400 * means there are new buffers to report
1402 if (likely(j != my_start)) {
1403 uint32_t old_avail = kring->nr_hwavail;
1405 kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1406 j - kring->nr_hwcur :
1407 j + lim + 1 - kring->nr_hwcur;
1408 if (kring->nr_hwavail < old_avail) {
1409 D("avail shrink %d -> %d",
1410 old_avail, kring->nr_hwavail);
1412 dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1414 lockmgr(&kring->q_lock, LK_RELEASE);
1415 if (dst_na->retry && retry--)
1420 lockmgr(&kring->q_lock, LK_RELEASE);
1423 d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1426 brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1432 netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1434 struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1435 struct netmap_ring *ring = kring->ring;
1436 u_int j, k, lim = kring->nkr_num_slots - 1;
1440 return netmap_ring_reinit(kring);
1442 if (bridge_batch <= 0) { /* testing only */
1446 if (bridge_batch > NM_BDG_BATCH)
1447 bridge_batch = NM_BDG_BATCH;
1449 j = nm_bdg_preflush(na, ring_nr, kring, k);
1451 D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1452 /* k-j modulo ring size is the number of slots processed */
1454 k += kring->nkr_num_slots;
1455 kring->nr_hwavail = lim - (k - j);
1458 kring->nr_hwcur = j;
1459 ring->avail = kring->nr_hwavail;
1461 D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1467 * main dispatch routine for the bridge.
1468 * We already know that only one thread is running this.
1469 * we must run nm_bdg_preflush without lock.
1472 bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1474 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1475 return netmap_vp_txsync(vpna, ring_nr, flags);
1480 * user process reading from a VALE switch.
1481 * Already protected against concurrent calls from userspace,
1482 * but we must acquire the queue's lock to protect against
1483 * writers on the same queue.
1486 bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1488 struct netmap_kring *kring = &na->rx_rings[ring_nr];
1489 struct netmap_ring *ring = kring->ring;
1490 u_int j, lim = kring->nkr_num_slots - 1;
1491 u_int k = ring->cur, resvd = ring->reserved;
1494 lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1496 D("ouch dangerous reset!!!");
1497 n = netmap_ring_reinit(kring);
1501 /* skip past packets that userspace has released */
1502 j = kring->nr_hwcur; /* netmap ring index */
1504 if (resvd + ring->avail >= lim + 1) {
1505 D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1506 ring->reserved = resvd = 0; // XXX panic...
1508 k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1511 if (j != k) { /* userspace has released some packets. */
1514 n += kring->nkr_num_slots;
1515 ND("userspace releases %d packets", n);
1516 for (n = 0; likely(j != k); n++) {
1517 struct netmap_slot *slot = &ring->slot[j];
1518 void *addr = BDG_NMB(na, slot);
1520 if (addr == netmap_buffer_base) { /* bad buf */
1521 D("bad buffer index %d, ignore ?",
1524 slot->flags &= ~NS_BUF_CHANGED;
1525 j = nm_next(j, lim);
1527 kring->nr_hwavail -= n;
1528 kring->nr_hwcur = k;
1530 /* tell userspace that there are new packets */
1531 ring->avail = kring->nr_hwavail - resvd;
1534 lockmgr(&kring->q_lock, LK_RELEASE);
1539 bdg_netmap_attach(struct netmap_adapter *arg)
1541 struct netmap_vp_adapter *vpna;
1542 struct netmap_adapter *na;
1545 vpna = kmalloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1550 na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1551 na->nm_txsync = bdg_netmap_txsync;
1552 na->nm_rxsync = bdg_netmap_rxsync;
1553 na->nm_register = bdg_netmap_reg;
1554 na->nm_dtor = netmap_adapter_vp_dtor;
1555 na->nm_krings_create = netmap_vp_krings_create;
1556 na->nm_krings_delete = netmap_vp_krings_delete;
1557 na->nm_mem = netmap_mem_private_new(NM_IFPNAME(arg->ifp),
1558 na->num_tx_rings, na->num_tx_desc,
1559 na->num_rx_rings, na->num_rx_desc);
1560 /* other nmd fields are set in the common routine */
1561 error = netmap_attach_common(na);
1563 kfree(vpna, M_DEVBUF);
1570 netmap_bwrap_dtor(struct netmap_adapter *na)
1572 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1573 struct netmap_adapter *hwna = bna->hwna;
1574 struct nm_bridge *b = bna->up.na_bdg,
1575 *bh = bna->host.na_bdg;
1576 struct ifnet *ifp = na->ifp;
1581 netmap_bdg_detach_common(b, bna->up.bdg_port,
1582 (bh ? bna->host.bdg_port : -1));
1585 hwna->na_private = NULL;
1586 netmap_adapter_put(hwna);
1588 bzero(ifp, sizeof(*ifp));
1589 kfree(ifp, M_DEVBUF);
1595 * Pass packets from nic to the bridge.
1596 * XXX TODO check locking: this is called from the interrupt
1597 * handler so we should make sure that the interface is not
1598 * disconnected while passing down an interrupt.
1600 * Note, no user process can access this NIC so we can ignore
1601 * the info in the 'ring'.
1603 /* callback that overwrites the hwna notify callback.
1604 * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1605 * The bridge wrapper then sends the packets through the bridge.
1608 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1610 struct ifnet *ifp = na->ifp;
1611 struct netmap_bwrap_adapter *bna = na->na_private;
1612 struct netmap_vp_adapter *hostna = &bna->host;
1613 struct netmap_kring *kring, *bkring;
1614 struct netmap_ring *ring;
1615 int is_host_ring = ring_nr == na->num_rx_rings;
1616 struct netmap_vp_adapter *vpna = &bna->up;
1619 ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1621 if (flags & NAF_DISABLE_NOTIFY) {
1622 kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1623 bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1624 if (kring->nkr_stopped)
1625 netmap_disable_ring(bkring);
1627 bkring->nkr_stopped = 0;
1631 if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1637 kring = &na->rx_rings[ring_nr];
1640 /* make sure the ring is not disabled */
1641 if (nm_kr_tryget(kring))
1644 if (is_host_ring && hostna->na_bdg == NULL) {
1645 error = bna->save_notify(na, ring_nr, tx, flags);
1653 /* fetch packets that have arrived.
1654 * XXX maybe do this in a loop ?
1656 error = na->nm_rxsync(na, ring_nr, 0);
1660 if (kring->nr_hwavail == 0 && netmap_verbose) {
1661 D("how strange, interrupt with no packets on %s",
1666 ring->cur = nm_kr_rxpos(kring);
1667 netmap_vp_txsync(vpna, ring_nr, flags);
1670 error = na->nm_rxsync(na, ring_nr, 0);
1678 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1680 struct netmap_bwrap_adapter *bna =
1681 (struct netmap_bwrap_adapter *)na;
1682 struct netmap_adapter *hwna = bna->hwna;
1683 struct netmap_vp_adapter *hostna = &bna->host;
1686 ND("%s %d", NM_IFPNAME(ifp), onoff);
1691 hwna->na_lut = na->na_lut;
1692 hwna->na_lut_objtotal = na->na_lut_objtotal;
1694 if (hostna->na_bdg) {
1695 hostna->up.na_lut = na->na_lut;
1696 hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1699 /* cross-link the netmap rings */
1700 for (i = 0; i <= na->num_tx_rings; i++) {
1701 hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1702 hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1704 for (i = 0; i <= na->num_rx_rings; i++) {
1705 hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1706 hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1711 error = hwna->nm_register(hwna, onoff);
1716 bdg_netmap_reg(na, onoff);
1719 bna->save_notify = hwna->nm_notify;
1720 hwna->nm_notify = netmap_bwrap_intr_notify;
1722 hwna->nm_notify = bna->save_notify;
1723 hwna->na_lut = NULL;
1724 hwna->na_lut_objtotal = 0;
1731 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1732 u_int *rxr, u_int *rxd)
1734 struct netmap_bwrap_adapter *bna =
1735 (struct netmap_bwrap_adapter *)na;
1736 struct netmap_adapter *hwna = bna->hwna;
1738 /* forward the request */
1739 netmap_update_config(hwna);
1740 /* swap the results */
1741 *txr = hwna->num_rx_rings;
1742 *txd = hwna->num_rx_desc;
1743 *rxr = hwna->num_tx_rings;
1744 *rxd = hwna->num_rx_desc;
1750 netmap_bwrap_krings_create(struct netmap_adapter *na)
1752 struct netmap_bwrap_adapter *bna =
1753 (struct netmap_bwrap_adapter *)na;
1754 struct netmap_adapter *hwna = bna->hwna;
1755 struct netmap_adapter *hostna = &bna->host.up;
1758 ND("%s", NM_IFPNAME(na->ifp));
1760 error = netmap_vp_krings_create(na);
1764 error = hwna->nm_krings_create(hwna);
1766 netmap_vp_krings_delete(na);
1770 hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1771 hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1777 netmap_bwrap_krings_delete(struct netmap_adapter *na)
1779 struct netmap_bwrap_adapter *bna =
1780 (struct netmap_bwrap_adapter *)na;
1781 struct netmap_adapter *hwna = bna->hwna;
1783 ND("%s", NM_IFPNAME(na->ifp));
1785 hwna->nm_krings_delete(hwna);
1786 netmap_vp_krings_delete(na);
1789 /* notify method for the bridge-->hwna direction */
1791 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1793 struct netmap_bwrap_adapter *bna =
1794 (struct netmap_bwrap_adapter *)na;
1795 struct netmap_adapter *hwna = bna->hwna;
1796 struct netmap_kring *kring, *hw_kring;
1797 struct netmap_ring *ring;
1804 kring = &na->rx_rings[ring_n];
1805 hw_kring = &hwna->tx_rings[ring_n];
1808 lim = kring->nkr_num_slots - 1;
1809 k = nm_kr_rxpos(kring);
1811 if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1814 ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1815 NM_IFPNAME(na->ifp), ring_n,
1816 kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1817 ring->cur, ring->avail, ring->reserved,
1818 hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1819 if (ring_n == na->num_rx_rings) {
1820 netmap_txsync_to_host(hwna);
1822 error = hwna->nm_txsync(hwna, ring_n, flags);
1824 kring->nr_hwcur = ring->cur;
1825 kring->nr_hwavail = 0;
1826 kring->nr_hwreserved = lim - ring->avail;
1827 ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1828 NM_IFPNAME(na->ifp), ring_n,
1829 kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1830 ring->cur, ring->avail, ring->reserved,
1831 hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1837 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1839 struct netmap_bwrap_adapter *bna = na->na_private;
1840 struct netmap_adapter *port_na = &bna->up.up;
1841 if (tx == NR_TX || ring_n != 0)
1843 return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1846 /* attach a bridge wrapper to the 'real' device */
1848 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1850 struct netmap_bwrap_adapter *bna;
1851 struct netmap_adapter *na;
1852 struct netmap_adapter *hwna = NA(real);
1853 struct netmap_adapter *hostna;
1857 bna = kmalloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1863 /* fill the ring data for the bwrap adapter with rx/tx meanings
1864 * swapped. The real cross-linking will be done during register,
1865 * when all the krings will have been created.
1867 na->num_rx_rings = hwna->num_tx_rings;
1868 na->num_tx_rings = hwna->num_rx_rings;
1869 na->num_tx_desc = hwna->num_rx_desc;
1870 na->num_rx_desc = hwna->num_tx_desc;
1871 na->nm_dtor = netmap_bwrap_dtor;
1872 na->nm_register = netmap_bwrap_register;
1873 // na->nm_txsync = netmap_bwrap_txsync;
1874 // na->nm_rxsync = netmap_bwrap_rxsync;
1875 na->nm_config = netmap_bwrap_config;
1876 na->nm_krings_create = netmap_bwrap_krings_create;
1877 na->nm_krings_delete = netmap_bwrap_krings_delete;
1878 na->nm_notify = netmap_bwrap_notify;
1879 na->nm_mem = hwna->nm_mem;
1880 na->na_private = na; /* prevent NIOCREGIF */
1881 bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1884 netmap_adapter_get(hwna);
1885 hwna->na_private = bna; /* weak reference */
1887 hostna = &bna->host.up;
1888 hostna->ifp = hwna->ifp;
1889 hostna->num_tx_rings = 1;
1890 hostna->num_tx_desc = hwna->num_rx_desc;
1891 hostna->num_rx_rings = 1;
1892 hostna->num_rx_desc = hwna->num_tx_desc;
1893 // hostna->nm_txsync = netmap_bwrap_host_txsync;
1894 // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1895 hostna->nm_notify = netmap_bwrap_host_notify;
1896 hostna->nm_mem = na->nm_mem;
1897 hostna->na_private = bna;
1899 D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1900 na->num_tx_rings, na->num_tx_desc,
1901 na->num_rx_rings, na->num_rx_desc);
1903 error = netmap_attach_common(na);
1905 netmap_adapter_put(hwna);
1906 kfree(bna, M_DEVBUF);
1913 netmap_init_bridges(void)
1916 bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1917 for (i = 0; i < NM_BRIDGES; i++)
1918 BDG_RWINIT(&nm_bridges[i]);
1920 #endif /* WITH_VALE */