Add ifpoll, which supports hardware TX/RX queue-based polling.
[dragonfly.git] / sys / dev / netif / emx / if_emx.c
index f93ef20..662a023 100644 (file)
  * SUCH DAMAGE.
  */
 
-#include "opt_polling.h"
+#include "opt_ifpoll.h"
 #include "opt_serializer.h"
+#include "opt_rss.h"
+#include "opt_emx.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -78,6 +80,7 @@
 #include <sys/proc.h>
 #include <sys/rman.h>
 #include <sys/serialize.h>
+#include <sys/serialize2.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/ifq_var.h>
+#include <net/toeplitz.h>
+#include <net/toeplitz2.h>
 #include <net/vlan/if_vlan_var.h>
 #include <net/vlan/if_vlan_ether.h>
+#include <net/if_poll.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <dev/netif/ig_hal/e1000_82571.h>
 #include <dev/netif/emx/if_emx.h>
 
+#ifdef EMX_RSS_DEBUG
+#define EMX_RSS_DPRINTF(sc, lvl, fmt, ...) \
+do { \
+       if (sc->rss_debug >= lvl) \
+               if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
+} while (0)
+#else  /* !EMX_RSS_DEBUG */
+#define EMX_RSS_DPRINTF(sc, lvl, fmt, ...)     ((void)0)
+#endif /* EMX_RSS_DEBUG */
+
 #define EMX_NAME       "Intel(R) PRO/1000 "
 
 #define EMX_DEVICE(id) \
@@ -123,6 +139,7 @@ static const struct emx_device {
        EMX_DEVICE(82571EB_SERDES_DUAL),
        EMX_DEVICE(82571EB_SERDES_QUAD),
        EMX_DEVICE(82571EB_QUAD_COPPER),
+       EMX_DEVICE(82571EB_QUAD_COPPER_BP),
        EMX_DEVICE(82571EB_QUAD_COPPER_LP),
        EMX_DEVICE(82571EB_QUAD_FIBER),
        EMX_DEVICE(82571PT_QUAD_COPPER),
@@ -158,16 +175,23 @@ static void       emx_init(void *);
 static void    emx_stop(struct emx_softc *);
 static int     emx_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
 static void    emx_start(struct ifnet *);
-#ifdef DEVICE_POLLING
-static void    emx_poll(struct ifnet *, enum poll_cmd, int);
+#ifdef IFPOLL_ENABLE
+static void    emx_qpoll(struct ifnet *, struct ifpoll_info *);
 #endif
 static void    emx_watchdog(struct ifnet *);
 static void    emx_media_status(struct ifnet *, struct ifmediareq *);
 static int     emx_media_change(struct ifnet *);
 static void    emx_timer(void *);
+static void    emx_serialize(struct ifnet *, enum ifnet_serialize);
+static void    emx_deserialize(struct ifnet *, enum ifnet_serialize);
+static int     emx_tryserialize(struct ifnet *, enum ifnet_serialize);
+#ifdef INVARIANTS
+static void    emx_serialize_assert(struct ifnet *, enum ifnet_serialize,
+                   boolean_t);
+#endif
 
 static void    emx_intr(void *);
-static void    emx_rxeof(struct emx_softc *, int);
+static void    emx_rxeof(struct emx_softc *, int, int);
 static void    emx_txeof(struct emx_softc *);
 static void    emx_tx_collect(struct emx_softc *);
 static void    emx_tx_purge(struct emx_softc *);
@@ -177,15 +201,15 @@ static void       emx_disable_intr(struct emx_softc *);
 static int     emx_dma_alloc(struct emx_softc *);
 static void    emx_dma_free(struct emx_softc *);
 static void    emx_init_tx_ring(struct emx_softc *);
-static int     emx_init_rx_ring(struct emx_softc *);
+static int     emx_init_rx_ring(struct emx_softc *, struct emx_rxdata *);
+static void    emx_free_rx_ring(struct emx_softc *, struct emx_rxdata *);
 static int     emx_create_tx_ring(struct emx_softc *);
-static int     emx_create_rx_ring(struct emx_softc *);
+static int     emx_create_rx_ring(struct emx_softc *, struct emx_rxdata *);
 static void    emx_destroy_tx_ring(struct emx_softc *, int);
-static void    emx_destroy_rx_ring(struct emx_softc *, int);
-static int     emx_newbuf(struct emx_softc *, int, int);
+static void    emx_destroy_rx_ring(struct emx_softc *,
+                   struct emx_rxdata *, int);
+static int     emx_newbuf(struct emx_softc *, struct emx_rxdata *, int, int);
 static int     emx_encap(struct emx_softc *, struct mbuf **);
-static void    emx_rxcsum(struct emx_softc *, struct e1000_rx_desc *,
-                   struct mbuf *);
 static int     emx_txcsum_pullup(struct emx_softc *, struct mbuf **);
 static int     emx_txcsum(struct emx_softc *, struct mbuf *,
                    uint32_t *, uint32_t *);
@@ -212,6 +236,12 @@ static int emx_sysctl_int_throttle(SYSCTL_HANDLER_ARGS);
 static int     emx_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS);
 static void    emx_add_sysctl(struct emx_softc *);
 
+static void    emx_serialize_skipmain(struct emx_softc *);
+static void    emx_deserialize_skipmain(struct emx_softc *);
+#ifdef IFPOLL_ENABLE
+static int     emx_tryserialize_skipmain(struct emx_softc *);
+#endif
+
 /* Management and WOL Support */
 static void    emx_get_mgmt(struct emx_softc *);
 static void    emx_rel_mgmt(struct emx_softc *);
@@ -279,6 +309,73 @@ KTR_INFO(KTR_IF_EMX, if_emx, pkt_txqueue, 5, "tx packet", 0);
 KTR_INFO(KTR_IF_EMX, if_emx, pkt_txclean, 6, "tx clean", 0);
 #define logif(name)    KTR_LOG(if_emx_ ## name)
 
+static __inline void
+emx_setup_rxdesc(emx_rxdesc_t *rxd, const struct emx_rxbuf *rxbuf)
+{
+       rxd->rxd_bufaddr = htole64(rxbuf->paddr);
+       /* DD bit must be cleared */
+       rxd->rxd_staterr = 0;
+}
+
+static __inline void
+emx_rxcsum(uint32_t staterr, struct mbuf *mp)
+{
+       /* Ignore Checksum bit is set */
+       if (staterr & E1000_RXD_STAT_IXSM)
+               return;
+
+       if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
+           E1000_RXD_STAT_IPCS)
+               mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
+
+       if ((staterr & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
+           E1000_RXD_STAT_TCPCS) {
+               mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
+                                          CSUM_PSEUDO_HDR |
+                                          CSUM_FRAG_NOT_CHECKED;
+               mp->m_pkthdr.csum_data = htons(0xffff);
+       }
+}
+
+static __inline struct pktinfo *
+emx_rssinfo(struct mbuf *m, struct pktinfo *pi,
+           uint32_t mrq, uint32_t hash, uint32_t staterr)
+{
+       switch (mrq & EMX_RXDMRQ_RSSTYPE_MASK) {
+       case EMX_RXDMRQ_IPV4_TCP:
+               pi->pi_netisr = NETISR_IP;
+               pi->pi_flags = 0;
+               pi->pi_l3proto = IPPROTO_TCP;
+               break;
+
+       case EMX_RXDMRQ_IPV6_TCP:
+               pi->pi_netisr = NETISR_IPV6;
+               pi->pi_flags = 0;
+               pi->pi_l3proto = IPPROTO_TCP;
+               break;
+
+       case EMX_RXDMRQ_IPV4:
+               if (staterr & E1000_RXD_STAT_IXSM)
+                       return NULL;
+
+               if ((staterr &
+                    (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
+                   E1000_RXD_STAT_TCPCS) {
+                       pi->pi_netisr = NETISR_IP;
+                       pi->pi_flags = 0;
+                       pi->pi_l3proto = IPPROTO_UDP;
+                       break;
+               }
+               /* FALL THROUGH */
+       default:
+               return NULL;
+       }
+
+       m->m_flags |= M_HASH;
+       m->m_pkthdr.hash = toeplitz_hash(hash);
+       return pi;
+}
+
 static int
 emx_probe(device_t dev)
 {
@@ -303,9 +400,21 @@ emx_attach(device_t dev)
 {
        struct emx_softc *sc = device_get_softc(dev);
        struct ifnet *ifp = &sc->arpcom.ac_if;
-       int error = 0;
+       int error = 0, i;
        uint16_t eeprom_data, device_id;
 
+       lwkt_serialize_init(&sc->main_serialize);
+       lwkt_serialize_init(&sc->tx_serialize);
+       for (i = 0; i < EMX_NRX_RING; ++i)
+               lwkt_serialize_init(&sc->rx_data[i].rx_serialize);
+
+       i = 0;
+       sc->serializes[i++] = &sc->main_serialize;
+       sc->serializes[i++] = &sc->tx_serialize;
+       sc->serializes[i++] = &sc->rx_data[0].rx_serialize;
+       sc->serializes[i++] = &sc->rx_data[1].rx_serialize;
+       KKASSERT(i == EMX_NSERIALIZE);
+
        callout_init(&sc->timer);
 
        sc->dev = sc->osdep.dev = dev;
@@ -370,7 +479,6 @@ emx_attach(device_t dev)
        sc->hw.mac.autoneg = EMX_DO_AUTO_NEG;
        sc->hw.phy.autoneg_wait_to_complete = FALSE;
        sc->hw.phy.autoneg_advertised = EMX_AUTONEG_ADV_DEFAULT;
-       sc->rx_buffer_len = MCLBYTES;
 
        /*
         * Interrupt throttle rate
@@ -410,6 +518,15 @@ emx_attach(device_t dev)
        /* This controls when hardware reports transmit completion status. */
        sc->hw.mac.report_tx_early = 1;
 
+#ifdef RSS
+       /* Calculate # of RX rings */
+       if (ncpus > 1)
+               sc->rx_ring_cnt = EMX_NRX_RING;
+       else
+#endif
+               sc->rx_ring_cnt = 1;
+       sc->rx_ring_inuse = sc->rx_ring_cnt;
+
        /* Allocate RX/TX rings' busdma(9) stuffs */
        error = emx_dma_alloc(sc);
        if (error)
@@ -549,7 +666,7 @@ emx_attach(device_t dev)
                sc->tx_int_nsegs = sc->oact_tx_desc;
 
        error = bus_setup_intr(dev, sc->intr_res, INTR_MPSAFE, emx_intr, sc,
-                              &sc->intr_tag, ifp->if_serializer);
+                              &sc->intr_tag, &sc->main_serialize);
        if (error) {
                device_printf(dev, "Failed to register interrupt handler");
                ether_ifdetach(&sc->arpcom.ac_if);
@@ -572,7 +689,7 @@ emx_detach(device_t dev)
        if (device_is_attached(dev)) {
                struct ifnet *ifp = &sc->arpcom.ac_if;
 
-               lwkt_serialize_enter(ifp->if_serializer);
+               ifnet_serialize_all(ifp);
 
                emx_stop(sc);
 
@@ -592,7 +709,7 @@ emx_detach(device_t dev)
 
                bus_teardown_intr(dev, sc->intr_res, sc->intr_tag);
 
-               lwkt_serialize_exit(ifp->if_serializer);
+               ifnet_deserialize_all(ifp);
 
                ether_ifdetach(ifp);
        }
@@ -629,7 +746,7 @@ emx_suspend(device_t dev)
        struct emx_softc *sc = device_get_softc(dev);
        struct ifnet *ifp = &sc->arpcom.ac_if;
 
-       lwkt_serialize_enter(ifp->if_serializer);
+       ifnet_serialize_all(ifp);
 
        emx_stop(sc);
 
@@ -645,7 +762,7 @@ emx_suspend(device_t dev)
                emx_enable_wol(dev);
         }
 
-       lwkt_serialize_exit(ifp->if_serializer);
+       ifnet_deserialize_all(ifp);
 
        return bus_generic_suspend(dev);
 }
@@ -656,13 +773,13 @@ emx_resume(device_t dev)
        struct emx_softc *sc = device_get_softc(dev);
        struct ifnet *ifp = &sc->arpcom.ac_if;
 
-       lwkt_serialize_enter(ifp->if_serializer);
+       ifnet_serialize_all(ifp);
 
        emx_init(sc);
        emx_get_mgmt(sc);
        if_devstart(ifp);
 
-       lwkt_serialize_exit(ifp->if_serializer);
+       ifnet_deserialize_all(ifp);
 
        return bus_generic_resume(dev);
 }
@@ -673,7 +790,7 @@ emx_start(struct ifnet *ifp)
        struct emx_softc *sc = ifp->if_softc;
        struct mbuf *m_head;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_SERIALIZED(&sc->tx_serialize);
 
        if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
                return;
@@ -722,7 +839,7 @@ emx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
        int max_frame_size, mask, reinit;
        int error = 0;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
 
        switch (command) {
        case SIOCSIFMTU:
@@ -788,8 +905,8 @@ emx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
                if (ifp->if_flags & IFF_RUNNING) {
                        emx_disable_intr(sc);
                        emx_set_multi(sc);
-#ifdef DEVICE_POLLING
-                       if (!(ifp->if_flags & IFF_POLLING))
+#ifdef IFPOLL_ENABLE
+                       if (!(ifp->if_flags & IFF_NPOLLING))
 #endif
                                emx_enable_intr(sc);
                }
@@ -819,6 +936,10 @@ emx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
+               if (mask & IFCAP_RSS) {
+                       ifp->if_capenable ^= IFCAP_RSS;
+                       reinit = 1;
+               }
                if (reinit && (ifp->if_flags & IFF_RUNNING))
                        emx_init(sc);
                break;
@@ -835,7 +956,7 @@ emx_watchdog(struct ifnet *ifp)
 {
        struct emx_softc *sc = ifp->if_softc;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
 
        /*
         * The timer is set to 5 every time start queues a packet.
@@ -885,8 +1006,9 @@ emx_init(void *xsc)
        struct ifnet *ifp = &sc->arpcom.ac_if;
        device_t dev = sc->dev;
        uint32_t pba;
+       int i;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
 
        emx_stop(sc);
 
@@ -973,11 +1095,22 @@ emx_init(void *xsc)
        /* Setup Multicast table */
        emx_set_multi(sc);
 
+       /*
+        * Adjust # of RX ring to be used based on IFCAP_RSS
+        */
+       if (ifp->if_capenable & IFCAP_RSS)
+               sc->rx_ring_inuse = sc->rx_ring_cnt;
+       else
+               sc->rx_ring_inuse = 1;
+
        /* Prepare receive descriptors and buffers */
-       if (emx_init_rx_ring(sc)) {
-               device_printf(dev, "Could not setup receive structures\n");
-               emx_stop(sc);
-               return;
+       for (i = 0; i < sc->rx_ring_inuse; ++i) {
+               if (emx_init_rx_ring(sc, &sc->rx_data[i])) {
+                       device_printf(dev,
+                           "Could not setup receive structures\n");
+                       emx_stop(sc);
+                       return;
+               }
        }
        emx_init_rx_unit(sc);
 
@@ -1007,63 +1140,21 @@ emx_init(void *xsc)
                E1000_WRITE_REG(&sc->hw, E1000_IVAR, 0x800A0908);
        }
 
-#ifdef DEVICE_POLLING
+#ifdef IFPOLL_ENABLE
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
-       if (ifp->if_flags & IFF_POLLING)
+       if (ifp->if_flags & IFF_NPOLLING)
                emx_disable_intr(sc);
        else
-#endif /* DEVICE_POLLING */
+#endif /* IFPOLL_ENABLE */
                emx_enable_intr(sc);
 
        /* Don't reset the phy next time init gets called */
        sc->hw.phy.reset_disable = TRUE;
 }
 
-#ifdef DEVICE_POLLING
-
-static void
-emx_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
-{
-       struct emx_softc *sc = ifp->if_softc;
-       uint32_t reg_icr;
-
-       ASSERT_SERIALIZED(ifp->if_serializer);
-
-       switch (cmd) {
-       case POLL_REGISTER:
-               emx_disable_intr(sc);
-               break;
-
-       case POLL_DEREGISTER:
-               emx_enable_intr(sc);
-               break;
-
-       case POLL_AND_CHECK_STATUS:
-               reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
-               if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
-                       callout_stop(&sc->timer);
-                       sc->hw.mac.get_link_status = 1;
-                       emx_update_link_status(sc);
-                       callout_reset(&sc->timer, hz, emx_timer, sc);
-               }
-               /* FALL THROUGH */
-       case POLL_ONLY:
-               if (ifp->if_flags & IFF_RUNNING) {
-                       emx_rxeof(sc, count);
-                       emx_txeof(sc);
-
-                       if (!ifq_is_empty(&ifp->if_snd))
-                               if_devstart(ifp);
-               }
-               break;
-       }
-}
-
-#endif /* DEVICE_POLLING */
-
 static void
 emx_intr(void *xsc)
 {
@@ -1072,7 +1163,7 @@ emx_intr(void *xsc)
        uint32_t reg_icr;
 
        logif(intr_beg);
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_SERIALIZED(&sc->main_serialize);
 
        reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
 
@@ -1083,7 +1174,7 @@ emx_intr(void *xsc)
 
        /*
         * XXX: some laptops trigger several spurious interrupts
-        * on em(4) when in the resume cycle. The ICR register
+        * on emx(4) when in the resume cycle. The ICR register
         * reports all-ones value in this case. Processing such
         * interrupts would lead to a freeze. I don't know why.
         */
@@ -1094,17 +1185,30 @@ emx_intr(void *xsc)
 
        if (ifp->if_flags & IFF_RUNNING) {
                if (reg_icr &
-                   (E1000_IMS_RXT0 | E1000_IMS_RXDMT0 | E1000_ICR_RXO))
-                       emx_rxeof(sc, -1);
-               if (reg_icr & E1000_IMS_TXDW) {
+                   (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) {
+                       int i;
+
+                       for (i = 0; i < sc->rx_ring_inuse; ++i) {
+                               lwkt_serialize_enter(
+                               &sc->rx_data[i].rx_serialize);
+                               emx_rxeof(sc, i, -1);
+                               lwkt_serialize_exit(
+                               &sc->rx_data[i].rx_serialize);
+                       }
+               }
+               if (reg_icr & E1000_ICR_TXDW) {
+                       lwkt_serialize_enter(&sc->tx_serialize);
                        emx_txeof(sc);
                        if (!ifq_is_empty(&ifp->if_snd))
                                if_devstart(ifp);
+                       lwkt_serialize_exit(&sc->tx_serialize);
                }
        }
 
        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+               emx_serialize_skipmain(sc);
+
                callout_stop(&sc->timer);
                sc->hw.mac.get_link_status = 1;
                emx_update_link_status(sc);
@@ -1113,6 +1217,8 @@ emx_intr(void *xsc)
                emx_tx_purge(sc);
 
                callout_reset(&sc->timer, hz, emx_timer, sc);
+
+               emx_deserialize_skipmain(sc);
        }
 
        if (reg_icr & E1000_ICR_RXO)
@@ -1126,7 +1232,7 @@ emx_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
        struct emx_softc *sc = ifp->if_softc;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
 
        emx_update_link_status(sc);
 
@@ -1167,7 +1273,7 @@ emx_media_change(struct ifnet *ifp)
        struct emx_softc *sc = ifp->if_softc;
        struct ifmedia *ifm = &sc->media;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
 
        if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
                return (EINVAL);
@@ -1224,7 +1330,7 @@ emx_encap(struct emx_softc *sc, struct mbuf **m_headp)
 {
        bus_dma_segment_t segs[EMX_MAX_SCATTER];
        bus_dmamap_t map;
-       struct emx_buf *tx_buffer, *tx_buffer_mapped;
+       struct emx_txbuf *tx_buffer, *tx_buffer_mapped;
        struct e1000_tx_desc *ctxd = NULL;
        struct mbuf *m_head = *m_headp;
        uint32_t txd_upper, txd_lower, cmd = 0;
@@ -1253,7 +1359,7 @@ emx_encap(struct emx_softc *sc, struct mbuf **m_headp)
         * that now gets a DONE bit writeback.
         */
        first = sc->next_avail_tx_desc;
-       tx_buffer = &sc->tx_buffer_area[first];
+       tx_buffer = &sc->tx_buf[first];
        tx_buffer_mapped = tx_buffer;
        map = tx_buffer->map;
 
@@ -1287,7 +1393,7 @@ emx_encap(struct emx_softc *sc, struct mbuf **m_headp)
 
        /* Set up our transmit descriptors */
        for (j = 0; j < nsegs; j++) {
-               tx_buffer = &sc->tx_buffer_area[i];
+               tx_buffer = &sc->tx_buf[i];
                ctxd = &sc->tx_desc_base[i];
 
                ctxd->buffer_addr = htole64(segs[j].ds_addr);
@@ -1425,7 +1531,7 @@ emx_timer(void *xsc)
        struct emx_softc *sc = xsc;
        struct ifnet *ifp = &sc->arpcom.ac_if;
 
-       lwkt_serialize_enter(ifp->if_serializer);
+       ifnet_serialize_all(ifp);
 
        emx_update_link_status(sc);
        emx_update_stats(sc);
@@ -1441,7 +1547,7 @@ emx_timer(void *xsc)
 
        callout_reset(&sc->timer, hz, emx_timer, sc);
 
-       lwkt_serialize_exit(ifp->if_serializer);
+       ifnet_deserialize_all(ifp);
 }
 
 static void
@@ -1533,7 +1639,7 @@ emx_stop(struct emx_softc *sc)
        struct ifnet *ifp = &sc->arpcom.ac_if;
        int i;
 
-       ASSERT_SERIALIZED(ifp->if_serializer);
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
 
        emx_disable_intr(sc);
 
@@ -1542,11 +1648,20 @@ emx_stop(struct emx_softc *sc)
        ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
        ifp->if_timer = 0;
 
+       /*
+        * Disable multiple receive queues.
+        *
+        * NOTE:
+        * We should disable multiple receive queues before
+        * resetting the hardware.
+        */
+       E1000_WRITE_REG(&sc->hw, E1000_MRQC, 0);
+
        e1000_reset_hw(&sc->hw);
        E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);
 
        for (i = 0; i < sc->num_tx_desc; i++) {
-               struct emx_buf *tx_buffer = &sc->tx_buffer_area[i];
+               struct emx_txbuf *tx_buffer = &sc->tx_buf[i];
 
                if (tx_buffer->m_head != NULL) {
                        bus_dmamap_unload(sc->txtag, tx_buffer->map);
@@ -1555,20 +1670,8 @@ emx_stop(struct emx_softc *sc)
                }
        }
 
-       for (i = 0; i < sc->num_rx_desc; i++) {
-               struct emx_buf *rx_buffer = &sc->rx_buffer_area[i];
-
-               if (rx_buffer->m_head != NULL) {
-                       bus_dmamap_unload(sc->rxtag, rx_buffer->map);
-                       m_freem(rx_buffer->m_head);
-                       rx_buffer->m_head = NULL;
-               }
-       }
-
-       if (sc->fmp != NULL)
-               m_freem(sc->fmp);
-       sc->fmp = NULL;
-       sc->lmp = NULL;
+       for (i = 0; i < sc->rx_ring_inuse; ++i)
+               emx_free_rx_ring(sc, &sc->rx_data[i]);
 
        sc->csum_flags = 0;
        sc->csum_ehlen = 0;
@@ -1656,10 +1759,16 @@ emx_setup_ifp(struct emx_softc *sc)
        ifp->if_init =  emx_init;
        ifp->if_ioctl = emx_ioctl;
        ifp->if_start = emx_start;
-#ifdef DEVICE_POLLING
-       ifp->if_poll = emx_poll;
+#ifdef IFPOLL_ENABLE
+       ifp->if_qpoll = emx_qpoll;
 #endif
        ifp->if_watchdog = emx_watchdog;
+       ifp->if_serialize = emx_serialize;
+       ifp->if_deserialize = emx_deserialize;
+       ifp->if_tryserialize = emx_tryserialize;
+#ifdef INVARIANTS
+       ifp->if_serialize_assert = emx_serialize_assert;
+#endif
        ifq_set_maxlen(&ifp->if_snd, sc->num_tx_desc - 1);
        ifq_set_ready(&ifp->if_snd);
 
@@ -1668,6 +1777,8 @@ emx_setup_ifp(struct emx_softc *sc)
        ifp->if_capabilities = IFCAP_HWCSUM |
                               IFCAP_VLAN_HWTAGGING |
                               IFCAP_VLAN_MTU;
+       if (sc->rx_ring_cnt > 1)
+               ifp->if_capabilities |= IFCAP_RSS;
        ifp->if_capenable = ifp->if_capabilities;
        ifp->if_hwassist = EMX_CSUM_FEATURES;
 
@@ -1769,7 +1880,7 @@ static int
 emx_create_tx_ring(struct emx_softc *sc)
 {
        device_t dev = sc->dev;
-       struct emx_buf *tx_buffer;
+       struct emx_txbuf *tx_buffer;
        int error, i, tsize;
 
        /*
@@ -1799,9 +1910,8 @@ emx_create_tx_ring(struct emx_softc *sc)
                return ENOMEM;
        }
 
-       sc->tx_buffer_area =
-               kmalloc(sizeof(struct emx_buf) * sc->num_tx_desc,
-                       M_DEVBUF, M_WAITOK | M_ZERO);
+       sc->tx_buf = kmalloc(sizeof(struct emx_txbuf) * sc->num_tx_desc,
+                            M_DEVBUF, M_WAITOK | M_ZERO);
 
        /*
         * Create DMA tags for tx buffers
@@ -1819,8 +1929,8 @@ emx_create_tx_ring(struct emx_softc *sc)
                        &sc->txtag);
        if (error) {
                device_printf(dev, "Unable to allocate TX DMA tag\n");
-               kfree(sc->tx_buffer_area, M_DEVBUF);
-               sc->tx_buffer_area = NULL;
+               kfree(sc->tx_buf, M_DEVBUF);
+               sc->tx_buf = NULL;
                return error;
        }
 
@@ -1828,7 +1938,7 @@ emx_create_tx_ring(struct emx_softc *sc)
         * Create DMA maps for tx buffers
         */
        for (i = 0; i < sc->num_tx_desc; i++) {
-               tx_buffer = &sc->tx_buffer_area[i];
+               tx_buffer = &sc->tx_buf[i];
 
                error = bus_dmamap_create(sc->txtag,
                                          BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
@@ -1926,7 +2036,7 @@ emx_init_tx_unit(struct emx_softc *sc)
 static void
 emx_destroy_tx_ring(struct emx_softc *sc, int ndesc)
 {
-       struct emx_buf *tx_buffer;
+       struct emx_txbuf *tx_buffer;
        int i;
 
        /* Free Transmit Descriptor ring */
@@ -1939,19 +2049,19 @@ emx_destroy_tx_ring(struct emx_softc *sc, int ndesc)
                sc->tx_desc_base = NULL;
        }
 
-       if (sc->tx_buffer_area == NULL)
+       if (sc->tx_buf == NULL)
                return;
 
        for (i = 0; i < ndesc; i++) {
-               tx_buffer = &sc->tx_buffer_area[i];
+               tx_buffer = &sc->tx_buf[i];
 
                KKASSERT(tx_buffer->m_head == NULL);
                bus_dmamap_destroy(sc->txtag, tx_buffer->map);
        }
        bus_dma_tag_destroy(sc->txtag);
 
-       kfree(sc->tx_buffer_area, M_DEVBUF);
-       sc->tx_buffer_area = NULL;
+       kfree(sc->tx_buf, M_DEVBUF);
+       sc->tx_buf = NULL;
 }
 
 /*
@@ -1972,7 +2082,7 @@ emx_txcsum(struct emx_softc *sc, struct mbuf *mp,
           uint32_t *txd_upper, uint32_t *txd_lower)
 {
        struct e1000_context_desc *TXD;
-       struct emx_buf *tx_buffer;
+       struct emx_txbuf *tx_buffer;
        struct ether_vlan_header *eh;
        struct ip *ip;
        int curr_txd, ehdrlen, csum_flags;
@@ -2029,7 +2139,7 @@ emx_txcsum(struct emx_softc *sc, struct mbuf *mp,
         */
 
        curr_txd = sc->next_avail_tx_desc;
-       tx_buffer = &sc->tx_buffer_area[curr_txd];
+       tx_buffer = &sc->tx_buf[curr_txd];
        TXD = (struct e1000_context_desc *)&sc->tx_desc_base[curr_txd];
 
        cmd = 0;
@@ -2164,7 +2274,7 @@ static void
 emx_txeof(struct emx_softc *sc)
 {
        struct ifnet *ifp = &sc->arpcom.ac_if;
-       struct emx_buf *tx_buffer;
+       struct emx_txbuf *tx_buffer;
        int first, num_avail;
 
        if (sc->tx_dd_head == sc->tx_dd_tail)
@@ -2192,7 +2302,7 @@ emx_txeof(struct emx_softc *sc)
 
                                num_avail++;
 
-                               tx_buffer = &sc->tx_buffer_area[first];
+                               tx_buffer = &sc->tx_buf[first];
                                if (tx_buffer->m_head) {
                                        ifp->if_opackets++;
                                        bus_dmamap_unload(sc->txtag,
@@ -2229,7 +2339,7 @@ static void
 emx_tx_collect(struct emx_softc *sc)
 {
        struct ifnet *ifp = &sc->arpcom.ac_if;
-       struct emx_buf *tx_buffer;
+       struct emx_txbuf *tx_buffer;
        int tdh, first, num_avail, dd_idx = -1;
 
        if (sc->num_tx_desc_avail == sc->num_tx_desc)
@@ -2250,7 +2360,7 @@ emx_tx_collect(struct emx_softc *sc)
 
                num_avail++;
 
-               tx_buffer = &sc->tx_buffer_area[first];
+               tx_buffer = &sc->tx_buf[first];
                if (tx_buffer->m_head) {
                        ifp->if_opackets++;
                        bus_dmamap_unload(sc->txtag,
@@ -2307,17 +2417,17 @@ emx_tx_purge(struct emx_softc *sc)
 }
 
 static int
-emx_newbuf(struct emx_softc *sc, int i, int init)
+emx_newbuf(struct emx_softc *sc, struct emx_rxdata *rdata, int i, int init)
 {
        struct mbuf *m;
        bus_dma_segment_t seg;
        bus_dmamap_t map;
-       struct emx_buf *rx_buffer;
+       struct emx_rxbuf *rx_buffer;
        int error, nseg;
 
        m = m_getcl(init ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR);
        if (m == NULL) {
-               sc->mbuf_cluster_failed++;
+               rdata->mbuf_cluster_failed++;
                if (init) {
                        if_printf(&sc->arpcom.ac_if,
                                  "Unable to allocate RX mbuf\n");
@@ -2329,8 +2439,8 @@ emx_newbuf(struct emx_softc *sc, int i, int init)
        if (sc->max_frame_size <= MCLBYTES - ETHER_ALIGN)
                m_adj(m, ETHER_ALIGN);
 
-       error = bus_dmamap_load_mbuf_segment(sc->rxtag,
-                       sc->rx_sparemap, m,
+       error = bus_dmamap_load_mbuf_segment(rdata->rxtag,
+                       rdata->rx_sparemap, m,
                        &seg, 1, &nseg, BUS_DMA_NOWAIT);
        if (error) {
                m_freem(m);
@@ -2341,57 +2451,57 @@ emx_newbuf(struct emx_softc *sc, int i, int init)
                return (error);
        }
 
-       rx_buffer = &sc->rx_buffer_area[i];
+       rx_buffer = &rdata->rx_buf[i];
        if (rx_buffer->m_head != NULL)
-               bus_dmamap_unload(sc->rxtag, rx_buffer->map);
+               bus_dmamap_unload(rdata->rxtag, rx_buffer->map);
 
        map = rx_buffer->map;
-       rx_buffer->map = sc->rx_sparemap;
-       sc->rx_sparemap = map;
+       rx_buffer->map = rdata->rx_sparemap;
+       rdata->rx_sparemap = map;
 
        rx_buffer->m_head = m;
+       rx_buffer->paddr = seg.ds_addr;
 
-       sc->rx_desc_base[i].buffer_addr = htole64(seg.ds_addr);
+       emx_setup_rxdesc(&rdata->rx_desc[i], rx_buffer);
        return (0);
 }
 
 static int
-emx_create_rx_ring(struct emx_softc *sc)
+emx_create_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata)
 {
        device_t dev = sc->dev;
-       struct emx_buf *rx_buffer;
+       struct emx_rxbuf *rx_buffer;
        int i, error, rsize;
 
        /*
         * Validate number of receive descriptors.  It must not exceed
         * hardware maximum, and must be multiple of E1000_DBA_ALIGN.
         */
-       if ((emx_rxd * sizeof(struct e1000_rx_desc)) % EMX_DBA_ALIGN != 0 ||
+       if ((emx_rxd * sizeof(emx_rxdesc_t)) % EMX_DBA_ALIGN != 0 ||
            emx_rxd > EMX_MAX_RXD || emx_rxd < EMX_MIN_RXD) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EMX_DEFAULT_RXD, emx_rxd);
-               sc->num_rx_desc = EMX_DEFAULT_RXD;
+               rdata->num_rx_desc = EMX_DEFAULT_RXD;
        } else {
-               sc->num_rx_desc = emx_rxd;
+               rdata->num_rx_desc = emx_rxd;
        }
 
        /*
         * Allocate Receive Descriptor ring
         */
-       rsize = roundup2(sc->num_rx_desc * sizeof(struct e1000_rx_desc),
+       rsize = roundup2(rdata->num_rx_desc * sizeof(emx_rxdesc_t),
                         EMX_DBA_ALIGN);
-       sc->rx_desc_base = bus_dmamem_coherent_any(sc->parent_dtag,
+       rdata->rx_desc = bus_dmamem_coherent_any(sc->parent_dtag,
                                EMX_DBA_ALIGN, rsize, BUS_DMA_WAITOK,
-                               &sc->rx_desc_dtag, &sc->rx_desc_dmap,
-                               &sc->rx_desc_paddr);
-       if (sc->rx_desc_base == NULL) {
+                               &rdata->rx_desc_dtag, &rdata->rx_desc_dmap,
+                               &rdata->rx_desc_paddr);
+       if (rdata->rx_desc == NULL) {
                device_printf(dev, "Unable to allocate rx_desc memory\n");
                return ENOMEM;
        }
 
-       sc->rx_buffer_area =
-               kmalloc(sizeof(struct emx_buf) * sc->num_rx_desc,
-                       M_DEVBUF, M_WAITOK | M_ZERO);
+       rdata->rx_buf = kmalloc(sizeof(struct emx_rxbuf) * rdata->num_rx_desc,
+                               M_DEVBUF, M_WAITOK | M_ZERO);
 
        /*
         * Create DMA tag for rx buffers
@@ -2405,62 +2515,82 @@ emx_create_rx_ring(struct emx_softc *sc)
                        1,                      /* nsegments */
                        MCLBYTES,               /* maxsegsize */
                        BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
-                       &sc->rxtag);
+                       &rdata->rxtag);
        if (error) {
                device_printf(dev, "Unable to allocate RX DMA tag\n");
-               kfree(sc->rx_buffer_area, M_DEVBUF);
-               sc->rx_buffer_area = NULL;
+               kfree(rdata->rx_buf, M_DEVBUF);
+               rdata->rx_buf = NULL;
                return error;
        }
 
        /*
         * Create spare DMA map for rx buffers
         */
-       error = bus_dmamap_create(sc->rxtag, BUS_DMA_WAITOK,
-                                 &sc->rx_sparemap);
+       error = bus_dmamap_create(rdata->rxtag, BUS_DMA_WAITOK,
+                                 &rdata->rx_sparemap);
        if (error) {
                device_printf(dev, "Unable to create spare RX DMA map\n");
-               bus_dma_tag_destroy(sc->rxtag);
-               kfree(sc->rx_buffer_area, M_DEVBUF);
-               sc->rx_buffer_area = NULL;
+               bus_dma_tag_destroy(rdata->rxtag);
+               kfree(rdata->rx_buf, M_DEVBUF);
+               rdata->rx_buf = NULL;
                return error;
        }
 
        /*
         * Create DMA maps for rx buffers
         */
-       for (i = 0; i < sc->num_rx_desc; i++) {
-               rx_buffer = &sc->rx_buffer_area[i];
+       for (i = 0; i < rdata->num_rx_desc; i++) {
+               rx_buffer = &rdata->rx_buf[i];
 
-               error = bus_dmamap_create(sc->rxtag, BUS_DMA_WAITOK,
+               error = bus_dmamap_create(rdata->rxtag, BUS_DMA_WAITOK,
                                          &rx_buffer->map);
                if (error) {
                        device_printf(dev, "Unable to create RX DMA map\n");
-                       emx_destroy_rx_ring(sc, i);
+                       emx_destroy_rx_ring(sc, rdata, i);
                        return error;
                }
        }
        return (0);
 }
 
+static void
+emx_free_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata)
+{
+       int i;
+
+       for (i = 0; i < rdata->num_rx_desc; i++) {
+               struct emx_rxbuf *rx_buffer = &rdata->rx_buf[i];
+
+               if (rx_buffer->m_head != NULL) {
+                       bus_dmamap_unload(rdata->rxtag, rx_buffer->map);
+                       m_freem(rx_buffer->m_head);
+                       rx_buffer->m_head = NULL;
+               }
+       }
+
+       if (rdata->fmp != NULL)
+               m_freem(rdata->fmp);
+       rdata->fmp = NULL;
+       rdata->lmp = NULL;
+}
+
 static int
-emx_init_rx_ring(struct emx_softc *sc)
+emx_init_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata)
 {
        int i, error;
 
        /* Reset descriptor ring */
-       bzero(sc->rx_desc_base,
-             sizeof(struct e1000_rx_desc) * sc->num_rx_desc);
+       bzero(rdata->rx_desc, sizeof(emx_rxdesc_t) * rdata->num_rx_desc);
 
        /* Allocate new ones. */
-       for (i = 0; i < sc->num_rx_desc; i++) {
-               error = emx_newbuf(sc, i, 1);
+       for (i = 0; i < rdata->num_rx_desc; i++) {
+               error = emx_newbuf(sc, rdata, i, 1);
                if (error)
                        return (error);
        }
 
        /* Setup our descriptor pointers */
-       sc->next_rx_desc_to_check = 0;
+       rdata->next_rx_desc_to_check = 0;
 
        return (0);
 }
@@ -2470,7 +2600,8 @@ emx_init_rx_unit(struct emx_softc *sc)
 {
        struct ifnet *ifp = &sc->arpcom.ac_if;
        uint64_t bus_addr;
-       uint32_t rctl, rxcsum;
+       uint32_t rctl, rxcsum, rfctl;
+       int i;
 
        /*
         * Make sure receives are disabled while setting
@@ -2490,67 +2621,126 @@ emx_init_rx_unit(struct emx_softc *sc)
                E1000_WRITE_REG(&sc->hw, E1000_ITR, 0);
        }
 
+       /* Use extended RX descriptor */
+       rfctl = E1000_RFCTL_EXTEN;
+
        /* Disable accelerated ackknowledge */
-       if (sc->hw.mac.type == e1000_82574) {
-               E1000_WRITE_REG(&sc->hw,
-                   E1000_RFCTL, E1000_RFCTL_ACK_DIS);
-       }
+       if (sc->hw.mac.type == e1000_82574)
+               rfctl |= E1000_RFCTL_ACK_DIS;
+
+       E1000_WRITE_REG(&sc->hw, E1000_RFCTL, rfctl);
 
        /* Setup the Base and Length of the Rx Descriptor Ring */
-       bus_addr = sc->rx_desc_paddr;
-       E1000_WRITE_REG(&sc->hw, E1000_RDLEN(0),
-           sc->num_rx_desc * sizeof(struct e1000_rx_desc));
-       E1000_WRITE_REG(&sc->hw, E1000_RDBAH(0), (uint32_t)(bus_addr >> 32));
-       E1000_WRITE_REG(&sc->hw, E1000_RDBAL(0), (uint32_t)bus_addr);
+       for (i = 0; i < sc->rx_ring_inuse; ++i) {
+               struct emx_rxdata *rdata = &sc->rx_data[i];
+
+               bus_addr = rdata->rx_desc_paddr;
+               E1000_WRITE_REG(&sc->hw, E1000_RDLEN(i),
+                   rdata->num_rx_desc * sizeof(emx_rxdesc_t));
+               E1000_WRITE_REG(&sc->hw, E1000_RDBAH(i),
+                   (uint32_t)(bus_addr >> 32));
+               E1000_WRITE_REG(&sc->hw, E1000_RDBAL(i),
+                   (uint32_t)bus_addr);
+       }
 
        /* Setup the Receive Control Register */
        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
-               E1000_RCTL_RDMTS_HALF |
+               E1000_RCTL_RDMTS_HALF | E1000_RCTL_SECRC |
                (sc->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
 
        /* Make sure VLAN Filters are off */
        rctl &= ~E1000_RCTL_VFE;
 
-       if (e1000_tbi_sbp_enabled_82543(&sc->hw))
-               rctl |= E1000_RCTL_SBP;
-       else
-               rctl &= ~E1000_RCTL_SBP;
-
-       switch (sc->rx_buffer_len) {
-       default:
-       case 2048:
-               rctl |= E1000_RCTL_SZ_2048;
-               break;
-
-       case 4096:
-               rctl |= E1000_RCTL_SZ_4096 |
-                   E1000_RCTL_BSEX | E1000_RCTL_LPE;
-               break;
+       /* Don't store bad packet */
+       rctl &= ~E1000_RCTL_SBP;
 
-       case 8192:
-               rctl |= E1000_RCTL_SZ_8192 |
-                   E1000_RCTL_BSEX | E1000_RCTL_LPE;
-               break;
-
-       case 16384:
-               rctl |= E1000_RCTL_SZ_16384 |
-                   E1000_RCTL_BSEX | E1000_RCTL_LPE;
-               break;
-       }
+       /* MCLBYTES */
+       rctl |= E1000_RCTL_SZ_2048;
 
        if (ifp->if_mtu > ETHERMTU)
                rctl |= E1000_RCTL_LPE;
        else
                rctl &= ~E1000_RCTL_LPE;
 
-       /* Receive Checksum Offload for TCP and UDP */
-       if (ifp->if_capenable & IFCAP_RXCSUM) {
+       /*
+        * Receive Checksum Offload for TCP and UDP
+        *
+        * Checksum offloading is also enabled if multiple receive
+        * queue is to be supported, since we need it to figure out
+        * packet type.
+        */
+       if (ifp->if_capenable & (IFCAP_RSS | IFCAP_RXCSUM)) {
                rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM);
-               rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
+
+               /*
+                * NOTE:
+                * PCSD must be enabled to enable multiple
+                * receive queues.
+                */
+               rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
+                         E1000_RXCSUM_PCSD;
                E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum);
        }
 
+       /*
+        * Configure multiple receive queue (RSS)
+        */
+       if (ifp->if_capenable & IFCAP_RSS) {
+               uint8_t key[EMX_NRSSRK * EMX_RSSRK_SIZE];
+               uint32_t reta;
+
+               KASSERT(sc->rx_ring_inuse == EMX_NRX_RING,
+                       ("invalid number of RX ring (%d)",
+                        sc->rx_ring_inuse));
+
+               /*
+                * NOTE:
+                * When we reach here, RSS has already been disabled
+                * in emx_stop(), so we could safely configure RSS key
+                * and redirect table.
+                */
+
+               /*
+                * Configure RSS key
+                */
+               toeplitz_get_key(key, sizeof(key));
+               for (i = 0; i < EMX_NRSSRK; ++i) {
+                       uint32_t rssrk;
+
+                       rssrk = EMX_RSSRK_VAL(key, i);
+                       EMX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk);
+
+                       E1000_WRITE_REG(&sc->hw, E1000_RSSRK(i), rssrk);
+               }
+
+               /*
+                * Configure RSS redirect table in following fashion:
+                * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
+                */
+               reta = 0;
+               for (i = 0; i < EMX_RETA_SIZE; ++i) {
+                       uint32_t q;
+
+                       q = (i % sc->rx_ring_inuse) << EMX_RETA_RINGIDX_SHIFT;
+                       reta |= q << (8 * i);
+               }
+               EMX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
+
+               for (i = 0; i < EMX_NRETA; ++i)
+                       E1000_WRITE_REG(&sc->hw, E1000_RETA(i), reta);
+
+               /*
+                * Enable multiple receive queues.
+                * Enable IPv4 RSS standard hash functions.
+                * Disable RSS interrupt.
+                */
+               E1000_WRITE_REG(&sc->hw, E1000_MRQC,
+                               E1000_MRQC_ENABLE_RSS_2Q |
+                               E1000_MRQC_RSS_FIELD_IPV4_TCP |
+                               E1000_MRQC_RSS_FIELD_IPV4);
+       }
+
        /*
         * XXX TEMPORARY WORKAROUND: on some systems with 82573
         * long latencies are observed, like Lenovo X60. This
@@ -2563,104 +2753,115 @@ emx_init_rx_unit(struct emx_softc *sc)
                E1000_WRITE_REG(&sc->hw, E1000_RDTR, EMX_RDTR_82573);
        }
 
-       /* Enable Receives */
-       E1000_WRITE_REG(&sc->hw, E1000_RCTL, rctl);
-
        /*
         * Setup the HW Rx Head and Tail Descriptor Pointers
         */
-       E1000_WRITE_REG(&sc->hw, E1000_RDH(0), 0);
-       E1000_WRITE_REG(&sc->hw, E1000_RDT(0), sc->num_rx_desc - 1);
+       for (i = 0; i < sc->rx_ring_inuse; ++i) {
+               E1000_WRITE_REG(&sc->hw, E1000_RDH(i), 0);
+               E1000_WRITE_REG(&sc->hw, E1000_RDT(i),
+                   sc->rx_data[i].num_rx_desc - 1);
+       }
+
+       /* Enable Receives */
+       E1000_WRITE_REG(&sc->hw, E1000_RCTL, rctl);
 }
 
 static void
-emx_destroy_rx_ring(struct emx_softc *sc, int ndesc)
+emx_destroy_rx_ring(struct emx_softc *sc, struct emx_rxdata *rdata, int ndesc)
 {
-       struct emx_buf *rx_buffer;
+       struct emx_rxbuf *rx_buffer;
        int i;
 
        /* Free Receive Descriptor ring */
-       if (sc->rx_desc_base) {
-               bus_dmamap_unload(sc->rx_desc_dtag, sc->rx_desc_dmap);
-               bus_dmamem_free(sc->rx_desc_dtag, sc->rx_desc_base,
-                               sc->rx_desc_dmap);
-               bus_dma_tag_destroy(sc->rx_desc_dtag);
+       if (rdata->rx_desc) {
+               bus_dmamap_unload(rdata->rx_desc_dtag, rdata->rx_desc_dmap);
+               bus_dmamem_free(rdata->rx_desc_dtag, rdata->rx_desc,
+                               rdata->rx_desc_dmap);
+               bus_dma_tag_destroy(rdata->rx_desc_dtag);
 
-               sc->rx_desc_base = NULL;
+               rdata->rx_desc = NULL;
        }
 
-       if (sc->rx_buffer_area == NULL)
+       if (rdata->rx_buf == NULL)
                return;
 
        for (i = 0; i < ndesc; i++) {
-               rx_buffer = &sc->rx_buffer_area[i];
+               rx_buffer = &rdata->rx_buf[i];
 
                KKASSERT(rx_buffer->m_head == NULL);
-               bus_dmamap_destroy(sc->rxtag, rx_buffer->map);
+               bus_dmamap_destroy(rdata->rxtag, rx_buffer->map);
        }
-       bus_dmamap_destroy(sc->rxtag, sc->rx_sparemap);
-       bus_dma_tag_destroy(sc->rxtag);
+       bus_dmamap_destroy(rdata->rxtag, rdata->rx_sparemap);
+       bus_dma_tag_destroy(rdata->rxtag);
 
-       kfree(sc->rx_buffer_area, M_DEVBUF);
-       sc->rx_buffer_area = NULL;
+       kfree(rdata->rx_buf, M_DEVBUF);
+       rdata->rx_buf = NULL;
 }
 
 static void
-emx_rxeof(struct emx_softc *sc, int count)
+emx_rxeof(struct emx_softc *sc, int ring_idx, int count)
 {
+       struct emx_rxdata *rdata = &sc->rx_data[ring_idx];
        struct ifnet *ifp = &sc->arpcom.ac_if;
-       uint8_t status, accept_frame = 0, eop = 0;
-       uint16_t len, desc_len, prev_len_adj;
-       struct e1000_rx_desc *current_desc;
+       uint32_t staterr;
+       emx_rxdesc_t *current_desc;
        struct mbuf *mp;
        int i;
        struct mbuf_chain chain[MAXCPU];
 
-       i = sc->next_rx_desc_to_check;
-       current_desc = &sc->rx_desc_base[i];
+       i = rdata->next_rx_desc_to_check;
+       current_desc = &rdata->rx_desc[i];
+       staterr = le32toh(current_desc->rxd_staterr);
 
-       if (!(current_desc->status & E1000_RXD_STAT_DD))
+       if (!(staterr & E1000_RXD_STAT_DD))
                return;
 
        ether_input_chain_init(chain);
 
-       while ((current_desc->status & E1000_RXD_STAT_DD) && count != 0) {
+       while ((staterr & E1000_RXD_STAT_DD) && count != 0) {
+               struct pktinfo *pi = NULL, pi0;
+               struct emx_rxbuf *rx_buf = &rdata->rx_buf[i];
                struct mbuf *m = NULL;
+               int eop, len;
 
                logif(pkt_receive);
 
-               mp = sc->rx_buffer_area[i].m_head;
+               mp = rx_buf->m_head;
 
                /*
                 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
                 * needs to access the last received byte in the mbuf.
                 */
-               bus_dmamap_sync(sc->rxtag, sc->rx_buffer_area[i].map,
+               bus_dmamap_sync(rdata->rxtag, rx_buf->map,
                                BUS_DMASYNC_POSTREAD);
 
-               accept_frame = 1;
-               prev_len_adj = 0;
-               desc_len = le16toh(current_desc->length);
-               status = current_desc->status;
-               if (status & E1000_RXD_STAT_EOP) {
+               len = le16toh(current_desc->rxd_length);
+               if (staterr & E1000_RXD_STAT_EOP) {
                        count--;
                        eop = 1;
-                       if (desc_len < ETHER_CRC_LEN) {
-                               len = 0;
-                               prev_len_adj = ETHER_CRC_LEN - desc_len;
-                       } else {
-                               len = desc_len - ETHER_CRC_LEN;
-                       }
                } else {
                        eop = 0;
-                       len = desc_len;
                }
 
-               if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)
-                       accept_frame = 0;
+               if (!(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
+                       uint16_t vlan = 0;
+                       uint32_t mrq, rss_hash;
 
-               if (accept_frame) {
-                       if (emx_newbuf(sc, i, 0) != 0) {
+                       /*
+                        * Save some necessary information, before
+                        * emx_newbuf() destroys it.
+                        */
+                       if ((staterr & E1000_RXD_STAT_VP) && eop)
+                               vlan = le16toh(current_desc->rxd_vlan);
+
+                       mrq = le32toh(current_desc->rxd_mrq);
+                       rss_hash = le32toh(current_desc->rxd_rss);
+
+                       EMX_RSS_DPRINTF(sc, 10,
+                           "ring%d, mrq 0x%08x, rss_hash 0x%08x\n",
+                           ring_idx, mrq, rss_hash);
+
+                       if (emx_newbuf(sc, rdata, i, 0) != 0) {
                                ifp->if_iqdrops++;
                                goto discard;
                        }
@@ -2668,112 +2869,79 @@ emx_rxeof(struct emx_softc *sc, int count)
                        /* Assign correct length to the current fragment */
                        mp->m_len = len;
 
-                       if (sc->fmp == NULL) {
+                       if (rdata->fmp == NULL) {
                                mp->m_pkthdr.len = len;
-                               sc->fmp = mp; /* Store the first mbuf */
-                               sc->lmp = mp;
+                               rdata->fmp = mp; /* Store the first mbuf */
+                               rdata->lmp = mp;
                        } else {
                                /*
                                 * Chain mbuf's together
                                 */
-
-                               /*
-                                * Adjust length of previous mbuf in chain if
-                                * we received less than 4 bytes in the last
-                                * descriptor.
-                                */
-                               if (prev_len_adj > 0) {
-                                       sc->lmp->m_len -= prev_len_adj;
-                                       sc->fmp->m_pkthdr.len -= prev_len_adj;
-                               }
-                               sc->lmp->m_next = mp;
-                               sc->lmp = sc->lmp->m_next;
-                               sc->fmp->m_pkthdr.len += len;
+                               rdata->lmp->m_next = mp;
+                               rdata->lmp = rdata->lmp->m_next;
+                               rdata->fmp->m_pkthdr.len += len;
                        }
 
                        if (eop) {
-                               sc->fmp->m_pkthdr.rcvif = ifp;
+                               rdata->fmp->m_pkthdr.rcvif = ifp;
                                ifp->if_ipackets++;
 
                                if (ifp->if_capenable & IFCAP_RXCSUM)
-                                       emx_rxcsum(sc, current_desc, sc->fmp);
+                                       emx_rxcsum(staterr, rdata->fmp);
+
+                               if (staterr & E1000_RXD_STAT_VP) {
+                                       rdata->fmp->m_pkthdr.ether_vlantag =
+                                           vlan;
+                                       rdata->fmp->m_flags |= M_VLANTAG;
+                               }
+                               m = rdata->fmp;
+                               rdata->fmp = NULL;
+                               rdata->lmp = NULL;
 
-                               if (status & E1000_RXD_STAT_VP) {
-                                       sc->fmp->m_pkthdr.ether_vlantag =
-                                           (le16toh(current_desc->special) &
-                                           E1000_RXD_SPC_VLAN_MASK);
-                                       sc->fmp->m_flags |= M_VLANTAG;
+                               if (ifp->if_capenable & IFCAP_RSS) {
+                                       pi = emx_rssinfo(m, &pi0, mrq,
+                                                        rss_hash, staterr);
                                }
-                               m = sc->fmp;
-                               sc->fmp = NULL;
-                               sc->lmp = NULL;
+#ifdef EMX_RSS_DEBUG
+                               rdata->rx_pkts++;
+#endif
                        }
                } else {
                        ifp->if_ierrors++;
 discard:
-#ifdef foo
-                       /* Reuse loaded DMA map and just update mbuf chain */
-                       mp = sc->rx_buffer_area[i].m_head;
-                       mp->m_len = mp->m_pkthdr.len = MCLBYTES;
-                       mp->m_data = mp->m_ext.ext_buf;
-                       mp->m_next = NULL;
-                       if (sc->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
-                               m_adj(mp, ETHER_ALIGN);
-#endif
-                       if (sc->fmp != NULL) {
-                               m_freem(sc->fmp);
-                               sc->fmp = NULL;
-                               sc->lmp = NULL;
+                       emx_setup_rxdesc(current_desc, rx_buf);
+                       if (rdata->fmp != NULL) {
+                               m_freem(rdata->fmp);
+                               rdata->fmp = NULL;
+                               rdata->lmp = NULL;
                        }
                        m = NULL;
                }
 
-               /* Zero out the receive descriptors status. */
-               current_desc->status = 0;
-
                if (m != NULL)
-                       ether_input_chain(ifp, m, chain);
+                       ether_input_chain(ifp, m, pi, chain);
 
                /* Advance our pointers to the next descriptor. */
-               if (++i == sc->num_rx_desc)
+               if (++i == rdata->num_rx_desc)
                        i = 0;
-               current_desc = &sc->rx_desc_base[i];
+
+               current_desc = &rdata->rx_desc[i];
+               staterr = le32toh(current_desc->rxd_staterr);
        }
-       sc->next_rx_desc_to_check = i;
+       rdata->next_rx_desc_to_check = i;
 
        ether_input_dispatch(chain);
 
-       /* Advance the E1000's Receive Queue #0  "Tail Pointer". */
+       /* Advance the E1000's Receive Queue "Tail Pointer". */
        if (--i < 0)
-               i = sc->num_rx_desc - 1;
-       E1000_WRITE_REG(&sc->hw, E1000_RDT(0), i);
-}
-
-static void
-emx_rxcsum(struct emx_softc *sc, struct e1000_rx_desc *rx_desc,
-          struct mbuf *mp)
-{
-       /* Ignore Checksum bit is set */
-       if (rx_desc->status & E1000_RXD_STAT_IXSM)
-               return;
-
-       if ((rx_desc->status & E1000_RXD_STAT_IPCS) &&
-           !(rx_desc->errors & E1000_RXD_ERR_IPE))
-               mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
-
-       if ((rx_desc->status & E1000_RXD_STAT_TCPCS) &&
-           !(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
-               mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
-                                          CSUM_PSEUDO_HDR |
-                                          CSUM_FRAG_NOT_CHECKED;
-               mp->m_pkthdr.csum_data = htons(0xffff);
-       }
+               i = rdata->num_rx_desc - 1;
+       E1000_WRITE_REG(&sc->hw, E1000_RDT(ring_idx), i);
 }
 
 static void
 emx_enable_intr(struct emx_softc *sc)
 {
-       lwkt_serialize_handler_enable(sc->arpcom.ac_if.if_serializer);
+       lwkt_serialize_handler_enable(&sc->main_serialize);
        E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK);
 }
 
@@ -2781,7 +2949,7 @@ static void
 emx_disable_intr(struct emx_softc *sc)
 {
        E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff);
-       lwkt_serialize_handler_disable(sc->arpcom.ac_if.if_serializer);
+       lwkt_serialize_handler_disable(&sc->main_serialize);
 }
 
 /*
@@ -3050,7 +3218,7 @@ emx_print_debug_info(struct emx_softc *sc)
        device_printf(dev, "Std mbuf failed = %ld\n",
            sc->mbuf_alloc_failed);
        device_printf(dev, "Std mbuf cluster failed = %ld\n",
-           sc->mbuf_cluster_failed);
+           sc->rx_data[0].mbuf_cluster_failed);
        device_printf(dev, "Driver dropped packets = %ld\n",
            sc->dropped_pkts);
        device_printf(dev, "Driver tx dma failure in encap = %ld\n",
@@ -3154,7 +3322,7 @@ emx_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
        sc = (struct emx_softc *)arg1;
        ifp = &sc->arpcom.ac_if;
 
-       lwkt_serialize_enter(ifp->if_serializer);
+       ifnet_serialize_all(ifp);
 
        if (result == 1)
                emx_print_debug_info(sc);
@@ -3167,7 +3335,7 @@ emx_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
        if (result == 2)
                emx_print_nvm_info(sc);
 
-       lwkt_serialize_exit(ifp->if_serializer);
+       ifnet_deserialize_all(ifp);
 
        return (error);
 }
@@ -3186,9 +3354,9 @@ emx_sysctl_stats(SYSCTL_HANDLER_ARGS)
                struct emx_softc *sc = (struct emx_softc *)arg1;
                struct ifnet *ifp = &sc->arpcom.ac_if;
 
-               lwkt_serialize_enter(ifp->if_serializer);
+               ifnet_serialize_all(ifp);
                emx_print_hw_stats(sc);
-               lwkt_serialize_exit(ifp->if_serializer);
+               ifnet_deserialize_all(ifp);
        }
        return (error);
 }
@@ -3199,6 +3367,10 @@ emx_add_sysctl(struct emx_softc *sc)
 #ifdef PROFILE_SERIALIZER
        struct ifnet *ifp = &sc->arpcom.ac_if;
 #endif
+#ifdef EMX_RSS_DEBUG
+       char rx_pkt[32];
+       int i;
+#endif
 
        sysctl_ctx_init(&sc->sysctl_ctx);
        sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
@@ -3219,10 +3391,12 @@ emx_add_sysctl(struct emx_softc *sc)
                        emx_sysctl_stats, "I", "Statistics");
 
        SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
-                       OID_AUTO, "rxd", CTLFLAG_RD, &sc->num_rx_desc, 0, NULL);
+                      OID_AUTO, "rxd", CTLFLAG_RD,
+                      &sc->rx_data[0].num_rx_desc, 0, NULL);
        SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
-                       OID_AUTO, "txd", CTLFLAG_RD, &sc->num_tx_desc, 0, NULL);
+                      OID_AUTO, "txd", CTLFLAG_RD, &sc->num_tx_desc, 0, NULL);
 
+#ifdef notyet
 #ifdef PROFILE_SERIALIZER
        SYSCTL_ADD_UINT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
                        OID_AUTO, "serializer_sleep", CTLFLAG_RW,
@@ -3236,6 +3410,7 @@ emx_add_sysctl(struct emx_softc *sc)
        SYSCTL_ADD_UINT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
                        OID_AUTO, "serializer_try", CTLFLAG_RW,
                        &ifp->if_serializer->try_cnt, 0, NULL);
+#endif
 #endif
 
        SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
@@ -3246,6 +3421,23 @@ emx_add_sysctl(struct emx_softc *sc)
                        OID_AUTO, "int_tx_nsegs", CTLTYPE_INT|CTLFLAG_RW,
                        sc, 0, emx_sysctl_int_tx_nsegs, "I",
                        "# segments per TX interrupt");
+
+       SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
+                      OID_AUTO, "rx_ring_inuse", CTLFLAG_RD,
+                      &sc->rx_ring_inuse, 0, "RX ring in use");
+
+#ifdef EMX_RSS_DEBUG
+       SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
+                      OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug,
+                      0, "RSS debug level");
+       for (i = 0; i < sc->rx_ring_cnt; ++i) {
+               ksnprintf(rx_pkt, sizeof(rx_pkt), "rx%d_pkt", i);
+               SYSCTL_ADD_UINT(&sc->sysctl_ctx,
+                               SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO,
+                               rx_pkt, CTLFLAG_RW,
+                               &sc->rx_data[i].rx_pkts, 0, "RXed packets");
+       }
+#endif
 }
 
 static int
@@ -3274,7 +3466,7 @@ emx_sysctl_int_throttle(SYSCTL_HANDLER_ARGS)
                        return EINVAL;
        }
 
-       lwkt_serialize_enter(ifp->if_serializer);
+       ifnet_serialize_all(ifp);
 
        if (throttle)
                sc->int_throttle_ceil = 1000000000 / 256 / throttle;
@@ -3284,7 +3476,7 @@ emx_sysctl_int_throttle(SYSCTL_HANDLER_ARGS)
        if (ifp->if_flags & IFF_RUNNING)
                E1000_WRITE_REG(&sc->hw, E1000_ITR, throttle);
 
-       lwkt_serialize_exit(ifp->if_serializer);
+       ifnet_deserialize_all(ifp);
 
        if (bootverbose) {
                if_printf(ifp, "Interrupt moderation set to %d/sec\n",
@@ -3307,7 +3499,7 @@ emx_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS)
        if (segs <= 0)
                return EINVAL;
 
-       lwkt_serialize_enter(ifp->if_serializer);
+       ifnet_serialize_all(ifp);
 
        /*
         * Don't allow int_tx_nsegs to become:
@@ -3325,7 +3517,7 @@ emx_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS)
                sc->tx_int_nsegs = segs;
        }
 
-       lwkt_serialize_exit(ifp->if_serializer);
+       ifnet_deserialize_all(ifp);
 
        return error;
 }
@@ -3333,7 +3525,7 @@ emx_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS)
 static int
 emx_dma_alloc(struct emx_softc *sc)
 {
-       int error;
+       int error, i;
 
        /*
         * Create top level busdma tag
@@ -3360,10 +3552,13 @@ emx_dma_alloc(struct emx_softc *sc)
        /*
         * Allocate receive descriptors ring and buffers
         */
-       error = emx_create_rx_ring(sc);
-       if (error) {
-               device_printf(sc->dev, "Could not setup receive structures\n");
-               return error;
+       for (i = 0; i < sc->rx_ring_cnt; ++i) {
+               error = emx_create_rx_ring(sc, &sc->rx_data[i]);
+               if (error) {
+                       device_printf(sc->dev,
+                           "Could not setup receive structures\n");
+                       return error;
+               }
        }
        return 0;
 }
@@ -3371,10 +3566,258 @@ emx_dma_alloc(struct emx_softc *sc)
 static void
 emx_dma_free(struct emx_softc *sc)
 {
+       int i;
+
        emx_destroy_tx_ring(sc, sc->num_tx_desc);
-       emx_destroy_rx_ring(sc, sc->num_rx_desc);
+
+       for (i = 0; i < sc->rx_ring_cnt; ++i) {
+               emx_destroy_rx_ring(sc, &sc->rx_data[i],
+                                   sc->rx_data[i].num_rx_desc);
+       }
 
        /* Free top level busdma tag */
        if (sc->parent_dtag != NULL)
                bus_dma_tag_destroy(sc->parent_dtag);
 }
+
+/*
+ * Enter the driver serializer(s) selected by 'slz'.
+ * IFNET_SERIALIZE_ALL enters every serializer in sc->serializes[];
+ * MAIN/TX/RX(n) enter the single matching serializer.  Only RX rings
+ * 0 and 1 are handled here; any other type panics.
+ */
+static void
+emx_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
+{
+       struct emx_softc *sc = ifp->if_softc;
+
+       switch (slz) {
+       case IFNET_SERIALIZE_ALL:
+               lwkt_serialize_array_enter(sc->serializes, EMX_NSERIALIZE, 0);
+               break;
+
+       case IFNET_SERIALIZE_MAIN:
+               lwkt_serialize_enter(&sc->main_serialize);
+               break;
+
+       case IFNET_SERIALIZE_TX:
+               lwkt_serialize_enter(&sc->tx_serialize);
+               break;
+
+       case IFNET_SERIALIZE_RX(0):
+               lwkt_serialize_enter(&sc->rx_data[0].rx_serialize);
+               break;
+
+       case IFNET_SERIALIZE_RX(1):
+               lwkt_serialize_enter(&sc->rx_data[1].rx_serialize);
+               break;
+
+       default:
+               panic("%s unsupported serialize type\n", ifp->if_xname);
+       }
+}
+
+/*
+ * Exit the driver serializer(s) selected by 'slz'; exact mirror of
+ * emx_serialize().  Only RX rings 0 and 1 are handled; any other
+ * type panics.
+ */
+static void
+emx_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
+{
+       struct emx_softc *sc = ifp->if_softc;
+
+       switch (slz) {
+       case IFNET_SERIALIZE_ALL:
+               lwkt_serialize_array_exit(sc->serializes, EMX_NSERIALIZE, 0);
+               break;
+
+       case IFNET_SERIALIZE_MAIN:
+               lwkt_serialize_exit(&sc->main_serialize);
+               break;
+
+       case IFNET_SERIALIZE_TX:
+               lwkt_serialize_exit(&sc->tx_serialize);
+               break;
+
+       case IFNET_SERIALIZE_RX(0):
+               lwkt_serialize_exit(&sc->rx_data[0].rx_serialize);
+               break;
+
+       case IFNET_SERIALIZE_RX(1):
+               lwkt_serialize_exit(&sc->rx_data[1].rx_serialize);
+               break;
+
+       default:
+               panic("%s unsupported serialize type\n", ifp->if_xname);
+       }
+}
+
+/*
+ * Non-blocking variant of emx_serialize(): returns the result of the
+ * underlying lwkt_serialize_try()/lwkt_serialize_array_try() call
+ * (non-zero on success).  Only RX rings 0 and 1 are handled; any
+ * other type panics.
+ */
+static int
+emx_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
+{
+       struct emx_softc *sc = ifp->if_softc;
+
+       switch (slz) {
+       case IFNET_SERIALIZE_ALL:
+               return lwkt_serialize_array_try(sc->serializes,
+                                               EMX_NSERIALIZE, 0);
+
+       case IFNET_SERIALIZE_MAIN:
+               return lwkt_serialize_try(&sc->main_serialize);
+
+       case IFNET_SERIALIZE_TX:
+               return lwkt_serialize_try(&sc->tx_serialize);
+
+       case IFNET_SERIALIZE_RX(0):
+               return lwkt_serialize_try(&sc->rx_data[0].rx_serialize);
+
+       case IFNET_SERIALIZE_RX(1):
+               return lwkt_serialize_try(&sc->rx_data[1].rx_serialize);
+
+       default:
+               panic("%s unsupported serialize type\n", ifp->if_xname);
+       }
+}
+
+/*
+ * Enter all serializers starting at array index 1, i.e. presumably
+ * skipping the leading main_serialize slot in sc->serializes[] —
+ * NOTE(review): confirm the start-index semantics against serialize2.h.
+ */
+static void
+emx_serialize_skipmain(struct emx_softc *sc)
+{
+       lwkt_serialize_array_enter(sc->serializes, EMX_NSERIALIZE, 1);
+}
+
+#ifdef IFPOLL_ENABLE
+/*
+ * Non-blocking counterpart of emx_serialize_skipmain(); only needed by
+ * the polling path (emx_qpoll_status), hence the IFPOLL_ENABLE guard.
+ */
+static int
+emx_tryserialize_skipmain(struct emx_softc *sc)
+{
+       return lwkt_serialize_array_try(sc->serializes, EMX_NSERIALIZE, 1);
+}
+#endif
+
+/* Release the serializers taken by emx_serialize_skipmain(). */
+static void
+emx_deserialize_skipmain(struct emx_softc *sc)
+{
+       lwkt_serialize_array_exit(sc->serializes, EMX_NSERIALIZE, 1);
+}
+
+#ifdef INVARIANTS
+
+/*
+ * Debug-only ifnet serialize-assert hook: verify that the serializer(s)
+ * selected by 'slz' are currently held ('serialized' is TRUE) or not
+ * held (FALSE).  Mirrors the case structure of emx_serialize(); only
+ * RX rings 0 and 1 are handled, any other type panics.
+ */
+static void
+emx_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
+                    boolean_t serialized)
+{
+       struct emx_softc *sc = ifp->if_softc;
+       int i;
+
+       switch (slz) {
+       case IFNET_SERIALIZE_ALL:
+               if (serialized) {
+                       for (i = 0; i < EMX_NSERIALIZE; ++i)
+                               ASSERT_SERIALIZED(sc->serializes[i]);
+               } else {
+                       for (i = 0; i < EMX_NSERIALIZE; ++i)
+                               ASSERT_NOT_SERIALIZED(sc->serializes[i]);
+               }
+               break;
+
+       case IFNET_SERIALIZE_MAIN:
+               if (serialized)
+                       ASSERT_SERIALIZED(&sc->main_serialize);
+               else
+                       ASSERT_NOT_SERIALIZED(&sc->main_serialize);
+               break;
+
+       case IFNET_SERIALIZE_TX:
+               if (serialized)
+                       ASSERT_SERIALIZED(&sc->tx_serialize);
+               else
+                       ASSERT_NOT_SERIALIZED(&sc->tx_serialize);
+               break;
+
+       case IFNET_SERIALIZE_RX(0):
+               if (serialized)
+                       ASSERT_SERIALIZED(&sc->rx_data[0].rx_serialize);
+               else
+                       ASSERT_NOT_SERIALIZED(&sc->rx_data[0].rx_serialize);
+               break;
+
+       case IFNET_SERIALIZE_RX(1):
+               if (serialized)
+                       ASSERT_SERIALIZED(&sc->rx_data[1].rx_serialize);
+               else
+                       ASSERT_NOT_SERIALIZED(&sc->rx_data[1].rx_serialize);
+               break;
+
+       default:
+               panic("%s unsupported serialize type\n", ifp->if_xname);
+       }
+}
+
+#endif /* INVARIANTS */
+
+#ifdef IFPOLL_ENABLE
+
+/*
+ * ifpoll status handler (registered via emx_qpoll); runs under
+ * main_serialize.  Reads ICR and, on a link-related event
+ * (RXSEQ/LSC), opportunistically grabs the remaining serializers to
+ * refresh the link state and restart the watchdog timer.  If the try
+ * fails, the update is simply skipped until the next poll.
+ */
+static void
+emx_qpoll_status(struct ifnet *ifp, int pollhz __unused)
+{
+       struct emx_softc *sc = ifp->if_softc;
+       uint32_t reg_icr;
+
+       ASSERT_SERIALIZED(&sc->main_serialize);
+
+       reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
+       if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+               if (emx_tryserialize_skipmain(sc)) {
+                       callout_stop(&sc->timer);
+                       sc->hw.mac.get_link_status = 1;
+                       emx_update_link_status(sc);
+                       callout_reset(&sc->timer, hz, emx_timer, sc);
+                       emx_deserialize_skipmain(sc);
+               }
+       }
+}
+
+/*
+ * ifpoll TX handler; runs under tx_serialize.  Reclaims completed
+ * TX descriptors and restarts transmission if packets are queued.
+ */
+static void
+emx_qpoll_tx(struct ifnet *ifp, void *arg __unused, int cycle __unused)
+{
+       struct emx_softc *sc = ifp->if_softc;
+
+       ASSERT_SERIALIZED(&sc->tx_serialize);
+
+       emx_txeof(sc);
+       if (!ifq_is_empty(&ifp->if_snd))
+               if_devstart(ifp);
+}
+
+/*
+ * ifpoll RX handler; runs under the ring's rx_serialize.  'arg' is the
+ * emx_rxdata registered in emx_qpoll; the ring index passed to
+ * emx_rxeof() is recovered by pointer arithmetic against sc->rx_data[].
+ * 'cycle' bounds the number of packets processed this poll.
+ */
+static void
+emx_qpoll_rx(struct ifnet *ifp, void *arg, int cycle)
+{
+       struct emx_softc *sc = ifp->if_softc;
+       struct emx_rxdata *rdata = arg;
+
+       ASSERT_SERIALIZED(&rdata->rx_serialize);
+
+       emx_rxeof(sc, rdata - sc->rx_data, cycle);
+}
+
+/*
+ * ifpoll registration hook, called with all serializers held.
+ * info != NULL: register the status handler plus one TX and per-ring
+ * RX handlers (each with its own serializer) and mask interrupts while
+ * the interface is running, since polling replaces them.
+ * info == NULL: deregister; re-enable interrupts if still running.
+ */
+static void
+emx_qpoll(struct ifnet *ifp, struct ifpoll_info *info)
+{
+       struct emx_softc *sc = ifp->if_softc;
+
+       ASSERT_IFNET_SERIALIZED_ALL(ifp);
+
+       if (info) {
+               int i;
+
+               info->ifpi_status.status_func = emx_qpoll_status;
+               info->ifpi_status.serializer = &sc->main_serialize;
+
+               info->ifpi_tx[0].poll_func = emx_qpoll_tx;
+               info->ifpi_tx[0].arg = NULL;
+               info->ifpi_tx[0].serializer = &sc->tx_serialize;
+
+               for (i = 0; i < sc->rx_ring_cnt; ++i) {
+                       info->ifpi_rx[i].poll_func = emx_qpoll_rx;
+                       info->ifpi_rx[i].arg = &sc->rx_data[i];
+                       info->ifpi_rx[i].serializer =
+                               &sc->rx_data[i].rx_serialize;
+               }
+
+               if (ifp->if_flags & IFF_RUNNING)
+                       emx_disable_intr(sc);
+       } else if (ifp->if_flags & IFF_RUNNING) {
+               emx_enable_intr(sc);
+       }
+}
+
+#endif /* IFPOLL_ENABLE */