bge: Don't peek at the TX descriptor in txeof()
[dragonfly.git] / sys / dev / netif / bge / if_bge.c
index 8bc47cc..c0ed897 100644 (file)
@@ -87,6 +87,9 @@
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <bus/pci/pcivar.h>
 
 #include <dev/netif/bge/if_bgereg.h>
+#include <dev/netif/bge/if_bgevar.h>
 
 /* "device miibus" required.  See GENERIC if you get errors here. */
 #include "miibus_if.h"
 
 #define BGE_CSUM_FEATURES      (CSUM_IP | CSUM_TCP)
-#define BGE_MIN_FRAME          60
 
-static const struct bge_type bge_devs[] = {
+static const struct bge_type {
+       uint16_t                bge_vid;
+       uint16_t                bge_did;
+       char                    *bge_name;
+} bge_devs[] = {
        { PCI_VENDOR_3COM, PCI_PRODUCT_3COM_3C996,
                "3COM 3C996 Gigabit Ethernet" },
 
@@ -279,13 +286,16 @@ static const struct bge_type bge_devs[] = {
 #define BGE_IS_5755_PLUS(sc)           ((sc)->bge_flags & BGE_FLAG_5755_PLUS)
 #define BGE_IS_5788(sc)                        ((sc)->bge_flags & BGE_FLAG_5788)
 
+#define BGE_IS_CRIPPLED(sc)            \
+       (BGE_IS_5788((sc)) || (sc)->bge_asicrev == BGE_ASICREV_BCM5700)
+
 typedef int    (*bge_eaddr_fcn_t)(struct bge_softc *, uint8_t[]);
 
 static int     bge_probe(device_t);
 static int     bge_attach(device_t);
 static int     bge_detach(device_t);
-static void    bge_txeof(struct bge_softc *);
-static void    bge_rxeof(struct bge_softc *);
+static void    bge_txeof(struct bge_softc *, uint16_t);
+static void    bge_rxeof(struct bge_softc *, uint16_t);
 
 static void    bge_tick(void *);
 static void    bge_stats_update(struct bge_softc *);
@@ -293,11 +303,17 @@ static void       bge_stats_update_regs(struct bge_softc *);
 static struct mbuf *
                bge_defrag_shortdma(struct mbuf *);
 static int     bge_encap(struct bge_softc *, struct mbuf **, uint32_t *);
+static int     bge_setup_tso(struct bge_softc *, struct mbuf **,
+                   uint16_t *, uint16_t *);
 
 #ifdef DEVICE_POLLING
 static void    bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count);
 #endif
-static void    bge_intr(void *);
+static void    bge_intr_crippled(void *);
+static void    bge_intr_legacy(void *);
+static void    bge_msi(void *);
+static void    bge_msi_oneshot(void *);
+static void    bge_intr(struct bge_softc *);
 static void    bge_enable_intr(struct bge_softc *);
 static void    bge_disable_intr(struct bge_softc *);
 static void    bge_start(struct ifnet *);
@@ -319,6 +335,7 @@ static int  bge_read_eeprom(struct bge_softc *, caddr_t, uint32_t, size_t);
 
 static void    bge_setmulti(struct bge_softc *);
 static void    bge_setpromisc(struct bge_softc *);
+static void    bge_enable_msi(struct bge_softc *sc);
 
 static int     bge_alloc_jumbo_mem(struct bge_softc *);
 static void    bge_free_jumbo_mem(struct bge_softc *);
@@ -339,6 +356,7 @@ static int  bge_init_tx_ring(struct bge_softc *);
 
 static int     bge_chipinit(struct bge_softc *);
 static int     bge_blockinit(struct bge_softc *);
+static void    bge_stop_block(struct bge_softc *, bus_size_t, uint32_t);
 
 static uint32_t        bge_readmem_ind(struct bge_softc *, uint32_t);
 static void    bge_writemem_ind(struct bge_softc *, uint32_t, uint32_t);
@@ -356,6 +374,7 @@ static void bge_bcm5700_link_upd(struct bge_softc *, uint32_t);
 static void    bge_tbi_link_upd(struct bge_softc *, uint32_t);
 static void    bge_copper_link_upd(struct bge_softc *, uint32_t);
 static void    bge_autopoll_link_upd(struct bge_softc *, uint32_t);
+static void    bge_link_poll(struct bge_softc *);
 
 static void    bge_reset(struct bge_softc *);
 
@@ -374,9 +393,14 @@ static int bge_get_eaddr(struct bge_softc *, uint8_t[]);
 static void    bge_coal_change(struct bge_softc *);
 static int     bge_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS);
 static int     bge_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS);
-static int     bge_sysctl_rx_max_coal_bds(SYSCTL_HANDLER_ARGS);
-static int     bge_sysctl_tx_max_coal_bds(SYSCTL_HANDLER_ARGS);
-static int     bge_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *, uint32_t);
+static int     bge_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS);
+static int     bge_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS);
+static int     bge_sysctl_rx_coal_ticks_int(SYSCTL_HANDLER_ARGS);
+static int     bge_sysctl_tx_coal_ticks_int(SYSCTL_HANDLER_ARGS);
+static int     bge_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS);
+static int     bge_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS);
+static int     bge_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *,
+                   int, int, uint32_t);
 
 /*
  * Set following tunable to 1 for some IBM blade servers with the DNLK
@@ -385,16 +409,8 @@ static int bge_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *, uint32_t);
 static int     bge_fake_autoneg = 0;
 TUNABLE_INT("hw.bge.fake_autoneg", &bge_fake_autoneg);
 
-/* Interrupt moderation control variables. */
-static int     bge_rx_coal_ticks = 100;        /* usec */
-static int     bge_tx_coal_ticks = 1023;       /* usec */
-static int     bge_rx_max_coal_bds = 80;
-static int     bge_tx_max_coal_bds = 128;
-
-TUNABLE_INT("hw.bge.rx_coal_ticks", &bge_rx_coal_ticks);
-TUNABLE_INT("hw.bge.tx_coal_ticks", &bge_tx_coal_ticks);
-TUNABLE_INT("hw.bge.rx_max_coal_bds", &bge_rx_max_coal_bds);
-TUNABLE_INT("hw.bge.tx_max_coal_bds", &bge_tx_max_coal_bds);
+static int     bge_msi_enable = 1;
+TUNABLE_INT("hw.bge.msi.enable", &bge_msi_enable);
 
 #if !defined(KTR_IF_BGE)
 #define KTR_IF_BGE     KTR_ALL
@@ -496,6 +512,8 @@ bge_writembx(struct bge_softc *sc, int off, int val)
                off += BGE_LPMBX_IRQ0_HI - BGE_MBX_IRQ0_HI;
 
        CSR_WRITE_4(sc, off, val);
+       if (sc->bge_mbox_reorder)
+               CSR_READ_4(sc, off);
 }
 
 static uint8_t
@@ -1235,7 +1253,8 @@ bge_chipinit(struct bge_softc *sc)
        uint16_t val;
 
        /* Set endian type before we access any non-PCI registers. */
-       pci_write_config(sc->bge_dev, BGE_PCI_MISC_CTL, BGE_INIT, 4);
+       pci_write_config(sc->bge_dev, BGE_PCI_MISC_CTL,
+           BGE_INIT | sc->bge_pci_miscctl, 4);
 
        /* Clear the MAC control register */
        CSR_WRITE_4(sc, BGE_MAC_MODE, 0);
@@ -1264,72 +1283,63 @@ bge_chipinit(struct bge_softc *sc)
        }
 
        /* Set up the PCI DMA control register. */
+       dma_rw_ctl = BGE_PCI_READ_CMD | BGE_PCI_WRITE_CMD;
        if (sc->bge_flags & BGE_FLAG_PCIE) {
-               /* PCI Express */
-               dma_rw_ctl = BGE_PCI_READ_CMD|BGE_PCI_WRITE_CMD |
-                   (0xf << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
-                   (0x2 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
+               /* PCI-E bus */
+               /* DMA read watermark not used on PCI-E */
+               dma_rw_ctl |= (0x3 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
        } else if (sc->bge_flags & BGE_FLAG_PCIX) {
                /* PCI-X bus */
-               if (BGE_IS_5714_FAMILY(sc)) {
-                       dma_rw_ctl = BGE_PCI_READ_CMD|BGE_PCI_WRITE_CMD;
-                       dma_rw_ctl &= ~BGE_PCIDMARWCTL_ONEDMA_ATONCE; /* XXX */
-                       /* XXX magic values, Broadcom-supplied Linux driver */
-                       if (sc->bge_asicrev == BGE_ASICREV_BCM5780) {
-                               dma_rw_ctl |= (1 << 20) | (1 << 18) | 
-                                   BGE_PCIDMARWCTL_ONEDMA_ATONCE;
-                       } else {
-                               dma_rw_ctl |= (1 << 20) | (1 << 18) | (1 << 15);
-                       }
-               } else if (sc->bge_asicrev == BGE_ASICREV_BCM5703) {
-                       /*
-                        * In the BCM5703, the DMA read watermark should
-                        * be set to less than or equal to the maximum
-                        * memory read byte count of the PCI-X command
-                        * register.
-                        */
-                       dma_rw_ctl = BGE_PCI_READ_CMD|BGE_PCI_WRITE_CMD |
-                           (0x4 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
-                           (0x3 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
-               } else if (sc->bge_asicrev == BGE_ASICREV_BCM5704) {
-                       /*
-                        * The 5704 uses a different encoding of read/write
-                        * watermarks.
-                        */
-                       dma_rw_ctl = BGE_PCI_READ_CMD|BGE_PCI_WRITE_CMD |
-                           (0x7 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
-                           (0x3 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
-               } else {
-                       dma_rw_ctl = BGE_PCI_READ_CMD|BGE_PCI_WRITE_CMD |
-                           (0x3 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
-                           (0x3 << BGE_PCIDMARWCTL_WR_WAT_SHIFT) |
-                           (0x0F);
-               }
-
-               /*
-                * 5703 and 5704 need ONEDMA_AT_ONCE as a workaround
-                * for hardware bugs.
-                */
-               if (sc->bge_asicrev == BGE_ASICREV_BCM5703 ||
+               if (sc->bge_asicrev == BGE_ASICREV_BCM5780) {
+                       dma_rw_ctl |= (0x4 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
+                           (0x2 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
+                       dma_rw_ctl |= BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL;
+               } else if (sc->bge_asicrev == BGE_ASICREV_BCM5714) {
+                       dma_rw_ctl |= (0x4 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
+                           (0x2 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
+                       dma_rw_ctl |= BGE_PCIDMARWCTL_ONEDMA_ATONCE_LOCAL;
+               } else if (sc->bge_asicrev == BGE_ASICREV_BCM5703 ||
                    sc->bge_asicrev == BGE_ASICREV_BCM5704) {
-                       uint32_t tmp;
+                       uint32_t rd_wat = 0x7;
+                       uint32_t clkctl;
 
-                       tmp = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1f;
-                       if (tmp == 0x6 || tmp == 0x7)
-                               dma_rw_ctl |= BGE_PCIDMARWCTL_ONEDMA_ATONCE;
+                       clkctl = CSR_READ_4(sc, BGE_PCI_CLKCTL) & 0x1f;
+                       if ((sc->bge_flags & BGE_FLAG_MAXADDR_40BIT) &&
+                           sc->bge_asicrev == BGE_ASICREV_BCM5704) {
+                               dma_rw_ctl |=
+                                   BGE_PCIDMARWCTL_ONEDMA_ATONCE_LOCAL;
+                       } else if (clkctl == 0x6 || clkctl == 0x7) {
+                               dma_rw_ctl |=
+                                   BGE_PCIDMARWCTL_ONEDMA_ATONCE_GLOBAL;
+                       }
+                       if (sc->bge_asicrev == BGE_ASICREV_BCM5703)
+                               rd_wat = 0x4;
+
+                       dma_rw_ctl |= (rd_wat << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
+                           (3 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
+                       dma_rw_ctl |= BGE_PCIDMARWCTL_ASRT_ALL_BE;
+               } else {
+                       dma_rw_ctl |= (0x3 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
+                           (0x3 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
+                       dma_rw_ctl |= 0xf;
                }
        } else {
                /* Conventional PCI bus */
-               dma_rw_ctl = BGE_PCI_READ_CMD|BGE_PCI_WRITE_CMD |
-                   (0x7 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
-                   (0x7 << BGE_PCIDMARWCTL_WR_WAT_SHIFT) |
-                   (0x0F);
+               dma_rw_ctl |= (0x7 << BGE_PCIDMARWCTL_RD_WAT_SHIFT) |
+                   (0x7 << BGE_PCIDMARWCTL_WR_WAT_SHIFT);
+               if (sc->bge_asicrev != BGE_ASICREV_BCM5705 &&
+                   sc->bge_asicrev != BGE_ASICREV_BCM5750)
+                       dma_rw_ctl |= 0xf;
        }
 
        if (sc->bge_asicrev == BGE_ASICREV_BCM5703 ||
-           sc->bge_asicrev == BGE_ASICREV_BCM5704 ||
-           sc->bge_asicrev == BGE_ASICREV_BCM5705)
+           sc->bge_asicrev == BGE_ASICREV_BCM5704) {
                dma_rw_ctl &= ~BGE_PCIDMARWCTL_MINDMA;
+       } else if (sc->bge_asicrev == BGE_ASICREV_BCM5700 ||
+           sc->bge_asicrev == BGE_ASICREV_BCM5701) {
+               dma_rw_ctl |= BGE_PCIDMARWCTL_USE_MRM |
+                   BGE_PCIDMARWCTL_ASRT_ALL_BE;
+       }
        pci_write_config(sc->bge_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4);
 
        /*
@@ -1351,9 +1361,11 @@ bge_chipinit(struct bge_softc *sc)
 
        /*
         * Disable memory write invalidate.  Apparently it is not supported
-        * properly by these devices.
+        * properly by these devices.  Also ensure that INTx isn't disabled,
+        * as these chips need it even when using MSI.
         */
-       PCI_CLRBIT(sc->bge_dev, BGE_PCI_CMD, PCIM_CMD_MWIEN, 4);
+       PCI_CLRBIT(sc->bge_dev, BGE_PCI_CMD,
+           (PCIM_CMD_MWRICEN | PCIM_CMD_INTxDIS), 4);
 
        /* Set the timer prescaler (always 66Mhz) */
        CSR_WRITE_4(sc, BGE_MISC_CFG, 65 << 1/*BGE_32BITTIME_66MHZ*/);
@@ -1423,22 +1435,20 @@ bge_blockinit(struct bge_softc *sc)
        CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10);
 
        /* Enable buffer manager */
-       if (!BGE_IS_5705_PLUS(sc)) {
-               CSR_WRITE_4(sc, BGE_BMAN_MODE,
-                   BGE_BMANMODE_ENABLE|BGE_BMANMODE_LOMBUF_ATTN);
+       CSR_WRITE_4(sc, BGE_BMAN_MODE,
+           BGE_BMANMODE_ENABLE|BGE_BMANMODE_LOMBUF_ATTN);
 
-               /* Poll for buffer manager start indication */
-               for (i = 0; i < BGE_TIMEOUT; i++) {
-                       if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE)
-                               break;
-                       DELAY(10);
-               }
+       /* Poll for buffer manager start indication */
+       for (i = 0; i < BGE_TIMEOUT; i++) {
+               if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE)
+                       break;
+               DELAY(10);
+       }
 
-               if (i == BGE_TIMEOUT) {
-                       if_printf(&sc->arpcom.ac_if,
-                                 "buffer manager failed to start\n");
-                       return(ENXIO);
-               }
+       if (i == BGE_TIMEOUT) {
+               if_printf(&sc->arpcom.ac_if,
+                         "buffer manager failed to start\n");
+               return(ENXIO);
        }
 
        /* Enable flow-through queues */
@@ -1564,6 +1574,15 @@ bge_blockinit(struct bge_softc *sc)
                bge_writembx(sc, BGE_MBX_RX_MINI_PROD_LO, 0);
        }
 
+       /* Choose de-pipeline mode for BCM5906 A0, A1 and A2. */
+       if (sc->bge_asicrev == BGE_ASICREV_BCM5906 &&
+           (sc->bge_chipid == BGE_CHIPID_BCM5906_A0 ||
+            sc->bge_chipid == BGE_CHIPID_BCM5906_A1 ||
+            sc->bge_chipid == BGE_CHIPID_BCM5906_A2)) {
+               CSR_WRITE_4(sc, BGE_ISO_PKT_TX,
+                   (CSR_READ_4(sc, BGE_ISO_PKT_TX) & ~3) | 2);
+       }
+
        /*
         * The BD ring replenish thresholds control how often the
         * hardware fetches new BD's from the producer rings in host
@@ -1695,14 +1714,21 @@ bge_blockinit(struct bge_softc *sc)
        /* Set up host coalescing defaults */
        CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, sc->bge_rx_coal_ticks);
        CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, sc->bge_tx_coal_ticks);
-       CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, sc->bge_rx_max_coal_bds);
-       CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, sc->bge_tx_max_coal_bds);
+       CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, sc->bge_rx_coal_bds);
+       CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, sc->bge_tx_coal_bds);
        if (!BGE_IS_5705_PLUS(sc)) {
-               CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS_INT, 0);
-               CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS_INT, 0);
+               CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS_INT,
+                   sc->bge_rx_coal_ticks_int);
+               CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS_INT,
+                   sc->bge_tx_coal_ticks_int);
        }
-       CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 1);
-       CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 1);
+       /*
+        * NOTE:
+        * The datasheet (57XX-PG105-R) says BCM5705+ do not
+        * have following two registers; obviously it is wrong.
+        */
+       CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, sc->bge_rx_coal_bds_int);
+       CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, sc->bge_tx_coal_bds_int);
 
        /* Set up address of statistics block */
        if (!BGE_IS_5705_PLUS(sc)) {
@@ -1738,6 +1764,15 @@ bge_blockinit(struct bge_softc *sc)
        } else {
                val = BGE_STATBLKSZ_32BYTE;
        }
+#if 0
+       /*
+        * Does not seem to have visible effect in both
+        * bulk data (1472B UDP datagram) and tiny data
+        * (18B UDP datagram) TX tests.
+        */
+       if (!BGE_IS_CRIPPLED(sc))
+               val |= BGE_HCCMODE_CLRTICK_TX;
+#endif
 
        /* Turn on host coalescing state machine */
        CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE);
@@ -1820,6 +1855,8 @@ bge_blockinit(struct bge_softc *sc)
                   BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN;
        if (sc->bge_flags & BGE_FLAG_PCIE)
                val |= BGE_RDMAMODE_FIFO_LONG_BURST;
+       if (sc->bge_flags & BGE_FLAG_TSO)
+               val |= BGE_RDMAMODE_TSO4_ENABLE;
        CSR_WRITE_4(sc, BGE_RDMA_MODE, val);
        DELAY(40);
 
@@ -1846,7 +1883,11 @@ bge_blockinit(struct bge_softc *sc)
        CSR_WRITE_4(sc, BGE_SDC_MODE, val);
 
        /* Turn on send data initiator state machine */
-       CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
+       if (sc->bge_flags & BGE_FLAG_TSO)
+               CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE |
+                   BGE_SDIMODE_HW_LSO_PRE_DMA);
+       else
+               CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
 
        /* Turn on send BD initiator state machine */
        CSR_WRITE_4(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
@@ -1936,12 +1977,19 @@ bge_attach(device_t dev)
        int error = 0, rid, capmask;
        uint8_t ether_addr[ETHER_ADDR_LEN];
        uint16_t product, vendor;
+       driver_intr_t *intr_func;
+       uintptr_t mii_priv = 0;
+       u_int intr_flags;
+       int msi_enable;
 
        sc = device_get_softc(dev);
        sc->bge_dev = dev;
        callout_init(&sc->bge_stat_timer);
        lwkt_serialize_init(&sc->bge_jslot_serializer);
 
+       product = pci_get_device(dev);
+       vendor = pci_get_vendor(dev);
+
 #ifndef BURN_BRIDGES
        if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
                uint32_t irq, mem;
@@ -1980,8 +2028,11 @@ bge_attach(device_t dev)
        sc->bge_chipid =
            pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
            BGE_PCIMISCCTL_ASICREV_SHIFT;
-        if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_USE_PRODID_REG)
+       if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_USE_PRODID_REG) {
+               /* All chips, which use BGE_PCI_PRODID_ASICREV, have CPMU */
+               sc->bge_flags |= BGE_FLAG_CPMU;
                sc->bge_chipid = pci_read_config(dev, BGE_PCI_PRODID_ASICREV, 4);
+       }
        sc->bge_asicrev = BGE_ASICREV(sc->bge_chipid);
        sc->bge_chiprev = BGE_CHIPREV(sc->bge_chipid);
 
@@ -2034,17 +2085,85 @@ bge_attach(device_t dev)
        if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906)
                sc->bge_flags |= BGE_FLAG_SHORTDMA;
 
+       /*
+        * Check if this is a PCI-X or PCI Express device.
+        */
+       if (BGE_IS_5705_PLUS(sc)) {
+               if (pci_is_pcie(dev)) {
+                       sc->bge_flags |= BGE_FLAG_PCIE;
+                       sc->bge_pciecap = pci_get_pciecap_ptr(sc->bge_dev);
+                       pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
+               }
+       } else {
+               /*
+                * Check if the device is in PCI-X Mode.
+                * (This bit is not valid on PCI Express controllers.)
+                */
+               if ((pci_read_config(sc->bge_dev, BGE_PCI_PCISTATE, 4) &
+                   BGE_PCISTATE_PCI_BUSMODE) == 0) {
+                       sc->bge_flags |= BGE_FLAG_PCIX;
+                       sc->bge_pcixcap = pci_get_pcixcap_ptr(sc->bge_dev);
+                       sc->bge_mbox_reorder = device_getenv_int(sc->bge_dev,
+                           "mbox_reorder", 0);
+               }
+       }
+       device_printf(dev, "CHIP ID 0x%08x; "
+                     "ASIC REV 0x%02x; CHIP REV 0x%02x; %s\n",
+                     sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev,
+                     (sc->bge_flags & BGE_FLAG_PCIX) ? "PCI-X"
+                     : ((sc->bge_flags & BGE_FLAG_PCIE) ?
+                       "PCI-E" : "PCI"));
+
        /*
-        * Set various quirk flags.
+        * The 40bit DMA bug applies to the 5714/5715 controllers and is
+        * not actually a MAC controller bug but an issue with the embedded
+        * PCIe to PCI-X bridge in the device. Use 40bit DMA workaround.
         */
+       if ((sc->bge_flags & BGE_FLAG_PCIX) &&
+           (BGE_IS_5714_FAMILY(sc) || device_getenv_int(dev, "dma40b", 0)))
+               sc->bge_flags |= BGE_FLAG_MAXADDR_40BIT;
 
-       product = pci_get_device(dev);
-       vendor = pci_get_vendor(dev);
+       /*
+        * When using the BCM5701 in PCI-X mode, data corruption has
+        * been observed in the first few bytes of some received packets.
+        * Aligning the packet buffer in memory eliminates the corruption.
+        * Unfortunately, this misaligns the packet payloads.  On platforms
+        * which do not support unaligned accesses, we will realign the
+        * payloads by copying the received packets.
+        */
+       if (sc->bge_asicrev == BGE_ASICREV_BCM5701 &&
+           (sc->bge_flags & BGE_FLAG_PCIX))
+               sc->bge_flags |= BGE_FLAG_RX_ALIGNBUG;
+
+       if (!BGE_IS_CRIPPLED(sc)) {
+               if (device_getenv_int(dev, "status_tag", 1)) {
+                       sc->bge_flags |= BGE_FLAG_STATUS_TAG;
+                       sc->bge_pci_miscctl = BGE_PCIMISCCTL_TAGGED_STATUS;
+                       if (bootverbose)
+                               device_printf(dev, "enable status tag\n");
+               }
+       }
+
+       if (BGE_IS_5755_PLUS(sc)) {
+               /*
+                * BCM5754 and BCM5787 share the same ASIC id, so an
+                * explicit device id check is required.
+                * For unknown reasons, TSO does not work on BCM5755M.
+                */
+               if (product != PCI_PRODUCT_BROADCOM_BCM5754 &&
+                   product != PCI_PRODUCT_BROADCOM_BCM5754M &&
+                   product != PCI_PRODUCT_BROADCOM_BCM5755M)
+                       sc->bge_flags |= BGE_FLAG_TSO;
+       }
+
+       /*
+        * Set various PHY quirk flags.
+        */
 
        if ((sc->bge_asicrev == BGE_ASICREV_BCM5700 ||
             sc->bge_asicrev == BGE_ASICREV_BCM5701) &&
            pci_get_subvendor(dev) == PCI_VENDOR_DELL)
-               sc->bge_phy_flags |= BGE_PHY_NO_3LED;
+               mii_priv |= BRGPHY_FLAG_NO_3LED;
 
        capmask = MII_CAPMASK_DEFAULT;
        if ((sc->bge_asicrev == BGE_ASICREV_BCM5703 &&
@@ -2064,24 +2183,27 @@ bge_attach(device_t dev)
                capmask &= ~BMSR_EXTSTAT;
        }
 
-       sc->bge_phy_flags |= BGE_PHY_WIRESPEED;
+       mii_priv |= BRGPHY_FLAG_WIRESPEED;
        if (sc->bge_asicrev == BGE_ASICREV_BCM5700 ||
            (sc->bge_asicrev == BGE_ASICREV_BCM5705 &&
             (sc->bge_chipid != BGE_CHIPID_BCM5705_A0 &&
              sc->bge_chipid != BGE_CHIPID_BCM5705_A1)) ||
            sc->bge_asicrev == BGE_ASICREV_BCM5906)
-               sc->bge_phy_flags &= ~BGE_PHY_WIRESPEED;
+               mii_priv &= ~BRGPHY_FLAG_WIRESPEED;
 
        if (sc->bge_chipid == BGE_CHIPID_BCM5701_A0 ||
            sc->bge_chipid == BGE_CHIPID_BCM5701_B0)
-               sc->bge_phy_flags |= BGE_PHY_CRC_BUG;
+               mii_priv |= BRGPHY_FLAG_CRC_BUG;
 
        if (sc->bge_chiprev == BGE_CHIPREV_5703_AX ||
            sc->bge_chiprev == BGE_CHIPREV_5704_AX)
-               sc->bge_phy_flags |= BGE_PHY_ADC_BUG;
+               mii_priv |= BRGPHY_FLAG_ADC_BUG;
 
        if (sc->bge_chipid == BGE_CHIPID_BCM5704_A0)
-               sc->bge_phy_flags |= BGE_PHY_5704_A0_BUG;
+               mii_priv |= BRGPHY_FLAG_5704_A0;
+
+       if (sc->bge_asicrev == BGE_ASICREV_BCM5906)
+               mii_priv |= BRGPHY_FLAG_5906;
 
        if (BGE_IS_5705_PLUS(sc) &&
            sc->bge_asicrev != BGE_ASICREV_BCM5906 &&
@@ -2095,76 +2217,60 @@ bge_attach(device_t dev)
                    sc->bge_asicrev == BGE_ASICREV_BCM5787) {
                        if (product != PCI_PRODUCT_BROADCOM_BCM5722 &&
                            product != PCI_PRODUCT_BROADCOM_BCM5756)
-                               sc->bge_phy_flags |= BGE_PHY_JITTER_BUG;
+                               mii_priv |= BRGPHY_FLAG_JITTER_BUG;
                        if (product == PCI_PRODUCT_BROADCOM_BCM5755M)
-                               sc->bge_phy_flags |= BGE_PHY_ADJUST_TRIM;
+                               mii_priv |= BRGPHY_FLAG_ADJUST_TRIM;
                } else {
-                       sc->bge_phy_flags |= BGE_PHY_BER_BUG;
+                       mii_priv |= BRGPHY_FLAG_BER_BUG;
                }
        }
 
-       /* Identify the chips that use an CPMU. */
-       if (sc->bge_asicrev == BGE_ASICREV_BCM5784 ||
-           sc->bge_asicrev == BGE_ASICREV_BCM5761 ||
-           sc->bge_asicrev == BGE_ASICREV_BCM5785 ||
-           sc->bge_asicrev == BGE_ASICREV_BCM57780)
-               sc->bge_flags |= BGE_FLAG_CPMU;
-       if (sc->bge_flags & BGE_FLAG_CPMU)
-               sc->bge_mi_mode = BGE_MIMODE_500KHZ_CONST;
-       else
-               sc->bge_mi_mode = BGE_MIMODE_BASE;
-
-       /* Enable auto polling for BCM570[0-5]. */
-       if (BGE_IS_5700_FAMILY(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5705)
-               sc->bge_mi_mode |= BGE_MIMODE_AUTOPOLL;
-
-       /* Allocate interrupt */
-       rid = 0;
+       /*
+        * Allocate interrupt
+        */
+       msi_enable = bge_msi_enable;
+       if ((sc->bge_flags & BGE_FLAG_STATUS_TAG) == 0) {
+               /* If "tagged status" is disabled, don't enable MSI */
+               msi_enable = 0;
+       } else if (msi_enable) {
+               msi_enable = 0; /* Disable by default */
+               if (BGE_IS_575X_PLUS(sc)) {
+                       msi_enable = 1;
+                       /* XXX we filter all 5714 chips */
+                       if (sc->bge_asicrev == BGE_ASICREV_BCM5714 ||
+                           (sc->bge_asicrev == BGE_ASICREV_BCM5750 &&
+                            (sc->bge_chiprev == BGE_CHIPREV_5750_AX ||
+                             sc->bge_chiprev == BGE_CHIPREV_5750_BX)))
+                               msi_enable = 0;
+                       else if (BGE_IS_5755_PLUS(sc) ||
+                           sc->bge_asicrev == BGE_ASICREV_BCM5906)
+                               sc->bge_flags |= BGE_FLAG_ONESHOT_MSI;
+               }
+       }
+       if (msi_enable) {
+               if (pci_find_extcap(dev, PCIY_MSI, &sc->bge_msicap)) {
+                       device_printf(dev, "no MSI capability\n");
+                       msi_enable = 0;
+               }
+       }
 
-       sc->bge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
-           RF_SHAREABLE | RF_ACTIVE);
+       sc->bge_irq_type = pci_alloc_1intr(dev, msi_enable, &sc->bge_irq_rid,
+           &intr_flags);
 
+       sc->bge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->bge_irq_rid,
+           intr_flags);
        if (sc->bge_irq == NULL) {
                device_printf(dev, "couldn't map interrupt\n");
                error = ENXIO;
                goto fail;
        }
 
-       /*
-        * Check if this is a PCI-X or PCI Express device.
-        */
-       if (BGE_IS_5705_PLUS(sc)) {
-               if (pci_is_pcie(dev)) {
-                       sc->bge_flags |= BGE_FLAG_PCIE;
-                       pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
-               }
-       } else {
-               /*
-                * Check if the device is in PCI-X Mode.
-                * (This bit is not valid on PCI Express controllers.)
-                */
-               if ((pci_read_config(sc->bge_dev, BGE_PCI_PCISTATE, 4) &
-                   BGE_PCISTATE_PCI_BUSMODE) == 0) {
-                       sc->bge_flags |= BGE_FLAG_PCIX;
-                       sc->bge_pcixcap = pci_get_pcixcap_ptr(sc->bge_dev);
-               }
-       }
-
-       device_printf(dev, "CHIP ID 0x%08x; "
-                     "ASIC REV 0x%02x; CHIP REV 0x%02x; %s\n",
-                     sc->bge_chipid, sc->bge_asicrev, sc->bge_chiprev,
-                     (sc->bge_flags & BGE_FLAG_PCIX) ? "PCI-X"
-                     : ((sc->bge_flags & BGE_FLAG_PCIE) ?
-                       "PCI-E" : "PCI"));
-
-       /*
-        * The 40bit DMA bug applies to the 5714/5715 controllers and is
-        * not actually a MAC controller bug but an issue with the embedded
-        * PCIe to PCI-X bridge in the device. Use 40bit DMA workaround.
-        */
-       if (BGE_IS_5714_FAMILY(sc) && (sc->bge_flags & BGE_FLAG_PCIX))
-               sc->bge_flags |= BGE_FLAG_MAXADDR_40BIT;
+       if (sc->bge_irq_type == PCI_INTR_TYPE_MSI)
+               bge_enable_msi(sc);
+       else
+               sc->bge_flags &= ~BGE_FLAG_ONESHOT_MSI;
 
+       /* Initialize if_name earlier, so if_printf could be used */
        ifp = &sc->arpcom.ac_if;
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 
@@ -2198,10 +2304,30 @@ bge_attach(device_t dev)
 
        /* Set default tuneable values. */
        sc->bge_stat_ticks = BGE_TICKS_PER_SEC;
-       sc->bge_rx_coal_ticks = bge_rx_coal_ticks;
-       sc->bge_tx_coal_ticks = bge_tx_coal_ticks;
-       sc->bge_rx_max_coal_bds = bge_rx_max_coal_bds;
-       sc->bge_tx_max_coal_bds = bge_tx_max_coal_bds;
+       sc->bge_rx_coal_ticks = BGE_RX_COAL_TICKS_DEF;
+       sc->bge_tx_coal_ticks = BGE_TX_COAL_TICKS_DEF;
+       sc->bge_rx_coal_bds = BGE_RX_COAL_BDS_DEF;
+       sc->bge_tx_coal_bds = BGE_TX_COAL_BDS_DEF;
+       if (sc->bge_flags & BGE_FLAG_STATUS_TAG) {
+               sc->bge_rx_coal_ticks_int = BGE_RX_COAL_TICKS_DEF;
+               sc->bge_tx_coal_ticks_int = BGE_TX_COAL_TICKS_DEF;
+               sc->bge_rx_coal_bds_int = BGE_RX_COAL_BDS_DEF;
+               sc->bge_tx_coal_bds_int = BGE_TX_COAL_BDS_DEF;
+       } else {
+               sc->bge_rx_coal_ticks_int = BGE_RX_COAL_TICKS_MIN;
+               sc->bge_tx_coal_ticks_int = BGE_TX_COAL_TICKS_MIN;
+               sc->bge_rx_coal_bds_int = BGE_RX_COAL_BDS_MIN;
+               sc->bge_tx_coal_bds_int = BGE_TX_COAL_BDS_MIN;
+       }
+
+       /* Set up TX spare and reserved descriptor count */
+       if (sc->bge_flags & BGE_FLAG_TSO) {
+               sc->bge_txspare = BGE_NSEG_SPARE_TSO;
+               sc->bge_txrsvd = BGE_NSEG_RSVD_TSO;
+       } else {
+               sc->bge_txspare = BGE_NSEG_SPARE;
+               sc->bge_txrsvd = BGE_NSEG_RSVD;
+       }
 
        /* Set up ifnet structure */
        ifp->if_softc = sc;
@@ -2224,7 +2350,11 @@ bge_attach(device_t dev)
         */
        if (sc->bge_chipid != BGE_CHIPID_BCM5700_B0) {
                ifp->if_capabilities |= IFCAP_HWCSUM;
-               ifp->if_hwassist = BGE_CSUM_FEATURES;
+               ifp->if_hwassist |= BGE_CSUM_FEATURES;
+       }
+       if (sc->bge_flags & BGE_FLAG_TSO) {
+               ifp->if_capabilities |= IFCAP_TSO;
+               ifp->if_hwassist |= CSUM_TSO;
        }
        ifp->if_capenable = ifp->if_capabilities;
 
@@ -2237,9 +2367,9 @@ bge_attach(device_t dev)
         * by its PCI subsystem ID, as we do below for the SysKonnect
         * SK-9D41.
         */
-       if (bge_readmem_ind(sc, BGE_SOFTWARE_GENCOMM_SIG) == BGE_MAGIC_NUMBER)
+       if (bge_readmem_ind(sc, BGE_SOFTWARE_GENCOMM_SIG) == BGE_MAGIC_NUMBER) {
                hwcfg = bge_readmem_ind(sc, BGE_SOFTWARE_GENCOMM_NICCFG);
-       else {
+       } else {
                if (bge_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET,
                                    sizeof(hwcfg))) {
                        device_printf(dev, "failed to read EEPROM\n");
@@ -2258,6 +2388,32 @@ bge_attach(device_t dev)
                        sc->bge_flags |= BGE_FLAG_TBI;
        }
 
+       /* Setup MI MODE */
+       if (sc->bge_flags & BGE_FLAG_CPMU)
+               sc->bge_mi_mode = BGE_MIMODE_500KHZ_CONST;
+       else
+               sc->bge_mi_mode = BGE_MIMODE_BASE;
+       if (BGE_IS_5700_FAMILY(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5705) {
+               /* Enable auto polling for BCM570[0-5]. */
+               sc->bge_mi_mode |= BGE_MIMODE_AUTOPOLL;
+       }
+
+       /* Setup link status update stuffs */
+       if (sc->bge_asicrev == BGE_ASICREV_BCM5700 &&
+           sc->bge_chipid != BGE_CHIPID_BCM5700_B2) {
+               sc->bge_link_upd = bge_bcm5700_link_upd;
+               sc->bge_link_chg = BGE_MACSTAT_MI_INTERRUPT;
+       } else if (sc->bge_flags & BGE_FLAG_TBI) {
+               sc->bge_link_upd = bge_tbi_link_upd;
+               sc->bge_link_chg = BGE_MACSTAT_LINK_CHANGED;
+       } else if (sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) {
+               sc->bge_link_upd = bge_autopoll_link_upd;
+               sc->bge_link_chg = BGE_MACSTAT_LINK_CHANGED;
+       } else {
+               sc->bge_link_upd = bge_copper_link_upd;
+               sc->bge_link_chg = BGE_MACSTAT_LINK_CHANGED;
+       }
+
        /*
         * Broadcom's own driver always assumes the internal
         * PHY is at GMII address 1.  On some chips, the PHY responds
@@ -2284,6 +2440,8 @@ bge_attach(device_t dev)
                mii_probe_args_init(&mii_args, bge_ifmedia_upd, bge_ifmedia_sts);
                mii_args.mii_probemask = 1 << sc->bge_phyno;
                mii_args.mii_capmask = capmask;
+               mii_args.mii_privtag = MII_PRIVTAG_BRGPHY;
+               mii_args.mii_priv = mii_priv;
 
                error = mii_probe(dev, &sc->bge_miibus, &mii_args);
                if (error) {
@@ -2292,33 +2450,6 @@ bge_attach(device_t dev)
                }
        }
 
-       /*
-        * When using the BCM5701 in PCI-X mode, data corruption has
-        * been observed in the first few bytes of some received packets.
-        * Aligning the packet buffer in memory eliminates the corruption.
-        * Unfortunately, this misaligns the packet payloads.  On platforms
-        * which do not support unaligned accesses, we will realign the
-        * payloads by copying the received packets.
-        */
-       if (sc->bge_asicrev == BGE_ASICREV_BCM5701 &&
-           (sc->bge_flags & BGE_FLAG_PCIX))
-               sc->bge_flags |= BGE_FLAG_RX_ALIGNBUG;
-
-       if (sc->bge_asicrev == BGE_ASICREV_BCM5700 &&
-           sc->bge_chipid != BGE_CHIPID_BCM5700_B2) {
-               sc->bge_link_upd = bge_bcm5700_link_upd;
-               sc->bge_link_chg = BGE_MACSTAT_MI_INTERRUPT;
-       } else if (sc->bge_flags & BGE_FLAG_TBI) {
-               sc->bge_link_upd = bge_tbi_link_upd;
-               sc->bge_link_chg = BGE_MACSTAT_LINK_CHANGED;
-       } else if (sc->bge_mi_mode & BGE_MIMODE_AUTOPOLL) {
-               sc->bge_link_upd = bge_autopoll_link_upd;
-               sc->bge_link_chg = BGE_MACSTAT_LINK_CHANGED;
-       } else {
-               sc->bge_link_upd = bge_copper_link_upd;
-               sc->bge_link_chg = BGE_MACSTAT_LINK_CHANGED;
-       }
-
        /*
         * Create sysctl nodes.
         */
@@ -2348,17 +2479,16 @@ bge_attach(device_t dev)
                        "Transmit coalescing ticks (usec).");
        SYSCTL_ADD_PROC(&sc->bge_sysctl_ctx,
                        SYSCTL_CHILDREN(sc->bge_sysctl_tree),
-                       OID_AUTO, "rx_max_coal_bds",
+                       OID_AUTO, "rx_coal_bds",
                        CTLTYPE_INT | CTLFLAG_RW,
-                       sc, 0, bge_sysctl_rx_max_coal_bds, "I",
+                       sc, 0, bge_sysctl_rx_coal_bds, "I",
                        "Receive max coalesced BD count.");
        SYSCTL_ADD_PROC(&sc->bge_sysctl_ctx,
                        SYSCTL_CHILDREN(sc->bge_sysctl_tree),
-                       OID_AUTO, "tx_max_coal_bds",
+                       OID_AUTO, "tx_coal_bds",
                        CTLTYPE_INT | CTLFLAG_RW,
-                       sc, 0, bge_sysctl_tx_max_coal_bds, "I",
+                       sc, 0, bge_sysctl_tx_coal_bds, "I",
                        "Transmit max coalesced BD count.");
-
        if (sc->bge_flags & BGE_FLAG_PCIE) {
                /*
                 * A common design characteristic for many Broadcom
@@ -2382,15 +2512,53 @@ bge_attach(device_t dev)
                               &sc->bge_force_defrag, 0,
                               "Force defragment on TX path");
        }
+       if (sc->bge_flags & BGE_FLAG_STATUS_TAG) {
+               if (!BGE_IS_5705_PLUS(sc)) {
+                       SYSCTL_ADD_PROC(&sc->bge_sysctl_ctx,
+                           SYSCTL_CHILDREN(sc->bge_sysctl_tree), OID_AUTO,
+                           "rx_coal_ticks_int", CTLTYPE_INT | CTLFLAG_RW,
+                           sc, 0, bge_sysctl_rx_coal_ticks_int, "I",
+                           "Receive coalescing ticks "
+                           "during interrupt (usec).");
+                       SYSCTL_ADD_PROC(&sc->bge_sysctl_ctx,
+                           SYSCTL_CHILDREN(sc->bge_sysctl_tree), OID_AUTO,
+                           "tx_coal_ticks_int", CTLTYPE_INT | CTLFLAG_RW,
+                           sc, 0, bge_sysctl_tx_coal_ticks_int, "I",
+                           "Transmit coalescing ticks "
+                           "during interrupt (usec).");
+               }
+               SYSCTL_ADD_PROC(&sc->bge_sysctl_ctx,
+                   SYSCTL_CHILDREN(sc->bge_sysctl_tree), OID_AUTO,
+                   "rx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
+                   sc, 0, bge_sysctl_rx_coal_bds_int, "I",
+                   "Receive max coalesced BD count during interrupt.");
+               SYSCTL_ADD_PROC(&sc->bge_sysctl_ctx,
+                   SYSCTL_CHILDREN(sc->bge_sysctl_tree), OID_AUTO,
+                   "tx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
+                   sc, 0, bge_sysctl_tx_coal_bds_int, "I",
+                   "Transmit max coalesced BD count during interrupt.");
+       }
 
        /*
         * Call MI attach routine.
         */
        ether_ifattach(ifp, ether_addr, NULL);
 
-       error = bus_setup_intr(dev, sc->bge_irq, INTR_MPSAFE,
-                              bge_intr, sc, &sc->bge_intrhand, 
-                              ifp->if_serializer);
+       if (sc->bge_irq_type == PCI_INTR_TYPE_MSI) {
+               if (sc->bge_flags & BGE_FLAG_ONESHOT_MSI) {
+                       intr_func = bge_msi_oneshot;
+                       if (bootverbose)
+                               device_printf(dev, "oneshot MSI\n");
+               } else {
+                       intr_func = bge_msi;
+               }
+       } else if (sc->bge_flags & BGE_FLAG_STATUS_TAG) {
+               intr_func = bge_intr_legacy;
+       } else {
+               intr_func = bge_intr_crippled;
+       }
+       error = bus_setup_intr(dev, sc->bge_irq, INTR_MPSAFE, intr_func, sc,
+           &sc->bge_intrhand, ifp->if_serializer);
        if (error) {
                ether_ifdetach(ifp);
                device_printf(dev, "couldn't set up irq\n");
@@ -2429,12 +2597,17 @@ bge_detach(device_t dev)
                device_delete_child(dev, sc->bge_miibus);
        bus_generic_detach(dev);
 
-        if (sc->bge_irq != NULL)
-               bus_release_resource(dev, SYS_RES_IRQ, 0, sc->bge_irq);
+       if (sc->bge_irq != NULL) {
+               bus_release_resource(dev, SYS_RES_IRQ, sc->bge_irq_rid,
+                   sc->bge_irq);
+       }
+       if (sc->bge_irq_type == PCI_INTR_TYPE_MSI)
+               pci_release_msi(dev);
 
-        if (sc->bge_res != NULL)
+       if (sc->bge_res != NULL) {
                bus_release_resource(dev, SYS_RES_MEMORY,
                    BGE_PCI_BAR0, sc->bge_res);
+       }
 
        if (sc->bge_sysctl_tree != NULL)
                sysctl_ctx_free(&sc->bge_sysctl_ctx);
@@ -2471,7 +2644,8 @@ bge_reset(struct bge_softc *sc)
 
        pci_write_config(dev, BGE_PCI_MISC_CTL,
            BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
-           BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW, 4);
+           BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
+           sc->bge_pci_miscctl, 4);
 
        /* Disable fastboot on controllers that support it. */
        if (sc->bge_asicrev == BGE_ASICREV_BCM5752 ||
@@ -2492,8 +2666,14 @@ bge_reset(struct bge_softc *sc)
 
        /* XXX: Broadcom Linux driver. */
        if (sc->bge_flags & BGE_FLAG_PCIE) {
-               if (CSR_READ_4(sc, 0x7e2c) == 0x60)     /* PCIE 1.0 */
-                       CSR_WRITE_4(sc, 0x7e2c, 0x20);
+               /* Force PCI-E 1.0a mode */
+               if (sc->bge_asicrev != BGE_ASICREV_BCM5785 &&
+                   CSR_READ_4(sc, BGE_PCIE_PHY_TSTCTL) ==
+                   (BGE_PCIE_PHY_TSTCTL_PSCRAM |
+                    BGE_PCIE_PHY_TSTCTL_PCIE10)) {
+                       CSR_WRITE_4(sc, BGE_PCIE_PHY_TSTCTL,
+                           BGE_PCIE_PHY_TSTCTL_PSCRAM);
+               }
                if (sc->bge_chipid != BGE_CHIPID_BCM5750_A0) {
                        /* Prevent PCIE link training during global reset */
                        CSR_WRITE_4(sc, BGE_MISC_CFG, (1<<29));
@@ -2505,7 +2685,7 @@ bge_reset(struct bge_softc *sc)
         * Set GPHY Power Down Override to leave GPHY
         * powered up in D0 uninitialized.
         */
-       if (BGE_IS_5705_PLUS(sc))
+       if (BGE_IS_5705_PLUS(sc) && (sc->bge_flags & BGE_FLAG_CPMU) == 0)
                reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE;
 
        /* Issue global reset */
@@ -2526,6 +2706,8 @@ bge_reset(struct bge_softc *sc)
 
        /* XXX: Broadcom Linux driver. */
        if (sc->bge_flags & BGE_FLAG_PCIE) {
+               uint16_t devctl;
+
                if (sc->bge_chipid == BGE_CHIPID_BCM5750_A0) {
                        uint32_t v;
 
@@ -2533,17 +2715,35 @@ bge_reset(struct bge_softc *sc)
                        v = pci_read_config(dev, 0xc4, 4);
                        pci_write_config(dev, 0xc4, v | (1<<15), 4);
                }
-               /*
-                * Set PCIE max payload size to 128 bytes and
-                * clear error status.
-                */
-               pci_write_config(dev, 0xd8, 0xf5000, 4);
+
+               devctl = pci_read_config(dev,
+                   sc->bge_pciecap + PCIER_DEVCTRL, 2);
+
+               /* Disable no snoop and disable relaxed ordering. */
+               devctl &= ~(PCIEM_DEVCTL_RELAX_ORDER | PCIEM_DEVCTL_NOSNOOP);
+
+               /* Old PCI-E chips only support 128 bytes Max PayLoad Size. */
+               if ((sc->bge_flags & BGE_FLAG_CPMU) == 0) {
+                       devctl &= ~PCIEM_DEVCTL_MAX_PAYLOAD_MASK;
+                       devctl |= PCIEM_DEVCTL_MAX_PAYLOAD_128;
+               }
+
+               pci_write_config(dev, sc->bge_pciecap + PCIER_DEVCTRL,
+                   devctl, 2);
+
+               /* Clear error status. */
+               pci_write_config(dev, sc->bge_pciecap + PCIER_DEVSTS,
+                   PCIEM_DEVSTS_CORR_ERR |
+                   PCIEM_DEVSTS_NFATAL_ERR |
+                   PCIEM_DEVSTS_FATAL_ERR |
+                   PCIEM_DEVSTS_UNSUPP_REQ, 2);
        }
 
        /* Reset some of the PCI state that got zapped by reset */
        pci_write_config(dev, BGE_PCI_MISC_CTL,
            BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
-           BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW, 4);
+           BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
+           sc->bge_pci_miscctl, 4);
        pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4);
        pci_write_config(dev, BGE_PCI_CMD, command, 4);
        write_op(sc, BGE_MISC_CFG, (65 << 1));
@@ -2571,10 +2771,21 @@ bge_reset(struct bge_softc *sc)
                    devctl, 2);
        }
 
-       /* Enable memory arbiter. */
+       /*
+        * Enable memory arbiter and re-enable MSI if necessary.
+        */
        if (BGE_IS_5714_FAMILY(sc)) {
                uint32_t val;
 
+               if (sc->bge_irq_type == PCI_INTR_TYPE_MSI) {
+                       /*
+                        * Resetting BCM5714 family will clear MSI
+                        * enable bit; restore it after resetting.
+                        */
+                       PCI_SETBIT(sc->bge_dev, sc->bge_msicap + PCIR_MSI_CTRL,
+                           PCIM_MSICTRL_MSI_ENABLE, 2);
+                       BGE_SETBIT(sc, BGE_MSI_MODE, BGE_MSIMODE_ENABLE);
+               }
                val = CSR_READ_4(sc, BGE_MARB_MODE);
                CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE | val);
        } else {
@@ -2607,7 +2818,6 @@ bge_reset(struct bge_softc *sc)
                if (i == BGE_FIRMWARE_TIMEOUT) {
                        if_printf(&sc->arpcom.ac_if, "firmware handshake "
                                  "timed out, found 0x%08x\n", val);
-                       return;
                }
        }
 
@@ -2652,8 +2862,8 @@ bge_reset(struct bge_softc *sc)
                uint32_t v;
 
                /* Enable Data FIFO protection. */
-               v = CSR_READ_4(sc, 0x7c00);
-               CSR_WRITE_4(sc, 0x7c00, v | (1<<25));
+               v = CSR_READ_4(sc, BGE_PCIE_TLDLPL_PORT);
+               CSR_WRITE_4(sc, BGE_PCIE_TLDLPL_PORT, v | (1 << 25));
        }
 
        DELAY(10000);
@@ -2669,19 +2879,14 @@ bge_reset(struct bge_softc *sc)
  */
 
 static void
-bge_rxeof(struct bge_softc *sc)
+bge_rxeof(struct bge_softc *sc, uint16_t rx_prod)
 {
        struct ifnet *ifp;
        int stdcnt = 0, jumbocnt = 0;
 
-       if (sc->bge_rx_saved_considx ==
-           sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx)
-               return;
-
        ifp = &sc->arpcom.ac_if;
 
-       while (sc->bge_rx_saved_considx !=
-              sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx) {
+       while (sc->bge_rx_saved_considx != rx_prod) {
                struct bge_rx_bd        *cur_rx;
                uint32_t                rxidx;
                struct mbuf             *m = NULL;
@@ -2772,7 +2977,7 @@ bge_rxeof(struct bge_softc *sc)
                                        m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
                        }
                        if ((cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) &&
-                           m->m_pkthdr.len >= BGE_MIN_FRAME) {
+                           m->m_pkthdr.len >= BGE_MIN_FRAMELEN) {
                                m->m_pkthdr.csum_data =
                                        cur_rx->bge_tcp_udp_csum;
                                m->m_pkthdr.csum_flags |=
@@ -2800,30 +3005,22 @@ bge_rxeof(struct bge_softc *sc)
 }
 
 static void
-bge_txeof(struct bge_softc *sc)
+bge_txeof(struct bge_softc *sc, uint16_t tx_cons)
 {
-       struct bge_tx_bd *cur_tx = NULL;
        struct ifnet *ifp;
 
-       if (sc->bge_tx_saved_considx ==
-           sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx)
-               return;
-
        ifp = &sc->arpcom.ac_if;
 
        /*
         * Go through our tx ring and free mbufs for those
         * frames that have been sent.
         */
-       while (sc->bge_tx_saved_considx !=
-              sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx) {
+       while (sc->bge_tx_saved_considx != tx_cons) {
                uint32_t idx = 0;
 
                idx = sc->bge_tx_saved_considx;
-               cur_tx = &sc->bge_ldata.bge_tx_ring[idx];
-               if (cur_tx->bge_flags & BGE_TXBDFLAG_END)
-                       ifp->if_opackets++;
                if (sc->bge_cdata.bge_tx_chain[idx] != NULL) {
+                       ifp->if_opackets++;
                        bus_dmamap_unload(sc->bge_cdata.bge_tx_mtag,
                            sc->bge_cdata.bge_tx_dmamap[idx]);
                        m_freem(sc->bge_cdata.bge_tx_chain[idx]);
@@ -2834,9 +3031,8 @@ bge_txeof(struct bge_softc *sc)
                logif(tx_pkt);
        }
 
-       if (cur_tx != NULL &&
-           (BGE_TX_RING_CNT - sc->bge_txcnt) >=
-           (BGE_NSEG_RSVD + BGE_NSEG_SPARE))
+       if ((BGE_TX_RING_CNT - sc->bge_txcnt) >=
+           (sc->bge_txrsvd + sc->bge_txspare))
                ifp->if_flags &= ~IFF_OACTIVE;
 
        if (sc->bge_txcnt == 0)
@@ -2852,7 +3048,8 @@ static void
 bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 {
        struct bge_softc *sc = ifp->if_softc;
-       uint32_t status;
+       struct bge_status_block *sblk = sc->bge_ldata.bge_status_block;
+       uint16_t rx_prod, tx_cons;
 
        switch(cmd) {
        case POLL_REGISTER:
@@ -2865,16 +3062,27 @@ bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
                /*
                 * Process link state changes.
                 */
-               status = CSR_READ_4(sc, BGE_MAC_STS);
-               if ((status & sc->bge_link_chg) || sc->bge_link_evt) {
-                       sc->bge_link_evt = 0;
-                       sc->bge_link_upd(sc, status);
-               }
-               /* fall through */
+               bge_link_poll(sc);
+               /* Fall through */
        case POLL_ONLY:
+               if (sc->bge_flags & BGE_FLAG_STATUS_TAG) {
+                       sc->bge_status_tag = sblk->bge_status_tag;
+                       /*
+                        * Use a load fence to ensure that status_tag
+                        * is saved before rx_prod and tx_cons.
+                        */
+                       cpu_lfence();
+               }
+               rx_prod = sblk->bge_idx[0].bge_rx_prod_idx;
+               tx_cons = sblk->bge_idx[0].bge_tx_cons_idx;
                if (ifp->if_flags & IFF_RUNNING) {
-                       bge_rxeof(sc);
-                       bge_txeof(sc);
+                       rx_prod = sblk->bge_idx[0].bge_rx_prod_idx;
+                       if (sc->bge_rx_saved_considx != rx_prod)
+                               bge_rxeof(sc, rx_prod);
+
+                       tx_cons = sblk->bge_idx[0].bge_tx_cons_idx;
+                       if (sc->bge_tx_saved_considx != tx_cons)
+                               bge_txeof(sc, tx_cons);
                }
                break;
        }
@@ -2883,11 +3091,10 @@ bge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 #endif
 
 static void
-bge_intr(void *xsc)
+bge_intr_crippled(void *xsc)
 {
        struct bge_softc *sc = xsc;
        struct ifnet *ifp = &sc->arpcom.ac_if;
-       uint32_t status;
 
        logif(intr);
 
@@ -2916,20 +3123,98 @@ bge_intr(void *xsc)
        /*
         * Process link state changes.
         */
-       status = CSR_READ_4(sc, BGE_MAC_STS);
-       if ((status & sc->bge_link_chg) || sc->bge_link_evt) {
-               sc->bge_link_evt = 0;
-               sc->bge_link_upd(sc, status);
+       bge_link_poll(sc);
+
+       if (ifp->if_flags & IFF_RUNNING) {
+               struct bge_status_block *sblk = sc->bge_ldata.bge_status_block;
+               uint16_t rx_prod, tx_cons;
+
+               rx_prod = sblk->bge_idx[0].bge_rx_prod_idx;
+               if (sc->bge_rx_saved_considx != rx_prod)
+                       bge_rxeof(sc, rx_prod);
+
+               tx_cons = sblk->bge_idx[0].bge_tx_cons_idx;
+               if (sc->bge_tx_saved_considx != tx_cons)
+                       bge_txeof(sc, tx_cons);
        }
 
+       if (sc->bge_coal_chg)
+               bge_coal_change(sc);
+}
+
+static void
+bge_intr_legacy(void *xsc)
+{
+       struct bge_softc *sc = xsc;
+       struct bge_status_block *sblk = sc->bge_ldata.bge_status_block;
+
+       if (sc->bge_status_tag == sblk->bge_status_tag) {
+               uint32_t val;
+
+               val = pci_read_config(sc->bge_dev, BGE_PCI_PCISTATE, 4);
+               if (val & BGE_PCISTAT_INTR_NOTACT)
+                       return;
+       }
+
+       /*
+        * NOTE:
+        * Interrupt will have to be disabled if tagged status
+        * is used, else interrupt will always be asserted on
+        * certain chips (at least on BCM5750 AX/BX).
+        */
+       bge_writembx(sc, BGE_MBX_IRQ0_LO, 1);
+
+       bge_intr(sc);
+}
+
+static void
+bge_msi(void *xsc)
+{
+       struct bge_softc *sc = xsc;
+
+       /* Disable interrupt first */
+       bge_writembx(sc, BGE_MBX_IRQ0_LO, 1);
+       bge_intr(sc);
+}
+
+static void
+bge_msi_oneshot(void *xsc)
+{
+       bge_intr(xsc);
+}
+
+static void
+bge_intr(struct bge_softc *sc)
+{
+       struct ifnet *ifp = &sc->arpcom.ac_if;
+       struct bge_status_block *sblk = sc->bge_ldata.bge_status_block;
+       uint16_t rx_prod, tx_cons;
+       uint32_t status;
+
+       sc->bge_status_tag = sblk->bge_status_tag;
+       /*
+        * Use a load fence to ensure that status_tag is saved 
+        * before rx_prod, tx_cons and status.
+        */
+       cpu_lfence();
+
+       rx_prod = sblk->bge_idx[0].bge_rx_prod_idx;
+       tx_cons = sblk->bge_idx[0].bge_tx_cons_idx;
+       status = sblk->bge_status;
+
+       if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) || sc->bge_link_evt)
+               bge_link_poll(sc);
+
        if (ifp->if_flags & IFF_RUNNING) {
-               /* Check RX return ring producer/consumer */
-               bge_rxeof(sc);
+               if (sc->bge_rx_saved_considx != rx_prod)
+                       bge_rxeof(sc, rx_prod);
 
-               /* Check TX ring producer/consumer */
-               bge_txeof(sc);
+               if (sc->bge_tx_saved_considx != tx_cons)
+                       bge_txeof(sc, tx_cons);
        }
 
+       bge_writembx(sc, BGE_MBX_IRQ0_LO, sc->bge_status_tag << 24);
+
        if (sc->bge_coal_chg)
                bge_coal_change(sc);
 }
@@ -2954,8 +3239,7 @@ bge_tick(void *xsc)
                 * and trigger interrupt.
                 */
                sc->bge_link_evt++;
-               if (sc->bge_asicrev == BGE_ASICREV_BCM5700 ||
-                   BGE_IS_5788(sc))
+               if (BGE_IS_CRIPPLED(sc))
                        BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET);
                else
                        BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
@@ -3031,14 +3315,19 @@ bge_stats_update(struct bge_softc *sc)
 static int
 bge_encap(struct bge_softc *sc, struct mbuf **m_head0, uint32_t *txidx)
 {
-       struct bge_tx_bd *d = NULL;
-       uint16_t csum_flags = 0;
+       struct bge_tx_bd *d = NULL, *last_d;
+       uint16_t csum_flags = 0, mss = 0;
        bus_dma_segment_t segs[BGE_NSEG_NEW];
        bus_dmamap_t map;
        int error, maxsegs, nsegs, idx, i;
        struct mbuf *m_head = *m_head0, *m_new;
 
-       if (m_head->m_pkthdr.csum_flags) {
+       if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
+               error = bge_setup_tso(sc, m_head0, &mss, &csum_flags);
+               if (error)
+                       return ENOBUFS;
+               m_head = *m_head0;
+       } else if (m_head->m_pkthdr.csum_flags & BGE_CSUM_FEATURES) {
                if (m_head->m_pkthdr.csum_flags & CSUM_IP)
                        csum_flags |= BGE_TXBDFLAG_IP_CSUM;
                if (m_head->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
@@ -3052,16 +3341,16 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head0, uint32_t *txidx)
        idx = *txidx;
        map = sc->bge_cdata.bge_tx_dmamap[idx];
 
-       maxsegs = (BGE_TX_RING_CNT - sc->bge_txcnt) - BGE_NSEG_RSVD;
-       KASSERT(maxsegs >= BGE_NSEG_SPARE,
+       maxsegs = (BGE_TX_RING_CNT - sc->bge_txcnt) - sc->bge_txrsvd;
+       KASSERT(maxsegs >= sc->bge_txspare,
                ("not enough segments %d", maxsegs));
 
        if (maxsegs > BGE_NSEG_NEW)
                maxsegs = BGE_NSEG_NEW;
 
        /*
-        * Pad outbound frame to BGE_MIN_FRAME for an unusual reason.
-        * The bge hardware will pad out Tx runts to BGE_MIN_FRAME,
+        * Pad outbound frame to BGE_MIN_FRAMELEN for an unusual reason.
+        * The bge hardware will pad out Tx runts to BGE_MIN_FRAMELEN,
         * but when such padded frames employ the bge IP/TCP checksum
         * offload, the hardware checksum assist gives incorrect results
         * (possibly from incorporating its own padding into the UDP/TCP
@@ -3069,8 +3358,8 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head0, uint32_t *txidx)
         * onboard checksum comes out correct.
         */
        if ((csum_flags & BGE_TXBDFLAG_TCP_UDP_CSUM) &&
-           m_head->m_pkthdr.len < BGE_MIN_FRAME) {
-               error = m_devpad(m_head, BGE_MIN_FRAME);
+           m_head->m_pkthdr.len < BGE_MIN_FRAMELEN) {
+               error = m_devpad(m_head, BGE_MIN_FRAMELEN);
                if (error)
                        goto back;
        }
@@ -3083,7 +3372,8 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head0, uint32_t *txidx)
                }
                *m_head0 = m_head = m_new;
        }
-       if (sc->bge_force_defrag && (sc->bge_flags & BGE_FLAG_PCIE) &&
+       if ((m_head->m_pkthdr.csum_flags & CSUM_TSO) == 0 &&
+           sc->bge_force_defrag && (sc->bge_flags & BGE_FLAG_PCIE) &&
            m_head->m_next != NULL) {
                /*
                 * Forcefully defragment mbuf chain to overcome hardware
@@ -3111,13 +3401,13 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head0, uint32_t *txidx)
                d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr);
                d->bge_len = segs[i].ds_len;
                d->bge_flags = csum_flags;
+               d->bge_mss = mss;
 
                if (i == nsegs - 1)
                        break;
                BGE_INC(idx, BGE_TX_RING_CNT);
        }
-       /* Mark the last segment as end of packet... */
-       d->bge_flags |= BGE_TXBDFLAG_END;
+       last_d = d;
 
        /* Set vlan tag to the first segment of the packet. */
        d = &sc->bge_ldata.bge_tx_ring[*txidx];
@@ -3128,6 +3418,9 @@ bge_encap(struct bge_softc *sc, struct mbuf **m_head0, uint32_t *txidx)
                d->bge_vlan_tag = 0;
        }
 
+       /* Mark the last segment as end of packet... */
+       last_d->bge_flags |= BGE_TXBDFLAG_END;
+
        /*
         * Insure that the map for this transmission is placed at
         * the array index of the last descriptor in this chain.
@@ -3186,7 +3479,7 @@ bge_start(struct ifnet *ifp)
                if ((m_head->m_flags & M_FIRSTFRAG) &&
                    (m_head->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
                        if ((BGE_TX_RING_CNT - sc->bge_txcnt) <
-                           m_head->m_pkthdr.csum_data + BGE_NSEG_RSVD) {
+                           m_head->m_pkthdr.csum_data + sc->bge_txrsvd) {
                                ifp->if_flags |= IFF_OACTIVE;
                                ifq_prepend(&ifp->if_snd, m_head);
                                break;
@@ -3194,13 +3487,13 @@ bge_start(struct ifnet *ifp)
                }
 
                /*
-                * Sanity check: avoid coming within BGE_NSEG_RSVD
+                * Sanity check: avoid coming within bge_txrsvd
                 * descriptors of the end of the ring.  Also make
-                * sure there are BGE_NSEG_SPARE descriptors for
+                * sure there are bge_txspare descriptors for
                 * jumbo buffers' defragmentation.
                 */
                if ((BGE_TX_RING_CNT - sc->bge_txcnt) <
-                   (BGE_NSEG_RSVD + BGE_NSEG_SPARE)) {
+                   (sc->bge_txrsvd + sc->bge_txspare)) {
                        ifp->if_flags |= IFF_OACTIVE;
                        ifq_prepend(&ifp->if_snd, m_head);
                        break;
@@ -3244,6 +3537,7 @@ bge_init(void *xsc)
        struct bge_softc *sc = xsc;
        struct ifnet *ifp = &sc->arpcom.ac_if;
        uint16_t *m;
+       uint32_t mode;
 
        ASSERT_SERIALIZED(ifp->if_serializer);
 
@@ -3316,8 +3610,12 @@ bge_init(void *xsc)
        /* Init TX ring. */
        bge_init_tx_ring(sc);
 
+       /* Enable TX MAC state machine lockup fix. */
+       mode = CSR_READ_4(sc, BGE_TX_MODE);
+       if (BGE_IS_5755_PLUS(sc) || sc->bge_asicrev == BGE_ASICREV_BCM5906)
+               mode |= BGE_TXMODE_MBUF_LOCKUP_FIX;
        /* Turn on transmitter */
-       BGE_SETBIT(sc, BGE_TX_MODE, BGE_TXMODE_ENABLE);
+       CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE);
 
        /* Turn on receiver */
        BGE_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
@@ -3330,11 +3628,33 @@ bge_init(void *xsc)
         */
        CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2);
 
+       if (sc->bge_irq_type == PCI_INTR_TYPE_MSI) {
+               if (bootverbose) {
+                       if_printf(ifp, "MSI_MODE: %#x\n",
+                           CSR_READ_4(sc, BGE_MSI_MODE));
+               }
+
+               /*
+                * XXX
+                * Linux driver turns it on for all chips supporting MSI?!
+                */
+               if (sc->bge_flags & BGE_FLAG_ONESHOT_MSI) {
+                       /*
+                        * XXX
+                        * According to 5722-PG101-R,
+                        * BGE_PCIE_TRANSACT_ONESHOT_MSI applies only to
+                        * BCM5906.
+                        */
+                       BGE_SETBIT(sc, BGE_PCIE_TRANSACT,
+                           BGE_PCIE_TRANSACT_ONESHOT_MSI);
+               }
+       }
+
        /* Tell firmware we're alive. */
        BGE_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
 
        /* Enable host interrupts if polling(4) is not enabled. */
-       BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA);
+       PCI_SETBIT(sc->bge_dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA, 4);
 #ifdef DEVICE_POLLING
        if (ifp->if_flags & IFF_POLLING)
                bge_disable_intr(sc);
@@ -3425,8 +3745,7 @@ bge_ifmedia_upd(struct ifnet *ifp)
                 * need to do this here if BGE_FLAG_TBI is set but as
                 * we poll for fiber anyway it should not harm.
                 */
-               if (sc->bge_asicrev == BGE_ASICREV_BCM5700 ||
-                   BGE_IS_5788(sc))
+               if (BGE_IS_CRIPPLED(sc))
                        BGE_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET);
                else
                        BGE_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
@@ -3536,10 +3855,17 @@ bge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= (mask & IFCAP_HWCSUM);
-                       if (IFCAP_HWCSUM & ifp->if_capenable)
-                               ifp->if_hwassist = BGE_CSUM_FEATURES;
+                       if (ifp->if_capenable & IFCAP_TXCSUM)
+                               ifp->if_hwassist |= BGE_CSUM_FEATURES;
+                       else
+                               ifp->if_hwassist &= ~BGE_CSUM_FEATURES;
+               }
+               if (mask & IFCAP_TSO) {
+                       ifp->if_capenable ^= IFCAP_TSO;
+                       if (ifp->if_capenable & IFCAP_TSO)
+                               ifp->if_hwassist |= CSUM_TSO;
                        else
-                               ifp->if_hwassist = 0;
+                               ifp->if_hwassist &= ~CSUM_TSO;
                }
                break;
        default:
@@ -3580,35 +3906,35 @@ bge_stop(struct bge_softc *sc)
        /*
         * Disable all of the receiver blocks
         */
-       BGE_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
-       if (!BGE_IS_5705_PLUS(sc))
-               BGE_CLRBIT(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE);
+       bge_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
+       bge_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
+       bge_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
+       if (BGE_IS_5700_FAMILY(sc))
+               bge_stop_block(sc, BGE_RXLS_MODE, BGE_RXLSMODE_ENABLE);
+       bge_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE);
+       bge_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
+       bge_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE);
 
        /*
         * Disable all of the transmit blocks
         */
-       BGE_CLRBIT(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE);
-       if (!BGE_IS_5705_PLUS(sc))
-               BGE_CLRBIT(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
+       bge_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
+       bge_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
+       bge_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
+       bge_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE);
+       bge_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE);
+       if (BGE_IS_5700_FAMILY(sc))
+               bge_stop_block(sc, BGE_DMAC_MODE, BGE_DMACMODE_ENABLE);
+       bge_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
 
        /*
         * Shut down all of the memory managers and related
         * state machines.
         */
-       BGE_CLRBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE);
-       BGE_CLRBIT(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE);
-       if (!BGE_IS_5705_PLUS(sc))
-               BGE_CLRBIT(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE);
+       bge_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE);
+       bge_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE);
+       if (BGE_IS_5700_FAMILY(sc))
+               bge_stop_block(sc, BGE_MBCF_MODE, BGE_MBCFMODE_ENABLE);
        CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
        CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
        if (!BGE_IS_5705_PLUS(sc)) {
@@ -3634,6 +3960,7 @@ bge_stop(struct bge_softc *sc)
        /* Free TX buffers. */
        bge_free_tx_ring(sc);
 
+       sc->bge_status_tag = 0;
        sc->bge_link = 0;
        sc->bge_coal_chg = 0;
 
@@ -3767,6 +4094,7 @@ bge_dma_alloc(struct bge_softc *sc)
        struct ifnet *ifp = &sc->arpcom.ac_if;
        int i, error;
        bus_addr_t lowaddr;
+       bus_size_t txmaxsz;
 
        lowaddr = BUS_SPACE_MAXADDR;
        if (sc->bge_flags & BGE_FLAG_MAXADDR_40BIT)
@@ -3836,10 +4164,14 @@ bge_dma_alloc(struct bge_softc *sc)
        /*
         * Create DMA tag and maps for TX mbufs.
         */
+       if (sc->bge_flags & BGE_FLAG_TSO)
+               txmaxsz = IP_MAXPACKET + sizeof(struct ether_vlan_header);
+       else
+               txmaxsz = BGE_JUMBO_FRAMELEN;
        error = bus_dma_tag_create(sc->bge_cdata.bge_parent_tag, 1, 0,
                                   BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
                                   NULL, NULL,
-                                  BGE_JUMBO_FRAMELEN, BGE_NSEG_NEW, MCLBYTES,
+                                  txmaxsz, BGE_NSEG_NEW, PAGE_SIZE,
                                   BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK |
                                   BUS_DMA_ONEBPAGE,
                                   &sc->bge_cdata.bge_tx_mtag);
@@ -3894,11 +4226,12 @@ bge_dma_alloc(struct bge_softc *sc)
        /*
         * Create DMA stuffs for RX return ring.
         */
-       error = bge_dma_block_alloc(sc, BGE_RX_RTN_RING_SZ(sc),
-                                   &sc->bge_cdata.bge_rx_return_ring_tag,
-                                   &sc->bge_cdata.bge_rx_return_ring_map,
-                                   (void *)&sc->bge_ldata.bge_rx_return_ring,
-                                   &sc->bge_ldata.bge_rx_return_ring_paddr);
+       error = bge_dma_block_alloc(sc,
+           BGE_RX_RTN_RING_SZ(sc->bge_return_ring_cnt),
+           &sc->bge_cdata.bge_rx_return_ring_tag,
+           &sc->bge_cdata.bge_rx_return_ring_map,
+           (void *)&sc->bge_ldata.bge_rx_return_ring,
+           &sc->bge_ldata.bge_rx_return_ring_paddr);
        if (error) {
                if_printf(ifp, "could not create RX ret ring\n");
                return error;
@@ -4126,8 +4459,9 @@ bge_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS)
        struct bge_softc *sc = arg1;
 
        return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
-                                  &sc->bge_rx_coal_ticks,
-                                  BGE_RX_COAL_TICKS_CHG);
+           &sc->bge_rx_coal_ticks,
+           BGE_RX_COAL_TICKS_MIN, BGE_RX_COAL_TICKS_MAX,
+           BGE_RX_COAL_TICKS_CHG);
 }
 
 static int
@@ -4136,33 +4470,80 @@ bge_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS)
        struct bge_softc *sc = arg1;
 
        return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
-                                  &sc->bge_tx_coal_ticks,
-                                  BGE_TX_COAL_TICKS_CHG);
+           &sc->bge_tx_coal_ticks,
+           BGE_TX_COAL_TICKS_MIN, BGE_TX_COAL_TICKS_MAX,
+           BGE_TX_COAL_TICKS_CHG);
 }
 
 static int
-bge_sysctl_rx_max_coal_bds(SYSCTL_HANDLER_ARGS)
+bge_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS)
 {
        struct bge_softc *sc = arg1;
 
        return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
-                                  &sc->bge_rx_max_coal_bds,
-                                  BGE_RX_MAX_COAL_BDS_CHG);
+           &sc->bge_rx_coal_bds,
+           BGE_RX_COAL_BDS_MIN, BGE_RX_COAL_BDS_MAX,
+           BGE_RX_COAL_BDS_CHG);
 }
 
 static int
-bge_sysctl_tx_max_coal_bds(SYSCTL_HANDLER_ARGS)
+bge_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS)
 {
        struct bge_softc *sc = arg1;
 
        return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
-                                  &sc->bge_tx_max_coal_bds,
-                                  BGE_TX_MAX_COAL_BDS_CHG);
+           &sc->bge_tx_coal_bds,
+           BGE_TX_COAL_BDS_MIN, BGE_TX_COAL_BDS_MAX,
+           BGE_TX_COAL_BDS_CHG);
+}
+
+static int
+bge_sysctl_rx_coal_ticks_int(SYSCTL_HANDLER_ARGS)
+{
+       struct bge_softc *sc = arg1;
+       /* Hand bge_rx_coal_ticks_int to the shared handler (RX ticks range). */
+       return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
+           &sc->bge_rx_coal_ticks_int,
+           BGE_RX_COAL_TICKS_MIN, BGE_RX_COAL_TICKS_MAX,
+           BGE_RX_COAL_TICKS_INT_CHG);
+}
+
+static int
+bge_sysctl_tx_coal_ticks_int(SYSCTL_HANDLER_ARGS)
+{
+       struct bge_softc *sc = arg1;
+       /* Hand bge_tx_coal_ticks_int to the shared handler (TX ticks range). */
+       return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
+           &sc->bge_tx_coal_ticks_int,
+           BGE_TX_COAL_TICKS_MIN, BGE_TX_COAL_TICKS_MAX,
+           BGE_TX_COAL_TICKS_INT_CHG);
+}
+
+static int
+bge_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS)
+{
+       struct bge_softc *sc = arg1;
+       /* Hand bge_rx_coal_bds_int to the shared handler (RX BDs range). */
+       return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
+           &sc->bge_rx_coal_bds_int,
+           BGE_RX_COAL_BDS_MIN, BGE_RX_COAL_BDS_MAX,
+           BGE_RX_COAL_BDS_INT_CHG);
+}
+
+static int
+bge_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS)
+{
+       struct bge_softc *sc = arg1;
+       /* Hand bge_tx_coal_bds_int to the shared handler (TX BDs range). */
+       return bge_sysctl_coal_chg(oidp, arg1, arg2, req,
+           &sc->bge_tx_coal_bds_int,
+           BGE_TX_COAL_BDS_MIN, BGE_TX_COAL_BDS_MAX,
+           BGE_TX_COAL_BDS_INT_CHG);
 }
 
 static int
 bge_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *coal,
-                   uint32_t coal_chg_mask)
+    int coal_min, int coal_max, uint32_t coal_chg_mask)
 {
        struct bge_softc *sc = arg1;
        struct ifnet *ifp = &sc->arpcom.ac_if;
@@ -4173,7 +4554,7 @@ bge_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *coal,
        v = *coal;
        error = sysctl_handle_int(oidp, &v, 0, req);
        if (!error && req->newptr != NULL) {
-               if (v < 0) {
+               if (v < coal_min || v > coal_max) {
                        error = EINVAL;
                } else {
                        *coal = v;
@@ -4217,27 +4598,75 @@ bge_coal_change(struct bge_softc *sc)
                }
        }
 
-       if (sc->bge_coal_chg & BGE_RX_MAX_COAL_BDS_CHG) {
+       if (sc->bge_coal_chg & BGE_RX_COAL_BDS_CHG) {
                CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS,
-                           sc->bge_rx_max_coal_bds);
+                           sc->bge_rx_coal_bds);
                DELAY(10);
                val = CSR_READ_4(sc, BGE_HCC_RX_MAX_COAL_BDS);
 
                if (bootverbose) {
-                       if_printf(ifp, "rx_max_coal_bds -> %u\n",
-                                 sc->bge_rx_max_coal_bds);
+                       if_printf(ifp, "rx_coal_bds -> %u\n",
+                                 sc->bge_rx_coal_bds);
                }
        }
 
-       if (sc->bge_coal_chg & BGE_TX_MAX_COAL_BDS_CHG) {
+       if (sc->bge_coal_chg & BGE_TX_COAL_BDS_CHG) {
                CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS,
-                           sc->bge_tx_max_coal_bds);
+                           sc->bge_tx_coal_bds);
                DELAY(10);
                val = CSR_READ_4(sc, BGE_HCC_TX_MAX_COAL_BDS);
 
                if (bootverbose) {
                        if_printf(ifp, "tx_max_coal_bds -> %u\n",
-                                 sc->bge_tx_max_coal_bds);
+                                 sc->bge_tx_coal_bds);
+               }
+       }
+
+       if (sc->bge_coal_chg & BGE_RX_COAL_TICKS_INT_CHG) {
+               CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS_INT,
+                   sc->bge_rx_coal_ticks_int);
+               DELAY(10);
+               val = CSR_READ_4(sc, BGE_HCC_RX_COAL_TICKS_INT);
+
+               if (bootverbose) {
+                       if_printf(ifp, "rx_coal_ticks_int -> %u\n",
+                           sc->bge_rx_coal_ticks_int);
+               }
+       }
+
+       if (sc->bge_coal_chg & BGE_TX_COAL_TICKS_INT_CHG) {
+               CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS_INT,
+                   sc->bge_tx_coal_ticks_int);
+               DELAY(10);
+               val = CSR_READ_4(sc, BGE_HCC_TX_COAL_TICKS_INT);
+
+               if (bootverbose) {
+                       if_printf(ifp, "tx_coal_ticks_int -> %u\n",
+                           sc->bge_tx_coal_ticks_int);
+               }
+       }
+
+       if (sc->bge_coal_chg & BGE_RX_COAL_BDS_INT_CHG) {
+               CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT,
+                   sc->bge_rx_coal_bds_int);
+               DELAY(10);
+               val = CSR_READ_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT);
+
+               if (bootverbose) {
+                       if_printf(ifp, "rx_coal_bds_int -> %u\n",
+                           sc->bge_rx_coal_bds_int);
+               }
+       }
+
+       if (sc->bge_coal_chg & BGE_TX_COAL_BDS_INT_CHG) {
+               CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT,
+                   sc->bge_tx_coal_bds_int);
+               DELAY(10);
+               val = CSR_READ_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT);
+
+               if (bootverbose) {
+                       if_printf(ifp, "tx_coal_bds_int -> %u\n",
+                           sc->bge_tx_coal_bds_int);
                }
        }
 
@@ -4254,12 +4683,17 @@ bge_enable_intr(struct bge_softc *sc)
        /*
         * Enable interrupt.
         */
-       bge_writembx(sc, BGE_MBX_IRQ0_LO, 0);
+       bge_writembx(sc, BGE_MBX_IRQ0_LO, sc->bge_status_tag << 24);
+       if (sc->bge_flags & BGE_FLAG_ONESHOT_MSI) {
+               /* XXX Linux driver */
+               bge_writembx(sc, BGE_MBX_IRQ0_LO, sc->bge_status_tag << 24);
+       }
 
        /*
         * Unmask the interrupt when we stop polling.
         */
-       BGE_CLRBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR);
+       PCI_CLRBIT(sc->bge_dev, BGE_PCI_MISC_CTL,
+           BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
 
        /*
         * Trigger another interrupt, since above writing
@@ -4277,7 +4711,8 @@ bge_disable_intr(struct bge_softc *sc)
        /*
         * Mask the interrupt when we start polling.
         */
-       BGE_SETBIT(sc, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_MASK_PCI_INTR);
+       PCI_SETBIT(sc->bge_dev, BGE_PCI_MISC_CTL,
+           BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
 
        /*
         * Acknowledge possible asserted interrupt.
@@ -4380,3 +4815,95 @@ bge_defrag_shortdma(struct mbuf *m)
                n = m;
        return n;
 }
+
+static void
+bge_stop_block(struct bge_softc *sc, bus_size_t reg, uint32_t bit)
+{
+       int i;
+       /* Clear `bit' in `reg', then poll until it reads back as zero. */
+       BGE_CLRBIT(sc, reg, bit);
+       for (i = 0; i < BGE_TIMEOUT; i++) {
+               if ((CSR_READ_4(sc, reg) & bit) == 0)
+                       return;         /* bit is clear */
+               DELAY(100);
+       }       /* polls exhausted: return silently, bit may still be set */
+}
+
+static void
+bge_link_poll(struct bge_softc *sc)
+{
+       uint32_t status;
+       /* Call bge_link_upd on a MAC status change bit or a pending event. */
+       status = CSR_READ_4(sc, BGE_MAC_STS);
+       if ((status & sc->bge_link_chg) || sc->bge_link_evt) {
+               sc->bge_link_evt = 0;   /* pending event is consumed here */
+               sc->bge_link_upd(sc, status);
+       }
+}
+
+static void
+bge_enable_msi(struct bge_softc *sc)
+{
+       uint32_t msi_mode;
+       /* Set the enable bit in BGE_MSI_MODE (read-modify-write). */
+       msi_mode = CSR_READ_4(sc, BGE_MSI_MODE);
+       msi_mode |= BGE_MSIMODE_ENABLE;
+       if (sc->bge_flags & BGE_FLAG_ONESHOT_MSI) {
+               /*
+                * According to all of the datasheets that are publicly
+                * available, bit 5 of the MSI_MODE is defined to be
+                * "MSI FIFO Underrun Attn" for BCM5755+ and BCM5906, on
+                * which "oneshot MSI" is enabled.  However, it is always
+                * safe to clear it here.
+                */
+               msi_mode &= ~BGE_MSIMODE_ONESHOT_DISABLE;
+       }
+       CSR_WRITE_4(sc, BGE_MSI_MODE, msi_mode);
+}
+
+static int
+bge_setup_tso(struct bge_softc *sc, struct mbuf **mp,
+    uint16_t *mss0, uint16_t *flags0)
+{
+       struct mbuf *m;
+       struct ip *ip;
+       struct tcphdr *th;
+       int thoff, iphlen, hoff, hlen;
+       uint16_t flags, mss;
+       /* Rewrite a TSO packet's IP/TCP header fields; emit mss and BD flags. */
+       m = *mp;
+       KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
+       /* Link, IP and TCP header lengths come from the packet header. */
+       hoff = m->m_pkthdr.csum_lhlen;
+       iphlen = m->m_pkthdr.csum_iphlen;
+       thoff = m->m_pkthdr.csum_thlen; /* TCP header length, not an offset */
+
+       KASSERT(hoff > 0, ("invalid ether header len"));
+       KASSERT(iphlen > 0, ("invalid ip header len"));
+       KASSERT(thoff > 0, ("invalid tcp header len"));
+       /* All three headers are pulled into the first mbuf before editing. */
+       if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
+               m = m_pullup(m, hoff + iphlen + thoff);
+               if (m == NULL) {
+                       *mp = NULL;     /* nothing to hand back */
+                       return ENOBUFS;
+               }
+               *mp = m;        /* m_pullup() may return a different mbuf */
+       }
+       ip = mtodoff(m, struct ip *, hoff);
+       th = mtodoff(m, struct tcphdr *, hoff + iphlen);
+
+       mss = m->m_pkthdr.tso_segsz;
+       flags = BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA;
+       /* ip_len covers one segment: mss payload plus both headers. */
+       ip->ip_len = htons(mss + iphlen + thoff);
+       th->th_sum = 0;
+       /* Header length in 32-bit words goes into mss bits 11 and up. */
+       hlen = (iphlen + thoff) >> 2;
+       mss |= (hlen << 11);    /* NOTE(review): assumes tso_segsz < 2048 */
+
+       *mss0 = mss;
+       *flags0 = flags;
+
+       return 0;       /* ENOBUFS above is the only failure path */
+}