static void em_intr(void *);
static void em_rxeof(struct adapter *, int);
static void em_txeof(struct adapter *);
+static void em_tx_collect(struct adapter *);
static void em_tx_purge(struct adapter *);
static void em_enable_intr(struct adapter *);
static void em_disable_intr(struct adapter *);
static void em_rxcsum(struct adapter *, struct e1000_rx_desc *,
struct mbuf *);
static int em_txcsum_pullup(struct adapter *, struct mbuf **);
-static void em_txcsum(struct adapter *, struct mbuf *,
+static int em_txcsum(struct adapter *, struct mbuf *,
uint32_t *, uint32_t *);
static int em_get_hw_info(struct adapter *);
static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static int em_sysctl_int_throttle(SYSCTL_HANDLER_ARGS);
+static int em_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS);
static void em_add_sysctl(struct adapter *adapter);
static void em_add_int_delay_sysctl(struct adapter *, const char *,
const char *, struct em_int_delay_info *, int, int);
adapter->spare_tx_desc = EM_TX_SPARE;
}
+ /*
+ * Keep following relationship between spare_tx_desc, oact_tx_desc
+ * and tx_int_nsegs:
+ * (spare_tx_desc + EM_TX_RESERVED) <=
+ * oact_tx_desc <= EM_TX_OACTIVE_MAX <= tx_int_nsegs
+ */
+ adapter->oact_tx_desc = adapter->num_tx_desc / 8;
+ if (adapter->oact_tx_desc > EM_TX_OACTIVE_MAX)
+ adapter->oact_tx_desc = EM_TX_OACTIVE_MAX;
+ if (adapter->oact_tx_desc < adapter->spare_tx_desc + EM_TX_RESERVED)
+ adapter->oact_tx_desc = adapter->spare_tx_desc + EM_TX_RESERVED;
+
+ adapter->tx_int_nsegs = adapter->num_tx_desc / 16;
+ if (adapter->tx_int_nsegs < adapter->oact_tx_desc)
+ adapter->tx_int_nsegs = adapter->oact_tx_desc;
+
error = bus_setup_intr(dev, adapter->intr_res, INTR_MPSAFE,
em_intr, adapter, &adapter->intr_tag,
ifp->if_serializer);
}
while (!ifq_is_empty(&ifp->if_snd)) {
- /*
- * Force a cleanup if number of TX descriptors
- * available hits the threshold
- */
- if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
- em_txeof(adapter);
-
- /* Now do we at least have a minimal? */
+ /* Now do we at least have a minimal? */
+ if (EM_IS_OACTIVE(adapter)) {
+ em_tx_collect(adapter);
if (EM_IS_OACTIVE(adapter)) {
- adapter->no_tx_desc_avail1++;
ifp->if_flags |= IFF_OACTIVE;
+ adapter->no_tx_desc_avail1++;
break;
}
}
if (em_encap(adapter, &m_head)) {
ifp->if_oerrors++;
- if (adapter->num_tx_desc_avail ==
- adapter->num_tx_desc) {
- continue;
- } else {
- ifp->if_flags |= IFF_OACTIVE;
- break;
- }
+ em_tx_collect(adapter);
+ continue;
}
/* Send a copy of the frame to the BPF listener */
* set to 0.
*/
+ if (E1000_READ_REG(&adapter->hw, E1000_TDT(0)) ==
+ E1000_READ_REG(&adapter->hw, E1000_TDH(0))) {
+ /*
+ * If we reach here, all TX jobs are completed and
+ * the TX engine should have been idled for some time.
+ * We don't need to call if_devstart() here.
+ */
+ ifp->if_flags &= ~IFF_OACTIVE;
+ ifp->if_timer = 0;
+ return;
+ }
+
/*
* If we are in this routine because of pause frames, then
* don't reset the hardware.
}
if (ifp->if_flags & IFF_RUNNING) {
- em_rxeof(adapter, -1);
- em_txeof(adapter);
+ if (reg_icr &
+ (E1000_IMS_RXT0 | E1000_IMS_RXDMT0 | E1000_ICR_RXO))
+ em_rxeof(adapter, -1);
+ if (reg_icr & E1000_IMS_TXDW) {
+ em_txeof(adapter);
+ if (!ifq_is_empty(&ifp->if_snd))
+ if_devstart(ifp);
+ }
}
/* Link status change */
if (reg_icr & E1000_ICR_RXO)
adapter->rx_overruns++;
- if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_empty(&ifp->if_snd))
- if_devstart(ifp);
-
logif(intr_end);
}
struct em_buffer *tx_buffer, *tx_buffer_mapped;
struct e1000_tx_desc *ctxd = NULL;
struct mbuf *m_head = *m_headp;
- uint32_t txd_upper, txd_lower, txd_used;
+ uint32_t txd_upper, txd_lower, txd_used, cmd = 0;
int maxsegs, nsegs, i, j, first, last = 0, error;
if (__predict_false(m_head->m_len < EM_TXCSUM_MINHL) &&
bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
m_head = *m_headp;
+ adapter->tx_nsegs += nsegs;
if (m_head->m_pkthdr.csum_flags & EM_CSUM_FEATURES) {
/* TX csum offloading will consume one TX desc */
- em_txcsum(adapter, m_head, &txd_upper, &txd_lower);
+ adapter->tx_nsegs += em_txcsum(adapter, m_head,
+ &txd_upper, &txd_lower);
}
i = adapter->next_avail_tx_desc;
i = 0;
tx_buffer->m_head = NULL;
- tx_buffer->next_eop = -1;
txd_used++;
}
} else {
i = 0;
tx_buffer->m_head = NULL;
- tx_buffer->next_eop = -1;
}
}
tx_buffer_mapped->map = tx_buffer->map;
tx_buffer->map = map;
+ if (adapter->tx_nsegs >= adapter->tx_int_nsegs) {
+ adapter->tx_nsegs = 0;
+ cmd = E1000_TXD_CMD_RS;
+
+ adapter->tx_dd[adapter->tx_dd_tail] = last;
+ EM_INC_TXDD_IDX(adapter->tx_dd_tail);
+ KKASSERT(adapter->tx_dd_tail != adapter->tx_dd_head);
+ }
+
/*
* Last Descriptor of Packet needs End Of Packet (EOP)
* and Report Status (RS)
*/
- ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
+ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | cmd);
/*
* Keep track in the first buffer which descriptor will be
* written back
*/
tx_buffer = &adapter->tx_buffer_area[first];
- tx_buffer->next_eop = last;
/*
* Advance the Transmit Descriptor Tail (TDT), this tells the E1000
m_freem(tx_buffer->m_head);
tx_buffer->m_head = NULL;
}
- tx_buffer->next_eop = -1;
}
for (i = 0; i < adapter->num_rx_desc; i++) {
adapter->csum_flags = 0;
adapter->csum_ehlen = 0;
adapter->csum_iphlen = 0;
+
+ adapter->tx_dd_head = 0;
+ adapter->tx_dd_tail = 0;
+ adapter->tx_nsegs = 0;
}
static int
em_destroy_tx_ring(adapter, i);
return error;
}
- tx_buffer->next_eop = -1;
}
return (0);
}
/* Setup Transmit Descriptor Base Settings */
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
-
- if (adapter->tx_int_delay.value > 0)
- adapter->txd_cmd |= E1000_TXD_CMD_IDE;
}
static void
* csum offloading type are same as the previous packet, we should
* avoid allocating a new csum context descriptor; mainly to take
* advantage of the pipeline effect of the TX data read request.
+ *
+ * This function returns number of TX descrptors allocated for
+ * csum context.
*/
-static void
+static int
em_txcsum(struct adapter *adapter, struct mbuf *mp,
uint32_t *txd_upper, uint32_t *txd_lower)
{
* TODO: Support SCTP too when it hits the tree.
*/
if (etype != ETHERTYPE_IP)
- return;
+ return 0;
KASSERT(mp->m_len >= ehdrlen + EM_IPVHL_SIZE,
("em_txcsum_pullup is not called (eh+ip_vhl)?\n"));
*/
*txd_upper = adapter->csum_txd_upper;
*txd_lower = adapter->csum_txd_lower;
- return;
+ return 0;
}
/*
TXD->cmd_and_length =
htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
tx_buffer->m_head = NULL;
- tx_buffer->next_eop = -1;
if (++curr_txd == adapter->num_tx_desc)
curr_txd = 0;
adapter->num_tx_desc_avail--;
adapter->next_avail_tx_desc = curr_txd;
+ return 1;
}
static int
static void
em_txeof(struct adapter *adapter)
{
- int first, last, done, num_avail;
- struct em_buffer *tx_buffer;
- struct e1000_tx_desc *tx_desc, *eop_desc;
struct ifnet *ifp = &adapter->arpcom.ac_if;
+ struct e1000_tx_desc *tx_desc;
+ struct em_buffer *tx_buffer;
+ int first, num_avail;
+
+ if (adapter->tx_dd_head == adapter->tx_dd_tail)
+ return;
if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
return;
num_avail = adapter->num_tx_desc_avail;
first = adapter->next_tx_to_clean;
- tx_desc = &adapter->tx_desc_base[first];
- tx_buffer = &adapter->tx_buffer_area[first];
- last = tx_buffer->next_eop;
- eop_desc = &adapter->tx_desc_base[last];
+ while (adapter->tx_dd_head != adapter->tx_dd_tail) {
+ int dd_idx = adapter->tx_dd[adapter->tx_dd_head];
- /*
- * What this does is get the index of the
- * first descriptor AFTER the EOP of the
- * first packet, that way we can do the
- * simple comparison on the inner while loop.
- */
- if (++last == adapter->num_tx_desc)
- last = 0;
- done = last;
-
- while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
- /* We clean the range of the packet */
- while (first != done) {
- logif(pkt_txclean);
-
- tx_desc->upper.data = 0;
- tx_desc->lower.data = 0;
- tx_desc->buffer_addr = 0;
- num_avail++;
-
- if (tx_buffer->m_head) {
- ifp->if_opackets++;
- bus_dmamap_unload(adapter->txtag,
- tx_buffer->map);
- m_freem(tx_buffer->m_head);
- tx_buffer->m_head = NULL;
- }
- tx_buffer->next_eop = -1;
+ tx_desc = &adapter->tx_desc_base[dd_idx];
- if (++first == adapter->num_tx_desc)
- first = 0;
+ if (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
+ EM_INC_TXDD_IDX(adapter->tx_dd_head);
- tx_buffer = &adapter->tx_buffer_area[first];
- tx_desc = &adapter->tx_desc_base[first];
- }
+ if (++dd_idx == adapter->num_tx_desc)
+ dd_idx = 0;
- /* See if we can continue to the next packet */
- last = tx_buffer->next_eop;
- if (last != -1) {
- eop_desc = &adapter->tx_desc_base[last];
+ while (first != dd_idx) {
+ tx_buffer = &adapter->tx_buffer_area[first];
+ tx_desc = &adapter->tx_desc_base[first];
- /* Get new done point */
- if (++last == adapter->num_tx_desc)
- last = 0;
- done = last;
+ tx_desc->upper.data = 0;
+ tx_desc->lower.data = 0;
+ tx_desc->buffer_addr = 0;
+ num_avail++;
+
+ if (tx_buffer->m_head) {
+ ifp->if_opackets++;
+ bus_dmamap_unload(adapter->txtag,
+ tx_buffer->map);
+ m_freem(tx_buffer->m_head);
+ tx_buffer->m_head = NULL;
+ }
+
+ if (++first == adapter->num_tx_desc)
+ first = 0;
+ }
} else {
break;
}
}
- adapter->next_tx_to_clean = first;
+ adapter->next_tx_to_clean = first;
+ adapter->num_tx_desc_avail = num_avail;
+
+ if (adapter->tx_dd_head == adapter->tx_dd_tail) {
+ adapter->tx_dd_head = 0;
+ adapter->tx_dd_tail = 0;
+ }
+
+ if (!EM_IS_OACTIVE(adapter)) {
+ ifp->if_flags &= ~IFF_OACTIVE;
+
+ /* All clean, turn off the timer */
+ if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
+ ifp->if_timer = 0;
+ }
+}
+
+static void
+em_tx_collect(struct adapter *adapter)
+{
+ struct ifnet *ifp = &adapter->arpcom.ac_if;
+ struct e1000_tx_desc *tx_desc;
+ struct em_buffer *tx_buffer;
+ int tdh, first, num_avail, dd_idx = -1;
+
+ if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
+ return;
+
+ tdh = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
+ if (tdh == adapter->next_tx_to_clean)
+ return;
+
+ if (adapter->tx_dd_head != adapter->tx_dd_tail)
+ dd_idx = adapter->tx_dd[adapter->tx_dd_head];
+
+ num_avail = adapter->num_tx_desc_avail;
+ first = adapter->next_tx_to_clean;
+
+ while (first != tdh) {
+ tx_buffer = &adapter->tx_buffer_area[first];
+ tx_desc = &adapter->tx_desc_base[first];
+
+ tx_desc->upper.data = 0;
+ tx_desc->lower.data = 0;
+ tx_desc->buffer_addr = 0;
+ num_avail++;
+
+ if (tx_buffer->m_head) {
+ ifp->if_opackets++;
+ bus_dmamap_unload(adapter->txtag,
+ tx_buffer->map);
+ m_freem(tx_buffer->m_head);
+ tx_buffer->m_head = NULL;
+ }
+
+ if (first == dd_idx) {
+ EM_INC_TXDD_IDX(adapter->tx_dd_head);
+ if (adapter->tx_dd_head == adapter->tx_dd_tail) {
+ adapter->tx_dd_head = 0;
+ adapter->tx_dd_tail = 0;
+ dd_idx = -1;
+ } else {
+ dd_idx = adapter->tx_dd[adapter->tx_dd_head];
+ }
+ }
+
+ if (++first == adapter->num_tx_desc)
+ first = 0;
+ }
+ adapter->next_tx_to_clean = first;
adapter->num_tx_desc_avail = num_avail;
- if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) {
+ if (!EM_IS_OACTIVE(adapter)) {
ifp->if_flags &= ~IFF_OACTIVE;
/* All clean, turn off the timer */
struct ifnet *ifp = &adapter->arpcom.ac_if;
if (!adapter->link_active && ifp->if_timer) {
- em_txeof(adapter);
+ em_tx_collect(adapter);
if (ifp->if_timer) {
if_printf(ifp, "Link lost, TX pending, reinit\n");
ifp->if_timer = 0;
em_sysctl_int_throttle, "I",
"interrupt throttling rate");
}
+ SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
+ SYSCTL_CHILDREN(adapter->sysctl_tree),
+ OID_AUTO, "int_tx_nsegs",
+ CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
+ em_sysctl_int_tx_nsegs, "I",
+ "# segments per TX interrupt");
}
/* Set up some sysctls for the tunable interrupt delays */
}
return 0;
}
+
+static int
+em_sysctl_int_tx_nsegs(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (void *)arg1;
+ struct ifnet *ifp = &adapter->arpcom.ac_if;
+ int error, segs;
+
+ segs = adapter->tx_int_nsegs;
+ error = sysctl_handle_int(oidp, &segs, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+ if (segs <= 0)
+ return EINVAL;
+
+ lwkt_serialize_enter(ifp->if_serializer);
+
+ /*
+ * Don't allow int_tx_nsegs to become:
+ * o Less the oact_tx_desc
+ * o Too large that no TX desc will cause TX interrupt to
+ * be generated (OACTIVE will never recover)
+ * o Too small that will cause tx_dd[] overflow
+ */
+ if (segs < adapter->oact_tx_desc ||
+ segs >= adapter->num_tx_desc - adapter->oact_tx_desc ||
+ segs < adapter->num_tx_desc / EM_TXDD_SAFE) {
+ error = EINVAL;
+ } else {
+ error = 0;
+ adapter->tx_int_nsegs = segs;
+ }
+
+ lwkt_serialize_exit(ifp->if_serializer);
+
+ return error;
+}
/*
* EM_TXD: Maximum number of Transmit Descriptors
- * Valid Range: 80-256 for 82542 and 82543-based adapters
- * 80-4096 for others
+ * Valid Range: 256 for 82542 and 82543-based adapters
+ * 256-4096 for others
* Default Value: 256
* This value is the number of transmit descriptors allocated by the driver.
* Increasing this value allows the driver to queue more transmits. Each
* desscriptors should meet the following condition.
* (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0
*/
-#define EM_MIN_TXD 80
-#define EM_MAX_TXD_82543 256
+#define EM_MIN_TXD 256
+#define EM_MAX_TXD_82543 EM_MIN_TXD
#define EM_MAX_TXD 4096
-#define EM_DEFAULT_TXD EM_MAX_TXD_82543
+#define EM_DEFAULT_TXD EM_MIN_TXD
/*
* EM_RXD - Maximum number of receive Descriptors
- * Valid Range: 80-256 for 82542 and 82543-based adapters
- * 80-4096 for others
+ * Valid Range: 256 for 82542 and 82543-based adapters
+ * 256-4096 for others
* Default Value: 256
* This value is the number of receive descriptors allocated by the driver.
* Increasing this value allows the driver to buffer more incoming packets.
* desscriptors should meet the following condition.
* (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0
*/
-#define EM_MIN_RXD 80
-#define EM_MAX_RXD_82543 256
+#define EM_MIN_RXD 256
+#define EM_MAX_RXD_82543 EM_MIN_RXD
#define EM_MAX_RXD 4096
-#define EM_DEFAULT_RXD EM_MAX_RXD_82543
+#define EM_DEFAULT_RXD EM_MIN_RXD
/*
* EM_TIDV - Transmit Interrupt Delay Value
/* Large enough for 16K jumbo frame */
#define EM_TX_SPARE 8
+#define EM_TX_OACTIVE_MAX 64
+
/* Interrupt throttle rate */
#define EM_DEFAULT_ITR 10000
/*
- * This parameter controls when the driver calls the routine to reclaim
- * transmit descriptors.
- */
-#define EM_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8)
-#define EM_TX_OP_THRESHOLD (adapter->num_tx_desc / 32)
-
-/*
* This parameter controls whether or not autonegotation is enabled.
* 0 - Disable autonegotiation
* 1 - Enable autonegotiation
*/
struct em_dma_alloc txdma; /* bus_dma glue for tx desc */
struct e1000_tx_desc *tx_desc_base;
+ struct em_buffer *tx_buffer_area;
uint32_t next_avail_tx_desc;
uint32_t next_tx_to_clean;
int num_tx_desc_avail;
int num_tx_desc;
uint32_t txd_cmd;
- struct em_buffer *tx_buffer_area;
bus_dma_tag_t txtag; /* dma tag for tx */
int spare_tx_desc;
+ int oact_tx_desc;
/* Saved csum offloading context information */
int csum_flags;
uint32_t csum_txd_lower;
/*
+ * Variables used to reduce TX interrupt rate and
+ * number of device's TX ring write requests.
+ *
+ * tx_nsegs:
+ * Number of TX descriptors setup so far.
+ *
+ * tx_int_nsegs:
+ * Once tx_nsegs > tx_int_nsegs, RS bit will be set
+ * in the last TX descriptor of the packet, and
+ * tx_nsegs will be reset to 0. So TX interrupt and
+ * TX ring write request should be generated roughly
+ * every tx_int_nsegs TX descriptors.
+ *
+ * tx_dd[]:
+ * Index of the TX descriptors which have RS bit set,
+ * i.e. DD bit will be set on this TX descriptor after
+ * the data of the TX descriptor are transfered to
+ * hardware's internal packet buffer. Only the TX
+ * descriptors listed in tx_dd[] will be checked upon
+ * TX interrupt. This array is used as circular ring.
+ *
+ * tx_dd_tail, tx_dd_head:
+ * Tail and head index of valid elements in tx_dd[].
+ * tx_dd_tail == tx_dd_head means there is no valid
+ * elements in tx_dd[]. tx_dd_tail points to the position
+ * which is one beyond the last valid element in tx_dd[].
+ * tx_dd_head points to the first valid element in
+ * tx_dd[].
+ */
+ int tx_int_nsegs;
+ int tx_nsegs;
+ int tx_dd_tail;
+ int tx_dd_head;
+#define EM_TXDD_MAX 64
+#define EM_TXDD_SAFE 50 /* must be less than EM_TXDD_MAX */
+ int tx_dd[EM_TXDD_MAX];
+
+ /*
* Receive definitions
*
* we have an array of num_rx_desc rx_desc (handled by the
};
struct em_buffer {
- int next_eop; /* Index of the desc to watch */
struct mbuf *m_head;
bus_dmamap_t map; /* bus_dma map for packet */
};
} DESC_ARRAY, *PDESC_ARRAY;
#define EM_IS_OACTIVE(adapter) \
- ((adapter)->num_tx_desc_avail < \
- (adapter)->spare_tx_desc + EM_TX_RESERVED)
+ ((adapter)->num_tx_desc_avail <= (adapter)->oact_tx_desc)
+
+#define EM_INC_TXDD_IDX(idx) \
+do { \
+ if (++(idx) == EM_TXDD_MAX) \
+ (idx) = 0; \
+} while (0)
#endif /* _IF_EM_H_ */