From afdeb9daba9af3b7a1cf7be6b7008348fc3d635a Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 16 Oct 2008 12:29:13 +0000 Subject: [PATCH 01/16] Add hardware csum offload support for MAC style 2 chips, which include 8102E, 8102EL, 8168C, 8168CP and 8168D. Obtained-from: RealTek r8101-1.009.00 r8168-8.008.00 Add RE_C_AUTOPAD capability to indicate hardware could correctly pad short ether frames. Turn it on for newer version of 8168B (0x38000000 and 0xb8000000) and MAC style 2 chips; manually padding short UDP packets for newer version 8168B will result in incorrect UDP csum, while manually padding short ICMP packets for MAC style 2 chips will result in both incorrect IP header csum and incorrect IP length (o_O) --- sys/dev/netif/re/if_re.c | 156 ++++++++++++++++++++++++------------ sys/dev/netif/re/if_rereg.h | 19 +++-- sys/dev/netif/re/if_revar.h | 3 +- 3 files changed, 118 insertions(+), 60 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index 97af8eed1d..b47e6a4638 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.82 2008/10/14 15:11:38 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.83 2008/10/16 12:29:13 sephe Exp $ */ /* @@ -221,19 +221,22 @@ static const struct re_hwrev re_hwrevs[] = { RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT }, { RE_HWREV_8168B2, RE_MACVER_23, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT }, + RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT | RE_C_AUTOPAD }, { RE_HWREV_8168B3, RE_MACVER_23, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT }, + RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT | RE_C_AUTOPAD }, { RE_HWREV_8168C, RE_MACVER_29, - RE_C_HWIM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT }, + RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD }, { RE_HWREV_8168CP, RE_MACVER_2B, - RE_C_HWIM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT }, + RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD }, { RE_HWREV_8168D, RE_MACVER_2A, - RE_C_HWIM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT }, + RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | + RE_C_AUTOPAD }, { RE_HWREV_8100E, RE_MACVER_UNKN, RE_C_HWCSUM }, @@ -245,10 +248,10 @@ static const struct re_hwrev re_hwrevs[] = { RE_C_HWCSUM }, { RE_HWREV_8102E, RE_MACVER_15, - RE_C_MAC2 }, + RE_C_HWCSUM | RE_C_MAC2 | RE_C_AUTOPAD }, { RE_HWREV_8102EL, RE_MACVER_15, - RE_C_MAC2 }, + RE_C_HWCSUM | RE_C_MAC2 | RE_C_AUTOPAD }, { RE_HWREV_NULL, 0, 0 } }; @@ -1806,6 +1809,29 @@ re_rx_list_init(struct re_softc *sc) return(0); } +#define RE_IP4_PACKET 0x1 +#define RE_TCP_PACKET 0x2 +#define RE_UDP_PACKET 0x4 + +static __inline uint8_t +re_packet_type(struct re_softc *sc, uint32_t rxstat, uint32_t rxctrl) +{ + uint8_t packet_type = 0; + + if (sc->re_caps & RE_C_MAC2) { + if (rxctrl & RE_RDESC_CTL_PROTOIP4) + packet_type |= RE_IP4_PACKET; + } else { + if (rxstat & RE_RDESC_STAT_PROTOID) + 
packet_type |= RE_IP4_PACKET; + } + if (RE_TCPPKT(rxstat)) + packet_type |= RE_TCP_PACKET; + else if (RE_UDPPKT(rxstat)) + packet_type |= RE_UDP_PACKET; + return packet_type; +} + /* * RX handler for C+ and 8169. For the gigE chips, we support * the reception of jumbo frames that have been fragmented @@ -1817,7 +1843,7 @@ re_rxeof(struct re_softc *sc) struct ifnet *ifp = &sc->arpcom.ac_if; struct mbuf *m; struct re_desc *cur_rx; - uint32_t rxstat, rxvlan; + uint32_t rxstat, rxctrl; int i, total_len, rx = 0; struct mbuf_chain chain[MAXCPU]; @@ -1834,7 +1860,7 @@ re_rxeof(struct re_softc *sc) m = sc->re_ldata.re_rx_mbuf[i]; total_len = RE_RXBYTES(cur_rx); rxstat = le32toh(cur_rx->re_cmdstat); - rxvlan = le32toh(cur_rx->re_vlanctl); + rxctrl = le32toh(cur_rx->re_control); rx = 1; @@ -1942,17 +1968,22 @@ re_rxeof(struct re_softc *sc) /* Do RX checksumming if enabled */ if (ifp->if_capenable & IFCAP_RXCSUM) { + uint8_t packet_type; + + packet_type = re_packet_type(sc, rxstat, rxctrl); + /* Check IP header checksum */ - if (rxstat & RE_RDESC_STAT_PROTOID) + if (packet_type & RE_IP4_PACKET) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; - if ((rxstat & RE_RDESC_STAT_IPSUMBAD) == 0) - m->m_pkthdr.csum_flags |= CSUM_IP_VALID; + if ((rxstat & RE_RDESC_STAT_IPSUMBAD) == 0) + m->m_pkthdr.csum_flags |= CSUM_IP_VALID; + } /* Check TCP/UDP checksum */ - if ((RE_TCPPKT(rxstat) && - (rxstat & RE_RDESC_STAT_TCPSUMBAD) == 0) || - (RE_UDPPKT(rxstat) && - (rxstat & RE_RDESC_STAT_UDPSUMBAD)) == 0) { + if (((packet_type & RE_TCP_PACKET) && + (rxstat & RE_RDESC_STAT_TCPSUMBAD) == 0) || + ((packet_type & RE_UDP_PACKET) && + (rxstat & RE_RDESC_STAT_UDPSUMBAD) == 0)) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR| CSUM_FRAG_NOT_CHECKED; @@ -1960,10 +1991,10 @@ re_rxeof(struct re_softc *sc) } } - if (rxvlan & RE_RDESC_VLANCTL_TAG) { + if (rxctrl & RE_RDESC_CTL_HASTAG) { m->m_flags |= M_VLANTAG; m->m_pkthdr.ether_vlantag = - be16toh((rxvlan & RE_RDESC_VLANCTL_DATA)); + 
be16toh((rxctrl & RE_RDESC_CTL_TAGDATA)); } ether_input_chain(ifp, m, chain); } @@ -1980,6 +2011,10 @@ re_rxeof(struct re_softc *sc) return rx; } +#undef RE_IP4_PACKET +#undef RE_TCP_PACKET +#undef RE_UDP_PACKET + static int re_txeof(struct re_softc *sc) { @@ -2199,7 +2234,7 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) bus_dmamap_t map; int error, maxsegs, idx, i; struct re_desc *d, *tx_ring; - uint32_t csum_flags; + uint32_t cmd_csum, ctl_csum; KASSERT(sc->re_ldata.re_tx_free > RE_TXDESC_SPARE, ("not enough free TX desc\n")); @@ -2213,13 +2248,25 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) * attempt. (This is according to testing done with an 8169 * chip. I'm not sure if this is a requirement or a bug.) */ - csum_flags = 0; - if (m->m_pkthdr.csum_flags & CSUM_IP) - csum_flags |= RE_TDESC_CMD_IPCSUM; - if (m->m_pkthdr.csum_flags & CSUM_TCP) - csum_flags |= RE_TDESC_CMD_TCPCSUM; - if (m->m_pkthdr.csum_flags & CSUM_UDP) - csum_flags |= RE_TDESC_CMD_UDPCSUM; + cmd_csum = ctl_csum = 0; + if (m->m_pkthdr.csum_flags & CSUM_IP) { + cmd_csum |= RE_TDESC_CMD_IPCSUM; + ctl_csum |= RE_TDESC_CTL_IPCSUM; + } + if (m->m_pkthdr.csum_flags & CSUM_TCP) { + cmd_csum |= RE_TDESC_CMD_TCPCSUM; + ctl_csum |= RE_TDESC_CTL_TCPCSUM; + } + if (m->m_pkthdr.csum_flags & CSUM_UDP) { + cmd_csum |= RE_TDESC_CMD_UDPCSUM; + ctl_csum |= RE_TDESC_CTL_UDPCSUM; + } + + /* For MAC2 chips, csum flags are set on re_control */ + if (sc->re_caps & RE_C_MAC2) + cmd_csum = 0; + else + ctl_csum = 0; if (m->m_pkthdr.len > sc->re_swcsum_lim && (m->m_pkthdr.csum_flags & (CSUM_DELAY_IP | CSUM_DELAY_DATA))) { @@ -2270,30 +2317,34 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP; } *m_head = m; /* 'm' may be changed by above two m_pullup() */ + + /* Clear hardware CSUM flags */ + cmd_csum = ctl_csum = 0; } - /* - * With some of the RealTek chips, using the checksum offload - * support in conjunction with the 
autopadding feature results - * in the transmission of corrupt frames. For example, if we - * need to send a really small IP fragment that's less than 60 - * bytes in size, and IP header checksumming is enabled, the - * resulting ethernet frame that appears on the wire will - * have garbled payload. To work around this, if TX checksum - * offload is enabled, we always manually pad short frames out - * to the minimum ethernet frame size. We do this by pretending - * the mbuf chain has too many fragments so the coalescing code - * below can assemble the packet into a single buffer that's - * padded out to the mininum frame size. - * - * Note: this appears unnecessary for TCP, and doing it for TCP - * with PCIe adapters seems to result in bad checksums. - */ - if (csum_flags && !(csum_flags & RE_TDESC_CMD_TCPCSUM) && - m->m_pkthdr.len < RE_MIN_FRAMELEN) { - error = re_pad_frame(m); - if (error) - goto back; + if ((sc->re_caps & RE_C_AUTOPAD) == 0) { + /* + * With some of the RealTek chips, using the checksum offload + * support in conjunction with the autopadding feature results + * in the transmission of corrupt frames. For example, if we + * need to send a really small IP fragment that's less than 60 + * bytes in size, and IP header checksumming is enabled, the + * resulting ethernet frame that appears on the wire will + * have garbled payload. To work around this, if TX checksum + * offload is enabled, we always manually pad short frames out + * to the minimum ethernet frame size. + * + * Note: this appears unnecessary for TCP, and doing it for TCP + * with PCIe adapters seems to result in bad checksums. 
+ */ + if ((m->m_pkthdr.csum_flags & + (CSUM_DELAY_IP | CSUM_DELAY_DATA)) && + (m->m_pkthdr.csum_flags & CSUM_TCP) == 0 && + m->m_pkthdr.len < RE_MIN_FRAMELEN) { + error = re_pad_frame(m); + if (error) + goto back; + } } maxsegs = sc->re_ldata.re_tx_free; @@ -2367,7 +2418,8 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) cmdstat |= RE_TDESC_CMD_OWN; if (idx == (sc->re_tx_desc_cnt - 1)) cmdstat |= RE_TDESC_CMD_EOR; - d->re_cmdstat = htole32(cmdstat | csum_flags); + d->re_cmdstat = htole32(cmdstat | cmd_csum); + d->re_control = htole32(ctl_csum); i++; if (i == arg.re_nsegs) @@ -2382,9 +2434,9 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) * transmission attempt. */ if (m->m_flags & M_VLANTAG) { - tx_ring[*idx0].re_vlanctl = + tx_ring[*idx0].re_control |= htole32(htobe16(m->m_pkthdr.ether_vlantag) | - RE_TDESC_VLANCTL_TAG); + RE_TDESC_CTL_INSTAG); } /* Transfer ownership of packet to the chip. */ diff --git a/sys/dev/netif/re/if_rereg.h b/sys/dev/netif/re/if_rereg.h index 9fd571b2e9..25fcb38025 100644 --- a/sys/dev/netif/re/if_rereg.h +++ b/sys/dev/netif/re/if_rereg.h @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/pci/if_rlreg.h,v 1.42 2004/05/24 19:39:23 jhb Exp $ - * $DragonFly: src/sys/dev/netif/re/if_rereg.h,v 1.22 2008/10/14 15:11:38 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_rereg.h,v 1.23 2008/10/16 12:29:13 sephe Exp $ */ /* @@ -477,7 +477,7 @@ struct re_mii_frame { struct re_desc { uint32_t re_cmdstat; - uint32_t re_vlanctl; + uint32_t re_control; uint32_t re_bufaddr_lo; uint32_t re_bufaddr_hi; }; @@ -493,8 +493,11 @@ struct re_desc { #define RE_TDESC_CMD_EOR 0x40000000 /* end of ring marker */ #define RE_TDESC_CMD_OWN 0x80000000 /* chip owns descriptor */ -#define RE_TDESC_VLANCTL_TAG 0x00020000 /* Insert VLAN tag */ -#define RE_TDESC_VLANCTL_DATA 0x0000FFFF /* TAG data */ +#define RE_TDESC_CTL_INSTAG 0x00020000 /* Insert VLAN tag */ +#define RE_TDESC_CTL_TAGDATA 0x0000FFFF /* TAG data */ +#define RE_TDESC_CTL_IPCSUM 0x20000000 /* IP header csum, MAC2 only */ +#define RE_TDESC_CTL_TCPCSUM 0x60000000 /* TCP csum, MAC2 only */ +#define RE_TDESC_CTL_UDPCSUM 0xa0000000 /* UDP csum, MAC2 only */ /* * Error bits are valid only on the last descriptor of a frame @@ -538,9 +541,11 @@ struct re_desc { #define RE_RDESC_STAT_FRAGLEN 0x00001FFF /* RX'ed frame/frag len */ #define RE_RDESC_STAT_GFRAGLEN 0x00003FFF /* RX'ed frame/frag len */ -#define RE_RDESC_VLANCTL_TAG 0x00010000 /* VLAN tag available - (re_vlandata valid)*/ -#define RE_RDESC_VLANCTL_DATA 0x0000FFFF /* TAG data */ +#define RE_RDESC_CTL_HASTAG 0x00010000 /* VLAN tag available + (TAG data valid) */ +#define RE_RDESC_CTL_TAGDATA 0x0000FFFF /* TAG data */ +#define RE_RDESC_CTL_PROTOIP4 0x40000000 /* IPv4 packet, MAC2 only */ +#define RE_RDESC_CTL_PROTOIP6 0x80000000 /* IPv6 packet, MAC2 only */ #define RE_PROTOID_NONIP 0x00000000 #define RE_PROTOID_TCPIP 0x00010000 diff --git a/sys/dev/netif/re/if_revar.h b/sys/dev/netif/re/if_revar.h index ac87afb6c3..c6568945c1 100644 --- a/sys/dev/netif/re/if_revar.h +++ b/sys/dev/netif/re/if_revar.h @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH 
DAMAGE. * * $FreeBSD: src/sys/pci/if_rlreg.h,v 1.42 2004/05/24 19:39:23 jhb Exp $ - * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.24 2008/10/14 15:11:38 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.25 2008/10/16 12:29:13 sephe Exp $ */ #define RE_RX_DESC_CNT_DEF 256 @@ -188,6 +188,7 @@ struct re_softc { #define RE_C_MAC2 0x40 /* MAC style 2 */ #define RE_C_PHYPMGT 0x80 /* PHY supports power mgmt */ #define RE_C_8169 0x100 /* is 8110/8169 */ +#define RE_C_AUTOPAD 0x200 /* hardware auto-pad short frames */ #define RE_IS_8139CP(sc) ((sc)->re_caps & RE_C_8139CP) -- 2.41.0 From 39acdb23e5d1675a8dce46afd53f3121e516ff3c Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 16 Oct 2008 12:46:40 +0000 Subject: [PATCH 02/16] According to wpaul's comment, 8139C+ only support 64 TX/RX descriptors --- sys/dev/netif/re/if_re.c | 19 ++++++++++++------- sys/dev/netif/re/if_revar.h | 5 ++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index b47e6a4638..1c889a6daf 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.83 2008/10/16 12:29:13 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.84 2008/10/16 12:46:40 sephe Exp $ */ /* @@ -1312,13 +1312,18 @@ re_attach(device_t dev) sc->re_dev = dev; #endif - sc->re_rx_desc_cnt = re_rx_desc_count; - if (sc->re_rx_desc_cnt > RE_RX_DESC_CNT_MAX) - sc->re_rx_desc_cnt = RE_RX_DESC_CNT_MAX; + if (RE_IS_8139CP(sc)) { + sc->re_rx_desc_cnt = RE_RX_DESC_CNT_8139CP; + sc->re_tx_desc_cnt = RE_TX_DESC_CNT_8139CP; + } else { + sc->re_rx_desc_cnt = re_rx_desc_count; + if (sc->re_rx_desc_cnt > RE_RX_DESC_CNT_MAX) + sc->re_rx_desc_cnt = RE_RX_DESC_CNT_MAX; - sc->re_tx_desc_cnt = re_tx_desc_count; - if (sc->re_tx_desc_cnt > RE_TX_DESC_CNT_MAX) - sc->re_tx_desc_cnt = RE_TX_DESC_CNT_MAX; + sc->re_tx_desc_cnt = re_tx_desc_count; + if (sc->re_tx_desc_cnt > RE_TX_DESC_CNT_MAX) + sc->re_tx_desc_cnt = RE_TX_DESC_CNT_MAX; + } qlen = RE_IFQ_MAXLEN; if (sc->re_tx_desc_cnt > qlen) diff --git a/sys/dev/netif/re/if_revar.h b/sys/dev/netif/re/if_revar.h index c6568945c1..e03ca7e94a 100644 --- a/sys/dev/netif/re/if_revar.h +++ b/sys/dev/netif/re/if_revar.h @@ -33,9 +33,12 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/pci/if_rlreg.h,v 1.42 2004/05/24 19:39:23 jhb Exp $ - * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.25 2008/10/16 12:29:13 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.26 2008/10/16 12:46:40 sephe Exp $ */ +#define RE_RX_DESC_CNT_8139CP 64 +#define RE_TX_DESC_CNT_8139CP 64 + #define RE_RX_DESC_CNT_DEF 256 #define RE_TX_DESC_CNT_DEF 256 #define RE_RX_DESC_CNT_MAX 1024 -- 2.41.0 From 7c00fe94d692aa0e27894b8c18474506b4a679d5 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Thu, 16 Oct 2008 14:58:50 +0000 Subject: [PATCH 03/16] Adjust max read request size according to MTU; 512 seems to be the only value that works with jumbo frames without "watchdog timeout" during UDP_STREAM netperf tests. --- sys/dev/netif/re/if_re.c | 74 +++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index 1c889a6daf..b9e3cdf214 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.84 2008/10/16 12:46:40 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.85 2008/10/16 14:58:50 sephe Exp $ */ /* @@ -184,7 +184,7 @@ static const struct re_type { "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" }, { PCI_VENDOR_COREGA, PCI_PRODUCT_COREGA_CG_LAPCIGT, - "Corega CG-LAPCIGT Gigabit Ethernet" }, + "Corega CG-LAPCIGT Gigabit Ethernet" }, { PCI_VENDOR_LINKSYS, PCI_PRODUCT_LINKSYS_EG1032, "Linksys EG1032 Gigabit Ethernet" }, @@ -304,6 +304,7 @@ static void re_setmulti(struct re_softc *); static void re_reset(struct re_softc *); static void re_get_eaddr(struct re_softc *, uint8_t *); static int re_pad_frame(struct mbuf *); +static void re_set_max_readrq(struct re_softc *, uint16_t); static void re_setup_hw_im(struct re_softc *); static void re_setup_sim_im(struct re_softc *); @@ -1305,12 +1306,9 @@ re_attach(device_t dev) struct ifnet *ifp; uint8_t eaddr[ETHER_ADDR_LEN]; int error = 0, rid, qlen; - uint8_t expr_ptr; callout_init(&sc->re_timer); -#ifdef RE_DIAG sc->re_dev = dev; -#endif if (RE_IS_8139CP(sc)) { sc->re_rx_desc_cnt = RE_RX_DESC_CNT_8139CP; @@ -1440,28 +1438,7 @@ re_attach(device_t dev) /* Reset the adapter. */ re_reset(sc); - expr_ptr = pci_get_pciecap_ptr(dev); - if (expr_ptr != 0) { - uint16_t val; - - /* - * We will set TX DMA burst to "unlimited" in - * re_init(), so push "max read request size" - * to the limit. 
- */ - val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2); - if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != - PCIEM_DEVCTL_MAX_READRQ_4096) { - device_printf(dev, "adjust device control " - "0x%04x ", val); - - val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK; - val |= PCIEM_DEVCTL_MAX_READRQ_4096; - pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, - val, 2); - - kprintf("-> 0x%04x\n", val); - } + if (pci_get_pciecap_ptr(dev) != 0) { sc->re_caps |= RE_C_PCIE; /* Reduce the simulated interrupt moderation timer a bit */ @@ -1536,7 +1513,7 @@ re_attach(device_t dev) * 8168C's PCI Express device control is located at 0x78, * so the reading from 0x79 (higher part of 0x78) and setting * the 4~6bits intend to enlarge the "max read request size" - * (we have done it). The content of the rest part of this + * (we will do it). The content of the rest part of this * register is not meaningful to other PCI registers, so * writing the value to 0x54 could be completely wrong. * 0x80 is the lower part of PCI Express device status, non- @@ -2559,6 +2536,22 @@ re_init(void *xsc) */ re_stop(sc); + /* + * Adjust max read request size according to MTU. + * Mainly to improve TX performance for common case (ETHERMTU). + */ + if (sc->re_caps & RE_C_PCIE) { + if (ifp->if_mtu > ETHERMTU) { + /* + * 512 seems to be the only value that works + * reliably with jumbo frame + */ + re_set_max_readrq(sc, PCIEM_DEVCTL_MAX_READRQ_512); + } else { + re_set_max_readrq(sc, PCIEM_DEVCTL_MAX_READRQ_4096); + } + } + /* * Enable C+ RX and TX mode, as well as VLAN stripping and * RX checksum offload. 
We must configure the C+ register @@ -3299,3 +3292,28 @@ re_get_eaddr(struct re_softc *sc, uint8_t *eaddr) for (i = 0; i < ETHER_ADDR_LEN; ++i) eaddr[i] = CSR_READ_1(sc, RE_IDR0 + i); } + +static void +re_set_max_readrq(struct re_softc *sc, uint16_t size) +{ + device_t dev = sc->re_dev; + uint8_t expr_ptr; + uint16_t val, rqsize; + + rqsize = size & PCIEM_DEVCTL_MAX_READRQ_MASK; + + expr_ptr = pci_get_pciecap_ptr(dev); + KKASSERT(expr_ptr != 0); + + val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2); + if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) { + device_printf(dev, "adjust device control " + "0x%04x ", val); + + val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK; + val |= rqsize; + pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2); + + kprintf("-> 0x%04x\n", val); + } +} -- 2.41.0 From 1e3b54fcb93e2db160a5da345060dca68464a9c5 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 16 Oct 2008 17:23:20 +0000 Subject: [PATCH 04/16] Add a new utility called 'monitor' which uses kqueue to monitor a list of files and directories specified on the command line. 
--- usr.bin/Makefile | 3 +- usr.bin/monitor/Makefile | 6 ++ usr.bin/monitor/monitor.1 | 69 ++++++++++++ usr.bin/monitor/monitor.c | 217 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 294 insertions(+), 1 deletion(-) create mode 100644 usr.bin/monitor/Makefile create mode 100644 usr.bin/monitor/monitor.1 create mode 100644 usr.bin/monitor/monitor.c diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 34cec791fc..002fbd6ef7 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -1,6 +1,6 @@ # From: @(#)Makefile 8.3 (Berkeley) 1/7/94 # $FreeBSD: src/usr.bin/Makefile,v 1.144.2.17 2003/01/04 17:17:07 obrien Exp $ -# $DragonFly: src/usr.bin/Makefile,v 1.45 2008/07/12 14:57:33 pavalos Exp $ +# $DragonFly: src/usr.bin/Makefile,v 1.46 2008/10/16 17:23:18 dillon Exp $ # XXX MISSING: deroff diction graph learn plot # spell spline struct xsend @@ -118,6 +118,7 @@ SUBDIR= alias \ mklocale \ mkstr \ mktemp \ + monitor \ msgs \ mt \ ncal \ diff --git a/usr.bin/monitor/Makefile b/usr.bin/monitor/Makefile new file mode 100644 index 0000000000..6bb73d88f0 --- /dev/null +++ b/usr.bin/monitor/Makefile @@ -0,0 +1,6 @@ +# +# $DragonFly: src/usr.bin/monitor/Makefile,v 1.1 2008/10/16 17:23:20 dillon Exp $ + +PROG= monitor + +.include diff --git a/usr.bin/monitor/monitor.1 b/usr.bin/monitor/monitor.1 new file mode 100644 index 0000000000..f5d1498e98 --- /dev/null +++ b/usr.bin/monitor/monitor.1 @@ -0,0 +1,69 @@ +.\" Copyright (c) 2008 The DragonFly Project. All rights reserved. +.\" +.\" This code is derived from software contributed to The DragonFly Project +.\" by Matthew Dillon +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in +.\" the documentation and/or other materials provided with the +.\" distribution. +.\" 3. Neither the name of The DragonFly Project nor the names of its +.\" contributors may be used to endorse or promote products derived +.\" from this software without specific, prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +.\" FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +.\" COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, +.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $DragonFly: src/usr.bin/monitor/monitor.1,v 1.1 2008/10/16 17:23:20 dillon Exp $ +.Dd October 16, 2008 +.Dt MONITOR 1 +.Os +.Sh NAME +.Nm monitor +.Nd File and directory monitoring utility +.Sh SYNOPSIS +.Nm +.Op Fl qvx +.Ar files +.Sh DESCRIPTION +The +.Nm +utility monitors one or more files or directories and outputs the +appropriate file name to stdout whenever a change occurs. +Supported options are as follows: +.Bl -tag -width indent +.It Fl q +Be more quiet. +Specifying this option will cause only the file paths to be output. +.It Fl v +Be more verbose. +.It Fl x +Exit after first hit. +.Nm +will exit after reporting the first event. 
+.El +.\".Sh EXAMPLES +.\".Sh SEE ALSO +.Sh HISTORY +The +.Nm +utility first appeared in +.Dx 2.1 . +.Sh AUTHORS +.An Matthew Dillon Aq dillon@backplane.com diff --git a/usr.bin/monitor/monitor.c b/usr.bin/monitor/monitor.c new file mode 100644 index 0000000000..18b5e3ab5b --- /dev/null +++ b/usr.bin/monitor/monitor.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/usr.bin/monitor/monitor.c,v 1.1 2008/10/16 17:23:20 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct monitor_elm { + const char *path; + int fd; +} *monitor_elm_t; + +static void usage(int exit_code); +static void monitor_add(const char *path); +static void monitor_events(void); + +static int VerboseOpt; +static int QuietOpt; +static int ExitOpt; +static int KQueueFd; +static int NumFiles; +static int MaxFiles; +static monitor_elm_t *Elms; + +int +main(int ac, char **av) +{ + int ch; + int i; + + while ((ch = getopt(ac, av, "qvx")) != -1) { + switch(ch) { + case 'q': + if (VerboseOpt > 0) + --VerboseOpt; + else + ++QuietOpt; + break; + case 'v': + if (QuietOpt > 0) + --QuietOpt; + else + ++VerboseOpt; + break; + case 'x': + ExitOpt = 1; + break; + default: + usage(1); + /* not reached */ + } + } + ac -= optind; + av += optind; + + if (ac < 1) { + usage(1); + /* not reached */ + } + + if ((KQueueFd = kqueue()) < 0) { + perror("kqueue"); + exit(1); + } + NumFiles = MaxFiles = 16; + Elms = calloc(MaxFiles, sizeof(monitor_elm_t)); + + for (i = 0; i < ac; ++i) { + monitor_add(av[i]); + } + fflush(stdout); + do { + monitor_events(); + fflush(stdout); + } while (ExitOpt == 0); + exit(0); +} + +static +void +monitor_add(const char *path) +{ + monitor_elm_t elm; + struct kevent kev; + int n; + + elm = malloc(sizeof(*elm)); + 
bzero(elm, sizeof(*elm)); + elm->path = path; + elm->fd = open(path, O_RDONLY); + if (elm->fd < 0) { + printf("%s\tnot found\n", path); + return; + } + EV_SET(&kev, elm->fd, EVFILT_VNODE, EV_ADD|EV_ENABLE|EV_CLEAR, + NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_ATTRIB| + NOTE_LINK|NOTE_RENAME|NOTE_REVOKE, + 0, NULL); + n = kevent(KQueueFd, &kev, 1, NULL, 0, NULL); + + if (elm->fd >= NumFiles) { + MaxFiles = (elm->fd + 16) * 3 / 2; + Elms = realloc(Elms, MaxFiles * sizeof(elm)); + bzero(&Elms[NumFiles], (NumFiles - MaxFiles) * sizeof(elm)); + NumFiles = MaxFiles; + } + Elms[elm->fd] = elm; +} + +static +void +monitor_events(void) +{ + struct kevent kev_array[1]; + struct kevent *kev; + monitor_elm_t elm; + struct stat st; + int bno; + int i; + int n; + + n = kevent(KQueueFd, NULL, 0, kev_array, 1, NULL); + for (i = 0; i < n; ++i) { + kev = &kev_array[i]; + elm = Elms[kev->ident]; + printf("%-23s", elm->path); + if (VerboseOpt && fstat(kev->ident, &st) == 0 && + S_ISREG(st.st_mode)) { + printf(" %10lld", st.st_size); + } + while (QuietOpt == 0 && (bno = ffs(kev->fflags)) > 0) { + printf(" "); + --bno; + kev->fflags &= ~(1 << bno); + switch(1 << bno) { + case NOTE_DELETE: + printf("delete"); + break; + case NOTE_WRITE: + printf("write"); + break; + case NOTE_EXTEND: + printf("extend"); + break; + case NOTE_ATTRIB: + printf("attrib"); + break; + case NOTE_LINK: + printf("link"); + break; + case NOTE_RENAME: + printf("rename"); + break; + case NOTE_REVOKE: + printf("revoke"); + break; + default: + printf("%08x", 1 << bno); + break; + } + } + printf("\n"); + } +} + +static +void +usage(int exit_code) +{ + fprintf(stderr, + "monitor [-vx] files...\n" + " -v Be more verbose\n" + " -x Exit after first event reported\n" + ); + exit(exit_code); +} + -- 2.41.0 From fbb84158019cd0638327560f5a88f8fcb4abfb61 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 16 Oct 2008 17:24:16 +0000 Subject: [PATCH 05/16] Add KQUEUE support to HAMMER. 
--- sys/vfs/hammer/hammer_vnops.c | 167 ++++++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 7 deletions(-) diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index e9d37bf39f..937bcf9549 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.101 2008/10/15 22:38:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.102 2008/10/16 17:24:16 dillon Exp $ */ #include @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include "hammer.h" @@ -79,10 +80,12 @@ static int hammer_vop_nsymlink(struct vop_nsymlink_args *); static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *); static int hammer_vop_ioctl(struct vop_ioctl_args *); static int hammer_vop_mountctl(struct vop_mountctl_args *); +static int hammer_vop_kqfilter (struct vop_kqfilter_args *); static int hammer_vop_fifoclose (struct vop_close_args *); static int hammer_vop_fiforead (struct vop_read_args *); static int hammer_vop_fifowrite (struct vop_write_args *); +static int hammer_vop_fifokqfilter (struct vop_kqfilter_args *); static int hammer_vop_specclose (struct vop_close_args *); static int hammer_vop_specread (struct vop_read_args *); @@ -121,7 +124,8 @@ struct vop_ops hammer_vnode_vops = { .vop_nsymlink = hammer_vop_nsymlink, .vop_nwhiteout = hammer_vop_nwhiteout, .vop_ioctl = hammer_vop_ioctl, - .vop_mountctl = hammer_vop_mountctl + .vop_mountctl = hammer_vop_mountctl, + .vop_kqfilter = hammer_vop_kqfilter }; struct vop_ops hammer_spec_vops = { @@ -147,9 +151,18 @@ struct vop_ops hammer_fifo_vops = { .vop_getattr = hammer_vop_getattr, .vop_inactive = hammer_vop_inactive, .vop_reclaim = hammer_vop_reclaim, - .vop_setattr = hammer_vop_setattr + .vop_setattr = hammer_vop_setattr, + .vop_kqfilter = hammer_vop_fifokqfilter }; +static __inline +void 
+hammer_knote(struct vnode *vp, int flags) +{ + if (flags) + KNOTE(&vp->v_pollinfo.vpi_selinfo.si_note, flags); +} + #ifdef DEBUG_TRUNCATE struct hammer_inode *HammerTruncIp; #endif @@ -303,6 +316,7 @@ hammer_vop_write(struct vop_write_args *ap) int offset; off_t base_offset; struct buf *bp; + int kflags; int error; int n; int flags; @@ -314,6 +328,7 @@ hammer_vop_write(struct vop_write_args *ap) ip = VTOI(ap->a_vp); hmp = ip->hmp; error = 0; + kflags = 0; seqcount = ap->a_ioflag >> 16; if (ip->flags & HAMMER_INODE_RO) @@ -430,6 +445,7 @@ hammer_vop_write(struct vop_write_args *ap) if (uio->uio_offset + n > ip->ino_data.size) { vnode_pager_setsize(ap->a_vp, uio->uio_offset + n); fixsize = 1; + kflags |= NOTE_EXTEND; } if (uio->uio_segflg == UIO_NOCOPY) { @@ -490,6 +506,7 @@ hammer_vop_write(struct vop_write_args *ap) } break; } + kflags |= NOTE_WRITE; hammer_stats_file_write += n; /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ if (ip->ino_data.size < uio->uio_offset) { @@ -524,6 +541,7 @@ hammer_vop_write(struct vop_write_args *ap) } } hammer_done_transaction(&trans); + hammer_knote(ap->a_vp, kflags); return (error); } @@ -641,6 +659,7 @@ hammer_vop_ncreate(struct vop_ncreate_args *ap) cache_setunresolved(ap->a_nch); cache_setvp(ap->a_nch, *ap->a_vpp); } + hammer_knote(ap->a_dvp, NOTE_WRITE); } return (error); } @@ -1043,6 +1062,8 @@ hammer_vop_nlink(struct vop_nlink_args *ap) cache_setvp(nch, ap->a_vp); } hammer_done_transaction(&trans); + hammer_knote(ap->a_vp, NOTE_LINK); + hammer_knote(ap->a_dvp, NOTE_WRITE); return (error); } @@ -1113,6 +1134,8 @@ hammer_vop_nmkdir(struct vop_nmkdir_args *ap) } } hammer_done_transaction(&trans); + if (error == 0) + hammer_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK); return (error); } @@ -1183,6 +1206,8 @@ hammer_vop_nmknod(struct vop_nmknod_args *ap) } } hammer_done_transaction(&trans); + if (error == 0) + hammer_knote(ap->a_dvp, NOTE_WRITE); return (error); } @@ -1489,7 +1514,8 @@ hammer_vop_nremove(struct 
vop_nremove_args *ap) ++hammer_stats_file_iopsw; error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 0); hammer_done_transaction(&trans); - + if (error == 0) + hammer_knote(ap->a_dvp, NOTE_WRITE); return (error); } @@ -1629,8 +1655,13 @@ retry: * Cleanup and tell the kernel that the rename succeeded. */ hammer_done_cursor(&cursor); - if (error == 0) + if (error == 0) { cache_rename(ap->a_fnch, ap->a_tnch); + hammer_knote(ap->a_fdvp, NOTE_WRITE); + hammer_knote(ap->a_tdvp, NOTE_WRITE); + if (ip->vp) + hammer_knote(ip->vp, NOTE_RENAME); + } failed: hammer_done_transaction(&trans); @@ -1659,7 +1690,8 @@ hammer_vop_nrmdir(struct vop_nrmdir_args *ap) ++hammer_stats_file_iopsw; error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 1); hammer_done_transaction(&trans); - + if (error == 0) + hammer_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK); return (error); } @@ -1677,12 +1709,14 @@ hammer_vop_setattr(struct vop_setattr_args *ap) int error; int truncating; int blksize; + int kflags; int64_t aligned_size; u_int32_t flags; vap = ap->a_vap; ip = ap->a_vp->v_data; modflags = 0; + kflags = 0; if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) return(EROFS); @@ -1706,6 +1740,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) if (ip->ino_data.uflags != flags) { ip->ino_data.uflags = flags; modflags |= HAMMER_INODE_DDIRTY; + kflags |= NOTE_ATTRIB; } if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) { error = 0; @@ -1742,6 +1777,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) ip->ino_data.mode = cur_mode; } modflags |= HAMMER_INODE_DDIRTY; + kflags |= NOTE_ATTRIB; } } while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) { @@ -1758,9 +1794,11 @@ hammer_vop_setattr(struct vop_setattr_args *ap) if (vap->va_size < ip->ino_data.size) { vtruncbuf(ap->a_vp, vap->va_size, blksize); truncating = 1; + kflags |= NOTE_WRITE; } else { vnode_pager_setsize(ap->a_vp, vap->va_size); truncating = 0; + kflags |= NOTE_WRITE | NOTE_EXTEND; } ip->ino_data.size 
= vap->va_size; modflags |= HAMMER_INODE_DDIRTY; @@ -1835,6 +1873,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) hammer_ip_frontend_trunc(ip, vap->va_size); ip->ino_data.size = vap->va_size; modflags |= HAMMER_INODE_DDIRTY; + kflags |= NOTE_ATTRIB; break; default: error = EINVAL; @@ -1846,11 +1885,13 @@ hammer_vop_setattr(struct vop_setattr_args *ap) ip->ino_data.atime = hammer_timespec_to_time(&vap->va_atime); modflags |= HAMMER_INODE_ATIME; + kflags |= NOTE_ATTRIB; } if (vap->va_mtime.tv_sec != VNOVAL) { ip->ino_data.mtime = hammer_timespec_to_time(&vap->va_mtime); modflags |= HAMMER_INODE_MTIME; + kflags |= NOTE_ATTRIB; } if (vap->va_mode != (mode_t)VNOVAL) { mode_t cur_mode = ip->ino_data.mode; @@ -1862,12 +1903,14 @@ hammer_vop_setattr(struct vop_setattr_args *ap) if (error == 0 && ip->ino_data.mode != cur_mode) { ip->ino_data.mode = cur_mode; modflags |= HAMMER_INODE_DDIRTY; + kflags |= NOTE_ATTRIB; } } done: if (error == 0) hammer_modify_inode(ip, modflags); hammer_done_transaction(&trans); + hammer_knote(ap->a_vp, kflags); return (error); } @@ -1962,6 +2005,7 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap) if (error == 0) { cache_setunresolved(ap->a_nch); cache_setvp(ap->a_nch, *ap->a_vpp); + hammer_knote(ap->a_dvp, NOTE_WRITE); } } hammer_done_transaction(&trans); @@ -2757,8 +2801,10 @@ retry: cache_setunresolved(nch); cache_setvp(nch, NULL); /* XXX locking */ - if (ip->vp) + if (ip->vp) { + hammer_knote(ip->vp, NOTE_DELETE); cache_inval_vp(ip->vp, CINV_DESTROY); + } } if (ip) hammer_rel_inode(ip, 0); @@ -2804,6 +2850,18 @@ hammer_vop_fifowrite (struct vop_write_args *ap) return (error); } +static +int +hammer_vop_fifokqfilter(struct vop_kqfilter_args *ap) +{ + int error; + + error = VOCALL(&fifo_vnode_vops, &ap->a_head); + if (error) + error = hammer_vop_kqfilter(ap); + return(error); +} + static int hammer_vop_specclose (struct vop_close_args *ap) { @@ -2825,3 +2883,98 @@ hammer_vop_specwrite (struct vop_write_args *ap) return 
(VOCALL(&spec_vnode_vops, &ap->a_head)); } +/************************************************************************ + * KQFILTER OPS * + ************************************************************************ + * + */ +static void filt_hammerdetach(struct knote *kn); +static int filt_hammerread(struct knote *kn, long hint); +static int filt_hammerwrite(struct knote *kn, long hint); +static int filt_hammervnode(struct knote *kn, long hint); + +static struct filterops hammerread_filtops = + { 1, NULL, filt_hammerdetach, filt_hammerread }; +static struct filterops hammerwrite_filtops = + { 1, NULL, filt_hammerdetach, filt_hammerwrite }; +static struct filterops hammervnode_filtops = + { 1, NULL, filt_hammerdetach, filt_hammervnode }; + +static +int +hammer_vop_kqfilter(struct vop_kqfilter_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct knote *kn = ap->a_kn; + lwkt_tokref ilock; + + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &hammerread_filtops; + break; + case EVFILT_WRITE: + kn->kn_fop = &hammerwrite_filtops; + break; + case EVFILT_VNODE: + kn->kn_fop = &hammervnode_filtops; + break; + default: + return (1); + } + + kn->kn_hook = (caddr_t)vp; + + lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token); + SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext); + lwkt_reltoken(&ilock); + + return(0); +} + +static void +filt_hammerdetach(struct knote *kn) +{ + struct vnode *vp = (void *)kn->kn_hook; + lwkt_tokref ilock; + + lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token); + SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note, + kn, knote, kn_selnext); + lwkt_reltoken(&ilock); +} + +static int +filt_hammerread(struct knote *kn, long hint) +{ + struct vnode *vp = (void *)kn->kn_hook; + hammer_inode_t ip = VTOI(vp); + + if (hint == NOTE_REVOKE) { + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + return(1); + } + kn->kn_data = ip->ino_data.size - kn->kn_fp->f_offset; + return (kn->kn_data != 0); +} + +static int +filt_hammerwrite(struct knote *kn, 
long hint) +{ + if (hint == NOTE_REVOKE) + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + kn->kn_data = 0; + return (1); +} + +static int +filt_hammervnode(struct knote *kn, long hint) +{ + if (kn->kn_sfflags & hint) + kn->kn_fflags |= hint; + if (hint == NOTE_REVOKE) { + kn->kn_flags |= EV_EOF; + return (1); + } + return (kn->kn_fflags != 0); +} + -- 2.41.0 From 9acb76453e013e5736d0e265f0a4a6d146b5b240 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Thu, 16 Oct 2008 23:08:30 +0000 Subject: [PATCH 06/16] Change 'then' to 'than' in comparisons. Found-by: LanguageTool --- sbin/dump/dump.8 | 4 ++-- sbin/mountctl/mountctl.8 | 4 ++-- share/man/man7/firewall.7 | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sbin/dump/dump.8 b/sbin/dump/dump.8 index b5f15ff16d..f09fe8c256 100644 --- a/sbin/dump/dump.8 +++ b/sbin/dump/dump.8 @@ -32,7 +32,7 @@ .\" .\" @(#)dump.8 8.3 (Berkeley) 5/1/95 .\" $FreeBSD: src/sbin/dump/dump.8,v 1.27.2.18 2003/02/23 19:58:23 trhodes Exp $ -.\" $DragonFly: src/sbin/dump/dump.8,v 1.5 2006/04/17 18:01:37 swildner Exp $ +.\" $DragonFly: src/sbin/dump/dump.8,v 1.6 2008/10/16 23:08:30 swildner Exp $ .\" .Dd March 1, 2002 .Dt DUMP 8 @@ -153,7 +153,7 @@ at the cost of possibly not noticing changes in the filesystem between passes. Beware that .Nm -forks, and the actual memory use may be larger then the specified cache +forks, and the actual memory use may be larger than the specified cache size. The recommended cache size is between 8 and 32 (megabytes). .It Fl d Ar density Set tape density to diff --git a/sbin/mountctl/mountctl.8 b/sbin/mountctl/mountctl.8 index 0ccbe2398e..f018b97115 100644 --- a/sbin/mountctl/mountctl.8 +++ b/sbin/mountctl/mountctl.8 @@ -31,7 +31,7 @@ .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.\" $DragonFly: src/sbin/mountctl/mountctl.8,v 1.14 2008/02/09 00:10:29 swildner Exp $ +.\" $DragonFly: src/sbin/mountctl/mountctl.8,v 1.15 2008/10/16 23:08:30 swildner Exp $ .\" .Dd January 8, 2005 .Dt MOUNTCTL 8 @@ -203,7 +203,7 @@ option. Specify the size of the in-kernel memory FIFO used to buffer the journaling stream between processes doing filesystem operations and the worker thread writing out the journal. Since the kernel has limited virtual memory -buffers larger then 4MB are not recommended. +buffers larger than 4MB are not recommended. .It Ar swapfifo=size[k,m,g] Specify the size of the kernel-managed swap-backed FIFO used to buffer overflows. diff --git a/share/man/man7/firewall.7 b/share/man/man7/firewall.7 index 2d935ef530..9f03bba433 100644 --- a/share/man/man7/firewall.7 +++ b/share/man/man7/firewall.7 @@ -3,7 +3,7 @@ .\" the source tree. .\" .\" $FreeBSD: src/share/man/man7/firewall.7,v 1.1.2.8 2003/04/29 07:57:22 brueffer Exp $ -.\" $DragonFly: src/share/man/man7/firewall.7,v 1.15 2008/05/02 02:05:06 swildner Exp $ +.\" $DragonFly: src/share/man/man7/firewall.7,v 1.16 2008/10/16 23:08:29 swildner Exp $ .\" .Dd May 26, 2001 .Dt FIREWALL 7 @@ -47,7 +47,7 @@ get them wrong. The most common mistake is to create an exclusive firewall rather than an inclusive firewall. An exclusive firewall allows all packets through except for those matching a set of rules. An inclusive firewall allows only packets matching the ruleset -through. Inclusive firewalls are much, much safer then exclusive +through. Inclusive firewalls are much, much safer than exclusive firewalls but a tad more difficult to build properly. The second most common mistake is to blackhole everything except the particular port you want to let through. TCP/IP needs to be able -- 2.41.0 From 902f7135017729b703241be4a622af4f96fcd573 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Fri, 17 Oct 2008 07:52:56 +0000 Subject: [PATCH 07/16] Use standard markup for one or more files. 
--- usr.bin/monitor/monitor.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr.bin/monitor/monitor.1 b/usr.bin/monitor/monitor.1 index f5d1498e98..535f69776e 100644 --- a/usr.bin/monitor/monitor.1 +++ b/usr.bin/monitor/monitor.1 @@ -30,7 +30,7 @@ .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" $DragonFly: src/usr.bin/monitor/monitor.1,v 1.1 2008/10/16 17:23:20 dillon Exp $ +.\" $DragonFly: src/usr.bin/monitor/monitor.1,v 1.2 2008/10/17 07:52:56 swildner Exp $ .Dd October 16, 2008 .Dt MONITOR 1 .Os @@ -40,7 +40,7 @@ .Sh SYNOPSIS .Nm .Op Fl qvx -.Ar files +.Ar .Sh DESCRIPTION The .Nm -- 2.41.0 From f79ec5716224ea8dba2dda1354da51aa094152f1 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Fri, 17 Oct 2008 11:30:24 +0000 Subject: [PATCH 08/16] Change 'then' to 'than' in comparisons. Found-by: LanguageTool --- share/man/man4/tcp.4 | 8 ++++---- share/man/man7/security.7 | 10 +++++----- share/man/man7/tuning.7 | 8 ++++---- usr.sbin/pfctl/pf.conf.5 | 4 ++-- usr.sbin/rndcontrol/random.4 | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 index cfd7875102..28fd505256 100644 --- a/share/man/man4/tcp.4 +++ b/share/man/man4/tcp.4 @@ -31,7 +31,7 @@ .\" .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD: src/share/man/man4/tcp.4,v 1.11.2.14 2002/12/29 16:35:38 schweikh Exp $ -.\" $DragonFly: src/share/man/man4/tcp.4,v 1.8 2007/09/14 23:47:53 swildner Exp $ +.\" $DragonFly: src/share/man/man4/tcp.4,v 1.9 2008/10/17 11:30:24 swildner Exp $ .\" .Dd February 14, 1995 .Dt TCP 4 @@ -340,18 +340,18 @@ effectively disables the algorithm. This puts an upper bound on the bandwidth delay product window, in bytes. 
This value should not generally be modified but may be used to set a global per-connection limit on queued data, potentially allowing you to -intentionally set a less then optimum limit to smooth data flow over a +intentionally set a less than optimum limit to smooth data flow over a network while still being able to specify huge internal TCP buffers. .It tcp.inflight_stab The bandwidth delay product algorithm requires a slightly larger window -then it otherwise calculates for stability. This parameter determines the +than it otherwise calculates for stability. This parameter determines the extra window in maximal packets / 10. The default value of 20 represents 2 maximal packets. Reducing this value is not recommended but you may come across a situation with very slow links where the ping time reduction of the default inflight code is not sufficient. If this case occurs you should first try reducing tcp.inflight_min and, if that does not work, reduce both tcp.inflight_min and tcp.inflight_stab, trying values of -15, 10, or 5 for the latter. Never use a value less then 5. Reducing +15, 10, or 5 for the latter. Never use a value less than 5. Reducing tcp.inflight_stab can lead to upwards of a 20% underutilization of the link as well as reducing the algorithm's ability to adapt to changing situations and should only be done as a last resort. diff --git a/share/man/man7/security.7 b/share/man/man7/security.7 index 8881df47df..5eb87d6b0a 100644 --- a/share/man/man7/security.7 +++ b/share/man/man7/security.7 @@ -3,7 +3,7 @@ .\" the source tree. .\" .\" $FreeBSD: src/share/man/man7/security.7,v 1.13.2.11 2002/04/13 02:04:44 keramida Exp $ -.\" $DragonFly: src/share/man/man7/security.7,v 1.14 2008/02/19 21:46:33 thomas Exp $ +.\" $DragonFly: src/share/man/man7/security.7,v 1.15 2008/10/17 11:30:24 swildner Exp $ .\" .Dd September 18, 1999 .Dt SECURITY 7 @@ -75,7 +75,7 @@ nearly impossible to stop short of cutting your system off from the Internet. 
It may not be able to take your machine down, but it can fill up Internet pipe. .Pp -A user account compromise is even more common then a D.O.S. attack. Many +A user account compromise is even more common than a D.O.S. attack. Many sysadmins still run standard telnetd, rlogind, rshd, and ftpd servers on their machines. These servers, by default, do not operate over encrypted connections. The result is that if you have any moderate-sized user base, @@ -175,7 +175,7 @@ to root without having to place anyone at all in the wheel group. This may be the better solution since the wheel mechanism still allows an intruder to break root if the intruder has gotten hold of your password file and can break into a staff account. While having the wheel mechanism -is better then having nothing at all, it isn't necessarily the safest +is better than having nothing at all, it isn't necessarily the safest option. .Pp An indirect way to secure the root account is to secure your staff accounts @@ -277,7 +277,7 @@ Still, root holes are occasionally found in these binaries. A root hole was found in Xlib in 1998 that made xterm (which is typically suid) vulnerable. -It is better to be safe then sorry and the prudent sysadmin will restrict suid +It is better to be safe than sorry and the prudent sysadmin will restrict suid binaries that only staff should run to a special group that only staff can access, and get rid of .Pq Li "chmod 000" @@ -617,7 +617,7 @@ with These routes typically timeout in 1600 seconds or so. If the kernel detects that the cached route table has gotten too big it will dynamically reduce the rtexpire but will never decrease it to -less then rtminexpire. There are two problems: (1) The kernel does not react +less than rtminexpire. There are two problems: (1) The kernel does not react quickly enough when a lightly loaded server is suddenly attacked, and (2) The rtminexpire is not low enough for the kernel to survive a sustained attack. 
If your servers are connected to the internet via a T3 or better it may be diff --git a/share/man/man7/tuning.7 b/share/man/man7/tuning.7 index 5126663cb2..48a732320f 100644 --- a/share/man/man7/tuning.7 +++ b/share/man/man7/tuning.7 @@ -3,7 +3,7 @@ .\" the source tree. .\" .\" $FreeBSD: src/share/man/man7/tuning.7,v 1.1.2.30 2002/12/17 19:32:08 dillon Exp $ -.\" $DragonFly: src/share/man/man7/tuning.7,v 1.20 2008/09/06 10:36:27 thomas Exp $ +.\" $DragonFly: src/share/man/man7/tuning.7,v 1.21 2008/10/17 11:30:24 swildner Exp $ .\" .Dd March 4, 2007 .Dt TUNING 7 @@ -467,7 +467,7 @@ Many people also enforce artificial bandwidth limitations in order to ensure that they are not charged for using too much bandwidth. .Pp -Setting the send or receive TCP buffer to values larger then 65535 will result +Setting the send or receive TCP buffer to values larger than 65535 will result in a marginal performance improvement unless both hosts support the window scaling extension of the TCP protocol, which is controlled by the .Va net.inet.tcp.rfc1323 @@ -546,7 +546,7 @@ This parameter defaults to 20, representing 2 maximal packets added to the bandwidth delay product window calculation. The additional window is required to stabilize the algorithm and improve responsiveness to changing conditions, but it can also result in higher ping times -over slow links (though still much lower then you would get without +over slow links (though still much lower than you would get without the inflight algorithm). In such cases you may wish to try reducing this parameter to 15, 10, or 5, and you may also have to reduce @@ -618,7 +618,7 @@ seconds) via and .Va vm.swap_idle_threshold2 allows you to depress the priority of pages associated with idle processes -more quickly then the normal pageout algorithm. +more quickly than the normal pageout algorithm. This gives a helping hand to the pageout daemon. 
Do not turn this option on unless you need it, diff --git a/usr.sbin/pfctl/pf.conf.5 b/usr.sbin/pfctl/pf.conf.5 index d6d21be922..8ef103db84 100644 --- a/usr.sbin/pfctl/pf.conf.5 +++ b/usr.sbin/pfctl/pf.conf.5 @@ -1,5 +1,5 @@ .\" $OpenBSD: pf.conf.5,v 1.291 2004/02/04 19:38:30 jmc Exp $ -.\" $DragonFly: src/usr.sbin/pfctl/pf.conf.5,v 1.15 2008/08/22 20:53:00 thomas Exp $ +.\" $DragonFly: src/usr.sbin/pfctl/pf.conf.5,v 1.16 2008/10/17 11:30:24 swildner Exp $ .\" .\" Copyright (c) 2002, Daniel Hartmeier .\" All rights reserved. @@ -951,7 +951,7 @@ This option is parsed but not yet supported. This option allows low bandwidth connections to burst up to the specified bandwidth by not advancing the round robin when taking packets out of the related queue. -When using this option a small value no greater then 1/20 available interface +When using this option a small value no greater than 1/20 available interface bandwidth is recommended. .El .Pp diff --git a/usr.sbin/rndcontrol/random.4 b/usr.sbin/rndcontrol/random.4 index 1cff30ea3b..139c596846 100644 --- a/usr.sbin/rndcontrol/random.4 +++ b/usr.sbin/rndcontrol/random.4 @@ -37,7 +37,7 @@ .\" OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD: src/usr.sbin/rndcontrol/random.4,v 1.9.2.2 2001/11/24 16:14:18 dd Exp $ -.\" $DragonFly: src/usr.sbin/rndcontrol/random.4,v 1.12 2008/06/23 06:22:15 swildner Exp $ +.\" $DragonFly: src/usr.sbin/rndcontrol/random.4,v 1.13 2008/10/17 11:30:24 swildner Exp $ .\" .Dd October 21, 1995 .Dt RANDOM 4 i386 @@ -102,7 +102,7 @@ is good enough). .Pp Root may write entropy to .Pa /dev/random -to seed the random number generator only if the securelevel is less then +to seed the random number generator only if the securelevel is less than or equal to zero and the .Va kern.seedenable sysctl is non-zero. A certain -- 2.41.0 From a287c64d2470c5a16da871d8a745788b2e08ca21 Mon Sep 17 00:00:00 2001 From: Sascha Wildner Date: Fri, 17 Oct 2008 12:41:38 +0000 Subject: [PATCH 09/16] Fix some typos. 
Found-by: LanguageTool --- lib/libc/sys/getrlimit.2 | 4 ++-- share/man/man3/queue.3 | 4 ++-- share/man/man4/carp.4 | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/libc/sys/getrlimit.2 b/lib/libc/sys/getrlimit.2 index a1b8c50bf8..a946f5b2c0 100644 --- a/lib/libc/sys/getrlimit.2 +++ b/lib/libc/sys/getrlimit.2 @@ -31,7 +31,7 @@ .\" .\" @(#)getrlimit.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD: src/lib/libc/sys/getrlimit.2,v 1.10.2.5 2001/12/14 18:34:00 ru Exp $ -.\" $DragonFly: src/lib/libc/sys/getrlimit.2,v 1.4 2004/06/06 13:33:28 hmp Exp $ +.\" $DragonFly: src/lib/libc/sys/getrlimit.2,v 1.5 2008/10/17 12:41:38 swildner Exp $ .\" .Dd June 4, 1993 .Dt GETRLIMIT 2 @@ -149,7 +149,7 @@ if this signal is not caught by a handler using the signal stack, this signal will kill the process. .Pp -A file I/O operation that would create a file larger that the process' +A file I/O operation that would create a file larger than the process' soft limit will cause the write to fail and a signal .Dv SIGXFSZ to be diff --git a/share/man/man3/queue.3 b/share/man/man3/queue.3 index ee0b0f64c2..04623f4ed1 100644 --- a/share/man/man3/queue.3 +++ b/share/man/man3/queue.3 @@ -31,7 +31,7 @@ .\" .\" @(#)queue.3 8.2 (Berkeley) 1/24/94 .\" $FreeBSD: src/share/man/man3/queue.3,v 1.15.2.7 2001/12/18 10:09:02 ru Exp $ -.\" $DragonFly: src/share/man/man3/queue.3,v 1.7 2008/08/28 09:35:53 hasso Exp $ +.\" $DragonFly: src/share/man/man3/queue.3,v 1.8 2008/10/17 12:41:38 swildner Exp $ .\" .Dd August 28, 2008 .Dt QUEUE 3 @@ -673,7 +673,7 @@ evaluates to an initializer for the list .Pp The macro .Nm LIST_EMPTY -evaluates to true if their are no elements in the list. +evaluates to true if there are no elements in the list. 
.Pp The macro .Nm LIST_ENTRY diff --git a/share/man/man4/carp.4 b/share/man/man4/carp.4 index 75a66f9cae..6515c02dd5 100644 --- a/share/man/man4/carp.4 +++ b/share/man/man4/carp.4 @@ -24,7 +24,7 @@ .\" .\" $OpenBSD: carp.4,v 1.16 2004/12/07 23:41:35 jmc Exp $ .\" $FreeBSD: src/share/man/man4/carp.4,v 1.10 2006/06/07 10:26:51 glebius Exp $ -.\" $DragonFly: src/share/man/man4/carp.4,v 1.3 2007/08/18 18:04:59 swildner Exp $ +.\" $DragonFly: src/share/man/man4/carp.4,v 1.4 2008/10/17 12:41:38 swildner Exp $ .\" .Dd April 9, 2007 .Dt CARP 4 @@ -83,7 +83,7 @@ parameter stands for .Dq "advertisement skew" . It is measured in 1/256 of seconds. It is added to the base advertisement interval to make one host advertise -a bit slower that the other does. +a bit slower than the other does. Both .Cm advbase and -- 2.41.0 From a7d57e629e0fd7b7d9244a45e852a62657f21971 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Fri, 17 Oct 2008 14:12:23 +0000 Subject: [PATCH 10/16] Correct jumbo frame support for 8168C/CP/D. These newer chips use an ancient design, which does _not_ support gathering RX. An even worse aspect of the new chips' design is that it is not compatible with the old ones: the buffer length field in the RX descriptor seems to be completely ignored by the hardware. This means host memory will be trashed by the hardware if the driver uses gathering RX. Allocate a jumbo buffer pool for these chips and configure the "max RX packet size" register according to MTU. --- sys/dev/netif/re/if_re.c | 282 ++++++++++++++++++++++++++++++++++-- sys/dev/netif/re/if_revar.h | 28 +++- 2 files changed, 293 insertions(+), 17 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index b9e3cdf214..a77e7b12a6 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.85 2008/10/16 14:58:50 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.86 2008/10/17 14:12:23 sephe Exp $ */ /* @@ -228,15 +228,15 @@ static const struct re_hwrev re_hwrevs[] = { { RE_HWREV_8168C, RE_MACVER_29, RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | - RE_C_AUTOPAD }, + RE_C_AUTOPAD | RE_C_CONTIGRX }, { RE_HWREV_8168CP, RE_MACVER_2B, RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | - RE_C_AUTOPAD }, + RE_C_AUTOPAD | RE_C_CONTIGRX }, { RE_HWREV_8168D, RE_MACVER_2A, RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | - RE_C_AUTOPAD }, + RE_C_AUTOPAD | RE_C_CONTIGRX }, { RE_HWREV_8100E, RE_MACVER_UNKN, RE_C_HWCSUM }, @@ -270,7 +270,8 @@ static int re_allocmem(device_t); static void re_freemem(device_t); static void re_freebufmem(struct re_softc *, int, int); static int re_encap(struct re_softc *, struct mbuf **, int *); -static int re_newbuf(struct re_softc *, int, int); +static int re_newbuf_std(struct re_softc *, int, int); +static int re_newbuf_jumbo(struct re_softc *, int, int); static void re_setup_rxdesc(struct re_softc *, int); static int re_rx_list_init(struct re_softc *); static int re_tx_list_init(struct re_softc *); @@ -319,6 +320,12 @@ static int re_sysctl_txtime(SYSCTL_HANDLER_ARGS); static int re_sysctl_simtime(SYSCTL_HANDLER_ARGS); static int re_sysctl_imtype(SYSCTL_HANDLER_ARGS); +static int re_jpool_alloc(struct re_softc *); +static void re_jpool_free(struct re_softc *); +static struct re_jbuf *re_jbuf_alloc(struct re_softc *); +static void re_jbuf_free(void *); +static void re_jbuf_ref(void *); + #ifdef RE_DIAG static int re_diag(struct re_softc *); #endif @@ -1217,6 +1224,15 @@ re_allocmem(device_t dev) return(error); } } + + /* Create jumbo buffer pool for RX if required */ + if (sc->re_caps & RE_C_CONTIGRX) { + error = re_jpool_alloc(sc); + if (error) { + 
re_jpool_free(sc); + sc->re_maxmtu = ETHERMTU; + } + } return(0); } @@ -1280,6 +1296,9 @@ re_freemem(device_t dev) bus_dma_tag_destroy(sc->re_ldata.re_stag); } + if (sc->re_caps & RE_C_CONTIGRX) + re_jpool_free(sc); + if (sc->re_parent_tag) bus_dma_tag_destroy(sc->re_parent_tag); @@ -1327,6 +1346,9 @@ re_attach(device_t dev) if (sc->re_tx_desc_cnt > qlen) qlen = sc->re_tx_desc_cnt; + sc->re_rxbuf_size = MCLBYTES; + sc->re_newbuf = re_newbuf_std; + sc->re_tx_time = 5; /* 125us */ sc->re_rx_time = 2; /* 50us */ sc->re_sim_time = 125; /* 125us */ @@ -1678,14 +1700,14 @@ re_setup_rxdesc(struct re_softc *sc, int idx) d->re_bufaddr_lo = htole32(RE_ADDR_LO(paddr)); d->re_bufaddr_hi = htole32(RE_ADDR_HI(paddr)); - cmdstat = MCLBYTES | RE_RDESC_CMD_OWN; + cmdstat = sc->re_rxbuf_size | RE_RDESC_CMD_OWN; if (idx == (sc->re_rx_desc_cnt - 1)) - cmdstat |= RE_TDESC_CMD_EOR; + cmdstat |= RE_RDESC_CMD_EOR; d->re_cmdstat = htole32(cmdstat); } static int -re_newbuf(struct re_softc *sc, int idx, int init) +re_newbuf_std(struct re_softc *sc, int idx, int init) { struct re_dmaload_arg arg; bus_dma_segment_t seg; @@ -1752,6 +1774,60 @@ back: return error; } +static int +re_newbuf_jumbo(struct re_softc *sc, int idx, int init) +{ + struct mbuf *m; + struct re_jbuf *jbuf; + int error = 0; + + MGETHDR(m, init ? 
MB_WAIT : MB_DONTWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + if (init) { + if_printf(&sc->arpcom.ac_if, "MGETHDR failed\n"); + return error; + } else { + goto back; + } + } + + jbuf = re_jbuf_alloc(sc); + if (jbuf == NULL) { + m_freem(m); + + error = ENOBUFS; + if (init) { + if_printf(&sc->arpcom.ac_if, "jpool is empty\n"); + return error; + } else { + goto back; + } + } + + m->m_ext.ext_arg = jbuf; + m->m_ext.ext_buf = jbuf->re_buf; + m->m_ext.ext_free = re_jbuf_free; + m->m_ext.ext_ref = re_jbuf_ref; + m->m_ext.ext_size = sc->re_rxbuf_size; + + m->m_data = m->m_ext.ext_buf; + m->m_flags |= M_EXT; + m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; + + /* + * NOTE: + * Some re(4) chips(e.g. RTL8101E) need address of the receive buffer + * to be 8-byte aligned, so don't call m_adj(m, ETHER_ALIGN) here. + */ + + sc->re_ldata.re_rx_mbuf[idx] = m; + sc->re_ldata.re_rx_paddr[idx] = jbuf->re_paddr; +back: + re_setup_rxdesc(sc, idx); + return error; +} + static int re_tx_list_init(struct re_softc *sc) { @@ -1776,7 +1852,7 @@ re_rx_list_init(struct re_softc *sc) bzero(sc->re_ldata.re_rx_list, RE_RX_LIST_SZ(sc)); for (i = 0; i < sc->re_rx_desc_cnt; i++) { - error = re_newbuf(sc, i, 1); + error = sc->re_newbuf(sc, i, 1); if (error) return(error); } @@ -1846,13 +1922,18 @@ re_rxeof(struct re_softc *sc) rx = 1; +#ifdef INVARIANTS + if (sc->re_flags & RE_F_USE_JPOOL) + KKASSERT(rxstat & RE_RDESC_STAT_EOF); +#endif + if ((rxstat & RE_RDESC_STAT_EOF) == 0) { if (sc->re_drop_rxfrag) { re_setup_rxdesc(sc, i); continue; } - if (re_newbuf(sc, i, 0)) { + if (sc->re_newbuf(sc, i, 0)) { /* Drop upcoming fragments */ sc->re_drop_rxfrag = 1; continue; @@ -1914,9 +1995,8 @@ re_rxeof(struct re_softc *sc) * reload the current one. 
*/ - if (re_newbuf(sc, i, 0)) { + if (sc->re_newbuf(sc, i, 0)) { ifp->if_ierrors++; - re_free_rxchain(sc); continue; } @@ -2536,6 +2616,19 @@ re_init(void *xsc) */ re_stop(sc); + if (sc->re_caps & RE_C_CONTIGRX) { + if (ifp->if_mtu > ETHERMTU) { + KKASSERT(sc->re_ldata.re_jbuf != NULL); + sc->re_flags |= RE_F_USE_JPOOL; + sc->re_rxbuf_size = RE_JUMBO_FRAME_9K; + sc->re_newbuf = re_newbuf_jumbo; + } else { + sc->re_flags &= ~RE_F_USE_JPOOL; + sc->re_rxbuf_size = MCLBYTES; + sc->re_newbuf = re_newbuf_std; + } + } + /* * Adjust max read request size according to MTU. * Mainly to improve TX performance for common case (ETHERMTU). @@ -2690,8 +2783,12 @@ re_init(void *xsc) * For 8169 gigE NICs, set the max allowed RX packet * size so we can receive jumbo frames. */ - if (!RE_IS_8139CP(sc)) - CSR_WRITE_2(sc, RE_MAXRXPKTLEN, 16383); + if (!RE_IS_8139CP(sc)) { + if (sc->re_caps & RE_C_CONTIGRX) + CSR_WRITE_2(sc, RE_MAXRXPKTLEN, sc->re_rxbuf_size); + else + CSR_WRITE_2(sc, RE_MAXRXPKTLEN, 16383); + } if (sc->re_testmode) return; @@ -2855,8 +2952,10 @@ re_stop(struct re_softc *sc) /* Free the RX list buffers. 
*/ for (i = 0; i < sc->re_rx_desc_cnt; i++) { if (sc->re_ldata.re_rx_mbuf[i] != NULL) { - bus_dmamap_unload(sc->re_ldata.re_mtag, - sc->re_ldata.re_rx_dmamap[i]); + if ((sc->re_flags & RE_F_USE_JPOOL) == 0) { + bus_dmamap_unload(sc->re_ldata.re_mtag, + sc->re_ldata.re_rx_dmamap[i]); + } m_freem(sc->re_ldata.re_rx_mbuf[i]); sc->re_ldata.re_rx_mbuf[i] = NULL; } @@ -3317,3 +3416,154 @@ re_set_max_readrq(struct re_softc *sc, uint16_t size) kprintf("-> 0x%04x\n", val); } } + +static int +re_jpool_alloc(struct re_softc *sc) +{ + struct re_list_data *ldata = &sc->re_ldata; + struct re_jbuf *jbuf; + bus_addr_t paddr; + bus_size_t jpool_size; + caddr_t buf; + int i, error; + + lwkt_serialize_init(&ldata->re_jbuf_serializer); + + ldata->re_jbuf = kmalloc(sizeof(struct re_jbuf) * RE_JBUF_COUNT(sc), + M_DEVBUF, M_WAITOK | M_ZERO); + + jpool_size = RE_JBUF_COUNT(sc) * RE_JBUF_SIZE; + + error = bus_dma_tag_create(sc->re_parent_tag, + RE_BUF_ALIGN, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + jpool_size, 1, /* nsegments, maxsize */ + BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + &ldata->re_jpool_tag); + if (error) { + device_printf(sc->re_dev, "could not allocate jumbo dma tag\n"); + return error; + } + + error = bus_dmamem_alloc(ldata->re_jpool_tag, (void **)&ldata->re_jpool, + BUS_DMA_WAITOK, &ldata->re_jpool_map); + if (error) { + device_printf(sc->re_dev, + "could not allocate jumbo dma memory\n"); + bus_dma_tag_destroy(ldata->re_jpool_tag); + ldata->re_jpool_tag = NULL; + return error; + } + + error = bus_dmamap_load(ldata->re_jpool_tag, ldata->re_jpool_map, + ldata->re_jpool, jpool_size, + re_dma_map_addr, &paddr, BUS_DMA_WAITOK); + if (error) { + device_printf(sc->re_dev, "could not load jumbo dma map\n"); + bus_dmamem_free(ldata->re_jpool_tag, ldata->re_jpool, + ldata->re_jpool_map); + bus_dma_tag_destroy(ldata->re_jpool_tag); + 
ldata->re_jpool_tag = NULL; + return error; + } + + /* ..and split it into 9KB chunks */ + SLIST_INIT(&ldata->re_jbuf_free); + + buf = ldata->re_jpool; + for (i = 0; i < RE_JBUF_COUNT(sc); i++) { + jbuf = &ldata->re_jbuf[i]; + + jbuf->re_sc = sc; + jbuf->re_inuse = 0; + jbuf->re_slot = i; + jbuf->re_buf = buf; + jbuf->re_paddr = paddr; + + SLIST_INSERT_HEAD(&ldata->re_jbuf_free, jbuf, re_link); + + buf += RE_JBUF_SIZE; + paddr += RE_JBUF_SIZE; + } + return 0; +} + +static void +re_jpool_free(struct re_softc *sc) +{ + struct re_list_data *ldata = &sc->re_ldata; + + if (ldata->re_jpool_tag != NULL) { + bus_dmamap_unload(ldata->re_jpool_tag, ldata->re_jpool_map); + bus_dmamem_free(ldata->re_jpool_tag, ldata->re_jpool, + ldata->re_jpool_map); + bus_dma_tag_destroy(ldata->re_jpool_tag); + ldata->re_jpool_tag = NULL; + } + + if (ldata->re_jbuf != NULL) { + kfree(ldata->re_jbuf, M_DEVBUF); + ldata->re_jbuf = NULL; + } +} + +static struct re_jbuf * +re_jbuf_alloc(struct re_softc *sc) +{ + struct re_list_data *ldata = &sc->re_ldata; + struct re_jbuf *jbuf; + + lwkt_serialize_enter(&ldata->re_jbuf_serializer); + + jbuf = SLIST_FIRST(&ldata->re_jbuf_free); + if (jbuf != NULL) { + SLIST_REMOVE_HEAD(&ldata->re_jbuf_free, re_link); + jbuf->re_inuse = 1; + } + + lwkt_serialize_exit(&ldata->re_jbuf_serializer); + + return jbuf; +} + +static void +re_jbuf_free(void *arg) +{ + struct re_jbuf *jbuf = arg; + struct re_softc *sc = jbuf->re_sc; + struct re_list_data *ldata = &sc->re_ldata; + + if (&ldata->re_jbuf[jbuf->re_slot] != jbuf) { + panic("%s: free wrong jumbo buffer\n", + sc->arpcom.ac_if.if_xname); + } else if (jbuf->re_inuse == 0) { + panic("%s: jumbo buffer already freed\n", + sc->arpcom.ac_if.if_xname); + } + + lwkt_serialize_enter(&ldata->re_jbuf_serializer); + atomic_subtract_int(&jbuf->re_inuse, 1); + if (jbuf->re_inuse == 0) + SLIST_INSERT_HEAD(&ldata->re_jbuf_free, jbuf, re_link); + lwkt_serialize_exit(&ldata->re_jbuf_serializer); +} + +static void +re_jbuf_ref(void 
*arg) +{ + struct re_jbuf *jbuf = arg; + struct re_softc *sc = jbuf->re_sc; + struct re_list_data *ldata = &sc->re_ldata; + + if (&ldata->re_jbuf[jbuf->re_slot] != jbuf) { + panic("%s: ref wrong jumbo buffer\n", + sc->arpcom.ac_if.if_xname); + } else if (jbuf->re_inuse == 0) { + panic("%s: jumbo buffer already freed\n", + sc->arpcom.ac_if.if_xname); + } + atomic_add_int(&jbuf->re_inuse, 1); +} diff --git a/sys/dev/netif/re/if_revar.h b/sys/dev/netif/re/if_revar.h index e03ca7e94a..12e3b1761c 100644 --- a/sys/dev/netif/re/if_revar.h +++ b/sys/dev/netif/re/if_revar.h @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/pci/if_rlreg.h,v 1.42 2004/05/24 19:39:23 jhb Exp $ - * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.26 2008/10/16 12:46:40 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.27 2008/10/17 14:12:23 sephe Exp $ */ #define RE_RX_DESC_CNT_8139CP 64 @@ -50,6 +50,7 @@ #define RE_IFQ_MAXLEN 512 #define RE_MAXSEGS 16 #define RE_TXDESC_SPARE 4 +#define RE_JBUF_COUNT(sc) (((sc)->re_rx_desc_cnt * 3) / 2) #define RE_RXDESC_INC(sc, x) (x = (x + 1) % (sc)->re_rx_desc_cnt) #define RE_TXDESC_INC(sc, x) (x = (x + 1) % (sc)->re_tx_desc_cnt) @@ -66,6 +67,10 @@ #define RE_FRAMELEN(mtu) (mtu + ETHER_HDR_LEN + ETHER_CRC_LEN) #define RE_SWCSUM_LIM_8169 2038 +#define RE_BUF_ALIGN 8 +#define RE_JUMBO_FRAME_9K 9022 +#define RE_JBUF_SIZE roundup2(RE_JUMBO_FRAME_9K, RE_BUF_ALIGN) + #define RE_TIMEOUT 1000 struct re_hwrev { @@ -102,6 +107,16 @@ struct re_dmaload_arg { bus_dma_segment_t *re_segs; }; +struct re_softc; +struct re_jbuf { + struct re_softc *re_sc; + int re_inuse; + int re_slot; + caddr_t re_buf; + bus_addr_t re_paddr; + SLIST_ENTRY(re_jbuf) re_link; +}; + struct re_list_data { struct mbuf **re_tx_mbuf; struct mbuf **re_rx_mbuf; @@ -126,6 +141,13 @@ struct re_list_data { bus_dmamap_t re_tx_list_map; struct re_desc *re_tx_list; bus_addr_t re_tx_list_addr; + + bus_dma_tag_t re_jpool_tag; + bus_dmamap_t re_jpool_map; + caddr_t 
re_jpool; + struct re_jbuf *re_jbuf; + struct lwkt_serialize re_jbuf_serializer; + SLIST_HEAD(, re_jbuf) re_jbuf_free; }; struct re_softc { @@ -162,6 +184,8 @@ struct re_softc { int re_tx_desc_cnt; int re_bus_speed; int rxcycles; + int re_rxbuf_size; + int (*re_newbuf)(struct re_softc *, int, int); uint32_t re_flags; /* see RE_F_ */ @@ -192,6 +216,7 @@ struct re_softc { #define RE_C_PHYPMGT 0x80 /* PHY supports power mgmt */ #define RE_C_8169 0x100 /* is 8110/8169 */ #define RE_C_AUTOPAD 0x200 /* hardware auto-pad short frames */ +#define RE_C_CONTIGRX 0x400 /* need contig buf to RX jumbo frames */ #define RE_IS_8139CP(sc) ((sc)->re_caps & RE_C_8139CP) @@ -201,6 +226,7 @@ struct re_softc { #define RE_IMTYPE_HW 2 /* hardware based */ #define RE_F_TIMER_INTR 0x1 +#define RE_F_USE_JPOOL 0x2 /* * register space access macros -- 2.41.0 From bb7bf7b235f83aed36bd11628dede746f4008b5e Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 18 Oct 2008 01:13:54 +0000 Subject: [PATCH 11/16] Add vfs.nfs.flush_on_hlink and default to off. 
--- sys/vfs/nfs/nfs_vnops.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sys/vfs/nfs/nfs_vnops.c b/sys/vfs/nfs/nfs_vnops.c index 14d9bb9bd3..5434c7fb52 100644 --- a/sys/vfs/nfs/nfs_vnops.c +++ b/sys/vfs/nfs/nfs_vnops.c @@ -35,7 +35,7 @@ * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 * $FreeBSD: src/sys/nfs/nfs_vnops.c,v 1.150.2.5 2001/12/20 19:56:28 dillon Exp $ - * $DragonFly: src/sys/vfs/nfs/nfs_vnops.c,v 1.79 2008/07/16 18:20:40 dillon Exp $ + * $DragonFly: src/sys/vfs/nfs/nfs_vnops.c,v 1.80 2008/10/18 01:13:54 dillon Exp $ */ @@ -229,6 +229,9 @@ SYSCTL_DECL(_vfs_nfs); static int nfs_flush_on_rename = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, flush_on_rename, CTLFLAG_RW, &nfs_flush_on_rename, 0, "flush fvp prior to rename"); +static int nfs_flush_on_hlink = 0; +SYSCTL_INT(_vfs_nfs, OID_AUTO, flush_on_hlink, CTLFLAG_RW, + &nfs_flush_on_hlink, 0, "flush fvp prior to hard link"); static int nfsaccess_cache_timeout = NFS_DEFATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, @@ -1723,8 +1726,12 @@ nfs_rename(struct vop_old_rename_args *ap) } /* - * We shouldn't have to flush fvp on rename as the file handle should - * not change, but the default is to do so. + * We shouldn't have to flush fvp on rename for most server-side + * filesystems as the file handle should not change. Unfortunately + * the inode for some filesystems (msdosfs) might be tied to the + * file name or directory position so to be completely safe + * vfs.nfs.flush_on_rename is set by default. Clear to improve + * performance. * * We must flush tvp on rename because it might become stale on the * server after the rename. @@ -1849,11 +1856,13 @@ nfs_link(struct vop_old_link_args *ap) } /* - * Push all writes to the server, so that the attribute cache - * doesn't get "out of sync" with the server. - * XXX There should be a better way! + * The attribute cache may get out of sync with the server on link. 
+ * Pushing writes to the server before handle was inherited from + * long long ago and it is unclear if we still need to do this. + * Defaults to off. */ - VOP_FSYNC(vp, MNT_WAIT); + if (nfs_flush_on_hlink) + VOP_FSYNC(vp, MNT_WAIT); v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_LINK]++; -- 2.41.0 From cc44085f89643c3793d324296845e227c6e97365 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 18 Oct 2008 01:15:33 +0000 Subject: [PATCH 12/16] Allow an alignment default of 0 to be treated as 1. Obtained-from: FreeBSD --- sys/bus/isa/isa_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/bus/isa/isa_common.c b/sys/bus/isa/isa_common.c index 6ce9e2200f..cc07df872f 100644 --- a/sys/bus/isa/isa_common.c +++ b/sys/bus/isa/isa_common.c @@ -24,7 +24,7 @@ * SUCH DAMAGE. * * $FreeBSD: src/sys/isa/isa_common.c,v 1.16.2.1 2000/09/16 15:49:52 roger Exp $ - * $DragonFly: src/sys/bus/isa/isa_common.c,v 1.13 2006/12/22 23:12:16 swildner Exp $ + * $DragonFly: src/sys/bus/isa/isa_common.c,v 1.14 2008/10/18 01:15:33 dillon Exp $ */ /* * Modifications for Intel architecture by Garrett A. Wollman. @@ -133,7 +133,7 @@ isa_find_memory(device_t child, size = config->ic_mem[i].ir_size, align = config->ic_mem[i].ir_align; start + size - 1 <= end; - start += align) { + start += MAX(align, 1)) { bus_set_resource(child, SYS_RES_MEMORY, i, start, size); res[i] = bus_alloc_resource(child, -- 2.41.0 From 50b3d04766fa6f664415b5a7ca418aeaf9aba264 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 18 Oct 2008 01:17:53 +0000 Subject: [PATCH 13/16] Try to do a better job aborting active requests when a usb mass storage device is pulled. 
--- sys/bus/usb/uhci.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sys/bus/usb/uhci.c b/sys/bus/usb/uhci.c index 40d3928ba1..8776797094 100644 --- a/sys/bus/usb/uhci.c +++ b/sys/bus/usb/uhci.c @@ -1,6 +1,6 @@ /* $NetBSD: uhci.c,v 1.170 2003/02/19 01:35:04 augustss Exp $ */ /* $FreeBSD: src/sys/dev/usb/uhci.c,v 1.162.2.1 2006/03/01 01:59:04 iedowse Exp $ */ -/* $DragonFly: src/sys/bus/usb/uhci.c,v 1.29 2008/08/13 00:55:55 dillon Exp $ */ +/* $DragonFly: src/sys/bus/usb/uhci.c,v 1.30 2008/10/18 01:17:53 dillon Exp $ */ /* Also already incorporated from NetBSD: * $NetBSD: uhci.c,v 1.172 2003/02/23 04:19:26 simonb Exp $ @@ -1192,11 +1192,7 @@ uhci_softintr(void *v) DPRINTFN(10,("%s: uhci_softintr (%d)\n", device_get_nameunit(sc->sc_bus.bdev), sc->sc_bus.intr_context)); - /* - * this is a software interrupt, not a real interrupt, - * do not bump intr_context - */ - /* sc->sc_bus.intr_context++; */ + sc->sc_bus.intr_context++; /* * Interrupts on UHCI really suck. When the host controller @@ -1219,7 +1215,7 @@ uhci_softintr(void *v) } #endif /* USB_USE_SOFTINTR */ - /* sc->sc_bus.intr_context--; */ + sc->sc_bus.intr_context--; } /* Check for an interrupt. */ @@ -1859,14 +1855,18 @@ uhci_abort_xfer(usbd_xfer_handle xfer, usbd_status status) return; } +#if 0 if (xfer->device->bus->intr_context /* || !curproc REMOVED DFly */) panic("uhci_abort_xfer: not in process context"); +#endif /* * If an abort is already in progress then just wait for it to * complete and return. */ + crit_enter(); if (uxfer->uhci_xfer_flags & UHCI_XFER_ABORTING) { + crit_exit(); DPRINTFN(2, ("uhci_abort_xfer: already aborting\n")); /* No need to wait if we're aborting from a timeout. 
*/ if (status == USBD_TIMEOUT) @@ -1875,15 +1875,16 @@ uhci_abort_xfer(usbd_xfer_handle xfer, usbd_status status) xfer->status = status; DPRINTFN(2, ("uhci_abort_xfer: waiting for abort to finish\n")); uxfer->uhci_xfer_flags |= UHCI_XFER_ABORTWAIT; + crit_enter(); while (uxfer->uhci_xfer_flags & UHCI_XFER_ABORTING) tsleep(&uxfer->uhci_xfer_flags, 0, "uhciaw", 0); + crit_exit(); return; } /* * Step 1: Make interrupt routine and hardware ignore xfer. */ - crit_enter(); uxfer->uhci_xfer_flags |= UHCI_XFER_ABORTING; xfer->status = status; /* make software ignore it */ callout_stop(&xfer->timeout_handle); -- 2.41.0 From a1dc54ffab632f65d64ace39f94521034fed0f2f Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Sat, 18 Oct 2008 03:00:29 +0000 Subject: [PATCH 14/16] Fix re_ioctl SIOCSIFCAP support, so that VLAN_HWTAGGING and VLAN_MTU could be turned off. --- sys/dev/netif/re/if_re.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index a77e7b12a6..386d0febfb 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.86 2008/10/17 14:12:23 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.87 2008/10/18 03:00:29 sephe Exp $ */ /* @@ -2651,7 +2651,9 @@ re_init(void *xsc) * before all others. */ CSR_WRITE_2(sc, RE_CPLUS_CMD, RE_CPLUSCMD_RXENB | RE_CPLUSCMD_TXENB | - RE_CPLUSCMD_PCI_MRW | RE_CPLUSCMD_VLANSTRIP | + RE_CPLUSCMD_PCI_MRW | + (ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? + RE_CPLUSCMD_VLANSTRIP : 0) | (ifp->if_capenable & IFCAP_RXCSUM ? 
RE_CPLUSCMD_RXCSUM_ENB : 0)); @@ -2845,7 +2847,7 @@ re_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) struct re_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; - int error = 0; + int error = 0, mask; ASSERT_SERIALIZED(ifp->if_serializer); @@ -2877,14 +2879,17 @@ re_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: - ifp->if_capenable &= ~(IFCAP_HWCSUM); - ifp->if_capenable |= - ifr->ifr_reqcap & (IFCAP_HWCSUM); - if (ifp->if_capenable & IFCAP_TXCSUM) - ifp->if_hwassist = RE_CSUM_FEATURES; - else - ifp->if_hwassist = 0; - if (ifp->if_flags & IFF_RUNNING) + mask = (ifr->ifr_reqcap ^ ifp->if_capenable) & + ifp->if_capabilities; + ifp->if_capenable ^= mask; + + if (mask & IFCAP_HWCSUM) { + if (ifp->if_capenable & IFCAP_TXCSUM) + ifp->if_hwassist = RE_CSUM_FEATURES; + else + ifp->if_hwassist = 0; + } + if (mask && (ifp->if_flags & IFF_RUNNING)) re_init(sc); break; default: -- 2.41.0 From 1dea81b1262da8a2b77748e4cbf59c861c7d8387 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Sat, 18 Oct 2008 04:44:41 +0000 Subject: [PATCH 15/16] Fix hardware vlan tagging support by setting vlan information on all TX descriptors for multi-segment packets. # Even with this fix in place, 8169 still does not work reliably with vlan. # Certain packets are never seen on the wire; maybe caused by the trailing # ether frame CRC generated by the hardware? --- sys/dev/netif/re/if_re.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index 386d0febfb..0ecea3e355 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.87 2008/10/18 03:00:29 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.88 2008/10/18 04:44:41 sephe Exp $ */ /* @@ -2296,7 +2296,7 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) bus_dmamap_t map; int error, maxsegs, idx, i; struct re_desc *d, *tx_ring; - uint32_t cmd_csum, ctl_csum; + uint32_t cmd_csum, ctl_csum, vlantag; KASSERT(sc->re_ldata.re_tx_free > RE_TXDESC_SPARE, ("not enough free TX desc\n")); @@ -2409,6 +2409,12 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) } } + vlantag = 0; + if (m->m_flags & M_VLANTAG) { + vlantag = htobe16(m->m_pkthdr.ether_vlantag) | + RE_TDESC_CTL_INSTAG; + } + maxsegs = sc->re_ldata.re_tx_free; if (maxsegs > RE_MAXSEGS) maxsegs = RE_MAXSEGS; @@ -2481,7 +2487,7 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) if (idx == (sc->re_tx_desc_cnt - 1)) cmdstat |= RE_TDESC_CMD_EOR; d->re_cmdstat = htole32(cmdstat | cmd_csum); - d->re_control = htole32(ctl_csum); + d->re_control = htole32(ctl_csum | vlantag); i++; if (i == arg.re_nsegs) @@ -2490,17 +2496,6 @@ re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0) } d->re_cmdstat |= htole32(RE_TDESC_CMD_EOF); - /* - * Set up hardware VLAN tagging. Note: vlan tag info must - * appear in the first descriptor of a multi-descriptor - * transmission attempt. - */ - if (m->m_flags & M_VLANTAG) { - tx_ring[*idx0].re_control |= - htole32(htobe16(m->m_pkthdr.ether_vlantag) | - RE_TDESC_CTL_INSTAG); - } - /* Transfer ownership of packet to the chip. */ d->re_cmdstat |= htole32(RE_TDESC_CMD_OWN); if (*idx0 != idx) -- 2.41.0 From b0c15aadac884841d46f5146a7d1bc0804a71d99 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Sat, 18 Oct 2008 11:26:52 +0000 Subject: [PATCH 16/16] - Don't claim 7422 MTU size is supported by various 8111/8169 chips (PCI devices); 6144 MTU size works reliably. 
Set MTU above 6144 (6 * 1024) on these chips and do the following test: netperf -H host -l 30 -t UDP_STREAM -- -m (mtu-28) All kinds of weirdness will pop up on the test box. - Set max supported MTU to 9216 for 8168D. Obtained-from: Realtek r8168-8.008.00 - Set max supported MTU to 6144 for non-8168D GigE chips. - Cleanup jumbo frame/MTU size related macros. # As usual, 8169(with 88E1000 PHY) does not seem to work well with any jumbo # frame size --- sys/dev/netif/re/if_re.c | 86 +++++++++++++++++-------------------- sys/dev/netif/re/if_revar.h | 22 ++++++---- 2 files changed, 53 insertions(+), 55 deletions(-) diff --git a/sys/dev/netif/re/if_re.c b/sys/dev/netif/re/if_re.c index 0ecea3e355..54bf0defe7 100644 --- a/sys/dev/netif/re/if_re.c +++ b/sys/dev/netif/re/if_re.c @@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $ - * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.88 2008/10/18 04:44:41 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_re.c,v 1.89 2008/10/18 11:26:52 sephe Exp $ */ /* @@ -196,61 +196,61 @@ static const struct re_type { }; static const struct re_hwrev re_hwrevs[] = { - { RE_HWREV_8139CPLUS, RE_MACVER_UNKN, + { RE_HWREV_8139CPLUS, RE_MACVER_UNKN, ETHERMTU, RE_C_HWCSUM | RE_C_8139CP }, - { RE_HWREV_8169, RE_MACVER_UNKN, - RE_C_HWCSUM | RE_C_JUMBO | RE_C_8169 }, + { RE_HWREV_8169, RE_MACVER_UNKN, RE_MTU_6K, + RE_C_HWCSUM | RE_C_8169 }, - { RE_HWREV_8110S, RE_MACVER_03, - RE_C_HWCSUM | RE_C_JUMBO | RE_C_8169 }, + { RE_HWREV_8110S, RE_MACVER_03, RE_MTU_6K, + RE_C_HWCSUM | RE_C_8169 }, - { RE_HWREV_8169S, RE_MACVER_03, - RE_C_HWCSUM | RE_C_JUMBO | RE_C_8169 }, + { RE_HWREV_8169S, RE_MACVER_03, RE_MTU_6K, + RE_C_HWCSUM | RE_C_8169 }, - { RE_HWREV_8169SB, RE_MACVER_04, - RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT | RE_C_8169 }, + { RE_HWREV_8169SB, RE_MACVER_04, RE_MTU_6K, + RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_8169 }, - { RE_HWREV_8169SC1, RE_MACVER_05, - RE_C_HWCSUM | RE_C_JUMBO | 
RE_C_PHYPMGT | RE_C_8169 }, + { RE_HWREV_8169SC1, RE_MACVER_05, RE_MTU_6K, + RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_8169 }, - { RE_HWREV_8169SC2, RE_MACVER_06, - RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT | RE_C_8169 }, + { RE_HWREV_8169SC2, RE_MACVER_06, RE_MTU_6K, + RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_8169 }, - { RE_HWREV_8168B1, RE_MACVER_21, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT }, + { RE_HWREV_8168B1, RE_MACVER_21, RE_MTU_6K, + RE_C_HWIM | RE_C_HWCSUM | RE_C_PHYPMGT }, - { RE_HWREV_8168B2, RE_MACVER_23, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT | RE_C_AUTOPAD }, + { RE_HWREV_8168B2, RE_MACVER_23, RE_MTU_6K, + RE_C_HWIM | RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_AUTOPAD }, - { RE_HWREV_8168B3, RE_MACVER_23, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_PHYPMGT | RE_C_AUTOPAD }, + { RE_HWREV_8168B3, RE_MACVER_23, RE_MTU_6K, + RE_C_HWIM | RE_C_HWCSUM | RE_C_PHYPMGT | RE_C_AUTOPAD }, - { RE_HWREV_8168C, RE_MACVER_29, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | + { RE_HWREV_8168C, RE_MACVER_29, RE_MTU_6K, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | RE_C_CONTIGRX }, - { RE_HWREV_8168CP, RE_MACVER_2B, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | + { RE_HWREV_8168CP, RE_MACVER_2B, RE_MTU_6K, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | RE_C_CONTIGRX }, - { RE_HWREV_8168D, RE_MACVER_2A, - RE_C_HWIM | RE_C_HWCSUM | RE_C_JUMBO | RE_C_MAC2 | RE_C_PHYPMGT | + { RE_HWREV_8168D, RE_MACVER_2A, RE_MTU_9K, + RE_C_HWIM | RE_C_HWCSUM | RE_C_MAC2 | RE_C_PHYPMGT | RE_C_AUTOPAD | RE_C_CONTIGRX }, - { RE_HWREV_8100E, RE_MACVER_UNKN, + { RE_HWREV_8100E, RE_MACVER_UNKN, ETHERMTU, RE_C_HWCSUM }, - { RE_HWREV_8101E1, RE_MACVER_16, + { RE_HWREV_8101E1, RE_MACVER_16, ETHERMTU, RE_C_HWCSUM }, - { RE_HWREV_8101E2, RE_MACVER_16, + { RE_HWREV_8101E2, RE_MACVER_16, ETHERMTU, RE_C_HWCSUM }, - { RE_HWREV_8102E, RE_MACVER_15, + { RE_HWREV_8102E, RE_MACVER_15, ETHERMTU, RE_C_HWCSUM | 
RE_C_MAC2 | RE_C_AUTOPAD }, - { RE_HWREV_8102EL, RE_MACVER_15, + { RE_HWREV_8102EL, RE_MACVER_15, ETHERMTU, RE_C_HWCSUM | RE_C_MAC2 | RE_C_AUTOPAD }, { RE_HWREV_NULL, 0, 0 } @@ -955,15 +955,8 @@ re_probe(device_t dev) sc->re_hwrev = hw_rev->re_hwrev; sc->re_macver = hw_rev->re_macver; sc->re_caps = hw_rev->re_caps; - - if (sc->re_caps & RE_C_JUMBO) { - sc->re_swcsum_lim = RE_JUMBO_MTU; - sc->re_maxmtu = RE_JUMBO_MTU; - } else { - sc->re_swcsum_lim = ETHERMTU; - sc->re_maxmtu = ETHERMTU; - } - sc->re_swcsum_lim += ETHER_HDR_LEN; + sc->re_maxmtu = hw_rev->re_maxmtu; + sc->re_swcsum_lim = RE_SWCSUM_UNLIMITED; /* * Apply chip property fixup @@ -1185,7 +1178,7 @@ re_allocmem(device_t dev) ETHER_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, - RE_JUMBO_FRAMELEN, RE_MAXSEGS, MCLBYTES, + RE_FRAMELEN_MAX, RE_MAXSEGS, MCLBYTES, BUS_DMA_ALLOCNOW, &sc->re_ldata.re_mtag); if (error) { @@ -1230,6 +1223,7 @@ re_allocmem(device_t dev) error = re_jpool_alloc(sc); if (error) { re_jpool_free(sc); + /* Disable jumbo frame support */ sc->re_maxmtu = ETHERMTU; } } @@ -2615,7 +2609,7 @@ re_init(void *xsc) if (ifp->if_mtu > ETHERMTU) { KKASSERT(sc->re_ldata.re_jbuf != NULL); sc->re_flags |= RE_F_USE_JPOOL; - sc->re_rxbuf_size = RE_JUMBO_FRAME_9K; + sc->re_rxbuf_size = RE_FRAMELEN_MAX; sc->re_newbuf = re_newbuf_jumbo; } else { sc->re_flags &= ~RE_F_USE_JPOOL; @@ -2710,12 +2704,10 @@ re_init(void *xsc) CSR_WRITE_4(sc, RE_TXCFG, RE_TXCFG_CONFIG); framelen = RE_FRAMELEN(ifp->if_mtu); - if (framelen < RE_FRAMELEN_2K) { - CSR_WRITE_1(sc, RE_EARLY_TX_THRESH, - howmany(RE_FRAMELEN_2K, 128)); - } else { + if (framelen < MCLBYTES) + CSR_WRITE_1(sc, RE_EARLY_TX_THRESH, howmany(MCLBYTES, 128)); + else CSR_WRITE_1(sc, RE_EARLY_TX_THRESH, howmany(framelen, 128)); - } CSR_WRITE_4(sc, RE_RXCFG, RE_RXCFG_CONFIG); diff --git a/sys/dev/netif/re/if_revar.h b/sys/dev/netif/re/if_revar.h index 12e3b1761c..692c919b03 100644 --- a/sys/dev/netif/re/if_revar.h +++ b/sys/dev/netif/re/if_revar.h 
@@ -33,7 +33,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/pci/if_rlreg.h,v 1.42 2004/05/24 19:39:23 jhb Exp $ - * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.27 2008/10/17 14:12:23 sephe Exp $ + * $DragonFly: src/sys/dev/netif/re/if_revar.h,v 1.28 2008/10/18 11:26:52 sephe Exp $ */ #define RE_RX_DESC_CNT_8139CP 64 @@ -61,21 +61,28 @@ #define RE_ADDR_LO(y) ((uint64_t) (y) & 0xFFFFFFFF) #define RE_ADDR_HI(y) ((uint64_t) (y) >> 32) -#define RE_JUMBO_FRAMELEN 7440 -#define RE_JUMBO_MTU (RE_JUMBO_FRAMELEN-ETHER_HDR_LEN-ETHER_CRC_LEN) -#define RE_FRAMELEN_2K 2048 -#define RE_FRAMELEN(mtu) (mtu + ETHER_HDR_LEN + ETHER_CRC_LEN) +#define RE_MTU_6K (6 * 1024) +#define RE_MTU_9K (9 * 1024) + +#define RE_ETHER_EXTRA (ETHER_HDR_LEN + ETHER_CRC_LEN + EVL_ENCAPLEN) +#define RE_FRAMELEN(mtu) ((mtu) + RE_ETHER_EXTRA) + +#define RE_FRAMELEN_6K RE_FRAMELEN(RE_MTU_6K) +#define RE_FRAMELEN_9K RE_FRAMELEN(RE_MTU_9K) +#define RE_FRAMELEN_MAX RE_FRAMELEN_9K + #define RE_SWCSUM_LIM_8169 2038 +#define RE_SWCSUM_UNLIMITED 65536 /* XXX should be enough */ #define RE_BUF_ALIGN 8 -#define RE_JUMBO_FRAME_9K 9022 -#define RE_JBUF_SIZE roundup2(RE_JUMBO_FRAME_9K, RE_BUF_ALIGN) +#define RE_JBUF_SIZE roundup2(RE_FRAMELEN_MAX, RE_BUF_ALIGN) #define RE_TIMEOUT 1000 struct re_hwrev { uint32_t re_hwrev; uint32_t re_macver; /* see RE_MACVER_ */ + int re_maxmtu; uint32_t re_caps; /* see RE_C_ */ }; @@ -210,7 +217,6 @@ struct re_softc { #define RE_C_PCI64 0x2 /* PCI-X */ #define RE_C_HWIM 0x4 /* hardware interrupt moderation */ #define RE_C_HWCSUM 0x8 /* hardware csum offload */ -#define RE_C_JUMBO 0x10 /* jumbo frame */ #define RE_C_8139CP 0x20 /* is 8139C+ */ #define RE_C_MAC2 0x40 /* MAC style 2 */ #define RE_C_PHYPMGT 0x80 /* PHY supports power mgmt */ -- 2.41.0