#
TGTS= all all-man buildkernel quickkernel nativekernel \
buildworld crossworld quickworld realquickworld checkdpadd clean \
- cleandepend cleandir depend distribute distributeworld everything \
+ cleandepend cleandir depend everything \
hierarchy install installcheck installkernel \
reinstallkernel installmost installworld libraries lint maninstall \
manlint mk most obj objlink regress rerelease tags update \
BITGTS:=${BITGTS} ${BITGTS:S/^/build/} ${BITGTS:S/^/install/}
.ORDER: buildworld installworld
-.ORDER: buildworld distributeworld
.ORDER: buildworld buildkernel
.ORDER: buildworld nativekernel
.ORDER: buildworld quickkernel
.ORDER: buildkernel installkernel
.ORDER: buildkernel reinstallkernel
.ORDER: quickworld installworld
-.ORDER: quickworld distributeworld
.ORDER: quickworld buildkernel
.ORDER: quickworld nativekernel
.ORDER: quickworld quickkernel
SUBDIR+= usr.sbin
.endif
-# etc must be last for "distribute" to work
.if exists(${.CURDIR}/etc)
SUBDIR+= etc
.endif
.if !defined(DESTDIR) || ${DESTDIR} == "" || ${DESTDIR} == "/"
@case `uname -r` in 1.2*|1.3-*|1.3.*|1.4.*|1.5.0-*|1.5.1-*|1.5.2-*|1.5.3-*) echo "You must upgrade your kernel to at least 1.5.4 and reboot before you can safely installworld, due to libc/system call ABI changes" ; exit 1;; esac
.endif
-# distributeworld
-#
-# Distributes everything compiled by a `buildworld'.
#
# installworld
#
# Installs everything compiled by a 'buildworld'.
#
-distributeworld installworld: installcheck
+installworld: installcheck
cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}
${INSTALL} -o root -g wheel -m 644 ${.CURDIR}/Makefile_upgrade.inc ${DESTDIR}/etc/upgrade/
cd ${.CURDIR}/share/man; ${MAKE} makedb
.endif
-redistribute:
- @echo "--------------------------------------------------------------"
- @echo ">>> Distributing everything.."
- @echo "--------------------------------------------------------------"
- cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute
-
#
# buildkernel, nativekernel, quickkernel, and installkernel
#
WARNS?= 2
.if exists(${.CURDIR}/../../secure) && !defined(NO_CRYPT)
-DISTRIBUTION=crypto
CFLAGS+=-DDES
DPADD= ${LIBCRYPTO}
LDADD= -lcrypto
# Use this directory as the source for new configuration files when upgrading
UPGRADE_SRCDIR?=${.CURDIR}
-distribute:
- cd ${.CURDIR} ; ${MAKE} distribution DESTDIR=${DISTDIR}/${DISTRIBUTION}
-
# Include file which contains obsolete files
.if exists(${DESTDIR}/etc/upgrade/Makefile_upgrade.inc)
.include "${DESTDIR}/etc/upgrade/Makefile_upgrade.inc"
-.else
- @echo "Please do a make installworld first. See build(7) for further"
- @echo "information."
.endif
remove-obsolete-files:
+ @if [ -z "${TO_REMOVE}" ]; then \
+ echo "Please do a 'make installworld' first."; \
+ echo "See build(7) for further information."; \
+ exit 1; \
+ fi;
@echo "===> Remove now obsolete files"
@for item in ${TO_REMOVE:M*.info.gz}; do \
if [ -e ${DESTDIR}$${item} ]; then \
useeditmode = 0;
if (tty_isediting())
useeditmode |= MODE_EDIT;
- if (tty_istrapsig)
+ if (tty_istrapsig())
useeditmode |= MODE_TRAPSIG;
if (tty_issofttab())
useeditmode |= MODE_SOFT_TAB;
CFLAGS+=-DKERBEROS
DPADD= ${LIBKRB} ${LIBCRYPTO}
LDADD= -lkrb -lcrypto
-DISTRIBUTION= krb
.endif
.include <bsd.prog.mk>
${MAKE} ${MFLAGS} obj; \
${MAKE} ${MFLAGS} depend all install
-CODAD= ${MAKE} ${MFLAGS} cleandir; \
- ${MAKE} ${MFLAGS} obj; \
- ${MAKE} ${MFLAGS} depend all distribute
-
# These are the programs which depend on secure libs
sprog:
cd ${SDIR}/bin/ed; ${CODAI}
# $FreeBSD: src/secure/Makefile.inc,v 1.13.2.5 2002/07/03 22:13:19 des Exp $
# $DragonFly: src/secure/Makefile.inc,v 1.4 2005/09/06 18:55:25 dillon Exp $
-DISTRIBUTION?=crypto
-
.if exists(${.CURDIR}/../../lib/libcrypt/obj)
CRYPTOBJDIR= ${.CURDIR}/../../lib/libcrypt/obj
.else
uidswap.c
.if defined(WANT_KERBEROS)
-DISTRIBUTION=krb5
CFLAGS+= -DKRB5 -DHEIMDAL
LDADD+= -lkrb5 -lasn1 -lcom_err -lmd -L${.OBJDIR}/../../../kerberos5/lib/libroken -lroken -lcrypt
DPADD+= ${LIBKRB5} ${LIBCOM_ERR} ${LIBASN1} ${LIBMD} ${LIBCRYPT}
-DHAVE_PAM_GETENVLIST -DHAVE_PAM_PUTENV
.if defined(WANT_KERBEROS)
-DISTRIBUTION=krb5
CFLAGS+= -DKRB5 -DHEIMDAL
SRCS+= auth-krb5.c
LDADD+= -lkrb5 -lasn1 -lcom_err -lmd -L${.OBJDIR}/../../../kerberos5/lib/libroken -lroken
acx.4 \
adv.4 \
adw.4 \
+ ae.4 \
age.4 \
agp.4 \
aha.4 \
--- /dev/null
+.\" Copyright (c) 2008 Stanislav Sedov <stas@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: src/share/man/man4/ae.4,v 1.2.2.1.4.1 2009/04/15 03:14:26 kensmith Exp $
+.\"
+.Dd July 13, 2009
+.Dt AE 4
+.Os
+.Sh NAME
+.Nm ae
+.Nd "Attansic/Atheros L2 FastEthernet controller driver"
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following lines in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device miibus"
+.Cd "device ae"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+if_ae_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+device driver provides support for Attansic/Atheros L2 PCIe FastEthernet
+controllers.
+.Pp
+The controller supports hardware Ethernet checksum processing, hardware
+VLAN tag stripping/insertion and an interrupt moderation mechanism.
+Attansic L2 also features a 64-bit multicast hash filter.
+.Pp
+The
+.Nm
+driver supports the following media types:
+.Bl -tag -width ".Cm 10baseT/UTP"
+.It Cm autoselect
+Enable autoselection of the media type and options.
+The user can manually override the autoselected mode by
+adding media options to
+.Xr rc.conf 5 .
+.It Cm 10baseT/UTP
+Select 10Mbps operation.
+.It Cm 100baseTX
+Set 100Mbps (FastEthernet) operation.
+.El
+.Pp
+The
+.Nm
+driver provides support for the following media options:
+.Bl -tag -width ".Cm full-duplex"
+.It Cm full-duplex
+Force full duplex operation.
+.It Cm half-duplex
+Force half duplex operation.
+.El
+.Pp
+For more information on configuring this device, see
+.Xr ifconfig 8 .
+.Sh HARDWARE
+The
+.Nm
+driver supports Attansic/Atheros L2 PCIe FastEthernet controllers, and
+is known to support the following hardware:
+.Pp
+.Bl -bullet -compact
+.It
+ASUS EeePC 701
+.It
+ASUS EeePC 900
+.El
+.Pp
+Other hardware may or may not work with this driver.
+.Sh SYSCTL VARIABLES
+The
+.Nm
+driver collects a number of useful MAC counters during operation.
+The statistics are available via the
+.Va hw.ae%d.stats
+.Xr sysctl 8
+tree, where %d corresponds to the controller number.
+.Sh DIAGNOSTICS
+.Bl -diag
+.It "ae%d: watchdog timeout."
+The device has stopped responding to the network, or there is a problem with
+the network connection (cable).
+.It "ae%d: reset timeout."
+The card reset operation has timed out.
+.It "ae%d: Generating random ethernet address."
+No valid Ethernet address was found in either the controller registers or
+NVRAM.
+A random locally administered address with the ASUS OUI identifier will be used
+instead.
+.El
+.Sh SEE ALSO
+.Xr arp 4 ,
+.Xr ifmedia 4 ,
+.Xr miibus 4 ,
+.Xr netintro 4 ,
+.Xr ng_ether 4 ,
+.Xr vlan 4 ,
+.Xr ifconfig 8
+.Sh BUGS
+The Attansic L2 FastEthernet controller supports DMA but does not use a
+descriptor based transfer mechanism via scatter-gather DMA.
+Thus the data should be copied to/from the controller memory on each
+transmit/receive.
+Furthermore, a lot of data alignment restrictions apply.
+This may introduce a high CPU load on systems with heavy network activity.
+Luckily enough this should not be a problem on modern hardware as L2 does
+not support speeds faster than 100Mbps.
+.Sh HISTORY
+The
+.Nm
+driver and this manual page were written by
+.An Stanislav Sedov
+.Aq stas@FreeBSD.org .
+It first appeared in
+.Fx 7.1 .
Right button status; cleared if pressed, otherwise set.
.El
.It Byte 2
-Horizontal movement count in two's compliment; -128 through 127.
+Horizontal movement count in two's complement; -128 through 127.
.It Byte 3
-Vertical movement count in two's compliment; -128 through 127.
+Vertical movement count in two's complement; -128 through 127.
.It Byte 4
Always zero.
.It Byte 5
.endif
.endif
-DISTRIBUTION?= doc
-
.include <bsd.obj.mk>
#
# +++ variables +++
#
-# DISTRIBUTION Name of distribution. [bin]
-#
# SUBDIR A list of subdirectories that should be built as well.
# Each of the targets will execute the same target in the
# subdirectories.
#
# +++ targets +++
#
-# distribute:
-# This is a variant of install, which will
-# put the stuff into the right "distribution".
-#
# afterinstall, all, all-man, beforeinstall, checkdpadd,
# clean, cleandepend, cleandir, depend, install, lint, maninstall,
# manlint, obj, objlink, realinstall, regress, tags
.include <bsd.init.mk>
-DISTRIBUTION?= bin
-.if !target(distribute)
-distribute:
-.for dist in ${DISTRIBUTION}
- cd ${.CURDIR}; \
- ${MAKE} install -DNO_SUBDIR DESTDIR=${DISTDIR}/${dist} SHARED=copies
-.endfor
-.endif
-
_SUBDIR: .USE
.if defined(SUBDIR) && !empty(SUBDIR) && !defined(NO_SUBDIR)
@for entry in ${SUBDIR}; do \
.for __target in all all-man checkdpadd clean cleandepend cleandir \
- depend distribute lint maninstall manlint \
+ depend lint maninstall manlint \
obj objlink realinstall regress tags
${__target}: _SUBDIR
.endfor
.ORDER: clean cleandepend cleandir cleanobj \
obj objlink tags depend all all-man \
- install maninstall realinstall distribute
-
+ install maninstall realinstall
dev/disk/advansys/adwcam.c optional adw
dev/disk/advansys/adwlib.c optional adw
dev/disk/advansys/adwmcode.c optional adw
+dev/netif/ae/if_ae.c optional ae
+dev/netif/age/if_age.c optional age
dev/netif/an/if_an.c optional an
dev/netif/an/if_an_isa.c optional an isa
dev/netif/an/if_an_pci.c optional an pci
# PCI Ethernet NICs that use the common MII bus controller code.
# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
device miibus # MII bus support
+device ae # Attansic/Atheros L2 Fast Ethernet
+device age # Attansic/Atheros L1 Gigabit Ethernet
device ale # Atheros AR8121/AR8113/AR8114
device bce # Broadcom NetXtreme II Gigabit Ethernet
device bfe # Broadcom BCM440x 10/100 Ethernet
device miibus
# PCI Ethernet NICs that use the common MII bus controller code.
+device ae # Attansic/Atheros L2 Fast Ethernet
device ale # Atheros AR8121/AR8113/AR8114
+device age # Attansic/Atheros L1 Gigabit Ethernet
device bce # Broadcom NetXtreme II Gigabit Ethernet
device bfe # Broadcom BCM440x 10/100 Ethernet
device dc # DEC/Intel 21143 and various workalikes
SUBDIR= an acx age ale ar ath aue axe bce bfe bge bwi cue dc ed em ep et fwe \
fxp iwi iwl jme kue lge lnc mii_layer my msk mxge nfe nge pcn ral re \
rl rtw rue rum sbni sbsh sf sis sk sln sr ste stge ti tl tx txp ural \
- vge vr vx wb wi xe xl ig_hal emx
+ vge vr vx wb wi xe xl ig_hal emx ae
.include <bsd.subdir.mk>
--- /dev/null
+# Kernel module description for if_ae: the driver source plus the
+# interface headers it is built against.
+KMOD=	if_ae
+SRCS=	if_ae.c miibus_if.h device_if.h bus_if.h pci_if.h
+KMODDEPS= miibus
+
+.include <bsd.kmod.mk>
--- /dev/null
+/*-
+ * Copyright (c) 2008 Stanislav Sedov <stas@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Driver for Attansic Technology Corp. L2 FastEthernet adapter.
+ *
+ * This driver is heavily based on age(4) Attansic L1 driver by Pyun YongHyeon.
+ *
+ * $FreeBSD: src/sys/dev/ae/if_ae.c,v 1.1.2.3.2.1 2009/04/15 03:14:26 kensmith Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/rman.h>
+#include <sys/serialize.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/bpf.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/ifq_var.h>
+#include <net/vlan/if_vlan_var.h>
+#include <net/vlan/if_vlan_ether.h>
+
+#include <bus/pci/pcireg.h>
+#include <bus/pci/pcivar.h>
+#include <bus/pci/pcidevs.h>
+
+#include <dev/netif/mii_layer/miivar.h>
+
+#include <dev/netif/ae/if_aereg.h>
+#include <dev/netif/ae/if_aevar.h>
+
+/* "device miibus" required. See GENERIC if you get errors here. */
+#include "miibus_if.h"
+
+/*
+ * Devices supported by this driver.
+ *
+ * Each entry pairs a PCI vendor/device ID with the description string
+ * that ae_probe() installs via device_set_desc() on a match.  The table
+ * ends with an entry whose ae_name is NULL; ae_probe() stops scanning
+ * when it reaches that entry.
+ */
+static const struct ae_dev {
+ uint16_t ae_vendorid; /* compared against pci_get_vendor() */
+ uint16_t ae_deviceid; /* compared against pci_get_device() */
+ const char *ae_name; /* description; NULL marks the end of the table */
+} ae_devs[] = {
+ { VENDORID_ATTANSIC, DEVICEID_ATTANSIC_L2,
+ "Attansic Technology Corp, L2 Fast Ethernet" },
+ /* Required last entry */
+ { 0, 0, NULL }
+};
+
+
+static int ae_probe(device_t);
+static int ae_attach(device_t);
+static int ae_detach(device_t);
+static int ae_shutdown(device_t);
+static int ae_suspend(device_t);
+static int ae_resume(device_t);
+static int ae_miibus_readreg(device_t, int, int);
+static int ae_miibus_writereg(device_t, int, int, int);
+static void ae_miibus_statchg(device_t);
+
+static int ae_mediachange(struct ifnet *);
+static void ae_mediastatus(struct ifnet *, struct ifmediareq *);
+static void ae_init(void *);
+static void ae_start(struct ifnet *);
+static int ae_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
+static void ae_watchdog(struct ifnet *);
+static void ae_stop(struct ae_softc *);
+static void ae_tick(void *);
+
+static void ae_intr(void *);
+static void ae_tx_intr(struct ae_softc *);
+static void ae_rx_intr(struct ae_softc *);
+static int ae_rxeof(struct ae_softc *, struct ae_rxd *);
+
+static int ae_encap(struct ae_softc *, struct mbuf **);
+static void ae_sysctl_node(struct ae_softc *);
+static void ae_phy_reset(struct ae_softc *);
+static int ae_reset(struct ae_softc *);
+static void ae_pcie_init(struct ae_softc *);
+static void ae_get_eaddr(struct ae_softc *);
+static void ae_dma_free(struct ae_softc *);
+static int ae_dma_alloc(struct ae_softc *);
+static void ae_mac_config(struct ae_softc *);
+static void ae_stop_rxmac(struct ae_softc *);
+static void ae_stop_txmac(struct ae_softc *);
+static void ae_rxfilter(struct ae_softc *);
+static void ae_rxvlan(struct ae_softc *);
+static void ae_update_stats_rx(uint16_t, struct ae_stats *);
+static void ae_update_stats_tx(uint16_t, struct ae_stats *);
+static void ae_powersave_disable(struct ae_softc *);
+static void ae_powersave_enable(struct ae_softc *);
+
+static device_method_t ae_methods[] = { /* method table referenced by ae_driver */
+ /* Device interface: probe/attach/detach plus shutdown, suspend and resume. */
+ DEVMETHOD(device_probe, ae_probe),
+ DEVMETHOD(device_attach, ae_attach),
+ DEVMETHOD(device_detach, ae_detach),
+ DEVMETHOD(device_shutdown, ae_shutdown),
+ DEVMETHOD(device_suspend, ae_suspend),
+ DEVMETHOD(device_resume, ae_resume),
+
+ /* Bus interface: both methods are delegated to the bus_generic_* helpers. */
+ DEVMETHOD(bus_print_child, bus_generic_print_child),
+ DEVMETHOD(bus_driver_added, bus_generic_driver_added),
+
+ /* MII interface: PHY register accessors and the media status-change callback. */
+ DEVMETHOD(miibus_readreg, ae_miibus_readreg),
+ DEVMETHOD(miibus_writereg, ae_miibus_writereg),
+ DEVMETHOD(miibus_statchg, ae_miibus_statchg),
+ { NULL, NULL } /* last entry of the table */
+};
+
+static driver_t ae_driver = {
+ "ae",
+ ae_methods,
+ sizeof(struct ae_softc) /* size of the driver's softc structure */
+};
+
+static devclass_t ae_devclass;
+DECLARE_DUMMY_MODULE(if_ae);
+MODULE_DEPEND(if_ae, miibus, 1, 1, 1); /* if_ae depends on the miibus module */
+DRIVER_MODULE(if_ae, pci, ae_driver, ae_devclass, 0, 0);
+DRIVER_MODULE(miibus, ae, miibus_driver, miibus_devclass, 0, 0);
+
+/*
+ * Register access macros.
+ *
+ * AE_WRITE_n()/AE_READ_n() access an n-byte device register at offset
+ * `reg' through the bus space tag/handle stored in the softc
+ * (ae_mem_bt/ae_mem_bh).
+ */
+#define AE_WRITE_4(_sc, reg, val)	\
+	bus_space_write_4((_sc)->ae_mem_bt, (_sc)->ae_mem_bh, (reg), (val))
+#define AE_WRITE_2(_sc, reg, val)	\
+	bus_space_write_2((_sc)->ae_mem_bt, (_sc)->ae_mem_bh, (reg), (val))
+#define AE_WRITE_1(_sc, reg, val)	\
+	bus_space_write_1((_sc)->ae_mem_bt, (_sc)->ae_mem_bh, (reg), (val))
+#define AE_READ_4(_sc, reg)	\
+	bus_space_read_4((_sc)->ae_mem_bt, (_sc)->ae_mem_bh, (reg))
+#define AE_READ_2(_sc, reg)	\
+	bus_space_read_2((_sc)->ae_mem_bt, (_sc)->ae_mem_bh, (reg))
+#define AE_READ_1(_sc, reg)	\
+	bus_space_read_1((_sc)->ae_mem_bt, (_sc)->ae_mem_bh, (reg))
+
+/*
+ * PHY register access through the MII accessors, always using PHY
+ * address 0.  Arguments are parenthesised so that arbitrary expressions
+ * can be passed safely.
+ */
+#define AE_PHY_READ(sc, reg)	\
+	ae_miibus_readreg((sc)->ae_dev, 0, (reg))
+#define AE_PHY_WRITE(sc, reg, val)	\
+	ae_miibus_writereg((sc)->ae_dev, 0, (reg), (val))
+
+/*
+ * NOTE: despite its name, this evaluates to true when the two address
+ * words are both zero or are 0xffffffff/0xffff, i.e. when the address
+ * looks unprogrammed.  NOTE(review): callers are outside this view;
+ * confirm they treat a true result as "address not usable".
+ */
+#define AE_CHECK_EADDR_VALID(eaddr)	\
+	(((eaddr)[0] == 0 && (eaddr)[1] == 0) ||	\
+	((eaddr)[0] == 0xffffffff && (eaddr)[1] == 0xffff))
+
+/*
+ * VLAN tag bit shuffling.  AE_RXD_VLAN() rearranges the tag found in an
+ * Rx descriptor before it is stored in the mbuf; AE_TXD_VLAN() performs
+ * the opposite rearrangement.
+ */
+#define AE_RXD_VLAN(vtag)	\
+	(((vtag) >> 4) | (((vtag) & 0x07) << 13) | (((vtag) & 0x08) << 9))
+#define AE_TXD_VLAN(vtag)	\
+	(((vtag) << 4) | (((vtag) >> 13) & 0x07) | (((vtag) >> 9) & 0x08))
+
+/*
+ * ae statistics.
+ *
+ * Each entry names a sysctl leaf, gives its description and records the
+ * byte offset of the matching counter inside struct ae_stats.
+ * ae_sysctl_node() walks both tables and exports every counter as a
+ * read-only unsigned integer.
+ *
+ * The tables are only referenced from this file and are never modified,
+ * so they are declared static const.
+ */
+#define STATS_ENTRY(node, desc, field)	\
+	{ node, desc, offsetof(struct ae_stats, field) }
+static const struct {
+	const char	*node;		/* sysctl leaf name */
+	const char	*desc;		/* sysctl description */
+	intptr_t	offset;		/* offset of counter in struct ae_stats */
+} ae_stats_tx[] = {
+	STATS_ENTRY("bcast", "broadcast frames", tx_bcast),
+	STATS_ENTRY("mcast", "multicast frames", tx_mcast),
+	STATS_ENTRY("pause", "PAUSE frames", tx_pause),
+	STATS_ENTRY("control", "control frames", tx_ctrl),
+	STATS_ENTRY("defers", "deferrals occurred", tx_defer),
+	STATS_ENTRY("exc_defers", "excessive deferrals occurred", tx_excdefer),
+	STATS_ENTRY("singlecols", "single collisions occurred", tx_singlecol),
+	STATS_ENTRY("multicols", "multiple collisions occurred", tx_multicol),
+	STATS_ENTRY("latecols", "late collisions occurred", tx_latecol),
+	STATS_ENTRY("aborts", "transmit aborts due to collisions", tx_abortcol),
+	STATS_ENTRY("underruns", "Tx FIFO underruns", tx_underrun)
+}, ae_stats_rx[] = {
+	STATS_ENTRY("bcast", "broadcast frames", rx_bcast),
+	STATS_ENTRY("mcast", "multicast frames", rx_mcast),
+	STATS_ENTRY("pause", "PAUSE frames", rx_pause),
+	STATS_ENTRY("control", "control frames", rx_ctrl),
+	STATS_ENTRY("crc_errors", "frames with CRC errors", rx_crcerr),
+	STATS_ENTRY("code_errors", "frames with invalid opcode", rx_codeerr),
+	STATS_ENTRY("runt", "runt frames", rx_runt),
+	STATS_ENTRY("frag", "fragmented frames", rx_frag),
+	STATS_ENTRY("align_errors", "frames with alignment errors", rx_align),
+	STATS_ENTRY("truncated", "frames truncated due to Rx FIFO underrun",
+	    rx_trunc)
+};
+#define AE_STATS_RX_LEN (sizeof(ae_stats_rx) / sizeof(*ae_stats_rx))
+#define AE_STATS_TX_LEN (sizeof(ae_stats_tx) / sizeof(*ae_stats_tx))
+
+/*
+ * Stop the interface: mark it not running, mask and acknowledge all
+ * interrupts, halt the Tx/Rx MACs and DMA engines, then wait for the
+ * chip to report idle.  Must be called with the interface serializer
+ * held.
+ */
+static void
+ae_stop(struct ae_softc *sc)
+{
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	int tries;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	/* Interface is no longer running; disarm the watchdog timer. */
+	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
+	ifp->if_timer = 0;
+
+	/* Drop the link flag and cancel the tick callout. */
+	sc->ae_flags &= ~AE_FLAG_LINK;
+	callout_stop(&sc->ae_tick_ch);
+
+	/* Clear and disable interrupts. */
+	AE_WRITE_4(sc, AE_IMR_REG, 0);
+	AE_WRITE_4(sc, AE_ISR_REG, 0xffffffff);
+
+	/* Stop Rx/Tx MACs. */
+	ae_stop_txmac(sc);
+	ae_stop_rxmac(sc);
+
+	/* Stop DMA engines. */
+	AE_WRITE_1(sc, AE_DMAREAD_REG, ~AE_DMAREAD_EN);
+	AE_WRITE_1(sc, AE_DMAWRITE_REG, ~AE_DMAWRITE_EN);
+
+	/*
+	 * Poll the idle register, pausing 100us between reads, until it
+	 * reads zero or AE_IDLE_TIMEOUT polls have been made.
+	 */
+	tries = 0;
+	while (tries < AE_IDLE_TIMEOUT && AE_READ_4(sc, AE_IDLE_REG) != 0) {
+		DELAY(100);
+		tries++;
+	}
+	if (tries == AE_IDLE_TIMEOUT)
+		if_printf(ifp, "could not enter idle state in stop.\n");
+}
+
+/*
+ * Disable the Rx MAC and the DMA write engine, then wait for both to
+ * report idle.
+ *
+ * The wait loop is bounded by AE_IDLE_TIMEOUT polls spaced 100us apart;
+ * a message is printed when the hardware fails to go idle in time.
+ */
+static void
+ae_stop_rxmac(struct ae_softc *sc)
+{
+	uint32_t val;
+	int i;
+
+	/*
+	 * Stop Rx MAC engine.
+	 */
+	val = AE_READ_4(sc, AE_MAC_REG);
+	if ((val & AE_MAC_RX_EN) != 0) {
+		val &= ~AE_MAC_RX_EN;
+		AE_WRITE_4(sc, AE_MAC_REG, val);
+	}
+
+	/*
+	 * Stop Rx DMA engine.
+	 */
+	if (AE_READ_1(sc, AE_DMAWRITE_REG) == AE_DMAWRITE_EN)
+		AE_WRITE_1(sc, AE_DMAWRITE_REG, 0);
+
+	/*
+	 * Wait for IDLE state.  The counter must count up: counting down
+	 * from zero would never reach AE_IDLE_TIMEOUT, so the loop would
+	 * not be bounded and the timeout below would never be reported.
+	 */
+	for (i = 0; i < AE_IDLE_TIMEOUT; i++) {
+		val = AE_READ_4(sc, AE_IDLE_REG);
+		if ((val & (AE_IDLE_RXMAC | AE_IDLE_DMAWRITE)) == 0)
+			break;
+		DELAY(100);
+	}
+	if (i == AE_IDLE_TIMEOUT) {
+		if_printf(&sc->arpcom.ac_if,
+		    "timed out while stopping Rx MAC.\n");
+	}
+}
+
+/*
+ * Disable the Tx MAC and the DMA read engine, then wait for both to
+ * report idle.
+ *
+ * The wait loop is bounded by AE_IDLE_TIMEOUT polls spaced 100us apart;
+ * a message is printed when the hardware fails to go idle in time.
+ */
+static void
+ae_stop_txmac(struct ae_softc *sc)
+{
+	uint32_t val;
+	int i;
+
+	/*
+	 * Stop Tx MAC engine.
+	 */
+	val = AE_READ_4(sc, AE_MAC_REG);
+	if ((val & AE_MAC_TX_EN) != 0) {
+		val &= ~AE_MAC_TX_EN;
+		AE_WRITE_4(sc, AE_MAC_REG, val);
+	}
+
+	/*
+	 * Stop Tx DMA engine.
+	 */
+	if (AE_READ_1(sc, AE_DMAREAD_REG) == AE_DMAREAD_EN)
+		AE_WRITE_1(sc, AE_DMAREAD_REG, 0);
+
+	/*
+	 * Wait for IDLE state.  The counter must count up: counting down
+	 * from zero would never reach AE_IDLE_TIMEOUT, so the loop would
+	 * not be bounded and the timeout below would never be reported.
+	 */
+	for (i = 0; i < AE_IDLE_TIMEOUT; i++) {
+		val = AE_READ_4(sc, AE_IDLE_REG);
+		if ((val & (AE_IDLE_TXMAC | AE_IDLE_DMAREAD)) == 0)
+			break;
+		DELAY(100);
+	}
+	if (i == AE_IDLE_TIMEOUT) {
+		if_printf(&sc->arpcom.ac_if,
+		    "timed out while stopping Tx MAC.\n");
+	}
+}
+
+/*
+ * MII layer callback, invoked when the media status changes.
+ *
+ * Recomputes AE_FLAG_LINK from the reported media, stops both MACs and,
+ * when a 10baseT or 100baseTX link is up, reprograms the MAC and turns
+ * the DMA engines and MACs back on.  Does nothing unless the interface
+ * is running.
+ */
+static void
+ae_miibus_statchg(device_t dev)
+{
+	struct ae_softc *sc = device_get_softc(dev);
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct mii_data *mii;
+	uint32_t mac;
+	int subtype;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	if (!(ifp->if_flags & IFF_RUNNING))
+		return;
+
+	mii = device_get_softc(sc->ae_miibus);
+
+	/* Assume no link; set the flag again only for a supported media type. */
+	sc->ae_flags &= ~AE_FLAG_LINK;
+	if (mii->mii_media_status & IFM_AVALID) {
+		subtype = IFM_SUBTYPE(mii->mii_media_active);
+		if (subtype == IFM_10_T || subtype == IFM_100_TX)
+			sc->ae_flags |= AE_FLAG_LINK;
+	}
+
+	/* Both MACs are stopped regardless of the new link state. */
+	ae_stop_rxmac(sc);
+	ae_stop_txmac(sc);
+
+	if (!(sc->ae_flags & AE_FLAG_LINK))
+		return;
+
+	/* Program the MAC with the resolved duplex setting. */
+	ae_mac_config(sc);
+
+	/* Restart the DMA engines. */
+	AE_WRITE_1(sc, AE_DMAREAD_REG, AE_DMAREAD_EN);
+	AE_WRITE_1(sc, AE_DMAWRITE_REG, AE_DMAWRITE_EN);
+
+	/* Enable the Rx and Tx MACs. */
+	mac = AE_READ_4(sc, AE_MAC_REG);
+	mac |= AE_MAC_TX_EN | AE_MAC_RX_EN;
+	AE_WRITE_4(sc, AE_MAC_REG, mac);
+}
+
+/*
+ * Create the per-device sysctl tree: a node named after the device unit
+ * under hw, a "stats" node below it, and "rx"/"tx" nodes holding one
+ * read-only unsigned integer per entry of ae_stats_rx[]/ae_stats_tx[].
+ *
+ * Failure to create the root or "stats" node is reported and ends the
+ * setup; failure to create "rx" or "tx" only skips that subtree.
+ */
+static void
+ae_sysctl_node(struct ae_softc *sc)
+{
+	struct sysctl_ctx_list *ctx = &sc->ae_sysctl_ctx;
+	struct sysctl_oid *root, *stats, *rx_node, *tx_node;
+	char *counters = (char *)&sc->stats;
+	unsigned int n;
+
+	sysctl_ctx_init(ctx);
+	root = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
+	    device_get_nameunit(sc->ae_dev), CTLFLAG_RD, 0, "");
+	sc->ae_sysctl_tree = root;
+	if (root == NULL) {
+		device_printf(sc->ae_dev, "can't add sysctl node\n");
+		return;
+	}
+
+	stats = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(root), OID_AUTO, "stats",
+	    CTLFLAG_RD, NULL, "ae statistics");
+	if (stats == NULL) {
+		device_printf(sc->ae_dev, "can't add stats sysctl node\n");
+		return;
+	}
+
+	/* Receiver statistics. */
+	rx_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx",
+	    CTLFLAG_RD, NULL, "Rx MAC statistics");
+	if (rx_node != NULL) {
+		n = 0;
+		while (n < AE_STATS_RX_LEN) {
+			SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(rx_node),
+			    OID_AUTO, ae_stats_rx[n].node, CTLFLAG_RD,
+			    counters + ae_stats_rx[n].offset, 0,
+			    ae_stats_rx[n].desc);
+			n++;
+		}
+	}
+
+	/* Transmitter statistics. */
+	tx_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx",
+	    CTLFLAG_RD, NULL, "Tx MAC statistics");
+	if (tx_node != NULL) {
+		n = 0;
+		while (n < AE_STATS_TX_LEN) {
+			SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tx_node),
+			    OID_AUTO, ae_stats_tx[n].node, CTLFLAG_RD,
+			    counters + ae_stats_tx[n].offset, 0,
+			    ae_stats_tx[n].desc);
+			n++;
+		}
+	}
+}
+
+/*
+ * MII bus read method: read PHY register `reg' through the MDIO
+ * register.
+ *
+ * Returns the register contents, or 0 when `phy' is not the address
+ * recorded in the softc or when the operation does not complete within
+ * AE_MDIO_TIMEOUT polls (a message is printed in the latter case).
+ */
+static int
+ae_miibus_readreg(device_t dev, int phy, int reg)
+{
+	struct ae_softc *sc = device_get_softc(dev);
+	uint32_t val;
+	int i;
+
+	/*
+	 * Locking is done in upper layers.
+	 */
+	if (phy != sc->ae_phyaddr)
+		return (0);
+	val = ((reg << AE_MDIO_REGADDR_SHIFT) & AE_MDIO_REGADDR_MASK) |
+	    AE_MDIO_START | AE_MDIO_READ | AE_MDIO_SUP_PREAMBLE |
+	    ((AE_MDIO_CLK_25_4 << AE_MDIO_CLK_SHIFT) & AE_MDIO_CLK_MASK);
+	AE_WRITE_4(sc, AE_MDIO_REG, val);
+
+	/*
+	 * Wait for operation to complete.
+	 */
+	for (i = 0; i < AE_MDIO_TIMEOUT; i++) {
+		DELAY(2);
+		val = AE_READ_4(sc, AE_MDIO_REG);
+		if ((val & (AE_MDIO_START | AE_MDIO_BUSY)) == 0)
+			break;
+	}
+	if (i == AE_MDIO_TIMEOUT) {
+		device_printf(sc->ae_dev, "phy read timeout: %d.\n", reg);
+		return (0);
+	}
+
+	/*
+	 * Extract the data field: mask it out of the register and shift it
+	 * down.  This is the inverse of how ae_miibus_writereg() places
+	 * the value (shift up, then mask); shifting up here would only be
+	 * correct for a shift of zero.
+	 */
+	return ((val & AE_MDIO_DATA_MASK) >> AE_MDIO_DATA_SHIFT);
+}
+
+/*
+ * MII bus write method: write `val' to PHY register `reg' through the
+ * MDIO register.
+ *
+ * Always returns 0.  Requests for a PHY address other than the one
+ * recorded in the softc are ignored; a timeout is only reported with a
+ * message.
+ */
+static int
+ae_miibus_writereg(device_t dev, int phy, int reg, int val)
+{
+	struct ae_softc *sc = device_get_softc(dev);
+	uint32_t cmd;
+	int tries;
+
+	/* Locking is done in upper layers. */
+	if (phy != sc->ae_phyaddr)
+		return (0);
+
+	/* Assemble the MDIO command word and start the operation. */
+	cmd = AE_MDIO_START | AE_MDIO_SUP_PREAMBLE;
+	cmd |= (reg << AE_MDIO_REGADDR_SHIFT) & AE_MDIO_REGADDR_MASK;
+	cmd |= (AE_MDIO_CLK_25_4 << AE_MDIO_CLK_SHIFT) & AE_MDIO_CLK_MASK;
+	cmd |= (val << AE_MDIO_DATA_SHIFT) & AE_MDIO_DATA_MASK;
+	AE_WRITE_4(sc, AE_MDIO_REG, cmd);
+
+	/* Poll until both the START and BUSY bits are clear. */
+	tries = 0;
+	while (tries < AE_MDIO_TIMEOUT) {
+		DELAY(2);
+		cmd = AE_READ_4(sc, AE_MDIO_REG);
+		if ((cmd & (AE_MDIO_START | AE_MDIO_BUSY)) == 0)
+			break;
+		tries++;
+	}
+	if (tries == AE_MDIO_TIMEOUT)
+		device_printf(sc->ae_dev, "phy write timeout: %d.\n", reg);
+	return (0);
+}
+
+/*
+ * Device probe method: look the PCI vendor/device ID pair up in
+ * ae_devs[].  On a match the device description is set and 0 is
+ * returned; otherwise ENXIO is returned.
+ */
+static int
+ae_probe(device_t dev)
+{
+	const struct ae_dev *ent = ae_devs;
+	uint16_t vid = pci_get_vendor(dev);
+	uint16_t did = pci_get_device(dev);
+
+	/* The table is terminated by an entry with a NULL name. */
+	while (ent->ae_name != NULL) {
+		if (ent->ae_vendorid == vid && ent->ae_deviceid == did) {
+			device_set_desc(dev, ent->ae_name);
+			return (0);
+		}
+		ent++;
+	}
+	return (ENXIO);
+}
+
+/*
+ * Allocate the DMA resources used by the driver: a parent DMA tag and
+ * coherent memory areas for the TxD buffer, the TxS ring and the RxD
+ * ring.
+ *
+ * Returns 0 on success, the bus_dma_tag_create() error if the parent
+ * tag cannot be created, or ENOMEM if one of the areas cannot be
+ * allocated.  Resources obtained before a failure are not released
+ * here.  NOTE(review): presumably the caller cleans up with
+ * ae_dma_free(); confirm against ae_attach().
+ */
+static int
+ae_dma_alloc(struct ae_softc *sc)
+{
+	bus_addr_t busaddr;
+	int error;
+
+	/*
+	 * Create parent DMA tag.
+	 */
+	error = bus_dma_tag_create(NULL, 1, 0,
+	    BUS_SPACE_MAXADDR_32BIT,
+	    BUS_SPACE_MAXADDR,
+	    NULL, NULL,
+	    BUS_SPACE_MAXSIZE_32BIT,
+	    0,
+	    BUS_SPACE_MAXSIZE_32BIT,
+	    0, &sc->dma_parent_tag);
+	if (error) {
+		device_printf(sc->ae_dev, "could not create parent DMA tag.\n");
+		return (error);
+	}
+
+	/*
+	 * Create DMA stuffs for TxD.
+	 */
+	sc->txd_base = bus_dmamem_coherent_any(sc->dma_parent_tag, 4,
+	    AE_TXD_BUFSIZE_DEFAULT, BUS_DMA_WAITOK | BUS_DMA_ZERO,
+	    &sc->dma_txd_tag, &sc->dma_txd_map,
+	    &sc->dma_txd_busaddr);
+	if (sc->txd_base == NULL) {
+		device_printf(sc->ae_dev, "could not create TxD DMA stuffs.\n");
+		return (ENOMEM);
+	}
+
+	/*
+	 * Create DMA stuffs for TxS.
+	 */
+	sc->txs_base = bus_dmamem_coherent_any(sc->dma_parent_tag, 4,
+	    AE_TXS_COUNT_DEFAULT * 4, BUS_DMA_WAITOK | BUS_DMA_ZERO,
+	    &sc->dma_txs_tag, &sc->dma_txs_map,
+	    &sc->dma_txs_busaddr);
+	if (sc->txs_base == NULL) {
+		device_printf(sc->ae_dev, "could not create TxS DMA stuffs.\n");
+		return (ENOMEM);
+	}
+
+	/*
+	 * Create DMA stuffs for RxD.  The area is requested with an
+	 * alignment of 128 and 120 extra bytes; the ring itself (both the
+	 * bus address and the kernel pointer) starts 120 bytes into it.
+	 */
+	sc->rxd_base_dma = bus_dmamem_coherent_any(sc->dma_parent_tag, 128,
+	    AE_RXD_COUNT_DEFAULT * 1536 + 120,
+	    BUS_DMA_WAITOK | BUS_DMA_ZERO,
+	    &sc->dma_rxd_tag, &sc->dma_rxd_map,
+	    &busaddr);
+	if (sc->rxd_base_dma == NULL) {
+		device_printf(sc->ae_dev, "could not create RxD DMA stuffs.\n");
+		return (ENOMEM);
+	}
+	sc->dma_rxd_busaddr = busaddr + 120;
+	sc->rxd_base = (struct ae_rxd *)(sc->rxd_base_dma + 120);
+
+	return (0);
+}
+
+/*
+ * Program the duplex bit of the MAC register so that it matches the
+ * media currently reported as active by the MII layer.
+ */
+static void
+ae_mac_config(struct ae_softc *sc)
+{
+	struct mii_data *mii = device_get_softc(sc->ae_miibus);
+	uint32_t mac;
+
+	mac = AE_READ_4(sc, AE_MAC_REG);
+	/* XXX disable AE_MAC_TX_FLOW_EN? */
+	if (IFM_OPTIONS(mii->mii_media_active) & IFM_FDX)
+		mac |= AE_MAC_FULL_DUPLEX;
+	else
+		mac &= ~AE_MAC_FULL_DUPLEX;
+	AE_WRITE_4(sc, AE_MAC_REG, mac);
+}
+
+/*
+ * Hand one received frame to the network stack.
+ *
+ * The frame data is copied out of the Rx descriptor into a new mbuf
+ * chain (without the trailing CRC), the VLAN tag is attached when
+ * hardware tagging is enabled and the descriptor carries one, and the
+ * mbuf is passed to the interface input routine.
+ *
+ * Returns 0 on success, EIO for a frame that is too short, or ENOBUFS
+ * when no mbuf could be obtained.
+ */
+static int
+ae_rxeof(struct ae_softc *sc, struct ae_rxd *rxd)
+{
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct mbuf *m;
+	unsigned int len, size;
+	uint16_t flags;
+
+	flags = le16toh(rxd->flags);
+#ifdef AE_DEBUG
+	if_printf(ifp, "Rx interrupt occuried.\n");
+#endif
+	/*
+	 * Validate the raw length before subtracting the CRC: a reported
+	 * length shorter than the CRC would otherwise wrap around to a
+	 * huge unsigned value, slip past the runt check and be used as
+	 * the copy size below.
+	 */
+	len = le16toh(rxd->len);
+	if (len < ETHER_CRC_LEN ||
+	    (len - ETHER_CRC_LEN) < (ETHER_MIN_LEN - ETHER_CRC_LEN -
+	    sizeof(struct ether_vlan_header))) {
+		if_printf(ifp, "Runt frame received.\n");
+		return (EIO);
+	}
+	size = len - ETHER_CRC_LEN;
+
+	m = m_devget(&rxd->data[0], size, ETHER_ALIGN, ifp, NULL);
+	if (m == NULL)
+		return (ENOBUFS);
+
+	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) &&
+	    (flags & AE_RXD_HAS_VLAN)) {
+		m->m_pkthdr.ether_vlantag = AE_RXD_VLAN(le16toh(rxd->vlan));
+		m->m_flags |= M_VLANTAG;
+	}
+	ifp->if_input(ifp, m);
+
+	return (0);
+}
+
+/*
+ * Rx interrupt handler: walk the Rx ring from rxd_cur, consuming every
+ * descriptor marked with AE_RXD_UPDATE, then write the new ring index
+ * back to the chip.
+ */
+static void
+ae_rx_intr(struct ae_softc *sc)
+{
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct ae_rxd *rxd;
+	uint16_t status;
+
+	/* Synchronize DMA buffers. */
+	bus_dmamap_sync(sc->dma_rxd_tag, sc->dma_rxd_map,
+	    BUS_DMASYNC_POSTREAD);
+
+	for (;;) {
+		rxd = (struct ae_rxd *)(sc->rxd_base + sc->rxd_cur);
+		status = le16toh(rxd->flags);
+		if (!(status & AE_RXD_UPDATE))
+			break;
+
+		/* Take the descriptor: clear its update mark. */
+		rxd->flags = htole16(status & ~AE_RXD_UPDATE);
+
+		/* Update stats. */
+		ae_update_stats_rx(status, &sc->stats);
+
+		/* Advance the ring position before processing the frame. */
+		sc->rxd_cur = (sc->rxd_cur + 1) % AE_RXD_COUNT_DEFAULT;
+
+		if (!(status & AE_RXD_SUCCESS)) {
+			ifp->if_ierrors++;
+			continue;
+		}
+
+		if (ae_rxeof(sc, rxd) != 0)
+			ifp->if_ierrors++;
+		else
+			ifp->if_ipackets++;
+	}
+
+	/* Update Rx index. */
+	AE_WRITE_2(sc, AE_MB_RXD_IDX_REG, sc->rxd_cur);
+}
+
+/*
+ * Tx interrupt handler: consume every TxS entry marked with
+ * AE_TXS_UPDATE, advance the TxS/TxD acknowledge positions, account for
+ * the result, and restart transmission when space became available and
+ * the send queue is not empty.
+ */
+static void
+ae_tx_intr(struct ae_softc *sc)
+{
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct ae_txd *txd;
+	struct ae_txs *txs;
+	uint16_t status;
+
+	/* Synchronize DMA buffers. */
+	bus_dmamap_sync(sc->dma_txd_tag, sc->dma_txd_map, BUS_DMASYNC_POSTREAD);
+	bus_dmamap_sync(sc->dma_txs_tag, sc->dma_txs_map, BUS_DMASYNC_POSTREAD);
+
+	for (;;) {
+		txs = sc->txs_base + sc->txs_ack;
+		status = le16toh(txs->flags);
+		if (!(status & AE_TXS_UPDATE))
+			break;
+
+		/* Take the status entry: clear its update mark. */
+		txs->flags = htole16(status & ~AE_TXS_UPDATE);
+
+		/* Update stats. */
+		ae_update_stats_tx(status, &sc->stats);
+
+		/* Update TxS position. */
+		sc->txs_ack = (sc->txs_ack + 1) % AE_TXS_COUNT_DEFAULT;
+		sc->ae_flags |= AE_FLAG_TXAVAIL;
+
+		txd = (struct ae_txd *)(sc->txd_base + sc->txd_ack);
+		if (txs->len != txd->len) {
+			device_printf(sc->ae_dev, "Size mismatch: "
+			    "TxS:%d TxD:%d\n",
+			    le16toh(txs->len), le16toh(txd->len));
+		}
+
+		/* Move txd ack and align on 4-byte boundary. */
+		sc->txd_ack = ((sc->txd_ack + le16toh(txd->len) + 4 + 3) & ~3) %
+		    AE_TXD_BUFSIZE_DEFAULT;
+
+		if ((status & AE_TXS_SUCCESS) == 0)
+			ifp->if_oerrors++;
+		else
+			ifp->if_opackets++;
+		sc->tx_inproc--;
+	}
+
+	if (sc->tx_inproc < 0) {
+		/* XXX assert? */
+		if_printf(ifp, "Received stray Tx interrupt(s).\n");
+		sc->tx_inproc = 0;
+	}
+
+	/* Nothing left in flight: unarm the watchdog. */
+	if (sc->tx_inproc == 0)
+		ifp->if_timer = 0;
+
+	if (sc->ae_flags & AE_FLAG_TXAVAIL) {
+		ifp->if_flags &= ~IFF_OACTIVE;
+		if (!ifq_is_empty(&ifp->if_snd)) {
+#ifdef foo
+			ae_intr(sc);
+#else
+			if_devstart(ifp);
+#endif
+		}
+	}
+
+	/* Synchronize DMA buffers. */
+	bus_dmamap_sync(sc->dma_txd_tag, sc->dma_txd_map, BUS_DMASYNC_PREWRITE);
+	bus_dmamap_sync(sc->dma_txs_tag, sc->dma_txs_map, BUS_DMASYNC_PREWRITE);
+}
+
+/*
+ * Interrupt handler.  Must be called with the interface serializer held.
+ *
+ * Returns immediately when the status register shows none of the
+ * interrupts in AE_IMR_DEFAULT.  Otherwise it acknowledges and masks
+ * interrupts, re-initializes the chip on DMA timeouts or PHY link down,
+ * dispatches Tx/Rx completion processing and finally re-enables
+ * interrupts.
+ */
+static void
+ae_intr(void *xsc)
+{
+ struct ae_softc *sc = xsc;
+ struct ifnet *ifp = &sc->arpcom.ac_if;
+ uint32_t val;
+
+ ASSERT_SERIALIZED(ifp->if_serializer);
+
+ /* Not ours (or nothing we asked for): leave the status untouched. */
+ val = AE_READ_4(sc, AE_ISR_REG);
+ if (val == 0 || (val & AE_IMR_DEFAULT) == 0)
+ return;
+
+#ifdef foo
+ AE_WRITE_4(sc, AE_ISR_REG, AE_ISR_DISABLE);
+#endif
+
+ /* Read interrupt status. */
+ val = AE_READ_4(sc, AE_ISR_REG);
+
+ /* Clear interrupts and disable them. */
+ AE_WRITE_4(sc, AE_ISR_REG, val | AE_ISR_DISABLE);
+
+ if (ifp->if_flags & IFF_RUNNING) {
+ /* Fatal conditions: reset and reprogram the whole chip. */
+ if (val & (AE_ISR_DMAR_TIMEOUT |
+ AE_ISR_DMAW_TIMEOUT |
+ AE_ISR_PHY_LINKDOWN)) {
+ ae_init(sc);
+ }
+ if (val & AE_ISR_TX_EVENT)
+ ae_tx_intr(sc);
+ if (val & AE_ISR_RX_EVENT)
+ ae_rx_intr(sc);
+ }
+
+ /* Re-enable interrupts. */
+ AE_WRITE_4(sc, AE_ISR_REG, 0);
+}
+
+/*
+ * (Re)initialize the controller and bring the interface up.
+ *
+ * Must be called with the interface serializer held.  The chip is
+ * stopped and reset, then the MAC address, ring base addresses and
+ * sizes, timing parameters, MTU, flow control and Rx filter are
+ * programmed before Tx/Rx are enabled.  On success the interface is
+ * marked IFF_RUNNING and the one-second tick callout is started.  If the
+ * chip reports AE_ISR_PHY_LINKDOWN right after DMA is enabled, the
+ * function logs a failure and returns without marking the interface
+ * running.
+ */
+static void
+ae_init(void *xsc)
+{
+	struct ae_softc *sc = xsc;
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct mii_data *mii;
+	uint8_t eaddr[ETHER_ADDR_LEN];
+	uint32_t val;
+	bus_addr_t addr;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	mii = device_get_softc(sc->ae_miibus);
+	ae_stop(sc);
+	ae_reset(sc);
+	ae_pcie_init(sc);
+	ae_powersave_disable(sc);
+
+	/*
+	 * Clear and disable interrupts.
+	 */
+	AE_WRITE_4(sc, AE_ISR_REG, 0xffffffff);
+
+	/*
+	 * Set the MAC address.
+	 * The bytes are widened before shifting so that a high bit in
+	 * eaddr[2] is never shifted into the sign bit of an int.
+	 */
+	bcopy(IF_LLADDR(ifp), eaddr, ETHER_ADDR_LEN);
+	val = (uint32_t)eaddr[2] << 24 | (uint32_t)eaddr[3] << 16 |
+	    (uint32_t)eaddr[4] << 8 | (uint32_t)eaddr[5];
+	AE_WRITE_4(sc, AE_EADDR0_REG, val);
+	val = (uint32_t)eaddr[0] << 8 | (uint32_t)eaddr[1];
+	AE_WRITE_4(sc, AE_EADDR1_REG, val);
+
+	/*
+	 * Set ring buffers base addresses.
+	 * Only one high-address register exists; it is loaded from the
+	 * RxD ring address.
+	 */
+	addr = sc->dma_rxd_busaddr;
+	AE_WRITE_4(sc, AE_DESC_ADDR_HI_REG, BUS_ADDR_HI(addr));
+	AE_WRITE_4(sc, AE_RXD_ADDR_LO_REG, BUS_ADDR_LO(addr));
+	addr = sc->dma_txd_busaddr;
+	AE_WRITE_4(sc, AE_TXD_ADDR_LO_REG, BUS_ADDR_LO(addr));
+	addr = sc->dma_txs_busaddr;
+	AE_WRITE_4(sc, AE_TXS_ADDR_LO_REG, BUS_ADDR_LO(addr));
+
+	/*
+	 * Configure ring buffers sizes.
+	 */
+	AE_WRITE_2(sc, AE_RXD_COUNT_REG, AE_RXD_COUNT_DEFAULT);
+	AE_WRITE_2(sc, AE_TXD_BUFSIZE_REG, AE_TXD_BUFSIZE_DEFAULT / 4);
+	AE_WRITE_2(sc, AE_TXS_COUNT_REG, AE_TXS_COUNT_DEFAULT);
+
+	/*
+	 * Configure interframe gap parameters.
+	 */
+	val = ((AE_IFG_TXIPG_DEFAULT << AE_IFG_TXIPG_SHIFT) &
+	    AE_IFG_TXIPG_MASK) |
+	    ((AE_IFG_RXIPG_DEFAULT << AE_IFG_RXIPG_SHIFT) &
+	    AE_IFG_RXIPG_MASK) |
+	    ((AE_IFG_IPGR1_DEFAULT << AE_IFG_IPGR1_SHIFT) &
+	    AE_IFG_IPGR1_MASK) |
+	    ((AE_IFG_IPGR2_DEFAULT << AE_IFG_IPGR2_SHIFT) &
+	    AE_IFG_IPGR2_MASK);
+	AE_WRITE_4(sc, AE_IFG_REG, val);
+
+	/*
+	 * Configure half-duplex operation.
+	 */
+	val = ((AE_HDPX_LCOL_DEFAULT << AE_HDPX_LCOL_SHIFT) &
+	    AE_HDPX_LCOL_MASK) |
+	    ((AE_HDPX_RETRY_DEFAULT << AE_HDPX_RETRY_SHIFT) &
+	    AE_HDPX_RETRY_MASK) |
+	    ((AE_HDPX_ABEBT_DEFAULT << AE_HDPX_ABEBT_SHIFT) &
+	    AE_HDPX_ABEBT_MASK) |
+	    ((AE_HDPX_JAMIPG_DEFAULT << AE_HDPX_JAMIPG_SHIFT) &
+	    AE_HDPX_JAMIPG_MASK) | AE_HDPX_EXC_EN;
+	AE_WRITE_4(sc, AE_HDPX_REG, val);
+
+	/*
+	 * Configure interrupt moderate timer.
+	 */
+	AE_WRITE_2(sc, AE_IMT_REG, AE_IMT_DEFAULT);
+	val = AE_READ_4(sc, AE_MASTER_REG);
+	val |= AE_MASTER_IMT_EN;
+	AE_WRITE_4(sc, AE_MASTER_REG, val);
+
+	/*
+	 * Configure interrupt clearing timer.
+	 */
+	AE_WRITE_2(sc, AE_ICT_REG, AE_ICT_DEFAULT);
+
+	/*
+	 * Configure MTU.
+	 */
+	val = ifp->if_mtu + ETHER_HDR_LEN + sizeof(struct ether_vlan_header) +
+	    ETHER_CRC_LEN;
+	AE_WRITE_2(sc, AE_MTU_REG, val);
+
+	/*
+	 * Configure cut-through threshold.
+	 */
+	AE_WRITE_4(sc, AE_CUT_THRESH_REG, AE_CUT_THRESH_DEFAULT);
+
+	/*
+	 * Configure flow control.
+	 */
+	AE_WRITE_2(sc, AE_FLOW_THRESH_HI_REG, (AE_RXD_COUNT_DEFAULT / 8) * 7);
+	AE_WRITE_2(sc, AE_FLOW_THRESH_LO_REG, (AE_RXD_COUNT_MIN / 8) >
+	    (AE_RXD_COUNT_DEFAULT / 12) ? (AE_RXD_COUNT_MIN / 8) :
+	    (AE_RXD_COUNT_DEFAULT / 12));
+
+	/*
+	 * Init mailboxes.
+	 * Every software ring position is rewound, including txs_cur,
+	 * so that ae_encap() and ae_tx_intr() agree on where the TxS
+	 * ring starts after the reset.
+	 */
+	sc->txd_cur = sc->txd_ack = 0;
+	sc->txs_cur = sc->txs_ack = 0;
+	sc->rxd_cur = 0;
+	AE_WRITE_2(sc, AE_MB_TXD_IDX_REG, sc->txd_cur);
+	AE_WRITE_2(sc, AE_MB_RXD_IDX_REG, sc->rxd_cur);
+	sc->tx_inproc = 0;
+	sc->ae_flags |= AE_FLAG_TXAVAIL; /* Free Tx's available. */
+
+	/*
+	 * Enable DMA.
+	 */
+	AE_WRITE_1(sc, AE_DMAREAD_REG, AE_DMAREAD_EN);
+	AE_WRITE_1(sc, AE_DMAWRITE_REG, AE_DMAWRITE_EN);
+
+	/*
+	 * Check if everything is OK.
+	 */
+	val = AE_READ_4(sc, AE_ISR_REG);
+	if ((val & AE_ISR_PHY_LINKDOWN) != 0) {
+		device_printf(sc->ae_dev, "Initialization failed.\n");
+		return;
+	}
+
+	/*
+	 * Clear interrupt status.
+	 */
+	AE_WRITE_4(sc, AE_ISR_REG, 0x3fffffff);
+	AE_WRITE_4(sc, AE_ISR_REG, 0x0);
+
+	/*
+	 * Enable interrupts.
+	 */
+	val = AE_READ_4(sc, AE_MASTER_REG);
+	AE_WRITE_4(sc, AE_MASTER_REG, val | AE_MASTER_MANUAL_INT);
+	AE_WRITE_4(sc, AE_IMR_REG, AE_IMR_DEFAULT);
+
+	/*
+	 * Disable WOL.
+	 */
+	AE_WRITE_4(sc, AE_WOL_REG, 0);
+
+	/*
+	 * Configure MAC.
+	 */
+	val = AE_MAC_TX_CRC_EN | AE_MAC_TX_AUTOPAD |
+	    AE_MAC_FULL_DUPLEX | AE_MAC_CLK_PHY |
+	    AE_MAC_TX_FLOW_EN | AE_MAC_RX_FLOW_EN |
+	    ((AE_HALFBUF_DEFAULT << AE_HALFBUF_SHIFT) & AE_HALFBUF_MASK) |
+	    ((AE_MAC_PREAMBLE_DEFAULT << AE_MAC_PREAMBLE_SHIFT) &
+	    AE_MAC_PREAMBLE_MASK);
+	AE_WRITE_4(sc, AE_MAC_REG, val);
+
+	/*
+	 * Configure Rx MAC.
+	 */
+	ae_rxfilter(sc);
+	ae_rxvlan(sc);
+
+	/*
+	 * Enable Tx/Rx.
+	 */
+	val = AE_READ_4(sc, AE_MAC_REG);
+	AE_WRITE_4(sc, AE_MAC_REG, val | AE_MAC_TX_EN | AE_MAC_RX_EN);
+
+	sc->ae_flags &= ~AE_FLAG_LINK;
+	mii_mediachg(mii);	/* Switch to the current media. */
+
+	callout_reset(&sc->ae_tick_ch, hz, ae_tick, sc);
+	ifp->if_flags |= IFF_RUNNING;
+	ifp->if_flags &= ~IFF_OACTIVE;
+}
+
+/*
+ * Transmit watchdog: log the timeout, count an output error,
+ * re-initialize the chip and restart output if packets are still
+ * queued.  Must be called with the interface serializer held.
+ */
+static void
+ae_watchdog(struct ifnet *ifp)
+{
+	struct ae_softc *sc = ifp->if_softc;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	/* Tell a lost link apart from a genuine Tx hang in the log. */
+	if (sc->ae_flags & AE_FLAG_LINK)
+		if_printf(ifp, "watchdog timeout - resetting.\n");
+	else
+		if_printf(ifp, "watchdog timeout (missed link).\n");
+	ifp->if_oerrors++;
+
+	ae_init(sc);
+
+	if (ifq_is_empty(&ifp->if_snd))
+		return;
+	if_devstart(ifp);
+}
+
+/*
+ * Periodic callout: runs the MII tick under the interface serializer
+ * and re-arms itself to fire again after hz ticks.
+ */
+static void
+ae_tick(void *xsc)
+{
+	struct ae_softc *sc = xsc;
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct mii_data *mii;
+
+	mii = device_get_softc(sc->ae_miibus);
+
+	lwkt_serialize_enter(ifp->if_serializer);
+
+	mii_tick(mii);
+	callout_reset(&sc->ae_tick_ch, hz, ae_tick, sc);
+
+	lwkt_serialize_exit(ifp->if_serializer);
+}
+
+/*
+ * Make the AE_MAC_RMVLAN_EN bit of the MAC register follow the
+ * interface's IFCAP_VLAN_HWTAGGING capability setting.
+ */
+static void
+ae_rxvlan(struct ae_softc *sc)
+{
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	uint32_t mac;
+
+	mac = AE_READ_4(sc, AE_MAC_REG);
+	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
+		mac |= AE_MAC_RMVLAN_EN;
+	else
+		mac &= ~AE_MAC_RMVLAN_EN;
+	AE_WRITE_4(sc, AE_MAC_REG, mac);
+}
+
+/*
+ * Program the receive filter from the interface flags and the
+ * multicast address list.
+ *
+ * Broadcast reception is always enabled.  IFF_PROMISC enables
+ * promiscuous mode and IFF_ALLMULTI enables all-multicast; in either
+ * case the 64-bit multicast hash table is set to all ones.  Otherwise
+ * the hash table is rebuilt from the AF_LINK entries of the interface's
+ * multicast list.
+ */
+static void
+ae_rxfilter(struct ae_softc *sc)
+{
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+	struct ifmultiaddr *ifma;
+	uint32_t crc;
+	uint32_t mchash[2];
+	uint32_t rxcfg;
+
+	rxcfg = AE_READ_4(sc, AE_MAC_REG);
+	rxcfg &= ~(AE_MAC_MCAST_EN | AE_MAC_BCAST_EN | AE_MAC_PROMISC_EN);
+	rxcfg |= AE_MAC_BCAST_EN;
+	if (ifp->if_flags & IFF_PROMISC)
+		rxcfg |= AE_MAC_PROMISC_EN;
+	if (ifp->if_flags & IFF_ALLMULTI)
+		rxcfg |= AE_MAC_MCAST_EN;
+
+	/*
+	 * Wipe old settings.
+	 */
+	AE_WRITE_4(sc, AE_REG_MHT0, 0);
+	AE_WRITE_4(sc, AE_REG_MHT1, 0);
+	if (ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		AE_WRITE_4(sc, AE_REG_MHT0, 0xffffffff);
+		AE_WRITE_4(sc, AE_REG_MHT1, 0xffffffff);
+		AE_WRITE_4(sc, AE_MAC_REG, rxcfg);
+		return;
+	}
+
+	/*
+	 * Load multicast tables.
+	 * The top CRC bit selects the table word and the next five bits
+	 * select the bit within it.  The shifted constant is unsigned so
+	 * that bit 31 can be set without overflowing a signed int.
+	 */
+	bzero(mchash, sizeof(mchash));
+	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+		if (ifma->ifma_addr->sa_family != AF_LINK)
+			continue;
+		crc = ether_crc32_le(LLADDR((struct sockaddr_dl *)
+		    ifma->ifma_addr), ETHER_ADDR_LEN);
+		mchash[crc >> 31] |= 1U << ((crc >> 26) & 0x1f);
+	}
+	AE_WRITE_4(sc, AE_REG_MHT0, mchash[0]);
+	AE_WRITE_4(sc, AE_REG_MHT1, mchash[1]);
+	AE_WRITE_4(sc, AE_MAC_REG, rxcfg);
+}
+
+/*
+ * Return how many payload bytes can still be queued in the TxD ring:
+ * the free space between the producer (txd_cur) and acknowledge
+ * (txd_ack) positions, less the 4-byte TxD header.  Equal positions are
+ * treated as an entirely free ring.
+ */
+static unsigned int
+ae_tx_avail_size(struct ae_softc *sc)
+{
+	unsigned int space;
+
+	space = (sc->txd_cur >= sc->txd_ack) ?
+	    AE_TXD_BUFSIZE_DEFAULT - (sc->txd_cur - sc->txd_ack) :
+	    sc->txd_ack - sc->txd_cur;
+
+	/* Reserve room for the 4-byte header. */
+	return (space - 4);
+}
+
+/*
+ * Copy one packet into the TxD ring.
+ *
+ * The packet in *m_head is copied (not consumed) into the ring buffer
+ * behind a 4-byte TxD header, wrapping around the end of the ring when
+ * necessary.  The producer positions of the TxD and TxS rings are
+ * advanced and AE_FLAG_TXAVAIL is cleared when either ring becomes
+ * full.  The caller remains responsible for freeing the mbuf.
+ *
+ * Returns 0 on success, or ENOBUFS when no Tx space is available.
+ */
+static int
+ae_encap(struct ae_softc *sc, struct mbuf **m_head)
+{
+	struct mbuf *m0;
+	struct ae_txd *hdr;
+	unsigned int to_end;
+	uint16_t len;
+
+	M_ASSERTPKTHDR((*m_head));
+	m0 = *m_head;
+	len = m0->m_pkthdr.len;
+	if ((sc->ae_flags & AE_FLAG_TXAVAIL) == 0 ||
+	    ae_tx_avail_size(sc) < len) {
+#ifdef AE_DEBUG
+		if_printf(&sc->arpcom.ac_if, "No free Tx available.\n");
+#endif
+		return ENOBUFS;
+	}
+
+	hdr = (struct ae_txd *)(sc->txd_base + sc->txd_cur);
+	bzero(hdr, sizeof(*hdr));
+
+	/* Header size. */
+	sc->txd_cur = (sc->txd_cur + 4) % AE_TXD_BUFSIZE_DEFAULT;
+
+	/* Space available to the end of the ring */
+	to_end = AE_TXD_BUFSIZE_DEFAULT - sc->txd_cur;
+
+	if (to_end >= len) {
+		m_copydata(m0, 0, len, (caddr_t)(sc->txd_base + sc->txd_cur));
+	} else {
+		/* The frame wraps: copy the tail to the ring start. */
+		m_copydata(m0, 0, to_end, (caddr_t)(sc->txd_base +
+		    sc->txd_cur));
+		m_copydata(m0, to_end, len - to_end, (caddr_t)sc->txd_base);
+	}
+
+	/*
+	 * Set TxD flags and parameters.
+	 */
+	if ((m0->m_flags & M_VLANTAG) != 0) {
+		hdr->vlan = htole16(AE_TXD_VLAN(m0->m_pkthdr.ether_vlantag));
+		hdr->len = htole16(len | AE_TXD_INSERT_VTAG);
+	} else {
+		hdr->len = htole16(len);
+	}
+
+	/*
+	 * Set current TxD position and round up to a 4-byte boundary.
+	 */
+	sc->txd_cur = ((sc->txd_cur + len + 3) & ~3) % AE_TXD_BUFSIZE_DEFAULT;
+	if (sc->txd_cur == sc->txd_ack)
+		sc->ae_flags &= ~AE_FLAG_TXAVAIL;
+#ifdef AE_DEBUG
+	if_printf(&sc->arpcom.ac_if, "New txd_cur = %d.\n", sc->txd_cur);
+#endif
+
+	/*
+	 * Update TxS position and check if there are empty TxS available.
+	 */
+	sc->txs_base[sc->txs_cur].flags &= ~htole16(AE_TXS_UPDATE);
+	sc->txs_cur = (sc->txs_cur + 1) % AE_TXS_COUNT_DEFAULT;
+	if (sc->txs_cur == sc->txs_ack)
+		sc->ae_flags &= ~AE_FLAG_TXAVAIL;
+
+	/*
+	 * Synchronize DMA memory.
+	 */
+	bus_dmamap_sync(sc->dma_txd_tag, sc->dma_txd_map, BUS_DMASYNC_PREWRITE);
+	bus_dmamap_sync(sc->dma_txs_tag, sc->dma_txs_map, BUS_DMASYNC_PREWRITE);
+
+	return (0);
+}
+
+/*
+ * Transmit start routine.  Must be called with the interface
+ * serializer held.
+ *
+ * Without link the send queue is purged.  Otherwise packets are
+ * dequeued and handed to ae_encap() until the queue is empty or the
+ * ring is full; in the latter case the packet is put back and
+ * IFF_OACTIVE is set.  When at least one packet was queued the chip is
+ * told the new TxD position and the watchdog is armed.
+ */
+static void
+ae_start(struct ifnet *ifp)
+{
+	struct ae_softc *sc = ifp->if_softc;
+	int error, count;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+#ifdef AE_DEBUG
+	if_printf(ifp, "Start called.\n");
+#endif
+	if ((sc->ae_flags & AE_FLAG_LINK) == 0) {
+		ifq_purge(&ifp->if_snd);
+		return;
+	}
+	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
+		return;
+
+	/* Number of packets handed to the chip in this call. */
+	count = 0;
+	while (!ifq_is_empty(&ifp->if_snd)) {
+		struct mbuf *m0;
+
+		m0 = ifq_dequeue(&ifp->if_snd, NULL);
+		if (m0 == NULL)
+			break;	/* Nothing to do. */
+
+		error = ae_encap(sc, &m0);
+		if (error != 0) {
+			if (m0 != NULL) {
+				ifq_prepend(&ifp->if_snd, m0);
+				ifp->if_flags |= IFF_OACTIVE;
+#ifdef AE_DEBUG
+				if_printf(ifp, "Setting OACTIVE.\n");
+#endif
+			}
+			break;
+		}
+		count++;
+		sc->tx_inproc++;
+
+		/* Bounce a copy of the frame to BPF. */
+		ETHER_BPF_MTAP(ifp, m0);
+		/* ae_encap() copied the data, so the mbuf is done with. */
+		m_freem(m0);
+	}
+	if (count > 0) {	/* Something was dequeued. */
+		AE_WRITE_2(sc, AE_MB_TXD_IDX_REG, sc->txd_cur / 4);
+		ifp->if_timer = AE_TX_TIMEOUT;	/* Load watchdog. */
+#ifdef AE_DEBUG
+		if_printf(ifp, "%d packets dequeued.\n", count);
+		if_printf(ifp, "Tx pos now is %d.\n", sc->txd_cur);
+#endif
+	}
+}
+
+/*
+ * Interface ioctl handler.  Must be called with the interface
+ * serializer held.
+ *
+ * Handles interface flag changes, multicast list updates, media
+ * requests and capability changes; every other command is passed to
+ * ether_ioctl().  Returns 0 or the error reported by the media or
+ * ethernet layer.
+ */
+static int
+ae_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
+{
+	struct ae_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	struct mii_data *mii;
+	int changed;
+	int error = 0;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		if ((ifp->if_flags & IFF_UP) == 0) {
+			/* Going down: stop only a running interface. */
+			if (ifp->if_flags & IFF_RUNNING)
+				ae_stop(sc);
+		} else if ((ifp->if_flags & IFF_RUNNING) == 0) {
+			/* Coming up. */
+			ae_init(sc);
+		} else if (((ifp->if_flags ^ sc->ae_if_flags) &
+		    (IFF_PROMISC | IFF_ALLMULTI)) != 0) {
+			/* Already up: only the Rx filter may need a refresh. */
+			ae_rxfilter(sc);
+		}
+		sc->ae_if_flags = ifp->if_flags;
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		if (ifp->if_flags & IFF_RUNNING)
+			ae_rxfilter(sc);
+		break;
+
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		mii = device_get_softc(sc->ae_miibus);
+		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd);
+		break;
+
+	case SIOCSIFCAP:
+		/* Only VLAN hardware tagging can be toggled. */
+		changed = ifr->ifr_reqcap ^ ifp->if_capenable;
+		if (changed & IFCAP_VLAN_HWTAGGING) {
+			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+			ae_rxvlan(sc);
+		}
+		break;
+
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Device attach method.
+ *
+ * Allocates the memory-mapped register window and the IRQ, resets the
+ * PHY and the controller, validates the chip revision, allocates DMA
+ * memory, reads the station address, sets up the ifnet and MII bus,
+ * attaches the ethernet interface and finally installs the interrupt
+ * handler.
+ *
+ * Returns 0 on success.  On failure after the register window has been
+ * mapped, everything is released through ae_detach() and an errno value
+ * is returned.
+ */
+static int
+ae_attach(device_t dev)
+{
+ struct ae_softc *sc = device_get_softc(dev);
+ struct ifnet *ifp = &sc->arpcom.ac_if;
+ int error = 0;
+
+ sc->ae_dev = dev;
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ callout_init(&sc->ae_tick_ch);
+
+ /* Enable bus mastering */
+ pci_enable_busmaster(dev);
+
+ /*
+ * Allocate memory mapped IO
+ */
+ sc->ae_mem_rid = PCIR_BAR(0);
+ sc->ae_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+ &sc->ae_mem_rid, RF_ACTIVE);
+ if (sc->ae_mem_res == NULL) {
+ device_printf(dev, "can't allocate IO memory\n");
+ /* Nothing has been allocated yet, so no cleanup is needed. */
+ return ENXIO;
+ }
+ sc->ae_mem_bt = rman_get_bustag(sc->ae_mem_res);
+ sc->ae_mem_bh = rman_get_bushandle(sc->ae_mem_res);
+
+ /*
+ * Allocate IRQ
+ */
+ sc->ae_irq_rid = 0;
+ sc->ae_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+ &sc->ae_irq_rid,
+ RF_SHAREABLE | RF_ACTIVE);
+ if (sc->ae_irq_res == NULL) {
+ device_printf(dev, "can't allocate irq\n");
+ error = ENXIO;
+ goto fail;
+ }
+
+ /* Set PHY address. */
+ sc->ae_phyaddr = AE_PHYADDR_DEFAULT;
+
+ /* Create sysctl tree */
+ ae_sysctl_node(sc);
+
+ /* Reset PHY. */
+ ae_phy_reset(sc);
+
+ /*
+ * Reset the ethernet controller.
+ * NOTE(review): the return value of ae_reset() is ignored here.
+ */
+ ae_reset(sc);
+ ae_pcie_init(sc);
+
+ /*
+ * Get PCI and chip id/revision.
+ */
+ sc->ae_rev = pci_get_revid(dev);
+ sc->ae_chip_rev =
+ (AE_READ_4(sc, AE_MASTER_REG) >> AE_MASTER_REVNUM_SHIFT) &
+ AE_MASTER_REVNUM_MASK;
+ if (bootverbose) {
+ device_printf(dev, "PCI device revision : 0x%04x\n", sc->ae_rev);
+ device_printf(dev, "Chip id/revision : 0x%04x\n",
+ sc->ae_chip_rev);
+ }
+
+ /*
+ * XXX
+ * Uninitialized hardware returns an invalid chip id/revision
+ * as well as 0xFFFFFFFF for Tx/Rx fifo length. It seems that
+ * unplugged cable results in putting hardware into automatic
+ * power down mode which in turn returns invalid chip revision.
+ *
+ * NOTE(review): ae_chip_rev was masked with AE_MASTER_REVNUM_MASK
+ * above; if that mask is narrower than 16 bits this comparison
+ * with 0xFFFF can never be true -- confirm the intended value.
+ */
+ if (sc->ae_chip_rev == 0xFFFF) {
+ device_printf(dev,"invalid chip revision : 0x%04x -- "
+ "not initialized?\n", sc->ae_chip_rev);
+ error = ENXIO;
+ goto fail;
+ }
+#if 0
+ /* Get DMA parameters from PCIe device control register. */
+ pcie_ptr = pci_get_pciecap_ptr(dev);
+ if (pcie_ptr) {
+ uint16_t devctl;
+ sc->ae_flags |= AE_FLAG_PCIE;
+ devctl = pci_read_config(dev, pcie_ptr + PCIER_DEVCTRL, 2);
+ /* Max read request size. */
+ sc->ae_dma_rd_burst = ((devctl >> 12) & 0x07) <<
+ DMA_CFG_RD_BURST_SHIFT;
+ /* Max payload size. */
+ sc->ae_dma_wr_burst = ((devctl >> 5) & 0x07) <<
+ DMA_CFG_WR_BURST_SHIFT;
+ if (bootverbose) {
+ device_printf(dev, "Read request size : %d bytes.\n",
+ 128 << ((devctl >> 12) & 0x07));
+ device_printf(dev, "TLP payload size : %d bytes.\n",
+ 128 << ((devctl >> 5) & 0x07));
+ }
+ } else {
+ sc->ae_dma_rd_burst = DMA_CFG_RD_BURST_128;
+ sc->ae_dma_wr_burst = DMA_CFG_WR_BURST_128;
+ }
+#endif
+
+ /* Create DMA stuffs */
+ error = ae_dma_alloc(sc);
+ if (error)
+ goto fail;
+
+ /* Load station address. */
+ ae_get_eaddr(sc);
+
+ ifp->if_softc = sc;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_ioctl = ae_ioctl;
+ ifp->if_start = ae_start;
+ ifp->if_init = ae_init;
+ ifp->if_watchdog = ae_watchdog;
+ ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN - 1);
+ ifq_set_ready(&ifp->if_snd);
+ ifp->if_capabilities = IFCAP_VLAN_MTU |
+ IFCAP_VLAN_HWTAGGING;
+ ifp->if_hwassist = 0;
+ /* All supported capabilities are enabled by default. */
+ ifp->if_capenable = ifp->if_capabilities;
+
+ /* Set up MII bus. */
+ error = mii_phy_probe(dev, &sc->ae_miibus,
+ ae_mediachange, ae_mediastatus);
+ if (error) {
+ device_printf(dev, "no PHY found!\n");
+ goto fail;
+ }
+ ether_ifattach(ifp, sc->ae_eaddr, NULL);
+
+ /* Tell the upper layer(s) we support long frames. */
+ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+
+ error = bus_setup_intr(dev, sc->ae_irq_res, INTR_MPSAFE, ae_intr, sc,
+ &sc->ae_irq_handle, ifp->if_serializer);
+ if (error) {
+ device_printf(dev, "could not set up interrupt handler.\n");
+ /* The interface was already attached above; undo that first. */
+ ether_ifdetach(ifp);
+ goto fail;
+ }
+ ifp->if_cpuid = ithread_cpuid(rman_get_start(sc->ae_irq_res));
+ KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus);
+ return 0;
+fail:
+ /* ae_detach() releases whatever has been set up so far. */
+ ae_detach(dev);
+ return (error);
+}
+
+/*
+ * Device detach method; also used by ae_attach() as its failure path.
+ *
+ * For an attached device the chip is stopped and the interrupt handler
+ * torn down under the interface serializer before the ethernet
+ * interface is detached.  The MII bus child, IRQ and memory resources,
+ * sysctl context and DMA memory are then released; each is released
+ * only if it was set up.  Always returns 0.
+ */
+static int
+ae_detach(device_t dev)
+{
+ struct ae_softc *sc = device_get_softc(dev);
+
+ if (device_is_attached(dev)) {
+ struct ifnet *ifp = &sc->arpcom.ac_if;
+
+ lwkt_serialize_enter(ifp->if_serializer);
+ sc->ae_flags |= AE_FLAG_DETACH;
+ ae_stop(sc);
+ bus_teardown_intr(dev, sc->ae_irq_res, sc->ae_irq_handle);
+ lwkt_serialize_exit(ifp->if_serializer);
+
+ ether_ifdetach(ifp);
+ }
+
+ /* Remove the MII bus child, if one was probed. */
+ if (sc->ae_miibus != NULL)
+ device_delete_child(dev, sc->ae_miibus);
+ bus_generic_detach(dev);
+
+ if (sc->ae_irq_res != NULL) {
+ bus_release_resource(dev, SYS_RES_IRQ, sc->ae_irq_rid,
+ sc->ae_irq_res);
+ }
+ if (sc->ae_mem_res != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY, sc->ae_mem_rid,
+ sc->ae_mem_res);
+ }
+
+ if (sc->ae_sysctl_tree != NULL)
+ sysctl_ctx_free(&sc->ae_sysctl_ctx);
+
+ ae_dma_free(sc);
+
+ return (0);
+}
+
+/*
+ * Release the DMA memory and tags of the TxD, TxS and RxD rings and
+ * then the parent tag.
+ *
+ * A ring is released only if its tag is set.  Each tag pointer is
+ * cleared after it is destroyed, so calling this function again is
+ * harmless and no stale tag is left in the softc.
+ */
+static void
+ae_dma_free(struct ae_softc *sc)
+{
+	if (sc->dma_txd_tag != NULL) {
+		bus_dmamap_unload(sc->dma_txd_tag, sc->dma_txd_map);
+		bus_dmamem_free(sc->dma_txd_tag, sc->txd_base,
+		    sc->dma_txd_map);
+		bus_dma_tag_destroy(sc->dma_txd_tag);
+		sc->dma_txd_tag = NULL;
+	}
+	if (sc->dma_txs_tag != NULL) {
+		bus_dmamap_unload(sc->dma_txs_tag, sc->dma_txs_map);
+		bus_dmamem_free(sc->dma_txs_tag, sc->txs_base,
+		    sc->dma_txs_map);
+		bus_dma_tag_destroy(sc->dma_txs_tag);
+		sc->dma_txs_tag = NULL;
+	}
+	if (sc->dma_rxd_tag != NULL) {
+		bus_dmamap_unload(sc->dma_rxd_tag, sc->dma_rxd_map);
+		/* The RxD ring is freed through its rxd_base_dma pointer. */
+		bus_dmamem_free(sc->dma_rxd_tag,
+		    sc->rxd_base_dma, sc->dma_rxd_map);
+		bus_dma_tag_destroy(sc->dma_rxd_tag);
+		sc->dma_rxd_tag = NULL;
+	}
+	/* The parent tag goes last, after the ring tags above. */
+	if (sc->dma_parent_tag != NULL) {
+		bus_dma_tag_destroy(sc->dma_parent_tag);
+		sc->dma_parent_tag = NULL;
+	}
+}
+
+/*
+ * Load the PCIe LTSSM test-mode and DLL Tx control registers with
+ * their default values.
+ */
+static void
+ae_pcie_init(struct ae_softc *sc)
+{
+ AE_WRITE_4(sc, AE_PCIE_LTSSM_TESTMODE_REG,
+ AE_PCIE_LTSSM_TESTMODE_DEFAULT);
+ AE_WRITE_4(sc, AE_PCIE_DLL_TX_CTRL_REG,
+ AE_PCIE_DLL_TX_CTRL_DEFAULT);
+}
+
+/*
+ * Write AE_PHY_ENABLE to the PHY enable register and busy-wait 1000
+ * microseconds afterwards.
+ */
+static void
+ae_phy_reset(struct ae_softc *sc)
+{
+ AE_WRITE_4(sc, AE_PHY_ENABLE_REG, AE_PHY_ENABLE);
+ DELAY(1000); /* XXX: pause(9) ? */
+}
+
+/*
+ * Soft-reset the controller.
+ *
+ * Sets the soft-reset bit, polls until the chip clears it and then
+ * polls until the idle register reads zero.  Returns 0 on success or
+ * ENXIO if either wait times out.
+ */
+static int
+ae_reset(struct ae_softc *sc)
+{
+	int tries;
+
+	/* Issue a soft reset. */
+	AE_WRITE_4(sc, AE_MASTER_REG, AE_MASTER_SOFT_RESET);
+	bus_space_barrier(sc->ae_mem_bt, sc->ae_mem_bh, AE_MASTER_REG, 4,
+	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
+
+	/* Wait for reset to complete. */
+	tries = 0;
+	while (tries < AE_RESET_TIMEOUT &&
+	    (AE_READ_4(sc, AE_MASTER_REG) & AE_MASTER_SOFT_RESET) != 0) {
+		DELAY(10);
+		tries++;
+	}
+	if (tries == AE_RESET_TIMEOUT) {
+		device_printf(sc->ae_dev, "reset timeout.\n");
+		return (ENXIO);
+	}
+
+	/* Wait for everything to enter idle state. */
+	tries = 0;
+	while (tries < AE_IDLE_TIMEOUT && AE_READ_4(sc, AE_IDLE_REG) != 0) {
+		DELAY(100);
+		tries++;
+	}
+	if (tries == AE_IDLE_TIMEOUT) {
+		device_printf(sc->ae_dev, "could not enter idle state.\n");
+		return (ENXIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Look up the PCI VPD capability of the device.
+ *
+ * The VPD enable bit in the SPI control register is cleared first if it
+ * is set.  The result of pci_find_extcap() is returned and the
+ * capability offset is stored through vpdc.
+ */
+static int
+ae_check_eeprom_present(struct ae_softc *sc, int *vpdc)
+{
+	uint32_t spictl;
+
+	/* Not sure why, but Linux does this. */
+	spictl = AE_READ_4(sc, AE_SPICTL_REG);
+	if (spictl & AE_SPICTL_VPD_EN)
+		AE_WRITE_4(sc, AE_SPICTL_REG, spictl & ~AE_SPICTL_VPD_EN);
+
+	return (pci_find_extcap(sc->ae_dev, PCIY_VPD, vpdc));
+}
+
+/*
+ * Read one 32-bit word of VPD data.
+ *
+ * reg is the index of the word; the words start at VPD offset 0x100.
+ * Returns 0 and stores the value through word, or ETIMEDOUT if the chip
+ * does not signal completion within AE_VPD_TIMEOUT polls.
+ */
+static int
+ae_vpd_read_word(struct ae_softc *sc, int reg, uint32_t *word)
+{
+	uint32_t addr, status;
+	int polls;
+
+	AE_WRITE_4(sc, AE_VPD_DATA_REG, 0);	/* Clear register value. */
+
+	/* VPD registers start at offset 0x100. Read them. */
+	addr = 0x100 + reg * 4;
+	AE_WRITE_4(sc, AE_VPD_CAP_REG, (addr << AE_VPD_CAP_ADDR_SHIFT) &
+	    AE_VPD_CAP_ADDR_MASK);
+
+	/* Poll for completion, waiting 2000us before each check. */
+	polls = 0;
+	while (polls < AE_VPD_TIMEOUT) {
+		DELAY(2000);
+		status = AE_READ_4(sc, AE_VPD_CAP_REG);
+		if (status & AE_VPD_CAP_DONE)
+			break;
+		polls++;
+	}
+	if (polls == AE_VPD_TIMEOUT) {
+		device_printf(sc->ae_dev, "timeout reading VPD register %d.\n",
+		    reg);
+		return (ETIMEDOUT);
+	}
+
+	*word = AE_READ_4(sc, AE_VPD_DATA_REG);
+	return (0);
+}
+
+/*
+ * Read the ethernet address from the VPD area.
+ *
+ * The VPD words are scanned as pairs: a signature word that names a
+ * register, followed by the value for that register.  The values found
+ * for AE_EADDR0_REG and AE_EADDR1_REG are stored in eaddr[0] and
+ * eaddr[1].
+ *
+ * Returns 0 on success, the error from ae_check_eeprom_present() when
+ * the VPD capability lookup fails, ENOENT when fewer than two address
+ * values were found, or EINVAL when the address fails
+ * AE_CHECK_EADDR_VALID().
+ */
+static int
+ae_get_vpd_eaddr(struct ae_softc *sc, uint32_t *eaddr)
+{
+ uint32_t word, reg, val;
+ int error;
+ int found;
+ int vpdc;
+ int i;
+
+ /*
+ * Check for EEPROM.
+ */
+ error = ae_check_eeprom_present(sc, &vpdc);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Read the VPD configuration space.
+ * Each register is prefixed with signature,
+ * so we can check if it is valid.
+ */
+ for (i = 0, found = 0; i < AE_VPD_NREGS; i++) {
+ error = ae_vpd_read_word(sc, i, &word);
+ if (error != 0)
+ break;
+
+ /*
+ * Check signature.
+ */
+ if ((word & AE_VPD_SIG_MASK) != AE_VPD_SIG)
+ break;
+ reg = word >> AE_VPD_REG_SHIFT;
+ /*
+ * Together with the loop increment this advances i by two
+ * per iteration, i.e. one signature/value pair at a time.
+ */
+ i++; /* Move to the next word. */
+ if (reg != AE_EADDR0_REG && reg != AE_EADDR1_REG)
+ continue;
+
+ error = ae_vpd_read_word(sc, i, &val);
+ if (error != 0)
+ break;
+ if (reg == AE_EADDR0_REG)
+ eaddr[0] = val;
+ else
+ eaddr[1] = val;
+ /* Counts matching entries, not distinct registers. */
+ found++;
+ }
+ /* A read error that ended the scan early is reported as ENOENT. */
+ if (found < 2)
+ return (ENOENT);
+
+ eaddr[1] &= 0xffff; /* Only last 2 bytes are used. */
+ if (AE_CHECK_EADDR_VALID(eaddr) != 0) {
+ if (bootverbose)
+ device_printf(sc->ae_dev,
+ "VPD ethernet address registers are invalid.\n");
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/*
+ * Read the ethernet address from the chip's address registers.
+ *
+ * eaddr[0] receives AE_EADDR0_REG and eaddr[1] the low 16 bits of
+ * AE_EADDR1_REG.  Returns 0 on success or EINVAL when the address fails
+ * AE_CHECK_EADDR_VALID().
+ */
+static int
+ae_get_reg_eaddr(struct ae_softc *sc, uint32_t *eaddr)
+{
+	/*
+	 * BIOS is supposed to set this.
+	 */
+	eaddr[0] = AE_READ_4(sc, AE_EADDR0_REG);
+	eaddr[1] = AE_READ_4(sc, AE_EADDR1_REG);
+	eaddr[1] &= 0xffff;	/* Only last 2 bytes are used. */
+	if (AE_CHECK_EADDR_VALID(eaddr) != 0) {
+		if (bootverbose)
+			device_printf(sc->ae_dev,
+			    "Ethernet address registers are invalid.\n");
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * Determine the station address and store it in sc->ae_eaddr.
+ *
+ * The VPD area is tried first, then the chip's address registers.  If
+ * both fail, a random address is generated under a fixed three-byte
+ * prefix.
+ */
+static void
+ae_get_eaddr(struct ae_softc *sc)
+{
+	uint32_t eaddr[2] = {0, 0};
+	int error;
+
+	/* Check for EEPROM, then fall back to the registers. */
+	error = ae_get_vpd_eaddr(sc, eaddr);
+	if (error != 0)
+		error = ae_get_reg_eaddr(sc, eaddr);
+
+	if (error == 0) {
+		/* The upper three bytes come from the address words. */
+		sc->ae_eaddr[0] = (eaddr[1] >> 8) & 0xff;
+		sc->ae_eaddr[1] = (eaddr[1] >> 0) & 0xff;
+		sc->ae_eaddr[2] = (eaddr[0] >> 24) & 0xff;
+	} else {
+		if (bootverbose)
+			device_printf(sc->ae_dev,
+			    "Generating random ethernet address.\n");
+		eaddr[0] = karc4random();
+		/*
+		 * Set OUI to ASUSTek COMPUTER INC.
+		 */
+		sc->ae_eaddr[0] = 0x02;	/* U/L bit set. */
+		sc->ae_eaddr[1] = 0x1f;
+		sc->ae_eaddr[2] = 0xc6;
+	}
+
+	/* The lower three bytes always come from eaddr[0]. */
+	sc->ae_eaddr[3] = (eaddr[0] >> 16) & 0xff;
+	sc->ae_eaddr[4] = (eaddr[0] >> 8) & 0xff;
+	sc->ae_eaddr[5] = (eaddr[0] >> 0) & 0xff;
+}
+
+/*
+ * Media change callback.  Must be called with the interface serializer
+ * held.  Resets every PHY on the MII bus when mii_instance is non-zero,
+ * then applies the selected media and returns the result of
+ * mii_mediachg().
+ */
+static int
+ae_mediachange(struct ifnet *ifp)
+{
+	struct ae_softc *sc = ifp->if_softc;
+	struct mii_data *mii = device_get_softc(sc->ae_miibus);
+	struct mii_softc *phy;
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	if (mii->mii_instance != 0) {
+		LIST_FOREACH(phy, &mii->mii_phys, mii_list)
+			mii_phy_reset(phy);
+	}
+	return (mii_mediachg(mii));
+}
+
+/*
+ * Media status callback.  Must be called with the interface serializer
+ * held.  Polls the PHY and copies the resulting status and active
+ * media words into the request.
+ */
+static void
+ae_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	struct ae_softc *sc = ifp->if_softc;
+	struct mii_data *mii = device_get_softc(sc->ae_miibus);
+
+	ASSERT_SERIALIZED(ifp->if_serializer);
+
+	mii_pollstat(mii);
+
+	ifmr->ifm_active = mii->mii_media_active;
+	ifmr->ifm_status = mii->mii_media_status;
+}
+
+/*
+ * Bump the transmit counter in stats for every AE_TXS_* condition bit
+ * that is set in flags.
+ */
+static void
+ae_update_stats_tx(uint16_t flags, struct ae_stats *stats)
+{
+	/* Frame classification. */
+	if (flags & AE_TXS_BCAST)
+		stats->tx_bcast++;
+	if (flags & AE_TXS_MCAST)
+		stats->tx_mcast++;
+	if (flags & AE_TXS_PAUSE)
+		stats->tx_pause++;
+	if (flags & AE_TXS_CTRL)
+		stats->tx_ctrl++;
+
+	/* Deferrals and collisions. */
+	if (flags & AE_TXS_DEFER)
+		stats->tx_defer++;
+	if (flags & AE_TXS_EXCDEFER)
+		stats->tx_excdefer++;
+	if (flags & AE_TXS_SINGLECOL)
+		stats->tx_singlecol++;
+	if (flags & AE_TXS_MULTICOL)
+		stats->tx_multicol++;
+	if (flags & AE_TXS_LATECOL)
+		stats->tx_latecol++;
+	if (flags & AE_TXS_ABORTCOL)
+		stats->tx_abortcol++;
+
+	/* FIFO underrun. */
+	if (flags & AE_TXS_UNDERRUN)
+		stats->tx_underrun++;
+}
+
+/*
+ * Bump the receive counter in stats for every AE_RXD_* condition bit
+ * that is set in flags.
+ */
+static void
+ae_update_stats_rx(uint16_t flags, struct ae_stats *stats)
+{
+	/* Frame classification. */
+	if (flags & AE_RXD_BCAST)
+		stats->rx_bcast++;
+	if (flags & AE_RXD_MCAST)
+		stats->rx_mcast++;
+	if (flags & AE_RXD_PAUSE)
+		stats->rx_pause++;
+	if (flags & AE_RXD_CTRL)
+		stats->rx_ctrl++;
+
+	/* Receive errors. */
+	if (flags & AE_RXD_CRCERR)
+		stats->rx_crcerr++;
+	if (flags & AE_RXD_CODEERR)
+		stats->rx_codeerr++;
+	if (flags & AE_RXD_RUNT)
+		stats->rx_runt++;
+	if (flags & AE_RXD_FRAG)
+		stats->rx_frag++;
+	if (flags & AE_RXD_TRUNC)
+		stats->rx_trunc++;
+	if (flags & AE_RXD_ALIGN)
+		stats->rx_align++;
+}
+
+/*
+ * Device resume method: re-initializes the chip under the interface
+ * serializer if the interface is administratively up.  Always returns 0.
+ */
+static int
+ae_resume(device_t dev)
+{
+	struct ae_softc *sc = device_get_softc(dev);
+	struct ifnet *ifp = &sc->arpcom.ac_if;
+
+	lwkt_serialize_enter(ifp->if_serializer);
+#if 0
+	AE_READ_4(sc, AE_WOL_REG);	/* Clear WOL status. */
+#endif
+	if (ifp->if_flags & IFF_UP)
+		ae_init(sc);
+	lwkt_serialize_exit(ifp->if_serializer);
+
+	return (0);
+}
+
+/*
+ * Device suspend method: stops the chip under the interface
+ * serializer.  Always returns 0.
+ */
+static int
+ae_suspend(device_t dev)
+{
+ struct ae_softc *sc = device_get_softc(dev);
+ struct ifnet *ifp = &sc->arpcom.ac_if;
+
+ lwkt_serialize_enter(ifp->if_serializer);
+ ae_stop(sc);
+#if 0
+ /* we don't use ae_pm_init because we don't want WOL */
+ ae_pm_init(sc);
+#endif
+ lwkt_serialize_exit(ifp->if_serializer);
+ return (0);
+}
+
+/*
+ * Device shutdown method: suspends the device and then puts the PHY
+ * into power-save mode under the interface serializer.  Always
+ * returns 0.
+ */
+static int
+ae_shutdown(device_t dev)
+{
+ struct ae_softc *sc = device_get_softc(dev);
+ struct ifnet *ifp = &sc->arpcom.ac_if;
+
+ /* ae_suspend() takes and releases the serializer on its own. */
+ ae_suspend(dev);
+
+ lwkt_serialize_enter(ifp->if_serializer);
+ ae_powersave_enable(sc);
+ lwkt_serialize_exit(ifp->if_serializer);
+
+ return (0);
+}
+
+/*
+ * Take the PHY out of power-save mode: if the power-save bit is set in
+ * PHY debug register 0, clear it and wait 1000 microseconds.
+ */
+static void
+ae_powersave_disable(struct ae_softc *sc)
+{
+	uint32_t dbg;
+
+	/* Select debug register 0 and fetch its contents. */
+	AE_PHY_WRITE(sc, AE_PHY_DBG_ADDR, 0);
+	dbg = AE_PHY_READ(sc, AE_PHY_DBG_DATA);
+
+	if ((dbg & AE_PHY_DBG_POWERSAVE) == 0)
+		return;		/* Already disabled. */
+
+	dbg &= ~AE_PHY_DBG_POWERSAVE;
+	AE_PHY_WRITE(sc, AE_PHY_DBG_DATA, dbg);
+	DELAY(1000);
+}
+
+/*
+ * Put the PHY into power-save mode through its debug registers.
+ *
+ * Debug register 0 is read, modified and written back through the data
+ * port, mirroring ae_powersave_disable(); debug registers 2 and 3 are
+ * then loaded with fixed values.
+ */
+static void
+ae_powersave_enable(struct ae_softc *sc)
+{
+	uint32_t val;
+
+	/*
+	 * XXX magic numbers.
+	 */
+	AE_PHY_WRITE(sc, AE_PHY_DBG_ADDR, 0);
+	val = AE_PHY_READ(sc, AE_PHY_DBG_DATA);
+	/*
+	 * Write the updated value to the data port; writing it to the
+	 * address port would only select another debug register.
+	 * NOTE(review): 0x1000 is presumably AE_PHY_DBG_POWERSAVE -- confirm.
+	 */
+	AE_PHY_WRITE(sc, AE_PHY_DBG_DATA, val | 0x1000);
+	AE_PHY_WRITE(sc, AE_PHY_DBG_ADDR, 2);
+	AE_PHY_WRITE(sc, AE_PHY_DBG_DATA, 0x3000);
+	AE_PHY_WRITE(sc, AE_PHY_DBG_ADDR, 3);
+	AE_PHY_WRITE(sc, AE_PHY_DBG_DATA, 0);
+}
--- /dev/null
+/*-
+ * Copyright (c) 2008 Stanislav Sedov <stas@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/dev/ae/if_aereg.h,v 1.1.2.1.4.1 2009/04/15 03:14:26 kensmith Exp $
+ */
+
+/*
+ * Master configuration register
+ */
+#define AE_MASTER_REG 0x1400
+
+#define AE_MASTER_SOFT_RESET 0x1 /* Reset adapter. */
+#define AE_MASTER_MTIMER_EN 0x2 /* Unknown. */
+#define AE_MASTER_IMT_EN 0x4 /* Interrupt moderation timer enable. */
+#define AE_MASTER_MANUAL_INT 0x8 /* Software manual interrupt. */
+#define AE_MASTER_REVNUM_SHIFT 16 /* Chip revision number. */
+#define AE_MASTER_REVNUM_MASK 0xff
+#define AE_MASTER_DEVID_SHIFT 24 /* PCI device id. */
+#define AE_MASTER_DEVID_MASK 0xff
+
+/*
+ * Interrupt status register
+ */
+#define AE_ISR_REG 0x1600
+#define AE_ISR_TIMER 0x00000001 /* Counter expired. */
+#define AE_ISR_MANUAL 0x00000002 /* Manual interrupt occurred. */
+#define AE_ISR_RXF_OVERFLOW 0x00000004 /* RxF overflow occurred. */
+#define AE_ISR_TXF_UNDERRUN 0x00000008 /* TxF underrun occurred. */
+#define AE_ISR_TXS_OVERFLOW 0x00000010 /* TxS overflow occurred. */
+#define AE_ISR_RXS_OVERFLOW 0x00000020 /* Internal RxS ring overflow. */
+#define AE_ISR_LINK_CHG 0x00000040 /* Link state changed. */
+#define AE_ISR_TXD_UNDERRUN 0x00000080 /* TxD underrun occurred. */
+#define AE_ISR_RXD_OVERFLOW 0x00000100 /* RxD overflow occurred. */
+#define AE_ISR_DMAR_TIMEOUT 0x00000200 /* DMA read timeout. */
+#define AE_ISR_DMAW_TIMEOUT 0x00000400 /* DMA write timeout. */
+#define AE_ISR_PHY 0x00000800 /* PHY interrupt. */
+#define AE_ISR_TXS_UPDATED 0x00010000 /* Tx status updated. */
+#define AE_ISR_RXD_UPDATED 0x00020000 /* Rx status updated. */
+#define AE_ISR_TX_EARLY 0x00040000 /* TxMAC started transmit. */
+#define AE_ISR_FIFO_UNDERRUN 0x01000000 /* FIFO underrun. */
+#define AE_ISR_FRAME_ERROR 0x02000000 /* Frame receive error. */
+#define AE_ISR_FRAME_SUCCESS 0x04000000 /* Frame receive success. */
+#define AE_ISR_CRC_ERROR 0x08000000 /* CRC error occurred. */
+#define AE_ISR_PHY_LINKDOWN 0x10000000 /* PHY link down. */
+#define AE_ISR_DISABLE 0x80000000 /* Disable interrupts. */
+
+#define AE_ISR_TX_EVENT (AE_ISR_TXF_UNDERRUN | AE_ISR_TXS_OVERFLOW | \
+ AE_ISR_TXD_UNDERRUN | AE_ISR_TXS_UPDATED | \
+ AE_ISR_TX_EARLY)
+#define AE_ISR_RX_EVENT (AE_ISR_RXF_OVERFLOW | AE_ISR_RXS_OVERFLOW | \
+ AE_ISR_RXD_OVERFLOW | AE_ISR_RXD_UPDATED)
+
+/* Interrupt mask register. */
+#define AE_IMR_REG 0x1604
+
+#define AE_IMR_DEFAULT (AE_ISR_DMAR_TIMEOUT | AE_ISR_DMAW_TIMEOUT | \
+ AE_ISR_PHY_LINKDOWN | \
+ AE_ISR_TXS_UPDATED | AE_ISR_RXD_UPDATED )
+
+/*
+ * Ethernet address register.
+ */
+#define AE_EADDR0_REG 0x1488 /* 5 - 2 bytes */
+#define AE_EADDR1_REG 0x148c /* 1 - 0 bytes */
+
+/*
+ * Descriptor rings registers.
+ * L2 supports 64-bit addressing but all rings base addresses
+ * should have the same high 32 bits of address.
+ */
+#define AE_DESC_ADDR_HI_REG 0x1540 /* High 32 bits of ring base address. */
+#define AE_RXD_ADDR_LO_REG 0x1554 /* Low 32 bits of RxD ring address. */
+#define AE_TXD_ADDR_LO_REG 0x1544 /* Low 32 bits of TxD ring address. */
+#define AE_TXS_ADDR_LO_REG 0x154c /* Low 32 bits of TxS ring address. */
+#define AE_RXD_COUNT_REG 0x1558 /* Number of RxD descriptors in ring.
+ Should be 120-byte aligned (i.e.
+ the 'data' field of RxD should
+ have 128-byte alignment). */
+#define AE_TXD_BUFSIZE_REG 0x1548 /* Size of TxD ring in 4-byte units.
+ Should be 4-byte aligned. */
+#define AE_TXS_COUNT_REG 0x1550 /* Number of TxS descriptors in ring.
+ 4 byte alignment. */
+#define AE_RXD_COUNT_MIN 16
+#define AE_RXD_COUNT_MAX 512
+#define AE_RXD_COUNT_DEFAULT 64
+
+#define AE_TXD_BUFSIZE_MIN 4096
+#define AE_TXD_BUFSIZE_MAX 65536
+#define AE_TXD_BUFSIZE_DEFAULT 8192
+
+#define AE_TXS_COUNT_MIN 8 /* Not sure. */
+#define AE_TXS_COUNT_MAX 160
+#define AE_TXS_COUNT_DEFAULT 64 /* AE_TXD_BUFSIZE_DEFAULT / 128 */
+
+/*
+ * Inter-frame gap configuration register.
+ */
+#define AE_IFG_REG 0x1484
+
+#define AE_IFG_TXIPG_DEFAULT 0x60 /* 96-bit IFG time. */
+#define AE_IFG_TXIPG_SHIFT 0
+#define AE_IFG_TXIPG_MASK 0x7f
+
+#define AE_IFG_RXIPG_DEFAULT 0x50 /* 80-bit IFG time. */
+#define AE_IFG_RXIPG_SHIFT 8
+#define AE_IFG_RXIPG_MASK 0xff00
+
+#define AE_IFG_IPGR1_DEFAULT 0x40 /* Carrier-sense window. */
+#define AE_IFG_IPGR1_SHIFT 16
+#define AE_IFG_IPGR1_MASK 0x7f0000
+
+#define AE_IFG_IPGR2_DEFAULT 0x60 /* IFG window. */
+#define AE_IFG_IPGR2_SHIFT 24
+#define AE_IFG_IPGR2_MASK 0x7f000000
+
+/*
+ * Half-duplex mode configuration register.
+ */
+#define AE_HDPX_REG 0x1498
+
+/* Collision window. */
+#define AE_HDPX_LCOL_SHIFT 0
+#define AE_HDPX_LCOL_MASK 0x000003ff
+#define AE_HDPX_LCOL_DEFAULT 0x37
+
+/* Max retransmission time, after that the packet will be discarded. */
+#define AE_HDPX_RETRY_SHIFT 12
+#define AE_HDPX_RETRY_MASK 0x0000f000
+#define AE_HDPX_RETRY_DEFAULT 0x0f
+
+/* Alternative binary exponential back-off time. */
+#define AE_HDPX_ABEBT_SHIFT 20
+#define AE_HDPX_ABEBT_MASK 0x00f00000
+#define AE_HDPX_ABEBT_DEFAULT 0x0a
+
+/* IFG to start JAM for collision based flow control (8-bit time units).*/
+#define AE_HDPX_JAMIPG_SHIFT 24
+#define AE_HDPX_JAMIPG_MASK 0x0f000000
+#define AE_HDPX_JAMIPG_DEFAULT 0x07
+
+/* Allow the transmission of a packet which has been excessively deferred. */
+#define AE_HDPX_EXC_EN 0x00010000
+/* No back-off on collision, immediately start the retransmission. */
+#define AE_HDPX_NO_BACK_C 0x00020000
+/* No back-off on backpressure, immediately start the transmission. */
+#define AE_HDPX_NO_BACK_P 0x00040000
+/* Alternative binary exponential back-off enable. */
+#define AE_HDPX_ABEBE 0x00080000
+
+/*
+ * Interrupt moderation timer configuration register.
+ */
+#define AE_IMT_REG 0x1408 /* Timer value in 2 us units. */
+#define AE_IMT_MAX 65000
+#define AE_IMT_MIN 50
+#define AE_IMT_DEFAULT 100 /* 200 microseconds. */
+
+/*
+ * Interrupt clearing timer configuration register.
+ */
+#define AE_ICT_REG 0x140e /* Maximum time allowed to clear
+ interrupt. In 2 us units. */
+#define AE_ICT_DEFAULT 50000 /* 100ms */
+
+/*
+ * MTU configuration register.
+ */
+#define AE_MTU_REG 0x149c /* MTU size in bytes. */
+
+/*
+ * Cut-through configuration register.
+ */
+#define AE_CUT_THRESH_REG 0x1590 /* Cut-through threshold in unknown units. */
+#define AE_CUT_THRESH_DEFAULT 0x177
+
+/*
+ * Flow-control configuration registers.
+ */
+#define AE_FLOW_THRESH_HI_REG 0x15a8 /* High watermark of RxD
+ overflow threshold. */
+#define AE_FLOW_THRESH_LO_REG 0x15aa /* Lower watermark of RxD
+ overflow threshold */
+
+/*
+ * Mailbox configuration registers.
+*/
+#define AE_MB_TXD_IDX_REG 0x15f0 /* TxD read index. */
+#define AE_MB_RXD_IDX_REG 0x15f4 /* RxD write index. */
+
+/*
+ * DMA configuration registers.
+ */
+#define AE_DMAREAD_REG 0x1580 /* Read DMA configuration register. */
+#define AE_DMAREAD_EN 1
+#define AE_DMAWRITE_REG 0x15a0 /* Write DMA configuration register. */
+#define AE_DMAWRITE_EN 1
+
+/*
+ * MAC configuration register.
+ */
+#define AE_MAC_REG 0x1480
+
+#define AE_MAC_TX_EN 0x00000001 /* Enable transmit. */
+#define AE_MAC_RX_EN 0x00000002 /* Enable receive. */
+#define AE_MAC_TX_FLOW_EN 0x00000004 /* Enable Tx flow control. */
+#define AE_MAC_RX_FLOW_EN 0x00000008 /* Enable Rx flow control. */
+#define AE_MAC_LOOPBACK 0x00000010 /* Loopback at MII. */
+#define AE_MAC_FULL_DUPLEX 0x00000020 /* Enable full-duplex. */
+#define AE_MAC_TX_CRC_EN 0x00000040 /* Enable CRC generation. */
+#define AE_MAC_TX_AUTOPAD 0x00000080 /* Pad short frames. */
+#define AE_MAC_PREAMBLE_MASK 0x00003c00 /* Preamble length. */
+#define AE_MAC_PREAMBLE_SHIFT 10
+#define AE_MAC_PREAMBLE_DEFAULT 0x07 /* By standard. */
+#define AE_MAC_RMVLAN_EN 0x00004000 /* Remove VLAN tags in
+ incoming packets. */
+#define AE_MAC_PROMISC_EN 0x00008000 /* Enable promiscuous mode. */
+#define AE_MAC_TX_MAXBACKOFF 0x00100000 /* Unknown. */
+#define AE_MAC_MCAST_EN 0x02000000 /* Pass all multicast frames. */
+#define AE_MAC_BCAST_EN 0x04000000 /* Pass all broadcast frames. */
+#define AE_MAC_CLK_PHY 0x08000000 /* If 1 uses loopback clock
+ PHY, if 0 - system clock. */
+#define AE_HALFBUF_MASK 0xf0000000 /* Half-duplex retry buffer. */
+#define AE_HALFBUF_SHIFT 28
+#define AE_HALFBUF_DEFAULT 2 /* XXX: From Linux. */
+
+/*
+ * MDIO control register.
+ */
+#define AE_MDIO_REG 0x1414
+#define AE_MDIO_DATA_MASK 0xffff
+#define AE_MDIO_DATA_SHIFT 0
+#define AE_MDIO_REGADDR_MASK 0x1f0000
+#define AE_MDIO_REGADDR_SHIFT 16
+#define AE_MDIO_READ 0x00200000 /* Read operation. */
+#define AE_MDIO_SUP_PREAMBLE 0x00400000 /* Suppress preamble. */
+#define AE_MDIO_START 0x00800000 /* Initiate MDIO transfer. */
+#define AE_MDIO_CLK_SHIFT 24 /* Clock selection. */
+#define AE_MDIO_CLK_MASK 0x07000000 /* Clock selection. */
+#define AE_MDIO_CLK_25_4 0 /* Dividers? */
+#define AE_MDIO_CLK_25_6 2
+#define AE_MDIO_CLK_25_8 3
+#define AE_MDIO_CLK_25_10 4
+#define AE_MDIO_CLK_25_14 5
+#define AE_MDIO_CLK_25_20 6
+#define AE_MDIO_CLK_25_28 7
+#define AE_MDIO_BUSY 0x08000000 /* MDIO is busy. */
+
+/*
+ * Idle status register.
+ */
+#define AE_IDLE_REG 0x1410
+
+/*
+ * Idle status bits.
+ * If bit is set then the corresponding module is in non-idle state.
+ */
+#define AE_IDLE_RXMAC 1
+#define AE_IDLE_TXMAC 2
+#define AE_IDLE_DMAREAD 8
+#define AE_IDLE_DMAWRITE 4
+
+/*
+ * Multicast hash tables registers.
+ */
+#define AE_REG_MHT0 0x1490
+#define AE_REG_MHT1 0x1494
+
+/*
+ * Wake on lan (WOL).
+ */
+#define AE_WOL_REG 0x14a0
+#define AE_WOL_MAGIC 0x00000004
+#define AE_WOL_MAGIC_PME 0x00000008
+#define AE_WOL_LNKCHG 0x00000010
+#define AE_WOL_LNKCHG_PME 0x00000020
+
+/*
+ * PCIE configuration registers. Descriptions unknown.
+ */
+#define AE_PCIE_LTSSM_TESTMODE_REG 0x12fc
+#define AE_PCIE_LTSSM_TESTMODE_DEFAULT 0x6500
+#define AE_PCIE_DLL_TX_CTRL_REG 0x1104
+#define AE_PCIE_DLL_TX_CTRL_SEL_NOR_CLK 0x0400
+#define AE_PCIE_DLL_TX_CTRL_DEFAULT 0x0568
+#define AE_PCIE_PHYMISC_REG 0x1000
+#define AE_PCIE_PHYMISC_FORCE_RCV_DET 0x4
+
+/*
+ * PHY enable register.
+ */
+#define AE_PHY_ENABLE_REG 0x140c
+#define AE_PHY_ENABLE 1
+
+/*
+ * VPD registers.
+ */
+#define AE_VPD_CAP_REG 0x6c /* Command register. */
+#define AE_VPD_CAP_ID_MASK 0xff
+#define AE_VPD_CAP_ID_SHIFT 0
+#define AE_VPD_CAP_NEXT_MASK 0xff00
+#define AE_VPD_CAP_NEXT_SHIFT 8
+#define AE_VPD_CAP_ADDR_MASK 0x7fff0000
+#define AE_VPD_CAP_ADDR_SHIFT 16
+#define AE_VPD_CAP_DONE 0x80000000
+#define AE_VPD_DATA_REG 0x70 /* Data register. */
+
+#define AE_VPD_NREGS 64 /* Maximum number of VPD regs. */
+#define AE_VPD_SIG_MASK 0xff
+#define AE_VPD_SIG 0x5a /* VPD block signature. */
+#define AE_VPD_REG_SHIFT 16 /* Register id offset. */
+
+/*
+ * SPI registers.
+ */
+#define AE_SPICTL_REG 0x200
+#define AE_SPICTL_VPD_EN 0x2000 /* Enable VPD. */
+
+/*
+ * PHY-specific registers constants.
+ */
+#define AE_PHY_DBG_ADDR 0x1d
+#define AE_PHY_DBG_DATA 0x1e
+#define AE_PHY_DBG_POWERSAVE 0x1000
+
+/*
+ * TxD flags.
+ */
+#define AE_TXD_INSERT_VTAG 0x8000 /* Insert VLAN tag on transfer. */
+
+/*
+ * TxS flags.
+ */
+#define AE_TXS_SUCCESS 0x0001 /* Packet transmitted successfully. */
+#define AE_TXS_BCAST 0x0002 /* Transmitted broadcast frame. */
+#define AE_TXS_MCAST 0x0004 /* Transmitted multicast frame. */
+#define AE_TXS_PAUSE 0x0008 /* Transmitted pause frame. */
+#define AE_TXS_CTRL 0x0010 /* Transmitted control frame. */
+#define AE_TXS_DEFER 0x0020 /* Frame transmitted with defer. */
+#define AE_TXS_EXCDEFER 0x0040 /* Excessive collision. XXX: name suggests excessive deferral; confirm. */
+#define AE_TXS_SINGLECOL 0x0080 /* Single collision occurred. */
+#define AE_TXS_MULTICOL 0x0100 /* Multiple collisions occurred. */
+#define AE_TXS_LATECOL 0x0200 /* Late collision occurred. */
+#define AE_TXS_ABORTCOL 0x0400 /* Frame abort due to collisions. */
+#define AE_TXS_UNDERRUN 0x0800 /* Tx SRAM underrun occurred. */
+#define AE_TXS_UPDATE 0x8000
+
+/*
+ * RxD flags.
+ */
+#define AE_RXD_SUCCESS 0x0001
+#define AE_RXD_BCAST 0x0002 /* Broadcast frame received. */
+#define AE_RXD_MCAST 0x0004 /* Multicast frame received. */
+#define AE_RXD_PAUSE 0x0008 /* Pause frame received. */
+#define AE_RXD_CTRL 0x0010 /* Control frame received. */
+#define AE_RXD_CRCERR 0x0020 /* Invalid frame CRC. */
+#define AE_RXD_CODEERR 0x0040 /* Invalid frame opcode. */
+#define AE_RXD_RUNT 0x0080 /* Runt frame received. */
+#define AE_RXD_FRAG 0x0100 /* Collision fragment received. */
+#define AE_RXD_TRUNC 0x0200 /* The frame was truncated due
+ to Rx SRAM underrun. */
+#define AE_RXD_ALIGN 0x0400 /* Frame alignment error. */
+#define AE_RXD_HAS_VLAN 0x0800 /* VLAN tag present. */
+#define AE_RXD_UPDATE 0x8000
--- /dev/null
+/*-
+ * Copyright (c) 2008 Stanislav Sedov <stas@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/dev/ae/if_aevar.h,v 1.1.2.1.4.1 2009/04/15 03:14:26 kensmith Exp $
+ */
+
+#ifndef IF_AEVAR_H
+#define IF_AEVAR_H
+
+/*
+ * Supported chips identifiers.
+*/
+#define VENDORID_ATTANSIC 0x1969
+#define DEVICEID_ATTANSIC_L2 0x2048
+
+/* How much to wait for reset to complete (10 microsecond units). */
+#define AE_RESET_TIMEOUT 100
+
+/* How much to wait for device to enter idle state (100 microsecond units). */
+#define AE_IDLE_TIMEOUT 100
+
+/* How much to wait for MDIO to do the work (2 microsecond units). */
+#define AE_MDIO_TIMEOUT 10
+
+/* How much to wait for VPD reading operation to complete (2 ms units). */
+#define AE_VPD_TIMEOUT 10
+
+/* How much to wait for send operation to complete (HZ units). */
+#define AE_TX_TIMEOUT 5
+
+/* Default PHY address. */
+#define AE_PHYADDR_DEFAULT 0
+
+/* Tx packet descriptor header format. */
+struct ae_txd {
+ uint16_t len;
+ uint16_t vlan;
+} __packed;
+
+/* Tx status descriptor format. */
+struct ae_txs {
+ uint16_t len;
+ uint16_t flags;
+} __packed;
+
+/* Rx packet descriptor format. */
+struct ae_rxd {
+ uint16_t len;
+ uint16_t flags;
+ uint16_t vlan;
+ uint16_t __pad;
+ uint8_t data[1528];
+} __packed;
+
+/* Statistics. */
+struct ae_stats {
+ uint32_t rx_bcast;
+ uint32_t rx_mcast;
+ uint32_t rx_pause;
+ uint32_t rx_ctrl;
+ uint32_t rx_crcerr;
+ uint32_t rx_codeerr;
+ uint32_t rx_runt;
+ uint32_t rx_frag;
+ uint32_t rx_trunc;
+ uint32_t rx_align;
+ uint32_t tx_bcast;
+ uint32_t tx_mcast;
+ uint32_t tx_pause;
+ uint32_t tx_ctrl;
+ uint32_t tx_defer;
+ uint32_t tx_excdefer;
+ uint32_t tx_singlecol;
+ uint32_t tx_multicol;
+ uint32_t tx_latecol;
+ uint32_t tx_abortcol;
+ uint32_t tx_underrun;
+};
+
+/* Software state structure. */
+struct ae_softc {
+ struct arpcom arpcom;
+ device_t ae_dev;
+
+ int ae_mem_rid;
+ struct resource *ae_mem_res;
+ bus_space_tag_t ae_mem_bt;
+ bus_space_handle_t ae_mem_bh;
+
+ int ae_irq_rid;
+ struct resource *ae_irq_res;
+ void *ae_irq_handle;
+
+ int ae_phyaddr;
+ device_t ae_miibus;
+
+ int ae_rev;
+ int ae_chip_rev;
+ uint8_t ae_eaddr[ETHER_ADDR_LEN];
+ uint8_t ae_flags;
+ int ae_if_flags;
+
+ struct callout ae_tick_ch;
+
+ /* DMA tags. */
+ bus_dma_tag_t dma_parent_tag;
+ bus_dma_tag_t dma_rxd_tag;
+ bus_dma_tag_t dma_txd_tag;
+ bus_dma_tag_t dma_txs_tag;
+ bus_dmamap_t dma_rxd_map;
+ bus_dmamap_t dma_txd_map;
+ bus_dmamap_t dma_txs_map;
+
+ bus_addr_t dma_rxd_busaddr;
+ bus_addr_t dma_txd_busaddr;
+ bus_addr_t dma_txs_busaddr;
+
+ uint8_t *rxd_base_dma; /* Start of allocated area. */
+ struct ae_rxd *rxd_base; /* Start of RxD ring. */
+ uint8_t *txd_base; /* Start of TxD ring. */
+ struct ae_txs *txs_base; /* Start of TxS ring. */
+
+ /* Ring pointers. */
+ unsigned int rxd_cur;
+ unsigned int txd_cur;
+ unsigned int txs_cur;
+ unsigned int txs_ack;
+ unsigned int txd_ack;
+
+ int tx_inproc; /* Active Tx frames in ring. */
+ int wd_timer; /* XXX remove */
+
+ struct ae_stats stats;
+ struct sysctl_ctx_list ae_sysctl_ctx;
+ struct sysctl_oid *ae_sysctl_tree;
+};
+
+#define BUS_ADDR_LO(x) ((uint64_t) (x) & 0xFFFFFFFF)
+#define BUS_ADDR_HI(x) ((uint64_t) (x) >> 32)
+
+#define AE_FLAG_LINK 0x01 /* Has link. */
+#define AE_FLAG_DETACH 0x02 /* Is detaching. */
+#define AE_FLAG_TXAVAIL 0x04 /* Tx'es available. */
+#define AE_FLAG_MSI 0x08 /* Using MSI. */
+#define AE_FLAG_PMG 0x10 /* Supports PCI power management. */
+
+#endif /* IF_AEVAR_H */
-#$FreeBSD: src/sys/modules/em/Makefile,v 1.1.2.3 2002/06/18 21:00:56 pdeuskar Exp $
-#$DragonFly: src/sys/dev/netif/em/Makefile,v 1.10 2008/09/17 08:51:29 sephe Exp $
-
KMOD= if_mxge
-SRCS= if_mxge.c if_mxge_var.h mcp_gen_header.h mxge_mcp.h
-SRCS+= device_if.h bus_if.h pci_if.h
-SRCS+= opt_polling.h
+SRCS= if_mxge.c
+SRCS+= device_if.h bus_if.h pci_if.h opt_inet.h
.ifndef BUILDING_WITH_KERNEL
-opt_polling.h:
- echo '#define DEVICE_POLLING 1' > ${.OBJDIR}/${.TARGET}
+opt_inet.h:
+ echo "#define INET 1" > ${.TARGET}
.endif
.include <bsd.kmod.mk>
{ AS(umtx_sleep_args), (sy_call_t *)sys_umtx_sleep }, /* 469 = umtx_sleep */
{ AS(umtx_wakeup_args), (sy_call_t *)sys_umtx_wakeup }, /* 470 = umtx_wakeup */
{ AS(jail_attach_args), (sy_call_t *)sys_jail_attach }, /* 471 = jail_attach */
- { AS(set_tls_area_args), (sy_call_t *)sys_set_tls_area }, /* 472 = set_tls_area */
- { AS(get_tls_area_args), (sy_call_t *)sys_get_tls_area }, /* 473 = get_tls_area */
+ { SYF_MPSAFE | AS(set_tls_area_args), (sy_call_t *)sys_set_tls_area }, /* 472 = set_tls_area */
+ { SYF_MPSAFE | AS(get_tls_area_args), (sy_call_t *)sys_get_tls_area }, /* 473 = get_tls_area */
{ SYF_MPSAFE | AS(closefrom_args), (sy_call_t *)sys_closefrom }, /* 474 = closefrom */
{ AS(stat_args), (sy_call_t *)sys_stat }, /* 475 = stat */
- { AS(fstat_args), (sy_call_t *)sys_fstat }, /* 476 = fstat */
+ { SYF_MPSAFE | AS(fstat_args), (sy_call_t *)sys_fstat }, /* 476 = fstat */
{ AS(lstat_args), (sy_call_t *)sys_lstat }, /* 477 = lstat */
{ AS(fhstat_args), (sy_call_t *)sys_fhstat }, /* 478 = fhstat */
{ AS(getdirentries_args), (sy_call_t *)sys_getdirentries }, /* 479 = getdirentries */
#include <sys/uio.h>
#include <machine/limits.h>
+#include <sys/spinlock2.h>
+
struct ccms_lock_scan_info {
ccms_dataspace_t ds;
ccms_lock_t lock;
cst->end_offset = LLONG_MAX;
cst->state = CCMS_STATE_INVALID;
RB_INSERT(ccms_rb_tree, &ds->tree, cst);
+ spin_init(&ds->spin);
+}
+
+/*
+ * Return a chain of deleted cst's to the object cache.
+ *
+ * The chain is linked through delayed_next.  The successor of each
+ * element is fetched before the element itself is handed to
+ * objcache_put(), so the element is never touched after being freed.
+ */
+static __inline
+void
+ccms_delayed_free(ccms_cst_t cstn)
+{
+ ccms_cst_t next;
+
+ for (; cstn != NULL; cstn = next) {
+ next = cstn->delayed_next;
+ objcache_put(ccms_oc, cstn);
+ }
}
/*
* Destroy a CCMS dataspace.
+ *
+ * MPSAFE
*/
void
ccms_dataspace_destroy(ccms_dataspace_t ds)
{
+ ccms_cst_t cst;
+
+ spin_lock_wr(&ds->spin);
RB_SCAN(ccms_rb_tree, &ds->tree, NULL,
ccms_dataspace_destroy_match, ds);
+ cst = ds->delayed_free;
+ ds->delayed_free = NULL;
+ spin_unlock_wr(&ds->spin);
+ ccms_delayed_free(cst);
}
+/*
+ * Helper routine to delete matches during a destroy.
+ *
+ * NOTE: called with spinlock held.
+ */
static
int
ccms_dataspace_destroy_match(ccms_cst_t cst, void *arg)
ccms_dataspace_t ds = arg;
RB_REMOVE(ccms_rb_tree, &ds->tree, cst);
- objcache_put(ccms_oc, cst);
+ cst->delayed_next = ds->delayed_free;
+ ds->delayed_free = cst;
return(0);
}
/*
* Obtain a CCMS lock
+ *
+ * MPSAFE
*/
int
ccms_lock_get(ccms_dataspace_t ds, ccms_lock_t lock)
{
struct ccms_lock_scan_info info;
+ ccms_cst_t cst;
if (ccms_enable == 0) {
lock->ds = NULL;
info.cst1 = objcache_get(ccms_oc, M_WAITOK);
info.cst2 = objcache_get(ccms_oc, M_WAITOK);
+ spin_lock_wr(&ds->spin);
RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
ccms_lock_get_match, &info);
RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_undo_cmp,
ccms_lock_undo_match, &info);
info.coll_cst->blocked = 1;
- tsleep(info.coll_cst, 0,
+ msleep(info.coll_cst, &ds->spin, 0,
((lock->ltype == CCMS_LTYPE_SHARED) ? "rngsh" : "rngex"),
hz);
info.coll_cst = NULL;
RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
ccms_lock_redo_match, &info);
}
+ cst = ds->delayed_free;
+ ds->delayed_free = NULL;
+ spin_unlock_wr(&ds->spin);
/*
* Cleanup
*/
+ ccms_delayed_free(cst);
if (info.cst1)
objcache_put(ccms_oc, info.cst1);
if (info.cst2)
/*
* Obtain a CCMS lock, initialize the lock structure from the uio.
+ *
+ * MPSAFE
*/
int
ccms_lock_get_uio(ccms_dataspace_t ds, ccms_lock_t lock, struct uio *uio)
return(ccms_lock_get(ds, lock));
}
+/*
+ * Helper routine.
+ *
+ * NOTE: called with spinlock held.
+ */
static
int
ccms_lock_get_match(ccms_cst_t cst, void *arg)
* Undo a partially resolved ccms_ltype rangelock. This is atomic with
* the scan/redo code so there should not be any blocked locks when
* transitioning to 0.
+ *
+ * NOTE: called with spinlock held.
*/
static
int
/*
* Redo the local lock request for a range which has already been
* partitioned.
+ *
+ * NOTE: called with spinlock held.
*/
static
int
/*
* Release a CCMS lock
+ *
+ * MPSAFE
*/
int
ccms_lock_put(ccms_dataspace_t ds, ccms_lock_t lock)
{
struct ccms_lock_scan_info info;
+ ccms_cst_t cst;
if (lock->ds == NULL)
return(0);
info.cst1 = NULL;
info.cst2 = NULL;
+ spin_lock_wr(&ds->spin);
RB_SCAN(ccms_rb_tree, &ds->tree, ccms_lock_scan_cmp,
ccms_lock_put_match, &info);
+ cst = ds->delayed_free;
+ ds->delayed_free = NULL;
+ spin_unlock_wr(&ds->spin);
+ ccms_delayed_free(cst);
if (info.cst1)
objcache_put(ccms_oc, info.cst1);
if (info.cst2)
return(0);
}
+/*
+ * NOTE: called with spinlock held.
+ */
static
int
ccms_lock_put_match(ccms_cst_t cst, void *arg)
ocst->blocked = 0;
wakeup(ocst);
}
- objcache_put(ccms_oc, ocst);
+ /*objcache_put(ccms_oc, ocst);*/
+ ocst->delayed_next = info->ds->delayed_free;
+ info->ds->delayed_free = ocst;
}
}
}
(long long)cst->beg_offset,
(long long)cst->end_offset);
}
- objcache_put(ccms_oc, ocst);
+ /*objcache_put(ccms_oc, ocst);*/
+ ocst->delayed_next = info->ds->delayed_free;
+ info->ds->delayed_free = ocst;
}
}
}
return(0);
}
-
/*
* RB tree compare function for insertions and deletions. This function
* compares two CSTs.
return (error);
}
+/*
+ * MPSAFE
+ */
int
kern_fstat(int fd, struct stat *ub)
{
/*
* Return status information about a file descriptor.
+ *
+ * MPSAFE
*/
int
sys_fstat(struct fstat_args *uap)
track = &dev->si_track_read;
else
track = &dev->si_track_write;
- atomic_add_int(&track->bk_active, 1);
+ bio_track_ref(track);
bio->bio_track = track;
(void)dev->si_ops->d_strategy(&ap);
}
* DEVICE HELPER FUNCTIONS *
************************************************************************/
+/*
+ * MPSAFE
+ */
int
dev_drefs(cdev_t dev)
{
return(dev->si_sysref.refcnt);
}
+/*
+ * MPSAFE
+ */
const char *
dev_dname(cdev_t dev)
{
return(dev->si_ops->head.name);
}
+/*
+ * MPSAFE
+ */
int
dev_dflags(cdev_t dev)
{
return(dev->si_ops->head.flags);
}
+/*
+ * MPSAFE
+ */
int
dev_dmaj(cdev_t dev)
{
/*
* Check if permisson for a specific privilege is granted within jail.
+ *
+ * MPSAFE
*/
int
prison_priv_check(struct ucred *cred, int priv)
* priv_check_cred() should be used instead of priv_check().
*
* Returns 0 or error.
+ *
+ * MPSAFE
*/
int
priv_check(struct thread *td, int priv)
* Check a credential for privilege.
*
* A non-null credential is expected unless NULL_CRED_OKAY is set.
+ *
+ * MPSAFE
*/
int
priv_check_cred(struct ucred *cred, int priv, int flags)
* on the related vnode.
*/
if (bp->b_vp == NULL ||
- bp->b_vp->v_track_write.bk_active == 0) {
+ bio_track_active(&bp->b_vp->v_track_write) == 0) {
return (0);
}
#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
return(0);
}
+/*
+ * MPSAFE
+ */
static
int
slfileop_stat (struct file *fp, struct stat *sb, struct ucred *cred)
* are held.
*/
rpipe->pipe_state |= PIPE_WANTR;
+ crit_enter();
tsleep_interlock(rpipe);
lwkt_reltoken(&wlock);
error = tsleep(rpipe, PCATCH, "piperd", 0);
+ crit_exit();
++pipe_rblocked_count;
if (error)
break;
}
/*
- * MPALMOSTSAFE - acquires mplock
+ * MPSAFE
*/
static int
pipe_stat(struct file *fp, struct stat *ub, struct ucred *cred)
}
/*
- * MPALMOSTSAFE - acquires mplock
+ * MPSAFE
*/
int
soo_stat(struct file *fp, struct stat *ub, struct ucred *cred)
bzero((caddr_t)ub, sizeof (*ub));
ub->st_mode = S_IFSOCK;
- get_mplock();
so = (struct socket *)fp->f_data;
+
/*
* If SS_CANTRCVMORE is set, but there's still data left in the
* receive buffer, the socket is still readable.
ub->st_uid = so->so_cred->cr_uid;
ub->st_gid = so->so_cred->cr_gid;
error = so_pru_sense(so, ub);
- rel_mplock();
return (error);
}
469 STD BSD { int umtx_sleep(volatile const int *ptr, int value, int timeout); }
470 STD BSD { int umtx_wakeup(volatile const int *ptr, int count); }
471 STD BSD { int jail_attach(int jid); }
-472 STD BSD { int set_tls_area(int which, struct tls_info *info, size_t infosize); }
-473 STD BSD { int get_tls_area(int which, struct tls_info *info, size_t infosize); }
+472 MPSAFE STD BSD { int set_tls_area(int which, struct tls_info *info, size_t infosize); }
+473 MPSAFE STD BSD { int get_tls_area(int which, struct tls_info *info, size_t infosize); }
474 MPSAFE STD BSD { int closefrom(int fd); }
475 STD POSIX { int stat(const char *path, struct stat *ub); }
-476 STD POSIX { int fstat(int fd, struct stat *sb); }
+476 MPSAFE STD POSIX { int fstat(int fd, struct stat *sb); }
477 STD POSIX { int lstat(const char *path, struct stat *ub); }
478 STD BSD { int fhstat(const struct fhandle *u_fhp, struct stat *sb); }
479 STD BSD { int getdirentries(int fd, char *buf, u_int count, \
return (error);
}
+/*
+ * MPSAFE
+ */
int
so_pru_sense(struct socket *so, struct stat *sb)
{
* lock pushdown
*/
static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
-static ino_t unp_ino; /* prototype for fake inode numbers */
+static ino_t unp_ino = 1; /* prototype for fake inode numbers */
+static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin);
static int unp_attach (struct socket *, struct pru_attach_info *);
static void unp_detach (struct unpcb *);
return error;
}
+/*
+ * MPSAFE
+ */
static int
uipc_sense(struct socket *so, struct stat *sb)
{
return EINVAL;
sb->st_blksize = so->so_snd.ssb_hiwat;
sb->st_dev = NOUDEV;
- if (unp->unp_ino == 0) /* make up a non-zero inode number */
- unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
+ if (unp->unp_ino == 0) { /* make up a non-zero inode number */
+ spin_lock_wr(&unp_ino_spin);
+ unp->unp_ino = unp_ino++;
+ spin_unlock_wr(&unp_ino_spin);
+ }
sb->st_ino = unp->unp_ino;
return (0);
}
#define BD_WAKE_MASK (BD_WAKE_SIZE - 1)
TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES];
+struct spinlock bufspin = SPINLOCK_INITIALIZER(&bufspin);
static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer");
* account for the buffer and to wakeup anyone waiting for free buffers.
* This typically occurs when large amounts of metadata are being handled
* by the buffer cache ( else buffer space runs out first, usually ).
+ *
+ * MPSAFE
*/
-
static __inline void
bufcountwakeup(void)
{
*
* Spank the buf_daemon[_hw] if the total dirty buffer space exceeds the
* low water mark.
+ *
+ * MPSAFE
*/
static __inline__
void
*
* Get the buf_daemon heated up when the number of running and dirty
* buffers exceeds the mid-point.
+ *
+ * MPSAFE
*/
int
bd_heatup(void)
*
* Regardless this function blocks while the number of dirty buffers
* exceeds hidirtybufspace.
+ *
+ * MPSAFE
*/
void
bd_wait(int totalspace)
count = totalspace / BKVASIZE;
if (count >= BD_WAKE_SIZE)
count = BD_WAKE_SIZE - 1;
+
+ spin_lock_wr(&needsbuffer_spin);
i = (bd_wake_index + count) & BD_WAKE_MASK;
++bd_wake_ary[i];
+ tsleep_interlock(&bd_wake_ary[i]);
+ spin_unlock_wr(&needsbuffer_spin);
+
tsleep(&bd_wake_ary[i], 0, "flstik", hz);
crit_exit();
* This function is called whenever runningbufspace or dirtybufspace
* is reduced. Track threads waiting for run+dirty buffer I/O
* complete.
+ *
+ * MPSAFE
*/
static void
bd_signal(int totalspace)
{
u_int i;
- while (totalspace > 0) {
- i = atomic_fetchadd_int(&bd_wake_index, 1);
- i &= BD_WAKE_MASK;
- if (bd_wake_ary[i]) {
- bd_wake_ary[i] = 0;
- wakeup(&bd_wake_ary[i]);
+ if (totalspace > 0) {
+ if (totalspace > BKVASIZE * BD_WAKE_SIZE)
+ totalspace = BKVASIZE * BD_WAKE_SIZE;
+ spin_lock_wr(&needsbuffer_spin);
+ while (totalspace > 0) {
+ i = bd_wake_index++;
+ i &= BD_WAKE_MASK;
+ if (bd_wake_ary[i]) {
+ bd_wake_ary[i] = 0;
+ spin_unlock_wr(&needsbuffer_spin);
+ wakeup(&bd_wake_ary[i]);
+ spin_lock_wr(&needsbuffer_spin);
+ }
+ totalspace -= BKVASIZE;
+ }
+ spin_unlock_wr(&needsbuffer_spin);
+ }
+}
+
+/*
+ * BIO tracking support routines.
+ *
+ * Release a ref on a bio_track. Wakeup requests are atomically released
+ * along with the last reference so bk_active will never wind up set to
+ * only 0x80000000.
+ *
+ * bk_active layout as used below: the low 31 bits hold the reference
+ * count and bit 31 (0x80000000) is the "someone is waiting" flag.
+ *
+ * MPSAFE
+ */
+static
+void
+bio_track_rel(struct bio_track *track)
+{
+ int active;
+ int desired;
+
+ /*
+ * Shortcut: a value of exactly 1 (one ref, wait flag clear) can be
+ * dropped straight to 0 with a single compare-and-set.
+ */
+ active = track->bk_active;
+ if (active == 1 && atomic_cmpset_int(&track->bk_active, 1, 0))
+ return;
+
+ /*
+ * Full-on. Note that the wait flag is only atomically released on
+ * the 1->0 count transition.
+ *
+ * We check for a negative count transition using bit 30 since bit 31
+ * has a different meaning.
+ */
+ for (;;) {
+ /* Drop one reference from the count portion (low 31 bits). */
+ desired = (active & 0x7FFFFFFF) - 1;
+ /* Count still non-zero: carry the wait flag over unchanged. */
+ if (desired)
+ desired |= active & 0x80000000;
+ if (atomic_cmpset_int(&track->bk_active, active, desired)) {
+ if (desired & 0x40000000)
+ panic("bio_track_rel: bad count: %p\n", track);
+ /*
+ * Wake sleepers whenever the value we replaced had
+ * the wait flag set.
+ */
+ if (active & 0x80000000)
+ wakeup(track);
+ break;
+ }
+ /* Lost the race with another updater; reload and retry. */
+ active = track->bk_active;
+ }
+}
+
+/*
+ * Wait for the tracking count to reach 0.
+ *
+ * Use atomic ops such that the wait flag is only set atomically when
+ * bk_active is non-zero.
+ *
+ * MPSAFE
+ */
+int
+bio_track_wait(struct bio_track *track, int slp_flags, int slp_timo)
+{
+ int active;
+ int desired;
+ int error;
+
+ /*
+ * Shortcut
+ */
+ if (track->bk_active == 0)
+ return(0);
+
+ /*
+ * Full-on. Note that the wait flag may only be atomically set if
+ * the active count is non-zero.
+ */
+ crit_enter(); /* for tsleep_interlock */
+ error = 0;
+ while ((active = track->bk_active) != 0) {
+ desired = active | 0x80000000;
+ tsleep_interlock(track);
+ if (active == desired ||
+ atomic_cmpset_int(&track->bk_active, active, desired)) {
+ error = tsleep(track, slp_flags, "iowait", slp_timo);
+ if (error)
+ break;
}
- totalspace -= BKVASIZE;
}
+ crit_exit();
+ return (error);
}
/*
* buffer_map.
*
* Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * MPALMOSTSAFE
*/
static void
bfreekva(struct buf *bp)
int count;
if (bp->b_kvasize) {
+ get_mplock();
++buffreekvacnt;
count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
vm_map_lock(&buffer_map);
vm_map_entry_release(count);
bp->b_kvasize = 0;
bufspacewakeup();
+ rel_mplock();
}
}
*
* Remove the buffer from the appropriate free list.
*/
-void
-bremfree(struct buf *bp)
+static __inline void
+_bremfree(struct buf *bp)
{
- crit_enter();
-
if (bp->b_qindex != BQUEUE_NONE) {
KASSERT(BUF_REFCNTNB(bp) == 1,
("bremfree: bp %p not locked",bp));
if (BUF_REFCNTNB(bp) <= 1)
panic("bremfree: removing a buffer not on a queue");
}
+}
- crit_exit();
+void
+bremfree(struct buf *bp)
+{
+ spin_lock_wr(&bufspin);
+ _bremfree(bp);
+ spin_unlock_wr(&bufspin);
}
+static void
+bremfree_locked(struct buf *bp)
+{
+ _bremfree(bp);
+}
/*
* bread:
* must clear B_ERROR and B_INVAL prior to initiating I/O. If B_CACHE
* is set, the buffer is valid and we do not have to do anything ( see
* getblk() ).
+ *
+ * MPALMOSTSAFE
*/
int
bread(struct vnode *vp, off_t loffset, int size, struct buf **bpp)
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
+ get_mplock();
KASSERT(!(bp->b_flags & B_ASYNC),
("bread: illegal async bp %p", bp));
bp->b_flags &= ~(B_ERROR | B_INVAL);
bp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(vp, bp);
vn_strategy(vp, &bp->b_bio1);
+ rel_mplock();
return (biowait(bp));
}
return (0);
* read-ahead blocks. We must clear B_ERROR and B_INVAL prior
* to initiating I/O . If B_CACHE is set, the buffer is valid
* and we do not have to do anything.
+ *
+ * MPALMOSTSAFE
*/
int
breadn(struct vnode *vp, off_t loffset, int size, off_t *raoffset,
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
+ get_mplock();
bp->b_flags &= ~(B_ERROR | B_INVAL);
bp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(vp, bp);
vn_strategy(vp, &bp->b_bio1);
++readwait;
+ rel_mplock();
}
for (i = 0; i < cnt; i++, raoffset++, rabsize++) {
rabp = getblk(vp, *raoffset, *rabsize, 0, 0);
if ((rabp->b_flags & B_CACHE) == 0) {
+ /*
+ * Read-ahead block is not cached: issue an asynchronous
+ * read.  The MP lock is acquired here and released after
+ * vn_strategy(), mirroring the primary-buffer path above;
+ * calling rel_mplock() at this point would release a
+ * lock that was never taken.
+ */
+ get_mplock();
rabp->b_flags |= B_ASYNC;
rabp->b_flags &= ~(B_ERROR | B_INVAL);
rabp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(vp, rabp);
BUF_KERNPROC(rabp);
vn_strategy(vp, &rabp->b_bio1);
+ rel_mplock();
} else {
brelse(rabp);
}
}
-
- if (readwait) {
+ if (readwait)
rv = biowait(bp);
- }
return (rv);
}
if ((bp->b_flags & B_DELWRI) == 0) {
bp->b_flags |= B_DELWRI;
reassignbuf(bp);
- ++dirtybufcount;
- dirtybufspace += bp->b_bufsize;
+ /*
+ * The dirty-buffer counters are decremented with
+ * atomic_subtract_int() elsewhere, so the increments here
+ * must be atomic too; a plain read-modify-write on
+ * dirtybufspace could lose a concurrent update.
+ */
+ atomic_add_int(&dirtybufcount, 1);
+ atomic_add_int(&dirtybufspace, bp->b_bufsize);
if (bp->b_flags & B_HEAVY) {
- ++dirtybufcounthw;
- dirtybufspacehw += bp->b_bufsize;
+ atomic_add_int(&dirtybufcounthw, 1);
+ atomic_add_int(&dirtybufspacehw, bp->b_bufsize);
}
bd_heatup();
}
if ((bp->b_flags & B_HEAVY) == 0) {
bp->b_flags |= B_HEAVY;
if (bp->b_flags & B_DELWRI) {
- ++dirtybufcounthw;
- dirtybufspacehw += bp->b_bufsize;
+ atomic_add_int(&dirtybufcounthw, 1);
+ atomic_add_int(&dirtybufspacehw, bp->b_bufsize);
}
}
}
* The buffer is typically on BQUEUE_NONE but there is one case in
* brelse() that calls this function after placing the buffer on
* a different queue.
+ *
+ * MPSAFE
*/
-
void
bundirty(struct buf *bp)
{
if (bp->b_flags & B_DELWRI) {
bp->b_flags &= ~B_DELWRI;
reassignbuf(bp);
- --dirtybufcount;
- dirtybufspace -= bp->b_bufsize;
+ atomic_subtract_int(&dirtybufcount, 1);
+ atomic_subtract_int(&dirtybufspace, bp->b_bufsize);
if (bp->b_flags & B_HEAVY) {
- --dirtybufcounthw;
- dirtybufspacehw -= bp->b_bufsize;
+ atomic_subtract_int(&dirtybufcounthw, 1);
+ atomic_subtract_int(&dirtybufspacehw, bp->b_bufsize);
}
bd_signal(bp->b_bufsize);
}
* Release a busy buffer and, if requested, free its resources. The
* buffer will be stashed in the appropriate bufqueue[] allowing it
* to be accessed later as a cache entity or reused for other purposes.
+ *
+ * MPALMOSTSAFE
*/
void
brelse(struct buf *bp)
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
- crit_enter();
-
/*
* If B_NOCACHE is set we are being asked to destroy the buffer and
* its backing store. Clear B_DELWRI.
* buffer cannot be immediately freed.
*/
bp->b_flags |= B_INVAL;
- if (LIST_FIRST(&bp->b_dep) != NULL)
+ if (LIST_FIRST(&bp->b_dep) != NULL) {
+ get_mplock();
buf_deallocate(bp);
+ rel_mplock();
+ }
if (bp->b_flags & B_DELWRI) {
- --dirtybufcount;
- dirtybufspace -= bp->b_bufsize;
+ atomic_subtract_int(&dirtybufcount, 1);
+ atomic_subtract_int(&dirtybufspace, bp->b_bufsize);
if (bp->b_flags & B_HEAVY) {
- --dirtybufcounthw;
- dirtybufspacehw -= bp->b_bufsize;
+ atomic_subtract_int(&dirtybufcounthw, 1);
+ atomic_subtract_int(&dirtybufspacehw, bp->b_bufsize);
}
bd_signal(bp->b_bufsize);
}
if (bp->b_flags & (B_DELWRI | B_LOCKED)) {
bp->b_flags &= ~B_RELBUF;
} else if (vm_page_count_severe()) {
- if (LIST_FIRST(&bp->b_dep) != NULL)
+ if (LIST_FIRST(&bp->b_dep) != NULL) {
+ get_mplock();
buf_deallocate(bp); /* can set B_LOCKED */
+ rel_mplock();
+ }
if (bp->b_flags & (B_DELWRI | B_LOCKED))
bp->b_flags &= ~B_RELBUF;
else
resid = bp->b_bufsize;
foff = bp->b_loffset;
+ get_mplock();
for (i = 0; i < bp->b_xio.xio_npages; i++) {
m = bp->b_xio.xio_pages[i];
vm_page_flag_clear(m, PG_ZERO);
}
if (bp->b_flags & (B_INVAL | B_RELBUF))
vfs_vmio_release(bp);
+ rel_mplock();
} else {
/*
* Rundown for non-VMIO buffers.
*/
if (bp->b_flags & (B_INVAL | B_RELBUF)) {
-#if 0
- if (bp->b_vp)
- kprintf("brelse bp %p %08x/%08x: Warning, caught and fixed brelvp bug\n", bp, saved_flags, bp->b_flags);
-#endif
+ get_mplock();
if (bp->b_bufsize)
allocbuf(bp, 0);
KKASSERT (LIST_FIRST(&bp->b_dep) == NULL);
if (bp->b_vp)
brelvp(bp);
+ rel_mplock();
}
}
/* Temporary panic to verify exclusive locking */
/* This panic goes away when we allow shared refs */
panic("brelse: multiple refs");
- /* do not release to free list */
- BUF_UNLOCK(bp);
- crit_exit();
+ /* NOT REACHED */
return;
}
* Buffers placed in the EMPTY or EMPTYKVA had better already be
* disassociated from their vnode.
*/
+ spin_lock_wr(&bufspin);
if (bp->b_flags & B_LOCKED) {
/*
* Buffers that are locked are placed in the locked queue
break;
}
}
+ spin_unlock_wr(&bufspin);
/*
* If B_INVAL, clear B_DELWRI. We've already placed the buffer
*/
bp->b_flags &= ~(B_ORDERED | B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT);
BUF_UNLOCK(bp);
- crit_exit();
}
/*
* again soon.
*
* XXX we should be able to leave the B_RELBUF hint set on completion.
+ *
+ * MPSAFE
*/
void
bqrelse(struct buf *bp)
{
- crit_enter();
-
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
if (bp->b_qindex != BQUEUE_NONE)
if (BUF_REFCNTNB(bp) > 1) {
/* do not release to free list */
panic("bqrelse: multiple refs");
- BUF_UNLOCK(bp);
- crit_exit();
return;
}
+
+ spin_lock_wr(&bufspin);
if (bp->b_flags & B_LOCKED) {
/*
* Locked buffers are released to the locked queue. However,
* buffer (most importantly: the wired pages making up its
* backing store) *now*.
*/
- crit_exit();
+ spin_unlock_wr(&bufspin);
brelse(bp);
return;
} else {
bp->b_qindex = BQUEUE_CLEAN;
TAILQ_INSERT_TAIL(&bufqueues[BQUEUE_CLEAN], bp, b_freelist);
}
+ spin_unlock_wr(&bufspin);
if ((bp->b_flags & B_LOCKED) == 0 &&
((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0)) {
*/
bp->b_flags &= ~(B_ORDERED | B_ASYNC | B_NOCACHE | B_RELBUF);
BUF_UNLOCK(bp);
- crit_exit();
}
/*
bp->b_xio.xio_npages = 0;
bp->b_flags &= ~B_VMIO;
KKASSERT (LIST_FIRST(&bp->b_dep) == NULL);
- if (bp->b_vp)
+ if (bp->b_vp) {
+ get_mplock();
brelvp(bp);
+ rel_mplock();
+ }
}
/*
int nwritten;
int size;
- crit_enter();
/*
* right now we support clustered writing only to regular files. If
* we find a clusterable block we could be in the middle of a cluster
size = vp->v_mount->mnt_stat.f_iosize;
for (i = size; i < MAXPHYS; i += size) {
- if ((bpa = findblk(vp, loffset + i)) &&
+ if ((bpa = findblk(vp, loffset + i, FINDBLK_TEST)) &&
BUF_REFCNT(bpa) == 0 &&
((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
(B_DELWRI | B_CLUSTEROK)) &&
}
}
for (j = size; i + j <= MAXPHYS && j <= loffset; j += size) {
- if ((bpa = findblk(vp, loffset - j)) &&
+ if ((bpa = findblk(vp, loffset - j, FINDBLK_TEST)) &&
BUF_REFCNT(bpa) == 0 &&
((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
(B_DELWRI | B_CLUSTEROK)) &&
}
j -= size;
nbytes = (i + j);
+
/*
* this is a possible cluster write
*/
BUF_UNLOCK(bp);
nwritten = cluster_wbuild(vp, size,
loffset - j, nbytes);
- crit_exit();
return nwritten;
}
}
bremfree(bp);
bp->b_flags |= B_ASYNC;
- crit_exit();
/*
* default (old) behavior, writing out only one block
*
* To avoid VFS layer recursion we do not flush dirty buffers ourselves.
* Instead we ask the buf daemon to do it for us. We attempt to
* avoid piecemeal wakeups of the pageout daemon.
+ *
+ * MPALMOSTSAFE
*/
-
static struct buf *
getnewbuf(int blkflags, int slptimeo, int size, int maxsize)
{
* where we cannot backup.
*/
nqindex = BQUEUE_EMPTYKVA;
+ spin_lock_wr(&bufspin);
nbp = TAILQ_FIRST(&bufqueues[BQUEUE_EMPTYKVA]);
if (nbp == NULL) {
/*
* Run scan, possibly freeing data and/or kva mappings on the fly
* depending.
+ *
+ * WARNING! bufspin is held!
*/
-
while ((bp = nbp) != NULL) {
int qindex = nqindex;
*/
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+ spin_unlock_wr(&bufspin);
kprintf("getnewbuf: warning, locked buf %p, race corrected\n", bp);
tsleep(&bd_request, 0, "gnbxxx", hz / 100);
goto restart;
}
if (bp->b_qindex != qindex) {
+ spin_unlock_wr(&bufspin);
kprintf("getnewbuf: warning, BUF_LOCK blocked unexpectedly on buf %p index %d->%d, race corrected\n", bp, qindex, bp->b_qindex);
BUF_UNLOCK(bp);
goto restart;
}
- bremfree(bp);
+ bremfree_locked(bp);
+ spin_unlock_wr(&bufspin);
/*
* Dependancies must be handled before we disassociate the
* NOTE: HAMMER will set B_LOCKED if the buffer cannot
* be immediately disassociated. HAMMER then becomes
* responsible for releasing the buffer.
+ *
+ * NOTE: bufspin is UNLOCKED now.
*/
if (LIST_FIRST(&bp->b_dep) != NULL) {
+ get_mplock();
buf_deallocate(bp);
+ rel_mplock();
if (bp->b_flags & B_LOCKED) {
bqrelse(bp);
goto restart;
}
if (qindex == BQUEUE_CLEAN) {
+ get_mplock();
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
+ get_mplock();
vfs_vmio_release(bp);
+ rel_mplock();
}
if (bp->b_vp)
brelvp(bp);
+ rel_mplock();
}
/*
* scrapping a buffer's contents because it is already
* wired.
*/
- if (bp->b_bufsize)
+ if (bp->b_bufsize) {
+ get_mplock();
allocbuf(bp, 0);
+ rel_mplock();
+ }
bp->b_flags = B_BNOCLIP;
bp->b_cmd = BUF_CMD_DONE;
if (bufspace < lobufspace)
flushingbufs = 0;
break;
+ /* NOT REACHED, bufspin not held */
}
/*
* wakeup various daemons and write out some dirty buffers.
*
* Generally we are sleeping due to insufficient buffer space.
+ *
+ * NOTE: bufspin is held if bp is NULL, else it is not held.
*/
-
if (bp == NULL) {
int flags;
char *waitmsg;
+ spin_unlock_wr(&bufspin);
if (defrag) {
flags = VFS_BIO_NEED_BUFSPACE;
waitmsg = "nbufkv";
* woods, we still have to reserve kva space. In order
* to keep fragmentation sane we only allocate kva in
* BKVASIZE chunks.
+ *
+ * (bufspin is not held)
*/
maxsize = (maxsize + BKVAMASK) & ~BKVAMASK;
bfreekva(bp);
+ get_mplock();
count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
vm_map_lock(&buffer_map);
++bufdefragcnt;
defrag = 1;
bp->b_flags |= B_INVAL;
+ rel_mplock();
brelse(bp);
goto restart;
}
}
vm_map_unlock(&buffer_map);
vm_map_entry_release(count);
+ rel_mplock();
}
bp->b_data = bp->b_kvabase;
}
++recoverbufcalls;
+ spin_lock_wr(&bufspin);
while (bytes < MAXBSIZE) {
bp = TAILQ_FIRST(&bufqueues[BQUEUE_CLEAN]);
if (bp == NULL)
BUF_UNLOCK(bp);
continue;
}
- bremfree(bp);
+ bremfree_locked(bp);
+ spin_unlock_wr(&bufspin);
/*
* Dependancies must be handled before we disassociate the
buf_deallocate(bp);
if (bp->b_flags & B_LOCKED) {
bqrelse(bp);
+ spin_lock_wr(&bufspin);
continue;
}
KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
bytes += bp->b_bufsize;
+ get_mplock();
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
bp->b_flags |= B_DIRECT; /* try to free pages */
*/
if (bp->b_bufsize)
allocbuf(bp, 0);
+ rel_mplock();
bp->b_flags = B_BNOCLIP;
bp->b_cmd = BUF_CMD_DONE;
bp->b_flags |= B_INVAL;
/* bfreekva(bp); */
brelse(bp);
+ spin_lock_wr(&bufspin);
}
+ spin_unlock_wr(&bufspin);
return(bytes);
}
* that we really want to try to get the buffer out and reuse it
* due to the write load on the machine.
*/
-
static int
flushbufqueues(bufq_type_t q)
{
struct buf *bp;
int r = 0;
+ int spun;
+
+ spin_lock_wr(&bufspin);
+ spun = 1;
bp = TAILQ_FIRST(&bufqueues[q]);
while (bp) {
if (bp->b_flags & B_DELWRI) {
if (bp->b_flags & B_INVAL) {
+ spin_unlock_wr(&bufspin);
+ spun = 0;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
panic("flushbufqueues: locked buf");
bremfree(bp);
* avoid a live lock.
*/
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
+ spin_unlock_wr(&bufspin);
+ spun = 0;
if (LIST_FIRST(&bp->b_dep) != NULL &&
buf_checkwrite(bp)) {
bremfree(bp);
}
bp = TAILQ_NEXT(bp, b_freelist);
}
+ if (spun)
+ spin_unlock_wr(&bufspin);
return (r);
}
vm_offset_t toff, tinc, size;
vm_page_t m;
- if (findblk(vp, loffset))
+ if (findblk(vp, loffset, FINDBLK_TEST))
return 1;
if (vp->v_mount == NULL)
return 0;
/*
* findblk:
*
- * Locate and return the specified buffer, or NULL if the buffer does
- * not exist. Do not attempt to lock the buffer or manipulate it in
- * any way. The caller must validate that the correct buffer has been
- * obtain after locking it.
+ * Locate and return the specified buffer. Unless flagged otherwise,
+ * a locked buffer will be returned if it exists or NULL if it does not.
+ *
+ * FINDBLK_TEST - Do not lock the buffer. The caller is responsible
+ * for locking the buffer and ensuring that it remains
+ * the desired buffer after locking.
+ *
+ * FINDBLK_NBLOCK - Lock the buffer non-blocking. If we are unable
+ * to acquire the lock we return NULL, even if the
+ * buffer exists.
+ *
+ * (0) - Lock the buffer blocking.
+ *
+ * MPSAFE
*/
struct buf *
-findblk(struct vnode *vp, off_t loffset)
+findblk(struct vnode *vp, off_t loffset, int flags)
{
+ lwkt_tokref vlock;
struct buf *bp;
+ int lkflags;
- crit_enter();
- bp = buf_rb_hash_RB_LOOKUP(&vp->v_rbhash_tree, loffset);
- crit_exit();
+ lkflags = LK_EXCLUSIVE;
+ if (flags & FINDBLK_NBLOCK)
+ lkflags |= LK_NOWAIT;
+
+ for (;;) {
+ lwkt_gettoken(&vlock, &vp->v_token);
+ bp = buf_rb_hash_RB_LOOKUP(&vp->v_rbhash_tree, loffset);
+ lwkt_reltoken(&vlock);
+ if (bp == NULL || (flags & FINDBLK_TEST))
+ break;
+ if (BUF_LOCK(bp, lkflags)) {
+ bp = NULL;
+ break;
+ }
+ if (bp->b_vp == vp && bp->b_loffset == loffset)
+ break;
+ BUF_UNLOCK(bp);
+ }
return(bp);
}
/*
+ * getcacheblk:
+ *
+ * Similar to getblk() except only returns the buffer if it is
+ * B_CACHE and requires no other manipulation. Otherwise NULL
+ * is returned.
+ *
+ * If B_RAM is set the buffer might be just fine, but we return
+ * NULL anyway because we want the code to fall through to the
+ * cluster read. Otherwise read-ahead breaks.
+ */
+struct buf *
+getcacheblk(struct vnode *vp, off_t loffset)
+{
+	struct buf *bp;
+
+	/*
+	 * Blocking lookup (flags == 0); a buffer that is found comes
+	 * back locked.
+	 */
+	bp = findblk(vp, loffset, 0);
+	if (bp == NULL)
+		return (NULL);
+
+	/*
+	 * Only a buffer with B_CACHE set and both B_INVAL and B_RAM
+	 * clear is handed back.  Anything else is unlocked again and
+	 * reported as a miss.
+	 */
+	if ((bp->b_flags & (B_INVAL | B_CACHE | B_RAM)) != B_CACHE) {
+		BUF_UNLOCK(bp);
+		return (NULL);
+	}
+
+	/*
+	 * Hit: clear B_AGE and bremfree() the buffer, then return it
+	 * still locked.
+	 */
+	bp->b_flags &= ~B_AGE;
+	bremfree(bp);
+	return (bp);
+}
+
+/*
* getblk:
*
* Get a block given a specified block and offset into a file/device.
*
* GETBLK_PCATCH - catch signal if blocked, can cause NULL return
* GETBLK_BHEAVY - heavy-weight buffer cache buffer
+ *
+ * MPALMOSTSAFE
*/
struct buf *
getblk(struct vnode *vp, off_t loffset, int size, int blkflags, int slptimeo)
struct buf *bp;
int slpflags = (blkflags & GETBLK_PCATCH) ? PCATCH : 0;
int error;
+ int lkflags;
if (size > MAXBSIZE)
panic("getblk: size(%d) > MAXBSIZE(%d)", size, MAXBSIZE);
if (vp->v_object == NULL)
panic("getblk: vnode %p has no object!", vp);
- crit_enter();
loop:
- if ((bp = findblk(vp, loffset))) {
+ if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
/*
* The buffer was found in the cache, but we need to lock it.
* Even with LK_NOWAIT the lockmgr may break our critical
* once the lock has been obtained.
*/
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
- if (blkflags & GETBLK_NOWAIT) {
- crit_exit();
+ if (blkflags & GETBLK_NOWAIT)
return(NULL);
- }
- int lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
+ lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
if (blkflags & GETBLK_PCATCH)
lkflags |= LK_PCATCH;
error = BUF_TIMELOCK(bp, lkflags, "getblk", slptimeo);
if (error) {
if (error == ENOLCK)
goto loop;
- crit_exit();
return (NULL);
}
+ /* buffer may have changed on us */
}
/*
*/
if ((blkflags & GETBLK_SZMATCH) && size != bp->b_bcount) {
BUF_UNLOCK(bp);
- crit_exit();
return(NULL);
}
* the buffer in such circumstances can lead to problems.
*/
if (size != bp->b_bcount) {
+ get_mplock();
if (bp->b_flags & B_DELWRI) {
bp->b_flags |= B_NOCACHE;
bwrite(bp);
bp->b_flags |= B_RELBUF;
brelse(bp);
}
+ rel_mplock();
goto loop;
}
KKASSERT(size <= bp->b_kvasize);
*/
if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) {
+ get_mplock();
bp->b_flags |= B_NOCACHE;
bwrite(bp);
+ rel_mplock();
goto loop;
}
- crit_exit();
} else {
/*
* Buffer is not in-core, create new buffer. The buffer
maxsize = size + (loffset & PAGE_MASK);
maxsize = imax(maxsize, bsize);
- if ((bp = getnewbuf(blkflags, slptimeo, size, maxsize)) == NULL) {
- if (slpflags || slptimeo) {
- crit_exit();
+ bp = getnewbuf(blkflags, slptimeo, size, maxsize);
+ if (bp == NULL) {
+ if (slpflags || slptimeo)
return NULL;
- }
goto loop;
}
/*
- * This code is used to make sure that a buffer is not
- * created while the getnewbuf routine is blocked.
- * This can be a problem whether the vnode is locked or not.
- * If the buffer is created out from under us, we have to
- * throw away the one we just created. There is no window
- * race because we are safely running in a critical section
- * from the point of the duplicate buffer creation through
- * to here, and we've locked the buffer.
- */
- if (findblk(vp, loffset)) {
- bp->b_flags |= B_INVAL;
- brelse(bp);
- goto loop;
- }
-
- /*
- * Insert the buffer into the hash, so that it can
- * be found by findblk().
+ * Atomically insert the buffer into the hash, so that it can
+ * be found by findblk().
+ *
+ * If bgetvp() returns non-zero a collision occurred, and the
+ * bp will not be associated with the vnode.
*
* Make sure the translation layer has been cleared.
*/
bp->b_bio2.bio_offset = NOOFFSET;
/* bp->b_bio2.bio_next = NULL; */
- bgetvp(vp, bp);
+ if (bgetvp(vp, bp)) {
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ goto loop;
+ }
/*
* All vnode-based buffers must be backed by a VM object.
bp->b_flags |= B_VMIO;
KKASSERT(bp->b_cmd == BUF_CMD_DONE);
+ get_mplock();
allocbuf(bp, size);
-
- crit_exit();
+ rel_mplock();
}
return (bp);
}
*
* To this end, either B_LOCKED must be set or the dependancy list must be
* non-empty.
+ *
+ * MPSAFE
*/
void
regetblk(struct buf *bp)
{
KKASSERT((bp->b_flags & B_LOCKED) || LIST_FIRST(&bp->b_dep) != NULL);
BUF_LOCK(bp, LK_EXCLUSIVE | LK_RETRY);
- crit_enter();
bremfree(bp);
- crit_exit();
}
/*
*
* critical section protection is not required for the allocbuf()
* call because races are impossible here.
+ *
+ * MPALMOSTSAFE
*/
struct buf *
geteblk(int size)
maxsize = (size + BKVAMASK) & ~BKVAMASK;
- crit_enter();
while ((bp = getnewbuf(0, 0, size, maxsize)) == 0)
;
- crit_exit();
+ get_mplock();
allocbuf(bp, size);
+ rel_mplock();
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
return (bp);
}
*
* This routine does not need to be called from a critical section but you
* must own the buffer.
+ *
+ * NOTMPSAFE
*/
int
allocbuf(struct buf *bp, int size)
*
* NOTE! The original b_cmd is lost on return, since b_cmd will be
* set to BUF_CMD_DONE.
+ *
+ * MPSAFE
*/
int
biowait(struct buf *bp)
{
- crit_enter();
- while (bp->b_cmd != BUF_CMD_DONE) {
- if (bp->b_cmd == BUF_CMD_READ)
- tsleep(bp, 0, "biord", 0);
- else
- tsleep(bp, 0, "biowr", 0);
+ if (bp->b_cmd != BUF_CMD_DONE) {
+ crit_enter();
+ for (;;) {
+ tsleep_interlock(bp);
+ if (bp->b_cmd == BUF_CMD_DONE)
+ break;
+ if (bp->b_cmd == BUF_CMD_READ)
+ tsleep(bp, 0, "biord", 0);
+ else
+ tsleep(bp, 0, "biowr", 0);
+ }
+ crit_exit();
}
- crit_exit();
if (bp->b_flags & B_EINTR) {
bp->b_flags &= ~B_EINTR;
return (EINTR);
bio_start_transaction(struct bio *bio, struct bio_track *track)
{
bio->bio_track = track;
- atomic_add_int(&track->bk_active, 1);
+ bio_track_ref(track);
}
/*
else
track = &vp->v_track_write;
bio->bio_track = track;
- atomic_add_int(&track->bk_active, 1);
+ bio_track_ref(track);
vop_strategy(*vp->v_ops, vp, bio);
}
-
/*
* biodone:
*
* BIO tracking. Most but not all BIOs are tracked.
*/
if ((track = bio->bio_track) != NULL) {
- atomic_subtract_int(&track->bk_active, 1);
- if (track->bk_active < 0) {
- panic("biodone: bad active count bio %p\n",
- bio);
- }
- if (track->bk_waitflag) {
- track->bk_waitflag = 0;
- wakeup(track);
- }
+ bio_track_rel(track);
bio->bio_track = NULL;
}
} else {
struct buf *tbp;
bp->b_flags &= ~B_RAM;
+
/*
- * We do the crit here so that there is no window
- * between the findblk and the b_usecount increment
- * below. We opt to keep the crit out of the loop
- * for efficiency.
+ * Set read-ahead-mark only if we can passively lock
+ * the buffer. Note that with these flags the bp
+ * could very well exist even though NULL is returned.
*/
- crit_enter();
for (i = 1; i < maxra; i++) {
- if (!(tbp = findblk(vp, loffset + i * blksize))) {
+ tbp = findblk(vp, loffset + i * blksize,
+ FINDBLK_NBLOCK);
+ if (tbp == NULL)
break;
- }
-
- /*
- * Set another read-ahead mark so we know
- * to check again.
- */
if (((i % racluster) == (racluster - 1)) ||
- (i == (maxra - 1)))
+ (i == (maxra - 1))) {
tbp->b_flags |= B_RAM;
+ }
+ BUF_UNLOCK(tbp);
}
- crit_exit();
- if (i >= maxra) {
+ if (i >= maxra)
return 0;
- }
loffset += i * blksize;
}
reqbp = bp = NULL;
int maxiosize = vmaxiosize(vp);
while (bytes > 0) {
- crit_enter();
/*
* If the buffer is not delayed-write (i.e. dirty), or it
* is delayed-write but either locked or inval, it cannot
* partake in the clustered write.
*/
- if (((tbp = findblk(vp, start_loffset)) == NULL) ||
- ((tbp->b_flags & (B_LOCKED | B_INVAL | B_DELWRI)) != B_DELWRI) ||
- (LIST_FIRST(&tbp->b_dep) != NULL && buf_checkwrite(tbp)) ||
- BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT)) {
+ tbp = findblk(vp, start_loffset, FINDBLK_NBLOCK);
+ if (tbp == NULL ||
+ (tbp->b_flags & (B_LOCKED | B_INVAL | B_DELWRI)) != B_DELWRI ||
+ (LIST_FIRST(&tbp->b_dep) && buf_checkwrite(tbp))) {
+ if (tbp)
+ BUF_UNLOCK(tbp);
start_loffset += blksize;
bytes -= blksize;
- crit_exit();
continue;
}
bremfree(tbp);
KKASSERT(tbp->b_cmd == BUF_CMD_DONE);
- crit_exit();
/*
* Extra memory in the buffer, punt on this buffer.
* hassle.
*/
if (((tbp->b_flags & (B_CLUSTEROK|B_MALLOC)) != B_CLUSTEROK) ||
- (tbp->b_bcount != tbp->b_bufsize) ||
- (tbp->b_bcount != blksize) ||
- (bytes == blksize) ||
- ((bp = getpbuf(&cluster_pbuf_freecnt)) == NULL)) {
+ (tbp->b_bcount != tbp->b_bufsize) ||
+ (tbp->b_bcount != blksize) ||
+ (bytes == blksize) ||
+ ((bp = getpbuf(&cluster_pbuf_freecnt)) == NULL)) {
totalwritten += tbp->b_bufsize;
bawrite(tbp);
start_loffset += blksize;
bp->b_bio1.bio_done = cluster_callback;
bp->b_bio1.bio_caller_info1.cluster_head = NULL;
bp->b_bio1.bio_caller_info2.cluster_tail = NULL;
+
/*
* From this location in the file, scan forward to see
* if there are buffers with adjacent data that need to
*/
for (i = 0; i < bytes; (i += blksize), (start_loffset += blksize)) {
if (i != 0) { /* If not the first buffer */
- crit_enter();
+ tbp = findblk(vp, start_loffset,
+ FINDBLK_NBLOCK);
/*
- * If the adjacent data is not even in core it
- * can't need to be written.
+ * Buffer not found or could not be locked
+ * non-blocking.
*/
- if ((tbp = findblk(vp, start_loffset)) == NULL) {
- crit_exit();
+ if (tbp == NULL)
break;
- }
/*
* If it IS in core, but has different
- * characteristics, or is locked (which
- * means it could be undergoing a background
- * I/O or be in a weird state), then don't
- * cluster with it.
+ * characteristics, then don't cluster
+ * with it.
*/
if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK |
- B_INVAL | B_DELWRI | B_NEEDCOMMIT))
- != (B_DELWRI | B_CLUSTEROK |
- (bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) ||
+ B_INVAL | B_DELWRI | B_NEEDCOMMIT))
+ != (B_DELWRI | B_CLUSTEROK |
+ (bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) ||
(tbp->b_flags & B_LOCKED) ||
- (LIST_FIRST(&tbp->b_dep) != NULL && buf_checkwrite(tbp)) ||
- BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT)) {
- crit_exit();
+ (LIST_FIRST(&tbp->b_dep) &&
+ buf_checkwrite(tbp))
+ ) {
+ BUF_UNLOCK(tbp);
break;
}
((tbp->b_xio.xio_npages + bp->b_xio.xio_npages) >
(maxiosize / PAGE_SIZE))) {
BUF_UNLOCK(tbp);
- crit_exit();
break;
}
/*
*/
bremfree(tbp);
KKASSERT(tbp->b_cmd == BUF_CMD_DONE);
- crit_exit();
} /* end of code for non-first buffers only */
/*
bp->b_bcount += blksize;
bp->b_bufsize += blksize;
- crit_enter();
bundirty(tbp);
tbp->b_flags &= ~B_ERROR;
tbp->b_flags |= B_ASYNC;
tbp->b_cmd = BUF_CMD_WRITE;
- crit_exit();
BUF_KERNPROC(tbp);
cluster_append(&bp->b_bio1, tbp);
*
* An auxiliary reference DOES NOT move a vnode out of the VFREE state
* once it has entered it.
+ *
+ * MPSAFE
*/
void
vhold(struct vnode *vp)
{
struct vnode *vp = obj;
- lwkt_token_init(&vp->v_pollinfo.vpi_token);
+ lwkt_token_init(&vp->v_token);
lockinit(&vp->v_lock, "vnode", 0, 0);
ccms_dataspace_init(&vp->v_ccms);
TAILQ_INIT(&vp->v_namecache);
#ifdef INVARIANTS
if (vp->v_data)
panic("cleaned vnode isn't");
- if (vp->v_track_read.bk_active + vp->v_track_write.bk_active)
+ if (bio_track_active(&vp->v_track_read) ||
+ bio_track_active(&vp->v_track_write)) {
panic("Clean vnode has pending I/O's");
+ }
if (vp->v_flag & VONWORKLST)
panic("Clean vnode still pending on syncer worklist!");
if (!RB_EMPTY(&vp->v_rbdirty_tree))
vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
{
struct vinvalbuf_bp_info info;
- int error;
vm_object_t object;
+ lwkt_tokref vlock;
+ int error;
+
+ lwkt_gettoken(&vlock, &vp->v_token);
/*
* If we are being asked to save, call fsync to ensure that the inode
* is updated.
*/
if (flags & V_SAVE) {
- crit_enter();
- while (vp->v_track_write.bk_active) {
- vp->v_track_write.bk_waitflag = 1;
- error = tsleep(&vp->v_track_write, slpflag,
- "vinvlbuf", slptimeo);
- if (error) {
- crit_exit();
- return (error);
- }
- }
+ error = bio_track_wait(&vp->v_track_write, slpflag, slptimeo);
+ if (error)
+ goto done;
if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
- crit_exit();
if ((error = VOP_FSYNC(vp, MNT_WAIT)) != 0)
- return (error);
- crit_enter();
+ goto done;
/*
* Dirty bufs may be left or generated via races
* panic if we are trying to reclaim the vnode.
*/
if ((vp->v_flag & VRECLAIMED) &&
- (vp->v_track_write.bk_active > 0 ||
+ (bio_track_active(&vp->v_track_write) ||
!RB_EMPTY(&vp->v_rbdirty_tree))) {
panic("vinvalbuf: dirty bufs");
}
}
- crit_exit();
}
- crit_enter();
info.slptimeo = slptimeo;
info.lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
if (slpflag & PCATCH)
* Flush the buffer cache until nothing is left.
*/
while (!RB_EMPTY(&vp->v_rbclean_tree) ||
- !RB_EMPTY(&vp->v_rbdirty_tree)) {
+ !RB_EMPTY(&vp->v_rbdirty_tree)) {
error = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree, NULL,
vinvalbuf_bp, &info);
if (error == 0) {
}
/*
- * Wait for I/O to complete. XXX needs cleaning up. The vnode can
- * have write I/O in-progress but if there is a VM object then the
- * VM object can also have read-I/O in-progress.
+ * Wait for I/O completion. We may block in the pip code so we have
+ * to re-check.
*/
do {
- while (vp->v_track_write.bk_active > 0) {
- vp->v_track_write.bk_waitflag = 1;
- tsleep(&vp->v_track_write, 0, "vnvlbv", 0);
- }
+ bio_track_wait(&vp->v_track_write, 0, 0);
if ((object = vp->v_object) != NULL) {
while (object->paging_in_progress)
vm_object_pip_sleep(object, "vnvlbx");
}
- } while (vp->v_track_write.bk_active > 0);
-
- crit_exit();
+ } while (bio_track_active(&vp->v_track_write));
/*
* Destroy the copy in the VM cache, too.
panic("vinvalbuf: flush failed");
if (!RB_EMPTY(&vp->v_rbhash_tree))
panic("vinvalbuf: flush failed, buffers still present");
- return (0);
+ error = 0;
+done:
+ lwkt_reltoken(&vlock);
+ return (error);
}
static int
vtruncbuf(struct vnode *vp, off_t length, int blksize)
{
off_t truncloffset;
- int count;
const char *filename;
+ lwkt_tokref vlock;
+ int count;
/*
* Round up to the *next* block, then destroy the buffers in question.
else
truncloffset = length;
- crit_enter();
+ lwkt_gettoken(&vlock, &vp->v_token);
do {
count = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree,
vtruncbuf_bp_trunc_cmp,
/*
* Clean out any left over VM backing store.
- */
- crit_exit();
-
- vnode_pager_setsize(vp, length);
-
- crit_enter();
-
- /*
+ *
* It is possible to have in-progress I/O from buffers that were
* not part of the truncation. This should not happen if we
* are truncating to 0-length.
*/
+ vnode_pager_setsize(vp, length);
+ bio_track_wait(&vp->v_track_write, 0, 0);
+
filename = TAILQ_FIRST(&vp->v_namecache) ?
TAILQ_FIRST(&vp->v_namecache)->nc_name : "?";
- while ((count = vp->v_track_write.bk_active) > 0) {
- vp->v_track_write.bk_waitflag = 1;
- tsleep(&vp->v_track_write, 0, "vbtrunc", 0);
- if (length == 0) {
- kprintf("Warning: vtruncbuf(): Had to wait for "
- "%d buffer I/Os to finish in %s\n",
- count, filename);
- }
- }
-
/*
* Make sure no buffers were instantiated while we were trying
* to clean out the remaining VM pages. This could occur due
}
} while(count);
- crit_exit();
+ lwkt_reltoken(&vlock);
return (0);
}
int (*waitoutput)(struct vnode *, struct thread *))
{
struct vfsync_info info;
+ lwkt_tokref vlock;
int error;
bzero(&info, sizeof(info));
if ((info.checkdef = checkdef) == NULL)
info.syncdeps = 1;
- crit_enter_id("vfsync");
+ lwkt_gettoken(&vlock, &vp->v_token);
switch(waitfor) {
case MNT_LAZY:
kprintf("Warning: vfsync skipped %d dirty bufs in pass2!\n", info.skippedbufs);
}
while (error == 0 && passes > 0 &&
- !RB_EMPTY(&vp->v_rbdirty_tree)) {
+ !RB_EMPTY(&vp->v_rbdirty_tree)
+ ) {
if (--passes == 0) {
info.synchronous = 1;
info.syncdeps = 1;
}
break;
}
- crit_exit_id("vfsync");
+ lwkt_reltoken(&vlock);
return(error);
}
static int
-vfsync_wait_output(struct vnode *vp, int (*waitoutput)(struct vnode *, struct thread *))
+vfsync_wait_output(struct vnode *vp,
+ int (*waitoutput)(struct vnode *, struct thread *))
{
- int error = 0;
+ int error;
- while (vp->v_track_write.bk_active) {
- vp->v_track_write.bk_waitflag = 1;
- tsleep(&vp->v_track_write, 0, "fsfsn", 0);
- }
+ error = bio_track_wait(&vp->v_track_write, 0, 0);
if (waitoutput)
error = waitoutput(vp, curthread);
return(error);
* Synchronous flushing. An error may be returned.
*/
bremfree(bp);
- crit_exit_id("vfsync");
error = bwrite(bp);
- crit_enter_id("vfsync");
} else {
/*
* Asynchronous flushing. A negative return value simply
} else {
info->lazycount += bp->b_bufsize;
bremfree(bp);
- crit_exit_id("vfsync");
bawrite(bp);
- crit_enter_id("vfsync");
}
if (info->lazylimit && info->lazycount >= info->lazylimit)
error = 1;
/*
* Associate a buffer with a vnode.
+ *
+ * MPSAFE
*/
-void
+int
bgetvp(struct vnode *vp, struct buf *bp)
{
+ lwkt_tokref vlock;
+
KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
KKASSERT((bp->b_flags & (B_HASHED|B_DELWRI|B_VNCLEAN|B_VNDIRTY)) == 0);
- vhold(vp);
/*
* Insert onto list for new vnode.
*/
- crit_enter();
+ lwkt_gettoken(&vlock, &vp->v_token);
+ if (buf_rb_hash_RB_INSERT(&vp->v_rbhash_tree, bp)) {
+ lwkt_reltoken(&vlock);
+ return (EEXIST);
+ }
bp->b_vp = vp;
bp->b_flags |= B_HASHED;
- if (buf_rb_hash_RB_INSERT(&vp->v_rbhash_tree, bp))
- panic("reassignbuf: dup lblk vp %p bp %p", vp, bp);
-
bp->b_flags |= B_VNCLEAN;
if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp))
panic("reassignbuf: dup lblk/clean vp %p bp %p", vp, bp);
- crit_exit();
+ vhold(vp);
+ lwkt_reltoken(&vlock);
+ return(0);
}
/*
brelvp(struct buf *bp)
{
struct vnode *vp;
+ lwkt_tokref vlock;
KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
* Delete from old vnode list, if on one.
*/
vp = bp->b_vp;
- crit_enter();
+ lwkt_gettoken(&vlock, &vp->v_token);
if (bp->b_flags & (B_VNDIRTY | B_VNCLEAN)) {
if (bp->b_flags & B_VNDIRTY)
buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp);
vp->v_flag &= ~VONWORKLST;
LIST_REMOVE(vp, v_synclist);
}
- crit_exit();
bp->b_vp = NULL;
+ lwkt_reltoken(&vlock);
+
vdrop(vp);
}
/*
* Reassign the buffer to the proper clean/dirty list based on B_DELWRI.
* This routine is called when the state of the B_DELWRI bit is changed.
+ *
+ * MPSAFE
*/
void
reassignbuf(struct buf *bp)
{
struct vnode *vp = bp->b_vp;
+ lwkt_tokref vlock;
int delay;
KKASSERT(vp != NULL);
if (bp->b_flags & B_PAGING)
panic("cannot reassign paging buffer");
- crit_enter();
+ lwkt_gettoken(&vlock, &vp->v_token);
if (bp->b_flags & B_DELWRI) {
/*
* Move to the dirty list, add the vnode to the worklist
LIST_REMOVE(vp, v_synclist);
}
}
- crit_exit();
+ lwkt_reltoken(&vlock);
}
/*
int
vn_pollrecord(struct vnode *vp, int events)
{
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
KKASSERT(curthread->td_proc != NULL);
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
if (vp->v_pollinfo.vpi_revents & events) {
/*
* This leaves events we are not interested
events &= vp->v_pollinfo.vpi_revents;
vp->v_pollinfo.vpi_revents &= ~events;
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
return events;
}
vp->v_pollinfo.vpi_events |= events;
selrecord(curthread, &vp->v_pollinfo.vpi_selinfo);
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
return 0;
}
void
vn_pollevent(struct vnode *vp, int events)
{
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
if (vp->v_pollinfo.vpi_events & events) {
/*
* We clear vpi_events so that we don't
vp->v_pollinfo.vpi_revents |= events;
selwakeup(&vp->v_pollinfo.vpi_selinfo);
}
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
}
/*
void
vn_pollgone(struct vnode *vp)
{
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
if (vp->v_pollinfo.vpi_events) {
vp->v_pollinfo.vpi_events = 0;
selwakeup(&vp->v_pollinfo.vpi_selinfo);
}
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
}
/*
/*
* Check if vnode represents a disk device. The vnode does not need to be
* opened.
+ *
+ * MPALMOSTSAFE
*/
int
vn_isdisk(struct vnode *vp, int *errp)
return (0);
}
- if ((dev = vp->v_rdev) == NULL)
+ if ((dev = vp->v_rdev) == NULL) {
+ get_mplock();
dev = get_dev(vp->v_umajor, vp->v_uminor);
+ rel_mplock();
+ }
if (dev == NULL) {
if (errp != NULL)
static int syncer_delayno = 0;
static long syncer_mask;
+static struct lwkt_token syncer_token;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;
syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF,
&syncer_mask);
syncer_maxdelay = syncer_mask + 1;
+ lwkt_token_init(&syncer_token);
}
/*
/*
* Add an item to the syncer work queue.
+ *
+ * MPSAFE
*/
void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
+ lwkt_tokref ilock;
int slot;
- crit_enter();
+ lwkt_gettoken(&ilock, &syncer_token);
- if (vp->v_flag & VONWORKLST) {
+ if (vp->v_flag & VONWORKLST)
LIST_REMOVE(vp, v_synclist);
- }
-
if (delay > syncer_maxdelay - 2)
delay = syncer_maxdelay - 2;
slot = (syncer_delayno + delay) & syncer_mask;
LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
vp->v_flag |= VONWORKLST;
- crit_exit();
+
+ lwkt_reltoken(&ilock);
}
struct thread *updatethread;
void
sched_sync(void)
{
+ struct thread *td = curthread;
struct synclist *slp;
struct vnode *vp;
+ lwkt_tokref ilock;
+ lwkt_tokref vlock;
long starttime;
- struct thread *td = curthread;
EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td,
SHUTDOWN_PRI_LAST);
kproc_suspend_loop();
starttime = time_second;
+ lwkt_gettoken(&ilock, &syncer_token);
/*
* Push files whose dirty time has expired. Be careful
* of interrupt race on slp queue.
*/
- crit_enter();
slp = &syncer_workitem_pending[syncer_delayno];
syncer_delayno += 1;
if (syncer_delayno == syncer_maxdelay)
syncer_delayno = 0;
- crit_exit();
while ((vp = LIST_FIRST(slp)) != NULL) {
if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
VOP_FSYNC(vp, MNT_LAZY);
vput(vp);
}
- crit_enter();
/*
* If the vnode is still at the head of the list
* here.
*/
if (LIST_FIRST(slp) == vp) {
- if (RB_EMPTY(&vp->v_rbdirty_tree) &&
- !vn_isdisk(vp, NULL)) {
- panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
+ lwkt_gettoken(&vlock, &vp->v_token);
+ if (LIST_FIRST(slp) == vp) {
+ if (RB_EMPTY(&vp->v_rbdirty_tree) &&
+ !vn_isdisk(vp, NULL)) {
+ panic("sched_sync: fsync "
+ "failed vp %p tag %d",
+ vp, vp->v_tag);
+ }
+ vn_syncer_add_to_worklist(vp, syncdelay);
}
- vn_syncer_add_to_worklist(vp, syncdelay);
+ lwkt_reltoken(&vlock);
}
- crit_exit();
}
+ lwkt_reltoken(&ilock);
/*
* Do sync processing for each mount.
sync_reclaim(struct vop_reclaim_args *ap)
{
struct vnode *vp = ap->a_vp;
+ lwkt_tokref ilock;
- crit_enter();
+ lwkt_gettoken(&ilock, &syncer_token);
KKASSERT(vp->v_mount->mnt_syncer != vp);
if (vp->v_flag & VONWORKLST) {
LIST_REMOVE(vp, v_synclist);
vp->v_flag &= ~VONWORKLST;
}
- crit_exit();
+ lwkt_reltoken(&ilock);
return (0);
}
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/conf.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/thread2.h>
+
static int vn_closefile (struct file *fp);
static int vn_ioctl (struct file *fp, u_long com, caddr_t data,
struct ucred *cred);
static int svn_write (struct file *fp, struct uio *uio,
struct ucred *cred, int flags);
+/*
+ * MP-lock bypass knobs, all defaulting to 0 (disabled).
+ *
+ * Kernels built without SMP get compile-time constants of 0.  SMP
+ * kernels get read/write integer sysctls under the vfs node instead.
+ * read_mpsafe is tested in vn_read() together with the vnode's
+ * VMP_READ flag to decide whether VOP_READ() is called without the
+ * MP lock.
+ * NOTE(review): write_mpsafe and getattr_mpsafe presumably gate the
+ * write and getattr paths the same way -- confirm at their use sites.
+ */
+#ifndef SMP
+#define read_mpsafe	0
+#define write_mpsafe	0
+#define getattr_mpsafe	0
+#else
+static int read_mpsafe = 0;
+SYSCTL_INT(_vfs, OID_AUTO, read_mpsafe, CTLFLAG_RW, &read_mpsafe, 0, "");
+static int write_mpsafe = 0;
+SYSCTL_INT(_vfs, OID_AUTO, write_mpsafe, CTLFLAG_RW, &write_mpsafe, 0, "");
+static int getattr_mpsafe = 0;
+SYSCTL_INT(_vfs, OID_AUTO, getattr_mpsafe, CTLFLAG_RW, &getattr_mpsafe, 0, "");
+#endif
+
struct fileops vnode_fileops = {
.fo_read = vn_read,
.fo_write = vn_write,
{
/*
* Sequential heuristic - detect sequential operation
+ *
+ * NOTE: SMP: We allow f_seqcount updates to race.
*/
if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
uio->uio_offset == fp->f_nextoff) {
/*
* Not sequential, quick draw-down of seqcount
+ *
+ * NOTE: SMP: We allow f_seqcount updates to race.
*/
if (fp->f_seqcount > 1)
fp->f_seqcount = 1;
}
/*
+ * get - lock and return the f_offset field.
+ * set - set and unlock the f_offset field.
+ *
+ * These routines serve the dual purpose of serializing access to the
+ * f_offset field (at least on i386) and guaranteeing operational integrity
+ * when multiple read()ers and write()ers are present on the same fp.
+ *
+ * vn_get_fpf_offset() takes the lock by atomically setting FOFFSETLOCK
+ * in fp->f_flag.  If the bit is already set it sets FOFFSETWAKE and
+ * sleeps on &fp->f_flag, then retries.  It returns fp->f_offset as read
+ * while the lock is held.  The lock stays held on return; the caller
+ * is expected to drop it with vn_set_fpf_offset().
+ */
+static __inline off_t
+vn_get_fpf_offset(struct file *fp)
+{
+ u_int flags;
+ u_int nflags;
+
+ /*
+ * Shortcut critical path.
+ *
+ * Assume the lock is free: the expected old value is f_flag with
+ * FOFFSETLOCK masked out, so the compare-and-set can only succeed
+ * if the lock bit really was clear at that instant.
+ */
+ flags = fp->f_flag & ~FOFFSETLOCK;
+ if (atomic_cmpset_int(&fp->f_flag, flags, flags | FOFFSETLOCK))
+ return(fp->f_offset);
+
+ /*
+ * The hard way
+ *
+ * Lock held by someone else: ask for a wakeup by setting
+ * FOFFSETWAKE, then sleep.  tsleep_interlock() is issued before
+ * the compare-and-set, and tsleep() is skipped when the
+ * compare-and-set fails (f_flag changed underneath us); in that
+ * case we just loop and re-evaluate.
+ * NOTE(review): the ordering presumably closes the window in
+ * which a wakeup() could be lost between publishing FOFFSETWAKE
+ * and going to sleep -- confirm against tsleep_interlock().
+ *
+ * Lock free: try to set FOFFSETLOCK and leave the loop on
+ * success, otherwise retry.
+ */
+ for (;;) {
+ flags = fp->f_flag;
+ if (flags & FOFFSETLOCK) {
+ nflags = flags | FOFFSETWAKE;
+ crit_enter();
+ tsleep_interlock(&fp->f_flag);
+ if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
+ tsleep(&fp->f_flag, 0, "fpoff", 0);
+ crit_exit();
+ } else {
+ nflags = flags | FOFFSETLOCK;
+ if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
+ break;
+ }
+ }
+ return(fp->f_offset);
+}
+
+static __inline void
+vn_set_fpf_offset(struct file *fp, off_t offset)
+{
+	u_int oflags;
+	u_int cleared;
+
+	/*
+	 * The caller holds the offset lock, so the new offset can be
+	 * stored without interference.
+	 */
+	fp->f_offset = offset;
+
+	/*
+	 * Release: strip FOFFSETLOCK and FOFFSETWAKE from f_flag in one
+	 * compare-and-set, retrying for as long as f_flag changes
+	 * underneath us.
+	 */
+	do {
+		oflags = fp->f_flag;
+		cleared = oflags & ~(FOFFSETLOCK | FOFFSETWAKE);
+	} while (atomic_cmpset_int(&fp->f_flag, oflags, cleared) == 0);
+
+	/*
+	 * FOFFSETWAKE was set in the value we replaced, so issue a
+	 * wakeup on &fp->f_flag.
+	 */
+	if (oflags & FOFFSETWAKE)
+		wakeup(&fp->f_flag);
+}
+
+/*
+ * Return the current f_offset without changing it.
+ *
+ * On amd64, and on any non-SMP build, the field is simply read.  On
+ * other SMP builds the value is obtained through the get/set lock
+ * pair, storing back the same offset that was read.
+ */
+static __inline off_t
+vn_poll_fpf_offset(struct file *fp)
+{
+#if !defined(__amd64__) && defined(SMP)
+	off_t cur;
+
+	cur = vn_get_fpf_offset(fp);
+	vn_set_fpf_offset(fp, cur);
+	return(cur);
+#else
+	return(fp->f_offset);
+#endif
+}
+
+/*
* Package up an I/O request on a vnode into a uio and do it.
*/
int
/*
* MPALMOSTSAFE - acquires mplock
+ *
+ * File pointers can no longer get ripped up by revoke so
+ * we don't need to lock access to the vp.
+ *
+ * f_offset updates are not guaranteed against multiple readers
*/
static int
vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
struct vnode *vp;
int error, ioflag;
- get_mplock();
KASSERT(uio->uio_td == curthread,
("uio_td %p is not td %p", uio->uio_td, curthread));
vp = (struct vnode *)fp->f_data;
} else if (fp->f_flag & O_DIRECT) {
ioflag |= IO_DIRECT;
}
+ if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
+ uio->uio_offset = vn_get_fpf_offset(fp);
vn_lock(vp, LK_SHARED | LK_RETRY);
- if ((flags & O_FOFFSET) == 0)
- uio->uio_offset = fp->f_offset;
ioflag |= sequential_heuristic(uio, fp);
ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, uio);
- error = VOP_READ(vp, uio, ioflag, cred);
+ if (read_mpsafe && (vp->v_flag & VMP_READ)) {
+ error = VOP_READ(vp, uio, ioflag, cred);
+ } else {
+ get_mplock();
+ error = VOP_READ(vp, uio, ioflag, cred);
+ rel_mplock();
+ }
ccms_lock_put(&vp->v_ccms, &ccms_lock);
- if ((flags & O_FOFFSET) == 0)
- fp->f_offset = uio->uio_offset;
fp->f_nextoff = uio->uio_offset;
vn_unlock(vp);
- rel_mplock();
+ if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
+ vn_set_fpf_offset(fp, uio->uio_offset);
return (error);
}
error = 0;
goto done;
}
- if ((flags & O_FOFFSET) == 0)
- uio->uio_offset = fp->f_offset;
+ if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
+ uio->uio_offset = vn_get_fpf_offset(fp);
ioflag = 0;
if (flags & O_FBLOCKING) {
error = dev_dread(dev, uio, ioflag);
release_dev(dev);
- if ((flags & O_FOFFSET) == 0)
- fp->f_offset = uio->uio_offset;
fp->f_nextoff = uio->uio_offset;
+ if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
+ vn_set_fpf_offset(fp, uio->uio_offset);
done:
rel_mplock();
return (error);
struct vnode *vp;
int error, ioflag;
- get_mplock();
KASSERT(uio->uio_td == curthread,
("uio_td %p is not p %p", uio->uio_td, curthread));
vp = (struct vnode *)fp->f_data;
-#if 0
- /* VOP_WRITE should handle this now */
- if (vp->v_type == VREG || vp->v_type == VDATABASE)
- bwillwrite();
-#endif
- vp = (struct vnode *)fp->f_data; /* XXX needed? */
ioflag = IO_UNIT;
if (vp->v_type == VREG &&
if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
ioflag |= IO_SYNC;
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if ((flags & O_FOFFSET) == 0)
- uio->uio_offset = fp->f_offset;
+ uio->uio_offset = vn_get_fpf_offset(fp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
ioflag |= sequential_heuristic(uio, fp);
ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, uio);
- error = VOP_WRITE(vp, uio, ioflag, cred);
+ if (write_mpsafe && (vp->v_flag & VMP_WRITE)) {
+ error = VOP_WRITE(vp, uio, ioflag, cred);
+ } else {
+ get_mplock();
+ error = VOP_WRITE(vp, uio, ioflag, cred);
+ rel_mplock();
+ }
ccms_lock_put(&vp->v_ccms, &ccms_lock);
- if ((flags & O_FOFFSET) == 0)
- fp->f_offset = uio->uio_offset;
fp->f_nextoff = uio->uio_offset;
vn_unlock(vp);
- rel_mplock();
+ if ((flags & O_FOFFSET) == 0)
+ vn_set_fpf_offset(fp, uio->uio_offset);
return (error);
}
reference_dev(dev);
if ((flags & O_FOFFSET) == 0)
- uio->uio_offset = fp->f_offset;
+ uio->uio_offset = vn_get_fpf_offset(fp);
ioflag = IO_UNIT;
if (vp->v_type == VREG &&
error = dev_dwrite(dev, uio, ioflag);
release_dev(dev);
- if ((flags & O_FOFFSET) == 0)
- fp->f_offset = uio->uio_offset;
fp->f_nextoff = uio->uio_offset;
+ if ((flags & O_FOFFSET) == 0)
+ vn_set_fpf_offset(fp, uio->uio_offset);
done:
rel_mplock();
return (error);
}
/*
- * MPALMOSTSAFE - acquires mplock
+ * MPSAFE
*/
static int
vn_statfile(struct file *fp, struct stat *sb, struct ucred *cred)
struct vnode *vp;
int error;
- get_mplock();
vp = (struct vnode *)fp->f_data;
error = vn_stat(vp, sb, cred);
- rel_mplock();
return (error);
}
+/*
+ * MPSAFE (if vnode has VMP_GETATTR)
+ */
int
vn_stat(struct vnode *vp, struct stat *sb, struct ucred *cred)
{
cdev_t dev;
vap = &vattr;
- error = VOP_GETATTR(vp, vap);
+ if (getattr_mpsafe && (vp->v_flag & VMP_GETATTR)) {
+ error = VOP_GETATTR(vp, vap);
+ } else {
+ get_mplock();
+ error = VOP_GETATTR(vp, vap);
+ rel_mplock();
+ }
if (error)
return (error);
*/
dev = vp->v_rdev;
if (dev == NULL && vp->v_type == VCHR) {
+ get_mplock();
dev = get_dev(vp->v_umajor, vp->v_uminor);
+ rel_mplock();
}
sb->st_blksize = dev->si_bsize_best;
if (sb->st_blksize < dev->si_bsize_phys)
struct vnode *ovp;
struct vattr vattr;
int error;
+ off_t size;
get_mplock();
error = VOP_GETATTR(vp, &vattr);
if (error)
break;
- *(int *)data = vattr.va_size - fp->f_offset;
+ size = vattr.va_size;
+ if ((vp->v_flag & VNOTSEEKABLE) == 0)
+ size -= vn_poll_fpf_offset(fp);
+ if (size > 0x7FFFFFFF)
+ size = 0x7FFFFFFF;
+ *(int *)data = size;
error = 0;
break;
}
}
void
-lapic_init(vm_offset_t lapic_addr)
+lapic_map(vm_offset_t lapic_addr)
{
/* Local apic is mapped on last page */
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N |
#endif
-void lapic_init(vm_offset_t /* XXX should be vm_paddr_t */);
+void lapic_map(vm_offset_t /* XXX should be vm_paddr_t */);
#endif /* _MACHINE_MPAPIC_H */
mp_naps = 1; /* exclude BSP */
/* Map local apic before the id field is accessed */
- lapic_init(DEFAULT_APIC_BASE);
+ lapic_map(DEFAULT_APIC_BASE);
bsp_apicid = APIC_ID(lapic.id);
ap_apicid = (bsp_apicid == 0) ? 1 : 0;
KKASSERT(arg2.found_bsp);
/* Map local apic */
- lapic_init(lapic_addr);
+ lapic_map(lapic_addr);
mptable_unmap(&mpt);
}
if (lapic_addr == 0)
panic("madt_lapic_enumerate no local apic\n");
- lapic_init(lapic_addr);
+ lapic_map(lapic_addr);
bsp_apic_id = APIC_ID(lapic.id);
if (madt_pass2(madt_paddr, bsp_apic_id))
}
/*
- * this routine jerks page mappings from the
+ * This routine jerks page mappings from the
* kernel -- it is meant only for temporary mappings.
+ *
+ * MPSAFE, INTERRUPT SAFE (cluster callback)
*/
void
pmap_qremove(vm_offset_t va, int count)
*
* Must be called from a critical section (else an interrupt thread preemption
* may cause %gs to fault). Normally called from the low level swtch.s code.
+ *
+ * MPSAFE
*/
void
set_user_TLS(void)
* TLS descriptor or -1 on error.
*
* (int which, struct tls_info *info, size_t infosize)
+ *
+ * MPSAFE
*/
int
sys_set_tls_area(struct set_tls_area_args *uap)
* TLS descriptor or -1 on error.
*
* (int which, struct tls_info *info, size_t infosize)
+ *
+ * MPSAFE
*/
int
sys_get_tls_area(struct get_tls_area_args *uap)
APIC TPR priority vector levels:
0xff (255) +-------------+
- | | 15 (IPIs: Xspuriousint)
+ | | 15 (IPIs: Xcpustop, Xspuriousint)
0xf0 (240) +-------------+
- | | 14
+ | | 14 (IPIs: Xinvltlb, Xipiq, Xtimer)
0xe0 (224) +-------------+
| | 13
0xd0 (208) +-------------+
0xc0 (192) +-------------+
| | 11
0xb0 (176) +-------------+
- | | 10 (IPIs: Xcpustop)
+ | | 10
0xa0 (160) +-------------+
- | | 9 (IPIs: Xinvltlb)
+ | | 9
0x90 (144) +-------------+
| | 8 (linux/BSD syscall, IGNORE FAST HW INTS)
0x80 (128) +-------------+
#define TPR_IGNORE_HWI 0x5f /* ignore INTs */
#define TPR_BLOCK_FHWI 0x7f /* hardware FAST INTs */
#define TPR_IGNORE_FHWI 0x8f /* ignore FAST INTs */
-#define TPR_IPI_ONLY 0x8f /* ignore FAST INTs */
-#define TPR_BLOCK_XINVLTLB 0x9f /* */
-#define TPR_BLOCK_XCPUSTOP 0xaf /* */
+#define TPR_IPI_ONLY 0xdf /* block levels 0-13, leaving only the IPI levels (14, 15) */
+#define TPR_BLOCK_XINVLTLB 0xef /* block most IPIs */
+#define TPR_BLOCK_XCPUSTOP 0xf0 /* block Xcpustop */
#define TPR_BLOCK_ALL 0xff /* all INTs */
-
/* TLB shootdowns */
-#define XINVLTLB_OFFSET (IDT_OFFSET + 112)
+#define XINVLTLB_OFFSET (IDT_OFFSET + 192)
/* unused/open (was inter-cpu clock handling) */
-#define XUNUSED113_OFFSET (IDT_OFFSET + 113)
+#define XUNUSED113_OFFSET (IDT_OFFSET + 193)
-/* inter-CPU rendezvous */
-#define XUNUSED114_OFFSET (IDT_OFFSET + 114)
+/* unused/open (was inter-cpu rendezvous) */
+#define XUNUSED114_OFFSET (IDT_OFFSET + 194)
-/* IPIQ rendezvous */
-#define XIPIQ_OFFSET (IDT_OFFSET + 115)
+/* IPIQ */
+#define XIPIQ_OFFSET (IDT_OFFSET + 195)
-/* TIMER rendezvous */
-#define XTIMER_OFFSET (IDT_OFFSET + 116)
+/* Local APIC TIMER */
+#define XTIMER_OFFSET (IDT_OFFSET + 196)
/* IPI to signal CPUs to stop and wait for another CPU to restart them */
-#define XCPUSTOP_OFFSET (IDT_OFFSET + 128)
+#define XCPUSTOP_OFFSET (IDT_OFFSET + 208)
/*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
}
/*
- * this routine jerks page mappings from the
+ * This routine jerks page mappings from the
* kernel -- it is meant only for temporary mappings.
+ *
+ * MPSAFE, INTERRUPT SAFE (cluster callback)
*/
void
pmap_qremove(vm_offset_t va, int count)
* TLS descriptor or -1 on error.
*
* (int which, struct tls_info *info, size_t infosize)
+ *
+ * MPSAFE
*/
int
sys_get_tls_area(struct get_tls_area_args *uap)
/*
* Install the TLS
+ *
+ * MPSAFE
*/
void
set_user_TLS(void)
* TLS descriptor or -1 on error.
*
* (struct tls_info *info, int infosize, int which)
+ *
+ * MPSAFE
*/
int
sys_set_tls_area(struct set_tls_area_args *uap)
* TLS descriptor or -1 on error.
*
* (struct tls_info *info, int infosize, int which)
+ *
+ * MPSAFE
*/
int
sys_get_tls_area(struct get_tls_area_args *uap)
*/
struct bio_track {
int bk_active; /* I/O's currently in progress */
- int bk_waitflag;
};
+#define bio_track_active(track) ((track)->bk_active)
+#define bio_track_ref(track) atomic_add_int(&(track)->bk_active, 1)
+
+#ifdef _KERNEL
+
+int bio_track_wait(struct bio_track *track, int slp_flags, int slp_timo);
+
+#endif
+
#endif
#define GETBLK_SZMATCH 0x0004 /* pre-existing buffer must match */
#define GETBLK_NOWAIT 0x0008 /* non-blocking */
+#define FINDBLK_TEST 0x0010 /* test only, do not lock */
+#define FINDBLK_NBLOCK 0x0020 /* use non-blocking lock, can return NULL */
+
/*
* These flags are kept in b_flags.
*
int vfs_bio_awrite (struct buf *);
struct buf *getpbuf (int *);
int inmem (struct vnode *, off_t);
-struct buf *findblk (struct vnode *, off_t);
+struct buf *findblk (struct vnode *, off_t, int);
struct buf *getblk (struct vnode *, off_t, int, int, int);
+struct buf *getcacheblk (struct vnode *, off_t);
struct buf *geteblk (int);
void regetblk(struct buf *bp);
struct bio *push_bio(struct bio *);
void vunmapbuf (struct buf *);
void relpbuf (struct buf *, int *);
void brelvp (struct buf *);
-void bgetvp (struct vnode *, struct buf *);
+int bgetvp (struct vnode *, struct buf *);
int allocbuf (struct buf *bp, int size);
int scan_all_buffers (int (*)(struct buf *, void *), void *);
void reassignbuf (struct buf *);
#ifndef _SYS_SERIALIZE_H_
#include <sys/serialize.h>
#endif
+#ifndef _SYS_SPINLOCK_H_
+#include <sys/spinlock.h>
+#endif
#ifndef _SYS_TREE_H_
#include <sys/tree.h>
#endif
struct ccms_info *info;
struct ccms_dataspace *chain;
ccms_state_t defstate;
+ struct spinlock spin;
+ struct ccms_cst *delayed_free; /* delayed frees */
};
/*
*/
struct ccms_cst {
RB_ENTRY(ccms_cst) rbnode; /* stored in a red-black tree */
+ struct ccms_cst *delayed_next; /* linked list to free */
off_t beg_offset;
off_t end_offset;
ccms_state_t state; /* local cache state */
#define D_MEM 0x0008
#define D_TYPEMASK 0xffff
+#define D_SEEKABLE (D_TAPE | D_DISK | D_MEM)
/*
* Flags for d_flags.
#if defined(_KERNEL) || defined(_KERNEL_STRUCTURES)
#define FREVOKED 0x10000000 /* revoked by fdrevoke() */
#define FAPPENDONLY 0x20000000 /* O_APPEND cannot be changed */
+#define FOFFSETLOCK 0x40000000 /* f_offset locked */
+#define FOFFSETWAKE 0x80000000 /* f_offset wakeup */
#endif
#define O_FMASK (O_FBLOCKING|O_FNONBLOCKING|O_FAPPEND|O_FOFFSET|\
/*
* Return 1 if the passed credential is in a jail, otherwise 0.
+ *
+ * MPSAFE
*/
static __inline int
jailed(struct ucred *cred)
* System call hiders.
*
* DO NOT EDIT-- this file is automatically generated.
- * $DragonFly: src/sys/sys/syscall-hide.h,v 1.63 2008/11/11 00:55:49 pavalos Exp $
- * created from DragonFly: src/sys/kern/syscalls.master,v 1.60 2008/11/10 22:11:45 pavalos Exp
*/
#ifdef COMPAT_43
* System call numbers.
*
* DO NOT EDIT-- this file is automatically generated.
- * $DragonFly: src/sys/sys/syscall.h,v 1.63 2008/11/11 00:55:49 pavalos Exp $
- * created from DragonFly: src/sys/kern/syscalls.master,v 1.60 2008/11/10 22:11:45 pavalos Exp
*/
#define SYS_syscall 0
# DragonFly system call names.
# DO NOT EDIT-- this file is automatically generated.
-# $DragonFly: src/sys/sys/syscall.mk,v 1.63 2008/11/11 00:55:49 pavalos Exp $
-# created from DragonFly: src/sys/kern/syscalls.master,v 1.60 2008/11/10 22:11:45 pavalos Exp
MIASM = \
syscall.o \
exit.o \
* System call prototypes.
*
* DO NOT EDIT-- this file is automatically generated.
- * $DragonFly: src/sys/sys/sysproto.h,v 1.63 2008/11/11 00:55:49 pavalos Exp $
- * created from DragonFly: src/sys/kern/syscalls.master,v 1.60 2008/11/10 22:11:45 pavalos Exp
*/
#ifndef _SYS_SYSPROTO_H_
* Union of syscall args for messaging.
*
* DO NOT EDIT-- this file is automatically generated.
- * $DragonFly: src/sys/sys/sysunion.h,v 1.60 2008/11/11 00:55:49 pavalos Exp $
- * created from DragonFly: src/sys/kern/syscalls.master,v 1.60 2008/11/10 22:11:45 pavalos Exp
*/
union sysunion {
#ifndef _SYS_TERMIOS_H_
#define _SYS_TERMIOS_H_
+/* Needed by tcgetsid(3). */
+#include <sys/stdint.h>
+#ifndef _PID_T_DECLARED
+typedef __pid_t pid_t;
+#define _PID_T_DECLARED
+#endif
+
/*
* Special Control Characters
*
int tcflush (int, int);
int tcsendbreak (int, int);
+#if __XSI_VISIBLE
+pid_t tcgetsid(int);
+#endif /* __XSI_VISIBLE */
+
#ifndef _POSIX_SOURCE
void cfmakeraw (struct termios *);
int cfsetspeed (struct termios *, speed_t);
}
#endif
+#define ASSERT_LWKT_TOKEN_HELD(token) \
+ KKASSERT((token)->t_owner == curthread)
+
typedef struct lwkt_tokref {
lwkt_token_t tr_tok; /* token in question */
lwkt_tokref_t tr_next; /* linked list */
* deal with clustered cache coherency issues and, more immediately, to
* protect operations associated with the kernel-managed journaling module.
*
+ * Certain fields within the vnode structure require v_token to be held:
+ *
+ * v_rbclean_tree
+ * v_rbdirty_tree
+ * v_rbhash_tree
+ * v_pollinfo
+ *
* NOTE: The vnode operations vector, v_ops, is a double-indirect that
* typically points to &v_mount->mnt_vn_use_ops. We use a double
* pointer because mnt_vn_use_ops may change dynamically when e.g.
int v_clen; /* length of current cluster */
struct vm_object *v_object; /* Place to store VM object */
struct lock v_lock; /* file/dir ops lock */
+ struct lwkt_token v_token; /* structural access */
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
struct namecache_list v_namecache; /* associated nc entries */
struct {
- struct lwkt_token vpi_token; /* lock to protect below */
struct selinfo vpi_selinfo; /* identity of poller(s) */
short vpi_events; /* what they are looking for */
short vpi_revents; /* what has happened */
/*
* Vnode flags.
*/
-#define VROOT 0x00001 /* root of its file system */
-#define VTEXT 0x00002 /* vnode is a pure text prototype */
-#define VSYSTEM 0x00004 /* vnode being used by kernel */
-#define VISTTY 0x00008 /* vnode represents a tty */
-#define VCTTYISOPEN 0x00010 /* controlling terminal tty is open */
-#define VCKPT 0x00020 /* checkpoint-restored vnode */
-#define VFSMID 0x00040 /* request FSMID update */
-#define VMAYHAVELOCKS 0x00080 /* there may be posix or flock locks on vp */
-#define VPFSROOT 0x00100 /* may be a pseudo filesystem root */
-/* open for business 0x00200 */
-/* open for business 0x00400 */
-/* open for business 0x00800 */
-#define VCACHED 0x01000 /* No active references but has cache value */
-#define VOBJBUF 0x02000 /* Allocate buffers in VM object */
-#define VINACTIVE 0x04000 /* The vnode is inactive (did VOP_INACTIVE) */
-#define VAGE 0x08000 /* Insert vnode at head of free list */
-#define VOLOCK 0x10000 /* vnode is locked waiting for an object */
-#define VOWANT 0x20000 /* a process is waiting for VOLOCK */
-#define VRECLAIMED 0x40000 /* This vnode has been destroyed */
-#define VFREE 0x80000 /* This vnode is on the freelist */
-/* open for business 0x100000 */
-#define VONWORKLST 0x200000 /* On syncer work-list */
-#define VMOUNT 0x400000 /* Mount in progress */
-#define VOBJDIRTY 0x800000 /* object might be dirty */
+#define VROOT 0x00000001 /* root of its file system */
+#define VTEXT 0x00000002 /* vnode is a pure text prototype */
+#define VSYSTEM 0x00000004 /* vnode being used by kernel */
+#define VISTTY 0x00000008 /* vnode represents a tty */
+#define VCTTYISOPEN 0x00000010 /* controlling terminal tty is open */
+#define VCKPT 0x00000020 /* checkpoint-restored vnode */
+#define VFSMID 0x00000040 /* request FSMID update */
+#define VMAYHAVELOCKS 0x00000080 /* maybe posix or flock locks on vp */
+#define VPFSROOT 0x00000100 /* may be a pseudo filesystem root */
+/* open for business 0x00000200 */
+/* open for business 0x00000400 */
+/* open for business 0x00000800 */
+#define VCACHED 0x00001000 /* No active references but has cache value */
+#define VOBJBUF 0x00002000 /* Allocate buffers in VM object */
+#define VINACTIVE 0x00004000 /* The vnode is inactive (did VOP_INACTIVE) */
+#define VAGE 0x00008000 /* Insert vnode at head of free list */
+#define VOLOCK 0x00010000 /* vnode is locked waiting for an object */
+#define VOWANT 0x00020000 /* a process is waiting for VOLOCK */
+#define VRECLAIMED 0x00040000 /* This vnode has been destroyed */
+#define VFREE 0x00080000 /* This vnode is on the freelist */
+#define VNOTSEEKABLE 0x00100000 /* rd/wr ignores file offset */
+#define VONWORKLST 0x00200000 /* On syncer work-list */
+#define VMOUNT 0x00400000 /* Mount in progress */
+#define VOBJDIRTY 0x00800000 /* object might be dirty */
+
+#define VMP_READ 0x01000000 /* supports MPSAFE read */
+#define VMP_WRITE 0x02000000 /* supports MPSAFE write */
+#define VMP_GETATTR 0x04000000 /* supports MPSAFE getattr */
/*
* vmntvnodescan() flags
}
}
}
+ vp->v_flag |= VNOTSEEKABLE;
return (vop_stdopen(ap));
bad:
vop_stdopen(ap); /* bump opencount/writecount as appropriate */
*/
metalbn = xap->in_lbn;
- if ((daddr == 0 && !findblk(vp, dbtodoff(fs, metalbn))) || metalbn == bn)
+ if ((daddr == 0 &&
+ !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) ||
+ metalbn == bn) {
break;
+ }
/*
* If we get here, we've either got the block in the cache
* or we have a disk address for it, go fetch it.
{
struct ext2_fsync_bp_info info;
struct vnode *vp = ap->a_vp;
+ lwkt_tokref vlock;
int count;
/*
*/
ext2_discard_prealloc(VTOI(vp));
- crit_enter();
+ lwkt_gettoken(&vlock, &vp->v_token);
info.vp = vp;
loop:
info.waitfor = ap->a_waitfor;
goto loop;
if (ap->a_waitfor == MNT_WAIT) {
- while (vp->v_track_write.bk_active) {
- vp->v_track_write.bk_waitflag = 1;
- tsleep(&vp->v_track_write, 0, "e2fsyn", 0);
- }
+ bio_track_wait(&vp->v_track_write, 0, 0);
#if DIAGNOSTIC
if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
vprint("ext2_fsync: dirty", vp);
}
#endif
}
- crit_exit();
+ lwkt_reltoken(&vlock);
return (EXT2_UPDATE(ap->a_vp, ap->a_waitfor == MNT_WAIT));
}
{
struct vnode *vp = ap->a_vp;
struct knote *kn = ap->a_kn;
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_hook = (caddr_t)vp;
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
return (0);
}
filt_ext2detach(struct knote *kn)
{
struct vnode *vp = (struct vnode *)kn->kn_hook;
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
kn, knote, kn_selnext);
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
}
/*ARGSUSED*/
* HAMMER locks
*/
struct hammer_lock {
- int refs; /* active references delay writes */
- int lockcount; /* lock count for exclusive/shared access */
- int wanted;
- int exwanted; /* number of threads waiting for ex lock */
- struct thread *locktd;
+ int refs; /* active references delay writes */
+ volatile u_int lockval; /* lock count and control bits */
+ struct thread *owner; /* owner if exclusively held */
};
+#define HAMMER_LOCKF_EXCLUSIVE 0x40000000
+#define HAMMER_LOCKF_WANTED 0x80000000
+
+static __inline int
+hammer_notlocked(struct hammer_lock *lock)
+{
+ return(lock->lockval == 0); /* non-zero only when lockval has no count and no control bits set */
+}
+
static __inline int
hammer_islocked(struct hammer_lock *lock)
{
- return(lock->lockcount != 0);
+ return(lock->lockval != 0);
}
static __inline int
static __inline int
hammer_lock_excl_owned(struct hammer_lock *lock, thread_t td)
{
- if (lock->lockcount > 0 && lock->locktd == td)
+ if ((lock->lockval & HAMMER_LOCKF_EXCLUSIVE) &&
+ lock->owner == td) {
return(1);
+ }
return(0);
}
case HAMMER_OBJTYPE_FIFO:
vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
break;
+ case HAMMER_OBJTYPE_REGFILE:
+ /*
+ * MPSAFE read supported.
+ */
+ vp->v_flag |= VMP_READ;
+ break;
default:
break;
}
+ vp->v_flag |= VMP_GETATTR;
/*
* Only mark as the root vnode if the ip is not
KKASSERT(ip->vp == NULL);
KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
KKASSERT(ip->cursor_ip_refs == 0);
- KKASSERT(ip->lock.lockcount == 0);
+ KKASSERT(hammer_notlocked(&ip->lock));
KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
KKASSERT(RB_EMPTY(&ip->rec_tree));
phys_offset = volume->ondisk->vol_buf_beg +
(zone2_offset & HAMMER_OFF_SHORT_MASK);
crit_enter();
- if ((bp = findblk(volume->devvp, phys_offset)) != NULL)
+ if ((bp = findblk(volume->devvp, phys_offset, FINDBLK_TEST)) != NULL)
bp = getblk(volume->devvp, phys_offset, bp->b_bufsize, 0, 0);
else
bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0);
hammer_ref(&ip->lock);
if (hammer_get_vnode(ip, &vp) == 0) {
- if ((bp = findblk(ip->vp, file_offset)) != NULL &&
+ if ((bp = findblk(ip->vp, file_offset, FINDBLK_TEST)) != NULL &&
bp->b_bio2.bio_offset != NOOFFSET) {
bp = getblk(ip->vp, file_offset, blksize, 0, 0);
bp->b_bio2.bio_offset = NOOFFSET;
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
thread_t td = curthread;
+ u_int lv;
+ u_int nlv;
KKASSERT(lock->refs > 0);
- crit_enter();
- if (lock->locktd != td) {
- while (lock->locktd != NULL || lock->lockcount) {
- ++lock->exwanted;
- lock->wanted = 1;
+ for (;;) {
+ lv = lock->lockval;
+
+ if (lv == 0) {
+ nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ lock->owner = td;
+ break;
+ }
+ } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) && lock->owner == td) {
+ nlv = (lv + 1);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv))
+ break;
+ } else {
if (hammer_debug_locks) {
kprintf("hammer_lock_ex: held by %p\n",
- lock->locktd);
+ lock->owner);
}
+ nlv = lv | HAMMER_LOCKF_WANTED;
++hammer_contention_count;
- tsleep(lock, 0, ident, 0);
- if (hammer_debug_locks)
- kprintf("hammer_lock_ex: try again\n");
- --lock->exwanted;
+ crit_enter();
+ tsleep_interlock(lock);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ tsleep(lock, 0, ident, 0);
+ if (hammer_debug_locks)
+ kprintf("hammer_lock_ex: try again\n");
+ }
+ crit_exit();
}
- lock->locktd = td;
}
- KKASSERT(lock->lockcount >= 0);
- ++lock->lockcount;
- crit_exit();
}
/*
hammer_lock_ex_try(struct hammer_lock *lock)
{
thread_t td = curthread;
+ int error;
+ u_int lv;
+ u_int nlv;
KKASSERT(lock->refs > 0);
- crit_enter();
- if (lock->locktd != td) {
- if (lock->locktd != NULL || lock->lockcount) {
- crit_exit();
- return(EAGAIN);
+ for (;;) {
+ lv = lock->lockval;
+
+ if (lv == 0) {
+ nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ lock->owner = td;
+ error = 0;
+ break;
+ }
+ } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) && lock->owner == td) {
+ nlv = (lv + 1);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ error = 0;
+ break;
+ }
+ } else {
+ error = EAGAIN;
+ break;
}
- lock->locktd = td;
}
- KKASSERT(lock->lockcount >= 0);
- ++lock->lockcount;
- crit_exit();
- return(0);
+ return (error);
}
/*
void
hammer_lock_sh(struct hammer_lock *lock)
{
+ thread_t td = curthread;
+ u_int lv;
+ u_int nlv;
+
KKASSERT(lock->refs > 0);
- crit_enter();
- while (lock->locktd != NULL) {
- if (lock->locktd == curthread) {
- Debugger("hammer_lock_sh: lock_sh on exclusive");
- ++lock->lockcount;
+ for (;;) {
+ lv = lock->lockval;
+
+ if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
+ nlv = (lv + 1);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv))
+ break;
+ } else if (lock->owner == td) {
+ /*
+ * Disallowed case, drop into kernel debugger for
+ * now. A cont continues w/ an exclusive lock.
+ */
+ nlv = (lv + 1);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ Debugger("hammer_lock_sh: already hold ex");
+ break;
+ }
+ } else {
+ nlv = lv | HAMMER_LOCKF_WANTED;
+ ++hammer_contention_count;
+ crit_enter();
+ tsleep_interlock(lock);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ tsleep(lock, 0, "hmrlck", 0);
+ }
crit_exit();
- return;
}
- lock->wanted = 1;
- tsleep(lock, 0, "hmrlck", 0);
}
- KKASSERT(lock->lockcount <= 0);
- --lock->lockcount;
- crit_exit();
}
int
hammer_lock_sh_try(struct hammer_lock *lock)
{
+ thread_t td = curthread;
+ u_int lv;
+ u_int nlv;
+ int error;
+
KKASSERT(lock->refs > 0);
- crit_enter();
- if (lock->locktd) {
- crit_exit();
- return(EAGAIN);
+ for (;;) {
+ lv = lock->lockval;
+
+ if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
+ nlv = (lv + 1);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ error = 0;
+ break;
+ }
+ } else if (lock->owner == td) {
+ /*
+ * Disallowed case, drop into kernel debugger for
+ * now. A cont continues w/ an exclusive lock.
+ */
+ nlv = (lv + 1);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ Debugger("hammer_lock_sh: already hold ex");
+ error = 0;
+ break;
+ }
+ } else {
+ error = EAGAIN;
+ break;
+ }
}
- KKASSERT(lock->lockcount <= 0);
- --lock->lockcount;
- crit_exit();
- return(0);
+ return (error);
}
/*
int
hammer_lock_upgrade(struct hammer_lock *lock)
{
+ thread_t td = curthread;
+ u_int lv;
+ u_int nlv;
int error;
- crit_enter();
- if (lock->lockcount > 0) {
- if (lock->locktd != curthread)
- panic("hammer_lock_upgrade: illegal lock state");
- error = 0;
- } else if (lock->lockcount == -1) {
- lock->lockcount = 1;
- lock->locktd = curthread;
- error = 0;
- } else if (lock->lockcount != 0) {
- error = EDEADLK;
- } else {
- panic("hammer_lock_upgrade: lock is not held");
- /* NOT REACHED */
- error = 0;
+ for (;;) {
+ lv = lock->lockval;
+
+ if ((lv & ~HAMMER_LOCKF_WANTED) == 1) {
+ nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ lock->owner = td;
+ error = 0;
+ break;
+ }
+ } else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
+ if (lock->owner != curthread)
+ panic("hammer_lock_upgrade: illegal state");
+ error = 0;
+ break;
+ } else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
+ panic("hammer_lock_upgrade: lock is not held");
+ /* NOT REACHED */
+ error = EDEADLK;
+ break;
+ } else {
+ error = EDEADLK;
+ break;
+ }
}
- crit_exit();
- return(error);
+ return (error);
}
/*
void
hammer_lock_downgrade(struct hammer_lock *lock)
{
- KKASSERT(lock->lockcount == 1 && lock->locktd == curthread);
- crit_enter();
- lock->lockcount = -1;
- lock->locktd = NULL;
- if (lock->wanted) {
- lock->wanted = 0;
- wakeup(lock);
+ thread_t td = curthread;
+ u_int lv;
+ u_int nlv;
+
+ KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
+ (HAMMER_LOCKF_EXCLUSIVE | 1));
+ KKASSERT(lock->owner == td);
+
+ /*
+ * NOTE: Must clear owner before releasing exclusivity
+ */
+ lock->owner = NULL;
+
+ for (;;) {
+ lv = lock->lockval;
+ nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ if (lv & HAMMER_LOCKF_WANTED)
+ wakeup(lock);
+ break;
+ }
}
- crit_exit();
- /* XXX memory barrier */
}
void
hammer_unlock(struct hammer_lock *lock)
{
- crit_enter();
- KKASSERT(lock->lockcount != 0);
- if (lock->lockcount < 0) {
- if (++lock->lockcount == 0 && lock->wanted) {
- lock->wanted = 0;
- wakeup(lock);
- }
- } else {
- KKASSERT(lock->locktd == curthread);
- if (--lock->lockcount == 0) {
- lock->locktd = NULL;
- if (lock->wanted) {
- lock->wanted = 0;
- wakeup(lock);
+ thread_t td = curthread;
+ u_int lv;
+ u_int nlv;
+
+ lv = lock->lockval;
+ KKASSERT(lv != 0);
+ if (lv & HAMMER_LOCKF_EXCLUSIVE)
+ KKASSERT(lock->owner == td);
+
+ for (;;) {
+ lv = lock->lockval;
+ nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
+ if (nlv > 1) {
+ nlv = lv - 1;
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv))
+ break;
+ } else if (nlv == 1) {
+ nlv = 0;
+ if (lv & HAMMER_LOCKF_EXCLUSIVE)
+ lock->owner = NULL;
+ if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
+ if (lv & HAMMER_LOCKF_WANTED)
+ wakeup(lock);
+ break;
}
+ } else {
+ panic("hammer_unlock: lock %p is not held", lock);
}
-
}
- crit_exit();
}
/*
int
hammer_lock_status(struct hammer_lock *lock)
{
- if (lock->lockcount < 0)
- return(-1);
- if (lock->lockcount > 0)
+ u_int lv = lock->lockval;
+
+ if (lv & HAMMER_LOCKF_EXCLUSIVE)
return(1);
+ else if (lv)
+ return(-1);
panic("hammer_lock_status: lock must be held: %p", lock);
}
hammer_ref(struct hammer_lock *lock)
{
KKASSERT(lock->refs >= 0);
- crit_enter();
- ++lock->refs;
- crit_exit();
+ atomic_add_int(&lock->refs, 1);
}
void
hammer_unref(struct hammer_lock *lock)
{
KKASSERT(lock->refs > 0);
- crit_enter();
- --lock->refs;
- crit_exit();
+ atomic_subtract_int(&lock->refs, 1);
}
/*
/*
* hammer_vop_read { vp, uio, ioflag, cred }
+ *
+ * MPALMOSTSAFE
*/
static
int
int seqcount;
int ioseqcount;
int blksize;
+ int got_mplock;
if (ap->a_vp->v_type != VREG)
return (EINVAL);
if (seqcount < ioseqcount)
seqcount = ioseqcount;
- hammer_start_transaction(&trans, ip->hmp);
+ if (curthread->td_mpcount) {
+ got_mplock = -1;
+ hammer_start_transaction(&trans, ip->hmp);
+ } else {
+ got_mplock = 0;
+ }
/*
* Access the data typically in HAMMER_BUFSIZE blocks via the
* buffer cache, but HAMMER may use a variable block size based
* on the offset.
+ *
+ * XXX Temporary hack, delay the start transaction while we remain
+ * MPSAFE. NOTE: ino_data.size cannot change while vnode is
+ * locked-shared.
*/
while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
int64_t base_offset;
offset = (int)uio->uio_offset & (blksize - 1);
base_offset = uio->uio_offset - offset;
+ /*
+ * MPSAFE
+ */
+ bp = getcacheblk(ap->a_vp, base_offset);
+ if (bp) {
+ error = 0;
+ goto skip;
+ }
+
+ /*
+ * MPUNSAFE
+ */
+ if (got_mplock == 0) {
+ got_mplock = 1;
+ get_mplock();
+ hammer_start_transaction(&trans, ip->hmp);
+ }
+
if (hammer_cluster_enable) {
/*
* Use file_limit to prevent cluster_read() from
brelse(bp);
break;
}
+skip:
/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
n = blksize - offset;
break;
hammer_stats_file_read += n;
}
- if ((ip->flags & HAMMER_INODE_RO) == 0 &&
- (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
- ip->ino_data.atime = trans.time;
- hammer_modify_inode(ip, HAMMER_INODE_ATIME);
+
+ /*
+ * XXX only update the atime if we had to get the MP lock.
+ * XXX hack hack hack, fixme.
+ */
+ if (got_mplock) {
+ if ((ip->flags & HAMMER_INODE_RO) == 0 &&
+ (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
+ ip->ino_data.atime = trans.time;
+ hammer_modify_inode(ip, HAMMER_INODE_ATIME);
+ }
+ hammer_done_transaction(&trans);
+ if (got_mplock > 0)
+ rel_mplock();
}
- hammer_done_transaction(&trans);
return (error);
}
* historically we fake the atime field to ensure consistent results.
* The atime field is stored in the B-Tree element and allowed to be
* updated without cycling the element.
+ *
+ * MPSAFE
*/
static
int
* mount structure.
*/
++hammer_stats_file_iopsr;
+ hammer_lock_sh(&ip->lock);
vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^
(u_int32_t)(ip->obj_asof >> 32);
default:
break;
}
+ hammer_unlock(&ip->lock);
return(0);
}
{
struct vnode *vp = ap->a_vp;
struct knote *kn = ap->a_kn;
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_hook = (caddr_t)vp;
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
return(0);
}
filt_hammerdetach(struct knote *kn)
{
struct vnode *vp = (void *)kn->kn_hook;
- lwkt_tokref ilock;
+ lwkt_tokref vlock;
- lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
+ lwkt_gettoken(&vlock, &vp->v_token);
SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
kn, knote, kn_selnext);
- lwkt_reltoken(&ilock);
+ lwkt_reltoken(&vlock);
}
static int
kprintf("cleanblkhd %p, dirtyblkhd %p, numoutput %d, type %d\n",
RB_ROOT(&vp->v_rbclean_tree),
RB_ROOT(&vp->v_rbdirty_tree),
- vp->v_track_write.bk_active, vp->v_type);
+ bio_track_active(&vp->v_track_write),
+ vp->v_type);
kprintf("union %p, tag %d, data[0] %08x, data[1] %08x\n",
vp->v_socket, vp->v_tag,
((u_int *)vp->v_data)[0],
(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
rabn = lbn + 1 + nra;
raoffset = (off_t)rabn * biosize;
- if (!findblk(vp, raoffset)) {
+ if (findblk(vp, raoffset, FINDBLK_TEST) == NULL) {
rabp = nfs_getcacheblk(vp, raoffset, biosize, td);
if (!rabp)
return (EINTR);
(np->n_direofoffset == 0 ||
loffset + NFS_DIRBLKSIZ < np->n_direofoffset) &&
(np->n_flag & NDONTCACHE) == 0 &&
- !findblk(vp, loffset + NFS_DIRBLKSIZ)) {
+ findblk(vp, loffset + NFS_DIRBLKSIZ, FINDBLK_TEST) == NULL
+ ) {
rabp = nfs_getcacheblk(vp, loffset + NFS_DIRBLKSIZ,
&nbs