if (lock != &sim_mplock) {
/* lock should be held already */
KKASSERT(lockstatus(lock, curthread) != 0);
- crit_enter();
- tsleep_interlock(ident);
+ tsleep_interlock(ident, flags);
lockmgr(lock, LK_RELEASE);
retval = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
} else {
if (lock != &sim_mplock) {
lockmgr(lock, LK_EXCLUSIVE);
- crit_exit();
}
return (retval);
#endif
}
xsoftc.ccb_scanq_running = 0;
- crit_enter();
- tsleep_interlock(&xsoftc.ccb_scanq);
+ tsleep_interlock(&xsoftc.ccb_scanq, 0);
xpt_unlock_buses();
tsleep(&xsoftc.ccb_scanq, PINTERLOCKED, "ccb_scanq", 0);
- crit_exit();
}
}
while ((mask & AP_SIGF_STOP) == 0) {
atomic_clear_int(&ap->ap_signal, mask);
ahci_port_thread_core(ap, mask);
- crit_enter();
- tsleep_interlock(&ap->ap_thread);
+ tsleep_interlock(&ap->ap_thread, 0);
if (ap->ap_signal == 0)
tsleep(&ap->ap_thread, PINTERLOCKED, "ahport", 0);
- crit_exit();
mask = ap->ap_signal;
}
ap->ap_thread = NULL;
* Sleep on a slightly different location
* for this interlock just for added safety.
*/
- crit_enter();
aic_lock(aic);
- tsleep_interlock(aic->platform_data);
+ tsleep_interlock(aic->platform_data, 0);
aic_unlock(aic);
tsleep(aic->platform_data, PINTERLOCKED, "thtrm", 0);
- crit_exit();
}
static void
if (LIST_EMPTY(&aic->timedout_scbs) != 0
&& (aic->flags & AIC_SHUTDOWN_RECOVERY) == 0) {
- crit_enter();
- tsleep_interlock(aic);
+ tsleep_interlock(aic, 0);
aic_unlock(aic);
tsleep(aic, PINTERLOCKED, "idle", 0);
aic_lock(aic);
- crit_exit();
}
if ((aic->flags & AIC_SHUTDOWN_RECOVERY) != 0)
/* prototypes */
static void ar_attach_raid(struct ar_softc *, int);
static void ar_done(struct bio *);
-static void ar_sync_done(struct bio *);
static void ar_config_changed(struct ar_softc *, int);
static int ar_rebuild(struct ar_softc *);
static int ar_highpoint_read_conf(struct ad_softc *, struct ar_softc **);
}
static void
-ar_sync_done(struct bio *bio)
-{
- bio->bio_buf->b_cmd = BUF_CMD_DONE;
- wakeup(bio);
-}
-
-static void
ar_config_changed(struct ar_softc *rdp, int writeback)
{
int disk, flags;
bp->b_data = data;
bp->b_bio1.bio_offset = (off_t)lba << DEV_BSHIFT;
bp->b_bcount = count;
- if (flags & AR_WAIT)
- bp->b_bio1.bio_done = ar_sync_done;
- else
+ if (flags & AR_WAIT) {
+ bp->b_bio1.bio_flags |= BIO_SYNC;
+ bp->b_bio1.bio_done = biodone_sync;
+ } else {
bp->b_bio1.bio_done = ar_rw_done;
+ }
if (flags & AR_READ)
bp->b_cmd = BUF_CMD_READ;
if (flags & AR_WRITE)
dev_dstrategy(adp->dev, &bp->b_bio1);
if (flags & AR_WAIT) {
- while ((retry++ < (15*hz/10)) && (error = !(bp->b_cmd == BUF_CMD_DONE)))
- error = tsleep(&bp->b_bio1, 0, "arrw", 10);
+ while (retry++ < (15*hz/10))
+ error = biowait_timeout(&bp->b_bio1, "arrw", 10);
if (!error && (bp->b_flags & B_ERROR))
error = bp->b_error;
- kfree(bp, M_AR);
+ if (error == EWOULDBLOCK)
+ bp->b_bio1.bio_done = ar_rw_done;
+ else
+ kfree(bp, M_AR);
}
return error;
}
return (1);
}
-static void
-fdformat_wakeup(struct bio *bio)
-{
- bio->bio_buf->b_cmd = BUF_CMD_DONE;
- wakeup(bio);
-}
-
static int
fdformat(cdev_t dev, struct fd_formb *finfo, struct ucred *cred)
{
(fd->ft.sectrac * fd->ft.heads)
+ finfo->head * fd->ft.sectrac) * fdblk;
bp->b_bio1.bio_driver_info = dev;
- bp->b_bio1.bio_done = fdformat_wakeup;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
+ bp->b_bio1.bio_done = biodone_sync;
bp->b_bcount = sizeof(struct fd_idfield_data) * finfo->fd_formb_nsecs;
bp->b_data = (caddr_t)finfo;
dev_dstrategy(dev, &bp->b_bio1);
/* ...and wait for it to complete */
- crit_enter();
- while (bp->b_cmd != BUF_CMD_DONE) {
- rv = tsleep(&bp->b_bio1, 0, "fdform", 20 * hz);
- if (rv == EWOULDBLOCK)
- break;
- }
- crit_exit();
-
+ rv = biowait_timeout(&bp->b_bio1, "fdform", 20 * hz);
if (rv == EWOULDBLOCK) {
/* timed out */
rv = EIO;
/* bio_offset is byte granularity, convert block granularity a_blkno */
dbuf.b_bio1.bio_offset = (off_t)(ap->a_blkno << DEV_BSHIFT);
dbuf.b_bio1.bio_caller_info1.ptr = (void *)rdp;
+ dbuf.b_bio1.bio_flags |= BIO_SYNC;
+ dbuf.b_bio1.bio_done = biodone_sync;
dbuf.b_bcount = dumppages * PAGE_SIZE;
dbuf.b_data = va;
dbuf.b_cmd = BUF_CMD_WRITE;
dev_dstrategy(rdp->cdev, &dbuf.b_bio1);
/* wait for completion, unlock the buffer, check status */
- if (biowait(&dbuf)) {
+ if (biowait(&dbuf.b_bio1, "dumpw")) {
BUF_UNLOCK(&dbuf);
return(dbuf.b_error ? dbuf.b_error : EIO);
}
while ((mask & AP_SIGF_STOP) == 0) {
atomic_clear_int(&ap->ap_signal, mask);
sili_port_thread_core(ap, mask);
- crit_enter();
- tsleep_interlock(&ap->ap_thread);
+ tsleep_interlock(&ap->ap_thread, 0);
if (ap->ap_signal == 0)
tsleep(&ap->ap_thread, PINTERLOCKED, "ahport", 0);
- crit_exit();
mask = ap->ap_signal;
}
ap->ap_thread = NULL;
* Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls
* for vnode-backed vn's, and the new vm_pager_strategy() call for
* vm_object-backed vn's.
- *
- * Currently B_ASYNC is only partially handled - for OBJT_SWAP I/O only.
*/
static int
vnstrategy(struct dev_strategy_args *ap)
DRM_UNLOCK(); \
lwkt_serialize_enter(&dev->irq_lock); \
if (!(condition)) { \
- crit_enter(); \
- tsleep_interlock(&(queue)); \
+ tsleep_interlock(&(queue), PCATCH); \
lwkt_serialize_exit(&dev->irq_lock); \
ret = -tsleep(&(queue), PCATCH | PINTERLOCKED, \
"drmwtq", (timeout)); \
- crit_exit(); \
} else { \
lwkt_serialize_exit(&dev->irq_lock); \
} \
break; /* Got lock */
}
/* Contention */
- crit_enter();
- tsleep_interlock((void *)&dev->lock.lock_queue);
+ tsleep_interlock((void *)&dev->lock.lock_queue, PCATCH);
DRM_UNLOCK();
retcode = tsleep((void *)&dev->lock.lock_queue,
- PCATCH | PINTERLOCKED, "drmlk2", 0);
- crit_exit();
+ PCATCH | PINTERLOCKED, "drmlk2", 0);
DRM_LOCK();
if (retcode)
break;
}
/* Contention */
- crit_enter();
- tsleep_interlock((void *)&dev->lock.lock_queue);
+ tsleep_interlock((void *)&dev->lock.lock_queue, PCATCH);
DRM_UNLOCK();
ret = tsleep((void *)&dev->lock.lock_queue,
PCATCH | PINTERLOCKED, "drmlk2", 0);
- crit_exit();
DRM_LOCK();
if (ret != 0)
break;
if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
while ((ret = r600_do_cp_idle(dev_priv)) != 0) {
DRM_DEBUG("radeon_do_cp_idle %d\n", ret);
- crit_enter();
- tsleep_interlock(&dev->lock.lock_queue);
+ tsleep_interlock(&dev->lock.lock_queue,
+ PCATCH);
DRM_UNLOCK();
ret = tsleep(&dev->lock.lock_queue,
PCATCH | PINTERLOCKED,
"rdnrel", 0);
- crit_exit();
DRM_LOCK();
}
} else {
while ((ret = radeon_do_cp_idle(dev_priv)) != 0) {
DRM_DEBUG("radeon_do_cp_idle %d\n", ret);
- crit_enter();
- tsleep_interlock(&dev->lock.lock_queue);
+ tsleep_interlock(&dev->lock.lock_queue,
+ PCATCH);
DRM_UNLOCK();
ret = tsleep(&dev->lock.lock_queue,
PCATCH | PINTERLOCKED,
"rdnrel", 0);
- crit_exit();
DRM_LOCK();
}
}
* especially when attaching fails.
*/
if ((sc->flags & IWI_FLAG_EXIT) == 0) {
- crit_enter();
- tsleep_interlock(IWI_FW_WAKE_MONITOR(sc));
+ tsleep_interlock(IWI_FW_WAKE_MONITOR(sc), 0);
lwkt_serialize_exit(ifp->if_serializer);
error = tsleep(IWI_FW_WAKE_MONITOR(sc),
PINTERLOCKED, "iwifwm", 0);
- crit_exit();
lwkt_serialize_enter(ifp->if_serializer);
}
if (sc->flags & IWI_FLAG_EXIT)
break;
- crit_enter();
- tsleep_interlock(IWI_FW_CMD_ACKED(sc));
+ tsleep_interlock(IWI_FW_CMD_ACKED(sc), 0);
lwkt_serialize_exit(ifp->if_serializer);
error = tsleep(IWI_FW_CMD_ACKED(sc),
PINTERLOCKED,
"iwirun", boff * hz);
- crit_exit();
lwkt_serialize_enter(ifp->if_serializer);
}
}
sc->flags |= IWI_FLAG_EXIT;
wakeup(IWI_FW_WAKE_MONITOR(sc));
- crit_enter();
- tsleep_interlock(IWI_FW_EXIT_MONITOR(sc));
+ tsleep_interlock(IWI_FW_EXIT_MONITOR(sc), 0);
lwkt_serialize_exit(ifp->if_serializer);
tsleep(IWI_FW_EXIT_MONITOR(sc), PINTERLOCKED, "iwiexi", 0);
- crit_exit();
/* No need to hold serializer again */
if_printf(ifp, "fw monitor exited\n");
if (!async) {
ASSERT_SERIALIZED(ifp->if_serializer);
- crit_enter();
- tsleep_interlock(IWI_FW_CMD_ACKED(sc));
+ tsleep_interlock(IWI_FW_CMD_ACKED(sc), 0);
lwkt_serialize_exit(ifp->if_serializer);
ret = tsleep(IWI_FW_CMD_ACKED(sc), PINTERLOCKED, "iwicmd", hz);
- crit_exit();
lwkt_serialize_enter(ifp->if_serializer);
} else {
ret = 0;
CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_ALLOW_STANDBY);
/* wait at most one second for firmware initialization to complete */
- crit_enter();
- tsleep_interlock(IWI_FW_INITIALIZED(sc));
+ tsleep_interlock(IWI_FW_INITIALIZED(sc), 0);
lwkt_serialize_exit(ifp->if_serializer);
error = tsleep(IWI_FW_INITIALIZED(sc), PINTERLOCKED, "iwiinit", hz);
- crit_exit();
lwkt_serialize_enter(ifp->if_serializer);
if (error != 0) {
device_printf(sc->sc_dev, "timeout waiting for firmware "
while ((sc->aifflags & AAC_AIFFLAGS_EXIT) == 0) {
retval = 0;
if ((sc->aifflags & AAC_AIFFLAGS_PENDING) == 0) {
- crit_enter();
- tsleep_interlock(sc->aifthread);
+ tsleep_interlock(sc->aifthread, 0);
AAC_LOCK_RELEASE(&sc->aac_io_lock);
retval = tsleep(sc->aifthread, PINTERLOCKED,
"aifthd", AAC_PERIODIC_INTERVAL * hz);
AAC_LOCK_ACQUIRE(&sc->aac_io_lock);
- crit_exit();
}
/*
* First see if any FIBs need to be allocated. This needs
aac_startio(sc);
/* Lock is held */
KKASSERT(lockstatus(&sc->aac_io_lock, curthread) != 0);
- crit_enter();
- tsleep_interlock(cm);
+ tsleep_interlock(cm, 0);
AAC_LOCK_RELEASE(&sc->aac_io_lock);
error = tsleep(cm, PINTERLOCKED, "aacwait", 0);
AAC_LOCK_ACQUIRE(&sc->aac_io_lock);
- crit_exit();
return(error);
}
event->ev_callback = aac_ioctl_event;
event->ev_arg = &cm;
aac_add_event(sc, event);
- crit_enter();
- tsleep_interlock(&cm);
+ tsleep_interlock(&cm, 0);
AAC_LOCK_RELEASE(&sc->aac_io_lock);
tsleep(&cm, PINTERLOCKED, "sendfib", 0);
AAC_LOCK_ACQUIRE(&sc->aac_io_lock);
- crit_exit();
}
AAC_LOCK_RELEASE(&sc->aac_io_lock);
SD[sbp->sdno].writes++;
SD[sbp->sdno].bytes_written += sbp->b.b_bcount;
}
+ biodone_sync(bio);
biodone(sbp->bio); /* complete the caller's I/O */
BUF_UNLOCK(&sbp->b);
BUF_LOCKFREE(&sbp->b);
bp = geteblk(len); /* get a buffer header */
bp->b_cmd = cmd;
bp->b_bio1.bio_offset = offset; /* disk offset */
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
saveaddr = bp->b_data;
bp->b_data = buf;
bp->b_bcount = len;
vn_strategy(drive->vp, &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, (cmd == BUF_CMD_READ ? "drvrd" : "drvwr"));
bp->b_data = saveaddr;
bp->b_flags |= B_INVAL | B_AGE;
bp->b_flags &= ~B_ERROR;
/* Initialize the buf struct */
/* copy these flags from user bp */
- bp->b_flags = ubp->b_flags & (B_ORDERED | B_NOCACHE | B_ASYNC);
+ bp->b_flags = ubp->b_flags & (B_ORDERED | B_NOCACHE);
bp->b_cmd = ubp->b_cmd;
#ifdef VINUMDEBUG
if (rqe->flags & XFR_BUFLOCKED) /* paranoia */
initbufbio(&sbp->b);
sbp->b.b_bio1.bio_offset = bio->bio_offset + ((off_t)sd->driveoffset << DEV_BSHIFT);
sbp->b.b_bio1.bio_done = sdio_done; /* come here on completion */
+ sbp->b.b_bio1.bio_flags |= BIO_SYNC;
sbp->bio = bio; /* note the address of the original header */
sbp->sdno = sd->sdno; /* note for statistics */
sbp->driveno = sd->driveno;
bp->b_bcount = size;
bp->b_resid = bp->b_bcount;
bp->b_bio1.bio_offset = (off_t)plexblkno << DEV_BSHIFT; /* start here */
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
if (isstriped(plex)) /* we need to lock striped plexes */
lock = lockrange(plexblkno << DEV_BSHIFT, bp, plex); /* lock it */
if (vol != NULL) /* it's part of a volume, */
bp->b_cmd = BUF_CMD_READ;
vinumstart(dev, &bp->b_bio1, 1);
- biowait(bp);
+ biowait(&bp->b_bio1, "drvrd");
}
if (bp->b_flags & B_ERROR)
bp->b_bio1.bio_offset = (off_t)sd->revived << DEV_BSHIFT; /* write it to here */
bp->b_bio1.bio_driver_info = dev;
sdio(&bp->b_bio1); /* perform the I/O */
- biowait(bp);
+ biowait(&bp->b_bio1, "drvwr");
if (bp->b_flags & B_ERROR)
error = bp->b_error;
else {
pbp->b_cmd = BUF_CMD_WRITE;
pbp->b_resid = pbp->b_bcount;
sdio(&pbp->b_bio1); /* write the parity block */
- biowait(pbp);
+ biowait(&pbp->b_bio1, "drvwr");
}
if (((op == checkparity)
|| (op == rebuildandcheckparity))
*/
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
if ((sdno != psd) || (op != rebuildparity)) {
- biowait(bpp[sdno]);
+ biowait(&bpp[sdno]->b_bio1, "drvio");
if (bpp[sdno]->b_flags & B_ERROR) /* can't read, */
error = bpp[sdno]->b_error;
else if (sdno != psd) { /* update parity */
bzero(bp->b_data, bp->b_bcount);
bp->b_cmd = BUF_CMD_WRITE;
sdio(&bp->b_bio1); /* perform the I/O */
- biowait(bp);
+ biowait(&bp->b_bio1, "drvwr");
if (bp->b_flags & B_ERROR)
error = bp->b_error;
if ((error == 0) && verify) { /* check that it got there */
bp->b_bio1.bio_driver_info = VINUM_SD(sdno);
bp->b_cmd = BUF_CMD_READ; /* read it back */
sdio(&bp->b_bio1);
- biowait(bp);
+ biowait(&bp->b_bio1, "drvrd");
/*
* XXX Bug fix code. This is hopefully no
* longer needed (21 February 2000).
{
int r;
- crit_enter();
- tsleep_interlock(addr);
+ tsleep_interlock(addr, flags);
snd_mtxunlock(lock);
r = tsleep(addr, flags | PINTERLOCKED, wmesg, timo);
snd_mtxlock(lock);
- crit_exit();
return(r);
}
track = &dev->si_track_write;
bio_track_ref(track);
bio->bio_track = track;
+ KKASSERT((bio->bio_flags & BIO_DONE) == 0);
(void)dev->si_ops->d_strategy(&ap);
}
{
struct dev_strategy_args ap;
- KKASSERT(bio->bio_track != NULL);
ap.a_head.a_desc = &dev_strategy_desc;
ap.a_head.a_dev = dev;
ap.a_bio = bio;
+
+ KKASSERT(bio->bio_track != NULL);
+ KKASSERT((bio->bio_flags & BIO_DONE) == 0);
(void)dev->si_ops->d_strategy(&ap);
}
if (lkp->lk_lockholder != td &&
lkp->lk_lockholder != LK_KERNTHREAD) {
spin_unlock_wr(&lkp->lk_spinlock);
- panic("lockmgr: pid %d, not %s thr %p unlocking",
- (td->td_proc ? td->td_proc->p_pid : -99),
+ panic("lockmgr: pid %d, not %s thr %p/%p unlocking",
+ (td->td_proc ? td->td_proc->p_pid : -1),
"exclusive lock holder",
- lkp->lk_lockholder);
+ td, lkp->lk_lockholder);
}
if (lkp->lk_lockholder != LK_KERNTHREAD) {
COUNT(td, -1);
#include <vm/vm.h>
#include <vm/vm_extern.h>
-static void
-physwakeup(struct bio *bio)
-{
- bio->bio_buf->b_cmd = BUF_CMD_DONE;
- wakeup(bio);
-}
-
static int
physio(cdev_t dev, struct uio *uio, int ioflag)
{
reinitbufbio(bp); /* clear translation cache */
bp->b_bio1.bio_offset = uio->uio_offset;
- bp->b_bio1.bio_done = physwakeup;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
/*
* Setup for mapping the request into kernel memory.
bp->b_bcount = bcount;
}
dev_dstrategy(dev, &bp->b_bio1);
- crit_enter();
- while (bp->b_cmd != BUF_CMD_DONE)
- tsleep(&bp->b_bio1, 0, "physstr", 0);
- crit_exit();
+ biowait(&bp->b_bio1, "physstr");
iolen = bp->b_bcount - bp->b_resid;
if (uio->uio_segflg == UIO_USERSPACE) {
};
static void endtsleep (void *);
-static void unsleep_and_wakeup_thread(struct thread *td);
+static void tsleep_wakeup(struct thread *td);
static void loadav (void *arg);
static void schedcpu (void *arg);
}
/*
+ * This is a dandy function that allows us to interlock tsleep/wakeup
+ * operations with unspecified upper level locks, such as lockmgr locks,
+ * without the caller having to hold a critical section. The sequence is:
+ *
+ * (acquire upper level lock)
+ * tsleep_interlock(blah)
+ * (release upper level lock)
+ * tsleep(blah, ...)
+ *
+ * Basically this function queues us on the tsleep queue without actually
+ * descheduling us. When tsleep() is later called with PINTERLOCKED it
+ * assumes the thread was already queued, otherwise it queues it there.
+ *
+ * Thus it is possible to receive the wakeup prior to going to sleep and
+ * the race conditions are covered.
+ */
+static __inline void
+_tsleep_interlock(globaldata_t gd, void *ident, int flags)
+{
+	/*
+	 * gd    - globaldata of the cpu whose current thread gets queued
+	 * ident - wait channel the thread will be queued on
+	 * flags - only the PDOMAIN_MASK bits are recorded here
+	 */
+	thread_t td = gd->gd_curthread;
+	int id;
+
+	/*
+	 * The queue manipulation runs inside its own critical section, so
+	 * callers do not need to enter one around the interlock.
+	 */
+	crit_enter_quick(td);
+	if (td->td_flags & TDF_TSLEEPQ) {
+		/*
+		 * Still queued from an earlier interlock: pull the thread
+		 * off its old hash chain and clear this cpu's bit in the
+		 * chain's mask if the chain is now empty.
+		 */
+		id = LOOKUP(td->td_wchan);
+		TAILQ_REMOVE(&gd->gd_tsleep_hash[id], td, td_sleepq);
+		if (TAILQ_FIRST(&gd->gd_tsleep_hash[id]) == NULL)
+			atomic_clear_int(&slpque_cpumasks[id], gd->gd_cpumask);
+	} else {
+		td->td_flags |= TDF_TSLEEPQ;
+	}
+
+	/*
+	 * Queue on the chain for the new ident and record what we are
+	 * waiting on.  Setting the cpumask bit is an idempotent OR, so it
+	 * only needs to be done once.
+	 */
+	id = LOOKUP(ident);
+	TAILQ_INSERT_TAIL(&gd->gd_tsleep_hash[id], td, td_sleepq);
+	atomic_set_int(&slpque_cpumasks[id], gd->gd_cpumask);
+	td->td_wchan = ident;
+	td->td_wdomain = flags & PDOMAIN_MASK;
+	crit_exit_quick(td);
+}
+
+/*
+ * Out-of-line entry point: interlock the current thread on ident using
+ * this cpu's globaldata.
+ */
+void
+tsleep_interlock(void *ident, int flags)
+{
+	globaldata_t gd = mycpu;
+
+	_tsleep_interlock(gd, ident, flags);
+}
+
+/*
+ * Take a thread off this cpu's tsleep hash queue, if it is on one, and
+ * reset its wait channel and domain.  The caller must hold a critical
+ * section, and the thread must belong to the current cpu.
+ */
+static __inline void
+_tsleep_remove(thread_t td)
+{
+	globaldata_t mygd = mycpu;
+	int slot;
+
+	KKASSERT(td->td_gd == mygd);
+
+	/* Nothing to do unless the thread is actually queued. */
+	if ((td->td_flags & TDF_TSLEEPQ) == 0)
+		return;
+
+	td->td_flags &= ~TDF_TSLEEPQ;
+	slot = LOOKUP(td->td_wchan);
+	TAILQ_REMOVE(&mygd->gd_tsleep_hash[slot], td, td_sleepq);
+
+	/* Last sleeper on this chain: drop our cpu from the chain's mask. */
+	if (TAILQ_FIRST(&mygd->gd_tsleep_hash[slot]) == NULL)
+		atomic_clear_int(&slpque_cpumasks[slot], mygd->gd_cpumask);
+
+	td->td_wchan = NULL;
+	td->td_wdomain = 0;
+}
+
+/*
+ * Out-of-line version of _tsleep_remove() for callers outside this file.
+ * The same rule applies: a critical section must be held.
+ */
+void
+tsleep_remove(thread_t td)
+{
+	_tsleep_remove(td);
+}
+
+/*
+ * Remove a thread from the tsleep queue and schedule it.  The operation
+ * may complete asynchronously because the target thread may be sleeping
+ * on a different cpu.
+ *
+ * This function must be called while in a critical section.  When the
+ * target thread belongs to another cpu its td_flags cannot be probed
+ * safely from here, so the request is forwarded to that cpu instead.
+ */
+static __inline void
+_tsleep_wakeup(struct thread *td)
+{
+#ifdef SMP
+	/* Not ours: hand the wakeup to the owning cpu via IPI. */
+	if (td->td_gd != mycpu) {
+		lwkt_send_ipiq(td->td_gd, (ipifunc1_t)tsleep_wakeup, td);
+		return;
+	}
+#endif
+	_tsleep_remove(td);
+
+	/* Only reschedule a thread that tsleep() actually descheduled. */
+	if ((td->td_flags & TDF_TSLEEP_DESCHEDULED) == 0)
+		return;
+	td->td_flags &= ~TDF_TSLEEP_DESCHEDULED;
+	lwkt_schedule(td);
+}
+
+/*
+ * Addressable (non-inline) form of _tsleep_wakeup(); this is the function
+ * handed to lwkt_send_ipiq() when the target thread lives on another cpu.
+ */
+static void
+tsleep_wakeup(struct thread *td)
+{
+	_tsleep_wakeup(td);
+}
+
+
+/*
* General sleep call. Suspends the current process until a wakeup is
* performed on the specified identifier. The process will then be made
* runnable with the specified priority. Sleeps at most timo/hz seconds
* The entire sequence through to where we actually sleep must
* run without breaking the critical section.
*/
- id = LOOKUP(ident);
catch = flags & PCATCH;
error = 0;
sig = 0;
*
* Even the usched->release function just above can muff it up.
*/
- if ((flags & PINTERLOCKED) &&
- (slpque_cpumasks[id] & gd->gd_cpumask) == 0) {
- logtsleep2(ilockfail, ident);
- goto resume;
+ if (flags & PINTERLOCKED) {
+ if ((td->td_flags & TDF_TSLEEPQ) == 0) {
+ logtsleep2(ilockfail, ident);
+ goto resume;
+ }
+ } else {
+ id = LOOKUP(ident);
+ _tsleep_interlock(gd, ident, flags);
}
-
- /*
- * Move our thread to the correct queue and setup our wchan, etc.
- */
lwkt_deschedule_self(td);
- td->td_flags |= TDF_TSLEEPQ;
- TAILQ_INSERT_TAIL(&gd->gd_tsleep_hash[id], td, td_threadq);
- atomic_set_int(&slpque_cpumasks[id], gd->gd_cpumask);
-
- td->td_wchan = ident;
+ td->td_flags |= TDF_TSLEEP_DESCHEDULED;
td->td_wmesg = wmesg;
- td->td_wdomain = flags & PDOMAIN_MASK;
/*
* Setup the timeout, if any
}
/*
- * Since td_threadq is used both for our run queue AND for the
- * tsleep hash queue, we can't still be on it at this point because
- * we've gotten cpu back.
+ * Make sure we have been removed from the sleepq. This should
+ * have been done for us already.
*/
- KASSERT((td->td_flags & TDF_TSLEEPQ) == 0, ("tsleep: impossible thread flags %08x", td->td_flags));
- td->td_wchan = NULL;
+ _tsleep_remove(td);
td->td_wmesg = NULL;
- td->td_wdomain = 0;
+ if (td->td_flags & TDF_TSLEEP_DESCHEDULED) {
+ td->td_flags &= ~TDF_TSLEEP_DESCHEDULED;
+ kprintf("td %p (%s) unexpectedly rescheduled\n",
+ td, td->td_comm);
+ }
/*
* Figure out the correct error return. If interrupted by a
}
/*
- * This is a dandy function that allows us to interlock tsleep/wakeup
- * operations with unspecified upper level locks, such as lockmgr locks,
- * simply by holding a critical section. The sequence is:
- *
- * (enter critical section)
- * (acquire upper level lock)
- * tsleep_interlock(blah)
- * (release upper level lock)
- * tsleep(blah, ...)
- * (exit critical section)
- *
- * Basically this function sets our cpumask for the ident which informs
- * other cpus that our cpu 'might' be waiting (or about to wait on) the
- * hash index related to the ident. The critical section prevents another
- * cpu's wakeup() from being processed on our cpu until we are actually
- * able to enter the tsleep(). Thus, no race occurs between our attempt
- * to release a resource and sleep, and another cpu's attempt to acquire
- * a resource and call wakeup.
- *
- * There isn't much of a point to this function unless you call it while
- * holding a critical section.
- */
-static __inline void
-_tsleep_interlock(globaldata_t gd, void *ident)
-{
- int id = LOOKUP(ident);
-
- atomic_set_int(&slpque_cpumasks[id], gd->gd_cpumask);
-}
-
-void
-tsleep_interlock(void *ident)
-{
- _tsleep_interlock(mycpu, ident);
-}
-
-/*
* Interlocked spinlock sleep. An exclusively held spinlock must
* be passed to msleep(). The function will atomically release the
* spinlock and tsleep on the ident, then reacquire the spinlock and
globaldata_t gd = mycpu;
int error;
- crit_enter_gd(gd);
- _tsleep_interlock(gd, ident);
+ _tsleep_interlock(gd, ident, flags);
spin_unlock_wr_quick(gd, spin);
error = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
spin_lock_wr_quick(gd, spin);
- crit_exit_gd(gd);
return (error);
}
serialize_sleep(void *ident, struct lwkt_serialize *slz, int flags,
const char *wmesg, int timo)
{
+ globaldata_t gd = mycpu;
int ret;
ASSERT_SERIALIZED(slz);
- crit_enter();
- tsleep_interlock(ident);
+ _tsleep_interlock(gd, ident, flags);
lwkt_serialize_exit(slz);
ret = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
lwkt_serialize_enter(slz);
- crit_exit();
return ret;
}
*
* Setting TDF_SINTR will cause new signals to directly schedule us.
*
- * This routine is typically called while in a critical section.
+ * This routine must be called while in a critical section.
*/
int
lwkt_sleep(const char *wmesg, int flags)
* the cpu owning the thread. proc flags are only manipulated
* by the older of the MP lock. We have both.
*/
- if (td->td_flags & TDF_TSLEEPQ) {
+ if (td->td_flags & TDF_TSLEEP_DESCHEDULED) {
td->td_flags |= TDF_TIMEOUT;
if ((lp = td->td_lwp) != NULL) {
if (lp->lwp_proc->p_stat != SSTOP)
setrunnable(lp);
} else {
- unsleep_and_wakeup_thread(td);
+ _tsleep_wakeup(td);
}
}
crit_exit();
}
/*
- * Unsleep and wakeup a thread. This function runs without the MP lock
- * which means that it can only manipulate thread state on the owning cpu,
- * and cannot touch the process state at all.
- */
-static
-void
-unsleep_and_wakeup_thread(struct thread *td)
-{
- globaldata_t gd = mycpu;
- int id;
-
-#ifdef SMP
- if (td->td_gd != gd) {
- lwkt_send_ipiq(td->td_gd, (ipifunc1_t)unsleep_and_wakeup_thread, td);
- return;
- }
-#endif
- crit_enter();
- if (td->td_flags & TDF_TSLEEPQ) {
- td->td_flags &= ~TDF_TSLEEPQ;
- id = LOOKUP(td->td_wchan);
- TAILQ_REMOVE(&gd->gd_tsleep_hash[id], td, td_threadq);
- if (TAILQ_FIRST(&gd->gd_tsleep_hash[id]) == NULL)
- atomic_clear_int(&slpque_cpumasks[id], gd->gd_cpumask);
- lwkt_schedule(td);
- }
- crit_exit();
-}
-
-/*
* Make all processes sleeping on the specified identifier runnable.
* count may be zero or one only.
*
qp = &gd->gd_tsleep_hash[id];
restart:
for (td = TAILQ_FIRST(qp); td != NULL; td = ntd) {
- ntd = TAILQ_NEXT(td, td_threadq);
+ ntd = TAILQ_NEXT(td, td_sleepq);
if (td->td_wchan == ident &&
td->td_wdomain == (domain & PDOMAIN_MASK)
) {
- KKASSERT(td->td_flags & TDF_TSLEEPQ);
- td->td_flags &= ~TDF_TSLEEPQ;
- TAILQ_REMOVE(qp, td, td_threadq);
- if (TAILQ_FIRST(qp) == NULL) {
- atomic_clear_int(&slpque_cpumasks[id],
- gd->gd_cpumask);
+ KKASSERT(td->td_gd == gd);
+ _tsleep_remove(td);
+ if (td->td_flags & TDF_TSLEEP_DESCHEDULED) {
+ td->td_flags &= ~TDF_TSLEEP_DESCHEDULED;
+ lwkt_schedule(td);
+ if (domain & PWAKEUP_ONE)
+ goto done;
}
- lwkt_schedule(td);
- if (domain & PWAKEUP_ONE)
- goto done;
goto restart;
}
}
if (lp->lwp_stat == LSSTOP)
lp->lwp_stat = LSSLEEP;
if (lp->lwp_stat == LSSLEEP && (lp->lwp_flag & LWP_BREAKTSLEEP))
- unsleep_and_wakeup_thread(lp->lwp_thread);
+ _tsleep_wakeup(lp->lwp_thread);
crit_exit();
}
}
waddr = (void *)((intptr_t)VM_PAGE_TO_PHYS(m) + offset);
crit_enter();
- tsleep_interlock(waddr);
+ tsleep_interlock(waddr, PCATCH | PDOMAIN_UMTX);
if (*(int *)(sf_buf_kva(sf) + offset) == uap->value) {
vm_page_init_action(&action, umtx_sleep_page_action_cow, waddr);
vm_page_register_action(m, &action, VMEVENT_COW);
other_cpumask = mycpu->gd_other_cpus & smp_active_mask;
lwkt_send_ipiq_mask(other_cpumask, lwkt_sync_ipiq, &other_cpumask);
- crit_enter();
while (other_cpumask != 0) {
- tsleep_interlock(&other_cpumask);
+ tsleep_interlock(&other_cpumask, 0);
if (other_cpumask != 0)
tsleep(&other_cpumask, PINTERLOCKED, wmesg, 0);
}
- crit_exit();
}
#endif
lwkt_serialize_sleep(void *info)
{
lwkt_serialize_t s = info;
- crit_enter();
- tsleep_interlock(s);
+
+ tsleep_interlock(s, 0);
if (atomic_intr_cond_test(&s->interlock) != 0) {
logslz(sleep_beg, s);
tsleep(s, PINTERLOCKED, "slize", 0);
logslz(sleep_end, s);
}
- crit_exit();
}
#ifdef SMP
return;
}
- crit_enter();
- tsleep_interlock(s);
+ tsleep_interlock(s, 0);
if (atomic_intr_cond_test(&s->interlock) != 0) {
logslz(sleep_beg, s);
tsleep(s, PINTERLOCKED, "slize", 0);
logslz(sleep_end, s);
}
- crit_exit();
}
#endif /* SMP */
* At any point after lwkt_giveaway() is called, the target cpu may
* 'pull' the thread by calling lwkt_acquire().
*
+ * We have to make sure the thread is not sitting on a per-cpu tsleep
+ * queue or it will blow up when it moves to another cpu.
+ *
* MPSAFE - must be called under very specific conditions.
*/
void
globaldata_t gd = mycpu;
crit_enter_gd(gd);
+ if (td->td_flags & TDF_TSLEEPQ)
+ tsleep_remove(td);
KKASSERT(td->td_gd == gd);
TAILQ_REMOVE(&gd->gd_tdallq, td, td_allq);
td->td_flags |= TDF_MIGRATING;
* moving our thread to the tdallq of the target cpu, IPI messaging the
* target cpu, and switching out. TDF_MIGRATING prevents scheduling
* races while the thread is being migrated.
+ *
+ * We must be sure to remove ourselves from the current cpu's tsleepq
+ * before potentially moving to another queue. The thread can be on
+ * a tsleepq due to a left-over tsleep_interlock().
*/
#ifdef SMP
static void lwkt_setcpu_remote(void *arg);
if (td->td_gd != rgd) {
crit_enter_quick(td);
+ if (td->td_flags & TDF_TSLEEPQ)
+ tsleep_remove(td);
td->td_flags |= TDF_MIGRATING;
lwkt_deschedule_self(td);
TAILQ_REMOVE(&td->td_gd->gd_tdallq, td, td_allq);
lockmgr(&devsoftc.lock, LK_RELEASE);
return (EAGAIN);
}
- crit_enter();
- tsleep_interlock(&devsoftc);
+ tsleep_interlock(&devsoftc, PCATCH);
lockmgr(&devsoftc.lock, LK_RELEASE);
rv = tsleep(&devsoftc, PCATCH | PINTERLOCKED, "devctl", 0);
- crit_exit();
lockmgr(&devsoftc.lock, LK_EXCLUSIVE);
if (rv) {
/*
dname = dev_dname(wdev);
bp1 = geteblk((int)info->d_media_blksize);
bp1->b_bio1.bio_offset = info->d_media_blksize;
+ bp1->b_bio1.bio_done = biodone_sync;
+ bp1->b_bio1.bio_flags |= BIO_SYNC;
bp1->b_bcount = info->d_media_blksize;
bp1->b_cmd = BUF_CMD_READ;
dev_dstrategy(wdev, &bp1->b_bio1);
- if (biowait(bp1) != 0) {
+ if (biowait(&bp1->b_bio1, "gptrd") != 0) {
kprintf("%s: reading GPT @ block 1: error %d\n",
dname, bp1->b_error);
error = EIO;
*/
bp2 = geteblk((int)(table_blocks * info->d_media_blksize));
bp2->b_bio1.bio_offset = (off_t)table_lba * info->d_media_blksize;
+ bp2->b_bio1.bio_done = biodone_sync;
+ bp2->b_bio1.bio_flags |= BIO_SYNC;
bp2->b_bcount = table_blocks * info->d_media_blksize;
bp2->b_cmd = BUF_CMD_READ;
dev_dstrategy(wdev, &bp2->b_bio1);
- if (biowait(bp2) != 0) {
+ if (biowait(&bp2->b_bio1, "gptrd") != 0) {
kprintf("%s: reading GPT partition table @ %lld: error %d\n",
dname,
(long long)bp2->b_bio1.bio_offset,
bp = geteblk(secsize);
bp->b_bio1.bio_offset = (off_t)LABELSECTOR32 * secsize;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bcount = secsize;
bp->b_flags &= ~B_INVAL;
bp->b_cmd = BUF_CMD_READ;
dev_dstrategy(dev, &bp->b_bio1);
- if (biowait(bp))
+ if (biowait(&bp->b_bio1, "labrd"))
msg = "I/O error";
else for (dlp = (struct disklabel32 *)bp->b_data;
dlp <= (struct disklabel32 *)((char *)bp->b_data +
return (EXDEV); /* not quite right */
bp = geteblk((int)lp->d_secsize);
bp->b_bio1.bio_offset = (off_t)LABELSECTOR32 * lp->d_secsize;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bcount = lp->d_secsize;
#if 1
/*
bp->b_flags &= ~B_INVAL;
bp->b_cmd = BUF_CMD_READ;
dev_dstrategy(dkmodpart(dev, WHOLE_SLICE_PART), &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "labrd");
if (error)
goto done;
for (dlp = (struct disklabel32 *)bp->b_data;
error = EINVAL;
} else {
bp->b_cmd = BUF_CMD_WRITE;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
dev_dstrategy(dkmodpart(dev, WHOLE_SLICE_PART),
&bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "labwr");
}
goto done;
}
*dlp = *lp;
bp->b_flags &= ~B_INVAL;
bp->b_cmd = BUF_CMD_WRITE;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
BUF_STRATEGY(bp, 1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "labwr");
#endif
bp->b_flags |= B_INVAL | B_AGE;
brelse(bp);
bp = geteblk(bpsize);
bp->b_bio1.bio_offset = 0;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bcount = bpsize;
bp->b_flags &= ~B_INVAL;
bp->b_cmd = BUF_CMD_READ;
dev_dstrategy(dev, &bp->b_bio1);
- if (biowait(bp)) {
+ if (biowait(&bp->b_bio1, "labrd")) {
msg = "I/O error";
} else {
dlp = (struct disklabel64 *)bp->b_data;
bp = geteblk(bpsize);
bp->b_bio1.bio_offset = 0;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bcount = bpsize;
/*
bp->b_flags &= ~B_INVAL;
bp->b_cmd = BUF_CMD_READ;
dev_dstrategy(dkmodpart(dev, WHOLE_SLICE_PART), &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "labrd");
if (error)
goto done;
bcopy(&lp->d_magic, &dlp->d_magic,
sizeof(*lp) - offsetof(struct disklabel64, d_magic));
bp->b_cmd = BUF_CMD_WRITE;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
dev_dstrategy(dkmodpart(dev, WHOLE_SLICE_PART), &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "labwr");
done:
bp->b_flags |= B_INVAL | B_AGE;
brelse(bp);
wdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), WHOLE_SLICE_PART);
bp = geteblk((int)info->d_media_blksize);
bp->b_bio1.bio_offset = (off_t)mbr_offset * info->d_media_blksize;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bcount = info->d_media_blksize;
bp->b_cmd = BUF_CMD_READ;
dev_dstrategy(wdev, &bp->b_bio1);
- if (biowait(bp) != 0) {
+ if (biowait(&bp->b_bio1, "mbrrd") != 0) {
diskerr(&bp->b_bio1, wdev,
"reading primary partition table: error",
LOG_PRINTF, 0);
/* Read extended boot record. */
bp = geteblk((int)info->d_media_blksize);
bp->b_bio1.bio_offset = (off_t)ext_offset * info->d_media_blksize;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bcount = info->d_media_blksize;
bp->b_cmd = BUF_CMD_READ;
dev_dstrategy(dev, &bp->b_bio1);
- if (biowait(bp) != 0) {
+ if (biowait(&bp->b_bio1, "mbrrd") != 0) {
diskerr(&bp->b_bio1, dev,
"reading extended partition table: error",
LOG_PRINTF, 0);
* are held.
*/
rpipe->pipe_state |= PIPE_WANTR;
- crit_enter();
- tsleep_interlock(rpipe);
+ tsleep_interlock(rpipe, PCATCH);
lwkt_reltoken(&wlock);
error = tsleep(rpipe, PCATCH | PINTERLOCKED, "piperd", 0);
- crit_exit();
++pipe_rblocked_count;
if (error)
break;
bp->b_cmd = (cb->aio_lio_opcode == LIO_WRITE) ?
BUF_CMD_WRITE : BUF_CMD_READ;
bp->b_bio1.bio_done = aio_physwakeup;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bio1.bio_offset = cb->aio_offset;
/* Bring buffer into kernel space. */
notify = 0;
crit_enter();
+#if 0
/*
* If we had an error invoking the request, or an error in processing
* the request before we have returned, we process it as an error in
notify = 1;
}
}
+#endif
crit_exit();
if (notify)
KNOTE(&aiocbe->klist, 0);
bp = iocb->bp;
- crit_enter();
- while (bp->b_cmd != BUF_CMD_DONE) {
- if (tsleep(bp, 0, "physstr", aiod_timeout)) {
- if (bp->b_cmd != BUF_CMD_DONE) {
- crit_exit();
- return EINPROGRESS;
- } else {
- break;
- }
- }
+ error = biowait_timeout(&bp->b_bio1, "physstr", aiod_timeout);
+ if (error) {
+ if (error == EWOULDBLOCK)
+ return EINPROGRESS;
+ break;
}
- crit_exit();
/* Release mapping into kernel space. */
vunmapbuf(bp);
process_signal, aiocbe);
}
}
- bp->b_cmd = BUF_CMD_DONE;
- wakeup(bp);
+ biodone_sync(bio);
}
#endif /* VFS_AIO */
int totalspace;
if ((totalspace = bp->b_runningbufspace) != 0) {
- runningbufspace -= totalspace;
- --runningbufcount;
+ atomic_subtract_int(&runningbufspace, totalspace);
+ atomic_subtract_int(&runningbufcount, 1);
bp->b_runningbufspace = 0;
if (runningbufreq && runningbufspace <= lorunningspace) {
runningbufreq = 0;
}
/*
+ * buf_dirty_count_severe:
+ *
+ * Return true if we have too many dirty buffers.
+ */
+int
+buf_dirty_count_severe(void)
+{
+ /* Too much space tied up in running plus dirty buffers? */
+ if (runningbufspace + dirtybufspace >= hidirtybufspace)
+ return (1);
+ /* Otherwise severe only if half or more of the buffers are dirty. */
+ return (dirtybufcount >= nbuf / 2);
+}
+
+/*
* vfs_buf_test_cache:
*
* Called when a buffer is extended. This function clears the B_CACHE
while (totalspace > 0) {
bd_heatup();
- crit_enter();
if (totalspace > runningbufspace + dirtybufspace)
totalspace = runningbufspace + dirtybufspace;
count = totalspace / BKVASIZE;
spin_lock_wr(&needsbuffer_spin);
i = (bd_wake_index + count) & BD_WAKE_MASK;
++bd_wake_ary[i];
- tsleep_interlock(&bd_wake_ary[i]);
+ tsleep_interlock(&bd_wake_ary[i], 0);
spin_unlock_wr(&needsbuffer_spin);
-
tsleep(&bd_wake_ary[i], PINTERLOCKED, "flstik", hz);
- crit_exit();
totalspace = runningbufspace + dirtybufspace - hidirtybufspace;
}
* Full-on. Note that the wait flag may only be atomically set if
* the active count is non-zero.
*/
- crit_enter(); /* for tsleep_interlock */
error = 0;
while ((active = track->bk_active) != 0) {
desired = active | 0x80000000;
- tsleep_interlock(track);
+ tsleep_interlock(track, slp_flags);
if (active == desired ||
atomic_cmpset_int(&track->bk_active, active, desired)) {
error = tsleep(track, slp_flags | PINTERLOCKED,
break;
}
}
- crit_exit();
return (error);
}
bp->b_bio1.bio_offset = NOOFFSET;
bp->b_bio1.bio_next = &bp->b_bio2;
bp->b_bio1.bio_done = NULL;
+ bp->b_bio1.bio_flags = 0;
bp->b_bio2.bio_buf = bp;
bp->b_bio2.bio_prev = &bp->b_bio1;
bp->b_bio2.bio_offset = NOOFFSET;
bp->b_bio2.bio_next = NULL;
bp->b_bio2.bio_done = NULL;
+ bp->b_bio2.bio_flags = 0;
}
/*
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
get_mplock();
- KASSERT(!(bp->b_flags & B_ASYNC),
- ("bread: illegal async bp %p", bp));
- bp->b_flags &= ~(B_ERROR | B_INVAL);
+ bp->b_flags &= ~(B_ERROR | B_EINTR | B_INVAL);
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
vn_strategy(vp, &bp->b_bio1);
rel_mplock();
- return (biowait(bp));
+ return (biowait(&bp->b_bio1, "biord"));
}
return (0);
}
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
get_mplock();
- bp->b_flags &= ~(B_ERROR | B_INVAL);
+ bp->b_flags &= ~(B_ERROR | B_EINTR | B_INVAL);
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
vn_strategy(vp, &bp->b_bio1);
++readwait;
if ((rabp->b_flags & B_CACHE) == 0) {
rel_mplock();
- rabp->b_flags |= B_ASYNC;
- rabp->b_flags &= ~(B_ERROR | B_INVAL);
+ rabp->b_flags &= ~(B_ERROR | B_EINTR | B_INVAL);
rabp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(vp, rabp);
BUF_KERNPROC(rabp);
}
}
if (readwait)
- rv = biowait(bp);
+ rv = biowait(&bp->b_bio1, "biord");
return (rv);
}
/*
* bwrite:
*
+ * Synchronous write, waits for completion.
+ *
* Write, release buffer on completion. (Done by iodone
* if async). Do not bother writing anything if the buffer
* is invalid.
int
bwrite(struct buf *bp)
{
- int oldflags;
+ int error;
if (bp->b_flags & B_INVAL) {
brelse(bp);
return (0);
}
-
- oldflags = bp->b_flags;
-
if (BUF_REFCNTNB(bp) == 0)
panic("bwrite: buffer is not busy???");
- crit_enter();
/* Mark the buffer clean */
bundirty(bp);
- bp->b_flags &= ~B_ERROR;
+ bp->b_flags &= ~(B_ERROR | B_EINTR);
bp->b_flags |= B_CACHE;
bp->b_cmd = BUF_CMD_WRITE;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
vfs_busy_pages(bp->b_vp, bp);
/*
++runningbufcount;
}
- crit_exit();
- if (oldflags & B_ASYNC)
- BUF_KERNPROC(bp);
vn_strategy(bp->b_vp, &bp->b_bio1);
+ error = biowait(&bp->b_bio1, "biows");
+ brelse(bp);
+ return (error);
+}
- if ((oldflags & B_ASYNC) == 0) {
- int rtval = biowait(bp);
+/*
+ * bawrite:
+ *
+ * Asynchronous write. Start output on a buffer, but do not wait for
+ * it to complete. The buffer is released when the output completes.
+ *
+ * B_INVAL buffers are not written, they are released immediately.
+ * The caller must hold the buffer busy and must not have installed
+ * a bio_done callback on b_bio1.
+ */
+void
+bawrite(struct buf *bp)
+{
+ if (bp->b_flags & B_INVAL) {
brelse(bp);
- return (rtval);
+ return;
+ }
+ if (BUF_REFCNTNB(bp) == 0)
+ panic("bawrite: buffer is not busy???");
+
+ /* Mark the buffer clean */
+ bundirty(bp);
+
+ bp->b_flags &= ~(B_ERROR | B_EINTR);
+ bp->b_flags |= B_CACHE;
+ bp->b_cmd = BUF_CMD_WRITE;
+ KKASSERT(bp->b_bio1.bio_done == NULL);
+ vfs_busy_pages(bp->b_vp, bp);
+
+ /*
+ * Normal bwrites pipeline writes. NOTE: b_bufsize is only
+ * valid for vnode-backed buffers.
+ *
+ * The counters are decremented with atomic ops on completion, so
+ * they must be incremented atomically here as well or updates
+ * can be lost.
+ */
+ bp->b_runningbufspace = bp->b_bufsize;
+ if (bp->b_runningbufspace) {
+ atomic_add_int(&runningbufspace, bp->b_runningbufspace);
+ atomic_add_int(&runningbufcount, 1);
}
+
+ BUF_KERNPROC(bp);
+ vn_strategy(bp->b_vp, &bp->b_bio1);
+}
+
+/*
+ * bowrite:
+ *
+ * Ordered write. Start output on a buffer, and flag it so that the
+ * device will write it in the order it was queued. The buffer is
+ * released when the output completes. bawrite() handles B_INVAL
+ * buffers by releasing them without starting any I/O.
+ */
+int
+bowrite(struct buf *bp)
+{
+ bp->b_flags |= B_ORDERED;
+ bawrite(bp);
return (0);
}
}
/*
- * bawrite:
- *
- * Asynchronous write. Start output on a buffer, but do not wait for
- * it to complete. The buffer is released when the output completes.
- *
- * bwrite() ( or the VOP routine anyway ) is responsible for handling
- * B_INVAL buffers. Not us.
- */
-void
-bawrite(struct buf *bp)
-{
- bp->b_flags |= B_ASYNC;
- bwrite(bp);
-}
-
-/*
- * bowrite:
- *
- * Ordered write. Start output on a buffer, and flag it so that the
- * device will write it in the order it was queued. The buffer is
- * released when the output completes. bwrite() ( or the VOP routine
- * anyway ) is responsible for handling B_INVAL buffers.
- */
-int
-bowrite(struct buf *bp)
-{
- bp->b_flags |= B_ORDERED | B_ASYNC;
- return (bwrite(bp));
-}
-
-/*
- * buf_dirty_count_severe:
- *
- * Return true if we have too many dirty buffers.
- */
-int
-buf_dirty_count_severe(void)
-{
- return (runningbufspace + dirtybufspace >= hidirtybufspace ||
- dirtybufcount >= nbuf / 2);
-}
-
-/*
* brelse:
*
* Release a busy buffer and, if requested, free its resources. The
/*
* Clean up temporary flags and unlock the buffer.
*/
- bp->b_flags &= ~(B_ORDERED | B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT);
+ bp->b_flags &= ~(B_ORDERED | B_NOCACHE | B_RELBUF | B_DIRECT);
BUF_UNLOCK(bp);
}
* Final cleanup and unlock. Clear bits that are only used while a
* buffer is actively locked.
*/
- bp->b_flags &= ~(B_ORDERED | B_ASYNC | B_NOCACHE | B_RELBUF);
+ bp->b_flags &= ~(B_ORDERED | B_NOCACHE | B_RELBUF);
BUF_UNLOCK(bp);
}
* no valid data. We also free the page if the
* buffer was used for direct I/O.
*/
+#if 0
if ((bp->b_flags & B_ASYNC) == 0 && !m->valid &&
m->hold_count == 0) {
vm_page_busy(m);
vm_page_protect(m, VM_PROT_NONE);
vm_page_free(m);
- } else if (bp->b_flags & B_DIRECT) {
+ } else
+#endif
+ if (bp->b_flags & B_DIRECT) {
vm_page_try_to_free(m);
} else if (vm_page_count_severe()) {
vm_page_try_to_cache(m);
}
}
- bremfree(bp);
- bp->b_flags |= B_ASYNC;
-
/*
* default (old) behavior, writing out only one block
*
* XXX returns b_bufsize instead of b_bcount for nwritten?
*/
nwritten = bp->b_bufsize;
- bwrite(bp);
+ bremfree(bp);
+ bawrite(bp);
return nwritten;
}
if (qindex == BQUEUE_CLEAN) {
get_mplock();
if (bp->b_flags & B_VMIO) {
- bp->b_flags &= ~B_ASYNC;
get_mplock();
vfs_vmio_release(bp);
rel_mplock();
get_mplock();
if (bp->b_flags & B_VMIO) {
- bp->b_flags &= ~B_ASYNC;
bp->b_flags |= B_DIRECT; /* try to free pages */
vfs_vmio_release(bp);
}
* Locate and return the specified buffer. Unless flagged otherwise,
* a locked buffer will be returned if it exists or NULL if it does not.
*
+ * findblk()'d buffers are still on the bufqueues and if you intend
+ * to use your (locked NON-TEST) buffer you need to bremfree(bp)
+ * and possibly do other stuff to it.
+ *
* FINDBLK_TEST - Do not lock the buffer. The caller is responsible
* for locking the buffer and ensuring that it remains
* the desired buffer after locking.
/*
* biowait:
*
- * Wait for buffer I/O completion, returning error status. The buffer
- * is left locked on return. B_EINTR is converted into an EINTR error
- * and cleared.
+ * Wait for buffer I/O completion, returning error status. B_EINTR
+ * is converted into an EINTR error but not cleared (since a chain
+ * of biowait() calls may occur).
*
- * NOTE! The original b_cmd is lost on return, since b_cmd will be
- * set to BUF_CMD_DONE.
+ * On return bpdone() will have been called but the buffer will remain
+ * locked and will not have been brelse()'d.
+ *
+ * NOTE! If a timeout is specified and the sleep times out (tsleep
+ * error, e.g. EWOULDBLOCK) the I/O is likely still in progress
+ * on return.
+ *
+ * NOTE! This operation is on a BIO, not a BUF.
+ *
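+ * NOTE! BIO_DONE and BIO_SYNC are cleared here once the wait completes
+ * without a sleep error; the strategy path asserts that BIO_DONE
+ * is clear when the next I/O is issued.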
*
* MPSAFE
*/
-int
-biowait(struct buf *bp)
+static __inline int
+_biowait(struct bio *bio, const char *wmesg, int to)
{
- if (bp->b_cmd != BUF_CMD_DONE) {
- crit_enter();
- for (;;) {
- tsleep_interlock(bp);
- if (bp->b_cmd == BUF_CMD_DONE)
- break;
- if (bp->b_cmd == BUF_CMD_READ)
- tsleep(bp, PINTERLOCKED, "biord", 0);
+ struct buf *bp = bio->bio_buf;
+ u_int32_t flags;
+ u_int32_t nflags;
+ int error;
+
+ /* Only the buffer's primary BIO may be waited on. */
+ KKASSERT(bio == &bp->b_bio1);
+ for (;;) {
+ flags = bio->bio_flags;
+ if (flags & BIO_DONE)
+ break;
+ /*
+ * Interlock the sleep exactly once, before BIO_WANT is
+ * published, then sleep only if the flags did not change
+ * underneath us. biodone_sync() issues the wakeup when it
+ * finds BIO_WANT set.
+ */
+ nflags = flags | BIO_WANT;
+ tsleep_interlock(bio, 0);
+ if (atomic_cmpset_int(&bio->bio_flags, flags, nflags)) {
+ if (wmesg)
+ error = tsleep(bio, PINTERLOCKED, wmesg, to);
+ else if (bp->b_cmd == BUF_CMD_READ)
+ error = tsleep(bio, PINTERLOCKED, "biord", to);
else
- tsleep(bp, PINTERLOCKED, "biowr", 0);
+ error = tsleep(bio, PINTERLOCKED, "biowr", to);
+ if (error) {
+ kprintf("tsleep error biowait %d\n", error);
+ return (error);
+ }
+ break;
}
- crit_exit();
}
- if (bp->b_flags & B_EINTR) {
- bp->b_flags &= ~B_EINTR;
+
+ /*
+ * Finish up.
+ */
+ KKASSERT(bp->b_cmd == BUF_CMD_DONE);
+ bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
+ if (bp->b_flags & B_EINTR)
return (EINTR);
- }
- if (bp->b_flags & B_ERROR) {
+ if (bp->b_flags & B_ERROR)
return (bp->b_error ? bp->b_error : EIO);
- } else {
- return (0);
- }
+ return (0);
+}
+
+/*
+ * biowait: wait on a synchronous BIO, passing a timeout of 0 to
+ * _biowait(), and hand back its error status.
+ */
+int
+biowait(struct bio *bio, const char *wmesg)
+{
+ int error;
+
+ error = _biowait(bio, wmesg, 0);
+ return (error);
+}
+
+/*
+ * biowait_timeout: same as biowait() but the caller supplies the
+ * timeout value that _biowait() hands to tsleep().
+ */
+int
+biowait_timeout(struct bio *bio, const char *wmesg, int to)
+{
+ int error;
+
+ error = _biowait(bio, wmesg, to);
+ return (error);
}
/*
track = &vp->v_track_read;
else
track = &vp->v_track_write;
+ KKASSERT((bio->bio_flags & BIO_DONE) == 0);
bio->bio_track = track;
bio_track_ref(track);
vop_strategy(*vp->v_ops, vp, bio);
}
/*
- * biodone:
+ * bpdone:
*
- * Finish I/O on a buffer, optionally calling a completion function.
- * This is usually called from an interrupt so process blocking is
- * not allowed.
+ * Finish I/O on a buffer after all BIOs have been processed.
+ * Called when the bio chain is exhausted (elseit non-zero) or by
+ * biodone_sync() (elseit 0, the buffer is not released).
*
- * biodone is also responsible for setting B_CACHE in a B_VMIO bp.
+ * bpdone is also responsible for setting B_CACHE in a B_VMIO bp.
* In a non-VMIO bp, B_CACHE will be set on the next getblk()
* assuming B_INVAL is clear.
*
* read error occured, or if the op was a write. B_CACHE is never
* set if the buffer is invalid or otherwise uncacheable.
*
- * biodone does not mess with B_INVAL, allowing the I/O routine or the
+ * bpdone does not mess with B_INVAL, allowing the I/O routine or the
* initiator to leave B_INVAL set to brelse the buffer out of existance
* in the biodone routine.
*/
void
-biodone(struct bio *bio)
+bpdone(struct buf *bp, int elseit)
{
- struct buf *bp = bio->bio_buf;
buf_cmd_t cmd;
- crit_enter();
-
KASSERT(BUF_REFCNTNB(bp) > 0,
("biodone: bp %p not busy %d", bp, BUF_REFCNTNB(bp)));
KASSERT(bp->b_cmd != BUF_CMD_DONE,
("biodone: bp %p already done!", bp));
- runningbufwakeup(bp);
-
/*
- * Run up the chain of BIO's. Leave b_cmd intact for the duration.
+ * No more BIOs are left. All completion functions have been dealt
+ * with, now we clean up the buffer.
*/
- while (bio) {
- biodone_t *done_func;
- struct bio_track *track;
-
- /*
- * BIO tracking. Most but not all BIOs are tracked.
- */
- if ((track = bio->bio_track) != NULL) {
- bio_track_rel(track);
- bio->bio_track = NULL;
- }
-
- /*
- * A bio_done function terminates the loop. The function
- * will be responsible for any further chaining and/or
- * buffer management.
- *
- * WARNING! The done function can deallocate the buffer!
- */
- if ((done_func = bio->bio_done) != NULL) {
- bio->bio_done = NULL;
- done_func(bio);
- crit_exit();
- return;
- }
- bio = bio->bio_prev;
- }
-
cmd = bp->b_cmd;
bp->b_cmd = BUF_CMD_DONE;
if (cmd != BUF_CMD_READ && cmd != BUF_CMD_WRITE) {
if (cmd == BUF_CMD_FREEBLKS)
bp->b_flags |= B_NOCACHE;
- brelse(bp);
- crit_exit();
+ if (elseit)
+ brelse(bp);
return;
}
bdirty(bp);
}
-
if (bp->b_flags & B_VMIO) {
int i;
vm_ooffset_t foff;
* routines.
*/
iosize = bp->b_bcount - bp->b_resid;
- if (cmd == BUF_CMD_READ && (bp->b_flags & (B_INVAL|B_NOCACHE|B_ERROR)) == 0) {
+ if (cmd == BUF_CMD_READ &&
+ (bp->b_flags & (B_INVAL|B_NOCACHE|B_ERROR)) == 0) {
bp->b_flags |= B_CACHE;
}
+ crit_enter();
+ get_mplock();
for (i = 0; i < bp->b_xio.xio_npages; i++) {
int bogusflag = 0;
int resid;
}
if (obj)
vm_object_pip_wakeupn(obj, 0);
+ rel_mplock();
+ crit_exit();
}
/*
- * For asynchronous completions, release the buffer now. The brelse
- * will do a wakeup there if necessary - so no need to do a wakeup
- * here in the async case. The sync case always needs to do a wakeup.
+ * Finish up by releasing the buffer. There are no more synchronous
+ * or asynchronous completions, those were handled by bio_done
+ * callbacks.
*/
-
- if (bp->b_flags & B_ASYNC) {
- if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0)
+ if (elseit) {
+ if (bp->b_flags & (B_NOCACHE|B_INVAL|B_ERROR|B_RELBUF))
brelse(bp);
else
bqrelse(bp);
- } else {
- wakeup(bp);
}
- crit_exit();
+}
+
+/*
+ * biodone:
+ *
+ * Normal I/O completion. Calls runningbufwakeup() for the buffer and
+ * then walks the BIO chain from the passed bio back through bio_prev.
+ * The first BIO with a bio_done callback ends the walk: the callback
+ * is invoked and owns all further processing. If no BIO has a
+ * callback the buffer is completed with bpdone(bp, 1).
+ */
+void
+biodone(struct bio *bio)
+{
+ struct buf *bp = bio->bio_buf;
+
+ runningbufwakeup(bp);
+
+ /*
+ * Walk up the chain of BIO's. b_cmd is left intact for the duration.
+ */
+ for (; bio != NULL; bio = bio->bio_prev) {
+ struct bio_track *track;
+ biodone_t *done_func;
+
+ /*
+ * BIO tracking. Most but not all BIOs are tracked.
+ */
+ track = bio->bio_track;
+ if (track != NULL) {
+ bio_track_rel(track);
+ bio->bio_track = NULL;
+ }
+
+ /*
+ * No callback on this BIO, move on to the previous one.
+ */
+ done_func = bio->bio_done;
+ if (done_func == NULL)
+ continue;
+
+ /*
+ * A bio_done function terminates the walk. The function
+ * is responsible for any further chaining and/or buffer
+ * management.
+ *
+ * WARNING! The done function can deallocate the buffer!
+ */
+ bio->bio_done = NULL;
+ done_func(bio);
+ return;
+ }
+
+ /*
+ * The chain is exhausted, do normal [a]synchronous completion.
+ */
+ bpdone(bp, 1);
+}
+
+/*
+ * biodone_sync:
+ *
+ * Completion callback that terminates a synchronous BIO.
+ *
+ * bpdone() is called with elseit=FALSE, so the buffer is completed
+ * but stays locked. BIO_DONE is then set and any thread that flagged
+ * BIO_WANT is woken up. The waiter must brelse() the buffer itself
+ * once it has collected the result.
+ */
+void
+biodone_sync(struct bio *bio)
+{
+ struct buf *bp = bio->bio_buf;
+ int oflags;
+ int nflags;
+
+ KKASSERT(bio == &bp->b_bio1);
+ bpdone(bp, 0);
+
+ /*
+ * Atomically set BIO_DONE and clear BIO_WANT, retrying whenever
+ * bio_flags changed between the load and the compare-and-set.
+ */
+ do {
+ oflags = bio->bio_flags;
+ nflags = (oflags | BIO_DONE) & ~BIO_WANT;
+ } while (atomic_cmpset_int(&bio->bio_flags, oflags, nflags) == 0);
+
+ /* A wakeup is only needed if somebody asked for one. */
+ if (oflags & BIO_WANT)
+ wakeup(bio);
}
/*
int i, mask = 0;
caddr_t sa, ea;
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
- bp->b_flags &= ~(B_INVAL|B_ERROR);
+ bp->b_flags &= ~(B_INVAL | B_EINTR | B_ERROR);
if ((bp->b_xio.xio_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
(bp->b_loffset & PAGE_MASK) == 0) {
mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1;
static struct buf *
cluster_rbuild (struct vnode *vp, off_t filesize, off_t loffset,
off_t doffset, int blksize, int run,
- struct buf *fbp, int doasync);
+ struct buf *fbp);
static void cluster_callback (struct bio *);
maxra = nbuf/8;
/*
- * get the requested block
+ * Get the requested block.
*/
*bpp = reqbp = bp = getblk(vp, loffset, blksize, 0, 0);
origoffset = loffset;
off_t firstread = bp->b_loffset;
int nblks;
+ /*
+ * Set-up synchronous read for bp.
+ */
+ bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
+
KASSERT(firstread != NOOFFSET,
("cluster_read: no buffer offset"));
if (firstread + totread > filesize)
nblks = burstbytes / blksize;
bp = cluster_rbuild(vp, filesize, loffset,
- doffset, blksize, nblks, bp, 0);
+ doffset, blksize, nblks, bp);
loffset += bp->b_bufsize;
} else {
single_block_read:
}
/*
- * Handle the synchronous read. This only occurs if B_CACHE was
- * not set. bp (and rbp) could be either a cluster bp or a normal
- * bp depending on the what cluster_rbuild() decided to do. If
- * it is a cluster bp, vfs_busy_pages() has already been called.
+ * If B_CACHE was not set issue bp. bp will either be an
+ * asynchronous cluster buf or a synchronous single-buf.
+ * If it is a single buf it will be the same as reqbp.
+ *
+ * NOTE: Once an async cluster buf is issued bp becomes invalid.
*/
if (bp) {
#if defined(CLUSTERDEBUG)
kprintf("S(%lld,%d,%d) ",
bp->b_loffset, bp->b_bcount, seqcount);
#endif
- bp->b_cmd = BUF_CMD_READ;
if ((bp->b_flags & B_CLUSTER) == 0)
vfs_busy_pages(vp, bp);
bp->b_flags &= ~(B_ERROR|B_INVAL);
- if ((bp->b_flags & B_ASYNC) || bp->b_bio1.bio_done != NULL)
- BUF_KERNPROC(bp);
vn_strategy(vp, &bp->b_bio1);
- if (bp->b_flags & B_ERROR) {
- if ((error = bp->b_error) == 0)
- error = EIO;
- } else {
- error = 0;
- }
+ error = 0;
+ /* bp invalid now */
}
/*
if (ntoread > seqcount)
ntoread = seqcount;
+ /*
+ * rbp: async read
+ */
+ rbp->b_cmd = BUF_CMD_READ;
rbp->b_flags |= B_RAM/* | B_AGE*/;
+
if (burstbytes) {
rbp = cluster_rbuild(vp, filesize, loffset,
doffset, blksize,
- ntoread, rbp, 1);
+ ntoread, rbp);
} else {
rbp->b_bio2.bio_offset = doffset;
}
}
#endif
rbp->b_flags &= ~(B_ERROR|B_INVAL);
- rbp->b_flags |= B_ASYNC;
- rbp->b_cmd = BUF_CMD_READ;
if ((rbp->b_flags & B_CLUSTER) == 0)
vfs_busy_pages(vp, rbp);
- BUF_KERNPROC(rbp); /* B_ASYNC */
+ BUF_KERNPROC(rbp);
vn_strategy(vp, &rbp->b_bio1);
+ /* rbp invalid now */
}
-no_read_ahead:
- if (reqbp)
- return (biowait(reqbp));
- else
- return (error);
+ /*
+ * Wait for our original buffer to complete its I/O. reqbp will
+ * be NULL if the original buffer was B_CACHE. We are returning
+ * (*bpp) which is the same as reqbp when reqbp != NULL.
+ */
+no_read_ahead:
+ if (reqbp) {
+ KKASSERT(reqbp->b_bio1.bio_flags & BIO_SYNC);
+ error = biowait(&reqbp->b_bio1, "clurd");
+ }
+ return (error);
}
/*
* If blocks are contiguous on disk, use this to provide clustered
* read ahead. We will read as many blocks as possible sequentially
* and then parcel them up into logical blocks in the buffer hash table.
+ *
+ * This function either returns a cluster buf or it returns fbp. fbp is
+ * already expected to be set up as a synchronous or asynchronous request.
+ *
+ * If a cluster buf is returned it will always be async.
*/
static struct buf *
-cluster_rbuild(struct vnode *vp, off_t filesize, off_t loffset,
- off_t doffset, int blksize, int run, struct buf *fbp, int doasync)
+cluster_rbuild(struct vnode *vp, off_t filesize, off_t loffset, off_t doffset,
+ int blksize, int run, struct buf *fbp)
{
struct buf *bp, *tbp;
off_t boffset;
}
bp = trypbuf(&cluster_pbuf_freecnt);
- if (bp == NULL)
+ if (bp == NULL) {
return tbp;
+ }
/*
* We are synthesizing a buffer out of vm_page_t's, but
*/
bp->b_data = (char *)((vm_offset_t)bp->b_data |
((vm_offset_t)tbp->b_data & PAGE_MASK));
- bp->b_flags |= B_ASYNC | B_CLUSTER | B_VMIO;
+ bp->b_flags |= B_CLUSTER | B_VMIO;
bp->b_cmd = BUF_CMD_READ;
- bp->b_bio1.bio_done = cluster_callback;
+ bp->b_bio1.bio_done = cluster_callback; /* default to async */
bp->b_bio1.bio_caller_info1.cluster_head = NULL;
bp->b_bio1.bio_caller_info2.cluster_tail = NULL;
bp->b_loffset = loffset;
break;
}
}
+
/*
- * The first buffer is setup async if doasync is specified.
- * All other buffers in the cluster are setup async. This
- * way the caller can decide how to deal with the requested
- * buffer.
+ * The passed-in tbp (i == 0) will already be set up for
+ * async or sync operation. All other tbp's acquired in
+ * our loop are set up for async operation.
*/
- if (i || doasync)
- tbp->b_flags |= B_ASYNC;
tbp->b_cmd = BUF_CMD_READ;
BUF_KERNPROC(tbp);
cluster_append(&bp->b_bio1, tbp);
panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)",
bp->b_bufsize, bp->b_kvasize);
}
-
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *)bp->b_xio.xio_pages, bp->b_xio.xio_npages);
+ BUF_KERNPROC(bp);
return (bp);
}
bp->b_flags &= ~B_ERROR;
bp->b_flags |= B_CLUSTER | B_BNOCLIP |
(tbp->b_flags & (B_VMIO | B_NEEDCOMMIT));
- bp->b_bio1.bio_done = cluster_callback;
bp->b_bio1.bio_caller_info1.cluster_head = NULL;
bp->b_bio1.bio_caller_info2.cluster_tail = NULL;
bundirty(tbp);
tbp->b_flags &= ~B_ERROR;
- tbp->b_flags |= B_ASYNC;
tbp->b_cmd = BUF_CMD_WRITE;
BUF_KERNPROC(tbp);
cluster_append(&bp->b_bio1, tbp);
totalwritten += bp->b_bufsize;
bp->b_dirtyoff = 0;
bp->b_dirtyend = bp->b_bufsize;
- bp->b_flags |= B_ASYNC;
+ bp->b_bio1.bio_done = cluster_callback;
bp->b_cmd = BUF_CMD_WRITE;
+
vfs_busy_pages(vp, bp);
bp->b_runningbufspace = bp->b_bufsize;
if (bp->b_runningbufspace) {
runningbufspace += bp->b_runningbufspace;
++runningbufcount;
}
- BUF_KERNPROC(bp); /* B_ASYNC */
+ BUF_KERNPROC(bp);
vn_strategy(vp, &bp->b_bio1);
bytes -= i;
vfs_bio_awrite(bp);
} else {
bremfree(bp);
- bp->b_flags |= B_ASYNC;
- bwrite(bp);
+ bawrite(bp);
}
} else {
bremfree(bp);
*/
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
brelse(bp);
} else {
bremfree(bp);
bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
brelse(bp);
}
return(0);
} else {
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF | B_NOCACHE);
- bp->b_flags &= ~B_ASYNC;
brelse(bp);
}
return(1);
BUF_UNLOCK(bp);
} else {
bremfree(bp);
- if (bp->b_vp == vp) {
- bp->b_flags |= B_ASYNC;
- } else {
- bp->b_flags &= ~B_ASYNC;
- }
- bwrite(bp);
+ if (bp->b_vp == vp)
+ bawrite(bp);
+ else
+ bwrite(bp);
}
return(1);
} else {
flags = fp->f_flag;
if (flags & FOFFSETLOCK) {
nflags = flags | FOFFSETWAKE;
- crit_enter();
- tsleep_interlock(&fp->f_flag);
+ tsleep_interlock(&fp->f_flag, 0);
if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
tsleep(&fp->f_flag, PINTERLOCKED, "fpoff", 0);
- crit_exit();
} else {
nflags = flags | FOFFSETLOCK;
if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
return (EWOULDBLOCK);
}
tp->tap_flags |= TAP_RWAIT;
- crit_enter();
- tsleep_interlock(tp);
+ tsleep_interlock(tp, PCATCH);
ifnet_deserialize_all(ifp);
error = tsleep(tp, PCATCH | PINTERLOCKED, "taprd", 0);
- crit_exit();
if (error)
return (error);
} else {
int error;
if (sl) {
- crit_enter();
- tsleep_interlock(chan);
+ tsleep_interlock(chan, slpflags);
smb_sl_unlock(sl);
error = tsleep(chan, slpflags | PINTERLOCKED, wmesg, timo);
if ((slpflags & PDROP) == 0)
smb_sl_lock(sl);
- crit_exit();
} else {
error = tsleep(chan, slpflags, wmesg, timo);
}
biodone_t *bio_done; /* Caller completion function */
off_t bio_offset; /* Logical offset relative to device */
void *bio_driver_info;
+ int bio_flags;
union {
void *ptr;
off_t offset;
} bio_caller_info2;
};
+/*
+ * BIO flags, used for strategy/biodone/biodone_sync interactions.
+ */
+#define BIO_SYNC 0x00000001
+#define BIO_WANT 0x20000000
+#define BIO_DONE 0x40000000
+
void bio_start_transaction(struct bio *, struct bio_track *);
#endif
*
* Notes:
*
- * B_ASYNC VOP calls on bp's are usually async whether or not
- * B_ASYNC is set, but some subsystems, such as NFS, like
- * to know what is best for the caller so they can
- * optimize the I/O.
- *
* B_PAGING Indicates that bp is being used by the paging system or
* some paging system and that the bp is not linked into
* the b_vp's clean/dirty linked lists or ref counts.
#define B_AGE 0x00000001 /* Reuse more quickly */
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
-#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
+#define B_UNUSED2 0x00000004
#define B_DIRECT 0x00000008 /* direct I/O flag (pls free vmio) */
#define B_DEFERRED 0x00000010 /* vfs-controlled deferment */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_HASHED 0x00000040 /* Indexed via v_rbhash_tree */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
#define B_BNOCLIP 0x00000100 /* EOF clipping b_bcount not allowed */
-#define B_UNUSED0200 0x00000200
+#define B_UNUSED9 0x00000200
#define B_EINTR 0x00000400 /* I/O was interrupted */
#define B_ERROR 0x00000800 /* I/O error occurred. */
-#define B_UNUSED1000 0x00001000 /* Unused */
+#define B_UNUSED12 0x00001000 /* Unused */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
#define B_NOCACHE 0x00008000 /* Destroy buffer AND backing store */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
-#define B_UNUSED40000 0x00040000
+#define B_UNUSED18 0x00040000
#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
#define B_HEAVY 0x00100000 /* Heavy-weight buffer */
#define B_DIRTY 0x00200000 /* Needs writing later. */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
-#define B_WANT 0x00800000 /* Used by vm_pager.c */
+#define B_UNUSED23 0x00800000 /* Request wakeup on done */
#define B_VNCLEAN 0x01000000 /* On vnode clean list */
#define B_VNDIRTY 0x02000000 /* On vnode dirty list */
#define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */
#define B_RAM 0x10000000 /* Read ahead mark (flag) */
#define B_VMIO 0x20000000 /* VMIO flag */
#define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */
-#define B_UNUSED80000000 0x80000000
+#define B_UNUSED31 0x80000000 /* synchronous operation done */
#define PRINT_BUF_FLAGS "\20" \
"\40unused31\37cluster\36vmio\35ram\34ordered" \
- "\33paging\32vndirty\31vnclean\30want\27relbuf\26dirty" \
+ "\33paging\32vndirty\31vnclean\30unused23\27relbuf\26dirty" \
"\25unused20\24raw\23unused18\22clusterok\21malloc\20nocache" \
"\17locked\16inval\15unused12\14error\13eintr\12unused9\11bnoclip" \
- "\10delwri\7hashed\6cache\5deferred\4direct\3async\2needcommit\1age"
+ "\10delwri\7hashed\6cache\5deferred\4direct\3unused2\2needcommit\1age"
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
void regetblk(struct buf *bp);
struct bio *push_bio(struct bio *);
struct bio *pop_bio(struct bio *);
-int biowait (struct buf *);
+int biowait (struct bio *, const char *);
+int biowait_timeout (struct bio *, const char *, int);
+void bpdone (struct buf *, int);
void biodone (struct bio *);
+void biodone_sync (struct bio *);
void cluster_append(struct bio *, struct buf *);
int cluster_read (struct vnode *, off_t, off_t, int,
return(0);
}
+/*
+ * Chained biodone, for use by a bio_done callback that wants completion
+ * to continue up the chain. If there is a previous BIO it is passed to
+ * biodone(); otherwise the chain is exhausted and the buffer is finished
+ * with bpdone() and elseit=TRUE (asynchronous completion).
+ */
+static __inline void
+biodone_chain(struct bio *bio)
+{
+ struct bio *prev = bio->bio_prev;
+
+ if (prev == NULL) {
+ bpdone(bio->bio_buf, 1);
+ return;
+ }
+ biodone(prev);
+}
+
#endif /* _KERNEL */
#endif /* !_SYS_BUF2_H_ */
int msleep (void *, struct spinlock *, int, const char *, int);
int serialize_sleep(void *, struct lwkt_serialize *, int,
const char *, int);
-void tsleep_interlock (void *chan);
+void tsleep_interlock (void *, int);
+void tsleep_remove (struct thread *);
int lwkt_sleep (const char *, int);
void tstop (void);
void wakeup (void *chan);
struct thread {
TAILQ_ENTRY(thread) td_threadq;
TAILQ_ENTRY(thread) td_allq;
+ TAILQ_ENTRY(thread) td_sleepq;
lwkt_port td_msgport; /* built-in message port for replies */
struct lwp *td_lwp; /* (optional) associated lwp */
struct proc *td_proc; /* (optional) associated process */
#define TDF_WAKEREQ 0x4000 /* resume_kproc */
#define TDF_TIMEOUT 0x8000 /* tsleep timeout */
#define TDF_INTTHREAD 0x00010000 /* interrupt thread */
-#define TDF_UNUSED20000 0x00020000
+#define TDF_TSLEEP_DESCHEDULED 0x00020000 /* tsleep core deschedule */
#define TDF_BLOCKED 0x00040000 /* Thread is blocked */
#define TDF_PANICWARN 0x00080000 /* panic warning in switch */
#define TDF_BLOCKQ 0x00100000 /* on block queue */
if (!daddr)
panic("ext2_bmaparray: indirect block not in cache");
#endif
+ /*
+ * This runs through ext2_strategy using bio2 to
+ * cache the disk offset, then comes back through
+ * bio1. So we want to wait on bio1
+ */
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
bp->b_flags &= ~(B_INVAL|B_ERROR);
bp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(bp->b_vp, bp);
vn_strategy(bp->b_vp, &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "biord");
if (error) {
brelse(bp);
return (error);
if (bp->b_bcount > bp->b_bufsize)
panic("ext2_indirtrunc: bad buffer size");
bp->b_bio2.bio_offset = doffset;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
vfs_busy_pages(bp->b_vp, bp);
vn_strategy(vp, &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "biord");
}
if (error) {
brelse(bp);
static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data);
static void hammer_io_set_modlist(struct hammer_io *io);
static void hammer_io_flush_mark(hammer_volume_t volume);
-static void hammer_io_flush_sync_done(struct bio *bio);
/*
/*
* Wait for any physical IO to complete
+ *
+ * XXX we aren't interlocked against a spinlock or anything so there
+ * is a small window in the interlock / io->running == 0 test.
*/
void
hammer_io_wait(hammer_io_t io)
{
if (io->running) {
- crit_enter();
- tsleep_interlock(io);
- io->waiting = 1;
for (;;) {
- tsleep(io, PINTERLOCKED, "hmrflw", 0);
+ io->waiting = 1;
+ tsleep_interlock(io, 0);
if (io->running == 0)
break;
- tsleep_interlock(io);
- io->waiting = 1;
+ tsleep(io, PINTERLOCKED, "hmrflw", hz);
if (io->running == 0)
break;
}
- crit_exit();
}
}
bp->b_bcount = 0;
bp->b_cmd = BUF_CMD_FLUSH;
bp->b_bio1.bio_caller_info1.cluster_head = bp_base;
- bp->b_bio1.bio_done = hammer_io_flush_sync_done;
- bp->b_flags |= B_ASYNC;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp_base = bp;
vn_strategy(volume->devvp, &bp->b_bio1);
}
}
while ((bp = bp_base) != NULL) {
bp_base = bp->b_bio1.bio_caller_info1.cluster_head;
- while (bp->b_cmd != BUF_CMD_DONE) {
- crit_enter();
- tsleep_interlock(&bp->b_cmd);
- if (bp->b_cmd != BUF_CMD_DONE)
- tsleep(&bp->b_cmd, PINTERLOCKED, "hmrFLS", 0);
- crit_exit();
- }
- bp->b_flags &= ~B_ASYNC;
+ biowait(&bp->b_bio1, "hmrFLS");
relpbuf(bp, NULL);
}
}
-
-/*
- * Callback to deal with completed flush commands to the device.
- */
-static void
-hammer_io_flush_sync_done(struct bio *bio)
-{
- struct buf *bp;
-
- bp = bio->bio_buf;
- bp->b_cmd = BUF_CMD_DONE;
- wakeup(&bp->b_cmd);
-}
-
}
nlv = lv | HAMMER_LOCKF_WANTED;
++hammer_contention_count;
- crit_enter();
- tsleep_interlock(lock);
+ tsleep_interlock(lock, 0);
if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
tsleep(lock, PINTERLOCKED, ident, 0);
if (hammer_debug_locks)
kprintf("hammer_lock_ex: try again\n");
}
- crit_exit();
}
}
}
} else {
nlv = lv | HAMMER_LOCKF_WANTED;
++hammer_contention_count;
- crit_enter();
- tsleep_interlock(lock);
+ tsleep_interlock(lock, 0);
if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
tsleep(lock, PINTERLOCKED, "hmrlck", 0);
}
- crit_exit();
}
}
}
struct vnode *vp = ap->a_vp;
bp = geteblk(ap->a_length);
- bp->b_flags |= B_ASYNC;
bp->b_cmd = BUF_CMD_FREEBLKS;
bp->b_bio1.bio_offset = ap->a_offset;
bp->b_bcount = ap->a_length;
static struct buf *nfs_getcacheblk(struct vnode *vp, off_t loffset,
int size, struct thread *td);
static int nfs_check_dirent(struct nfs_dirent *dp, int maxlen);
+static void nfsiodone_sync(struct bio *bio);
extern int nfs_numasync;
extern int nfs_pbuf_freecnt;
if (!rabp)
return (EINTR);
if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
- rabp->b_flags |= B_ASYNC;
rabp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(vp, rabp);
if (nfs_asyncio(vp, &rabp->b_bio2, td)) {
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio2.bio_done = nfsiodone_sync;
+ bp->b_bio2.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
error = nfs_doio(vp, &bp->b_bio2, td);
if (error) {
return (EINTR);
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio2.bio_done = nfsiodone_sync;
+ bp->b_bio2.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
error = nfs_doio(vp, &bp->b_bio2, td);
if (error) {
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio2.bio_done = nfsiodone_sync;
+ bp->b_bio2.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
error = nfs_doio(vp, &bp->b_bio2, td);
if (error) {
return (EINTR);
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio2.bio_done = nfsiodone_sync;
+ bp->b_bio2.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
error = nfs_doio(vp, &bp->b_bio2, td);
/*
NFS_DIRBLKSIZ, td);
if (rabp) {
if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
- rabp->b_flags |= B_ASYNC;
rabp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(vp, rabp);
if (nfs_asyncio(vp, &rabp->b_bio2, td)) {
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_cmd = BUF_CMD_READ;
+ bp->b_bio2.bio_done = nfsiodone_sync;
+ bp->b_bio2.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
error = nfs_doio(vp, &bp->b_bio2, td);
if (error) {
* Do an I/O operation to/from a cache block. This may be called
* synchronously or from an nfsiod. The BIO is normalized for DEV_BSIZE.
*
+ * A locked, completed I/O is returned and the caller is responsible for
+ * brelse()'ing it.
+ *
* NOTE! TD MIGHT BE NULL
*/
int
uiop->uio_rw = UIO_WRITE;
nfsstats.write_bios++;
- if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
+ if ((bp->b_flags & (B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == 0)
iomode = NFSV3WRITE_UNSTABLE;
else
iomode = NFSV3WRITE_FILESYNC;
* For an interrupted write, the buffer is still valid
* and the write hasn't been pushed to the server yet,
* so we can't set B_ERROR and report the interruption
- * by setting B_EINTR. For the B_ASYNC case, B_EINTR
+ * by setting B_EINTR. For the async case, B_EINTR
* is not relevant, so the rpc attempt is essentially
* a noop. For the case of a V3 write rpc not being
* committed to stable storage, the block is still
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
if ((bp->b_flags & B_PAGING) == 0)
bdirty(bp);
- if (error && (bp->b_flags & B_ASYNC) == 0)
+ if (error)
bp->b_flags |= B_EINTR;
crit_exit();
} else {
return(error);
}
+/*
+ * Synchronous completion for nfs_doio. Call bpdone() with elseit=FALSE.
+ * Caller is responsible for brelse()'ing the bp.
+ */
+static void
+nfsiodone_sync(struct bio *bio)
+{
+ bio->bio_flags = 0;
+ bpdone(bio->bio_buf, 0);
+}
else
bp = NULL;
}
- if (bp && (bp->b_flags & B_DELWRI)) {
- bremfree(bp);
- bp->b_flags &= ~B_ASYNC;
- bwrite(bp);
- ++nfs_commit_miss;
+ if (bp) {
+ if (bp->b_flags & B_DELWRI) {
+ bremfree(bp);
+ bwrite(bp);
+ ++nfs_commit_miss;
+ } else {
+ BUF_UNLOCK(bp);
+ }
}
++nfs_commit_blks;
if (cnt < iosize)
wakeup(&nmp->nm_bioq);
}
nfs_doio((struct vnode *)bio->bio_driver_info, bio, NULL);
+
/*
* If there are more than one iod on this mount, then defect
* so that the iods can be shared out fairly between the mounts
KASSERT(BUF_REFCNT(bp) > 0,
("nfs_strategy: buffer %p not locked", bp));
- if (bp->b_flags & B_ASYNC)
- td = NULL;
- else
+ if (bio->bio_flags & BIO_SYNC)
td = curthread; /* XXX */
+ else
+ td = NULL;
/*
* We probably don't need to push an nbio any more since no
* queue the request, wake it up and wait for completion
* otherwise just do it ourselves.
*/
- if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(ap->a_vp, nbio, td))
+ if ((bio->bio_flags & BIO_SYNC) || nfs_asyncio(ap->a_vp, nbio, td))
error = nfs_doio(ap->a_vp, nbio, td);
return (error);
}
* start the transaction in order to
* immediately biodone() it.
*/
- bp->b_flags |= B_ASYNC;
bundirty(bp);
bp->b_flags &= ~B_ERROR;
bp->b_dirtyoff = bp->b_dirtyend = 0;
* For an interrupted write, the buffer is still valid
* and the write hasn't been pushed to the server yet,
* so we can't set B_ERROR and report the interruption
- * by setting B_EINTR. For the B_ASYNC case, B_EINTR
+ * by setting B_EINTR. For the async case, B_EINTR
* is not relevant, so the rpc attempt is essentially
* a noop. For the case of a V3 write rpc not being
* committed to stable storage, the block is still
crit_enter();
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
- if ((bp->b_flags & B_ASYNC) == 0)
- bp->b_flags |= B_EINTR;
if ((bp->b_flags & B_PAGING) == 0)
bdirty(bp);
- if ((bp->b_flags & B_ASYNC) == 0)
- bp->b_flags |= B_EINTR;
+ bp->b_flags |= B_EINTR;
crit_exit();
} else {
if (error) {
nwfs_strategy(struct vop_strategy_args *ap)
{
struct bio *bio = ap->a_bio;
- struct buf *bp = bio->bio_buf;
int error = 0;
struct thread *td = NULL;
NCPVNDEBUG("\n");
- if ((bp->b_flags & B_ASYNC) == 0)
+ if ((bio->bio_flags & BIO_SYNC))
td = curthread; /* YYY dunno if this is legal */
/*
* If the op is asynchronous and an i/o daemon is waiting
* queue the request, wake it up and wait for completion
* otherwise just do it ourselves.
*/
- if ((bp->b_flags & B_ASYNC) == 0 )
+ if (bio->bio_flags & BIO_SYNC)
error = nwfs_doio(ap->a_vp, bio, proc0.p_ucred, td);
return (error);
}
* For an interrupted write, the buffer is still valid
* and the write hasn't been pushed to the server yet,
* so we can't set BIO_ERROR and report the interruption
- * by setting B_EINTR. For the B_ASYNC case, B_EINTR
+ * by setting B_EINTR. For the async case, B_EINTR
* is not relevant, so the rpc attempt is essentially
* a noop. For the case of a V3 write rpc not being
* committed to stable storage, the block is still
crit_enter();
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
- if ((bp->b_flags & B_ASYNC) == 0)
- bp->b_flags |= B_EINTR;
if ((bp->b_flags & B_PAGING) == 0)
bdirty(bp);
- if ((bp->b_flags & B_ASYNC) == 0)
- bp->b_flags |= B_EINTR;
+ bp->b_flags |= B_EINTR;
crit_exit();
} else {
if (error) {
smbfs_strategy(struct vop_strategy_args *ap)
{
struct bio *bio = ap->a_bio;
- struct buf *bp = bio->bio_buf;
struct thread *td = NULL;
int error = 0;
SMBVDEBUG("\n");
- if ((bp->b_flags & B_ASYNC) == 0)
+ if (bio->bio_flags & BIO_SYNC)
td = curthread; /* XXX */
- if ((bp->b_flags & B_ASYNC) == 0 )
+ if (bio->bio_flags & BIO_SYNC)
error = smbfs_doio(ap->a_vp, bio, proc0.p_ucred, td);
return error;
}
return (VOCALL(&spec_vnode_vops, ap));
}
-static void spec_getpages_iodone (struct bio *bio);
-
/*
* Open a special file.
*
KKASSERT(vp->v_rdev != NULL); /* XXX */
if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) {
if (bp->b_cmd == BUF_CMD_READ) {
- if (bp->b_flags & B_ASYNC)
- mp->mnt_stat.f_asyncreads++;
- else
+ if (bio->bio_flags & BIO_SYNC)
mp->mnt_stat.f_syncreads++;
- } else {
- if (bp->b_flags & B_ASYNC)
- mp->mnt_stat.f_asyncwrites++;
else
+ mp->mnt_stat.f_asyncreads++;
+ } else {
+ if (bio->bio_flags & BIO_SYNC)
mp->mnt_stat.f_syncwrites++;
+ else
+ mp->mnt_stat.f_asyncwrites++;
}
}
return ((ap->a_flags & F_POSIX) ? EINVAL : EOPNOTSUPP);
}
-static void
-spec_getpages_iodone(struct bio *bio)
-{
- bio->bio_buf->b_cmd = BUF_CMD_DONE;
- wakeup(bio->bio_buf);
-}
-
/*
* spec_getpages() - get pages associated with device vnode.
*
}
bp->b_bio1.bio_offset = offset;
- bp->b_bio1.bio_done = spec_getpages_iodone;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
mycpu->gd_cnt.v_vnodein++;
mycpu->gd_cnt.v_vnodepgsin += pcount;
/* Do the input. */
vn_strategy(ap->a_vp, &bp->b_bio1);
-
- crit_enter();
-
- /* We definitely need to be at splbio here. */
- while (bp->b_cmd != BUF_CMD_DONE)
- tsleep(bp, 0, "spread", 0);
-
- crit_exit();
+ biowait(&bp->b_bio1, "spread");
if (bp->b_flags & B_ERROR) {
if (bp->b_error)
*/
bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0);
bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
brelse(bp);
deallocated += fs->fs_bsize;
}
bp->b_cmd = BUF_CMD_READ;
if (bp->b_bcount > bp->b_bufsize)
panic("ffs_indirtrunc: bad buffer size");
+ /*
+ * BIO is bio2 which chains back to bio1. We wait
+ * on bio1.
+ */
bp->b_bio2.bio_offset = dbtodoff(fs, dbn);
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
vfs_busy_pages(vp, bp);
/*
* Access the block device layer using the device vnode
*/
bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
vn_strategy(ip->i_devvp, &bp->b_bio2);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "biord");
}
if (error) {
brelse(bp);
void ffs_rawread_setup(void);
-static void ffs_rawreadwakeup(struct bio *bio);
-
-
SYSCTL_DECL(_vfs_ffs);
static int ffsrawbufcnt = 4;
if (iolen != 0)
len -= PAGE_SIZE;
}
+
+ /*
+ * Raw disk address is in bio2, but we wait for it to
+ * chain to bio1.
+ */
bp->b_flags &= ~B_ERROR;
bp->b_loffset = loffset;
bp->b_bio2.bio_offset = NOOFFSET;
- bp->b_bio2.bio_done = ffs_rawreadwakeup;
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
blockoff = (loffset % bsize) / DEV_BSIZE;
}
}
- crit_enter();
- while (bp->b_cmd != BUF_CMD_DONE)
- tsleep((caddr_t)&bp->b_bio2, 0, "rawrd", 0);
- crit_exit();
+ biowait(&bp->b_bio1, "rawrd");
vunmapbuf(bp);
if (bp != NULL)
relpbuf(bp, &ffsrawbufcnt);
if (nbp != NULL) { /* Run down readahead buffer */
- crit_enter();
- while (nbp->b_cmd != BUF_CMD_DONE)
- tsleep(&nbp->b_bio2, 0, "rawrd", 0);
- crit_exit();
+ biowait(&nbp->b_bio1, "rawrd");
vunmapbuf(nbp);
relpbuf(nbp, &ffsrawbufcnt);
}
return 0;
}
-
-static void
-ffs_rawreadwakeup(struct bio *bio)
-{
- bio->bio_buf->b_cmd = BUF_CMD_DONE;
- wakeup(bio);
-}
if (!daddr)
panic("ufs_bmaparray: indirect block not in cache");
#endif
+ /*
+ * cached disk addr in bio2, do I/O on bio1. It
+ * will probably hit the vfs's strategy function
+ * which will then use the cached offset in bio2.
+ */
+ bp->b_bio1.bio_done = biodone_sync;
+ bp->b_bio1.bio_flags |= BIO_SYNC;
bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
bp->b_flags &= ~(B_INVAL|B_ERROR);
bp->b_cmd = BUF_CMD_READ;
vfs_busy_pages(bp->b_vp, bp);
vn_strategy(bp->b_vp, &bp->b_bio1);
- error = biowait(bp);
+ error = biowait(&bp->b_bio1, "biord");
if (error) {
brelse(bp);
return (error);
#define SWM_FREE 0x02 /* free, period */
#define SWM_POP 0x04 /* pop out */
-#define AUTOCHAINDONE ((struct buf *)(intptr_t)-1)
-
/*
* vm_swap_size is in page-sized chunks now. It was DEV_BSIZE'd chunks
* in the old system.
int nswap_hiwat = 512; /* in pages, swap_pager_almost_full warn */
static __inline void swp_sizecheck (void);
-static void swp_pager_sync_iodone (struct bio *bio);
static void swp_pager_async_iodone (struct bio *bio);
/*
vm_pindex_t biox_blkno = 0;
int count;
char *data;
- struct bio *biox = NULL;
- struct buf *bufx = NULL;
+ struct bio *biox;
+ struct buf *bufx;
struct bio_track *track;
/*
count = howmany(bp->b_bcount, PAGE_SIZE);
data = bp->b_data;
- crit_enter();
-
/*
* Deal with BUF_CMD_FREEBLKS
*/
* needed.
*/
swp_pager_meta_free(object, start, count);
- crit_exit();
bp->b_resid = 0;
biodone(bio);
return;
nbio->bio_caller_info1.cluster_head = NULL;
nbio->bio_caller_info2.cluster_tail = NULL;
+ biox = NULL;
+ bufx = NULL;
+
/*
* Execute read or write
*/
-
while (count > 0) {
daddr_t blk;
* Obtain block. If block not found and writing, allocate a
* new block and build it into the object.
*/
-
blk = swp_pager_meta_ctl(object, start, 0);
if ((blk == SWAPBLK_NONE) && bp->b_cmd != BUF_CMD_READ) {
blk = swp_pager_getswapspace(1);
* - we cross a physical disk boundry in the
* stripe.
*/
-
if (
biox && (biox_blkno + btoc(bufx->b_bcount) != blk ||
((biox_blkno ^ blk) & dmmax_mask)
)
) {
- crit_exit();
if (bp->b_cmd == BUF_CMD_READ) {
++mycpu->gd_cnt.v_swapin;
mycpu->gd_cnt.v_swappgsin += btoc(bufx->b_bcount);
}
/*
- * Flush the biox to the swap device.
+ * Finished with this buf.
*/
- if (bufx->b_bcount) {
- if (bufx->b_cmd != BUF_CMD_READ)
- bufx->b_dirtyend = bufx->b_bcount;
- BUF_KERNPROC(bufx);
- vn_strategy(swapdev_vp, biox);
- } else {
- biodone(biox);
- }
- crit_enter();
+ KKASSERT(bufx->b_bcount != 0);
+ if (bufx->b_cmd != BUF_CMD_READ)
+ bufx->b_dirtyend = bufx->b_bcount;
biox = NULL;
bufx = NULL;
}
bufx = getpbuf(NULL);
biox = &bufx->b_bio1;
cluster_append(nbio, bufx);
- bufx->b_flags |= (bufx->b_flags & B_ORDERED) |
- B_ASYNC;
+ bufx->b_flags |= (bufx->b_flags & B_ORDERED);
bufx->b_cmd = bp->b_cmd;
biox->bio_done = swap_chain_iodone;
biox->bio_offset = (off_t)blk << PAGE_SHIFT;
/*
* Flush out last buffer
*/
- crit_exit();
-
if (biox) {
- if ((bp->b_flags & B_ASYNC) == 0)
- bufx->b_flags &= ~B_ASYNC;
if (bufx->b_cmd == BUF_CMD_READ) {
++mycpu->gd_cnt.v_swapin;
mycpu->gd_cnt.v_swappgsin += btoc(bufx->b_bcount);
mycpu->gd_cnt.v_swappgsout += btoc(bufx->b_bcount);
bufx->b_dirtyend = bufx->b_bcount;
}
- if (bufx->b_bcount) {
- if (bufx->b_cmd != BUF_CMD_READ)
- bufx->b_dirtyend = bufx->b_bcount;
- BUF_KERNPROC(bufx);
- vn_strategy(swapdev_vp, biox);
- } else {
- biodone(biox);
- }
+ KKASSERT(bufx->b_bcount);
+ if (bufx->b_cmd != BUF_CMD_READ)
+ bufx->b_dirtyend = bufx->b_bcount;
/* biox, bufx = NULL */
}
/*
- * Wait for completion. Now that we are no longer using
- * cluster_append, use the cluster_tail field to indicate
- * auto-completion if there are still I/O's in progress.
+ * Now initiate all the I/O. Be careful looping on our chain as
+ * I/O's may complete while we are still initiating them.
*/
- if (bp->b_flags & B_ASYNC) {
- crit_enter();
- if (nbio->bio_caller_info1.cluster_head == NULL) {
- biodone(bio);
- } else {
- nbio->bio_caller_info2.cluster_tail = AUTOCHAINDONE;
- }
- crit_exit();
- } else {
- crit_enter();
- while (nbio->bio_caller_info1.cluster_head != NULL) {
- bp->b_flags |= B_WANT;
- tsleep(bp, 0, "bpchain", 0);
- }
- if (bp->b_resid != 0 && !(bp->b_flags & B_ERROR)) {
- bp->b_flags |= B_ERROR;
- bp->b_error = EINVAL;
- }
- biodone(bio);
- crit_exit();
+ nbio->bio_caller_info2.cluster_tail = NULL;
+ bufx = nbio->bio_caller_info1.cluster_head;
+
+ while (bufx) {
+ biox = &bufx->b_bio1;
+ BUF_KERNPROC(bufx);
+ bufx = bufx->b_cluster_next;
+ vn_strategy(swapdev_vp, biox);
}
+
+ /*
+ * Completion of the cluster will also call biodone_chain(nbio).
+ * We never call biodone(nbio) so we don't have to worry about
+ * setting up a bio_done callback. It's handled in the sub-IO.
+ */
+ /**/
}
static void
struct buf *bufx; /* chained sub-buffer */
struct bio *nbio; /* parent nbio with chain glue */
struct buf *bp; /* original bp associated with nbio */
+ int chain_empty;
bufx = biox->bio_buf;
nbio = biox->bio_caller_info1.cluster_parent;
*/
KKASSERT(bp != NULL);
if (bufx->b_flags & B_ERROR) {
- bp->b_flags |= B_ERROR;
+ atomic_set_int(&bufx->b_flags, B_ERROR);
bp->b_error = bufx->b_error;
} else if (bufx->b_resid != 0) {
- bp->b_flags |= B_ERROR;
+ atomic_set_int(&bufx->b_flags, B_ERROR);
bp->b_error = EINVAL;
} else {
- bp->b_resid -= bufx->b_bcount;
+ atomic_subtract_int(&bp->b_resid, bufx->b_bcount);
}
/*
- * Remove us from the chain. It is sufficient to clean up
- * cluster_head. Once the chain is operational cluster_tail
- * may be used to indicate AUTOCHAINDONE. Note that I/O's
- * can complete while the swap system is still appending new
- * BIOs to the chain.
+ * Remove us from the chain.
*/
+ spin_lock_wr(&bp->b_lock.lk_spinlock);
nextp = &nbio->bio_caller_info1.cluster_head;
while (*nextp != bufx) {
KKASSERT(*nextp != NULL);
nextp = &(*nextp)->b_cluster_next;
}
*nextp = bufx->b_cluster_next;
- if (bp->b_flags & B_WANT) {
- bp->b_flags &= ~B_WANT;
- wakeup(bp);
- }
+ chain_empty = (nbio->bio_caller_info1.cluster_head == NULL);
+ spin_unlock_wr(&bp->b_lock.lk_spinlock);
/*
- * Clean up bufx. If this was the last buffer in the chain
- * and AUTOCHAINDONE was set, finish off the original I/O
- * as well.
- *
- * nbio was just a fake BIO layer to hold the cluster links,
- * we can issue the biodone() on the layer above it.
+ * Clean up bufx. If the chain is now empty we finish out
+ * the parent. Note that we may be racing other completions
+ * so we must use the chain_empty status from above.
*/
- if (nbio->bio_caller_info1.cluster_head == NULL &&
- nbio->bio_caller_info2.cluster_tail == AUTOCHAINDONE
- ) {
- nbio->bio_caller_info2.cluster_tail = NULL;
+ if (chain_empty) {
if (bp->b_resid != 0 && !(bp->b_flags & B_ERROR)) {
- bp->b_flags |= B_ERROR;
+ atomic_set_int(&bp->b_flags, B_ERROR);
bp->b_error = EINVAL;
}
- biodone(nbio->bio_prev);
+ biodone_chain(nbio);
}
- bufx->b_flags &= ~B_ASYNC;
relpbuf(bufx, NULL);
}
* asynchronous
*/
if (sync == FALSE) {
- bp->b_flags |= B_ASYNC;
bio->bio_done = swp_pager_async_iodone;
BUF_KERNPROC(bp);
vn_strategy(swapdev_vp, bio);
}
/*
- * synchronous
- */
-
- bio->bio_done = swp_pager_sync_iodone;
- vn_strategy(swapdev_vp, bio);
-
- /*
+ * Issue synchronously.
+ *
* Wait for the sync I/O to complete, then update rtvals.
* We just set the rtvals[] to VM_PAGER_PEND so we can call
* our async completion routine at the end, thus avoiding a
* double-free.
*/
- crit_enter();
-
- while (bp->b_cmd != BUF_CMD_DONE)
- tsleep(bp, 0, "swwrt", 0);
+ bio->bio_done = biodone_sync;
+ bio->bio_flags |= BIO_SYNC;
+ vn_strategy(swapdev_vp, bio);
+ biowait(bio, "swwrt");
for (j = 0; j < n; ++j)
rtvals[i+j] = VM_PAGER_PEND;
* Now that we are through with the bp, we can call the
* normal async completion, which frees everything up.
*/
-
swp_pager_async_iodone(bio);
-
- crit_exit();
}
}
}
/*
- * swap_pager_sync_iodone:
- *
- * Completion routine for synchronous reads and writes from/to swap.
- * We just mark the bp is complete and wake up anyone waiting on it.
- *
- * This routine may not block. This routine is called at splbio()
- * or better.
- */
-
-static void
-swp_pager_sync_iodone(struct bio *bio)
-{
- struct buf *bp = bio->bio_buf;
-
- bp->b_flags &= ~B_ASYNC;
- bp->b_cmd = BUF_CMD_DONE;
- wakeup(bp);
-}
-
-/*
* swp_pager_async_iodone:
*
* Completion routine for asynchronous reads and writes from/to swap.
*
* This routine may not block.
*/
-
static void
swp_pager_async_iodone(struct bio *bio)
{
/*
* remove the mapping for kernel virtual
*/
-
pmap_qremove((vm_offset_t)bp->b_data, bp->b_xio.xio_npages);
/*
* but do not free it in the rlist. The errornous block(s) are thus
* never reallocated as swap. Redirty the page and continue.
*/
-
for (i = 0; i < bp->b_xio.xio_npages; ++i) {
vm_page_t m = bp->b_xio.xio_pages[i];
*/
if (bp->b_cmd == BUF_CMD_READ)
nswptr = &nsw_rcount;
- else if (bp->b_flags & B_ASYNC)
- nswptr = &nsw_wcount_async;
- else
+ else if (bio->bio_flags & BIO_SYNC)
nswptr = &nsw_wcount_sync;
+ else
+ nswptr = &nsw_wcount_async;
bp->b_cmd = BUF_CMD_DONE;
relpbuf(bp, nswptr);
crit_exit();