* (for intel/amd anyhow) is not strictly needed as cache bus resource use
* is governed by the lazy update.
*
- * WARNING!!!! Performance matters here, by a huge margin. There are still
- * a few bottlenecks in the kernel (e.g. the PQ_INACTIVE
- * vm_page_queue) where things like parallel compiles hit up
- * against full all-cores contention right here.
+ * WARNING!!!! Performance matters here, by a huge margin.
*
- * 48-core test with pre-read / -j 48 no-modules kernel compile
- * came in at 75 seconds. Without pre-read it came in at 170 seconds.
+ * 48-core test with pre-read / -j 48 no-modules kernel compile
+ * with fanned-out inactive and active queues came in at 55 seconds.
*
- * 4-core test with pre-read / -j 48 no-modules kernel compile
- * came in at 83 seconds. Without pre-read it came in at 83 seconds
- * as well (no difference).
+ * 48-core test with pre-read / -j 48 no-modules kernel compile
+ * came in at 75 seconds. Without pre-read it came in at 170 seconds.
+ *
+ * 4-core test with pre-read / -j 48 no-modules kernel compile
+ * came in at 83 seconds. Without pre-read it came in at 83 seconds
+ * as well (no difference).
*/
void
spin_lock_contested(struct spinlock *spin)
}
spin_unlock(&bufqspin);
+ lwkt_yield();
spun = 0;
if (LIST_FIRST(&bp->b_dep) != NULL &&
* the potentially conflicting pv and
* re-checking.
*/
+ lwkt_yield();
if (*ptep == 0) {
pte_pv = pv_find(pmap, pmap_pte_pindex(sva));
if (pte_pv == NULL) {
info->addr + x86_64_ptob(rel_index), p);
}
vm_page_wakeup(p);
+ lwkt_yield();
return(0);
}
}
#endif
/*
- * Don't use a locked instruction here.
+ * Don't use a locked instruction here. To reduce latency, spin->counta
+ * is not read before it is written unless DEBUG_LOCKS is defined.
*/
+#ifdef DEBUG_LOCKS
KKASSERT(spin->counta != 0);
+#endif
cpu_sfence();
spin->counta = 0;
cpu_sfence();
#endif
+#ifdef DEBUG_LOCKS
KKASSERT(gd->gd_spinlocks_wr > 0);
+#endif
--gd->gd_spinlocks_wr;
cpu_ccfence();
--gd->gd_curthread->td_critcount;
#if 0
+ /* FUTURE */
if (__predict_false(gd->gd_reqflags & RQF_IDLECHECK_MASK))
lwkt_maybe_splz(gd->gd_curthread);
#endif
} else {
info->error = 1;
}
+ lwkt_yield();
return(0);
}
* the cleaning pass.
*/
if ((p->flags & PG_CLEANCHK) == 0)
- return(0);
+ goto done;
generation = info->object->generation;
vm_page_busy_wait(p, TRUE, "vpcwai");
info->object->generation != generation) {
info->error = 1;
vm_page_wakeup(p);
- return(0);
+ goto done;
}
/*
if (p->valid == 0 || (p->queue - p->pc) == PQ_CACHE) {
KKASSERT((p->dirty & p->valid) == 0);
vm_page_wakeup(p);
- return(0);
+ goto done;
}
/*
if ((p->dirty & p->valid) == 0) {
vm_page_flag_clear(p, PG_CLEANCHK);
vm_page_wakeup(p);
- return(0);
+ goto done;
}
/*
if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
vm_page_flag_clear(p, PG_CLEANCHK);
vm_page_wakeup(p);
- return(0);
+ goto done;
}
/*
* we raced an object modification.
*/
vm_object_page_collect_flush(info->object, p, info->pagerflags);
+done:
+ lwkt_yield();
return(0);
}
}
} else if (mt->valid == 0) {
if (error == 0) {
- kprintf("page failed but no I/O error page %p object %p pindex %d\n", mt, mt->object, (int) mt->pindex);
+ kprintf("page failed but no I/O error page "
+ "%p object %p pindex %d\n",
+ mt, mt->object, (int) mt->pindex);
/* whoops, something happened */
error = EINVAL;
}