*/
for (i = 0; i < mem->am_size; i += PAGE_SIZE) {
/*
- * Find a page from the object and wire it
- * down. This page will be mapped using one or more
- * entries in the GATT (assuming that PAGE_SIZE >=
- * AGP_PAGE_SIZE. If this is the first call to bind,
- * the pages will be allocated and zeroed.
+ * Find a page from the object and wire it down. This page
+ * will be mapped using one or more entries in the GATT
+ * (assuming that PAGE_SIZE >= AGP_PAGE_SIZE). If this is
+ * the first call to bind, the pages will be allocated
+ * and zeroed.
*/
m = vm_page_grab(mem->am_obj, OFF_TO_IDX(i),
- VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
- if ((m->flags & PG_ZERO) == 0)
- vm_page_zero_fill(m);
+ VM_ALLOC_NORMAL | VM_ALLOC_ZERO |
+ VM_ALLOC_RETRY);
AGP_DPF("found page pa=%#x\n", VM_PAGE_TO_PHYS(m));
vm_page_wire(m);
*/
vm_page_t m;
- m = vm_page_grab(mem->am_obj, 0,
- VM_ALLOC_NORMAL|VM_ALLOC_ZERO|VM_ALLOC_RETRY);
- if ((m->flags & PG_ZERO) == 0)
- vm_page_zero_fill(m);
+ m = vm_page_grab(mem->am_obj, 0, VM_ALLOC_NORMAL |
+ VM_ALLOC_ZERO |
+ VM_ALLOC_RETRY);
vm_page_wire(m);
mem->am_physical = VM_PAGE_TO_PHYS(m);
vm_page_wakeup(m);
static struct lwp *
lwp_fork(struct lwp *origlp, struct proc *destproc, int flags)
{
+ globaldata_t gd = mycpu;
struct lwp *lp;
struct thread *td;
* scheduler specific data.
*/
crit_enter();
- lp->lwp_cpbase = mycpu->gd_schedclock.time -
- mycpu->gd_schedclock.periodic;
+ lp->lwp_cpbase = gd->gd_schedclock.time - gd->gd_schedclock.periodic;
destproc->p_usched->heuristic_forking(origlp, lp);
crit_exit();
lp->lwp_cpumask &= usched_mastermask;
- td = lwkt_alloc_thread(NULL, LWKT_THREAD_STACK, -1, 0);
+ /*
+ * Assign the thread to the current cpu to begin with so we
+ * can manipulate it.
+ */
+ td = lwkt_alloc_thread(NULL, LWKT_THREAD_STACK, gd->gd_cpuid, 0);
lp->lwp_thread = td;
td->td_proc = destproc;
td->td_lwp = lp;
destproc->p_lasttid = lp->lwp_tid;
destproc->p_nthreads++;
-
return (lp);
}
*/
if (emergency_intr_thread.td_kstack == NULL) {
lwkt_create(ithread_emergency, NULL, NULL, &emergency_intr_thread,
- TDF_STOPREQ | TDF_INTTHREAD, -1, "ithread emerg");
+ TDF_STOPREQ | TDF_INTTHREAD, ncpus - 1, "ithread emerg");
systimer_init_periodic_nq(&emergency_intr_timer,
emergency_intr_timer_callback, &emergency_intr_thread,
(emergency_intr_enable ? emergency_intr_freq : 1));
/*
* Create an interrupt thread if necessary, leave it in an unscheduled
* state.
+ *
+ * Put it on cpu 0 for now, other work is pending related to this.
*/
if (info->i_state == ISTATE_NOTHREAD) {
info->i_state = ISTATE_NORMAL;
lwkt_create(ithread_handler, (void *)(intptr_t)intr, NULL,
- &info->i_thread, TDF_STOPREQ | TDF_INTTHREAD, -1,
+ &info->i_thread, TDF_STOPREQ | TDF_INTTHREAD, 0,
"ithread %d", intr);
if (intr >= FIRST_SOFTINT)
lwkt_setpri(&info->i_thread, TDPRI_SOFT_NORM);
return 0;
}
+#if 0
/*
* Same as kthread_create() but you can specify a custom stack size.
*/
lwkt_schedule(td);
return 0;
}
+#endif
/*
* Destroy an LWKT thread. Warning! This function is not called when
}
}
lwkt_reltoken(&p->p_token);
+ lwkt_yield();
PRELE(p);
return(0);
}
break;
}
lwkt_reltoken(&p->p_token);
+ lwkt_yield();
PRELE(p);
return(0);
}
default:
break;
}
+ lwkt_yield();
return(0);
}
int mpsafe = 1;
#endif
- lwkt_setpri_self(TDPRI_SOFT_NORM);
+ /*
+ * Run the callout thread at the same priority as other kernel
+ * threads so it can be round-robined.
+ */
+ /*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
sc = arg;
crit_enter();
thread_t
lwkt_alloc_thread(struct thread *td, int stksize, int cpu, int flags)
{
+ static int cpu_rotator;
globaldata_t gd = mycpu;
void *stack;
stack = (void *)kmem_alloc_stack(&kernel_map, stksize);
flags |= TDF_ALLOCATED_STACK;
}
- if (cpu < 0)
- lwkt_init_thread(td, stack, stksize, flags, gd);
- else
- lwkt_init_thread(td, stack, stksize, flags, globaldata_find(cpu));
+ if (cpu < 0) {
+ cpu = ++cpu_rotator;
+ cpu_ccfence();
+ cpu %= ncpus;
+ }
+ lwkt_init_thread(td, stack, stksize, flags, globaldata_find(cpu));
return(td);
}
/*
- * This implements a normal yield. This routine is virtually a nop if
- * there is nothing to yield to but it will always run any pending interrupts
- * if called from a critical section.
+ * This implements an LWKT yield, allowing a kernel thread to yield to other
+ * kernel threads at the same or higher priority. This function can be
+ * called in a tight loop and will typically only yield once per tick.
*
- * This yield is designed for kernel threads without a user context.
+ * Most kernel threads run at the same priority in order to allow equal
+ * sharing.
*
* (self contained on a per cpu basis)
*/
void
lwkt_setpri(thread_t td, int pri)
{
- KKASSERT(td->td_gd == mycpu);
if (td->td_pri != pri) {
KKASSERT(pri >= 0);
crit_enter();
if (td->td_flags & TDF_RUNQ) {
+ KKASSERT(td->td_gd == mycpu);
_lwkt_dequeue(td);
td->td_pri = pri;
_lwkt_enqueue(td);
* Create a kernel process/thread/whatever. It shares it's address space
* with proc0 - ie: kernel only.
*
- * NOTE! By default new threads are created with the MP lock held. A
- * thread which does not require the MP lock should release it by calling
- * rel_mplock() at the start of the new thread.
+ * If the cpu is not specified one will be selected. In the future
+ * specifying a cpu of -1 will enable kernel thread migration between
+ * cpus.
*/
int
lwkt_create(void (*func)(void *), void *arg, struct thread **tdp,
lwkt_gettoken(&disklist_token);
lwkt_create(disk_msg_core, /*args*/NULL, &td_core, NULL,
- 0, 0, "disk_msg_core");
+ 0, -1, "disk_msg_core");
tsleep(td_core, 0, "diskcore", 0);
lwkt_reltoken(&disklist_token);
}
goto retry_lookup;
}
if (pg == NULL) {
- pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
+ pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL |
+ VM_ALLOC_NULL_OK);
if (pg == NULL) {
vm_wait(0);
vm_object_drop(obj);
struct nchandle nch;
int error;
+ *freebuf = NULL;
atomic_add_int(&numfullpathcalls, 1);
if (disablefullpath)
return (ENODEV);
*/
if ((ptdpg = pmap->pm_pdirm) == NULL) {
ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
+ VM_ALLOC_ZERO);
pmap->pm_pdirm = ptdpg;
vm_page_flag_clear(ptdpg, PG_MAPPED);
vm_page_wire(ptdpg);
- ptdpg->valid = VM_PAGE_BITS_ALL;
+ KKASSERT(ptdpg->valid == VM_PAGE_BITS_ALL);
pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
vm_page_wakeup(ptdpg);
}
- if ((ptdpg->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdir, PAGE_SIZE);
-#ifdef PMAP_DEBUG
- else
- pmap_page_assertzero(VM_PAGE_TO_PHYS(ptdpg));
-#endif
-
pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
/* install self-referential address mapping entry */
vm_page_t m;
/*
- * Find or fabricate a new pagetable page
+ * Find or fabricate a new pagetable page. Setting VM_ALLOC_ZERO
+ * will zero any new page and mark it valid.
*/
m = vm_page_grab(pmap->pm_pteobj, ptepindex,
- VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
KASSERT(m->queue == PQ_NONE,
("_pmap_allocpte: %p->queue != PQ_NONE", m));
* Set the page table hint
*/
pmap->pm_ptphint = m;
-
- /*
- * Try to use the new mapping, but if we cannot, then
- * do it with the routine that maps the page explicitly.
- */
- if (m->valid == 0) {
- if ((m->flags & PG_ZERO) == 0) {
- if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
- (((unsigned) PTDpde) & PG_FRAME)) {
- pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
- bzero((caddr_t) pteva, PAGE_SIZE);
- } else {
- pmap_zero_page(ptepa);
- }
- }
- m->valid = VM_PAGE_BITS_ALL;
- vm_page_flag_clear(m, PG_ZERO);
- } else {
- KKASSERT((m->flags & PG_ZERO) == 0);
- }
-
vm_page_flag_set(m, PG_MAPPED);
vm_page_wakeup(m);
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/thread.h>
+#include <machine/clock.h>
#include <machine/vmparam.h>
#include <machine/md_var.h>
#include <machine_base/isa/isa_intr.h>
static int ddb_on_seg_fault = 0;
SYSCTL_INT(_machdep, OID_AUTO, ddb_on_seg_fault, CTLFLAG_RW,
&ddb_on_seg_fault, 0, "Go to DDB on user seg-fault");
+/*
+ * When non-zero, a thread taking a user seg-fault is parked in a
+ * tsleep() loop (20 second timeout per iteration) until the sysctl
+ * is cleared again.
+ */
+static int freeze_on_seg_fault = 0;
+SYSCTL_INT(_machdep, OID_AUTO, freeze_on_seg_fault, CTLFLAG_RW,
+ &freeze_on_seg_fault, 0, "Freeze faulting thread on user seg-fault");
#endif
static int panic_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&slow_release, 0, "Passive Release was nonoptimal");
/*
+ * System call debugging records the worst-case system call
+ * overhead (inclusive of blocking), but may be inaccurate.
+ */
+/*#define SYSCALL_DEBUG*/
+#ifdef SYSCALL_DEBUG
+uint64_t SysCallsWorstCase[SYS_MAXSYSCALL];
+#endif
+
+/*
* Passively intercepts the thread switch function to increase
* the thread priority from a user priority to a kernel priority, reducing
* syscall and trap overhead for the case where no switch occurs.
case T_PAGEFLT: /* page fault */
i = trap_pfault(frame, TRUE);
- if (frame->tf_rip == 0)
+ if (frame->tf_rip == 0) {
kprintf("T_PAGEFLT: Warning %%rip == 0!\n");
+ while (freeze_on_seg_fault) {
+ tsleep(p, 0, "freeze", hz * 20);
+ }
+ }
if (i == -1)
goto out;
if (i == 0)
*/
p = td->td_proc;
if (td->td_lwp->lwp_vkernel == NULL) {
- if (bootverbose)
+ if (bootverbose || freeze_on_seg_fault || ddb_on_seg_fault) {
kprintf("seg-fault ft=%04x ff=%04x addr=%p rip=%p "
"pid=%d p_comm=%s\n",
ftype, fault_flags,
(void *)frame->tf_addr,
(void *)frame->tf_rip,
p->p_pid, p->p_comm);
+ }
#ifdef DDB
+ while (freeze_on_seg_fault) {
+ tsleep(p, 0, "freeze", hz * 20);
+ }
if (ddb_on_seg_fault)
Debugger("ddb_on_seg_fault");
#endif
* NOTE: All system calls run MPSAFE now. The system call itself
* is responsible for getting the MP lock.
*/
+#ifdef SYSCALL_DEBUG
+ uint64_t tscval = rdtsc();
+#endif
error = (*callp->sy_call)(&args);
+#ifdef SYSCALL_DEBUG
+ tscval = rdtsc() - tscval;
+ tscval = tscval * 1000000 / tsc_frequency;
+ if (SysCallsWorstCase[code] < tscval)
+ SysCallsWorstCase[code] = tscval;
+#endif
out:
/*
* allocate the page directory page
*/
ptdpg = vm_page_grab(pmap->pm_pteobj, pmap->pm_pdindex,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO);
ptdpg->wire_count = 1;
atomic_add_int(&vmstats.v_wire_count, 1);
/* not usually mapped */
- ptdpg->valid = VM_PAGE_BITS_ALL;
vm_page_flag_clear(ptdpg, PG_MAPPED);
vm_page_wakeup(ptdpg);
pmap_kenter((vm_offset_t)pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
pmap->pm_pdirpte = KernelPTA[(vm_offset_t)pmap->pm_pdir >> PAGE_SHIFT];
- if ((ptdpg->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdir, PAGE_SIZE);
- vm_page_flag_clear(ptdpg, PG_ZERO);
pmap->pm_count = 1;
pmap->pm_active = 0;
*/
m = vm_page_grab(pmap->pm_pteobj, ptepindex,
VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
-
- if (m->valid == 0) {
- if ((m->flags & PG_ZERO) == 0)
- pmap_zero_page(VM_PAGE_TO_PHYS(m));
- m->valid = VM_PAGE_BITS_ALL;
- vm_page_flag_clear(m, PG_ZERO);
- } else {
- KKASSERT((m->flags & PG_ZERO) == 0);
- }
vm_page_flag_set(m, PG_MAPPED);
KASSERT(m->queue == PQ_NONE,
* already be set appropriately.
*/
if ((ptdpg = pmap->pm_pdirm) == NULL) {
- ptdpg = vm_page_grab(pmap->pm_pteobj, NUPDE + NUPDPE + PML4PML4I,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+ ptdpg = vm_page_grab(pmap->pm_pteobj,
+ NUPDE + NUPDPE + PML4PML4I,
+ VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
+ VM_ALLOC_ZERO);
pmap->pm_pdirm = ptdpg;
vm_page_flag_clear(ptdpg, PG_MAPPED);
- ptdpg->valid = VM_PAGE_BITS_ALL;
if (ptdpg->wire_count == 0)
atomic_add_int(&vmstats.v_wire_count, 1);
ptdpg->wire_count = 1;
vm_page_wakeup(ptdpg);
pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg));
}
- if ((ptdpg->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pml4, PAGE_SIZE);
- vm_page_flag_clear(ptdpg, PG_ZERO);
-
pmap->pm_count = 1;
pmap->pm_active = 0;
pmap->pm_ptphint = NULL;
* races by checking m->valid.
*/
m = vm_page_grab(pmap->pm_pteobj, ptepindex,
- VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
-
- if (m->valid == 0) {
- if ((m->flags & PG_ZERO) == 0) {
- pmap_zero_page(VM_PAGE_TO_PHYS(m));
- }
- m->valid = VM_PAGE_BITS_ALL;
- vm_page_flag_clear(m, PG_ZERO);
- } else {
- KKASSERT((m->flags & PG_ZERO) == 0);
- }
+ VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
KASSERT(m->queue == PQ_NONE,
("_pmap_allocpte: %p->queue != PQ_NONE", m));
const char *, ...) __printflike(4, 5);
int kthread_create_cpu (void (*)(void *), void *, struct thread **,
int, const char *, ...) __printflike(5, 6);
-int kthread_create_stk (void (*)(void *), void *, struct thread **,
- int, const char *, ...) __printflike(5, 6);
void kthread_exit (void) __dead2;
#endif /* _KERNEL */
lockmgr(&devfs_lock, LK_EXCLUSIVE);
lwkt_create(devfs_msg_core, /*args*/NULL, &td_core, NULL,
- 0, 0, "devfs_msg_core");
+ 0, -1, "devfs_msg_core");
while (devfs_run == 0)
lksleep(td_core, &devfs_lock, 0, "devfsc", 0);
lockmgr(&devfs_lock, LK_RELEASE);
txcpu = 1;
break;
default:
- rxcpu = 1;
- txcpu = 2;
+ rxcpu = -1;
+ txcpu = -1;
break;
}
#else
page, pageq);
vm_object_hold(object);
vm_page_free(*mpp);
- vm_page_insert(page, object, offset);
+ if (vm_page_insert(page, object, offset) == FALSE) {
+ panic("dev_pager_getpage: page (%p,%ld) exists",
+ object, offset);
+ }
vm_object_drop(object);
}
mtx_unlock(&dev_pager_mtx);
--info->endi;
if ((int)info->begi < 0 || (int)info->endi < 0)
return(-1);
+ lwkt_yield();
return(0);
}
vm_object_drop(object);
return(VM_PAGER_OK);
} else if (m == NULL) {
+ /*
+ * Use VM_ALLOC_QUICK to avoid blocking on cache
+ * page reuse.
+ */
m = vm_page_alloc(object, mreq->pindex + 1,
VM_ALLOC_QUICK);
if (m == NULL) {
if (error) {
break;
} else if (m == NULL) {
+ /*
+ * Use VM_ALLOC_QUICK to avoid blocking on cache
+ * page reuse.
+ */
m = vm_page_alloc(object, mreq->pindex + i,
VM_ALLOC_QUICK);
if (m == NULL)
tmp_addr = addr;
for (i = start; i < (start + size / PAGE_SIZE); i++) {
vm_page_t m = &pga[i];
- vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr));
+ if (vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr)) ==
+ FALSE) {
+ panic("vm_contig_pg_kmap: page already exists @%p",
+ (void *)(intptr_t)tmp_addr);
+ }
if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
pmap_zero_page(VM_PAGE_TO_PHYS(m));
m->flags = 0;
}
/*
+ * Update the pmap. We really only have to do this if a COW
+ * occurred to replace the read-only page with the new page. For
+ * now just do it unconditionally. XXX
+ */
+ pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot, fs.wired);
+ vm_page_flag_set(fs.m, PG_REFERENCED);
+
+ /*
* On success vm_fault_object() does not unlock or deallocate, and fs.m
- * will contain a busied page.
+ * will contain a busied page. So we must unlock here after having
+ * messed with the pmap.
*/
unlock_things(&fs);
vm_page_dirty(fs.m);
/*
- * Update the pmap. We really only have to do this if a COW
- * occured to replace the read-only page with the new page. For
- * now just do it unconditionally. XXX
- */
- pmap_enter(fs.map->pmap, vaddr, fs.m, fs.prot, fs.wired);
- vm_page_flag_set(fs.m, PG_REFERENCED);
-
- /*
* Unbusy the page by activating it. It remains held and will not
* be reclaimed.
*/
}
/*
- * On success vm_fault_object() does not unlock or deallocate, and fs.m
- * will contain a busied page.
+ * On success vm_fault_object() does not unlock or deallocate, so we
+ * do it here. Note that the returned fs.m will be busied.
*/
unlock_things(&fs);
for (;;) {
/*
+ * The entire backing chain from first_object to object
+ * inclusive is chainlocked.
+ *
* If the object is dead, we stop here
*/
if (fs->object->flags & OBJ_DEAD) {
/*
* Allocate a new page for this object/offset pair.
+ *
+ * It is possible for the allocation to race, so
+ * handle the case.
*/
fs->m = NULL;
if (!vm_page_count_severe()) {
fs->m = vm_page_alloc(fs->object, pindex,
((fs->vp || fs->object->backing_object) ?
- VM_ALLOC_NORMAL :
- VM_ALLOC_NORMAL | VM_ALLOC_ZERO));
+ VM_ALLOC_NULL_OK | VM_ALLOC_NORMAL :
+ VM_ALLOC_NULL_OK | VM_ALLOC_NORMAL |
+ VM_ALLOC_ZERO));
}
if (fs->m == NULL) {
vm_object_pip_wakeup(fs->first_object);
* pager has it, and potentially fault in additional pages
* at the same time.
*
- * We are NOT in splvm here and if TRYPAGER is true then
- * fs.m will be non-NULL and will be PG_BUSY for us.
+ * If TRYPAGER is true then fs.m will be non-NULL and busied
+ * for us.
*/
if (TRYPAGER(fs)) {
int rv;
*/
do {
dst_m = vm_page_alloc(dst_object,
- OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL);
+ OFF_TO_IDX(dst_offset),
+ VM_ALLOC_NORMAL);
if (dst_m == NULL) {
vm_wait(0);
}
i = 0;
while (tpindex < pindex) {
- rtm = vm_page_alloc(object, tpindex, VM_ALLOC_SYSTEM);
+ rtm = vm_page_alloc(object, tpindex, VM_ALLOC_SYSTEM |
+ VM_ALLOC_NULL_OK);
if (rtm == NULL) {
for (j = 0; j < i; j++) {
vm_page_free(marray[j]);
while (tpindex < endpindex) {
if (vm_page_lookup(object, tpindex))
break;
- rtm = vm_page_alloc(object, tpindex, VM_ALLOC_SYSTEM);
+ rtm = vm_page_alloc(object, tpindex, VM_ALLOC_SYSTEM |
+ VM_ALLOC_NULL_OK);
if (rtm == NULL)
break;
marray[i] = rtm;
* vm_map_entry via the normal fault code. Do NOT call this
* shortcut unless the normal fault code has run on this entry.
*
+ * The related map must be locked.
* No other requirements.
*/
static int vm_prefault_pages = 8;
int error;
/*
+ * This can eat a lot of time on a heavily contended
+ * machine so yield on the tick if needed.
+ */
+ if ((i & 7) == 7)
+ lwkt_yield();
+
+ /*
* Calculate the page to pre-fault, stopping the scan in
* each direction separately if the limit is reached.
*/
* NOTE: Allocated from base object
*/
m = vm_page_alloc(object, index,
- VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
+ VM_ALLOC_NORMAL |
+ VM_ALLOC_ZERO |
+ VM_ALLOC_NULL_OK);
+ if (m == NULL)
+ break;
if ((m->flags & PG_ZERO) == 0) {
vm_page_zero_fill(m);
vm_page_t mem;
mem = vm_page_grab(&kernel_object, OFF_TO_IDX(addr + i),
- VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
- if ((mem->flags & PG_ZERO) == 0)
- vm_page_zero_fill(mem);
- mem->valid = VM_PAGE_BITS_ALL;
- vm_page_flag_clear(mem, PG_ZERO);
+ VM_ALLOC_FORCE_ZERO | VM_ALLOC_NORMAL |
+ VM_ALLOC_RETRY);
vm_page_wakeup(mem);
}
vm_object_drop(&kernel_object);
* Copies the contents of the source entry to the destination
* entry. The entries *must* be aligned properly.
*
- * The vm_map must be exclusively locked.
+ * The vm_maps must be exclusively locked.
* The vm_map's token must be held.
+ *
+ * Because the maps are locked no faults can be in progress during the
+ * operation.
*/
static void
vm_map_copy_entry(vm_map_t src_map, vm_map_t dst_map,
}
/*
- * This releases the entire chain starting with object and recursing
- * through backing_object until stopobj is encountered. stopobj is
- * not released. The caller will typically release stopobj manually
- * before making this call (as the deepest object is the most likely
- * to collide with other threads).
+ * This releases the entire chain of objects from first_object to and
+ * including stopobj, flowing through object->backing_object.
*
- * object and stopobj must be held by the caller. This code looks a
- * bit odd but has been optimized fairly heavily.
+ * We release stopobj first as an optimization as this object is most
+ * likely to be shared across multiple processes.
*/
void
vm_object_chain_release_all(vm_object_t first_object, vm_object_t stopobj)
vm_page_remove(p);
vm_page_wakeup(p);
}
+ lwkt_yield();
return(0);
}
* This routine may not block.
* This routine must be called with the vm_object held.
* This routine must be called with a critical section held.
+ *
+ * This routine returns TRUE if the page was inserted into the object
+ * successfully, and FALSE if the page already exists in the object.
*/
-void
+int
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
panic("vm_page_insert: already inserted");
object->generation++;
- object->resident_page_count++;
/*
* Record the object/offset pair in this page and add the
vm_page_spin_lock(m);
m->object = object;
m->pindex = pindex;
- vm_page_rb_tree_RB_INSERT(&object->rb_memq, m);
+ if (vm_page_rb_tree_RB_INSERT(&object->rb_memq, m)) {
+ m->object = NULL;
+ m->pindex = 0;
+ vm_page_spin_unlock(m);
+ return FALSE;
+ }
+ object->resident_page_count++;
/* atomic_add_int(&object->agg_pv_list_count, m->md.pv_list_count); */
vm_page_spin_unlock(m);
* Checks for a swap assignment and sets PG_SWAPPED if appropriate.
*/
swap_pager_page_inserted(m);
+ return TRUE;
}
/*
ASSERT_LWKT_TOKEN_HELD(vm_object_token(m->object));
vm_page_remove(m);
}
- vm_page_insert(m, new_object, new_pindex);
+ if (vm_page_insert(m, new_object, new_pindex) == FALSE) {
+ panic("vm_page_rename: target exists (%p,%ld)",
+ new_object, new_pindex);
+ }
if (m->queue - m->pc == PQ_CACHE)
vm_page_deactivate(m);
vm_page_dirty(m);
* Allocate and return a memory cell associated with this VM object/offset
* pair. If object is NULL an unassociated page will be allocated.
*
- * page_req classes:
+ * The returned page will be busied and removed from its queues. This
+ * routine can block and may return NULL if a race occurs and the page
+ * is found to already exist at the specified (object, pindex).
*
* VM_ALLOC_NORMAL allow use of cache pages, nominal free drain
* VM_ALLOC_QUICK like normal but cannot use cache
* VM_ALLOC_SYSTEM greater free drain
* VM_ALLOC_INTERRUPT allow free list to be completely drained
- * VM_ALLOC_ZERO advisory request for pre-zero'd page
- *
- * The object must be locked if not NULL
+ * VM_ALLOC_ZERO advisory request for pre-zero'd page only
+ * VM_ALLOC_FORCE_ZERO advisory request for pre-zero'd page only
+ * VM_ALLOC_NULL_OK ok to return NULL on insertion collision
+ * (see vm_page_grab())
+ * The object must be held if not NULL
* This routine may not block
- * The returned page will be marked PG_BUSY
*
* Additional special handling is required when called from an interrupt
* (VM_ALLOC_INTERRUPT). We are not allowed to mess with the page cache
if (object) {
pg_color = mycpu->gd_cpuid + (pindex & ~ncpus_fit_mask) +
(object->pg_color & ~ncpus_fit_mask);
- KASSERT(vm_page_lookup(object, pindex) == NULL,
- ("vm_page_alloc: page already allocated"));
} else {
pg_color = mycpu->gd_cpuid + (pindex & ~ncpus_fit_mask);
}
*/
if (object) {
pg_color = object->pg_color + pindex;
- KASSERT(vm_page_lookup(object, pindex) == NULL,
- ("vm_page_alloc: page already allocated"));
} else {
pg_color = pindex;
}
/*
* The free queue has sufficient free pages to take one out.
*/
- if (page_req & VM_ALLOC_ZERO)
+ if (page_req & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO))
m = vm_page_select_free(pg_color, TRUE);
else
m = vm_page_select_free(pg_color, FALSE);
*/
if (m != NULL) {
KASSERT(m->dirty == 0,
- ("Found dirty cache page %p", m));
+ ("Found dirty cache page %p", m));
vm_page_protect(m, VM_PROT_NONE);
vm_page_free(m);
goto loop;
}
/*
- * Good page found. The page has already been busied for us.
- *
* v_free_count can race so loop if we don't find the expected
* page.
*/
if (m == NULL)
goto loop;
- KASSERT(m->dirty == 0,
- ("vm_page_alloc: free/cache page %p was dirty", m));
/*
- * NOTE: page has already been removed from its queue and busied.
+ * Good page found. The page has already been busied for us and
+ * removed from its queues.
*/
+ KASSERT(m->dirty == 0,
+ ("vm_page_alloc: free/cache page %p was dirty", m));
KKASSERT(m->queue == PQ_NONE);
/*
- * Initialize structure. Only the PG_ZERO flag is inherited. Set
- * the page PG_BUSY
+ * Initialize the structure, inheriting some flags but clearing
+ * all the rest. The page has already been busied for us.
*/
- vm_page_flag_clear(m, ~(PG_ZERO | PG_BUSY));
+ vm_page_flag_clear(m, ~(PG_ZERO | PG_BUSY | PG_SBUSY));
KKASSERT(m->wire_count == 0);
KKASSERT(m->busy == 0);
m->act_count = 0;
* NOTE: If no object an unassociated page is allocated, m->pindex
* can be used by the caller for any purpose.
*/
- if (object)
- vm_page_insert(m, object, pindex);
- else
+ if (object) {
+ if (vm_page_insert(m, object, pindex) == FALSE) {
+ kprintf("PAGE RACE (%p:%d,%ld)\n",
+ object, object->type, pindex);
+ vm_page_free(m);
+ m = NULL;
+ if ((page_req & VM_ALLOC_NULL_OK) == 0)
+ panic("PAGE RACE");
+ }
+ } else {
m->pindex = pindex;
+ }
/*
* Don't wakeup too often - wakeup the pageout daemon when
/*
* Grab a page, blocking if it is busy and allocating a page if necessary.
- * A busy page is returned or NULL.
+ * A busy page is returned or NULL. The page may or may not be valid and
+ * might not be on a queue (the caller is responsible for the disposition of
+ * the page).
+ *
+ * If VM_ALLOC_ZERO is specified and the grab must allocate a new page, the
+ * page will be zero'd and marked valid.
*
- * The page is not removed from its queues. XXX?
+ * If VM_ALLOC_FORCE_ZERO is specified the page will be zero'd and marked
+ * valid even if it already exists.
*
- * If VM_ALLOC_RETRY is specified VM_ALLOC_NORMAL must also be specified.
- * If VM_ALLOC_RETRY is not specified
+ * If VM_ALLOC_RETRY is specified this routine will never return NULL. Also
+ * note that VM_ALLOC_NORMAL must be specified if VM_ALLOC_RETRY is specified.
*
* This routine may block, but if VM_ALLOC_RETRY is not set then NULL is
* always returned if we had blocked.
- * This routine will never return NULL if VM_ALLOC_RETRY is set.
+ *
* This routine may not be called from an interrupt.
- * The returned page may not be entirely valid.
*
- * This routine may be called from mainline code without spl protection and
- * be guarenteed a busied page associated with the object at the specified
- * index.
+ * PG_ZERO is *ALWAYS* cleared by this routine.
*
- * No requirements.
+ * No other requirements.
*/
vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
m = NULL;
break;
}
+ /* retry */
} else if (m == NULL) {
m = vm_page_alloc(object, pindex,
allocflags & ~VM_ALLOC_RETRY);
break;
vm_wait(0);
if ((allocflags & VM_ALLOC_RETRY) == 0)
- break;
+ goto failed;
} else {
/* m found */
break;
}
}
+
+ /*
+ * If VM_ALLOC_ZERO an invalid page will be zero'd and set valid.
+ *
+ * If VM_ALLOC_FORCE_ZERO the page is unconditionally zero'd and set
+ * valid even if already valid.
+ */
+ if (m->valid == 0) {
+ if (allocflags & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO)) {
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(VM_PAGE_TO_PHYS(m));
+ m->valid = VM_PAGE_BITS_ALL;
+ }
+ } else if (allocflags & VM_ALLOC_FORCE_ZERO) {
+ pmap_zero_page(VM_PAGE_TO_PHYS(m));
+ m->valid = VM_PAGE_BITS_ALL;
+ }
+ vm_page_flag_clear(m, PG_ZERO);
+failed:
vm_object_drop(object);
return(m);
}
#define VM_ALLOC_INTERRUPT 0x04 /* ok to exhaust entire free list */
#define VM_ALLOC_ZERO 0x08 /* req pre-zero'd memory if avail */
#define VM_ALLOC_QUICK 0x10 /* like NORMAL but do not use cache */
+#define VM_ALLOC_FORCE_ZERO 0x20 /* zero page even if already valid */
+#define VM_ALLOC_NULL_OK 0x40 /* ok to return NULL on collision */
#define VM_ALLOC_RETRY 0x80 /* indefinite block (vm_page_grab()) */
void vm_page_queue_spin_lock(vm_page_t);
void vm_page_dontneed (vm_page_t);
void vm_page_deactivate (vm_page_t);
void vm_page_deactivate_locked (vm_page_t);
-void vm_page_insert (vm_page_t, struct vm_object *, vm_pindex_t);
+int vm_page_insert (vm_page_t, struct vm_object *, vm_pindex_t);
vm_page_t vm_page_lookup (struct vm_object *, vm_pindex_t);
vm_page_t VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(struct vm_object *, vm_pindex_t,
int, const char * VM_PAGE_DEBUG_ARGS);
/*
* Free a page. The page must be marked BUSY.
*
- * The clearing of PG_ZERO is a temporary safety until the code can be
- * reviewed to determine that PG_ZERO is being properly cleared on
- * write faults or maps. PG_ZERO was previously cleared in
- * vm_page_alloc().
+ * Always clear PG_ZERO when freeing a page, which ensures the flag is not
+ * set unless we are absolutely certain the page is zeroed. This is
+ * particularly important when the vm_page_alloc*() code moves pages from
+ * PQ_CACHE to PQ_FREE.
*/
static __inline void
vm_page_free(vm_page_t m)
}
/*
- * Free a page to the zerod-pages queue
+ * Free a page to the zeroed-pages queue. The caller must ensure that the
+ * page has been zeroed.
*/
static __inline void
vm_page_free_zero(vm_page_t m)
vm_page_and_queue_spin_unlock(m);
KKASSERT(m->queue == PQ_INACTIVE);
+ lwkt_yield();
+
/*
* The page has been successfully busied and is now no
* longer spinlocked. The queue is no longer spinlocked
continue;
}
vm_page_and_queue_spin_unlock(m);
+ lwkt_yield();
/*
* The page has been successfully busied and the page and
}
vm_page_spin_unlock(m);
pagedaemon_wakeup();
+ lwkt_yield();
/*
* Page has been successfully busied and it and its queue
info->bigproc = p;
info->bigsize = size;
}
+ lwkt_yield();
return(0);
}
lwkt_gettoken(&vmobj_token);
while ((object = TAILQ_NEXT(object, object_list)) != NULL) {
+ lwkt_yield();
if (--count <= 0)
break;
-
vm_object_hold(object);
/*