*/
cpu_sfence();
basetime_index = ni;
-
- /*
- * Figure out how badly the system is starved for memory
- */
- vm_fault_ratecheck();
}
/*
/*
* see waitrunningbufspace() for limit test.
*/
- limit = hirunningspace * 4 / 6;
+ limit = hirunningspace * 3 / 6;
if (runningbufreq && runningbufspace <= limit) {
runningbufreq = 0;
spin_unlock(&bufcspin);
/*
* waitrunningbufspace()
*
- * Wait for the amount of running I/O to drop to hirunningspace * 4 / 6.
- * This is the point where write bursting stops so we don't want to wait
- * for the running amount to drop below it (at least if we still want bioq
- * to burst writes).
+ * If runningbufspace exceeds 4/6 hirunningspace we block until
+ * runningbufspace drops to 3/6 hirunningspace.  To be fair we also
+ * block if another thread is already blocked here, even if
+ * runningbufspace is now lower than the limit.
*
* The caller may be using this function to block in a tight loop, we
- * must block while runningbufspace is greater then or equal to
- * hirunningspace * 4 / 6.
- *
- * And even with that it may not be enough, due to the presence of
- * B_LOCKED dirty buffers, so also wait for at least one running buffer
- * to complete.
+ * must remain blocked until runningbufspace has dropped to
+ * hirunningspace * 3 / 6 or less.
*/
void
waitrunningbufspace(void)
{
int limit = hirunningspace * 4 / 6;
- int dummy;
- spin_lock(&bufcspin);
- if (runningbufspace > limit) {
- while (runningbufspace > limit) {
- ++runningbufreq;
+ if (runningbufspace > limit || runningbufreq) { /* unlocked pre-check */
+ spin_lock(&bufcspin); /* condition is re-tested under the lock */
+ while (runningbufspace > limit || runningbufreq) {
+ runningbufreq = 1; /* flag a waiter; later callers then block too */
ssleep(&runningbufreq, &bufcspin, 0, "wdrn1", 0);
}
spin_unlock(&bufcspin);
- } else if (runningbufspace > limit / 2) {
- ++runningbufreq;
- spin_unlock(&bufcspin);
- tsleep(&dummy, 0, "wdrn2", 1);
- } else {
- spin_unlock(&bufcspin);
}
}
int td_fairq_load; /* fairq */
int td_fairq_count; /* fairq */
struct globaldata *td_migrate_gd; /* target gd for thread migration */
- const void *td_mplock_stallpc; /* last mplock stall address */
+ const void *unused01;
#ifdef DEBUG_CRIT_SECTIONS
#define CRIT_DEBUG_ARRAY_SIZE 32
#define CRIT_DEBUG_ARRAY_MASK (CRIT_DEBUG_ARRAY_SIZE - 1)
struct hammer_mod_rb_tree lose_root; /* loose buffers */
int locked_dirty_space; /* meta/volu count */
int io_running_space; /* io_token */
- int io_running_wakeup; /* io_token */
+ int unused01;
int objid_cache_count;
int dedup_cache_count;
int error; /* critical I/O error */
extern int hammer_count_io_running_write;
extern int hammer_count_io_locked;
extern int hammer_limit_dirtybufspace;
-extern int hammer_limit_running_io;
extern int hammer_limit_recs;
extern int hammer_limit_inode_recs;
extern int hammer_limit_reclaims;
TAILQ_INIT(&hmp->flusher.ready_list);
lwkt_create(hammer_flusher_master_thread, hmp,
- &hmp->flusher.td, NULL, TDF_SYSTHREAD, -1, "hammer-M");
+ &hmp->flusher.td, NULL, 0, -1, "hammer-M");
for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) {
info = kmalloc(sizeof(*info), hmp->m_misc, M_WAITOK|M_ZERO);
info->hmp = hmp;
TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
lwkt_create(hammer_flusher_slave_thread, info,
- &info->td, NULL, TDF_SYSTHREAD, -1, "hammer-S%d", i);
+ &info->td, NULL, 0, -1, "hammer-S%d", i);
}
}
hammer_stats_disk_write += iou->io.bytes;
atomic_add_int(&hammer_count_io_running_write, -iou->io.bytes);
atomic_add_int(&hmp->io_running_space, -iou->io.bytes);
- if (hmp->io_running_wakeup &&
- hmp->io_running_space < hammer_limit_running_io / 2) {
- hmp->io_running_wakeup = 0;
- wakeup(&hmp->io_running_wakeup);
- }
KKASSERT(hmp->io_running_space >= 0);
iou->io.running = 0;
void
hammer_io_limit_backlog(hammer_mount_t hmp)
{
- while (hmp->io_running_space > hammer_limit_running_io) {
- hmp->io_running_wakeup = 1;
- tsleep(&hmp->io_running_wakeup, 0, "hmiolm", hz / 10);
- }
+ waitrunningbufspace(); /* hmp no longer consulted; per-mount limit removed */
}
int hammer_count_io_running_write;
int hammer_count_io_locked;
int hammer_limit_dirtybufspace; /* per-mount */
-int hammer_limit_running_io; /* per-mount */
int hammer_limit_recs; /* as a whole XXX */
int hammer_limit_inode_recs = 2048; /* per inode */
int hammer_limit_reclaims;
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufspace, CTLFLAG_RW,
&hammer_limit_dirtybufspace, 0, "");
-SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_running_io, CTLFLAG_RW,
- &hammer_limit_running_io, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_recs, CTLFLAG_RW,
&hammer_limit_recs, 0, "");
SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_inode_recs, CTLFLAG_RW,
}
/*
- * Set reasonable limits to maintain an I/O pipeline. This is
- * used by the flush code which explicitly initiates I/O, and
- * is per-mount.
- *
- * The system-driven buffer cache uses vfs.lorunningspace and
- * vfs.hirunningspace globally.
- */
- if (hammer_limit_running_io == 0)
- hammer_limit_running_io = hammer_limit_dirtybufspace;
-
- if (hammer_limit_running_io > 10 * 1024 * 1024)
- hammer_limit_running_io = 10 * 1024 * 1024;
-
- /*
* The hammer_inode structure detaches from the vnode on reclaim.
* This limits the number of inodes in this state to prevent a
* memory pool blowout.
void vm_fault_unwire (vm_map_t, vm_map_entry_t);
int vm_fault_wire (vm_map_t, vm_map_entry_t, boolean_t);
void vm_fork (struct proc *, struct proc *, int);
-void vm_fault_ratecheck(void);
int vm_test_nominal (void);
void vm_wait_nominal (void);
void vm_init_limits(struct proc *);
vm_map_t map;
vm_map_entry_t entry;
int lookup_still_valid;
- int didlimit;
int hardfault;
int fault_flags;
int map_generation;
#if 0
static int vm_fault_additional_pages (vm_page_t, int, int, vm_page_t *, int *);
#endif
-static int vm_fault_ratelimit(struct vmspace *);
static void vm_set_nosync(vm_page_t m, vm_map_entry_t entry);
static void vm_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry,
int prot);
mycpu->gd_cnt.v_vm_faults++;
- fs.didlimit = 0;
fs.hardfault = 0;
fs.fault_flags = fault_flags;
growstack = 1;
mycpu->gd_cnt.v_vm_faults++;
- fs.didlimit = 0;
fs.hardfault = 0;
fs.fault_flags = fault_flags;
KKASSERT((fault_flags & VM_FAULT_WIRE_MASK) == 0);
entry.maptype = VM_MAPTYPE_NORMAL;
entry.protection = entry.max_protection = fault_type;
- fs.didlimit = 0;
fs.hardfault = 0;
fs.fault_flags = fault_flags;
fs.map = NULL;
}
/*
- * Ratelimit.
- */
- if (fs->didlimit == 0 && curproc != NULL) {
- int limticks;
-
- limticks = vm_fault_ratelimit(curproc->p_vmspace);
- if (limticks) {
- vm_object_pip_wakeup(fs->first_object);
- vm_object_chain_release_all(
- fs->first_object, fs->object);
- if (fs->object != fs->first_object)
- vm_object_drop(fs->object);
- unlock_and_deallocate(fs);
- tsleep(curproc, 0, "vmrate", limticks);
- fs->didlimit = 1;
- return (KERN_TRY_AGAIN);
- }
- }
-
- /*
* Allocate a new page for this object/offset pair.
*
* It is possible for the allocation to race, so
}
/*
- * Reduce the rate at which memory is allocated to a process based
- * on the perceived load on the VM system. As the load increases
- * the allocation burst rate goes down and the delay increases.
- *
- * Rate limiting does not apply when faulting active or inactive
- * pages. When faulting 'cache' pages, rate limiting only applies
- * if the system currently has a severe page deficit.
- *
- * XXX vm_pagesupply should be increased when a page is freed.
- *
- * We sleep up to 1/10 of a second.
- */
-static int
-vm_fault_ratelimit(struct vmspace *vmspace)
-{
- if (vm_load_enable == 0)
- return(0);
- if (vmspace->vm_pagesupply > 0) {
- --vmspace->vm_pagesupply; /* SMP race ok */
- return(0);
- }
-#ifdef INVARIANTS
- if (vm_load_debug) {
- kprintf("load %-4d give %d pgs, wait %d, pid %-5d (%s)\n",
- vm_load,
- (1000 - vm_load ) / 10, vm_load * hz / 10000,
- curproc->p_pid, curproc->p_comm);
- }
-#endif
- vmspace->vm_pagesupply = (1000 - vm_load) / 10;
- return(vm_load * hz / 10000);
-}
-
-/*
* Copy all of the pages from a wired-down map entry to another.
*
* The source and destination maps must be locked for write.
SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout");
-int vm_load;
-SYSCTL_INT(_vm, OID_AUTO, vm_load,
- CTLFLAG_RD, &vm_load, 0, "load on the VM system");
-int vm_load_enable = 1;
-SYSCTL_INT(_vm, OID_AUTO, vm_load_enable,
- CTLFLAG_RW, &vm_load_enable, 0, "enable vm_load rate limiting");
-#ifdef INVARIANTS
-int vm_load_debug;
-SYSCTL_INT(_vm, OID_AUTO, vm_load_debug,
- CTLFLAG_RW, &vm_load_debug, 0, "debug vm_load");
-#endif
-
#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
static void vm_pageout_page_stats(int q);
/*
- * Update vm_load to slow down faulting processes.
- *
- * SMP races ok.
- * No requirements.
- */
-void
-vm_fault_ratecheck(void)
-{
- if (vm_pages_needed) {
- if (vm_load < 1000)
- ++vm_load;
- } else {
- if (vm_load > 0)
- --vm_load;
- }
-}
-
-/*
* vm_pageout_clean:
*
* Clean the page and remove it from the laundry. The page must not be
active_shortage = vmstats.v_inactive_target -
vmstats.v_inactive_count;
- tmp = inactive_shortage;
- if (tmp < vmstats.v_inactive_target / 10)
- tmp = vmstats.v_inactive_target / 10;
- inactive_shortage -= delta1;
- if (inactive_shortage <= 0 && active_shortage > tmp * 2)
- active_shortage = tmp * 2;
+ /*
+ * If we were unable to free sufficient inactive pages to
+ * satisfy the free/cache queue requirements then simply
+ * reaching the inactive target may not be good enough.
+ * Try to deactivate pages in excess of the target based
+ * on the shortfall.
+ *
+ * However to prevent thrashing the VM system do not
+ * deactivate more than an additional 1/10 the inactive
+ * target's worth of active pages.
+ */
+ if (delta1 < inactive_shortage) {
+ tmp = (inactive_shortage - delta1) * 2;
+ if (tmp > vmstats.v_inactive_target / 10)
+ tmp = vmstats.v_inactive_target / 10;
+ active_shortage += tmp;
+ }
delta2 = 0;
for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
extern int vm_pages_needed; /* should be some "event" structure */
extern int vm_pageout_pages_needed;
extern int vm_pageout_deficit;
-extern int vm_load;
-extern int vm_load_enable;
-extern int vm_load_debug;
#define VM_PAGEOUT_ASYNC 0
#define VM_PAGEOUT_SYNC 1