kernel - Fix numerous performance problems with the pageout daemon
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 11 Nov 2011 03:31:24 +0000 (19:31 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 11 Nov 2011 03:31:24 +0000 (19:31 -0800)
* The VM page queues were not being fully utilized, causing the pageout
  daemon to calculate incorrect average page counts for deactivation and
  freeing.  As a result the daemon dug into the active queue even when it
  did not need to.
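
  The fix spreads the aggregate shortage over the per-color queues with
  a small PQAVERAGE() helper added to vm_pageout.c (reproduced from the
  patch below); it rounds away from zero so each queue always receives
  at least a one-page quota:

    /*
     * Round an aggregate page count up (or down, if negative) to a
     * per-queue quota across the PQ_L2_SIZE page queues.
     */
    static __inline int
    PQAVERAGE(int n)
    {
            if (n >= 0)
                    return((n + (PQ_L2_SIZE - 1)) / PQ_L2_SIZE + 1);
            else
                    return((n - (PQ_L2_SIZE - 1)) / PQ_L2_SIZE - 1);
    }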

* The pageout daemon was incorrectly calculating the maxscan value for each
  queue.  It was using the aggregate count (across all 256 queues) instead of
  the per-queue count, resulting in long stalls when memory is low.
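
  The essential change, excerpted from vm_pageout.c below (the active
  queue scan and vm_pageout_page_stats() get the equivalent fix via
  vm_page_queues[PQ_ACTIVE + q].lcnt):

    /* Before: every per-color scan was bounded by the aggregate count. */
    maxscan = vmstats.v_inactive_count;

    /* After: each scan is bounded by its own queue's length. */
    maxscan = vm_page_queues[PQ_INACTIVE + q].lcnt;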

* Clean up the PQ_L2* knobs, constants, and other cruft, reducing them to
  the essentials for our goals.
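
  The parameters that survive the cleanup, excerpted from vm_page.h
  below, are now fixed at the 1M-cache values rather than selected by
  the removed PQ_CACHESIZE option, and the queue bases derive directly
  from PQ_L2_SIZE instead of the removed PQ_MAXL2_SIZE module-compat
  constant:

    #define PQ_PRIME1 31    /* Prime number somewhat less than PQ_HASH_SIZE */
    #define PQ_PRIME2 23    /* Prime number somewhat less than PQ_HASH_SIZE */
    #define PQ_L2_SIZE 256  /* A number of colors opt for 1M cache */
    #define PQ_L2_MASK      (PQ_L2_SIZE - 1)

    #define PQ_FREE         (1 + 0*PQ_L2_SIZE)
    #define PQ_INACTIVE     (1 + 1*PQ_L2_SIZE)
    #define PQ_ACTIVE       (1 + 2*PQ_L2_SIZE)
    #define PQ_CACHE        (1 + 3*PQ_L2_SIZE)
    #define PQ_HOLD         (1 + 4*PQ_L2_SIZE)
    #define PQ_COUNT        (1 + 5*PQ_L2_SIZE)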

Reported-by: vsrinivas, thesjg, luxh, etc
sys/conf/options
sys/config/LINT
sys/config/LINT64
sys/emulation/linux/Makefile
sys/vfs/nfs/Makefile
sys/vfs/nwfs/Makefile
sys/vfs/smbfs/Makefile
sys/vm/vm_page.h
sys/vm/vm_pageout.c
sys/vm/vm_swap.c
sys/vm/vm_swapcache.c

index da7b154..ec26b96 100644
@@ -488,7 +488,6 @@ DEBUG_PCTRACK               opt_pctrack.h
 
 # These are VM related options
 NO_SWAPPING            opt_vm.h
-PQ_CACHESIZE           opt_vmpage.h
 
 # Standard SMP options
 SMP                    opt_global.h
index 879ba94..955387c 100644
@@ -90,9 +90,6 @@ options       DFLDSIZ="(256*1024*1024)"
 #
 options        BLKDEV_IOSIZE=8192
 
-# Options for the VM subsystem.
-options        PQ_CACHESIZE=512        # color for 512k/16k cache
-
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
 #    strings -n 3 /kernel | sed -n 's/^___//p' > MYKERNEL
index 0e59d11..d9594dc 100644
@@ -90,9 +90,6 @@ options       DFLDSIZ="(256*1024*1024)"
 #
 options        BLKDEV_IOSIZE=8192
 
-# Options for the VM subsystem.
-options        PQ_CACHESIZE=512        # color for 512k/16k cache
-
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
 #    strings -n 3 /kernel | sed -n 's/^___//p' > MYKERNEL
index 759037c..5668acb 100644
@@ -10,7 +10,7 @@ SRCS= linux_dummy.c linux_emuldata.c linux_epoll.c \
        linux_machdep.c linux_mib.c linux_misc.c linux_time.c linux_signal.c \
        linux_socket.c \
        linux_stats.c linux_sysctl.c linux_sysent.c linux_sysvec.c \
-       linux_util.c opt_compat.h opt_global.h opt_vmpage.h
+       linux_util.c opt_compat.h opt_global.h
 SRCS+= bus_if.h device_if.h
 SRCS+= opt_nfs.h assym.s
 OBJS=  linux_support.o linux_locore.o
index f3e8e49..59d5eec 100644
@@ -5,7 +5,7 @@ KMOD=   nfs
 SRCS=  nfs_bio.c nfs_node.c nfs_kerb.c nfs_serv.c nfs_socket.c \
        nfs_srvcache.c nfs_subs.c nfs_syscalls.c nfs_vfsops.c nfs_iod.c \
        nfsm_subs.c nfs_vnops.c \
-       opt_inet.h opt_nfs.h opt_vmpage.h opt_bootp.h opt_nfsroot.h
+       opt_inet.h opt_nfs.h opt_bootp.h opt_nfsroot.h
 
 # 0/1 - requires INET to be configured in kernel
 #
index cbfd80e..4c0fa1c 100644
@@ -4,7 +4,7 @@
 KMOD=  nwfs
 
 SRCS=  nwfs_node.c nwfs_ioctl.c nwfs_io.c nwfs_vfsops.c nwfs_vnops.c \
-       nwfs_subr.c opt_ncp.h opt_nwfs.h opt_vmpage.h
+       nwfs_subr.c opt_ncp.h opt_nwfs.h
 
 .if defined(VNPRINT)
 CFLAGS+= -DVNPRINT
index 238db0e..2378301 100644
@@ -9,7 +9,7 @@
 KMOD=  smbfs
 
 SRCS=  opt_inet.h opt_ipx.h \
-       opt_netsmb.h opt_vmpage.h \
+       opt_netsmb.h \
        iconv_converter_if.h \
        md4c.c \
        smb_conn.c smb_dev.c smb_trantcp.c smb_smb.c smb_subr.c smb_rq.c \
index c7e9c73..ebafa88 100644
 #ifndef        _VM_VM_PAGE_H_
 #define        _VM_VM_PAGE_H_
 
-#if !defined(KLD_MODULE) && defined(_KERNEL)
-#include "opt_vmpage.h"
-#endif
-
 #ifndef _SYS_TYPES_H_
 #include <sys/types.h>
 #endif
@@ -201,69 +197,46 @@ typedef struct vm_page *vm_page_t;
 #endif
 
 /*
- * Page coloring parameters.  We default to a middle of the road optimization.
- * Larger selections would not really hurt us but if a machine does not have
- * a lot of memory it could cause vm_page_alloc() to eat more cpu cycles 
- * looking for free pages.
+ * Page coloring parameters.  We use generous parameters designed to
+ * statistically spread pages over available cpu cache space.  This has
+ * become less important over time as cache associativity is higher
+ * in modern times but we still use the core algorithm to help reduce
+ * lock contention between cpus.
  *
- * Page coloring cannot be disabled.  Modules do not have access to most PQ
- * constants because they can change between builds.
+ * Page coloring cannot be disabled.
  */
-#if defined(_KERNEL) && !defined(KLD_MODULE)
-
-#if !defined(PQ_CACHESIZE)
-#define PQ_CACHESIZE 256       /* max is 1024 (MB) */
-#endif
 
-#if PQ_CACHESIZE >= 1024
 #define PQ_PRIME1 31   /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_PRIME2 23   /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_L2_SIZE 256 /* A number of colors opt for 1M cache */
 
-#elif PQ_CACHESIZE >= 512
+#if 0
 #define PQ_PRIME1 31   /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_PRIME2 23   /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_L2_SIZE 128 /* A number of colors opt for 512K cache */
 
-#elif PQ_CACHESIZE >= 256
 #define PQ_PRIME1 13   /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_PRIME2 7    /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_L2_SIZE 64  /* A number of colors opt for 256K cache */
 
-#elif PQ_CACHESIZE >= 128
 #define PQ_PRIME1 9    /* Produces a good PQ_L2_SIZE/3 + PQ_PRIME1 */
 #define PQ_PRIME2 5    /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_L2_SIZE 32  /* A number of colors opt for 128k cache */
 
-#else
 #define PQ_PRIME1 5    /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_PRIME2 3    /* Prime number somewhat less than PQ_HASH_SIZE */
 #define PQ_L2_SIZE 16  /* A reasonable number of colors (opt for 64K cache) */
-
 #endif
 
 #define PQ_L2_MASK     (PQ_L2_SIZE - 1)
 
-#endif /* KERNEL && !KLD_MODULE */
-
-/*
- *
- * The queue array is always based on PQ_MAXL2_SIZE regardless of the actual
- * cache size chosen in order to present a uniform interface for modules.
- */
-#define PQ_MAXL2_SIZE  256     /* fixed maximum (in pages) / module compat */
-
-#if PQ_L2_SIZE > PQ_MAXL2_SIZE
-#error "Illegal PQ_L2_SIZE"
-#endif
-
 #define PQ_NONE                0
-#define PQ_FREE                (1 + 0*PQ_MAXL2_SIZE)
-#define PQ_INACTIVE    (1 + 1*PQ_MAXL2_SIZE)
-#define PQ_ACTIVE      (1 + 2*PQ_MAXL2_SIZE)
-#define PQ_CACHE       (1 + 3*PQ_MAXL2_SIZE)
-#define PQ_HOLD                (1 + 4*PQ_MAXL2_SIZE)
-#define PQ_COUNT       (1 + 5*PQ_MAXL2_SIZE)
+#define PQ_FREE                (1 + 0*PQ_L2_SIZE)
+#define PQ_INACTIVE    (1 + 1*PQ_L2_SIZE)
+#define PQ_ACTIVE      (1 + 2*PQ_L2_SIZE)
+#define PQ_CACHE       (1 + 3*PQ_L2_SIZE)
+#define PQ_HOLD                (1 + 4*PQ_L2_SIZE)
+#define PQ_COUNT       (1 + 5*PQ_L2_SIZE)
 
 /*
  * Scan support
index 328fd2e..ba1922d 100644
@@ -119,7 +119,6 @@ static struct kproc_desc vm_kp = {
 SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
 #endif
 
-
 int vm_pages_needed=0;         /* Event on which pageout daemon sleeps */
 int vm_pageout_deficit=0;      /* Estimated number of pages deficit */
 int vm_pageout_pages_needed=0; /* flag saying that the pageout daemon needs pages */
@@ -196,6 +195,15 @@ static void vm_req_vmdaemon (void);
 #endif
 static void vm_pageout_page_stats(int q);
 
+static __inline int
+PQAVERAGE(int n)
+{
+       if (n >= 0)
+               return((n + (PQ_L2_SIZE - 1)) / PQ_L2_SIZE + 1);
+       else
+               return((n - (PQ_L2_SIZE - 1)) / PQ_L2_SIZE - 1);
+}
+
 /*
  * vm_pageout_clean:
  *
@@ -718,7 +726,7 @@ struct vm_pageout_scan_info {
 static int vm_pageout_scan_callback(struct proc *p, void *data);
 
 static int
-vm_pageout_scan_inactive(int pass, int q, int inactive_shortage,
+vm_pageout_scan_inactive(int pass, int q, int avail_shortage,
                         int *vnodes_skippedp)
 {
        vm_page_t m;
@@ -771,11 +779,11 @@ vm_pageout_scan_inactive(int pass, int q, int inactive_shortage,
 
        vm_page_queues_spin_lock(PQ_INACTIVE + q);
        TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE + q].pl, &marker, pageq);
-       maxscan = vmstats.v_inactive_count;
+       maxscan = vm_page_queues[PQ_INACTIVE + q].lcnt;
        vm_page_queues_spin_unlock(PQ_INACTIVE + q);
 
        while ((m = TAILQ_NEXT(&marker, pageq)) != NULL &&
-              maxscan-- > 0 && inactive_shortage - delta > 0)
+              maxscan-- > 0 && avail_shortage - delta > 0)
        {
                vm_page_and_queue_spin_lock(m);
                if (m != TAILQ_NEXT(&marker, pageq)) {
@@ -1129,20 +1137,19 @@ vm_pageout_scan_inactive(int pass, int q, int inactive_shortage,
        vm_page_queues_spin_lock(PQ_INACTIVE + q);
        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE + q].pl, &marker, pageq);
        vm_page_queues_spin_unlock(PQ_INACTIVE + q);
-
        return (delta);
 }
 
 static int
 vm_pageout_scan_active(int pass, int q,
-                      int inactive_shortage, int active_shortage,
+                      int avail_shortage, int inactive_shortage,
                       int *recycle_countp)
 {
        struct vm_page marker;
        vm_page_t m;
        int actcount;
        int delta = 0;
-       int pcount;
+       int maxscan;
 
        /*
         * We want to move pages from the active queue to the inactive
@@ -1173,17 +1180,17 @@ vm_pageout_scan_active(int pass, int q,
 
        vm_page_queues_spin_lock(PQ_ACTIVE + q);
        TAILQ_INSERT_HEAD(&vm_page_queues[PQ_ACTIVE + q].pl, &marker, pageq);
+       maxscan = vm_page_queues[PQ_ACTIVE + q].lcnt;
        vm_page_queues_spin_unlock(PQ_ACTIVE + q);
-       pcount = vmstats.v_active_count;
 
        while ((m = TAILQ_NEXT(&marker, pageq)) != NULL &&
-              pcount-- > 0 && (inactive_shortage - delta > 0 ||
-                               active_shortage > 0))
+              maxscan-- > 0 && (avail_shortage - delta > 0 ||
+                               inactive_shortage > 0))
        {
                vm_page_and_queue_spin_lock(m);
                if (m != TAILQ_NEXT(&marker, pageq)) {
                        vm_page_and_queue_spin_unlock(m);
-                       ++pcount;
+                       ++maxscan;
                        continue;
                }
                KKASSERT(m->queue - m->pc == PQ_ACTIVE);
@@ -1285,15 +1292,14 @@ vm_pageout_scan_active(int pass, int q,
                                 * inactive scan, that could lead to
                                 * gigabytes being moved.
                                 */
-                               --active_shortage;
-                               if (inactive_shortage - delta > 0 ||
+                               --inactive_shortage;
+                               if (avail_shortage - delta > 0 ||
                                    m->object->ref_count == 0) {
-                                       if (inactive_shortage - delta > 0)
+                                       if (avail_shortage - delta > 0)
                                                ++*recycle_countp;
                                        vm_page_protect(m, VM_PROT_NONE);
                                        if (m->dirty == 0 &&
-                                           inactive_shortage - delta > 0) {
-                                               ++delta;
+                                           avail_shortage - delta > 0) {
                                                vm_page_cache(m);
                                        } else {
                                                vm_page_deactivate(m);
@@ -1303,6 +1309,7 @@ vm_pageout_scan_active(int pass, int q,
                                        vm_page_deactivate(m);
                                        vm_page_wakeup(m);
                                }
+                               ++delta;
                        } else {
                                vm_page_and_queue_spin_lock(m);
                                if (m->queue - m->pc == PQ_ACTIVE) {
@@ -1359,8 +1366,7 @@ vm_pageout_scan_active(int pass, int q,
  * pages_freed counter.
  */
 static void
-vm_pageout_scan_cache(int inactive_shortage,
-                     int vnodes_skipped, int recycle_count)
+vm_pageout_scan_cache(int avail_shortage, int vnodes_skipped, int recycle_count)
 {
        struct vm_pageout_scan_info info;
        vm_page_t m;
@@ -1456,7 +1462,7 @@ vm_pageout_scan_cache(int inactive_shortage,
         *   enough pages to meet bare minimum needs.  This test only
         *   works if the inactive queue is bloated.
         *
-        * - due to a positive inactive_shortage we shifted the remaining
+        * - due to a positive avail_shortage we shifted the remaining
         *   dirty pages from the active queue to the inactive queue
         *   trying to find clean ones to free.
         */
@@ -1464,7 +1470,7 @@ vm_pageout_scan_cache(int inactive_shortage,
                kprintf("Warning: system low on memory+swap!\n");
        if (swap_pager_full && vm_page_count_min(recycle_count) &&
            vmstats.v_inactive_count > vmstats.v_active_count * 4 &&
-           inactive_shortage > 0) {
+           avail_shortage > 0) {
                /*
                 * Kill something.
                 */
@@ -1554,11 +1560,11 @@ vm_pageout_page_stats(int q)
        if (page_shortage <= 0)
                return;
 
-       pcount = vmstats.v_active_count;
+       pcount = vm_page_queues[PQ_ACTIVE + q].lcnt;
        fullintervalcount += vm_pageout_stats_interval;
        if (fullintervalcount < vm_pageout_full_stats_interval) {
-               tpcount = (vm_pageout_stats_max * vmstats.v_active_count) /
-                         vmstats.v_page_count;
+               tpcount = (vm_pageout_stats_max * pcount) /
+                         vmstats.v_page_count + 1;
                if (pcount > tpcount)
                        pcount = tpcount;
        } else {
@@ -1823,8 +1829,8 @@ vm_pageout_thread(void)
                int error;
                int delta1;
                int delta2;
+               int avail_shortage;
                int inactive_shortage;
-               int active_shortage;
                int vnodes_skipped = 0;
                int recycle_count = 0;
                int tmp;
@@ -1841,7 +1847,7 @@ vm_pageout_thread(void)
                        if (error &&
                            vm_paging_needed() == 0 &&
                            vm_pages_needed == 0) {
-                               for (q = 0; q < PQ_MAXL2_SIZE; ++q)
+                               for (q = 0; q < PQ_L2_SIZE; ++q)
                                        vm_pageout_page_stats(q);
                                continue;
                        }
@@ -1863,16 +1869,20 @@ vm_pageout_thread(void)
                 * want to get to.  This is higher then the number that causes
                 * allocations to stall (severe) in order to provide hysteresis,
                 * and if we don't make it all the way but get to the minimum
-                * we're happy.
+                * we're happy.  Goose it a bit if there are multiple
+                * requests for memory.
                 */
-               inactive_shortage = vm_paging_target() + vm_pageout_deficit;
+               avail_shortage = vm_paging_target() + vm_pageout_deficit;
                vm_pageout_deficit = 0;
                delta1 = 0;
-               for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
-                       delta1 += vm_pageout_scan_inactive(
-                                       pass, q,
-                                       inactive_shortage / PQ_MAXL2_SIZE + 1,
-                                       &vnodes_skipped);
+               if (avail_shortage > 0) {
+                       for (q = 0; q < PQ_L2_SIZE; ++q) {
+                               delta1 += vm_pageout_scan_inactive(
+                                           pass, q,
+                                           PQAVERAGE(avail_shortage),
+                                           &vnodes_skipped);
+                       }
+                       avail_shortage -= delta1;
                }
 
                /*
@@ -1881,8 +1891,8 @@ vm_pageout_thread(void)
                 * scan above we limit the number of active pages we
                 * deactivate to reduce unnecessary work.
                 */
-               active_shortage = vmstats.v_inactive_target -
-                                 vmstats.v_inactive_count;
+               inactive_shortage = vmstats.v_inactive_target -
+                                   vmstats.v_inactive_count;
 
                /*
                 * If we were unable to free sufficient inactive pages to
@@ -1895,20 +1905,24 @@ vm_pageout_thread(void)
                 * deactivate more than an additional 1/10 the inactive
                 * target's worth of active pages.
                 */
-               if (delta1 < inactive_shortage) {
-                       tmp = (inactive_shortage - delta1) * 2;
+               if (avail_shortage > 0) {
+                       tmp = avail_shortage * 2;
                        if (tmp > vmstats.v_inactive_target / 10)
                                tmp = vmstats.v_inactive_target / 10;
-                       active_shortage += tmp;
+                       inactive_shortage += tmp;
                }
 
-               delta2 = 0;
-               for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
-                       delta2 += vm_pageout_scan_active(
-                                       pass, q,
-                                       inactive_shortage / PQ_MAXL2_SIZE + 1,
-                                       active_shortage / PQ_MAXL2_SIZE + 1,
-                                       &recycle_count);
+               if (avail_shortage > 0 || inactive_shortage > 0) {
+                       delta2 = 0;
+                       for (q = 0; q < PQ_L2_SIZE; ++q) {
+                               delta2 += vm_pageout_scan_active(
+                                               pass, q,
+                                               PQAVERAGE(avail_shortage),
+                                               PQAVERAGE(inactive_shortage),
+                                               &recycle_count);
+                       }
+                       inactive_shortage -= delta2;
+                       avail_shortage -= delta2;
                }
 
                /*
@@ -1916,14 +1930,13 @@ vm_pageout_thread(void)
                 * requirement and take more drastic measures if we are
                 * still in trouble.
                 */
-               inactive_shortage -= delta2;
-               vm_pageout_scan_cache(inactive_shortage, vnodes_skipped,
+               vm_pageout_scan_cache(avail_shortage, vnodes_skipped,
                                      recycle_count);
 
                /*
                 * Wait for more work.
                 */
-               if (inactive_shortage > 0) {
+               if (avail_shortage > 0) {
                        ++pass;
                        if (swap_pager_full) {
                                /*
index 88f73a0..6291df5 100644
@@ -459,7 +459,7 @@ swapoff_one(int index)
         * of data we will have to page back in, plus an epsilon so
         * the system doesn't become critically low on swap space.
         */
-       for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
+       for (q = 0; q < PQ_L2_SIZE; ++q) {
                bzero(&marker, sizeof(marker));
                marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
                marker.queue = PQ_ACTIVE + q;
index baf7865..fe44599 100644
@@ -159,7 +159,7 @@ vm_swapcached_thread(void)
 {
        enum { SWAPC_WRITING, SWAPC_CLEANING } state = SWAPC_WRITING;
        enum { SWAPB_BURSTING, SWAPB_RECOVERING } burst = SWAPB_BURSTING;
-       static struct vm_page page_marker[PQ_MAXL2_SIZE];
+       static struct vm_page page_marker[PQ_L2_SIZE];
        static struct vm_object object_marker;
        int q;
 
@@ -176,7 +176,7 @@ vm_swapcached_thread(void)
         * Initialize our marker for the inactive scan (SWAPC_WRITING)
         */
        bzero(&page_marker, sizeof(page_marker));
-       for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
+       for (q = 0; q < PQ_L2_SIZE; ++q) {
                page_marker[q].flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
                page_marker[q].queue = PQ_INACTIVE + q;
                page_marker[q].pc = q;
@@ -254,7 +254,7 @@ vm_swapcached_thread(void)
                if (state == SWAPC_WRITING) {
                        if (vm_swapcache_curburst >= vm_swapcache_accrate) {
                                if (burst == SWAPB_BURSTING) {
-                                       for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
+                                       for (q = 0; q < PQ_L2_SIZE; ++q) {
                                                vm_swapcache_writing(
                                                        &page_marker[q]);
                                        }
@@ -262,7 +262,7 @@ vm_swapcached_thread(void)
                                                burst = SWAPB_RECOVERING;
                                } else if (vm_swapcache_curburst >
                                           vm_swapcache_minburst) {
-                                       for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
+                                       for (q = 0; q < PQ_L2_SIZE; ++q) {
                                                vm_swapcache_writing(
                                                        &page_marker[q]);
                                        }
@@ -277,7 +277,7 @@ vm_swapcached_thread(void)
        /*
         * Cleanup (NOT REACHED)
         */
-       for (q = 0; q < PQ_MAXL2_SIZE; ++q) {
+       for (q = 0; q < PQ_L2_SIZE; ++q) {
                vm_page_queues_spin_lock(PQ_INACTIVE + q);
                TAILQ_REMOVE(
                        &vm_page_queues[PQ_INACTIVE + q].pl,