kernel - Add Proportional RSS (PRES)
author Matthew Dillon <dillon@apollo.backplane.com>
Tue, 5 Oct 2010 02:16:57 +0000 (19:16 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Tue, 5 Oct 2010 02:16:57 +0000 (19:16 -0700)
* Track vm_object sharing and calculate proportional RSS in kinfo_proc

* Add PRES to the top(1) output

Submitted-by: Pratyush Kshirsagar <pratyush.kshirsagar@gmail.com>
sys/kern/kern_kinfo.c
sys/platform/pc32/i386/pmap.c
sys/platform/pc64/x86_64/pmap.c
sys/platform/vkernel/platform/pmap.c
sys/platform/vkernel64/platform/pmap.c
sys/sys/kinfo.h
sys/vm/vm_map.h
sys/vm/vm_object.c
sys/vm/vm_object.h
sys/vm/vm_page.c
usr.bin/top/m_dragonfly.c

index 63de1d2..ef3fa3a 100644 (file)
@@ -147,6 +147,7 @@ fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
        if (p->p_vmspace) {
                kp->kp_vm_map_size = p->p_vmspace->vm_map.size;
                kp->kp_vm_rssize = vmspace_resident_count(p->p_vmspace);
+               kp->kp_vm_prssize = vmspace_president_count(p->p_vmspace);
                kp->kp_vm_swrss = p->p_vmspace->vm_swrss;
                kp->kp_vm_tsize = p->p_vmspace->vm_tsize;
                kp->kp_vm_dsize = p->p_vmspace->vm_dsize;
index e362aef..916b8cc 100644 (file)
@@ -1777,6 +1777,7 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m,
        test_m_maps_pv(m, pv);
        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
        m->md.pv_list_count--;
+        m->object->agg_pv_list_count--;
        if (TAILQ_EMPTY(&m->md.pv_list))
                vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
        TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
@@ -1811,6 +1812,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
        ++pmap->pm_generation;
        m->md.pv_list_count++;
+        m->object->agg_pv_list_count++;
 
        crit_exit();
 }
@@ -2052,6 +2054,7 @@ pmap_remove_all(vm_page_t m)
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                ++pv->pv_pmap->pm_generation;
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem, &info);
@@ -2948,6 +2951,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                save_generation = ++pmap->pm_generation;
 
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
index ec41211..6bb1e25 100644 (file)
@@ -2098,6 +2098,7 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m,
 
        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
        m->md.pv_list_count--;
+       m->object->agg_pv_list_count--;
        KKASSERT(m->md.pv_list_count >= 0);
        if (TAILQ_EMPTY(&m->md.pv_list))
                vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
@@ -2130,6 +2131,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
        ++pmap->pm_generation;
        m->md.pv_list_count++;
+       m->object->agg_pv_list_count++;
 
        crit_exit();
 }
@@ -2373,6 +2375,7 @@ pmap_remove_all(vm_page_t m)
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                ++pv->pv_pmap->pm_generation;
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                KKASSERT(m->md.pv_list_count >= 0);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
@@ -3349,6 +3352,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                save_generation = ++pmap->pm_generation;
 
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
index 5e40238..fba3d8b 100644 (file)
@@ -1347,6 +1347,7 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
 
        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
        m->md.pv_list_count--;
+       m->object->agg_pv_list_count--;
        TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
        if (TAILQ_EMPTY(&m->md.pv_list))
                vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
@@ -1377,6 +1378,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
        ++pmap->pm_generation;
        m->md.pv_list_count++;
+       m->object->agg_pv_list_count++;
 
        crit_exit();
 }
@@ -1615,6 +1617,7 @@ pmap_remove_all(vm_page_t m)
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                ++pv->pv_pmap->pm_generation;
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
                pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
@@ -2566,6 +2569,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                save_generation = ++pmap->pm_generation;
 
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                if (TAILQ_FIRST(&m->md.pv_list) == NULL)
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
index b29427b..6b5e43a 100644 (file)
@@ -1796,6 +1796,7 @@ pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
        if (pv) {
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                KKASSERT(m->md.pv_list_count >= 0);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
@@ -1826,6 +1827,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
        TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
        m->md.pv_list_count++;
+       m->object->agg_pv_list_count++;
 
        crit_exit();
 }
@@ -2069,6 +2071,7 @@ pmap_remove_all(vm_page_t m)
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                ++pv->pv_pmap->pm_generation;
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                KKASSERT(m->md.pv_list_count >= 0);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
@@ -2837,6 +2840,7 @@ pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
                save_generation = ++pmap->pm_generation;
 
                m->md.pv_list_count--;
+               m->object->agg_pv_list_count--;
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
index c547c6d..d5b3409 100644 (file)
@@ -198,6 +198,7 @@ struct kinfo_proc {
        segsz_t         kp_vm_tsize;            /* text size */
        segsz_t         kp_vm_dsize;            /* data size */
        segsz_t         kp_vm_ssize;            /* stack size */
+        u_int          kp_vm_prssize;          /* proportional rss */
 
        int             kp_jailid;
 
index ac325db..eb43d70 100644 (file)
 #ifndef _MACHINE_PMAP_H_
 #include <machine/pmap.h>
 #endif
+#ifndef _VM_VM_OBJECT_H_
+#include <vm/vm_object.h>
+#endif
+#ifndef _SYS_NULL_H_
+#include <sys/_null.h>
+#endif
 
 struct vm_map_rb_tree;
 RB_PROTOTYPE(vm_map_rb_tree, vm_map_entry, rb_entry, rb_vm_map_compare);
@@ -400,6 +406,38 @@ vmspace_resident_count(struct vmspace *vmspace)
        return pmap_resident_count(vmspace_pmap(vmspace));
 }
 
+/*
+ * Return the proportional RSS of a vmspace: the sum, over NORMAL and
+ * VPAGETABLE map entries backed by an OBJT_DEFAULT or OBJT_SWAP object,
+ * of resident_page_count / agg_pv_list_count (integer division).
+ */
+static __inline u_int
+vmspace_president_count(struct vmspace *vmspace)
+{
+       vm_map_t map = &vmspace->vm_map;
+       vm_map_entry_t cur;
+       vm_object_t object;
+       u_int count = 0;
+       u_int n;
+
+       for (cur = map->header.next; cur != &map->header; cur = cur->next) {
+               if (cur->maptype != VM_MAPTYPE_NORMAL &&
+                   cur->maptype != VM_MAPTYPE_VPAGETABLE)
+                       continue;
+               if ((object = cur->object.vm_object) == NULL)
+                       continue;
+               if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP)
+                       continue;
+               /*
+                * Read once so the zero test and the divide see one value.
+                */
+               n = *(volatile u_int *)&object->agg_pv_list_count;
+               if (n != 0)
+                       count += object->resident_page_count / n;
+       }
+       return (count);
+}
+
 /*
  * Number of kernel maps and entries to statically allocate, required
  * during boot to bootstrap the VM system.
index d8a9272..15a2011 100644 (file)
@@ -164,6 +164,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
                vm_object_set_flag(object, OBJ_ONEMAPPING);
        object->paging_in_progress = 0;
        object->resident_page_count = 0;
+       object->agg_pv_list_count = 0;
        object->shadow_count = 0;
        object->pg_color = next_index;
        if ( size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
index d20b350..0b20a66 100644 (file)
@@ -161,6 +161,7 @@ struct vm_object {
        u_short unused01;
        int paging_in_progress;         /* Paging (in or out) so don't collapse or destroy */
        int resident_page_count;        /* number of resident pages */
+        u_int agg_pv_list_count;        /* aggregate pv list count */
        struct vm_object *backing_object; /* object that I'm a shadow of */
        vm_ooffset_t backing_object_offset;/* Offset in backing object */
        TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
index afe0b01..b43417c 100644 (file)
@@ -440,6 +440,12 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
         */
        object->resident_page_count++;
 
+       /*
+        * Add the page's pv_list_count when it is inserted into
+        * the object.
+        */
+       object->agg_pv_list_count = object->agg_pv_list_count + m->md.pv_list_count;
+
        /*
         * Since we are inserting a new and possibly dirty page,
         * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags.
@@ -489,6 +495,7 @@ vm_page_remove(vm_page_t m)
         */
        vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m);
        object->resident_page_count--;
+       object->agg_pv_list_count = object->agg_pv_list_count - m->md.pv_list_count;
        object->generation++;
        m->object = NULL;
 
index ffa789a..f9f1619 100644 (file)
@@ -97,16 +97,17 @@ struct handle {
  */
 
 static char smp_header[] =
-"  PID %-*.*s PRI NICE  SIZE    RES STATE  C   TIME   CTIME   CPU COMMAND";
+"  PID %-*.*s PRI NICE  SIZE    RES   PRES  STATE  C   TIME   CTIME   CPU COMMAND";
 
 #define smp_Proc_format \
-       "%5d %-*.*s %3d %3d%7s %6s %-6.6s %1x%7s %7s %5.2f%% %.*s"
+       "%5d %-*.*s %3d %3d%7s %6s %6s  %-6.6s %1x%7s %7s %5.2f%% %.*s"
 
 static char up_header[] =
-"  PID %-*.*s PRI NICE  SIZE    RES STATE    TIME   CTIME   CPU COMMAND";
+"  PID %-*.*s PRI NICE  SIZE    RES   PRES  STATE    TIME   CTIME   CPU COMMAND";
 
+/* Keep a blank between the TIME and CTIME fields so rows align with up_header. */
 #define up_Proc_format \
-       "%5d %-*.*s %3d %3d%7s %6s %-6.6s%.0d%7s %7s %5.2f%% %.*s"
+       "%5d %-*.*s %3d %3d%7s %6s %6s  %-6.6s%.0d%7s %7s %5.2f%% %.*s"
 
 
 
@@ -183,7 +184,7 @@ static int pageshift;               /* log base 2 of the pagesize */
 
 /* sorting orders. first is default */
 char *ordernames[] = {
-       "cpu", "size", "res", "time", "pri", "thr", "pid", "ctime",  NULL
+  "cpu", "size", "res", "time", "pri", "thr", "pid", "ctime",  "pres", NULL
 };
 
 /* compare routines */
@@ -195,6 +196,7 @@ int compare_ctime (struct kinfo_proc **, struct kinfo_proc **);
 int compare_prio(struct kinfo_proc **, struct kinfo_proc **);
 int compare_thr (struct kinfo_proc **, struct kinfo_proc **);
 int compare_pid (struct kinfo_proc **, struct kinfo_proc **);
+int compare_pres(struct kinfo_proc **, struct kinfo_proc **);
 
 int (*proc_compares[]) (struct kinfo_proc **,struct kinfo_proc **) = {
        proc_compare,
@@ -205,6 +207,7 @@ int (*proc_compares[]) (struct kinfo_proc **,struct kinfo_proc **) = {
        compare_thr,
        compare_pid,
        compare_ctime,
+       compare_pres,
        NULL
 };
 
@@ -361,7 +364,6 @@ get_system_info(struct system_info *si)
                if (cp_time == NULL)
                        err(1, "cp_time");
                cp_old = cp_time + n_cpus;
-
                len = n_cpus * sizeof(cp_old[0]);
                bzero(cp_time, len);
                if (sysctlbyname("kern.cputime", cp_old, &len, NULL, 0))
@@ -628,6 +630,7 @@ format_next_process(caddr_t xhandle, char *(*get_userid) (int))
            (int)xnice,
            format_k(PROCSIZE(pp)),
            format_k(pagetok(VP(pp, rssize))),
+           format_k(pagetok(VP(pp, prssize))),
            status,
            (int)(smpmode ? LP(pp, cpuid) : 0),
            cputime_fmt,
@@ -704,6 +707,9 @@ static unsigned char sorted_state[] =
 #define ORDERKEY_PID \
   if ( (result = PP(p1, pid) - PP(p2, pid)) == 0)
 
+#define ORDERKEY_PRSSIZE /* sign-only compare; no unsigned difference is stored in int */ \
+  if ((result = (VP(p2, prssize) > VP(p1, prssize)) - (VP(p2, prssize) < VP(p1, prssize))) == 0)
+
 /* compare_cpu - the comparison function for sorting by cpu percentage */
 
 int
@@ -779,6 +785,32 @@ compare_res(struct kinfo_proc **pp1, struct kinfo_proc **pp2)
        return (result);
 }
 
+/* compare_pres - sort comparison on proportional resident set size; ties fall through to the later keys */
+
+int
+compare_pres(struct kinfo_proc **pp1, struct kinfo_proc **pp2)
+{
+       struct kinfo_proc *p1;
+       struct kinfo_proc *p2;
+       int result;             /* assigned by the ORDERKEY_* chain, returned below */
+       pctcpu lresult;         /* NOTE(review): presumably scratch for ORDERKEY_PCTCPU - confirm */
+
+       /* remove one level of indirection */
+       p1 = *(struct kinfo_proc **) pp1;
+       p2 = *(struct kinfo_proc **) pp2;
+
+       ORDERKEY_PRSSIZE        /* bodiless if: >0 when p2's prssize is larger; next key runs only on a tie */
+       ORDERKEY_RSSIZE         /* NOTE(review): this and the keys below are defined outside this view; */
+       ORDERKEY_MEM            /* presumably each is the same "if ((result = ...) == 0)" shape - confirm */
+       ORDERKEY_PCTCPU
+       ORDERKEY_CPTICKS
+       ORDERKEY_STATE
+       ORDERKEY_PRIO
+       {}                      /* empty body for the last key in the chain */
+
+       return (result);
+}
+
 /* compare_time - the comparison function for sorting by total cpu time */
 
 int