kernel - Break up scheduler and loadavg callout
[dragonfly.git] / sys / vm / vm_meter.c
1 /*
2  * (MPSAFE)
3  *
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *      @(#)vm_meter.c  8.4 (Berkeley) 1/4/94
32  * $FreeBSD: src/sys/vm/vm_meter.c,v 1.34.2.7 2002/10/10 19:28:22 dillon Exp $
33  * $DragonFly: src/sys/vm/vm_meter.c,v 1.15 2008/04/28 18:04:08 dillon Exp $
34  */
35
36 #include <sys/param.h>
37 #include <sys/proc.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/resource.h>
41 #include <sys/vmmeter.h>
42 #include <sys/kcollect.h>
43
44 #include <vm/vm.h>
45 #include <vm/vm_page.h>
46 #include <vm/vm_extern.h>
47 #include <vm/vm_param.h>
48 #include <sys/lock.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_map.h>
51 #include <vm/vm_object.h>
52 #include <sys/sysctl.h>
53
/*
 * WARNING: vmstats represents the final say, but individual cpus may
 *          accumulate adjustments in gd->gd_vmstats_adj.  These are
 *          synchronized to the global vmstats in hardclock.
 *
 *          In addition, most individual cpus check vmstats using a local
 *          copy of the global vmstats in gd->gd_vmstats.  Hardclock also
 *          synchronizes the copy.  The pageout code and vm_page_alloc will
 *          also synchronize their local copies as necessary.
 *
 *          Other consumers should not expect perfect values.
 */
66 __cachealign struct vmstats vmstats;
67
68 static int maxslp = MAXSLP;
69
70 SYSCTL_UINT(_vm, VM_V_FREE_MIN, v_free_min,
71         CTLFLAG_RW, &vmstats.v_free_min, 0,
72         "Minimum number of pages desired free");
73 SYSCTL_UINT(_vm, VM_V_FREE_TARGET, v_free_target,
74         CTLFLAG_RW, &vmstats.v_free_target, 0,
75         "Number of pages desired free");
76 SYSCTL_UINT(_vm, VM_V_FREE_RESERVED, v_free_reserved,
77         CTLFLAG_RW, &vmstats.v_free_reserved, 0,
78         "Number of pages reserved for deadlock");
79 SYSCTL_UINT(_vm, VM_V_INACTIVE_TARGET, v_inactive_target,
80         CTLFLAG_RW, &vmstats.v_inactive_target, 0,
81         "Number of pages desired inactive");
82 SYSCTL_UINT(_vm, VM_V_CACHE_MIN, v_cache_min,
83         CTLFLAG_RW, &vmstats.v_cache_min, 0,
84         "Min number of pages desired on cache queue");
85 SYSCTL_UINT(_vm, VM_V_CACHE_MAX, v_cache_max,
86         CTLFLAG_RW, &vmstats.v_cache_max, 0,
87         "Max number of pages in cached obj");
88 SYSCTL_UINT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min,
89         CTLFLAG_RW, &vmstats.v_pageout_free_min, 0,
90         "Min number pages reserved for kernel");
91 SYSCTL_UINT(_vm, OID_AUTO, v_free_severe,
92         CTLFLAG_RW, &vmstats.v_free_severe, 0, "");
93
94 SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD, 
95     &averunnable, loadavg, "Machine loadaverage history");
96
97 static int do_vmtotal_callback(struct proc *p, void *data);
98
99 /*
100  * No requirements.
101  */
102 static int
103 do_vmtotal(SYSCTL_HANDLER_ARGS)
104 {
105         struct vmtotal total;
106         globaldata_t gd;
107         int n;
108
109         bzero(&total, sizeof(total));
110         for (n = 0; n < ncpus; ++n) {
111                 gd = globaldata_find(n);
112
113                 /* total.t_rq calculated separately */
114                 /* total.t_dw calculated separately */
115                 /* total.t_pw calculated separately */
116                 /* total.t_sl calculated separately */
117                 /* total.t_sw calculated separately */
118                 total.t_vm += gd->gd_vmtotal.t_vm;
119                 total.t_avm += gd->gd_vmtotal.t_avm;
120                 total.t_rm += gd->gd_vmtotal.t_rm;
121                 total.t_arm += gd->gd_vmtotal.t_arm;
122                 total.t_vmshr += gd->gd_vmtotal.t_vmshr;
123                 total.t_avmshr += gd->gd_vmtotal.t_avmshr;
124                 total.t_rmshr += gd->gd_vmtotal.t_rmshr;
125                 total.t_armshr += gd->gd_vmtotal.t_armshr;
126                 /* total.t_free calculated separately */
127         }
128
129         /*
130          * Calculate process statistics.
131          */
132         allproc_scan(do_vmtotal_callback, &total, 0);
133
134         /*
135          * Adjust for sysctl return.  Add real memory into virtual memory.
136          * Set t_free.
137          *
138          * t_rm - Real memory
139          * t_vm - Virtual memory (real + swap)
140          */
141         total.t_vm += total.t_rm;
142         total.t_free = vmstats.v_free_count + vmstats.v_cache_count;
143
144         return (sysctl_handle_opaque(oidp, &total, sizeof(total), req));
145 }
146
147 static int
148 do_vmtotal_callback(struct proc *p, void *data)
149 {
150         struct vmtotal *totalp = data;
151         struct lwp *lp;
152
153         if (p->p_flags & P_SYSTEM)
154                 return(0);
155
156         lwkt_gettoken(&p->p_token);
157
158         FOREACH_LWP_IN_PROC(lp, p) {
159                 switch (lp->lwp_stat) {
160                 case LSSTOP:
161                 case LSSLEEP:
162                         if ((p->p_flags & P_SWAPPEDOUT) == 0) {
163                                 if ((lp->lwp_flags & LWP_SINTR) == 0)
164                                         totalp->t_dw++;
165                                 else if (lp->lwp_slptime < maxslp)
166                                         totalp->t_sl++;
167                         } else if (lp->lwp_slptime < maxslp) {
168                                 totalp->t_sw++;
169                         }
170                         if (lp->lwp_slptime >= maxslp)
171                                 goto out;
172                         break;
173
174                 case LSRUN:
175                         if (p->p_flags & P_SWAPPEDOUT)
176                                 totalp->t_sw++;
177                         else
178                                 totalp->t_rq++;
179                         if (p->p_stat == SIDL)
180                                 goto out;
181                         break;
182
183                 default:
184                         goto out;
185                 }
186
187                 /*
188                  * Set while in vm_fault()
189                  */
190                 if (lp->lwp_flags & LWP_PAGING)
191                         totalp->t_pw++;
192         }
193 out:
194         lwkt_reltoken(&p->p_token);
195         return(0);
196 }
197
198 /*
199  * No requirements.
200  */
201 static int
202 do_vmstats(SYSCTL_HANDLER_ARGS)
203 {
204         struct vmstats vms = vmstats;
205         return (sysctl_handle_opaque(oidp, &vms, sizeof(vms), req));
206 }
207
208 /*
209  * No requirements.
210  */
211 static int
212 do_vmmeter(SYSCTL_HANDLER_ARGS)
213 {
214         int boffset = offsetof(struct vmmeter, vmmeter_uint_begin);
215         int eoffset = offsetof(struct vmmeter, vmmeter_uint_end);
216         struct vmmeter vmm;
217         int i;
218
219         bzero(&vmm, sizeof(vmm));
220         for (i = 0; i < ncpus; ++i) {
221                 int off;
222                 struct globaldata *gd = globaldata_find(i);
223
224                 for (off = boffset; off <= eoffset; off += sizeof(u_int)) {
225                         *(u_int *)((char *)&vmm + off) +=
226                                 *(u_int *)((char *)&gd->gd_cnt + off);
227                 }
228                 
229         }
230         vmm.v_intr += vmm.v_ipi + vmm.v_timer;
231         return (sysctl_handle_opaque(oidp, &vmm, sizeof(vmm), req));
232 }
233
234 /*
235  * vcnt() -     accumulate statistics from the cnt structure for each cpu
236  *
237  *      The vmmeter structure is now per-cpu as well as global.  Those
238  *      statistics which can be kept on a per-cpu basis (to avoid cache
239  *      stalls between cpus) can be moved to the per-cpu vmmeter.  Remaining
240  *      statistics, such as v_free_reserved, are left in the global
241  *      structure.
242  *
243  * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
244  *
245  * No requirements.
246  */
247 static int
248 vcnt(SYSCTL_HANDLER_ARGS)
249 {
250         int i;
251         int count = 0;
252         int offset = arg2;
253
254         for (i = 0; i < ncpus; ++i) {
255                 struct globaldata *gd = globaldata_find(i);
256                 count += *(int *)((char *)&gd->gd_cnt + offset);
257         }
258         return(SYSCTL_OUT(req, &count, sizeof(int)));
259 }
260
261 /*
262  * No requirements.
263  */
264 static int
265 vcnt_intr(SYSCTL_HANDLER_ARGS)
266 {
267         int i;
268         int count = 0;
269
270         for (i = 0; i < ncpus; ++i) {
271                 struct globaldata *gd = globaldata_find(i);
272
273                 count += gd->gd_cnt.v_intr + gd->gd_cnt.v_ipi +
274                          gd->gd_cnt.v_timer;
275         }
276         return(SYSCTL_OUT(req, &count, sizeof(int)));
277 }
278
279 #define VMMETEROFF(var) offsetof(struct vmmeter, var)
280
281 SYSCTL_PROC(_vm, OID_AUTO, vmtotal, CTLTYPE_OPAQUE|CTLFLAG_RD,
282     0, sizeof(struct vmtotal), do_vmtotal, "S,vmtotal", 
283     "System virtual memory aggregate");
284 SYSCTL_PROC(_vm, OID_AUTO, vmstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
285     0, sizeof(struct vmstats), do_vmstats, "S,vmstats", 
286     "System virtual memory statistics");
287 SYSCTL_PROC(_vm, OID_AUTO, vmmeter, CTLTYPE_OPAQUE|CTLFLAG_RD,
288     0, sizeof(struct vmmeter), do_vmmeter, "S,vmmeter", 
289     "System statistics");
290 SYSCTL_NODE(_vm, OID_AUTO, stats, CTLFLAG_RW, 0, "VM meter stats");
291 SYSCTL_NODE(_vm_stats, OID_AUTO, sys, CTLFLAG_RW, 0, "VM meter sys stats");
292 SYSCTL_NODE(_vm_stats, OID_AUTO, vm, CTLFLAG_RW, 0, "VM meter vm stats");
293 SYSCTL_NODE(_vm_stats, OID_AUTO, misc, CTLFLAG_RW, 0, "VM meter misc stats");
294
295 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_swtch, CTLTYPE_UINT|CTLFLAG_RD,
296         0, VMMETEROFF(v_swtch), vcnt, "IU", "Context switches");
297 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_intrans_coll, CTLTYPE_UINT|CTLFLAG_RD,
298         0, VMMETEROFF(v_intrans_coll), vcnt, "IU", "Intransit map collisions (total)");
299 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_intrans_wait, CTLTYPE_UINT|CTLFLAG_RD,
300         0, VMMETEROFF(v_intrans_wait), vcnt, "IU", "Intransit map collisions which blocked");
301 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_forwarded_ints, CTLTYPE_UINT|CTLFLAG_RD,
302         0, VMMETEROFF(v_forwarded_ints), vcnt, "IU", "Forwarded interrupts due to MP lock");
303 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_forwarded_hits, CTLTYPE_UINT|CTLFLAG_RD,
304         0, VMMETEROFF(v_forwarded_hits), vcnt, "IU", "Forwarded hits due to MP lock");
305 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_forwarded_misses, CTLTYPE_UINT|CTLFLAG_RD,
306         0, VMMETEROFF(v_forwarded_misses), vcnt, "IU", "Forwarded misses due to MP lock");
307 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_trap, CTLTYPE_UINT|CTLFLAG_RD,
308         0, VMMETEROFF(v_trap), vcnt, "IU", "Traps");
309 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_syscall, CTLTYPE_UINT|CTLFLAG_RD,
310         0, VMMETEROFF(v_syscall), vcnt, "IU", "Syscalls");
311 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_intr, CTLTYPE_UINT|CTLFLAG_RD,
312         0, VMMETEROFF(v_intr), vcnt_intr, "IU", "Hardware interrupts");
313 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_ipi, CTLTYPE_UINT|CTLFLAG_RD,
314         0, VMMETEROFF(v_ipi), vcnt, "IU", "Inter-processor interrupts");
315 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_timer, CTLTYPE_UINT|CTLFLAG_RD,
316         0, VMMETEROFF(v_timer), vcnt, "IU", "LAPIC timer interrupts");
317 SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_soft, CTLTYPE_UINT|CTLFLAG_RD,
318         0, VMMETEROFF(v_soft), vcnt, "IU", "Software interrupts");
319 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vm_faults, CTLTYPE_UINT|CTLFLAG_RD,
320         0, VMMETEROFF(v_vm_faults), vcnt, "IU", "VM faults");
321 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_cow_faults, CTLTYPE_UINT|CTLFLAG_RD,
322         0, VMMETEROFF(v_cow_faults), vcnt, "IU", "COW faults");
323 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_cow_optim, CTLTYPE_UINT|CTLFLAG_RD,
324         0, VMMETEROFF(v_cow_optim), vcnt, "IU", "Optimized COW faults");
325 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_zfod, CTLTYPE_UINT|CTLFLAG_RD,
326         0, VMMETEROFF(v_zfod), vcnt, "IU", "Zero fill");
327 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_ozfod, CTLTYPE_UINT|CTLFLAG_RD,
328         0, VMMETEROFF(v_ozfod), vcnt, "IU", "Optimized zero fill");
329 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_swapin, CTLTYPE_UINT|CTLFLAG_RD,
330         0, VMMETEROFF(v_swapin), vcnt, "IU", "Swapin operations");
331 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_swapout, CTLTYPE_UINT|CTLFLAG_RD,
332         0, VMMETEROFF(v_swapout), vcnt, "IU", "Swapout operations");
333 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_swappgsin, CTLTYPE_UINT|CTLFLAG_RD,
334         0, VMMETEROFF(v_swappgsin), vcnt, "IU", "Swapin pages");
335 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_swappgsout, CTLTYPE_UINT|CTLFLAG_RD,
336         0, VMMETEROFF(v_swappgsout), vcnt, "IU", "Swapout pages");
337 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vnodein, CTLTYPE_UINT|CTLFLAG_RD,
338         0, VMMETEROFF(v_vnodein), vcnt, "IU", "Vnodein operations");
339 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vnodeout, CTLTYPE_UINT|CTLFLAG_RD,
340         0, VMMETEROFF(v_vnodeout), vcnt, "IU", "Vnodeout operations");
341 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vnodepgsin, CTLTYPE_UINT|CTLFLAG_RD,
342         0, VMMETEROFF(v_vnodepgsin), vcnt, "IU", "Vnodein pages");
343 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vnodepgsout, CTLTYPE_UINT|CTLFLAG_RD,
344         0, VMMETEROFF(v_vnodepgsout), vcnt, "IU", "Vnodeout pages");
345 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_intrans, CTLTYPE_UINT|CTLFLAG_RD,
346         0, VMMETEROFF(v_intrans), vcnt, "IU", "In transit page blocking");
347 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_reactivated, CTLTYPE_UINT|CTLFLAG_RD,
348         0, VMMETEROFF(v_reactivated), vcnt, "IU", "Reactivated pages");
349 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_pdwakeups, CTLTYPE_UINT|CTLFLAG_RD,
350         0, VMMETEROFF(v_pdwakeups), vcnt, "IU", "Pagedaemon wakeups");
351 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_ppwakeups, CTLTYPE_UINT|CTLFLAG_RD,
352         0, VMMETEROFF(v_ppwakeups), vcnt, "IU", "vm_wait wakeups");
353 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_pdpages, CTLTYPE_UINT|CTLFLAG_RD,
354         0, VMMETEROFF(v_pdpages), vcnt, "IU", "Pagedaemon page scans");
355 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_dfree, CTLTYPE_UINT|CTLFLAG_RD,
356         0, VMMETEROFF(v_dfree), vcnt, "IU", "Pages freed by daemon");
357 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_pfree, CTLTYPE_UINT|CTLFLAG_RD,
358         0, VMMETEROFF(v_pfree), vcnt, "IU", "Pages freed by exiting processes");
359 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_tfree, CTLTYPE_UINT|CTLFLAG_RD,
360         0, VMMETEROFF(v_tfree), vcnt, "IU", "Total pages freed");
361 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_forks, CTLTYPE_UINT|CTLFLAG_RD,
362         0, VMMETEROFF(v_forks), vcnt, "IU", "Number of fork() calls");
363 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vforks, CTLTYPE_UINT|CTLFLAG_RD,
364         0, VMMETEROFF(v_vforks), vcnt, "IU", "Number of vfork() calls");
365 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_rforks, CTLTYPE_UINT|CTLFLAG_RD,
366         0, VMMETEROFF(v_rforks), vcnt, "IU", "Number of rfork() calls");
367 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_kthreads, CTLTYPE_UINT|CTLFLAG_RD,
368         0, VMMETEROFF(v_kthreads), vcnt, "IU", "Number of fork() calls by kernel");
369 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_forkpages, CTLTYPE_UINT|CTLFLAG_RD,
370         0, VMMETEROFF(v_forkpages), vcnt, "IU", "VM pages affected by fork()");
371 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_vforkpages, CTLTYPE_UINT|CTLFLAG_RD,
372         0, VMMETEROFF(v_vforkpages), vcnt, "IU", "VM pages affected by vfork()");
373 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_rforkpages, CTLTYPE_UINT|CTLFLAG_RD,
374         0, VMMETEROFF(v_rforkpages), vcnt, "IU", "VM pages affected by rfork()");
375 SYSCTL_PROC(_vm_stats_vm, OID_AUTO, v_kthreadpages, CTLTYPE_UINT|CTLFLAG_RD,
376         0, VMMETEROFF(v_kthreadpages), vcnt, "IU", "VM pages affected by fork() by kernel");
377
378 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
379         v_page_size, CTLFLAG_RD, &vmstats.v_page_size, 0,
380         "Page size in bytes");
381 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
382         v_page_count, CTLFLAG_RD, &vmstats.v_page_count, 0, 
383         "Total number of pages in system");
384 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
385         v_free_reserved, CTLFLAG_RD, &vmstats.v_free_reserved, 0,
386         "Number of pages reserved for deadlock");
387 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
388         v_free_target, CTLFLAG_RD, &vmstats.v_free_target, 0,
389         "Number of pages desired free");
390 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
391         v_free_min, CTLFLAG_RD, &vmstats.v_free_min, 0,
392         "Minimum number of pages desired free");
393 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
394         v_free_count, CTLFLAG_RD, &vmstats.v_free_count, 0,
395         "Number of pages free");
396 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
397         v_wire_count, CTLFLAG_RD, &vmstats.v_wire_count, 0,
398         "Number of pages wired down");
399 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
400         v_active_count, CTLFLAG_RD, &vmstats.v_active_count, 0,
401         "Number of pages active");
402 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
403         v_inactive_target, CTLFLAG_RD, &vmstats.v_inactive_target, 0,
404         "Number of pages desired inactive");
405 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
406         v_inactive_count, CTLFLAG_RD, &vmstats.v_inactive_count, 0,
407         "Number of pages inactive");
408 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
409         v_cache_count, CTLFLAG_RD, &vmstats.v_cache_count, 0,
410         "Number of pages on buffer cache queue");
411 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
412         v_cache_min, CTLFLAG_RD, &vmstats.v_cache_min, 0,
413         "Min number of pages desired on cache queue");
414 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
415         v_cache_max, CTLFLAG_RD, &vmstats.v_cache_max, 0,
416         "Max number of pages in cached obj");
417 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
418         v_pageout_free_min, CTLFLAG_RD, &vmstats.v_pageout_free_min, 0,
419         "Min number pages reserved for kernel");
420 SYSCTL_UINT(_vm_stats_vm, OID_AUTO,
421         v_interrupt_free_min, CTLFLAG_RD, &vmstats.v_interrupt_free_min, 0,
422         "Reserved number of pages for int code");
423
424 /*
425  * No requirements.
426  */
427 static int
428 do_vmmeter_pcpu(SYSCTL_HANDLER_ARGS)
429 {
430         int boffset = offsetof(struct vmmeter, vmmeter_uint_begin);
431         int eoffset = offsetof(struct vmmeter, vmmeter_uint_end);
432         struct globaldata *gd = arg1;
433         struct vmmeter vmm;
434         int off;
435
436         bzero(&vmm, sizeof(vmm));
437         for (off = boffset; off <= eoffset; off += sizeof(u_int)) {
438                 *(u_int *)((char *)&vmm + off) +=
439                         *(u_int *)((char *)&gd->gd_cnt + off);
440         }
441         vmm.v_intr += vmm.v_ipi + vmm.v_timer;
442         return (sysctl_handle_opaque(oidp, &vmm, sizeof(vmm), req));
443 }
444
445 /*
446  * Callback for long-term slow data collection on 10-second interval.
447  *
448  * Return faults, set data for other entries.
449  */
450 #define PTOB(value)     ((uint64_t)(value) << PAGE_SHIFT)
451
452 static uint64_t
453 collect_vmstats_callback(int n)
454 {
455         static struct vmmeter last_vmm;
456         struct vmmeter cur_vmm;
457         const int boffset = offsetof(struct vmmeter, vmmeter_uint_begin);
458         const int eoffset = offsetof(struct vmmeter, vmmeter_uint_end);
459         uint64_t total;
460
461         /*
462          * The hardclock already rolls up vmstats for us.
463          */
464         kcollect_setvalue(KCOLLECT_MEMFRE, PTOB(vmstats.v_free_count));
465         kcollect_setvalue(KCOLLECT_MEMCAC, PTOB(vmstats.v_cache_count));
466         kcollect_setvalue(KCOLLECT_MEMINA, PTOB(vmstats.v_inactive_count));
467         kcollect_setvalue(KCOLLECT_MEMACT, PTOB(vmstats.v_active_count));
468         kcollect_setvalue(KCOLLECT_MEMWIR, PTOB(vmstats.v_wire_count));
469
470         /*
471          * Collect pcpu statistics for things like faults.
472          */
473         bzero(&cur_vmm, sizeof(cur_vmm));
474         for (n = 0; n < ncpus; ++n) {
475                 struct globaldata *gd = globaldata_find(n);
476                 int off;
477
478                 for (off = boffset; off <= eoffset; off += sizeof(u_int)) {
479                         *(u_int *)((char *)&cur_vmm + off) +=
480                                 *(u_int *)((char *)&gd->gd_cnt + off);
481                 }
482
483         }
484
485         total = cur_vmm.v_cow_faults - last_vmm.v_cow_faults;
486         last_vmm.v_cow_faults = cur_vmm.v_cow_faults;
487         kcollect_setvalue(KCOLLECT_COWFAULT, total);
488
489         total = cur_vmm.v_zfod - last_vmm.v_zfod;
490         last_vmm.v_zfod = cur_vmm.v_zfod;
491         kcollect_setvalue(KCOLLECT_ZFILL, total);
492
493         total = cur_vmm.v_syscall - last_vmm.v_syscall;
494         last_vmm.v_syscall = cur_vmm.v_syscall;
495         kcollect_setvalue(KCOLLECT_SYSCALLS, total);
496
497         total = cur_vmm.v_intr - last_vmm.v_intr;
498         last_vmm.v_intr = cur_vmm.v_intr;
499         kcollect_setvalue(KCOLLECT_INTR, total);
500
501         total = cur_vmm.v_ipi - last_vmm.v_ipi;
502         last_vmm.v_ipi = cur_vmm.v_ipi;
503         kcollect_setvalue(KCOLLECT_IPI, total);
504
505         total = cur_vmm.v_timer - last_vmm.v_timer;
506         last_vmm.v_timer = cur_vmm.v_timer;
507         kcollect_setvalue(KCOLLECT_TIMER, total);
508
509         total = cur_vmm.v_vm_faults - last_vmm.v_vm_faults;
510         last_vmm.v_vm_faults = cur_vmm.v_vm_faults;
511
512         return total;
513 }
514
515 /*
516  * Called from the low level boot code only.
517  */
518 static void
519 vmmeter_init(void *dummy __unused)
520 {
521         int i;
522
523         for (i = 0; i < ncpus; ++i) {
524                 struct sysctl_ctx_list *ctx;
525                 struct sysctl_oid *oid;
526                 struct globaldata *gd;
527                 char name[32];
528
529                 ksnprintf(name, sizeof(name), "cpu%d", i);
530
531                 ctx = kmalloc(sizeof(*ctx), M_TEMP, M_WAITOK);
532                 sysctl_ctx_init(ctx);
533                 oid = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_vm),
534                                       OID_AUTO, name, CTLFLAG_RD, 0, "");
535
536                 gd = globaldata_find(i);
537                 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
538                                 "vmmeter", CTLTYPE_OPAQUE|CTLFLAG_RD,
539                                 gd, sizeof(struct vmmeter), do_vmmeter_pcpu,
540                                 "S,vmmeter", "System per-cpu statistics");
541         }
542         kcollect_register(KCOLLECT_VMFAULT, "fault", collect_vmstats_callback,
543                           KCOLLECT_SCALE(KCOLLECT_VMFAULT_FORMAT, 0));
544         kcollect_register(KCOLLECT_COWFAULT, "cow", NULL,
545                           KCOLLECT_SCALE(KCOLLECT_COWFAULT_FORMAT, 0));
546         kcollect_register(KCOLLECT_ZFILL, "zfill", NULL,
547                           KCOLLECT_SCALE(KCOLLECT_ZFILL_FORMAT, 0));
548
549         kcollect_register(KCOLLECT_MEMFRE, "free", NULL,
550                           KCOLLECT_SCALE(KCOLLECT_MEMFRE_FORMAT,
551                                          PTOB(vmstats.v_page_count)));
552         kcollect_register(KCOLLECT_MEMCAC, "cache", NULL,
553                           KCOLLECT_SCALE(KCOLLECT_MEMCAC_FORMAT,
554                                          PTOB(vmstats.v_page_count)));
555         kcollect_register(KCOLLECT_MEMINA, "inact", NULL,
556                           KCOLLECT_SCALE(KCOLLECT_MEMINA_FORMAT,
557                                          PTOB(vmstats.v_page_count)));
558         kcollect_register(KCOLLECT_MEMACT, "act", NULL,
559                           KCOLLECT_SCALE(KCOLLECT_MEMACT_FORMAT,
560                                          PTOB(vmstats.v_page_count)));
561         kcollect_register(KCOLLECT_MEMWIR, "wired", NULL,
562                           KCOLLECT_SCALE(KCOLLECT_MEMWIR_FORMAT,
563                                          PTOB(vmstats.v_page_count)));
564
565         kcollect_register(KCOLLECT_SYSCALLS, "syscalls", NULL,
566                           KCOLLECT_SCALE(KCOLLECT_SYSCALLS_FORMAT, 0));
567
568         kcollect_register(KCOLLECT_INTR, "intr", NULL,
569                           KCOLLECT_SCALE(KCOLLECT_INTR_FORMAT, 0));
570         kcollect_register(KCOLLECT_IPI, "ipi", NULL,
571                           KCOLLECT_SCALE(KCOLLECT_IPI_FORMAT, 0));
572         kcollect_register(KCOLLECT_TIMER, "timer", NULL,
573                           KCOLLECT_SCALE(KCOLLECT_TIMER_FORMAT, 0));
574 }
575 SYSINIT(vmmeter, SI_SUB_PSEUDO, SI_ORDER_ANY, vmmeter_init, 0);
576
577 /*
578  * Rolls up accumulated pcpu adjustments to vmstats counts into the global
579  * structure, copy the global structure into our pcpu structure.  Critical
580  * path checks will use our pcpu structure.
581  *
582  * This is somewhat expensive and only called when needed, and by the
583  * hardclock.
584  */
585 void
586 vmstats_rollup(void)
587 {
588         int cpu;
589
590         for (cpu = 0; cpu < ncpus; ++cpu) {
591                 vmstats_rollup_cpu(globaldata_find(cpu));
592         }
593         mycpu->gd_vmstats = vmstats;
594 }
595
596 void
597 vmstats_rollup_cpu(globaldata_t gd)
598 {
599         int value;
600
601         if (gd->gd_vmstats_adj.v_free_count) {
602                 value = atomic_swap_int(&gd->gd_vmstats_adj.v_free_count, 0);
603                 atomic_add_int(&vmstats.v_free_count, value);
604         }
605         if (gd->gd_vmstats_adj.v_cache_count) {
606                 value = atomic_swap_int(&gd->gd_vmstats_adj.v_cache_count, 0);
607                 atomic_add_int(&vmstats.v_cache_count, value);
608         }
609         if (gd->gd_vmstats_adj.v_inactive_count) {
610                 value =atomic_swap_int(&gd->gd_vmstats_adj.v_inactive_count, 0);
611                 atomic_add_int(&vmstats.v_inactive_count, value);
612         }
613         if (gd->gd_vmstats_adj.v_active_count) {
614                 value = atomic_swap_int(&gd->gd_vmstats_adj.v_active_count, 0);
615                 atomic_add_int(&vmstats.v_active_count, value);
616         }
617         if (gd->gd_vmstats_adj.v_wire_count) {
618                 value = atomic_swap_int(&gd->gd_vmstats_adj.v_wire_count, 0);
619                 atomic_add_int(&vmstats.v_wire_count, value);
620         }
621 }