kernel - Fix up hysteresis for the zeroidle code
[dragonfly.git] / sys / vm / vm_zeroidle.c
1 /*
2  * Copyright (c) 1994 John Dyson
3  * Copyright (c) 2001 Matt Dillon
4  * Copyright (c) 2010 The DragonFly Project
5  *
6  * All Rights Reserved.
7  *
8  * This code is derived from software contributed to The DragonFly Project
9  * by Venkatesh Srinivas <me@endeavour.zapto.org>
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
24  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
27  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  *      from: @(#)vm_machdep.c  7.3 (Berkeley) 5/13/91
36  *      Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
37  * from FreeBSD: .../i386/vm_machdep.c,v 1.165 2001/07/04 23:27:04 dillon
38  */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/proc.h>
44 #include <sys/vmmeter.h>
45 #include <sys/sched.h>
46 #include <sys/sysctl.h>
47 #include <sys/thread.h>
48 #include <sys/thread2.h>
49 #include <sys/kthread.h>
50 #include <sys/mplock2.h>
51 #include <sys/unistd.h>
52 #include <vm/vm.h>
53 #include <vm/vm_page.h>
54 #include <cpu/lwbuf.h>
55
56 /*
57  * Implement the pre-zeroed page mechanism.
58  */
59 #define ZIDLE_LO(v)     ((v) * 2 / 3)
60 #define ZIDLE_HI(v)     ((v) * 4 / 5)
61
62 /* Number of bytes to zero between reschedule checks */
63 #define IDLEZERO_RUN    (32)
64
65 /* Maximum number of pages per second to zero */
66 #define NPAGES_RUN      (20000)
67
68 static int idlezero_enable = 0;
69 TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);
70 SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0,
71            "Allow the kernel to use idle CPU cycles to zero pages");
72 static int idlezero_rate = NPAGES_RUN;
73 SYSCTL_INT(_vm, OID_AUTO, idlezero_rate, CTLFLAG_RW, &idlezero_rate, 0,
74            "Maximum pages per second to zero");
75 static int idlezero_nocache = 0;
76 SYSCTL_INT(_vm, OID_AUTO, idlezero_nocache, CTLFLAG_RW, &idlezero_nocache, 0,
77            "Maximum pages per second to zero");
78
79 static int idlezero_count = 0;
80 SYSCTL_INT(_vm, OID_AUTO, idlezero_count, CTLFLAG_RD, &idlezero_count, 0,
81            "The number of physical pages prezeroed at idle time");
82
83 enum zeroidle_state {
84         STATE_IDLE,
85         STATE_GET_PAGE,
86         STATE_ZERO_PAGE,
87         STATE_RELEASE_PAGE
88 };
89
90 static int zero_state;
91
92 /*
93  * Attempt to maintain approximately 1/2 of our free pages in a
94  * PG_ZERO'd state. Add some hysteresis to (attempt to) avoid
95  * generally zeroing a page when the system is near steady-state.
96  * Otherwise we might get 'flutter' during disk I/O / IPC or
97  * fast sleeps. We also do not want to be continuously zeroing
98  * pages because doing so may flush our L1 and L2 caches too much.
99  *
 * Returns non-zero if pages should be zeroed.
101  */
102 static int
103 vm_page_zero_check(void)
104 {
105         if (idlezero_enable == 0)
106                 return (0);
107         if (zero_state == 0) {
108                 /*
109                  * Wait for the count to fall to LO before starting
110                  * to zero pages.
111                  */
112                 if (vm_page_zero_count <= ZIDLE_LO(vmstats.v_free_count))
113                         zero_state = 1;
114         } else {
115                 /*
116                  * Once we are zeroing pages wait for the count to
117                  * increase to HI before we stop zeroing pages.
118                  */
119                 if (vm_page_zero_count >= ZIDLE_HI(vmstats.v_free_count))
120                         zero_state = 0;
121         }
122         return (zero_state);
123 }
124
/*
 * The idle-time page zeroing kthread.  Pulls pages off the free queue,
 * zeros them, flags them PG_ZERO and returns them to the free queue so
 * that later allocations needing a zero'd page are cheap.
 *
 * Structured as a state machine so the thread can drop the MP lock and
 * yield the cpu between individual steps.
 */
static void
vm_pagezero(void __unused *arg)
{
	vm_page_t m = NULL;
	struct lwbuf *buf = NULL;
	enum zeroidle_state state = STATE_IDLE;
	char *pg = NULL;		/* kva of the page being zeroed */
	int npages = 0;			/* page budget for this pass */
	int i = 0;			/* byte offset within the page */

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 */
	rel_mplock();
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(ncpus - 1));

	/*
	 * Loop forever
	 */
	for (;;) {
		switch(state) {
		case STATE_IDLE:
			/*
			 * Wait for work.  We poll roughly 10 times a
			 * second (hz / 10); when the hysteresis check
			 * says to start, budget one tenth of the
			 * per-second rate for this pass.
			 */
			tsleep(&zero_state, 0, "pgzero", hz / 10);
			if (vm_page_zero_check())
				npages = idlezero_rate / 10;
			if (npages)
				state = STATE_GET_PAGE; /* Fallthrough */
			break;
		case STATE_GET_PAGE:
			/*
			 * Acquire page to zero.  If the MP lock cannot
			 * be obtained without blocking, or the budget is
			 * exhausted, or no page is available, drop back
			 * to STATE_IDLE.
			 */
			if (try_mplock() == 0) {
				state = STATE_IDLE;
			} else if (--npages == 0) {
				state = STATE_IDLE;
				rel_mplock();
			} else {
				m = vm_page_free_fromq_fast();
				if (m == NULL) {
					state = STATE_IDLE;
				} else {
					state = STATE_ZERO_PAGE;
					buf = lwbuf_alloc(m);
					pg = (char *)lwbuf_kva(buf);
					i = 0;
				}
				rel_mplock();
			}
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Zero-out the page, stop immediately if a
			 * resched has been requested.  If interrupted we
			 * stay in this state and resume at offset i
			 * after the lwkt_switch() below.
			 */
			while (i < PAGE_SIZE) {
				if (lwkt_check_resched(curthread))
					break;
				if (idlezero_nocache == 1)
					bzeront(&pg[i], IDLEZERO_RUN);
				else
					bzero(&pg[i], IDLEZERO_RUN);
				i += IDLEZERO_RUN;
			}
			if (i == PAGE_SIZE)
				state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			/*
			 * Return the now zero'd page to the free queue,
			 * flagged PG_ZERO.  If the MP lock is unavailable
			 * we simply retry on the next pass.
			 */
			if (try_mplock()) {
				lwbuf_free(buf);
				vm_page_flag_set(m, PG_ZERO);
				vm_page_free_toq(m);
				state = STATE_GET_PAGE;
				++idlezero_count;
				rel_mplock();
			}
			break;
		}
		lwkt_switch();
	}
}
214
215 static void
216 pagezero_start(void __unused *arg)
217 {
218         int error;
219         struct thread *td;
220
221         error = kthread_create(vm_pagezero, NULL, &td, "pagezero");
222         if (error)
223                 panic("pagezero_start: error %d\n", error);
224 }
225
226 SYSINIT(pagezero, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL);