kernel - Optimize the x86-64 lwbuf API
[dragonfly.git] / sys / vm / vm_zeroidle.c
CommitLineData
bb6811be 1/*
0ac0a48a
MD
2 * (MPSAFE)
3 *
bb6811be
MD
4 * Copyright (c) 1994 John Dyson
5 * Copyright (c) 2001 Matt Dillon
6 * Copyright (c) 2010 The DragonFly Project
7 *
8 * All Rights Reserved.
9 *
10 * This code is derived from software contributed to The DragonFly Project
11 * by Venkatesh Srinivas <me@endeavour.zapto.org>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
29 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
31 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
38 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
39 * from FreeBSD: .../i386/vm_machdep.c,v 1.165 2001/07/04 23:27:04 dillon
bb6811be
MD
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/proc.h>
46#include <sys/vmmeter.h>
47#include <sys/sched.h>
48#include <sys/sysctl.h>
49#include <sys/thread.h>
bb6811be 50#include <sys/kthread.h>
bb6811be
MD
51#include <sys/unistd.h>
52#include <vm/vm.h>
53#include <vm/vm_page.h>
54#include <cpu/lwbuf.h>
55
cd8ab232
MD
56#include <sys/thread2.h>
57#include <sys/mplock2.h>
58
bb6811be
MD
/*
 * Implement the pre-zeroed page mechanism.
 *
 * ZIDLE_LO() and ZIDLE_HI() scale a count to 2/3 and 4/5 of its value
 * using integer arithmetic.  They are applied to the free page count to
 * form the low and high hysteresis thresholds for the number of
 * pre-zeroed pages.
 */
#define ZIDLE_LO(v)	((v) * 2 / 3)
#define ZIDLE_HI(v)	((v) * 4 / 5)

/* Bytes cleared by each bzero()/bzeront() call before yielding */
#define IDLEZERO_RUN	(64)

/* Default for vm.idlezero_rate: maximum pages per second to zero */
#define NPAGES_RUN	(20000)
70
e6b9120d 71static int idlezero_enable = 1;
bb6811be
MD
72TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);
73SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0,
74 "Allow the kernel to use idle CPU cycles to zero pages");
75static int idlezero_rate = NPAGES_RUN;
76SYSCTL_INT(_vm, OID_AUTO, idlezero_rate, CTLFLAG_RW, &idlezero_rate, 0,
77 "Maximum pages per second to zero");
d87e79b9 78static int idlezero_nocache = -1;
bb6811be
MD
79SYSCTL_INT(_vm, OID_AUTO, idlezero_nocache, CTLFLAG_RW, &idlezero_nocache, 0,
80 "Maximum pages per second to zero");
81
82static int idlezero_count = 0;
83SYSCTL_INT(_vm, OID_AUTO, idlezero_count, CTLFLAG_RD, &idlezero_count, 0,
84 "The number of physical pages prezeroed at idle time");
85
/*
 * States of the vm_pagezero() state machine.
 */
enum zeroidle_state {
	STATE_IDLE,		/* sleeping, waiting for work */
	STATE_GET_PAGE,		/* taking a page from the free queue */
	STATE_ZERO_PAGE,	/* clearing the mapped page */
	STATE_RELEASE_PAGE	/* flagging PG_ZERO and freeing the page */
};

/* tsleep() timeouts used by the idle state, expressed in terms of hz */
#define DEFAULT_SLEEP_TIME	(hz / 10)
#define LONG_SLEEP_TIME		(hz * 10)

/*
 * Hysteresis flag maintained by vm_page_zero_check(): non-zero while
 * pages should be zeroed.  Its address doubles as the tsleep() wait
 * channel of the zeroing thread.
 */
static int zero_state;
97
98/*
99 * Attempt to maintain approximately 1/2 of our free pages in a
100 * PG_ZERO'd state. Add some hysteresis to (attempt to) avoid
101 * generally zeroing a page when the system is near steady-state.
102 * Otherwise we might get 'flutter' during disk I/O / IPC or
103 * fast sleeps. We also do not want to be continuously zeroing
104 * pages because doing so may flush our L1 and L2 caches too much.
a863c5ec
MD
105 *
106 * Returns non-zero if pages should be zerod.
bb6811be
MD
107 */
108static int
109vm_page_zero_check(void)
110{
111 if (idlezero_enable == 0)
112 return (0);
a863c5ec
MD
113 if (zero_state == 0) {
114 /*
115 * Wait for the count to fall to LO before starting
116 * to zero pages.
117 */
118 if (vm_page_zero_count <= ZIDLE_LO(vmstats.v_free_count))
119 zero_state = 1;
120 } else {
121 /*
122 * Once we are zeroing pages wait for the count to
123 * increase to HI before we stop zeroing pages.
124 */
125 if (vm_page_zero_count >= ZIDLE_HI(vmstats.v_free_count))
126 zero_state = 0;
127 }
128 return (zero_state);
bb6811be
MD
129}
130
62b382d3
VS
131/*
132 * vm_pagezero should sleep for a longer time when idlezero is disabled or
133 * when there is an excess of zeroed pages.
134 */
135static int
136vm_page_zero_time(void)
137{
138 if (idlezero_enable == 0)
139 return (LONG_SLEEP_TIME);
140 if (vm_page_zero_count >= ZIDLE_HI(vmstats.v_free_count))
141 return (LONG_SLEEP_TIME);
142 return (DEFAULT_SLEEP_TIME);
143}
144
cd8ab232
MD
145/*
146 * MPSAFE thread
147 */
bb6811be
MD
148static void
149vm_pagezero(void __unused *arg)
150{
151 vm_page_t m = NULL;
7a683a24
MD
152 struct lwbuf *lwb = NULL;
153 struct lwbuf lwb_cache;
bb6811be
MD
154 enum zeroidle_state state = STATE_IDLE;
155 char *pg = NULL;
156 int npages = 0;
62b382d3 157 int sleep_time;
bb6811be
MD
158 int i = 0;
159
160 /*
161 * Adjust thread parameters before entering our loop. The thread
162 * is started with the MP lock held and with normal kernel thread
163 * priority.
164 *
165 * Also put us on the last cpu for now.
d2783775
MD
166 *
167 * For now leave the MP lock held, the VM routines cannot be called
168 * with it released until tokenization is finished.
bb6811be 169 */
bb6811be
MD
170 lwkt_setpri_self(TDPRI_IDLE_WORK);
171 lwkt_setcpu_self(globaldata_find(ncpus - 1));
62b382d3 172 sleep_time = DEFAULT_SLEEP_TIME;
bb6811be
MD
173
174 /*
175 * Loop forever
176 */
177 for (;;) {
178 switch(state) {
179 case STATE_IDLE:
180 /*
181 * Wait for work.
182 */
62b382d3 183 tsleep(&zero_state, 0, "pgzero", sleep_time);
bb6811be
MD
184 if (vm_page_zero_check())
185 npages = idlezero_rate / 10;
62b382d3 186 sleep_time = vm_page_zero_time();
bb6811be
MD
187 if (npages)
188 state = STATE_GET_PAGE; /* Fallthrough */
189 break;
190 case STATE_GET_PAGE:
191 /*
192 * Acquire page to zero
193 */
c5c91ee6 194 if (--npages == 0) {
bb6811be 195 state = STATE_IDLE;
bb6811be
MD
196 } else {
197 m = vm_page_free_fromq_fast();
198 if (m == NULL) {
199 state = STATE_IDLE;
200 } else {
201 state = STATE_ZERO_PAGE;
7a683a24
MD
202 lwb = lwbuf_alloc(m, &lwb_cache);
203 pg = (char *)lwbuf_kva(lwb);
bb6811be
MD
204 i = 0;
205 }
bb6811be
MD
206 }
207 break;
208 case STATE_ZERO_PAGE:
209 /*
8787825a 210 * Zero-out the page
bb6811be
MD
211 */
212 while (i < PAGE_SIZE) {
bb6811be
MD
213 if (idlezero_nocache == 1)
214 bzeront(&pg[i], IDLEZERO_RUN);
215 else
216 bzero(&pg[i], IDLEZERO_RUN);
217 i += IDLEZERO_RUN;
8787825a 218 lwkt_yield();
bb6811be 219 }
8787825a 220 state = STATE_RELEASE_PAGE;
bb6811be
MD
221 break;
222 case STATE_RELEASE_PAGE:
7a683a24 223 lwbuf_free(lwb);
c5c91ee6
VS
224 vm_page_flag_set(m, PG_ZERO);
225 vm_page_free_toq(m);
226 state = STATE_GET_PAGE;
227 ++idlezero_count;
bb6811be
MD
228 break;
229 }
f9235b6d 230 lwkt_yield();
bb6811be
MD
231 }
232}
233
234static void
235pagezero_start(void __unused *arg)
236{
237 int error;
238 struct thread *td;
239
d87e79b9
MD
240 if (idlezero_nocache < 0 && (cpu_mi_feature & CPU_MI_BZERONT))
241 idlezero_nocache = 1;
5e9b48f4 242
bb6811be
MD
243 error = kthread_create(vm_pagezero, NULL, &td, "pagezero");
244 if (error)
245 panic("pagezero_start: error %d\n", error);
246}
247
248SYSINIT(pagezero, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL);