Commit | Line | Data |
---|---|---|
984263bc | 1 | /* |
99ad9bc4 MD |
2 | * (MPSAFE) |
3 | * | |
984263bc MD |
4 | * Copyright (c) 1991, 1993 |
5 | * The Regents of the University of California. All rights reserved. | |
6 | * | |
7 | * This code is derived from software contributed to Berkeley by | |
8 | * The Mach Operating System project at Carnegie-Mellon University. | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | |
dc71b7ab | 18 | * 3. Neither the name of the University nor the names of its contributors |
984263bc MD |
19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. | |
21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
32 | * SUCH DAMAGE. | |
33 | * | |
34 | * from: @(#)vm_pager.c 8.6 (Berkeley) 1/12/94 | |
35 | * | |
36 | * | |
37 | * Copyright (c) 1987, 1990 Carnegie-Mellon University. | |
38 | * All rights reserved. | |
39 | * | |
40 | * Authors: Avadis Tevanian, Jr., Michael Wayne Young | |
41 | * | |
42 | * Permission to use, copy, modify and distribute this software and | |
43 | * its documentation is hereby granted, provided that both the copyright | |
44 | * notice and this permission notice appear in all copies of the | |
45 | * software, derivative works or modified versions, and any portions | |
46 | * thereof, and that both notices appear in supporting documentation. | |
47 | * | |
48 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
49 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | |
50 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
51 | * | |
52 | * Carnegie Mellon requests users of this software to return to | |
53 | * | |
54 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
55 | * School of Computer Science | |
56 | * Carnegie Mellon University | |
57 | * Pittsburgh PA 15213-3890 | |
58 | * | |
59 | * any improvements or extensions that they make and grant Carnegie the | |
60 | * rights to redistribute these changes. | |
61 | * | |
62 | * $FreeBSD: src/sys/vm/vm_pager.c,v 1.54.2.2 2001/11/18 07:11:00 dillon Exp $ | |
63 | */ | |
64 | ||
65 | /* | |
66 | * Paging space routine stubs. Emulates a matchmaker-like interface | |
67 | * for builtin pagers. | |
68 | */ | |
69 | ||
70 | #include <sys/param.h> | |
71 | #include <sys/systm.h> | |
72 | #include <sys/kernel.h> | |
73 | #include <sys/vnode.h> | |
74 | #include <sys/buf.h> | |
75 | #include <sys/ucred.h> | |
8c72e3d5 | 76 | #include <sys/dsched.h> |
984263bc | 77 | #include <sys/proc.h> |
9a82e536 | 78 | #include <sys/sysctl.h> |
984263bc MD |
79 | |
80 | #include <vm/vm.h> | |
81 | #include <vm/vm_param.h> | |
9a82e536 | 82 | #include <vm/vm_kern.h> |
984263bc MD |
83 | #include <vm/vm_object.h> |
84 | #include <vm/vm_page.h> | |
85 | #include <vm/vm_pager.h> | |
86 | #include <vm/vm_extern.h> | |
87 | ||
3020e3be | 88 | #include <sys/buf2.h> |
2f0acc22 | 89 | #include <vm/vm_page2.h> |
3020e3be | 90 | |
2d7bef58 SW |
91 | static pgo_dealloc_t dead_pager_dealloc; |
92 | static pgo_getpage_t dead_pager_getpage; | |
93 | static pgo_putpages_t dead_pager_putpages; | |
94 | static pgo_haspage_t dead_pager_haspage; | |
95 | ||
96 | static struct pagerops deadpagerops = { | |
97 | .pgo_dealloc = dead_pager_dealloc, | |
98 | .pgo_getpage = dead_pager_getpage, | |
99 | .pgo_putpages = dead_pager_putpages, | |
100 | .pgo_haspage = dead_pager_haspage | |
101 | }; | |
102 | ||
984263bc MD |
103 | extern struct pagerops defaultpagerops; |
104 | extern struct pagerops swappagerops; | |
105 | extern struct pagerops vnodepagerops; | |
106 | extern struct pagerops devicepagerops; | |
107 | extern struct pagerops physpagerops; | |
108 | ||
99ad9bc4 MD |
109 | /* |
110 | * No requirements. | |
111 | */ | |
984263bc | 112 | static int |
5ebb17ad MD |
113 | dead_pager_getpage(vm_object_t obj, vm_pindex_t pindex, |
114 | vm_page_t *mpp, int seqaccess) | |
984263bc MD |
115 | { |
116 | return VM_PAGER_FAIL; | |
117 | } | |
118 | ||
99ad9bc4 MD |
119 | /* |
120 | * No requirements. | |
121 | */ | |
984263bc | 122 | static void |
57e43348 | 123 | dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, |
5a648714 | 124 | int *rtvals) |
984263bc MD |
125 | { |
126 | int i; | |
127 | ||
128 | for (i = 0; i < count; i++) { | |
129 | rtvals[i] = VM_PAGER_AGAIN; | |
130 | } | |
131 | } | |
132 | ||
99ad9bc4 MD |
133 | /* |
134 | * No requirements. | |
135 | */ | |
99ebfb7c | 136 | static boolean_t |
1b9d3514 | 137 | dead_pager_haspage(vm_object_t object, vm_pindex_t pindex) |
984263bc | 138 | { |
984263bc MD |
139 | return FALSE; |
140 | } | |
141 | ||
99ad9bc4 MD |
142 | /* |
143 | * No requirements. | |
144 | */ | |
984263bc | 145 | static void |
57e43348 | 146 | dead_pager_dealloc(vm_object_t object) |
984263bc | 147 | { |
8d292090 | 148 | KKASSERT(object->swblock_count == 0); |
984263bc MD |
149 | return; |
150 | } | |
151 | ||
984263bc MD |
152 | struct pagerops *pagertab[] = { |
153 | &defaultpagerops, /* OBJT_DEFAULT */ | |
154 | &swappagerops, /* OBJT_SWAP */ | |
155 | &vnodepagerops, /* OBJT_VNODE */ | |
156 | &devicepagerops, /* OBJT_DEVICE */ | |
f6a0c819 | 157 | &devicepagerops, /* OBJT_MGTDEVICE */ |
984263bc MD |
158 | &physpagerops, /* OBJT_PHYS */ |
159 | &deadpagerops /* OBJT_DEAD */ | |
160 | }; | |
161 | ||
984263bc MD |
162 | /* |
163 | * Kernel address space for mapping pages. | |
164 | * Used by pagers where KVAs are needed for IO. | |
165 | * | |
166 | * XXX needs to be large enough to support the number of pending async | |
167 | * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size | |
168 | * (MAXPHYS == 64k) if you want to get the most efficiency. | |
169 | */ | |
170 | #define PAGER_MAP_SIZE (8 * 1024 * 1024) | |
171 | ||
2f0acc22 MD |
172 | #define BSWHSIZE 16 |
173 | #define BSWHMASK (BSWHSIZE - 1) | |
174 | ||
9a82e536 MD |
175 | TAILQ_HEAD(swqueue, buf); |
176 | ||
984263bc | 177 | int pager_map_size = PAGER_MAP_SIZE; |
5936d3e8 AL |
178 | |
179 | static struct vm_map pager_map_store; | |
180 | struct vm_map *pager_map = &pager_map_store; | |
e4846942 | 181 | |
2f0acc22 MD |
182 | static vm_offset_t swapbkva_mem; /* swap buffers kva */ |
183 | static vm_offset_t swapbkva_kva; /* swap buffers kva */ | |
184 | static struct swqueue bswlist_mem[BSWHSIZE]; /* with preallocated memory */ | |
185 | static struct swqueue bswlist_kva[BSWHSIZE]; /* with kva */ | |
186 | static struct swqueue bswlist_raw[BSWHSIZE]; /* without kva */ | |
187 | static struct spinlock bswspin_mem[BSWHSIZE]; | |
188 | static struct spinlock bswspin_kva[BSWHSIZE]; | |
189 | static struct spinlock bswspin_raw[BSWHSIZE]; | |
9a82e536 MD |
190 | static int pbuf_raw_count; |
191 | static int pbuf_kva_count; | |
2f0acc22 | 192 | static int pbuf_mem_count; |
9a82e536 | 193 | |
9ae1f67e | 194 | SYSCTL_INT(_vm, OID_AUTO, pbuf_raw_count, CTLFLAG_RD, &pbuf_raw_count, 0, |
2f0acc22 | 195 | "Kernel pbuf raw reservations"); |
9ae1f67e | 196 | SYSCTL_INT(_vm, OID_AUTO, pbuf_kva_count, CTLFLAG_RD, &pbuf_kva_count, 0, |
2f0acc22 | 197 | "Kernel pbuf kva reservations"); |
9ae1f67e | 198 | SYSCTL_INT(_vm, OID_AUTO, pbuf_mem_count, CTLFLAG_RD, &pbuf_mem_count, 0, |
2f0acc22 | 199 | "Kernel pbuf mem reservations"); |
984263bc | 200 | |
99ad9bc4 MD |
201 | /* |
202 | * Initialize the swap buffer list. | |
203 | * | |
204 | * Called from the low level boot code only. | |
205 | */ | |
107e9bcc MD |
206 | static void |
207 | vm_pager_init(void *arg __unused) | |
984263bc | 208 | { |
2f0acc22 MD |
209 | int i; |
210 | ||
211 | for (i = 0; i < BSWHSIZE; ++i) { | |
212 | TAILQ_INIT(&bswlist_mem[i]); | |
213 | TAILQ_INIT(&bswlist_kva[i]); | |
214 | TAILQ_INIT(&bswlist_raw[i]); | |
215 | spin_init(&bswspin_mem[i], "bswmem"); | |
216 | spin_init(&bswspin_kva[i], "bswkva"); | |
217 | spin_init(&bswspin_raw[i], "bswraw"); | |
218 | } | |
984263bc | 219 | } |
f3f3eadb | 220 | SYSINIT(vm_mem, SI_BOOT1_VM, SI_ORDER_SECOND, vm_pager_init, NULL); |
984263bc | 221 | |
99ad9bc4 MD |
222 | /* |
223 | * Called from the low level boot code only. | |
224 | */ | |
ca667d3c | 225 | static |
984263bc | 226 | void |
ca667d3c | 227 | vm_pager_bufferinit(void *dummy __unused) |
984263bc MD |
228 | { |
229 | struct buf *bp; | |
74d62460 | 230 | long i; |
984263bc | 231 | |
984263bc | 232 | /* |
312dcd01 MD |
233 | * Reserve KVM space for pbuf data. |
234 | */ | |
5936d3e8 | 235 | swapbkva_mem = kmem_alloc_pageable(pager_map, nswbuf_mem * MAXPHYS, |
3091de50 | 236 | VM_SUBSYS_BUFDATA); |
2f0acc22 MD |
237 | if (!swapbkva_mem) |
238 | panic("Not enough pager_map VM space for physical buffers"); | |
5936d3e8 | 239 | swapbkva_kva = kmem_alloc_pageable(pager_map, nswbuf_kva * MAXPHYS, |
3091de50 | 240 | VM_SUBSYS_BUFDATA); |
2f0acc22 | 241 | if (!swapbkva_kva) |
312dcd01 MD |
242 | panic("Not enough pager_map VM space for physical buffers"); |
243 | ||
244 | /* | |
2f0acc22 MD |
245 | * Initial pbuf setup. |
246 | * | |
247 | * mem - These pbufs have permanently allocated memory | |
248 | * kva - These pbufs have unallocated kva reservations | |
249 | * raw - These pbufs have no kva reservations | |
250 | */ | |
251 | ||
252 | /* | |
253 | * Buffers with pre-allocated kernel memory can be convenient for | |
254 | * copyin/copyout because no SMP page invalidation or other pmap | |
255 | * operations are needed. | |
256 | */ | |
2f0acc22 MD |
257 | bp = swbuf_mem; |
258 | for (i = 0; i < nswbuf_mem; ++i, ++bp) { | |
259 | vm_page_t m; | |
260 | vm_pindex_t pg; | |
261 | int j; | |
262 | ||
263 | bp->b_kvabase = (caddr_t)((intptr_t)i * MAXPHYS) + swapbkva_mem; | |
264 | bp->b_kvasize = MAXPHYS; | |
265 | bp->b_swindex = i & BSWHMASK; | |
d32579c3 | 266 | bp->b_cpumask = smp_active_mask; |
2f0acc22 MD |
267 | BUF_LOCKINIT(bp); |
268 | buf_dep_init(bp); | |
269 | TAILQ_INSERT_HEAD(&bswlist_mem[i & BSWHMASK], bp, b_freelist); | |
270 | atomic_add_int(&pbuf_mem_count, 1); | |
271 | bp->b_data = bp->b_kvabase; | |
272 | bp->b_bcount = MAXPHYS; | |
273 | bp->b_xio.xio_pages = bp->b_xio.xio_internal_pages; | |
274 | ||
275 | pg = (vm_offset_t)bp->b_kvabase >> PAGE_SHIFT; | |
712b6620 | 276 | vm_object_hold(kernel_object); |
2f0acc22 | 277 | for (j = 0; j < MAXPHYS / PAGE_SIZE; ++j) { |
712b6620 | 278 | m = vm_page_alloc(kernel_object, pg, VM_ALLOC_NORMAL | |
2f0acc22 MD |
279 | VM_ALLOC_SYSTEM); |
280 | KKASSERT(m != NULL); | |
281 | bp->b_xio.xio_internal_pages[j] = m; | |
282 | vm_page_wire(m); | |
2f0acc22 MD |
283 | /* early boot, no other cpus running yet */ |
284 | pmap_kenter_noinval(pg * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); | |
285 | cpu_invlpg((void *)(pg * PAGE_SIZE)); | |
286 | vm_page_wakeup(m); | |
287 | ++pg; | |
288 | } | |
712b6620 | 289 | vm_object_drop(kernel_object); |
2f0acc22 MD |
290 | bp->b_xio.xio_npages = j; |
291 | } | |
2f0acc22 MD |
292 | |
293 | /* | |
294 | * Buffers with pre-assigned KVA bases. The KVA has no memory pages | |
295 | * assigned to it. Saves the caller from having to reserve KVA for | |
296 | * the page map. | |
984263bc | 297 | */ |
2f0acc22 MD |
298 | bp = swbuf_kva; |
299 | for (i = 0; i < nswbuf_kva; ++i, ++bp) { | |
300 | bp->b_kvabase = (caddr_t)((intptr_t)i * MAXPHYS) + swapbkva_kva; | |
312dcd01 | 301 | bp->b_kvasize = MAXPHYS; |
2f0acc22 | 302 | bp->b_swindex = i & BSWHMASK; |
984263bc | 303 | BUF_LOCKINIT(bp); |
408357d8 | 304 | buf_dep_init(bp); |
2f0acc22 MD |
305 | TAILQ_INSERT_HEAD(&bswlist_kva[i & BSWHMASK], bp, b_freelist); |
306 | atomic_add_int(&pbuf_kva_count, 1); | |
9a82e536 MD |
307 | } |
308 | ||
309 | /* | |
2f0acc22 | 310 | * RAW buffers with no KVA mappings. |
06c66eb2 MD |
311 | * |
312 | * NOTE: We use KM_NOTLBSYNC here to reduce unnecessary IPIs | |
313 | * during startup, which can really slow down emulated | |
314 | * systems. | |
9a82e536 MD |
315 | */ |
316 | nswbuf_raw = nbuf * 2; | |
1eeaf6b2 | 317 | swbuf_raw = (void *)kmem_alloc3(kernel_map, |
06c66eb2 | 318 | round_page(nswbuf_raw * sizeof(struct buf)), |
3091de50 | 319 | VM_SUBSYS_BUFDATA, |
06c66eb2 MD |
320 | KM_NOTLBSYNC); |
321 | smp_invltlb(); | |
9a82e536 MD |
322 | bp = swbuf_raw; |
323 | for (i = 0; i < nswbuf_raw; ++i, ++bp) { | |
2f0acc22 | 324 | bp->b_swindex = i & BSWHMASK; |
9a82e536 MD |
325 | BUF_LOCKINIT(bp); |
326 | buf_dep_init(bp); | |
2f0acc22 MD |
327 | TAILQ_INSERT_HEAD(&bswlist_raw[i & BSWHMASK], bp, b_freelist); |
328 | atomic_add_int(&pbuf_raw_count, 1); | |
984263bc | 329 | } |
984263bc MD |
330 | } |
331 | ||
ca667d3c MD |
332 | SYSINIT(do_vmpg, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, vm_pager_bufferinit, NULL); |
333 | ||
99ad9bc4 MD |
334 | /* |
335 | * No requirements. | |
336 | */ | |
984263bc | 337 | void |
57e43348 | 338 | vm_pager_deallocate(vm_object_t object) |
984263bc MD |
339 | { |
340 | (*pagertab[object->type]->pgo_dealloc) (object); | |
341 | } | |
342 | ||
984263bc MD |
343 | /* |
344 | * vm_pager_get_pages() - inline, see vm/vm_pager.h | |
345 | * vm_pager_put_pages() - inline, see vm/vm_pager.h | |
346 | * vm_pager_has_page() - inline, see vm/vm_pager.h | |
347 | * vm_pager_page_inserted() - inline, see vm/vm_pager.h | |
348 | * vm_pager_page_removed() - inline, see vm/vm_pager.h | |
349 | */ | |
350 | ||
984263bc | 351 | /* |
f2c2051e JH |
352 | * Search the specified pager object list for an object with the |
353 | * specified handle. If an object with the specified handle is found, | |
354 | * increase its reference count and return it. Otherwise, return NULL. | |
984263bc | 355 | * |
f2c2051e | 356 | * The pager object list must be locked. |
984263bc | 357 | */ |
f2c2051e JH |
358 | vm_object_t |
359 | vm_pager_object_lookup(struct pagerlst *pg_list, void *handle) | |
984263bc | 360 | { |
f2c2051e JH |
361 | vm_object_t object; |
362 | ||
5b329e62 | 363 | TAILQ_FOREACH(object, pg_list, pager_object_entry) { |
f2c2051e JH |
364 | if (object->handle == handle) { |
365 | VM_OBJECT_LOCK(object); | |
366 | if ((object->flags & OBJ_DEAD) == 0) { | |
367 | vm_object_reference_locked(object); | |
368 | VM_OBJECT_UNLOCK(object); | |
369 | break; | |
370 | } | |
371 | VM_OBJECT_UNLOCK(object); | |
372 | } | |
373 | } | |
374 | return (object); | |
984263bc MD |
375 | } |
376 | ||
984263bc | 377 | /* |
312dcd01 | 378 | * Initialize a physical buffer. |
99ad9bc4 MD |
379 | * |
380 | * No requirements. | |
984263bc | 381 | */ |
984263bc MD |
382 | static void |
383 | initpbuf(struct buf *bp) | |
384 | { | |
9a82e536 MD |
385 | bp->b_qindex = 0; /* BQUEUE_NONE */ |
386 | bp->b_data = bp->b_kvabase; /* NULL if pbuf sans kva */ | |
a8f169e2 | 387 | bp->b_flags = B_PAGING; |
10f3fee5 | 388 | bp->b_cmd = BUF_CMD_DONE; |
984263bc | 389 | bp->b_error = 0; |
9a71d53f MD |
390 | bp->b_bcount = 0; |
391 | bp->b_bufsize = MAXPHYS; | |
81b5c339 | 392 | initbufbio(bp); |
54f51aeb | 393 | xio_init(&bp->b_xio); |
984263bc MD |
394 | BUF_LOCK(bp, LK_EXCLUSIVE); |
395 | } | |
396 | ||
397 | /* | |
a50fe973 | 398 | * Allocate a physical buffer |
984263bc | 399 | * |
d84f6fa1 MD |
400 | * If (pfreecnt != NULL) then *pfreecnt will be decremented on return and |
401 | * the function will block while it is <= 0. | |
984263bc | 402 | * |
d84f6fa1 MD |
403 | * Physical buffers can be with or without KVA space reserved. There |
404 | * are severe limitations on the ones with KVA reserved, and fewer | |
405 | * limitations on the ones without. getpbuf() gets one without, | |
406 | * getpbuf_kva() gets one with. | |
9a82e536 | 407 | * |
99ad9bc4 | 408 | * No requirements. |
984263bc MD |
409 | */ |
410 | struct buf * | |
d84f6fa1 | 411 | getpbuf(int *pfreecnt) |
984263bc | 412 | { |
984263bc | 413 | struct buf *bp; |
2f0acc22 MD |
414 | int iter; |
415 | int loops; | |
984263bc MD |
416 | |
417 | for (;;) { | |
2f0acc22 MD |
418 | while (pfreecnt && *pfreecnt <= 0) { |
419 | tsleep_interlock(pfreecnt, 0); | |
743146ae | 420 | if ((int)atomic_fetchadd_int(pfreecnt, 0) <= 0) |
2f0acc22 MD |
421 | tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0); |
422 | } | |
423 | if (pbuf_raw_count <= 0) { | |
424 | tsleep_interlock(&pbuf_raw_count, 0); | |
743146ae | 425 | if ((int)atomic_fetchadd_int(&pbuf_raw_count, 0) <= 0) |
2f0acc22 | 426 | tsleep(&pbuf_raw_count, PINTERLOCKED, |
d84f6fa1 | 427 | "wswbuf1", 0); |
2f0acc22 MD |
428 | continue; |
429 | } | |
430 | iter = mycpuid & BSWHMASK; | |
431 | for (loops = BSWHSIZE; loops; --loops) { | |
432 | if (TAILQ_FIRST(&bswlist_raw[iter]) == NULL) { | |
433 | iter = (iter + 1) & BSWHMASK; | |
434 | continue; | |
435 | } | |
436 | spin_lock(&bswspin_raw[iter]); | |
437 | if ((bp = TAILQ_FIRST(&bswlist_raw[iter])) == NULL) { | |
438 | spin_unlock(&bswspin_raw[iter]); | |
439 | iter = (iter + 1) & BSWHMASK; | |
440 | continue; | |
441 | } | |
442 | TAILQ_REMOVE(&bswlist_raw[iter], bp, b_freelist); | |
443 | atomic_add_int(&pbuf_raw_count, -1); | |
444 | if (pfreecnt) | |
445 | atomic_add_int(pfreecnt, -1); | |
446 | spin_unlock(&bswspin_raw[iter]); | |
447 | initpbuf(bp); | |
448 | ||
449 | return bp; | |
984263bc | 450 | } |
984263bc | 451 | } |
2f0acc22 | 452 | /* not reached */ |
9a82e536 MD |
453 | } |
454 | ||
455 | struct buf * | |
456 | getpbuf_kva(int *pfreecnt) | |
457 | { | |
458 | struct buf *bp; | |
2f0acc22 MD |
459 | int iter; |
460 | int loops; | |
9a82e536 MD |
461 | |
462 | for (;;) { | |
2f0acc22 MD |
463 | while (pfreecnt && *pfreecnt <= 0) { |
464 | tsleep_interlock(pfreecnt, 0); | |
743146ae | 465 | if ((int)atomic_fetchadd_int(pfreecnt, 0) <= 0) |
d84f6fa1 | 466 | tsleep(pfreecnt, PINTERLOCKED, "wswbuf2", 0); |
2f0acc22 MD |
467 | } |
468 | if (pbuf_kva_count <= 0) { | |
469 | tsleep_interlock(&pbuf_kva_count, 0); | |
743146ae | 470 | if ((int)atomic_fetchadd_int(&pbuf_kva_count, 0) <= 0) |
2f0acc22 | 471 | tsleep(&pbuf_kva_count, PINTERLOCKED, |
d84f6fa1 | 472 | "wswbuf3", 0); |
2f0acc22 MD |
473 | continue; |
474 | } | |
475 | iter = mycpuid & BSWHMASK; | |
476 | for (loops = BSWHSIZE; loops; --loops) { | |
477 | if (TAILQ_FIRST(&bswlist_kva[iter]) == NULL) { | |
478 | iter = (iter + 1) & BSWHMASK; | |
479 | continue; | |
480 | } | |
481 | spin_lock(&bswspin_kva[iter]); | |
482 | if ((bp = TAILQ_FIRST(&bswlist_kva[iter])) == NULL) { | |
483 | spin_unlock(&bswspin_kva[iter]); | |
484 | iter = (iter + 1) & BSWHMASK; | |
485 | continue; | |
486 | } | |
487 | TAILQ_REMOVE(&bswlist_kva[iter], bp, b_freelist); | |
488 | atomic_add_int(&pbuf_kva_count, -1); | |
489 | if (pfreecnt) | |
490 | atomic_add_int(pfreecnt, -1); | |
491 | spin_unlock(&bswspin_kva[iter]); | |
492 | initpbuf(bp); | |
493 | ||
494 | return bp; | |
9a82e536 | 495 | } |
9a82e536 | 496 | } |
2f0acc22 MD |
497 | /* not reached */ |
498 | } | |
9a82e536 | 499 | |
2f0acc22 MD |
500 | /* |
501 | * Allocate a pbuf with kernel memory already preallocated. Caller must | |
502 | * not change the mapping. | |
503 | */ | |
504 | struct buf * | |
505 | getpbuf_mem(int *pfreecnt) | |
506 | { | |
507 | struct buf *bp; | |
508 | int iter; | |
509 | int loops; | |
9a82e536 | 510 | |
2f0acc22 MD |
511 | for (;;) { |
512 | while (pfreecnt && *pfreecnt <= 0) { | |
513 | tsleep_interlock(pfreecnt, 0); | |
743146ae | 514 | if ((int)atomic_fetchadd_int(pfreecnt, 0) <= 0) |
d84f6fa1 | 515 | tsleep(pfreecnt, PINTERLOCKED, "wswbuf4", 0); |
2f0acc22 MD |
516 | } |
517 | if (pbuf_mem_count <= 0) { | |
518 | tsleep_interlock(&pbuf_mem_count, 0); | |
743146ae | 519 | if ((int)atomic_fetchadd_int(&pbuf_mem_count, 0) <= 0) |
2f0acc22 | 520 | tsleep(&pbuf_mem_count, PINTERLOCKED, |
d84f6fa1 | 521 | "wswbuf5", 0); |
2f0acc22 MD |
522 | continue; |
523 | } | |
524 | iter = mycpuid & BSWHMASK; | |
525 | for (loops = BSWHSIZE; loops; --loops) { | |
526 | if (TAILQ_FIRST(&bswlist_mem[iter]) == NULL) { | |
527 | iter = (iter + 1) & BSWHMASK; | |
528 | continue; | |
529 | } | |
530 | spin_lock(&bswspin_mem[iter]); | |
531 | if ((bp = TAILQ_FIRST(&bswlist_mem[iter])) == NULL) { | |
532 | spin_unlock(&bswspin_mem[iter]); | |
533 | iter = (iter + 1) & BSWHMASK; | |
534 | continue; | |
535 | } | |
536 | TAILQ_REMOVE(&bswlist_mem[iter], bp, b_freelist); | |
537 | atomic_add_int(&pbuf_mem_count, -1); | |
538 | if (pfreecnt) | |
539 | atomic_add_int(pfreecnt, -1); | |
540 | spin_unlock(&bswspin_mem[iter]); | |
541 | initpbuf(bp); | |
542 | ||
543 | return bp; | |
544 | } | |
545 | } | |
546 | /* not reached */ | |
984263bc MD |
547 | } |
548 | ||
549 | /* | |
a50fe973 | 550 | * Allocate a physical buffer, if one is available. |
984263bc | 551 | * |
2f0acc22 | 552 | * Note that there is no NULL hack here - all subsystems using this |
d84f6fa1 | 553 | * call are required to use a non-NULL pfreecnt. |
a50fe973 | 554 | * |
99ad9bc4 | 555 | * No requirements. |
984263bc MD |
556 | */ |
557 | struct buf * | |
d84f6fa1 | 558 | trypbuf(int *pfreecnt) |
984263bc | 559 | { |
984263bc | 560 | struct buf *bp; |
2f0acc22 MD |
561 | int iter = mycpuid & BSWHMASK; |
562 | int loops; | |
984263bc | 563 | |
2f0acc22 MD |
564 | for (loops = BSWHSIZE; loops; --loops) { |
565 | if (*pfreecnt <= 0 || TAILQ_FIRST(&bswlist_raw[iter]) == NULL) { | |
566 | iter = (iter + 1) & BSWHMASK; | |
567 | continue; | |
568 | } | |
569 | spin_lock(&bswspin_raw[iter]); | |
570 | if (*pfreecnt <= 0 || | |
571 | (bp = TAILQ_FIRST(&bswlist_raw[iter])) == NULL) { | |
572 | spin_unlock(&bswspin_raw[iter]); | |
573 | iter = (iter + 1) & BSWHMASK; | |
574 | continue; | |
575 | } | |
576 | TAILQ_REMOVE(&bswlist_raw[iter], bp, b_freelist); | |
577 | atomic_add_int(&pbuf_raw_count, -1); | |
578 | atomic_add_int(pfreecnt, -1); | |
9a82e536 | 579 | |
2f0acc22 | 580 | spin_unlock(&bswspin_raw[iter]); |
9a82e536 | 581 | |
2f0acc22 | 582 | initpbuf(bp); |
9a82e536 | 583 | |
2f0acc22 MD |
584 | return bp; |
585 | } | |
586 | return NULL; | |
9a82e536 MD |
587 | } |
588 | ||
589 | struct buf * | |
590 | trypbuf_kva(int *pfreecnt) | |
591 | { | |
592 | struct buf *bp; | |
2f0acc22 MD |
593 | int iter = mycpuid & BSWHMASK; |
594 | int loops; | |
9a82e536 | 595 | |
2f0acc22 MD |
596 | for (loops = BSWHSIZE; loops; --loops) { |
597 | if (*pfreecnt <= 0 || TAILQ_FIRST(&bswlist_kva[iter]) == NULL) { | |
598 | iter = (iter + 1) & BSWHMASK; | |
599 | continue; | |
600 | } | |
601 | spin_lock(&bswspin_kva[iter]); | |
602 | if (*pfreecnt <= 0 || | |
603 | (bp = TAILQ_FIRST(&bswlist_kva[iter])) == NULL) { | |
604 | spin_unlock(&bswspin_kva[iter]); | |
605 | iter = (iter + 1) & BSWHMASK; | |
606 | continue; | |
607 | } | |
608 | TAILQ_REMOVE(&bswlist_kva[iter], bp, b_freelist); | |
609 | atomic_add_int(&pbuf_kva_count, -1); | |
610 | atomic_add_int(pfreecnt, -1); | |
984263bc | 611 | |
2f0acc22 | 612 | spin_unlock(&bswspin_kva[iter]); |
984263bc | 613 | |
2f0acc22 | 614 | initpbuf(bp); |
984263bc | 615 | |
2f0acc22 MD |
616 | return bp; |
617 | } | |
618 | return NULL; | |
984263bc MD |
619 | } |
620 | ||
621 | /* | |
a50fe973 | 622 | * Release a physical buffer |
984263bc MD |
623 | * |
624 | * NOTE: pfreecnt can be NULL, but this 'feature' will be removed | |
625 | * relatively soon when the rest of the subsystems get smart about it. XXX | |
a50fe973 | 626 | * |
99ad9bc4 | 627 | * No requirements. |
984263bc MD |
628 | */ |
629 | void | |
57e43348 | 630 | relpbuf(struct buf *bp, int *pfreecnt) |
984263bc | 631 | { |
2f0acc22 MD |
632 | int wake = 0; |
633 | int wake_free = 0; | |
634 | int iter = bp->b_swindex; | |
984263bc | 635 | |
a8f169e2 | 636 | KKASSERT(bp->b_flags & B_PAGING); |
3573cf7b | 637 | dsched_buf_exit(bp); |
984263bc | 638 | |
a50fe973 | 639 | BUF_UNLOCK(bp); |
7757d1c2 | 640 | |
2f0acc22 MD |
641 | if (bp >= swbuf_mem && bp < &swbuf_mem[nswbuf_mem]) { |
642 | KKASSERT(bp->b_kvabase); | |
643 | spin_lock(&bswspin_mem[iter]); | |
644 | TAILQ_INSERT_HEAD(&bswlist_mem[iter], bp, b_freelist); | |
9942a851 | 645 | if (atomic_fetchadd_int(&pbuf_mem_count, 1) == nswbuf_mem / 4) |
2f0acc22 MD |
646 | wake = 1; |
647 | if (pfreecnt) { | |
9942a851 | 648 | if (atomic_fetchadd_int(pfreecnt, 1) == 1) |
2f0acc22 MD |
649 | wake_free = 1; |
650 | } | |
651 | spin_unlock(&bswspin_mem[iter]); | |
652 | if (wake) | |
653 | wakeup(&pbuf_mem_count); | |
743146ae | 654 | } else if (bp >= swbuf_kva && bp < &swbuf_kva[nswbuf_kva]) { |
2f0acc22 | 655 | KKASSERT(bp->b_kvabase); |
d32579c3 | 656 | CPUMASK_ASSZERO(bp->b_cpumask); |
2f0acc22 MD |
657 | spin_lock(&bswspin_kva[iter]); |
658 | TAILQ_INSERT_HEAD(&bswlist_kva[iter], bp, b_freelist); | |
9942a851 | 659 | if (atomic_fetchadd_int(&pbuf_kva_count, 1) == nswbuf_kva / 4) |
2f0acc22 MD |
660 | wake = 1; |
661 | if (pfreecnt) { | |
9942a851 | 662 | if (atomic_fetchadd_int(pfreecnt, 1) == 1) |
2f0acc22 MD |
663 | wake_free = 1; |
664 | } | |
665 | spin_unlock(&bswspin_kva[iter]); | |
666 | if (wake) | |
667 | wakeup(&pbuf_kva_count); | |
9a82e536 | 668 | } else { |
2f0acc22 MD |
669 | KKASSERT(bp->b_kvabase == NULL); |
670 | KKASSERT(bp >= swbuf_raw && bp < &swbuf_raw[nswbuf_raw]); | |
d32579c3 | 671 | CPUMASK_ASSZERO(bp->b_cpumask); |
2f0acc22 MD |
672 | spin_lock(&bswspin_raw[iter]); |
673 | TAILQ_INSERT_HEAD(&bswlist_raw[iter], bp, b_freelist); | |
9942a851 | 674 | if (atomic_fetchadd_int(&pbuf_raw_count, 1) == nswbuf_raw / 4) |
2f0acc22 MD |
675 | wake = 1; |
676 | if (pfreecnt) { | |
9942a851 | 677 | if (atomic_fetchadd_int(pfreecnt, 1) == 1) |
2f0acc22 MD |
678 | wake_free = 1; |
679 | } | |
680 | spin_unlock(&bswspin_raw[iter]); | |
681 | if (wake) | |
682 | wakeup(&pbuf_raw_count); | |
984263bc | 683 | } |
2f0acc22 | 684 | if (wake_free) |
a50fe973 | 685 | wakeup(pfreecnt); |
984263bc | 686 | } |
7f86d367 MD |
687 | |
/*
 * Add n to *pfreecnt and wake up any thread sleeping on pfreecnt.
 * Nothing is done, and nobody is woken, when n is zero.
 */
void
pbuf_adjcount(int *pfreecnt, int n)
{
	if (n == 0)
		return;

	atomic_add_int(pfreecnt, n);
	wakeup(pfreecnt);
}