| Commit | Line | Data |
|---|---|---|
| 984263bc | 1 | /* |
| 99ad9bc4 MD |
2 | * (MPSAFE) |
| 3 | * | |
| 984263bc MD |
4 | * Copyright (c) 1982, 1986, 1989, 1993 |
| 5 | * The Regents of the University of California. All rights reserved. | |
| 6 | * | |
| 7 | * Redistribution and use in source and binary forms, with or without | |
| 8 | * modification, are permitted provided that the following conditions | |
| 9 | * are met: | |
| 10 | * 1. Redistributions of source code must retain the above copyright | |
| 11 | * notice, this list of conditions and the following disclaimer. | |
| 12 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 13 | * notice, this list of conditions and the following disclaimer in the | |
| 14 | * documentation and/or other materials provided with the distribution. | |
| 15 | * 3. All advertising materials mentioning features or use of this software | |
| 16 | * must display the following acknowledgement: | |
| 17 | * This product includes software developed by the University of | |
| 18 | * California, Berkeley and its contributors. | |
| 19 | * 4. Neither the name of the University nor the names of its contributors | |
| 20 | * may be used to endorse or promote products derived from this software | |
| 21 | * without specific prior written permission. | |
| 22 | * | |
| 23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 33 | * SUCH DAMAGE. | |
| 34 | * | |
| 35 | * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 | |
| 36 | * $FreeBSD: src/sys/vm/vm_swap.c,v 1.96.2.2 2001/10/14 18:46:47 iedowse Exp $ | |
| 37 | */ | |
| 38 | ||
| 39 | #include "opt_swap.h" | |
| 40 | ||
| 41 | #include <sys/param.h> | |
| 42 | #include <sys/systm.h> | |
| 43 | #include <sys/sysproto.h> | |
| 44 | #include <sys/buf.h> | |
| 45 | #include <sys/proc.h> | |
| 895c1f85 | 46 | #include <sys/priv.h> |
| fad57d0e | 47 | #include <sys/nlookup.h> |
| 099f3e5e | 48 | #include <sys/sysctl.h> |
| 984263bc MD |
49 | #include <sys/dmap.h> /* XXX */ |
| 50 | #include <sys/vnode.h> | |
| 51 | #include <sys/fcntl.h> | |
| 52 | #include <sys/blist.h> | |
| 53 | #include <sys/kernel.h> | |
| 54 | #include <sys/lock.h> | |
| 55 | #include <sys/conf.h> | |
| 56 | #include <sys/stat.h> | |
| 684a93c4 | 57 | |
| 984263bc MD |
58 | #include <vm/vm.h> |
| 59 | #include <vm/vm_extern.h> | |
| 60 | #include <vm/swap_pager.h> | |
| 61 | #include <vm/vm_zone.h> | |
| 099f3e5e | 62 | #include <vm/vm_param.h> |
| 984263bc | 63 | |
| 684a93c4 MD |
64 | #include <sys/thread2.h> |
| 65 | #include <sys/mplock2.h> | |
| 99ad9bc4 | 66 | #include <sys/mutex2.h> |
| b12defdc | 67 | #include <sys/spinlock2.h> |
| 684a93c4 | 68 | |
| 984263bc MD |
69 | /* |
| 70 | * Indirect driver for multi-controller paging. | |
| 71 | */ | |
| 72 | ||
| 73 | #ifndef NSWAPDEV | |
| 74 | #define NSWAPDEV 4 | |
| 75 | #endif | |
| 76 | static struct swdevt should_be_malloced[NSWAPDEV]; | |
| 460426e6 | 77 | struct swdevt *swdevt = should_be_malloced; /* exported to pstat/systat */ |
| 79634a66 | 78 | static swblk_t nswap; /* first block after the interleaved devs */ |
| 99ad9bc4 | 79 | static struct mtx swap_mtx = MTX_INITIALIZER; |
| 460426e6 | 80 | int nswdev = NSWAPDEV; /* exported to pstat/systat */ |
| 984263bc | 81 | int vm_swap_size; |
| 1e5196f0 | 82 | int vm_swap_max; |
| 984263bc | 83 | |
| 027193eb | 84 | static int swapoff_one(int index); |
| 984263bc MD |
85 | struct vnode *swapdev_vp; |
| 86 | ||
| 87 | /* | |
| 99ad9bc4 | 88 | * (struct vnode *a_vp, struct bio *b_bio) |
| 984263bc | 89 | * |
| 99ad9bc4 MD |
90 | * vn_strategy() for swapdev_vp. Perform swap strategy interleave device |
| 91 | * selection. | |
| 81b5c339 | 92 | * |
| 99ad9bc4 | 93 | * No requirements. |
| 984263bc | 94 | */ |
| 984263bc | 95 | static int |
| 81b5c339 | 96 | swapdev_strategy(struct vop_strategy_args *ap) |
| 984263bc | 97 | { |
| 81b5c339 MD |
98 | struct bio *bio = ap->a_bio; |
| 99 | struct bio *nbio; | |
| 100 | struct buf *bp = bio->bio_buf; | |
| 54078292 | 101 | int sz, off, seg, index, blkno, nblkno; |
| 5f910b2f | 102 | struct swdevt *sp; |
| 984263bc | 103 | sz = howmany(bp->b_bcount, PAGE_SIZE); |
| 54078292 | 104 | blkno = (int)(bio->bio_offset >> PAGE_SHIFT); |
| 984263bc MD |
105 | |
| 106 | /* | |
| 107 | * Convert interleaved swap into per-device swap. Note that | |
| 108 | * the block size is left in PAGE_SIZE'd chunks (for the newswap) | |
| 109 | * here. | |
| 110 | */ | |
| 81b5c339 | 111 | nbio = push_bio(bio); |
| 984263bc | 112 | if (nswdev > 1) { |
| 54078292 | 113 | off = blkno % dmmax; |
| 984263bc MD |
114 | if (off + sz > dmmax) { |
| 115 | bp->b_error = EINVAL; | |
| 116 | bp->b_flags |= B_ERROR; | |
| 81b5c339 | 117 | biodone(bio); |
| 984263bc MD |
118 | return 0; |
| 119 | } | |
| 54078292 | 120 | seg = blkno / dmmax; |
| 984263bc MD |
121 | index = seg % nswdev; |
| 122 | seg /= nswdev; | |
| 54078292 | 123 | nbio->bio_offset = (off_t)(seg * dmmax + off) << PAGE_SHIFT; |
| 984263bc MD |
124 | } else { |
| 125 | index = 0; | |
| 54078292 | 126 | nbio->bio_offset = bio->bio_offset; |
| 984263bc | 127 | } |
| 54078292 | 128 | nblkno = (int)(nbio->bio_offset >> PAGE_SHIFT); |
| 984263bc | 129 | sp = &swdevt[index]; |
| 54078292 | 130 | if (nblkno + sz > sp->sw_nblks) { |
| 984263bc MD |
131 | bp->b_error = EINVAL; |
| 132 | bp->b_flags |= B_ERROR; | |
| 81b5c339 MD |
133 | /* I/O was never started on nbio, must biodone(bio) */ |
| 134 | biodone(bio); | |
| 984263bc MD |
135 | return 0; |
| 136 | } | |
| 984263bc MD |
137 | if (sp->sw_vp == NULL) { |
| 138 | bp->b_error = ENODEV; | |
| 139 | bp->b_flags |= B_ERROR; | |
| 81b5c339 MD |
140 | /* I/O was never started on nbio, must biodone(bio) */ |
| 141 | biodone(bio); | |
| 984263bc MD |
142 | return 0; |
| 143 | } | |
| 144 | ||
| 145 | /* | |
| 81b5c339 MD |
146 | * Issue a strategy call on the appropriate swap vnode. Note that |
| 147 | * bp->b_vp is not modified. Strategy code is always supposed to | |
| 148 | * use the passed vp. | |
| 149 | * | |
| c34665ce MD |
150 | * We have to use vn_strategy() here even if we know we have a |
| 151 | * device in order to properly break up requests which exceed the | |
| 152 | * device's DMA limits. | |
| 984263bc | 153 | */ |
| 81b5c339 | 154 | vn_strategy(sp->sw_vp, nbio); |
| 984263bc MD |
155 | return 0; |
| 156 | } | |
| 157 | ||
| 9f3543c6 MD |
158 | static int |
| 159 | swapdev_inactive(struct vop_inactive_args *ap) | |
| 160 | { | |
| 161 | vrecycle(ap->a_vp); | |
| 162 | return(0); | |
| 163 | } | |
| 164 | ||
/*
 * VOP_RECLAIM for swapdev_vp: there is nothing to tear down here, so
 * simply report success.
 */
static int
swapdev_reclaim(struct vop_reclaim_args *ap)
{
	return (0);
}
| 170 | ||
| 984263bc MD |
171 | /* |
| 172 | * Create a special vnode op vector for swapdev_vp - we only use | |
| 81b5c339 | 173 | * vn_strategy(), everything else returns an error. |
| 984263bc | 174 | */ |
| 66a1ddf5 MD |
175 | static struct vop_ops swapdev_vnode_vops = { |
| 176 | .vop_default = vop_defaultop, | |
| 9f3543c6 MD |
177 | .vop_strategy = swapdev_strategy, |
| 178 | .vop_inactive = swapdev_inactive, | |
| 179 | .vop_reclaim = swapdev_reclaim | |
| 984263bc | 180 | }; |
| 66a1ddf5 | 181 | static struct vop_ops *swapdev_vnode_vops_p = &swapdev_vnode_vops; |
| 984263bc | 182 | |
| 66a1ddf5 | 183 | VNODEOP_SET(swapdev_vnode_vops); |
| 984263bc MD |
184 | |
| 185 | /* | |
| 41c20dac MD |
186 | * swapon_args(char *name) |
| 187 | * | |
| 984263bc MD |
188 | * System call swapon(name) enables swapping on device name, |
| 189 | * which must be in the swdevsw. Return EBUSY | |
| 190 | * if already swapping on this device. | |
| 3919ced0 | 191 | * |
| 99ad9bc4 | 192 | * No requirements. |
| 984263bc | 193 | */ |
| 984263bc | 194 | int |
| 753fd850 | 195 | sys_swapon(struct swapon_args *uap) |
| 984263bc | 196 | { |
| dadab5e9 | 197 | struct thread *td = curthread; |
| 984263bc | 198 | struct vattr attr; |
| 5f910b2f | 199 | struct vnode *vp; |
| fad57d0e | 200 | struct nlookupdata nd; |
| 984263bc | 201 | int error; |
| dadab5e9 | 202 | |
| 895c1f85 | 203 | error = priv_check(td, PRIV_ROOT); |
| 984263bc MD |
204 | if (error) |
| 205 | return (error); | |
| 206 | ||
| 99ad9bc4 | 207 | mtx_lock(&swap_mtx); |
| 3919ced0 | 208 | get_mplock(); |
| fad57d0e MD |
209 | vp = NULL; |
| 210 | error = nlookup_init(&nd, uap->name, UIO_USERSPACE, NLC_FOLLOW); | |
| 211 | if (error == 0) | |
| 212 | error = nlookup(&nd); | |
| 213 | if (error == 0) | |
| 28623bf9 | 214 | error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); |
| fad57d0e | 215 | nlookup_done(&nd); |
| 3919ced0 MD |
216 | if (error) { |
| 217 | rel_mplock(); | |
| 99ad9bc4 | 218 | mtx_unlock(&swap_mtx); |
| 984263bc | 219 | return (error); |
| 3919ced0 | 220 | } |
| 984263bc | 221 | |
| 3919ced0 | 222 | if (vn_isdisk(vp, &error)) { |
| e4c9c0c8 | 223 | error = swaponvp(td, vp, 0); |
| 3919ced0 MD |
224 | } else if (vp->v_type == VREG && vp->v_tag == VT_NFS && |
| 225 | (error = VOP_GETATTR(vp, &attr)) == 0) { | |
| 984263bc MD |
226 | /* |
| 227 | * Allow direct swapping to NFS regular files in the same | |
| 228 | * way that nfs_mountroot() sets up diskless swapping. | |
| 229 | */ | |
| e4c9c0c8 | 230 | error = swaponvp(td, vp, attr.va_size / DEV_BSIZE); |
| 984263bc | 231 | } |
| 984263bc MD |
232 | if (error) |
| 233 | vrele(vp); | |
| 3919ced0 | 234 | rel_mplock(); |
| 99ad9bc4 | 235 | mtx_unlock(&swap_mtx); |
| 984263bc MD |
236 | |
| 237 | return (error); | |
| 238 | } | |
| 239 | ||
| 240 | /* | |
| 241 | * Swfree(index) frees the index'th portion of the swap map. | |
| 242 | * Each of the nswdev devices provides 1/nswdev'th of the swap | |
| 243 | * space, which is laid out with blocks of dmmax pages circularly | |
| 244 | * among the devices. | |
| 245 | * | |
| 246 | * The new swap code uses page-sized blocks. The old swap code used | |
| 247 | * DEV_BSIZE'd chunks. | |
| 248 | * | |
| 249 | * XXX locking when multiple swapon's run in parallel | |
| 250 | */ | |
| 251 | int | |
| 79634a66 | 252 | swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) |
| 984263bc | 253 | { |
| 79634a66 | 254 | swblk_t aligned_nblks; |
| e0fc5693 | 255 | int64_t dpsize; |
| e4c9c0c8 | 256 | struct ucred *cred; |
| 5f910b2f RG |
257 | struct swdevt *sp; |
| 258 | swblk_t vsbase; | |
| 984263bc | 259 | swblk_t dvbase; |
| b13267a5 | 260 | cdev_t dev; |
| e4c9c0c8 | 261 | int index; |
| 984263bc | 262 | int error; |
| 9f3543c6 | 263 | swblk_t blk; |
| dadab5e9 | 264 | |
| 9910d07b | 265 | cred = td->td_ucred; |
| 984263bc | 266 | |
| 138b6cd2 | 267 | lwkt_gettoken(&vm_token); /* needed for vm_swap_size and blist */ |
| 99ad9bc4 MD |
268 | mtx_lock(&swap_mtx); |
| 269 | ||
| 984263bc | 270 | if (!swapdev_vp) { |
| 66a1ddf5 | 271 | error = getspecialvnode(VT_NON, NULL, &swapdev_vnode_vops_p, |
| 3446c007 | 272 | &swapdev_vp, 0, 0); |
| 984263bc MD |
273 | if (error) |
| 274 | panic("Cannot get vnode for swapdev"); | |
| 275 | swapdev_vp->v_type = VNON; /* Untyped */ | |
| 5fd012e0 | 276 | vx_unlock(swapdev_vp); |
| 984263bc MD |
277 | } |
| 278 | ||
| 984263bc | 279 | for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) { |
| 99ad9bc4 | 280 | if (sp->sw_vp == vp) { |
| 138b6cd2 MD |
281 | error = EBUSY; |
| 282 | goto done; | |
| 99ad9bc4 | 283 | } |
| 984263bc MD |
284 | if (!sp->sw_vp) |
| 285 | goto found; | |
| 286 | ||
| 287 | } | |
| 138b6cd2 MD |
288 | error = EINVAL; |
| 289 | goto done; | |
| 984263bc | 290 | found: |
| ca466bae | 291 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 87de5057 | 292 | error = VOP_OPEN(vp, FREAD | FWRITE, cred, NULL); |
| a11aaa81 | 293 | vn_unlock(vp); |
| 138b6cd2 MD |
294 | if (error) |
| 295 | goto done; | |
| 984263bc | 296 | |
| e4c9c0c8 MD |
297 | /* |
| 298 | * v_rdev is not valid until after the VOP_OPEN() call. dev_psize() | |
| 299 | * must be supported if a character device has been specified. | |
| 300 | */ | |
| 301 | if (vp->v_type == VCHR) | |
| 302 | dev = vp->v_rdev; | |
| 303 | else | |
| 028066b1 | 304 | dev = NULL; |
| e4c9c0c8 | 305 | |
| e0fc5693 MD |
306 | if (nblks == 0 && dev != NULL) { |
| 307 | dpsize = dev_dpsize(dev); | |
| 308 | if (dpsize == -1) { | |
| 309 | VOP_CLOSE(vp, FREAD | FWRITE); | |
| 138b6cd2 MD |
310 | error = ENXIO; |
| 311 | goto done; | |
| e0fc5693 | 312 | } |
| 79634a66 | 313 | nblks = (u_quad_t)dpsize; |
| 984263bc MD |
314 | } |
| 315 | if (nblks == 0) { | |
| 87de5057 | 316 | VOP_CLOSE(vp, FREAD | FWRITE); |
| 138b6cd2 MD |
317 | error = ENXIO; |
| 318 | goto done; | |
| 984263bc MD |
319 | } |
| 320 | ||
| 321 | /* | |
| 984263bc MD |
322 | * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. |
| 323 | * First chop nblks off to page-align it, then convert. | |
| 324 | * | |
| 325 | * sw->sw_nblks is in page-sized chunks now too. | |
| 326 | */ | |
| 79634a66 | 327 | nblks &= ~(u_quad_t)(ctodb(1) - 1); |
| 984263bc MD |
328 | nblks = dbtoc(nblks); |
| 329 | ||
| 79634a66 MD |
330 | /* |
| 331 | * Post-conversion nblks must not be >= BLIST_MAXBLKS, and | |
| 332 | * we impose a 4-swap-device limit so we have to divide it out | |
| 333 | * further. Going beyond this will result in overflows in the | |
| 334 | * blist code. | |
| 335 | * | |
| 336 | * Post-conversion nblks must fit within a (swblk_t), which | |
| 337 | * this test also ensures. | |
| 338 | */ | |
| 339 | if (nblks > BLIST_MAXBLKS / nswdev) { | |
| 340 | kprintf("exceeded maximum of %d blocks per swap unit\n", | |
| 341 | (int)BLIST_MAXBLKS / nswdev); | |
| 342 | VOP_CLOSE(vp, FREAD | FWRITE); | |
| 138b6cd2 MD |
343 | error = ENXIO; |
| 344 | goto done; | |
| 79634a66 MD |
345 | } |
| 346 | ||
| 984263bc MD |
347 | sp->sw_vp = vp; |
| 348 | sp->sw_dev = dev2udev(dev); | |
| 349 | sp->sw_device = dev; | |
| 9f3543c6 | 350 | sp->sw_flags = SW_FREED; |
| 099f3e5e | 351 | sp->sw_nused = 0; |
| 984263bc MD |
352 | |
| 353 | /* | |
| 354 | * nblks, nswap, and dmmax are PAGE_SIZE'd parameters now, not | |
| 355 | * DEV_BSIZE'd. aligned_nblks is used to calculate the | |
| 356 | * size of the swap bitmap, taking into account the stripe size. | |
| 357 | */ | |
| 79634a66 | 358 | aligned_nblks = (swblk_t)((nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1)); |
| 099f3e5e | 359 | sp->sw_nblks = aligned_nblks; |
| 984263bc MD |
360 | |
| 361 | if (aligned_nblks * nswdev > nswap) | |
| 362 | nswap = aligned_nblks * nswdev; | |
| 363 | ||
| 364 | if (swapblist == NULL) | |
| 365 | swapblist = blist_create(nswap); | |
| 366 | else | |
| 367 | blist_resize(&swapblist, nswap, 0); | |
| 368 | ||
| 099f3e5e MD |
369 | for (dvbase = dmmax; dvbase < aligned_nblks; dvbase += dmmax) { |
| 370 | blk = min(aligned_nblks - dvbase, dmmax); | |
| 984263bc MD |
371 | vsbase = index * dmmax + dvbase * nswdev; |
| 372 | blist_free(swapblist, vsbase, blk); | |
| 373 | vm_swap_size += blk; | |
| 1e5196f0 | 374 | vm_swap_max += blk; |
| 984263bc | 375 | } |
| c84c24da | 376 | swap_pager_newswap(); |
| 138b6cd2 MD |
377 | error = 0; |
| 378 | done: | |
| 99ad9bc4 | 379 | mtx_unlock(&swap_mtx); |
| 138b6cd2 MD |
380 | lwkt_reltoken(&vm_token); |
| 381 | return (error); | |
| 984263bc | 382 | } |
| 099f3e5e MD |
383 | |
| 384 | /* | |
| 9f3543c6 MD |
385 | * swapoff_args(char *name) |
| 386 | * | |
| 387 | * System call swapoff(name) disables swapping on device name, | |
| 388 | * which must be an active swap device. Return ENOMEM | |
| 389 | * if there is not enough memory to page in the contents of | |
| 390 | * the given device. | |
| 391 | * | |
| 392 | * No requirements. | |
| 393 | */ | |
| 394 | int | |
| 395 | sys_swapoff(struct swapoff_args *uap) | |
| 396 | { | |
| 397 | struct vnode *vp; | |
| 398 | struct nlookupdata nd; | |
| 399 | struct swdevt *sp; | |
| 400 | int error, index; | |
| 401 | ||
| 402 | error = priv_check(curthread, PRIV_ROOT); | |
| 403 | if (error) | |
| 404 | return (error); | |
| 405 | ||
| 406 | mtx_lock(&swap_mtx); | |
| 407 | get_mplock(); | |
| 408 | vp = NULL; | |
| 409 | error = nlookup_init(&nd, uap->name, UIO_USERSPACE, NLC_FOLLOW); | |
| 410 | if (error == 0) | |
| 411 | error = nlookup(&nd); | |
| 412 | if (error == 0) | |
| 413 | error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); | |
| 414 | nlookup_done(&nd); | |
| 415 | if (error) | |
| 416 | goto done; | |
| 417 | ||
| 418 | for (sp = swdevt, index = 0; index < nswdev; index++, sp++) { | |
| 419 | if (sp->sw_vp == vp) | |
| 420 | goto found; | |
| 421 | } | |
| 422 | error = EINVAL; | |
| 423 | goto done; | |
| 424 | found: | |
| 425 | error = swapoff_one(index); | |
| 426 | ||
| 427 | done: | |
| 428 | rel_mplock(); | |
| 429 | mtx_unlock(&swap_mtx); | |
| 430 | return (error); | |
| 431 | } | |
| 432 | ||
| 433 | static int | |
| 434 | swapoff_one(int index) | |
| 435 | { | |
| 436 | swblk_t blk, aligned_nblks; | |
| 437 | swblk_t dvbase, vsbase; | |
| 438 | u_int pq_active_clean, pq_inactive_clean; | |
| 439 | struct swdevt *sp; | |
| b12defdc | 440 | struct vm_page marker; |
| 9f3543c6 | 441 | vm_page_t m; |
| 027193eb | 442 | int q; |
| 9f3543c6 MD |
443 | |
| 444 | mtx_lock(&swap_mtx); | |
| 445 | ||
| 446 | sp = &swdevt[index]; | |
| 447 | aligned_nblks = sp->sw_nblks; | |
| 448 | pq_active_clean = pq_inactive_clean = 0; | |
| 449 | ||
| 450 | /* | |
| 451 | * We can turn off this swap device safely only if the | |
| 452 | * available virtual memory in the system will fit the amount | |
| 453 | * of data we will have to page back in, plus an epsilon so | |
| 454 | * the system doesn't become critically low on swap space. | |
| 455 | */ | |
| 51c99c61 | 456 | for (q = 0; q < PQ_L2_SIZE; ++q) { |
| 027193eb MD |
457 | bzero(&marker, sizeof(marker)); |
| 458 | marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER; | |
| 459 | marker.queue = PQ_ACTIVE + q; | |
| 460 | marker.pc = q; | |
| 461 | marker.wire_count = 1; | |
| 462 | ||
| 463 | vm_page_queues_spin_lock(marker.queue); | |
| 464 | TAILQ_INSERT_HEAD(&vm_page_queues[marker.queue].pl, | |
| 465 | &marker, pageq); | |
| 466 | ||
| 467 | while ((m = TAILQ_NEXT(&marker, pageq)) != NULL) { | |
| 468 | TAILQ_REMOVE(&vm_page_queues[marker.queue].pl, | |
| 469 | &marker, pageq); | |
| 470 | TAILQ_INSERT_AFTER(&vm_page_queues[marker.queue].pl, m, | |
| 471 | &marker, pageq); | |
| 472 | if (m->flags & (PG_MARKER | PG_FICTITIOUS)) | |
| 473 | continue; | |
| 474 | ||
| 475 | if (vm_page_busy_try(m, FALSE) == 0) { | |
| 476 | vm_page_queues_spin_unlock(marker.queue); | |
| 477 | if (m->dirty == 0) { | |
| 478 | vm_page_test_dirty(m); | |
| 479 | if (m->dirty == 0) | |
| 480 | ++pq_active_clean; | |
| 481 | } | |
| 482 | vm_page_wakeup(m); | |
| 483 | vm_page_queues_spin_lock(marker.queue); | |
| b12defdc | 484 | } |
| 9f3543c6 | 485 | } |
| 027193eb MD |
486 | TAILQ_REMOVE(&vm_page_queues[marker.queue].pl, &marker, pageq); |
| 487 | vm_page_queues_spin_unlock(marker.queue); | |
| 488 | ||
| 489 | marker.queue = PQ_INACTIVE + q; | |
| 490 | marker.pc = q; | |
| 491 | vm_page_queues_spin_lock(marker.queue); | |
| 492 | TAILQ_INSERT_HEAD(&vm_page_queues[marker.queue].pl, | |
| 493 | &marker, pageq); | |
| 494 | ||
| 495 | while ((m = TAILQ_NEXT(&marker, pageq)) != NULL) { | |
| 496 | TAILQ_REMOVE( | |
| 497 | &vm_page_queues[marker.queue].pl, | |
| 498 | &marker, pageq); | |
| 499 | TAILQ_INSERT_AFTER( | |
| 500 | &vm_page_queues[marker.queue].pl, | |
| 501 | m, &marker, pageq); | |
| 502 | if (m->flags & (PG_MARKER | PG_FICTITIOUS)) | |
| 503 | continue; | |
| 504 | ||
| 505 | if (vm_page_busy_try(m, FALSE) == 0) { | |
| 506 | vm_page_queues_spin_unlock(marker.queue); | |
| 507 | if (m->dirty == 0) { | |
| 508 | vm_page_test_dirty(m); | |
| 509 | if (m->dirty == 0) | |
| 510 | ++pq_inactive_clean; | |
| 511 | } | |
| 512 | vm_page_wakeup(m); | |
| 513 | vm_page_queues_spin_lock(marker.queue); | |
| b12defdc | 514 | } |
| 9f3543c6 | 515 | } |
| 027193eb MD |
516 | TAILQ_REMOVE(&vm_page_queues[marker.queue].pl, |
| 517 | &marker, pageq); | |
| 518 | vm_page_queues_spin_unlock(marker.queue); | |
| 9f3543c6 | 519 | } |
| 9f3543c6 MD |
520 | |
| 521 | if (vmstats.v_free_count + vmstats.v_cache_count + pq_active_clean + | |
| 522 | pq_inactive_clean + vm_swap_size < aligned_nblks + nswap_lowat) { | |
| 523 | mtx_unlock(&swap_mtx); | |
| 524 | return (ENOMEM); | |
| 525 | } | |
| 526 | ||
| 527 | /* | |
| 528 | * Prevent further allocations on this device | |
| 529 | */ | |
| 530 | sp->sw_flags |= SW_CLOSING; | |
| 531 | for (dvbase = dmmax; dvbase < aligned_nblks; dvbase += dmmax) { | |
| 532 | blk = min(aligned_nblks - dvbase, dmmax); | |
| 533 | vsbase = index * dmmax + dvbase * nswdev; | |
| 534 | vm_swap_size -= blist_fill(swapblist, vsbase, blk); | |
| 535 | vm_swap_max -= blk; | |
| 536 | } | |
| 537 | ||
| 538 | /* | |
| 539 | * Page in the contents of the device and close it. | |
| 540 | */ | |
| 541 | if (swap_pager_swapoff(index)) { | |
| 542 | mtx_unlock(&swap_mtx); | |
| 543 | return (EINTR); | |
| 544 | } | |
| 545 | ||
| 546 | VOP_CLOSE(sp->sw_vp, FREAD | FWRITE); | |
| 547 | vrele(sp->sw_vp); | |
| 548 | bzero(swdevt + index, sizeof(struct swdevt)); | |
| 549 | ||
| 550 | /* | |
| 551 | * Resize the bitmap based on the nem largest swap device, | |
| 552 | * or free the bitmap if there are no more devices. | |
| 553 | */ | |
| 554 | for (sp = swdevt, aligned_nblks = 0; sp < swdevt + nswdev; sp++) { | |
| 555 | if (sp->sw_vp) | |
| 556 | aligned_nblks = max(aligned_nblks, sp->sw_nblks); | |
| 557 | } | |
| 558 | ||
| 559 | nswap = aligned_nblks * nswdev; | |
| 560 | ||
| 561 | if (nswap == 0) { | |
| 562 | blist_destroy(swapblist); | |
| 563 | swapblist = NULL; | |
| 564 | vrele(swapdev_vp); | |
| 565 | swapdev_vp = NULL; | |
| 566 | } else { | |
| 567 | blist_resize(&swapblist, nswap, 0); | |
| 568 | } | |
| 569 | ||
| 570 | mtx_unlock(&swap_mtx); | |
| 571 | return (0); | |
| 572 | } | |
| 573 | ||
| 574 | /* | |
| 099f3e5e MD |
575 | * Account for swap space in individual swdevt's. The caller ensures |
| 576 | * that the provided range falls into a single swdevt. | |
| 577 | * | |
| 578 | * +count space freed | |
| 579 | * -count space allocated | |
| 580 | */ | |
| 581 | void | |
| 582 | swapacctspace(swblk_t base, swblk_t count) | |
| 583 | { | |
| 584 | int index; | |
| 585 | int seg; | |
| 586 | ||
| 587 | vm_swap_size += count; | |
| 588 | seg = base / dmmax; | |
| 589 | index = seg % nswdev; | |
| 590 | swdevt[index].sw_nused -= count; | |
| 591 | } | |
| 592 | ||
| 593 | /* | |
| 594 | * Retrieve swap info | |
| 595 | */ | |
| 596 | static int | |
| 597 | sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) | |
| 598 | { | |
| 599 | struct xswdev xs; | |
| 600 | struct swdevt *sp; | |
| 601 | int error; | |
| 602 | int n; | |
| 603 | ||
| 604 | error = 0; | |
| 605 | for (n = 0; n < nswdev; ++n) { | |
| 606 | sp = &swdevt[n]; | |
| 607 | ||
| 608 | xs.xsw_size = sizeof(xs); | |
| 609 | xs.xsw_version = XSWDEV_VERSION; | |
| 610 | xs.xsw_blksize = PAGE_SIZE; | |
| 611 | xs.xsw_dev = sp->sw_dev; | |
| 612 | xs.xsw_flags = sp->sw_flags; | |
| 613 | xs.xsw_nblks = sp->sw_nblks; | |
| 614 | xs.xsw_used = sp->sw_nused; | |
| 615 | ||
| 616 | error = SYSCTL_OUT(req, &xs, sizeof(xs)); | |
| 617 | if (error) | |
| 618 | break; | |
| 619 | } | |
| 620 | return (error); | |
| 621 | } | |
| 622 | ||
| 623 | SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswdev, 0, | |
| 624 | "Number of swap devices"); | |
| 625 | SYSCTL_NODE(_vm, OID_AUTO, swap_info_array, CTLFLAG_RD, sysctl_vm_swap_info, | |
| 626 | "Swap statistics by device"); |