| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1982, 1986, 1989, 1993 | |
| 3 | * The Regents of the University of California. All rights reserved. | |
| 4 | * | |
| 5 | * Redistribution and use in source and binary forms, with or without | |
| 6 | * modification, are permitted provided that the following conditions | |
| 7 | * are met: | |
| 8 | * 1. Redistributions of source code must retain the above copyright | |
| 9 | * notice, this list of conditions and the following disclaimer. | |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 | * notice, this list of conditions and the following disclaimer in the | |
| 12 | * documentation and/or other materials provided with the distribution. | |
| 13 | * 3. All advertising materials mentioning features or use of this software | |
| 14 | * must display the following acknowledgement: | |
| 15 | * This product includes software developed by the University of | |
| 16 | * California, Berkeley and its contributors. | |
| 17 | * 4. Neither the name of the University nor the names of its contributors | |
| 18 | * may be used to endorse or promote products derived from this software | |
| 19 | * without specific prior written permission. | |
| 20 | * | |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 31 | * SUCH DAMAGE. | |
| 32 | * | |
| 33 | * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 | |
| 34 | * $FreeBSD: src/sys/vm/vm_swap.c,v 1.96.2.2 2001/10/14 18:46:47 iedowse Exp $ | |
| c34665ce | 35 | * $DragonFly: src/sys/vm/vm_swap.c,v 1.36 2007/07/20 17:21:54 dillon Exp $ |
| 984263bc MD |
36 | */ |
| 37 | ||
| 38 | #include "opt_swap.h" | |
| 39 | ||
| 40 | #include <sys/param.h> | |
| 41 | #include <sys/systm.h> | |
| 42 | #include <sys/sysproto.h> | |
| 43 | #include <sys/buf.h> | |
| 44 | #include <sys/proc.h> | |
| 895c1f85 | 45 | #include <sys/priv.h> |
| fad57d0e | 46 | #include <sys/nlookup.h> |
| 984263bc MD |
47 | #include <sys/dmap.h> /* XXX */ |
| 48 | #include <sys/vnode.h> | |
| 49 | #include <sys/fcntl.h> | |
| 50 | #include <sys/blist.h> | |
| 51 | #include <sys/kernel.h> | |
| 52 | #include <sys/lock.h> | |
| 53 | #include <sys/conf.h> | |
| 54 | #include <sys/stat.h> | |
| cdd46d2e | 55 | #include <sys/thread2.h> |
| 984263bc MD |
56 | #include <vm/vm.h> |
| 57 | #include <vm/vm_extern.h> | |
| 58 | #include <vm/swap_pager.h> | |
| 59 | #include <vm/vm_zone.h> | |
| 60 | ||
| 61 | /* | |
| 62 | * Indirect driver for multi-controller paging. | |
| 63 | */ | |
| 64 | ||
| 65 | #ifndef NSWAPDEV | |
| 66 | #define NSWAPDEV 4 | |
| 67 | #endif | |
| 68 | static struct swdevt should_be_malloced[NSWAPDEV]; | |
| 460426e6 | 69 | struct swdevt *swdevt = should_be_malloced; /* exported to pstat/systat */ |
| 79634a66 | 70 | static swblk_t nswap; /* first block after the interleaved devs */ |
| 460426e6 | 71 | int nswdev = NSWAPDEV; /* exported to pstat/systat */ |
| 984263bc MD |
72 | int vm_swap_size; |
| 73 | ||
| 1388df65 | 74 | static int swapdev_strategy (struct vop_strategy_args *ap); |
| 984263bc MD |
75 | struct vnode *swapdev_vp; |
| 76 | ||
| 77 | /* | |
| 78 | * swapdev_strategy: | |
| 79 | * | |
| 81b5c339 | 80 | * vn_strategy() for swapdev_vp. |
| 984263bc MD |
81 | * Perform swap strategy interleave device selection. |
| 82 | * | |
| 10f3fee5 | 83 | * The bp is expected to be locked and on call. |
| 81b5c339 MD |
84 | * |
| 85 | * (struct vnode *a_vp, struct bio *b_bio) | |
| 984263bc MD |
86 | */ |
| 87 | ||
| 88 | static int | |
| 81b5c339 | 89 | swapdev_strategy(struct vop_strategy_args *ap) |
| 984263bc | 90 | { |
| 81b5c339 MD |
91 | struct bio *bio = ap->a_bio; |
| 92 | struct bio *nbio; | |
| 93 | struct buf *bp = bio->bio_buf; | |
| 54078292 | 94 | int sz, off, seg, index, blkno, nblkno; |
| 5f910b2f | 95 | struct swdevt *sp; |
| 984263bc | 96 | struct vnode *vp; |
| 984263bc | 97 | |
| 81b5c339 | 98 | vp = ap->a_vp; |
| 984263bc | 99 | sz = howmany(bp->b_bcount, PAGE_SIZE); |
| 54078292 | 100 | blkno = (int)(bio->bio_offset >> PAGE_SHIFT); |
| 984263bc MD |
101 | |
| 102 | /* | |
| 103 | * Convert interleaved swap into per-device swap. Note that | |
| 104 | * the block size is left in PAGE_SIZE'd chunks (for the newswap) | |
| 105 | * here. | |
| 106 | */ | |
| 81b5c339 | 107 | nbio = push_bio(bio); |
| 984263bc | 108 | if (nswdev > 1) { |
| 54078292 | 109 | off = blkno % dmmax; |
| 984263bc MD |
110 | if (off + sz > dmmax) { |
| 111 | bp->b_error = EINVAL; | |
| 112 | bp->b_flags |= B_ERROR; | |
| 81b5c339 | 113 | biodone(bio); |
| 984263bc MD |
114 | return 0; |
| 115 | } | |
| 54078292 | 116 | seg = blkno / dmmax; |
| 984263bc MD |
117 | index = seg % nswdev; |
| 118 | seg /= nswdev; | |
| 54078292 | 119 | nbio->bio_offset = (off_t)(seg * dmmax + off) << PAGE_SHIFT; |
| 984263bc MD |
120 | } else { |
| 121 | index = 0; | |
| 54078292 | 122 | nbio->bio_offset = bio->bio_offset; |
| 984263bc | 123 | } |
| 54078292 | 124 | nblkno = (int)(nbio->bio_offset >> PAGE_SHIFT); |
| 984263bc | 125 | sp = &swdevt[index]; |
| 54078292 | 126 | if (nblkno + sz > sp->sw_nblks) { |
| 984263bc MD |
127 | bp->b_error = EINVAL; |
| 128 | bp->b_flags |= B_ERROR; | |
| 81b5c339 MD |
129 | /* I/O was never started on nbio, must biodone(bio) */ |
| 130 | biodone(bio); | |
| 984263bc MD |
131 | return 0; |
| 132 | } | |
| 984263bc MD |
133 | if (sp->sw_vp == NULL) { |
| 134 | bp->b_error = ENODEV; | |
| 135 | bp->b_flags |= B_ERROR; | |
| 81b5c339 MD |
136 | /* I/O was never started on nbio, must biodone(bio) */ |
| 137 | biodone(bio); | |
| 984263bc MD |
138 | return 0; |
| 139 | } | |
| 140 | ||
| 141 | /* | |
| 81b5c339 MD |
142 | * Issue a strategy call on the appropriate swap vnode. Note that |
| 143 | * bp->b_vp is not modified. Strategy code is always supposed to | |
| 144 | * use the passed vp. | |
| 145 | * | |
| c34665ce MD |
146 | * We have to use vn_strategy() here even if we know we have a |
| 147 | * device in order to properly break up requests which exceed the | |
| 148 | * device's DMA limits. | |
| 984263bc | 149 | */ |
| 81b5c339 | 150 | vn_strategy(sp->sw_vp, nbio); |
| 984263bc MD |
151 | return 0; |
| 152 | } | |
| 153 | ||
| 154 | /* | |
| 155 | * Create a special vnode op vector for swapdev_vp - we only use | |
| 81b5c339 | 156 | * vn_strategy(), everything else returns an error. |
| 984263bc | 157 | */ |
| 66a1ddf5 MD |
158 | static struct vop_ops swapdev_vnode_vops = { |
| 159 | .vop_default = vop_defaultop, | |
| 160 | .vop_strategy = swapdev_strategy | |
| 984263bc | 161 | }; |
| 66a1ddf5 | 162 | static struct vop_ops *swapdev_vnode_vops_p = &swapdev_vnode_vops; |
| 984263bc | 163 | |
| 66a1ddf5 | 164 | VNODEOP_SET(swapdev_vnode_vops); |
| 984263bc MD |
165 | |
| 166 | /* | |
| 41c20dac MD |
167 | * swapon_args(char *name) |
| 168 | * | |
| 984263bc MD |
169 | * System call swapon(name) enables swapping on device name, |
| 170 | * which must be in the swdevsw. Return EBUSY | |
| 171 | * if already swapping on this device. | |
| 3919ced0 MD |
172 | * |
| 173 | * MPALMOSTSAFE | |
| 984263bc | 174 | */ |
| 984263bc | 175 | int |
| 753fd850 | 176 | sys_swapon(struct swapon_args *uap) |
| 984263bc | 177 | { |
| dadab5e9 | 178 | struct thread *td = curthread; |
| 984263bc | 179 | struct vattr attr; |
| 5f910b2f | 180 | struct vnode *vp; |
| fad57d0e | 181 | struct nlookupdata nd; |
| 984263bc | 182 | int error; |
| dadab5e9 | 183 | struct ucred *cred; |
| 984263bc | 184 | |
| 9910d07b | 185 | cred = td->td_ucred; |
| dadab5e9 | 186 | |
| 895c1f85 | 187 | error = priv_check(td, PRIV_ROOT); |
| 984263bc MD |
188 | if (error) |
| 189 | return (error); | |
| 190 | ||
| 3919ced0 | 191 | get_mplock(); |
| fad57d0e MD |
192 | vp = NULL; |
| 193 | error = nlookup_init(&nd, uap->name, UIO_USERSPACE, NLC_FOLLOW); | |
| 194 | if (error == 0) | |
| 195 | error = nlookup(&nd); | |
| 196 | if (error == 0) | |
| 28623bf9 | 197 | error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); |
| fad57d0e | 198 | nlookup_done(&nd); |
| 3919ced0 MD |
199 | if (error) { |
| 200 | rel_mplock(); | |
| 984263bc | 201 | return (error); |
| 3919ced0 | 202 | } |
| 984263bc | 203 | |
| 3919ced0 | 204 | if (vn_isdisk(vp, &error)) { |
| e4c9c0c8 | 205 | error = swaponvp(td, vp, 0); |
| 3919ced0 MD |
206 | } else if (vp->v_type == VREG && vp->v_tag == VT_NFS && |
| 207 | (error = VOP_GETATTR(vp, &attr)) == 0) { | |
| 984263bc MD |
208 | /* |
| 209 | * Allow direct swapping to NFS regular files in the same | |
| 210 | * way that nfs_mountroot() sets up diskless swapping. | |
| 211 | */ | |
| e4c9c0c8 | 212 | error = swaponvp(td, vp, attr.va_size / DEV_BSIZE); |
| 984263bc | 213 | } |
| 984263bc MD |
214 | if (error) |
| 215 | vrele(vp); | |
| 3919ced0 | 216 | rel_mplock(); |
| 984263bc MD |
217 | |
| 218 | return (error); | |
| 219 | } | |
| 220 | ||
| 221 | /* | |
| 222 | * Swfree(index) frees the index'th portion of the swap map. | |
| 223 | * Each of the nswdev devices provides 1/nswdev'th of the swap | |
| 224 | * space, which is laid out with blocks of dmmax pages circularly | |
| 225 | * among the devices. | |
| 226 | * | |
| 227 | * The new swap code uses page-sized blocks. The old swap code used | |
| 228 | * DEV_BSIZE'd chunks. | |
| 229 | * | |
| 230 | * XXX locking when multiple swapon's run in parallel | |
| 231 | */ | |
| 232 | int | |
| 79634a66 | 233 | swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) |
| 984263bc | 234 | { |
| 79634a66 | 235 | swblk_t aligned_nblks; |
| e0fc5693 | 236 | int64_t dpsize; |
| e4c9c0c8 | 237 | struct ucred *cred; |
| 5f910b2f RG |
238 | struct swdevt *sp; |
| 239 | swblk_t vsbase; | |
| 984263bc | 240 | swblk_t dvbase; |
| b13267a5 | 241 | cdev_t dev; |
| e4c9c0c8 | 242 | int index; |
| 984263bc | 243 | int error; |
| e4c9c0c8 | 244 | long blk; |
| dadab5e9 | 245 | |
| 9910d07b | 246 | cred = td->td_ucred; |
| 984263bc MD |
247 | |
| 248 | if (!swapdev_vp) { | |
| 66a1ddf5 | 249 | error = getspecialvnode(VT_NON, NULL, &swapdev_vnode_vops_p, |
| 3446c007 | 250 | &swapdev_vp, 0, 0); |
| 984263bc MD |
251 | if (error) |
| 252 | panic("Cannot get vnode for swapdev"); | |
| 253 | swapdev_vp->v_type = VNON; /* Untyped */ | |
| 5fd012e0 | 254 | vx_unlock(swapdev_vp); |
| 984263bc MD |
255 | } |
| 256 | ||
| 984263bc MD |
257 | for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) { |
| 258 | if (sp->sw_vp == vp) | |
| 259 | return EBUSY; | |
| 260 | if (!sp->sw_vp) | |
| 261 | goto found; | |
| 262 | ||
| 263 | } | |
| 264 | return EINVAL; | |
| 265 | found: | |
| ca466bae | 266 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| 87de5057 | 267 | error = VOP_OPEN(vp, FREAD | FWRITE, cred, NULL); |
| a11aaa81 | 268 | vn_unlock(vp); |
| 984263bc MD |
269 | if (error) |
| 270 | return (error); | |
| 271 | ||
| e4c9c0c8 MD |
272 | /* |
| 273 | * v_rdev is not valid until after the VOP_OPEN() call. dev_psize() | |
| 274 | * must be supported if a character device has been specified. | |
| 275 | */ | |
| 276 | if (vp->v_type == VCHR) | |
| 277 | dev = vp->v_rdev; | |
| 278 | else | |
| 028066b1 | 279 | dev = NULL; |
| e4c9c0c8 | 280 | |
| e0fc5693 MD |
281 | if (nblks == 0 && dev != NULL) { |
| 282 | dpsize = dev_dpsize(dev); | |
| 283 | if (dpsize == -1) { | |
| 284 | VOP_CLOSE(vp, FREAD | FWRITE); | |
| 285 | return (ENXIO); | |
| 286 | } | |
| 79634a66 | 287 | nblks = (u_quad_t)dpsize; |
| 984263bc MD |
288 | } |
| 289 | if (nblks == 0) { | |
| 87de5057 | 290 | VOP_CLOSE(vp, FREAD | FWRITE); |
| 984263bc MD |
291 | return (ENXIO); |
| 292 | } | |
| 293 | ||
| 294 | /* | |
| 984263bc MD |
295 | * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. |
| 296 | * First chop nblks off to page-align it, then convert. | |
| 297 | * | |
| 298 | * sw->sw_nblks is in page-sized chunks now too. | |
| 299 | */ | |
| 79634a66 | 300 | nblks &= ~(u_quad_t)(ctodb(1) - 1); |
| 984263bc MD |
301 | nblks = dbtoc(nblks); |
| 302 | ||
| 79634a66 MD |
303 | /* |
| 304 | * Post-conversion nblks must not be >= BLIST_MAXBLKS, and | |
| 305 | * we impose a 4-swap-device limit so we have to divide it out | |
| 306 | * further. Going beyond this will result in overflows in the | |
| 307 | * blist code. | |
| 308 | * | |
| 309 | * Post-conversion nblks must fit within a (swblk_t), which | |
| 310 | * this test also ensures. | |
| 311 | */ | |
| 312 | if (nblks > BLIST_MAXBLKS / nswdev) { | |
| 313 | kprintf("exceeded maximum of %d blocks per swap unit\n", | |
| 314 | (int)BLIST_MAXBLKS / nswdev); | |
| 315 | VOP_CLOSE(vp, FREAD | FWRITE); | |
| 316 | return (ENXIO); | |
| 317 | } | |
| 318 | ||
| 984263bc MD |
319 | sp->sw_vp = vp; |
| 320 | sp->sw_dev = dev2udev(dev); | |
| 321 | sp->sw_device = dev; | |
| 322 | sp->sw_flags |= SW_FREED; | |
| 79634a66 | 323 | sp->sw_nblks = (swblk_t)nblks; |
| 984263bc MD |
324 | |
| 325 | /* | |
| 326 | * nblks, nswap, and dmmax are PAGE_SIZE'd parameters now, not | |
| 327 | * DEV_BSIZE'd. aligned_nblks is used to calculate the | |
| 328 | * size of the swap bitmap, taking into account the stripe size. | |
| 329 | */ | |
| 79634a66 | 330 | aligned_nblks = (swblk_t)((nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1)); |
| 984263bc MD |
331 | |
| 332 | if (aligned_nblks * nswdev > nswap) | |
| 333 | nswap = aligned_nblks * nswdev; | |
| 334 | ||
| 335 | if (swapblist == NULL) | |
| 336 | swapblist = blist_create(nswap); | |
| 337 | else | |
| 338 | blist_resize(&swapblist, nswap, 0); | |
| 339 | ||
| 340 | for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) { | |
| 341 | blk = min(nblks - dvbase, dmmax); | |
| 342 | vsbase = index * dmmax + dvbase * nswdev; | |
| 343 | blist_free(swapblist, vsbase, blk); | |
| 344 | vm_swap_size += blk; | |
| 345 | } | |
| c84c24da | 346 | swap_pager_newswap(); |
| 984263bc MD |
347 | |
| 348 | return (0); | |
| 349 | } |