Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /* |
2 | * Copyright (c) 1982, 1986, 1989, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * 3. All advertising materials mentioning features or use of this software | |
14 | * must display the following acknowledgement: | |
15 | * This product includes software developed by the University of | |
16 | * California, Berkeley and its contributors. | |
17 | * 4. Neither the name of the University nor the names of its contributors | |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | * | |
33 | * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 | |
34 | * $FreeBSD: src/sys/vm/vm_swap.c,v 1.96.2.2 2001/10/14 18:46:47 iedowse Exp $ | |
c34665ce | 35 | * $DragonFly: src/sys/vm/vm_swap.c,v 1.36 2007/07/20 17:21:54 dillon Exp $ |
984263bc MD |
36 | */ |
37 | ||
38 | #include "opt_swap.h" | |
39 | ||
40 | #include <sys/param.h> | |
41 | #include <sys/systm.h> | |
42 | #include <sys/sysproto.h> | |
43 | #include <sys/buf.h> | |
44 | #include <sys/proc.h> | |
895c1f85 | 45 | #include <sys/priv.h> |
fad57d0e | 46 | #include <sys/nlookup.h> |
984263bc MD |
47 | #include <sys/dmap.h> /* XXX */ |
48 | #include <sys/vnode.h> | |
49 | #include <sys/fcntl.h> | |
50 | #include <sys/blist.h> | |
51 | #include <sys/kernel.h> | |
52 | #include <sys/lock.h> | |
53 | #include <sys/conf.h> | |
54 | #include <sys/stat.h> | |
cdd46d2e | 55 | #include <sys/thread2.h> |
984263bc MD |
56 | #include <vm/vm.h> |
57 | #include <vm/vm_extern.h> | |
58 | #include <vm/swap_pager.h> | |
59 | #include <vm/vm_zone.h> | |
60 | ||
61 | /* | |
62 | * Indirect driver for multi-controller paging. | |
63 | */ | |
64 | ||
65 | #ifndef NSWAPDEV | |
66 | #define NSWAPDEV 4 | |
67 | #endif | |
68 | static struct swdevt should_be_malloced[NSWAPDEV]; | |
460426e6 | 69 | struct swdevt *swdevt = should_be_malloced; /* exported to pstat/systat */ |
79634a66 | 70 | static swblk_t nswap; /* first block after the interleaved devs */ |
460426e6 | 71 | int nswdev = NSWAPDEV; /* exported to pstat/systat */ |
984263bc MD |
72 | int vm_swap_size; |
73 | ||
1388df65 | 74 | static int swapdev_strategy (struct vop_strategy_args *ap); |
984263bc MD |
75 | struct vnode *swapdev_vp; |
76 | ||
77 | /* | |
78 | * swapdev_strategy: | |
79 | * | |
81b5c339 | 80 | * vn_strategy() for swapdev_vp. |
984263bc MD |
81 | * Perform swap strategy interleave device selection. |
82 | * | |
10f3fee5 | 83 | * The bp is expected to be locked and on call. |
81b5c339 MD |
84 | * |
85 | * (struct vnode *a_vp, struct bio *b_bio) | |
984263bc MD |
86 | */ |
87 | ||
88 | static int | |
81b5c339 | 89 | swapdev_strategy(struct vop_strategy_args *ap) |
984263bc | 90 | { |
81b5c339 MD |
91 | struct bio *bio = ap->a_bio; |
92 | struct bio *nbio; | |
93 | struct buf *bp = bio->bio_buf; | |
54078292 | 94 | int sz, off, seg, index, blkno, nblkno; |
5f910b2f | 95 | struct swdevt *sp; |
984263bc | 96 | struct vnode *vp; |
984263bc | 97 | |
81b5c339 | 98 | vp = ap->a_vp; |
984263bc | 99 | sz = howmany(bp->b_bcount, PAGE_SIZE); |
54078292 | 100 | blkno = (int)(bio->bio_offset >> PAGE_SHIFT); |
984263bc MD |
101 | |
102 | /* | |
103 | * Convert interleaved swap into per-device swap. Note that | |
104 | * the block size is left in PAGE_SIZE'd chunks (for the newswap) | |
105 | * here. | |
106 | */ | |
81b5c339 | 107 | nbio = push_bio(bio); |
984263bc | 108 | if (nswdev > 1) { |
54078292 | 109 | off = blkno % dmmax; |
984263bc MD |
110 | if (off + sz > dmmax) { |
111 | bp->b_error = EINVAL; | |
112 | bp->b_flags |= B_ERROR; | |
81b5c339 | 113 | biodone(bio); |
984263bc MD |
114 | return 0; |
115 | } | |
54078292 | 116 | seg = blkno / dmmax; |
984263bc MD |
117 | index = seg % nswdev; |
118 | seg /= nswdev; | |
54078292 | 119 | nbio->bio_offset = (off_t)(seg * dmmax + off) << PAGE_SHIFT; |
984263bc MD |
120 | } else { |
121 | index = 0; | |
54078292 | 122 | nbio->bio_offset = bio->bio_offset; |
984263bc | 123 | } |
54078292 | 124 | nblkno = (int)(nbio->bio_offset >> PAGE_SHIFT); |
984263bc | 125 | sp = &swdevt[index]; |
54078292 | 126 | if (nblkno + sz > sp->sw_nblks) { |
984263bc MD |
127 | bp->b_error = EINVAL; |
128 | bp->b_flags |= B_ERROR; | |
81b5c339 MD |
129 | /* I/O was never started on nbio, must biodone(bio) */ |
130 | biodone(bio); | |
984263bc MD |
131 | return 0; |
132 | } | |
984263bc MD |
133 | if (sp->sw_vp == NULL) { |
134 | bp->b_error = ENODEV; | |
135 | bp->b_flags |= B_ERROR; | |
81b5c339 MD |
136 | /* I/O was never started on nbio, must biodone(bio) */ |
137 | biodone(bio); | |
984263bc MD |
138 | return 0; |
139 | } | |
140 | ||
141 | /* | |
81b5c339 MD |
142 | * Issue a strategy call on the appropriate swap vnode. Note that |
143 | * bp->b_vp is not modified. Strategy code is always supposed to | |
144 | * use the passed vp. | |
145 | * | |
c34665ce MD |
146 | * We have to use vn_strategy() here even if we know we have a |
147 | * device in order to properly break up requests which exceed the | |
148 | * device's DMA limits. | |
984263bc | 149 | */ |
81b5c339 | 150 | vn_strategy(sp->sw_vp, nbio); |
984263bc MD |
151 | return 0; |
152 | } | |
153 | ||
154 | /* | |
155 | * Create a special vnode op vector for swapdev_vp - we only use | |
81b5c339 | 156 | * vn_strategy(), everything else returns an error. |
984263bc | 157 | */ |
66a1ddf5 MD |
158 | static struct vop_ops swapdev_vnode_vops = { |
159 | .vop_default = vop_defaultop, | |
160 | .vop_strategy = swapdev_strategy | |
984263bc | 161 | }; |
66a1ddf5 | 162 | static struct vop_ops *swapdev_vnode_vops_p = &swapdev_vnode_vops; |
984263bc | 163 | |
66a1ddf5 | 164 | VNODEOP_SET(swapdev_vnode_vops); |
984263bc MD |
165 | |
166 | /* | |
41c20dac MD |
167 | * swapon_args(char *name) |
168 | * | |
984263bc MD |
169 | * System call swapon(name) enables swapping on device name, |
170 | * which must be in the swdevsw. Return EBUSY | |
171 | * if already swapping on this device. | |
3919ced0 MD |
172 | * |
173 | * MPALMOSTSAFE | |
984263bc | 174 | */ |
984263bc | 175 | int |
753fd850 | 176 | sys_swapon(struct swapon_args *uap) |
984263bc | 177 | { |
dadab5e9 | 178 | struct thread *td = curthread; |
984263bc | 179 | struct vattr attr; |
5f910b2f | 180 | struct vnode *vp; |
fad57d0e | 181 | struct nlookupdata nd; |
984263bc | 182 | int error; |
dadab5e9 | 183 | struct ucred *cred; |
984263bc | 184 | |
9910d07b | 185 | cred = td->td_ucred; |
dadab5e9 | 186 | |
895c1f85 | 187 | error = priv_check(td, PRIV_ROOT); |
984263bc MD |
188 | if (error) |
189 | return (error); | |
190 | ||
3919ced0 | 191 | get_mplock(); |
fad57d0e MD |
192 | vp = NULL; |
193 | error = nlookup_init(&nd, uap->name, UIO_USERSPACE, NLC_FOLLOW); | |
194 | if (error == 0) | |
195 | error = nlookup(&nd); | |
196 | if (error == 0) | |
28623bf9 | 197 | error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); |
fad57d0e | 198 | nlookup_done(&nd); |
3919ced0 MD |
199 | if (error) { |
200 | rel_mplock(); | |
984263bc | 201 | return (error); |
3919ced0 | 202 | } |
984263bc | 203 | |
3919ced0 | 204 | if (vn_isdisk(vp, &error)) { |
e4c9c0c8 | 205 | error = swaponvp(td, vp, 0); |
3919ced0 MD |
206 | } else if (vp->v_type == VREG && vp->v_tag == VT_NFS && |
207 | (error = VOP_GETATTR(vp, &attr)) == 0) { | |
984263bc MD |
208 | /* |
209 | * Allow direct swapping to NFS regular files in the same | |
210 | * way that nfs_mountroot() sets up diskless swapping. | |
211 | */ | |
e4c9c0c8 | 212 | error = swaponvp(td, vp, attr.va_size / DEV_BSIZE); |
984263bc | 213 | } |
984263bc MD |
214 | if (error) |
215 | vrele(vp); | |
3919ced0 | 216 | rel_mplock(); |
984263bc MD |
217 | |
218 | return (error); | |
219 | } | |
220 | ||
221 | /* | |
222 | * Swfree(index) frees the index'th portion of the swap map. | |
223 | * Each of the nswdev devices provides 1/nswdev'th of the swap | |
224 | * space, which is laid out with blocks of dmmax pages circularly | |
225 | * among the devices. | |
226 | * | |
227 | * The new swap code uses page-sized blocks. The old swap code used | |
228 | * DEV_BSIZE'd chunks. | |
229 | * | |
230 | * XXX locking when multiple swapon's run in parallel | |
231 | */ | |
232 | int | |
79634a66 | 233 | swaponvp(struct thread *td, struct vnode *vp, u_quad_t nblks) |
984263bc | 234 | { |
79634a66 | 235 | swblk_t aligned_nblks; |
e0fc5693 | 236 | int64_t dpsize; |
e4c9c0c8 | 237 | struct ucred *cred; |
5f910b2f RG |
238 | struct swdevt *sp; |
239 | swblk_t vsbase; | |
984263bc | 240 | swblk_t dvbase; |
b13267a5 | 241 | cdev_t dev; |
e4c9c0c8 | 242 | int index; |
984263bc | 243 | int error; |
e4c9c0c8 | 244 | long blk; |
dadab5e9 | 245 | |
9910d07b | 246 | cred = td->td_ucred; |
984263bc MD |
247 | |
248 | if (!swapdev_vp) { | |
66a1ddf5 | 249 | error = getspecialvnode(VT_NON, NULL, &swapdev_vnode_vops_p, |
3446c007 | 250 | &swapdev_vp, 0, 0); |
984263bc MD |
251 | if (error) |
252 | panic("Cannot get vnode for swapdev"); | |
253 | swapdev_vp->v_type = VNON; /* Untyped */ | |
5fd012e0 | 254 | vx_unlock(swapdev_vp); |
984263bc MD |
255 | } |
256 | ||
984263bc MD |
257 | for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) { |
258 | if (sp->sw_vp == vp) | |
259 | return EBUSY; | |
260 | if (!sp->sw_vp) | |
261 | goto found; | |
262 | ||
263 | } | |
264 | return EINVAL; | |
265 | found: | |
ca466bae | 266 | vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
87de5057 | 267 | error = VOP_OPEN(vp, FREAD | FWRITE, cred, NULL); |
a11aaa81 | 268 | vn_unlock(vp); |
984263bc MD |
269 | if (error) |
270 | return (error); | |
271 | ||
e4c9c0c8 MD |
272 | /* |
273 | * v_rdev is not valid until after the VOP_OPEN() call. dev_psize() | |
274 | * must be supported if a character device has been specified. | |
275 | */ | |
276 | if (vp->v_type == VCHR) | |
277 | dev = vp->v_rdev; | |
278 | else | |
028066b1 | 279 | dev = NULL; |
e4c9c0c8 | 280 | |
e0fc5693 MD |
281 | if (nblks == 0 && dev != NULL) { |
282 | dpsize = dev_dpsize(dev); | |
283 | if (dpsize == -1) { | |
284 | VOP_CLOSE(vp, FREAD | FWRITE); | |
285 | return (ENXIO); | |
286 | } | |
79634a66 | 287 | nblks = (u_quad_t)dpsize; |
984263bc MD |
288 | } |
289 | if (nblks == 0) { | |
87de5057 | 290 | VOP_CLOSE(vp, FREAD | FWRITE); |
984263bc MD |
291 | return (ENXIO); |
292 | } | |
293 | ||
984263bc MD |
294 | /* |
295 | * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. | |
296 | * First chop nblks off to page-align it, then convert. | |
297 | * | |
298 | * sw->sw_nblks is in page-sized chunks now too. | |
299 | */ | |
79634a66 | 300 | nblks &= ~(u_quad_t)(ctodb(1) - 1); |
984263bc MD |
301 | nblks = dbtoc(nblks); |
302 | ||
79634a66 MD |
303 | /* |
304 | * Post-conversion nblks must not be >= BLIST_MAXBLKS, and | |
305 | * we impose a 4-swap-device limit so we have to divide it out | |
306 | * further. Going beyond this will result in overflows in the | |
307 | * blist code. | |
308 | * | |
309 | * Post-conversion nblks must fit within a (swblk_t), which | |
310 | * this test also ensures. | |
311 | */ | |
312 | if (nblks > BLIST_MAXBLKS / nswdev) { | |
313 | kprintf("exceeded maximum of %d blocks per swap unit\n", | |
314 | (int)BLIST_MAXBLKS / nswdev); | |
315 | VOP_CLOSE(vp, FREAD | FWRITE); | |
316 | return (ENXIO); | |
317 | } | |
318 | ||
984263bc MD |
319 | sp->sw_vp = vp; |
320 | sp->sw_dev = dev2udev(dev); | |
321 | sp->sw_device = dev; | |
322 | sp->sw_flags |= SW_FREED; | |
79634a66 | 323 | sp->sw_nblks = (swblk_t)nblks; |
984263bc MD |
324 | |
325 | /* | |
326 | * nblks, nswap, and dmmax are PAGE_SIZE'd parameters now, not | |
327 | * DEV_BSIZE'd. aligned_nblks is used to calculate the | |
328 | * size of the swap bitmap, taking into account the stripe size. | |
329 | */ | |
79634a66 | 330 | aligned_nblks = (swblk_t)((nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1)); |
984263bc MD |
331 | |
332 | if (aligned_nblks * nswdev > nswap) | |
333 | nswap = aligned_nblks * nswdev; | |
334 | ||
335 | if (swapblist == NULL) | |
336 | swapblist = blist_create(nswap); | |
337 | else | |
338 | blist_resize(&swapblist, nswap, 0); | |
339 | ||
340 | for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) { | |
341 | blk = min(nblks - dvbase, dmmax); | |
342 | vsbase = index * dmmax + dvbase * nswdev; | |
343 | blist_free(swapblist, vsbase, blk); | |
344 | vm_swap_size += blk; | |
345 | } | |
c84c24da | 346 | swap_pager_newswap(); |
984263bc MD |
347 | |
348 | return (0); | |
349 | } |