| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1988 University of Utah. | |
| 3 | * Copyright (c) 1990, 1993 | |
| 4 | * The Regents of the University of California. All rights reserved. | |
| 5 | * | |
| 6 | * This code is derived from software contributed to Berkeley by | |
| 7 | * the Systems Programming Group of the University of Utah Computer | |
| 8 | * Science Department. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * from: Utah Hdr: vn.c 1.13 94/04/02 | |
| 39 | * | |
| 40 | * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 | |
| 41 | * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $ | |
| 42 | */ | |
| 43 | ||
| 44 | /* | |
| 45 | * Vnode disk driver. | |
| 46 | * | |
| 47 | * Block/character interface to a vnode. Allows one to treat a file | |
| 48 | * as a disk (e.g. build a filesystem in it, mount it, etc.). | |
| 49 | * | |
| 54078292 | 50 | * NOTE 1: There is a security issue involved with this driver. |
| 984263bc MD |
51 | * Once mounted all access to the contents of the "mapped" file via |
| 52 | * the special file is controlled by the permissions on the special | |
| 53 | * file, the protection of the mapped file is ignored (effectively, | |
| 54 | * by using root credentials in all transactions). | |
| 55 | * | |
| 54078292 | 56 | * NOTE 2: Doesn't interact with leases, should it? |
| 984263bc MD |
57 | */ |
| 58 | ||
| 95db3aac | 59 | #include "use_vn.h" |
| 984263bc MD |
60 | #include <sys/param.h> |
| 61 | #include <sys/systm.h> | |
| 62 | #include <sys/kernel.h> | |
| 984263bc | 63 | #include <sys/proc.h> |
| 895c1f85 | 64 | #include <sys/priv.h> |
| fad57d0e | 65 | #include <sys/nlookup.h> |
| 984263bc MD |
66 | #include <sys/buf.h> |
| 67 | #include <sys/malloc.h> | |
| 68 | #include <sys/mount.h> | |
| 69 | #include <sys/vnode.h> | |
| 70 | #include <sys/fcntl.h> | |
| 71 | #include <sys/conf.h> | |
| 984263bc | 72 | #include <sys/diskslice.h> |
| 84f8b009 | 73 | #include <sys/disk.h> |
| 984263bc | 74 | #include <sys/stat.h> |
| 984263bc MD |
75 | #include <sys/module.h> |
| 76 | #include <sys/vnioctl.h> | |
| 77 | ||
| 78 | #include <vm/vm.h> | |
| 79 | #include <vm/vm_object.h> | |
| 80 | #include <vm/vm_page.h> | |
| 81 | #include <vm/vm_pager.h> | |
| 82 | #include <vm/vm_pageout.h> | |
| 83 | #include <vm/swap_pager.h> | |
| 84 | #include <vm/vm_extern.h> | |
| 85 | #include <vm/vm_zone.h> | |
| 2c1e28dd | 86 | #include <sys/devfs.h> |
| 984263bc MD |
87 | |
| 88 | static d_ioctl_t vnioctl; | |
| 89 | static d_open_t vnopen; | |
| 90 | static d_close_t vnclose; | |
| 91 | static d_psize_t vnsize; | |
| 92 | static d_strategy_t vnstrategy; | |
| 8be7edad | 93 | static d_clone_t vnclone; |
| cc80c90e | 94 | |
| 7c47ea01 | 95 | MALLOC_DEFINE(M_VN, "vn_softc", "vn driver structures"); |
| 8be7edad | 96 | DEVFS_DECLARE_CLONE_BITMAP(vn); |
| 95db3aac AH |
97 | |
| 98 | #if NVN <= 1 | |
| 8be7edad | 99 | #define VN_PREALLOCATED_UNITS 4 |
| 95db3aac AH |
100 | #else |
| 101 | #define VN_PREALLOCATED_UNITS NVN | |
| 102 | #endif | |
| 984263bc | 103 | |
| 984263bc MD |
104 | #define VN_BSIZE_BEST 8192 |
| 105 | ||
| 106 | /* | |
| fef8985e | 107 | * dev_ops |
| 984263bc | 108 | * D_DISK we want to look like a disk |
| 10f3fee5 | 109 | * D_CANFREE We support BUF_CMD_FREEBLKS |
| 984263bc MD |
110 | */ |
| 111 | ||
| fef8985e | 112 | static struct dev_ops vn_ops = { |
| 88abd8b5 | 113 | { "vn", 0, D_DISK | D_CANFREE }, |
| fef8985e MD |
114 | .d_open = vnopen, |
| 115 | .d_close = vnclose, | |
| 116 | .d_read = physread, | |
| 117 | .d_write = physwrite, | |
| 118 | .d_ioctl = vnioctl, | |
| 119 | .d_strategy = vnstrategy, | |
| 120 | .d_psize = vnsize | |
| 984263bc MD |
121 | }; |
| 122 | ||
| 984263bc MD |
123 | struct vn_softc { |
| 124 | int sc_unit; | |
| 125 | int sc_flags; /* flags */ | |
| e0fc5693 | 126 | u_int64_t sc_size; /* size of vn, sc_secsize scale */ |
| 984263bc | 127 | int sc_secsize; /* sector size */ |
| 8be7edad | 128 | struct disk sc_disk; |
| 984263bc MD |
129 | struct vnode *sc_vp; /* vnode if not NULL */ |
| 130 | vm_object_t sc_object; /* backing object if not NULL */ | |
| 131 | struct ucred *sc_cred; /* credentials */ | |
| 132 | int sc_maxactive; /* max # of active requests */ | |
| 133 | struct buf sc_tab; /* transfer queue */ | |
| 134 | u_long sc_options; /* options */ | |
| 13983537 | 135 | cdev_t sc_dev; /* devices that refer to this unit */ |
| 984263bc MD |
136 | SLIST_ENTRY(vn_softc) sc_list; |
| 137 | }; | |
| 138 | ||
| 139 | static SLIST_HEAD(, vn_softc) vn_list; | |
| 140 | ||
| 141 | /* sc_flags */ | |
| 142 | #define VNF_INITED 0x01 | |
| 143 | #define VNF_READONLY 0x02 | |
| 13983537 AH |
144 | #define VNF_OPENED 0x10 |
| 145 | #define VNF_DESTROY 0x20 | |
| 984263bc MD |
146 | |
| 147 | static u_long vn_options; | |
| 148 | ||
| 149 | #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) | |
| 150 | #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) | |
| 151 | ||
| 152 | static int vnsetcred (struct vn_softc *vn, struct ucred *cred); | |
| 153 | static void vnclear (struct vn_softc *vn); | |
| 30c1fde0 | 154 | static int vnget (cdev_t dev, struct vn_softc *vn , struct vn_user *vnu); |
| 984263bc | 155 | static int vn_modevent (module_t, int, void *); |
| b13267a5 MD |
156 | static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); |
| 157 | static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); | |
| 13983537 | 158 | static cdev_t vn_create(int unit, struct devfs_bitmap *bitmap, int clone); |
| cc80c90e AH |
159 | |
| 160 | static int | |
| 161 | vnclone(struct dev_clone_args *ap) | |
| 162 | { | |
| 163 | int unit; | |
| 164 | ||
| 165 | unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(vn), 0); | |
| 13983537 | 166 | ap->a_dev = vn_create(unit, &DEVFS_CLONE_BITMAP(vn), 1); |
| cc80c90e AH |
167 | |
| 168 | return 0; | |
| 169 | } | |
| 984263bc MD |
170 | |
| 171 | static int | |
| fef8985e | 172 | vnclose(struct dev_close_args *ap) |
| 984263bc | 173 | { |
| 13983537 AH |
174 | cdev_t dev = ap->a_head.a_dev; |
| 175 | struct vn_softc *vn; | |
| 176 | ||
| 177 | vn = dev->si_drv1; | |
| 178 | KKASSERT(vn != NULL); | |
| 179 | ||
| 180 | vn->sc_flags &= ~VNF_OPENED; | |
| 181 | ||
| 182 | /* The disk has been detached and can now be safely destroyed */ | |
| 183 | if (vn->sc_flags & VNF_DESTROY) { | |
| 184 | KKASSERT(disk_getopencount(&vn->sc_disk) == 0); | |
| 185 | disk_destroy(&vn->sc_disk); | |
| 186 | devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(vn), dkunit(dev)); | |
| 187 | SLIST_REMOVE(&vn_list, vn, vn_softc, sc_list); | |
| 7c47ea01 | 188 | kfree(vn, M_VN); |
| 13983537 | 189 | } |
| 984263bc MD |
190 | return (0); |
| 191 | } | |
| 192 | ||
| 8be7edad MD |
193 | static struct vn_softc * |
| 194 | vncreatevn(void) | |
| 195 | { | |
| 196 | struct vn_softc *vn; | |
| 197 | ||
| 7c47ea01 | 198 | vn = kmalloc(sizeof *vn, M_VN, M_WAITOK | M_ZERO); |
| 8be7edad MD |
199 | return vn; |
| 200 | } | |
| 201 | ||
| 202 | static void | |
| 203 | vninitvn(struct vn_softc *vn, cdev_t dev) | |
| 204 | { | |
| 205 | int unit; | |
| 206 | ||
| 207 | KKASSERT(vn != NULL); | |
| 208 | KKASSERT(dev != NULL); | |
| 209 | unit = dkunit(dev); | |
| 210 | ||
| 211 | vn->sc_unit = unit; | |
| 212 | dev->si_drv1 = vn; | |
| 13983537 | 213 | vn->sc_dev = dev; |
| 8be7edad | 214 | |
| 13983537 | 215 | SLIST_INSERT_HEAD(&vn_list, vn, sc_list); |
| 984263bc MD |
216 | } |
| 217 | ||
| 218 | static int | |
| fef8985e | 219 | vnopen(struct dev_open_args *ap) |
| 984263bc | 220 | { |
| b13267a5 | 221 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
222 | struct vn_softc *vn; |
| 223 | ||
| 224 | /* | |
| 225 | * Locate preexisting device | |
| 226 | */ | |
| 227 | ||
| 13983537 AH |
228 | vn = dev->si_drv1; |
| 229 | KKASSERT(vn != NULL); | |
| 984263bc MD |
230 | |
| 231 | /* | |
| 232 | * Update si_bsize fields for device. This data will be overriden by | |
| 233 | * the slice/parition code for vn accesses through partitions, and | |
| 234 | * used directly if you open the 'whole disk' device. | |
| 235 | * | |
| 236 | * si_bsize_best must be reinitialized in case VN has been | |
| 237 | * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency. | |
| 238 | */ | |
| 239 | dev->si_bsize_phys = vn->sc_secsize; | |
| 240 | dev->si_bsize_best = vn->sc_secsize; | |
| 241 | if (dev->si_bsize_best < VN_BSIZE_BEST) | |
| 242 | dev->si_bsize_best = VN_BSIZE_BEST; | |
| 243 | ||
| fef8985e | 244 | if ((ap->a_oflags & FWRITE) && (vn->sc_flags & VNF_READONLY)) |
| 984263bc MD |
245 | return (EACCES); |
| 246 | ||
| 247 | IFOPT(vn, VN_FOLLOW) | |
| e3869ec7 | 248 | kprintf("vnopen(%s, 0x%x, 0x%x)\n", |
| fef8985e | 249 | devtoname(dev), ap->a_oflags, ap->a_devtype); |
| 984263bc | 250 | |
| 13983537 | 251 | vn->sc_flags |= VNF_OPENED; |
| 984263bc MD |
252 | return(0); |
| 253 | } | |
| 254 | ||
| 255 | /* | |
| 256 | * vnstrategy: | |
| 257 | * | |
| 258 | * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls | |
| 107e9bcc | 259 | * for vnode-backed vn's, and the swap_pager_strategy() call for |
| 984263bc | 260 | * vm_object-backed vn's. |
| 984263bc | 261 | */ |
| fef8985e MD |
262 | static int |
| 263 | vnstrategy(struct dev_strategy_args *ap) | |
| 984263bc | 264 | { |
| b13267a5 | 265 | cdev_t dev = ap->a_head.a_dev; |
| fef8985e | 266 | struct bio *bio = ap->a_bio; |
| 81b5c339 MD |
267 | struct buf *bp; |
| 268 | struct bio *nbio; | |
| 984263bc MD |
269 | int unit; |
| 270 | struct vn_softc *vn; | |
| 271 | int error; | |
| 272 | ||
| 81b5c339 | 273 | unit = dkunit(dev); |
| 13983537 AH |
274 | vn = dev->si_drv1; |
| 275 | KKASSERT(vn != NULL); | |
| 81b5c339 MD |
276 | |
| 277 | bp = bio->bio_buf; | |
| 984263bc MD |
278 | |
| 279 | IFOPT(vn, VN_DEBUG) | |
| e3869ec7 | 280 | kprintf("vnstrategy(%p): unit %d\n", bp, unit); |
| 984263bc MD |
281 | |
| 282 | if ((vn->sc_flags & VNF_INITED) == 0) { | |
| 283 | bp->b_error = ENXIO; | |
| 284 | bp->b_flags |= B_ERROR; | |
| 81b5c339 | 285 | biodone(bio); |
| fef8985e | 286 | return(0); |
| 984263bc MD |
287 | } |
| 288 | ||
| 289 | bp->b_resid = bp->b_bcount; | |
| 290 | ||
| 8be7edad MD |
291 | /* |
| 292 | * The vnode device is using disk/slice label support. | |
| 293 | * | |
| 294 | * The dscheck() function is called for validating the | |
| 295 | * slices that exist ON the vnode device itself, and | |
| 296 | * translate the "slice-relative" block number, again. | |
| 297 | * dscheck() will call biodone() and return NULL if | |
| 298 | * we are at EOF or beyond the device size. | |
| 299 | */ | |
| 984263bc | 300 | |
| 8be7edad | 301 | nbio = bio; |
| 984263bc | 302 | |
| 81b5c339 MD |
303 | /* |
| 304 | * Use the translated nbio from this point on | |
| 305 | */ | |
| 10f3fee5 | 306 | if (vn->sc_vp && bp->b_cmd == BUF_CMD_FREEBLKS) { |
| 984263bc | 307 | /* |
| 10f3fee5 | 308 | * Freeblks is not handled for vnode-backed elements yet. |
| 984263bc | 309 | */ |
| 9a71d53f | 310 | bp->b_resid = 0; |
| 4414f2c9 | 311 | /* operation complete */ |
| 984263bc MD |
312 | } else if (vn->sc_vp) { |
| 313 | /* | |
| 314 | * VNODE I/O | |
| 315 | * | |
| 316 | * If an error occurs, we set B_ERROR but we do not set | |
| 317 | * B_INVAL because (for a write anyway), the buffer is | |
| 318 | * still valid. | |
| 319 | */ | |
| 320 | struct uio auio; | |
| 321 | struct iovec aiov; | |
| 322 | ||
| 323 | bzero(&auio, sizeof(auio)); | |
| 324 | ||
| 325 | aiov.iov_base = bp->b_data; | |
| 326 | aiov.iov_len = bp->b_bcount; | |
| 327 | auio.uio_iov = &aiov; | |
| 328 | auio.uio_iovcnt = 1; | |
| 54078292 | 329 | auio.uio_offset = nbio->bio_offset; |
| 984263bc | 330 | auio.uio_segflg = UIO_SYSSPACE; |
| 10f3fee5 | 331 | if (bp->b_cmd == BUF_CMD_READ) |
| 984263bc MD |
332 | auio.uio_rw = UIO_READ; |
| 333 | else | |
| 334 | auio.uio_rw = UIO_WRITE; | |
| 335 | auio.uio_resid = bp->b_bcount; | |
| dadab5e9 | 336 | auio.uio_td = curthread; |
| b527c4c5 MD |
337 | |
| 338 | /* | |
| 339 | * Don't use IO_DIRECT here, it really gets in the way | |
| 340 | * due to typical blocksize differences between the | |
| 341 | * fs backing the VN device and whatever is running on | |
| 342 | * the VN device. | |
| 343 | */ | |
| 3bdcc4d9 VS |
344 | switch (bp->b_cmd) { |
| 345 | case (BUF_CMD_READ): | |
| b527c4c5 MD |
346 | vn_lock(vn->sc_vp, LK_SHARED | LK_RETRY); |
| 347 | error = VOP_READ(vn->sc_vp, &auio, IO_RECURSE, | |
| 348 | vn->sc_cred); | |
| 3bdcc4d9 VS |
349 | break; |
| 350 | ||
| 351 | case (BUF_CMD_WRITE): | |
| b527c4c5 MD |
352 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
| 353 | error = VOP_WRITE(vn->sc_vp, &auio, IO_RECURSE, | |
| 354 | vn->sc_cred); | |
| 3bdcc4d9 VS |
355 | break; |
| 356 | ||
| 357 | case (BUF_CMD_FLUSH): | |
| 358 | auio.uio_resid = 0; | |
| 359 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); | |
| 360 | error = VOP_FSYNC(vn->sc_vp, MNT_WAIT, 0); | |
| 361 | break; | |
| 362 | default: | |
| 363 | auio.uio_resid = 0; | |
| 364 | error = 0; | |
| 365 | break; | |
| b527c4c5 | 366 | } |
| a11aaa81 | 367 | vn_unlock(vn->sc_vp); |
| 984263bc | 368 | bp->b_resid = auio.uio_resid; |
| 984263bc MD |
369 | if (error) { |
| 370 | bp->b_error = error; | |
| 371 | bp->b_flags |= B_ERROR; | |
| 372 | } | |
| 4414f2c9 | 373 | /* operation complete */ |
| 984263bc MD |
374 | } else if (vn->sc_object) { |
| 375 | /* | |
| 4414f2c9 | 376 | * OBJT_SWAP I/O (handles read, write, freebuf) |
| 984263bc | 377 | * |
| 4414f2c9 MD |
378 | * We have nothing to do if freeing blocks on a reserved |
| 379 | * swap area, othrewise execute the op. | |
| 984263bc | 380 | */ |
| 10f3fee5 | 381 | if (bp->b_cmd == BUF_CMD_FREEBLKS && TESTOPT(vn, VN_RESERVE)) { |
| 4414f2c9 MD |
382 | bp->b_resid = 0; |
| 383 | /* operation complete */ | |
| 984263bc | 384 | } else { |
| 107e9bcc | 385 | swap_pager_strategy(vn->sc_object, nbio); |
| fef8985e | 386 | return(0); |
| 4414f2c9 | 387 | /* NOT REACHED */ |
| 984263bc MD |
388 | } |
| 389 | } else { | |
| 4414f2c9 MD |
390 | bp->b_resid = bp->b_bcount; |
| 391 | bp->b_flags |= B_ERROR | B_INVAL; | |
| 984263bc | 392 | bp->b_error = EINVAL; |
| 4414f2c9 | 393 | /* operation complete */ |
| 984263bc | 394 | } |
| 4414f2c9 | 395 | biodone(nbio); |
| fef8985e | 396 | return(0); |
| 984263bc MD |
397 | } |
| 398 | ||
| 399 | /* ARGSUSED */ | |
| 400 | static int | |
| fef8985e | 401 | vnioctl(struct dev_ioctl_args *ap) |
| 984263bc | 402 | { |
| b13267a5 | 403 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
404 | struct vn_softc *vn; |
| 405 | struct vn_ioctl *vio; | |
| 406 | int error; | |
| 407 | u_long *f; | |
| 408 | ||
| 409 | vn = dev->si_drv1; | |
| fef8985e | 410 | IFOPT(vn,VN_FOLLOW) { |
| e3869ec7 | 411 | kprintf("vnioctl(%s, 0x%lx, %p, 0x%x): unit %d\n", |
| fef8985e | 412 | devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag, |
| 984263bc | 413 | dkunit(dev)); |
| fef8985e | 414 | } |
| 984263bc | 415 | |
| fef8985e | 416 | switch (ap->a_cmd) { |
| 984263bc MD |
417 | case VNIOCATTACH: |
| 418 | case VNIOCDETACH: | |
| 419 | case VNIOCGSET: | |
| 420 | case VNIOCGCLEAR: | |
| 30c1fde0 | 421 | case VNIOCGET: |
| 984263bc MD |
422 | case VNIOCUSET: |
| 423 | case VNIOCUCLEAR: | |
| 424 | goto vn_specific; | |
| 425 | } | |
| 426 | ||
| 8be7edad MD |
427 | #if 0 |
| 428 | if (dkslice(dev) != WHOLE_DISK_SLICE || | |
| 429 | dkpart(dev) != WHOLE_SLICE_PART) | |
| 430 | return (ENOTTY); | |
| 431 | #endif | |
| 984263bc MD |
432 | |
| 433 | vn_specific: | |
| 434 | ||
| 895c1f85 | 435 | error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0); |
| 984263bc MD |
436 | if (error) |
| 437 | return (error); | |
| 438 | ||
| fef8985e MD |
439 | vio = (struct vn_ioctl *)ap->a_data; |
| 440 | f = (u_long*)ap->a_data; | |
| 984263bc | 441 | |
| fef8985e | 442 | switch (ap->a_cmd) { |
| 984263bc MD |
443 | case VNIOCATTACH: |
| 444 | if (vn->sc_flags & VNF_INITED) | |
| 445 | return(EBUSY); | |
| 446 | ||
| 13983537 AH |
447 | if (vn->sc_flags & VNF_DESTROY) |
| 448 | return(ENXIO); | |
| 449 | ||
| 984263bc | 450 | if (vio->vn_file == NULL) |
| fef8985e | 451 | error = vniocattach_swap(vn, vio, dev, ap->a_fflag, ap->a_cred); |
| 984263bc | 452 | else |
| fef8985e | 453 | error = vniocattach_file(vn, vio, dev, ap->a_fflag, ap->a_cred); |
| 984263bc MD |
454 | break; |
| 455 | ||
| 456 | case VNIOCDETACH: | |
| 457 | if ((vn->sc_flags & VNF_INITED) == 0) | |
| 458 | return(ENXIO); | |
| 459 | /* | |
| 460 | * XXX handle i/o in progress. Return EBUSY, or wait, or | |
| 461 | * flush the i/o. | |
| 462 | * XXX handle multiple opens of the device. Return EBUSY, | |
| 463 | * or revoke the fd's. | |
| 464 | * How are these problems handled for removable and failing | |
| 465 | * hardware devices? (Hint: They are not) | |
| 466 | */ | |
| 13983537 | 467 | if ((disk_getopencount(&vn->sc_disk)) > 1) |
| c9f4065d SK |
468 | return (EBUSY); |
| 469 | ||
| 984263bc MD |
470 | vnclear(vn); |
| 471 | IFOPT(vn, VN_FOLLOW) | |
| e3869ec7 | 472 | kprintf("vnioctl: CLRed\n"); |
| cc80c90e AH |
473 | |
| 474 | if (dkunit(dev) >= VN_PREALLOCATED_UNITS) { | |
| 13983537 | 475 | vn->sc_flags |= VNF_DESTROY; |
| cc80c90e AH |
476 | } |
| 477 | ||
| 984263bc MD |
478 | break; |
| 479 | ||
| 30c1fde0 CT |
480 | case VNIOCGET: |
| 481 | error = vnget(dev, vn, (struct vn_user *) ap->a_data); | |
| 482 | break; | |
| 483 | ||
| 984263bc MD |
484 | case VNIOCGSET: |
| 485 | vn_options |= *f; | |
| 486 | *f = vn_options; | |
| 487 | break; | |
| 488 | ||
| 489 | case VNIOCGCLEAR: | |
| 490 | vn_options &= ~(*f); | |
| 491 | *f = vn_options; | |
| 492 | break; | |
| 493 | ||
| 494 | case VNIOCUSET: | |
| 495 | vn->sc_options |= *f; | |
| 496 | *f = vn->sc_options; | |
| 497 | break; | |
| 498 | ||
| 499 | case VNIOCUCLEAR: | |
| 500 | vn->sc_options &= ~(*f); | |
| 501 | *f = vn->sc_options; | |
| 502 | break; | |
| 503 | ||
| 504 | default: | |
| 505 | error = ENOTTY; | |
| 506 | break; | |
| 507 | } | |
| 508 | return(error); | |
| 509 | } | |
| 510 | ||
| 511 | /* | |
| 512 | * vniocattach_file: | |
| 513 | * | |
| 514 | * Attach a file to a VN partition. Return the size in the vn_size | |
| 515 | * field. | |
| 516 | */ | |
| 517 | ||
| 518 | static int | |
| b13267a5 | 519 | vniocattach_file(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, |
| fef8985e | 520 | int flag, struct ucred *cred) |
| 984263bc MD |
521 | { |
| 522 | struct vattr vattr; | |
| fad57d0e | 523 | struct nlookupdata nd; |
| 984263bc | 524 | int error, flags; |
| fad57d0e | 525 | struct vnode *vp; |
| 8be7edad | 526 | struct disk_info info; |
| 984263bc MD |
527 | |
| 528 | flags = FREAD|FWRITE; | |
| fad57d0e MD |
529 | error = nlookup_init(&nd, vio->vn_file, |
| 530 | UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); | |
| 531 | if (error) | |
| 532 | return (error); | |
| 533 | if ((error = vn_open(&nd, NULL, flags, 0)) != 0) { | |
| 984263bc | 534 | if (error != EACCES && error != EPERM && error != EROFS) |
| fad57d0e | 535 | goto done; |
| 984263bc | 536 | flags &= ~FWRITE; |
| fad57d0e MD |
537 | nlookup_done(&nd); |
| 538 | error = nlookup_init(&nd, vio->vn_file, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); | |
| 984263bc MD |
539 | if (error) |
| 540 | return (error); | |
| fad57d0e MD |
541 | if ((error = vn_open(&nd, NULL, flags, 0)) != 0) |
| 542 | goto done; | |
| 984263bc | 543 | } |
| fad57d0e MD |
544 | vp = nd.nl_open_vp; |
| 545 | if (vp->v_type != VREG || | |
| 87de5057 | 546 | (error = VOP_GETATTR(vp, &vattr))) { |
| fad57d0e MD |
547 | if (error == 0) |
| 548 | error = EINVAL; | |
| 549 | goto done; | |
| 984263bc | 550 | } |
| a11aaa81 | 551 | vn_unlock(vp); |
| 984263bc | 552 | vn->sc_secsize = DEV_BSIZE; |
| fad57d0e MD |
553 | vn->sc_vp = vp; |
| 554 | nd.nl_open_vp = NULL; | |
| 984263bc MD |
555 | |
| 556 | /* | |
| 557 | * If the size is specified, override the file attributes. Note that | |
| 558 | * the vn_size argument is in PAGE_SIZE sized blocks. | |
| 559 | */ | |
| 560 | if (vio->vn_size) | |
| e0fc5693 | 561 | vn->sc_size = vio->vn_size * PAGE_SIZE / vn->sc_secsize; |
| 984263bc MD |
562 | else |
| 563 | vn->sc_size = vattr.va_size / vn->sc_secsize; | |
| fef8985e | 564 | error = vnsetcred(vn, cred); |
| 984263bc | 565 | if (error) { |
| fad57d0e | 566 | vn->sc_vp = NULL; |
| 87de5057 | 567 | vn_close(vp, flags); |
| fad57d0e | 568 | goto done; |
| 984263bc MD |
569 | } |
| 570 | vn->sc_flags |= VNF_INITED; | |
| 571 | if (flags == FREAD) | |
| 572 | vn->sc_flags |= VNF_READONLY; | |
| 8be7edad MD |
573 | |
| 574 | /* | |
| 575 | * Set the disk info so that probing is triggered | |
| 576 | */ | |
| 577 | bzero(&info, sizeof(struct disk_info)); | |
| 578 | info.d_media_blksize = vn->sc_secsize; | |
| 579 | info.d_media_blocks = vn->sc_size; | |
| 580 | /* | |
| 581 | * reserve mbr sector for backwards compatibility | |
| 582 | * when no slices exist. | |
| 583 | */ | |
| 8d3475ef | 584 | info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE; |
| 8be7edad MD |
585 | info.d_secpertrack = 32; |
| 586 | info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); | |
| 587 | info.d_secpercyl = info.d_secpertrack * info.d_nheads; | |
| 588 | info.d_ncylinders = vn->sc_size / info.d_secpercyl; | |
| 589 | disk_setdiskinfo_sync(&vn->sc_disk, &info); | |
| 590 | ||
| 591 | error = dev_dopen(dev, flag, S_IFCHR, cred); | |
| 592 | if (error) | |
| 593 | vnclear(vn); | |
| 594 | ||
| 984263bc | 595 | IFOPT(vn, VN_FOLLOW) |
| e0fc5693 | 596 | kprintf("vnioctl: SET vp %p size %llx blks\n", |
| 665d7d25 | 597 | vn->sc_vp, (long long)vn->sc_size); |
| fad57d0e MD |
598 | done: |
| 599 | nlookup_done(&nd); | |
| 600 | return(error); | |
| 984263bc MD |
601 | } |
| 602 | ||
| 603 | /* | |
| 604 | * vniocattach_swap: | |
| 605 | * | |
| 606 | * Attach swap backing store to a VN partition of the size specified | |
| 607 | * in vn_size. | |
| 608 | */ | |
| 609 | ||
| 610 | static int | |
| b13267a5 | 611 | vniocattach_swap(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, |
| fef8985e | 612 | int flag, struct ucred *cred) |
| 984263bc MD |
613 | { |
| 614 | int error; | |
| 8be7edad | 615 | struct disk_info info; |
| 984263bc MD |
616 | |
| 617 | /* | |
| 618 | * Range check. Disallow negative sizes or any size less then the | |
| 619 | * size of a page. Then round to a page. | |
| 620 | */ | |
| 621 | ||
| 622 | if (vio->vn_size <= 0) | |
| 623 | return(EDOM); | |
| 624 | ||
| 625 | /* | |
| 626 | * Allocate an OBJT_SWAP object. | |
| 627 | * | |
| 628 | * sc_secsize is PAGE_SIZE'd | |
| 629 | * | |
| 630 | * vio->vn_size is in PAGE_SIZE'd chunks. | |
| 631 | * sc_size must be in PAGE_SIZE'd chunks. | |
| 632 | * Note the truncation. | |
| 633 | */ | |
| 634 | ||
| 635 | vn->sc_secsize = PAGE_SIZE; | |
| 636 | vn->sc_size = vio->vn_size; | |
| 5a648714 MD |
637 | vn->sc_object = swap_pager_alloc(NULL, |
| 638 | vn->sc_secsize * (off_t)vio->vn_size, | |
| 639 | VM_PROT_DEFAULT, 0); | |
| 984263bc MD |
640 | IFOPT(vn, VN_RESERVE) { |
| 641 | if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) { | |
| 642 | vm_pager_deallocate(vn->sc_object); | |
| 643 | vn->sc_object = NULL; | |
| 644 | return(EDOM); | |
| 645 | } | |
| 646 | } | |
| 647 | vn->sc_flags |= VNF_INITED; | |
| 648 | ||
| fef8985e | 649 | error = vnsetcred(vn, cred); |
| 984263bc | 650 | if (error == 0) { |
| 8be7edad MD |
651 | /* |
| 652 | * Set the disk info so that probing is triggered | |
| 653 | */ | |
| 654 | bzero(&info, sizeof(struct disk_info)); | |
| 655 | info.d_media_blksize = vn->sc_secsize; | |
| 656 | info.d_media_blocks = vn->sc_size; | |
| 657 | /* | |
| 658 | * reserve mbr sector for backwards compatibility | |
| 659 | * when no slices exist. | |
| 660 | */ | |
| 8d3475ef | 661 | info.d_dsflags = DSO_COMPATMBR | DSO_RAWPSIZE; |
| 8be7edad MD |
662 | info.d_secpertrack = 32; |
| 663 | info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); | |
| 664 | info.d_secpercyl = info.d_secpertrack * info.d_nheads; | |
| 665 | info.d_ncylinders = vn->sc_size / info.d_secpercyl; | |
| 666 | disk_setdiskinfo_sync(&vn->sc_disk, &info); | |
| 667 | ||
| 668 | error = dev_dopen(dev, flag, S_IFCHR, cred); | |
| 984263bc MD |
669 | } |
| 670 | if (error == 0) { | |
| 671 | IFOPT(vn, VN_FOLLOW) { | |
| e0fc5693 | 672 | kprintf("vnioctl: SET vp %p size %llx\n", |
| 665d7d25 | 673 | vn->sc_vp, (long long)vn->sc_size); |
| 984263bc MD |
674 | } |
| 675 | } | |
| 676 | if (error) | |
| 677 | vnclear(vn); | |
| 678 | return(error); | |
| 679 | } | |
| 680 | ||
| 681 | /* | |
| 682 | * Duplicate the current processes' credentials. Since we are called only | |
| 683 | * as the result of a SET ioctl and only root can do that, any future access | |
| 684 | * to this "disk" is essentially as root. Note that credentials may change | |
| 685 | * if some other uid can write directly to the mapped file (NFS). | |
| 686 | */ | |
| 687 | int | |
| 688 | vnsetcred(struct vn_softc *vn, struct ucred *cred) | |
| 689 | { | |
| 690 | char *tmpbuf; | |
| 691 | int error = 0; | |
| 692 | ||
| 693 | /* | |
| 694 | * Set credits in our softc | |
| 695 | */ | |
| 696 | ||
| 697 | if (vn->sc_cred) | |
| 698 | crfree(vn->sc_cred); | |
| 699 | vn->sc_cred = crdup(cred); | |
| 700 | ||
| 701 | /* | |
| 702 | * Horrible kludge to establish credentials for NFS XXX. | |
| 703 | */ | |
| 704 | ||
| 705 | if (vn->sc_vp) { | |
| 706 | struct uio auio; | |
| 707 | struct iovec aiov; | |
| 708 | ||
| efda3bd0 | 709 | tmpbuf = kmalloc(vn->sc_secsize, M_TEMP, M_WAITOK); |
| 984263bc MD |
710 | bzero(&auio, sizeof(auio)); |
| 711 | ||
| 712 | aiov.iov_base = tmpbuf; | |
| 713 | aiov.iov_len = vn->sc_secsize; | |
| 714 | auio.uio_iov = &aiov; | |
| 715 | auio.uio_iovcnt = 1; | |
| 716 | auio.uio_offset = 0; | |
| 717 | auio.uio_rw = UIO_READ; | |
| 718 | auio.uio_segflg = UIO_SYSSPACE; | |
| 719 | auio.uio_resid = aiov.iov_len; | |
| ca466bae | 720 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
| 984263bc | 721 | error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); |
| a11aaa81 | 722 | vn_unlock(vn->sc_vp); |
| efda3bd0 | 723 | kfree(tmpbuf, M_TEMP); |
| 984263bc MD |
724 | } |
| 725 | return (error); | |
| 726 | } | |
| 727 | ||
| 728 | void | |
| 729 | vnclear(struct vn_softc *vn) | |
| 730 | { | |
| 984263bc | 731 | IFOPT(vn, VN_FOLLOW) |
| e3869ec7 | 732 | kprintf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); |
| 984263bc MD |
733 | vn->sc_flags &= ~VNF_INITED; |
| 734 | if (vn->sc_vp != NULL) { | |
| 87de5057 MD |
735 | vn_close(vn->sc_vp, |
| 736 | (vn->sc_flags & VNF_READONLY) ? FREAD : (FREAD|FWRITE)); | |
| 984263bc MD |
737 | vn->sc_vp = NULL; |
| 738 | } | |
| 739 | vn->sc_flags &= ~VNF_READONLY; | |
| 740 | if (vn->sc_cred) { | |
| 741 | crfree(vn->sc_cred); | |
| 742 | vn->sc_cred = NULL; | |
| 743 | } | |
| 744 | if (vn->sc_object != NULL) { | |
| 745 | vm_pager_deallocate(vn->sc_object); | |
| 746 | vn->sc_object = NULL; | |
| 747 | } | |
| 8be7edad MD |
748 | |
| 749 | disk_unprobe(&vn->sc_disk); | |
| 750 | ||
| 984263bc MD |
751 | vn->sc_size = 0; |
| 752 | } | |
| 753 | ||
| 30c1fde0 CT |
754 | /* |
| 755 | * vnget: | |
| 756 | * | |
| 757 | * populate a struct vn_user for the VNIOCGET ioctl. | |
| 758 | * interface conventions defined in sys/sys/vnioctl.h. | |
| 759 | */ | |
| 760 | ||
| 761 | static int | |
| 762 | vnget(cdev_t dev, struct vn_softc *vn, struct vn_user *vnu) | |
| 763 | { | |
| 764 | int error, found = 0; | |
| 765 | char *freepath, *fullpath; | |
| 766 | struct vattr vattr; | |
| 767 | ||
| 768 | if (vnu->vnu_unit == -1) { | |
| 769 | vnu->vnu_unit = dkunit(dev); | |
| 770 | } | |
| 771 | else if (vnu->vnu_unit < 0) | |
| 772 | return (EINVAL); | |
| 773 | ||
| 774 | SLIST_FOREACH(vn, &vn_list, sc_list) { | |
| 775 | ||
| 776 | if(vn->sc_unit != vnu->vnu_unit) | |
| 777 | continue; | |
| 778 | ||
| 779 | found = 1; | |
| 780 | ||
| 781 | if (vn->sc_flags & VNF_INITED && vn->sc_vp != NULL) { | |
| 782 | ||
| 783 | /* note: u_cred checked in vnioctl above */ | |
| 784 | error = VOP_GETATTR(vn->sc_vp, &vattr); | |
| 785 | if (error) { | |
| 786 | kprintf("vnget: VOP_GETATTR for %p failed\n", | |
| 787 | vn->sc_vp); | |
| 788 | return (error); | |
| 789 | } | |
| 790 | ||
| 791 | error = vn_fullpath(curproc, vn->sc_vp, | |
| 5b4cfb7e | 792 | &fullpath, &freepath, 0); |
| 30c1fde0 CT |
793 | |
| 794 | if (error) { | |
| 795 | kprintf("vnget: unable to resolve vp %p\n", | |
| 796 | vn->sc_vp); | |
| 797 | return(error); | |
| 798 | } | |
| 799 | ||
| 800 | strlcpy(vnu->vnu_file, fullpath, | |
| 801 | sizeof(vnu->vnu_file)); | |
| 802 | kfree(freepath, M_TEMP); | |
| 803 | vnu->vnu_dev = vattr.va_fsid; | |
| 804 | vnu->vnu_ino = vattr.va_fileid; | |
| 805 | ||
| 806 | } | |
| 807 | else if (vn->sc_flags & VNF_INITED && vn->sc_object != NULL){ | |
| 808 | ||
| 809 | strlcpy(vnu->vnu_file, _VN_USER_SWAP, | |
| 810 | sizeof(vnu->vnu_file)); | |
| 811 | vnu->vnu_size = vn->sc_size; | |
| 812 | vnu->vnu_secsize = vn->sc_secsize; | |
| 813 | ||
| 814 | } else { | |
| 815 | ||
| 816 | bzero(vnu->vnu_file, sizeof(vnu->vnu_file)); | |
| 817 | vnu->vnu_dev = 0; | |
| 818 | vnu->vnu_ino = 0; | |
| 819 | ||
| 820 | } | |
| 821 | break; | |
| 822 | } | |
| 823 | ||
| 824 | if (!found) | |
| 825 | return(ENXIO); | |
| 826 | ||
| 827 | return(0); | |
| 828 | } | |
| 829 | ||
| fef8985e MD |
830 | static int |
| 831 | vnsize(struct dev_psize_args *ap) | |
| 984263bc | 832 | { |
| b13267a5 | 833 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
834 | struct vn_softc *vn; |
| 835 | ||
| 836 | vn = dev->si_drv1; | |
| 837 | if (!vn) | |
| fef8985e | 838 | return(ENXIO); |
| 984263bc | 839 | if ((vn->sc_flags & VNF_INITED) == 0) |
| fef8985e | 840 | return(ENXIO); |
| e0fc5693 | 841 | ap->a_result = (int64_t)vn->sc_size; |
| fef8985e | 842 | return(0); |
| 984263bc MD |
843 | } |
| 844 | ||
| cc80c90e | 845 | static cdev_t |
| 13983537 | 846 | vn_create(int unit, struct devfs_bitmap *bitmap, int clone) |
| 8be7edad | 847 | { |
| cc80c90e AH |
848 | struct vn_softc *vn; |
| 849 | struct disk_info info; | |
| 13983537 | 850 | cdev_t dev, ret_dev; |
| 8be7edad | 851 | |
| cc80c90e | 852 | vn = vncreatevn(); |
| 13983537 AH |
853 | if (clone) { |
| 854 | /* | |
| 855 | * For clone devices we need to return the top-level cdev, | |
| 856 | * not the raw dev we'd normally work with. | |
| 857 | */ | |
| 858 | dev = disk_create_clone(unit, &vn->sc_disk, &vn_ops); | |
| 859 | ret_dev = vn->sc_disk.d_cdev; | |
| 860 | } else { | |
| 861 | ret_dev = dev = disk_create(unit, &vn->sc_disk, &vn_ops); | |
| 862 | } | |
| cc80c90e | 863 | vninitvn(vn, dev); |
| 8be7edad | 864 | |
| cc80c90e AH |
865 | bzero(&info, sizeof(struct disk_info)); |
| 866 | info.d_media_blksize = 512; | |
| 867 | info.d_media_blocks = 0; | |
| 8d3475ef | 868 | info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE; |
| cc80c90e AH |
869 | info.d_secpertrack = 32; |
| 870 | info.d_nheads = 64; | |
| 871 | info.d_secpercyl = info.d_secpertrack * info.d_nheads; | |
| 872 | info.d_ncylinders = 0; | |
| 873 | disk_setdiskinfo_sync(&vn->sc_disk, &info); | |
| 874 | ||
| 875 | if (bitmap != NULL) | |
| 876 | devfs_clone_bitmap_set(bitmap, unit); | |
| 877 | ||
| 13983537 | 878 | return ret_dev; |
| 8be7edad | 879 | } |
| 8be7edad | 880 | |
| 984263bc MD |
881 | static int |
| 882 | vn_modevent(module_t mod, int type, void *data) | |
| 883 | { | |
| 884 | struct vn_softc *vn; | |
| cc80c90e | 885 | static cdev_t dev = NULL; |
| 8be7edad | 886 | int i; |
| 984263bc MD |
887 | |
| 888 | switch (type) { | |
| 889 | case MOD_LOAD: | |
| cc80c90e AH |
890 | dev = make_autoclone_dev(&vn_ops, &DEVFS_CLONE_BITMAP(vn), vnclone, UID_ROOT, |
| 891 | GID_OPERATOR, 0640, "vn"); | |
| 892 | ||
| 8be7edad | 893 | for (i = 0; i < VN_PREALLOCATED_UNITS; i++) { |
| 13983537 | 894 | vn_create(i, &DEVFS_CLONE_BITMAP(vn), 0); |
| 8be7edad | 895 | } |
| 984263bc | 896 | break; |
| 13983537 | 897 | |
| 984263bc | 898 | case MOD_UNLOAD: |
| 984263bc | 899 | case MOD_SHUTDOWN: |
| e2f51e29 | 900 | while ((vn = SLIST_FIRST(&vn_list)) != NULL) { |
| 13983537 AH |
901 | /* |
| 902 | * XXX: no idea if we can return EBUSY even in the | |
| 903 | * shutdown case, so err on the side of caution | |
| 904 | * and just rip stuff out on shutdown. | |
| 905 | */ | |
| 906 | if (type != MOD_SHUTDOWN) { | |
| 907 | if (vn->sc_flags & VNF_OPENED) | |
| 908 | return (EBUSY); | |
| 909 | } | |
| 910 | ||
| 911 | disk_destroy(&vn->sc_disk); | |
| 912 | ||
| 984263bc | 913 | SLIST_REMOVE_HEAD(&vn_list, sc_list); |
| 13983537 | 914 | |
| 984263bc MD |
915 | if (vn->sc_flags & VNF_INITED) |
| 916 | vnclear(vn); | |
| 13983537 | 917 | |
| 7c47ea01 | 918 | kfree(vn, M_VN); |
| 984263bc | 919 | } |
| 13983537 | 920 | destroy_autoclone_dev(dev, &DEVFS_CLONE_BITMAP(vn)); |
| cd29885a | 921 | dev_ops_remove_all(&vn_ops); |
| 984263bc MD |
922 | break; |
| 923 | default: | |
| 924 | break; | |
| 925 | } | |
| 926 | return 0; | |
| 927 | } | |
| 928 | ||
| 929 | DEV_MODULE(vn, vn_modevent, 0); |