| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1988 University of Utah. | |
| 3 | * Copyright (c) 1990, 1993 | |
| 4 | * The Regents of the University of California. All rights reserved. | |
| 5 | * | |
| 6 | * This code is derived from software contributed to Berkeley by | |
| 7 | * the Systems Programming Group of the University of Utah Computer | |
| 8 | * Science Department. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * from: Utah Hdr: vn.c 1.13 94/04/02 | |
| 39 | * | |
| 40 | * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 | |
| 41 | * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $ | |
| 4ecf7cc9 | 42 | * $DragonFly: src/sys/dev/disk/vn/vn.c,v 1.38 2008/07/01 02:02:53 dillon Exp $ |
| 984263bc MD |
43 | */ |
| 44 | ||
/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 2: Doesn't interact with leases, should it?
 */
| 59 | ||
| 60 | #include <sys/param.h> | |
| 61 | #include <sys/systm.h> | |
| 62 | #include <sys/kernel.h> | |
| 984263bc | 63 | #include <sys/proc.h> |
| fad57d0e | 64 | #include <sys/nlookup.h> |
| 984263bc MD |
65 | #include <sys/buf.h> |
| 66 | #include <sys/malloc.h> | |
| 67 | #include <sys/mount.h> | |
| 68 | #include <sys/vnode.h> | |
| 69 | #include <sys/fcntl.h> | |
| 70 | #include <sys/conf.h> | |
| 984263bc | 71 | #include <sys/diskslice.h> |
| 84f8b009 | 72 | #include <sys/disk.h> |
| 984263bc | 73 | #include <sys/stat.h> |
| 984263bc MD |
74 | #include <sys/module.h> |
| 75 | #include <sys/vnioctl.h> | |
| 76 | ||
| 77 | #include <vm/vm.h> | |
| 78 | #include <vm/vm_object.h> | |
| 79 | #include <vm/vm_page.h> | |
| 80 | #include <vm/vm_pager.h> | |
| 81 | #include <vm/vm_pageout.h> | |
| 82 | #include <vm/swap_pager.h> | |
| 83 | #include <vm/vm_extern.h> | |
| 84 | #include <vm/vm_zone.h> | |
| 85 | ||
| 86 | static d_ioctl_t vnioctl; | |
| 87 | static d_open_t vnopen; | |
| 88 | static d_close_t vnclose; | |
| 89 | static d_psize_t vnsize; | |
| 90 | static d_strategy_t vnstrategy; | |
| 91 | ||
| 92 | #define CDEV_MAJOR 43 | |
| 984263bc MD |
93 | |
| 94 | #define VN_BSIZE_BEST 8192 | |
| 95 | ||
| 96 | /* | |
| fef8985e | 97 | * dev_ops |
| 984263bc | 98 | * D_DISK we want to look like a disk |
| 10f3fee5 | 99 | * D_CANFREE We support BUF_CMD_FREEBLKS |
| 984263bc MD |
100 | */ |
| 101 | ||
| fef8985e MD |
102 | static struct dev_ops vn_ops = { |
| 103 | { "vn", CDEV_MAJOR, D_DISK | D_CANFREE }, | |
| 104 | .d_open = vnopen, | |
| 105 | .d_close = vnclose, | |
| 106 | .d_read = physread, | |
| 107 | .d_write = physwrite, | |
| 108 | .d_ioctl = vnioctl, | |
| 109 | .d_strategy = vnstrategy, | |
| 110 | .d_psize = vnsize | |
| 984263bc MD |
111 | }; |
| 112 | ||
| 984263bc MD |
113 | struct vn_softc { |
| 114 | int sc_unit; | |
| 115 | int sc_flags; /* flags */ | |
| e0fc5693 | 116 | u_int64_t sc_size; /* size of vn, sc_secsize scale */ |
| 984263bc | 117 | int sc_secsize; /* sector size */ |
| 84f8b009 MD |
118 | struct diskslices *sc_slices; /* XXX fields from struct disk */ |
| 119 | struct disk_info sc_info; /* XXX fields from struct disk */ | |
| 984263bc MD |
120 | struct vnode *sc_vp; /* vnode if not NULL */ |
| 121 | vm_object_t sc_object; /* backing object if not NULL */ | |
| 122 | struct ucred *sc_cred; /* credentials */ | |
| 123 | int sc_maxactive; /* max # of active requests */ | |
| 124 | struct buf sc_tab; /* transfer queue */ | |
| 125 | u_long sc_options; /* options */ | |
| b13267a5 | 126 | cdev_t sc_devlist; /* devices that refer to this unit */ |
| 984263bc MD |
127 | SLIST_ENTRY(vn_softc) sc_list; |
| 128 | }; | |
| 129 | ||
| 130 | static SLIST_HEAD(, vn_softc) vn_list; | |
| 131 | ||
| 132 | /* sc_flags */ | |
| 133 | #define VNF_INITED 0x01 | |
| 134 | #define VNF_READONLY 0x02 | |
| 135 | ||
| 136 | static u_long vn_options; | |
| 137 | ||
| 138 | #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) | |
| 139 | #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) | |
| 140 | ||
| 141 | static int vnsetcred (struct vn_softc *vn, struct ucred *cred); | |
| 142 | static void vnclear (struct vn_softc *vn); | |
| 30c1fde0 | 143 | static int vnget (cdev_t dev, struct vn_softc *vn , struct vn_user *vnu); |
| 984263bc | 144 | static int vn_modevent (module_t, int, void *); |
| b13267a5 MD |
145 | static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); |
| 146 | static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); | |
| 984263bc MD |
147 | |
| 148 | static int | |
| fef8985e | 149 | vnclose(struct dev_close_args *ap) |
| 984263bc | 150 | { |
| b13267a5 | 151 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
152 | struct vn_softc *vn = dev->si_drv1; |
| 153 | ||
| 154 | IFOPT(vn, VN_LABELS) | |
| 155 | if (vn->sc_slices != NULL) | |
| fef8985e | 156 | dsclose(dev, ap->a_devtype, vn->sc_slices); |
| 984263bc MD |
157 | return (0); |
| 158 | } | |
| 159 | ||
| e2f51e29 MD |
160 | /* |
| 161 | * Called only when si_drv1 is NULL. Locate the associated vn node and | |
| 162 | * attach the device to it. | |
| 163 | */ | |
| 984263bc | 164 | static struct vn_softc * |
| b13267a5 | 165 | vnfindvn(cdev_t dev) |
| 984263bc MD |
166 | { |
| 167 | int unit; | |
| 168 | struct vn_softc *vn; | |
| 169 | ||
| 170 | unit = dkunit(dev); | |
| e4c9c0c8 MD |
171 | SLIST_FOREACH(vn, &vn_list, sc_list) { |
| 172 | if (vn->sc_unit == unit) { | |
| e2f51e29 | 173 | dev->si_drv1 = vn; |
| e4c9c0c8 MD |
174 | dev->si_drv2 = vn->sc_devlist; |
| 175 | vn->sc_devlist = dev; | |
| 176 | reference_dev(dev); | |
| e4c9c0c8 | 177 | break; |
| 984263bc MD |
178 | } |
| 179 | } | |
| e4c9c0c8 | 180 | if (vn == NULL) { |
| efda3bd0 | 181 | vn = kmalloc(sizeof *vn, M_DEVBUF, M_WAITOK | M_ZERO); |
| 984263bc MD |
182 | vn->sc_unit = unit; |
| 183 | dev->si_drv1 = vn; | |
| fef8985e | 184 | vn->sc_devlist = make_dev(&vn_ops, 0, UID_ROOT, |
| e4c9c0c8 | 185 | GID_OPERATOR, 0640, "vn%d", unit); |
| e2f51e29 MD |
186 | if (vn->sc_devlist->si_drv1 == NULL) { |
| 187 | reference_dev(vn->sc_devlist); | |
| 188 | vn->sc_devlist->si_drv1 = vn; | |
| 189 | vn->sc_devlist->si_drv2 = NULL; | |
| 190 | } | |
| 984263bc | 191 | if (vn->sc_devlist != dev) { |
| e4c9c0c8 | 192 | dev->si_drv1 = vn; |
| 984263bc MD |
193 | dev->si_drv2 = vn->sc_devlist; |
| 194 | vn->sc_devlist = dev; | |
| e4c9c0c8 | 195 | reference_dev(dev); |
| 984263bc MD |
196 | } |
| 197 | SLIST_INSERT_HEAD(&vn_list, vn, sc_list); | |
| 198 | } | |
| 199 | return (vn); | |
| 200 | } | |
| 201 | ||
| 202 | static int | |
| fef8985e | 203 | vnopen(struct dev_open_args *ap) |
| 984263bc | 204 | { |
| b13267a5 | 205 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc | 206 | struct vn_softc *vn; |
| 84f8b009 | 207 | struct disk_info *info; |
| 984263bc MD |
208 | |
| 209 | /* | |
| 210 | * Locate preexisting device | |
| 211 | */ | |
| 212 | ||
| 213 | if ((vn = dev->si_drv1) == NULL) | |
| 214 | vn = vnfindvn(dev); | |
| 215 | ||
| 216 | /* | |
| 217 | * Update si_bsize fields for device. This data will be overriden by | |
| 218 | * the slice/parition code for vn accesses through partitions, and | |
| 219 | * used directly if you open the 'whole disk' device. | |
| 220 | * | |
| 221 | * si_bsize_best must be reinitialized in case VN has been | |
| 222 | * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency. | |
| 223 | */ | |
| 224 | dev->si_bsize_phys = vn->sc_secsize; | |
| 225 | dev->si_bsize_best = vn->sc_secsize; | |
| 226 | if (dev->si_bsize_best < VN_BSIZE_BEST) | |
| 227 | dev->si_bsize_best = VN_BSIZE_BEST; | |
| 228 | ||
| fef8985e | 229 | if ((ap->a_oflags & FWRITE) && (vn->sc_flags & VNF_READONLY)) |
| 984263bc MD |
230 | return (EACCES); |
| 231 | ||
| 232 | IFOPT(vn, VN_FOLLOW) | |
| e3869ec7 | 233 | kprintf("vnopen(%s, 0x%x, 0x%x)\n", |
| fef8985e | 234 | devtoname(dev), ap->a_oflags, ap->a_devtype); |
| 984263bc MD |
235 | |
| 236 | /* | |
| 237 | * Initialize label | |
| 238 | */ | |
| 239 | ||
| 240 | IFOPT(vn, VN_LABELS) { | |
| 241 | if (vn->sc_flags & VNF_INITED) { | |
| 84f8b009 MD |
242 | info = &vn->sc_info; |
| 243 | bzero(info, sizeof(*info)); | |
| 244 | info->d_media_blksize = vn->sc_secsize; | |
| 245 | info->d_media_blocks = vn->sc_size; | |
| 7dc62e37 MD |
246 | /* |
| 247 | * reserve mbr sector for backwards compatibility | |
| 248 | * when no slices exist. | |
| 249 | */ | |
| 250 | info->d_dsflags = DSO_COMPATMBR; | |
| 84f8b009 MD |
251 | |
| 252 | info->d_secpertrack = 32; | |
| 253 | info->d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); | |
| 254 | info->d_secpercyl = info->d_secpertrack * | |
| 255 | info->d_nheads; | |
| 256 | info->d_ncylinders = vn->sc_size / info->d_secpercyl; | |
| 257 | ||
| 258 | return (dsopen(dev, ap->a_devtype, 0, | |
| 259 | &vn->sc_slices, info)); | |
| 984263bc MD |
260 | } |
| 261 | if (dkslice(dev) != WHOLE_DISK_SLICE || | |
| 3127bf20 | 262 | dkpart(dev) != WHOLE_SLICE_PART || |
| fef8985e | 263 | ap->a_devtype != S_IFCHR) { |
| 984263bc MD |
264 | return (ENXIO); |
| 265 | } | |
| 266 | } | |
| 267 | return(0); | |
| 268 | } | |
| 269 | ||
| 270 | /* | |
| 271 | * vnstrategy: | |
| 272 | * | |
| 273 | * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls | |
| 274 | * for vnode-backed vn's, and the new vm_pager_strategy() call for | |
| 275 | * vm_object-backed vn's. | |
| 276 | * | |
| 277 | * Currently B_ASYNC is only partially handled - for OBJT_SWAP I/O only. | |
| 984263bc | 278 | */ |
| fef8985e MD |
279 | static int |
| 280 | vnstrategy(struct dev_strategy_args *ap) | |
| 984263bc | 281 | { |
| b13267a5 | 282 | cdev_t dev = ap->a_head.a_dev; |
| fef8985e | 283 | struct bio *bio = ap->a_bio; |
| 81b5c339 MD |
284 | struct buf *bp; |
| 285 | struct bio *nbio; | |
| 984263bc MD |
286 | int unit; |
| 287 | struct vn_softc *vn; | |
| 288 | int error; | |
| 289 | ||
| 81b5c339 MD |
290 | unit = dkunit(dev); |
| 291 | if ((vn = dev->si_drv1) == NULL) | |
| 292 | vn = vnfindvn(dev); | |
| 293 | ||
| 294 | bp = bio->bio_buf; | |
| 984263bc MD |
295 | |
| 296 | IFOPT(vn, VN_DEBUG) | |
| e3869ec7 | 297 | kprintf("vnstrategy(%p): unit %d\n", bp, unit); |
| 984263bc MD |
298 | |
| 299 | if ((vn->sc_flags & VNF_INITED) == 0) { | |
| 300 | bp->b_error = ENXIO; | |
| 301 | bp->b_flags |= B_ERROR; | |
| 81b5c339 | 302 | biodone(bio); |
| fef8985e | 303 | return(0); |
| 984263bc MD |
304 | } |
| 305 | ||
| 306 | bp->b_resid = bp->b_bcount; | |
| 307 | ||
| 308 | IFOPT(vn, VN_LABELS) { | |
| 6f76c57e HP |
309 | /* |
| 310 | * The vnode device is using disk/slice label support. | |
| 311 | * | |
| 312 | * The dscheck() function is called for validating the | |
| 313 | * slices that exist ON the vnode device itself, and | |
| 314 | * translate the "slice-relative" block number, again. | |
| 9a71d53f MD |
315 | * dscheck() will call biodone() and return NULL if |
| 316 | * we are at EOF or beyond the device size. | |
| 6f76c57e | 317 | */ |
| 81b5c339 MD |
318 | if (vn->sc_slices == NULL) { |
| 319 | nbio = bio; | |
| 320 | } else if ((nbio = dscheck(dev, bio, vn->sc_slices)) == NULL) { | |
| 4414f2c9 | 321 | goto done; |
| 984263bc MD |
322 | } |
| 323 | } else { | |
| e0fc5693 | 324 | int64_t pbn; /* in sc_secsize chunks */ |
| 984263bc MD |
325 | long sz; /* in sc_secsize chunks */ |
| 326 | ||
| 327 | /* | |
| 328 | * Check for required alignment. Transfers must be a valid | |
| 329 | * multiple of the sector size. | |
| 330 | */ | |
| 331 | if (bp->b_bcount % vn->sc_secsize != 0 || | |
| 54078292 | 332 | bio->bio_offset % vn->sc_secsize != 0) { |
| 4414f2c9 | 333 | goto bad; |
| 984263bc MD |
334 | } |
| 335 | ||
| 54078292 | 336 | pbn = bio->bio_offset / vn->sc_secsize; |
| 984263bc MD |
337 | sz = howmany(bp->b_bcount, vn->sc_secsize); |
| 338 | ||
| 339 | /* | |
| 4414f2c9 | 340 | * Check for an illegal pbn or EOF truncation |
| 984263bc | 341 | */ |
| 4414f2c9 MD |
342 | if (pbn < 0) |
| 343 | goto bad; | |
| 984263bc | 344 | if (pbn + sz > vn->sc_size) { |
| 4414f2c9 MD |
345 | if (pbn > vn->sc_size || (bp->b_flags & B_BNOCLIP)) |
| 346 | goto bad; | |
| 347 | if (pbn == vn->sc_size) { | |
| 348 | bp->b_resid = bp->b_bcount; | |
| 349 | bp->b_flags |= B_INVAL; | |
| 350 | goto done; | |
| 351 | } | |
| 984263bc | 352 | bp->b_bcount = (vn->sc_size - pbn) * vn->sc_secsize; |
| 984263bc | 353 | } |
| 81b5c339 | 354 | nbio = push_bio(bio); |
| e0fc5693 | 355 | nbio->bio_offset = pbn * vn->sc_secsize; |
| 984263bc MD |
356 | } |
| 357 | ||
| 81b5c339 MD |
358 | /* |
| 359 | * Use the translated nbio from this point on | |
| 360 | */ | |
| 10f3fee5 | 361 | if (vn->sc_vp && bp->b_cmd == BUF_CMD_FREEBLKS) { |
| 984263bc | 362 | /* |
| 10f3fee5 | 363 | * Freeblks is not handled for vnode-backed elements yet. |
| 984263bc | 364 | */ |
| 9a71d53f | 365 | bp->b_resid = 0; |
| 4414f2c9 | 366 | /* operation complete */ |
| 984263bc MD |
367 | } else if (vn->sc_vp) { |
| 368 | /* | |
| 369 | * VNODE I/O | |
| 370 | * | |
| 371 | * If an error occurs, we set B_ERROR but we do not set | |
| 372 | * B_INVAL because (for a write anyway), the buffer is | |
| 373 | * still valid. | |
| 374 | */ | |
| 375 | struct uio auio; | |
| 376 | struct iovec aiov; | |
| 377 | ||
| 378 | bzero(&auio, sizeof(auio)); | |
| 379 | ||
| 380 | aiov.iov_base = bp->b_data; | |
| 381 | aiov.iov_len = bp->b_bcount; | |
| 382 | auio.uio_iov = &aiov; | |
| 383 | auio.uio_iovcnt = 1; | |
| 54078292 | 384 | auio.uio_offset = nbio->bio_offset; |
| 984263bc | 385 | auio.uio_segflg = UIO_SYSSPACE; |
| 10f3fee5 | 386 | if (bp->b_cmd == BUF_CMD_READ) |
| 984263bc MD |
387 | auio.uio_rw = UIO_READ; |
| 388 | else | |
| 389 | auio.uio_rw = UIO_WRITE; | |
| 390 | auio.uio_resid = bp->b_bcount; | |
| dadab5e9 | 391 | auio.uio_td = curthread; |
| ca466bae | 392 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
| 10f3fee5 | 393 | if (bp->b_cmd == BUF_CMD_READ) |
| cb63d1bc | 394 | error = VOP_READ(vn->sc_vp, &auio, IO_DIRECT | IO_RECURSE, vn->sc_cred); |
| 984263bc | 395 | else |
| cb63d1bc | 396 | error = VOP_WRITE(vn->sc_vp, &auio, IO_DIRECT | IO_RECURSE, vn->sc_cred); |
| a11aaa81 | 397 | vn_unlock(vn->sc_vp); |
| 984263bc | 398 | bp->b_resid = auio.uio_resid; |
| 984263bc MD |
399 | if (error) { |
| 400 | bp->b_error = error; | |
| 401 | bp->b_flags |= B_ERROR; | |
| 402 | } | |
| 4414f2c9 | 403 | /* operation complete */ |
| 984263bc MD |
404 | } else if (vn->sc_object) { |
| 405 | /* | |
| 4414f2c9 | 406 | * OBJT_SWAP I/O (handles read, write, freebuf) |
| 984263bc | 407 | * |
| 4414f2c9 MD |
408 | * We have nothing to do if freeing blocks on a reserved |
| 409 | * swap area, othrewise execute the op. | |
| 984263bc | 410 | */ |
| 10f3fee5 | 411 | if (bp->b_cmd == BUF_CMD_FREEBLKS && TESTOPT(vn, VN_RESERVE)) { |
| 4414f2c9 MD |
412 | bp->b_resid = 0; |
| 413 | /* operation complete */ | |
| 984263bc | 414 | } else { |
| 81b5c339 | 415 | vm_pager_strategy(vn->sc_object, nbio); |
| fef8985e | 416 | return(0); |
| 4414f2c9 | 417 | /* NOT REACHED */ |
| 984263bc MD |
418 | } |
| 419 | } else { | |
| 4414f2c9 MD |
420 | bp->b_resid = bp->b_bcount; |
| 421 | bp->b_flags |= B_ERROR | B_INVAL; | |
| 984263bc | 422 | bp->b_error = EINVAL; |
| 4414f2c9 | 423 | /* operation complete */ |
| 984263bc | 424 | } |
| 4414f2c9 | 425 | biodone(nbio); |
| fef8985e | 426 | return(0); |
| 4414f2c9 MD |
427 | |
| 428 | /* | |
| 429 | * Shortcuts / check failures on the original bio (not nbio). | |
| 430 | */ | |
| 431 | bad: | |
| 432 | bp->b_error = EINVAL; | |
| 4414f2c9 MD |
433 | bp->b_flags |= B_ERROR | B_INVAL; |
| 434 | done: | |
| 435 | biodone(bio); | |
| fef8985e | 436 | return(0); |
| 984263bc MD |
437 | } |
| 438 | ||
| 439 | /* ARGSUSED */ | |
| 440 | static int | |
| fef8985e | 441 | vnioctl(struct dev_ioctl_args *ap) |
| 984263bc | 442 | { |
| b13267a5 | 443 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
444 | struct vn_softc *vn; |
| 445 | struct vn_ioctl *vio; | |
| 446 | int error; | |
| 447 | u_long *f; | |
| 448 | ||
| 449 | vn = dev->si_drv1; | |
| fef8985e | 450 | IFOPT(vn,VN_FOLLOW) { |
| e3869ec7 | 451 | kprintf("vnioctl(%s, 0x%lx, %p, 0x%x): unit %d\n", |
| fef8985e | 452 | devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag, |
| 984263bc | 453 | dkunit(dev)); |
| fef8985e | 454 | } |
| 984263bc | 455 | |
| fef8985e | 456 | switch (ap->a_cmd) { |
| 984263bc MD |
457 | case VNIOCATTACH: |
| 458 | case VNIOCDETACH: | |
| 459 | case VNIOCGSET: | |
| 460 | case VNIOCGCLEAR: | |
| 30c1fde0 | 461 | case VNIOCGET: |
| 984263bc MD |
462 | case VNIOCUSET: |
| 463 | case VNIOCUCLEAR: | |
| 464 | goto vn_specific; | |
| 465 | } | |
| 466 | ||
| 467 | IFOPT(vn,VN_LABELS) { | |
| 468 | if (vn->sc_slices != NULL) { | |
| fef8985e | 469 | error = dsioctl(dev, ap->a_cmd, ap->a_data, |
| 84f8b009 MD |
470 | ap->a_fflag, |
| 471 | &vn->sc_slices, &vn->sc_info); | |
| 984263bc MD |
472 | if (error != ENOIOCTL) |
| 473 | return (error); | |
| 474 | } | |
| 475 | if (dkslice(dev) != WHOLE_DISK_SLICE || | |
| 3127bf20 | 476 | dkpart(dev) != WHOLE_SLICE_PART) |
| 984263bc MD |
477 | return (ENOTTY); |
| 478 | } | |
| 479 | ||
| 480 | vn_specific: | |
| 481 | ||
| fef8985e | 482 | error = suser_cred(ap->a_cred, 0); |
| 984263bc MD |
483 | if (error) |
| 484 | return (error); | |
| 485 | ||
| fef8985e MD |
486 | vio = (struct vn_ioctl *)ap->a_data; |
| 487 | f = (u_long*)ap->a_data; | |
| 984263bc | 488 | |
| fef8985e | 489 | switch (ap->a_cmd) { |
| 984263bc MD |
490 | case VNIOCATTACH: |
| 491 | if (vn->sc_flags & VNF_INITED) | |
| 492 | return(EBUSY); | |
| 493 | ||
| 494 | if (vio->vn_file == NULL) | |
| fef8985e | 495 | error = vniocattach_swap(vn, vio, dev, ap->a_fflag, ap->a_cred); |
| 984263bc | 496 | else |
| fef8985e | 497 | error = vniocattach_file(vn, vio, dev, ap->a_fflag, ap->a_cred); |
| 984263bc MD |
498 | break; |
| 499 | ||
| 500 | case VNIOCDETACH: | |
| 501 | if ((vn->sc_flags & VNF_INITED) == 0) | |
| 502 | return(ENXIO); | |
| 503 | /* | |
| 504 | * XXX handle i/o in progress. Return EBUSY, or wait, or | |
| 505 | * flush the i/o. | |
| 506 | * XXX handle multiple opens of the device. Return EBUSY, | |
| 507 | * or revoke the fd's. | |
| 508 | * How are these problems handled for removable and failing | |
| 509 | * hardware devices? (Hint: They are not) | |
| 510 | */ | |
| 511 | vnclear(vn); | |
| 512 | IFOPT(vn, VN_FOLLOW) | |
| e3869ec7 | 513 | kprintf("vnioctl: CLRed\n"); |
| 984263bc MD |
514 | break; |
| 515 | ||
| 30c1fde0 CT |
516 | case VNIOCGET: |
| 517 | error = vnget(dev, vn, (struct vn_user *) ap->a_data); | |
| 518 | break; | |
| 519 | ||
| 984263bc MD |
520 | case VNIOCGSET: |
| 521 | vn_options |= *f; | |
| 522 | *f = vn_options; | |
| 523 | break; | |
| 524 | ||
| 525 | case VNIOCGCLEAR: | |
| 526 | vn_options &= ~(*f); | |
| 527 | *f = vn_options; | |
| 528 | break; | |
| 529 | ||
| 530 | case VNIOCUSET: | |
| 531 | vn->sc_options |= *f; | |
| 532 | *f = vn->sc_options; | |
| 533 | break; | |
| 534 | ||
| 535 | case VNIOCUCLEAR: | |
| 536 | vn->sc_options &= ~(*f); | |
| 537 | *f = vn->sc_options; | |
| 538 | break; | |
| 539 | ||
| 540 | default: | |
| 541 | error = ENOTTY; | |
| 542 | break; | |
| 543 | } | |
| 544 | return(error); | |
| 545 | } | |
| 546 | ||
| 547 | /* | |
| 548 | * vniocattach_file: | |
| 549 | * | |
| 550 | * Attach a file to a VN partition. Return the size in the vn_size | |
| 551 | * field. | |
| 552 | */ | |
| 553 | ||
| 554 | static int | |
| b13267a5 | 555 | vniocattach_file(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, |
| fef8985e | 556 | int flag, struct ucred *cred) |
| 984263bc MD |
557 | { |
| 558 | struct vattr vattr; | |
| fad57d0e | 559 | struct nlookupdata nd; |
| 984263bc | 560 | int error, flags; |
| fad57d0e | 561 | struct vnode *vp; |
| 984263bc MD |
562 | |
| 563 | flags = FREAD|FWRITE; | |
| fad57d0e MD |
564 | error = nlookup_init(&nd, vio->vn_file, |
| 565 | UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); | |
| 566 | if (error) | |
| 567 | return (error); | |
| 568 | if ((error = vn_open(&nd, NULL, flags, 0)) != 0) { | |
| 984263bc | 569 | if (error != EACCES && error != EPERM && error != EROFS) |
| fad57d0e | 570 | goto done; |
| 984263bc | 571 | flags &= ~FWRITE; |
| fad57d0e MD |
572 | nlookup_done(&nd); |
| 573 | error = nlookup_init(&nd, vio->vn_file, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); | |
| 984263bc MD |
574 | if (error) |
| 575 | return (error); | |
| fad57d0e MD |
576 | if ((error = vn_open(&nd, NULL, flags, 0)) != 0) |
| 577 | goto done; | |
| 984263bc | 578 | } |
| fad57d0e MD |
579 | vp = nd.nl_open_vp; |
| 580 | if (vp->v_type != VREG || | |
| 87de5057 | 581 | (error = VOP_GETATTR(vp, &vattr))) { |
| fad57d0e MD |
582 | if (error == 0) |
| 583 | error = EINVAL; | |
| 584 | goto done; | |
| 984263bc | 585 | } |
| a11aaa81 | 586 | vn_unlock(vp); |
| 984263bc | 587 | vn->sc_secsize = DEV_BSIZE; |
| fad57d0e MD |
588 | vn->sc_vp = vp; |
| 589 | nd.nl_open_vp = NULL; | |
| 984263bc MD |
590 | |
| 591 | /* | |
| 592 | * If the size is specified, override the file attributes. Note that | |
| 593 | * the vn_size argument is in PAGE_SIZE sized blocks. | |
| 594 | */ | |
| 595 | if (vio->vn_size) | |
| e0fc5693 | 596 | vn->sc_size = vio->vn_size * PAGE_SIZE / vn->sc_secsize; |
| 984263bc MD |
597 | else |
| 598 | vn->sc_size = vattr.va_size / vn->sc_secsize; | |
| fef8985e | 599 | error = vnsetcred(vn, cred); |
| 984263bc | 600 | if (error) { |
| fad57d0e | 601 | vn->sc_vp = NULL; |
| 87de5057 | 602 | vn_close(vp, flags); |
| fad57d0e | 603 | goto done; |
| 984263bc MD |
604 | } |
| 605 | vn->sc_flags |= VNF_INITED; | |
| 606 | if (flags == FREAD) | |
| 607 | vn->sc_flags |= VNF_READONLY; | |
| 608 | IFOPT(vn, VN_LABELS) { | |
| 609 | /* | |
| 610 | * Reopen so that `ds' knows which devices are open. | |
| 611 | * If this is the first VNIOCSET, then we've | |
| 612 | * guaranteed that the device is the cdev and that | |
| 613 | * no other slices or labels are open. Otherwise, | |
| 614 | * we rely on VNIOCCLR not being abused. | |
| 615 | */ | |
| fef8985e | 616 | error = dev_dopen(dev, flag, S_IFCHR, cred); |
| 984263bc MD |
617 | if (error) |
| 618 | vnclear(vn); | |
| 619 | } | |
| 620 | IFOPT(vn, VN_FOLLOW) | |
| e0fc5693 | 621 | kprintf("vnioctl: SET vp %p size %llx blks\n", |
| 984263bc | 622 | vn->sc_vp, vn->sc_size); |
| fad57d0e MD |
623 | done: |
| 624 | nlookup_done(&nd); | |
| 625 | return(error); | |
| 984263bc MD |
626 | } |
| 627 | ||
| 628 | /* | |
| 629 | * vniocattach_swap: | |
| 630 | * | |
| 631 | * Attach swap backing store to a VN partition of the size specified | |
| 632 | * in vn_size. | |
| 633 | */ | |
| 634 | ||
| 635 | static int | |
| b13267a5 | 636 | vniocattach_swap(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, |
| fef8985e | 637 | int flag, struct ucred *cred) |
| 984263bc MD |
638 | { |
| 639 | int error; | |
| 640 | ||
| 641 | /* | |
| 642 | * Range check. Disallow negative sizes or any size less then the | |
| 643 | * size of a page. Then round to a page. | |
| 644 | */ | |
| 645 | ||
| 646 | if (vio->vn_size <= 0) | |
| 647 | return(EDOM); | |
| 648 | ||
| 649 | /* | |
| 650 | * Allocate an OBJT_SWAP object. | |
| 651 | * | |
| 652 | * sc_secsize is PAGE_SIZE'd | |
| 653 | * | |
| 654 | * vio->vn_size is in PAGE_SIZE'd chunks. | |
| 655 | * sc_size must be in PAGE_SIZE'd chunks. | |
| 656 | * Note the truncation. | |
| 657 | */ | |
| 658 | ||
| 659 | vn->sc_secsize = PAGE_SIZE; | |
| 660 | vn->sc_size = vio->vn_size; | |
| 57f7b636 MD |
661 | vn->sc_object = vm_pager_allocate(OBJT_SWAP, NULL, |
| 662 | vn->sc_secsize * (off_t)vio->vn_size, | |
| 663 | VM_PROT_DEFAULT, 0); | |
| 984263bc MD |
664 | IFOPT(vn, VN_RESERVE) { |
| 665 | if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) { | |
| 666 | vm_pager_deallocate(vn->sc_object); | |
| 667 | vn->sc_object = NULL; | |
| 668 | return(EDOM); | |
| 669 | } | |
| 670 | } | |
| 671 | vn->sc_flags |= VNF_INITED; | |
| 672 | ||
| fef8985e | 673 | error = vnsetcred(vn, cred); |
| 984263bc MD |
674 | if (error == 0) { |
| 675 | IFOPT(vn, VN_LABELS) { | |
| 676 | /* | |
| 677 | * Reopen so that `ds' knows which devices are open. | |
| 678 | * If this is the first VNIOCSET, then we've | |
| 679 | * guaranteed that the device is the cdev and that | |
| 680 | * no other slices or labels are open. Otherwise, | |
| 681 | * we rely on VNIOCCLR not being abused. | |
| 682 | */ | |
| fef8985e | 683 | error = dev_dopen(dev, flag, S_IFCHR, cred); |
| 984263bc MD |
684 | } |
| 685 | } | |
| 686 | if (error == 0) { | |
| 687 | IFOPT(vn, VN_FOLLOW) { | |
| e0fc5693 | 688 | kprintf("vnioctl: SET vp %p size %llx\n", |
| 984263bc MD |
689 | vn->sc_vp, vn->sc_size); |
| 690 | } | |
| 691 | } | |
| 692 | if (error) | |
| 693 | vnclear(vn); | |
| 694 | return(error); | |
| 695 | } | |
| 696 | ||
| 697 | /* | |
| 698 | * Duplicate the current processes' credentials. Since we are called only | |
| 699 | * as the result of a SET ioctl and only root can do that, any future access | |
| 700 | * to this "disk" is essentially as root. Note that credentials may change | |
| 701 | * if some other uid can write directly to the mapped file (NFS). | |
| 702 | */ | |
| 703 | int | |
| 704 | vnsetcred(struct vn_softc *vn, struct ucred *cred) | |
| 705 | { | |
| 706 | char *tmpbuf; | |
| 707 | int error = 0; | |
| 708 | ||
| 709 | /* | |
| 710 | * Set credits in our softc | |
| 711 | */ | |
| 712 | ||
| 713 | if (vn->sc_cred) | |
| 714 | crfree(vn->sc_cred); | |
| 715 | vn->sc_cred = crdup(cred); | |
| 716 | ||
| 717 | /* | |
| 718 | * Horrible kludge to establish credentials for NFS XXX. | |
| 719 | */ | |
| 720 | ||
| 721 | if (vn->sc_vp) { | |
| 722 | struct uio auio; | |
| 723 | struct iovec aiov; | |
| 724 | ||
| efda3bd0 | 725 | tmpbuf = kmalloc(vn->sc_secsize, M_TEMP, M_WAITOK); |
| 984263bc MD |
726 | bzero(&auio, sizeof(auio)); |
| 727 | ||
| 728 | aiov.iov_base = tmpbuf; | |
| 729 | aiov.iov_len = vn->sc_secsize; | |
| 730 | auio.uio_iov = &aiov; | |
| 731 | auio.uio_iovcnt = 1; | |
| 732 | auio.uio_offset = 0; | |
| 733 | auio.uio_rw = UIO_READ; | |
| 734 | auio.uio_segflg = UIO_SYSSPACE; | |
| 735 | auio.uio_resid = aiov.iov_len; | |
| ca466bae | 736 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
| 984263bc | 737 | error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); |
| a11aaa81 | 738 | vn_unlock(vn->sc_vp); |
| efda3bd0 | 739 | kfree(tmpbuf, M_TEMP); |
| 984263bc MD |
740 | } |
| 741 | return (error); | |
| 742 | } | |
| 743 | ||
| 744 | void | |
| 745 | vnclear(struct vn_softc *vn) | |
| 746 | { | |
| 984263bc | 747 | IFOPT(vn, VN_FOLLOW) |
| e3869ec7 | 748 | kprintf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); |
| 984263bc MD |
749 | if (vn->sc_slices != NULL) |
| 750 | dsgone(&vn->sc_slices); | |
| 751 | vn->sc_flags &= ~VNF_INITED; | |
| 752 | if (vn->sc_vp != NULL) { | |
| 87de5057 MD |
753 | vn_close(vn->sc_vp, |
| 754 | (vn->sc_flags & VNF_READONLY) ? FREAD : (FREAD|FWRITE)); | |
| 984263bc MD |
755 | vn->sc_vp = NULL; |
| 756 | } | |
| 757 | vn->sc_flags &= ~VNF_READONLY; | |
| 758 | if (vn->sc_cred) { | |
| 759 | crfree(vn->sc_cred); | |
| 760 | vn->sc_cred = NULL; | |
| 761 | } | |
| 762 | if (vn->sc_object != NULL) { | |
| 763 | vm_pager_deallocate(vn->sc_object); | |
| 764 | vn->sc_object = NULL; | |
| 765 | } | |
| 766 | vn->sc_size = 0; | |
| 767 | } | |
| 768 | ||
| 30c1fde0 CT |
769 | /* |
| 770 | * vnget: | |
| 771 | * | |
| 772 | * populate a struct vn_user for the VNIOCGET ioctl. | |
| 773 | * interface conventions defined in sys/sys/vnioctl.h. | |
| 774 | */ | |
| 775 | ||
| 776 | static int | |
| 777 | vnget(cdev_t dev, struct vn_softc *vn, struct vn_user *vnu) | |
| 778 | { | |
| 779 | int error, found = 0; | |
| 780 | char *freepath, *fullpath; | |
| 781 | struct vattr vattr; | |
| 782 | ||
| 783 | if (vnu->vnu_unit == -1) { | |
| 784 | vnu->vnu_unit = dkunit(dev); | |
| 785 | } | |
| 786 | else if (vnu->vnu_unit < 0) | |
| 787 | return (EINVAL); | |
| 788 | ||
| 789 | SLIST_FOREACH(vn, &vn_list, sc_list) { | |
| 790 | ||
| 791 | if(vn->sc_unit != vnu->vnu_unit) | |
| 792 | continue; | |
| 793 | ||
| 794 | found = 1; | |
| 795 | ||
| 796 | if (vn->sc_flags & VNF_INITED && vn->sc_vp != NULL) { | |
| 797 | ||
| 798 | /* note: u_cred checked in vnioctl above */ | |
| 799 | error = VOP_GETATTR(vn->sc_vp, &vattr); | |
| 800 | if (error) { | |
| 801 | kprintf("vnget: VOP_GETATTR for %p failed\n", | |
| 802 | vn->sc_vp); | |
| 803 | return (error); | |
| 804 | } | |
| 805 | ||
| 806 | error = vn_fullpath(curproc, vn->sc_vp, | |
| 807 | &fullpath, &freepath); | |
| 808 | ||
| 809 | if (error) { | |
| 810 | kprintf("vnget: unable to resolve vp %p\n", | |
| 811 | vn->sc_vp); | |
| 812 | return(error); | |
| 813 | } | |
| 814 | ||
| 815 | strlcpy(vnu->vnu_file, fullpath, | |
| 816 | sizeof(vnu->vnu_file)); | |
| 817 | kfree(freepath, M_TEMP); | |
| 818 | vnu->vnu_dev = vattr.va_fsid; | |
| 819 | vnu->vnu_ino = vattr.va_fileid; | |
| 820 | ||
| 821 | } | |
| 822 | else if (vn->sc_flags & VNF_INITED && vn->sc_object != NULL){ | |
| 823 | ||
| 824 | strlcpy(vnu->vnu_file, _VN_USER_SWAP, | |
| 825 | sizeof(vnu->vnu_file)); | |
| 826 | vnu->vnu_size = vn->sc_size; | |
| 827 | vnu->vnu_secsize = vn->sc_secsize; | |
| 828 | ||
| 829 | } else { | |
| 830 | ||
| 831 | bzero(vnu->vnu_file, sizeof(vnu->vnu_file)); | |
| 832 | vnu->vnu_dev = 0; | |
| 833 | vnu->vnu_ino = 0; | |
| 834 | ||
| 835 | } | |
| 836 | break; | |
| 837 | } | |
| 838 | ||
| 839 | if (!found) | |
| 840 | return(ENXIO); | |
| 841 | ||
| 842 | return(0); | |
| 843 | } | |
| 844 | ||
| fef8985e MD |
845 | static int |
| 846 | vnsize(struct dev_psize_args *ap) | |
| 984263bc | 847 | { |
| b13267a5 | 848 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
849 | struct vn_softc *vn; |
| 850 | ||
| 851 | vn = dev->si_drv1; | |
| 852 | if (!vn) | |
| fef8985e | 853 | return(ENXIO); |
| 984263bc | 854 | if ((vn->sc_flags & VNF_INITED) == 0) |
| fef8985e | 855 | return(ENXIO); |
| e0fc5693 | 856 | ap->a_result = (int64_t)vn->sc_size; |
| fef8985e | 857 | return(0); |
| 984263bc MD |
858 | } |
| 859 | ||
| 860 | static int | |
| 861 | vn_modevent(module_t mod, int type, void *data) | |
| 862 | { | |
| 863 | struct vn_softc *vn; | |
| b13267a5 | 864 | cdev_t dev; |
| 984263bc MD |
865 | |
| 866 | switch (type) { | |
| 867 | case MOD_LOAD: | |
| fef8985e | 868 | dev_ops_add(&vn_ops, 0, 0); |
| 984263bc | 869 | break; |
| 984263bc MD |
870 | case MOD_UNLOAD: |
| 871 | /* fall through */ | |
| 872 | case MOD_SHUTDOWN: | |
| e2f51e29 | 873 | while ((vn = SLIST_FIRST(&vn_list)) != NULL) { |
| 984263bc MD |
874 | SLIST_REMOVE_HEAD(&vn_list, sc_list); |
| 875 | if (vn->sc_flags & VNF_INITED) | |
| 876 | vnclear(vn); | |
| b13267a5 | 877 | /* Cleanup all cdev_t's that refer to this unit */ |
| 984263bc MD |
878 | while ((dev = vn->sc_devlist) != NULL) { |
| 879 | vn->sc_devlist = dev->si_drv2; | |
| 880 | dev->si_drv1 = dev->si_drv2 = NULL; | |
| e4c9c0c8 | 881 | destroy_dev(dev); |
| 984263bc | 882 | } |
| efda3bd0 | 883 | kfree(vn, M_DEVBUF); |
| 984263bc | 884 | } |
| fef8985e | 885 | dev_ops_remove(&vn_ops, 0, 0); |
| 984263bc MD |
886 | break; |
| 887 | default: | |
| 888 | break; | |
| 889 | } | |
| 890 | return 0; | |
| 891 | } | |
| 892 | ||
| 893 | DEV_MODULE(vn, vn_modevent, 0); |