| Commit | Line | Data |
|---|---|---|
| 984263bc MD |
1 | /* |
| 2 | * Copyright (c) 1988 University of Utah. | |
| 3 | * Copyright (c) 1990, 1993 | |
| 4 | * The Regents of the University of California. All rights reserved. | |
| 5 | * | |
| 6 | * This code is derived from software contributed to Berkeley by | |
| 7 | * the Systems Programming Group of the University of Utah Computer | |
| 8 | * Science Department. | |
| 9 | * | |
| 10 | * Redistribution and use in source and binary forms, with or without | |
| 11 | * modification, are permitted provided that the following conditions | |
| 12 | * are met: | |
| 13 | * 1. Redistributions of source code must retain the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer. | |
| 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer in the | |
| 17 | * documentation and/or other materials provided with the distribution. | |
| 18 | * 3. All advertising materials mentioning features or use of this software | |
| 19 | * must display the following acknowledgement: | |
| 20 | * This product includes software developed by the University of | |
| 21 | * California, Berkeley and its contributors. | |
| 22 | * 4. Neither the name of the University nor the names of its contributors | |
| 23 | * may be used to endorse or promote products derived from this software | |
| 24 | * without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 27 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 30 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 31 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 32 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 34 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 36 | * SUCH DAMAGE. | |
| 37 | * | |
| 38 | * from: Utah Hdr: vn.c 1.13 94/04/02 | |
| 39 | * | |
| 40 | * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 | |
| 41 | * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $ | |
| 42 | */ | |
| 43 | ||
| 44 | /* | |
| 45 | * Vnode disk driver. | |
| 46 | * | |
| 47 | * Block/character interface to a vnode. Allows one to treat a file | |
| 48 | * as a disk (e.g. build a filesystem in it, mount it, etc.). | |
| 49 | * | |
| 54078292 | 50 | * NOTE 1: There is a security issue involved with this driver. |
| 984263bc MD |
51 | * Once mounted all access to the contents of the "mapped" file via |
| 52 | * the special file is controlled by the permissions on the special | |
| 53 | * file, the protection of the mapped file is ignored (effectively, | |
| 54 | * by using root credentials in all transactions). | |
| 55 | * | |
| 54078292 | 56 | * NOTE 2: Doesn't interact with leases, should it? |
| 984263bc MD |
57 | */ |
| 58 | ||
| 95db3aac | 59 | #include "use_vn.h" |
| 984263bc MD |
60 | #include <sys/param.h> |
| 61 | #include <sys/systm.h> | |
| 62 | #include <sys/kernel.h> | |
| 984263bc | 63 | #include <sys/proc.h> |
| 895c1f85 | 64 | #include <sys/priv.h> |
| fad57d0e | 65 | #include <sys/nlookup.h> |
| 984263bc MD |
66 | #include <sys/buf.h> |
| 67 | #include <sys/malloc.h> | |
| 68 | #include <sys/mount.h> | |
| 69 | #include <sys/vnode.h> | |
| 70 | #include <sys/fcntl.h> | |
| 71 | #include <sys/conf.h> | |
| 984263bc | 72 | #include <sys/diskslice.h> |
| 84f8b009 | 73 | #include <sys/disk.h> |
| 984263bc | 74 | #include <sys/stat.h> |
| 984263bc MD |
75 | #include <sys/module.h> |
| 76 | #include <sys/vnioctl.h> | |
| 77 | ||
| 78 | #include <vm/vm.h> | |
| 79 | #include <vm/vm_object.h> | |
| 80 | #include <vm/vm_page.h> | |
| 81 | #include <vm/vm_pager.h> | |
| 82 | #include <vm/vm_pageout.h> | |
| 83 | #include <vm/swap_pager.h> | |
| 84 | #include <vm/vm_extern.h> | |
| 85 | #include <vm/vm_zone.h> | |
| 2c1e28dd | 86 | #include <sys/devfs.h> |
| 984263bc MD |
87 | |
| 88 | static d_ioctl_t vnioctl; | |
| 89 | static d_open_t vnopen; | |
| 90 | static d_close_t vnclose; | |
| 91 | static d_psize_t vnsize; | |
| 92 | static d_strategy_t vnstrategy; | |
| 8be7edad | 93 | static d_clone_t vnclone; |
| cc80c90e | 94 | |
| 8be7edad | 95 | DEVFS_DECLARE_CLONE_BITMAP(vn); |
| 95db3aac AH |
96 | |
| 97 | #if NVN <= 1 | |
| 8be7edad | 98 | #define VN_PREALLOCATED_UNITS 4 |
| 95db3aac AH |
99 | #else |
| 100 | #define VN_PREALLOCATED_UNITS NVN | |
| 101 | #endif | |
| 984263bc | 102 | |
| 984263bc MD |
103 | #define VN_BSIZE_BEST 8192 |
| 104 | ||
| 105 | /* | |
| fef8985e | 106 | * dev_ops |
| 984263bc | 107 | * D_DISK we want to look like a disk |
| 10f3fee5 | 108 | * D_CANFREE We support BUF_CMD_FREEBLKS |
| 984263bc MD |
109 | */ |
| 110 | ||
| fef8985e | 111 | static struct dev_ops vn_ops = { |
| 88abd8b5 | 112 | { "vn", 0, D_DISK | D_CANFREE }, |
| fef8985e MD |
113 | .d_open = vnopen, |
| 114 | .d_close = vnclose, | |
| 115 | .d_read = physread, | |
| 116 | .d_write = physwrite, | |
| 117 | .d_ioctl = vnioctl, | |
| 118 | .d_strategy = vnstrategy, | |
| 119 | .d_psize = vnsize | |
| 984263bc MD |
120 | }; |
| 121 | ||
| 984263bc MD |
122 | struct vn_softc { |
| 123 | int sc_unit; | |
| 124 | int sc_flags; /* flags */ | |
| e0fc5693 | 125 | u_int64_t sc_size; /* size of vn, sc_secsize scale */ |
| 984263bc | 126 | int sc_secsize; /* sector size */ |
| 8be7edad | 127 | struct disk sc_disk; |
| 984263bc MD |
128 | struct vnode *sc_vp; /* vnode if not NULL */ |
| 129 | vm_object_t sc_object; /* backing object if not NULL */ | |
| 130 | struct ucred *sc_cred; /* credentials */ | |
| 131 | int sc_maxactive; /* max # of active requests */ | |
| 132 | struct buf sc_tab; /* transfer queue */ | |
| 133 | u_long sc_options; /* options */ | |
| 13983537 | 134 | cdev_t sc_dev; /* devices that refer to this unit */ |
| 984263bc MD |
135 | SLIST_ENTRY(vn_softc) sc_list; |
| 136 | }; | |
| 137 | ||
| 138 | static SLIST_HEAD(, vn_softc) vn_list; | |
| 139 | ||
| 140 | /* sc_flags */ | |
| 141 | #define VNF_INITED 0x01 | |
| 142 | #define VNF_READONLY 0x02 | |
| 13983537 AH |
143 | #define VNF_OPENED 0x10 |
| 144 | #define VNF_DESTROY 0x20 | |
| 984263bc MD |
145 | |
| 146 | static u_long vn_options; | |
| 147 | ||
| 148 | #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) | |
| 149 | #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) | |
| 150 | ||
| 151 | static int vnsetcred (struct vn_softc *vn, struct ucred *cred); | |
| 152 | static void vnclear (struct vn_softc *vn); | |
| 30c1fde0 | 153 | static int vnget (cdev_t dev, struct vn_softc *vn , struct vn_user *vnu); |
| 984263bc | 154 | static int vn_modevent (module_t, int, void *); |
| b13267a5 MD |
155 | static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); |
| 156 | static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, cdev_t dev, int flag, struct ucred *cred); | |
| 13983537 | 157 | static cdev_t vn_create(int unit, struct devfs_bitmap *bitmap, int clone); |
| cc80c90e AH |
158 | |
| 159 | static int | |
| 160 | vnclone(struct dev_clone_args *ap) | |
| 161 | { | |
| 162 | int unit; | |
| 163 | ||
| 164 | unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(vn), 0); | |
| 13983537 | 165 | ap->a_dev = vn_create(unit, &DEVFS_CLONE_BITMAP(vn), 1); |
| cc80c90e AH |
166 | |
| 167 | return 0; | |
| 168 | } | |
| 984263bc MD |
169 | |
| 170 | static int | |
| fef8985e | 171 | vnclose(struct dev_close_args *ap) |
| 984263bc | 172 | { |
| 13983537 AH |
173 | cdev_t dev = ap->a_head.a_dev; |
| 174 | struct vn_softc *vn; | |
| 175 | ||
| 176 | vn = dev->si_drv1; | |
| 177 | KKASSERT(vn != NULL); | |
| 178 | ||
| 179 | vn->sc_flags &= ~VNF_OPENED; | |
| 180 | ||
| 181 | /* The disk has been detached and can now be safely destroyed */ | |
| 182 | if (vn->sc_flags & VNF_DESTROY) { | |
| 183 | KKASSERT(disk_getopencount(&vn->sc_disk) == 0); | |
| 184 | disk_destroy(&vn->sc_disk); | |
| 185 | devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(vn), dkunit(dev)); | |
| 186 | SLIST_REMOVE(&vn_list, vn, vn_softc, sc_list); | |
| 187 | kfree(vn, M_DEVBUF); | |
| 188 | } | |
| 984263bc MD |
189 | return (0); |
| 190 | } | |
| 191 | ||
| 8be7edad MD |
192 | static struct vn_softc * |
| 193 | vncreatevn(void) | |
| 194 | { | |
| 195 | struct vn_softc *vn; | |
| 196 | ||
| 197 | vn = kmalloc(sizeof *vn, M_DEVBUF, M_WAITOK | M_ZERO); | |
| 198 | return vn; | |
| 199 | } | |
| 200 | ||
| 201 | static void | |
| 202 | vninitvn(struct vn_softc *vn, cdev_t dev) | |
| 203 | { | |
| 204 | int unit; | |
| 205 | ||
| 206 | KKASSERT(vn != NULL); | |
| 207 | KKASSERT(dev != NULL); | |
| 208 | unit = dkunit(dev); | |
| 209 | ||
| 210 | vn->sc_unit = unit; | |
| 211 | dev->si_drv1 = vn; | |
| 13983537 | 212 | vn->sc_dev = dev; |
| 8be7edad | 213 | |
| 13983537 | 214 | SLIST_INSERT_HEAD(&vn_list, vn, sc_list); |
| 984263bc MD |
215 | } |
| 216 | ||
| 217 | static int | |
| fef8985e | 218 | vnopen(struct dev_open_args *ap) |
| 984263bc | 219 | { |
| b13267a5 | 220 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
221 | struct vn_softc *vn; |
| 222 | ||
| 223 | /* | |
| 224 | * Locate preexisting device | |
| 225 | */ | |
| 226 | ||
| 13983537 AH |
227 | vn = dev->si_drv1; |
| 228 | KKASSERT(vn != NULL); | |
| 984263bc MD |
229 | |
| 230 | /* | |
| 231 | * Update si_bsize fields for device. This data will be overriden by | |
| 232 | * the slice/parition code for vn accesses through partitions, and | |
| 233 | * used directly if you open the 'whole disk' device. | |
| 234 | * | |
| 235 | * si_bsize_best must be reinitialized in case VN has been | |
| 236 | * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency. | |
| 237 | */ | |
| 238 | dev->si_bsize_phys = vn->sc_secsize; | |
| 239 | dev->si_bsize_best = vn->sc_secsize; | |
| 240 | if (dev->si_bsize_best < VN_BSIZE_BEST) | |
| 241 | dev->si_bsize_best = VN_BSIZE_BEST; | |
| 242 | ||
| fef8985e | 243 | if ((ap->a_oflags & FWRITE) && (vn->sc_flags & VNF_READONLY)) |
| 984263bc MD |
244 | return (EACCES); |
| 245 | ||
| 246 | IFOPT(vn, VN_FOLLOW) | |
| e3869ec7 | 247 | kprintf("vnopen(%s, 0x%x, 0x%x)\n", |
| fef8985e | 248 | devtoname(dev), ap->a_oflags, ap->a_devtype); |
| 984263bc | 249 | |
| 13983537 | 250 | vn->sc_flags |= VNF_OPENED; |
| 984263bc MD |
251 | return(0); |
| 252 | } | |
| 253 | ||
| 254 | /* | |
| 255 | * vnstrategy: | |
| 256 | * | |
| 257 | * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls | |
| 107e9bcc | 258 | * for vnode-backed vn's, and the swap_pager_strategy() call for |
| 984263bc | 259 | * vm_object-backed vn's. |
| 984263bc | 260 | */ |
| fef8985e MD |
261 | static int |
| 262 | vnstrategy(struct dev_strategy_args *ap) | |
| 984263bc | 263 | { |
| b13267a5 | 264 | cdev_t dev = ap->a_head.a_dev; |
| fef8985e | 265 | struct bio *bio = ap->a_bio; |
| 81b5c339 MD |
266 | struct buf *bp; |
| 267 | struct bio *nbio; | |
| 984263bc MD |
268 | int unit; |
| 269 | struct vn_softc *vn; | |
| 270 | int error; | |
| 271 | ||
| 81b5c339 | 272 | unit = dkunit(dev); |
| 13983537 AH |
273 | vn = dev->si_drv1; |
| 274 | KKASSERT(vn != NULL); | |
| 81b5c339 MD |
275 | |
| 276 | bp = bio->bio_buf; | |
| 984263bc MD |
277 | |
| 278 | IFOPT(vn, VN_DEBUG) | |
| e3869ec7 | 279 | kprintf("vnstrategy(%p): unit %d\n", bp, unit); |
| 984263bc MD |
280 | |
| 281 | if ((vn->sc_flags & VNF_INITED) == 0) { | |
| 282 | bp->b_error = ENXIO; | |
| 283 | bp->b_flags |= B_ERROR; | |
| 81b5c339 | 284 | biodone(bio); |
| fef8985e | 285 | return(0); |
| 984263bc MD |
286 | } |
| 287 | ||
| 288 | bp->b_resid = bp->b_bcount; | |
| 289 | ||
| 8be7edad MD |
290 | /* |
| 291 | * The vnode device is using disk/slice label support. | |
| 292 | * | |
| 293 | * The dscheck() function is called for validating the | |
| 294 | * slices that exist ON the vnode device itself, and | |
| 295 | * translate the "slice-relative" block number, again. | |
| 296 | * dscheck() will call biodone() and return NULL if | |
| 297 | * we are at EOF or beyond the device size. | |
| 298 | */ | |
| 984263bc | 299 | |
| 8be7edad | 300 | nbio = bio; |
| 984263bc | 301 | |
| 81b5c339 MD |
302 | /* |
| 303 | * Use the translated nbio from this point on | |
| 304 | */ | |
| 10f3fee5 | 305 | if (vn->sc_vp && bp->b_cmd == BUF_CMD_FREEBLKS) { |
| 984263bc | 306 | /* |
| 10f3fee5 | 307 | * Freeblks is not handled for vnode-backed elements yet. |
| 984263bc | 308 | */ |
| 9a71d53f | 309 | bp->b_resid = 0; |
| 4414f2c9 | 310 | /* operation complete */ |
| 984263bc MD |
311 | } else if (vn->sc_vp) { |
| 312 | /* | |
| 313 | * VNODE I/O | |
| 314 | * | |
| 315 | * If an error occurs, we set B_ERROR but we do not set | |
| 316 | * B_INVAL because (for a write anyway), the buffer is | |
| 317 | * still valid. | |
| 318 | */ | |
| 319 | struct uio auio; | |
| 320 | struct iovec aiov; | |
| 321 | ||
| 322 | bzero(&auio, sizeof(auio)); | |
| 323 | ||
| 324 | aiov.iov_base = bp->b_data; | |
| 325 | aiov.iov_len = bp->b_bcount; | |
| 326 | auio.uio_iov = &aiov; | |
| 327 | auio.uio_iovcnt = 1; | |
| 54078292 | 328 | auio.uio_offset = nbio->bio_offset; |
| 984263bc | 329 | auio.uio_segflg = UIO_SYSSPACE; |
| 10f3fee5 | 330 | if (bp->b_cmd == BUF_CMD_READ) |
| 984263bc MD |
331 | auio.uio_rw = UIO_READ; |
| 332 | else | |
| 333 | auio.uio_rw = UIO_WRITE; | |
| 334 | auio.uio_resid = bp->b_bcount; | |
| dadab5e9 | 335 | auio.uio_td = curthread; |
| b527c4c5 MD |
336 | |
| 337 | /* | |
| 338 | * Don't use IO_DIRECT here, it really gets in the way | |
| 339 | * due to typical blocksize differences between the | |
| 340 | * fs backing the VN device and whatever is running on | |
| 341 | * the VN device. | |
| 342 | */ | |
| 343 | if (bp->b_cmd == BUF_CMD_READ) { | |
| 344 | vn_lock(vn->sc_vp, LK_SHARED | LK_RETRY); | |
| 345 | error = VOP_READ(vn->sc_vp, &auio, IO_RECURSE, | |
| 346 | vn->sc_cred); | |
| 347 | } else { | |
| 348 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); | |
| 349 | error = VOP_WRITE(vn->sc_vp, &auio, IO_RECURSE, | |
| 350 | vn->sc_cred); | |
| 351 | } | |
| a11aaa81 | 352 | vn_unlock(vn->sc_vp); |
| 984263bc | 353 | bp->b_resid = auio.uio_resid; |
| 984263bc MD |
354 | if (error) { |
| 355 | bp->b_error = error; | |
| 356 | bp->b_flags |= B_ERROR; | |
| 357 | } | |
| 4414f2c9 | 358 | /* operation complete */ |
| 984263bc MD |
359 | } else if (vn->sc_object) { |
| 360 | /* | |
| 4414f2c9 | 361 | * OBJT_SWAP I/O (handles read, write, freebuf) |
| 984263bc | 362 | * |
| 4414f2c9 MD |
363 | * We have nothing to do if freeing blocks on a reserved |
| 364 | * swap area, othrewise execute the op. | |
| 984263bc | 365 | */ |
| 10f3fee5 | 366 | if (bp->b_cmd == BUF_CMD_FREEBLKS && TESTOPT(vn, VN_RESERVE)) { |
| 4414f2c9 MD |
367 | bp->b_resid = 0; |
| 368 | /* operation complete */ | |
| 984263bc | 369 | } else { |
| 107e9bcc | 370 | swap_pager_strategy(vn->sc_object, nbio); |
| fef8985e | 371 | return(0); |
| 4414f2c9 | 372 | /* NOT REACHED */ |
| 984263bc MD |
373 | } |
| 374 | } else { | |
| 4414f2c9 MD |
375 | bp->b_resid = bp->b_bcount; |
| 376 | bp->b_flags |= B_ERROR | B_INVAL; | |
| 984263bc | 377 | bp->b_error = EINVAL; |
| 4414f2c9 | 378 | /* operation complete */ |
| 984263bc | 379 | } |
| 4414f2c9 | 380 | biodone(nbio); |
| fef8985e | 381 | return(0); |
| 984263bc MD |
382 | } |
| 383 | ||
| 384 | /* ARGSUSED */ | |
| 385 | static int | |
| fef8985e | 386 | vnioctl(struct dev_ioctl_args *ap) |
| 984263bc | 387 | { |
| b13267a5 | 388 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
389 | struct vn_softc *vn; |
| 390 | struct vn_ioctl *vio; | |
| 391 | int error; | |
| 392 | u_long *f; | |
| 393 | ||
| 394 | vn = dev->si_drv1; | |
| fef8985e | 395 | IFOPT(vn,VN_FOLLOW) { |
| e3869ec7 | 396 | kprintf("vnioctl(%s, 0x%lx, %p, 0x%x): unit %d\n", |
| fef8985e | 397 | devtoname(dev), ap->a_cmd, ap->a_data, ap->a_fflag, |
| 984263bc | 398 | dkunit(dev)); |
| fef8985e | 399 | } |
| 984263bc | 400 | |
| fef8985e | 401 | switch (ap->a_cmd) { |
| 984263bc MD |
402 | case VNIOCATTACH: |
| 403 | case VNIOCDETACH: | |
| 404 | case VNIOCGSET: | |
| 405 | case VNIOCGCLEAR: | |
| 30c1fde0 | 406 | case VNIOCGET: |
| 984263bc MD |
407 | case VNIOCUSET: |
| 408 | case VNIOCUCLEAR: | |
| 409 | goto vn_specific; | |
| 410 | } | |
| 411 | ||
| 8be7edad MD |
412 | #if 0 |
| 413 | if (dkslice(dev) != WHOLE_DISK_SLICE || | |
| 414 | dkpart(dev) != WHOLE_SLICE_PART) | |
| 415 | return (ENOTTY); | |
| 416 | #endif | |
| 984263bc MD |
417 | |
| 418 | vn_specific: | |
| 419 | ||
| 895c1f85 | 420 | error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0); |
| 984263bc MD |
421 | if (error) |
| 422 | return (error); | |
| 423 | ||
| fef8985e MD |
424 | vio = (struct vn_ioctl *)ap->a_data; |
| 425 | f = (u_long*)ap->a_data; | |
| 984263bc | 426 | |
| fef8985e | 427 | switch (ap->a_cmd) { |
| 984263bc MD |
428 | case VNIOCATTACH: |
| 429 | if (vn->sc_flags & VNF_INITED) | |
| 430 | return(EBUSY); | |
| 431 | ||
| 13983537 AH |
432 | if (vn->sc_flags & VNF_DESTROY) |
| 433 | return(ENXIO); | |
| 434 | ||
| 984263bc | 435 | if (vio->vn_file == NULL) |
| fef8985e | 436 | error = vniocattach_swap(vn, vio, dev, ap->a_fflag, ap->a_cred); |
| 984263bc | 437 | else |
| fef8985e | 438 | error = vniocattach_file(vn, vio, dev, ap->a_fflag, ap->a_cred); |
| 984263bc MD |
439 | break; |
| 440 | ||
| 441 | case VNIOCDETACH: | |
| 442 | if ((vn->sc_flags & VNF_INITED) == 0) | |
| 443 | return(ENXIO); | |
| 444 | /* | |
| 445 | * XXX handle i/o in progress. Return EBUSY, or wait, or | |
| 446 | * flush the i/o. | |
| 447 | * XXX handle multiple opens of the device. Return EBUSY, | |
| 448 | * or revoke the fd's. | |
| 449 | * How are these problems handled for removable and failing | |
| 450 | * hardware devices? (Hint: They are not) | |
| 451 | */ | |
| 13983537 | 452 | if ((disk_getopencount(&vn->sc_disk)) > 1) |
| c9f4065d SK |
453 | return (EBUSY); |
| 454 | ||
| 984263bc MD |
455 | vnclear(vn); |
| 456 | IFOPT(vn, VN_FOLLOW) | |
| e3869ec7 | 457 | kprintf("vnioctl: CLRed\n"); |
| cc80c90e AH |
458 | |
| 459 | if (dkunit(dev) >= VN_PREALLOCATED_UNITS) { | |
| 13983537 | 460 | vn->sc_flags |= VNF_DESTROY; |
| cc80c90e AH |
461 | } |
| 462 | ||
| 984263bc MD |
463 | break; |
| 464 | ||
| 30c1fde0 CT |
465 | case VNIOCGET: |
| 466 | error = vnget(dev, vn, (struct vn_user *) ap->a_data); | |
| 467 | break; | |
| 468 | ||
| 984263bc MD |
469 | case VNIOCGSET: |
| 470 | vn_options |= *f; | |
| 471 | *f = vn_options; | |
| 472 | break; | |
| 473 | ||
| 474 | case VNIOCGCLEAR: | |
| 475 | vn_options &= ~(*f); | |
| 476 | *f = vn_options; | |
| 477 | break; | |
| 478 | ||
| 479 | case VNIOCUSET: | |
| 480 | vn->sc_options |= *f; | |
| 481 | *f = vn->sc_options; | |
| 482 | break; | |
| 483 | ||
| 484 | case VNIOCUCLEAR: | |
| 485 | vn->sc_options &= ~(*f); | |
| 486 | *f = vn->sc_options; | |
| 487 | break; | |
| 488 | ||
| 489 | default: | |
| 490 | error = ENOTTY; | |
| 491 | break; | |
| 492 | } | |
| 493 | return(error); | |
| 494 | } | |
| 495 | ||
| 496 | /* | |
| 497 | * vniocattach_file: | |
| 498 | * | |
| 499 | * Attach a file to a VN partition. Return the size in the vn_size | |
| 500 | * field. | |
| 501 | */ | |
| 502 | ||
| 503 | static int | |
| b13267a5 | 504 | vniocattach_file(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, |
| fef8985e | 505 | int flag, struct ucred *cred) |
| 984263bc MD |
506 | { |
| 507 | struct vattr vattr; | |
| fad57d0e | 508 | struct nlookupdata nd; |
| 984263bc | 509 | int error, flags; |
| fad57d0e | 510 | struct vnode *vp; |
| 8be7edad | 511 | struct disk_info info; |
| 984263bc MD |
512 | |
| 513 | flags = FREAD|FWRITE; | |
| fad57d0e MD |
514 | error = nlookup_init(&nd, vio->vn_file, |
| 515 | UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); | |
| 516 | if (error) | |
| 517 | return (error); | |
| 518 | if ((error = vn_open(&nd, NULL, flags, 0)) != 0) { | |
| 984263bc | 519 | if (error != EACCES && error != EPERM && error != EROFS) |
| fad57d0e | 520 | goto done; |
| 984263bc | 521 | flags &= ~FWRITE; |
| fad57d0e MD |
522 | nlookup_done(&nd); |
| 523 | error = nlookup_init(&nd, vio->vn_file, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); | |
| 984263bc MD |
524 | if (error) |
| 525 | return (error); | |
| fad57d0e MD |
526 | if ((error = vn_open(&nd, NULL, flags, 0)) != 0) |
| 527 | goto done; | |
| 984263bc | 528 | } |
| fad57d0e MD |
529 | vp = nd.nl_open_vp; |
| 530 | if (vp->v_type != VREG || | |
| 87de5057 | 531 | (error = VOP_GETATTR(vp, &vattr))) { |
| fad57d0e MD |
532 | if (error == 0) |
| 533 | error = EINVAL; | |
| 534 | goto done; | |
| 984263bc | 535 | } |
| a11aaa81 | 536 | vn_unlock(vp); |
| 984263bc | 537 | vn->sc_secsize = DEV_BSIZE; |
| fad57d0e MD |
538 | vn->sc_vp = vp; |
| 539 | nd.nl_open_vp = NULL; | |
| 984263bc MD |
540 | |
| 541 | /* | |
| 542 | * If the size is specified, override the file attributes. Note that | |
| 543 | * the vn_size argument is in PAGE_SIZE sized blocks. | |
| 544 | */ | |
| 545 | if (vio->vn_size) | |
| e0fc5693 | 546 | vn->sc_size = vio->vn_size * PAGE_SIZE / vn->sc_secsize; |
| 984263bc MD |
547 | else |
| 548 | vn->sc_size = vattr.va_size / vn->sc_secsize; | |
| fef8985e | 549 | error = vnsetcred(vn, cred); |
| 984263bc | 550 | if (error) { |
| fad57d0e | 551 | vn->sc_vp = NULL; |
| 87de5057 | 552 | vn_close(vp, flags); |
| fad57d0e | 553 | goto done; |
| 984263bc MD |
554 | } |
| 555 | vn->sc_flags |= VNF_INITED; | |
| 556 | if (flags == FREAD) | |
| 557 | vn->sc_flags |= VNF_READONLY; | |
| 8be7edad MD |
558 | |
| 559 | /* | |
| 560 | * Set the disk info so that probing is triggered | |
| 561 | */ | |
| 562 | bzero(&info, sizeof(struct disk_info)); | |
| 563 | info.d_media_blksize = vn->sc_secsize; | |
| 564 | info.d_media_blocks = vn->sc_size; | |
| 565 | /* | |
| 566 | * reserve mbr sector for backwards compatibility | |
| 567 | * when no slices exist. | |
| 568 | */ | |
| 569 | info.d_dsflags = DSO_COMPATMBR; | |
| 570 | info.d_secpertrack = 32; | |
| 571 | info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); | |
| 572 | info.d_secpercyl = info.d_secpertrack * info.d_nheads; | |
| 573 | info.d_ncylinders = vn->sc_size / info.d_secpercyl; | |
| 574 | disk_setdiskinfo_sync(&vn->sc_disk, &info); | |
| 575 | ||
| 576 | error = dev_dopen(dev, flag, S_IFCHR, cred); | |
| 577 | if (error) | |
| 578 | vnclear(vn); | |
| 579 | ||
| 984263bc | 580 | IFOPT(vn, VN_FOLLOW) |
| e0fc5693 | 581 | kprintf("vnioctl: SET vp %p size %llx blks\n", |
| 665d7d25 | 582 | vn->sc_vp, (long long)vn->sc_size); |
| fad57d0e MD |
583 | done: |
| 584 | nlookup_done(&nd); | |
| 585 | return(error); | |
| 984263bc MD |
586 | } |
| 587 | ||
| 588 | /* | |
| 589 | * vniocattach_swap: | |
| 590 | * | |
| 591 | * Attach swap backing store to a VN partition of the size specified | |
| 592 | * in vn_size. | |
| 593 | */ | |
| 594 | ||
| 595 | static int | |
| b13267a5 | 596 | vniocattach_swap(struct vn_softc *vn, struct vn_ioctl *vio, cdev_t dev, |
| fef8985e | 597 | int flag, struct ucred *cred) |
| 984263bc MD |
598 | { |
| 599 | int error; | |
| 8be7edad | 600 | struct disk_info info; |
| 984263bc MD |
601 | |
| 602 | /* | |
| 603 | * Range check. Disallow negative sizes or any size less then the | |
| 604 | * size of a page. Then round to a page. | |
| 605 | */ | |
| 606 | ||
| 607 | if (vio->vn_size <= 0) | |
| 608 | return(EDOM); | |
| 609 | ||
| 610 | /* | |
| 611 | * Allocate an OBJT_SWAP object. | |
| 612 | * | |
| 613 | * sc_secsize is PAGE_SIZE'd | |
| 614 | * | |
| 615 | * vio->vn_size is in PAGE_SIZE'd chunks. | |
| 616 | * sc_size must be in PAGE_SIZE'd chunks. | |
| 617 | * Note the truncation. | |
| 618 | */ | |
| 619 | ||
| 620 | vn->sc_secsize = PAGE_SIZE; | |
| 621 | vn->sc_size = vio->vn_size; | |
| 5a648714 MD |
622 | vn->sc_object = swap_pager_alloc(NULL, |
| 623 | vn->sc_secsize * (off_t)vio->vn_size, | |
| 624 | VM_PROT_DEFAULT, 0); | |
| 984263bc MD |
625 | IFOPT(vn, VN_RESERVE) { |
| 626 | if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) { | |
| 627 | vm_pager_deallocate(vn->sc_object); | |
| 628 | vn->sc_object = NULL; | |
| 629 | return(EDOM); | |
| 630 | } | |
| 631 | } | |
| 632 | vn->sc_flags |= VNF_INITED; | |
| 633 | ||
| fef8985e | 634 | error = vnsetcred(vn, cred); |
| 984263bc | 635 | if (error == 0) { |
| 8be7edad MD |
636 | /* |
| 637 | * Set the disk info so that probing is triggered | |
| 638 | */ | |
| 639 | bzero(&info, sizeof(struct disk_info)); | |
| 640 | info.d_media_blksize = vn->sc_secsize; | |
| 641 | info.d_media_blocks = vn->sc_size; | |
| 642 | /* | |
| 643 | * reserve mbr sector for backwards compatibility | |
| 644 | * when no slices exist. | |
| 645 | */ | |
| 646 | info.d_dsflags = DSO_COMPATMBR; | |
| 647 | info.d_secpertrack = 32; | |
| 648 | info.d_nheads = 64 / (vn->sc_secsize / DEV_BSIZE); | |
| 649 | info.d_secpercyl = info.d_secpertrack * info.d_nheads; | |
| 650 | info.d_ncylinders = vn->sc_size / info.d_secpercyl; | |
| 651 | disk_setdiskinfo_sync(&vn->sc_disk, &info); | |
| 652 | ||
| 653 | error = dev_dopen(dev, flag, S_IFCHR, cred); | |
| 984263bc MD |
654 | } |
| 655 | if (error == 0) { | |
| 656 | IFOPT(vn, VN_FOLLOW) { | |
| e0fc5693 | 657 | kprintf("vnioctl: SET vp %p size %llx\n", |
| 665d7d25 | 658 | vn->sc_vp, (long long)vn->sc_size); |
| 984263bc MD |
659 | } |
| 660 | } | |
| 661 | if (error) | |
| 662 | vnclear(vn); | |
| 663 | return(error); | |
| 664 | } | |
| 665 | ||
| 666 | /* | |
| 667 | * Duplicate the current processes' credentials. Since we are called only | |
| 668 | * as the result of a SET ioctl and only root can do that, any future access | |
| 669 | * to this "disk" is essentially as root. Note that credentials may change | |
| 670 | * if some other uid can write directly to the mapped file (NFS). | |
| 671 | */ | |
| 672 | int | |
| 673 | vnsetcred(struct vn_softc *vn, struct ucred *cred) | |
| 674 | { | |
| 675 | char *tmpbuf; | |
| 676 | int error = 0; | |
| 677 | ||
| 678 | /* | |
| 679 | * Set credits in our softc | |
| 680 | */ | |
| 681 | ||
| 682 | if (vn->sc_cred) | |
| 683 | crfree(vn->sc_cred); | |
| 684 | vn->sc_cred = crdup(cred); | |
| 685 | ||
| 686 | /* | |
| 687 | * Horrible kludge to establish credentials for NFS XXX. | |
| 688 | */ | |
| 689 | ||
| 690 | if (vn->sc_vp) { | |
| 691 | struct uio auio; | |
| 692 | struct iovec aiov; | |
| 693 | ||
| efda3bd0 | 694 | tmpbuf = kmalloc(vn->sc_secsize, M_TEMP, M_WAITOK); |
| 984263bc MD |
695 | bzero(&auio, sizeof(auio)); |
| 696 | ||
| 697 | aiov.iov_base = tmpbuf; | |
| 698 | aiov.iov_len = vn->sc_secsize; | |
| 699 | auio.uio_iov = &aiov; | |
| 700 | auio.uio_iovcnt = 1; | |
| 701 | auio.uio_offset = 0; | |
| 702 | auio.uio_rw = UIO_READ; | |
| 703 | auio.uio_segflg = UIO_SYSSPACE; | |
| 704 | auio.uio_resid = aiov.iov_len; | |
| ca466bae | 705 | vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
| 984263bc | 706 | error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); |
| a11aaa81 | 707 | vn_unlock(vn->sc_vp); |
| efda3bd0 | 708 | kfree(tmpbuf, M_TEMP); |
| 984263bc MD |
709 | } |
| 710 | return (error); | |
| 711 | } | |
| 712 | ||
| 713 | void | |
| 714 | vnclear(struct vn_softc *vn) | |
| 715 | { | |
| 984263bc | 716 | IFOPT(vn, VN_FOLLOW) |
| e3869ec7 | 717 | kprintf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); |
| 984263bc MD |
718 | vn->sc_flags &= ~VNF_INITED; |
| 719 | if (vn->sc_vp != NULL) { | |
| 87de5057 MD |
720 | vn_close(vn->sc_vp, |
| 721 | (vn->sc_flags & VNF_READONLY) ? FREAD : (FREAD|FWRITE)); | |
| 984263bc MD |
722 | vn->sc_vp = NULL; |
| 723 | } | |
| 724 | vn->sc_flags &= ~VNF_READONLY; | |
| 725 | if (vn->sc_cred) { | |
| 726 | crfree(vn->sc_cred); | |
| 727 | vn->sc_cred = NULL; | |
| 728 | } | |
| 729 | if (vn->sc_object != NULL) { | |
| 730 | vm_pager_deallocate(vn->sc_object); | |
| 731 | vn->sc_object = NULL; | |
| 732 | } | |
| 8be7edad MD |
733 | |
| 734 | disk_unprobe(&vn->sc_disk); | |
| 735 | ||
| 984263bc MD |
736 | vn->sc_size = 0; |
| 737 | } | |
| 738 | ||
| 30c1fde0 CT |
739 | /* |
| 740 | * vnget: | |
| 741 | * | |
| 742 | * populate a struct vn_user for the VNIOCGET ioctl. | |
| 743 | * interface conventions defined in sys/sys/vnioctl.h. | |
| 744 | */ | |
| 745 | ||
| 746 | static int | |
| 747 | vnget(cdev_t dev, struct vn_softc *vn, struct vn_user *vnu) | |
| 748 | { | |
| 749 | int error, found = 0; | |
| 750 | char *freepath, *fullpath; | |
| 751 | struct vattr vattr; | |
| 752 | ||
| 753 | if (vnu->vnu_unit == -1) { | |
| 754 | vnu->vnu_unit = dkunit(dev); | |
| 755 | } | |
| 756 | else if (vnu->vnu_unit < 0) | |
| 757 | return (EINVAL); | |
| 758 | ||
| 759 | SLIST_FOREACH(vn, &vn_list, sc_list) { | |
| 760 | ||
| 761 | if(vn->sc_unit != vnu->vnu_unit) | |
| 762 | continue; | |
| 763 | ||
| 764 | found = 1; | |
| 765 | ||
| 766 | if (vn->sc_flags & VNF_INITED && vn->sc_vp != NULL) { | |
| 767 | ||
| 768 | /* note: u_cred checked in vnioctl above */ | |
| 769 | error = VOP_GETATTR(vn->sc_vp, &vattr); | |
| 770 | if (error) { | |
| 771 | kprintf("vnget: VOP_GETATTR for %p failed\n", | |
| 772 | vn->sc_vp); | |
| 773 | return (error); | |
| 774 | } | |
| 775 | ||
| 776 | error = vn_fullpath(curproc, vn->sc_vp, | |
| 5b4cfb7e | 777 | &fullpath, &freepath, 0); |
| 30c1fde0 CT |
778 | |
| 779 | if (error) { | |
| 780 | kprintf("vnget: unable to resolve vp %p\n", | |
| 781 | vn->sc_vp); | |
| 782 | return(error); | |
| 783 | } | |
| 784 | ||
| 785 | strlcpy(vnu->vnu_file, fullpath, | |
| 786 | sizeof(vnu->vnu_file)); | |
| 787 | kfree(freepath, M_TEMP); | |
| 788 | vnu->vnu_dev = vattr.va_fsid; | |
| 789 | vnu->vnu_ino = vattr.va_fileid; | |
| 790 | ||
| 791 | } | |
| 792 | else if (vn->sc_flags & VNF_INITED && vn->sc_object != NULL){ | |
| 793 | ||
| 794 | strlcpy(vnu->vnu_file, _VN_USER_SWAP, | |
| 795 | sizeof(vnu->vnu_file)); | |
| 796 | vnu->vnu_size = vn->sc_size; | |
| 797 | vnu->vnu_secsize = vn->sc_secsize; | |
| 798 | ||
| 799 | } else { | |
| 800 | ||
| 801 | bzero(vnu->vnu_file, sizeof(vnu->vnu_file)); | |
| 802 | vnu->vnu_dev = 0; | |
| 803 | vnu->vnu_ino = 0; | |
| 804 | ||
| 805 | } | |
| 806 | break; | |
| 807 | } | |
| 808 | ||
| 809 | if (!found) | |
| 810 | return(ENXIO); | |
| 811 | ||
| 812 | return(0); | |
| 813 | } | |
| 814 | ||
| fef8985e MD |
815 | static int |
| 816 | vnsize(struct dev_psize_args *ap) | |
| 984263bc | 817 | { |
| b13267a5 | 818 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
819 | struct vn_softc *vn; |
| 820 | ||
| 821 | vn = dev->si_drv1; | |
| 822 | if (!vn) | |
| fef8985e | 823 | return(ENXIO); |
| 984263bc | 824 | if ((vn->sc_flags & VNF_INITED) == 0) |
| fef8985e | 825 | return(ENXIO); |
| e0fc5693 | 826 | ap->a_result = (int64_t)vn->sc_size; |
| fef8985e | 827 | return(0); |
| 984263bc MD |
828 | } |
| 829 | ||
| cc80c90e | 830 | static cdev_t |
| 13983537 | 831 | vn_create(int unit, struct devfs_bitmap *bitmap, int clone) |
| 8be7edad | 832 | { |
| cc80c90e AH |
833 | struct vn_softc *vn; |
| 834 | struct disk_info info; | |
| 13983537 | 835 | cdev_t dev, ret_dev; |
| 8be7edad | 836 | |
| cc80c90e | 837 | vn = vncreatevn(); |
| 13983537 AH |
838 | if (clone) { |
| 839 | /* | |
| 840 | * For clone devices we need to return the top-level cdev, | |
| 841 | * not the raw dev we'd normally work with. | |
| 842 | */ | |
| 843 | dev = disk_create_clone(unit, &vn->sc_disk, &vn_ops); | |
| 844 | ret_dev = vn->sc_disk.d_cdev; | |
| 845 | } else { | |
| 846 | ret_dev = dev = disk_create(unit, &vn->sc_disk, &vn_ops); | |
| 847 | } | |
| cc80c90e | 848 | vninitvn(vn, dev); |
| 8be7edad | 849 | |
| cc80c90e AH |
850 | bzero(&info, sizeof(struct disk_info)); |
| 851 | info.d_media_blksize = 512; | |
| 852 | info.d_media_blocks = 0; | |
| 853 | info.d_dsflags = DSO_MBRQUIET; | |
| 854 | info.d_secpertrack = 32; | |
| 855 | info.d_nheads = 64; | |
| 856 | info.d_secpercyl = info.d_secpertrack * info.d_nheads; | |
| 857 | info.d_ncylinders = 0; | |
| 858 | disk_setdiskinfo_sync(&vn->sc_disk, &info); | |
| 859 | ||
| 860 | if (bitmap != NULL) | |
| 861 | devfs_clone_bitmap_set(bitmap, unit); | |
| 862 | ||
| 13983537 | 863 | return ret_dev; |
| 8be7edad | 864 | } |
| 8be7edad | 865 | |
| 984263bc MD |
866 | static int |
| 867 | vn_modevent(module_t mod, int type, void *data) | |
| 868 | { | |
| 869 | struct vn_softc *vn; | |
| cc80c90e | 870 | static cdev_t dev = NULL; |
| 8be7edad | 871 | int i; |
| 984263bc MD |
872 | |
| 873 | switch (type) { | |
| 874 | case MOD_LOAD: | |
| cc80c90e AH |
875 | dev = make_autoclone_dev(&vn_ops, &DEVFS_CLONE_BITMAP(vn), vnclone, UID_ROOT, |
| 876 | GID_OPERATOR, 0640, "vn"); | |
| 877 | ||
| 8be7edad | 878 | for (i = 0; i < VN_PREALLOCATED_UNITS; i++) { |
| 13983537 | 879 | vn_create(i, &DEVFS_CLONE_BITMAP(vn), 0); |
| 8be7edad | 880 | } |
| 984263bc | 881 | break; |
| 13983537 | 882 | |
| 984263bc | 883 | case MOD_UNLOAD: |
| 984263bc | 884 | case MOD_SHUTDOWN: |
| e2f51e29 | 885 | while ((vn = SLIST_FIRST(&vn_list)) != NULL) { |
| 13983537 AH |
886 | /* |
| 887 | * XXX: no idea if we can return EBUSY even in the | |
| 888 | * shutdown case, so err on the side of caution | |
| 889 | * and just rip stuff out on shutdown. | |
| 890 | */ | |
| 891 | if (type != MOD_SHUTDOWN) { | |
| 892 | if (vn->sc_flags & VNF_OPENED) | |
| 893 | return (EBUSY); | |
| 894 | } | |
| 895 | ||
| 896 | disk_destroy(&vn->sc_disk); | |
| 897 | ||
| 984263bc | 898 | SLIST_REMOVE_HEAD(&vn_list, sc_list); |
| 13983537 | 899 | |
| 984263bc MD |
900 | if (vn->sc_flags & VNF_INITED) |
| 901 | vnclear(vn); | |
| 13983537 | 902 | |
| efda3bd0 | 903 | kfree(vn, M_DEVBUF); |
| 984263bc | 904 | } |
| 13983537 | 905 | destroy_autoclone_dev(dev, &DEVFS_CLONE_BITMAP(vn)); |
| cd29885a | 906 | dev_ops_remove_all(&vn_ops); |
| 984263bc MD |
907 | break; |
| 908 | default: | |
| 909 | break; | |
| 910 | } | |
| 911 | return 0; | |
| 912 | } | |
| 913 | ||
| 914 | DEV_MODULE(vn, vn_modevent, 0); |