2 * Copyright (c) 1994 Bruce D. Evans.
5 * Copyright (c) 1990 The Regents of the University of California.
8 * This code is derived from software contributed to Berkeley by
11 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * from: @(#)wd.c 7.2 (Berkeley) 5/9/91
43 * from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44 * from: @(#)ufs_disksubr.c 7.16 (Berkeley) 5/4/91
45 * from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46 * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
47 * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.51 2008/08/29 20:08:36 dillon Exp $
50 #include <sys/param.h>
51 #include <sys/systm.h>
54 #include <sys/disklabel.h>
55 #include <sys/disklabel32.h>
56 #include <sys/disklabel64.h>
57 #include <sys/diskslice.h>
59 #include <sys/diskmbr.h>
60 #include <sys/fcntl.h>
61 #include <sys/malloc.h>
63 #include <sys/syslog.h>
65 #include <sys/vnode.h>
66 #include <sys/device.h>
67 #include <sys/thread2.h>
69 #include <vfs/ufs/dinode.h> /* XXX used only for fs.h */
70 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE/SBSIZE */
71 #include <sys/devfs.h>
73 static int dsreadandsetlabel(cdev_t dev, u_int flags,
74 struct diskslices *ssp, struct diskslice *sp,
75 struct disk_info *info);
76 static void free_ds_label (struct diskslices *ssp, int slice);
77 static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
79 static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
82 * Determine the size of the transfer, and make sure it is
83 * within the boundaries of the partition. Adjust transfer
84 * if needed, and signal errors or early completion.
87 * o Split buffers that are too big for the device.
88 * o Check for overflow.
89 * o Finish cleaning this up.
91 * This function returns 1 on success, 0 if transfer equates
92 * to EOF (end of disk) or -1 on failure. The appropriate
93 * 'errno' value is also set in bp->b_error and bp->b_flags
94 * is marked with B_ERROR.
97 dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
99 struct buf *bp = bio->bio_buf;
106 u_int64_t slicerel_secno;
107 struct diskslice *sp;
113 slice = dkslice(dev);
116 if (bio->bio_offset < 0) {
117 kprintf("dscheck(%s): negative bio_offset %lld\n",
118 devtoname(dev), (long long)bio->bio_offset);
121 if (slice >= ssp->dss_nslices) {
122 kprintf("dscheck(%s): slice too large %d/%d\n",
123 devtoname(dev), slice, ssp->dss_nslices);
126 sp = &ssp->dss_slices[slice];
128 * Calculate secno and nsec
130 if (ssp->dss_secmult == 1) {
133 } else if (ssp->dss_secshift != -1) {
134 shift = DEV_BSHIFT + ssp->dss_secshift;
136 mask = (1 << shift) - 1;
137 if ((int)bp->b_bcount & mask)
139 if ((int)bio->bio_offset & mask)
141 secno = bio->bio_offset >> shift;
142 nsec = bp->b_bcount >> shift;
144 if (bp->b_bcount % ssp->dss_secsize)
146 if (bio->bio_offset % ssp->dss_secsize)
148 secno = bio->bio_offset / ssp->dss_secsize;
149 nsec = bp->b_bcount / ssp->dss_secsize;
153 * Calculate slice-relative sector number and slice-relative
156 if (slice == WHOLE_DISK_SLICE) {
158 * Labels have not been allowed on whole-disks for a while.
159 * This really puts the nail in the coffin.
161 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
162 * and partition numbers are special-cased. Currently numbers
163 * less than 128 are not allowed. Partition numbers >= 128
164 * are encoded in the high 8 bits of the 64 bit buffer offset
165 * and are fed directly through to the device with no
166 * further interpretation. In particular, no sector
167 * translation interpretation should occur because the
168 * sector size for the special raw access may not be the
169 * same as the nominal sector size for the device.
173 kprintf("dscheck(%s): illegal partition number (%d) "
174 "for WHOLE_DISK_SLICE access\n",
175 devtoname(dev), part);
177 } else if (part != WHOLE_SLICE_PART) {
178 nbio = push_bio(bio);
179 nbio->bio_offset = bio->bio_offset |
180 (u_int64_t)part << 56;
184 * If writing to the raw disk request a
185 * reprobe on the last close.
187 if (bp->b_cmd == BUF_CMD_WRITE)
188 sp->ds_flags |= DSF_REPROBE;
192 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
193 * there are no reserved areas.
195 endsecno = sp->ds_size;
196 slicerel_secno = secno;
197 } else if (part == WHOLE_SLICE_PART) {
199 * NOTE! opens on a whole-slice partition will not attempt
200 * to read a disklabel in, so there may not be an in-core
201 * disklabel even if there is one on the disk.
203 endsecno = sp->ds_size;
204 slicerel_secno = secno;
205 } else if ((lp = sp->ds_label).opaque != NULL) {
207 * A label is present, extract the partition. Snooping of
208 * the disklabel is not supported even if accessible. Of
209 * course, the reserved area is still write protected.
212 if (ops->op_getpartbounds(ssp, lp, part,
213 &slicerel_secno, &endsecno)) {
214 kprintf("dscheck(%s): partition %d out of bounds\n",
215 devtoname(dev), part);
218 slicerel_secno += secno;
221 * Attempt to access partition when no disklabel present
223 kprintf("dscheck(%s): attempt to access non-existent partition\n",
229 * Disallow writes to reserved areas unless ds_wlabel allows it.
230 * If the reserved area is written to request a reprobe of the
231 * disklabel when the slice is closed.
233 if (slicerel_secno < sp->ds_reserved && nsec &&
234 bp->b_cmd == BUF_CMD_WRITE) {
235 if (sp->ds_wlabel == 0) {
239 sp->ds_flags |= DSF_REPROBE;
243 * If we get here, bio_offset must be on a block boundary and
244 * the sector size must be a power of 2.
246 if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
247 (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
248 ((ssp->dss_secsize << 1) - 1)) {
249 kprintf("%s: invalid BIO offset, not sector aligned or"
250 " invalid sector size (not power of 2) %08llx %d\n",
251 devtoname(dev), (long long)bio->bio_offset,
259 if (secno + nsec > endsecno) {
261 * Return an error if beyond the end of the disk, or
262 * if B_BNOCLIP is set. Tell the system that we do not
263 * need to keep the buffer around.
265 if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
269 * If exactly at end of disk, return an EOF. Throw away
270 * the buffer contents, if any, by setting B_INVAL.
272 if (secno == endsecno) {
273 bp->b_resid = bp->b_bcount;
274 bp->b_flags |= B_INVAL;
281 nsec = endsecno - secno;
282 bp->b_bcount = nsec * ssp->dss_secsize;
285 nbio = push_bio(bio);
286 nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
292 "dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
293 devtoname(dev), bp->b_bcount, ssp->dss_secsize);
298 "dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
299 devtoname(dev), (long long)bio->bio_offset, ssp->dss_secsize);
301 bp->b_error = EINVAL;
305 * Terminate the I/O with a ranging error. Since the buffer is
306 * either illegal or beyond the file EOF, mark it B_INVAL as well.
308 bp->b_resid = bp->b_bcount;
309 bp->b_flags |= B_ERROR | B_INVAL;
312 * Caller must biodone() the originally passed bio if NULL is
319 * dsclose() - close a cooked disk slice.
321 * WARNING! The passed diskslices and related diskslice structures may
322 * be invalidated or replaced by this function, callers must
323 * reload from the disk structure for continued access.
326 dsclose(cdev_t dev, int mode, struct diskslices *ssp)
330 struct diskslice *sp;
332 slice = dkslice(dev);
334 if (slice < ssp->dss_nslices) {
335 sp = &ssp->dss_slices[slice];
337 if (sp->ds_flags & DSF_REPROBE) {
338 sp->ds_flags &= ~DSF_REPROBE;
339 if (slice == WHOLE_DISK_SLICE) {
340 disk_msg_send_sync(DISK_DISK_REPROBE,
344 disk_msg_send_sync(DISK_SLICE_REPROBE,
348 /* ssp and sp may both be invalid after reprobe */
354 dsgone(struct diskslices **sspp)
357 struct diskslice *sp;
358 struct diskslices *ssp;
360 if ((ssp = *sspp) != NULL) {
361 for (slice = 0; slice < ssp->dss_nslices; slice++) {
362 sp = &ssp->dss_slices[slice];
363 free_ds_label(ssp, slice);
365 kfree(ssp, M_DEVBUF);
371 * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
372 * is subject to the same restriction as dsopen().
375 dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
376 struct diskslices **sspp, struct disk_info *info)
383 u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
386 struct diskslice *sp;
387 struct diskslices *ssp;
389 slice = dkslice(dev);
394 if (slice >= ssp->dss_nslices)
396 sp = &ssp->dss_slices[slice];
398 ops = sp->ds_ops; /* may be NULL if no label */
402 ops = &disklabel32_ops;
405 if (cmd != DIOCGDVIRGIN32)
406 ops = &disklabel64_ops;
408 * You can only retrieve a virgin disklabel on the whole
409 * disk slice or whole-slice partition.
411 if (slice != WHOLE_DISK_SLICE &&
412 part != WHOLE_SLICE_PART) {
417 ops->op_makevirginlabel(lp, ssp, sp, info);
423 * You can only retrieve a disklabel on the whole
426 * We do not support labels directly on whole-disks
427 * any more (that is, disks without slices), unless the
428 * device driver has asked for a compatible label (e.g.
429 * for a CD) to allow booting off of storage that is
430 * otherwise unlabeled.
433 if (part != WHOLE_SLICE_PART)
435 if (slice == WHOLE_DISK_SLICE &&
436 (info->d_dsflags & DSO_COMPATLABEL) == 0) {
439 if (sp->ds_label.opaque == NULL) {
440 error = dsreadandsetlabel(dev, info->d_dsflags,
442 ops = sp->ds_ops; /* may be NULL */
446 * The type of label we found must match the type of
449 if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
452 bcopy(sp->ds_label.opaque, data, ops->labelsize);
457 struct partinfo *dpart = (void *)data;
460 * The disk management layer may not have read the
461 * disklabel yet because simply opening a slice no
462 * longer 'probes' the disk that way. Be sure we
465 * We ignore any error.
467 if (sp->ds_label.opaque == NULL &&
468 part == WHOLE_SLICE_PART &&
469 slice != WHOLE_DISK_SLICE) {
470 dsreadandsetlabel(dev, info->d_dsflags,
472 ops = sp->ds_ops; /* may be NULL */
475 bzero(dpart, sizeof(*dpart));
476 dpart->media_offset = (u_int64_t)sp->ds_offset *
477 info->d_media_blksize;
478 dpart->media_size = (u_int64_t)sp->ds_size *
479 info->d_media_blksize;
480 dpart->media_blocks = sp->ds_size;
481 dpart->media_blksize = info->d_media_blksize;
482 dpart->reserved_blocks= sp->ds_reserved;
483 dpart->fstype_uuid = sp->ds_type_uuid;
484 dpart->storage_uuid = sp->ds_stor_uuid;
486 if (slice != WHOLE_DISK_SLICE &&
487 part != WHOLE_SLICE_PART) {
490 if (lp.opaque == NULL)
492 if (ops->op_getpartbounds(ssp, lp, part,
496 ops->op_loadpartinfo(lp, part, dpart);
497 dpart->media_offset += start *
498 info->d_media_blksize;
499 dpart->media_size = blocks *
500 info->d_media_blksize;
501 dpart->media_blocks = blocks;
504 * partition starting sector (p_offset)
505 * requires slice's reserved areas to be
508 if (dpart->reserved_blocks > start)
509 dpart->reserved_blocks -= start;
511 dpart->reserved_blocks = 0;
515 * Load remaining fields from the info structure
517 dpart->d_nheads = info->d_nheads;
518 dpart->d_ncylinders = info->d_ncylinders;
519 dpart->d_secpertrack = info->d_secpertrack;
520 dpart->d_secpercyl = info->d_secpercyl;
525 bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
530 ops = &disklabel32_ops;
533 if (cmd != DIOCSDINFO32)
534 ops = &disklabel64_ops;
536 * You can write a disklabel on the whole disk slice or
537 * whole-slice partition.
539 if (slice != WHOLE_DISK_SLICE &&
540 part != WHOLE_SLICE_PART) {
545 * We no longer support writing disklabels directly to media
546 * without there being a slice. Keep this as a separate
549 if (slice == WHOLE_DISK_SLICE)
551 if (!(flags & FWRITE))
555 * If an existing label is present it must be the same
556 * type as the label being passed by the ioctl.
558 if (sp->ds_label.opaque && sp->ds_ops != ops)
562 * Create a temporary copy of the existing label
563 * (if present) so setdisklabel can compare it against
566 lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
567 if (sp->ds_label.opaque == NULL)
568 bzero(lp.opaque, ops->labelsize);
570 bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
571 if (sp->ds_label.opaque == NULL) {
572 bzero(openmask, sizeof(openmask));
574 bcopy(sp->ds_openmask, openmask, sizeof(openmask));
577 error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
578 disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
581 kfree(lp.opaque, M_DEVBUF);
584 free_ds_label(ssp, slice);
585 set_ds_label(ssp, slice, lp, ops);
588 case DIOCSYNCSLICEINFO:
590 * This ioctl can only be done on the whole disk
592 if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
595 if (*(int *)data == 0) {
596 for (slice = 0; slice < ssp->dss_nslices; slice++) {
597 struct diskslice *ds = &ssp->dss_slices[slice];
599 switch(dscountmask(ds)) {
603 if (slice != WHOLE_DISK_SLICE)
605 if (!dschkmask(ds, RAW_PART))
614 disk_msg_send_sync(DISK_DISK_REPROBE, dev->si_disk, NULL);
620 error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
621 DIOCSDINFO32 : DIOCSDINFO64),
622 data, flags, &ssp, info);
623 if (error == 0 && sp->ds_label.opaque == NULL)
625 if (part != WHOLE_SLICE_PART)
631 * Allow the reserved area to be written, reload ops
632 * because the DIOCSDINFO op above may have installed
636 old_wlabel = sp->ds_wlabel;
637 set_ds_wlabel(ssp, slice, TRUE);
638 error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
639 disk_msg_send_sync(DISK_SLICE_REPROBE, dev->si_disk, sp);
641 set_ds_wlabel(ssp, slice, old_wlabel);
642 /* XXX should invalidate in-core label if write failed. */
646 if (slice == WHOLE_DISK_SLICE)
648 if (!(flags & FWRITE))
650 set_ds_wlabel(ssp, slice, *(int *)data != 0);
659 dsisopen(struct diskslices *ssp)
665 for (slice = 0; slice < ssp->dss_nslices; slice++) {
666 if (dscountmask(&ssp->dss_slices[slice]))
673 * Allocate a slices "struct" and initialize it to contain only an empty
674 * compatibility slice (pointing to itself), a whole disk slice (covering
675 * the disk as described by the label), and (nslices - BASE_SLICES) empty
676 * slices beginning at BASE_SLICE.
678 * Note that the compatibility slice is no longer really a compatibility
679 * slice. It is slice 0 if a GPT label is present, and the dangerously
680 * dedicated slice if no slice table otherwise exists. Else it is 0-sized.
683 dsmakeslicestruct(int nslices, struct disk_info *info)
685 struct diskslice *sp;
686 struct diskslices *ssp;
688 ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
689 nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
690 ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
691 ssp->dss_nslices = nslices;
695 * Figure out if we can use shifts or whether we have to
696 * use mod/multiply to translate byte offsets into sector numbers.
698 if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
699 (info->d_media_blksize << 1) - 1) {
700 ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
701 if (ssp->dss_secmult & (ssp->dss_secmult - 1))
702 ssp->dss_secshift = -1;
704 ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
706 ssp->dss_secmult = 0;
707 ssp->dss_secshift = -1;
709 ssp->dss_secsize = info->d_media_blksize;
710 sp = &ssp->dss_slices[0];
711 bzero(sp, nslices * sizeof *sp);
712 sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
717 dsname(cdev_t dev, int unit, int slice, int part, char *partname)
723 * This should only be called when the unit is inactive and the strategy
724 * routine should not allow it to become active unless we call it. Our
725 * strategy routine must be special to allow activity.
728 dsopen(cdev_t dev, int mode, u_int flags,
729 struct diskslices **sspp, struct disk_info *info)
731 struct diskslice *sp;
732 struct diskslices *ssp;
737 dev->si_bsize_phys = info->d_media_blksize;
738 slice = dkslice(dev);
740 sp = &ssp->dss_slices[slice];
747 * Attempt to read the disklabel. If successful, store it in sp->ds_label.
749 * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
750 * a fake label covering the whole disk.
754 dsreadandsetlabel(cdev_t dev, u_int flags,
755 struct diskslices *ssp, struct diskslice *sp,
756 struct disk_info *info)
763 int slice = dkslice(dev);
766 * Probe the disklabel
769 sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
770 ops = &disklabel32_ops;
771 msg = ops->op_readdisklabel(dev, sp, &lp, info);
772 if (msg && strcmp(msg, "no disk label") == 0) {
773 ops = &disklabel64_ops;
774 msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
778 * If we failed and COMPATLABEL is set, create a dummy disklabel.
780 if (msg != NULL && (flags & DSO_COMPATLABEL)) {
782 if (sp->ds_size >= 0x100000000ULL)
783 ops = &disklabel64_ops;
785 ops = &disklabel32_ops;
786 lp = ops->op_clone_label(info, sp);
789 if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
790 log(LOG_WARNING, "%s: cannot find label (%s)\n",
793 kfree(lp.opaque, M_DEVBUF);
795 set_ds_label(ssp, slice, lp, ops);
796 set_ds_wlabel(ssp, slice, FALSE);
798 return (msg ? EINVAL : 0);
802 dssize(cdev_t dev, struct diskslices **sspp)
808 struct diskslices *ssp;
812 slice = dkslice(dev);
815 if (ssp == NULL || slice >= ssp->dss_nslices
816 || !dschkmask(&ssp->dss_slices[slice], part)) {
817 if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
819 dev_dclose(dev, FREAD, S_IFCHR);
822 lp = ssp->dss_slices[slice].ds_label;
823 if (lp.opaque == NULL)
825 ops = ssp->dss_slices[slice].ds_ops;
826 if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
828 return ((int64_t)blocks);
832 free_ds_label(struct diskslices *ssp, int slice)
834 struct diskslice *sp;
837 sp = &ssp->dss_slices[slice];
839 if (lp.opaque != NULL) {
840 kfree(lp.opaque, M_DEVBUF);
842 set_ds_label(ssp, slice, lp, NULL);
847 set_ds_label(struct diskslices *ssp, int slice,
848 disklabel_t lp, disklabel_ops_t ops)
850 struct diskslice *sp = &ssp->dss_slices[slice];
854 if (lp.opaque && slice != WHOLE_DISK_SLICE)
855 ops->op_adjust_label_reserved(ssp, slice, sp);
861 set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
863 ssp->dss_slices[slice].ds_wlabel = wlabel;