Implement non-booting support for the DragonFly 64 bit disklabel:
[dragonfly.git] / sys / kern / subr_diskslice.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1994 Bruce D. Evans.
3 * All rights reserved.
4 *
5 * Copyright (c) 1990 The Regents of the University of California.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * William Jolitz.
10 *
11 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * from: @(#)wd.c 7.2 (Berkeley) 5/9/91
43 * from: wd.c,v 1.55 1994/10/22 01:57:12 phk Exp $
44 * from: @(#)ufs_disksubr.c 7.16 (Berkeley) 5/4/91
45 * from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $
46 * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $
0ffe40b3 47 * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.47 2007/06/19 02:53:56 dillon Exp $
984263bc
MD
48 */
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/buf.h>
53#include <sys/conf.h>
54#include <sys/disklabel.h>
2b961883 55#include <sys/disklabel32.h>
0ffe40b3 56#include <sys/disklabel64.h>
984263bc 57#include <sys/diskslice.h>
a688b15c 58#include <sys/disk.h>
dc62b251 59#include <sys/diskmbr.h>
984263bc
MD
60#include <sys/fcntl.h>
61#include <sys/malloc.h>
62#include <sys/stat.h>
63#include <sys/syslog.h>
fef8985e 64#include <sys/proc.h>
984263bc 65#include <sys/vnode.h>
335dda38 66#include <sys/device.h>
e43a034f 67#include <sys/thread2.h>
984263bc 68
50e58362
MD
69#include <vfs/ufs/dinode.h> /* XXX used only for fs.h */
70#include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE/SBSIZE */
984263bc 71
5350e1e9
MD
72static int dsreadandsetlabel(cdev_t dev, u_int flags,
73 struct diskslices *ssp, struct diskslice *sp,
74 struct disk_info *info);
402ed7e1 75static void free_ds_label (struct diskslices *ssp, int slice);
0ffe40b3
MD
76static void set_ds_label (struct diskslices *ssp, int slice, disklabel_t lp,
77 disklabel_ops_t ops);
e1c7bccd 78static void set_ds_wlabel (struct diskslices *ssp, int slice, int wlabel);
984263bc 79
984263bc
MD
80/*
81 * Determine the size of the transfer, and make sure it is
82 * within the boundaries of the partition. Adjust transfer
83 * if needed, and signal errors or early completion.
84 *
85 * XXX TODO:
86 * o Split buffers that are too big for the device.
87 * o Check for overflow.
88 * o Finish cleaning this up.
ea5f2871
HP
89 *
90 * This function returns 1 on success, 0 if transfer equates
91 * to EOF (end of disk) or -1 on failure. The appropriate
92 * 'errno' value is also set in bp->b_error and bp->b_flags
93 * is marked with B_ERROR.
984263bc 94 */
81b5c339 95struct bio *
b13267a5 96dscheck(cdev_t dev, struct bio *bio, struct diskslices *ssp)
984263bc 97{
81b5c339
MD
98 struct buf *bp = bio->bio_buf;
99 struct bio *nbio;
2b961883 100 disklabel_t lp;
0ffe40b3 101 disklabel_ops_t ops;
e1c7bccd 102 long nsec;
e0fc5693
MD
103 u_int64_t secno;
104 u_int64_t endsecno;
e0fc5693 105 u_int64_t slicerel_secno;
984263bc 106 struct diskslice *sp;
5350e1e9
MD
107 u_int32_t part;
108 u_int32_t slice;
54078292
MD
109 int shift;
110 int mask;
984263bc 111
5350e1e9
MD
112 slice = dkslice(dev);
113 part = dkpart(dev);
114
54078292 115 if (bio->bio_offset < 0) {
6ea70f76 116 kprintf("dscheck(%s): negative bio_offset %lld\n",
5350e1e9 117 devtoname(dev), bio->bio_offset);
984263bc
MD
118 goto bad;
119 }
5350e1e9
MD
120 if (slice >= ssp->dss_nslices) {
121 kprintf("dscheck(%s): slice too large %d/%d\n",
122 devtoname(dev), slice, ssp->dss_nslices);
123 goto bad;
124 }
125 sp = &ssp->dss_slices[slice];
54078292 126
5350e1e9
MD
127 /*
128 * Calculate secno and nsec
129 */
984263bc 130 if (ssp->dss_secmult == 1) {
54078292
MD
131 shift = DEV_BSHIFT;
132 goto doshift;
984263bc 133 } else if (ssp->dss_secshift != -1) {
54078292
MD
134 shift = DEV_BSHIFT + ssp->dss_secshift;
135doshift:
136 mask = (1 << shift) - 1;
137 if ((int)bp->b_bcount & mask)
984263bc 138 goto bad_bcount;
54078292 139 if ((int)bio->bio_offset & mask)
984263bc 140 goto bad_blkno;
e0fc5693 141 secno = bio->bio_offset >> shift;
54078292 142 nsec = bp->b_bcount >> shift;
984263bc
MD
143 } else {
144 if (bp->b_bcount % ssp->dss_secsize)
145 goto bad_bcount;
54078292 146 if (bio->bio_offset % ssp->dss_secsize)
984263bc 147 goto bad_blkno;
e0fc5693 148 secno = bio->bio_offset / ssp->dss_secsize;
984263bc
MD
149 nsec = bp->b_bcount / ssp->dss_secsize;
150 }
5350e1e9
MD
151
152 /*
153 * Calculate slice-relative sector number end slice-relative
154 * limit.
155 */
8a88e0d0 156 if (slice == WHOLE_DISK_SLICE) {
5350e1e9
MD
157 /*
158 * Labels have not been allowed on whole-disks for a while.
2b961883 159 * This really puts the nail in the coffin.
8a88e0d0
MD
160 *
161 * Accesses to the WHOLE_DISK_SLICE do not use a disklabel
162 * and partition numbers are special-cased. Currently numbers
163 * less then 128 are not allowed. Partition numbers >= 128
164 * are encoded in the high 8 bits of the 64 bit buffer offset
165 * and are fed directly through to the device with no
166 * further interpretation. In particular, no sector
167 * translation interpretation should occur because the
168 * sector size for the special raw access may not be the
169 * same as the nominal sector size for the device.
170 */
2b961883 171 lp.opaque = NULL;
8a88e0d0
MD
172 if (part < 128) {
173 kprintf("dscheck(%s): illegal partition number (%d) "
174 "for WHOLE_DISK_SLICE access\n",
175 devtoname(dev), part);
176 goto bad;
177 } else if (part != WHOLE_SLICE_PART) {
178 nbio = push_bio(bio);
179 nbio->bio_offset = bio->bio_offset |
180 (u_int64_t)part << 56;
181 return(nbio);
182 }
183
184 /*
1c3c151b
MD
185 * sp->ds_size is for the whole disk in the WHOLE_DISK_SLICE,
186 * there are no reserved areas.
8a88e0d0 187 */
984263bc
MD
188 endsecno = sp->ds_size;
189 slicerel_secno = secno;
5350e1e9 190 } else if (part == WHOLE_SLICE_PART) {
5a8edc7a 191 /*
5a8edc7a 192 * NOTE! opens on a whole-slice partition will not attempt
1c3c151b
MD
193 * to read a disklabel in, so there may not be an in-core
194 * disklabel even if there is one on the disk.
5350e1e9 195 */
5350e1e9
MD
196 endsecno = sp->ds_size;
197 slicerel_secno = secno;
2b961883 198 } else if ((lp = sp->ds_label).opaque != NULL) {
5a8edc7a 199 /*
ba0cc1ab
MD
200 * A label is present, extract the partition. Snooping of
201 * the disklabel is not supported even if accessible. Of
202 * course, the reserved area is still write protected.
5a8edc7a 203 */
0ffe40b3
MD
204 ops = sp->ds_ops;
205 if (ops->op_getpartbounds(ssp, lp, part,
206 &slicerel_secno, &endsecno)) {
ba0cc1ab
MD
207 kprintf("dscheck(%s): partition %d out of bounds\n",
208 devtoname(dev), part);
209 goto bad;
210 }
211 slicerel_secno += secno;
984263bc 212 } else {
8a88e0d0 213 /*
5a8edc7a 214 * Attempt to access partition when no disklabel present
8a88e0d0 215 */
5a8edc7a
MD
216 kprintf("dscheck(%s): attempt to access non-existant partition\n",
217 devtoname(dev));
218 goto bad;
984263bc
MD
219 }
220
5a8edc7a 221 /*
1c3c151b 222 * Disallow writes to reserved areas unless ds_wlabel allows it.
5a8edc7a 223 */
1c3c151b 224 if (slicerel_secno < sp->ds_reserved && nsec &&
10f3fee5 225 bp->b_cmd != BUF_CMD_READ && sp->ds_wlabel == 0) {
984263bc 226 bp->b_error = EROFS;
4414f2c9 227 goto error;
984263bc
MD
228 }
229
8a88e0d0 230 /*
5a8edc7a
MD
231 * If we get here, bio_offset must be on a block boundary and
232 * the sector size must be a power of 2.
8a88e0d0
MD
233 */
234 if ((bio->bio_offset & (ssp->dss_secsize - 1)) ||
235 (ssp->dss_secsize ^ (ssp->dss_secsize - 1)) !=
236 ((ssp->dss_secsize << 1) - 1)) {
237 kprintf("%s: invalid BIO offset, not sector aligned or"
238 " invalid sector size (not power of 2) %08llx %d\n",
239 devtoname(dev), bio->bio_offset, ssp->dss_secsize);
240 goto bad;
984263bc 241 }
984263bc 242
4414f2c9
MD
243 /*
244 * EOF handling
245 */
984263bc 246 if (secno + nsec > endsecno) {
9a71d53f 247 /*
4414f2c9
MD
248 * Return an error if beyond the end of the disk, or
249 * if B_BNOCLIP is set. Tell the system that we do not
250 * need to keep the buffer around.
251 */
252 if (secno > endsecno || (bp->b_flags & B_BNOCLIP))
253 goto bad;
254
255 /*
256 * If exactly at end of disk, return an EOF. Throw away
257 * the buffer contents, if any, by setting B_INVAL.
9a71d53f 258 */
984263bc
MD
259 if (secno == endsecno) {
260 bp->b_resid = bp->b_bcount;
9a71d53f 261 bp->b_flags |= B_INVAL;
4414f2c9 262 goto done;
984263bc 263 }
4414f2c9
MD
264
265 /*
266 * Else truncate
267 */
984263bc 268 nsec = endsecno - secno;
984263bc
MD
269 bp->b_bcount = nsec * ssp->dss_secsize;
270 }
271
81b5c339 272 nbio = push_bio(bio);
4414f2c9
MD
273 nbio->bio_offset = (off_t)(sp->ds_offset + slicerel_secno) *
274 ssp->dss_secsize;
81b5c339 275 return (nbio);
984263bc
MD
276
277bad_bcount:
6ea70f76 278 kprintf(
54078292 279 "dscheck(%s): b_bcount %d is not on a sector boundary (ssize %d)\n",
81b5c339 280 devtoname(dev), bp->b_bcount, ssp->dss_secsize);
984263bc
MD
281 goto bad;
282
283bad_blkno:
6ea70f76 284 kprintf(
54078292
MD
285 "dscheck(%s): bio_offset %lld is not on a sector boundary (ssize %d)\n",
286 devtoname(dev), bio->bio_offset, ssp->dss_secsize);
4414f2c9 287bad:
984263bc 288 bp->b_error = EINVAL;
9a71d53f 289 /* fall through */
4414f2c9 290error:
9a71d53f
MD
291 /*
292 * Terminate the I/O with a ranging error. Since the buffer is
293 * either illegal or beyond the file EOF, mark it B_INVAL as well.
294 */
984263bc 295 bp->b_resid = bp->b_bcount;
9a71d53f 296 bp->b_flags |= B_ERROR | B_INVAL;
4414f2c9
MD
297done:
298 /*
299 * Caller must biodone() the originally passed bio if NULL is
300 * returned.
301 */
81b5c339 302 return (NULL);
984263bc
MD
303}
304
305void
b13267a5 306dsclose(cdev_t dev, int mode, struct diskslices *ssp)
984263bc 307{
5350e1e9
MD
308 u_int32_t part;
309 u_int32_t slice;
984263bc
MD
310 struct diskslice *sp;
311
5350e1e9
MD
312 slice = dkslice(dev);
313 part = dkpart(dev);
314 if (slice < ssp->dss_nslices) {
315 sp = &ssp->dss_slices[slice];
41cf3502 316 dsclrmask(sp, part);
5350e1e9 317 }
984263bc
MD
318}
319
320void
e1c7bccd 321dsgone(struct diskslices **sspp)
984263bc 322{
e1c7bccd 323 int slice;
984263bc
MD
324 struct diskslice *sp;
325 struct diskslices *ssp;
326
327 for (slice = 0, ssp = *sspp; slice < ssp->dss_nslices; slice++) {
328 sp = &ssp->dss_slices[slice];
329 free_ds_label(ssp, slice);
330 }
efda3bd0 331 kfree(ssp, M_DEVBUF);
984263bc
MD
332 *sspp = NULL;
333}
334
335/*
336 * For the "write" commands (DIOCSDINFO and DIOCWDINFO), this
337 * is subject to the same restriction as dsopen().
338 */
339int
84f8b009
MD
340dsioctl(cdev_t dev, u_long cmd, caddr_t data, int flags,
341 struct diskslices **sspp, struct disk_info *info)
984263bc 342{
e1c7bccd 343 int error;
2b961883
MD
344 disklabel_t lp;
345 disklabel_t lptmp;
0ffe40b3 346 disklabel_ops_t ops;
e1c7bccd 347 int old_wlabel;
ae81fc47 348 u_int32_t openmask[DKMAXPARTITIONS/(sizeof(u_int32_t)*8)];
e1c7bccd
MD
349 int part;
350 int slice;
984263bc
MD
351 struct diskslice *sp;
352 struct diskslices *ssp;
984263bc
MD
353
354 slice = dkslice(dev);
5350e1e9 355 part = dkpart(dev);
984263bc 356 ssp = *sspp;
5350e1e9
MD
357 if (slice >= ssp->dss_nslices)
358 return (EINVAL);
984263bc
MD
359 sp = &ssp->dss_slices[slice];
360 lp = sp->ds_label;
0ffe40b3 361 ops = sp->ds_ops; /* may be NULL if no label */
984263bc 362
1c3c151b 363 switch (cmd) {
2b961883 364 case DIOCGDVIRGIN32:
0ffe40b3
MD
365 ops = &disklabel32_ops;
366 /* fall through */
367 case DIOCGDVIRGIN64:
368 if (cmd != DIOCGDVIRGIN32)
369 ops = &disklabel64_ops;
5350e1e9
MD
370 /*
371 * You can only retrieve a virgin disklabel on the whole
372 * disk slice or whole-slice partition.
373 */
374 if (slice != WHOLE_DISK_SLICE &&
375 part != WHOLE_SLICE_PART) {
376 return(EINVAL);
377 }
378
2b961883
MD
379 lp.opaque = data;
380 ops->op_makevirginlabel(lp, ssp, sp, info);
984263bc
MD
381 return (0);
382
2b961883 383 case DIOCGDINFO32:
0ffe40b3 384 case DIOCGDINFO64:
5350e1e9
MD
385 /*
386 * You can only retrieve a disklabel on the whole
387 * slice partition.
388 *
389 * We do not support labels directly on whole-disks
390 * any more (that is, disks without slices), unless the
391 * device driver has asked for a compatible label (e.g.
392 * for a CD) to allow booting off of storage that is
393 * otherwise unlabeled.
394 */
395 error = 0;
396 if (part != WHOLE_SLICE_PART)
397 return(EINVAL);
398 if (slice == WHOLE_DISK_SLICE &&
399 (info->d_dsflags & DSO_COMPATLABEL) == 0) {
400 return (ENODEV);
401 }
2b961883 402 if (sp->ds_label.opaque == NULL) {
5350e1e9
MD
403 error = dsreadandsetlabel(dev, info->d_dsflags,
404 ssp, sp, info);
0ffe40b3 405 ops = sp->ds_ops; /* may be NULL */
5350e1e9 406 }
0ffe40b3
MD
407
408 /*
409 * The type of label we found must match the type of
410 * label requested.
411 */
412 if (error == 0 && IOCPARM_LEN(cmd) != ops->labelsize)
413 error = ENOATTR;
5350e1e9 414 if (error == 0)
2b961883 415 bcopy(sp->ds_label.opaque, data, ops->labelsize);
5350e1e9 416 return (error);
984263bc
MD
417
418 case DIOCGPART:
2ec8fb79
MD
419 {
420 struct partinfo *dpart = (void *)data;
421
5a8edc7a 422 /*
1c3c151b
MD
423 * The disk management layer may not have read the
424 * disklabel yet because simply opening a slice no
425 * longer 'probes' the disk that way. Be sure we
426 * have tried.
5a8edc7a
MD
427 *
428 * We ignore any error.
429 */
2b961883
MD
430 if (sp->ds_label.opaque == NULL &&
431 part == WHOLE_SLICE_PART &&
5a8edc7a
MD
432 slice != WHOLE_DISK_SLICE) {
433 dsreadandsetlabel(dev, info->d_dsflags,
434 ssp, sp, info);
0ffe40b3 435 ops = sp->ds_ops; /* may be NULL */
5a8edc7a
MD
436 }
437
2ec8fb79
MD
438 bzero(dpart, sizeof(*dpart));
439 dpart->media_offset = (u_int64_t)sp->ds_offset *
440 info->d_media_blksize;
441 dpart->media_size = (u_int64_t)sp->ds_size *
442 info->d_media_blksize;
443 dpart->media_blocks = sp->ds_size;
444 dpart->media_blksize = info->d_media_blksize;
1c3c151b 445 dpart->reserved_blocks= sp->ds_reserved;
5350e1e9
MD
446
447 if (slice != WHOLE_DISK_SLICE &&
448 part != WHOLE_SLICE_PART) {
ba0cc1ab
MD
449 u_int64_t start;
450 u_int64_t blocks;
2b961883 451 if (lp.opaque == NULL)
5350e1e9 452 return(EINVAL);
0ffe40b3
MD
453 if (ops->op_getpartbounds(ssp, lp, part,
454 &start, &blocks)) {
ba0cc1ab 455 return(EINVAL);
0ffe40b3 456 }
2b961883 457 dpart->fstype = ops->op_getpartfstype(lp, part);
ba0cc1ab 458 dpart->media_offset += start *
2ec8fb79 459 info->d_media_blksize;
ba0cc1ab 460 dpart->media_size = blocks *
2ec8fb79 461 info->d_media_blksize;
ba0cc1ab 462 dpart->media_blocks = blocks;
154b688d
MD
463
464 /*
465 * partition starting sector (p_offset)
466 * requires slice's reserved areas to be
467 * adjusted.
468 */
ba0cc1ab
MD
469 if (dpart->reserved_blocks > start)
470 dpart->reserved_blocks -= start;
154b688d 471 else
1c3c151b 472 dpart->reserved_blocks = 0;
2ec8fb79 473 }
291fc38f
MD
474
475 /*
476 * Load remaining fields from the info structure
477 */
478 dpart->d_nheads = info->d_nheads;
479 dpart->d_ncylinders = info->d_ncylinders;
480 dpart->d_secpertrack = info->d_secpertrack;
481 dpart->d_secpercyl = info->d_secpercyl;
2ec8fb79 482 }
984263bc
MD
483 return (0);
484
485 case DIOCGSLICEINFO:
486 bcopy(ssp, data, (char *)&ssp->dss_slices[ssp->dss_nslices] -
487 (char *)ssp);
488 return (0);
489
2b961883 490 case DIOCSDINFO32:
0ffe40b3
MD
491 ops = &disklabel32_ops;
492 /* fall through */
493 case DIOCSDINFO64:
494 if (cmd != DIOCSDINFO32)
495 ops = &disklabel64_ops;
5350e1e9
MD
496 /*
497 * You can write a disklabel on the whole disk slice or
498 * whole-slice partition.
499 */
500 if (slice != WHOLE_DISK_SLICE &&
501 part != WHOLE_SLICE_PART) {
502 return(EINVAL);
503 }
504
505 /*
506 * We no longer support writing disklabels directly to media
507 * without there being a slice. Keep this as a separate
508 * conditional.
509 */
984263bc
MD
510 if (slice == WHOLE_DISK_SLICE)
511 return (ENODEV);
512 if (!(flags & FWRITE))
513 return (EBADF);
0ffe40b3
MD
514
515 /*
516 * If an existing label is present it must be the same
517 * type as the label being passed by the ioctl.
518 */
519 if (sp->ds_label.opaque && sp->ds_ops != ops)
520 return (ENOATTR);
521
522 /*
523 * Create a temporary copy of the existing label
524 * (if present) so setdisklabel can compare it against
525 * the new label.
526 */
2b961883
MD
527 lp.opaque = kmalloc(ops->labelsize, M_DEVBUF, M_WAITOK);
528 if (sp->ds_label.opaque == NULL)
529 bzero(lp.opaque, ops->labelsize);
984263bc 530 else
2b961883
MD
531 bcopy(sp->ds_label.opaque, lp.opaque, ops->labelsize);
532 if (sp->ds_label.opaque == NULL) {
41cf3502 533 bzero(openmask, sizeof(openmask));
154b688d 534 } else {
41cf3502 535 bcopy(sp->ds_openmask, openmask, sizeof(openmask));
984263bc 536 }
2b961883 537 lptmp.opaque = data;
0ffe40b3 538 error = ops->op_setdisklabel(lp, lptmp, ssp, sp, openmask);
984263bc 539 if (error != 0) {
2b961883 540 kfree(lp.opaque, M_DEVBUF);
984263bc
MD
541 return (error);
542 }
543 free_ds_label(ssp, slice);
0ffe40b3 544 set_ds_label(ssp, slice, lp, ops);
984263bc
MD
545 return (0);
546
547 case DIOCSYNCSLICEINFO:
5350e1e9
MD
548 /*
549 * This ioctl can only be done on the whole disk
550 */
551 if (slice != WHOLE_DISK_SLICE || part != WHOLE_SLICE_PART)
984263bc 552 return (EINVAL);
5350e1e9 553
84f8b009 554 if (*(int *)data == 0) {
984263bc 555 for (slice = 0; slice < ssp->dss_nslices; slice++) {
41cf3502
MD
556 struct diskslice *ds = &ssp->dss_slices[slice];
557
558 switch(dscountmask(ds)) {
559 case 0:
560 break;
561 case 1:
562 if (slice != WHOLE_DISK_SLICE)
563 return (EBUSY);
564 if (!dschkmask(ds, RAW_PART))
565 return (EBUSY);
566 break;
567 default:
984263bc 568 return (EBUSY);
84f8b009 569 }
984263bc 570 }
84f8b009 571 }
984263bc
MD
572
573 /*
574 * Temporarily forget the current slices struct and read
575 * the current one.
84f8b009
MD
576 *
577 * NOTE:
578 *
984263bc
MD
579 * XXX should wait for current accesses on this disk to
580 * complete, then lock out future accesses and opens.
581 */
582 *sspp = NULL;
84f8b009 583 error = dsopen(dev, S_IFCHR, ssp->dss_oflags, sspp, info);
984263bc 584 if (error != 0) {
984263bc
MD
585 *sspp = ssp;
586 return (error);
587 }
588
589 /*
590 * Reopen everything. This is a no-op except in the "force"
591 * case and when the raw bdev and cdev are both open. Abort
592 * if anything fails.
593 */
594 for (slice = 0; slice < ssp->dss_nslices; slice++) {
41cf3502
MD
595 for (part = 0; part < DKMAXPARTITIONS; ++part) {
596 if (!dschkmask(&ssp->dss_slices[slice], part))
984263bc
MD
597 continue;
598 error = dsopen(dkmodslice(dkmodpart(dev, part),
599 slice),
600 S_IFCHR, ssp->dss_oflags, sspp,
84f8b009 601 info);
984263bc 602 if (error != 0) {
984263bc
MD
603 *sspp = ssp;
604 return (EBUSY);
605 }
606 }
607 }
608
984263bc
MD
609 dsgone(&ssp);
610 return (0);
611
2b961883 612 case DIOCWDINFO32:
0ffe40b3
MD
613 case DIOCWDINFO64:
614 error = dsioctl(dev, ((cmd == DIOCWDINFO32) ?
615 DIOCSDINFO32 : DIOCSDINFO64),
616 data, flags, &ssp, info);
617 if (error == 0 && sp->ds_label.opaque == NULL)
618 error = EINVAL;
984263bc
MD
619 if (error != 0)
620 return (error);
1c3c151b 621
984263bc 622 /*
0ffe40b3
MD
623 * Allow the reserved area to be written, reload ops
624 * because the DIOCSDINFO op above may have installed
625 * a new label type.
984263bc 626 */
0ffe40b3 627 ops = sp->ds_ops;
984263bc
MD
628 old_wlabel = sp->ds_wlabel;
629 set_ds_wlabel(ssp, slice, TRUE);
0ffe40b3 630 error = ops->op_writedisklabel(dev, ssp, sp, sp->ds_label);
984263bc 631 set_ds_wlabel(ssp, slice, old_wlabel);
1c3c151b 632 /* XXX should invalidate in-core label if write failed. */
984263bc
MD
633 return (error);
634
635 case DIOCWLABEL:
984263bc
MD
636 if (slice == WHOLE_DISK_SLICE)
637 return (ENODEV);
984263bc
MD
638 if (!(flags & FWRITE))
639 return (EBADF);
640 set_ds_wlabel(ssp, slice, *(int *)data != 0);
641 return (0);
642
643 default:
644 return (ENOIOCTL);
645 }
646}
647
984263bc 648int
e1c7bccd 649dsisopen(struct diskslices *ssp)
984263bc 650{
e1c7bccd 651 int slice;
984263bc
MD
652
653 if (ssp == NULL)
654 return (0);
e1c7bccd 655 for (slice = 0; slice < ssp->dss_nslices; slice++) {
41cf3502 656 if (dscountmask(&ssp->dss_slices[slice]))
984263bc 657 return (1);
e1c7bccd 658 }
984263bc
MD
659 return (0);
660}
661
662/*
663 * Allocate a slices "struct" and initialize it to contain only an empty
664 * compatibility slice (pointing to itself), a whole disk slice (covering
665 * the disk as described by the label), and (nslices - BASE_SLICES) empty
666 * slices beginning at BASE_SLICE.
1c3c151b
MD
667 *
668 * Note that the compatibility slice is no longer really a compatibility
669 * slice. It is slice 0 if a GPT label is present, and the dangerously
670 * dedicated slice if no slice table otherwise exists. Else it is 0-sized.
984263bc
MD
671 */
672struct diskslices *
84f8b009 673dsmakeslicestruct(int nslices, struct disk_info *info)
984263bc
MD
674{
675 struct diskslice *sp;
676 struct diskslices *ssp;
677
77652cad 678 ssp = kmalloc(offsetof(struct diskslices, dss_slices) +
984263bc
MD
679 nslices * sizeof *sp, M_DEVBUF, M_WAITOK);
680 ssp->dss_first_bsd_slice = COMPATIBILITY_SLICE;
681 ssp->dss_nslices = nslices;
682 ssp->dss_oflags = 0;
8a88e0d0
MD
683
684 /*
685 * Figure out if we can use shifts or whether we have to
686 * use mod/multply to translate byte offsets into sector numbers.
687 */
688 if ((info->d_media_blksize ^ (info->d_media_blksize - 1)) ==
689 (info->d_media_blksize << 1) - 1) {
690 ssp->dss_secmult = info->d_media_blksize / DEV_BSIZE;
691 if (ssp->dss_secmult & (ssp->dss_secmult - 1))
692 ssp->dss_secshift = -1;
693 else
694 ssp->dss_secshift = ffs(ssp->dss_secmult) - 1;
695 } else {
696 ssp->dss_secmult = 0;
984263bc 697 ssp->dss_secshift = -1;
8a88e0d0 698 }
84f8b009 699 ssp->dss_secsize = info->d_media_blksize;
984263bc
MD
700 sp = &ssp->dss_slices[0];
701 bzero(sp, nslices * sizeof *sp);
84f8b009 702 sp[WHOLE_DISK_SLICE].ds_size = info->d_media_blocks;
984263bc
MD
703 return (ssp);
704}
705
706char *
b13267a5 707dsname(cdev_t dev, int unit, int slice, int part, char *partname)
984263bc
MD
708{
709 static char name[32];
710 const char *dname;
5350e1e9 711 int used;
984263bc 712
335dda38 713 dname = dev_dname(dev);
984263bc
MD
714 if (strlen(dname) > 16)
715 dname = "nametoolong";
f8c7a42d 716 ksnprintf(name, sizeof(name), "%s%d", dname, unit);
984263bc 717 partname[0] = '\0';
5350e1e9
MD
718 used = strlen(name);
719
720 if (slice != WHOLE_DISK_SLICE) {
c6f49b01
MD
721 /*
722 * slice or slice + partition. BASE_SLICE is s1, but
723 * the compatibility slice (0) needs to be s0.
724 */
291fc38f 725 used += ksnprintf(name + used, sizeof(name) - used,
c6f49b01 726 "s%d", (slice ? slice - BASE_SLICE + 1 : 0));
5350e1e9
MD
727 if (part != WHOLE_SLICE_PART) {
728 used += ksnprintf(name + used, sizeof(name) - used,
729 "%c", 'a' + part);
730 partname[0] = 'a' + part;
731 partname[1] = 0;
e1c7bccd 732 }
c6f49b01
MD
733 } else if (part == WHOLE_SLICE_PART) {
734 /*
735 * whole-disk-device, raw access to disk
736 */
737 /* no string extension */
8a88e0d0
MD
738 } else if (part > 128) {
739 /*
c6f49b01
MD
740 * whole-disk-device, extended raw access partitions.
741 * (typically used to access CD audio tracks)
8a88e0d0
MD
742 */
743 used += ksnprintf(name + used, sizeof(name) - used,
744 "t%d", part - 128);
c6f49b01
MD
745 } else {
746 /*
747 * whole-disk-device, illegal partition number
748 */
749 used += ksnprintf(name + used, sizeof(name) - used,
750 "?%d", part);
984263bc
MD
751 }
752 return (name);
753}
754
755/*
756 * This should only be called when the unit is inactive and the strategy
757 * routine should not allow it to become active unless we call it. Our
758 * strategy routine must be special to allow activity.
759 */
760int
b13267a5 761dsopen(cdev_t dev, int mode, u_int flags,
84f8b009 762 struct diskslices **sspp, struct disk_info *info)
984263bc 763{
b13267a5 764 cdev_t dev1;
e1c7bccd 765 int error;
ba0cc1ab 766 int need_init;
984263bc
MD
767 struct diskslice *sp;
768 struct diskslices *ssp;
5350e1e9
MD
769 int slice;
770 int part;
984263bc 771
84f8b009 772 dev->si_bsize_phys = info->d_media_blksize;
984263bc 773
77dc851e
MD
774 /*
775 * Do not attempt to read the slice table or disk label when
5a8edc7a 776 * accessing the whole-disk slice or a while-slice partition.
77dc851e 777 */
5350e1e9 778 if (dkslice(dev) == WHOLE_DISK_SLICE)
77dc851e 779 flags |= DSO_ONESLICE | DSO_NOLABELS;
5350e1e9
MD
780 if (dkpart(dev) == WHOLE_SLICE_PART)
781 flags |= DSO_NOLABELS;
77dc851e 782
984263bc 783 /*
5a8edc7a
MD
784 * Reinitialize the slice table unless there is an open device
785 * on the unit.
786 *
787 * It would be nice if we didn't have to do this but when a
788 * user is slicing and partitioning up a disk it is a lot safer
789 * to not take any chances.
984263bc
MD
790 */
791 ssp = *sspp;
792 need_init = !dsisopen(ssp);
793 if (ssp != NULL && need_init)
794 dsgone(sspp);
795 if (need_init) {
796 /*
797 * Allocate a minimal slices "struct". This will become
798 * the final slices "struct" if we don't want real slices
799 * or if we can't find any real slices.
154b688d
MD
800 *
801 * Then scan the disk
984263bc 802 */
84f8b009 803 *sspp = dsmakeslicestruct(BASE_SLICE, info);
984263bc 804
5350e1e9 805 if ((flags & DSO_ONESLICE) == 0) {
84f8b009 806 error = mbrinit(dev, info, sspp);
984263bc
MD
807 if (error != 0) {
808 dsgone(sspp);
809 return (error);
810 }
811 }
812 ssp = *sspp;
813 ssp->dss_oflags = flags;
814
815 /*
816 * If there are no real slices, then make the compatiblity
817 * slice cover the whole disk.
818 */
84f8b009 819 if (ssp->dss_nslices == BASE_SLICE) {
7dc62e37
MD
820 sp = &ssp->dss_slices[COMPATIBILITY_SLICE];
821
822 sp->ds_size = info->d_media_blocks;
1c3c151b 823 sp->ds_reserved = 0;
84f8b009 824 }
984263bc 825
154b688d 826 /*
1c3c151b
MD
827 * Set dss_first_bsd_slice to point at the first BSD
828 * slice, if any.
154b688d 829 */
984263bc
MD
830 for (slice = BASE_SLICE; slice < ssp->dss_nslices; slice++) {
831 sp = &ssp->dss_slices[slice];
832 if (sp->ds_type == DOSPTYP_386BSD /* XXX */) {
1c3c151b 833#if 0
154b688d 834 struct diskslice *csp;
1c3c151b 835#endif
154b688d 836
984263bc 837 ssp->dss_first_bsd_slice = slice;
1c3c151b
MD
838#if 0
839 /*
840 * no longer supported, s0 is a real slice
841 * for GPT
842 */
843 csp = &ssp->dss_slices[COMPATIBILITY_SLICE];
154b688d
MD
844 csp->ds_offset = sp->ds_offset;
845 csp->ds_size = sp->ds_size;
846 csp->ds_type = sp->ds_type;
1c3c151b
MD
847 csp->ds_reserved = sp->ds_reserved;
848#endif
984263bc
MD
849 break;
850 }
851 }
154b688d
MD
852
853 /*
854 * By definition accesses via the whole-disk device do not
855 * specify any reserved areas. The whole disk may be read
856 * or written by the whole-disk device.
857 *
0ffe40b3 858 * The whole-disk slice does not ever have a label.
154b688d 859 */
84f8b009 860 sp = &ssp->dss_slices[WHOLE_DISK_SLICE];
84f8b009 861 sp->ds_wlabel = TRUE;
1c3c151b 862 sp->ds_reserved = 0;
984263bc
MD
863 }
864
865 /*
5a8edc7a
MD
866 * Load the disklabel for the slice being accessed unless it is
867 * a whole-disk-slice or a whole-slice-partition (as determined
868 * by DSO_NOLABELS).
84f8b009 869 *
5a8edc7a
MD
870 * We could scan all slices here and try to load up their
871 * disklabels, but that would cause us to access slices that
872 * the user may otherwise not intend us to access, or corrupted
873 * slices, etc.
874 *
875 * XXX if there are no opens on the slice we may want to re-read
876 * the disklabel anyway, even if we have one cached.
984263bc 877 */
5a8edc7a
MD
878 slice = dkslice(dev);
879 if (slice >= ssp->dss_nslices)
880 return (ENXIO);
881 sp = &ssp->dss_slices[slice];
882 part = dkpart(dev);
5350e1e9 883
2b961883 884 if ((flags & DSO_NOLABELS) == 0 && sp->ds_label.opaque == NULL) {
5350e1e9
MD
885 dev1 = dkmodslice(dkmodpart(dev, WHOLE_SLICE_PART), slice);
886
984263bc 887 /*
5a8edc7a
MD
888 * If opening a raw disk we do not try to
889 * read the disklabel now. No interpretation of raw disks
890 * (e.g. like 'da0') ever occurs. We will try to read the
891 * disklabel for a raw slice if asked to via DIOC* ioctls.
892 *
893 * Access to the label area is disallowed by default. Note
894 * however that accesses via WHOLE_DISK_SLICE, and accesses
895 * via WHOLE_SLICE_PART for slices without valid disklabels,
896 * will allow writes and ignore the flag.
984263bc 897 */
5a8edc7a
MD
898 set_ds_wlabel(ssp, slice, FALSE);
899 dsreadandsetlabel(dev1, flags, ssp, sp, info);
984263bc
MD
900 }
901
5350e1e9
MD
902 /*
903 * If opening a particular partition the disklabel must exist and
904 * the partition must be present in the label.
905 *
906 * If the partition is the special whole-disk-slice no partition
907 * table need exist.
908 */
909 if (part != WHOLE_SLICE_PART && slice != WHOLE_DISK_SLICE) {
2b961883 910 if (sp->ds_label.opaque == NULL ||
0ffe40b3 911 part >= sp->ds_ops->op_getnumparts(sp->ds_label)) {
5350e1e9 912 return (EINVAL);
2b961883 913 }
5350e1e9 914 }
41cf3502 915 dssetmask(sp, part);
5350e1e9 916
8a88e0d0
MD
917 /*
918 * Do not allow special raw-extension partitions to be opened
919 * if the device doesn't support them. Raw-extension partitions
920 * are typically used to handle CD tracks.
5350e1e9 921 */
5a8edc7a
MD
922 if (slice == WHOLE_DISK_SLICE && part >= 128 &&
923 part != WHOLE_SLICE_PART) {
8a88e0d0
MD
924 if ((info->d_dsflags & DSO_RAWEXTENSIONS) == 0)
925 return (EINVAL);
926 }
984263bc
MD
927 return (0);
928}
929
5350e1e9
MD
930/*
931 * Attempt to read the disklabel. If successful, store it in sp->ds_label.
932 *
933 * If we cannot read the disklabel and DSO_COMPATLABEL is set, we construct
934 * a fake label covering the whole disk.
935 */
936static
937int
938dsreadandsetlabel(cdev_t dev, u_int flags,
939 struct diskslices *ssp, struct diskslice *sp,
940 struct disk_info *info)
941{
2b961883 942 disklabel_t lp;
0ffe40b3 943 disklabel_ops_t ops;
5350e1e9
MD
944 const char *msg;
945 const char *sname;
946 char partname[2];
947 int slice = dkslice(dev);
2b961883 948
0ffe40b3
MD
949 /*
950 * Probe the disklabel
951 */
2b961883 952 lp.opaque = NULL;
5350e1e9 953 sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART, partname);
0ffe40b3 954 ops = &disklabel32_ops;
2b961883 955 msg = ops->op_readdisklabel(dev, sp, &lp, info);
0ffe40b3
MD
956 if (msg && strcmp(msg, "no disk label") == 0) {
957 ops = &disklabel64_ops;
958 msg = disklabel64_ops.op_readdisklabel(dev, sp, &lp, info);
959 }
5350e1e9 960
0ffe40b3
MD
961 /*
962 * If we failed and COMPATLABEL is set, create a dummy disklabel.
963 */
5350e1e9
MD
964 if (msg != NULL && (flags & DSO_COMPATLABEL)) {
965 msg = NULL;
0ffe40b3
MD
966 if (sp->ds_size >= 0x100000000ULL)
967 ops = &disklabel64_ops;
968 else
969 ops = &disklabel32_ops;
2b961883 970 lp = ops->op_clone_label(info, sp);
5350e1e9 971 }
5350e1e9
MD
972 if (msg != NULL) {
973 if (sp->ds_type == DOSPTYP_386BSD /* XXX */)
974 log(LOG_WARNING, "%s: cannot find label (%s)\n",
975 sname, msg);
2b961883
MD
976 if (lp.opaque)
977 kfree(lp.opaque, M_DEVBUF);
5350e1e9 978 } else {
0ffe40b3 979 set_ds_label(ssp, slice, lp, ops);
5350e1e9
MD
980 set_ds_wlabel(ssp, slice, FALSE);
981 }
982 return (msg ? EINVAL : 0);
983}
984
e0fc5693 985int64_t
b13267a5 986dssize(cdev_t dev, struct diskslices **sspp)
984263bc 987{
2b961883 988 disklabel_t lp;
0ffe40b3 989 disklabel_ops_t ops;
e1c7bccd
MD
990 int part;
991 int slice;
984263bc 992 struct diskslices *ssp;
ba0cc1ab
MD
993 u_int64_t start;
994 u_int64_t blocks;
984263bc
MD
995
996 slice = dkslice(dev);
997 part = dkpart(dev);
998 ssp = *sspp;
999 if (ssp == NULL || slice >= ssp->dss_nslices
41cf3502 1000 || !dschkmask(&ssp->dss_slices[slice], part)) {
fef8985e 1001 if (dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred) != 0)
984263bc 1002 return (-1);
fef8985e 1003 dev_dclose(dev, FREAD, S_IFCHR);
984263bc
MD
1004 ssp = *sspp;
1005 }
1006 lp = ssp->dss_slices[slice].ds_label;
2b961883 1007 if (lp.opaque == NULL)
984263bc 1008 return (-1);
0ffe40b3
MD
1009 ops = ssp->dss_slices[slice].ds_ops;
1010 if (ops->op_getpartbounds(ssp, lp, part, &start, &blocks))
ba0cc1ab
MD
1011 return (-1);
1012 return ((int64_t)blocks);
984263bc
MD
1013}
1014
1015static void
e1c7bccd 1016free_ds_label(struct diskslices *ssp, int slice)
984263bc 1017{
984263bc 1018 struct diskslice *sp;
2b961883 1019 disklabel_t lp;
984263bc
MD
1020
1021 sp = &ssp->dss_slices[slice];
1022 lp = sp->ds_label;
2b961883
MD
1023 if (lp.opaque != NULL) {
1024 kfree(lp.opaque, M_DEVBUF);
1025 lp.opaque = NULL;
0ffe40b3 1026 set_ds_label(ssp, slice, lp, NULL);
2b961883 1027 }
984263bc
MD
1028}
1029
984263bc 1030static void
0ffe40b3
MD
1031set_ds_label(struct diskslices *ssp, int slice,
1032 disklabel_t lp, disklabel_ops_t ops)
984263bc 1033{
ba0cc1ab 1034 struct diskslice *sp = &ssp->dss_slices[slice];
154b688d 1035
ba0cc1ab 1036 sp->ds_label = lp;
0ffe40b3
MD
1037 sp->ds_ops = ops;
1038 if (lp.opaque && slice != WHOLE_DISK_SLICE)
1039 ops->op_adjust_label_reserved(ssp, slice, sp);
1040 else
1041 sp->ds_reserved = 0;
984263bc
MD
1042}
1043
984263bc 1044static void
e1c7bccd 1045set_ds_wlabel(struct diskslices *ssp, int slice, int wlabel)
984263bc
MD
1046{
1047 ssp->dss_slices[slice].ds_wlabel = wlabel;
984263bc 1048}
ba0cc1ab 1049