sys/kern/subr_disk.c

   1 /*
   2  * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  * ----------------------------------------------------------------------------
  35  * "THE BEER-WARE LICENSE" (Revision 42):
  36  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
  37  * can do whatever you want with this stuff. If we meet some day, and you think
  38  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  39  * ----------------------------------------------------------------------------
  40  *
  41  * Copyright (c) 1982, 1986, 1988, 1993
  42  *      The Regents of the University of California.  All rights reserved.
  43  * (c) UNIX System Laboratories, Inc.
  44  * All or some portions of this file are derived from material licensed
  45  * to the University of California by American Telephone and Telegraph
  46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  47  * the permission of UNIX System Laboratories, Inc.
  48  *
  49  * Redistribution and use in source and binary forms, with or without
  50  * modification, are permitted provided that the following conditions
  51  * are met:
  52  * 1. Redistributions of source code must retain the above copyright
  53  *    notice, this list of conditions and the following disclaimer.
  54  * 2. Redistributions in binary form must reproduce the above copyright
  55  *    notice, this list of conditions and the following disclaimer in the
  56  *    documentation and/or other materials provided with the distribution.
  57  * 3. All advertising materials mentioning features or use of this software
  58  *    must display the following acknowledgement:
  59  *      This product includes software developed by the University of
  60  *      California, Berkeley and its contributors.
  61  * 4. Neither the name of the University nor the names of its contributors
  62  *    may be used to endorse or promote products derived from this software
  63  *    without specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  66  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  69  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  70  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  71  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  72  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  73  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  74  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  75  * SUCH DAMAGE.
  76  *
  77  *      @(#)ufs_disksubr.c      8.5 (Berkeley) 1/21/94
  78  * $FreeBSD: src/sys/kern/subr_disk.c,v 1.20.2.6 2001/10/05 07:14:57 peter Exp $
  79  * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $
  80  * $DragonFly: src/sys/kern/subr_disk.c,v 1.26 2006/09/10 01:26:39 dillon Exp $
  81  */
  82
  83 #include <sys/param.h>
  84 #include <sys/systm.h>
  85 #include <sys/kernel.h>
  86 #include <sys/proc.h>
  87 #include <sys/sysctl.h>
  88 #include <sys/buf.h>
  89 #include <sys/conf.h>
  90 #include <sys/disklabel.h>
  91 #include <sys/diskslice.h>
  92 #include <sys/disk.h>
  93 #include <sys/malloc.h>
  94 #include <sys/sysctl.h>
  95 #include <machine/md_var.h>
  96 #include <sys/ctype.h>
  97 #include <sys/syslog.h>
  98 #include <sys/device.h>
  99 #include <sys/msgport.h>
 100 #include <sys/msgport2.h>
 101 #include <sys/buf2.h>
 102
 103 static MALLOC_DEFINE(M_DISK, "disk", "disk data");
 104
 105 static d_open_t diskopen;
 106 static d_close_t diskclose;
 107 static d_ioctl_t diskioctl;
 108 static d_strategy_t diskstrategy;
 109 static d_psize_t diskpsize;
 110 static d_clone_t diskclone;
 111 static d_dump_t diskdump;
 112
 113 static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist);
 114
 115 static struct dev_ops disk_ops = {
 116         { "disk" },
 117         .d_open = diskopen,
 118         .d_close = diskclose,
 119         .d_read = physread,
 120         .d_write = physwrite,
 121         .d_ioctl = diskioctl,
 122         .d_strategy = diskstrategy,
 123         .d_dump = diskdump,
 124         .d_psize = diskpsize,
 125         .d_clone = diskclone
 126 };
 127
 128 /*
 129  * Create a raw device for the dev_ops template (which is returned).  Also
 130  * create a slice and unit managed disk and overload the user visible
 131  * device space with it.
 132  *
 133  * NOTE: The returned raw device is NOT a slice and unit managed device.
 134  * It is an actual raw device representing the raw disk as specified by
 135  * the passed dev_ops.  The disk layer not only returns such a raw device,
 136  * it also uses it internally when passing (modified) commands through.
 137  */
 138 cdev_t
 139 disk_create(int unit, struct disk *dp, int flags, struct dev_ops *raw_ops)
 140 {
 141         cdev_t rawdev;
 142         struct dev_ops *dev_ops;
 143
 144         /*
 145          * Create the raw backing device
 146          */
 147         compile_dev_ops(raw_ops);
 148         rawdev = make_dev(raw_ops,
 149                             dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART),
 150                             UID_ROOT, GID_OPERATOR, 0640,
 151                             "%s%d", raw_ops->head.name, unit);
 152
 153         bzero(dp, sizeof(*dp));
 154
 155         /*
 156          * We install a custom cdevsw rather then the passed cdevsw,
 157          * and save our disk structure in d_data so we can get at it easily
 158          * without any complex cloning code.
 159          */
 160         dev_ops = dev_ops_add_override(rawdev, &disk_ops,
 161                                        dkunitmask(), dkmakeunit(unit));
 162         dev_ops->head.data = dp;
 163
 164         dp->d_rawdev = rawdev;
 165         dp->d_raw_ops = raw_ops;
 166         dp->d_dev_ops = dev_ops;
 167         dp->d_cdev = make_dev(dev_ops,
 168                             dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART),
 169                             UID_ROOT, GID_OPERATOR, 0640,
 170                             "%s%d", dev_ops->head.name, unit);
 171
 172         dp->d_dsflags = flags;
 173         LIST_INSERT_HEAD(&disklist, dp, d_list);
 174         return (dp->d_rawdev);
 175 }
 176
 177 /*
 178  * This routine is called when an adapter detaches.  The higher level
 179  * managed disk device is destroyed while the lower level raw device is
 180  * released.
 181  */
 182 void
 183 disk_destroy(struct disk *disk)
 184 {
 185         if (disk->d_dev_ops) {
 186             dev_ops_remove(disk->d_dev_ops, dkunitmask(),
 187                             dkmakeunit(dkunit(disk->d_cdev)));
 188             LIST_REMOVE(disk, d_list);
 189         }
 190         if (disk->d_raw_ops) {
 191             destroy_all_devs(disk->d_raw_ops, dkunitmask(),
 192                             dkmakeunit(dkunit(disk->d_rawdev)));
 193         }
 194         bzero(disk, sizeof(*disk));
 195 }
 196
 197 int
 198 disk_dumpcheck(cdev_t dev, u_int *count, u_int *blkno, u_int *secsize)
 199 {
 200         struct disk *dp;
 201         struct disklabel *dl;
 202         u_int boff;
 203
 204         dp = dev->si_disk;
 205         if (!dp)
 206                 return (ENXIO);
 207         if (!dp->d_slice)
 208                 return (ENXIO);
 209         dl = dsgetlabel(dev, dp->d_slice);
 210         if (!dl)
 211                 return (ENXIO);
 212         *count = Maxmem * (PAGE_SIZE / dl->d_secsize);
 213         if (dumplo <= LABELSECTOR ||
 214             (dumplo + *count > dl->d_partitions[dkpart(dev)].p_size))
 215                 return (EINVAL);
 216         boff = dl->d_partitions[dkpart(dev)].p_offset +
 217             dp->d_slice->dss_slices[dkslice(dev)].ds_offset;
 218         *blkno = boff + dumplo;
 219         *secsize = dl->d_secsize;
 220         return (0);
 221
 222 }
 223
 224 void
 225 disk_invalidate (struct disk *disk)
 226 {
 227         if (disk->d_slice)
 228                 dsgone(&disk->d_slice);
 229 }
 230
 231 struct disk *
 232 disk_enumerate(struct disk *disk)
 233 {
 234         if (!disk)
 235                 return (LIST_FIRST(&disklist));
 236         else
 237                 return (LIST_NEXT(disk, d_list));
 238 }
 239
 240 static
 241 int
 242 sysctl_disks(SYSCTL_HANDLER_ARGS)
 243 {
 244         struct disk *disk;
 245         int error, first;
 246
 247         disk = NULL;
 248         first = 1;
 249
 250         while ((disk = disk_enumerate(disk))) {
 251                 if (!first) {
 252                         error = SYSCTL_OUT(req, " ", 1);
 253                         if (error)
 254                                 return error;
 255                 } else {
 256                         first = 0;
 257                 }
 258                 error = SYSCTL_OUT(req, disk->d_rawdev->si_name,
 259                                    strlen(disk->d_rawdev->si_name));
 260                 if (error)
 261                         return error;
 262         }
 263         error = SYSCTL_OUT(req, "", 1);
 264         return error;
 265 }
 266
 267 SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, 0, NULL,
 268     sysctl_disks, "A", "names of available disks");
 269
 270 /*
 271  * Open a disk device or partition.
 272  */
 273 static
 274 int
 275 diskopen(struct dev_open_args *ap)
 276 {
 277         cdev_t dev = ap->a_head.a_dev;
 278         struct disk *dp;
 279         int error;
 280
 281         /*
 282          * dp can't be NULL here XXX.
 283          */
 284         dp = dev->si_disk;
 285         if (dp == NULL)
 286                 return (ENXIO);
 287         error = 0;
 288
 289         /*
 290          * Deal with open races
 291          */
 292         while (dp->d_flags & DISKFLAG_LOCK) {
 293                 dp->d_flags |= DISKFLAG_WANTED;
 294                 error = tsleep(dp, PCATCH, "diskopen", hz);
 295                 if (error)
 296                         return (error);
 297         }
 298         dp->d_flags |= DISKFLAG_LOCK;
 299
 300         /*
 301          * Open the underlying raw device.
 302          */
 303         if (!dsisopen(dp->d_slice)) {
 304 #if 0
 305                 if (!pdev->si_iosize_max)
 306                         pdev->si_iosize_max = dev->si_iosize_max;
 307 #endif
 308                 error = dev_dopen(dp->d_rawdev, ap->a_oflags,
 309                                   ap->a_devtype, ap->a_cred);
 310         }
 311
 312         /*
 313          * Inherit properties from the underlying device now that it is
 314          * open.
 315          */
 316         dev_dclone(dev);
 317
 318         if (error)
 319                 goto out;
 320
 321         error = dsopen(dev, ap->a_devtype, dp->d_dsflags,
 322                        &dp->d_slice, &dp->d_label);
 323
 324         if (!dsisopen(dp->d_slice))
 325                 dev_dclose(dp->d_rawdev, ap->a_oflags, ap->a_devtype);
 326 out:
 327         dp->d_flags &= ~DISKFLAG_LOCK;
 328         if (dp->d_flags & DISKFLAG_WANTED) {
 329                 dp->d_flags &= ~DISKFLAG_WANTED;
 330                 wakeup(dp);
 331         }
 332
 333         return(error);
 334 }
 335
 336 /*
 337  * Close a disk device or partition
 338  */
 339 static
 340 int
 341 diskclose(struct dev_close_args *ap)
 342 {
 343         cdev_t dev = ap->a_head.a_dev;
 344         struct disk *dp;
 345         int error;
 346
 347         error = 0;
 348         dp = dev->si_disk;
 349
 350         dsclose(dev, ap->a_devtype, dp->d_slice);
 351         if (!dsisopen(dp->d_slice))
 352                 error = dev_dclose(dp->d_rawdev, ap->a_fflag, ap->a_devtype);
 353         return (error);
 354 }
 355
 356 /*
 357  * First execute the ioctl on the disk device, and if it isn't supported
 358  * try running it on the backing device.
 359  */
 360 static
 361 int
 362 diskioctl(struct dev_ioctl_args *ap)
 363 {
 364         cdev_t dev = ap->a_head.a_dev;
 365         struct disk *dp;
 366         int error;
 367
 368         dp = dev->si_disk;
 369         if (dp == NULL)
 370                 return (ENXIO);
 371         error = dsioctl(dev, ap->a_cmd, ap->a_data, ap->a_fflag, &dp->d_slice);
 372         if (error == ENOIOCTL) {
 373                 error = dev_dioctl(dp->d_rawdev, ap->a_cmd, ap->a_data,
 374                                    ap->a_fflag, ap->a_cred);
 375         }
 376         return (error);
 377 }
 378
 379 /*
 380  * Execute strategy routine
 381  */
 382 static
 383 int
 384 diskstrategy(struct dev_strategy_args *ap)
 385 {
 386         cdev_t dev = ap->a_head.a_dev;
 387         struct bio *bio = ap->a_bio;
 388         struct bio *nbio;
 389         struct disk *dp;
 390
 391         dp = dev->si_disk;
 392
 393         if (dp == NULL) {
 394                 bio->bio_buf->b_error = ENXIO;
 395                 bio->bio_buf->b_flags |= B_ERROR;
 396                 biodone(bio);
 397                 return(0);
 398         }
 399         KKASSERT(dev->si_disk == dp);
 400
 401         /*
 402          * The dscheck() function will also transform the slice relative
 403          * block number i.e. bio->bio_offset into a block number that can be
 404          * passed directly to the underlying raw device.  If dscheck()
 405          * returns NULL it will have handled the bio for us (e.g. EOF
 406          * or error due to being beyond the device size).
 407          */
 408         if ((nbio = dscheck(dev, bio, dp->d_slice)) != NULL)
 409                 dev_dstrategy(dp->d_rawdev, nbio);
 410         else
 411                 biodone(bio);
 412         return(0);
 413 }
 414
 415 /*
 416  * Return the partition size in ?blocks?
 417  */
 418 static
 419 int
 420 diskpsize(struct dev_psize_args *ap)
 421 {
 422         cdev_t dev = ap->a_head.a_dev;
 423         struct disk *dp;
 424
 425         dp = dev->si_disk;
 426         if (dp == NULL)
 427                 return(ENODEV);
 428         ap->a_result = dssize(dev, &dp->d_slice);
 429         return(0);
 430 }
 431
 432 /*
 433  * When new device entries are instantiated, make sure they inherit our
 434  * si_disk structure and block and iosize limits from the raw device.
 435  *
 436  * This routine is always called synchronously in the context of the
 437  * client.
 438  *
 439  * XXX The various io and block size constraints are not always initialized
 440  * properly by devices.
 441  */
 442 static
 443 int
 444 diskclone(struct dev_clone_args *ap)
 445 {
 446         cdev_t dev = ap->a_head.a_dev;
 447         struct disk *dp;
 448
 449         dp = dev->si_ops->head.data;
 450         KKASSERT(dp != NULL);
 451         dev->si_disk = dp;
 452         dev->si_iosize_max = dp->d_rawdev->si_iosize_max;
 453         dev->si_bsize_phys = dp->d_rawdev->si_bsize_phys;
 454         dev->si_bsize_best = dp->d_rawdev->si_bsize_best;
 455         return(0);
 456 }
 457
 458 int
 459 diskdump(struct dev_dump_args *ap)
 460 {
 461         cdev_t dev = ap->a_head.a_dev;
 462         struct disk *dp = dev->si_ops->head.data;
 463         int error;
 464
 465         error = disk_dumpcheck(dev, &ap->a_count, &ap->a_blkno, &ap->a_secsize);
 466         if (error == 0) {
 467                 ap->a_head.a_dev = dp->d_rawdev;
 468                 error = dev_doperate(&ap->a_head);
 469         }
 470
 471         return(error);
 472 }
 473
 474
 475 SYSCTL_INT(_debug_sizeof, OID_AUTO, disklabel, CTLFLAG_RD,
 476     0, sizeof(struct disklabel), "sizeof(struct disklabel)");
 477
 478 SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD,
 479     0, sizeof(struct diskslices), "sizeof(struct diskslices)");
 480
 481 SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD,
 482     0, sizeof(struct disk), "sizeof(struct disk)");
 483
 484
 485 /*
 486  * Seek sort for disks.
 487  *
 488  * The bio_queue keep two queues, sorted in ascending block order.  The first
 489  * queue holds those requests which are positioned after the current block
 490  * (in the first request); the second, which starts at queue->switch_point,
 491  * holds requests which came in after their block number was passed.  Thus
 492  * we implement a one way scan, retracting after reaching the end of the drive
 493  * to the first request on the second queue, at which time it becomes the
 494  * first queue.
 495  *
 496  * A one-way scan is natural because of the way UNIX read-ahead blocks are
 497  * allocated.
 498  */
 499 void
 500 bioqdisksort(struct bio_queue_head *bioq, struct bio *bio)
 501 {
 502         struct bio *bq;
 503         struct bio *bn;
 504         struct bio *be;
 505
 506         be = TAILQ_LAST(&bioq->queue, bio_queue);
 507         /*
 508          * If the queue is empty or we are an
 509          * ordered transaction, then it's easy.
 510          */
 511         if ((bq = bioq_first(bioq)) == NULL ||
 512             (bio->bio_buf->b_flags & B_ORDERED) != 0) {
 513                 bioq_insert_tail(bioq, bio);
 514                 return;
 515         } else if (bioq->insert_point != NULL) {
 516
 517                 /*
 518                  * A certain portion of the list is
 519                  * "locked" to preserve ordering, so
 520                  * we can only insert after the insert
 521                  * point.
 522                  */
 523                 bq = bioq->insert_point;
 524         } else {
 525
 526                 /*
 527                  * If we lie before the last removed (currently active)
 528                  * request, and are not inserting ourselves into the
 529                  * "locked" portion of the list, then we must add ourselves
 530                  * to the second request list.
 531                  */
 532                 if (bio->bio_offset < bioq->last_offset) {
 533                         bq = bioq->switch_point;
 534                         /*
 535                          * If we are starting a new secondary list,
 536                          * then it's easy.
 537                          */
 538                         if (bq == NULL) {
 539                                 bioq->switch_point = bio;
 540                                 bioq_insert_tail(bioq, bio);
 541                                 return;
 542                         }
 543                         /*
 544                          * If we lie ahead of the current switch point,
 545                          * insert us before the switch point and move
 546                          * the switch point.
 547                          */
 548                         if (bio->bio_offset < bq->bio_offset) {
 549                                 bioq->switch_point = bio;
 550                                 TAILQ_INSERT_BEFORE(bq, bio, bio_act);
 551                                 return;
 552                         }
 553                 } else {
 554                         if (bioq->switch_point != NULL)
 555                                 be = TAILQ_PREV(bioq->switch_point,
 556                                                 bio_queue, bio_act);
 557                         /*
 558                          * If we lie between last_offset and bq,
 559                          * insert before bq.
 560                          */
 561                         if (bio->bio_offset < bq->bio_offset) {
 562                                 TAILQ_INSERT_BEFORE(bq, bio, bio_act);
 563                                 return;
 564                         }
 565                 }
 566         }
 567
 568         /*
 569          * Request is at/after our current position in the list.
 570          * Optimize for sequential I/O by seeing if we go at the tail.
 571          */
 572         if (bio->bio_offset > be->bio_offset) {
 573                 TAILQ_INSERT_AFTER(&bioq->queue, be, bio, bio_act);
 574                 return;
 575         }
 576
 577         /* Otherwise, insertion sort */
 578         while ((bn = TAILQ_NEXT(bq, bio_act)) != NULL) {
 579
 580                 /*
 581                  * We want to go after the current request if it is the end
 582                  * of the first request list, or if the next request is a
 583                  * larger cylinder than our request.
 584                  */
 585                 if (bn == bioq->switch_point
 586                  || bio->bio_offset < bn->bio_offset)
 587                         break;
 588                 bq = bn;
 589         }
 590         TAILQ_INSERT_AFTER(&bioq->queue, bq, bio, bio_act);
 591 }
 592
 593
 594 /*
 595  * Attempt to read a disk label from a device using the indicated strategy
 596  * routine.  The label must be partly set up before this: secpercyl, secsize
 597  * and anything required in the strategy routine (e.g., dummy bounds for the
 598  * partition containing the label) must be filled in before calling us.
 599  * Returns NULL on success and an error string on failure.
 600  */
 601 char *
 602 readdisklabel(cdev_t dev, struct disklabel *lp)
 603 {
 604         struct buf *bp;
 605         struct disklabel *dlp;
 606         char *msg = NULL;
 607
 608         bp = geteblk((int)lp->d_secsize);
 609         bp->b_bio1.bio_offset = (off_t)LABELSECTOR * lp->d_secsize;
 610         bp->b_bcount = lp->d_secsize;
 611         bp->b_flags &= ~B_INVAL;
 612         bp->b_cmd = BUF_CMD_READ;
 613         dev_dstrategy(dev, &bp->b_bio1);
 614         if (biowait(bp))
 615                 msg = "I/O error";
 616         else for (dlp = (struct disklabel *)bp->b_data;
 617             dlp <= (struct disklabel *)((char *)bp->b_data +
 618             lp->d_secsize - sizeof(*dlp));
 619             dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
 620                 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
 621                         if (msg == NULL)
 622                                 msg = "no disk label";
 623                 } else if (dlp->d_npartitions > MAXPARTITIONS ||
 624                            dkcksum(dlp) != 0)
 625                         msg = "disk label corrupted";
 626                 else {
 627                         *lp = *dlp;
 628                         msg = NULL;
 629                         break;
 630                 }
 631         }
 632         bp->b_flags |= B_INVAL | B_AGE;
 633         brelse(bp);
 634         return (msg);
 635 }
 636
 637 /*
 638  * Check new disk label for sensibility before setting it.
 639  */
 640 int
 641 setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_long openmask)
 642 {
 643         int i;
 644         struct partition *opp, *npp;
 645
 646         /*
 647          * Check it is actually a disklabel we are looking at.
 648          */
 649         if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
 650             dkcksum(nlp) != 0)
 651                 return (EINVAL);
 652         /*
 653          * For each partition that we think is open,
 654          */
 655         while ((i = ffs((long)openmask)) != 0) {
 656                 i--;
 657                 /*
 658                  * Check it is not changing....
 659                  */
 660                 openmask &= ~(1 << i);
 661                 if (nlp->d_npartitions <= i)
 662                         return (EBUSY);
 663                 opp = &olp->d_partitions[i];
 664                 npp = &nlp->d_partitions[i];
 665                 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
 666                         return (EBUSY);
 667                 /*
 668                  * Copy internally-set partition information
 669                  * if new label doesn't include it.             XXX
 670                  * (If we are using it then we had better stay the same type)
 671                  * This is possibly dubious, as someone else noted (XXX)
 672                  */
 673                 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
 674                         npp->p_fstype = opp->p_fstype;
 675                         npp->p_fsize = opp->p_fsize;
 676                         npp->p_frag = opp->p_frag;
 677                         npp->p_cpg = opp->p_cpg;
 678                 }
 679         }
 680         nlp->d_checksum = 0;
 681         nlp->d_checksum = dkcksum(nlp);
 682         *olp = *nlp;
 683         return (0);
 684 }
 685
 686 /*
 687  * Write disk label back to device after modification.
 688  */
 689 int
 690 writedisklabel(cdev_t dev, struct disklabel *lp)
 691 {
 692         struct buf *bp;
 693         struct disklabel *dlp;
 694         int error = 0;
 695
 696         if (lp->d_partitions[RAW_PART].p_offset != 0)
 697                 return (EXDEV);                 /* not quite right */
 698         bp = geteblk((int)lp->d_secsize);
 699         bp->b_bio1.bio_offset = (off_t)LABELSECTOR * lp->d_secsize;
 700         bp->b_bcount = lp->d_secsize;
 701 #if 1
 702         /*
 703          * We read the label first to see if it's there,
 704          * in which case we will put ours at the same offset into the block..
 705          * (I think this is stupid [Julian])
 706          * Note that you can't write a label out over a corrupted label!
 707          * (also stupid.. how do you write the first one? by raw writes?)
 708          */
 709         bp->b_flags &= ~B_INVAL;
 710         bp->b_cmd = BUF_CMD_READ;
 711         dev_dstrategy(dkmodpart(dev, RAW_PART), &bp->b_bio1);
 712         error = biowait(bp);
 713         if (error)
 714                 goto done;
 715         for (dlp = (struct disklabel *)bp->b_data;
 716             dlp <= (struct disklabel *)
 717               ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
 718             dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
 719                 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
 720                     dkcksum(dlp) == 0) {
 721                         *dlp = *lp;
 722                         bp->b_cmd = BUF_CMD_WRITE;
 723                         dev_dstrategy(dkmodpart(dev, RAW_PART), &bp->b_bio1);
 724                         error = biowait(bp);
 725                         goto done;
 726                 }
 727         }
 728         error = ESRCH;
 729 done:
 730 #else
 731         bzero(bp->b_data, lp->d_secsize);
 732         dlp = (struct disklabel *)bp->b_data;
 733         *dlp = *lp;
 734         bp->b_flags &= ~B_INVAL;
 735         bp->b_cmd = BUF_CMD_WRITE;
 736         BUF_STRATEGY(bp, 1);
 737         error = biowait(bp);
 738 #endif
 739         bp->b_flags |= B_INVAL | B_AGE;
 740         brelse(bp);
 741         return (error);
 742 }
 743
 744 /*
 745  * Disk error is the preface to plaintive error messages
 746  * about failing disk transfers.  It prints messages of the form
 747
 748 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
 749
 750  * if the offset of the error in the transfer and a disk label
 751  * are both available.  blkdone should be -1 if the position of the error
 752  * is unknown; the disklabel pointer may be null from drivers that have not
 753  * been converted to use them.  The message is printed with printf
 754  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
 755  * The message should be completed (with at least a newline) with printf
 756  * or addlog, respectively.  There is no trailing space.
 757  */
 758 void
 759 diskerr(struct bio *bio, cdev_t dev, const char *what, int pri,
 760         int donecnt, struct disklabel *lp)
 761 {
 762         struct buf *bp = bio->bio_buf;
 763         int unit = dkunit(dev);
 764         int slice = dkslice(dev);
 765         int part = dkpart(dev);
 766         char partname[2];
 767         char *sname;
 768
 769         sname = dsname(dev, unit, slice, part, partname);
 770         printf("%s%s: %s %sing ", sname, partname, what,
 771               (bp->b_cmd == BUF_CMD_READ) ? "read" : "writ");
 772         printf("offset %012llx for %d", bio->bio_offset, bp->b_bcount);
 773         if (donecnt)
 774                 printf(" (%d bytes completed)", donecnt);
 775 }
 776