2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $
40 * $DragonFly: src/sys/vfs/ufs/Attic/ufs_disksubr.c,v 1.5 2003/07/26 22:04:27 rob Exp $
43 #include <sys/param.h>
44 #include <sys/systm.h>
48 #include <sys/disklabel.h>
49 #include <sys/diskslice.h>
50 #include <sys/syslog.h>
51 #include <sys/device.h>
56 * Seek sort for disks.
58 * The buf_queue keeps two queues, sorted in ascending block order. The first
59 * queue holds those requests which are positioned after the current block
60 * (in the first request); the second, which starts at queue->switch_point,
61 * holds requests which came in after their block number was passed. Thus
62 * we implement a one way scan, retracting after reaching the end of the drive
63 * to the first request on the second queue, at which time it becomes the
66 * A one-way scan is natural because of the way UNIX read-ahead blocks are
71 bufqdisksort(bufq, bp)
72 struct buf_queue_head *bufq;
79 be = TAILQ_LAST(&bufq->queue, buf_queue);
81 * If the queue is empty or we are an
82 * ordered transaction, then it's easy.
84 if ((bq = bufq_first(bufq)) == NULL
85 || (bp->b_flags & B_ORDERED) != 0) {
86 bufq_insert_tail(bufq, bp);
88 } else if (bufq->insert_point != NULL) {
91 * A certain portion of the list is
92 * "locked" to preserve ordering, so
93 * we can only insert after the insert
96 bq = bufq->insert_point;
100 * If we lie before the last removed (currently active)
101 * request, and are not inserting ourselves into the
102 * "locked" portion of the list, then we must add ourselves
103 * to the second request list.
105 if (bp->b_pblkno < bufq->last_pblkno) {
107 bq = bufq->switch_point;
109 * If we are starting a new secondary list,
113 bufq->switch_point = bp;
114 bufq_insert_tail(bufq, bp);
118 * If we lie ahead of the current switch point,
119 * insert us before the switch point and move
122 if (bp->b_pblkno < bq->b_pblkno) {
123 bufq->switch_point = bp;
124 TAILQ_INSERT_BEFORE(bq, bp, b_act);
128 if (bufq->switch_point != NULL)
129 be = TAILQ_PREV(bufq->switch_point,
132 * If we lie between last_pblkno and bq,
135 if (bp->b_pblkno < bq->b_pblkno) {
136 TAILQ_INSERT_BEFORE(bq, bp, b_act);
143 * Request is at/after our current position in the list.
144 * Optimize for sequential I/O by seeing if we go at the tail.
146 if (bp->b_pblkno > be->b_pblkno) {
147 TAILQ_INSERT_AFTER(&bufq->queue, be, bp, b_act);
151 /* Otherwise, insertion sort */
152 while ((bn = TAILQ_NEXT(bq, b_act)) != NULL) {
155 * We want to go after the current request if it is the end
156 * of the first request list, or if the next request is a
157 * larger cylinder than our request.
159 if (bn == bufq->switch_point
160 || bp->b_pblkno < bn->b_pblkno)
164 TAILQ_INSERT_AFTER(&bufq->queue, bq, bp, b_act);
169 * Attempt to read a disk label from a device using the indicated strategy
170 * routine. The label must be partly set up before this: secpercyl, secsize
171 * and anything required in the strategy routine (e.g., dummy bounds for the
172 * partition containing the label) must be filled in before calling us.
173 * Returns NULL on success and an error string on failure.
176 readdisklabel(dev, lp)
178 struct disklabel *lp;
181 struct disklabel *dlp;
184 bp = geteblk((int)lp->d_secsize);
186 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
187 bp->b_bcount = lp->d_secsize;
188 bp->b_flags &= ~B_INVAL;
189 bp->b_flags |= B_READ;
193 else for (dlp = (struct disklabel *)bp->b_data;
194 dlp <= (struct disklabel *)((char *)bp->b_data +
195 lp->d_secsize - sizeof(*dlp));
196 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
197 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
199 msg = "no disk label";
200 } else if (dlp->d_npartitions > MAXPARTITIONS ||
202 msg = "disk label corrupted";
209 bp->b_flags |= B_INVAL | B_AGE;
215 * Check new disk label for sensibility before setting it.
218 setdisklabel(olp, nlp, openmask)
219 struct disklabel *olp, *nlp;
223 struct partition *opp, *npp;
226 * Check it is actually a disklabel we are looking at.
228 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
232 * For each partition that we think is open,
234 while ((i = ffs((long)openmask)) != 0) {
237 * Check it is not changing....
239 openmask &= ~(1 << i);
240 if (nlp->d_npartitions <= i)
242 opp = &olp->d_partitions[i];
243 npp = &nlp->d_partitions[i];
244 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
247 * Copy internally-set partition information
248 * if new label doesn't include it. XXX
249 * (If we are using it then we had better stay the same type)
250 * This is possibly dubious, as someone else noted (XXX)
252 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
253 npp->p_fstype = opp->p_fstype;
254 npp->p_fsize = opp->p_fsize;
255 npp->p_frag = opp->p_frag;
256 npp->p_cpg = opp->p_cpg;
260 nlp->d_checksum = dkcksum(nlp);
266 * Write disk label back to device after modification.
269 writedisklabel(dev, lp)
271 struct disklabel *lp;
274 struct disklabel *dlp;
277 if (lp->d_partitions[RAW_PART].p_offset != 0)
278 return (EXDEV); /* not quite right */
279 bp = geteblk((int)lp->d_secsize);
280 bp->b_dev = dkmodpart(dev, RAW_PART);
281 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
282 bp->b_bcount = lp->d_secsize;
285 * We read the label first to see if it's there,
286 * in which case we will put ours at the same offset into the block..
287 * (I think this is stupid [Julian])
288 * Note that you can't write a label out over a corrupted label!
289 * (also stupid.. how do you write the first one? by raw writes?)
291 bp->b_flags &= ~B_INVAL;
292 bp->b_flags |= B_READ;
297 for (dlp = (struct disklabel *)bp->b_data;
298 dlp <= (struct disklabel *)
299 ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
300 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
301 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
304 bp->b_flags &= ~(B_DONE | B_READ);
305 bp->b_flags |= B_WRITE;
307 alpha_fix_srm_checksum(bp);
317 bzero(bp->b_data, lp->d_secsize);
318 dlp = (struct disklabel *)bp->b_data;
320 bp->b_flags &= ~B_INVAL;
321 bp->b_flags |= B_WRITE;
325 bp->b_flags |= B_INVAL | B_AGE;
331 * Disk error is the preface to plaintive error messages
332 * about failing disk transfers. It prints messages of the form
334 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
336 * if the offset of the error in the transfer and a disk label
337 * are both available. blkdone should be -1 if the position of the error
338 * is unknown; the disklabel pointer may be null for drivers that have not
339 * been converted to use disklabels. The message is printed with printf
340 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
341 * The message should be completed (with at least a newline) with printf
342 * or addlog, respectively. There is no trailing space.
345 diskerr(bp, what, pri, blkdone, lp)
349 struct disklabel *lp;
351 int unit = dkunit(bp->b_dev);
352 int slice = dkslice(bp->b_dev);
353 int part = dkpart(bp->b_dev);
358 sname = dsname(bp->b_dev, unit, slice, part, partname);
359 printf("%s%s: %s %sing fsbn ", sname, partname, what,
360 bp->b_flags & B_READ ? "read" : "writ");
362 if (bp->b_bcount <= DEV_BSIZE)
363 printf("%ld", (long)sn);
367 printf("%ld of ", (long)sn);
369 printf("%ld-%ld", (long)bp->b_blkno,
370 (long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE));
372 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
374 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
376 sn += lp->d_partitions[part].p_offset;
378 * XXX should add slice offset and not print the slice,
379 * but we don't know the slice pointer.
380 * XXX should print bp->b_pblkno so that this will work
381 * independent of slices, labels and bad sector remapping,
382 * but some drivers don't set bp->b_pblkno.
384 printf(" (%s bn %ld; cn %ld", sname, (long)sn,
385 (long)(sn / lp->d_secpercyl));
386 sn %= (long)lp->d_secpercyl;
387 printf(" tn %ld sn %ld)", (long)(sn / lp->d_nsectors),
388 (long)(sn % lp->d_nsectors));