2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $
40 * $DragonFly: src/sys/vfs/ufs/Attic/ufs_disksubr.c,v 1.2 2003/06/17 04:28:59 dillon Exp $
43 #include <sys/param.h>
44 #include <sys/systm.h>
47 #include <sys/disklabel.h>
48 #include <sys/diskslice.h>
49 #include <sys/syslog.h>
52 * Seek sort for disks.
54 * The buf_queue keeps two queues, sorted in ascending block order. The first
55 * queue holds those requests which are positioned after the current block
56 * (in the first request); the second, which starts at queue->switch_point,
57 * holds requests which came in after their block number was passed. Thus
58 * we implement a one way scan, retracting after reaching the end of the drive
59 * to the first request on the second queue, at which time it becomes the
62 * A one-way scan is natural because of the way UNIX read-ahead blocks are
67 bufqdisksort(bufq, bp)
68 struct buf_queue_head *bufq;
75 be = TAILQ_LAST(&bufq->queue, buf_queue);
77 * If the queue is empty or we are an
78 * ordered transaction, then it's easy.
80 if ((bq = bufq_first(bufq)) == NULL
81 || (bp->b_flags & B_ORDERED) != 0) {
82 bufq_insert_tail(bufq, bp);
84 } else if (bufq->insert_point != NULL) {
87 * A certain portion of the list is
88 * "locked" to preserve ordering, so
89 * we can only insert after the insert
92 bq = bufq->insert_point;
96 * If we lie before the last removed (currently active)
97 * request, and are not inserting ourselves into the
98 * "locked" portion of the list, then we must add ourselves
99 * to the second request list.
101 if (bp->b_pblkno < bufq->last_pblkno) {
103 bq = bufq->switch_point;
105 * If we are starting a new secondary list,
109 bufq->switch_point = bp;
110 bufq_insert_tail(bufq, bp);
114 * If we lie ahead of the current switch point,
115 * insert us before the switch point and move
118 if (bp->b_pblkno < bq->b_pblkno) {
119 bufq->switch_point = bp;
120 TAILQ_INSERT_BEFORE(bq, bp, b_act);
124 if (bufq->switch_point != NULL)
125 be = TAILQ_PREV(bufq->switch_point,
128 * If we lie between last_pblkno and bq,
131 if (bp->b_pblkno < bq->b_pblkno) {
132 TAILQ_INSERT_BEFORE(bq, bp, b_act);
139 * Request is at/after our current position in the list.
140 * Optimize for sequential I/O by seeing if we go at the tail.
142 if (bp->b_pblkno > be->b_pblkno) {
143 TAILQ_INSERT_AFTER(&bufq->queue, be, bp, b_act);
147 /* Otherwise, insertion sort */
148 while ((bn = TAILQ_NEXT(bq, b_act)) != NULL) {
151 * We want to go after the current request if it is the end
152 * of the first request list, or if the next request has a
153 * larger block number than our request.
155 if (bn == bufq->switch_point
156 || bp->b_pblkno < bn->b_pblkno)
160 TAILQ_INSERT_AFTER(&bufq->queue, bq, bp, b_act);
165 * Attempt to read a disk label from a device using the indicated strategy
166 * routine. The label must be partly set up before this: secpercyl, secsize
167 * and anything required in the strategy routine (e.g., dummy bounds for the
168 * partition containing the label) must be filled in before calling us.
169 * Returns NULL on success and an error string on failure.
172 readdisklabel(dev, lp)
174 register struct disklabel *lp;
176 register struct buf *bp;
177 struct disklabel *dlp;
180 bp = geteblk((int)lp->d_secsize);
182 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
183 bp->b_bcount = lp->d_secsize;
184 bp->b_flags &= ~B_INVAL;
185 bp->b_flags |= B_READ;
189 else for (dlp = (struct disklabel *)bp->b_data;
190 dlp <= (struct disklabel *)((char *)bp->b_data +
191 lp->d_secsize - sizeof(*dlp));
192 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
193 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
195 msg = "no disk label";
196 } else if (dlp->d_npartitions > MAXPARTITIONS ||
198 msg = "disk label corrupted";
205 bp->b_flags |= B_INVAL | B_AGE;
211 * Check new disk label for sensibility before setting it.
214 setdisklabel(olp, nlp, openmask)
215 register struct disklabel *olp, *nlp;
219 register struct partition *opp, *npp;
222 * Check it is actually a disklabel we are looking at.
224 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
228 * For each partition that we think is open,
230 while ((i = ffs((long)openmask)) != 0) {
233 * Check it is not changing....
235 openmask &= ~(1 << i);
236 if (nlp->d_npartitions <= i)
238 opp = &olp->d_partitions[i];
239 npp = &nlp->d_partitions[i];
240 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
243 * Copy internally-set partition information
244 * if new label doesn't include it. XXX
245 * (If we are using it then we had better stay the same type)
246 * This is possibly dubious, as someone else noted (XXX)
248 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
249 npp->p_fstype = opp->p_fstype;
250 npp->p_fsize = opp->p_fsize;
251 npp->p_frag = opp->p_frag;
252 npp->p_cpg = opp->p_cpg;
256 nlp->d_checksum = dkcksum(nlp);
262 * Write disk label back to device after modification.
265 writedisklabel(dev, lp)
267 register struct disklabel *lp;
270 struct disklabel *dlp;
273 if (lp->d_partitions[RAW_PART].p_offset != 0)
274 return (EXDEV); /* not quite right */
275 bp = geteblk((int)lp->d_secsize);
276 bp->b_dev = dkmodpart(dev, RAW_PART);
277 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
278 bp->b_bcount = lp->d_secsize;
281 * We read the label first to see if it's there,
282 * in which case we will put ours at the same offset into the block..
283 * (I think this is stupid [Julian])
284 * Note that you can't write a label out over a corrupted label!
285 * (also stupid.. how do you write the first one? by raw writes?)
287 bp->b_flags &= ~B_INVAL;
288 bp->b_flags |= B_READ;
293 for (dlp = (struct disklabel *)bp->b_data;
294 dlp <= (struct disklabel *)
295 ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
296 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
297 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
300 bp->b_flags &= ~(B_DONE | B_READ);
301 bp->b_flags |= B_WRITE;
303 alpha_fix_srm_checksum(bp);
313 bzero(bp->b_data, lp->d_secsize);
314 dlp = (struct disklabel *)bp->b_data;
316 bp->b_flags &= ~B_INVAL;
317 bp->b_flags |= B_WRITE;
321 bp->b_flags |= B_INVAL | B_AGE;
327 * Disk error is the preface to plaintive error messages
328 * about failing disk transfers. It prints messages of the form
330 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
332 * if the offset of the error in the transfer and a disk label
333 * are both available. blkdone should be -1 if the position of the error
334 * is unknown; the disklabel pointer may be null from drivers that have not
335 * been converted to use them. The message is printed with printf
336 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
337 * The message should be completed (with at least a newline) with printf
338 * or addlog, respectively. There is no trailing space.
341 diskerr(bp, what, pri, blkdone, lp)
345 struct disklabel *lp;
347 int unit = dkunit(bp->b_dev);
348 int slice = dkslice(bp->b_dev);
349 int part = dkpart(bp->b_dev);
354 sname = dsname(bp->b_dev, unit, slice, part, partname);
355 printf("%s%s: %s %sing fsbn ", sname, partname, what,
356 bp->b_flags & B_READ ? "read" : "writ");
358 if (bp->b_bcount <= DEV_BSIZE)
359 printf("%ld", (long)sn);
363 printf("%ld of ", (long)sn);
365 printf("%ld-%ld", (long)bp->b_blkno,
366 (long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE));
368 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
370 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
372 sn += lp->d_partitions[part].p_offset;
374 * XXX should add slice offset and not print the slice,
375 * but we don't know the slice pointer.
376 * XXX should print bp->b_pblkno so that this will work
377 * independent of slices, labels and bad sector remapping,
378 * but some drivers don't set bp->b_pblkno.
380 printf(" (%s bn %ld; cn %ld", sname, (long)sn,
381 (long)(sn / lp->d_secpercyl));
382 sn %= (long)lp->d_secpercyl;
383 printf(" tn %ld sn %ld)", (long)(sn / lp->d_nsectors),
384 (long)(sn % lp->d_nsectors));