2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $
42 #include <sys/param.h>
43 #include <sys/systm.h>
46 #include <sys/disklabel.h>
47 #include <sys/diskslice.h>
48 #include <sys/syslog.h>
51 * Seek sort for disks.
53 * The buf_queue keeps two queues, sorted in ascending block order. The first
54 * queue holds those requests which are positioned after the current block
55 * (in the first request); the second, which starts at queue->switch_point,
56 * holds requests which came in after their block number was passed. Thus
57 * we implement a one way scan, retracting after reaching the end of the drive
58 * to the first request on the second queue, at which time it becomes the
61 * A one-way scan is natural because of the way UNIX read-ahead blocks are
/*
 * bufqdisksort: insert the request bp into bufq, ordered by b_pblkno.
 *
 * bufq - queue head; the visible code uses its queue, insert_point,
 *        switch_point and last_pblkno members.
 * bp   - request to insert; the visible code reads b_flags and b_pblkno
 *        and links the request through b_act.
 *
 * Cases visible in this listing:
 *  - an empty queue, or a request flagged B_ORDERED, is appended at the
 *    tail with bufq_insert_tail();
 *  - when insert_point is non-NULL the search starts from insert_point;
 *  - a request whose b_pblkno is below last_pblkno is handled against
 *    switch_point: it becomes the new switch_point either when it is
 *    appended at the tail or when it sorts before the current
 *    switch_point;
 *  - otherwise be is moved to the entry preceding switch_point (when
 *    switch_point is set), a request sorting before bq is inserted
 *    before bq, a request sorting after be is inserted directly after
 *    be, and anything else is placed by a forward walk that stops when
 *    the next entry is switch_point or has a larger b_pblkno.
 *
 * NOTE(review): the embedded line numbers jump (67 to 74, 81 to 83,
 * 109 to 113, ...), so declarations, braces, returns and some
 * statements are missing from this listing; confirm the exact branch
 * structure against the complete source before relying on this summary.
 */
66 bufqdisksort(bufq, bp)
67 struct buf_queue_head *bufq;
74 be = TAILQ_LAST(&bufq->queue, buf_queue);
76 * If the queue is empty or we are an
77 * ordered transaction, then it's easy.
79 if ((bq = bufq_first(bufq)) == NULL
80 || (bp->b_flags & B_ORDERED) != 0) {
81 bufq_insert_tail(bufq, bp);
83 } else if (bufq->insert_point != NULL) {
86 * A certain portion of the list is
87 * "locked" to preserve ordering, so
88 * we can only insert after the insert
91 bq = bufq->insert_point;
95 * If we lie before the last removed (currently active)
96 * request, and are not inserting ourselves into the
97 * "locked" portion of the list, then we must add ourselves
98 * to the second request list.
100 if (bp->b_pblkno < bufq->last_pblkno) {
102 bq = bufq->switch_point;
104 * If we are starting a new secondary list,
108 bufq->switch_point = bp;
109 bufq_insert_tail(bufq, bp);
113 * If we lie ahead of the current switch point,
114 * insert us before the switch point and move
117 if (bp->b_pblkno < bq->b_pblkno) {
118 bufq->switch_point = bp;
119 TAILQ_INSERT_BEFORE(bq, bp, b_act);
123 if (bufq->switch_point != NULL)
124 be = TAILQ_PREV(bufq->switch_point,
127 * If we lie between last_pblkno and bq,
130 if (bp->b_pblkno < bq->b_pblkno) {
131 TAILQ_INSERT_BEFORE(bq, bp, b_act);
138 * Request is at/after our current position in the list.
139 * Optimize for sequential I/O by seeing if we go at the tail.
141 if (bp->b_pblkno > be->b_pblkno) {
142 TAILQ_INSERT_AFTER(&bufq->queue, be, bp, b_act);
146 /* Otherwise, insertion sort */
147 while ((bn = TAILQ_NEXT(bq, b_act)) != NULL) {
150 * We want to go after the current request if it is the end
151 * of the first request list, or if the next request is a
152 * larger cylinder than our request.
154 if (bn == bufq->switch_point
155 || bp->b_pblkno < bn->b_pblkno)
159 TAILQ_INSERT_AFTER(&bufq->queue, bq, bp, b_act);
164 * Attempt to read a disk label from a device using the indicated strategy
165 * routine. The label must be partly set up before this: secpercyl, secsize
166 * and anything required in the strategy routine (e.g., dummy bounds for the
167 * partition containing the label) must be filled in before calling us.
168 * Returns NULL on success and an error string on failure.
/*
 * readdisklabel: read the label sector and search it for a disklabel.
 *
 * dev - device argument; not referenced in the lines visible here.
 * lp  - partly initialized label; d_secsize sizes both the buffer and
 *       the transfer.
 *
 * The visible code obtains a d_secsize-byte buffer with geteblk(), aims
 * it at LABELSECTOR (scaled by d_secsize / DEV_BSIZE), clears B_INVAL
 * and sets B_READ.  It then steps through the buffer in sizeof(long)
 * increments, treating each position as a candidate struct disklabel,
 * never starting one so late that it would run past the end of the
 * sector.  msg is set to "no disk label" when a candidate fails the
 * two-field magic check and to "disk label corrupted" when
 * d_npartitions exceeds MAXPARTITIONS (the rest of that condition is
 * not visible).  The buffer is finally flagged B_INVAL | B_AGE.
 *
 * NOTE(review): lines are missing from this listing (embedded numbers
 * jump 176 to 179, 184 to 188, 197 to 204); the I/O call, the success
 * path, the buffer release and the return statement are not visible
 * here and should be confirmed against the complete source.
 */
171 readdisklabel(dev, lp)
173 register struct disklabel *lp;
175 register struct buf *bp;
176 struct disklabel *dlp;
179 bp = geteblk((int)lp->d_secsize);
181 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
182 bp->b_bcount = lp->d_secsize;
183 bp->b_flags &= ~B_INVAL;
184 bp->b_flags |= B_READ;
188 else for (dlp = (struct disklabel *)bp->b_data;
189 dlp <= (struct disklabel *)((char *)bp->b_data +
190 lp->d_secsize - sizeof(*dlp));
191 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
192 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
194 msg = "no disk label";
195 } else if (dlp->d_npartitions > MAXPARTITIONS ||
197 msg = "disk label corrupted";
204 bp->b_flags |= B_INVAL | B_AGE;
210 * Check new disk label for sensibility before setting it.
/*
 * setdisklabel: vet the proposed label nlp against the current label olp.
 *
 * olp      - label currently in effect.
 * nlp      - proposed replacement; may be modified (partition file
 *            system fields and d_checksum, see below).
 * openmask - bit mask walked with ffs(); each bit selects a partition
 *            index to compare between the two labels.
 *
 * Visible checks: both magic fields of nlp are compared with DISKMAGIC;
 * for each bit taken from openmask the code tests whether nlp still has
 * that partition (d_npartitions <= i) and whether its p_offset changed
 * or its p_size shrank.  The action taken when a test fires is in lines
 * missing from this listing.
 *
 * When the new partition is FS_UNUSED but the old one is not, p_fstype,
 * p_fsize, p_frag and p_cpg are copied from the old partition.  Finally
 * nlp->d_checksum is recomputed with dkcksum(nlp).
 *
 * NOTE(review): ffs() returns a 1-based bit position, yet the visible
 * code uses i directly in (1 << i) and as a partition index.  The
 * embedded line numbers jump from 229 to 232, so an adjustment of i is
 * presumably in the missing lines; confirm against the complete source.
 */
213 setdisklabel(olp, nlp, openmask)
214 register struct disklabel *olp, *nlp;
218 register struct partition *opp, *npp;
221 * Check it is actually a disklabel we are looking at.
223 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
227 * For each partition that we think is open,
229 while ((i = ffs((long)openmask)) != 0) {
232 * Check it is not changing....
234 openmask &= ~(1 << i);
235 if (nlp->d_npartitions <= i)
237 opp = &olp->d_partitions[i];
238 npp = &nlp->d_partitions[i];
239 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
242 * Copy internally-set partition information
243 * if new label doesn't include it. XXX
244 * (If we are using it then we had better stay the same type)
245 * This is possibly dubious, as someone else noted (XXX)
247 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
248 npp->p_fstype = opp->p_fstype;
249 npp->p_fsize = opp->p_fsize;
250 npp->p_frag = opp->p_frag;
251 npp->p_cpg = opp->p_cpg;
255 nlp->d_checksum = dkcksum(nlp);
261 * Write disk label back to device after modification.
/*
 * writedisklabel: write the label lp to the label sector of dev.
 *
 * dev - device; the buffer is aimed at dkmodpart(dev, RAW_PART).
 * lp  - label to write; d_secsize sizes both the buffer and the
 *       transfer.
 *
 * Returns EXDEV when the RAW_PART partition of lp does not start at
 * offset 0.  Other return paths are not visible in this listing.
 *
 * Visible steps:
 *  1. obtain a d_secsize-byte buffer with geteblk() and aim it at
 *     LABELSECTOR (scaled by d_secsize / DEV_BSIZE);
 *  2. clear B_INVAL, set B_READ, then scan the buffer in sizeof(long)
 *     increments for an existing label whose two magic fields match
 *     DISKMAGIC (the rest of that condition is not visible);
 *  3. switch the buffer from read to write by clearing
 *     B_DONE | B_READ and setting B_WRITE, then call
 *     alpha_fix_srm_checksum(bp);
 *  4. a second sequence zeroes the buffer, points dlp at its start,
 *     clears B_INVAL and sets B_WRITE;
 *  5. flag the buffer B_INVAL | B_AGE.
 *
 * NOTE(review): lines are missing from this listing (embedded numbers
 * jump 269 to 272, 287 to 292, 302 to 312, 316 to 320).  How steps 3
 * and 4 relate (for example alternative build-time branches), where
 * the label is copied into the buffer, and where the I/O is issued are
 * not visible; confirm against the complete source.
 */
264 writedisklabel(dev, lp)
266 register struct disklabel *lp;
269 struct disklabel *dlp;
272 if (lp->d_partitions[RAW_PART].p_offset != 0)
273 return (EXDEV); /* not quite right */
274 bp = geteblk((int)lp->d_secsize);
275 bp->b_dev = dkmodpart(dev, RAW_PART);
276 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
277 bp->b_bcount = lp->d_secsize;
280 * We read the label first to see if it's there,
281 * in which case we will put ours at the same offset into the block..
282 * (I think this is stupid [Julian])
283 * Note that you can't write a label out over a corrupted label!
284 * (also stupid.. how do you write the first one? by raw writes?)
286 bp->b_flags &= ~B_INVAL;
287 bp->b_flags |= B_READ;
292 for (dlp = (struct disklabel *)bp->b_data;
293 dlp <= (struct disklabel *)
294 ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
295 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
296 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
299 bp->b_flags &= ~(B_DONE | B_READ);
300 bp->b_flags |= B_WRITE;
302 alpha_fix_srm_checksum(bp);
312 bzero(bp->b_data, lp->d_secsize);
313 dlp = (struct disklabel *)bp->b_data;
315 bp->b_flags &= ~B_INVAL;
316 bp->b_flags |= B_WRITE;
320 bp->b_flags |= B_INVAL | B_AGE;
326 * Disk error is the preface to plaintive error messages
327 * about failing disk transfers. It prints messages of the form
329 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
331 * if the offset of the error in the transfer and a disk label
332 * are both available. blkdone should be -1 if the position of the error
333 * is unknown; the disklabel pointer may be null from drivers that have not
334 * been converted to use them. The message is printed with printf
335 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
336 * The message should be completed (with at least a newline) with printf
337 * or addlog, respectively. There is no trailing space.
/*
 * diskerr: print the leading part of a disk transfer error message.
 *
 * bp      - failing buffer; b_dev, b_flags, b_bcount and b_blkno are
 *           read.
 * what    - text printed between the device name and the operation.
 * pri     - not referenced in the lines visible here; every visible
 *           output call is printf.
 * blkdone - the label-based detail is printed when blkdone >= 0 or the
 *           transfer is no larger than one d_secsize sector.
 * lp      - disk label, may be null; the label-based detail is skipped
 *           when it is.
 *
 * Output visible in this listing: the name from dsname() plus partname,
 * then what, then "reading" or "writing" chosen by B_READ, then the
 * block number sn alone for transfers of at most DEV_BSIZE bytes, or
 * sn followed by the block range of the whole transfer.  With a label,
 * sn is rescaled by DEV_BSIZE / d_secsize, offset by the p_offset of
 * the partition, and printed again together with the cylinder, track
 * and sector derived from d_secpercyl and d_nsectors.
 *
 * NOTE(review): d_secsize, d_secpercyl and d_nsectors are used as
 * divisors with no zero check in the visible lines; presumably the
 * caller supplies a fully populated label, confirm.  The initial value
 * of sn and several branch lines are missing from this listing
 * (embedded numbers jump 348 to 353, 358 to 362, 365 to 367).
 */
340 diskerr(bp, what, pri, blkdone, lp)
344 struct disklabel *lp;
346 int unit = dkunit(bp->b_dev);
347 int slice = dkslice(bp->b_dev);
348 int part = dkpart(bp->b_dev);
353 sname = dsname(bp->b_dev, unit, slice, part, partname);
354 printf("%s%s: %s %sing fsbn ", sname, partname, what,
355 bp->b_flags & B_READ ? "read" : "writ");
357 if (bp->b_bcount <= DEV_BSIZE)
358 printf("%ld", (long)sn);
362 printf("%ld of ", (long)sn);
364 printf("%ld-%ld", (long)bp->b_blkno,
365 (long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE));
367 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
369 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
371 sn += lp->d_partitions[part].p_offset;
373 * XXX should add slice offset and not print the slice,
374 * but we don't know the slice pointer.
375 * XXX should print bp->b_pblkno so that this will work
376 * independent of slices, labels and bad sector remapping,
377 * but some drivers don't set bp->b_pblkno.
379 printf(" (%s bn %ld; cn %ld", sname, (long)sn,
380 (long)(sn / lp->d_secpercyl));
381 sn %= (long)lp->d_secpercyl;
382 printf(" tn %ld sn %ld)", (long)(sn / lp->d_nsectors),
383 (long)(sn % lp->d_nsectors));