kernel - lwkt_token revamp
[dragonfly.git] / sys / kern / subr_disk.c
CommitLineData
984263bc 1/*
149e86b9 2 * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved.
d7d5e114 3 *
8c10bfcf
MD
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
d7d5e114 6 *
8c10bfcf
MD
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
d7d5e114 10 *
8c10bfcf
MD
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
d7d5e114 20 *
8c10bfcf
MD
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
d7d5e114 33 *
984263bc
MD
34 * ----------------------------------------------------------------------------
35 * "THE BEER-WARE LICENSE" (Revision 42):
36 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
37 * can do whatever you want with this stuff. If we meet some day, and you think
38 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
39 * ----------------------------------------------------------------------------
40 *
7a9e53ad
MD
41 * Copyright (c) 1982, 1986, 1988, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Redistribution and use in source and binary forms, with or without
50 * modification, are permitted provided that the following conditions
51 * are met:
52 * 1. Redistributions of source code must retain the above copyright
53 * notice, this list of conditions and the following disclaimer.
54 * 2. Redistributions in binary form must reproduce the above copyright
55 * notice, this list of conditions and the following disclaimer in the
56 * documentation and/or other materials provided with the distribution.
57 * 3. All advertising materials mentioning features or use of this software
58 * must display the following acknowledgement:
59 * This product includes software developed by the University of
60 * California, Berkeley and its contributors.
61 * 4. Neither the name of the University nor the names of its contributors
62 * may be used to endorse or promote products derived from this software
63 * without specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75 * SUCH DAMAGE.
984263bc 76 *
7a9e53ad
MD
77 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
78 * $FreeBSD: src/sys/kern/subr_disk.c,v 1.20.2.6 2001/10/05 07:14:57 peter Exp $
79 * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $
3641b7ca 80 * $DragonFly: src/sys/kern/subr_disk.c,v 1.40 2008/06/05 18:06:32 swildner Exp $
984263bc
MD
81 */
82
83#include <sys/param.h>
84#include <sys/systm.h>
85#include <sys/kernel.h>
7a9e53ad 86#include <sys/proc.h>
984263bc
MD
87#include <sys/sysctl.h>
88#include <sys/buf.h>
89#include <sys/conf.h>
cd29885a
MD
90#include <sys/disklabel.h>
91#include <sys/disklabel32.h>
92#include <sys/disklabel64.h>
7a9e53ad 93#include <sys/diskslice.h>
cd29885a 94#include <sys/diskmbr.h>
984263bc 95#include <sys/disk.h>
b24cd69c 96#include <sys/kerneldump.h>
984263bc
MD
97#include <sys/malloc.h>
98#include <sys/sysctl.h>
99#include <machine/md_var.h>
100#include <sys/ctype.h>
7a9e53ad
MD
101#include <sys/syslog.h>
102#include <sys/device.h>
335dda38
MD
103#include <sys/msgport.h>
104#include <sys/msgport2.h>
7a9e53ad 105#include <sys/buf2.h>
2c1e28dd 106#include <sys/devfs.h>
be755ff9 107#include <sys/thread.h>
cd29885a 108#include <sys/thread2.h>
8c72e3d5 109#include <sys/dsched.h>
cd29885a
MD
110#include <sys/queue.h>
111#include <sys/lock.h>
984263bc
MD
112
113static MALLOC_DEFINE(M_DISK, "disk", "disk data");
8c05caab 114static int disk_debug_enable = 0;
984263bc 115
cd29885a
MD
116static void disk_msg_autofree_reply(lwkt_port_t, lwkt_msg_t);
117static void disk_msg_core(void *);
aec8eea4
MD
118static int disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe);
119static void disk_probe(struct disk *dp, int reprobe);
120static void _setdiskinfo(struct disk *disk, struct disk_info *info);
30e5862e 121static void bioqwritereorder(struct bio_queue_head *bioq);
fbbbca99 122static void disk_cleanserial(char *serno);
cd29885a 123
984263bc 124static d_open_t diskopen;
d7d5e114 125static d_close_t diskclose;
984263bc 126static d_ioctl_t diskioctl;
fef8985e 127static d_strategy_t diskstrategy;
984263bc 128static d_psize_t diskpsize;
e4c9c0c8 129static d_clone_t diskclone;
fef8985e 130static d_dump_t diskdump;
984263bc
MD
131
132static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist);
be755ff9 133static struct lwkt_token disklist_token;
984263bc 134
fef8985e 135static struct dev_ops disk_ops = {
daf0c2f6 136 { "disk", 0, D_DISK },
fef8985e
MD
137 .d_open = diskopen,
138 .d_close = diskclose,
139 .d_read = physread,
140 .d_write = physwrite,
141 .d_ioctl = diskioctl,
142 .d_strategy = diskstrategy,
143 .d_dump = diskdump,
144 .d_psize = diskpsize,
145 .d_clone = diskclone
146};
147
cd29885a
MD
148static struct objcache *disk_msg_cache;
149
150struct objcache_malloc_args disk_msg_malloc_args = {
151 sizeof(struct disk_msg), M_DISK };
152
153static struct lwkt_port disk_dispose_port;
154static struct lwkt_port disk_msg_port;
155
8c05caab
AH
156static int
157disk_debug(int level, char *fmt, ...)
158{
159 __va_list ap;
160
161 __va_start(ap, fmt);
162 if (level <= disk_debug_enable)
163 kvprintf(fmt, ap);
164 __va_end(ap);
165
166 return 0;
167}
cd29885a
MD
168
169static int
aec8eea4 170disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe)
cd29885a
MD
171{
172 struct disk_info *info = &dp->d_info;
173 struct diskslice *sp = &dp->d_slice->dss_slices[slice];
174 disklabel_ops_t ops;
175 struct partinfo part;
176 const char *msg;
177 cdev_t ndev;
440a040b 178 int sno;
539f339e 179 u_int i;
cd29885a 180
8c05caab
AH
181 disk_debug(2,
182 "disk_probe_slice (begin): %s (%s)\n",
183 dev->si_name, dp->d_cdev->si_name);
184
440a040b
MD
185 sno = slice ? slice - 1 : 0;
186
cd29885a
MD
187 ops = &disklabel32_ops;
188 msg = ops->op_readdisklabel(dev, sp, &sp->ds_label, info);
189 if (msg && !strcmp(msg, "no disk label")) {
cd29885a
MD
190 ops = &disklabel64_ops;
191 msg = ops->op_readdisklabel(dev, sp, &sp->ds_label, info);
192 }
cd29885a
MD
193 if (msg == NULL) {
194 if (slice != WHOLE_DISK_SLICE)
195 ops->op_adjust_label_reserved(dp->d_slice, slice, sp);
196 else
197 sp->ds_reserved = 0;
198
199 sp->ds_ops = ops;
cd29885a
MD
200 for (i = 0; i < ops->op_getnumparts(sp->ds_label); i++) {
201 ops->op_loadpartinfo(sp->ds_label, i, &part);
cd29885a 202 if (part.fstype) {
aec8eea4 203 if (reprobe &&
149e86b9 204 (ndev = devfs_find_device_by_name("%s%c",
440a040b 205 dev->si_name, 'a' + i))
149e86b9
MD
206 ) {
207 /*
208 * Device already exists and
209 * is still valid.
210 */
aec8eea4
MD
211 ndev->si_flags |= SI_REPROBE_TEST;
212 } else {
8f960aa9 213 ndev = make_dev_covering(&disk_ops, dp->d_rawdev->si_ops,
149e86b9
MD
214 dkmakeminor(dkunit(dp->d_cdev),
215 slice, i),
aec8eea4 216 UID_ROOT, GID_OPERATOR, 0640,
440a040b 217 "%s%c", dev->si_name, 'a'+ i);
aec8eea4 218 ndev->si_disk = dp;
55230951 219 if (dp->d_info.d_serialno) {
149e86b9
MD
220 make_dev_alias(ndev,
221 "serno/%s.s%d%c",
222 dp->d_info.d_serialno,
440a040b 223 sno, 'a' + i);
55230951 224 }
aec8eea4
MD
225 ndev->si_flags |= SI_REPROBE_TEST;
226 }
cd29885a
MD
227 }
228 }
229 } else if (info->d_dsflags & DSO_COMPATLABEL) {
230 msg = NULL;
231 if (sp->ds_size >= 0x100000000ULL)
232 ops = &disklabel64_ops;
233 else
234 ops = &disklabel32_ops;
235 sp->ds_label = ops->op_clone_label(info, sp);
236 } else {
dc6af901
TN
237 if (sp->ds_type == DOSPTYP_386BSD || /* XXX */
238 sp->ds_type == DOSPTYP_NETBSD ||
239 sp->ds_type == DOSPTYP_OPENBSD) {
cd29885a
MD
240 log(LOG_WARNING, "%s: cannot find label (%s)\n",
241 dev->si_name, msg);
149e86b9 242 }
cd29885a
MD
243 }
244
245 if (msg == NULL) {
246 sp->ds_wlabel = FALSE;
247 }
248
249 return (msg ? EINVAL : 0);
250}
251
666ec833
MD
252/*
253 * This routine is only called for newly minted drives or to reprobe
254 * a drive with no open slices. disk_probe_slice() is called directly
255 * when reprobing partition changes within slices.
256 */
cd29885a 257static void
aec8eea4 258disk_probe(struct disk *dp, int reprobe)
cd29885a
MD
259{
260 struct disk_info *info = &dp->d_info;
261 cdev_t dev = dp->d_cdev;
262 cdev_t ndev;
0831f2ab 263 int error, i, sno;
666ec833 264 struct diskslices *osp;
0831f2ab 265 struct diskslice *sp;
cd29885a 266
cd29885a 267 KKASSERT (info->d_media_blksize != 0);
cd29885a 268
666ec833 269 osp = dp->d_slice;
cd29885a 270 dp->d_slice = dsmakeslicestruct(BASE_SLICE, info);
666ec833 271 disk_debug(1, "disk_probe (begin): %s\n", dp->d_cdev->si_name);
cd29885a
MD
272
273 error = mbrinit(dev, info, &(dp->d_slice));
666ec833
MD
274 if (error) {
275 dsgone(&osp);
cd29885a 276 return;
666ec833 277 }
aec8eea4 278
0831f2ab
MD
279 for (i = 0; i < dp->d_slice->dss_nslices; i++) {
280 /*
281 * Ignore the whole-disk slice, it has already been created.
282 */
283 if (i == WHOLE_DISK_SLICE)
284 continue;
285 sp = &dp->d_slice->dss_slices[i];
286
287 /*
288 * Handle s0. s0 is a compatibility slice if there are no
289 * other slices and it has not otherwise been set up, else
290 * we ignore it.
291 */
292 if (i == COMPATIBILITY_SLICE) {
293 sno = 0;
294 if (sp->ds_type == 0 &&
295 dp->d_slice->dss_nslices == BASE_SLICE) {
296 sp->ds_size = info->d_media_blocks;
297 sp->ds_reserved = 0;
aec8eea4 298 }
0831f2ab
MD
299 } else {
300 sno = i - 1;
301 sp->ds_reserved = 0;
cd29885a 302 }
0831f2ab
MD
303
304 /*
305 * Ignore 0-length slices
306 */
307 if (sp->ds_size == 0)
308 continue;
309
310 if (reprobe &&
311 (ndev = devfs_find_device_by_name("%ss%d",
312 dev->si_name, sno))) {
313 /*
314 * Device already exists and is still valid
315 */
316 ndev->si_flags |= SI_REPROBE_TEST;
317 } else {
318 /*
319 * Else create new device
320 */
8f960aa9 321 ndev = make_dev_covering(&disk_ops, dp->d_rawdev->si_ops,
aec8eea4
MD
322 dkmakewholeslice(dkunit(dev), i),
323 UID_ROOT, GID_OPERATOR, 0640,
0831f2ab
MD
324 "%ss%d", dev->si_name, sno);
325 if (dp->d_info.d_serialno) {
326 make_dev_alias(ndev, "serno/%s.s%d",
327 dp->d_info.d_serialno, sno);
cd29885a 328 }
0831f2ab
MD
329 ndev->si_disk = dp;
330 ndev->si_flags |= SI_REPROBE_TEST;
331 }
332 sp->ds_dev = ndev;
149e86b9 333
1cb0bdb6
MD
334 /*
335 * Probe appropriate slices for a disklabel
336 *
337 * XXX slice type 1 used by our gpt probe code.
d7d5e114 338 * XXX slice type 0 used by mbr compat slice.
1cb0bdb6 339 */
dc6af901
TN
340 if (sp->ds_type == DOSPTYP_386BSD ||
341 sp->ds_type == DOSPTYP_NETBSD ||
342 sp->ds_type == DOSPTYP_OPENBSD ||
343 sp->ds_type == 0 ||
344 sp->ds_type == 1) {
0831f2ab
MD
345 if (dp->d_slice->dss_first_bsd_slice == 0)
346 dp->d_slice->dss_first_bsd_slice = i;
347 disk_probe_slice(dp, ndev, i, reprobe);
cd29885a
MD
348 }
349 }
666ec833
MD
350 dsgone(&osp);
351 disk_debug(1, "disk_probe (end): %s\n", dp->d_cdev->si_name);
cd29885a
MD
352}
353
354
355static void
356disk_msg_core(void *arg)
357{
cd29885a
MD
358 struct disk *dp;
359 struct diskslice *sp;
149e86b9
MD
360 disk_msg_t msg;
361 int run;
cd29885a 362
cd29885a
MD
363 lwkt_initport_thread(&disk_msg_port, curthread);
364 wakeup(curthread);
149e86b9 365 run = 1;
cd29885a 366
149e86b9
MD
367 while (run) {
368 msg = (disk_msg_t)lwkt_waitport(&disk_msg_port, 0);
cd29885a 369
149e86b9
MD
370 switch (msg->hdr.u.ms_result) {
371 case DISK_DISK_PROBE:
cd29885a 372 dp = (struct disk *)msg->load;
8c05caab
AH
373 disk_debug(1,
374 "DISK_DISK_PROBE: %s\n",
375 dp->d_cdev->si_name);
aec8eea4 376 disk_probe(dp, 0);
cd29885a 377 break;
cd29885a
MD
378 case DISK_DISK_DESTROY:
379 dp = (struct disk *)msg->load;
8c05caab
AH
380 disk_debug(1,
381 "DISK_DISK_DESTROY: %s\n",
382 dp->d_cdev->si_name);
cd29885a
MD
383 devfs_destroy_subnames(dp->d_cdev->si_name);
384 devfs_destroy_dev(dp->d_cdev);
3b998fa9 385 lwkt_gettoken(&disklist_token);
aec8eea4 386 LIST_REMOVE(dp, d_list);
3b998fa9 387 lwkt_reltoken(&disklist_token);
55230951
MD
388 if (dp->d_info.d_serialno) {
389 kfree(dp->d_info.d_serialno, M_TEMP);
390 dp->d_info.d_serialno = NULL;
391 }
cd29885a 392 break;
aec8eea4
MD
393 case DISK_UNPROBE:
394 dp = (struct disk *)msg->load;
8c05caab
AH
395 disk_debug(1,
396 "DISK_DISK_UNPROBE: %s\n",
397 dp->d_cdev->si_name);
aec8eea4
MD
398 devfs_destroy_subnames(dp->d_cdev->si_name);
399 break;
cd29885a
MD
400 case DISK_SLICE_REPROBE:
401 dp = (struct disk *)msg->load;
402 sp = (struct diskslice *)msg->load2;
149e86b9
MD
403 devfs_clr_subnames_flag(sp->ds_dev->si_name,
404 SI_REPROBE_TEST);
8c05caab 405 disk_debug(1,
aec8eea4
MD
406 "DISK_SLICE_REPROBE: %s\n",
407 sp->ds_dev->si_name);
149e86b9
MD
408 disk_probe_slice(dp, sp->ds_dev,
409 dkslice(sp->ds_dev), 1);
410 devfs_destroy_subnames_without_flag(
411 sp->ds_dev->si_name, SI_REPROBE_TEST);
cd29885a 412 break;
cd29885a
MD
413 case DISK_DISK_REPROBE:
414 dp = (struct disk *)msg->load;
aec8eea4 415 devfs_clr_subnames_flag(dp->d_cdev->si_name, SI_REPROBE_TEST);
8c05caab 416 disk_debug(1,
aec8eea4
MD
417 "DISK_DISK_REPROBE: %s\n",
418 dp->d_cdev->si_name);
419 disk_probe(dp, 1);
149e86b9
MD
420 devfs_destroy_subnames_without_flag(
421 dp->d_cdev->si_name, SI_REPROBE_TEST);
cd29885a 422 break;
cd29885a 423 case DISK_SYNC:
8c05caab 424 disk_debug(1, "DISK_SYNC\n");
cd29885a 425 break;
149e86b9
MD
426 default:
427 devfs_debug(DEVFS_DEBUG_WARNING,
428 "disk_msg_core: unknown message "
429 "received at core\n");
430 break;
431 }
a9177e09 432 lwkt_replymsg(&msg->hdr, 0);
149e86b9 433 }
cd29885a
MD
434 lwkt_exit();
435}
436
437
149e86b9
MD
438/*
439 * Acts as a message drain. Any message that is replied to here gets
440 * destroyed and the memory freed.
441 */
cd29885a
MD
442static void
443disk_msg_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
444{
149e86b9 445 objcache_put(disk_msg_cache, msg);
cd29885a
MD
446}
447
448
449void
450disk_msg_send(uint32_t cmd, void *load, void *load2)
451{
149e86b9 452 disk_msg_t disk_msg;
cd29885a
MD
453 lwkt_port_t port = &disk_msg_port;
454
149e86b9 455 disk_msg = objcache_get(disk_msg_cache, M_WAITOK);
cd29885a 456
149e86b9 457 lwkt_initmsg(&disk_msg->hdr, &disk_dispose_port, 0);
cd29885a
MD
458
459 disk_msg->hdr.u.ms_result = cmd;
460 disk_msg->load = load;
461 disk_msg->load2 = load2;
462 KKASSERT(port);
a9177e09 463 lwkt_sendmsg(port, &disk_msg->hdr);
cd29885a
MD
464}
465
aec8eea4
MD
466void
467disk_msg_send_sync(uint32_t cmd, void *load, void *load2)
468{
469 struct lwkt_port rep_port;
a9177e09
MD
470 disk_msg_t disk_msg;
471 lwkt_port_t port;
aec8eea4 472
a9177e09
MD
473 disk_msg = objcache_get(disk_msg_cache, M_WAITOK);
474 port = &disk_msg_port;
475
476 /* XXX could probably use curthread's built-in msgport */
aec8eea4
MD
477 lwkt_initport_thread(&rep_port, curthread);
478 lwkt_initmsg(&disk_msg->hdr, &rep_port, 0);
479
480 disk_msg->hdr.u.ms_result = cmd;
481 disk_msg->load = load;
482 disk_msg->load2 = load2;
483
a9177e09
MD
484 lwkt_sendmsg(port, &disk_msg->hdr);
485 lwkt_waitmsg(&disk_msg->hdr, 0);
486 objcache_put(disk_msg_cache, disk_msg);
aec8eea4
MD
487}
488
335dda38 489/*
fef8985e
MD
490 * Create a raw device for the dev_ops template (which is returned). Also
491 * create a slice and unit managed disk and overload the user visible
492 * device space with it.
e4c9c0c8 493 *
fef8985e
MD
494 * NOTE: The returned raw device is NOT a slice and unit managed device.
495 * It is an actual raw device representing the raw disk as specified by
496 * the passed dev_ops. The disk layer not only returns such a raw device,
497 * it also uses it internally when passing (modified) commands through.
335dda38 498 */
b13267a5 499cdev_t
a688b15c 500disk_create(int unit, struct disk *dp, struct dev_ops *raw_ops)
984263bc 501{
b13267a5 502 cdev_t rawdev;
e4c9c0c8 503
8c05caab
AH
504 disk_debug(1,
505 "disk_create (begin): %s%d\n",
506 raw_ops->head.name, unit);
507
cd29885a 508 rawdev = make_only_dev(raw_ops, dkmakewholedisk(unit),
e4c9c0c8 509 UID_ROOT, GID_OPERATOR, 0640,
fef8985e 510 "%s%d", raw_ops->head.name, unit);
984263bc 511
cd29885a 512 bzero(dp, sizeof(*dp));
fef8985e 513
e4c9c0c8 514 dp->d_rawdev = rawdev;
fef8985e 515 dp->d_raw_ops = raw_ops;
cd29885a 516 dp->d_dev_ops = &disk_ops;
8f960aa9 517 dp->d_cdev = make_dev_covering(&disk_ops, dp->d_rawdev->si_ops,
5350e1e9 518 dkmakewholedisk(unit),
e4c9c0c8 519 UID_ROOT, GID_OPERATOR, 0640,
cd29885a
MD
520 "%s%d", raw_ops->head.name, unit);
521
522 dp->d_cdev->si_disk = dp;
523
e02e815e
AH
524 dsched_disk_create_callback(dp, raw_ops->head.name, unit);
525
3b998fa9 526 lwkt_gettoken(&disklist_token);
984263bc 527 LIST_INSERT_HEAD(&disklist, dp, d_list);
3b998fa9 528 lwkt_reltoken(&disklist_token);
8c05caab 529
3b998fa9 530 disk_debug(1, "disk_create (end): %s%d\n", raw_ops->head.name, unit);
8c05caab 531
e4c9c0c8 532 return (dp->d_rawdev);
984263bc
MD
533}
534
aec8eea4
MD
535
536static void
537_setdiskinfo(struct disk *disk, struct disk_info *info)
a688b15c 538{
55230951
MD
539 char *oldserialno;
540
55230951 541 oldserialno = disk->d_info.d_serialno;
a688b15c
MD
542 bcopy(info, &disk->d_info, sizeof(disk->d_info));
543 info = &disk->d_info;
544
8c05caab
AH
545 disk_debug(1,
546 "_setdiskinfo: %s\n",
547 disk->d_cdev->si_name);
548
55230951
MD
549 /*
550 * The serial number is duplicated so the caller can throw
551 * their copy away.
552 */
553 if (info->d_serialno && info->d_serialno[0]) {
554 info->d_serialno = kstrdup(info->d_serialno, M_TEMP);
fbbbca99 555 disk_cleanserial(info->d_serialno);
55230951
MD
556 if (disk->d_cdev) {
557 make_dev_alias(disk->d_cdev, "serno/%s",
558 info->d_serialno);
559 }
560 } else {
561 info->d_serialno = NULL;
562 }
563 if (oldserialno)
564 kfree(oldserialno, M_TEMP);
565
279e9fd5
AH
566 dsched_disk_update_callback(disk, info);
567
55230951
MD
568 /*
569 * The caller may set d_media_size or d_media_blocks and we
570 * calculate the other.
571 */
a688b15c
MD
572 KKASSERT(info->d_media_size == 0 || info->d_media_blksize == 0);
573 if (info->d_media_size == 0 && info->d_media_blocks) {
d7d5e114 574 info->d_media_size = (u_int64_t)info->d_media_blocks *
a688b15c 575 info->d_media_blksize;
d7d5e114 576 } else if (info->d_media_size && info->d_media_blocks == 0 &&
a688b15c 577 info->d_media_blksize) {
d7d5e114 578 info->d_media_blocks = info->d_media_size /
a688b15c
MD
579 info->d_media_blksize;
580 }
5d6c6885
MD
581
582 /*
583 * The si_* fields for rawdev are not set until after the
584 * disk_create() call, so someone using the cooked version
585 * of the raw device (i.e. da0s0) will not get the right
586 * si_iosize_max unless we fix it up here.
587 */
588 if (disk->d_cdev && disk->d_rawdev &&
589 disk->d_cdev->si_iosize_max == 0) {
590 disk->d_cdev->si_iosize_max = disk->d_rawdev->si_iosize_max;
591 disk->d_cdev->si_bsize_phys = disk->d_rawdev->si_bsize_phys;
592 disk->d_cdev->si_bsize_best = disk->d_rawdev->si_bsize_best;
593 }
aec8eea4 594}
cd29885a 595
aec8eea4
MD
596/*
597 * Disk drivers must call this routine when media parameters are available
598 * or have changed.
599 */
600void
601disk_setdiskinfo(struct disk *disk, struct disk_info *info)
602{
603 _setdiskinfo(disk, info);
cd29885a 604 disk_msg_send(DISK_DISK_PROBE, disk, NULL);
8c05caab
AH
605 disk_debug(1,
606 "disk_setdiskinfo: sent probe for %s\n",
607 disk->d_cdev->si_name);
a688b15c
MD
608}
609
aec8eea4
MD
610void
611disk_setdiskinfo_sync(struct disk *disk, struct disk_info *info)
612{
613 _setdiskinfo(disk, info);
aec8eea4 614 disk_msg_send_sync(DISK_DISK_PROBE, disk, NULL);
8c05caab
AH
615 disk_debug(1,
616 "disk_setdiskinfo_sync: sent probe for %s\n",
617 disk->d_cdev->si_name);
aec8eea4
MD
618}
619
a688b15c 620/*
e4c9c0c8
MD
621 * This routine is called when an adapter detaches. The higher level
622 * managed disk device is destroyed while the lower level raw device is
623 * released.
624 */
335dda38
MD
625void
626disk_destroy(struct disk *disk)
627{
0160356d 628 dsched_disk_destroy_callback(disk);
aec8eea4 629 disk_msg_send_sync(DISK_DISK_DESTROY, disk, NULL);
cd29885a 630 return;
335dda38
MD
631}
632
984263bc 633int
b24cd69c 634disk_dumpcheck(cdev_t dev, u_int64_t *size, u_int64_t *blkno, u_int32_t *secsize)
984263bc 635{
a6c0f342
MD
636 struct partinfo pinfo;
637 int error;
984263bc 638
a6c0f342 639 bzero(&pinfo, sizeof(pinfo));
87baaf0c
MD
640 error = dev_dioctl(dev, DIOCGPART, (void *)&pinfo, 0,
641 proc0.p_ucred, NULL);
a6c0f342
MD
642 if (error)
643 return (error);
b24cd69c 644
a6c0f342 645 if (pinfo.media_blksize == 0)
984263bc 646 return (ENXIO);
b24cd69c
AH
647
648 if (blkno) /* XXX: make sure this reserved stuff is right */
649 *blkno = pinfo.reserved_blocks +
650 pinfo.media_offset / pinfo.media_blksize;
651 if (secsize)
652 *secsize = pinfo.media_blksize;
653 if (size)
654 *size = (pinfo.media_blocks - pinfo.reserved_blocks);
655
984263bc 656 return (0);
984263bc
MD
657}
658
b24cd69c
AH
659int
660disk_dumpconf(cdev_t dev, u_int onoff)
661{
662 struct dumperinfo di;
663 u_int64_t size, blkno;
664 u_int32_t secsize;
665 int error;
666
667 if (!onoff)
668 return set_dumper(NULL);
669
670 error = disk_dumpcheck(dev, &size, &blkno, &secsize);
671
672 if (error)
673 return ENXIO;
674
675 bzero(&di, sizeof(struct dumperinfo));
676 di.dumper = diskdump;
677 di.priv = dev;
678 di.blocksize = secsize;
679 di.mediaoffset = blkno * DEV_BSIZE;
680 di.mediasize = size * DEV_BSIZE;
681
682 return set_dumper(&di);
683}
684
aec8eea4
MD
685void
686disk_unprobe(struct disk *disk)
687{
688 if (disk == NULL)
689 return;
690
691 disk_msg_send_sync(DISK_UNPROBE, disk, NULL);
692}
693
d7d5e114 694void
984263bc
MD
695disk_invalidate (struct disk *disk)
696{
666ec833 697 dsgone(&disk->d_slice);
984263bc
MD
698}
699
984263bc
MD
700struct disk *
701disk_enumerate(struct disk *disk)
702{
be755ff9 703 struct disk *dp;
be755ff9 704
3b998fa9 705 lwkt_gettoken(&disklist_token);
984263bc 706 if (!disk)
be755ff9 707 dp = (LIST_FIRST(&disklist));
984263bc 708 else
be755ff9 709 dp = (LIST_NEXT(disk, d_list));
3b998fa9 710 lwkt_reltoken(&disklist_token);
be755ff9
AH
711
712 return dp;
984263bc
MD
713}
714
d7d5e114 715static
fbda7fa6 716int
984263bc
MD
717sysctl_disks(SYSCTL_HANDLER_ARGS)
718{
719 struct disk *disk;
720 int error, first;
721
722 disk = NULL;
723 first = 1;
724
725 while ((disk = disk_enumerate(disk))) {
726 if (!first) {
727 error = SYSCTL_OUT(req, " ", 1);
728 if (error)
729 return error;
730 } else {
731 first = 0;
732 }
95ce4036
HP
733 error = SYSCTL_OUT(req, disk->d_rawdev->si_name,
734 strlen(disk->d_rawdev->si_name));
984263bc
MD
735 if (error)
736 return error;
737 }
738 error = SYSCTL_OUT(req, "", 1);
739 return error;
740}
d7d5e114 741
3641b7ca 742SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
984263bc
MD
743 sysctl_disks, "A", "names of available disks");
744
745/*
e4c9c0c8
MD
746 * Open a disk device or partition.
747 */
fbda7fa6
MD
748static
749int
fef8985e 750diskopen(struct dev_open_args *ap)
984263bc 751{
b13267a5 752 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
753 struct disk *dp;
754 int error;
755
e4c9c0c8
MD
756 /*
757 * dp can't be NULL here XXX.
7ba1363d
MD
758 *
759 * d_slice will be NULL if setdiskinfo() has not been called yet.
760 * setdiskinfo() is typically called whether the disk is present
761 * or not (e.g. CD), but the base disk device is created first
762 * and there may be a race.
e4c9c0c8 763 */
e4c9c0c8 764 dp = dev->si_disk;
7ba1363d 765 if (dp == NULL || dp->d_slice == NULL)
984263bc 766 return (ENXIO);
fef8985e 767 error = 0;
984263bc 768
e4c9c0c8
MD
769 /*
770 * Deal with open races
771 */
984263bc
MD
772 while (dp->d_flags & DISKFLAG_LOCK) {
773 dp->d_flags |= DISKFLAG_WANTED;
377d4740 774 error = tsleep(dp, PCATCH, "diskopen", hz);
984263bc
MD
775 if (error)
776 return (error);
777 }
778 dp->d_flags |= DISKFLAG_LOCK;
779
e4c9c0c8
MD
780 /*
781 * Open the underlying raw device.
782 */
984263bc 783 if (!dsisopen(dp->d_slice)) {
e4c9c0c8 784#if 0
984263bc
MD
785 if (!pdev->si_iosize_max)
786 pdev->si_iosize_max = dev->si_iosize_max;
e4c9c0c8 787#endif
fef8985e
MD
788 error = dev_dopen(dp->d_rawdev, ap->a_oflags,
789 ap->a_devtype, ap->a_cred);
984263bc 790 }
cd29885a 791#if 0
e4c9c0c8
MD
792 /*
793 * Inherit properties from the underlying device now that it is
794 * open.
795 */
fef8985e 796 dev_dclone(dev);
cd29885a 797#endif
984263bc
MD
798
799 if (error)
800 goto out;
a688b15c 801 error = dsopen(dev, ap->a_devtype, dp->d_info.d_dsflags,
84f8b009 802 &dp->d_slice, &dp->d_info);
cd29885a 803 if (!dsisopen(dp->d_slice)) {
fef8985e 804 dev_dclose(dp->d_rawdev, ap->a_oflags, ap->a_devtype);
cd29885a 805 }
d7d5e114 806out:
984263bc
MD
807 dp->d_flags &= ~DISKFLAG_LOCK;
808 if (dp->d_flags & DISKFLAG_WANTED) {
809 dp->d_flags &= ~DISKFLAG_WANTED;
810 wakeup(dp);
811 }
d7d5e114 812
984263bc
MD
813 return(error);
814}
815
e4c9c0c8
MD
816/*
817 * Close a disk device or partition
818 */
fbda7fa6
MD
819static
820int
fef8985e 821diskclose(struct dev_close_args *ap)
984263bc 822{
b13267a5 823 cdev_t dev = ap->a_head.a_dev;
984263bc
MD
824 struct disk *dp;
825 int error;
984263bc
MD
826
827 error = 0;
e4c9c0c8
MD
828 dp = dev->si_disk;
829
fef8985e 830 dsclose(dev, ap->a_devtype, dp->d_slice);
cd29885a 831 if (!dsisopen(dp->d_slice)) {
fef8985e 832 error = dev_dclose(dp->d_rawdev, ap->a_fflag, ap->a_devtype);
cd29885a 833 }
fef8985e
MD
834 return (error);
835}
836
837/*
d7d5e114 838 * First execute the ioctl on the disk device, and if it isn't supported
fef8985e
MD
839 * try running it on the backing device.
840 */
841static
842int
843diskioctl(struct dev_ioctl_args *ap)
844{
b13267a5 845 cdev_t dev = ap->a_head.a_dev;
fef8985e
MD
846 struct disk *dp;
847 int error;
b24cd69c 848 u_int u;
fef8985e
MD
849
850 dp = dev->si_disk;
851 if (dp == NULL)
852 return (ENXIO);
cd29885a 853
149e86b9 854 devfs_debug(DEVFS_DEBUG_DEBUG,
fcefa6f2 855 "diskioctl: cmd is: %lx (name: %s)\n",
149e86b9
MD
856 ap->a_cmd, dev->si_name);
857 devfs_debug(DEVFS_DEBUG_DEBUG,
fcefa6f2 858 "diskioctl: &dp->d_slice is: %p, %p\n",
149e86b9 859 &dp->d_slice, dp->d_slice);
cd29885a 860
b24cd69c
AH
861 if (ap->a_cmd == DIOCGKERNELDUMP) {
862 u = *(u_int *)ap->a_data;
863 return disk_dumpconf(dev, u);
864 }
865
84f8b009
MD
866 error = dsioctl(dev, ap->a_cmd, ap->a_data, ap->a_fflag,
867 &dp->d_slice, &dp->d_info);
cd29885a 868
fef8985e
MD
869 if (error == ENOIOCTL) {
870 error = dev_dioctl(dp->d_rawdev, ap->a_cmd, ap->a_data,
87baaf0c 871 ap->a_fflag, ap->a_cred, NULL);
fef8985e 872 }
984263bc
MD
873 return (error);
874}
875
e4c9c0c8
MD
876/*
877 * Execute strategy routine
878 */
fbda7fa6 879static
fef8985e
MD
880int
881diskstrategy(struct dev_strategy_args *ap)
984263bc 882{
b13267a5 883 cdev_t dev = ap->a_head.a_dev;
fef8985e 884 struct bio *bio = ap->a_bio;
81b5c339 885 struct bio *nbio;
984263bc
MD
886 struct disk *dp;
887
81b5c339 888 dp = dev->si_disk;
984263bc 889
e4c9c0c8 890 if (dp == NULL) {
81b5c339
MD
891 bio->bio_buf->b_error = ENXIO;
892 bio->bio_buf->b_flags |= B_ERROR;
893 biodone(bio);
fef8985e 894 return(0);
984263bc 895 }
81b5c339 896 KKASSERT(dev->si_disk == dp);
984263bc 897
6f76c57e
HP
898 /*
899 * The dscheck() function will also transform the slice relative
54078292 900 * block number i.e. bio->bio_offset into a block number that can be
9a71d53f
MD
901 * passed directly to the underlying raw device. If dscheck()
902 * returns NULL it will have handled the bio for us (e.g. EOF
903 * or error due to being beyond the device size).
6f76c57e 904 */
cd29885a 905 if ((nbio = dscheck(dev, bio, dp->d_slice)) != NULL) {
8c72e3d5 906 dsched_queue(dp, nbio);
cd29885a 907 } else {
81b5c339 908 biodone(bio);
cd29885a 909 }
fef8985e 910 return(0);
984263bc
MD
911}
912
335dda38 913/*
fef8985e 914 * Return the partition size in ?blocks?
335dda38 915 */
fbda7fa6
MD
916static
917int
fef8985e 918diskpsize(struct dev_psize_args *ap)
984263bc 919{
b13267a5 920 cdev_t dev = ap->a_head.a_dev;
984263bc 921 struct disk *dp;
984263bc 922
e4c9c0c8
MD
923 dp = dev->si_disk;
924 if (dp == NULL)
fef8985e
MD
925 return(ENODEV);
926 ap->a_result = dssize(dev, &dp->d_slice);
927 return(0);
984263bc
MD
928}
929
e4c9c0c8 930/*
fef8985e
MD
931 * When new device entries are instantiated, make sure they inherit our
932 * si_disk structure and block and iosize limits from the raw device.
e4c9c0c8 933 *
d7d5e114 934 * This routine is always called synchronously in the context of the
fef8985e
MD
935 * client.
936 *
937 * XXX The various io and block size constraints are not always initialized
938 * properly by devices.
e4c9c0c8 939 */
fbda7fa6
MD
940static
941int
fef8985e 942diskclone(struct dev_clone_args *ap)
984263bc 943{
b13267a5 944 cdev_t dev = ap->a_head.a_dev;
984263bc 945 struct disk *dp;
aec8eea4
MD
946 dp = dev->si_disk;
947
fef8985e
MD
948 KKASSERT(dp != NULL);
949 dev->si_disk = dp;
950 dev->si_iosize_max = dp->d_rawdev->si_iosize_max;
951 dev->si_bsize_phys = dp->d_rawdev->si_bsize_phys;
952 dev->si_bsize_best = dp->d_rawdev->si_bsize_best;
953 return(0);
954}
955
956int
957diskdump(struct dev_dump_args *ap)
958{
b13267a5 959 cdev_t dev = ap->a_head.a_dev;
aec8eea4 960 struct disk *dp = dev->si_disk;
b24cd69c 961 u_int64_t size, offset;
fef8985e
MD
962 int error;
963
b24cd69c
AH
964 error = disk_dumpcheck(dev, &size, &ap->a_blkno, &ap->a_secsize);
965 /* XXX: this should probably go in disk_dumpcheck somehow */
966 if (ap->a_length != 0) {
967 size *= DEV_BSIZE;
968 offset = ap->a_blkno * DEV_BSIZE;
969 if ((ap->a_offset < offset) ||
970 (ap->a_offset + ap->a_length - offset > size)) {
971 kprintf("Attempt to write outside dump device boundaries.\n");
972 error = ENOSPC;
973 }
974 }
975
fef8985e
MD
976 if (error == 0) {
977 ap->a_head.a_dev = dp->d_rawdev;
978 error = dev_doperate(&ap->a_head);
984263bc 979 }
fef8985e
MD
980
981 return(error);
984263bc
MD
982}
983
fef8985e 984
d7d5e114 985SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD,
984263bc
MD
986 0, sizeof(struct diskslices), "sizeof(struct diskslices)");
987
d7d5e114 988SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD,
984263bc 989 0, sizeof(struct disk), "sizeof(struct disk)");
7a9e53ad 990
ef548879 991/*
4afeea0d
MD
992 * Reorder interval for burst write allowance and minor write
993 * allowance.
994 *
995 * We always want to trickle some writes in to make use of the
996 * disk's zone cache. Bursting occurs on a longer interval and only
997 * runningbufspace is well over the hirunningspace limit.
ef548879 998 */
4afeea0d
MD
999int bioq_reorder_burst_interval = 60; /* should be multiple of minor */
1000SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_burst_interval,
1001 CTLFLAG_RW, &bioq_reorder_burst_interval, 0, "");
1002int bioq_reorder_minor_interval = 5;
1003SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_minor_interval,
1004 CTLFLAG_RW, &bioq_reorder_minor_interval, 0, "");
1005
1006int bioq_reorder_burst_bytes = 3000000;
1007SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_burst_bytes,
1008 CTLFLAG_RW, &bioq_reorder_burst_bytes, 0, "");
1009int bioq_reorder_minor_bytes = 262144;
1010SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_minor_bytes,
1011 CTLFLAG_RW, &bioq_reorder_minor_bytes, 0, "");
ef548879 1012
7a9e53ad
MD
1013
1014/*
30e5862e
MD
1015 * Order I/Os. Generally speaking this code is designed to make better
1016 * use of drive zone caches. A drive zone cache can typically track linear
1017 * reads or writes for around 16 zones simultaniously.
7a9e53ad 1018 *
30e5862e
MD
1019 * Read prioritization issues: It is possible for hundreds of megabytes worth
1020 * of writes to be queued asynchronously. This creates a huge bottleneck
1021 * for reads which reduce read bandwidth to a trickle.
7a9e53ad 1022 *
4afeea0d
MD
1023 * To solve this problem we generally reorder reads before writes.
1024 *
1025 * However, a large number of random reads can also starve writes and
1026 * make poor use of the drive zone cache so we allow writes to trickle
1027 * in every N reads.
7a9e53ad
MD
1028 */
1029void
81b5c339 1030bioqdisksort(struct bio_queue_head *bioq, struct bio *bio)
7a9e53ad 1031{
4afeea0d
MD
1032 /*
1033 * The BIO wants to be ordered. Adding to the tail also
1034 * causes transition to be set to NULL, forcing the ordering
1035 * of all prior I/O's.
1036 */
1037 if (bio->bio_buf->b_flags & B_ORDERED) {
1038 bioq_insert_tail(bioq, bio);
1039 return;
1040 }
1041
30e5862e
MD
1042 switch(bio->bio_buf->b_cmd) {
1043 case BUF_CMD_READ:
1044 if (bioq->transition) {
7a9e53ad 1045 /*
4afeea0d
MD
1046 * Insert before the first write. Bleedover writes
1047 * based on reorder intervals to prevent starvation.
7a9e53ad 1048 */
30e5862e 1049 TAILQ_INSERT_BEFORE(bioq->transition, bio, bio_act);
4afeea0d
MD
1050 ++bioq->reorder;
1051 if (bioq->reorder % bioq_reorder_minor_interval == 0) {
30e5862e 1052 bioqwritereorder(bioq);
4afeea0d
MD
1053 if (bioq->reorder >=
1054 bioq_reorder_burst_interval) {
1055 bioq->reorder = 0;
1056 }
7a9e53ad
MD
1057 }
1058 } else {
7a9e53ad 1059 /*
30e5862e
MD
1060 * No writes queued (or ordering was forced),
1061 * insert at tail.
7a9e53ad 1062 */
30e5862e 1063 TAILQ_INSERT_TAIL(&bioq->queue, bio, bio_act);
7a9e53ad 1064 }
30e5862e
MD
1065 break;
1066 case BUF_CMD_WRITE:
1067 /*
1068 * Writes are always appended. If no writes were previously
1069 * queued or an ordered tail insertion occured the transition
1070 * field will be NULL.
1071 */
1072 TAILQ_INSERT_TAIL(&bioq->queue, bio, bio_act);
1073 if (bioq->transition == NULL)
1074 bioq->transition = bio;
1075 break;
1076 default:
1077 /*
1078 * All other request types are forced to be ordered.
1079 */
1080 bioq_insert_tail(bioq, bio);
4afeea0d 1081 break;
7a9e53ad 1082 }
30e5862e 1083}
7a9e53ad 1084
30e5862e 1085/*
4afeea0d
MD
1086 * Move the read-write transition point to prevent reads from
1087 * completely starving our writes. This brings a number of writes into
30e5862e 1088 * the fold every N reads.
4afeea0d
MD
1089 *
1090 * We bring a few linear writes into the fold on a minor interval
1091 * and we bring a non-linear burst of writes into the fold on a major
1092 * interval. Bursting only occurs if runningbufspace is really high
1093 * (typically from syncs, fsyncs, or HAMMER flushes).
30e5862e
MD
1094 */
1095static
1096void
1097bioqwritereorder(struct bio_queue_head *bioq)
1098{
1099 struct bio *bio;
1100 off_t next_offset;
4afeea0d 1101 size_t left;
30e5862e 1102 size_t n;
4afeea0d
MD
1103 int check_off;
1104
1105 if (bioq->reorder < bioq_reorder_burst_interval ||
1106 !buf_runningbufspace_severe()) {
1107 left = (size_t)bioq_reorder_minor_bytes;
1108 check_off = 1;
1109 } else {
1110 left = (size_t)bioq_reorder_burst_bytes;
1111 check_off = 0;
1112 }
30e5862e
MD
1113
1114 next_offset = bioq->transition->bio_offset;
1115 while ((bio = bioq->transition) != NULL &&
4afeea0d
MD
1116 (check_off == 0 || next_offset == bio->bio_offset)
1117 ) {
30e5862e
MD
1118 n = bio->bio_buf->b_bcount;
1119 next_offset = bio->bio_offset + n;
1120 bioq->transition = TAILQ_NEXT(bio, bio_act);
1121 if (left < n)
7a9e53ad 1122 break;
30e5862e 1123 left -= n;
7a9e53ad 1124 }
7a9e53ad
MD
1125}
1126
7a9e53ad
MD
1127/*
1128 * Disk error is the preface to plaintive error messages
1129 * about failing disk transfers. It prints messages of the form
1130
1131hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
1132
1133 * if the offset of the error in the transfer and a disk label
1134 * are both available. blkdone should be -1 if the position of the error
1135 * is unknown; the disklabel pointer may be null from drivers that have not
6ea70f76 1136 * been converted to use them. The message is printed with kprintf
7a9e53ad 1137 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
6ea70f76 1138 * The message should be completed (with at least a newline) with kprintf
a0a36cfd 1139 * or log(-1, ...), respectively. There is no trailing space.
7a9e53ad
MD
1140 */
1141void
a688b15c 1142diskerr(struct bio *bio, cdev_t dev, const char *what, int pri, int donecnt)
7a9e53ad 1143{
81b5c339 1144 struct buf *bp = bio->bio_buf;
c6f49b01
MD
1145 const char *term;
1146
1147 switch(bp->b_cmd) {
1148 case BUF_CMD_READ:
1149 term = "read";
1150 break;
1151 case BUF_CMD_WRITE:
1152 term = "write";
1153 break;
1154 default:
1155 term = "access";
1156 break;
1157 }
cd29885a 1158 kprintf("%s: %s %sing ", dev->si_name, what, term);
973c11b9
MD
1159 kprintf("offset %012llx for %d",
1160 (long long)bio->bio_offset,
1161 bp->b_bcount);
cd29885a 1162
54078292 1163 if (donecnt)
6ea70f76 1164 kprintf(" (%d bytes completed)", donecnt);
7a9e53ad 1165}
81b5c339 1166
a8873631
MD
1167/*
1168 * Locate a disk device
1169 */
1170cdev_t
1171disk_locate(const char *devname)
1172{
cd29885a
MD
1173 return devfs_find_device_by_name(devname);
1174}
a8873631 1175
cd29885a
MD
1176void
1177disk_config(void *arg)
1178{
aec8eea4 1179 disk_msg_send_sync(DISK_SYNC, NULL, NULL);
cd29885a
MD
1180}
1181
cd29885a
MD
1182static void
1183disk_init(void)
1184{
1185 struct thread* td_core;
cd29885a 1186
149e86b9
MD
1187 disk_msg_cache = objcache_create("disk-msg-cache", 0, 0,
1188 NULL, NULL, NULL,
1189 objcache_malloc_alloc,
1190 objcache_malloc_free,
1191 &disk_msg_malloc_args);
cd29885a 1192
3b998fa9 1193 lwkt_token_init(&disklist_token, 1);
be755ff9 1194
149e86b9
MD
1195 /*
1196 * Initialize the reply-only port which acts as a message drain
1197 */
cd29885a
MD
1198 lwkt_initport_replyonly(&disk_dispose_port, disk_msg_autofree_reply);
1199
1200 lwkt_create(disk_msg_core, /*args*/NULL, &td_core, NULL,
1201 0, 0, "disk_msg_core");
1202
1203 tsleep(td_core, 0, "diskcore", 0);
1204}
1205
cd29885a
MD
1206static void
1207disk_uninit(void)
1208{
cd29885a 1209 objcache_destroy(disk_msg_cache);
a8873631
MD
1210}
1211
fbbbca99
MD
/*
 * Clean out illegal characters in serial numbers.
 *
 * The NUL-terminated string is rewritten in place: ASCII letters, digits
 * and the characters '-', '@', '+' and '.' are kept, every other
 * character is replaced with '_'.
 */
static void
disk_cleanserial(char *serno)
{
	char *p;
	char ch;
	int legal;

	for (p = serno; *p != 0; ++p) {
		ch = *p;
		legal = (ch >= 'a' && ch <= 'z') ||
			(ch >= 'A' && ch <= 'Z') ||
			(ch >= '0' && ch <= '9') ||
			ch == '-' || ch == '@' || ch == '+' || ch == '.';
		*p = legal ? ch : '_';
	}
}
1234
8c05caab
AH
1235TUNABLE_INT("kern.disk_debug", &disk_debug_enable);
1236SYSCTL_INT(_kern, OID_AUTO, disk_debug, CTLFLAG_RW, &disk_debug_enable,
1237 0, "Enable subr_disk debugging");
1238
cd29885a
MD
1239SYSINIT(disk_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, disk_init, NULL);
1240SYSUNINIT(disk_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, disk_uninit, NULL);