| Commit | Line | Data |
|---|---|---|
| 984263bc | 1 | /* |
| 149e86b9 | 2 | * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved. |
| d7d5e114 | 3 | * |
| 8c10bfcf MD |
4 | * This code is derived from software contributed to The DragonFly Project |
| 5 | * by Matthew Dillon <dillon@backplane.com> | |
| d7d5e114 | 6 | * |
| 8c10bfcf MD |
7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions | |
| 9 | * are met: | |
| d7d5e114 | 10 | * |
| 8c10bfcf MD |
11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in | |
| 15 | * the documentation and/or other materials provided with the | |
| 16 | * distribution. | |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 18 | * contributors may be used to endorse or promote products derived | |
| 19 | * from this software without specific, prior written permission. | |
| d7d5e114 | 20 | * |
| 8c10bfcf MD |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 32 | * SUCH DAMAGE. | |
| d7d5e114 | 33 | * |
| 984263bc MD |
34 | * ---------------------------------------------------------------------------- |
| 35 | * "THE BEER-WARE LICENSE" (Revision 42): | |
| 36 | * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you | |
| 37 | * can do whatever you want with this stuff. If we meet some day, and you think | |
| 38 | * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp | |
| 39 | * ---------------------------------------------------------------------------- | |
| 40 | * | |
| 7a9e53ad MD |
41 | * Copyright (c) 1982, 1986, 1988, 1993 |
| 42 | * The Regents of the University of California. All rights reserved. | |
| 43 | * (c) UNIX System Laboratories, Inc. | |
| 44 | * All or some portions of this file are derived from material licensed | |
| 45 | * to the University of California by American Telephone and Telegraph | |
| 46 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
| 47 | * the permission of UNIX System Laboratories, Inc. | |
| 48 | * | |
| 49 | * Redistribution and use in source and binary forms, with or without | |
| 50 | * modification, are permitted provided that the following conditions | |
| 51 | * are met: | |
| 52 | * 1. Redistributions of source code must retain the above copyright | |
| 53 | * notice, this list of conditions and the following disclaimer. | |
| 54 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 55 | * notice, this list of conditions and the following disclaimer in the | |
| 56 | * documentation and/or other materials provided with the distribution. | |
| 57 | * 3. All advertising materials mentioning features or use of this software | |
| 58 | * must display the following acknowledgement: | |
| 59 | * This product includes software developed by the University of | |
| 60 | * California, Berkeley and its contributors. | |
| 61 | * 4. Neither the name of the University nor the names of its contributors | |
| 62 | * may be used to endorse or promote products derived from this software | |
| 63 | * without specific prior written permission. | |
| 64 | * | |
| 65 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
| 66 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 67 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 68 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
| 69 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 70 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 71 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 72 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 73 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 74 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 75 | * SUCH DAMAGE. | |
| 984263bc | 76 | * |
| 7a9e53ad MD |
77 | * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 |
| 78 | * $FreeBSD: src/sys/kern/subr_disk.c,v 1.20.2.6 2001/10/05 07:14:57 peter Exp $ | |
| 79 | * $FreeBSD: src/sys/ufs/ufs/ufs_disksubr.c,v 1.44.2.3 2001/03/05 05:42:19 obrien Exp $ | |
| 3641b7ca | 80 | * $DragonFly: src/sys/kern/subr_disk.c,v 1.40 2008/06/05 18:06:32 swildner Exp $ |
| 984263bc MD |
81 | */ |
| 82 | ||
| 83 | #include <sys/param.h> | |
| 84 | #include <sys/systm.h> | |
| 85 | #include <sys/kernel.h> | |
| 7a9e53ad | 86 | #include <sys/proc.h> |
| 984263bc MD |
87 | #include <sys/sysctl.h> |
| 88 | #include <sys/buf.h> | |
| 89 | #include <sys/conf.h> | |
| cd29885a MD |
90 | #include <sys/disklabel.h> |
| 91 | #include <sys/disklabel32.h> | |
| 92 | #include <sys/disklabel64.h> | |
| 7a9e53ad | 93 | #include <sys/diskslice.h> |
| cd29885a | 94 | #include <sys/diskmbr.h> |
| 984263bc | 95 | #include <sys/disk.h> |
| b24cd69c | 96 | #include <sys/kerneldump.h> |
| 984263bc MD |
97 | #include <sys/malloc.h> |
| 98 | #include <sys/sysctl.h> | |
| 99 | #include <machine/md_var.h> | |
| 100 | #include <sys/ctype.h> | |
| 7a9e53ad MD |
101 | #include <sys/syslog.h> |
| 102 | #include <sys/device.h> | |
| 335dda38 MD |
103 | #include <sys/msgport.h> |
| 104 | #include <sys/msgport2.h> | |
| 7a9e53ad | 105 | #include <sys/buf2.h> |
| 2c1e28dd | 106 | #include <sys/devfs.h> |
| be755ff9 | 107 | #include <sys/thread.h> |
| cd29885a | 108 | #include <sys/thread2.h> |
| 8c72e3d5 | 109 | #include <sys/dsched.h> |
| cd29885a MD |
110 | #include <sys/queue.h> |
| 111 | #include <sys/lock.h> | |
| 984263bc MD |
112 | |
| 113 | static MALLOC_DEFINE(M_DISK, "disk", "disk data"); | |
| 8c05caab | 114 | static int disk_debug_enable = 0; |
| 984263bc | 115 | |
| cd29885a MD |
116 | static void disk_msg_autofree_reply(lwkt_port_t, lwkt_msg_t); |
| 117 | static void disk_msg_core(void *); | |
| aec8eea4 MD |
118 | static int disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe); |
| 119 | static void disk_probe(struct disk *dp, int reprobe); | |
| 120 | static void _setdiskinfo(struct disk *disk, struct disk_info *info); | |
| 30e5862e | 121 | static void bioqwritereorder(struct bio_queue_head *bioq); |
| fbbbca99 | 122 | static void disk_cleanserial(char *serno); |
| cd29885a | 123 | |
| 984263bc | 124 | static d_open_t diskopen; |
| d7d5e114 | 125 | static d_close_t diskclose; |
| 984263bc | 126 | static d_ioctl_t diskioctl; |
| fef8985e | 127 | static d_strategy_t diskstrategy; |
| 984263bc | 128 | static d_psize_t diskpsize; |
| e4c9c0c8 | 129 | static d_clone_t diskclone; |
| fef8985e | 130 | static d_dump_t diskdump; |
| 984263bc MD |
131 | |
| 132 | static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist); | |
| be755ff9 | 133 | static struct lwkt_token disklist_token; |
| 984263bc | 134 | |
| fef8985e | 135 | static struct dev_ops disk_ops = { |
| daf0c2f6 | 136 | { "disk", 0, D_DISK }, |
| fef8985e MD |
137 | .d_open = diskopen, |
| 138 | .d_close = diskclose, | |
| 139 | .d_read = physread, | |
| 140 | .d_write = physwrite, | |
| 141 | .d_ioctl = diskioctl, | |
| 142 | .d_strategy = diskstrategy, | |
| 143 | .d_dump = diskdump, | |
| 144 | .d_psize = diskpsize, | |
| 145 | .d_clone = diskclone | |
| 146 | }; | |
| 147 | ||
| cd29885a MD |
148 | static struct objcache *disk_msg_cache; |
| 149 | ||
| 150 | struct objcache_malloc_args disk_msg_malloc_args = { | |
| 151 | sizeof(struct disk_msg), M_DISK }; | |
| 152 | ||
| 153 | static struct lwkt_port disk_dispose_port; | |
| 154 | static struct lwkt_port disk_msg_port; | |
| 155 | ||
| 8c05caab AH |
156 | static int |
| 157 | disk_debug(int level, char *fmt, ...) | |
| 158 | { | |
| 159 | __va_list ap; | |
| 160 | ||
| 161 | __va_start(ap, fmt); | |
| 162 | if (level <= disk_debug_enable) | |
| 163 | kvprintf(fmt, ap); | |
| 164 | __va_end(ap); | |
| 165 | ||
| 166 | return 0; | |
| 167 | } | |
| cd29885a MD |
168 | |
| 169 | static int | |
| aec8eea4 | 170 | disk_probe_slice(struct disk *dp, cdev_t dev, int slice, int reprobe) |
| cd29885a MD |
171 | { |
| 172 | struct disk_info *info = &dp->d_info; | |
| 173 | struct diskslice *sp = &dp->d_slice->dss_slices[slice]; | |
| 174 | disklabel_ops_t ops; | |
| 175 | struct partinfo part; | |
| 176 | const char *msg; | |
| 177 | cdev_t ndev; | |
| 440a040b | 178 | int sno; |
| 539f339e | 179 | u_int i; |
| cd29885a | 180 | |
| 8c05caab AH |
181 | disk_debug(2, |
| 182 | "disk_probe_slice (begin): %s (%s)\n", | |
| 183 | dev->si_name, dp->d_cdev->si_name); | |
| 184 | ||
| 440a040b MD |
185 | sno = slice ? slice - 1 : 0; |
| 186 | ||
| cd29885a MD |
187 | ops = &disklabel32_ops; |
| 188 | msg = ops->op_readdisklabel(dev, sp, &sp->ds_label, info); | |
| 189 | if (msg && !strcmp(msg, "no disk label")) { | |
| cd29885a MD |
190 | ops = &disklabel64_ops; |
| 191 | msg = ops->op_readdisklabel(dev, sp, &sp->ds_label, info); | |
| 192 | } | |
| cd29885a MD |
193 | if (msg == NULL) { |
| 194 | if (slice != WHOLE_DISK_SLICE) | |
| 195 | ops->op_adjust_label_reserved(dp->d_slice, slice, sp); | |
| 196 | else | |
| 197 | sp->ds_reserved = 0; | |
| 198 | ||
| 199 | sp->ds_ops = ops; | |
| cd29885a MD |
200 | for (i = 0; i < ops->op_getnumparts(sp->ds_label); i++) { |
| 201 | ops->op_loadpartinfo(sp->ds_label, i, &part); | |
| cd29885a | 202 | if (part.fstype) { |
| aec8eea4 | 203 | if (reprobe && |
| 149e86b9 | 204 | (ndev = devfs_find_device_by_name("%s%c", |
| 440a040b | 205 | dev->si_name, 'a' + i)) |
| 149e86b9 MD |
206 | ) { |
| 207 | /* | |
| 208 | * Device already exists and | |
| 209 | * is still valid. | |
| 210 | */ | |
| aec8eea4 MD |
211 | ndev->si_flags |= SI_REPROBE_TEST; |
| 212 | } else { | |
| 8f960aa9 | 213 | ndev = make_dev_covering(&disk_ops, dp->d_rawdev->si_ops, |
| 149e86b9 MD |
214 | dkmakeminor(dkunit(dp->d_cdev), |
| 215 | slice, i), | |
| aec8eea4 | 216 | UID_ROOT, GID_OPERATOR, 0640, |
| 440a040b | 217 | "%s%c", dev->si_name, 'a'+ i); |
| aec8eea4 | 218 | ndev->si_disk = dp; |
| 55230951 | 219 | if (dp->d_info.d_serialno) { |
| 149e86b9 MD |
220 | make_dev_alias(ndev, |
| 221 | "serno/%s.s%d%c", | |
| 222 | dp->d_info.d_serialno, | |
| 440a040b | 223 | sno, 'a' + i); |
| 55230951 | 224 | } |
| aec8eea4 MD |
225 | ndev->si_flags |= SI_REPROBE_TEST; |
| 226 | } | |
| cd29885a MD |
227 | } |
| 228 | } | |
| 229 | } else if (info->d_dsflags & DSO_COMPATLABEL) { | |
| 230 | msg = NULL; | |
| 231 | if (sp->ds_size >= 0x100000000ULL) | |
| 232 | ops = &disklabel64_ops; | |
| 233 | else | |
| 234 | ops = &disklabel32_ops; | |
| 235 | sp->ds_label = ops->op_clone_label(info, sp); | |
| 236 | } else { | |
| dc6af901 TN |
237 | if (sp->ds_type == DOSPTYP_386BSD || /* XXX */ |
| 238 | sp->ds_type == DOSPTYP_NETBSD || | |
| 239 | sp->ds_type == DOSPTYP_OPENBSD) { | |
| cd29885a MD |
240 | log(LOG_WARNING, "%s: cannot find label (%s)\n", |
| 241 | dev->si_name, msg); | |
| 149e86b9 | 242 | } |
| cd29885a MD |
243 | } |
| 244 | ||
| 245 | if (msg == NULL) { | |
| 246 | sp->ds_wlabel = FALSE; | |
| 247 | } | |
| 248 | ||
| 249 | return (msg ? EINVAL : 0); | |
| 250 | } | |
| 251 | ||
| 666ec833 MD |
252 | /* |
| 253 | * This routine is only called for newly minted drives or to reprobe | |
| 254 | * a drive with no open slices. disk_probe_slice() is called directly | |
| 255 | * when reprobing partition changes within slices. | |
| 256 | */ | |
| cd29885a | 257 | static void |
| aec8eea4 | 258 | disk_probe(struct disk *dp, int reprobe) |
| cd29885a MD |
259 | { |
| 260 | struct disk_info *info = &dp->d_info; | |
| 261 | cdev_t dev = dp->d_cdev; | |
| 262 | cdev_t ndev; | |
| 0831f2ab | 263 | int error, i, sno; |
| 666ec833 | 264 | struct diskslices *osp; |
| 0831f2ab | 265 | struct diskslice *sp; |
| cd29885a | 266 | |
| cd29885a | 267 | KKASSERT (info->d_media_blksize != 0); |
| cd29885a | 268 | |
| 666ec833 | 269 | osp = dp->d_slice; |
| cd29885a | 270 | dp->d_slice = dsmakeslicestruct(BASE_SLICE, info); |
| 666ec833 | 271 | disk_debug(1, "disk_probe (begin): %s\n", dp->d_cdev->si_name); |
| cd29885a MD |
272 | |
| 273 | error = mbrinit(dev, info, &(dp->d_slice)); | |
| 666ec833 MD |
274 | if (error) { |
| 275 | dsgone(&osp); | |
| cd29885a | 276 | return; |
| 666ec833 | 277 | } |
| aec8eea4 | 278 | |
| 0831f2ab MD |
279 | for (i = 0; i < dp->d_slice->dss_nslices; i++) { |
| 280 | /* | |
| 281 | * Ignore the whole-disk slice, it has already been created. | |
| 282 | */ | |
| 283 | if (i == WHOLE_DISK_SLICE) | |
| 284 | continue; | |
| 285 | sp = &dp->d_slice->dss_slices[i]; | |
| 286 | ||
| 287 | /* | |
| 288 | * Handle s0. s0 is a compatibility slice if there are no | |
| 289 | * other slices and it has not otherwise been set up, else | |
| 290 | * we ignore it. | |
| 291 | */ | |
| 292 | if (i == COMPATIBILITY_SLICE) { | |
| 293 | sno = 0; | |
| 294 | if (sp->ds_type == 0 && | |
| 295 | dp->d_slice->dss_nslices == BASE_SLICE) { | |
| 296 | sp->ds_size = info->d_media_blocks; | |
| 297 | sp->ds_reserved = 0; | |
| aec8eea4 | 298 | } |
| 0831f2ab MD |
299 | } else { |
| 300 | sno = i - 1; | |
| 301 | sp->ds_reserved = 0; | |
| cd29885a | 302 | } |
| 0831f2ab MD |
303 | |
| 304 | /* | |
| 305 | * Ignore 0-length slices | |
| 306 | */ | |
| 307 | if (sp->ds_size == 0) | |
| 308 | continue; | |
| 309 | ||
| 310 | if (reprobe && | |
| 311 | (ndev = devfs_find_device_by_name("%ss%d", | |
| 312 | dev->si_name, sno))) { | |
| 313 | /* | |
| 314 | * Device already exists and is still valid | |
| 315 | */ | |
| 316 | ndev->si_flags |= SI_REPROBE_TEST; | |
| 317 | } else { | |
| 318 | /* | |
| 319 | * Else create new device | |
| 320 | */ | |
| 8f960aa9 | 321 | ndev = make_dev_covering(&disk_ops, dp->d_rawdev->si_ops, |
| aec8eea4 MD |
322 | dkmakewholeslice(dkunit(dev), i), |
| 323 | UID_ROOT, GID_OPERATOR, 0640, | |
| 0831f2ab MD |
324 | "%ss%d", dev->si_name, sno); |
| 325 | if (dp->d_info.d_serialno) { | |
| 326 | make_dev_alias(ndev, "serno/%s.s%d", | |
| 327 | dp->d_info.d_serialno, sno); | |
| cd29885a | 328 | } |
| 0831f2ab MD |
329 | ndev->si_disk = dp; |
| 330 | ndev->si_flags |= SI_REPROBE_TEST; | |
| 331 | } | |
| 332 | sp->ds_dev = ndev; | |
| 149e86b9 | 333 | |
| 1cb0bdb6 MD |
334 | /* |
| 335 | * Probe appropriate slices for a disklabel | |
| 336 | * | |
| 337 | * XXX slice type 1 used by our gpt probe code. | |
| d7d5e114 | 338 | * XXX slice type 0 used by mbr compat slice. |
| 1cb0bdb6 | 339 | */ |
| dc6af901 TN |
340 | if (sp->ds_type == DOSPTYP_386BSD || |
| 341 | sp->ds_type == DOSPTYP_NETBSD || | |
| 342 | sp->ds_type == DOSPTYP_OPENBSD || | |
| 343 | sp->ds_type == 0 || | |
| 344 | sp->ds_type == 1) { | |
| 0831f2ab MD |
345 | if (dp->d_slice->dss_first_bsd_slice == 0) |
| 346 | dp->d_slice->dss_first_bsd_slice = i; | |
| 347 | disk_probe_slice(dp, ndev, i, reprobe); | |
| cd29885a MD |
348 | } |
| 349 | } | |
| 666ec833 MD |
350 | dsgone(&osp); |
| 351 | disk_debug(1, "disk_probe (end): %s\n", dp->d_cdev->si_name); | |
| cd29885a MD |
352 | } |
| 353 | ||
| 354 | ||
| 355 | static void | |
| 356 | disk_msg_core(void *arg) | |
| 357 | { | |
| cd29885a MD |
358 | struct disk *dp; |
| 359 | struct diskslice *sp; | |
| 149e86b9 MD |
360 | disk_msg_t msg; |
| 361 | int run; | |
| cd29885a | 362 | |
| cd29885a MD |
363 | lwkt_initport_thread(&disk_msg_port, curthread); |
| 364 | wakeup(curthread); | |
| 149e86b9 | 365 | run = 1; |
| cd29885a | 366 | |
| 149e86b9 MD |
367 | while (run) { |
| 368 | msg = (disk_msg_t)lwkt_waitport(&disk_msg_port, 0); | |
| cd29885a | 369 | |
| 149e86b9 MD |
370 | switch (msg->hdr.u.ms_result) { |
| 371 | case DISK_DISK_PROBE: | |
| cd29885a | 372 | dp = (struct disk *)msg->load; |
| 8c05caab AH |
373 | disk_debug(1, |
| 374 | "DISK_DISK_PROBE: %s\n", | |
| 375 | dp->d_cdev->si_name); | |
| aec8eea4 | 376 | disk_probe(dp, 0); |
| cd29885a | 377 | break; |
| cd29885a MD |
378 | case DISK_DISK_DESTROY: |
| 379 | dp = (struct disk *)msg->load; | |
| 8c05caab AH |
380 | disk_debug(1, |
| 381 | "DISK_DISK_DESTROY: %s\n", | |
| 382 | dp->d_cdev->si_name); | |
| cd29885a MD |
383 | devfs_destroy_subnames(dp->d_cdev->si_name); |
| 384 | devfs_destroy_dev(dp->d_cdev); | |
| 3b998fa9 | 385 | lwkt_gettoken(&disklist_token); |
| aec8eea4 | 386 | LIST_REMOVE(dp, d_list); |
| 3b998fa9 | 387 | lwkt_reltoken(&disklist_token); |
| 55230951 MD |
388 | if (dp->d_info.d_serialno) { |
| 389 | kfree(dp->d_info.d_serialno, M_TEMP); | |
| 390 | dp->d_info.d_serialno = NULL; | |
| 391 | } | |
| cd29885a | 392 | break; |
| aec8eea4 MD |
393 | case DISK_UNPROBE: |
| 394 | dp = (struct disk *)msg->load; | |
| 8c05caab AH |
395 | disk_debug(1, |
| 396 | "DISK_DISK_UNPROBE: %s\n", | |
| 397 | dp->d_cdev->si_name); | |
| aec8eea4 MD |
398 | devfs_destroy_subnames(dp->d_cdev->si_name); |
| 399 | break; | |
| cd29885a MD |
400 | case DISK_SLICE_REPROBE: |
| 401 | dp = (struct disk *)msg->load; | |
| 402 | sp = (struct diskslice *)msg->load2; | |
| 149e86b9 MD |
403 | devfs_clr_subnames_flag(sp->ds_dev->si_name, |
| 404 | SI_REPROBE_TEST); | |
| 8c05caab | 405 | disk_debug(1, |
| aec8eea4 MD |
406 | "DISK_SLICE_REPROBE: %s\n", |
| 407 | sp->ds_dev->si_name); | |
| 149e86b9 MD |
408 | disk_probe_slice(dp, sp->ds_dev, |
| 409 | dkslice(sp->ds_dev), 1); | |
| 410 | devfs_destroy_subnames_without_flag( | |
| 411 | sp->ds_dev->si_name, SI_REPROBE_TEST); | |
| cd29885a | 412 | break; |
| cd29885a MD |
413 | case DISK_DISK_REPROBE: |
| 414 | dp = (struct disk *)msg->load; | |
| aec8eea4 | 415 | devfs_clr_subnames_flag(dp->d_cdev->si_name, SI_REPROBE_TEST); |
| 8c05caab | 416 | disk_debug(1, |
| aec8eea4 MD |
417 | "DISK_DISK_REPROBE: %s\n", |
| 418 | dp->d_cdev->si_name); | |
| 419 | disk_probe(dp, 1); | |
| 149e86b9 MD |
420 | devfs_destroy_subnames_without_flag( |
| 421 | dp->d_cdev->si_name, SI_REPROBE_TEST); | |
| cd29885a | 422 | break; |
| cd29885a | 423 | case DISK_SYNC: |
| 8c05caab | 424 | disk_debug(1, "DISK_SYNC\n"); |
| cd29885a | 425 | break; |
| 149e86b9 MD |
426 | default: |
| 427 | devfs_debug(DEVFS_DEBUG_WARNING, | |
| 428 | "disk_msg_core: unknown message " | |
| 429 | "received at core\n"); | |
| 430 | break; | |
| 431 | } | |
| a9177e09 | 432 | lwkt_replymsg(&msg->hdr, 0); |
| 149e86b9 | 433 | } |
| cd29885a MD |
434 | lwkt_exit(); |
| 435 | } | |
| 436 | ||
| 437 | ||
| 149e86b9 MD |
438 | /* |
| 439 | * Acts as a message drain. Any message that is replied to here gets | |
| 440 | * destroyed and the memory freed. | |
| 441 | */ | |
| cd29885a MD |
442 | static void |
| 443 | disk_msg_autofree_reply(lwkt_port_t port, lwkt_msg_t msg) | |
| 444 | { | |
| 149e86b9 | 445 | objcache_put(disk_msg_cache, msg); |
| cd29885a MD |
446 | } |
| 447 | ||
| 448 | ||
| 449 | void | |
| 450 | disk_msg_send(uint32_t cmd, void *load, void *load2) | |
| 451 | { | |
| 149e86b9 | 452 | disk_msg_t disk_msg; |
| cd29885a MD |
453 | lwkt_port_t port = &disk_msg_port; |
| 454 | ||
| 149e86b9 | 455 | disk_msg = objcache_get(disk_msg_cache, M_WAITOK); |
| cd29885a | 456 | |
| 149e86b9 | 457 | lwkt_initmsg(&disk_msg->hdr, &disk_dispose_port, 0); |
| cd29885a MD |
458 | |
| 459 | disk_msg->hdr.u.ms_result = cmd; | |
| 460 | disk_msg->load = load; | |
| 461 | disk_msg->load2 = load2; | |
| 462 | KKASSERT(port); | |
| a9177e09 | 463 | lwkt_sendmsg(port, &disk_msg->hdr); |
| cd29885a MD |
464 | } |
| 465 | ||
| aec8eea4 MD |
466 | void |
| 467 | disk_msg_send_sync(uint32_t cmd, void *load, void *load2) | |
| 468 | { | |
| 469 | struct lwkt_port rep_port; | |
| a9177e09 MD |
470 | disk_msg_t disk_msg; |
| 471 | lwkt_port_t port; | |
| aec8eea4 | 472 | |
| a9177e09 MD |
473 | disk_msg = objcache_get(disk_msg_cache, M_WAITOK); |
| 474 | port = &disk_msg_port; | |
| 475 | ||
| 476 | /* XXX could probably use curthread's built-in msgport */ | |
| aec8eea4 MD |
477 | lwkt_initport_thread(&rep_port, curthread); |
| 478 | lwkt_initmsg(&disk_msg->hdr, &rep_port, 0); | |
| 479 | ||
| 480 | disk_msg->hdr.u.ms_result = cmd; | |
| 481 | disk_msg->load = load; | |
| 482 | disk_msg->load2 = load2; | |
| 483 | ||
| a9177e09 MD |
484 | lwkt_sendmsg(port, &disk_msg->hdr); |
| 485 | lwkt_waitmsg(&disk_msg->hdr, 0); | |
| 486 | objcache_put(disk_msg_cache, disk_msg); | |
| aec8eea4 MD |
487 | } |
| 488 | ||
| 335dda38 | 489 | /* |
| fef8985e MD |
490 | * Create a raw device for the dev_ops template (which is returned). Also |
| 491 | * create a slice and unit managed disk and overload the user visible | |
| 492 | * device space with it. | |
| e4c9c0c8 | 493 | * |
| fef8985e MD |
494 | * NOTE: The returned raw device is NOT a slice and unit managed device. |
| 495 | * It is an actual raw device representing the raw disk as specified by | |
| 496 | * the passed dev_ops. The disk layer not only returns such a raw device, | |
| 497 | * it also uses it internally when passing (modified) commands through. | |
| 335dda38 | 498 | */ |
| b13267a5 | 499 | cdev_t |
| a688b15c | 500 | disk_create(int unit, struct disk *dp, struct dev_ops *raw_ops) |
| 984263bc | 501 | { |
| b13267a5 | 502 | cdev_t rawdev; |
| e4c9c0c8 | 503 | |
| 8c05caab AH |
504 | disk_debug(1, |
| 505 | "disk_create (begin): %s%d\n", | |
| 506 | raw_ops->head.name, unit); | |
| 507 | ||
| cd29885a | 508 | rawdev = make_only_dev(raw_ops, dkmakewholedisk(unit), |
| e4c9c0c8 | 509 | UID_ROOT, GID_OPERATOR, 0640, |
| fef8985e | 510 | "%s%d", raw_ops->head.name, unit); |
| 984263bc | 511 | |
| cd29885a | 512 | bzero(dp, sizeof(*dp)); |
| fef8985e | 513 | |
| e4c9c0c8 | 514 | dp->d_rawdev = rawdev; |
| fef8985e | 515 | dp->d_raw_ops = raw_ops; |
| cd29885a | 516 | dp->d_dev_ops = &disk_ops; |
| 8f960aa9 | 517 | dp->d_cdev = make_dev_covering(&disk_ops, dp->d_rawdev->si_ops, |
| 5350e1e9 | 518 | dkmakewholedisk(unit), |
| e4c9c0c8 | 519 | UID_ROOT, GID_OPERATOR, 0640, |
| cd29885a MD |
520 | "%s%d", raw_ops->head.name, unit); |
| 521 | ||
| 522 | dp->d_cdev->si_disk = dp; | |
| 523 | ||
| e02e815e AH |
524 | dsched_disk_create_callback(dp, raw_ops->head.name, unit); |
| 525 | ||
| 3b998fa9 | 526 | lwkt_gettoken(&disklist_token); |
| 984263bc | 527 | LIST_INSERT_HEAD(&disklist, dp, d_list); |
| 3b998fa9 | 528 | lwkt_reltoken(&disklist_token); |
| 8c05caab | 529 | |
| 3b998fa9 | 530 | disk_debug(1, "disk_create (end): %s%d\n", raw_ops->head.name, unit); |
| 8c05caab | 531 | |
| e4c9c0c8 | 532 | return (dp->d_rawdev); |
| 984263bc MD |
533 | } |
| 534 | ||
| aec8eea4 MD |
535 | |
| 536 | static void | |
| 537 | _setdiskinfo(struct disk *disk, struct disk_info *info) | |
| a688b15c | 538 | { |
| 55230951 MD |
539 | char *oldserialno; |
| 540 | ||
| 55230951 | 541 | oldserialno = disk->d_info.d_serialno; |
| a688b15c MD |
542 | bcopy(info, &disk->d_info, sizeof(disk->d_info)); |
| 543 | info = &disk->d_info; | |
| 544 | ||
| 8c05caab AH |
545 | disk_debug(1, |
| 546 | "_setdiskinfo: %s\n", | |
| 547 | disk->d_cdev->si_name); | |
| 548 | ||
| 55230951 MD |
549 | /* |
| 550 | * The serial number is duplicated so the caller can throw | |
| 551 | * their copy away. | |
| 552 | */ | |
| 553 | if (info->d_serialno && info->d_serialno[0]) { | |
| 554 | info->d_serialno = kstrdup(info->d_serialno, M_TEMP); | |
| fbbbca99 | 555 | disk_cleanserial(info->d_serialno); |
| 55230951 MD |
556 | if (disk->d_cdev) { |
| 557 | make_dev_alias(disk->d_cdev, "serno/%s", | |
| 558 | info->d_serialno); | |
| 559 | } | |
| 560 | } else { | |
| 561 | info->d_serialno = NULL; | |
| 562 | } | |
| 563 | if (oldserialno) | |
| 564 | kfree(oldserialno, M_TEMP); | |
| 565 | ||
| 279e9fd5 AH |
566 | dsched_disk_update_callback(disk, info); |
| 567 | ||
| 55230951 MD |
568 | /* |
| 569 | * The caller may set d_media_size or d_media_blocks and we | |
| 570 | * calculate the other. | |
| 571 | */ | |
| a688b15c MD |
572 | KKASSERT(info->d_media_size == 0 || info->d_media_blksize == 0); |
| 573 | if (info->d_media_size == 0 && info->d_media_blocks) { | |
| d7d5e114 | 574 | info->d_media_size = (u_int64_t)info->d_media_blocks * |
| a688b15c | 575 | info->d_media_blksize; |
| d7d5e114 | 576 | } else if (info->d_media_size && info->d_media_blocks == 0 && |
| a688b15c | 577 | info->d_media_blksize) { |
| d7d5e114 | 578 | info->d_media_blocks = info->d_media_size / |
| a688b15c MD |
579 | info->d_media_blksize; |
| 580 | } | |
| 5d6c6885 MD |
581 | |
| 582 | /* | |
| 583 | * The si_* fields for rawdev are not set until after the | |
| 584 | * disk_create() call, so someone using the cooked version | |
| 585 | * of the raw device (i.e. da0s0) will not get the right | |
| 586 | * si_iosize_max unless we fix it up here. | |
| 587 | */ | |
| 588 | if (disk->d_cdev && disk->d_rawdev && | |
| 589 | disk->d_cdev->si_iosize_max == 0) { | |
| 590 | disk->d_cdev->si_iosize_max = disk->d_rawdev->si_iosize_max; | |
| 591 | disk->d_cdev->si_bsize_phys = disk->d_rawdev->si_bsize_phys; | |
| 592 | disk->d_cdev->si_bsize_best = disk->d_rawdev->si_bsize_best; | |
| 593 | } | |
| aec8eea4 | 594 | } |
| cd29885a | 595 | |
| aec8eea4 MD |
596 | /* |
| 597 | * Disk drivers must call this routine when media parameters are available | |
| 598 | * or have changed. | |
| 599 | */ | |
| 600 | void | |
| 601 | disk_setdiskinfo(struct disk *disk, struct disk_info *info) | |
| 602 | { | |
| 603 | _setdiskinfo(disk, info); | |
| cd29885a | 604 | disk_msg_send(DISK_DISK_PROBE, disk, NULL); |
| 8c05caab AH |
605 | disk_debug(1, |
| 606 | "disk_setdiskinfo: sent probe for %s\n", | |
| 607 | disk->d_cdev->si_name); | |
| a688b15c MD |
608 | } |
| 609 | ||
| aec8eea4 MD |
610 | void |
| 611 | disk_setdiskinfo_sync(struct disk *disk, struct disk_info *info) | |
| 612 | { | |
| 613 | _setdiskinfo(disk, info); | |
| aec8eea4 | 614 | disk_msg_send_sync(DISK_DISK_PROBE, disk, NULL); |
| 8c05caab AH |
615 | disk_debug(1, |
| 616 | "disk_setdiskinfo_sync: sent probe for %s\n", | |
| 617 | disk->d_cdev->si_name); | |
| aec8eea4 MD |
618 | } |
| 619 | ||
| a688b15c | 620 | /* |
| e4c9c0c8 MD |
621 | * This routine is called when an adapter detaches. The higher level |
| 622 | * managed disk device is destroyed while the lower level raw device is | |
| 623 | * released. | |
| 624 | */ | |
| 335dda38 MD |
625 | void |
| 626 | disk_destroy(struct disk *disk) | |
| 627 | { | |
| 0160356d | 628 | dsched_disk_destroy_callback(disk); |
| aec8eea4 | 629 | disk_msg_send_sync(DISK_DISK_DESTROY, disk, NULL); |
| cd29885a | 630 | return; |
| 335dda38 MD |
631 | } |
| 632 | ||
| 984263bc | 633 | int |
| b24cd69c | 634 | disk_dumpcheck(cdev_t dev, u_int64_t *size, u_int64_t *blkno, u_int32_t *secsize) |
| 984263bc | 635 | { |
| a6c0f342 MD |
636 | struct partinfo pinfo; |
| 637 | int error; | |
| 984263bc | 638 | |
| a6c0f342 | 639 | bzero(&pinfo, sizeof(pinfo)); |
| 87baaf0c MD |
640 | error = dev_dioctl(dev, DIOCGPART, (void *)&pinfo, 0, |
| 641 | proc0.p_ucred, NULL); | |
| a6c0f342 MD |
642 | if (error) |
| 643 | return (error); | |
| b24cd69c | 644 | |
| a6c0f342 | 645 | if (pinfo.media_blksize == 0) |
| 984263bc | 646 | return (ENXIO); |
| b24cd69c AH |
647 | |
| 648 | if (blkno) /* XXX: make sure this reserved stuff is right */ | |
| 649 | *blkno = pinfo.reserved_blocks + | |
| 650 | pinfo.media_offset / pinfo.media_blksize; | |
| 651 | if (secsize) | |
| 652 | *secsize = pinfo.media_blksize; | |
| 653 | if (size) | |
| 654 | *size = (pinfo.media_blocks - pinfo.reserved_blocks); | |
| 655 | ||
| 984263bc | 656 | return (0); |
| 984263bc MD |
657 | } |
| 658 | ||
| b24cd69c AH |
659 | int |
| 660 | disk_dumpconf(cdev_t dev, u_int onoff) | |
| 661 | { | |
| 662 | struct dumperinfo di; | |
| 663 | u_int64_t size, blkno; | |
| 664 | u_int32_t secsize; | |
| 665 | int error; | |
| 666 | ||
| 667 | if (!onoff) | |
| 668 | return set_dumper(NULL); | |
| 669 | ||
| 670 | error = disk_dumpcheck(dev, &size, &blkno, &secsize); | |
| 671 | ||
| 672 | if (error) | |
| 673 | return ENXIO; | |
| 674 | ||
| 675 | bzero(&di, sizeof(struct dumperinfo)); | |
| 676 | di.dumper = diskdump; | |
| 677 | di.priv = dev; | |
| 678 | di.blocksize = secsize; | |
| 679 | di.mediaoffset = blkno * DEV_BSIZE; | |
| 680 | di.mediasize = size * DEV_BSIZE; | |
| 681 | ||
| 682 | return set_dumper(&di); | |
| 683 | } | |
| 684 | ||
| aec8eea4 MD |
685 | void |
| 686 | disk_unprobe(struct disk *disk) | |
| 687 | { | |
| 688 | if (disk == NULL) | |
| 689 | return; | |
| 690 | ||
| 691 | disk_msg_send_sync(DISK_UNPROBE, disk, NULL); | |
| 692 | } | |
| 693 | ||
| d7d5e114 | 694 | void |
| 984263bc MD |
695 | disk_invalidate (struct disk *disk) |
| 696 | { | |
| 666ec833 | 697 | dsgone(&disk->d_slice); |
| 984263bc MD |
698 | } |
| 699 | ||
| 984263bc MD |
700 | struct disk * |
| 701 | disk_enumerate(struct disk *disk) | |
| 702 | { | |
| be755ff9 | 703 | struct disk *dp; |
| be755ff9 | 704 | |
| 3b998fa9 | 705 | lwkt_gettoken(&disklist_token); |
| 984263bc | 706 | if (!disk) |
| be755ff9 | 707 | dp = (LIST_FIRST(&disklist)); |
| 984263bc | 708 | else |
| be755ff9 | 709 | dp = (LIST_NEXT(disk, d_list)); |
| 3b998fa9 | 710 | lwkt_reltoken(&disklist_token); |
| be755ff9 AH |
711 | |
| 712 | return dp; | |
| 984263bc MD |
713 | } |
| 714 | ||
| d7d5e114 | 715 | static |
| fbda7fa6 | 716 | int |
| 984263bc MD |
717 | sysctl_disks(SYSCTL_HANDLER_ARGS) |
| 718 | { | |
| 719 | struct disk *disk; | |
| 720 | int error, first; | |
| 721 | ||
| 722 | disk = NULL; | |
| 723 | first = 1; | |
| 724 | ||
| 725 | while ((disk = disk_enumerate(disk))) { | |
| 726 | if (!first) { | |
| 727 | error = SYSCTL_OUT(req, " ", 1); | |
| 728 | if (error) | |
| 729 | return error; | |
| 730 | } else { | |
| 731 | first = 0; | |
| 732 | } | |
| 95ce4036 HP |
733 | error = SYSCTL_OUT(req, disk->d_rawdev->si_name, |
| 734 | strlen(disk->d_rawdev->si_name)); | |
| 984263bc MD |
735 | if (error) |
| 736 | return error; | |
| 737 | } | |
| 738 | error = SYSCTL_OUT(req, "", 1); | |
| 739 | return error; | |
| 740 | } | |
| d7d5e114 | 741 | |
| 3641b7ca | 742 | SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, |
| 984263bc MD |
743 | sysctl_disks, "A", "names of available disks"); |
| 744 | ||
| 745 | /* | |
| e4c9c0c8 MD |
746 | * Open a disk device or partition. |
| 747 | */ | |
| fbda7fa6 MD |
748 | static |
| 749 | int | |
| fef8985e | 750 | diskopen(struct dev_open_args *ap) |
| 984263bc | 751 | { |
| b13267a5 | 752 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
753 | struct disk *dp; |
| 754 | int error; | |
| 755 | ||
| e4c9c0c8 MD |
756 | /* |
| 757 | * dp can't be NULL here XXX. | |
| 7ba1363d MD |
758 | * |
| 759 | * d_slice will be NULL if setdiskinfo() has not been called yet. | |
| 760 | * setdiskinfo() is typically called whether the disk is present | |
| 761 | * or not (e.g. CD), but the base disk device is created first | |
| 762 | * and there may be a race. | |
| e4c9c0c8 | 763 | */ |
| e4c9c0c8 | 764 | dp = dev->si_disk; |
| 7ba1363d | 765 | if (dp == NULL || dp->d_slice == NULL) |
| 984263bc | 766 | return (ENXIO); |
| fef8985e | 767 | error = 0; |
| 984263bc | 768 | |
| e4c9c0c8 MD |
769 | /* |
| 770 | * Deal with open races | |
| 771 | */ | |
| 984263bc MD |
772 | while (dp->d_flags & DISKFLAG_LOCK) { |
| 773 | dp->d_flags |= DISKFLAG_WANTED; | |
| 377d4740 | 774 | error = tsleep(dp, PCATCH, "diskopen", hz); |
| 984263bc MD |
775 | if (error) |
| 776 | return (error); | |
| 777 | } | |
| 778 | dp->d_flags |= DISKFLAG_LOCK; | |
| 779 | ||
| e4c9c0c8 MD |
780 | /* |
| 781 | * Open the underlying raw device. | |
| 782 | */ | |
| 984263bc | 783 | if (!dsisopen(dp->d_slice)) { |
| e4c9c0c8 | 784 | #if 0 |
| 984263bc MD |
785 | if (!pdev->si_iosize_max) |
| 786 | pdev->si_iosize_max = dev->si_iosize_max; | |
| e4c9c0c8 | 787 | #endif |
| fef8985e MD |
788 | error = dev_dopen(dp->d_rawdev, ap->a_oflags, |
| 789 | ap->a_devtype, ap->a_cred); | |
| 984263bc | 790 | } |
| cd29885a | 791 | #if 0 |
| e4c9c0c8 MD |
792 | /* |
| 793 | * Inherit properties from the underlying device now that it is | |
| 794 | * open. | |
| 795 | */ | |
| fef8985e | 796 | dev_dclone(dev); |
| cd29885a | 797 | #endif |
| 984263bc MD |
798 | |
| 799 | if (error) | |
| 800 | goto out; | |
| a688b15c | 801 | error = dsopen(dev, ap->a_devtype, dp->d_info.d_dsflags, |
| 84f8b009 | 802 | &dp->d_slice, &dp->d_info); |
| cd29885a | 803 | if (!dsisopen(dp->d_slice)) { |
| fef8985e | 804 | dev_dclose(dp->d_rawdev, ap->a_oflags, ap->a_devtype); |
| cd29885a | 805 | } |
| d7d5e114 | 806 | out: |
| 984263bc MD |
807 | dp->d_flags &= ~DISKFLAG_LOCK; |
| 808 | if (dp->d_flags & DISKFLAG_WANTED) { | |
| 809 | dp->d_flags &= ~DISKFLAG_WANTED; | |
| 810 | wakeup(dp); | |
| 811 | } | |
| d7d5e114 | 812 | |
| 984263bc MD |
813 | return(error); |
| 814 | } | |
| 815 | ||
| e4c9c0c8 MD |
816 | /* |
| 817 | * Close a disk device or partition | |
| 818 | */ | |
| fbda7fa6 MD |
819 | static |
| 820 | int | |
| fef8985e | 821 | diskclose(struct dev_close_args *ap) |
| 984263bc | 822 | { |
| b13267a5 | 823 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc MD |
824 | struct disk *dp; |
| 825 | int error; | |
| 984263bc MD |
826 | |
| 827 | error = 0; | |
| e4c9c0c8 MD |
828 | dp = dev->si_disk; |
| 829 | ||
| fef8985e | 830 | dsclose(dev, ap->a_devtype, dp->d_slice); |
| cd29885a | 831 | if (!dsisopen(dp->d_slice)) { |
| fef8985e | 832 | error = dev_dclose(dp->d_rawdev, ap->a_fflag, ap->a_devtype); |
| cd29885a | 833 | } |
| fef8985e MD |
834 | return (error); |
| 835 | } | |
| 836 | ||
| 837 | /* | |
| d7d5e114 | 838 | * First execute the ioctl on the disk device, and if it isn't supported |
| fef8985e MD |
839 | * try running it on the backing device. |
| 840 | */ | |
| 841 | static | |
| 842 | int | |
| 843 | diskioctl(struct dev_ioctl_args *ap) | |
| 844 | { | |
| b13267a5 | 845 | cdev_t dev = ap->a_head.a_dev; |
| fef8985e MD |
846 | struct disk *dp; |
| 847 | int error; | |
| b24cd69c | 848 | u_int u; |
| fef8985e MD |
849 | |
| 850 | dp = dev->si_disk; | |
| 851 | if (dp == NULL) | |
| 852 | return (ENXIO); | |
| cd29885a | 853 | |
| 149e86b9 | 854 | devfs_debug(DEVFS_DEBUG_DEBUG, |
| fcefa6f2 | 855 | "diskioctl: cmd is: %lx (name: %s)\n", |
| 149e86b9 MD |
856 | ap->a_cmd, dev->si_name); |
| 857 | devfs_debug(DEVFS_DEBUG_DEBUG, | |
| fcefa6f2 | 858 | "diskioctl: &dp->d_slice is: %p, %p\n", |
| 149e86b9 | 859 | &dp->d_slice, dp->d_slice); |
| cd29885a | 860 | |
| b24cd69c AH |
861 | if (ap->a_cmd == DIOCGKERNELDUMP) { |
| 862 | u = *(u_int *)ap->a_data; | |
| 863 | return disk_dumpconf(dev, u); | |
| 864 | } | |
| 865 | ||
| 84f8b009 MD |
866 | error = dsioctl(dev, ap->a_cmd, ap->a_data, ap->a_fflag, |
| 867 | &dp->d_slice, &dp->d_info); | |
| cd29885a | 868 | |
| fef8985e MD |
869 | if (error == ENOIOCTL) { |
| 870 | error = dev_dioctl(dp->d_rawdev, ap->a_cmd, ap->a_data, | |
| 87baaf0c | 871 | ap->a_fflag, ap->a_cred, NULL); |
| fef8985e | 872 | } |
| 984263bc MD |
873 | return (error); |
| 874 | } | |
| 875 | ||
| e4c9c0c8 MD |
876 | /* |
| 877 | * Execute strategy routine | |
| 878 | */ | |
| fbda7fa6 | 879 | static |
| fef8985e MD |
880 | int |
| 881 | diskstrategy(struct dev_strategy_args *ap) | |
| 984263bc | 882 | { |
| b13267a5 | 883 | cdev_t dev = ap->a_head.a_dev; |
| fef8985e | 884 | struct bio *bio = ap->a_bio; |
| 81b5c339 | 885 | struct bio *nbio; |
| 984263bc MD |
886 | struct disk *dp; |
| 887 | ||
| 81b5c339 | 888 | dp = dev->si_disk; |
| 984263bc | 889 | |
| e4c9c0c8 | 890 | if (dp == NULL) { |
| 81b5c339 MD |
891 | bio->bio_buf->b_error = ENXIO; |
| 892 | bio->bio_buf->b_flags |= B_ERROR; | |
| 893 | biodone(bio); | |
| fef8985e | 894 | return(0); |
| 984263bc | 895 | } |
| 81b5c339 | 896 | KKASSERT(dev->si_disk == dp); |
| 984263bc | 897 | |
| 6f76c57e HP |
898 | /* |
| 899 | * The dscheck() function will also transform the slice relative | |
| 54078292 | 900 | * block number i.e. bio->bio_offset into a block number that can be |
| 9a71d53f MD |
901 | * passed directly to the underlying raw device. If dscheck() |
| 902 | * returns NULL it will have handled the bio for us (e.g. EOF | |
| 903 | * or error due to being beyond the device size). | |
| 6f76c57e | 904 | */ |
| cd29885a | 905 | if ((nbio = dscheck(dev, bio, dp->d_slice)) != NULL) { |
| 8c72e3d5 | 906 | dsched_queue(dp, nbio); |
| cd29885a | 907 | } else { |
| 81b5c339 | 908 | biodone(bio); |
| cd29885a | 909 | } |
| fef8985e | 910 | return(0); |
| 984263bc MD |
911 | } |
| 912 | ||
| 335dda38 | 913 | /* |
| fef8985e | 914 | * Return the partition size in ?blocks? |
| 335dda38 | 915 | */ |
| fbda7fa6 MD |
916 | static |
| 917 | int | |
| fef8985e | 918 | diskpsize(struct dev_psize_args *ap) |
| 984263bc | 919 | { |
| b13267a5 | 920 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc | 921 | struct disk *dp; |
| 984263bc | 922 | |
| e4c9c0c8 MD |
923 | dp = dev->si_disk; |
| 924 | if (dp == NULL) | |
| fef8985e MD |
925 | return(ENODEV); |
| 926 | ap->a_result = dssize(dev, &dp->d_slice); | |
| 927 | return(0); | |
| 984263bc MD |
928 | } |
| 929 | ||
| e4c9c0c8 | 930 | /* |
| fef8985e MD |
931 | * When new device entries are instantiated, make sure they inherit our |
| 932 | * si_disk structure and block and iosize limits from the raw device. | |
| e4c9c0c8 | 933 | * |
| d7d5e114 | 934 | * This routine is always called synchronously in the context of the |
| fef8985e MD |
935 | * client. |
| 936 | * | |
| 937 | * XXX The various io and block size constraints are not always initialized | |
| 938 | * properly by devices. | |
| e4c9c0c8 | 939 | */ |
| fbda7fa6 MD |
940 | static |
| 941 | int | |
| fef8985e | 942 | diskclone(struct dev_clone_args *ap) |
| 984263bc | 943 | { |
| b13267a5 | 944 | cdev_t dev = ap->a_head.a_dev; |
| 984263bc | 945 | struct disk *dp; |
| aec8eea4 MD |
946 | dp = dev->si_disk; |
| 947 | ||
| fef8985e MD |
948 | KKASSERT(dp != NULL); |
| 949 | dev->si_disk = dp; | |
| 950 | dev->si_iosize_max = dp->d_rawdev->si_iosize_max; | |
| 951 | dev->si_bsize_phys = dp->d_rawdev->si_bsize_phys; | |
| 952 | dev->si_bsize_best = dp->d_rawdev->si_bsize_best; | |
| 953 | return(0); | |
| 954 | } | |
| 955 | ||
| 956 | int | |
| 957 | diskdump(struct dev_dump_args *ap) | |
| 958 | { | |
| b13267a5 | 959 | cdev_t dev = ap->a_head.a_dev; |
| aec8eea4 | 960 | struct disk *dp = dev->si_disk; |
| b24cd69c | 961 | u_int64_t size, offset; |
| fef8985e MD |
962 | int error; |
| 963 | ||
| b24cd69c AH |
964 | error = disk_dumpcheck(dev, &size, &ap->a_blkno, &ap->a_secsize); |
| 965 | /* XXX: this should probably go in disk_dumpcheck somehow */ | |
| 966 | if (ap->a_length != 0) { | |
| 967 | size *= DEV_BSIZE; | |
| 968 | offset = ap->a_blkno * DEV_BSIZE; | |
| 969 | if ((ap->a_offset < offset) || | |
| 970 | (ap->a_offset + ap->a_length - offset > size)) { | |
| 971 | kprintf("Attempt to write outside dump device boundaries.\n"); | |
| 972 | error = ENOSPC; | |
| 973 | } | |
| 974 | } | |
| 975 | ||
| fef8985e MD |
976 | if (error == 0) { |
| 977 | ap->a_head.a_dev = dp->d_rawdev; | |
| 978 | error = dev_doperate(&ap->a_head); | |
| 984263bc | 979 | } |
| fef8985e MD |
980 | |
| 981 | return(error); | |
| 984263bc MD |
982 | } |
| 983 | ||
| fef8985e | 984 | |
| d7d5e114 | 985 | SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD, |
| 984263bc MD |
986 | 0, sizeof(struct diskslices), "sizeof(struct diskslices)"); |
| 987 | ||
| d7d5e114 | 988 | SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD, |
| 984263bc | 989 | 0, sizeof(struct disk), "sizeof(struct disk)"); |
| 7a9e53ad | 990 | |
| ef548879 | 991 | /* |
| 4afeea0d MD |
992 | * Reorder interval for burst write allowance and minor write |
| 993 | * allowance. | |
| 994 | * | |
| 995 | * We always want to trickle some writes in to make use of the | |
| 996 | * disk's zone cache. Bursting occurs on a longer interval and only | |
| 997 | * runningbufspace is well over the hirunningspace limit. | |
| ef548879 | 998 | */ |
| 4afeea0d MD |
999 | int bioq_reorder_burst_interval = 60; /* should be multiple of minor */ |
| 1000 | SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_burst_interval, | |
| 1001 | CTLFLAG_RW, &bioq_reorder_burst_interval, 0, ""); | |
| 1002 | int bioq_reorder_minor_interval = 5; | |
| 1003 | SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_minor_interval, | |
| 1004 | CTLFLAG_RW, &bioq_reorder_minor_interval, 0, ""); | |
| 1005 | ||
| 1006 | int bioq_reorder_burst_bytes = 3000000; | |
| 1007 | SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_burst_bytes, | |
| 1008 | CTLFLAG_RW, &bioq_reorder_burst_bytes, 0, ""); | |
| 1009 | int bioq_reorder_minor_bytes = 262144; | |
| 1010 | SYSCTL_INT(_kern, OID_AUTO, bioq_reorder_minor_bytes, | |
| 1011 | CTLFLAG_RW, &bioq_reorder_minor_bytes, 0, ""); | |
| ef548879 | 1012 | |
| 7a9e53ad MD |
1013 | |
| 1014 | /* | |
| 30e5862e MD |
1015 | * Order I/Os. Generally speaking this code is designed to make better |
| 1016 | * use of drive zone caches. A drive zone cache can typically track linear | |
| 1017 | * reads or writes for around 16 zones simultaniously. | |
| 7a9e53ad | 1018 | * |
| 30e5862e MD |
1019 | * Read prioritization issues: It is possible for hundreds of megabytes worth |
| 1020 | * of writes to be queued asynchronously. This creates a huge bottleneck | |
| 1021 | * for reads which reduce read bandwidth to a trickle. | |
| 7a9e53ad | 1022 | * |
| 4afeea0d MD |
1023 | * To solve this problem we generally reorder reads before writes. |
| 1024 | * | |
| 1025 | * However, a large number of random reads can also starve writes and | |
| 1026 | * make poor use of the drive zone cache so we allow writes to trickle | |
| 1027 | * in every N reads. | |
| 7a9e53ad MD |
1028 | */ |
| 1029 | void | |
| 81b5c339 | 1030 | bioqdisksort(struct bio_queue_head *bioq, struct bio *bio) |
| 7a9e53ad | 1031 | { |
| 4afeea0d MD |
1032 | /* |
| 1033 | * The BIO wants to be ordered. Adding to the tail also | |
| 1034 | * causes transition to be set to NULL, forcing the ordering | |
| 1035 | * of all prior I/O's. | |
| 1036 | */ | |
| 1037 | if (bio->bio_buf->b_flags & B_ORDERED) { | |
| 1038 | bioq_insert_tail(bioq, bio); | |
| 1039 | return; | |
| 1040 | } | |
| 1041 | ||
| 30e5862e MD |
1042 | switch(bio->bio_buf->b_cmd) { |
| 1043 | case BUF_CMD_READ: | |
| 1044 | if (bioq->transition) { | |
| 7a9e53ad | 1045 | /* |
| 4afeea0d MD |
1046 | * Insert before the first write. Bleedover writes |
| 1047 | * based on reorder intervals to prevent starvation. | |
| 7a9e53ad | 1048 | */ |
| 30e5862e | 1049 | TAILQ_INSERT_BEFORE(bioq->transition, bio, bio_act); |
| 4afeea0d MD |
1050 | ++bioq->reorder; |
| 1051 | if (bioq->reorder % bioq_reorder_minor_interval == 0) { | |
| 30e5862e | 1052 | bioqwritereorder(bioq); |
| 4afeea0d MD |
1053 | if (bioq->reorder >= |
| 1054 | bioq_reorder_burst_interval) { | |
| 1055 | bioq->reorder = 0; | |
| 1056 | } | |
| 7a9e53ad MD |
1057 | } |
| 1058 | } else { | |
| 7a9e53ad | 1059 | /* |
| 30e5862e MD |
1060 | * No writes queued (or ordering was forced), |
| 1061 | * insert at tail. | |
| 7a9e53ad | 1062 | */ |
| 30e5862e | 1063 | TAILQ_INSERT_TAIL(&bioq->queue, bio, bio_act); |
| 7a9e53ad | 1064 | } |
| 30e5862e MD |
1065 | break; |
| 1066 | case BUF_CMD_WRITE: | |
| 1067 | /* | |
| 1068 | * Writes are always appended. If no writes were previously | |
| 1069 | * queued or an ordered tail insertion occured the transition | |
| 1070 | * field will be NULL. | |
| 1071 | */ | |
| 1072 | TAILQ_INSERT_TAIL(&bioq->queue, bio, bio_act); | |
| 1073 | if (bioq->transition == NULL) | |
| 1074 | bioq->transition = bio; | |
| 1075 | break; | |
| 1076 | default: | |
| 1077 | /* | |
| 1078 | * All other request types are forced to be ordered. | |
| 1079 | */ | |
| 1080 | bioq_insert_tail(bioq, bio); | |
| 4afeea0d | 1081 | break; |
| 7a9e53ad | 1082 | } |
| 30e5862e | 1083 | } |
| 7a9e53ad | 1084 | |
| 30e5862e | 1085 | /* |
| 4afeea0d MD |
1086 | * Move the read-write transition point to prevent reads from |
| 1087 | * completely starving our writes. This brings a number of writes into | |
| 30e5862e | 1088 | * the fold every N reads. |
| 4afeea0d MD |
1089 | * |
| 1090 | * We bring a few linear writes into the fold on a minor interval | |
| 1091 | * and we bring a non-linear burst of writes into the fold on a major | |
| 1092 | * interval. Bursting only occurs if runningbufspace is really high | |
| 1093 | * (typically from syncs, fsyncs, or HAMMER flushes). | |
| 30e5862e MD |
1094 | */ |
| 1095 | static | |
| 1096 | void | |
| 1097 | bioqwritereorder(struct bio_queue_head *bioq) | |
| 1098 | { | |
| 1099 | struct bio *bio; | |
| 1100 | off_t next_offset; | |
| 4afeea0d | 1101 | size_t left; |
| 30e5862e | 1102 | size_t n; |
| 4afeea0d MD |
1103 | int check_off; |
| 1104 | ||
| 1105 | if (bioq->reorder < bioq_reorder_burst_interval || | |
| 1106 | !buf_runningbufspace_severe()) { | |
| 1107 | left = (size_t)bioq_reorder_minor_bytes; | |
| 1108 | check_off = 1; | |
| 1109 | } else { | |
| 1110 | left = (size_t)bioq_reorder_burst_bytes; | |
| 1111 | check_off = 0; | |
| 1112 | } | |
| 30e5862e MD |
1113 | |
| 1114 | next_offset = bioq->transition->bio_offset; | |
| 1115 | while ((bio = bioq->transition) != NULL && | |
| 4afeea0d MD |
1116 | (check_off == 0 || next_offset == bio->bio_offset) |
| 1117 | ) { | |
| 30e5862e MD |
1118 | n = bio->bio_buf->b_bcount; |
| 1119 | next_offset = bio->bio_offset + n; | |
| 1120 | bioq->transition = TAILQ_NEXT(bio, bio_act); | |
| 1121 | if (left < n) | |
| 7a9e53ad | 1122 | break; |
| 30e5862e | 1123 | left -= n; |
| 7a9e53ad | 1124 | } |
| 7a9e53ad MD |
1125 | } |
| 1126 | ||
| 7a9e53ad MD |
1127 | /* |
| 1128 | * Disk error is the preface to plaintive error messages | |
| 1129 | * about failing disk transfers. It prints messages of the form | |
| 1130 | ||
| 1131 | hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) | |
| 1132 | ||
| 1133 | * if the offset of the error in the transfer and a disk label | |
| 1134 | * are both available. blkdone should be -1 if the position of the error | |
| 1135 | * is unknown; the disklabel pointer may be null from drivers that have not | |
| 6ea70f76 | 1136 | * been converted to use them. The message is printed with kprintf |
| 7a9e53ad | 1137 | * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. |
| 6ea70f76 | 1138 | * The message should be completed (with at least a newline) with kprintf |
| a0a36cfd | 1139 | * or log(-1, ...), respectively. There is no trailing space. |
| 7a9e53ad MD |
1140 | */ |
| 1141 | void | |
| a688b15c | 1142 | diskerr(struct bio *bio, cdev_t dev, const char *what, int pri, int donecnt) |
| 7a9e53ad | 1143 | { |
| 81b5c339 | 1144 | struct buf *bp = bio->bio_buf; |
| c6f49b01 MD |
1145 | const char *term; |
| 1146 | ||
| 1147 | switch(bp->b_cmd) { | |
| 1148 | case BUF_CMD_READ: | |
| 1149 | term = "read"; | |
| 1150 | break; | |
| 1151 | case BUF_CMD_WRITE: | |
| 1152 | term = "write"; | |
| 1153 | break; | |
| 1154 | default: | |
| 1155 | term = "access"; | |
| 1156 | break; | |
| 1157 | } | |
| cd29885a | 1158 | kprintf("%s: %s %sing ", dev->si_name, what, term); |
| 973c11b9 MD |
1159 | kprintf("offset %012llx for %d", |
| 1160 | (long long)bio->bio_offset, | |
| 1161 | bp->b_bcount); | |
| cd29885a | 1162 | |
| 54078292 | 1163 | if (donecnt) |
| 6ea70f76 | 1164 | kprintf(" (%d bytes completed)", donecnt); |
| 7a9e53ad | 1165 | } |
| 81b5c339 | 1166 | |
| a8873631 MD |
1167 | /* |
| 1168 | * Locate a disk device | |
| 1169 | */ | |
| 1170 | cdev_t | |
| 1171 | disk_locate(const char *devname) | |
| 1172 | { | |
| cd29885a MD |
1173 | return devfs_find_device_by_name(devname); |
| 1174 | } | |
| a8873631 | 1175 | |
| cd29885a MD |
1176 | void |
| 1177 | disk_config(void *arg) | |
| 1178 | { | |
| aec8eea4 | 1179 | disk_msg_send_sync(DISK_SYNC, NULL, NULL); |
| cd29885a MD |
1180 | } |
| 1181 | ||
| cd29885a MD |
1182 | static void |
| 1183 | disk_init(void) | |
| 1184 | { | |
| 1185 | struct thread* td_core; | |
| cd29885a | 1186 | |
| 149e86b9 MD |
1187 | disk_msg_cache = objcache_create("disk-msg-cache", 0, 0, |
| 1188 | NULL, NULL, NULL, | |
| 1189 | objcache_malloc_alloc, | |
| 1190 | objcache_malloc_free, | |
| 1191 | &disk_msg_malloc_args); | |
| cd29885a | 1192 | |
| 3b998fa9 | 1193 | lwkt_token_init(&disklist_token, 1); |
| be755ff9 | 1194 | |
| 149e86b9 MD |
1195 | /* |
| 1196 | * Initialize the reply-only port which acts as a message drain | |
| 1197 | */ | |
| cd29885a MD |
1198 | lwkt_initport_replyonly(&disk_dispose_port, disk_msg_autofree_reply); |
| 1199 | ||
| 1200 | lwkt_create(disk_msg_core, /*args*/NULL, &td_core, NULL, | |
| 1201 | 0, 0, "disk_msg_core"); | |
| 1202 | ||
| 1203 | tsleep(td_core, 0, "diskcore", 0); | |
| 1204 | } | |
| 1205 | ||
| cd29885a MD |
1206 | static void |
| 1207 | disk_uninit(void) | |
| 1208 | { | |
| cd29885a | 1209 | objcache_destroy(disk_msg_cache); |
| a8873631 MD |
1210 | } |
| 1211 | ||
/*
 * Clean out illegal characters in serial numbers.
 *
 * The NUL-terminated string is rewritten in place: ASCII letters,
 * digits and the characters '-', '@', '+' and '.' are kept, every other
 * character is replaced by '_'.
 */
static void
disk_cleanserial(char *serno)
{
	char *p;

	for (p = serno; *p != 0; ++p) {
		char ch = *p;
		int legal;

		legal = (ch >= 'a' && ch <= 'z') ||
			(ch >= 'A' && ch <= 'Z') ||
			(ch >= '0' && ch <= '9') ||
			ch == '-' || ch == '@' || ch == '+' || ch == '.';

		*p = legal ? ch : '_';
	}
}
| 1234 | ||
| 8c05caab AH |
1235 | TUNABLE_INT("kern.disk_debug", &disk_debug_enable); |
| 1236 | SYSCTL_INT(_kern, OID_AUTO, disk_debug, CTLFLAG_RW, &disk_debug_enable, | |
| 1237 | 0, "Enable subr_disk debugging"); | |
| 1238 | ||
| cd29885a MD |
1239 | SYSINIT(disk_register, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, disk_init, NULL); |
| 1240 | SYSUNINIT(disk_register, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, disk_uninit, NULL); |