2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
58 #include <sys/mplock2.h>
59 #include <sys/msgport2.h>
60 #include <sys/thread2.h>
/*
 * File-scope state and forward declarations for the disk iocom glue.
 *
 * M_DMSG_DISK is the malloc type handed to kdmsg_iocom_init() by
 * disk_iocom_init().
 */
73 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");
/*
 * blk_active is incremented before each dev_dstrategy() call issued by the
 * disk_blk_read/write/flush/freeblks handlers and decremented again in
 * diskiodone().  It is exported read-write (CTLFLAG_RW) under the _debug
 * sysctl node.
 */
75 static int blk_active;
76 SYSCTL_INT(_debug, OID_AUTO, blk_active, CTLFLAG_RW, &blk_active, 0,
77 "Number of active iocom IOs");
/*
 * Forward declarations for the file-local helpers: the reconnect routine,
 * the receive callback, the per-command BLK handlers dispatched from
 * disk_rcvdmsg(), and the bio completion callback.
 */
79 static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
80 static int disk_rcvdmsg(kdmsg_msg_t *msg);
82 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
83 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
84 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
85 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
86 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
87 static void diskiodone(struct bio *bio);
/*
 * Initialize the iocom embedded in the disk structure (dp->d_iocom).
 *
 * dp itself is passed as the second argument; presumably this is the value
 * disk_rcvdmsg() later reads back through iocom->handle (confirm against
 * the kdmsg API).  The flags request AUTOCONN, AUTORXSPAN and AUTOTXSPAN,
 * M_DMSG_DISK is supplied as the malloc type and disk_rcvdmsg as the
 * message callback.
 */
90 disk_iocom_init(struct disk *dp)
92 kdmsg_iocom_init(&dp->d_iocom, dp,
93 KDMSG_IOCOMF_AUTOCONN |
94 KDMSG_IOCOMF_AUTORXSPAN |
95 KDMSG_IOCOMF_AUTOTXSPAN,
96 M_DMSG_DISK, disk_rcvdmsg);
/*
 * NOTE(review): only the signature of disk_iocom_update() is visible in
 * this listing; no statement of its body is shown, so its behavior cannot
 * be described from here.  Confirm against the complete file.
 */
100 disk_iocom_update(struct disk *dp)
/*
 * Tear down the iocom embedded in the disk structure by handing
 * dp->d_iocom to kdmsg_iocom_uninit().  Counterpart of disk_iocom_init().
 */
105 disk_iocom_uninit(struct disk *dp)
107 kdmsg_iocom_uninit(&dp->d_iocom);
/*
 * ioctl entry point for the disk iocom.
 *
 * Of the visible statements: a file is looked up from the current process
 * file descriptor table using the descriptor number in recl->fd, and the
 * result is handed to disk_iocom_reconnect() together with dp.
 *
 * NOTE(review): how cmd is decoded, how recl is derived from data, and
 * whether the holdfp() result is checked for NULL are not visible in this
 * listing - confirm against the complete file.
 */
111 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
114 struct disk_ioc_recluster *recl;
120 fp = holdfp(curproc->p_fd, recl->fd, -1);
122 error = disk_iocom_reconnect(dp, fp);
/*
 * (Re)connect the disk iocom to the supplied file and set up the automatic
 * LNK_CONN and LNK_SPAN templates that advertise this disk as a block
 * device server.
 *
 * dp - disk whose d_iocom is being reconnected
 * fp - file passed through to kdmsg_iocom_reconnect()
 *
 * Some lines of this function are not visible in this listing; only the
 * visible statements are described.
 */
136 disk_iocom_reconnect(struct disk *dp, struct file *fp)
/* Build the device name as <driver name><unit> from the raw device. */
140 ksnprintf(devname, sizeof(devname), "%s%d",
141 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
143 kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
/*
 * LNK_CONN template: server PFS type, SPAN protocol 1, block peer type,
 * peer mask restricted to the block peer bit, all PFS types accepted.
 */
145 dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER;
146 dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
147 dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
148 dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
149 dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
/* cl_label is formatted as hostname/devname. */
150 ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label,
151 sizeof(dp->d_iocom.auto_lnk_conn.cl_label),
152 "%s/%s", hostname, devname);
/* fs_label carries the disk serial number, only when one is present. */
153 if (dp->d_info.d_serialno) {
154 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label,
155 sizeof(dp->d_iocom.auto_lnk_conn.fs_label),
156 "%s", dp->d_info.d_serialno);
/*
 * LNK_SPAN template: same identity as the LNK_CONN template, plus the
 * media size in bytes and the media block size taken from dp->d_info.
 */
159 dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER;
160 dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
161 dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
162 dp->d_iocom.auto_lnk_span.media.block.bytes =
163 dp->d_info.d_media_size;
164 dp->d_iocom.auto_lnk_span.media.block.blksize =
165 dp->d_info.d_media_blksize;
166 ksnprintf(dp->d_iocom.auto_lnk_span.cl_label,
167 sizeof(dp->d_iocom.auto_lnk_span.cl_label),
168 "%s/%s", hostname, devname);
169 if (dp->d_info.d_serialno) {
170 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label,
171 sizeof(dp->d_iocom.auto_lnk_span.fs_label),
172 "%s", dp->d_info.d_serialno);
/* Start the automatic sequence now that both templates are filled in. */
175 kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
/*
 * Message callback registered with kdmsg_iocom_init().
 *
 * The disk is recovered from the iocom handle.  Processing has three
 * visible stages:
 *   1. A switch on the command of this message handles the debug shell
 *      messages.
 *   2. A message whose state is the iocom state0 is answered with
 *      DMSG_ERR_NOSUPP.
 *   3. A switch on the rxcmd of the state dispatches to the disk_blk_*()
 *      handlers; unrecognized non-reply messages get DMSG_ERR_NOSUPP.
 *
 * Several case labels and the return statements are not visible in this
 * listing; only the visible statements are described.
 */
181 disk_rcvdmsg(kdmsg_msg_t *msg)
183 struct disk *dp = msg->state->iocom->handle;
186 * Handle debug messages (these might not be in transactions)
188 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
191 * Execute shell command (not supported atm)
193 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
195 case DMSG_DBG_SHELL | DMSGF_REPLY:
/*
 * Force NUL termination of the aux data before printing it as a string.
 * NOTE(review): the index aux_size - 1 is only valid when aux data is
 * present and aux_size is non-zero; any guard is not visible in this
 * listing - confirm.
 */
197 msg->aux_data[msg->aux_size - 1] = 0;
198 kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
204 * All remaining messages must be in a transaction.
206 * NOTE! We currently don't care if the transaction is just
207 * the span transaction (for disk probes) or if it is the
208 * BLK_OPEN transaction.
210 * NOTE! We are switching on the first message's command. The
211 * actual message command within the transaction may be
212 * different (if streaming within a transaction).
/* state0 is the non-transactional state: reject such messages. */
214 if (msg->state == &msg->state->iocom->state0) {
215 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
219 switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
221 disk_blk_open(dp, msg);
225 * not reached normally but leave in for completeness
227 disk_blk_read(dp, msg);
230 disk_blk_write(dp, msg);
233 disk_blk_flush(dp, msg);
235 case DMSG_BLK_FREEBLKS:
236 disk_blk_freeblks(dp, msg);
/*
 * Fallback for commands not dispatched above: only non-reply messages are
 * answered.  kdmsg_msg_reply() is used when the message carries
 * DMSGF_DELETE; kdmsg_msg_result() is presumably the else branch (the
 * else line is not visible here).
 */
239 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
240 if (msg->any.head.cmd & DMSGF_DELETE)
241 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
243 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
/*
 * Handle BLK_OPEN and BLK_CLOSE messages for a transaction.
 *
 * Per-transaction bookkeeping (struct dios_open, with the openrd and
 * openwr fields used below) lives in msg->state->any.any and is allocated
 * on the first BLK_OPEN.  The raw device is opened with dev_dopen() using
 * the proc0 credentials and closed with dev_dclose().  The result defaults
 * to DMSG_ERR_NOSUPP unless one of the recognized commands overwrites it.
 *
 * Some lines (flag computation, counter updates, braces) are not visible
 * in this listing; only the visible statements are described.
 */
252 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
254 struct dios_open *openst;
255 int error = DMSG_ERR_NOSUPP;
258 openst = msg->state->any.any;
259 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
/* First open on this transaction: allocate and attach the state. */
260 if (openst == NULL) {
261 openst = kmalloc(sizeof(*openst), M_DEVBUF,
263 msg->state->any.any = openst;
/* The requested modes (RD and/or WR) select the open flags. */
266 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
268 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
270 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL);
/*
 * The modes are tested again after the open; presumably this is where the
 * per-mode open counts are recorded (the statements are not visible).
 */
274 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
276 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
/* BLK_CLOSE mirrors the open path using dev_dclose(). */
281 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
284 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
288 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
292 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL);
296 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
298 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
/*
 * Transaction is being deleted: close whatever is still recorded as open
 * (combined read+write first, then any remaining read-only and write-only
 * opens), free the state and detach it from the transaction.
 * NOTE(review): openst is dereferenced in the loops below; a NULL guard
 * is not visible in this listing - confirm.
 */
303 if (msg->any.head.cmd & DMSGF_DELETE) {
305 while (openst->openrd && openst->openwr) {
308 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL);
310 while (openst->openrd) {
312 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL);
314 while (openst->openwr) {
316 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL);
318 kfree(openst, M_DEVBUF);
319 msg->state->any.any = NULL;
/*
 * kdmsg_msg_reply() answers on the DELETE path; kdmsg_msg_result() is
 * presumably the non-DELETE branch (the else line is not visible here).
 */
321 kdmsg_msg_reply(msg, error);
323 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_READ message: validate the request, set up a buffer and
 * bio for the read and queue it to the raw device.  Completion is
 * reported asynchronously by diskiodone().
 *
 * Per-transaction I/O state (struct dios_io) is kept in
 * msg->state->any.any; its count field tracks I/Os in flight.  The result
 * defaults to DMSG_ERR_NOSUPP for any other command.
 *
 * Some lines of this function are not visible in this listing; only the
 * visible statements are described.
 */
329 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
331 struct dios_io *iost;
334 int error = DMSG_ERR_NOSUPP;
338 * Only DMSG_BLK_READ commands imply read ops.
340 iost = msg->state->any.any;
341 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
/* Reject sizes below DEV_BSIZE or above MAXPHYS with DMSG_ERR_PARAM. */
342 if (msg->any.blk_read.bytes < DEV_BSIZE ||
343 msg->any.blk_read.bytes > MAXPHYS) {
344 error = DMSG_ERR_PARAM;
348 iost = kmalloc(sizeof(*iost), M_DEVBUF,
350 msg->state->any.any = iost;
/* Buffer sized to the request; resid starts out as the full count. */
353 bp = geteblk(msg->any.blk_read.bytes);
355 bp->b_cmd = BUF_CMD_READ;
356 bp->b_bcount = msg->any.blk_read.bytes;
357 bp->b_resid = bp->b_bcount;
/*
 * The transaction state is stashed in the bio so that diskiodone() can
 * find it when the I/O completes.
 */
358 bio->bio_offset = msg->any.blk_read.offset;
359 bio->bio_caller_info1.ptr = msg->state;
360 bio->bio_done = diskiodone;
/*
 * NOTE(review): the state hold below is commented out while the bio keeps
 * a raw pointer to msg->state - confirm that the state outlives the I/O.
 */
362 /* kdmsg_state_hold(msg->state); */
/* Account for the I/O globally and per transaction before issuing it. */
363 atomic_add_int(&blk_active, 1);
364 atomic_add_int(&iost->count, 1);
365 if (msg->any.head.cmd & DMSGF_DELETE)
368 dev_dstrategy(dp->d_rawdev, bio);
/*
 * On DELETE the per-transaction state is freed only when no I/O is still
 * counted against it.  kdmsg_msg_reply() answers on the DELETE path;
 * kdmsg_msg_result() is presumably the non-DELETE branch (the else line
 * is not visible here).
 */
372 if (msg->any.head.cmd & DMSGF_DELETE) {
373 if (iost && iost->count == 0) {
374 kfree(iost, M_DEVBUF);
375 msg->state->any.any = NULL;
377 kdmsg_msg_reply(msg, error);
379 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_WRITE message: validate the request, set up a buffer and
 * bio carrying the data to write and queue it to the raw device.
 * Completion is reported asynchronously by diskiodone().
 *
 * Per-transaction I/O state (struct dios_io) is kept in
 * msg->state->any.any; its count field tracks I/Os in flight and its data
 * field holds aux data detached from the message.  The result defaults to
 * DMSG_ERR_NOSUPP for any other command.
 *
 * Some lines of this function are not visible in this listing; only the
 * visible statements are described.
 */
386 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
388 struct dios_io *iost;
391 int error = DMSG_ERR_NOSUPP;
/*
 * NOTE(review): the comment text below says "read ops" although this is
 * the write path; it looks copied from disk_blk_read() and should read
 * "write ops".
 */
395 * Only DMSG_BLK_WRITE commands imply read ops.
397 iost = msg->state->any.any;
398 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
/* Reject sizes below DEV_BSIZE or above MAXPHYS with DMSG_ERR_PARAM. */
399 if (msg->any.blk_write.bytes < DEV_BSIZE ||
400 msg->any.blk_write.bytes > MAXPHYS) {
401 error = DMSG_ERR_PARAM;
405 iost = kmalloc(sizeof(*iost), M_DEVBUF,
407 msg->state->any.any = iost;
411 * Issue WRITE. Short data implies zeros. Try to optimize
412 * the buffer cache buffer for the case where we can just
413 * use the message's data pointer.
/*
 * Buffer allocation depends on whether the message carries at least the
 * full write size; the statement taken for the full-size case is not
 * visible here, geteblk() is the visible allocator.
 */
416 if (msg->aux_size >= msg->any.blk_write.bytes)
419 bp = geteblk(msg->any.blk_write.bytes);
421 bp->b_cmd = BUF_CMD_WRITE;
422 bp->b_bcount = msg->any.blk_write.bytes;
423 bp->b_resid = bp->b_bcount;
/*
 * Full-size aux data: point the buffer straight at the message data and
 * detach it from the message into iost->data (diskiodone() releases it
 * with kdmsg_free_aux_data()).  Short aux data: copy what is there,
 * zero-fill the remainder and clear iost->data.
 */
424 if (msg->aux_size >= msg->any.blk_write.bytes) {
425 bp->b_data = msg->aux_data;
426 kdmsg_detach_aux_data(msg, &iost->data);
428 bcopy(msg->aux_data, bp->b_data, msg->aux_size);
429 bzero(bp->b_data + msg->aux_size,
430 msg->any.blk_write.bytes - msg->aux_size);
431 bzero(&iost->data, sizeof(iost->data));
433 bio->bio_offset = msg->any.blk_write.offset;
434 bio->bio_caller_info1.ptr = msg->state;
435 bio->bio_done = diskiodone;
/*
 * NOTE(review): the state hold below is commented out while the bio keeps
 * a raw pointer to msg->state - confirm that the state outlives the I/O.
 */
437 /* kdmsg_state_hold(msg->state); */
/* Account for the I/O globally and per transaction before issuing it. */
438 atomic_add_int(&blk_active, 1);
439 atomic_add_int(&iost->count, 1);
440 if (msg->any.head.cmd & DMSGF_DELETE)
443 dev_dstrategy(dp->d_rawdev, bio);
/*
 * On DELETE the per-transaction state is freed only when no I/O is still
 * counted against it.  kdmsg_msg_reply() answers on the DELETE path;
 * kdmsg_msg_result() is presumably the non-DELETE branch (the else line
 * is not visible here).
 */
447 if (msg->any.head.cmd & DMSGF_DELETE) {
448 if (iost && iost->count == 0) {
449 kfree(iost, M_DEVBUF);
450 msg->state->any.any = NULL;
452 kdmsg_msg_reply(msg, error);
454 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_FLUSH message: set up a bio with BUF_CMD_FLUSH for the
 * offset and byte count given in the message and queue it to the raw
 * device.  Completion is reported asynchronously by diskiodone().
 *
 * Per-transaction I/O state (struct dios_io) is kept in
 * msg->state->any.any.  The result defaults to DMSG_ERR_NOSUPP for any
 * other command.
 *
 * Some lines of this function (including the buffer allocation) are not
 * visible in this listing; only the visible statements are described.
 */
461 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
463 struct dios_io *iost;
466 int error = DMSG_ERR_NOSUPP;
/*
 * NOTE(review): the comment text below says "read ops" although this is
 * the flush path; it looks copied from disk_blk_read() and should read
 * "flush ops".
 */
470 * Only DMSG_BLK_FLUSH commands imply read ops.
472 iost = msg->state->any.any;
473 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
475 iost = kmalloc(sizeof(*iost), M_DEVBUF,
477 msg->state->any.any = iost;
/*
 * NOTE(review): unlike the read and write handlers, no range check on the
 * byte count is visible here - confirm whether one is intended.
 */
482 bp->b_cmd = BUF_CMD_FLUSH;
483 bp->b_bcount = msg->any.blk_flush.bytes;
485 bio->bio_offset = msg->any.blk_flush.offset;
486 bio->bio_caller_info1.ptr = msg->state;
487 bio->bio_done = diskiodone;
/*
 * NOTE(review): the state hold below is commented out while the bio keeps
 * a raw pointer to msg->state - confirm that the state outlives the I/O.
 */
489 /* kdmsg_state_hold(msg->state); */
/* Account for the I/O globally and per transaction before issuing it. */
490 atomic_add_int(&blk_active, 1);
491 atomic_add_int(&iost->count, 1);
492 if (msg->any.head.cmd & DMSGF_DELETE)
495 dev_dstrategy(dp->d_rawdev, bio);
/*
 * On DELETE the per-transaction state is freed only when no I/O is still
 * counted against it.  kdmsg_msg_reply() answers on the DELETE path;
 * kdmsg_msg_result() is presumably the non-DELETE branch (the else line
 * is not visible here).
 */
498 if (msg->any.head.cmd & DMSGF_DELETE) {
499 if (iost && iost->count == 0) {
500 kfree(iost, M_DEVBUF);
501 msg->state->any.any = NULL;
503 kdmsg_msg_reply(msg, error);
505 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_FREEBLKS message: set up a bio with BUF_CMD_FREEBLKS for
 * the offset and byte count given in the message and queue it to the raw
 * device.  Completion is reported asynchronously by diskiodone().
 *
 * Per-transaction I/O state (struct dios_io) is kept in
 * msg->state->any.any.  The result defaults to DMSG_ERR_NOSUPP for any
 * other command.
 *
 * Some lines of this function (including the buffer allocation) are not
 * visible in this listing; only the visible statements are described.
 */
512 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
514 struct dios_io *iost;
517 int error = DMSG_ERR_NOSUPP;
/*
 * NOTE(review): the comment text below says "read ops" although this is
 * the freeblks path; it looks copied from disk_blk_read() and should read
 * "freeblks ops".
 */
521 * Only DMSG_BLK_FREEBLKS commands imply read ops.
523 iost = msg->state->any.any;
524 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
526 iost = kmalloc(sizeof(*iost), M_DEVBUF,
528 msg->state->any.any = iost;
/*
 * NOTE(review): unlike the read and write handlers, no range check on the
 * byte count is visible here - confirm whether one is intended.
 */
533 bp->b_cmd = BUF_CMD_FREEBLKS;
534 bp->b_bcount = msg->any.blk_freeblks.bytes;
536 bio->bio_offset = msg->any.blk_freeblks.offset;
537 bio->bio_caller_info1.ptr = msg->state;
538 bio->bio_done = diskiodone;
/*
 * NOTE(review): the state hold below is commented out while the bio keeps
 * a raw pointer to msg->state - confirm that the state outlives the I/O.
 */
540 /* kdmsg_state_hold(msg->state); */
/* Account for the I/O globally and per transaction before issuing it. */
541 atomic_add_int(&blk_active, 1);
542 atomic_add_int(&iost->count, 1);
543 if (msg->any.head.cmd & DMSGF_DELETE)
546 dev_dstrategy(dp->d_rawdev, bio);
/*
 * On DELETE the per-transaction state is freed only when no I/O is still
 * counted against it.  kdmsg_msg_reply() answers on the DELETE path;
 * kdmsg_msg_result() is presumably the non-DELETE branch (the else line
 * is not visible here).
 */
549 if (msg->any.head.cmd & DMSGF_DELETE) {
550 if (iost && iost->count == 0) {
551 kfree(iost, M_DEVBUF);
552 msg->state->any.any = NULL;
554 kdmsg_msg_reply(msg, error);
556 kdmsg_msg_result(msg, error);
/*
 * bio completion callback installed by the disk_blk_*() handlers.
 *
 * Recovers the transaction state from bio_caller_info1.ptr and the
 * per-transaction I/O state from state->any.any, derives the reply command
 * and error from the finished buffer, drops the in-flight counters, then
 * allocates and writes the reply message and disposes of the buffer.
 *
 * Many lines (case labels, local declarations, several assignments and
 * the end of the function) are not visible in this listing; only the
 * visible statements are described.
 */
563 diskiodone(struct bio *bio)
565 struct buf *bp = bio->bio_buf;
566 kdmsg_state_t *state = bio->bio_caller_info1.ptr;
568 struct dios_io *iost = state->any.any;
/*
 * Per-command handling: the reply command starts as DMSG_LNK_ERROR,
 * B_ERROR on the buffer is checked, and the aux data detached by the
 * write path is released.  An unknown buffer command panics.
 */
575 cmd = DMSG_LNK_ERROR;
581 cmd = DMSG_LNK_ERROR;
583 bytes = bp->b_bcount;
586 if (bp->b_flags & B_ERROR) {
592 kdmsg_free_aux_data(&iost->data);
595 case BUF_CMD_FREEBLKS:
596 if (bp->b_flags & B_ERROR)
602 panic("diskiodone: Unknown bio cmd = %d\n",
603 bio->bio_buf->b_cmd);
604 error = 0; /* avoid compiler warning */
605 break; /* NOT REACHED */
609 * Convert error to DMSG_ERR_* code.
/*
 * NOTE(review): the comment text below reads "LNK_ERROR or BLK_ERROR";
 * the code converts LNK_ERROR to BLK_ERROR when resid is non-zero, so
 * "to" appears to be the intended word.
 */
615 * Convert LNK_ERROR or BLK_ERROR if non-zero resid. READS will
616 * have already converted cmd to BLK_ERROR and set up data to return.
618 if (resid && cmd == DMSG_LNK_ERROR)
619 cmd = DMSG_BLK_ERROR;
620 /* XXX txcmd is delayed so this won't work for streaming */
621 if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */
/*
 * The per-transaction count is decremented on both visible paths.
 * atomic_fetchadd_int() returns the previous value, so the == 1 test
 * detects the last outstanding I/O; the statements controlled by these
 * conditions are not visible here.
 */
624 if (atomic_fetchadd_int(&iost->count, -1) == 1)
627 atomic_add_int(&iost->count, -1);
629 atomic_add_int(&blk_active, -1);
633 * Allocate a basic or extended reply. Be careful not to populate
634 * extended header fields unless we allocated an extended reply.
636 rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0);
/*
 * Returned data is copied into a freshly allocated aux buffer owned by
 * the reply (flagged KDMSG_FLAG_AUXALLOC), using the iocom malloc type.
 * The condition guarding this copy is not visible here.
 */
638 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
639 rmsg->aux_size = bytes;
640 rmsg->flags |= KDMSG_FLAG_AUXALLOC;
641 bcopy(data, rmsg->aux_data, bytes);
/* resid is only filled in when the reply really is a BLK_ERROR. */
643 rmsg->any.blk_error.head.error = error;
644 if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
645 rmsg->any.blk_error.resid = resid;
/* Clear the back pointer from the bio before sending the reply. */
646 bio->bio_caller_info1.ptr = NULL;
647 /* kdmsg_state_drop(state); */
648 kdmsg_msg_write(rmsg);
/*
 * Buffer disposal: a buffer flagged B_PAGING is handed back with
 * relpbuf(); the other visible path marks the buffer B_INVAL | B_AGE.
 * The remainder of the function is not visible in this listing.
 */
649 if (bp->b_flags & B_PAGING) {
650 relpbuf(bio->bio_buf, NULL);
652 bp->b_flags |= B_INVAL | B_AGE;