/*
 * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
38 #include <sys/sysctl.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
58 #include <sys/mplock2.h>
59 #include <sys/msgport2.h>
60 #include <sys/thread2.h>
/* Malloc tag used for the disk's kdmsg iocom allocations. */
72 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");

/*
 * Forward declarations: iocom reconnect/receive entry points, the
 * per-command BLK_* transaction handlers, and the bio completion
 * callback used by the read/write/flush/freeblks paths.
 */
74 static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
75 static int disk_rcvdmsg(kdmsg_msg_t *msg);
77 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
78 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
79 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
80 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
81 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
82 static void diskiodone(struct bio *bio);
/*
 * Attach kdmsg iocom state to a disk.  Registers disk_rcvdmsg() as the
 * receive handler and enables automatic connection and automatic RX/TX
 * SPAN management (KDMSG_IOCOMF_AUTO* flags).
 *
 * NOTE(review): lines are elided in this view (the leading numerals are
 * residue of original line numbers); confirm details against upstream
 * sys/kern/subr_diskiocom.c.
 */
85 disk_iocom_init(struct disk *dp)
87 kdmsg_iocom_init(&dp->d_iocom, dp,
88 KDMSG_IOCOMF_AUTOCONN |
89 KDMSG_IOCOMF_AUTORXSPAN |
90 KDMSG_IOCOMF_AUTOTXSPAN,
91 M_DMSG_DISK, disk_rcvdmsg);
/*
 * Hook invoked when disk parameters may have changed.
 * NOTE(review): body elided in this view -- appears to be a no-op or
 * near no-op here; confirm against the upstream file.
 */
95 disk_iocom_update(struct disk *dp)
/* Tear down the disk's kdmsg iocom state (reverse of disk_iocom_init). */
100 disk_iocom_uninit(struct disk *dp)
102 kdmsg_iocom_uninit(&dp->d_iocom);
/*
 * ioctl entry point.  For the recluster request it holds the file
 * pointer named by recl->fd and reconnects the disk's iocom to it.
 * NOTE(review): the cmd dispatch, fp validation, and error paths are
 * elided in this view -- verify against upstream before relying on them.
 */
106 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
109 struct disk_ioc_recluster *recl;
115 fp = holdfp(curproc->p_fd, recl->fd, -1);
117 error = disk_iocom_reconnect(dp, fp);
/*
 * (Re)connect the disk's iocom over the supplied file pointer.
 *
 * Builds the device name ("dnameN"), hands the fp to the kdmsg layer,
 * then fills in the automatic LNK_CONN and LNK_SPAN templates: we
 * advertise ourselves as a block-device server (DMSG_PEER_BLOCK) with
 * a "hostname/devname" cluster label, using the drive serial number as
 * the filesystem label when one is available.  Finally autoinitiates
 * the connection.
 */
131 disk_iocom_reconnect(struct disk *dp, struct file *fp)
135 ksnprintf(devname, sizeof(devname), "%s%d",
136 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
138 kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
/* LNK_CONN template: block-device server, accept any pfs */
140 dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER;
141 dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
142 dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
143 dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
144 dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
145 ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label,
146 sizeof(dp->d_iocom.auto_lnk_conn.cl_label),
147 "%s/%s", hostname, devname);
/* use the drive serial number as the fs label when we have one */
148 if (dp->d_info.d_serialno) {
149 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label,
150 sizeof(dp->d_iocom.auto_lnk_conn.fs_label),
151 "%s", dp->d_info.d_serialno);
/* LNK_SPAN template: also advertises media size and block size */
154 dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER;
155 dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
156 dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
157 dp->d_iocom.auto_lnk_span.media.block.bytes =
158 dp->d_info.d_media_size;
159 dp->d_iocom.auto_lnk_span.media.block.blksize =
160 dp->d_info.d_media_blksize;
161 ksnprintf(dp->d_iocom.auto_lnk_span.cl_label,
162 sizeof(dp->d_iocom.auto_lnk_span.cl_label),
163 "%s/%s", hostname, devname);
164 if (dp->d_info.d_serialno) {
165 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label,
166 sizeof(dp->d_iocom.auto_lnk_span.fs_label),
167 "%s", dp->d_info.d_serialno);
/* kick off the automatic LNK_CONN/LNK_SPAN transactions */
170 kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
/*
 * Receive handler for all dmsg messages directed at this disk.
 *
 * Debug-shell messages are handled (or rejected) immediately; all other
 * messages must belong to a transaction and are dispatched on the
 * transaction's initial rxcmd to the appropriate BLK_* handler.
 * Unrecognized commands are rejected with DMSG_ERR_NOSUPP.
 */
176 disk_rcvdmsg(kdmsg_msg_t *msg)
178 struct disk *dp = msg->state->iocom->handle;
181 * Handle debug messages (these might not be in transactions)
183 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
186 * Execute shell command (not supported atm)
188 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
190 case DMSG_DBG_SHELL | DMSGF_REPLY:
/* ensure the returned debug text is NUL-terminated before printing */
192 msg->aux_data[msg->aux_size - 1] = 0;
193 kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
199 * All remaining messages must be in a transaction.
201 * NOTE! We currently don't care if the transaction is just
202 * the span transaction (for disk probes) or if it is the
203 * BLK_OPEN transaction.
205 * NOTE! We are switching on the first message's command. The
206 * actual message command within the transaction may be
207 * different (if streaming within a transaction).
/* reject messages that are not part of any transaction */
209 if (msg->state == &msg->state->iocom->state0) {
210 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
/* dispatch on the transaction's initial command */
214 switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
216 disk_blk_open(dp, msg);
220 * not reached normally but leave in for completeness
222 disk_blk_read(dp, msg);
225 disk_blk_write(dp, msg);
228 disk_blk_flush(dp, msg);
230 case DMSG_BLK_FREEBLKS:
231 disk_blk_freeblks(dp, msg);
/* unknown command: reply (if DELETE is set) or give an interim result */
234 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
235 if (msg->any.head.cmd & DMSGF_DELETE)
236 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
238 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
/*
 * Handle BLK_OPEN / BLK_CLOSE within a transaction.
 *
 * Tracks cumulative read/write opens in a per-transaction dios_open
 * structure hung off msg->state->any.any, calling dev_dopen()/
 * dev_dclose() on the raw device as modes are added and removed.  When
 * the transaction is deleted, any opens still outstanding are closed
 * and the tracking structure is freed.
 *
 * NOTE(review): several lines (fflags setup, openrd/openwr counter
 * updates, braces) are elided in this view; confirm upstream.
 */
247 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
249 struct dios_open *openst;
250 int error = DMSG_ERR_NOSUPP;
253 openst = msg->state->any.any;
254 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
/* first open on this transaction: allocate tracking state */
255 if (openst == NULL) {
256 openst = kmalloc(sizeof(*openst), M_DEVBUF,
258 msg->state->any.any = openst;
261 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
263 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
265 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL);
269 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
271 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
276 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
279 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
283 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
287 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL);
291 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
293 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
/* transaction going away: drain any remaining opens, free state */
298 if (msg->any.head.cmd & DMSGF_DELETE) {
300 while (openst->openrd && openst->openwr) {
303 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL);
305 while (openst->openrd) {
307 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL);
309 while (openst->openwr) {
311 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL);
313 kfree(openst, M_DEVBUF);
314 msg->state->any.any = NULL;
316 kdmsg_msg_reply(msg, error);
318 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_READ within a transaction.
 *
 * Validates the requested byte count (DEV_BSIZE..MAXPHYS), lazily
 * allocates a per-transaction dios_io refcount structure, sets up an
 * eternal buffer (geteblk) with BUF_CMD_READ, and issues the strategy
 * call.  Completion (including the data reply) is handled in
 * diskiodone(); on an early parameter error this function replies or
 * results with the DMSG error directly.
 */
324 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
326 struct dios_io *iost;
329 int error = DMSG_ERR_NOSUPP;
333 * Only DMSG_BLK_READ commands imply read ops.
335 iost = msg->state->any.any;
336 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
/* bounds-check the transfer size before allocating anything */
337 if (msg->any.blk_read.bytes < DEV_BSIZE ||
338 msg->any.blk_read.bytes > MAXPHYS) {
339 error = DMSG_ERR_PARAM;
343 iost = kmalloc(sizeof(*iost), M_DEVBUF,
345 msg->state->any.any = iost;
348 bp = geteblk(msg->any.blk_read.bytes);
350 bp->b_cmd = BUF_CMD_READ;
351 bp->b_bcount = msg->any.blk_read.bytes;
352 bp->b_resid = bp->b_bcount;
/* stash the transaction state for diskiodone() */
353 bio->bio_offset = msg->any.blk_read.offset;
354 bio->bio_caller_info1.ptr = msg->state;
355 bio->bio_done = diskiodone;
356 /* kdmsg_state_hold(msg->state); */
358 atomic_add_int(&iost->count, 1);
359 if (msg->any.head.cmd & DMSGF_DELETE)
362 dev_dstrategy(dp->d_rawdev, bio);
/* error path: free the refcount state if idle, then answer */
366 if (msg->any.head.cmd & DMSGF_DELETE) {
367 if (iost && iost->count == 0) {
368 kfree(iost, M_DEVBUF);
369 msg->state->any.any = NULL;
371 kdmsg_msg_reply(msg, error);
373 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_WRITE within a transaction.
 *
 * Mirrors disk_blk_read(): validates the byte count, lazily allocates
 * the per-transaction dios_io refcount, builds a BUF_CMD_WRITE buffer,
 * and issues the strategy call with diskiodone() as the completion.
 * Short aux data implies trailing zeros; when the message carries a
 * full-length payload the buffer is pointed directly at the message's
 * aux_data to avoid a copy.
 */
380 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
382 struct dios_io *iost;
385 int error = DMSG_ERR_NOSUPP;
389 * Only DMSG_BLK_WRITE commands imply read ops.
391 iost = msg->state->any.any;
392 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
393 if (msg->any.blk_write.bytes < DEV_BSIZE ||
394 msg->any.blk_write.bytes > MAXPHYS) {
395 error = DMSG_ERR_PARAM;
399 iost = kmalloc(sizeof(*iost), M_DEVBUF,
401 msg->state->any.any = iost;
405 * Issue WRITE. Short data implies zeros. Try to optimize
406 * the buffer cache buffer for the case where we can just
407 * use the message's data pointer.
410 if (msg->aux_size >= msg->any.blk_write.bytes)
413 bp = geteblk(msg->any.blk_write.bytes);
415 bp->b_cmd = BUF_CMD_WRITE;
416 bp->b_bcount = msg->any.blk_write.bytes;
417 bp->b_resid = bp->b_bcount;
/* full payload: use the message data in place; else copy + zero-fill */
418 if (msg->aux_size >= msg->any.blk_write.bytes) {
419 bp->b_data = msg->aux_data;
421 bcopy(msg->aux_data, bp->b_data, msg->aux_size);
422 bzero(bp->b_data + msg->aux_size,
423 msg->any.blk_write.bytes - msg->aux_size);
425 bio->bio_offset = msg->any.blk_write.offset;
426 bio->bio_caller_info1.ptr = msg->state;
427 bio->bio_done = diskiodone;
428 /* kdmsg_state_hold(msg->state); */
430 atomic_add_int(&iost->count, 1);
431 if (msg->any.head.cmd & DMSGF_DELETE)
434 dev_dstrategy(dp->d_rawdev, bio);
/* error path: free the refcount state if idle, then answer */
438 if (msg->any.head.cmd & DMSGF_DELETE) {
439 if (iost && iost->count == 0) {
440 kfree(iost, M_DEVBUF);
441 msg->state->any.any = NULL;
443 kdmsg_msg_reply(msg, error);
445 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_FLUSH within a transaction.
 *
 * Same shape as disk_blk_read()/disk_blk_write() but issues a
 * BUF_CMD_FLUSH strategy call (no data buffer contents are relevant).
 * Completion and the reply are handled in diskiodone().
 */
452 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
454 struct dios_io *iost;
457 int error = DMSG_ERR_NOSUPP;
461 * Only DMSG_BLK_FLUSH commands imply read ops.
463 iost = msg->state->any.any;
464 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
466 iost = kmalloc(sizeof(*iost), M_DEVBUF,
468 msg->state->any.any = iost;
473 bp->b_cmd = BUF_CMD_FLUSH;
474 bp->b_bcount = msg->any.blk_flush.bytes;
476 bio->bio_offset = msg->any.blk_flush.offset;
477 bio->bio_caller_info1.ptr = msg->state;
478 bio->bio_done = diskiodone;
479 /* kdmsg_state_hold(msg->state); */
481 atomic_add_int(&iost->count, 1);
482 if (msg->any.head.cmd & DMSGF_DELETE)
485 dev_dstrategy(dp->d_rawdev, bio);
/* error path: free the refcount state if idle, then answer */
488 if (msg->any.head.cmd & DMSGF_DELETE) {
489 if (iost && iost->count == 0) {
490 kfree(iost, M_DEVBUF);
491 msg->state->any.any = NULL;
493 kdmsg_msg_reply(msg, error);
495 kdmsg_msg_result(msg, error);
/*
 * Handle a BLK_FREEBLKS (block free / trim) within a transaction.
 *
 * Same shape as disk_blk_flush() but issues BUF_CMD_FREEBLKS.
 * Completion and the reply are handled in diskiodone().
 */
502 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
504 struct dios_io *iost;
507 int error = DMSG_ERR_NOSUPP;
511 * Only DMSG_BLK_FREEBLKS commands imply read ops.
513 iost = msg->state->any.any;
514 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
516 iost = kmalloc(sizeof(*iost), M_DEVBUF,
518 msg->state->any.any = iost;
523 bp->b_cmd = BUF_CMD_FREEBLKS;
524 bp->b_bcount = msg->any.blk_freeblks.bytes;
526 bio->bio_offset = msg->any.blk_freeblks.offset;
527 bio->bio_caller_info1.ptr = msg->state;
528 bio->bio_done = diskiodone;
529 /* kdmsg_state_hold(msg->state); */
531 atomic_add_int(&iost->count, 1);
532 if (msg->any.head.cmd & DMSGF_DELETE)
535 dev_dstrategy(dp->d_rawdev, bio);
/* error path: free the refcount state if idle, then answer */
538 if (msg->any.head.cmd & DMSGF_DELETE) {
539 if (iost && iost->count == 0) {
540 kfree(iost, M_DEVBUF);
541 msg->state->any.any = NULL;
543 kdmsg_msg_reply(msg, error);
545 kdmsg_msg_result(msg, error);
/*
 * bio completion callback for all disk_blk_* strategy calls.
 *
 * Recovers the kdmsg transaction state from bio_caller_info1, maps the
 * buffer command and B_ERROR state to a DMSG reply command
 * (LNK_ERROR, upgraded to BLK_ERROR when there is a non-zero resid;
 * successful READs appear to carry data back), drops the dios_io
 * refcount, builds and writes the reply message (with allocated aux
 * data for returned read data), and releases the buffer.
 *
 * NOTE(review): this function is incomplete in this view (elided
 * interior lines and a truncated tail past the B_PAGING branch);
 * confirm the full error-mapping switch and buffer-release logic
 * against upstream before relying on details.
 */
552 diskiodone(struct bio *bio)
554 struct buf *bp = bio->bio_buf;
555 kdmsg_state_t *state = bio->bio_caller_info1.ptr;
557 struct dios_io *iost = state->any.any;
564 cmd = DMSG_LNK_ERROR;
570 cmd = DMSG_LNK_ERROR;
572 bytes = bp->b_bcount;
575 if (bp->b_flags & B_ERROR) {
583 case BUF_CMD_FREEBLKS:
584 if (bp->b_flags & B_ERROR)
590 panic("diskiodone: Unknown bio cmd = %d\n",
591 bio->bio_buf->b_cmd);
592 error = 0; /* avoid compiler warning */
593 break; /* NOT REACHED */
597 * Convert error to DMSG_ERR_* code.
603 * Convert LNK_ERROR or BLK_ERROR if non-zero resid. READS will
604 * have already converted cmd to BLK_ERROR and set up data to return.
606 if (resid && cmd == DMSG_LNK_ERROR)
607 cmd = DMSG_BLK_ERROR;
608 /* XXX txcmd is delayed so this won't work for streaming */
609 if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */
/* drop the I/O refcount; last reference frees/cleans up */
612 if (atomic_fetchadd_int(&iost->count, -1) == 1)
615 atomic_add_int(&iost->count, -1);
620 * Allocate a basic or extended reply. Be careful not to populate
621 * extended header fields unless we allocated an extended reply.
623 rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0);
/* attach returned data (reads) as allocated aux data on the reply */
625 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
626 rmsg->aux_size = bytes;
627 rmsg->flags |= KDMSG_FLAG_AUXALLOC;
628 bcopy(data, rmsg->aux_data, bytes);
630 rmsg->any.blk_error.head.error = error;
631 if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
632 rmsg->any.blk_error.resid = resid;
633 bio->bio_caller_info1.ptr = NULL;
634 /* kdmsg_state_drop(state); */
635 kdmsg_msg_write(rmsg);
/* release the buffer: paging bufs via relpbuf, others invalidated */
636 if (bp->b_flags & B_PAGING) {
637 relpbuf(bio->bio_buf, NULL);
639 bp->b_flags |= B_INVAL | B_AGE;