kernel - work on dmsg disk exports
[dragonfly.git] / sys / kern / subr_diskiocom.c
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
46 #include <sys/disk.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/stat.h>
54 #include <sys/uuid.h>
55 #include <sys/dmsg.h>
56
57 #include <sys/buf2.h>
58 #include <sys/mplock2.h>
59 #include <sys/msgport2.h>
60 #include <sys/thread2.h>
61
/*
 * Per-transaction bookkeeping for a DMSG_BLK_OPEN transaction, hung off
 * of msg->state->any.any.  Each counter is bumped for every successful
 * open requesting that mode and is drained again (with a matching
 * dev_dclose()) when the transaction is deleted.
 */
struct dios_open {
        int     openrd;         /* successful opens that asked for RD */
        int     openwr;         /* successful opens that asked for WR */
};
66
67 struct dios_io {
68         int     count;
69         int     eof;
70         kdmsg_data_t data;
71 };
72
73 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");
74
75 static int blk_active;
76 SYSCTL_INT(_debug, OID_AUTO, blk_active, CTLFLAG_RW, &blk_active, 0,
77            "Number of active iocom IOs");
78
79 static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
80 static int disk_rcvdmsg(kdmsg_msg_t *msg);
81
82 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
83 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
84 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
85 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
86 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
87 static void diskiodone(struct bio *bio);
88
89 void
90 disk_iocom_init(struct disk *dp)
91 {
92         kdmsg_iocom_init(&dp->d_iocom, dp,
93                          KDMSG_IOCOMF_AUTOCONN |
94                          KDMSG_IOCOMF_AUTORXSPAN |
95                          KDMSG_IOCOMF_AUTOTXSPAN,
96                          M_DMSG_DISK, disk_rcvdmsg);
97 }
98
/*
 * Hook for propagating a change in the disk's state to the iocom.
 * Currently a no-op.
 */
void
disk_iocom_update(struct disk *dp)
{
        /* nothing to do yet */
}
103
104 void
105 disk_iocom_uninit(struct disk *dp)
106 {
107         kdmsg_iocom_uninit(&dp->d_iocom);
108 }
109
110 int
111 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
112 {
113         struct file *fp;
114         struct disk_ioc_recluster *recl;
115         int error;
116
117         switch(cmd) {
118         case DIOCRECLUSTER:
119                 recl = data;
120                 fp = holdfp(curproc->p_fd, recl->fd, -1);
121                 if (fp) {
122                         error = disk_iocom_reconnect(dp, fp);
123                 } else {
124                         error = EINVAL;
125                 }
126                 break;
127         default:
128                 error = EOPNOTSUPP;
129                 break;
130         }
131         return error;
132 }
133
134 static
135 int
136 disk_iocom_reconnect(struct disk *dp, struct file *fp)
137 {
138         char devname[64];
139
140         ksnprintf(devname, sizeof(devname), "%s%d",
141                   dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
142
143         kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
144
145         dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER;
146         dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
147         dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
148         dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
149         dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
150         ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label,
151                   sizeof(dp->d_iocom.auto_lnk_conn.cl_label),
152                   "%s/%s", hostname, devname);
153         if (dp->d_info.d_serialno) {
154                 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label,
155                           sizeof(dp->d_iocom.auto_lnk_conn.fs_label),
156                           "%s", dp->d_info.d_serialno);
157         }
158
159         dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER;
160         dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
161         dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
162         dp->d_iocom.auto_lnk_span.media.block.bytes =
163                                                 dp->d_info.d_media_size;
164         dp->d_iocom.auto_lnk_span.media.block.blksize =
165                                                 dp->d_info.d_media_blksize;
166         ksnprintf(dp->d_iocom.auto_lnk_span.cl_label,
167                   sizeof(dp->d_iocom.auto_lnk_span.cl_label),
168                   "%s/%s", hostname, devname);
169         if (dp->d_info.d_serialno) {
170                 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label,
171                           sizeof(dp->d_iocom.auto_lnk_span.fs_label),
172                           "%s", dp->d_info.d_serialno);
173         }
174
175         kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
176
177         return (0);
178 }
179
180 static int
181 disk_rcvdmsg(kdmsg_msg_t *msg)
182 {
183         struct disk *dp = msg->state->iocom->handle;
184
185         /*
186          * Handle debug messages (these might not be in transactions)
187          */
188         switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
189         case DMSG_DBG_SHELL:
190                 /*
191                  * Execute shell command (not supported atm)
192                  */
193                 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
194                 return(0);
195         case DMSG_DBG_SHELL | DMSGF_REPLY:
196                 if (msg->aux_data) {
197                         msg->aux_data[msg->aux_size - 1] = 0;
198                         kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
199                 }
200                 return(0);
201         }
202
203         /*
204          * All remaining messages must be in a transaction. 
205          *
206          * NOTE!  We currently don't care if the transaction is just
207          *        the span transaction (for disk probes) or if it is the
208          *        BLK_OPEN transaction.
209          *
210          * NOTE!  We are switching on the first message's command.  The
211          *        actual message command within the transaction may be
212          *        different (if streaming within a transaction).
213          */
214         if (msg->state == &msg->state->iocom->state0) {
215                 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
216                 return(0);
217         }
218
219         switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
220         case DMSG_BLK_OPEN:
221                 disk_blk_open(dp, msg);
222                 break;
223         case DMSG_BLK_READ:
224                 /*
225                  * not reached normally but leave in for completeness
226                  */
227                 disk_blk_read(dp, msg);
228                 break;
229         case DMSG_BLK_WRITE:
230                 disk_blk_write(dp, msg);
231                 break;
232         case DMSG_BLK_FLUSH:
233                 disk_blk_flush(dp, msg);
234                 break;
235         case DMSG_BLK_FREEBLKS:
236                 disk_blk_freeblks(dp, msg);
237                 break;
238         default:
239                 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
240                         if (msg->any.head.cmd & DMSGF_DELETE)
241                                 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
242                         else
243                                 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
244                 }
245                 break;
246         }
247         return (0);
248 }
249
250 static
251 void
252 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
253 {
254         struct dios_open *openst;
255         int error = DMSG_ERR_NOSUPP;
256         int fflags;
257
258         openst = msg->state->any.any;
259         if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
260                 if (openst == NULL) {
261                         openst = kmalloc(sizeof(*openst), M_DEVBUF,
262                                                 M_WAITOK | M_ZERO);
263                         msg->state->any.any = openst;
264                 }
265                 fflags = 0;
266                 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
267                         fflags = FREAD;
268                 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
269                         fflags |= FWRITE;
270                 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL);
271                 if (error) {
272                         error = DMSG_ERR_IO;
273                 } else {
274                         if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
275                                 ++openst->openrd;
276                         if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
277                                 ++openst->openwr;
278                 }
279         }
280 #if 0
281         if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
282             openst) {
283                 fflags = 0;
284                 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
285                     openst->openrd) {
286                         fflags = FREAD;
287                 }
288                 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
289                     openst->openwr) {
290                         fflags |= FWRITE;
291                 }
292                 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL);
293                 if (error) {
294                         error = DMSG_ERR_IO;
295                 } else {
296                         if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
297                                 --openst->openrd;
298                         if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
299                                 --openst->openwr;
300                 }
301         }
302 #endif
303         if (msg->any.head.cmd & DMSGF_DELETE) {
304                 if (openst) {
305                         while (openst->openrd && openst->openwr) {
306                                 --openst->openrd;
307                                 --openst->openwr;
308                                 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL);
309                         }
310                         while (openst->openrd) {
311                                 --openst->openrd;
312                                 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL);
313                         }
314                         while (openst->openwr) {
315                                 --openst->openwr;
316                                 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL);
317                         }
318                         kfree(openst, M_DEVBUF);
319                         msg->state->any.any = NULL;
320                 }
321                 kdmsg_msg_reply(msg, error);
322         } else {
323                 kdmsg_msg_result(msg, error);
324         }
325 }
326
327 static
328 void
329 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
330 {
331         struct dios_io *iost;
332         struct buf *bp;
333         struct bio *bio;
334         int error = DMSG_ERR_NOSUPP;
335         int reterr = 1;
336
337         /*
338          * Only DMSG_BLK_READ commands imply read ops.
339          */
340         iost = msg->state->any.any;
341         if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
342                 if (msg->any.blk_read.bytes < DEV_BSIZE ||
343                     msg->any.blk_read.bytes > MAXPHYS) {
344                         error = DMSG_ERR_PARAM;
345                         goto done;
346                 }
347                 if (iost == NULL) {
348                         iost = kmalloc(sizeof(*iost), M_DEVBUF,
349                                        M_WAITOK | M_ZERO);
350                         msg->state->any.any = iost;
351                 }
352                 reterr = 0;
353                 bp = geteblk(msg->any.blk_read.bytes);
354                 bio = &bp->b_bio1;
355                 bp->b_cmd = BUF_CMD_READ;
356                 bp->b_bcount = msg->any.blk_read.bytes;
357                 bp->b_resid = bp->b_bcount;
358                 bio->bio_offset = msg->any.blk_read.offset;
359                 bio->bio_caller_info1.ptr = msg->state;
360                 bio->bio_done = diskiodone;
361
362                 /* kdmsg_state_hold(msg->state); */
363                 atomic_add_int(&blk_active, 1);
364                 atomic_add_int(&iost->count, 1);
365                 if (msg->any.head.cmd & DMSGF_DELETE)
366                         iost->eof = 1;
367                 BUF_KERNPROC(bp);
368                 dev_dstrategy(dp->d_rawdev, bio);
369         }
370 done:
371         if (reterr) {
372                 if (msg->any.head.cmd & DMSGF_DELETE) {
373                         if (iost && iost->count == 0) {
374                                 kfree(iost, M_DEVBUF);
375                                 msg->state->any.any = NULL;
376                         }
377                         kdmsg_msg_reply(msg, error);
378                 } else {
379                         kdmsg_msg_result(msg, error);
380                 }
381         }
382 }
383
384 static
385 void
386 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
387 {
388         struct dios_io *iost;
389         struct buf *bp;
390         struct bio *bio;
391         int error = DMSG_ERR_NOSUPP;
392         int reterr = 1;
393
394         /*
395          * Only DMSG_BLK_WRITE commands imply read ops.
396          */
397         iost = msg->state->any.any;
398         if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
399                 if (msg->any.blk_write.bytes < DEV_BSIZE ||
400                     msg->any.blk_write.bytes > MAXPHYS) {
401                         error = DMSG_ERR_PARAM;
402                         goto done;
403                 }
404                 if (iost == NULL) {
405                         iost = kmalloc(sizeof(*iost), M_DEVBUF,
406                                        M_WAITOK | M_ZERO);
407                         msg->state->any.any = iost;
408                 }
409
410                 /*
411                  * Issue WRITE.  Short data implies zeros.  Try to optimize
412                  * the buffer cache buffer for the case where we can just
413                  * use the message's data pointer.
414                  */
415                 reterr = 0;
416                 if (msg->aux_size >= msg->any.blk_write.bytes)
417                         bp = getpbuf(NULL);
418                 else
419                         bp = geteblk(msg->any.blk_write.bytes);
420                 bio = &bp->b_bio1;
421                 bp->b_cmd = BUF_CMD_WRITE;
422                 bp->b_bcount = msg->any.blk_write.bytes;
423                 bp->b_resid = bp->b_bcount;
424                 if (msg->aux_size >= msg->any.blk_write.bytes) {
425                         bp->b_data = msg->aux_data;
426                         kdmsg_detach_aux_data(msg, &iost->data);
427                 } else {
428                         bcopy(msg->aux_data, bp->b_data, msg->aux_size);
429                         bzero(bp->b_data + msg->aux_size,
430                               msg->any.blk_write.bytes - msg->aux_size);
431                         bzero(&iost->data, sizeof(iost->data));
432                 }
433                 bio->bio_offset = msg->any.blk_write.offset;
434                 bio->bio_caller_info1.ptr = msg->state;
435                 bio->bio_done = diskiodone;
436
437                 /* kdmsg_state_hold(msg->state); */
438                 atomic_add_int(&blk_active, 1);
439                 atomic_add_int(&iost->count, 1);
440                 if (msg->any.head.cmd & DMSGF_DELETE)
441                         iost->eof = 1;
442                 BUF_KERNPROC(bp);
443                 dev_dstrategy(dp->d_rawdev, bio);
444         }
445 done:
446         if (reterr) {
447                 if (msg->any.head.cmd & DMSGF_DELETE) {
448                         if (iost && iost->count == 0) {
449                                 kfree(iost, M_DEVBUF);
450                                 msg->state->any.any = NULL;
451                         }
452                         kdmsg_msg_reply(msg, error);
453                 } else {
454                         kdmsg_msg_result(msg, error);
455                 }
456         }
457 }
458
459 static
460 void
461 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
462 {
463         struct dios_io *iost;
464         struct buf *bp;
465         struct bio *bio;
466         int error = DMSG_ERR_NOSUPP;
467         int reterr = 1;
468
469         /*
470          * Only DMSG_BLK_FLUSH commands imply read ops.
471          */
472         iost = msg->state->any.any;
473         if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
474                 if (iost == NULL) {
475                         iost = kmalloc(sizeof(*iost), M_DEVBUF,
476                                        M_WAITOK | M_ZERO);
477                         msg->state->any.any = iost;
478                 }
479                 reterr = 0;
480                 bp = getpbuf(NULL);
481                 bio = &bp->b_bio1;
482                 bp->b_cmd = BUF_CMD_FLUSH;
483                 bp->b_bcount = msg->any.blk_flush.bytes;
484                 bp->b_resid = 0;
485                 bio->bio_offset = msg->any.blk_flush.offset;
486                 bio->bio_caller_info1.ptr = msg->state;
487                 bio->bio_done = diskiodone;
488
489                 /* kdmsg_state_hold(msg->state); */
490                 atomic_add_int(&blk_active, 1);
491                 atomic_add_int(&iost->count, 1);
492                 if (msg->any.head.cmd & DMSGF_DELETE)
493                         iost->eof = 1;
494                 BUF_KERNPROC(bp);
495                 dev_dstrategy(dp->d_rawdev, bio);
496         }
497         if (reterr) {
498                 if (msg->any.head.cmd & DMSGF_DELETE) {
499                         if (iost && iost->count == 0) {
500                                 kfree(iost, M_DEVBUF);
501                                 msg->state->any.any = NULL;
502                         }
503                         kdmsg_msg_reply(msg, error);
504                 } else {
505                         kdmsg_msg_result(msg, error);
506                 }
507         }
508 }
509
510 static
511 void
512 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
513 {
514         struct dios_io *iost;
515         struct buf *bp;
516         struct bio *bio;
517         int error = DMSG_ERR_NOSUPP;
518         int reterr = 1;
519
520         /*
521          * Only DMSG_BLK_FREEBLKS commands imply read ops.
522          */
523         iost = msg->state->any.any;
524         if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
525                 if (iost == NULL) {
526                         iost = kmalloc(sizeof(*iost), M_DEVBUF,
527                                        M_WAITOK | M_ZERO);
528                         msg->state->any.any = iost;
529                 }
530                 reterr = 0;
531                 bp = getpbuf(NULL);
532                 bio = &bp->b_bio1;
533                 bp->b_cmd = BUF_CMD_FREEBLKS;
534                 bp->b_bcount = msg->any.blk_freeblks.bytes;
535                 bp->b_resid = 0;
536                 bio->bio_offset = msg->any.blk_freeblks.offset;
537                 bio->bio_caller_info1.ptr = msg->state;
538                 bio->bio_done = diskiodone;
539
540                 /* kdmsg_state_hold(msg->state); */
541                 atomic_add_int(&blk_active, 1);
542                 atomic_add_int(&iost->count, 1);
543                 if (msg->any.head.cmd & DMSGF_DELETE)
544                         iost->eof = 1;
545                 BUF_KERNPROC(bp);
546                 dev_dstrategy(dp->d_rawdev, bio);
547         }
548         if (reterr) {
549                 if (msg->any.head.cmd & DMSGF_DELETE) {
550                         if (iost && iost->count == 0) {
551                                 kfree(iost, M_DEVBUF);
552                                 msg->state->any.any = NULL;
553                         }
554                         kdmsg_msg_reply(msg, error);
555                 } else {
556                         kdmsg_msg_result(msg, error);
557                 }
558         }
559 }
560
561 static
562 void
563 diskiodone(struct bio *bio)
564 {
565         struct buf *bp = bio->bio_buf;
566         kdmsg_state_t *state = bio->bio_caller_info1.ptr;
567         kdmsg_msg_t *rmsg;
568         struct dios_io *iost = state->any.any;
569         int error;
570         int resid = 0;
571         int bytes;
572         uint32_t cmd;
573         void *data;
574
575         cmd = DMSG_LNK_ERROR;
576         data = NULL;
577         bytes = 0;
578
579         switch(bp->b_cmd) {
580         case BUF_CMD_READ:
581                 cmd = DMSG_LNK_ERROR;
582                 data = bp->b_data;
583                 bytes = bp->b_bcount;
584                 /* fall through */
585         case BUF_CMD_WRITE:
586                 if (bp->b_flags & B_ERROR) {
587                         error = bp->b_error;
588                 } else {
589                         error = 0;
590                         resid = bp->b_resid;
591                 }
592                 kdmsg_free_aux_data(&iost->data);
593                 break;
594         case BUF_CMD_FLUSH:
595         case BUF_CMD_FREEBLKS:
596                 if (bp->b_flags & B_ERROR)
597                         error = bp->b_error;
598                 else
599                         error = 0;
600                 break;
601         default:
602                 panic("diskiodone: Unknown bio cmd = %d\n",
603                       bio->bio_buf->b_cmd);
604                 error = 0;      /* avoid compiler warning */
605                 break;          /* NOT REACHED */
606         }
607
608         /*
609          * Convert error to DMSG_ERR_* code.
610          */
611         if (error)
612                 error = DMSG_ERR_IO;
613
614         /*
615          * Convert LNK_ERROR or BLK_ERROR if non-zero resid.  READS will
616          * have already converted cmd to BLK_ERROR and set up data to return.
617          */
618         if (resid && cmd == DMSG_LNK_ERROR)
619                 cmd = DMSG_BLK_ERROR;
620         /* XXX txcmd is delayed so this won't work for streaming */
621         if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */
622                 cmd |= DMSGF_CREATE;
623         if (iost->eof) {
624                 if (atomic_fetchadd_int(&iost->count, -1) == 1)
625                         cmd |= DMSGF_DELETE;
626         } else {
627                 atomic_add_int(&iost->count, -1);
628         }
629         atomic_add_int(&blk_active, -1);
630         cmd |= DMSGF_REPLY;
631
632         /*
633          * Allocate a basic or extended reply.  Be careful not to populate
634          * extended header fields unless we allocated an extended reply.
635          */
636         rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0);
637         if (data) {
638                 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
639                 rmsg->aux_size = bytes;
640                 rmsg->flags |= KDMSG_FLAG_AUXALLOC;
641                 bcopy(data, rmsg->aux_data, bytes);
642         }
643         rmsg->any.blk_error.head.error = error;
644         if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
645                 rmsg->any.blk_error.resid = resid;
646         bio->bio_caller_info1.ptr = NULL;
647         /* kdmsg_state_drop(state); */
648         kdmsg_msg_write(rmsg);
649         if (bp->b_flags & B_PAGING) {
650                 relpbuf(bio->bio_buf, NULL);
651         } else {
652                 bp->b_flags |= B_INVAL | B_AGE;
653                 brelse(bp);
654         }
655 }