61c4759c55c0c626b63a96cb5fefee72089e9a5e
[dragonfly.git] / sys / dev / virtual / virtio / block / virtio_blk.c
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.c,v 1.4 2012/04/16 18:29:12 grehan Exp $
27  */
28
29 /* Driver for VirtIO block devices. */
30
31 #include <sys/cdefs.h>
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bio.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/sglist.h>
40 #include <sys/lock.h>
41 #include <sys/queue.h>
42 #include <sys/taskqueue.h>
43
44 #include <sys/buf2.h>
45 #include <sys/rman.h>
46 #include <sys/disk.h>
47 #include <sys/spinlock.h>
48 #include <sys/spinlock2.h>
49 #include <sys/devicestat.h>
50
51 #include <virtio/virtio.h>
52 #include <virtio/virtqueue.h>
53 #include "virtio_blk.h"
54
/*
 * One outstanding block request: the VirtIO request header that leads
 * the descriptor chain, the bio being serviced, and the status byte
 * the host writes back on completion.
 */
struct vtblk_request {
	struct virtio_blk_outhdr	vbr_hdr;	/* type/ioprio/sector header */
	struct bio			*vbr_bp;	/* bio this request services */
	uint8_t				vbr_ack;	/* VIRTIO_BLK_S_* status from host */

	TAILQ_ENTRY(vtblk_request)	vbr_link;	/* free/ready list linkage */
};
62
/*
 * Per-device soft state. Protected by vtblk_mtx unless noted.
 */
struct vtblk_softc {
	device_t			vtblk_dev;	/* bus device handle */
	struct spinlock			vtblk_mtx;	/* softc lock (see VTBLK_LOCK) */
	uint64_t			vtblk_features;	/* negotiated feature bits */

#define VTBLK_FLAG_READONLY		0x0002	/* host advertised VIRTIO_BLK_F_RO */
#define VTBLK_FLAG_DETACH		0x0004	/* detach in progress; fail new I/O */
#define VTBLK_FLAG_SUSPEND		0x0008	/* suspended; vtblk_startio() idles */
#define VTBLK_FLAG_DUMPING		0x0010	/* kernel dump mode entered */
	uint32_t			vtblk_flags;

	struct virtqueue		*vtblk_vq;	/* single request virtqueue */
	struct sglist			*vtblk_sglist;	/* scratch sglist for enqueue */
	struct disk			vtblk_disk;	/* generic disk glue */
	cdev_t				cdev;		/* character device node */
	struct devstat			stats;		/* devstat accounting */

	struct bio_queue_head		vtblk_bioq;	/* bios awaiting a request slot */
	TAILQ_HEAD(, vtblk_request)	vtblk_req_free;	/* preallocated free requests */
	TAILQ_HEAD(, vtblk_request)	vtblk_req_ready;/* built but not yet enqueued */

	struct task			vtblk_intr_task;/* deferred completion work */

	int				vtblk_sector_size;
	int				vtblk_max_nsegs;/* max sglist segments per request */
	int				vtblk_unit;
	int				vtblk_request_count;

	/* Statically reserved request used while dumping (no allocation). */
	struct vtblk_request		vtblk_dump_request;
};
93
/*
 * Human-readable names for feature bits, registered with
 * virtio_set_feature_desc() in vtblk_attach().
 */
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},

	{ 0, NULL }
};
107
/* Module event handler. */
static int	vtblk_modevent(module_t, int, void *);

/* Standard newbus device methods. */
static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

/* Attach-time helpers. */
static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
				       struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_alloc_disk(struct vtblk_softc *,
				 struct virtio_blk_config *);
/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

/* I/O submission path. */
static void		vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int		vtblk_execute_request(struct vtblk_softc *,
					      struct vtblk_request *);

/* Interrupt / deferred completion. */
static int		vtblk_vq_intr(void *);
static void		vtblk_complete(void *, int);

static void		vtblk_stop(struct vtblk_softc *);

/* Kernel crash-dump support (polled I/O, no interrupts). */
static void		vtblk_prepare_dump(struct vtblk_softc *);
static int		vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int		vtblk_flush_dump(struct vtblk_softc *);
static int		vtblk_poll_request(struct vtblk_softc *,
					   struct vtblk_request *);

/* Teardown helpers. */
static void		vtblk_drain_vq(struct vtblk_softc *, int);
static void		vtblk_drain(struct vtblk_softc *);

/* Request pool management. */
static int		vtblk_alloc_requests(struct vtblk_softc *);
static void		vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void		vtblk_enqueue_request(struct vtblk_softc *,
					      struct vtblk_request *);

static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *);
static void		vtblk_enqueue_ready(struct vtblk_softc *,
					    struct vtblk_request *);

static void		vtblk_bio_error(struct bio *, int);
170
/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER		| \
     VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH)

/* Softc lock wrappers, implemented with spinlocks. */
#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc)	spin_init(&(_sc)->vtblk_mtx)
#define VTBLK_LOCK(_sc)		spin_lock(VTBLK_MTX((_sc)))
#define VTBLK_TRYLOCK(_sc)	spin_trylock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	spin_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	spin_uninit(VTBLK_MTX((_sc)))

/* No ownership assertions are available for spinlocks; these are no-ops. */
#define VTBLK_LOCK_ASSERT(_sc)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	{ 0, 0 }
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

/* Register on the virtio_pci bus. */
DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
	      vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
224
225 static int
226 vtblk_modevent(module_t mod, int type, void *unused)
227 {
228         int error;
229
230         error = 0;
231
232         switch (type) {
233         case MOD_LOAD:
234                 break;
235         case MOD_UNLOAD:
236                 break;
237         case MOD_SHUTDOWN:
238                 break;
239         default:
240                 error = EOPNOTSUPP;
241                 break;
242         }
243
244         return (error);
245 }
246
247 static int
248 vtblk_probe(device_t dev)
249 {
250
251         if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
252                 return (ENXIO);
253
254         device_set_desc(dev, "VirtIO Block Adapter");
255
256         return (BUS_PROBE_DEFAULT);
257 }
258
/*
 * Attach: negotiate features, read the device configuration, allocate
 * the scratch sglist, the virtqueue and the request pool, create the
 * disk, and hook up the virtqueue interrupt. On any failure the code
 * falls through to vtblk_detach(), which tears down whatever portion
 * was set up.
 */
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	sc->vtblk_unit = device_get_unit(dev);

	VTBLK_LOCK_INIT(sc);

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	/* Remember read-only'ness so vtblk_strategy() can reject writes. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
				  sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	/* Completion processing runs from the per-CPU taskqueue. */
	TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_complete, sc);

	error = virtio_setup_intr(dev);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}
350
/*
 * Detach: mark the device detaching (new I/O is rejected), stop the
 * device, drain the deferred-completion task, then fail all pending
 * requests and free the request pool and sglist. Also used by
 * vtblk_attach() to unwind a partial attach.
 */
static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	/* Wait for any queued vtblk_complete() invocation to finish. */
	taskqueue_drain(taskqueue_thread[mycpuid], &sc->vtblk_intr_task);

	vtblk_drain(sc);

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}
377
378 static int
379 vtblk_suspend(device_t dev)
380 {
381         struct vtblk_softc *sc;
382
383         sc = device_get_softc(dev);
384
385         VTBLK_LOCK(sc);
386         sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
387         /* TODO Wait for any inflight IO to complete? */
388         VTBLK_UNLOCK(sc);
389
390         return (0);
391 }
392
393 static int
394 vtblk_resume(device_t dev)
395 {
396         struct vtblk_softc *sc;
397
398         sc = device_get_softc(dev);
399
400         VTBLK_LOCK(sc);
401         sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
402         /* TODO Resume IO? */
403         VTBLK_UNLOCK(sc);
404
405         return (0);
406 }
407
/* Shutdown: no device-specific work is required. */
static int
vtblk_shutdown(device_t dev)
{
	return (0);
}
413
414 static int
415 vtblk_open(struct dev_open_args *ap)
416 {
417         struct vtblk_softc *sc;
418         cdev_t dev = ap->a_head.a_dev;
419         sc = dev->si_drv1;
420         if (sc == NULL)
421                 return (ENXIO);
422
423         return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
424 }
425
/*
 * Kernel crash-dump entry point. On first use the device is switched
 * into polled dump mode (vtblk_prepare_dump). A non-zero length is a
 * data write; a NULL buffer at offset 0 signals the final flush.
 */
static int
vtblk_dump(struct dev_dump_args *ap)
{
	struct vtblk_softc *sc;
	int error;

	error = 0;

	cdev_t dev = ap->a_head.a_dev;
	sc = dev->si_drv1;

	if (sc == NULL)
		return (ENXIO);

	/* Cannot spin on the lock while dumping; fail instead of hanging. */
	if (VTBLK_TRYLOCK(sc) == 0) {
		device_printf(sc->vtblk_dev,
		    "softc already locked, cannot dump...\n");
		return (EBUSY);
	}

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (ap->a_length > 0) {
		error = vtblk_write_dump(sc, ap->a_virtual, ap->a_offset,
					 ap->a_length);
	} else if (ap->a_virtual == NULL && ap->a_offset == 0) {
		error = vtblk_flush_dump(sc);
	}

	VTBLK_UNLOCK(sc);

	return (error);
}
462
463 static int
464 vtblk_strategy(struct dev_strategy_args *ap)
465 {
466         struct vtblk_softc *sc;
467         cdev_t dev = ap->a_head.a_dev;
468         sc = dev->si_drv1;
469         struct bio *bio = ap->a_bio;
470         struct buf *bp = bio->bio_buf;
471
472         if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
473                 KKASSERT(bp->b_count > 0);
474         }
475
476         if (sc == NULL) {
477                 vtblk_bio_error(bio, EINVAL);
478                 return EINVAL;
479         }
480
481         /*
482          * Fail any write if RO. Unfortunately, there does not seem to
483          * be a better way to report our readonly'ness to GEOM above.
484          *
485          * XXX: Is that true in DFly?
486          */
487         if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
488             (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_FLUSH)) {
489                 vtblk_bio_error(bio, EROFS);
490                 return (EINVAL);
491         }
492
493         VTBLK_LOCK(sc);
494         if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
495                 devstat_start_transaction(&sc->stats);
496                 bioqdisksort(&sc->vtblk_bioq, bio);
497                 vtblk_startio(sc);
498         } else {
499                 vtblk_bio_error(bio, ENXIO);
500         }
501         VTBLK_UNLOCK(sc);
502         return 0;
503 }
504
505 static void
506 vtblk_negotiate_features(struct vtblk_softc *sc)
507 {
508         device_t dev;
509         uint64_t features;
510
511         dev = sc->vtblk_dev;
512         features = VTBLK_FEATURES;
513
514         sc->vtblk_features = virtio_negotiate_features(dev, features);
515 }
516
517 static int
518 vtblk_maximum_segments(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
519 {
520         device_t dev;
521         int nsegs;
522
523         dev = sc->vtblk_dev;
524         nsegs = VTBLK_MIN_SEGMENTS;
525
526         if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
527                 nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
528         } else {
529                 nsegs += 1;
530         }
531
532         return (nsegs);
533 }
534
535 static int
536 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
537 {
538         device_t dev;
539         struct vq_alloc_info vq_info;
540
541         dev = sc->vtblk_dev;
542
543         VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
544                            vtblk_vq_intr, sc, &sc->vtblk_vq,
545                            "%s request", device_get_nameunit(dev));
546
547         return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
548 }
549
/*
 * Create the disk device: build a disk_info from the VirtIO config
 * (sector size, capacity, geometry), register devstat accounting,
 * and attach the generic disk layer.
 */
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{

	struct disk_info info;

	/* construct the disk_info */
	bzero(&info, sizeof(info));

	/* Use the host's block size if advertised, else 512 bytes. */
	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = DEV_BSIZE;

	info.d_media_blksize = sc->vtblk_sector_size;
	info.d_media_blocks = blkcfg->capacity;

	info.d_ncylinders = blkcfg->geometry.cylinders;
	info.d_nheads = blkcfg->geometry.heads;
	info.d_secpertrack = blkcfg->geometry.sectors;

	info.d_secpercyl = info.d_secpertrack * info.d_nheads;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
			  DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
			  DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
			  DEVSTAT_PRIORITY_DISK);

	/* attach a generic disk device to ourselves */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
			       &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
}
585
/*
 * Submit as many requests as the virtqueue can hold. Previously
 * deferred ("ready") requests are issued before new bios are pulled
 * from the queue. Called with the softc lock held. A request that
 * fails to enqueue is parked on the ready list for the next pass.
 */
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	VTBLK_LOCK_ASSERT(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			/* Could not enqueue; retry on the next pass. */
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	/* Only kick the host if something was actually enqueued. */
	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_mtx);
}
618
/*
 * Take the next bio off the queue and build a vtblk_request for it.
 * Returns NULL if the bio queue is empty or no free request is
 * available (the bio is left queued in that case).
 */
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bp = bio;
	req->vbr_ack = -1;	/* poisoned; host overwrites on completion */
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

	/* Translate the buf command into a VirtIO request type. */
	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	/* Preserve ordering requests from the upper layers. */
	if (bp->b_flags & B_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}
665
/*
 * Build the descriptor chain for a request (header, optional data,
 * status byte) in the shared scratch sglist and enqueue it on the
 * virtqueue. Returns the virtqueue_enqueue() error, e.g. when no
 * descriptors are free.
 */
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bp;
	bp = bio->bio_buf;
	writable = 0;	/* count of segments the host writes into */

	/*
	 * sglist is live throughout this subroutine.
	 */
	sglist_reset(sg);

	error = sglist_append(sg, &req->vbr_hdr,
			      sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	/* Status byte is always host-writable and always last. */
	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
				  sg->sg_nseg - writable, writable);

	sglist_reset(sg);

	return (error);
}
713
714 static int
715 vtblk_vq_intr(void *xsc)
716 {
717         struct vtblk_softc *sc;
718
719         sc = xsc;
720
721         virtqueue_disable_intr(sc->vtblk_vq);
722         taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtblk_intr_task);
723
724         return (1);
725 }
726
/*
 * Deferred completion handler (runs from the taskqueue). Dequeues
 * finished requests, translates the VirtIO status byte into buf
 * error state, completes the bios, refills the virtqueue, and
 * re-enables interrupts — retrying if completions raced in while
 * they were off.
 */
static void
vtblk_complete(void *arg, int pending)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bio;
	struct buf *bp;

	sc = arg;
	vq = sc->vtblk_vq;

retry:
	VTBLK_LOCK(sc);
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		return;
	}

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bp;
		bp = bio->bio_buf;

		/* Map the host's status byte onto buf error state. */
		if (req->vbr_ack == VIRTIO_BLK_S_OK)
			bp->b_resid = 0;
		else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) {
				bp->b_error = ENOTSUP;
			} else {
				bp->b_error = EIO;
			}
		}

		devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

		VTBLK_UNLOCK(sc);
		/*
		 * Unlocking the controller around biodone() does not allow
		 * processing further device interrupts; when we queued
		 * vtblk_intr_task, we disabled interrupts. It will allow
		 * concurrent vtblk_strategy/_startio command dispatches.
		 */
		biodone(bio);
		VTBLK_LOCK(sc);

		/* Recycle the request slot for the next submission. */
		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If new virtqueue entries appeared immediately after
		 * enabling interrupts, process them now. Release and
		 * retake softcontroller lock to try to avoid blocking
		 * I/O dispatch for too long.
		 */
		virtqueue_disable_intr(vq);
		VTBLK_UNLOCK(sc);
		goto retry;
	}

	VTBLK_UNLOCK(sc);
}
792
/*
 * Quiesce the device: mask virtqueue interrupts and stop the VirtIO
 * device.
 */
static void
vtblk_stop(struct vtblk_softc *sc)
{
	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}
799
/*
 * Switch the device into polled dump mode: stop it, discard anything
 * in flight without completing the bios, then reinit with the
 * previously negotiated features and keep interrupts masked so
 * vtblk_poll_request() can poll for completions.
 */
static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0)
		panic("cannot reinit VirtIO block device during dump");

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}
825
826 static int
827 vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
828                  size_t length)
829 {
830         struct bio bio;
831         struct vtblk_request *req;
832         struct buf *bp;
833
834         req = &sc->vtblk_dump_request;
835         req->vbr_ack = -1;
836         req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
837         req->vbr_hdr.ioprio = 1;
838         req->vbr_hdr.sector = offset / DEV_BSIZE;
839
840         req->vbr_bp = &bio;
841         bzero(&buf, sizeof(struct bio));
842         bp = bio.bio_buf;
843
844         bp->b_cmd = BUF_CMD_WRITE;
845         bp->b_data = virtual;
846         bp->b_bcount = length;
847
848         return (vtblk_poll_request(sc, req));
849 }
850
851 static int
852 vtblk_flush_dump(struct vtblk_softc *sc)
853 {
854         struct bio bio;
855         struct vtblk_request *req;
856         struct buf *bp;
857
858         req = &sc->vtblk_dump_request;
859         req->vbr_ack = -1;
860         req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
861         req->vbr_hdr.ioprio = 1;
862         req->vbr_hdr.sector = 0;
863
864         req->vbr_bp = &bio;
865         bzero(&buf, sizeof(struct bio));
866         bp = bio.bio_buf;
867         
868         bp->b_cmd = BUF_CMD_FLUSH;
869
870         return (vtblk_poll_request(sc, req));
871 }
872
/*
 * Synchronously execute one request with interrupts masked: enqueue
 * it, notify the host, and spin in virtqueue_poll() until it
 * completes. Requires an empty virtqueue (only used from the dump
 * path, where a single request is outstanding at a time). Returns 0
 * on success or an errno derived from the status byte.
 */
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	device_t dev;
	struct virtqueue *vq;
	struct vtblk_request *r __debugvar;
	int error;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq, &sc->vtblk_mtx);

	/* Busy-wait for the host to complete our single request. */
	r = virtqueue_poll(vq, NULL);
	KASSERT(r == req, ("unexpected request response"));

	if (req->vbr_ack != VIRTIO_BLK_S_OK) {
		error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
		if (bootverbose)
			device_printf(dev,
			    "vtblk_poll_request: IO error: %d\n", error);
	}

	return (error);
}
905
906 static void
907 vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
908 {
909         struct virtqueue *vq;
910         struct vtblk_request *req;
911         int last;
912
913         vq = sc->vtblk_vq;
914         last = 0;
915
916         while ((req = virtqueue_drain(vq, &last)) != NULL) {
917                 if (!skip_done)
918                         vtblk_bio_error(req->vbr_bp, ENXIO);
919
920                 vtblk_enqueue_request(sc, req);
921         }
922
923         KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
924 }
925
926 static void
927 vtblk_drain(struct vtblk_softc *sc)
928 {
929         struct bio_queue_head *bioq;
930         struct vtblk_request *req;
931         struct bio *bp;
932
933         bioq = &sc->vtblk_bioq;
934
935         if (sc->vtblk_vq != NULL)
936                 vtblk_drain_vq(sc, 0);
937
938         while ((req = vtblk_dequeue_ready(sc)) != NULL) {
939                 vtblk_bio_error(req->vbr_bp, ENXIO);
940                 vtblk_enqueue_request(sc, req);
941         }
942
943         while (bioq_first(bioq) != NULL) {
944                 bp = bioq_takefirst(bioq);
945                 vtblk_bio_error(bp, ENXIO);
946         }
947
948         vtblk_free_requests(sc);
949 }
950
951 static int
952 vtblk_alloc_requests(struct vtblk_softc *sc)
953 {
954         struct vtblk_request *req;
955         int i, nreqs;
956
957         nreqs = virtqueue_size(sc->vtblk_vq);
958
959         /*
960          * Preallocate sufficient requests to keep the virtqueue full. Each
961          * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
962          * the number allocated when indirect descriptors are not available.
963          */
964         nreqs /= VTBLK_MIN_SEGMENTS;
965
966         for (i = 0; i < nreqs; i++) {
967                 req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);
968
969                 sc->vtblk_request_count++;
970                 vtblk_enqueue_request(sc, req);
971         }
972
973         return (0);
974 }
975
976 static void
977 vtblk_free_requests(struct vtblk_softc *sc)
978 {
979         struct vtblk_request *req;
980
981         while ((req = vtblk_dequeue_request(sc)) != NULL) {
982                 sc->vtblk_request_count--;
983                 kfree(req, M_DEVBUF);
984         }
985
986         KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
987 }
988
989 static struct vtblk_request *
990 vtblk_dequeue_request(struct vtblk_softc *sc)
991 {
992         struct vtblk_request *req;
993
994         req = TAILQ_FIRST(&sc->vtblk_req_free);
995         if (req != NULL)
996                 TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
997
998         return (req);
999 }
1000
1001 static void
1002 vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
1003 {
1004         bzero(req, sizeof(struct vtblk_request));
1005         TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
1006 }
1007
1008 static struct vtblk_request *
1009 vtblk_dequeue_ready(struct vtblk_softc *sc)
1010 {
1011         struct vtblk_request *req;
1012
1013         req = TAILQ_FIRST(&sc->vtblk_req_ready);
1014         if (req != NULL)
1015                 TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
1016
1017         return (req);
1018 }
1019
1020 static void
1021 vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
1022 {
1023         TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
1024 }
1025
1026 static void
1027 vtblk_bio_error(struct bio *bp, int error)
1028 {
1029         biodone(bp);
1030 }