2 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.c,v 1.4 2012/04/16 18:29:12 grehan Exp $
29 /* Driver for VirtIO block devices. */
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/sglist.h>
39 #include <sys/queue.h>
40 #include <sys/serialize.h>
44 #include <sys/devicestat.h>
46 #include <dev/virtual/virtio/virtio/virtio.h>
47 #include <dev/virtual/virtio/virtio/virtqueue.h>
48 #include "virtio_blk.h"
/*
 * vtblk_request: state for one in-flight block transfer.
 * NOTE(review): this extract is incomplete — the embedded original line
 * numbers jump 51 -> 55, so fields between vbr_hdr and vbr_link (and the
 * closing brace) are not visible here.
 */
50 struct vtblk_request {
/* Header handed to the device: request type, priority, start sector. */
51 struct virtio_blk_outhdr vbr_hdr;
/* Linkage on the softc's vtblk_req_free / vtblk_req_ready lists. */
55 TAILQ_ENTRY(vtblk_request) vbr_link;
/*
 * Interior of struct vtblk_softc (per-device soft state).
 * NOTE(review): the "struct vtblk_softc {" header line and several members
 * are missing from this extract (original line numbers jump).
 */
/* Serializer protecting the queue, request lists and flags. */
60 struct lwkt_serialize vtblk_slz;
/* Feature bits negotiated with the host (see vtblk_negotiate_features). */
61 uint64_t vtblk_features;
/* vtblk_flags bit values. NOTE(review): bit 0x0001 is not shown here. */
63 #define VTBLK_FLAG_READONLY 0x0002
64 #define VTBLK_FLAG_DETACH 0x0004
65 #define VTBLK_FLAG_SUSPEND 0x0008
/* The single request virtqueue and the scratch sglist used per request. */
68 struct virtqueue *vtblk_vq;
69 struct sglist *vtblk_sglist;
70 struct disk vtblk_disk;
/* Pending bios, preallocated free requests, and requests ready to retry. */
74 struct bio_queue_head vtblk_bioq;
75 TAILQ_HEAD(, vtblk_request) vtblk_req_free;
76 TAILQ_HEAD(, vtblk_request) vtblk_req_ready;
78 int vtblk_sector_size;
81 int vtblk_request_count;
/* Dedicated request for crash dumps (cannot allocate at dump time). */
83 struct vtblk_request vtblk_dump_request;
/*
 * Human-readable names for the VirtIO block feature bits, used for
 * boot-time feature reporting via virtio_set_feature_desc().
 * NOTE(review): the terminating sentinel entry and closing "};" are not
 * visible in this extract.
 */
86 static struct virtio_feature_desc vtblk_feature_desc[] = {
87 { VIRTIO_BLK_F_BARRIER, "HostBarrier" },
88 { VIRTIO_BLK_F_SIZE_MAX, "MaxSegSize" },
89 { VIRTIO_BLK_F_SEG_MAX, "MaxNumSegs" },
90 { VIRTIO_BLK_F_GEOMETRY, "DiskGeometry" },
91 { VIRTIO_BLK_F_RO, "ReadOnly" },
92 { VIRTIO_BLK_F_BLK_SIZE, "BlockSize" },
93 { VIRTIO_BLK_F_SCSI, "SCSICmds" },
94 { VIRTIO_BLK_F_FLUSH, "FlushCmd" },
95 { VIRTIO_BLK_F_TOPOLOGY, "Topology" },
/* Module event hook passed to DRIVER_MODULE() below. */
100 static int vtblk_modevent(module_t, int, void *);
/* newbus device method implementations. */
102 static int vtblk_probe(device_t);
103 static int vtblk_attach(device_t);
104 static int vtblk_detach(device_t);
105 static int vtblk_suspend(device_t);
106 static int vtblk_resume(device_t);
107 static int vtblk_shutdown(device_t);
/* Attach-time helpers: feature negotiation, sizing, vq/disk creation. */
109 static void vtblk_negotiate_features(struct vtblk_softc *);
110 static int vtblk_maximum_segments(struct vtblk_softc *,
111 struct virtio_blk_config *);
112 static int vtblk_alloc_virtqueue(struct vtblk_softc *);
113 static void vtblk_alloc_disk(struct vtblk_softc *,
114 struct virtio_blk_config *);
116 * Interface to the device switch.
118 static d_open_t vtblk_open;
119 static d_strategy_t vtblk_strategy;
120 static d_dump_t vtblk_dump;
/*
 * Character-device entry points for the "vbd" disk. Reads/writes are
 * routed through the generic phys* helpers into vtblk_strategy.
 * NOTE(review): original line 126 (presumably ".d_read = physread,") and
 * the closing "};" are missing from this extract.
 */
122 static struct dev_ops vbd_disk_ops = {
123 { "vbd", 200, D_DISK | D_MPSAFE },
124 .d_open = vtblk_open,
125 .d_close = nullclose,
127 .d_write = physwrite,
128 .d_strategy = vtblk_strategy,
129 .d_dump = vtblk_dump,
/* I/O dispatch path: pull bios off the queue and enqueue on the vq. */
132 static void vtblk_startio(struct vtblk_softc *);
133 static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
134 static int vtblk_execute_request(struct vtblk_softc *, struct vtblk_request *);
/* Virtqueue interrupt handler and deferred completion processing. */
136 static int vtblk_vq_intr(void *);
137 static void vtblk_complete(void *);
/* Teardown helpers. */
139 static void vtblk_stop(struct vtblk_softc *);
141 static void vtblk_drain_vq(struct vtblk_softc *, int);
142 static void vtblk_drain(struct vtblk_softc *);
/* Preallocated request pool management (free and ready lists). */
144 static int vtblk_alloc_requests(struct vtblk_softc *);
145 static void vtblk_free_requests(struct vtblk_softc *);
146 static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
147 static void vtblk_enqueue_request(struct vtblk_softc *,
148 struct vtblk_request *);
150 static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *);
151 static void vtblk_enqueue_ready(struct vtblk_softc *,
152 struct vtblk_request *);
/* Complete a bio with an error code. */
154 static void vtblk_bio_error(struct bio *, int);
/* Tunable: when nonzero, skip issuing the device-identify request. */
157 static int vtblk_no_ident = 0;
158 TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
160 /* Features desired/implemented by this driver. */
161 #define VTBLK_FEATURES \
162 (VIRTIO_BLK_F_BARRIER | \
163 VIRTIO_BLK_F_SIZE_MAX | \
164 VIRTIO_BLK_F_SEG_MAX | \
165 VIRTIO_BLK_F_GEOMETRY | \
167 VIRTIO_BLK_F_BLK_SIZE | \
171 * Each block request uses at least two segments - one for the header
172 * and one for the status.
/*
 * NOTE(review): original lines 166 and 168-170 are missing from this
 * extract, so the VTBLK_FEATURES macro above is truncated (likely missing
 * VIRTIO_BLK_F_RO / VIRTIO_BLK_F_FLUSH and the closing paren) — verify
 * against the original source.
 */
174 #define VTBLK_MIN_SEGMENTS 2
/*
 * newbus glue: method table, driver/devclass declarations and module
 * registration. NOTE(review): the DEVMETHOD_END sentinel, the driver name
 * and methods fields of vtblk_driver, and several closing braces are not
 * visible in this extract.
 */
176 static device_method_t vtblk_methods[] = {
177 /* Device methods. */
178 DEVMETHOD(device_probe, vtblk_probe),
179 DEVMETHOD(device_attach, vtblk_attach),
180 DEVMETHOD(device_detach, vtblk_detach),
181 DEVMETHOD(device_suspend, vtblk_suspend),
182 DEVMETHOD(device_resume, vtblk_resume),
183 DEVMETHOD(device_shutdown, vtblk_shutdown),
188 static driver_t vtblk_driver = {
191 sizeof(struct vtblk_softc)
193 static devclass_t vtblk_devclass;
/* Attach this driver to the virtio_pci bus. */
195 DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
196 vtblk_modevent, NULL);
197 MODULE_VERSION(virtio_blk, 1);
198 MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
/*
 * Module load/unload event handler (registered via DRIVER_MODULE above).
 * NOTE(review): the "static int" return-type line and the entire function
 * body are missing from this extract.
 */
201 vtblk_modevent(module_t mod, int type, void *unused)
/*
 * Probe: claim only VirtIO devices whose device type is VIRTIO_ID_BLOCK.
 * NOTE(review): the body of the if below (presumably "return (ENXIO);")
 * is missing from this extract.
 */
223 vtblk_probe(device_t dev)
226 if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
229 device_set_desc(dev, "VirtIO Block Adapter");
231 return (BUS_PROBE_DEFAULT);
/*
 * Attach: initialize soft state, negotiate features, size and allocate
 * the virtqueue, sglist and request pool, create the disk, and hook up
 * the interrupt. NOTE(review): this extract is missing many lines —
 * error-path returns/gotos, the failure cleanup label, and the final
 * return are not visible.
 */
235 vtblk_attach(device_t dev)
237 struct vtblk_softc *sc;
238 struct virtio_blk_config blkcfg;
241 sc = device_get_softc(dev);
243 sc->vtblk_unit = device_get_unit(dev);
245 lwkt_serialize_init(&sc->vtblk_slz);
247 bioq_init(&sc->vtblk_bioq);
248 TAILQ_INIT(&sc->vtblk_req_free);
249 TAILQ_INIT(&sc->vtblk_req_ready);
/* Register feature names, then negotiate VTBLK_FEATURES with the host. */
251 virtio_set_feature_desc(dev, vtblk_feature_desc);
252 vtblk_negotiate_features(sc);
254 if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
255 sc->vtblk_flags |= VTBLK_FLAG_READONLY;
257 /* Get local copy of config. */
258 virtio_read_device_config(dev, 0, &blkcfg,
259 sizeof(struct virtio_blk_config));
262 * With the current sglist(9) implementation, it is not easy
263 * for us to support a maximum segment size as adjacent
264 * segments are coalesced. For now, just make sure it's larger
265 * than the maximum supported transfer size.
267 if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
268 if (blkcfg.size_max < MAXPHYS) {
270 device_printf(dev, "host requires unsupported "
271 "maximum segment size feature\n");
/* Header + status + data segments; must exceed the bare minimum. */
276 sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
277 if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
279 device_printf(dev, "fewer than minimum number of segments "
280 "allowed: %d\n", sc->vtblk_max_nsegs);
285 * Allocate working sglist. The number of segments may be too
286 * large to safely store on the stack.
288 sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
289 if (sc->vtblk_sglist == NULL) {
291 device_printf(dev, "cannot allocate sglist\n");
295 error = vtblk_alloc_virtqueue(sc);
297 device_printf(dev, "cannot allocate virtqueue\n");
301 error = vtblk_alloc_requests(sc);
303 device_printf(dev, "cannot preallocate requests\n");
/* Create the disk before enabling interrupts so completions have a target. */
307 vtblk_alloc_disk(sc, &blkcfg);
309 error = virtio_setup_intr(dev, &sc->vtblk_slz);
311 device_printf(dev, "cannot setup virtqueue interrupt\n");
315 virtqueue_enable_intr(sc->vtblk_vq);
/*
 * Detach: mark the device detaching under the serializer, stop it if
 * still attached, then free resources. NOTE(review): lines between the
 * attached check and serializer exit (presumably vtblk_stop and drain
 * calls), and the trailing cleanup/return, are missing from this extract.
 */
325 vtblk_detach(device_t dev)
327 struct vtblk_softc *sc;
329 sc = device_get_softc(dev);
/* Set DETACH first so the I/O paths refuse new work. */
331 lwkt_serialize_enter(&sc->vtblk_slz);
332 sc->vtblk_flags |= VTBLK_FLAG_DETACH;
333 if (device_is_attached(dev))
335 lwkt_serialize_exit(&sc->vtblk_slz);
339 if (sc->vtblk_sglist != NULL) {
340 sglist_free(sc->vtblk_sglist);
341 sc->vtblk_sglist = NULL;
/*
 * Suspend: set the SUSPEND flag so vtblk_startio stops dispatching.
 * In-flight I/O is not waited for (see the TODO below).
 * NOTE(review): the final return statement is missing from this extract.
 */
348 vtblk_suspend(device_t dev)
350 struct vtblk_softc *sc;
352 sc = device_get_softc(dev);
354 lwkt_serialize_enter(&sc->vtblk_slz);
355 sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
356 /* TODO Wait for any inflight IO to complete? */
357 lwkt_serialize_exit(&sc->vtblk_slz);
/*
 * Resume: clear the SUSPEND flag set by vtblk_suspend.
 * NOTE(review): the final return statement is missing from this extract;
 * dispatch of queued I/O is not restarted here (see TODO).
 */
363 vtblk_resume(device_t dev)
365 struct vtblk_softc *sc;
367 sc = device_get_softc(dev);
369 lwkt_serialize_enter(&sc->vtblk_slz);
370 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
371 /* TODO Resume IO? */
372 lwkt_serialize_exit(&sc->vtblk_slz);
/*
 * System shutdown method. NOTE(review): return type line and entire
 * body are missing from this extract.
 */
378 vtblk_shutdown(device_t dev)
/*
 * d_open: succeed unless the device is detaching.
 * NOTE(review): the lines initializing sc from the cdev (and any NULL
 * check) are missing from this extract.
 */
384 vtblk_open(struct dev_open_args *ap)
386 struct vtblk_softc *sc;
387 cdev_t dev = ap->a_head.a_dev;
392 return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
/*
 * d_dump: crash-dump entry point (uses sc->vtblk_dump_request since
 * allocation is not possible at dump time — see softc). NOTE(review):
 * the entire body is missing from this extract.
 */
396 vtblk_dump(struct dev_dump_args *ap)
/*
 * d_strategy: validate the bio, queue it, and kick off dispatch.
 * NOTE(review): lines assigning sc, the validity check guarding the
 * EINVAL path, the vtblk_startio() call after queueing, and the return
 * are missing from this extract.
 */
403 vtblk_strategy(struct dev_strategy_args *ap)
405 struct vtblk_softc *sc;
406 cdev_t dev = ap->a_head.a_dev;
408 struct bio *bio = ap->a_bio;
409 struct buf *bp = bio->bio_buf;
412 vtblk_bio_error(bio, EINVAL);
417 * Fail any write if RO. Unfortunately, there does not seem to
418 * be a better way to report our readonly'ness to GEOM above.
420 * XXX: Is that true in DFly?
/*
 * NOTE(review): the comment above says writes are failed on read-only
 * media, yet the condition below tests BUF_CMD_READ — this looks
 * inverted (it would reject reads and allow writes on RO media).
 * Expected BUF_CMD_WRITE; verify against the upstream driver before
 * changing, since lines are missing from this extract.
 */
422 if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
423 (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_FLUSH)) {
424 vtblk_bio_error(bio, EROFS);
/* Accept the bio only if not detaching; account it for devstat. */
428 lwkt_serialize_enter(&sc->vtblk_slz);
429 if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
430 devstat_start_transaction(&sc->stats);
431 bioqdisksort(&sc->vtblk_bioq, bio);
434 vtblk_bio_error(bio, ENXIO);
436 lwkt_serialize_exit(&sc->vtblk_slz);
/*
 * Negotiate VTBLK_FEATURES with the host and record the accepted set.
 * NOTE(review): local declarations (features, dev) and braces are
 * missing from this extract.
 */
441 vtblk_negotiate_features(struct vtblk_softc *sc)
447 features = VTBLK_FEATURES;
449 sc->vtblk_features = virtio_negotiate_features(dev, features);
/*
 * Compute the per-request segment budget: the 2 mandatory segments
 * (header + status) plus data segments, capped by the host's seg_max
 * and by what a MAXPHYS-sized transfer can need. NOTE(review): the
 * else-branch for the no-SEG_MAX case and the return are missing from
 * this extract.
 */
453 vtblk_maximum_segments(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
459 nsegs = VTBLK_MIN_SEGMENTS;
461 if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
/* +1 covers a transfer that is not page-aligned at both ends. */
462 nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
/*
 * Allocate the single request virtqueue, sized for vtblk_max_nsegs
 * segments per request, with vtblk_vq_intr as its interrupt handler.
 */
471 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
474 struct vq_alloc_info vq_info;
478 VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
479 vtblk_vq_intr, sc, &sc->vtblk_vq,
480 "%s request", device_get_nameunit(dev));
482 return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
/*
 * Build the disk_info from the device config, register devstat
 * accounting, and create the "vbd" disk device.
 * NOTE(review): several lines are missing from this extract (e.g. the
 * disk_create() arguments between lines 514 and 517).
 */
486 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
489 struct disk_info info;
491 /* construct the disk_info */
492 bzero(&info, sizeof(info));
/* Use the host-advertised block size when offered, else 512 bytes. */
494 if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
495 sc->vtblk_sector_size = blkcfg->blk_size;
497 sc->vtblk_sector_size = DEV_BSIZE;
499 info.d_media_blksize = sc->vtblk_sector_size;
500 info.d_media_blocks = blkcfg->capacity;
/*
 * NOTE(review): geometry fields are read unconditionally here; the
 * VIRTIO_BLK_F_GEOMETRY gate (or a fallback) is presumably in lines
 * missing from this extract — verify.
 */
502 info.d_ncylinders = blkcfg->geometry.cylinders;
503 info.d_nheads = blkcfg->geometry.heads;
504 info.d_secpertrack = blkcfg->geometry.sectors;
506 info.d_secpercyl = info.d_secpertrack * info.d_nheads;
508 devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
509 DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
510 DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
511 DEVSTAT_PRIORITY_DISK);
513 /* attach a generic disk device to ourselves */
514 sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
/* Stash the softc so the dev_ops entry points can find it. */
517 sc->cdev->si_drv1 = sc;
518 disk_setdiskinfo(&sc->vtblk_disk, &info);
/*
 * Dispatch loop: while the virtqueue has room, take a previously-deferred
 * ready request (or build one from the next queued bio) and enqueue it.
 * Requests that fail to enqueue go back on the ready list for retry.
 * Caller must hold vtblk_slz. NOTE(review): the vq assignment, the
 * break statements, the dispatched-count tracking, and the condition
 * guarding the final notify are missing from this extract.
 */
522 vtblk_startio(struct vtblk_softc *sc)
524 struct virtqueue *vq;
525 struct vtblk_request *req;
531 ASSERT_SERIALIZED(&sc->vtblk_slz);
/* Do not start new I/O while suspended. */
533 if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
536 while (!virtqueue_full(vq)) {
537 if ((req = vtblk_dequeue_ready(sc)) == NULL)
538 req = vtblk_bio_request(sc);
542 if (vtblk_execute_request(sc, req) != 0) {
/* Could not enqueue (e.g. vq full); keep it for the next pass. */
543 vtblk_enqueue_ready(sc, req);
551 virtqueue_notify(vq, &sc->vtblk_slz);
554 static struct vtblk_request *
/*
 * Build a vtblk_request from the next queued bio, translating the buf
 * command into a virtio_blk request type. Returns NULL when there is no
 * bio or no free request. NOTE(review): the NULL returns, the switch
 * statement framing around the per-command cases, and the final return
 * are missing from this extract.
 */
555 vtblk_bio_request(struct vtblk_softc *sc)
557 struct bio_queue_head *bioq;
558 struct vtblk_request *req;
562 bioq = &sc->vtblk_bioq;
564 if (bioq_first(bioq) == NULL)
567 req = vtblk_dequeue_request(sc);
571 bio = bioq_takefirst(bioq);
574 req->vbr_hdr.ioprio = 1;
579 req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
/*
 * Per the virtio spec the sector field is always in 512-byte units
 * (DEV_BSIZE), independent of the advertised blk_size.
 */
582 req->vbr_hdr.type = VIRTIO_BLK_T_IN;
583 req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
586 req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
587 req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
590 KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
591 req->vbr_hdr.type = -1;
/* Ordered bios map to the (legacy) barrier bit. */
595 if (bp->b_flags & B_ORDERED)
596 req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
/*
 * Lay out a request on the shared sglist (header, optional data, status
 * byte) and enqueue it on the virtqueue. Read requests mark their data
 * segments plus the ack as host-writable. NOTE(review): the declarations
 * of bp/writable/error, the sglist_reset call, and the final return of
 * the enqueue status are missing from this extract.
 */
602 vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
609 sg = sc->vtblk_sglist;
615 * sglist is live throughout this subroutine.
/* Segment 0: the fixed-size request header. */
619 error = sglist_append(sg, &req->vbr_hdr,
620 sizeof(struct virtio_blk_outhdr));
621 KASSERT(error == 0, ("error adding header to sglist"));
622 KASSERT(sg->sg_nseg == 1,
623 ("header spanned multiple segments: %d", sg->sg_nseg));
625 if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
626 error = sglist_append(sg, bp->b_data, bp->b_bcount);
627 KASSERT(error == 0, ("error adding buffer to sglist"));
629 /* BUF_CMD_READ means the host writes into our buffer. */
630 if (bp->b_cmd == BUF_CMD_READ)
631 writable += sg->sg_nseg - 1;
/* Final segment: the status byte, always written by the host. */
634 error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
635 KASSERT(error == 0, ("error adding ack to sglist"));
638 KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
639 ("fewer than min segments: %d", sg->sg_nseg));
641 error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
642 sg->sg_nseg - writable, writable);
/*
 * Virtqueue interrupt handler registered in vtblk_alloc_virtqueue.
 * NOTE(review): the return type line and the body (which presumably
 * defers work to vtblk_complete) are missing from this extract.
 */
650 vtblk_vq_intr(void *xsc)
/*
 * Completion processing: with interrupts disabled, drain finished
 * requests from the virtqueue, finish their bios (dropping the
 * serializer around biodone to allow concurrent dispatch), recycle the
 * requests, then re-enable interrupts — re-draining if entries raced in.
 * NOTE(review): many lines are missing from this extract: the sc/vq
 * assignments, serializer entry, error/bio handling details, the
 * biodone call, the retry loop structure, and the startio call.
 */
658 vtblk_complete(void *arg)
660 struct vtblk_softc *sc;
661 struct vtblk_request *req;
662 struct virtqueue *vq;
/* Block further handler invocations while we process completions. */
669 lwkt_serialize_handler_disable(&sc->vtblk_slz);
670 virtqueue_disable_intr(sc->vtblk_vq);
671 ASSERT_SERIALIZED(&sc->vtblk_slz);
674 if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
677 while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
/* Map the device status byte onto buf error state. */
681 if (req->vbr_ack == VIRTIO_BLK_S_OK)
684 bp->b_flags |= B_ERROR;
685 if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) {
686 bp->b_error = ENOTSUP;
692 devstat_end_transaction_buf(&sc->stats, bio->bio_buf);
694 lwkt_serialize_exit(&sc->vtblk_slz);
696 * Unlocking the controller around biodone() does not allow
697 * processing further device interrupts; when we queued
698 * vtblk_complete, we disabled interrupts. It will allow
699 * concurrent vtblk_strategy/_startio command dispatches.
702 lwkt_serialize_enter(&sc->vtblk_slz);
704 vtblk_enqueue_request(sc, req);
709 if (virtqueue_enable_intr(vq) != 0) {
711 * If new virtqueue entries appeared immediately after
712 * enabling interrupts, process them now. Release and
713 * retake softcontroller lock to try to avoid blocking
714 * I/O dispatch for too long.
716 virtqueue_disable_intr(vq);
719 lwkt_serialize_handler_enable(&sc->vtblk_slz);
/*
 * Quiesce the device: mask virtqueue interrupts, then reset the device
 * via virtio_stop.
 */
723 vtblk_stop(struct vtblk_softc *sc)
725 virtqueue_disable_intr(sc->vtblk_vq);
726 virtio_stop(sc->vtblk_dev);
/*
 * Drain all requests still on the virtqueue, failing their bios with
 * ENXIO and returning the requests to the free pool. NOTE(review): the
 * vq/last initialization and the use of skip_done (presumably to skip
 * error-completion of already-finished requests) are in lines missing
 * from this extract — verify its semantics upstream.
 */
730 vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
732 struct virtqueue *vq;
733 struct vtblk_request *req;
739 while ((req = virtqueue_drain(vq, &last)) != NULL) {
741 vtblk_bio_error(req->vbr_bp, ENXIO);
743 vtblk_enqueue_request(sc, req);
746 KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
/*
 * Fail and discard every piece of outstanding work: requests on the
 * virtqueue, requests on the ready list, and bios still queued — then
 * free the preallocated request pool. Used on detach.
 */
750 vtblk_drain(struct vtblk_softc *sc)
752 struct bio_queue_head *bioq;
753 struct vtblk_request *req;
756 bioq = &sc->vtblk_bioq;
758 if (sc->vtblk_vq != NULL)
759 vtblk_drain_vq(sc, 0);
761 while ((req = vtblk_dequeue_ready(sc)) != NULL) {
762 vtblk_bio_error(req->vbr_bp, ENXIO);
763 vtblk_enqueue_request(sc, req);
766 while (bioq_first(bioq) != NULL) {
767 bp = bioq_takefirst(bioq);
768 vtblk_bio_error(bp, ENXIO);
771 vtblk_free_requests(sc);
/*
 * Preallocate one request per VTBLK_MIN_SEGMENTS virtqueue descriptors
 * (each request uses at least that many), pushing each onto the free
 * list. NOTE(review): indirect-descriptor handling mentioned in the
 * comment and the final return are in lines missing from this extract.
 */
775 vtblk_alloc_requests(struct vtblk_softc *sc)
777 struct vtblk_request *req;
780 nreqs = virtqueue_size(sc->vtblk_vq);
783 * Preallocate sufficient requests to keep the virtqueue full. Each
784 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
785 * the number allocated when indirect descriptors are not available.
787 nreqs /= VTBLK_MIN_SEGMENTS;
789 for (i = 0; i < nreqs; i++) {
790 req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);
792 sc->vtblk_request_count++;
793 vtblk_enqueue_request(sc, req);
/*
 * Free every request on the free list, keeping vtblk_request_count
 * honest; asserts that no requests were leaked elsewhere.
 */
800 vtblk_free_requests(struct vtblk_softc *sc)
802 struct vtblk_request *req;
804 while ((req = vtblk_dequeue_request(sc)) != NULL) {
805 sc->vtblk_request_count--;
806 kfree(req, M_DEVBUF);
809 KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
812 static struct vtblk_request *
/*
 * Pop a request off the free list; NOTE(review): the NULL-check guard
 * around TAILQ_REMOVE and the return statement are missing from this
 * extract.
 */
813 vtblk_dequeue_request(struct vtblk_softc *sc)
815 struct vtblk_request *req;
817 req = TAILQ_FIRST(&sc->vtblk_req_free);
819 TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
/*
 * Return a request to the free list, zeroing it first so stale header
 * and bio state cannot leak into the next use.
 */
825 vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
827 bzero(req, sizeof(struct vtblk_request));
828 TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
831 static struct vtblk_request *
/*
 * Pop a deferred (built but not yet enqueued) request off the ready
 * list; NOTE(review): the NULL-check guard and return statement are
 * missing from this extract.
 */
832 vtblk_dequeue_ready(struct vtblk_softc *sc)
834 struct vtblk_request *req;
836 req = TAILQ_FIRST(&sc->vtblk_req_ready);
838 TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
/*
 * Push a built request onto the ready list so vtblk_startio retries it
 * on the next dispatch pass.
 */
844 vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
846 TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
850 vtblk_bio_error(struct bio *bp, int error)