sys/dev/virtual/virtio/block/virtio_blk.c
/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/dev/virtio/block/virtio_blk.c,v 1.4 2012/04/16 18:29:12 grehan Exp $
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>
#include "virtio_blk.h"

struct vtblk_request {
        struct virtio_blk_outhdr        vbr_hdr;
        struct bio                      *vbr_bp;
        uint8_t                         vbr_ack;

        TAILQ_ENTRY(vtblk_request)      vbr_link;
};

struct vtblk_softc {
        device_t                        vtblk_dev;
        struct spinlock                 vtblk_mtx;
        uint64_t                        vtblk_features;

#define VTBLK_FLAG_READONLY             0x0002
#define VTBLK_FLAG_DETACH               0x0004
#define VTBLK_FLAG_SUSPEND              0x0008
#define VTBLK_FLAG_DUMPING              0x0010
        uint32_t                        vtblk_flags;

        struct virtqueue                *vtblk_vq;
        struct sglist                   *vtblk_sglist;
        struct disk                     vtblk_disk;
        cdev_t                          cdev;
        struct devstat                  stats;

        struct bio_queue_head           vtblk_bioq;
        TAILQ_HEAD(, vtblk_request)     vtblk_req_free;
        TAILQ_HEAD(, vtblk_request)     vtblk_req_ready;

        struct task                     vtblk_intr_task;

        int                             vtblk_sector_size;
        int                             vtblk_max_nsegs;
        int                             vtblk_unit;
        int                             vtblk_request_count;

        struct vtblk_request            vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
        { VIRTIO_BLK_F_BARRIER,         "HostBarrier"   },
        { VIRTIO_BLK_F_SIZE_MAX,        "MaxSegSize"    },
        { VIRTIO_BLK_F_SEG_MAX,         "MaxNumSegs"    },
        { VIRTIO_BLK_F_GEOMETRY,        "DiskGeometry"  },
        { VIRTIO_BLK_F_RO,              "ReadOnly"      },
        { VIRTIO_BLK_F_BLK_SIZE,        "BlockSize"     },
        { VIRTIO_BLK_F_SCSI,            "SCSICmds"      },
        { VIRTIO_BLK_F_FLUSH,           "FlushCmd"      },
        { VIRTIO_BLK_F_TOPOLOGY,        "Topology"      },

        { 0, NULL }
};

static int      vtblk_modevent(module_t, int, void *);

static int      vtblk_probe(device_t);
static int      vtblk_attach(device_t);
static int      vtblk_detach(device_t);
static int      vtblk_suspend(device_t);
static int      vtblk_resume(device_t);
static int      vtblk_shutdown(device_t);

static void     vtblk_negotiate_features(struct vtblk_softc *);
static int      vtblk_maximum_segments(struct vtblk_softc *,
                                       struct virtio_blk_config *);
static int      vtblk_alloc_virtqueue(struct vtblk_softc *);
static void     vtblk_alloc_disk(struct vtblk_softc *,
                                 struct virtio_blk_config *);
/*
 * Interface to the device switch.
 */
static d_open_t         vtblk_open;
static d_strategy_t     vtblk_strategy;
static d_dump_t         vtblk_dump;

static struct dev_ops vbd_disk_ops = {
        { "vbd", 200, D_DISK | D_MPSAFE },
        .d_open         = vtblk_open,
        .d_close        = nullclose,
        .d_read         = physread,
        .d_write        = physwrite,
        .d_strategy     = vtblk_strategy,
        .d_dump         = vtblk_dump,
};

static void             vtblk_startio(struct vtblk_softc *);
static struct vtblk_request *vtblk_bio_request(struct vtblk_softc *);
static int              vtblk_execute_request(struct vtblk_softc *,
                                              struct vtblk_request *);

static int              vtblk_vq_intr(void *);
static void             vtblk_complete(void *, int);

static void             vtblk_stop(struct vtblk_softc *);

static void             vtblk_prepare_dump(struct vtblk_softc *);
static int              vtblk_write_dump(struct vtblk_softc *, void *, off_t,
                                         size_t);
static int              vtblk_flush_dump(struct vtblk_softc *);
static int              vtblk_poll_request(struct vtblk_softc *,
                                           struct vtblk_request *);

static void             vtblk_drain_vq(struct vtblk_softc *, int);
static void             vtblk_drain(struct vtblk_softc *);

static int              vtblk_alloc_requests(struct vtblk_softc *);
static void             vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request *vtblk_dequeue_request(struct vtblk_softc *);
static void             vtblk_enqueue_request(struct vtblk_softc *,
                                              struct vtblk_request *);

static struct vtblk_request *vtblk_dequeue_ready(struct vtblk_softc *);
static void             vtblk_enqueue_ready(struct vtblk_softc *,
                                            struct vtblk_request *);

static void             vtblk_bio_error(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER               | \
     VIRTIO_BLK_F_SIZE_MAX              | \
     VIRTIO_BLK_F_SEG_MAX               | \
     VIRTIO_BLK_F_GEOMETRY              | \
     VIRTIO_BLK_F_RO                    | \
     VIRTIO_BLK_F_BLK_SIZE              | \
     VIRTIO_BLK_F_FLUSH)

#define VTBLK_MTX(_sc)          &(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc)    spin_init(&(_sc)->vtblk_mtx)
#define VTBLK_LOCK(_sc)         spin_lock(VTBLK_MTX((_sc)))
#define VTBLK_TRYLOCK(_sc)      spin_trylock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)       spin_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc) spin_uninit(VTBLK_MTX((_sc)))

#define VTBLK_LOCK_ASSERT(_sc)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS      2

static device_method_t vtblk_methods[] = {
        /* Device methods. */
        DEVMETHOD(device_probe,         vtblk_probe),
        DEVMETHOD(device_attach,        vtblk_attach),
        DEVMETHOD(device_detach,        vtblk_detach),
        DEVMETHOD(device_suspend,       vtblk_suspend),
        DEVMETHOD(device_resume,        vtblk_resume),
        DEVMETHOD(device_shutdown,      vtblk_shutdown),

        { 0, 0 }
};

static driver_t vtblk_driver = {
        "vtblk",
        vtblk_methods,
        sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
              vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

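/*
 * Module event handler. No per-event work is required; unknown event
 * types are simply rejected.
 */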
static int
vtblk_modevent(module_t mod, int type, void *unused)
{
        int error;

        error = 0;

        switch (type) {
        case MOD_LOAD:
                break;
        case MOD_UNLOAD:
                break;
        case MOD_SHUTDOWN:
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        return (error);
}

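/*
 * Match any VirtIO device that identifies itself as a block device.
 */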
static int
vtblk_probe(device_t dev)
{

        if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
                return (ENXIO);

        device_set_desc(dev, "VirtIO Block Adapter");

        return (BUS_PROBE_DEFAULT);
}

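/*
 * Attach: negotiate features, size the request sglist, allocate the
 * virtqueue and the request pool, create the disk, and hook up the
 * virtqueue interrupt.
 */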
static int
vtblk_attach(device_t dev)
{
        struct vtblk_softc *sc;
        struct virtio_blk_config blkcfg;
        int error;

        sc = device_get_softc(dev);
        sc->vtblk_dev = dev;
        sc->vtblk_unit = device_get_unit(dev);

        VTBLK_LOCK_INIT(sc);

        bioq_init(&sc->vtblk_bioq);
        TAILQ_INIT(&sc->vtblk_req_free);
        TAILQ_INIT(&sc->vtblk_req_ready);

        virtio_set_feature_desc(dev, vtblk_feature_desc);
        vtblk_negotiate_features(sc);

        if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
                sc->vtblk_flags |= VTBLK_FLAG_READONLY;

        /* Get local copy of config. */
        virtio_read_device_config(dev, 0, &blkcfg,
                                  sizeof(struct virtio_blk_config));

        /*
         * With the current sglist(9) implementation, it is not easy
         * for us to support a maximum segment size as adjacent
         * segments are coalesced. For now, just make sure it's larger
         * than the maximum supported transfer size.
         */
        if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
                if (blkcfg.size_max < MAXPHYS) {
                        error = ENOTSUP;
                        device_printf(dev, "host requires unsupported "
                            "maximum segment size feature\n");
                        goto fail;
                }
        }

        sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
                error = EINVAL;
                device_printf(dev, "fewer than minimum number of segments "
                    "allowed: %d\n", sc->vtblk_max_nsegs);
                goto fail;
        }

        /*
         * Allocate working sglist. The number of segments may be too
         * large to safely store on the stack.
         */
        sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
        if (sc->vtblk_sglist == NULL) {
                error = ENOMEM;
                device_printf(dev, "cannot allocate sglist\n");
                goto fail;
        }

        error = vtblk_alloc_virtqueue(sc);
        if (error) {
                device_printf(dev, "cannot allocate virtqueue\n");
                goto fail;
        }

        error = vtblk_alloc_requests(sc);
        if (error) {
                device_printf(dev, "cannot preallocate requests\n");
                goto fail;
        }

        vtblk_alloc_disk(sc, &blkcfg);

        TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_complete, sc);

        error = virtio_setup_intr(dev);
        if (error) {
                device_printf(dev, "cannot setup virtqueue interrupt\n");
                goto fail;
        }

        virtqueue_enable_intr(sc->vtblk_vq);

fail:
        if (error)
                vtblk_detach(dev);

        return (error);
}

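/*
 * Detach: stop the device, drain the virtqueue and bio queue, and
 * release all resources.
 */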
static int
vtblk_detach(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_DETACH;
        if (device_is_attached(dev))
                vtblk_stop(sc);
        VTBLK_UNLOCK(sc);

        taskqueue_drain(taskqueue_thread[mycpuid], &sc->vtblk_intr_task);

        vtblk_drain(sc);

        if (sc->vtblk_sglist != NULL) {
                sglist_free(sc->vtblk_sglist);
                sc->vtblk_sglist = NULL;
        }

        VTBLK_LOCK_DESTROY(sc);

        return (0);
}

static int
vtblk_suspend(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
        /* TODO Wait for any inflight IO to complete? */
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_resume(device_t dev)
{
        struct vtblk_softc *sc;

        sc = device_get_softc(dev);

        VTBLK_LOCK(sc);
        sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
        /* TODO Resume IO? */
        VTBLK_UNLOCK(sc);

        return (0);
}

static int
vtblk_shutdown(device_t dev)
{
        return (0);
}

static int
vtblk_open(struct dev_open_args *ap)
{
        struct vtblk_softc *sc;
        cdev_t dev = ap->a_head.a_dev;

        sc = dev->si_drv1;
        if (sc == NULL)
                return (ENXIO);

        return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

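/*
 * Kernel core dump entry point. Interrupt-driven I/O is unavailable
 * here, so the device is reinitialized on first use and each request
 * is issued synchronously by polling the virtqueue.
 */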
static int
vtblk_dump(struct dev_dump_args *ap)
{
        struct vtblk_softc *sc;
        cdev_t dev = ap->a_head.a_dev;
        int error;

        error = 0;

        sc = dev->si_drv1;
        if (sc == NULL)
                return (ENXIO);

        if (VTBLK_TRYLOCK(sc) == 0) {
                device_printf(sc->vtblk_dev,
                    "softc already locked, cannot dump...\n");
                return (EBUSY);
        }

        if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
                vtblk_prepare_dump(sc);
                sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
        }

        if (ap->a_length > 0) {
                error = vtblk_write_dump(sc, ap->a_virtual, ap->a_offset,
                                         ap->a_length);
        } else if (ap->a_virtual == NULL && ap->a_offset == 0) {
                error = vtblk_flush_dump(sc);
        }

        VTBLK_UNLOCK(sc);

        return (error);
}

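/*
 * Queue an incoming bio on the device and kick off I/O dispatch.
 */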
static int
vtblk_strategy(struct dev_strategy_args *ap)
{
        struct vtblk_softc *sc;
        cdev_t dev = ap->a_head.a_dev;
        struct bio *bio = ap->a_bio;
        struct buf *bp = bio->bio_buf;

        sc = dev->si_drv1;
        if (sc == NULL) {
                vtblk_bio_error(bio, EINVAL);
                return EINVAL;
        }

        /*
         * Fail any write if RO. Unfortunately, there does not seem to
         * be a better way to report our readonly'ness to GEOM above.
         *
         * XXX: Is that true in DFly?
         */
        if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
            (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
                vtblk_bio_error(bio, EROFS);
                return (EINVAL);
        }

        VTBLK_LOCK(sc);
        if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
                devstat_start_transaction(&sc->stats);
                bioqdisksort(&sc->vtblk_bioq, bio);
                vtblk_startio(sc);
        } else {
                vtblk_bio_error(bio, ENXIO);
        }
        VTBLK_UNLOCK(sc);
        return 0;
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
        device_t dev;
        uint64_t features;

        dev = sc->vtblk_dev;
        features = VTBLK_FEATURES;

        sc->vtblk_features = virtio_negotiate_features(dev, features);
}

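/*
 * Compute the maximum number of sglist segments per request: the
 * header and status segments plus whatever the host allows for data.
 */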
static int
vtblk_maximum_segments(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        device_t dev;
        int nsegs;

        dev = sc->vtblk_dev;
        nsegs = VTBLK_MIN_SEGMENTS;

        if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
                nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
        } else {
                nsegs += 1;
        }

        return (nsegs);
}

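/*
 * Allocate the single request virtqueue.
 */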
static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
        device_t dev;
        struct vq_alloc_info vq_info;

        dev = sc->vtblk_dev;

        VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
                           vtblk_vq_intr, sc, &sc->vtblk_vq,
                           "%s request", device_get_nameunit(dev));

        return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
        struct disk_info info;

        /* construct the disk_info */
        bzero(&info, sizeof(info));

        if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
                sc->vtblk_sector_size = blkcfg->blk_size;
        else
                sc->vtblk_sector_size = DEV_BSIZE;

        info.d_media_blksize = sc->vtblk_sector_size;
        info.d_media_blocks = blkcfg->capacity;

        info.d_ncylinders = blkcfg->geometry.cylinders;
        info.d_nheads = blkcfg->geometry.heads;
        info.d_secpertrack = blkcfg->geometry.sectors;

        info.d_secpercyl = info.d_secpertrack * info.d_nheads;

        devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
                          DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
                          DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
                          DEVSTAT_PRIORITY_DISK);

        /* attach a generic disk device to ourselves */
        sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
                               &vbd_disk_ops);

        sc->cdev->si_drv1 = sc;
        disk_setdiskinfo(&sc->vtblk_disk, &info);
}

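/*
 * Dispatch as many queued requests as the virtqueue will hold,
 * preferring requests that previously failed to enqueue.
 */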
static void
vtblk_startio(struct vtblk_softc *sc)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int enq;

        vq = sc->vtblk_vq;
        enq = 0;

        VTBLK_LOCK_ASSERT(sc);

        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
                return;

        while (!virtqueue_full(vq)) {
                if ((req = vtblk_dequeue_ready(sc)) == NULL)
                        req = vtblk_bio_request(sc);
                if (req == NULL)
                        break;

                if (vtblk_execute_request(sc, req) != 0) {
                        vtblk_enqueue_ready(sc, req);
                        break;
                }

                enq++;
        }

        if (enq > 0)
                virtqueue_notify(vq, &sc->vtblk_mtx);
}

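/*
 * Build a vtblk_request from the next queued bio, translating the
 * buf command into the corresponding VirtIO request type.
 */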
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bio;
        struct buf *bp;

        bioq = &sc->vtblk_bioq;

        if (bioq_first(bioq) == NULL)
                return (NULL);

        req = vtblk_dequeue_request(sc);
        if (req == NULL)
                return (NULL);

        bio = bioq_takefirst(bioq);
        req->vbr_bp = bio;
        req->vbr_ack = -1;
        req->vbr_hdr.ioprio = 1;
        bp = bio->bio_buf;

        switch (bp->b_cmd) {
        case BUF_CMD_FLUSH:
                req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
                break;
        case BUF_CMD_READ:
                req->vbr_hdr.type = VIRTIO_BLK_T_IN;
                req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
                break;
        case BUF_CMD_WRITE:
                req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
                req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
                break;
        default:
                KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
                req->vbr_hdr.type = -1;
                break;
        }

        if (bp->b_flags & B_ORDERED)
                req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

        return (req);
}

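/*
 * Load the header, data, and status segments into the shared sglist
 * and enqueue the request on the virtqueue. The header is host-readable
 * and the status byte host-writable; data segments are writable only
 * for reads.
 */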
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        struct sglist *sg;
        struct bio *bio;
        struct buf *bp;
        int writable, error;

        sg = sc->vtblk_sglist;
        bio = req->vbr_bp;
        bp = bio->bio_buf;
        writable = 0;

        /*
         * sglist is live throughout this subroutine.
         */
        sglist_reset(sg);

        error = sglist_append(sg, &req->vbr_hdr,
                              sizeof(struct virtio_blk_outhdr));
        KASSERT(error == 0, ("error adding header to sglist"));
        KASSERT(sg->sg_nseg == 1,
            ("header spanned multiple segments: %d", sg->sg_nseg));

        if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
                error = sglist_append(sg, bp->b_data, bp->b_bcount);
                KASSERT(error == 0, ("error adding buffer to sglist"));

                /* BUF_CMD_READ means the host writes into our buffer. */
                if (bp->b_cmd == BUF_CMD_READ)
                        writable += sg->sg_nseg - 1;
        }

        error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
        KASSERT(error == 0, ("error adding ack to sglist"));
        writable++;

        KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
            ("fewer than min segments: %d", sg->sg_nseg));

        error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
                                  sg->sg_nseg - writable, writable);

        sglist_reset(sg);

        return (error);
}

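/*
 * Virtqueue interrupt handler; defers completion processing to the
 * taskqueue with further interrupts disabled.
 */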
static int
vtblk_vq_intr(void *xsc)
{
        struct vtblk_softc *sc;

        sc = xsc;

        virtqueue_disable_intr(sc->vtblk_vq);
        taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtblk_intr_task);

        return (1);
}

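/*
 * Taskqueue completion handler: retire finished requests, restart
 * I/O dispatch, and re-enable virtqueue interrupts, rechecking for
 * entries that raced the re-enable.
 */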
static void
vtblk_complete(void *arg, int pending)
{
        struct vtblk_softc *sc;
        struct vtblk_request *req;
        struct virtqueue *vq;
        struct bio *bio;
        struct buf *bp;

        sc = arg;
        vq = sc->vtblk_vq;

retry:
        VTBLK_LOCK(sc);
        if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
                VTBLK_UNLOCK(sc);
                return;
        }

        while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
                bio = req->vbr_bp;
                bp = bio->bio_buf;

                if (req->vbr_ack == VIRTIO_BLK_S_OK) {
                        bp->b_resid = 0;
                } else {
                        bp->b_flags |= B_ERROR;
                        if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
                                bp->b_error = ENOTSUP;
                        else
                                bp->b_error = EIO;
                }

                devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

                VTBLK_UNLOCK(sc);
                /*
                 * Unlocking the controller around biodone() does not allow
                 * processing further device interrupts; when we queued
                 * vtblk_intr_task, we disabled interrupts. It does, however,
                 * allow concurrent vtblk_strategy()/vtblk_startio() command
                 * dispatches.
                 */
                biodone(bio);
                VTBLK_LOCK(sc);

                vtblk_enqueue_request(sc, req);
        }

        vtblk_startio(sc);

        if (virtqueue_enable_intr(vq) != 0) {
                /*
                 * If new virtqueue entries appeared immediately after
                 * enabling interrupts, process them now. Release and
                 * reacquire the softc lock to avoid blocking I/O
                 * dispatch for too long.
                 */
                virtqueue_disable_intr(vq);
                VTBLK_UNLOCK(sc);
                goto retry;
        }

        VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{
        virtqueue_disable_intr(sc->vtblk_vq);
        virtio_stop(sc->vtblk_dev);
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
        device_t dev;
        struct virtqueue *vq;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        vtblk_stop(sc);

        /*
         * Drain all requests caught in-flight in the virtqueue,
         * skipping biodone(). When dumping, only one request is
         * outstanding at a time, and we just poll the virtqueue
         * for the response.
         */
        vtblk_drain_vq(sc, 1);

        if (virtio_reinit(dev, sc->vtblk_features) != 0)
                panic("cannot reinit VirtIO block device during dump");

        virtqueue_disable_intr(vq);
        virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
                 size_t length)
{
        struct bio bio;
        struct buf buf;
        struct vtblk_request *req;
        struct buf *bp;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = offset / DEV_BSIZE;

        /* Use an on-stack bio/buf pair; the dump path cannot allocate. */
        bzero(&bio, sizeof(struct bio));
        bzero(&buf, sizeof(struct buf));
        bio.bio_buf = &buf;
        req->vbr_bp = &bio;
        bp = bio.bio_buf;

        bp->b_cmd = BUF_CMD_WRITE;
        bp->b_data = virtual;
        bp->b_bcount = length;

        return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
        struct bio bio;
        struct buf buf;
        struct vtblk_request *req;
        struct buf *bp;

        req = &sc->vtblk_dump_request;
        req->vbr_ack = -1;
        req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
        req->vbr_hdr.ioprio = 1;
        req->vbr_hdr.sector = 0;

        /* Use an on-stack bio/buf pair; the dump path cannot allocate. */
        bzero(&bio, sizeof(struct bio));
        bzero(&buf, sizeof(struct buf));
        bio.bio_buf = &buf;
        req->vbr_bp = &bio;
        bp = bio.bio_buf;

        bp->b_cmd = BUF_CMD_FLUSH;

        return (vtblk_poll_request(sc, req));
}

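/*
 * Synchronously execute a single request, busy-polling the virtqueue
 * for its completion. Used only by the dump path.
 */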
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        device_t dev;
        struct virtqueue *vq;
        struct vtblk_request *r __debugvar;
        int error;

        dev = sc->vtblk_dev;
        vq = sc->vtblk_vq;

        if (!virtqueue_empty(vq))
                return (EBUSY);

        error = vtblk_execute_request(sc, req);
        if (error)
                return (error);

        virtqueue_notify(vq, &sc->vtblk_mtx);

        r = virtqueue_poll(vq, NULL);
        KASSERT(r == req, ("unexpected request response"));

        if (req->vbr_ack != VIRTIO_BLK_S_OK) {
                error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
                if (bootverbose)
                        device_printf(dev,
                            "vtblk_poll_request: IO error: %d\n", error);
        }

        return (error);
}

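/*
 * Remove all requests still sitting in the virtqueue, optionally
 * failing their bios, and return them to the free list.
 */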
static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
        struct virtqueue *vq;
        struct vtblk_request *req;
        int last;

        vq = sc->vtblk_vq;
        last = 0;

        while ((req = virtqueue_drain(vq, &last)) != NULL) {
                if (!skip_done)
                        vtblk_bio_error(req->vbr_bp, ENXIO);

                vtblk_enqueue_request(sc, req);
        }

        KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
        struct bio_queue_head *bioq;
        struct vtblk_request *req;
        struct bio *bp;

        bioq = &sc->vtblk_bioq;

        if (sc->vtblk_vq != NULL)
                vtblk_drain_vq(sc, 0);

        while ((req = vtblk_dequeue_ready(sc)) != NULL) {
                vtblk_bio_error(req->vbr_bp, ENXIO);
                vtblk_enqueue_request(sc, req);
        }

        while (bioq_first(bioq) != NULL) {
                bp = bioq_takefirst(bioq);
                vtblk_bio_error(bp, ENXIO);
        }

        vtblk_free_requests(sc);
}

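/*
 * Preallocate the request pool, sized to the virtqueue.
 */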
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;
        int i, nreqs;

        nreqs = virtqueue_size(sc->vtblk_vq);

        /*
         * Preallocate sufficient requests to keep the virtqueue full. Each
         * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
         * the number allocated when indirect descriptors are not available.
         */
        nreqs /= VTBLK_MIN_SEGMENTS;

        for (i = 0; i < nreqs; i++) {
                req = kmalloc(sizeof(struct vtblk_request), M_DEVBUF, M_WAITOK);

                sc->vtblk_request_count++;
                vtblk_enqueue_request(sc, req);
        }

        return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        while ((req = vtblk_dequeue_request(sc)) != NULL) {
                sc->vtblk_request_count--;
                kfree(req, M_DEVBUF);
        }

        KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_free);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
        bzero(req, sizeof(struct vtblk_request));
        TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
        struct vtblk_request *req;

        req = TAILQ_FIRST(&sc->vtblk_req_ready);
        if (req != NULL)
                TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

        return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{
        TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bp, int error)
{
        struct buf *bio_buf = bp->bio_buf;

        /* Mark the buf failed before completing the bio. */
        bio_buf->b_flags |= B_ERROR;
        bio_buf->b_error = error;
        biodone(bp);
}